001: /*
002: * RegistryLoader.java
003: *
004: * Version: $Revision: 1373 $
005: *
006: * Date: $Date: 2005-11-16 15:40:53 -0600 (Wed, 16 Nov 2005) $
007: *
008: * Copyright (c) 2002-2005, Hewlett-Packard Company and Massachusetts
009: * Institute of Technology. All rights reserved.
010: *
011: * Redistribution and use in source and binary forms, with or without
012: * modification, are permitted provided that the following conditions are
013: * met:
014: *
015: * - Redistributions of source code must retain the above copyright
016: * notice, this list of conditions and the following disclaimer.
017: *
018: * - Redistributions in binary form must reproduce the above copyright
019: * notice, this list of conditions and the following disclaimer in the
020: * documentation and/or other materials provided with the distribution.
021: *
022: * - Neither the name of the Hewlett-Packard Company nor the name of the
023: * Massachusetts Institute of Technology nor the names of their
024: * contributors may be used to endorse or promote products derived from
025: * this software without specific prior written permission.
026: *
027: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
028: * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
029: * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
030: * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
031: * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
032: * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
033: * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
034: * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
035: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
036: * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
037: * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
038: * DAMAGE.
039: */
040: package org.dspace.administer;
041:
042: import java.io.File;
043: import java.io.IOException;
044: import java.sql.SQLException;
045:
046: import javax.xml.parsers.DocumentBuilder;
047: import javax.xml.parsers.DocumentBuilderFactory;
048: import javax.xml.parsers.ParserConfigurationException;
049: import javax.xml.transform.TransformerException;
050:
051: import org.apache.log4j.Logger;
052: import org.apache.xpath.XPathAPI;
053: import org.dspace.authorize.AuthorizeException;
054: import org.dspace.content.BitstreamFormat;
055: import org.dspace.content.MetadataField;
056: import org.dspace.content.MetadataSchema;
057: import org.dspace.content.NonUniqueMetadataException;
058: import org.dspace.core.Context;
059: import org.dspace.core.LogManager;
060: import org.w3c.dom.Document;
061: import org.w3c.dom.Node;
062: import org.w3c.dom.NodeList;
063: import org.xml.sax.SAXException;
064:
065: /**
066: * Loads the bitstream format and Dublin Core type registries into the database.
067: * Intended for use as a command-line tool.
068: * <P>
069: * Example usage:
070: * <P>
071: * <code>RegistryLoader -bitstream bitstream-formats.xml</code>
072: * <P>
073: * <code>RegistryLoader -dc dc-types.xml</code>
074: *
075: * @author Robert Tansley
076: * @version $Revision: 1373 $
077: */
078: public class RegistryLoader {
079: /** log4j category */
080: private static Logger log = Logger.getLogger(RegistryLoader.class);
081:
082: /**
083: * For invoking via the command line
084: *
085: * @param argv
086: * command-line arguments
087: */
088: public static void main(String[] argv) throws Exception {
089: String usage = "Usage: " + RegistryLoader.class.getName()
090: + " (-bitstream | -dc) registry-file.xml";
091:
092: Context context = null;
093:
094: try {
095: context = new Context();
096:
097: // Can't update registries anonymously, so we need to turn off
098: // authorisation
099: context.setIgnoreAuthorization(true);
100:
101: // Work out what we're loading
102: if (argv[0].equalsIgnoreCase("-bitstream")) {
103: RegistryLoader.loadBitstreamFormats(context, argv[1]);
104: } else if (argv[0].equalsIgnoreCase("-dc")) {
105: loadDublinCoreTypes(context, argv[1]);
106: } else {
107: System.err.println(usage);
108: }
109:
110: context.complete();
111:
112: System.exit(0);
113: } catch (ArrayIndexOutOfBoundsException ae) {
114: System.err.println(usage);
115:
116: if (context != null) {
117: context.abort();
118: }
119:
120: System.exit(1);
121: } catch (Exception e) {
122: log.fatal(LogManager.getHeader(context,
123: "error_loading_registries", ""), e);
124:
125: if (context != null) {
126: context.abort();
127: }
128:
129: System.exit(1);
130: }
131: }
132:
133: /**
134: * Load Bitstream Format metadata
135: *
136: * @param context
137: * DSpace context object
138: * @param filename
139: * the filename of the XML file to load
140: */
141: public static void loadBitstreamFormats(Context context,
142: String filename) throws SQLException, IOException,
143: ParserConfigurationException, SAXException,
144: TransformerException, AuthorizeException {
145: Document document = loadXML(filename);
146:
147: // Get the nodes corresponding to formats
148: NodeList typeNodes = XPathAPI.selectNodeList(document,
149: "dspace-bitstream-types/bitstream-type");
150:
151: // Add each one as a new format to the registry
152: for (int i = 0; i < typeNodes.getLength(); i++) {
153: Node n = typeNodes.item(i);
154: loadFormat(context, n);
155: }
156:
157: log.info(LogManager.getHeader(context,
158: "load_bitstream_formats", "number_loaded="
159: + typeNodes.getLength()));
160: }
161:
162: /**
163: * Process a node in the bitstream format registry XML file. The node must
164: * be a "bitstream-type" node
165: *
166: * @param context
167: * DSpace context object
168: * @param node
169: * the node in the DOM tree
170: */
171: private static void loadFormat(Context context, Node node)
172: throws SQLException, IOException, TransformerException,
173: AuthorizeException {
174: // Get the values
175: String mimeType = getElementData(node, "mimetype");
176: String shortDesc = getElementData(node, "short_description");
177: String desc = getElementData(node, "description");
178:
179: String supportLevelString = getElementData(node,
180: "support_level");
181: int supportLevel = Integer.parseInt(supportLevelString);
182:
183: String internalString = getElementData(node, "internal");
184: boolean internal = new Boolean(internalString).booleanValue();
185:
186: String[] extensions = getRepeatedElementData(node, "extension");
187:
188: // Create the format object
189: BitstreamFormat format = BitstreamFormat.create(context);
190:
191: // Fill it out with the values
192: format.setMIMEType(mimeType);
193: format.setShortDescription(shortDesc);
194: format.setDescription(desc);
195: format.setSupportLevel(supportLevel);
196: format.setInternal(internal);
197: format.setExtensions(extensions);
198:
199: // Write to database
200: format.update();
201: }
202:
203: /**
204: * Load Dublin Core types
205: *
206: * @param context
207: * DSpace context object
208: * @param filename
209: * the filename of the XML file to load
210: * @throws NonUniqueMetadataException
211: */
212: public static void loadDublinCoreTypes(Context context,
213: String filename) throws SQLException, IOException,
214: ParserConfigurationException, SAXException,
215: TransformerException, AuthorizeException,
216: NonUniqueMetadataException {
217: Document document = loadXML(filename);
218:
219: // Get the nodes corresponding to formats
220: NodeList typeNodes = XPathAPI.selectNodeList(document,
221: "/dspace-dc-types/dc-type");
222:
223: // Add each one as a new format to the registry
224: for (int i = 0; i < typeNodes.getLength(); i++) {
225: Node n = typeNodes.item(i);
226: loadDCType(context, n);
227: }
228:
229: log.info(LogManager.getHeader(context,
230: "load_dublin_core_types", "number_loaded="
231: + typeNodes.getLength()));
232: }
233:
234: /**
235: * Process a node in the bitstream format registry XML file. The node must
236: * be a "bitstream-type" node
237: *
238: * @param context
239: * DSpace context object
240: * @param node
241: * the node in the DOM tree
242: * @throws NonUniqueMetadataException
243: */
244: private static void loadDCType(Context context, Node node)
245: throws SQLException, IOException, TransformerException,
246: AuthorizeException, NonUniqueMetadataException {
247: // Get the values
248: String schema = getElementData(node, "schema");
249: String element = getElementData(node, "element");
250: String qualifier = getElementData(node, "qualifier");
251: String scopeNote = getElementData(node, "scope_note");
252:
253: // If the schema is not provided default to DC
254: if (schema == null) {
255: schema = MetadataSchema.DC_SCHEMA;
256: }
257:
258: // Find the matching schema object
259: MetadataSchema schemaObj = MetadataSchema.find(context, schema);
260:
261: MetadataField field = new MetadataField();
262: field.setSchemaID(schemaObj.getSchemaID());
263: field.setElement(element);
264: field.setQualifier(qualifier);
265: field.setScopeNote(scopeNote);
266: field.create(context);
267: }
268:
269: // ===================== XML Utility Methods =========================
270:
271: /**
272: * Load in the XML from file.
273: *
274: * @param filename
275: * the filename to load from
276: *
277: * @return the DOM representation of the XML file
278: */
279: private static Document loadXML(String filename)
280: throws IOException, ParserConfigurationException,
281: SAXException {
282: DocumentBuilder builder = DocumentBuilderFactory.newInstance()
283: .newDocumentBuilder();
284:
285: return builder.parse(new File(filename));
286: }
287:
288: /**
289: * Get the CDATA of a particular element. For example, if the XML document
290: * contains:
291: * <P>
292: * <code>
293: * <foo><mimetype>application/pdf</mimetype></foo>
294: * </code>
295: * passing this the <code>foo</code> node and <code>mimetype</code> will
296: * return <code>application/pdf</code>.
297: * </P>
298: * Why this isn't a core part of the XML API I do not know...
299: *
300: * @param parentElement
301: * the element, whose child element you want the CDATA from
302: * @param childName
303: * the name of the element you want the CDATA from
304: *
305: * @return the CDATA as a <code>String</code>
306: */
307: private static String getElementData(Node parentElement,
308: String childName) throws TransformerException {
309: // Grab the child node
310: Node childNode = XPathAPI.selectSingleNode(parentElement,
311: childName);
312:
313: if (childNode == null) {
314: // No child node, so no values
315: return null;
316: }
317:
318: // Get the #text
319: Node dataNode = childNode.getFirstChild();
320:
321: if (dataNode == null) {
322: return null;
323: }
324:
325: // Get the data
326: String value = dataNode.getNodeValue().trim();
327:
328: return value;
329: }
330:
331: /**
332: * Get repeated CDATA for a particular element. For example, if the XML
333: * document contains:
334: * <P>
335: * <code>
336: * <foo>
337: * <bar>val1</bar>
338: * <bar>val2</bar>
339: * </foo>
340: * </code>
341: * passing this the <code>foo</code> node and <code>bar</code> will
342: * return <code>val1</code> and <code>val2</code>.
343: * </P>
344: * Why this also isn't a core part of the XML API I do not know...
345: *
346: * @param parentElement
347: * the element, whose child element you want the CDATA from
348: * @param childName
349: * the name of the element you want the CDATA from
350: *
351: * @return the CDATA as a <code>String</code>
352: */
353: private static String[] getRepeatedElementData(Node parentElement,
354: String childName) throws TransformerException {
355: // Grab the child node
356: NodeList childNodes = XPathAPI.selectNodeList(parentElement,
357: childName);
358:
359: String[] data = new String[childNodes.getLength()];
360:
361: for (int i = 0; i < childNodes.getLength(); i++) {
362: // Get the #text node
363: Node dataNode = childNodes.item(i).getFirstChild();
364:
365: // Get the data
366: data[i] = dataNode.getNodeValue().trim();
367: }
368:
369: return data;
370: }
371: }
|