001: /*
002: * METSManifest.java
003: *
004: * Version: $Revision: 1446 $
005: *
006: * Date: $Date: 2006-03-16 18:04:39 -0600 (Thu, 16 Mar 2006) $
007: *
008: * Copyright (c) 2002-2005, Hewlett-Packard Company and Massachusetts
009: * Institute of Technology. All rights reserved.
010: *
011: * Redistribution and use in source and binary forms, with or without
012: * modification, are permitted provided that the following conditions are
013: * met:
014: *
015: * - Redistributions of source code must retain the above copyright
016: * notice, this list of conditions and the following disclaimer.
017: *
018: * - Redistributions in binary form must reproduce the above copyright
019: * notice, this list of conditions and the following disclaimer in the
020: * documentation and/or other materials provided with the distribution.
021: *
022: * - Neither the name of the Hewlett-Packard Company nor the name of the
023: * Massachusetts Institute of Technology nor the names of their
024: * contributors may be used to endorse or promote products derived from
025: * this software without specific prior written permission.
026: *
027: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
028: * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
029: * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
030: * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
031: * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
032: * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
033: * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
034: * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
035: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
036: * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
037: * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
038: * DAMAGE.
039: */
040:
041: package org.dspace.content.packager;
042:
043: import java.io.ByteArrayInputStream;
044: import java.io.File;
045: import java.io.IOException;
046: import java.io.InputStream;
047: import java.sql.SQLException;
048: import java.util.ArrayList;
049: import java.util.Enumeration;
050: import java.util.Iterator;
051: import java.util.List;
052:
053: import org.apache.commons.codec.binary.Base64;
054: import org.apache.log4j.Logger;
055: import org.dspace.authorize.AuthorizeException;
056: import org.dspace.content.Bitstream;
057: import org.dspace.content.DSpaceObject;
058: import org.dspace.content.Item;
059: import org.dspace.content.crosswalk.CrosswalkException;
060: import org.dspace.content.crosswalk.CrosswalkObjectNotSupported;
061: import org.dspace.content.crosswalk.MetadataValidationException;
062: import org.dspace.content.crosswalk.IngestionCrosswalk;
063: import org.dspace.core.ConfigurationManager;
064: import org.dspace.core.Constants;
065: import org.dspace.core.Context;
066: import org.dspace.core.PluginManager;
067: import org.jdom.Document;
068: import org.jdom.Element;
069: import org.jdom.JDOMException;
070: import org.jdom.Namespace;
071: import org.jdom.input.SAXBuilder;
072: import org.jdom.output.Format;
073: import org.jdom.output.XMLOutputter;
074: import org.jdom.xpath.XPath;
075:
076: /**
077: * <P>
078: * Manage the METS manifest document for METS importer classes,
079: * such as the package importer <code>org.dspace.content.packager.MetsSubmission</code>
080: * and the federated importer <code>org.dspace.app.mets.FederatedMETSImport</code>
081: * </P>
082: * <P>
083: * It can parse the METS document, build an internal model, and give the importers
084: * access to that model. It also crosswalks
085: * all of the descriptive and administrative metadata in the METS
086: * manifest into the target DSpace Item, under control of the importer.
087: * </P>
088: *
089: * <P>
090: * It reads the following DSpace Configuration entries:
091: * </P>
092: * <UL>
093: * <LI>Local XML schema (XSD) declarations, in the general format:
094: * <br><code>mets.xsd.<em>identifier</em> = <em>namespace</em> <em>xsd-URL</em></code>
095: * <br> eg. <code>mets.xsd.dc = http://purl.org/dc/elements/1.1/ dc.xsd</code>
096: * <br>Add a separate config entry for each schema.
097: * </LI>
 * <LI>Crosswalk plugin mappings:
099: * These tell it the name of the crosswalk plugin to invoke for metadata sections
100: * with a particular value of <code>MDTYPE</code> (or <code>OTHERMDTYPE</code>)
101: * By default, the crosswalk mechanism will look for a plugin with the
102: * same name as the metadata type (e.g. <code>"MODS"</code>,
103: * <code>"DC"</code>). This example line invokes the <code>QDC</code>
104: * plugin when <code>MDTYPE="DC"</code>
105: * <br><code>mets.submission.crosswalk.DC = QDC </code>
106: * <br> general format is:
107: * <br><code>mets.submission.crosswalk.<em>mdType</em> = <em>pluginName</em> </code>
108: * </LI>
109: * </UL>
110: *
111: *
112: * @author Robert Tansley
113: * @author WeiHua Huang
114: * @author Rita Lee
115: * @author Larry Stone
116: * @see org.dspace.content.packager.MetsSubmission
117: * @see org.dspace.app.mets.FederatedMETSImport
118: */
119: public class METSManifest {
120: /**
121: * Callback interface to retrieve data streams in mdRef elements.
122: * "Package" or file reader returns an input stream for the
123: * given relative path, e.g. to dereference <code>mdRef</code> elements.
124: */
125: public interface Mdref {
126: /**
127: * Make the contents of an external resource mentioned in
128: * an <code>mdRef</code> element available as an <code>InputStream</code>.
129: * The implementation must use the information in the
130: * <code>mdRef</code> element, and the state in the object that
131: * implements this interface, to find the actual metadata content.
132: * <p>
133: * For example, an implementation that ingests a directory of
134: * files on the local filesystem would get a relative pathname
135: * out of the <code>mdRef</code> and open that file.
136: *
137: * @param mdRef JDOM element of mdRef in the METS manifest.
138: * @return stream containing the metadata mentioned in mdRef.
139: * @throw MetadataValidationException if the mdRef is unacceptable or missing required information.
140: * @throw IOException if it is returned by services called by this method.
141: * @throw SQLException if it is returned by services called by this method.
142: * @throw AuthorizeException if it is returned by services called by this method.
143: */
144: public InputStream getInputStream(Element mdRef)
145: throws MetadataValidationException, IOException,
146: SQLException, AuthorizeException;
147: }
148:
149: /** log4j category */
150: private static Logger log = Logger.getLogger(METSManifest.class);
151:
152: /** Canonical filename of METS manifest within a package or as a bitstream. */
153: public final static String MANIFEST_FILE = "mets.xml";
154:
155: /** Prefix of DSpace configuration lines that map METS metadata type to
156: * crosswalk plugin names.
157: */
158: private final static String CONFIG_METADATA_PREFIX = "mets.submission.crosswalk.";
159:
160: /** prefix of config lines identifying local XML Schema (XSD) files */
161: private final static String CONFIG_XSD_PREFIX = "mets.xsd.";
162:
163: /** Dublin core element namespace */
164: private static Namespace dcNS = Namespace
165: .getNamespace("http://purl.org/dc/elements/1.1/");
166:
167: /** Dublin core term namespace (for qualified DC) */
168: private static Namespace dcTermNS = Namespace
169: .getNamespace("http://purl.org/dc/terms/");
170:
171: /** METS namespace -- includes "mets" prefix for use in XPaths */
172: public static Namespace metsNS = Namespace.getNamespace("mets",
173: "http://www.loc.gov/METS/");
174:
175: /** XLink namespace -- includes "xlink" prefix prefix for use in XPaths */
176: private static Namespace xlinkNS = Namespace.getNamespace("xlink",
177: "http://www.w3.org/1999/xlink");
178:
179: /** root element of the current METS manifest. */
180: private Element mets = null;
181:
182: /** all mdRef elements in the manifest */
183: private List mdFiles = null;
184:
185: /** <file> elements in "original" filegroup (bundle) */
186: private List contentFiles = null;
187:
188: /** builder to use for mdRef streams, inherited from create() */
189: private SAXBuilder parser = null;
190:
191: // Create list of local schemas at load time, since it depends only
192: // on the DSpace configuration.
193: private static String localSchemas;
194: static {
195: String dspace_dir = ConfigurationManager
196: .getProperty("dspace.dir");
197: File xsdPath1 = new File(dspace_dir + "/config/schemas/");
198: File xsdPath2 = new File(dspace_dir + "/config/");
199:
200: Enumeration pe = ConfigurationManager.propertyNames();
201: StringBuffer result = new StringBuffer();
202: while (pe.hasMoreElements()) {
203: // config lines have the format:
204: // mets.xsd.{identifier} = {namespace} {xsd-URL}
205: // e.g.
206: // mets.xsd.dc = http://purl.org/dc/elements/1.1/ dc.xsd
207: // (filename is relative to {dspace_dir}/config/schemas/)
208: String key = (String) pe.nextElement();
209: if (key.startsWith(CONFIG_XSD_PREFIX)) {
210: String spec = ConfigurationManager.getProperty(key);
211: String val[] = spec.trim().split("\\s+");
212: if (val.length == 2) {
213: File xsd = new File(xsdPath1, val[1]);
214: if (!xsd.exists())
215: xsd = new File(xsdPath2, val[1]);
216: if (!xsd.exists())
217: log
218: .warn("Schema file not found for config entry=\""
219: + spec + "\"");
220: else {
221: try {
222: String u = xsd.toURL().toString();
223: if (result.length() > 0)
224: result.append(" ");
225: result.append(val[0]).append(" ").append(u);
226: } catch (java.net.MalformedURLException e) {
227: log.warn("Skipping badly formed XSD URL: "
228: + e.toString());
229: }
230: }
231: } else
232: log
233: .warn("Schema config entry has wrong format, entry=\""
234: + spec + "\"");
235: }
236: }
237: localSchemas = result.toString();
238: log.debug("Got local schemas = \"" + localSchemas + "\"");
239: }
240:
/**
 * Private constructor; instances are obtained through create().
 * @param builder XML parser, kept for parsing mdRef'd files and binData
 * @param mets root element of the parsed METS document
 */
private METSManifest(SAXBuilder builder, Element mets) {
    this.parser = builder;
    this.mets = mets;
}
251:
252: /**
253: * Create a new manifest object from a serialized METS XML document.
254: * Parse document read from the input stream, optionally validating.
255: * @param is input stream containing serialized XML
256: * @param validate if true, enable XML validation using schemas
257: * in document. Also validates any sub-documents.
258: * @throws MetadataValidationException if there is any error parsing
259: * or validating the METS.
260: * @return new METSManifest object.
261: */
262: public static METSManifest create(InputStream is, boolean validate)
263: throws IOException, MetadataValidationException {
264: SAXBuilder builder = new SAXBuilder(validate);
265:
266: // Set validation feature
267: if (validate)
268: builder.setFeature(
269: "http://apache.org/xml/features/validation/schema",
270: true);
271:
272: // Tell the parser where local copies of schemas are, to speed up
273: // validation. Local XSDs are identified in the configuration file.
274: if (localSchemas.length() > 0)
275: builder
276: .setProperty(
277: "http://apache.org/xml/properties/schema/external-schemaLocation",
278: localSchemas);
279:
280: // Parse the METS file
281: Document metsDocument;
282:
283: try {
284: metsDocument = builder.build(is);
285:
286: // XXX for temporary debugging
287: /*
288: XMLOutputter outputPretty = new XMLOutputter(Format.getPrettyFormat());
289: log.debug("Got METS DOCUMENT:");
290: log.debug(outputPretty.outputString(metsDocument));
291: */
292: } catch (JDOMException je) {
293: throw new MetadataValidationException(
294: "Error validating METS in " + is.toString(), je);
295: }
296:
297: return new METSManifest(builder, metsDocument.getRootElement());
298: }
299:
300: /**
301: * Gets name of the profile to which this METS document conforms.
302: * @return value the PROFILE attribute of mets element, or null if none.
303: */
304: public String getProfile() {
305: return mets.getAttributeValue("PROFILE");
306: }
307:
308: /**
309: * Gets all <code>file</code> elements which make up
310: * the item's content.
311: * @return a List of <code>Element</code>s.
312: */
313: public List getContentFiles() throws MetadataValidationException {
314: if (contentFiles != null)
315: return contentFiles;
316:
317: Element fileSec = mets.getChild("fileSec", metsNS);
318: if (fileSec == null)
319: throw new MetadataValidationException(
320: "Invalid METS Manifest: DSpace requires a fileSec element, but it is missing.");
321:
322: contentFiles = new ArrayList();
323: Iterator fgi = fileSec.getChildren("fileGrp", metsNS)
324: .iterator();
325: while (fgi.hasNext()) {
326: Element fg = (Element) fgi.next();
327: Iterator fi = fg.getChildren("file", metsNS).iterator();
328: while (fi.hasNext()) {
329: Element f = (Element) fi.next();
330: contentFiles.add(f);
331: }
332: }
333: return contentFiles;
334: }
335:
336: /**
337: * Gets list of all <code>mdRef</code> elements in the METS
338: * document. Used by ingester to e.g. check that all
339: * required files are present.
340: * @return a List of <code>Element</code>s.
341: */
342: public List getMdFiles() throws MetadataValidationException {
343: if (mdFiles == null) {
344: try {
345: // Use a special namespace with known prefix
346: // so we get the right prefix.
347: XPath xpath = XPath
348: .newInstance("descendant::mets:mdRef");
349: xpath.addNamespace(metsNS);
350: mdFiles = xpath.selectNodes(mets);
351: } catch (JDOMException je) {
352: throw new MetadataValidationException(
353: "Failed while searching for mdRef elements in manifest: ",
354: je);
355: }
356: }
357: return mdFiles;
358: }
359:
360: /**
361: * Get the "original" file element for a derived file.
362: * Finds the original from which this was derived by matching the GROUPID
363: * attribute that binds it to its original. For instance, the file for
364: * a thumbnail image would have the same GROUPID as its full-size version.
365: * <p>
366: * NOTE: This pattern of relating derived files through the GROUPID
367: * attribute is peculiar to the DSpace METS SIP profile, and may not be
368: * generally useful with other sorts of METS documents.
369: * @param file METS file element of derived file
370: * @return file Element of original or null if none found.
371: */
372: public Element getOriginalFile(Element file) {
373: String groupID = file.getAttributeValue("GROUPID");
374: if (groupID == null || groupID.equals(""))
375: return null;
376:
377: try {
378: XPath xpath = XPath
379: .newInstance("mets:fileSec/mets:fileGrp[@USE=\"CONTENT\"]/mets:file[@GROUPID=\""
380: + groupID + "\"]");
381: xpath.addNamespace(metsNS);
382: List oFiles = xpath.selectNodes(mets);
383: if (oFiles.size() > 0) {
384: log.debug("Got ORIGINAL file for derived="
385: + file.toString());
386: return (Element) oFiles.get(0);
387: } else
388: return null;
389: } catch (JDOMException je) {
390: log
391: .warn("Got exception on XPATH looking for Original file, "
392: + je.toString());
393: return null;
394: }
395: }
396:
397: // translate bundle name from METS to DSpace; METS may be "CONTENT"
398: // or "ORIGINAL" for the DSPace "ORIGINAL", rest are left alone.
399: private static String normalizeBundleName(String in) {
400: if (in.equals("CONTENT"))
401: return Constants.CONTENT_BUNDLE_NAME;
402: else if (in.equals("MANIFESTMD"))
403: return Constants.METADATA_BUNDLE_NAME;
404: return in;
405: }
406:
407: /**
408: * Get the DSpace bundle name corresponding to the <code>USE</code> attribute of the file group enclosing this <code>file</code> element.
409: * @return DSpace bundle name
410: * @throws MetadataValidationException when there is no USE attribute on the enclosing fileGrp.
411: */
412: public static String getBundleName(Element file)
413: throws MetadataValidationException {
414: Element fg = file.getParentElement();
415: String fgUse = fg.getAttributeValue("USE");
416: if (fgUse == null)
417: throw new MetadataValidationException(
418: "Invalid METS Manifest: every fileGrp element must have a USE attribute.");
419: return normalizeBundleName(fgUse);
420: }
421:
422: /**
423: * Get the "local" file name of this <code>file</code> or <code>mdRef</code> element.
424: * By "local" we mean the reference to the actual resource containing
425: * the data for this file, e.g. a relative path within a Zip or tar archive
426: * if the METS is serving as a manifest for that sort of package.
427: * @return "local" file name (i.e. relative to package or content
428: * directory) corresponding to this <code>file</code> or <code>mdRef</code> element.
429: * @throws MetadataValidationException when there is not enough information to find a resource identifier.
430: */
431: public static String getFileName(Element file)
432: throws MetadataValidationException {
433: Element ref;
434: if (file.getName().equals("file")) {
435: ref = file.getChild("FLocat", metsNS);
436: if (ref == null) {
437: // check for forbidden FContent child first:
438: if (file.getChild("FContent", metsNS) == null)
439: throw new MetadataValidationException(
440: "Invalid METS Manifest: Every file element must have FLocat child.");
441: else
442: throw new MetadataValidationException(
443: "Invalid METS Manifest: file element has forbidden FContent child, only FLocat is allowed.");
444: }
445: } else if (file.getName().equals("mdRef"))
446: ref = file;
447: else
448: throw new MetadataValidationException(
449: "getFileName() called with recognized element type: "
450: + file.toString());
451: String loctype = ref.getAttributeValue("LOCTYPE");
452: if (loctype != null && loctype.equals("URL")) {
453: String result = ref.getAttributeValue("href", xlinkNS);
454: if (result == null)
455: throw new MetadataValidationException(
456: "Invalid METS Manifest: FLocat/mdRef is missing the required xlink:href attribute.");
457: return result;
458: }
459: throw new MetadataValidationException(
460: "Invalid METS Manifest: FLocat/mdRef does not have LOCTYPE=\"URL\" attribute.");
461: }
462:
463: /**
464: * Returns file element corresponding to primary bitstream.
465: * There is <i>ONLY</i> a primary bitstream if the first <code>div</code> under
466: * first </code>structMap</code> has an </code>fptr</code>.
467: *
468: * @return file element of Item's primary bitstream, or null if there is none.
469: */
470: public Element getPrimaryBitstream()
471: throws MetadataValidationException {
472: Element firstDiv = getFirstDiv();
473: Element fptr = firstDiv.getChild("fptr", metsNS);
474: if (fptr == null)
475: return null;
476: String id = fptr.getAttributeValue("FILEID");
477: if (id == null)
478: throw new MetadataValidationException(
479: "fptr for Primary Bitstream is missing the required FILEID attribute.");
480: Element result = getElementByXPath(
481: "descendant::mets:file[@ID=\"" + id + "\"]", false);
482: if (result == null)
483: throw new MetadataValidationException(
484: "Cannot find file element for Primary Bitstream: looking for ID="
485: + id);
486: return result;
487: }
488:
489: /** Get the metadata type from within a *mdSec element.
490: * @return metadata type name.
491: */
492: public String getMdType(Element mdSec)
493: throws MetadataValidationException {
494: Element md = mdSec.getChild("mdRef", metsNS);
495: if (md == null)
496: md = mdSec.getChild("mdWrap", metsNS);
497: if (md == null)
498: throw new MetadataValidationException(
499: "Invalid METS Manifest: ?mdSec element has neither mdRef nor mdWrap child.");
500: String result = md.getAttributeValue("MDTYPE");
501: if (result != null && result.equals("OTHER"))
502: result = md.getAttributeValue("OTHERMDTYPE");
503: if (result == null)
504: throw new MetadataValidationException(
505: "Invalid METS Manifest: "
506: + md.getName()
507: + " has no MDTYPE or OTHERMDTYPE attribute.");
508: return result;
509: }
510:
511: /**
512: * Returns MIME type of metadata content, if available.
513: * @return MIMEtype word, or null if none is available.
514: */
515: public String getMdContentMimeType(Element mdSec)
516: throws MetadataValidationException {
517: Element mdWrap = mdSec.getChild("mdWrap", metsNS);
518: if (mdWrap != null) {
519: String mimeType = mdWrap.getAttributeValue("MIMETYPE");
520: if (mimeType == null
521: && mdWrap.getChild("xmlData", metsNS) != null)
522: mimeType = "text/xml";
523: return mimeType;
524: }
525: Element mdRef = mdSec.getChild("mdRef", metsNS);
526: if (mdRef != null)
527: return mdRef.getAttributeValue("MIMETYPE");
528: return null;
529: }
530:
531: /**
532: * Return contents of *md element as List of XML Element objects.
533: * Gets content, dereferecing mdRef if necessary, or decoding and parsing
534: * a binData that contains XML.
535: * @return contents of metadata section, or empty list if no XML content is available.
536: * @throws MetadataValidationException if METS is invalid, or there is an error parsing the XML.
537: */
538: public List getMdContentAsXml(Element mdSec, Mdref callback)
539: throws MetadataValidationException, IOException,
540: SQLException, AuthorizeException {
541: try {
542: Element mdRef = null;
543: Element mdWrap = mdSec.getChild("mdWrap", metsNS);
544: if (mdWrap != null) {
545: Element xmlData = mdWrap.getChild("xmlData", metsNS);
546: if (xmlData == null) {
547: Element bin = mdWrap.getChild("binData", metsNS);
548: if (bin == null)
549: throw new MetadataValidationException(
550: "Invalid METS Manifest: mdWrap element with neither xmlData nor binData child.");
551:
552: // if binData is actually XML, return it; otherwise ignore.
553: else {
554: String mimeType = mdWrap
555: .getAttributeValue("MIMETYPE");
556: if (mimeType != null
557: && mimeType
558: .equalsIgnoreCase("text/xml")) {
559: byte value[] = Base64.decodeBase64(bin
560: .getText().getBytes());
561: Document mdd = parser
562: .build(new ByteArrayInputStream(
563: value));
564: List result = new ArrayList(1);
565: result.add(mdd.getRootElement());
566: return result;
567: } else {
568: log
569: .warn("Ignoring binData section because MIMETYPE is not XML, but: "
570: + mimeType);
571: return new ArrayList(0);
572: }
573: }
574: } else {
575: return xmlData.getChildren();
576: }
577: } else if ((mdRef = mdSec.getChild("mdRef", metsNS)) != null) {
578: String mimeType = mdRef.getAttributeValue("MIMETYPE");
579: if (mimeType != null
580: && mimeType.equalsIgnoreCase("text/xml")) {
581: Document mdd = parser.build(callback
582: .getInputStream(mdRef));
583: List result = new ArrayList(1);
584: result.add(mdd.getRootElement());
585: return result;
586: } else {
587: log
588: .warn("Ignoring mdRef section because MIMETYPE is not XML, but: "
589: + mimeType);
590: return new ArrayList(0);
591: }
592: } else
593: throw new MetadataValidationException(
594: "Invalid METS Manifest: ?mdSec element with neither mdRef nor mdWrap child.");
595: } catch (JDOMException je) {
596: throw new MetadataValidationException(
597: "Error parsing or validating metadata section in mdRef or binData within "
598: + mdSec.toString(), je);
599: }
600:
601: }
602:
603: /**
604: * Return contents of *md element as stream.
605: * Gets content, dereferecing mdRef if necessary, or decoding
606: * a binData element if necessary.
607: * @return Stream containing contents of metadata section. Never returns null.
608: * @throws MetadataValidationException if METS format does not contain any metadata.
609: */
610: public InputStream getMdContentAsStream(Element mdSec,
611: Mdref callback) throws MetadataValidationException,
612: IOException, SQLException, AuthorizeException {
613: Element mdRef = null;
614: Element mdWrap = mdSec.getChild("mdWrap", metsNS);
615: if (mdWrap != null) {
616: Element xmlData = mdWrap.getChild("xmlData", metsNS);
617: if (xmlData == null) {
618: Element bin = mdWrap.getChild("binData", metsNS);
619: if (bin == null)
620: throw new MetadataValidationException(
621: "Invalid METS Manifest: mdWrap element with neither xmlData nor binData child.");
622:
623: else {
624: byte value[] = Base64.decodeBase64(bin.getText()
625: .getBytes());
626: return new ByteArrayInputStream(value);
627: }
628: } else {
629: XMLOutputter outputPretty = new XMLOutputter(Format
630: .getPrettyFormat());
631: return new ByteArrayInputStream(outputPretty
632: .outputString(xmlData.getChildren()).getBytes());
633: }
634: } else if ((mdRef = mdSec.getChild("mdRef", metsNS)) != null) {
635: return callback.getInputStream(mdRef);
636: } else
637: throw new MetadataValidationException(
638: "Invalid METS Manifest: ?mdSec element with neither mdRef nor mdWrap child.");
639: }
640:
641: // special call to crosswalk the guts of a metadata *Sec (dmdSec, amdSec)
642: // because mdRef and mdWrap have to be handled differently.
643: // It's a lot like getMdContentAsXml but cannot use that because xwalk
644: // should be called with root element OR list depending on what was given.
645: private void crosswalkMdContent(Element mdSec, Mdref callback,
646: IngestionCrosswalk xwalk, Context context, DSpaceObject dso)
647: throws CrosswalkException, IOException, SQLException,
648: AuthorizeException {
649: List xml = getMdContentAsXml(mdSec, callback);
650:
651: // if we get inappropriate metadata, e.g. PREMIS for Item, let it go.
652: try {
653: xwalk.ingest(context, dso, xml);
654: } catch (CrosswalkObjectNotSupported e) {
655: log
656: .warn("Skipping metadata for inappropriate type of object: Object="
657: + dso.toString()
658: + ", error="
659: + e.toString());
660: }
661: }
662:
663: // return first <div> of first <structMap>;
664: // in DSpace profile, this is where item-wide dmd and other metadata
665: // lives as IDrefs.
666: private Element getFirstDiv() throws MetadataValidationException {
667: Element sm = mets.getChild("structMap", metsNS);
668: if (sm == null)
669: throw new MetadataValidationException(
670: "METS document is missing the required structMap element.");
671:
672: Element result = sm.getChild("div", metsNS);
673: if (result == null)
674: throw new MetadataValidationException(
675: "METS document is missing the required first div element in first structMap.");
676:
677: log.debug("Got firstDiv result=" + result.toString());
678: return (Element) result;
679: }
680:
681: // return a single Element node found by one-off path.
682: // use only when path varies each time you call it.
683: private Element getElementByXPath(String path, boolean nullOk)
684: throws MetadataValidationException {
685: try {
686: XPath xpath = XPath.newInstance(path);
687: xpath.addNamespace(metsNS);
688: xpath.addNamespace(xlinkNS);
689: Object result = xpath.selectSingleNode(mets);
690: if (result == null && nullOk)
691: return null;
692: else if (result instanceof Element)
693: return (Element) result;
694: else
695: throw new MetadataValidationException(
696: "METSManifest: Failed to resolve XPath, path=\""
697: + path + "\"");
698: } catch (JDOMException je) {
699: throw new MetadataValidationException(
700: "METSManifest: Failed to resolve XPath, path=\""
701: + path + "\"", je);
702: }
703: }
704:
705: // Find crosswalk for the indicated metadata type (e.g. "DC", "MODS")
706: // The crosswalk plugin name MAY be indirected in config file,
707: // through an entry like
708: // mets.submission.crosswalk.{mdType} = {pluginName}
709: // e.g.
710: // mets.submission.crosswalk.DC = mysite-QDC
711: private IngestionCrosswalk getCrosswalk(String type) {
712: String xwalkName = ConfigurationManager
713: .getProperty(CONFIG_METADATA_PREFIX + type);
714: if (xwalkName == null)
715: xwalkName = type;
716: return (IngestionCrosswalk) PluginManager.getNamedPlugin(
717: IngestionCrosswalk.class, xwalkName);
718: }
719:
720: /**
721: * Gets all dmdSec elements containing metadata for the DSpace Item.
722: *
723: * @return array of Elements, each a dmdSec. May be empty but NOT null.
724: * @throws MetadataValidationException if the METS is missing a reference to item-wide
725: * DMDs in the correct place.
726: */
727: public Element[] getItemDmds() throws MetadataValidationException {
728: // div@DMDID is actually IDREFS, a space-separated list of IDs:
729: Element firstDiv = getFirstDiv();
730: String dmds = firstDiv.getAttributeValue("DMDID");
731: if (dmds == null)
732: throw new MetadataValidationException(
733: "Invalid METS: Missing reference to Item descriptive metadata, first div on first structmap must have a DMDID attribute.");
734: String dmdID[] = dmds.split("\\s+");
735: Element result[] = new Element[dmdID.length];
736:
737: for (int i = 0; i < dmdID.length; ++i)
738: result[i] = getElementByXPath("mets:dmdSec[@ID=\""
739: + dmdID[i] + "\"]", false);
740: return result;
741: }
742:
743: /**
744: * Return rights metadata section(s) relevant to item as a whole.
745: * @return array of rightsMd elements, possibly empty but never null.
746: * @throws MetadataValidationException if METS is invalid, e.g. referenced amdSec is missing.
747: */
748: public Element[] getItemRightsMD()
749: throws MetadataValidationException {
750: // div@ADMID is actually IDREFS, a space-separated list of IDs:
751: Element firstDiv = getFirstDiv();
752: String amds = firstDiv.getAttributeValue("ADMID");
753: if (amds == null) {
754: log.debug("getItemRightsMD: No ADMID references found.");
755: return new Element[0];
756: }
757: String amdID[] = amds.split("\\s+");
758: List resultList = new ArrayList();
759: for (int i = 0; i < amdID.length; ++i) {
760: List rmds = getElementByXPath(
761: "mets:amdSec[@ID=\"" + amdID[i] + "\"]", false)
762: .getChildren("rightsMD", metsNS);
763: if (rmds.size() > 0)
764: resultList.addAll(rmds);
765: }
766: return (Element[]) resultList.toArray(new Element[resultList
767: .size()]);
768: }
769:
770: /**
771: * Invokes appropriate crosswalks on Item-wide descriptive metadata.
772: */
773: public void crosswalkItem(Context context, Item item, Element dmd,
774: Mdref callback) throws MetadataValidationException,
775: CrosswalkException, IOException, SQLException,
776: AuthorizeException {
777: String type = getMdType(dmd);
778: IngestionCrosswalk xwalk = getCrosswalk(type);
779:
780: if (xwalk == null)
781: throw new MetadataValidationException(
782: "Cannot process METS Manifest: "
783: + "No crosswalk found for MDTYPE=" + type);
784: crosswalkMdContent(dmd, callback, xwalk, context, item);
785: }
786:
787: /**
788: * Crosswalk the metadata associated with a particular <code>file</code>
789: * element into the bitstream it corresponds to.
790: * @param context a dspace context.
791: * @param bs bitstream target of the crosswalk
792: * @param fileId value of ID attribute in the file element responsible
793: * for the contents of that bitstream.
794: */
795: public void crosswalkBitstream(Context context,
796: Bitstream bitstream, String fileId, Mdref callback)
797: throws MetadataValidationException, CrosswalkException,
798: IOException, SQLException, AuthorizeException {
799: Element file = getElementByXPath("descendant::mets:file[@ID=\""
800: + fileId + "\"]", false);
801: if (file == null)
802: throw new MetadataValidationException(
803: "Failed in Bitstream crosswalk, Could not find file element with ID="
804: + fileId);
805:
806: // In DSpace METS SIP spec, admin metadata is only "highly
807: // recommended", not "required", so it is OK if there is no ADMID.
808: String amds = file.getAttributeValue("ADMID");
809: if (amds == null) {
810: log.warn("Got no bitstream ADMID, file@ID=" + fileId);
811: return;
812: }
813: String amdID[] = amds.split("\\s+");
814: for (int i = 0; i < amdID.length; ++i) {
815: List techMDs = getElementByXPath(
816: "mets:amdSec[@ID=\"" + amdID[i] + "\"]", false)
817: .getChildren("techMD", metsNS);
818: Iterator ti = techMDs.iterator();
819: while (ti.hasNext()) {
820: Element techMD = (Element) ti.next();
821: if (techMD != null) {
822: String type = getMdType(techMD);
823: IngestionCrosswalk xwalk = getCrosswalk(type);
824: log.debug("Got bitstream techMD of type=" + type
825: + ", for file ID=" + fileId);
826:
827: if (xwalk == null)
828: throw new MetadataValidationException(
829: "Cannot process METS Manifest: "
830: + "No crosswalk found for techMD MDTYPE="
831: + type);
832: crosswalkMdContent(techMD, callback, xwalk,
833: context, bitstream);
834: }
835: }
836: }
837: }
838:
839: /**
840: * Find Handle (if any) identifier labelling this manifest.
841: * @return handle (never null)
842: * @throws MetadataValidationException if no handle available.
843: */
844: public String getHandle() throws MetadataValidationException {
845: // TODO: XXX Make configurable? Handle optionally passed in?
846: // FIXME: Not sure if OBJID is really the right place
847:
848: String handle = mets.getAttributeValue("OBJID");
849:
850: if (handle != null && handle.startsWith("hdl:")) {
851: return handle.substring(4);
852: } else {
853: throw new MetadataValidationException(
854: "Item has no valid Handle (OBJID)");
855: }
856: }
857: }
|