Source Code Cross Referenced for METSManifest.java in  » Content-Management-System » dspace » org » dspace » content » packager » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Sevlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Content Management System » dspace » org.dspace.content.packager 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


001:        /*
002:         * METSManifest.java
003:         *
004:         * Version: $Revision: 1446 $
005:         *
006:         * Date: $Date: 2006-03-16 18:04:39 -0600 (Thu, 16 Mar 2006) $
007:         *
008:         * Copyright (c) 2002-2005, Hewlett-Packard Company and Massachusetts
009:         * Institute of Technology.  All rights reserved.
010:         *
011:         * Redistribution and use in source and binary forms, with or without
012:         * modification, are permitted provided that the following conditions are
013:         * met:
014:         *
015:         * - Redistributions of source code must retain the above copyright
016:         * notice, this list of conditions and the following disclaimer.
017:         *
018:         * - Redistributions in binary form must reproduce the above copyright
019:         * notice, this list of conditions and the following disclaimer in the
020:         * documentation and/or other materials provided with the distribution.
021:         *
022:         * - Neither the name of the Hewlett-Packard Company nor the name of the
023:         * Massachusetts Institute of Technology nor the names of their
024:         * contributors may be used to endorse or promote products derived from
025:         * this software without specific prior written permission.
026:         *
027:         * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
028:         * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
029:         * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
030:         * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
031:         * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
032:         * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
033:         * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
034:         * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
035:         * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
036:         * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
037:         * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
038:         * DAMAGE.
039:         */
040:
041:        package org.dspace.content.packager;
042:
043:        import java.io.ByteArrayInputStream;
044:        import java.io.File;
045:        import java.io.IOException;
046:        import java.io.InputStream;
047:        import java.sql.SQLException;
048:        import java.util.ArrayList;
049:        import java.util.Enumeration;
050:        import java.util.Iterator;
051:        import java.util.List;
052:
053:        import org.apache.commons.codec.binary.Base64;
054:        import org.apache.log4j.Logger;
055:        import org.dspace.authorize.AuthorizeException;
056:        import org.dspace.content.Bitstream;
057:        import org.dspace.content.DSpaceObject;
058:        import org.dspace.content.Item;
059:        import org.dspace.content.crosswalk.CrosswalkException;
060:        import org.dspace.content.crosswalk.CrosswalkObjectNotSupported;
061:        import org.dspace.content.crosswalk.MetadataValidationException;
062:        import org.dspace.content.crosswalk.IngestionCrosswalk;
063:        import org.dspace.core.ConfigurationManager;
064:        import org.dspace.core.Constants;
065:        import org.dspace.core.Context;
066:        import org.dspace.core.PluginManager;
067:        import org.jdom.Document;
068:        import org.jdom.Element;
069:        import org.jdom.JDOMException;
070:        import org.jdom.Namespace;
071:        import org.jdom.input.SAXBuilder;
072:        import org.jdom.output.Format;
073:        import org.jdom.output.XMLOutputter;
074:        import org.jdom.xpath.XPath;
075:
076:        /**
077:         * <P>
078:         * Manage the METS manifest document for METS importer classes,
079:         * such as the package importer <code>org.dspace.content.packager.MetsSubmission</code>
080:         * and the federated importer <code>org.dspace.app.mets.FederatedMETSImport</code>
081:         * </P>
082:         * <P>
083:         * It can parse the METS document, build an internal model, and give the importers
084:         * access to that model.  It also crosswalks
085:         * all of the descriptive and administrative metadata in the METS
086:         * manifest into the target DSpace Item, under control of the importer.
087:         * </P>
088:         *
089:         * <P>
090:         * It reads the following DSpace Configuration entries:
091:         * </P>
092:         * <UL>
093:         * <LI>Local XML schema (XSD) declarations, in the general format:
094:         * <br><code>mets.xsd.<em>identifier</em> = <em>namespace</em> <em>xsd-URL</em></code>
095:         * <br> eg. <code>mets.xsd.dc =  http://purl.org/dc/elements/1.1/ dc.xsd</code>
096:         * <br>Add a separate config entry for each schema.
097:         * </LI>
098:         * <p><LI>Crosswalk plugin mappings:
099:         * These tell it the name of the crosswalk plugin to invoke for metadata sections
100:         * with a particular value of <code>MDTYPE</code> (or <code>OTHERMDTYPE</code>)
101:         * By default, the crosswalk mechanism will look for a plugin with the
102:         * same name as the metadata type (e.g.  <code>"MODS"</code>,
103:         * <code>"DC"</code>).  This example line invokes the <code>QDC</code>
104:         * plugin when <code>MDTYPE="DC"</code>
105:         * <br><code>mets.submission.crosswalk.DC = QDC </code>
106:         * <br> general format is:
107:         * <br><code>mets.submission.crosswalk.<em>mdType</em> = <em>pluginName</em> </code>
108:         * </LI>
109:         * </UL>
110:         *
111:         *
112:         * @author Robert Tansley
113:         * @author WeiHua Huang
114:         * @author Rita Lee
115:         * @author Larry Stone
116:         * @see org.dspace.content.packager.MetsSubmission
117:         * @see org.dspace.app.mets.FederatedMETSImport
118:         */
119:        public class METSManifest {
120:            /**
121:             * Callback interface to retrieve data streams in mdRef elements.
122:             * "Package" or file reader returns an input stream for the
123:             * given relative path, e.g. to dereference <code>mdRef</code> elements.
124:             */
125:            public interface Mdref {
126:                /**
127:                 * Make the contents of an external resource mentioned in
128:                 * an <code>mdRef</code> element available as an <code>InputStream</code>.
129:                 * The implementation must use the information in the
130:                 * <code>mdRef</code> element, and the state in the object that
131:                 * implements this interface, to find the actual metadata content.
132:                 * <p>
133:                 * For example, an implementation that ingests a directory of
134:                 * files on the local filesystem would get a relative pathname
135:                 * out of the <code>mdRef</code> and open that file.
136:                 *
137:                 * @param mdRef JDOM element of mdRef in the METS manifest.
138:                 * @return stream containing the metadata mentioned in mdRef.
139:                 * @throw MetadataValidationException if the mdRef is unacceptable or missing required information.
140:                 * @throw IOException if it is returned by services called by this method.
141:                 * @throw SQLException if it is returned by services called by this method.
142:                 * @throw AuthorizeException if it is returned by services called by this method.
143:                 */
144:                public InputStream getInputStream(Element mdRef)
145:                        throws MetadataValidationException, IOException,
146:                        SQLException, AuthorizeException;
147:            }
148:
149:            /** log4j category */
150:            private static Logger log = Logger.getLogger(METSManifest.class);
151:
152:            /** Canonical filename of METS manifest within a package or as a bitstream. */
153:            public final static String MANIFEST_FILE = "mets.xml";
154:
155:            /** Prefix of DSpace configuration lines that map METS metadata type to
156:             * crosswalk plugin names.
157:             */
158:            private final static String CONFIG_METADATA_PREFIX = "mets.submission.crosswalk.";
159:
160:            /** prefix of config lines identifying local XML Schema (XSD) files */
161:            private final static String CONFIG_XSD_PREFIX = "mets.xsd.";
162:
163:            /** Dublin core element namespace */
164:            private static Namespace dcNS = Namespace
165:                    .getNamespace("http://purl.org/dc/elements/1.1/");
166:
167:            /** Dublin core term namespace (for qualified DC) */
168:            private static Namespace dcTermNS = Namespace
169:                    .getNamespace("http://purl.org/dc/terms/");
170:
171:            /** METS namespace -- includes "mets" prefix for use in XPaths */
172:            public static Namespace metsNS = Namespace.getNamespace("mets",
173:                    "http://www.loc.gov/METS/");
174:
175:            /** XLink namespace -- includes "xlink" prefix prefix for use in XPaths */
176:            private static Namespace xlinkNS = Namespace.getNamespace("xlink",
177:                    "http://www.w3.org/1999/xlink");
178:
179:            /** root element of the current METS manifest. */
180:            private Element mets = null;
181:
182:            /** all mdRef elements in the manifest */
183:            private List mdFiles = null;
184:
185:            /** <file> elements in "original" filegroup (bundle) */
186:            private List contentFiles = null;
187:
188:            /** builder to use for mdRef streams, inherited from create() */
189:            private SAXBuilder parser = null;
190:
191:            // Create list of local schemas at load time, since it depends only
192:            // on the DSpace configuration.
193:            private static String localSchemas;
194:            static {
195:                String dspace_dir = ConfigurationManager
196:                        .getProperty("dspace.dir");
197:                File xsdPath1 = new File(dspace_dir + "/config/schemas/");
198:                File xsdPath2 = new File(dspace_dir + "/config/");
199:
200:                Enumeration pe = ConfigurationManager.propertyNames();
201:                StringBuffer result = new StringBuffer();
202:                while (pe.hasMoreElements()) {
203:                    // config lines have the format:
204:                    //  mets.xsd.{identifier} = {namespace} {xsd-URL}
205:                    // e.g.
206:                    //  mets.xsd.dc =  http://purl.org/dc/elements/1.1/ dc.xsd
207:                    // (filename is relative to {dspace_dir}/config/schemas/)
208:                    String key = (String) pe.nextElement();
209:                    if (key.startsWith(CONFIG_XSD_PREFIX)) {
210:                        String spec = ConfigurationManager.getProperty(key);
211:                        String val[] = spec.trim().split("\\s+");
212:                        if (val.length == 2) {
213:                            File xsd = new File(xsdPath1, val[1]);
214:                            if (!xsd.exists())
215:                                xsd = new File(xsdPath2, val[1]);
216:                            if (!xsd.exists())
217:                                log
218:                                        .warn("Schema file not found for config entry=\""
219:                                                + spec + "\"");
220:                            else {
221:                                try {
222:                                    String u = xsd.toURL().toString();
223:                                    if (result.length() > 0)
224:                                        result.append(" ");
225:                                    result.append(val[0]).append(" ").append(u);
226:                                } catch (java.net.MalformedURLException e) {
227:                                    log.warn("Skipping badly formed XSD URL: "
228:                                            + e.toString());
229:                                }
230:                            }
231:                        } else
232:                            log
233:                                    .warn("Schema config entry has wrong format, entry=\""
234:                                            + spec + "\"");
235:                    }
236:                }
237:                localSchemas = result.toString();
238:                log.debug("Got local schemas = \"" + localSchemas + "\"");
239:            }
240:
241:            /**
242:             * Default constructor, only called internally.
243:             * @param builder XML parser (for parsing mdRef'd files and binData)
244:             * @param mets parsed METS document
245:             */
246:            private METSManifest(SAXBuilder builder, Element mets) {
247:                super ();
248:                this .mets = mets;
249:                parser = builder;
250:            }
251:
252:            /**
253:             * Create a new manifest object from a serialized METS XML document.
254:             * Parse document read from the input stream, optionally validating.
255:             * @param is input stream containing serialized XML
256:             * @param validate if true, enable XML validation using schemas
257:             *   in document.  Also validates any sub-documents.
258:             * @throws MetadataValidationException if there is any error parsing
259:             *          or validating the METS.
260:             * @return new METSManifest object.
261:             */
262:            public static METSManifest create(InputStream is, boolean validate)
263:                    throws IOException, MetadataValidationException {
264:                SAXBuilder builder = new SAXBuilder(validate);
265:
266:                // Set validation feature
267:                if (validate)
268:                    builder.setFeature(
269:                            "http://apache.org/xml/features/validation/schema",
270:                            true);
271:
272:                // Tell the parser where local copies of schemas are, to speed up
273:                // validation.  Local XSDs are identified in the configuration file.
274:                if (localSchemas.length() > 0)
275:                    builder
276:                            .setProperty(
277:                                    "http://apache.org/xml/properties/schema/external-schemaLocation",
278:                                    localSchemas);
279:
280:                // Parse the METS file
281:                Document metsDocument;
282:
283:                try {
284:                    metsDocument = builder.build(is);
285:
286:                    // XXX for temporary debugging
287:                    /*
288:                    XMLOutputter outputPretty = new XMLOutputter(Format.getPrettyFormat());
289:                    log.debug("Got METS DOCUMENT:");
290:                    log.debug(outputPretty.outputString(metsDocument));
291:                     */
292:                } catch (JDOMException je) {
293:                    throw new MetadataValidationException(
294:                            "Error validating METS in " + is.toString(), je);
295:                }
296:
297:                return new METSManifest(builder, metsDocument.getRootElement());
298:            }
299:
300:            /**
301:             * Gets name of the profile to which this METS document conforms.
302:             * @return value the PROFILE attribute of mets element, or null if none.
303:             */
304:            public String getProfile() {
305:                return mets.getAttributeValue("PROFILE");
306:            }
307:
308:            /**
309:             * Gets all <code>file</code> elements which make up
310:             *   the item's content.
311:             * @return a List of <code>Element</code>s.
312:             */
313:            public List getContentFiles() throws MetadataValidationException {
314:                if (contentFiles != null)
315:                    return contentFiles;
316:
317:                Element fileSec = mets.getChild("fileSec", metsNS);
318:                if (fileSec == null)
319:                    throw new MetadataValidationException(
320:                            "Invalid METS Manifest: DSpace requires a fileSec element, but it is missing.");
321:
322:                contentFiles = new ArrayList();
323:                Iterator fgi = fileSec.getChildren("fileGrp", metsNS)
324:                        .iterator();
325:                while (fgi.hasNext()) {
326:                    Element fg = (Element) fgi.next();
327:                    Iterator fi = fg.getChildren("file", metsNS).iterator();
328:                    while (fi.hasNext()) {
329:                        Element f = (Element) fi.next();
330:                        contentFiles.add(f);
331:                    }
332:                }
333:                return contentFiles;
334:            }
335:
336:            /**
337:             * Gets list of all <code>mdRef</code> elements in the METS
338:             *   document.  Used by ingester to e.g. check that all
339:             *   required files are present.
340:             * @return a List of <code>Element</code>s.
341:             */
342:            public List getMdFiles() throws MetadataValidationException {
343:                if (mdFiles == null) {
344:                    try {
345:                        // Use a special namespace with known prefix
346:                        // so we get the right prefix.
347:                        XPath xpath = XPath
348:                                .newInstance("descendant::mets:mdRef");
349:                        xpath.addNamespace(metsNS);
350:                        mdFiles = xpath.selectNodes(mets);
351:                    } catch (JDOMException je) {
352:                        throw new MetadataValidationException(
353:                                "Failed while searching for mdRef elements in manifest: ",
354:                                je);
355:                    }
356:                }
357:                return mdFiles;
358:            }
359:
360:            /**
361:             * Get the "original" file element for a derived file.
362:             * Finds the original from which this was derived by matching the GROUPID
363:             * attribute that binds it to its original.  For instance, the file for
364:             * a thumbnail image would have the same GROUPID as its full-size version.
365:             * <p>
366:             * NOTE: This pattern of relating derived files through the GROUPID
367:             * attribute is peculiar to the DSpace METS SIP profile, and may not be
368:             * generally useful with other sorts of METS documents.
369:             * @param file METS file element of derived file
370:             * @return file Element of original or null if none found.
371:             */
372:            public Element getOriginalFile(Element file) {
373:                String groupID = file.getAttributeValue("GROUPID");
374:                if (groupID == null || groupID.equals(""))
375:                    return null;
376:
377:                try {
378:                    XPath xpath = XPath
379:                            .newInstance("mets:fileSec/mets:fileGrp[@USE=\"CONTENT\"]/mets:file[@GROUPID=\""
380:                                    + groupID + "\"]");
381:                    xpath.addNamespace(metsNS);
382:                    List oFiles = xpath.selectNodes(mets);
383:                    if (oFiles.size() > 0) {
384:                        log.debug("Got ORIGINAL file for derived="
385:                                + file.toString());
386:                        return (Element) oFiles.get(0);
387:                    } else
388:                        return null;
389:                } catch (JDOMException je) {
390:                    log
391:                            .warn("Got exception on XPATH looking for Original file, "
392:                                    + je.toString());
393:                    return null;
394:                }
395:            }
396:
397:            // translate bundle name from METS to DSpace; METS may be "CONTENT"
398:            // or "ORIGINAL" for the DSPace "ORIGINAL", rest are left alone.
399:            private static String normalizeBundleName(String in) {
400:                if (in.equals("CONTENT"))
401:                    return Constants.CONTENT_BUNDLE_NAME;
402:                else if (in.equals("MANIFESTMD"))
403:                    return Constants.METADATA_BUNDLE_NAME;
404:                return in;
405:            }
406:
407:            /**
408:             * Get the DSpace bundle name corresponding to the <code>USE</code> attribute of the file group enclosing this <code>file</code> element.
409:             * @return DSpace bundle name
410:             * @throws MetadataValidationException when there is no USE attribute on the enclosing fileGrp.
411:             */
412:            public static String getBundleName(Element file)
413:                    throws MetadataValidationException {
414:                Element fg = file.getParentElement();
415:                String fgUse = fg.getAttributeValue("USE");
416:                if (fgUse == null)
417:                    throw new MetadataValidationException(
418:                            "Invalid METS Manifest: every fileGrp element must have a USE attribute.");
419:                return normalizeBundleName(fgUse);
420:            }
421:
422:            /**
423:             * Get the "local" file name of this <code>file</code> or <code>mdRef</code> element.
424:             * By "local" we mean the reference to the actual resource containing
425:             * the data for this file, e.g. a relative path within a Zip or tar archive
426:             * if the METS is serving as a manifest for that sort of package.
427:             * @return "local" file name (i.e.  relative to package or content
428:             *  directory) corresponding to this <code>file</code> or <code>mdRef</code> element.
429:             * @throws MetadataValidationException when there is not enough information to find a resource identifier.
430:             */
431:            public static String getFileName(Element file)
432:                    throws MetadataValidationException {
433:                Element ref;
434:                if (file.getName().equals("file")) {
435:                    ref = file.getChild("FLocat", metsNS);
436:                    if (ref == null) {
437:                        // check for forbidden FContent child first:
438:                        if (file.getChild("FContent", metsNS) == null)
439:                            throw new MetadataValidationException(
440:                                    "Invalid METS Manifest: Every file element must have FLocat child.");
441:                        else
442:                            throw new MetadataValidationException(
443:                                    "Invalid METS Manifest: file element has forbidden FContent child, only FLocat is allowed.");
444:                    }
445:                } else if (file.getName().equals("mdRef"))
446:                    ref = file;
447:                else
448:                    throw new MetadataValidationException(
449:                            "getFileName() called with recognized element type: "
450:                                    + file.toString());
451:                String loctype = ref.getAttributeValue("LOCTYPE");
452:                if (loctype != null && loctype.equals("URL")) {
453:                    String result = ref.getAttributeValue("href", xlinkNS);
454:                    if (result == null)
455:                        throw new MetadataValidationException(
456:                                "Invalid METS Manifest: FLocat/mdRef is missing the required xlink:href attribute.");
457:                    return result;
458:                }
459:                throw new MetadataValidationException(
460:                        "Invalid METS Manifest: FLocat/mdRef does not have LOCTYPE=\"URL\" attribute.");
461:            }
462:
463:            /**
464:             * Returns file element corresponding to primary bitstream.
465:             * There is <i>ONLY</i> a primary bitstream if the first <code>div</code> under
466:             * first </code>structMap</code> has an </code>fptr</code>.
467:             *
468:             * @return file element of Item's primary bitstream, or null if there is none.
469:             */
470:            public Element getPrimaryBitstream()
471:                    throws MetadataValidationException {
472:                Element firstDiv = getFirstDiv();
473:                Element fptr = firstDiv.getChild("fptr", metsNS);
474:                if (fptr == null)
475:                    return null;
476:                String id = fptr.getAttributeValue("FILEID");
477:                if (id == null)
478:                    throw new MetadataValidationException(
479:                            "fptr for Primary Bitstream is missing the required FILEID attribute.");
480:                Element result = getElementByXPath(
481:                        "descendant::mets:file[@ID=\"" + id + "\"]", false);
482:                if (result == null)
483:                    throw new MetadataValidationException(
484:                            "Cannot find file element for Primary Bitstream: looking for ID="
485:                                    + id);
486:                return result;
487:            }
488:
489:            /** Get the metadata type from within a *mdSec element.
490:             * @return metadata type name.
491:             */
492:            public String getMdType(Element mdSec)
493:                    throws MetadataValidationException {
494:                Element md = mdSec.getChild("mdRef", metsNS);
495:                if (md == null)
496:                    md = mdSec.getChild("mdWrap", metsNS);
497:                if (md == null)
498:                    throw new MetadataValidationException(
499:                            "Invalid METS Manifest: ?mdSec element has neither mdRef nor mdWrap child.");
500:                String result = md.getAttributeValue("MDTYPE");
501:                if (result != null && result.equals("OTHER"))
502:                    result = md.getAttributeValue("OTHERMDTYPE");
503:                if (result == null)
504:                    throw new MetadataValidationException(
505:                            "Invalid METS Manifest: "
506:                                    + md.getName()
507:                                    + " has no MDTYPE or OTHERMDTYPE attribute.");
508:                return result;
509:            }
510:
511:            /**
512:             *  Returns MIME type of metadata content, if available.
513:             *  @return MIMEtype word, or null if none is available.
514:             */
515:            public String getMdContentMimeType(Element mdSec)
516:                    throws MetadataValidationException {
517:                Element mdWrap = mdSec.getChild("mdWrap", metsNS);
518:                if (mdWrap != null) {
519:                    String mimeType = mdWrap.getAttributeValue("MIMETYPE");
520:                    if (mimeType == null
521:                            && mdWrap.getChild("xmlData", metsNS) != null)
522:                        mimeType = "text/xml";
523:                    return mimeType;
524:                }
525:                Element mdRef = mdSec.getChild("mdRef", metsNS);
526:                if (mdRef != null)
527:                    return mdRef.getAttributeValue("MIMETYPE");
528:                return null;
529:            }
530:
531:            /**
532:             * Return contents of *md element as List of XML Element objects.
533:             * Gets content, dereferecing mdRef if necessary, or decoding and parsing
534:             * a binData that contains XML.
535:             * @return contents of metadata section, or empty list if no XML content is available.
536:             * @throws MetadataValidationException if METS is invalid, or there is an error parsing the XML.
537:             */
538:            public List getMdContentAsXml(Element mdSec, Mdref callback)
539:                    throws MetadataValidationException, IOException,
540:                    SQLException, AuthorizeException {
541:                try {
542:                    Element mdRef = null;
543:                    Element mdWrap = mdSec.getChild("mdWrap", metsNS);
544:                    if (mdWrap != null) {
545:                        Element xmlData = mdWrap.getChild("xmlData", metsNS);
546:                        if (xmlData == null) {
547:                            Element bin = mdWrap.getChild("binData", metsNS);
548:                            if (bin == null)
549:                                throw new MetadataValidationException(
550:                                        "Invalid METS Manifest: mdWrap element with neither xmlData nor binData child.");
551:
552:                            // if binData is actually XML, return it; otherwise ignore.
553:                            else {
554:                                String mimeType = mdWrap
555:                                        .getAttributeValue("MIMETYPE");
556:                                if (mimeType != null
557:                                        && mimeType
558:                                                .equalsIgnoreCase("text/xml")) {
559:                                    byte value[] = Base64.decodeBase64(bin
560:                                            .getText().getBytes());
561:                                    Document mdd = parser
562:                                            .build(new ByteArrayInputStream(
563:                                                    value));
564:                                    List result = new ArrayList(1);
565:                                    result.add(mdd.getRootElement());
566:                                    return result;
567:                                } else {
568:                                    log
569:                                            .warn("Ignoring binData section because MIMETYPE is not XML, but: "
570:                                                    + mimeType);
571:                                    return new ArrayList(0);
572:                                }
573:                            }
574:                        } else {
575:                            return xmlData.getChildren();
576:                        }
577:                    } else if ((mdRef = mdSec.getChild("mdRef", metsNS)) != null) {
578:                        String mimeType = mdRef.getAttributeValue("MIMETYPE");
579:                        if (mimeType != null
580:                                && mimeType.equalsIgnoreCase("text/xml")) {
581:                            Document mdd = parser.build(callback
582:                                    .getInputStream(mdRef));
583:                            List result = new ArrayList(1);
584:                            result.add(mdd.getRootElement());
585:                            return result;
586:                        } else {
587:                            log
588:                                    .warn("Ignoring mdRef section because MIMETYPE is not XML, but: "
589:                                            + mimeType);
590:                            return new ArrayList(0);
591:                        }
592:                    } else
593:                        throw new MetadataValidationException(
594:                                "Invalid METS Manifest: ?mdSec element with neither mdRef nor mdWrap child.");
595:                } catch (JDOMException je) {
596:                    throw new MetadataValidationException(
597:                            "Error parsing or validating metadata section in mdRef or binData within "
598:                                    + mdSec.toString(), je);
599:                }
600:
601:            }
602:
603:            /**
604:             * Return contents of *md element as stream.
605:             * Gets content, dereferecing mdRef if necessary, or decoding
606:             * a binData element if necessary.
607:             * @return Stream containing contents of metadata section.  Never returns null.
608:             * @throws MetadataValidationException if METS format does not contain any metadata.
609:             */
610:            public InputStream getMdContentAsStream(Element mdSec,
611:                    Mdref callback) throws MetadataValidationException,
612:                    IOException, SQLException, AuthorizeException {
613:                Element mdRef = null;
614:                Element mdWrap = mdSec.getChild("mdWrap", metsNS);
615:                if (mdWrap != null) {
616:                    Element xmlData = mdWrap.getChild("xmlData", metsNS);
617:                    if (xmlData == null) {
618:                        Element bin = mdWrap.getChild("binData", metsNS);
619:                        if (bin == null)
620:                            throw new MetadataValidationException(
621:                                    "Invalid METS Manifest: mdWrap element with neither xmlData nor binData child.");
622:
623:                        else {
624:                            byte value[] = Base64.decodeBase64(bin.getText()
625:                                    .getBytes());
626:                            return new ByteArrayInputStream(value);
627:                        }
628:                    } else {
629:                        XMLOutputter outputPretty = new XMLOutputter(Format
630:                                .getPrettyFormat());
631:                        return new ByteArrayInputStream(outputPretty
632:                                .outputString(xmlData.getChildren()).getBytes());
633:                    }
634:                } else if ((mdRef = mdSec.getChild("mdRef", metsNS)) != null) {
635:                    return callback.getInputStream(mdRef);
636:                } else
637:                    throw new MetadataValidationException(
638:                            "Invalid METS Manifest: ?mdSec element with neither mdRef nor mdWrap child.");
639:            }
640:
641:            // special call to crosswalk the guts of a metadata *Sec (dmdSec, amdSec)
642:            // because mdRef and mdWrap have to be handled differently.
643:            // It's a lot like getMdContentAsXml but cannot use that because xwalk
644:            // should be called with root element OR list depending on what was given.
645:            private void crosswalkMdContent(Element mdSec, Mdref callback,
646:                    IngestionCrosswalk xwalk, Context context, DSpaceObject dso)
647:                    throws CrosswalkException, IOException, SQLException,
648:                    AuthorizeException {
649:                List xml = getMdContentAsXml(mdSec, callback);
650:
651:                // if we get inappropriate metadata, e.g. PREMIS for Item, let it go.
652:                try {
653:                    xwalk.ingest(context, dso, xml);
654:                } catch (CrosswalkObjectNotSupported e) {
655:                    log
656:                            .warn("Skipping metadata for inappropriate type of object: Object="
657:                                    + dso.toString()
658:                                    + ", error="
659:                                    + e.toString());
660:                }
661:            }
662:
663:            // return first <div> of first <structMap>;
664:            // in DSpace profile, this is where item-wide dmd and other metadata
665:            // lives as IDrefs.
666:            private Element getFirstDiv() throws MetadataValidationException {
667:                Element sm = mets.getChild("structMap", metsNS);
668:                if (sm == null)
669:                    throw new MetadataValidationException(
670:                            "METS document is missing the required structMap element.");
671:
672:                Element result = sm.getChild("div", metsNS);
673:                if (result == null)
674:                    throw new MetadataValidationException(
675:                            "METS document is missing the required first div element in first structMap.");
676:
677:                log.debug("Got firstDiv result=" + result.toString());
678:                return (Element) result;
679:            }
680:
681:            // return a single Element node found by one-off path.
682:            // use only when path varies each time you call it.
683:            private Element getElementByXPath(String path, boolean nullOk)
684:                    throws MetadataValidationException {
685:                try {
686:                    XPath xpath = XPath.newInstance(path);
687:                    xpath.addNamespace(metsNS);
688:                    xpath.addNamespace(xlinkNS);
689:                    Object result = xpath.selectSingleNode(mets);
690:                    if (result == null && nullOk)
691:                        return null;
692:                    else if (result instanceof  Element)
693:                        return (Element) result;
694:                    else
695:                        throw new MetadataValidationException(
696:                                "METSManifest: Failed to resolve XPath, path=\""
697:                                        + path + "\"");
698:                } catch (JDOMException je) {
699:                    throw new MetadataValidationException(
700:                            "METSManifest: Failed to resolve XPath, path=\""
701:                                    + path + "\"", je);
702:                }
703:            }
704:
705:            // Find crosswalk for the indicated metadata type (e.g. "DC", "MODS")
706:            // The crosswalk plugin name MAY be indirected in config file,
707:            // through an entry like
708:            //  mets.submission.crosswalk.{mdType} = {pluginName}
709:            //   e.g.
710:            //  mets.submission.crosswalk.DC = mysite-QDC
711:            private IngestionCrosswalk getCrosswalk(String type) {
712:                String xwalkName = ConfigurationManager
713:                        .getProperty(CONFIG_METADATA_PREFIX + type);
714:                if (xwalkName == null)
715:                    xwalkName = type;
716:                return (IngestionCrosswalk) PluginManager.getNamedPlugin(
717:                        IngestionCrosswalk.class, xwalkName);
718:            }
719:
720:            /**
721:             * Gets all dmdSec elements containing metadata for the DSpace Item.
722:             *
723:             * @return array of Elements, each a dmdSec.  May be empty but NOT null.
724:             * @throws MetadataValidationException if the METS is missing a reference to item-wide
725:             *          DMDs in the correct place.
726:             */
727:            public Element[] getItemDmds() throws MetadataValidationException {
728:                // div@DMDID is actually IDREFS, a space-separated list of IDs:
729:                Element firstDiv = getFirstDiv();
730:                String dmds = firstDiv.getAttributeValue("DMDID");
731:                if (dmds == null)
732:                    throw new MetadataValidationException(
733:                            "Invalid METS: Missing reference to Item descriptive metadata, first div on first structmap must have a DMDID attribute.");
734:                String dmdID[] = dmds.split("\\s+");
735:                Element result[] = new Element[dmdID.length];
736:
737:                for (int i = 0; i < dmdID.length; ++i)
738:                    result[i] = getElementByXPath("mets:dmdSec[@ID=\""
739:                            + dmdID[i] + "\"]", false);
740:                return result;
741:            }
742:
743:            /**
744:             * Return rights metadata section(s) relevant to item as a whole.
745:             * @return array of rightsMd elements, possibly empty but never null.
746:             * @throws MetadataValidationException if METS is invalid, e.g. referenced amdSec is missing.
747:             */
748:            public Element[] getItemRightsMD()
749:                    throws MetadataValidationException {
750:                // div@ADMID is actually IDREFS, a space-separated list of IDs:
751:                Element firstDiv = getFirstDiv();
752:                String amds = firstDiv.getAttributeValue("ADMID");
753:                if (amds == null) {
754:                    log.debug("getItemRightsMD: No ADMID references found.");
755:                    return new Element[0];
756:                }
757:                String amdID[] = amds.split("\\s+");
758:                List resultList = new ArrayList();
759:                for (int i = 0; i < amdID.length; ++i) {
760:                    List rmds = getElementByXPath(
761:                            "mets:amdSec[@ID=\"" + amdID[i] + "\"]", false)
762:                            .getChildren("rightsMD", metsNS);
763:                    if (rmds.size() > 0)
764:                        resultList.addAll(rmds);
765:                }
766:                return (Element[]) resultList.toArray(new Element[resultList
767:                        .size()]);
768:            }
769:
770:            /**
771:             * Invokes appropriate crosswalks on Item-wide descriptive metadata.
772:             */
773:            public void crosswalkItem(Context context, Item item, Element dmd,
774:                    Mdref callback) throws MetadataValidationException,
775:                    CrosswalkException, IOException, SQLException,
776:                    AuthorizeException {
777:                String type = getMdType(dmd);
778:                IngestionCrosswalk xwalk = getCrosswalk(type);
779:
780:                if (xwalk == null)
781:                    throw new MetadataValidationException(
782:                            "Cannot process METS Manifest: "
783:                                    + "No crosswalk found for MDTYPE=" + type);
784:                crosswalkMdContent(dmd, callback, xwalk, context, item);
785:            }
786:
787:            /**
788:             * Crosswalk the metadata associated with a particular <code>file</code>
789:             * element into the bitstream it corresponds to.
790:             * @param context a dspace context.
791:             * @param bs bitstream target of the crosswalk
792:             * @param fileId value of ID attribute in the file element responsible
793:             *  for the contents of that bitstream.
794:             */
795:            public void crosswalkBitstream(Context context,
796:                    Bitstream bitstream, String fileId, Mdref callback)
797:                    throws MetadataValidationException, CrosswalkException,
798:                    IOException, SQLException, AuthorizeException {
799:                Element file = getElementByXPath("descendant::mets:file[@ID=\""
800:                        + fileId + "\"]", false);
801:                if (file == null)
802:                    throw new MetadataValidationException(
803:                            "Failed in Bitstream crosswalk, Could not find file element with ID="
804:                                    + fileId);
805:
806:                // In DSpace METS SIP spec, admin metadata is only "highly
807:                // recommended", not "required", so it is OK if there is no ADMID.
808:                String amds = file.getAttributeValue("ADMID");
809:                if (amds == null) {
810:                    log.warn("Got no bitstream ADMID, file@ID=" + fileId);
811:                    return;
812:                }
813:                String amdID[] = amds.split("\\s+");
814:                for (int i = 0; i < amdID.length; ++i) {
815:                    List techMDs = getElementByXPath(
816:                            "mets:amdSec[@ID=\"" + amdID[i] + "\"]", false)
817:                            .getChildren("techMD", metsNS);
818:                    Iterator ti = techMDs.iterator();
819:                    while (ti.hasNext()) {
820:                        Element techMD = (Element) ti.next();
821:                        if (techMD != null) {
822:                            String type = getMdType(techMD);
823:                            IngestionCrosswalk xwalk = getCrosswalk(type);
824:                            log.debug("Got bitstream techMD of type=" + type
825:                                    + ", for file ID=" + fileId);
826:
827:                            if (xwalk == null)
828:                                throw new MetadataValidationException(
829:                                        "Cannot process METS Manifest: "
830:                                                + "No crosswalk found for techMD MDTYPE="
831:                                                + type);
832:                            crosswalkMdContent(techMD, callback, xwalk,
833:                                    context, bitstream);
834:                        }
835:                    }
836:                }
837:            }
838:
839:            /**
840:             * Find Handle (if any) identifier labelling this manifest.
841:             * @return handle (never null)
842:             * @throws MetadataValidationException if no handle available.
843:             */
844:            public String getHandle() throws MetadataValidationException {
845:                // TODO: XXX Make configurable? Handle optionally passed in?
846:                // FIXME: Not sure if OBJID is really the right place
847:
848:                String handle = mets.getAttributeValue("OBJID");
849:
850:                if (handle != null && handle.startsWith("hdl:")) {
851:                    return handle.substring(4);
852:                } else {
853:                    throw new MetadataValidationException(
854:                            "Item has no valid Handle (OBJID)");
855:                }
856:            }
857:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.