Source Code Cross Referenced for AssetIterator.java in » ERP-CRM-Financial » sakai » org » sakaibrary » osid » repository » xserver » Java Source Code / Java Documentation | Java Source Code and Java Documentation

Java Source Code / Java Documentation
1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Servlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI
Java
Java Tutorial
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » ERP CRM Financial » sakai » org.sakaibrary.osid.repository.xserver
Source Cross Referenced Class Diagram Java Document (Java Doc)
001:        package org.sakaibrary.osid.repository.xserver;
002:
003:        import java.io.IOException;
004:        import java.util.regex.Matcher;
005:        import java.util.regex.Pattern;
006:
007:        import javax.xml.parsers.ParserConfigurationException;
008:
009:        import org.sakaibrary.xserver.session.MetasearchSession;
010:        import org.sakaibrary.xserver.session.MetasearchSessionManager;
011:        import org.xml.sax.SAXException;
012:        import org.xml.sax.SAXParseException;
013:
014:        /** 
015:         * @author gbhatnag
016:         * @version
017:         */
018:        public class AssetIterator extends org.xml.sax.helpers.DefaultHandler
019:                implements  org.osid.repository.AssetIterator {
020:
            private static final long serialVersionUID = 1L;
            // classpath resource holding one "name = regex" entry per line
            private static final String REGULAR_EXPRESSION_FILE = "/data/citationRegex.txt";
            private static final org.apache.commons.logging.Log LOG = org.apache.commons.logging.LogFactory
                    .getLog("org.sakaibrary.osid.repository.xserver.AssetIterator");

            // Assets built by the SAX callbacks and not yet handed out by nextAsset()
            private java.util.LinkedList assetQueue;
            // CitationRegex entries loaded from REGULAR_EXPRESSION_FILE
            private java.util.ArrayList regexArray;
            // session identifier used to look up the MetasearchSession and to build XServer instances
            private String guid;
            // offset passed to XServer.getRecordsXML(); advanced by the number of Assets queued per fetch
            private int totalRecordsCursor = 0;
            // number of Assets nextAsset() has returned so far
            private int numRecordsReturned = 0;
            // set by createAssets(); used when constructing each new Asset
            private org.osid.shared.Id repositoryId;
            // set by createAssets(); used when creating each Asset's Record
            private org.osid.shared.Id recordStructureId;
            // Asset currently being populated by the SAX callbacks
            private org.osid.repository.Asset asset;
            // Record of the Asset currently being populated
            private org.osid.repository.Record record;

            // for SAX parsing: accumulates character data until an element end consumes it
            private StringBuffer textBuffer;

            // session
            private MetasearchSessionManager msm;
            // status properties last fetched by hasNextAsset(); also read by nextAsset()
            org.osid.shared.Properties statusProperties;
042:
043:            /**
044:             * Constructs an empty AssetIterator
045:             * 
046:             * @param guid globally unique identifier for this session
047:             * @throws org.osid.repository.RepositoryException
048:             */
049:            protected AssetIterator(String guid)
050:                    throws org.osid.repository.RepositoryException {
051:                this .guid = guid;
052:
053:                // get session cache manager
054:                msm = MetasearchSessionManager.getInstance();
055:
056:                // create assetQueue
057:                assetQueue = new java.util.LinkedList();
058:
059:                // load citation regular expressions
060:                try {
061:                    regexArray = loadCitationRegularExpressions(REGULAR_EXPRESSION_FILE);
062:                } catch (java.io.IOException ioe) {
063:                    LOG.warn("AssetIterator() failed reading citation regular "
064:                            + "expressions - regex file: "
065:                            + REGULAR_EXPRESSION_FILE, ioe);
066:                }
067:            }
068:
069:            private java.util.ArrayList loadCitationRegularExpressions(
070:                    String filename) throws java.io.IOException {
071:                java.util.ArrayList regexArray = new java.util.ArrayList();
072:
073:                java.io.InputStream is = this .getClass().getResourceAsStream(
074:                        filename);
075:                java.io.BufferedReader regexes = new java.io.BufferedReader(
076:                        new java.io.InputStreamReader(is));
077:
078:                // read the regex file and add regexes to array
079:                String regex;
080:                while ((regex = regexes.readLine()) != null) {
081:                    String[] nameRegex = regex.split("=");
082:
083:                    CitationRegex citationRegex = new CitationRegex();
084:                    citationRegex.setName(nameRegex[0].trim());
085:                    citationRegex.setRegex(nameRegex[1].trim());
086:
087:                    regexArray.add(citationRegex);
088:                }
089:                regexes.close();
090:                is.close();
091:
092:                return regexArray;
093:            }
094:
095:            public boolean hasNextAsset()
096:                    throws org.osid.repository.RepositoryException {
097:                MetasearchSession metasearchSession = msm
098:                        .getMetasearchSession(guid);
099:
100:                // get an XServer to check status and update number of records found
101:                org.sakaibrary.xserver.XServer xserver = null;
102:                statusProperties = null;
103:                try {
104:                    xserver = new org.sakaibrary.xserver.XServer(guid);
105:                    xserver.updateSearchStatusProperties();
106:                    statusProperties = xserver.getSearchStatusProperties();
107:                } catch (org.sakaibrary.xserver.XServerException xse) {
108:                    LOG.warn("X-Server error: " + xse.getErrorCode() + " - "
109:                            + xse.getErrorText());
110:
111:                    // throw exception now that status has been updated
112:                    throw new org.osid.repository.RepositoryException(
113:                            org.sakaibrary.osid.repository.xserver.MetasearchException.METASEARCH_ERROR);
114:                }
115:
116:                // check status for error/timeout
117:                String status = null;
118:
119:                try {
120:                    status = (String) statusProperties.getProperty("status");
121:                } catch (org.osid.shared.SharedException se) {
122:                    LOG.warn("hasNextAsset() failed getting status "
123:                            + "property", se);
124:                }
125:
126:                if (status != null) {
127:                    // status and statusMessage are set by XServer.updateSearchStatusProperties
128:                    if (status.equals("error")) {
129:                        throw new org.osid.repository.RepositoryException(
130:                                org.sakaibrary.osid.repository.xserver.MetasearchException.METASEARCH_ERROR);
131:                    } else if (status.equals("timeout")) {
132:                        throw new org.osid.repository.RepositoryException(
133:                                org.sakaibrary.osid.repository.xserver.MetasearchException.SESSION_TIMED_OUT);
134:                    } else if (status.equals("empty")) {
135:                        // no records found
136:                        return false;
137:                    }
138:                } else {
139:                    LOG.warn("hasNextAsset() - status property is null");
140:                }
141:
142:                // get updated metasearchSession
143:                metasearchSession = msm.getMetasearchSession(guid);
144:                Integer numRecordsFound = metasearchSession
145:                        .getNumRecordsFound();
146:
147:                if (numRecordsFound == null || numRecordsFound.intValue() == 0) {
148:                    // still searching for records, return true
149:                    return true;
150:                }
151:
152:                // check if passed max number of attainable records
153:                int maxAttainable;
154:                boolean gotMergeError = metasearchSession.isGotMergeError();
155:                if (gotMergeError) {
156:                    maxAttainable = 300;
157:                } else {
158:                    maxAttainable = numRecordsFound.intValue();
159:                }
160:
161:                return (numRecordsReturned < maxAttainable);
162:            }
163:
164:            public org.osid.repository.Asset nextAsset()
165:                    throws org.osid.repository.RepositoryException {
166:                LOG.debug("nextAsset() [entry] - returned: "
167:                        + numRecordsReturned + "; total: " + totalRecordsCursor
168:                        + "; in queue: " + assetQueue.size());
169:
170:                // return Asset, if ready
171:                if (assetQueue.size() > 0) {
172:                    numRecordsReturned++;
173:                    return (org.osid.repository.Asset) assetQueue.removeFirst();
174:                }
175:
176:                // assetQueue is empty - check whether we should get more records
177:                // or throw an Exception
178:                if (hasNextAsset()) {
179:                    // hasNextAsset() will throw timeout/error Exceptions if any
180:                    String status = null;
181:
182:                    try {
183:                        status = (String) statusProperties
184:                                .getProperty("status");
185:                    } catch (org.osid.shared.SharedException se) {
186:                        LOG.warn("nextAsset() failed getting status property",
187:                                se);
188:                    }
189:
190:                    if (!status.equals("ready")) {
191:                        // the X-Server is still searching/fetching - try again later
192:                        throw new org.osid.repository.RepositoryException(
193:                                org.sakaibrary.osid.repository.xserver.MetasearchException.ASSET_NOT_FETCHED);
194:                    }
195:
196:                    // get records from the X-Server
197:                    MetasearchSession metasearchSession = msm
198:                            .getMetasearchSession(guid);
199:                    org.osid.shared.Id repositoryId = metasearchSession
200:                            .getRepositoryId();
201:
202:                    try {
203:                        org.sakaibrary.xserver.XServer xserver = new org.sakaibrary.xserver.XServer(
204:                                guid);
205:
206:                        LOG
207:                                .debug("nextAsset() calling XServer.getRecordsXML() - assets in "
208:                                        + "queue: " + assetQueue.size());
209:                        createAssets(xserver.getRecordsXML(totalRecordsCursor),
210:                                repositoryId);
211:                    } catch (org.sakaibrary.xserver.XServerException xse) {
212:                        LOG.warn("X-Server error: " + xse.getErrorCode()
213:                                + " - " + xse.getErrorText());
214:
215:                        throw new org.osid.repository.RepositoryException(
216:                                org.sakaibrary.osid.repository.xserver.MetasearchException.METASEARCH_ERROR);
217:                    }
218:                    LOG
219:                            .debug("nextAsset(), XServer.getRecordsXML() returns - assets in "
220:                                    + "queue: " + assetQueue.size());
221:
222:                    // records have been fetched and Assets queued
223:                    totalRecordsCursor += assetQueue.size();
224:                    numRecordsReturned++;
225:                    return (org.osid.repository.Asset) assetQueue.removeFirst();
226:                } else {
227:                    // no assets available
228:                    throw new org.osid.repository.RepositoryException(
229:                            org.osid.shared.SharedException.NO_MORE_ITERATOR_ELEMENTS);
230:                }
231:            }
232:
233:            /**
234:             * This method parses the xml StringBuffer and creates Assets, Records
235:             * and Parts in the Repository with the given repositoryId.
236:             *
237:             * @param xml input xml in "sakaibrary" format
238:             * @param log the log being used by the Repository
239:             * @param repositoryId the Id of the Repository in which to create Assets,
240:             * Records and Parts.
241:             * 
242:             * @throws org.osid.repository.RepositoryException
243:             */
244:            private void createAssets(java.io.ByteArrayInputStream xml,
245:                    org.osid.shared.Id repositoryId)
246:                    throws org.osid.repository.RepositoryException {
247:                this .repositoryId = repositoryId;
248:                recordStructureId = RecordStructure.getInstance().getId();
249:                textBuffer = new StringBuffer();
250:
251:                // use a SAX parser
252:                javax.xml.parsers.SAXParserFactory factory;
253:                javax.xml.parsers.SAXParser saxParser;
254:
255:                // set up the parser
256:                factory = javax.xml.parsers.SAXParserFactory.newInstance();
257:                factory.setNamespaceAware(true);
258:
259:                // start parsing
260:                try {
261:                    saxParser = factory.newSAXParser();
262:                    saxParser.parse(xml, this );
263:                    xml.close();
264:                } catch (SAXParseException spe) {
265:                    // Use the contained exception, if any
266:                    Exception x = spe;
267:
268:                    if (spe.getException() != null) {
269:                        x = spe.getException();
270:                    }
271:
272:                    // Error generated by the parser
273:                    LOG.warn("createAssets() parsing exception: "
274:                            + spe.getMessage() + " - xml line "
275:                            + spe.getLineNumber() + ", uri "
276:                            + spe.getSystemId(), x);
277:                } catch (SAXException sxe) {
278:                    // Error generated by this application
279:                    // (or a parser-initialization error)
280:                    Exception x = sxe;
281:
282:                    if (sxe.getException() != null) {
283:                        x = sxe.getException();
284:                    }
285:
286:                    LOG.warn("createAssets() SAX exception: "
287:                            + sxe.getMessage(), x);
288:                } catch (ParserConfigurationException pce) {
289:                    // Parser with specified options can't be built
290:                    LOG.warn("createAssets() SAX parser cannot be built with "
291:                            + "specified options");
292:                } catch (IOException ioe) {
293:                    // I/O error
294:                    LOG.warn("createAssets() IO exception", ioe);
295:                }
296:            }
297:
298:            //----------------------------------
299:            // SAX DEFAULT HANDLER IMPLEMENTATIONS -
300:            //----------------------------------
301:
302:            /**
303:             * Receive notification of the beginning of an element.
304:             *   
305:             * @see DefaultHandler
306:             */
307:            public void startElement(String namespaceURI, String sName,
308:                    String qName, org.xml.sax.Attributes attrs)
309:                    throws org.xml.sax.SAXException {
310:                if (qName.equals("record")) {
311:                    populateAssetFromText("record_start");
312:                }
313:            }
314:
315:            /**
316:             * Receive notification of the end of an element.
317:             *   
318:             * @see DefaultHandler
319:             */
320:            public void endElement(String namespaceURI, String sName,
321:                    String qName) throws org.xml.sax.SAXException {
322:                populateAssetFromText(qName);
323:            }
324:
325:            /**
326:             * Receive notification of character data inside an element.
327:             *   
328:             * @see DefaultHandler
329:             */
330:            public void characters(char[] buf, int offset, int len)
331:                    throws org.xml.sax.SAXException {
332:                // store character data
333:                String text = new String(buf, offset, len);
334:
335:                if (textBuffer == null) {
336:                    textBuffer = new StringBuffer(text);
337:                } else {
338:                    textBuffer.append(text);
339:                }
340:            }
341:
342:            private void populateAssetFromText(String elementName) {
343:                // new record
344:                if (elementName.equals("record_start")) {
345:                    try {
346:                        // create a new asset... need title, description, assetId
347:                        asset = new Asset(null, null, getId(), repositoryId);
348:
349:                        // create a new record
350:                        record = asset.createRecord(recordStructureId);
351:                    } catch (org.osid.repository.RepositoryException re) {
352:                        LOG.warn("populateAssetFromText() failed to "
353:                                + "create new Asset/Record pair.", re);
354:                    }
355:                } else if (elementName.equals("record")) {
356:                    // a record has ended: do post-processing //
357:
358:                    // set dateRetrieved
359:                    setDateRetrieved();
360:
361:                    // use inLineCitation to fill in other fields, if possible
362:                    org.osid.repository.Part inLineCitation;
363:                    try {
364:                        if ((inLineCitation = recordHasPart(InLineCitationPartStructure
365:                                .getInstance().getType())) != null) {
366:                            doRegexParse((String) inLineCitation.getValue());
367:                        }
368:                    } catch (org.osid.repository.RepositoryException re) {
369:                        LOG.warn("populateAssetFromText() failed to "
370:                                + "gracefully process inLineCitation value.",
371:                                re);
372:                    }
373:
374:                    assetQueue.add(asset);
375:                }
376:
377:                if (textBuffer == null) {
378:                    return;
379:                }
380:
381:                String text = textBuffer.toString().trim();
382:                if (text.equals("")) {
383:                    return;
384:                }
385:
386:                try {
387:                    if (elementName.equals("title")) {
388:                        asset.updateDisplayName(text);
389:                    } else if (elementName.equals("abstract")) {
390:                        asset.updateDescription(text);
391:                    } else if (elementName.equals("author")) {
392:                        record.createPart(CreatorPartStructure.getInstance()
393:                                .getId(), text);
394:                    } else if (elementName.equals("date")) {
395:                        record.createPart(DatePartStructure.getInstance()
396:                                .getId(), text);
397:                    } else if (elementName.equals("doi")) {
398:                        record.createPart(DOIPartStructure.getInstance()
399:                                .getId(), text);
400:                    } else if (elementName.equals("edition")) {
401:                        record.createPart(EditionPartStructure.getInstance()
402:                                .getId(), text);
403:                    } else if (elementName.equals("inLineCitation")) {
404:                        record.createPart(InLineCitationPartStructure
405:                                .getInstance().getId(), text);
406:                    } else if (elementName.equals("isnIdentifier")) {
407:                        record.createPart(IsnIdentifierPartStructure
408:                                .getInstance().getId(), text);
409:                    } else if (elementName.equals("issue")) {
410:                        record.createPart(IssuePartStructure.getInstance()
411:                                .getId(), text);
412:                    } else if (elementName.equals("language")) {
413:                        record.createPart(LanguagePartStructure.getInstance()
414:                                .getId(), text);
415:                    } else if (elementName.equals("note")) {
416:                        record.createPart(NotePartStructure.getInstance()
417:                                .getId(), text);
418:                    } else if (elementName.equals("openUrl")) {
419:                        record.createPart(OpenUrlPartStructure.getInstance()
420:                                .getId(), text);
421:                    } else if (elementName.equals("pages")) {
422:                        createPagesPart(text);
423:                    } else if (elementName.equals("publisherInfo")) {
424:                        record.createPart(PublisherPartStructure.getInstance()
425:                                .getId(), text);
426:                    } else if (elementName.equals("rights")) {
427:                        record.createPart(RightsPartStructure.getInstance()
428:                                .getId(), text);
429:                    } else if (elementName.equals("sourceTitle")) {
430:                        record.createPart(SourceTitlePartStructure
431:                                .getInstance().getId(), text);
432:                    } else if (elementName.equals("subject")) {
433:                        record.createPart(SubjectPartStructure.getInstance()
434:                                .getId(), text);
435:                    } else if (elementName.equals("type")) {
436:                        record.createPart(TypePartStructure.getInstance()
437:                                .getId(), text);
438:                    } else if (elementName.equals("url")) {
439:                        record.createPart(URLPartStructure.getInstance()
440:                                .getId(), text);
441:                    } else if (elementName.equals("urlLabel")) {
442:                        record.createPart(URLLabelPartStructure.getInstance()
443:                                .getId(), text);
444:                    } else if (elementName.equals("urlFormat")) {
445:                        record.createPart(URLFormatPartStructure.getInstance()
446:                                .getId(), text);
447:                    } else if (elementName.equals("volume")) {
448:                        record.createPart(VolumePartStructure.getInstance()
449:                                .getId(), text);
450:                    } else if (elementName.equals("volumeIssue")) {
451:                        doRegexParse(text);
452:                    } else if (elementName.equals("year")) {
453:                        record.createPart(YearPartStructure.getInstance()
454:                                .getId(), text);
455:                    }
456:                } catch (org.osid.repository.RepositoryException re) {
457:                    LOG.warn("populateAssetFromText() failed to "
458:                            + "create new Part.", re);
459:                }
460:
461:                textBuffer = null;
462:            }
463:
464:            private void setDateRetrieved() {
465:                java.util.GregorianCalendar now = new java.util.GregorianCalendar();
466:                int month = now.get(java.util.Calendar.MONTH) + 1;
467:                int date = now.get(java.util.Calendar.DATE);
468:                String monthStr, dateStr;
469:
470:                if (month < 10) {
471:                    monthStr = "0" + month;
472:                } else {
473:                    monthStr = String.valueOf(month);
474:                }
475:
476:                if (date < 10) {
477:                    dateStr = "0" + date;
478:                } else {
479:                    dateStr = String.valueOf(date);
480:                }
481:                String dateRetrieved = now.get(java.util.Calendar.YEAR) + "-"
482:                        + monthStr + "-" + dateStr;
483:
484:                try {
485:                    record.createPart(DateRetrievedPartStructure.getInstance()
486:                            .getId(), dateRetrieved);
487:                } catch (org.osid.repository.RepositoryException re) {
488:                    LOG.warn("setDateRetrieved() failed "
489:                            + "creating new dateRetrieved Part.", re);
490:                }
491:            }
492:
493:            /**
494:             * This method searches the current record for a Part using its 
495:             * PartStructure Type.
496:             * 
497:             * @param partStructureType PartStructure Type of Part you need.
498:             * @return the Part if it exists in the current record, null if it does not.
499:             */
500:            private org.osid.repository.Part recordHasPart(
501:                    org.osid.shared.Type partStructureType) {
502:                try {
503:                    org.osid.repository.PartIterator pit = record.getParts();
504:
505:                    while (pit.hasNextPart()) {
506:                        org.osid.repository.Part part = pit.nextPart();
507:
508:                        if (part.getPartStructure().getType().isEqual(
509:                                partStructureType)) {
510:                            return part;
511:                        }
512:                    }
513:                } catch (org.osid.repository.RepositoryException re) {
514:                    LOG.warn("recordHasPart() failed getting Parts.", re);
515:                }
516:
517:                // did not find the Part
518:                return null;
519:            }
520:
521:            /**
522:             * This method does its best to map data contained in an inLineCitation to
523:             * other fields such as volume, issue, etc. in the case that they are empty.
524:             * It compares the citation to a known set of regular expressions contained
525:             * in REGULAR_EXPRESSION_FILE.  Adding a new regular expression entails
526:             * adding a new case for parsing in this method.
527:             * 
528:             * @param citation inLineCitation to be parsed
529:             */
530:            private void doRegexParse(String citation) {
531:                String regexName = null;
532:                Pattern pattern;
533:                Matcher matcher;
534:                boolean hasVolume = false;
535:                boolean hasIssue = false;
536:                boolean hasDate = false;
537:                boolean hasPages = false;
538:                boolean hasSourceTitle = false;
539:
540:                for (int i = 0; i < regexArray.size(); i++) {
541:                    CitationRegex citationRegex = (CitationRegex) regexArray
542:                            .get(i);
543:                    pattern = Pattern.compile(citationRegex.getRegex());
544:                    matcher = pattern.matcher(citation);
545:
546:                    if (matcher.find()) {
547:                        regexName = citationRegex.getName();
548:                        break;
549:                    }
550:                }
551:
552:                if (regexName != null) {
553:                    // determine which fields are necessary
554:                    try {
555:                        hasVolume = recordHasPart(VolumePartStructure
556:                                .getInstance().getType()) == null ? false
557:                                : true;
558:
559:                        hasIssue = recordHasPart(IssuePartStructure
560:                                .getInstance().getType()) == null ? false
561:                                : true;
562:
563:                        hasDate = recordHasPart(DatePartStructure.getInstance()
564:                                .getType()) == null ? false : true;
565:
566:                        hasPages = recordHasPart(PagesPartStructure
567:                                .getInstance().getType()) == null ? false
568:                                : true;
569:
570:                        hasSourceTitle = recordHasPart(SourceTitlePartStructure
571:                                .getInstance().getType()) == null ? false
572:                                : true;
573:
574:                        // if all true, no need to go further
575:                        if (hasVolume && hasIssue && hasDate && hasPages
576:                                && hasSourceTitle) {
577:                            return;
578:                        }
579:
580:                        // check for matching regex
581:                        if (regexName.equals("zooRec")) {
582:                            // .+ \d+(\(\d+\))?, (.*)? \d{4}: \d+-\d+
583:                            if (!hasVolume) {
584:                                pattern = Pattern.compile("\\d+");
585:                                matcher = pattern.matcher(citation);
586:                                if (matcher.find()) {
587:                                    record.createPart(VolumePartStructure
588:                                            .getInstance().getId(), matcher
589:                                            .group());
590:                                }
591:                            }
592:
593:                            if (!hasIssue) {
594:                                pattern = Pattern.compile("\\(\\d+\\)");
595:                                matcher = pattern.matcher(citation);
596:                                if (matcher.find()) {
597:                                    record.createPart(IssuePartStructure
598:                                            .getInstance().getId(), matcher
599:                                            .group().replaceAll("\\D", ""));
600:                                }
601:                            }
602:
603:                            if (!hasDate) {
604:                                pattern = Pattern.compile(", (.*)? \\d{4}:");
605:                                matcher = pattern.matcher(citation);
606:                                if (matcher.find()) {
607:                                    String date = matcher.group().substring(2,
608:                                            matcher.group().length() - 1);
609:                                    record.createPart(DatePartStructure
610:                                            .getInstance().getId(), date);
611:                                }
612:                            }
613:
614:                            if (!hasPages) {
615:                                pattern = Pattern.compile("\\d+-\\d+");
616:                                matcher = pattern.matcher(citation);
617:                                if (matcher.find()) {
618:                                    createPagesPart(matcher.group());
619:                                }
620:                            }
621:
622:                            if (!hasSourceTitle) {
623:                                pattern = Pattern.compile("\\D+\\d");
624:                                matcher = pattern.matcher(citation);
625:                                if (matcher.find()) {
626:                                    String sourceTitle = matcher
627:                                            .group()
628:                                            .substring(
629:                                                    0,
630:                                                    matcher.group().length() - 2);
631:                                    record
632:                                            .createPart(
633:                                                    SourceTitlePartStructure
634:                                                            .getInstance()
635:                                                            .getId(),
636:                                                    sourceTitle);
637:                                }
638:                            }
639:                        } else if (regexName.equals("animBehavAbs")) {
640:                            // .+ Vol\. \d+, no\. \d+, (\d+)? pp\.|p\. \d+(-\d+.)? (.*)? \d{4}\.$
641:                            if (!hasVolume) {
642:                                pattern = Pattern.compile("Vol\\. \\d+");
643:                                matcher = pattern.matcher(citation);
644:                                if (matcher.find()) {
645:                                    record.createPart(VolumePartStructure
646:                                            .getInstance().getId(), matcher
647:                                            .group().replaceAll("\\D", ""));
648:                                }
649:                            }
650:
651:                            if (!hasIssue) {
652:                                pattern = Pattern.compile("no\\. \\d+");
653:                                matcher = pattern.matcher(citation);
654:                                if (matcher.find()) {
655:                                    record.createPart(IssuePartStructure
656:                                            .getInstance().getId(), matcher
657:                                            .group().replaceAll("\\D", ""));
658:                                }
659:                            }
660:
661:                            if (!hasDate) {
662:                                pattern = Pattern
663:                                        .compile("(pp\\.|p\\.) \\d+(-\\d+\\.)? (.*)? \\d{4}\\.$");
664:                                matcher = pattern.matcher(citation);
665:                                if (matcher.find()) {
666:                                    String date = matcher
667:                                            .group()
668:                                            .substring(
669:                                                    matcher.group().indexOf(
670:                                                            " ", 4) + 1,
671:                                                    matcher.group().length() - 1);
672:                                    record.createPart(DatePartStructure
673:                                            .getInstance().getId(), date);
674:                                }
675:                            }
676:
677:                            if (!hasPages) {
678:                                pattern = Pattern
679:                                        .compile("(pp\\.|p\\.) \\d+(-\\d+\\.)?");
680:                                matcher = pattern.matcher(citation);
681:                                if (matcher.find()) {
682:                                    createPagesPart(matcher.group());
683:                                }
684:                            }
685:
686:                            if (!hasSourceTitle) {
687:                                pattern = Pattern.compile(".+ \\[");
688:                                matcher = pattern.matcher(citation);
689:                                if (matcher.find()) {
690:                                    String sourceTitle = matcher
691:                                            .group()
692:                                            .substring(
693:                                                    0,
694:                                                    matcher.group().length() - 2);
695:                                    record
696:                                            .createPart(
697:                                                    SourceTitlePartStructure
698:                                                            .getInstance()
699:                                                            .getId(),
700:                                                    sourceTitle);
701:                                }
702:                            }
703:                        } else if (regexName.equals("pubMed")) {
704:                            // .+ (Volume: \\d+, )?Issue: ((\\d+)|(\\w+)), Date: \\d{4} \\d+ \\d+,( Pages: \\d+-\\d+)?
705:                            if (!hasVolume) {
706:                                pattern = Pattern.compile("Volume: \\d+");
707:                                matcher = pattern.matcher(citation);
708:                                if (matcher.find()) {
709:                                    record.createPart(VolumePartStructure
710:                                            .getInstance().getId(), matcher
711:                                            .group().replaceAll("\\D", ""));
712:                                }
713:                            }
714:
715:                            if (!hasIssue) {
716:                                pattern = Pattern
717:                                        .compile("Issue: ((\\d+)|(\\w+))");
718:                                matcher = pattern.matcher(citation);
719:                                if (matcher.find()) {
720:                                    String issue = matcher.group().substring(7,
721:                                            matcher.group().length());
722:                                    record.createPart(IssuePartStructure
723:                                            .getInstance().getId(), issue);
724:                                }
725:                            }
726:
727:                            if (!hasDate) {
728:                                pattern = Pattern
729:                                        .compile("Date: \\d{4} \\d+ \\d+");
730:                                matcher = pattern.matcher(citation);
731:                                if (matcher.find()) {
732:                                    String date = matcher.group().substring(6,
733:                                            matcher.group().length());
734:                                    date = date.replaceAll("\\s", "-");
735:                                    record.createPart(DatePartStructure
736:                                            .getInstance().getId(), date);
737:                                }
738:                            }
739:
740:                            if (!hasPages) {
741:                                pattern = Pattern.compile("\\d+-\\d+");
742:                                matcher = pattern.matcher(citation);
743:                                if (matcher.find()) {
744:                                    createPagesPart(matcher.group());
745:                                }
746:                            }
747:
748:                            if (!hasSourceTitle) {
749:                                pattern = Pattern.compile(".+\\. Vol");
750:                                matcher = pattern.matcher(citation);
751:                                if (matcher.find()) {
752:                                    String sourceTitle = matcher
753:                                            .group()
754:                                            .substring(
755:                                                    0,
756:                                                    matcher.group().length() - 5);
757:                                    record
758:                                            .createPart(
759:                                                    SourceTitlePartStructure
760:                                                            .getInstance()
761:                                                            .getId(),
762:                                                    sourceTitle);
763:                                }
764:                            }
765:                        } else if (regexName.equals("isiWos")) {
766:                            // ^\d+( \(\d+\))?: \w+-.+(.+)?( \w{3})?( \w{3}-\w{3})?( \d+)? \d{4}$
767:                            if (!hasVolume) {
768:                                pattern = Pattern.compile("^\\d+");
769:                                matcher = pattern.matcher(citation);
770:                                if (matcher.find()) {
771:                                    record.createPart(VolumePartStructure
772:                                            .getInstance().getId(), matcher
773:                                            .group());
774:                                }
775:                            }
776:
777:                            if (!hasIssue) {
778:                                pattern = Pattern.compile("\\(\\d+\\)");
779:                                matcher = pattern.matcher(citation);
780:                                if (matcher.find()) {
781:                                    record.createPart(IssuePartStructure
782:                                            .getInstance().getId(), matcher
783:                                            .group().replaceAll("\\D", ""));
784:                                }
785:                            }
786:
787:                            if (!hasDate) {
788:                                pattern = Pattern
789:                                        .compile("( \\w{3})?( \\w{3}-\\w{3})?( \\d+)? \\d{4}$");
790:                                matcher = pattern.matcher(citation);
791:                                if (matcher.find()) {
792:                                    record.createPart(DatePartStructure
793:                                            .getInstance().getId(), matcher
794:                                            .group().trim());
795:                                }
796:                            }
797:
798:                            if (!hasPages) {
799:                                pattern = Pattern.compile(" \\w+(-\\w+)?");
800:                                matcher = pattern.matcher(citation);
801:                                if (matcher.find()) {
802:                                    createPagesPart(matcher.group().trim());
803:                                }
804:                            }
805:                        } else if (regexName.equals("jstor")) {
806:                            // .+, Vol\. \d+(, No\. \d+)?
807:                            if (!hasVolume) {
808:                                pattern = Pattern.compile("Vol\\. \\d+");
809:                                matcher = pattern.matcher(citation);
810:                                if (matcher.find()) {
811:                                    record.createPart(VolumePartStructure
812:                                            .getInstance().getId(), matcher
813:                                            .group().replaceAll("\\D", ""));
814:                                }
815:                            }
816:
817:                            if (!hasIssue) {
818:                                pattern = Pattern.compile("No\\. \\d+");
819:                                matcher = pattern.matcher(citation);
820:                                if (matcher.find()) {
821:                                    record.createPart(IssuePartStructure
822:                                            .getInstance().getId(), matcher
823:                                            .group().replaceAll("\\D", ""));
824:                                }
825:                            }
826:
827:                            if (!hasSourceTitle) {
828:                                pattern = Pattern.compile(".+, Vol");
829:                                matcher = pattern.matcher(citation);
830:                                if (matcher.find()) {
831:                                    String sourceTitle = matcher
832:                                            .group()
833:                                            .substring(
834:                                                    0,
835:                                                    matcher.group().length() - 5);
836:                                    record
837:                                            .createPart(
838:                                                    SourceTitlePartStructure
839:                                                            .getInstance()
840:                                                            .getId(),
841:                                                    sourceTitle);
842:                                }
843:                            }
844:                        } else if (regexName.equals("eric")) {
845:                            // ^v\d+ n|v\d+ p\d+-\d+( \w{3})?( \w{3}-\w{3})?( \d+)? \d{4}$
846:                            if (!hasVolume) {
847:                                pattern = Pattern.compile("^v\\d+");
848:                                matcher = pattern.matcher(citation);
849:                                if (matcher.find()) {
850:                                    record.createPart(VolumePartStructure
851:                                            .getInstance().getId(), matcher
852:                                            .group().replaceAll("\\D", ""));
853:                                }
854:                            }
855:
856:                            if (!hasIssue) {
857:                                pattern = Pattern.compile(" (n|v)\\d+");
858:                                matcher = pattern.matcher(citation);
859:                                if (matcher.find()) {
860:                                    record.createPart(IssuePartStructure
861:                                            .getInstance().getId(), matcher
862:                                            .group().trim().replaceAll("\\D",
863:                                                    ""));
864:                                }
865:                            }
866:
867:                            if (!hasDate) {
868:                                pattern = Pattern
869:                                        .compile("( \\w{3})?( \\w{3}-\\w{3})?( \\d+)? \\d{4}$");
870:                                matcher = pattern.matcher(citation);
871:                                if (matcher.find()) {
872:                                    record.createPart(DatePartStructure
873:                                            .getInstance().getId(), matcher
874:                                            .group().trim());
875:                                }
876:                            }
877:
878:                            if (!hasPages) {
879:                                pattern = Pattern.compile("\\d+-\\d+");
880:                                matcher = pattern.matcher(citation);
881:                                if (matcher.find()) {
882:                                    createPagesPart(matcher.group());
883:                                }
884:                            }
885:                        } else if (regexName.equals("proquest")) {
886:                            // ^\d+; \d+(; .+)?
887:                            if (!hasVolume) {
888:                                pattern = Pattern.compile("^\\d+");
889:                                matcher = pattern.matcher(citation);
890:                                if (matcher.find()) {
891:                                    record.createPart(VolumePartStructure
892:                                            .getInstance().getId(), matcher
893:                                            .group());
894:                                }
895:                            }
896:
897:                            if (!hasIssue) {
898:                                pattern = Pattern.compile("; \\d+");
899:                                matcher = pattern.matcher(citation);
900:                                if (matcher.find()) {
901:                                    record.createPart(IssuePartStructure
902:                                            .getInstance().getId(), matcher
903:                                            .group().replaceAll("\\D", ""));
904:                                }
905:                            }
906:
907:                            if (!hasSourceTitle) {
908:                                pattern = Pattern.compile("; \\D+$");
909:                                matcher = pattern.matcher(citation);
910:                                if (matcher.find()) {
911:                                    record.createPart(SourceTitlePartStructure
912:                                            .getInstance().getId(), matcher
913:                                            .group().substring(2,
914:                                                    matcher.group().length()));
915:                                }
916:                            }
917:                        } else if (regexName.equals("psycInfo")) {
918:                            // ^Vol \d+\([\w\p{Punct}]+\))
919:                            if (!hasVolume) {
920:                                pattern = Pattern.compile("^Vol \\d+");
921:                                matcher = pattern.matcher(citation);
922:                                if (matcher.find()) {
923:                                    record.createPart(VolumePartStructure
924:                                            .getInstance().getId(), matcher
925:                                            .group().replaceAll("\\D", ""));
926:                                }
927:                            }
928:
929:                            if (!hasIssue) {
930:                                pattern = Pattern.compile("\\(.+\\)");
931:                                matcher = pattern.matcher(citation);
932:                                if (matcher.find()) {
933:                                    record
934:                                            .createPart(
935:                                                    IssuePartStructure
936:                                                            .getInstance()
937:                                                            .getId(),
938:                                                    matcher
939:                                                            .group()
940:                                                            .substring(
941:                                                                    1,
942:                                                                    matcher
943:                                                                            .group()
944:                                                                            .length() - 1));
945:                                }
946:                            }
947:                        }
948:                    } catch (org.osid.repository.RepositoryException re) {
949:                        LOG.warn("doRegexParse() failed getting "
950:                                + "PartStructure Types.", re);
951:                    }
952:                }
953:            }
954:
955:            private void createPagesPart(String text)
956:                    throws org.osid.repository.RepositoryException {
957:                if (text.charAt(0) == ',') {
958:                    // getting a poorly formatted field
959:                    return;
960:                }
961:
962:                record.createPart(PagesPartStructure.getInstance().getId(),
963:                        text);
964:
965:                // get start and end page if possible
966:                String[] pages = text.split("-");
967:
968:                if (pages.length == 0) {
969:                    // cannot create start/end page.
970:                    return;
971:                }
972:
973:                String spage = pages[0].trim();
974:
975:                // delete all non-digit chars (ie: p., pp., etc.)
976:                spage = spage.replaceAll("\\D", "");
977:
978:                // create startPage part
979:                record.createPart(StartPagePartStructure.getInstance().getId(),
980:                        spage);
981:
982:                // end page
983:                if (pages.length == 2) {
984:                    String epage = pages[1].trim();
985:                    epage = epage.replaceAll("\\D", "");
986:                    record.createPart(EndPagePartStructure.getInstance()
987:                            .getId(), epage);
988:                }
989:            }
990:
991:            private String getId() {
992:                return "asset" + Math.random() * 1000
993:                        + System.currentTimeMillis();
994:            }
995:        }
www.java2java.com | Contact Us
All other trademarks are property of their respective owners.