001: package edu.indiana.lib.osid.base.repository.http;
002:
003: import java.util.regex.Matcher;
004: import java.util.regex.Pattern;
005:
/**
 * Holds the regular expressions used to recognize and pick apart a citation
 * string coming from one of a fixed set of named bibliographic data sources.
 *
 * <p>The constructor compares the supplied data source name (ignoring case)
 * against the known names and, on a match, stores the citation together with
 * an overall "does this look like a citation from that source" expression and
 * a set of per-field token expressions (volume, issue, date, year, pages,
 * source title). Tokens that are not configured for a source remain the empty
 * string.
 *
 * <p>How the token expressions and the replace start/end values are applied
 * is decided by the calling code, which is not part of this class.
 *
 * <p>Instances are mutable and carry no synchronization.
 */
public class DataSource {

  /**
   * Data source name as supplied by the caller. Kept public for backward
   * compatibility; prefer {@link #getDataSource()}.
   */
  public String dataSource;

  /** True when the name given to the constructor matched a known source. */
  private boolean dataSourceFound;

  /** Citation text to be examined; stays empty for unknown sources. */
  private String citationString = "";

  // Per-field regular expression fragments; "" means "not configured".
  private String volumeToken = "";
  private String issueToken = "";
  private String dateToken = "";
  private String yearToken = "";
  private String pagesToken = "";
  private String sourceTitleToken = "";

  /** Overall expression tested by {@link #findRegExp()}. */
  private String regularExp = "";

  // Default to 1; the Factiva and Lexis-Nexis configurations set both to 0.
  // NOTE(review): presumably offsets trimmed from a matched token by the
  // caller - confirm against the consuming code.
  private int replaceStartToken = 1;
  private int replaceEndToken = 1;

  private static final String JSTOR = "jstor";
  private static final String JSTOR_REG_EXP = "(.+,)?Vol\\. \\d+, No\\. (\\d+)(/\\d+)?(, .*)? \\((.*)?\\d{4}\\), (pp\\.|p\\.) \\d+(-\\d+)?";

  private static final String PsycINFO = "PsycINFO";
  private static final String PsycINFO_REG_EXP = "(.+)?( Vol \\d+\\(\\d+\\),)?( \\(Vol\\. \\d+\\).)?((\\(\\d{4}\\)|\\w{3} \\d{4}))?(.|,)? (pp\\. \\d+-\\d+|\\d+ pp\\.|\\(pp\\. \\d+-\\d+\\).) (.+)?";

  private static final String EBSCOERIC = "ERIC (EBSCO)";
  private static final String EBSCOERIC_REG_EXP = ".+, v\\d+ n\\d+ p\\d+(-\\d+)? (.+)? \\d{4} \\(.+\\)";

  private static final String Blackwell = "Blackwell Publishing";
  private static final String BLACKWELL_REG_EXP = "Volume \\d+(, Issue \\d+)?(, .*)?, Page \\d+-\\d+, (.*)?\\d{4}";

  private static final String PUBMED = "PubMed";
  private static final String PUBMED_REG_EXP = "(.+)?. \\d{4}(.+)?;\\d+\\(\\d+( .+)?\\):\\d+-\\d+.";

  private static final String ProjectMuse = "Project Muse";
  private static final String ProjectMuse_REG_EXP = ".+,( Volume)? \\d+, Number \\d+,( .+)? \\d{4}, pp. \\d+-\\d+";

  // The constant is named after a journal, but the name it matches is the
  // EBSCO "Academic Search" source.
  private static final String ComputerMusicJournal = "Academic Search (EBSCO)";
  private static final String ComputerMusicJournal_REG_EXP = ".+, \\w{3}\\d{4}, Vol\\. \\d+ Issue \\d+(/\\d+)?, p\\d+-\\d+,.+";

  private static final String ScienceDirect = "Science Direct";
  private static final String ScienceDirect_REG_EXP = ".+, Volume \\d+, Issue \\d+,( \\d+)? (.+)? \\d{4}, Pages \\d+-\\d+";

  private static final String CSAIlluminaERIC = "ERIC (CSA)";
  private static final String CSAIlluminaERIC_REG_EXP = ".+; v\\d+ n\\d+ p\\d+(-\\d+)? (.+)? \\d{4}";

  private static final String ISIZoologicalRecord = "Zoological Record";
  private static final String ISIZoologicalRecord_REG_EXP = "(.+)?( \\d+ \\(.+\\) :)? \\d+-\\d+ (: (.+) )?\\d{4}";

  private static final String OvidBooks = "Ovid Books";
  private static final String OvidBooks_REG_EXP = "(.+)? \\(\\d+(.+)?\\)";

  private static final String Factiva = "Factiva";
  private static final String Factiva_REG_EXP = "(.+)?, \\d+ (.+)? \\d{4}, (.+)?";

  private static final String LexisNexisAcademic = "Lexis-Nexis Academic";
  private static final String LexisNexisAcademic_REG_EXP = "(.+, )?(.+)?\\w+ \\d+, \\d{4}( .+)?,(.+)?((.+)?Pg\\.(.+)?(\\w+)?\\d+,)?(.+)?";

  private static final String FirstSearchWorldCat = "WorldCat";
  private static final String FirstSearchWorldCat_REG_EXP = "(.+)?";

  /**
   * Selects the expression set for the named data source and stores the
   * citation to be examined.
   *
   * <p>If the name matches none of the known sources (or is {@code null}),
   * no expressions are configured, the stored citation stays empty and
   * {@link #findRegExp()} always returns {@code false}.
   *
   * @param dataSourceCode name of the data source, compared ignoring case;
   *                       {@code null} is treated as an unknown source
   * @param citation       citation text to examine; may be {@code null}, in
   *                       which case {@link #findRegExp()} returns
   *                       {@code false}
   */
  public DataSource(String dataSourceCode, String citation) {
    this.dataSource = dataSourceCode;
    dataSourceFound = true;

    if (isJSTOR()) {
      initJstor(citation);
    } else if (isPsycINFO()) {
      initPsycINFO(citation);
    } else if (isEBSCOERIC()) {
      initEBSCOERIC(citation);
    } else if (isBlackwell()) {
      initBlackwell(citation);
    } else if (isPubMed()) {
      initPubMed(citation);
    } else if (isProjectMuse()) {
      initProjectMuse(citation);
    } else if (isComputerMusicJournal()) {
      initComputerMusicJournal(citation);
    } else if (isScienceDirect()) {
      initScienceDirect(citation);
    } else if (isCSAIlluminaERIC()) {
      initCSAIlluminaERIC(citation);
    } else if (isISIZoologicalRecord()) {
      initISIZoologicalRecord(citation);
    } else if (isFirstSearchWorldCat()) {
      initFirstSearchWorldCat(citation);
    } else if (isOvidBooks()) {
      initOvidBooks(citation);
    } else if (isFactiva()) {
      initFactiva(citation);
    } else if (isLexisNexisAcademic()) {
      initLexisNexisAcademic(citation);
    } else {
      dataSourceFound = false;
    }
  }

  // The init* helpers assign fields directly rather than going through the
  // public setters, so a subclass overriding a setter is never invoked on a
  // partially constructed object.

  /** Configures the expressions for JSTOR (see {@code JSTOR_REG_EXP}). */
  private void initJstor(String citation) {
    this.citationString = citation;
    this.regularExp = JSTOR_REG_EXP;
    this.volumeToken = "Vol\\. \\d+";
    this.issueToken = "No\\. (\\d+)(/\\d+)?";
    this.dateToken = "\\((.*)?\\d{4}\\)";
    this.yearToken = "\\s\\d{4}\\)";
    this.pagesToken = "(pp\\.|p\\.) \\d+(-\\d+)?";
  }

  /** Configures the expressions for Blackwell Publishing (see {@code BLACKWELL_REG_EXP}). */
  private void initBlackwell(String citation) {
    this.citationString = citation;
    this.regularExp = BLACKWELL_REG_EXP;
    this.volumeToken = "Volume \\d+";
    this.issueToken = "Issue \\d+";
    this.dateToken = "(.*)?\\d{4}";
    this.pagesToken = "Page \\d+-\\d+";
  }

  /** Configures the expressions for PsycINFO (see {@code PsycINFO_REG_EXP}). */
  private void initPsycINFO(String citation) {
    this.citationString = citation;
    this.regularExp = PsycINFO_REG_EXP;
    this.volumeToken = "Vol \\d+";
    this.issueToken = "\\(\\d+\\)";
    this.dateToken = "\\d{4}(\\(\\d{4}\\)|\\w{3} \\d{4})";
    this.pagesToken = "\\d+-\\d+";
  }

  /** Configures the expressions for ERIC via EBSCO (see {@code EBSCOERIC_REG_EXP}). */
  private void initEBSCOERIC(String citation) {
    this.citationString = citation;
    this.regularExp = EBSCOERIC_REG_EXP;
    this.volumeToken = "v\\d+";
    this.issueToken = "n\\d+";
    this.dateToken = "(.+)? \\d{4}";
    this.pagesToken = "p\\d+(-\\d+)?";
  }

  /**
   * Configures the expressions for PubMed (see {@code PUBMED_REG_EXP}).
   *
   * <p>The volume and issue tokens mirror the "digits, then parenthesized
   * issue" portion of the overall expression. Both are valid regular
   * expressions; the volume token includes the opening parenthesis that
   * follows the volume number so that it does not match the year.
   */
  private void initPubMed(String citation) {
    this.citationString = citation;
    this.regularExp = PUBMED_REG_EXP;
    this.volumeToken = "\\d+\\(";
    this.issueToken = "\\(\\d+( .+)?\\)";
    this.dateToken = "\\d{4}(.+)?";
    this.pagesToken = "\\d+-\\d+.";
  }

  /** Configures the expressions for Project Muse (see {@code ProjectMuse_REG_EXP}). */
  private void initProjectMuse(String citation) {
    this.citationString = citation;
    this.regularExp = ProjectMuse_REG_EXP;
    this.volumeToken = "( Volume)? \\d+";
    this.issueToken = "Number \\d+";
    this.dateToken = "( .+)? \\d{4}";
    this.pagesToken = "pp. \\d+-\\d+";
  }

  /** Configures the expressions for Academic Search (EBSCO) (see {@code ComputerMusicJournal_REG_EXP}). */
  private void initComputerMusicJournal(String citation) {
    this.citationString = citation;
    this.regularExp = ComputerMusicJournal_REG_EXP;
    this.volumeToken = "Vol\\. \\d+";
    this.issueToken = "Issue \\d+(/\\d+)?";
    this.dateToken = "\\w{3}\\d{4}";
    this.pagesToken = "p\\d+-\\d+";
  }

  /** Configures the expressions for Science Direct (see {@code ScienceDirect_REG_EXP}). */
  private void initScienceDirect(String citation) {
    this.citationString = citation;
    this.regularExp = ScienceDirect_REG_EXP;
    this.volumeToken = "Volume \\d+";
    this.issueToken = "Issue \\d+";
    this.dateToken = "( \\d+)? (.+)? \\d{4}";
    this.pagesToken = "Pages \\d+-\\d+";
  }

  /** Configures the expressions for ERIC via CSA (see {@code CSAIlluminaERIC_REG_EXP}). */
  private void initCSAIlluminaERIC(String citation) {
    this.citationString = citation;
    this.regularExp = CSAIlluminaERIC_REG_EXP;
    this.volumeToken = "v\\d+";
    this.issueToken = "n\\d+";
    this.dateToken = "(.+)? \\d{4}";
    this.pagesToken = "p\\d+(-\\d+)?";
  }

  /** Configures the expressions for Zoological Record (see {@code ISIZoologicalRecord_REG_EXP}). */
  private void initISIZoologicalRecord(String citation) {
    this.citationString = citation;
    this.regularExp = ISIZoologicalRecord_REG_EXP;
    this.volumeToken = "\\d+";
    this.issueToken = "\\(.+\\)";
    this.dateToken = "\\d{4}";
    this.pagesToken = "\\d+-\\d+";
  }

  /**
   * Configures the expressions for WorldCat (see
   * {@code FirstSearchWorldCat_REG_EXP}). No volume or issue token is set;
   * a source title token is set instead.
   */
  private void initFirstSearchWorldCat(String citation) {
    this.citationString = citation;
    this.regularExp = FirstSearchWorldCat_REG_EXP;
    this.sourceTitleToken = "(.+)?";
    this.dateToken = "\\d{4}";
    this.pagesToken = "\\d+-\\d+";
  }

  /**
   * Configures the expressions for Ovid Books (see {@code OvidBooks_REG_EXP}).
   * Volume and issue tokens are explicitly left empty.
   */
  private void initOvidBooks(String citation) {
    this.citationString = citation;
    this.regularExp = OvidBooks_REG_EXP;
    this.volumeToken = "";
    this.issueToken = "";
    this.dateToken = "\\d{4}";
    this.pagesToken = "\\d+-\\d+";
  }

  /**
   * Configures the expressions for Factiva (see {@code Factiva_REG_EXP}).
   * Also resets both replace positions to 0.
   */
  private void initFactiva(String citation) {
    this.citationString = citation;
    this.regularExp = Factiva_REG_EXP;
    this.dateToken = "\\d+ (.+)? \\d{4}";
    this.pagesToken = "\\d+-\\d+";
    this.replaceStartToken = 0;
    this.replaceEndToken = 0;
  }

  /**
   * Configures the expressions for Lexis-Nexis Academic (see
   * {@code LexisNexisAcademic_REG_EXP}). Also resets both replace positions
   * to 0.
   */
  private void initLexisNexisAcademic(String citation) {
    this.citationString = citation;
    this.regularExp = LexisNexisAcademic_REG_EXP;
    this.sourceTitleToken = "(.+, )?(.+)?\\w+ \\d+, \\d{4}( .+)?,";
    this.dateToken = "\\w+ \\d+, \\d{4}?";
    this.replaceStartToken = 0;
    this.replaceEndToken = 0;
    this.pagesToken = "Pg\\.(.+)?(\\w+)?\\d+,";
  }

  /**
   * Tests whether the overall expression occurs anywhere in the citation.
   *
   * <p>This is a "find" (substring) test, not a whole-string match.
   *
   * @return {@code true} if the data source was recognized by the
   *         constructor and the current overall expression is found in the
   *         current citation; {@code false} if the source was not
   *         recognized, if the citation or expression is {@code null}, or if
   *         there is no match
   * @throws java.util.regex.PatternSyntaxException if an invalid expression
   *         was installed through {@link #setRegularExp(String)}
   */
  public boolean findRegExp() {
    if (!dataSourceFound) {
      return false;
    }

    String expression = this.getRegularExp();
    String citation = this.getCitationString();
    if (expression == null || citation == null) {
      // Nothing to compile or nothing to search: report "no match" rather
      // than failing with a NullPointerException.
      return false;
    }

    Matcher matcher = Pattern.compile(expression).matcher(citation);
    return matcher.find();
  }

  // Name checks: the constant is the receiver so a null dataSource simply
  // fails to match instead of throwing.

  private boolean isJSTOR() {
    return JSTOR.equalsIgnoreCase(this.dataSource);
  }

  private boolean isPsycINFO() {
    return PsycINFO.equalsIgnoreCase(this.dataSource);
  }

  private boolean isEBSCOERIC() {
    return EBSCOERIC.equalsIgnoreCase(this.dataSource);
  }

  private boolean isBlackwell() {
    return Blackwell.equalsIgnoreCase(this.dataSource);
  }

  private boolean isPubMed() {
    return PUBMED.equalsIgnoreCase(this.dataSource);
  }

  private boolean isProjectMuse() {
    return ProjectMuse.equalsIgnoreCase(this.dataSource);
  }

  private boolean isComputerMusicJournal() {
    return ComputerMusicJournal.equalsIgnoreCase(this.dataSource);
  }

  private boolean isScienceDirect() {
    return ScienceDirect.equalsIgnoreCase(this.dataSource);
  }

  private boolean isCSAIlluminaERIC() {
    return CSAIlluminaERIC.equalsIgnoreCase(this.dataSource);
  }

  private boolean isISIZoologicalRecord() {
    return ISIZoologicalRecord.equalsIgnoreCase(this.dataSource);
  }

  private boolean isFirstSearchWorldCat() {
    return FirstSearchWorldCat.equalsIgnoreCase(this.dataSource);
  }

  private boolean isOvidBooks() {
    return OvidBooks.equalsIgnoreCase(this.dataSource);
  }

  private boolean isFactiva() {
    return Factiva.equalsIgnoreCase(this.dataSource);
  }

  private boolean isLexisNexisAcademic() {
    return LexisNexisAcademic.equalsIgnoreCase(this.dataSource);
  }

  /** @return the data source name as currently stored */
  public String getDataSource() {
    return dataSource;
  }

  /**
   * Replaces the stored data source name. This does not re-select the
   * expressions; those are chosen only by the constructor.
   *
   * @param dataSource new data source name
   */
  public void setDataSource(String dataSource) {
    this.dataSource = dataSource;
  }

  /** @return the date token expression, or "" if none is configured */
  public String getDateToken() {
    return dateToken;
  }

  /** @param dateToken new date token expression */
  public void setDateToken(String dateToken) {
    this.dateToken = dateToken;
  }

  /** @return the year token expression, or "" if none is configured */
  public String getYearToken() {
    return yearToken;
  }

  /** @param yearToken new year token expression */
  public void setYearToken(String yearToken) {
    this.yearToken = yearToken;
  }

  /** @return the issue token expression, or "" if none is configured */
  public String getIssueToken() {
    return issueToken;
  }

  /** @param issueToken new issue token expression */
  public void setIssueToken(String issueToken) {
    this.issueToken = issueToken;
  }

  /** @return the pages token expression, or "" if none is configured */
  public String getPagesToken() {
    return pagesToken;
  }

  /** @param pagesToken new pages token expression */
  public void setPagesToken(String pagesToken) {
    this.pagesToken = pagesToken;
  }

  /** @return the source title token expression, or "" if none is configured */
  public String getSourceTitleToken() {
    return sourceTitleToken;
  }

  /** @param sourceTitleToken new source title token expression */
  public void setSourceTitleToken(String sourceTitleToken) {
    this.sourceTitleToken = sourceTitleToken;
  }

  /** @return the volume token expression, or "" if none is configured */
  public String getVolumeToken() {
    return volumeToken;
  }

  /** @param volumeToken new volume token expression */
  public void setVolumeToken(String volumeToken) {
    this.volumeToken = volumeToken;
  }

  /** @return the citation text being examined ("" for unknown sources) */
  public String getCitationString() {
    return citationString;
  }

  /**
   * @param citationRegExp new citation text to examine (despite the
   *                       parameter name, this is the citation itself, not
   *                       an expression)
   */
  public void setCitationString(String citationRegExp) {
    this.citationString = citationRegExp;
  }

  /** @return the overall expression used by {@link #findRegExp()} */
  public String getRegularExp() {
    return regularExp;
  }

  /** @param regularExp new overall expression for {@link #findRegExp()} */
  public void setRegularExp(String regularExp) {
    this.regularExp = regularExp;
  }

  /** @return the replace end position (1 by default, 0 for Factiva and Lexis-Nexis) */
  public int getReplaceEndToken() {
    return replaceEndToken;
  }

  /** @param replaceEndToken new replace end position */
  public void setReplaceEndToken(int replaceEndToken) {
    this.replaceEndToken = replaceEndToken;
  }

  /** @return the replace start position (1 by default, 0 for Factiva and Lexis-Nexis) */
  public int getReplaceStartToken() {
    return replaceStartToken;
  }

  /** @param replaceStartToken new replace start position */
  public void setReplaceStartToken(int replaceStartToken) {
    this.replaceStartToken = replaceStartToken;
  }

}
|