001: //=============================================================================
002: //=== Copyright (C) 2001-2007 Food and Agriculture Organization of the
003: //=== United Nations (FAO-UN), United Nations World Food Programme (WFP)
004: //=== and United Nations Environment Programme (UNEP)
005: //===
006: //=== This program is free software; you can redistribute it and/or modify
007: //=== it under the terms of the GNU General Public License as published by
008: //=== the Free Software Foundation; either version 2 of the License, or (at
009: //=== your option) any later version.
010: //===
011: //=== This program is distributed in the hope that it will be useful, but
012: //=== WITHOUT ANY WARRANTY; without even the implied warranty of
013: //=== MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
014: //=== General Public License for more details.
015: //===
016: //=== You should have received a copy of the GNU General Public License
017: //=== along with this program; if not, write to the Free Software
018: //=== Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
019: //===
020: //=== Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2,
021: //=== Rome - Italy. email: geonetwork@osgeo.org
022: //==============================================================================
023:
024: package org.fao.geonet.kernel.harvest.harvester.oaipmh;
025:
026: import java.io.File;
027: import java.net.URL;
028: import java.util.HashSet;
029: import java.util.Set;
030: import jeeves.exceptions.OperationAbortedEx;
031: import jeeves.interfaces.Logger;
032: import jeeves.resources.dbms.Dbms;
033: import jeeves.server.context.ServiceContext;
034: import jeeves.utils.Xml;
035: import org.fao.geonet.GeonetContext;
036: import org.fao.geonet.constants.Geonet;
037: import org.fao.geonet.kernel.DataManager;
038: import org.fao.geonet.kernel.harvest.harvester.CategoryMapper;
039: import org.fao.geonet.kernel.harvest.harvester.GroupMapper;
040: import org.fao.geonet.kernel.harvest.harvester.Privileges;
041: import org.fao.geonet.kernel.harvest.harvester.UUIDMapper;
042: import org.fao.geonet.util.ISODate;
043: import org.fao.oaipmh.OaiPmh;
044: import org.fao.oaipmh.exceptions.NoRecordsMatchException;
045: import org.fao.oaipmh.requests.GetRecordRequest;
046: import org.fao.oaipmh.requests.ListIdentifiersRequest;
047: import org.fao.oaipmh.requests.Transport;
048: import org.fao.oaipmh.responses.GetRecordResponse;
049: import org.fao.oaipmh.responses.Header;
050: import org.fao.oaipmh.responses.ListIdentifiersResponse;
051: import org.jdom.Element;
052: import org.jdom.JDOMException;
053:
054: //=============================================================================
055:
056: class Harvester {
057: //--------------------------------------------------------------------------
058: //---
059: //--- Constructor
060: //---
061: //--------------------------------------------------------------------------
062:
063: public Harvester(Logger log, ServiceContext context, Dbms dbms,
064: OaiPmhParams params) {
065: this .log = log;
066: this .context = context;
067: this .dbms = dbms;
068: this .params = params;
069:
070: result = new OaiPmhResult();
071:
072: GeonetContext gc = (GeonetContext) context
073: .getHandlerContext(Geonet.CONTEXT_NAME);
074: dataMan = gc.getDataManager();
075: }
076:
077: //---------------------------------------------------------------------------
078: //---
079: //--- API methods
080: //---
081: //---------------------------------------------------------------------------
082:
083: public OaiPmhResult harvest() throws Exception {
084: ListIdentifiersRequest req = new ListIdentifiersRequest();
085: req.setValidationSchema(new File(context.getAppPath()
086: + Geonet.SchemaPath.OAI_PMH));
087:
088: Transport t = req.getTransport();
089: t.setUrl(new URL(params.url));
090:
091: if (params.useAccount)
092: t.setCredentials(params.username, params.password);
093:
094: //--- perform all searches
095:
096: Set<RecordInfo> records = new HashSet<RecordInfo>();
097:
098: for (Search s : params.getSearches())
099: records.addAll(search(req, s));
100:
101: if (params.isSearchEmpty())
102: records.addAll(search(req, Search.createEmptySearch()));
103:
104: log.info("Total records processed in all searches :"
105: + records.size());
106:
107: //--- align local node
108:
109: if (records.size() != 0)
110: align(t, records);
111:
112: return result;
113: }
114:
115: //---------------------------------------------------------------------------
116: //---
117: //--- Private methods
118: //---
119: //---------------------------------------------------------------------------
120:
121: private Set<RecordInfo> search(ListIdentifiersRequest req, Search s)
122: throws OperationAbortedEx {
123: //--- setup search parameters
124:
125: if (s.from.length() != 0)
126: req.setFrom(new ISODate(s.from));
127: else
128: req.setFrom(null);
129:
130: if (s.until.length() != 0)
131: req.setUntil(new ISODate(s.from));
132: else
133: req.setUntil(null);
134:
135: if (s.set.length() != 0)
136: req.setSet(s.set);
137: else
138: req.setSet(null);
139:
140: req.setMetadataPrefix(s.prefix);
141:
142: //--- execute request and loop on response
143:
144: Set<RecordInfo> records = new HashSet<RecordInfo>();
145:
146: log.info("Searching on : " + params.name);
147:
148: try {
149: ListIdentifiersResponse response = req.execute();
150:
151: while (response.hasNext()) {
152: Header h = response.next();
153:
154: if (!h.isDeleted())
155: records.add(new RecordInfo(h, s.prefix));
156: }
157:
158: log
159: .info("Records added to result list : "
160: + records.size());
161:
162: return records;
163: } catch (NoRecordsMatchException e) {
164: //--- return gracefully
165: return records;
166: }
167:
168: catch (Exception e) {
169: log.warning("Raised exception when searching : " + e);
170: throw new OperationAbortedEx(
171: "Raised exception when searching", e);
172: }
173: }
174:
175: //---------------------------------------------------------------------------
176:
177: private void align(Transport t, Set<RecordInfo> records)
178: throws Exception {
179: log.info("Start of alignment for : " + params.name);
180:
181: //-----------------------------------------------------------------------
182: //--- retrieve all local categories and groups
183: //--- retrieve harvested uuids for given harvesting node
184:
185: localCateg = new CategoryMapper(dbms);
186: localGroups = new GroupMapper(dbms);
187: localUuids = new UUIDMapper(dbms, params.uuid);
188: dbms.commit();
189:
190: //-----------------------------------------------------------------------
191: //--- remove old metadata
192:
193: for (String uuid : localUuids.getUUIDs())
194: if (!exists(records, uuid)) {
195: String id = localUuids.getID(uuid);
196:
197: log.debug(" - Removing old metadata with local id:"
198: + id);
199: dataMan.deleteMetadata(dbms, id);
200: dbms.commit();
201: result.locallyRemoved++;
202: }
203:
204: //-----------------------------------------------------------------------
205: //--- insert/update new metadata
206:
207: for (RecordInfo ri : records) {
208: result.total++;
209:
210: String id = localUuids.getID(ri.id);
211:
212: if (id == null)
213: addMetadata(t, ri);
214: else
215: updateMetadata(t, ri, id);
216: }
217:
218: log.info("End of alignment for : " + params.name);
219: }
220:
221: //--------------------------------------------------------------------------
222: /** Return true if the uuid is present in the remote records */
223:
224: private boolean exists(Set<RecordInfo> records, String uuid) {
225: for (RecordInfo ri : records)
226: if (uuid.equals(ri.id))
227: return true;
228:
229: return false;
230: }
231:
232: //--------------------------------------------------------------------------
233: //---
234: //--- Private methods : addMetadata
235: //---
236: //--------------------------------------------------------------------------
237:
238: private void addMetadata(Transport t, RecordInfo ri)
239: throws Exception {
240: Element md = retrieveMetadata(t, ri);
241:
242: if (md == null)
243: return;
244:
245: //--- schema handled check already done
246:
247: String schema = dataMan.autodetectSchema(md);
248:
249: log.debug(" - Adding metadata with remote id : " + ri.id);
250:
251: String id = dataMan.insertMetadataExt(dbms, schema, md, context
252: .getSerialFactory(), params.uuid, ri.changeDate
253: .toString(), ri.changeDate.toString(), ri.id, 1, null);
254:
255: int iId = Integer.parseInt(id);
256:
257: dataMan.setTemplate(dbms, iId, "n", null);
258: dataMan.setHarvested(dbms, iId, params.uuid, null);
259:
260: addPrivileges(id);
261: addCategories(id);
262:
263: dbms.commit();
264: dataMan.indexMetadata(dbms, id);
265: result.added++;
266: }
267:
268: //--------------------------------------------------------------------------
269:
270: private Element retrieveMetadata(Transport t, RecordInfo ri) {
271: try {
272: log.debug(" - Getting remote metadata with id : " + ri.id);
273:
274: GetRecordRequest req = new GetRecordRequest();
275: req.setValidationSchema(new File(context.getAppPath()
276: + Geonet.SchemaPath.OAI_PMH));
277: req.setTransport(t);
278: req.setIdentifier(ri.id);
279: req.setMetadataPrefix(ri.prefix);
280:
281: GetRecordResponse res = req.execute();
282:
283: Element md = res.getRecord().getMetadata();
284:
285: log.debug(" - Record got:\n" + Xml.getString(md));
286:
287: if (isOaiDc(md)) {
288: log.debug(" - Converting oai_dc to dublin core");
289: md = toDublinCore(md);
290:
291: if (md == null)
292: return null;
293: }
294:
295: String schema = dataMan.autodetectSchema(md);
296:
297: if (schema == null) {
298: log
299: .warning("Skipping metadata with unknown schema. Remote id : "
300: + ri.id);
301: result.unknownSchema++;
302: } else {
303: if (!params.validate || validates(schema, md))
304: return (Element) md.detach();
305:
306: log
307: .warning("Skipping metadata that does not validate. Remote id : "
308: + ri.id);
309: result.doesNotValidate++;
310: }
311: }
312:
313: catch (JDOMException e) {
314: log
315: .warning("Skipping metadata with bad XML format. Remote id : "
316: + ri.id);
317: result.badFormat++;
318: }
319:
320: catch (Exception e) {
321: log
322: .warning("Raised exception while getting metadata file : "
323: + e);
324: result.unretrievable++;
325: }
326:
327: //--- we don't raise any exception here. Just try to go on
328: return null;
329: }
330:
331: //--------------------------------------------------------------------------
332:
333: private boolean isOaiDc(Element md) {
334: return (md.getName().equals("dc"))
335: && (md.getNamespace().equals(OaiPmh.Namespaces.OAI_DC));
336: }
337:
338: //--------------------------------------------------------------------------
339:
340: private Element toDublinCore(Element md) {
341: String styleSheet = context.getAppPath()
342: + "conversion/oai_dc-to-dublin-core/main.xsl";
343:
344: try {
345: return Xml.transform(md, styleSheet);
346: } catch (Exception e) {
347: log.warning("Cannot convert oai_dc to dublin core : " + e);
348: return null;
349: }
350: }
351:
352: //--------------------------------------------------------------------------
353:
354: private boolean validates(String schema, Element md) {
355: try {
356: dataMan.validate(schema, md);
357: return true;
358: } catch (Exception e) {
359: return false;
360: }
361: }
362:
363: //--------------------------------------------------------------------------
364: //--- Categories
365: //--------------------------------------------------------------------------
366:
367: private void addCategories(String id) throws Exception {
368: for (String catId : params.getCategories()) {
369: String name = localCateg.getName(catId);
370:
371: if (name == null)
372: log.debug(" - Skipping removed category with id:"
373: + catId);
374: else {
375: log.debug(" - Setting category : " + name);
376: dataMan.setCategory(dbms, id, catId);
377: }
378: }
379: }
380:
381: //--------------------------------------------------------------------------
382: //--- Privileges
383: //--------------------------------------------------------------------------
384:
385: private void addPrivileges(String id) throws Exception {
386: for (Privileges priv : params.getPrivileges()) {
387: String name = localGroups.getName(priv.getGroupId());
388:
389: if (name == null)
390: log.debug(" - Skipping removed group with id:"
391: + priv.getGroupId());
392: else {
393: log.debug(" - Setting privileges for group : "
394: + name);
395:
396: for (int opId : priv.getOperations()) {
397: name = dataMan.getAccessManager().getPrivilegeName(
398: opId);
399:
400: //--- allow only: view, dynamic, featured
401: if (opId == 0 || opId == 5 || opId == 6) {
402: log.debug(" --> " + name);
403: dataMan.setOperation(dbms, id, priv
404: .getGroupId(), opId + "");
405: } else
406: log.debug(" --> " + name + " (skipped)");
407: }
408: }
409: }
410: }
411:
412: //--------------------------------------------------------------------------
413: //---
414: //--- Private methods : updateMetadata
415: //---
416: //--------------------------------------------------------------------------
417:
418: private void updateMetadata(Transport t, RecordInfo ri, String id)
419: throws Exception {
420: String date = localUuids.getChangeDate(ri.id);
421:
422: if (!ri.isMoreRecentThan(date)) {
423: log.debug(" - Metadata XML not changed for remote id : "
424: + ri.id);
425: result.unchanged++;
426: } else {
427: log.debug(" - Updating local metadata for remote id : "
428: + ri.id);
429:
430: Element md = retrieveMetadata(t, ri);
431:
432: if (md == null)
433: return;
434:
435: dataMan.updateMetadataExt(dbms, id, md, ri.changeDate
436: .toString());
437:
438: //--- the administrator could change privileges and categories using the
439: //--- web interface so we have to re-set both
440:
441: dbms.execute(
442: "DELETE FROM OperationAllowed WHERE metadataId=?",
443: Integer.parseInt(id));
444: addPrivileges(id);
445:
446: dbms.execute(
447: "DELETE FROM MetadataCateg WHERE metadataId=?",
448: Integer.parseInt(id));
449: addCategories(id);
450:
451: dbms.commit();
452: dataMan.indexMetadata(dbms, id);
453: result.updated++;
454: }
455: }
456:
457: //---------------------------------------------------------------------------
458: //---
459: //--- Variables
460: //---
461: //---------------------------------------------------------------------------
462:
463: private Logger log;
464: private ServiceContext context;
465: private Dbms dbms;
466: private OaiPmhParams params;
467: private DataManager dataMan;
468: private CategoryMapper localCateg;
469: private GroupMapper localGroups;
470: private UUIDMapper localUuids;
471: private OaiPmhResult result;
472: }
473:
474: //=============================================================================
|