001: //=============================================================================
002: //=== Copyright (C) 2001-2007 Food and Agriculture Organization of the
003: //=== United Nations (FAO-UN), United Nations World Food Programme (WFP)
004: //=== and United Nations Environment Programme (UNEP)
005: //===
006: //=== This program is free software; you can redistribute it and/or modify
007: //=== it under the terms of the GNU General Public License as published by
008: //=== the Free Software Foundation; either version 2 of the License, or (at
009: //=== your option) any later version.
010: //===
011: //=== This program is distributed in the hope that it will be useful, but
012: //=== WITHOUT ANY WARRANTY; without even the implied warranty of
013: //=== MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
014: //=== General Public License for more details.
015: //===
016: //=== You should have received a copy of the GNU General Public License
017: //=== along with this program; if not, write to the Free Software
018: //=== Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
019: //===
020: //=== Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2,
021: //=== Rome - Italy. email: geonetwork@osgeo.org
022: //==============================================================================
023:
024: package org.fao.geonet.kernel.harvest.harvester.csw;
025:
026: import java.net.URL;
027: import java.util.ArrayList;
028: import java.util.HashSet;
029: import java.util.List;
030: import java.util.Set;
031: import jeeves.exceptions.BadParameterEx;
032: import jeeves.exceptions.OperationAbortedEx;
033: import jeeves.interfaces.Logger;
034: import jeeves.resources.dbms.Dbms;
035: import jeeves.server.context.ServiceContext;
036: import jeeves.utils.Xml;
037: import jeeves.utils.XmlRequest;
038: import org.fao.geonet.csw.common.Csw;
039: import org.fao.geonet.csw.common.Csw.ConstraintLanguage;
040: import org.fao.geonet.csw.common.Csw.ElementSetName;
041: import org.fao.geonet.csw.common.Csw.ResultType;
042: import org.fao.geonet.csw.common.exceptions.CatalogException;
043: import org.fao.geonet.csw.common.requests.CatalogRequest;
044: import org.fao.geonet.csw.common.requests.GetRecordsRequest;
045: import org.fao.geonet.csw.common.util.CswServer;
046: import org.fao.geonet.kernel.harvest.harvester.RecordInfo;
047: import org.fao.geonet.lib.Lib;
048: import org.jdom.Element;
049: import org.jdom.Namespace;
050:
051: //=============================================================================
052:
053: class Harvester {
054: //--------------------------------------------------------------------------
055: //---
056: //--- Constructor
057: //---
058: //--------------------------------------------------------------------------
059:
060: public Harvester(Logger log, ServiceContext context, Dbms dbms,
061: CswParams params) {
062: this .log = log;
063: this .context = context;
064: this .dbms = dbms;
065: this .params = params;
066: }
067:
068: //---------------------------------------------------------------------------
069: //---
070: //--- API methods
071: //---
072: //---------------------------------------------------------------------------
073:
074: public CswResult harvest() throws Exception {
075: log.info("Retrieving capabilities file for : " + params.name);
076:
077: CswServer server = retrieveCapabilities(log);
078:
079: //--- perform all searches
080:
081: Set<RecordInfo> records = new HashSet<RecordInfo>();
082:
083: for (Search s : params.getSearches())
084: records.addAll(search(server, s));
085:
086: if (params.isSearchEmpty())
087: records.addAll(search(server, Search.createEmptySearch()));
088:
089: log.info("Total records processed in all searches :"
090: + records.size());
091:
092: //--- align local node
093:
094: Aligner aligner = new Aligner(log, context, dbms, server,
095: params);
096:
097: return aligner.align(records);
098: }
099:
100: //---------------------------------------------------------------------------
101:
102: private CswServer retrieveCapabilities(Logger log) throws Exception {
103: if (!Lib.net.isUrlValid(params.capabUrl))
104: throw new BadParameterEx("Capabilities URL",
105: params.capabUrl);
106:
107: XmlRequest req = new XmlRequest(new URL(params.capabUrl));
108:
109: Lib.net.setupProxy(context, req);
110:
111: if (params.useAccount)
112: req.setCredentials(params.username, params.password);
113:
114: Element capabil = req.execute();
115:
116: log.debug("Capabilities:\n" + Xml.getString(capabil));
117:
118: if (capabil.getName().equals("ExceptionReport"))
119: CatalogException.unmarshal(capabil);
120:
121: CswServer server = new CswServer(capabil);
122:
123: if (!checkOperation(log, server, "GetRecords"))
124: throw new OperationAbortedEx(
125: "GetRecords operation not found");
126:
127: if (!checkOperation(log, server, "GetRecordById"))
128: throw new OperationAbortedEx(
129: "GetRecordById operation not found");
130:
131: return server;
132: }
133:
134: //---------------------------------------------------------------------------
135:
136: private boolean checkOperation(Logger log, CswServer server,
137: String name) {
138: CswServer.Operation oper = server.getOperation(name);
139:
140: if (oper == null) {
141: log.warning("Operation not present in capabilities : "
142: + name);
143: return false;
144: }
145:
146: if (oper.getUrl == null && oper.postUrl == null) {
147: log.warning("Operation has no GET and POST bindings : "
148: + name);
149: return false;
150: }
151:
152: return true;
153: }
154:
155: //---------------------------------------------------------------------------
156:
157: private Set<RecordInfo> search(CswServer server, Search s)
158: throws Exception {
159: int start = 1;
160: int max = 10;
161:
162: GetRecordsRequest request = new GetRecordsRequest();
163:
164: request.setResultType(ResultType.RESULTS);
165: request.setElementSetName(ElementSetName.SUMMARY);
166: request.setMaxRecords(max + "");
167:
168: CswServer.Operation oper = server
169: .getOperation(CswServer.GET_RECORDS);
170:
171: if (oper.postUrl != null) {
172: request.setUrl(oper.postUrl);
173: request.setConstraintLanguage(ConstraintLanguage.FILTER);
174: request.setConstraintLangVersion("1.1.0");
175: request.setConstraint(getFilterConstraint(s));
176: request.setMethod(CatalogRequest.Method.POST);
177: } else {
178: request.setUrl(oper.getUrl);
179: request.setConstraintLanguage(ConstraintLanguage.CQL);
180: request.setConstraintLangVersion("1.0");
181: request.setConstraint(getCqlConstraint(s));
182: request.setMethod(CatalogRequest.Method.GET);
183: }
184:
185: if (params.useAccount)
186: request.setCredentials(params.username, params.password);
187:
188: Set<RecordInfo> records = new HashSet<RecordInfo>();
189:
190: while (true) {
191: request.setStartPosition(start + "");
192:
193: Element response = doSearch(request, start, max);
194: Element results = response.getChild("SearchResults",
195: Csw.NAMESPACE_CSW);
196:
197: if (results == null)
198: throw new OperationAbortedEx("Missing 'SearchResults'",
199: response);
200:
201: List list = results.getChildren();
202:
203: int counter = 0;
204:
205: for (Object e : list) {
206: Element record = (Element) e;
207: RecordInfo recInfo = getRecordInfo(record);
208:
209: if (recInfo != null)
210: records.add(recInfo);
211:
212: counter++;
213: }
214:
215: //--- check to see if we have to perform other searches
216:
217: int recCount = getRecordCount(results);
218:
219: log.debug("Records declared in response : " + recCount);
220: log.debug("Records found in response : " + counter);
221:
222: if (start + max > recCount)
223: break;
224:
225: start += max;
226: }
227:
228: log.info("Records added to result list : " + records.size());
229:
230: return records;
231: }
232:
233: //---------------------------------------------------------------------------
234:
235: private String getFilterConstraint(Search s) {
236: //--- collect queriables
237:
238: ArrayList<Element> queriables = new ArrayList<Element>();
239:
240: buildFilterQueryable(queriables, "AnyText", s.freeText);
241: buildFilterQueryable(queriables, "dc:title", s.title);
242: buildFilterQueryable(queriables, "dct:abstract", s.abstrac);
243: buildFilterQueryable(queriables, "dc:subject", s.subject);
244:
245: //--- build filter expression
246:
247: if (queriables.isEmpty())
248: return null;
249:
250: Element filter = new Element("Filter", Csw.NAMESPACE_OGC);
251:
252: if (queriables.size() == 1)
253: filter.addContent(queriables.get(0));
254: else {
255: Element and = new Element("And", Csw.NAMESPACE_OGC);
256:
257: for (Element prop : queriables)
258: and.addContent(prop);
259:
260: filter.addContent(and);
261: }
262:
263: return Xml.getString(filter);
264: }
265:
266: //---------------------------------------------------------------------------
267:
268: private void buildFilterQueryable(List<Element> queryables,
269: String name, String value) {
270: if (value.length() == 0)
271: return;
272:
273: Element prop = new Element("PropertyIsEqualTo",
274: Csw.NAMESPACE_OGC);
275: Element propName = new Element("PropertyName",
276: Csw.NAMESPACE_OGC);
277: Element literal = new Element("Literal", Csw.NAMESPACE_OGC);
278:
279: propName.setText(name);
280: literal.setText(value);
281:
282: prop.addContent(propName);
283: prop.addContent(literal);
284:
285: queryables.add(prop);
286: }
287:
288: //---------------------------------------------------------------------------
289:
290: private String getCqlConstraint(Search s) {
291: //--- collect queriables
292:
293: ArrayList<String> queryables = new ArrayList<String>();
294:
295: buildCqlQueryable(queryables, "AnyText", s.freeText);
296: buildCqlQueryable(queryables, "dc:title", s.title);
297: buildCqlQueryable(queryables, "dct:abstract", s.abstrac);
298: buildCqlQueryable(queryables, "dc:subject", s.subject);
299:
300: //--- build CQL query
301:
302: StringBuffer sb = new StringBuffer();
303:
304: for (int i = 0; i < queryables.size(); i++) {
305: sb.append(queryables.get(i));
306:
307: if (i < queryables.size() - 1)
308: sb.append(" AND ");
309: }
310:
311: return (queryables.size() == 0) ? null : sb.toString();
312: }
313:
314: //---------------------------------------------------------------------------
315:
316: private void buildCqlQueryable(List<String> queryables,
317: String name, String value) {
318: if (value.length() != 0)
319: queryables.add("(" + name + " = " + value + ")");
320: }
321:
322: //---------------------------------------------------------------------------
323:
324: private Element doSearch(CatalogRequest request, int start, int max)
325: throws Exception {
326: try {
327: log.info("Searching on : " + params.name + " (" + start
328: + ".." + max + ")");
329: Element response = request.execute();
330: log.debug("Search results:\n" + Xml.getString(response));
331:
332: return response;
333: } catch (Exception e) {
334: log.warning("Raised exception when searching : " + e);
335: throw new OperationAbortedEx(
336: "Raised exception when searching", e);
337: }
338: }
339:
340: //---------------------------------------------------------------------------
341:
342: private int getRecordCount(Element results)
343: throws OperationAbortedEx {
344: String numRec = results
345: .getAttributeValue("numberOfRecordsMatched");
346:
347: if (numRec == null)
348: throw new OperationAbortedEx(
349: "Missing 'numberOfRecordsMatched' in 'SearchResults'");
350:
351: if (!Lib.type.isInteger(numRec))
352: throw new OperationAbortedEx(
353: "Bad value for 'numberOfRecordsMatched'", numRec);
354:
355: return Integer.parseInt(numRec);
356: }
357:
358: //---------------------------------------------------------------------------
359:
360: private RecordInfo getRecordInfo(Element record) {
361: String name = record.getName();
362:
363: if (!name.equals("SummaryRecord")) {
364: log
365: .warning("Skipped record not in 'SummaryRecord' format : "
366: + name);
367: return null;
368: }
369:
370: Namespace dc = Namespace
371: .getNamespace("http://purl.org/dc/elements/1.1/");
372: Namespace dct = Namespace
373: .getNamespace("http://purl.org/dc/terms/");
374:
375: String identif = record.getChildText("identifier", dc);
376: String modified = record.getChildText("modified", dct);
377:
378: if (identif == null) {
379: log
380: .warning("Skipped record with no 'dc:identifier' element : "
381: + name);
382: return null;
383: }
384:
385: return new RecordInfo(identif, modified);
386: }
387:
388: //---------------------------------------------------------------------------
389: //---
390: //--- Variables
391: //---
392: //---------------------------------------------------------------------------
393:
394: private Logger log;
395: private Dbms dbms;
396: private CswParams params;
397: private ServiceContext context;
398: }
399:
400: //=============================================================================
|