001: /**
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */package org.apache.solr.request;
017:
018: import org.apache.solr.util.NamedList;
019: import org.apache.solr.util.XML;
020: import org.apache.solr.search.SolrIndexSearcher;
021: import org.apache.solr.search.DocList;
022: import org.apache.solr.search.DocIterator;
023: import org.apache.solr.search.DocSet;
024: import org.apache.solr.schema.IndexSchema;
025: import org.apache.solr.schema.SchemaField;
026: import org.apache.solr.schema.TextField;
027:
028: import java.io.Writer;
029: import java.io.IOException;
030: import java.util.*;
031:
032: import org.apache.lucene.document.Fieldable;
033: import org.apache.lucene.document.Document;
034:
035: /**
036: * @author yonik
037: * @version $Id: XMLWriter.java 514254 2007-03-03 22:21:27Z yonik $
038: */
039: final public class XMLWriter {
040:
041: public static float CURRENT_VERSION = 2.2f;
042:
043: //
044: // static thread safe part
045: //
046: private static final char[] XML_START1 = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
047: .toCharArray();
048:
049: private static final char[] XML_STYLESHEET = "<?xml-stylesheet type=\"text/xsl\" href=\"/admin/"
050: .toCharArray();
051: private static final char[] XML_STYLESHEET_END = ".xsl\"?>\n"
052: .toCharArray();
053:
054: private static final char[] XML_START2_SCHEMA = ("<response xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n"
055: + " xsi:noNamespaceSchemaLocation=\"http://pi.cnet.com/cnet-search/response.xsd\">\n")
056: .toCharArray();
057: private static final char[] XML_START2_NOSCHEMA = ("<response>\n")
058: .toCharArray();
059:
060: public static void writeResponse(Writer writer,
061: SolrQueryRequest req, SolrQueryResponse rsp)
062: throws IOException {
063:
064: String ver = req.getParam("version");
065:
066: writer.write(XML_START1);
067:
068: String stylesheet = req.getParam("stylesheet");
069: if (stylesheet != null && stylesheet.length() > 0) {
070: writer.write(XML_STYLESHEET);
071: writer.write(stylesheet);
072: writer.write(XML_STYLESHEET_END);
073: }
074:
075: String noSchema = req.getParam("noSchema");
076: // todo - change when schema becomes available?
077: if (false && noSchema == null)
078: writer.write(XML_START2_SCHEMA);
079: else
080: writer.write(XML_START2_NOSCHEMA);
081:
082: // create an instance for each request to handle
083: // non-thread safe stuff (indentation levels, etc)
084: // and to encapsulate writer, schema, and searcher so
085: // they don't have to be passed around in every function.
086: //
087: XMLWriter xw = new XMLWriter(writer, req.getSchema(), req, ver);
088: xw.defaultFieldList = rsp.getReturnFields();
089:
090: String indent = req.getParam("indent");
091: if (indent != null) {
092: if ("".equals(indent) || "off".equals(indent)) {
093: xw.setIndent(false);
094: } else {
095: xw.setIndent(true);
096: }
097: }
098:
099: // dump response values
100: NamedList lst = rsp.getValues();
101: int sz = lst.size();
102: int start = 0;
103:
104: // special case the response header if the version is 2.1 or less
105: if (xw.version <= 2100 && sz > 0) {
106: Object header = lst.getVal(0);
107: if (header instanceof NamedList
108: && "responseHeader".equals(lst.getName(0))) {
109: writer.write("<responseHeader>");
110: xw.incLevel();
111: NamedList nl = (NamedList) header;
112: for (int i = 0; i < nl.size(); i++) {
113: String name = nl.getName(i);
114: Object val = nl.getVal(i);
115: if ("status".equals(name) || "QTime".equals(name)) {
116: xw.writePrim(name, null, val.toString(), false);
117: } else {
118: xw.writeVal(name, val);
119: }
120: }
121: xw.decLevel();
122: writer.write("</responseHeader>");
123: start = 1;
124: }
125: }
126:
127: for (int i = start; i < sz; i++) {
128: xw.writeVal(lst.getName(i), lst.getVal(i));
129: }
130:
131: writer.write("\n</response>\n");
132: }
133:
134: ////////////////////////////////////////////////////////////
135: // request instance specific (non-static, not shared between threads)
136: ////////////////////////////////////////////////////////////
137:
138: private final Writer writer;
139: private final IndexSchema schema; // needed to write fields of docs
140: private final SolrQueryRequest request; // the request
141:
142: private int level;
143: private boolean defaultIndent = false;
144: private boolean doIndent = false;
145:
146: // fieldList... the set of fields to return for each document
147: private Set<String> defaultFieldList;
148:
149: // if a list smaller than this threshold is encountered, elements
150: // will be written on the same line.
151: // maybe constructed types should always indent first?
152: private final int indentThreshold = 0;
153:
154: final int version;
155:
156: // temporary working objects...
157: // be careful not to use these recursively...
158: private final ArrayList tlst = new ArrayList();
159: private final Calendar cal = Calendar.getInstance(TimeZone
160: .getTimeZone("GMT"));
161: private final StringBuilder sb = new StringBuilder();
162:
163: public XMLWriter(Writer writer, IndexSchema schema,
164: SolrQueryRequest req, String version) {
165: this .writer = writer;
166: this .schema = schema;
167: this .request = req;
168:
169: float ver = version == null ? CURRENT_VERSION : Float
170: .parseFloat(version);
171: this .version = (int) (ver * 1000);
172: }
173:
174: //
175: // Functions to manipulate the current logical nesting level.
176: // Any indentation will be partially based on level.
177: //
178: public void setLevel(int level) {
179: this .level = level;
180: }
181:
182: public int level() {
183: return level;
184: }
185:
186: public int incLevel() {
187: return ++level;
188: }
189:
190: public int decLevel() {
191: return --level;
192: }
193:
194: public void setIndent(boolean doIndent) {
195: this .doIndent = doIndent;
196: defaultIndent = doIndent;
197: }
198:
199: public void writeAttr(String name, String val) throws IOException {
200: if (val != null) {
201: writer.write(' ');
202: writer.write(name);
203: writer.write("=\"");
204: XML.escapeAttributeValue(val, writer);
205: writer.write('"');
206: }
207: }
208:
209: public void startTag(String tag, String name, boolean closeTag)
210: throws IOException {
211: if (doIndent)
212: indent();
213:
214: writer.write('<');
215: writer.write(tag);
216: if (name != null) {
217: writeAttr("name", name);
218: if (closeTag) {
219: writer.write("/>");
220: } else {
221: writer.write(">");
222: }
223: } else {
224: if (closeTag) {
225: writer.write("/>");
226: } else {
227: writer.write('>');
228: }
229: }
230: }
231:
232: private static final String[] indentArr = new String[] { "\n",
233: "\n ", "\n ", "\n\t", "\n\t ", "\n\t ", // could skip this one (the only 3 char seq)
234: "\n\t\t" };
235:
236: public void indent() throws IOException {
237: indent(level);
238: }
239:
240: public void indent(int lev) throws IOException {
241: int arrsz = indentArr.length - 1;
242: // another option would be lev % arrsz (wrap around)
243: String istr = indentArr[lev > arrsz ? arrsz : lev];
244: writer.write(istr);
245: }
246:
247: private static final Comparator fieldnameComparator = new Comparator() {
248: public int compare(Object o, Object o1) {
249: Fieldable f1 = (Fieldable) o;
250: Fieldable f2 = (Fieldable) o1;
251: int cmp = f1.name().compareTo(f2.name());
252: return cmp;
253: // note - the sort is stable, so this should not have affected the ordering
254: // of fields with the same name w.r.t eachother.
255: }
256: };
257:
258: public final void writeDoc(String name, Document doc,
259: Set<String> returnFields, float score, boolean includeScore)
260: throws IOException {
261: startTag("doc", name, false);
262: incLevel();
263:
264: if (includeScore) {
265: writeFloat("score", score);
266: }
267:
268: // Lucene Documents have multivalued types as multiple fields
269: // with the same name.
270: // The XML needs to represent these as
271: // an array. The fastest way to detect multiple fields
272: // with the same name is to sort them first.
273:
274: // using global tlst here, so we shouldn't call any other
275: // function that uses it until we are done.
276: tlst.clear();
277: for (Object obj : doc.getFields()) {
278: Fieldable ff = (Fieldable) obj;
279: // skip this field if it is not a field to be returned.
280: if (returnFields != null
281: && !returnFields.contains(ff.name())) {
282: continue;
283: }
284: tlst.add(ff);
285: }
286: Collections.sort(tlst, fieldnameComparator);
287:
288: int sz = tlst.size();
289: int fidx1 = 0, fidx2 = 0;
290: while (fidx1 < sz) {
291: Fieldable f1 = (Fieldable) tlst.get(fidx1);
292: String fname = f1.name();
293:
294: // find the end of fields with this name
295: fidx2 = fidx1 + 1;
296: while (fidx2 < sz
297: && fname.equals(((Fieldable) tlst.get(fidx2))
298: .name())) {
299: fidx2++;
300: }
301:
302: /***
303: // more efficient to use getFieldType instead of
304: // getField since that way dynamic fields won't have
305: // to create a SchemaField on the fly.
306: FieldType ft = schema.getFieldType(fname);
307: ***/
308:
309: SchemaField sf = schema.getFieldOrNull(fname);
310: if (sf == null) {
311: sf = new SchemaField(fname, new TextField());
312: }
313: if (fidx1 + 1 == fidx2) {
314: // single field value
315: if (version >= 2100 && sf.multiValued()) {
316: startTag("arr", fname, false);
317: doIndent = false;
318: sf.write(this , null, f1);
319: writer.write("</arr>");
320: doIndent = defaultIndent;
321: } else {
322: sf.write(this , f1.name(), f1);
323: }
324: } else {
325: // multiple fields with same name detected
326:
327: startTag("arr", fname, false);
328: incLevel();
329: doIndent = false;
330: int cnt = 0;
331: for (int i = fidx1; i < fidx2; i++) {
332: if (defaultIndent && ++cnt == 4) { // only indent every 4th item
333: indent();
334: cnt = 0;
335: }
336: sf.write(this , null, (Fieldable) tlst.get(i));
337: }
338: decLevel();
339: // if (doIndent) indent();
340: writer.write("</arr>");
341: // doIndent=true;
342: doIndent = defaultIndent;
343: }
344: fidx1 = fidx2;
345: }
346:
347: decLevel();
348: if (doIndent)
349: indent();
350: writer.write("</doc>");
351: }
352:
353: public final void writeDocList(String name, DocList ids,
354: Set<String> fields) throws IOException {
355: boolean includeScore = false;
356: if (fields != null) {
357: includeScore = fields.contains("score");
358: if (fields.size() == 0
359: || (fields.size() == 1 && includeScore)
360: || fields.contains("*")) {
361: fields = null; // null means return all stored fields
362: }
363: }
364:
365: int sz = ids.size();
366:
367: if (doIndent)
368: indent();
369: writer.write("<result");
370: writeAttr("name", name);
371: writeAttr("numFound", Integer.toString(ids.matches()));
372: writeAttr("start", Integer.toString(ids.offset()));
373: if (includeScore) {
374: writeAttr("maxScore", Float.toString(ids.maxScore()));
375: }
376: if (sz == 0) {
377: writer.write("/>");
378: return;
379: } else {
380: writer.write('>');
381: }
382:
383: incLevel();
384: SolrIndexSearcher searcher = request.getSearcher();
385: DocIterator iterator = ids.iterator();
386: for (int i = 0; i < sz; i++) {
387: int id = iterator.nextDoc();
388: Document doc = searcher.doc(id, fields);
389: writeDoc(null, doc, fields, (includeScore ? iterator
390: .score() : 0.0f), includeScore);
391: }
392: decLevel();
393:
394: if (doIndent)
395: indent();
396: writer.write("</result>");
397: }
398:
399: public void writeVal(String name, Object val) throws IOException {
400:
401: // if there get to be enough types, perhaps hashing on the type
402: // to get a handler might be faster (but types must be exact to do that...)
403:
404: // go in order of most common to least common
405: if (val == null) {
406: writeNull(name);
407: } else if (val instanceof String) {
408: writeStr(name, (String) val);
409: } else if (val instanceof Integer) {
410: // it would be slower to pass the int ((Integer)val).intValue()
411: writeInt(name, val.toString());
412: } else if (val instanceof Boolean) {
413: // could be optimized... only two vals
414: writeBool(name, val.toString());
415: } else if (val instanceof Long) {
416: writeLong(name, val.toString());
417: } else if (val instanceof Date) {
418: writeDate(name, (Date) val);
419: } else if (val instanceof Float) {
420: // we pass the float instead of using toString() because
421: // it may need special formatting. same for double.
422: writeFloat(name, ((Float) val).floatValue());
423: } else if (val instanceof Double) {
424: writeDouble(name, ((Double) val).doubleValue());
425: } else if (val instanceof Document) {
426: writeDoc(name, (Document) val, defaultFieldList, 0.0f,
427: false);
428: } else if (val instanceof DocList) {
429: // requires access to IndexReader
430: writeDocList(name, (DocList) val, defaultFieldList);
431: } else if (val instanceof DocSet) {
432: // how do we know what fields to read?
433: // todo: have a DocList/DocSet wrapper that
434: // restricts the fields to write...?
435: } else if (val instanceof Map) {
436: writeMap(name, (Map) val);
437: } else if (val instanceof NamedList) {
438: writeNamedList(name, (NamedList) val);
439: } else if (val instanceof Iterable) {
440: writeArray(name, ((Iterable) val).iterator());
441: } else if (val instanceof Object[]) {
442: writeArray(name, (Object[]) val);
443: } else if (val instanceof Iterator) {
444: writeArray(name, (Iterator) val);
445: } else {
446: // default...
447: writeStr(name, val.getClass().getName() + ':'
448: + val.toString());
449: }
450: }
451:
452: //
453: // Generic compound types
454: //
455:
456: public void writeNamedList(String name, NamedList val)
457: throws IOException {
458: int sz = val.size();
459: startTag("lst", name, sz <= 0);
460:
461: if (sz < indentThreshold) {
462: doIndent = false;
463: }
464:
465: incLevel();
466: for (int i = 0; i < sz; i++) {
467: writeVal(val.getName(i), val.getVal(i));
468: }
469: decLevel();
470:
471: if (sz > 0) {
472: if (doIndent)
473: indent();
474: writer.write("</lst>");
475: }
476: }
477:
478: //A map is currently represented as a named list
479: public void writeMap(String name, Map val) throws IOException {
480: Map map = val;
481: int sz = map.size();
482: startTag("lst", name, sz <= 0);
483: incLevel();
484: for (Map.Entry entry : (Set<Map.Entry>) map.entrySet()) {
485: // possible class-cast exception here...
486: String k = (String) entry.getKey();
487: Object v = entry.getValue();
488: // if (sz<indentThreshold) indent();
489: writeVal(k, v);
490: }
491: decLevel();
492: if (sz > 0) {
493: if (doIndent)
494: indent();
495: writer.write("</lst>");
496: }
497: }
498:
499: public void writeArray(String name, Object[] val)
500: throws IOException {
501: writeArray(name, Arrays.asList(val).iterator());
502: }
503:
504: public void writeArray(String name, Iterator iter)
505: throws IOException {
506: if (iter.hasNext()) {
507: startTag("arr", name, false);
508: incLevel();
509: while (iter.hasNext()) {
510: writeVal(null, iter.next());
511: }
512: decLevel();
513: if (doIndent)
514: indent();
515: writer.write("</arr>");
516: } else {
517: startTag("arr", name, true);
518: }
519: }
520:
521: //
522: // Primitive types
523: //
524:
525: public void writeNull(String name) throws IOException {
526: writePrim("null", name, "", false);
527: }
528:
529: public void writeStr(String name, String val) throws IOException {
530: writePrim("str", name, val, true);
531: }
532:
533: public void writeInt(String name, String val) throws IOException {
534: writePrim("int", name, val, false);
535: }
536:
537: public void writeInt(String name, int val) throws IOException {
538: writeInt(name, Integer.toString(val));
539: }
540:
541: public void writeLong(String name, String val) throws IOException {
542: writePrim("long", name, val, false);
543: }
544:
545: public void writeLong(String name, long val) throws IOException {
546: writeLong(name, Long.toString(val));
547: }
548:
549: public void writeBool(String name, String val) throws IOException {
550: writePrim("bool", name, val, false);
551: }
552:
553: public void writeBool(String name, boolean val) throws IOException {
554: writeBool(name, Boolean.toString(val));
555: }
556:
557: public void writeFloat(String name, String val) throws IOException {
558: writePrim("float", name, val, false);
559: }
560:
561: public void writeFloat(String name, float val) throws IOException {
562: writeFloat(name, Float.toString(val));
563: }
564:
565: public void writeDouble(String name, String val) throws IOException {
566: writePrim("double", name, val, false);
567: }
568:
569: public void writeDouble(String name, double val) throws IOException {
570: writeDouble(name, Double.toString(val));
571: }
572:
573: public void writeDate(String name, Date val) throws IOException {
574: // using a stringBuilder for numbers can be nice since
575: // a temporary string isn't used (it's added directly to the
576: // builder's buffer.
577:
578: cal.setTime(val);
579:
580: sb.setLength(0);
581: int i = cal.get(Calendar.YEAR);
582: sb.append(i);
583: sb.append('-');
584: i = cal.get(Calendar.MONTH) + 1; // 0 based, so add 1
585: if (i < 10)
586: sb.append('0');
587: sb.append(i);
588: sb.append('-');
589: i = cal.get(Calendar.DAY_OF_MONTH);
590: if (i < 10)
591: sb.append('0');
592: sb.append(i);
593: sb.append('T');
594: i = cal.get(Calendar.HOUR_OF_DAY); // 24 hour time format
595: if (i < 10)
596: sb.append('0');
597: sb.append(i);
598: sb.append(':');
599: i = cal.get(Calendar.MINUTE);
600: if (i < 10)
601: sb.append('0');
602: sb.append(i);
603: sb.append(':');
604: i = cal.get(Calendar.SECOND);
605: if (i < 10)
606: sb.append('0');
607: sb.append(i);
608: i = cal.get(Calendar.MILLISECOND);
609: if (i != 0) {
610: sb.append('.');
611: if (i < 100)
612: sb.append('0');
613: if (i < 10)
614: sb.append('0');
615: sb.append(i);
616:
617: // handle canonical format specifying fractional
618: // seconds shall not end in '0'. Given the slowness of
619: // integer div/mod, simply checking the last character
620: // is probably the fastest way to check.
621: int lastIdx = sb.length() - 1;
622: if (sb.charAt(lastIdx) == '0') {
623: lastIdx--;
624: if (sb.charAt(lastIdx) == '0') {
625: lastIdx--;
626: }
627: sb.setLength(lastIdx + 1);
628: }
629:
630: }
631: sb.append('Z');
632: writeDate(name, sb.toString());
633: }
634:
635: public void writeDate(String name, String val) throws IOException {
636: writePrim("date", name, val, false);
637: }
638:
//
// OPT - specific writeInt, writeFloat, etc. methods might be faster since
// there would be fewer write calls (write("<int name=\"" + name + ... + "</int>"))
//
643: public void writePrim(String tag, String name, String val,
644: boolean escape) throws IOException {
645: // OPT - we could use a temp char[] (or a StringBuilder) and if the
646: // size was small enough to fit (if escape==false we can calc exact size)
647: // then we could put things directly in the temp buf.
648: // need to see what percent of CPU this takes up first though...
649: // Could test a reusable StringBuilder...
650:
651: // is this needed here???
652: // Only if a fieldtype calls writeStr or something
653: // with a null val instead of calling writeNull
654: /***
655: if (val==null) {
656: if (name==null) writer.write("<null/>");
657: else writer.write("<null name=\"" + name + "/>");
658: }
659: ***/
660:
661: int contentLen = val.length();
662:
663: startTag(tag, name, contentLen == 0);
664: if (contentLen == 0)
665: return;
666:
667: if (escape) {
668: XML.escapeCharData(val, writer);
669: } else {
670: writer.write(val, 0, contentLen);
671: }
672:
673: writer.write("</");
674: writer.write(tag);
675: writer.write('>');
676: }
677:
678: }
|