001: /*
002: * Copyright 2001 Sun Microsystems, Inc. All rights reserved.
003: * PROPRIETARY/CONFIDENTIAL. Use of this product is subject to license terms.
004: */
005:
006: package com.sun.portal.search.soif;
007:
008: import java.io.*;
009: import java.util.*;
010:
011: /**
012: * SOIF (Summary Object Interchange Format) is a general purpose
013: * syntax and data structure for representing meta data. It is
014: * commonly used to create summaries, or Resource Descriptions,
015: * for storage, indexing, and transmission purposes.
016: * <p>
017: * A SOIF object has a standard serialized form, for example:
018: * <pre>
019: * @DOCUMENT { http://www.siroe.com
020: * content-type{29}: text/html; charset=iso-8859-1
021: * title{16}: Siroe Consulting
022: * author-1{14}: William Lawson
023: * author-2{16}: Gregory Blaxland
024: * author-3{17}: William Wentworth
025: * description{245}:
026: * Siroe Consulting
027: * - Business & Industry Solutions
028: * - Downloads
029: * - Documentation
030: * - Developers
031: * - Partners
032: * - Executives
033: * - Press
034: * Company Info
035: * - Investor Relations
036: * - News & Events
037: * - Feature Stories
038: * - Employment
039: * - Siroe Labs
040: * Copyright 1994-2002 Siroe Consulting
041: * }
042: * </pre>
043: *
044: * <p>
045: * Each SOIF object has a schema (eg, DOCUMENT) and URL.<br>
046: * SOIF attribute values can be plain text or binary.<br>
047: * Multivalued attributes are supported.<br>
048: * The standard SOIF serialized form uses UTF-8 encoding
049: * for all character based data.<br>
050: * SOIF attribute names are case-insensitive.
051: *
052: * <p>
 * More information on SOIF and the RDM search protocol can
 * be found at <a href="http://www.w3.org/TR/NOTE-rdm.html">http://www.w3.org/TR/NOTE-rdm.html</a>
055: * @see AVPair
056: */
057: public class SOIF extends HashMap {
058:
059: /**
060: * The SOIF schema name.
061: */
062: private String schemaName;
063:
064: /**
065: * The SOIF URL.
066: */
067: private String URL;
068:
069: /**
070: * Byte encoding.
071: */
072: String encoding = defaultEncoding;
073:
074: /**
075: * Default char encoding.
076: */
077: public static String defaultEncoding = "UTF-8";
078:
079: static final int ENC_UTF8 = 0;
080: static final int ENC_UTF16 = 1;
081: static final int ENC_8BIT = 2;
082: static final int ENC_DEFAULT = ENC_UTF8;
083:
084: /**
085: * Error message for invalid SOIF.
086: */
087: public static String INVALIDSOIF = "invalid soif";
088:
089: /**
090: * Constructor.
091: * Creates an empty SOIF object with schema = "-" and URL = "-"
092: */
093: public SOIF() {
094: schemaName = "-";
095: URL = "-";
096: }
097:
098: /**
099: * Constructor.
100: * Creates an empty SOIF object with the given schema and URL.
101: * @param schemaName the SOIF schema name
102: * @param URL the SOIF URL
103: */
104: public SOIF(String schemaName, String URL) {
105: this ();
106: if (schemaName != null)
107: this .schemaName = schemaName;
108: if (URL != null)
109: this .URL = URL;
110: }
111:
112: /**
113: * Constructor. Creates a SOIF by parsing the given byte array using the
114: * given character encoding.
115: * @param data a byte array SOIF representation
116: * @param encoding character encoding for byte/String conversion
117: * @param allowed a restricted attribute set for the conversion (use lower case)
118: * @throws SOIFException on parsing error
119: * @throws IOException if byte array is empty
120: */
121: public SOIF(byte[] data, String encoding, Set allowed)
122: throws IOException {
123: try {
124: SOIFInputStream ss = new SOIFInputStream(data, encoding);
125: ss.setAllowed(allowed);
126: SOIF s = ss.readSOIF();
127: if (s == null) {
128: // parser returns null soif at EOS
129: throw new SOIFException("failed to parse");
130: }
131: this .schemaName = s.schemaName;
132: this .URL = s.URL;
133: this .putAll((HashMap) s);
134: } catch (IOException cant_happen) {
135: throw new SOIFException("failed to parse");
136: }
137: }
138:
139: /**
140: * Constructor. Creates a SOIF by parsing the given byte array using the
141: * given character encoding.
142: * @param data a byte array SOIF representation
143: * @param encoding character encoding for byte/String conversion
144: * @throws SOIFException on parsing error
145: * @throws IOException if byte array is empty
146: */
147: public SOIF(byte[] data, String encoding) throws IOException {
148: this (data, encoding, null);
149: }
150:
151: /**
152: * Constructor. Creates a SOIF by parsing the given byte array using the
153: * default character encoding (UTF-8).
154: * @param data a byte array SOIF representation
155: * @param allowed a restricted attribute set for the conversion (use lower case)
156: * @throws SOIFException if parsing error
157: * @throws IOException if byte array is empty
158: */
159: public SOIF(byte[] data, Set allowed) throws IOException {
160: this (data, defaultEncoding, allowed);
161: }
162:
163: /**
164: * Constructor. Creates a SOIF by parsing the given byte array using the
165: * default character encoding (UTF-8).
166: * @param data a byte array SOIF representation
167: * @throws SOIFException if parsing error
168: * @throws IOException if byte array is empty
169: */
170: public SOIF(byte[] data) throws IOException {
171: this (data, defaultEncoding);
172: }
173:
174: /**
175: * @return the SOIF schema name.
176: */
177: public String getSchemaName() {
178: return schemaName;
179: }
180:
181: /**
182: * Sets the SOIF schema name.
183: */
184: public void setSchemaName(String s) {
185: schemaName = s;
186: }
187:
188: /**
189: * @return the SOIF URL.
190: */
191: public String getURL() {
192: return URL;
193: }
194:
195: /**
196: * Sets the SOIF URL.
197: */
198: public void setURL(String u) {
199: URL = u;
200: }
201:
202: /**
203: * Gets a String value by attribute.
204: * Ignores case of attribute name.
205: * @param a the attribute name
206: */
207: public String getValue(String a) {
208: return getValue(a, 0);
209: }
210:
211: /**
212: * Gets all valid values of this attribute.
213: * @return an array of Strings for
214: * multiple values for an attribute, e.g., for Bob, return
215: * values for Bob-1 and Bob-2.
216: *
217: * Ignores case of attribute name.
218: * @param a the attribute name
219: */
220: public String[] getStringValues(String a) {
221: AVPair av = (AVPair) get(a);
222: if (av != null)
223: return av.getStringValues();
224: return null;
225: }
226:
227: /**
228: * Gets a byte array value by attribute.
229: * Ignores case of attribute name.
230: * @param a the attribute name
231: */
232: public byte[] getBytes(String a) {
233: return getBytes(a, 0);
234: }
235:
236: /**
237: * Gets a String value by multivalue attribute and index.
238: * Ignores case of attribute name.
239: * @param a the attribute name
240: * @param n the index
241: */
242: public String getValue(String a, int n) {
243: AVPair av = (AVPair) get(a);
244: if (av != null)
245: return av.getValue(n);
246: return null;
247: }
248:
249: /**
250: * Gets a byte array value by multivalue attribute and index.
251: * Ignores case of attribute name.
252: * @param a the attribute name
253: * @param n the index
254: */
255: public byte[] getBytes(String a, int n) {
256: AVPair av = (AVPair) get(a);
257: if (av != null)
258: return av.getBytes(n);
259: return null;
260: }
261:
262: /**
263: * Gets an AVPair by attribute.
264: * Ignores case of attribute name.
265: * @param a the attribute name
266: */
267: public AVPair getAVPair(String a) {
268: return (AVPair) get(a);
269: }
270:
271: /**
272: * Tests for attribute existence.
273: * Ignores case of attribute name.
274: * @param a the attribute name
275: * @return true if the attribute exists in this SOIF
276: */
277: public boolean contains(String a) {
278: return get(a) != null;
279: }
280:
281: /**
282: * Inserts a String valued attribute.
283: * Ignores case of attribute name.
284: * @param a the attribute name
285: * @param v the String value to insert
286: * @return true if added, false if ignored (duplicate)
287: */
288: public boolean insert(String a, String v) {
289: return insert(a, v, 0);
290: }
291:
292: /**
293: * Inserts a byte array valued attribute.
294: * Ignores case of attribute name.
295: * @param a the attribute name
296: * @param b the byte array value to insert
297: * @return true if added, false if ignored (duplicate)
298: */
299: public boolean insert(String a, byte[] b) {
300: return insert(a, b, 0);
301: }
302:
303: /**
304: * Inserts a String valued attribute with index.
305: * Ignores case of attribute name.
306: * @param a the attribute name
307: * @param v the String value to insert
308: * @param n the multivalue index position for this value
309: * @return true if added, false if ignored (duplicate or MV index already taken)
310: */
311: public boolean insert(String a, String v, int n) {
312: AVPair old = (AVPair) get(a);
313: if (old != null)
314: return old.insert(v, n);
315: else {
316: put(a, new AVPair(a, v, n));
317: return true;
318: }
319: }
320:
321: /**
322: * Inserts a String valued attribute with index.
323: * Ignores case of attribute name.
324: * @param a the attribute name
325: * @param b the byte array value to insert
326: * @param n the multivalue index position for this value
327: * @return true if added, false if ignored (duplicate or MV index already taken)
328: */
329: public boolean insert(String a, byte[] b, int n) {
330: AVPair old = (AVPair) get(a);
331: if (old != null)
332: return old.insert(b, n);
333: else {
334: put(a, new AVPair(a, b, n, encoding));
335: return true;
336: }
337: }
338:
339: /**
340: * Inserts an AVPair, which may be either single or multivalued.
341: * Ignores case of attribute name.
342: * @param a the AVPair to insert
343: * @return true if added, false if ignored (duplicate attribute name).
344: */
345: public boolean insert(AVPair a) {
346: AVPair old = (AVPair) get(a.getAttribute());
347: if (old != null)
348: return false;
349: else {
350: put(a.getAttribute(), a);
351: return true;
352: }
353: }
354:
355: /**
356: * Replaces a single-valued attribute.
357: * Inserts the attribute if not already present.
358: * Ignores case of attribute name.
359: * @param a the attibute name
360: * @param v the attribute value
361: * @param s the attribute
362: */
363: public void replace(String a, String v) {
364: remove(a);
365: insert(a, v);
366: }
367:
368: /**
369: * Replaces a multivalued attribute by index.
370: * Inserts the attribute if not already present.
371: * Ignores case of attribute name.
372: * @param a the attibute name
373: * @param v the attribute value
374: * @param n the multivalued index position to replace
375: */
376: public void replace(String a, String v, int n) {
377: remove(a, n);
378: insert(a, v, n);
379: }
380:
381: /**
382: * Replaces an attribute-value pair by name.
383: * Ignores case of attribute name.
384: * @param avp the attibute-value pair
385: */
386: public void replace(AVPair avp) {
387: remove(avp.getAttribute());
388: put(avp.getAttribute(), avp);
389: }
390:
391: /**
392: * Removes an attribute.
393: * Ignores case of attribute name.
394: * @param a the attibute name
395: * @return true if present and removed, false otherwise
396: */
397: public boolean remove(String a) {
398: return remove((Object) a) != null;
399: }
400:
401: /**
402: * Removes an attribute by index.
403: * Ignores case of attribute name.
404: * @return true if present and removed, false if not
405: */
406: public boolean remove(String a, int n) {
407: AVPair av = (AVPair) get(a);
408: if (av != null)
409: return av.remove(n);
410: else
411: return false;
412: }
413:
414: /**
415: * Renames attribute a to b.
416: * Ignores case of attribute name.
417: * @return true if successful
418: */
419: public boolean rename(String a, String b) {
420: AVPair avp = (AVPair) get(a);
421: if (avp == null)
422: return false;
423: remove(a);
424: avp.setAttribute(b);
425: put(b, avp);
426: return true;
427: }
428:
429: /**
430: * Copies all data from another SOIF with replacement.
431: * Ignores case of all attribute names.
432: */
433: public void merge(SOIF s) {
434: putAll(s);
435: }
436:
437: /**
438: * Copies selected data from another SOIF with replacement.
439: * Ignores case of all attribute names.
440: */
441: public void merge(SOIF s, String[] filter) {
442: if (filter != null) {
443: for (int i = 0; i < filter.length; i++) {
444: AVPair av = (AVPair) s.get(filter[i]);
445: if (av == null)
446: continue;
447: put(filter[i], av); // XXX should use av.clone() ?
448: }
449: } else
450: merge(s);
451: }
452:
453: /**
454: * Copies all data from another SOIF without replacement.
455: * Ignores case of all attribute names.
456: */
457: public void absorb(SOIF s) {
458: for (Iterator i = s.values().iterator(); i.hasNext();) {
459: AVPair av = (AVPair) i.next();
460: insert(av); // no dups - XXX use av.clone()?
461: }
462: }
463:
464: /**
465: * Copies selected data from another SOIF without replacement
466: * Ignores case of all attribute names.
467: */
468: public void absorb(SOIF s, String[] filter) {
469: for (int i = 0; i < filter.length; i++) {
470: AVPair av = s.getAVPair(filter[i]);
471: insert(av); // no dups - XXX use av.clone()?
472: }
473: }
474:
475: // XXX need getMaxIndex() and/or append() methods
476:
477: /**
478: * Squeezes multivalued attributes. Packs the AVPair arrays
479: * by closing any holes in them towards index zero.
480: */
481: public void squeezeMV() {
482: for (Iterator i = values().iterator(); i.hasNext();) {
483: AVPair av = (AVPair) i.next();
484: av.squeeze();
485: }
486: }
487:
488: /**
489: * @return the set of all attribute names in this SOIF.
490: * e.g. for Bob-1, Bob-2, Jim-1, return { Bob, Jim }
491: */
492: public Set getAttributes() {
493: return keySet();
494: }
495:
496: /**
497: * @return the set of all attribute names in this SOIF as an array.
498: * e.g. for Bob-1, Bob-2, Jim-1, return { Bob, Jim }
499: */
500: public String[] getAttributesArray() {
501: String[] attrs = new String[size()];
502: keySet().toArray(attrs);
503: return attrs;
504: }
505:
506: /**
507: * @return the number of attributes in this SOIF.
508: */
509: public int getAttributeCount() {
510: return size();
511: }
512:
513: /**
514: * @return the entire contents size of all AVPairs in this SOIF
515: */
516: public int contentSize() {
517: int size = 0;
518: for (Iterator i = values().iterator(); i.hasNext();) {
519: AVPair av = (AVPair) i.next();
520: size += av.contentSize();
521: }
522: return size;
523: }
524:
525: /**
526: * Case insensistive collection function.
527: */
528: public Object put(Object key, Object value) {
529: // XXX this is quite inefficient - maybe better to intern attr
530: // names or use a case insensitive SOIFString
531: String s = (String) key;
532: AVPair av = (AVPair) value;
533: return super .put(s.toLowerCase(), av);
534: }
535:
536: /**
537: * Case insensistive collection function.
538: */
539: public Object get(Object key) {
540: String s = (String) key;
541: return super .get(s.toLowerCase());
542: }
543:
544: /**
545: * Case insensistive collection function.
546: */
547: public Object remove(Object key) {
548: String s = (String) key;
549: return super .remove(s.toLowerCase());
550: }
551:
552: /**
553: * Case insensistive collection function.
554: */
555: public boolean containsKey(Object key) {
556: String s = (String) key;
557: return super .containsKey(s.toLowerCase());
558: }
559:
560: /**
561: * Creates a byte array representation of this SOIF.
562: * Convert this SOIF to a byte array.
563: * @return this SOIF as a byte array using the current encoding
564: * @throws UnsupportedEncodingException
565: * @throws IOException
566: */
567: public byte[] toByteArray() throws IOException {
568: return toByteArray(encoding);
569: }
570:
571: /**
572: * Creates a byte array representation of this SOIF.
573: * Convert this SOIF to a byte array.
574: * @param allowed a restricted attribute set for the conversion (use lower case)
575: * @return this SOIF as a byte array using the current encoding
576: * @throws UnsupportedEncodingException
577: * @throws IOException
578: */
579: public byte[] toByteArray(Set allowed) throws IOException {
580: return toByteArray(encoding, allowed);
581: }
582:
583: /**
584: * Creates a byte array representation of this SOIF.
585: * @param enc character encoding for String/byte conversion
586: * @return this SOIF as a byte array using the given encoding
587: * @throws UnsupportedEncodingException
588: * @throws IOException
589: * @see SOIFInputStream#readSOIF()
590: * @see SOIFOutputStream#write(SOIF s)
591: */
592: public byte[] toByteArray(String enc) throws IOException {
593: return toByteArray(enc, null);
594: }
595:
596: private byte[] toByteArrayOld(String enc, Set allowed)
597: throws IOException {
598: try {
599: SOIFBuffer sb = new SOIFBuffer(4000);
600: DataOutputStream dos = new DataOutputStream(sb);
601: dos.write('@');
602: dos.write(schemaName.getBytes(enc));
603: dos.writeBytes(" { ");
604: dos.write(URL.getBytes(enc)); // how encoded?
605: dos.write('\n');
606: for (Iterator it = values().iterator(); it.hasNext();) {
607: AVPair av = (AVPair) it.next();
608: String attr = av.getAttribute();
609: if (allowed == null || allowed.contains(attr)) { // XXX case sensitive
610: int mx = av.getMaxIndex();
611: for (int i = 0; i < mx + 1; ++i) {
612: byte[] b = av.getBytes(i);
613: if (b != null) {
614: dos.write(attr.getBytes(enc));
615: if (i > 0 || mx > 0) {
616: // an attr with only one MV index of '-1' is printed as single valued
617: dos.write('-');
618: dos.writeBytes(java.lang.String
619: .valueOf(i + 1));
620: }
621: dos.writeBytes("{" + b.length + "}:\t");
622: dos.write(b);
623: dos.write('\n');
624: }
625: }
626: }
627: }
628: dos.writeBytes("}\n\n");
629: return sb.toByteArray();
630: } catch (IOException e) {
631: // shouldn't happen
632: }
633: return null;
634: }
635:
636: /**
637: * Creates a byte array representation of this SOIF.
638: * @param enc character encoding for String/byte conversion
639: * @param allowed a restricted attribute set for the conversion (use lower case)
640: * @return this SOIF as a byte array using the given encoding
641: * @throws IOException
642: * @see SOIFInputStream#readSOIF()
643: * @see SOIFOutputStream#write(SOIF s)
644: */
645: public byte[] toByteArray(String enc, Set allowed)
646: throws IOException {
647: // XXX need to do some performance test on this
648: try {
649: SOIFBuffer sb = new SOIFBuffer(4000);
650: DataOutputStream dos = new DataOutputStream(sb);
651: // force a string conversion if outputting in a different encoding
652: // XXX will break on binary data
653: boolean reencode = !enc.equalsIgnoreCase(encoding);
654: if (enc.equalsIgnoreCase("UTF-16"))
655: enc = "UTF-16BE";
656: dos.write(("@" + schemaName + " { " + URL + "\n")
657: .getBytes(enc));
658: for (Iterator it = values().iterator(); it.hasNext();) {
659: AVPair av = (AVPair) it.next();
660: String attr = av.getAttribute();
661: StringBuffer stb = new StringBuffer(500);
662: if (allowed == null
663: || allowed.contains(attr.toLowerCase())) { // XXX allowed set must be lower case?
664: int mx = av.getMaxIndex();
665: for (int i = 0; i < mx + 1; ++i) {
666: String v = null;
667: byte[] b = null;
668: if (reencode && (v = av.getValue(i)) != null)
669: b = v.getBytes(enc); // XXX should be in SOIFOutputStream
670: else
671: b = av.getBytes(i);
672: if (b != null) {
673: stb.setLength(0);
674: stb.append(attr);
675: if (i > 0 || mx > 0) {
676: // an attr with only one MV index of '-1' is printed as single valued
677: stb.append("-");
678: stb.append(java.lang.String
679: .valueOf(i + 1));
680: }
681: stb.append("{");
682: stb.append(java.lang.String
683: .valueOf(b.length));
684: stb.append("}:\t");
685: dos.write(stb.toString().getBytes(enc));
686: dos.write(b);
687: stb.setLength(0);
688: dos.write("\n".getBytes(enc));
689: }
690: }
691: }
692: }
693: dos.write("}\n\n".getBytes(enc)); // XXX won't pick up utf-16be switch
694: return sb.toByteArray();
695: } catch (IOException e) {
696: // shouldn't happen
697: }
698: return null;
699: }
700:
701: /**
702: * *WARNING*: This should be used for debugging only.
703: * @return a String representation of this SOIF
704: * *Warning*: this is not a valid SOIF representation
705: * @see #toByteArray()
706: * @see SOIFInputStream#readSOIF()
707: * @see SOIFOutputStream#write(SOIF s)
708: */
709: public String toString() {
710: StringBuffer sb = new StringBuffer("SOIF @" + schemaName
711: + " { " + URL + "\n");
712: for (Iterator it = values().iterator(); it.hasNext();) {
713: AVPair av = (AVPair) it.next();
714: int mx = av.getMaxIndex();
715: for (int i = 0; i < mx + 1; ++i) {
716: String v = av.getValue(i);
717: if (v != null) {
718: sb.append(av.getAttribute());
719: if (i > 0 || mx > 0) {
720: sb.append("-");
721: sb.append(i + 1);
722: }
723: sb.append("{" + v.length() + "}:\t" + v + "\n");
724: }
725: }
726: }
727: sb.append("}\n\n");
728: return sb.toString();
729: }
730:
731: }
|