001: /*
002:
003: This software is OSI Certified Open Source Software.
004: OSI Certified is a certification mark of the Open Source Initiative.
005:
006: The license (Mozilla version 1.0) can be read at the MMBase site.
007: See http://www.MMBase.org/license
008:
009: */
010: package org.mmbase.util;
011:
012: import java.util.*;
013:
014: /**
015: * StringTagger, Creates a object with tags and fields from a String.
016: * Its ideal for name-value pairs and name-value pairs with multivalues.
017: * It also provides support for quoted values, and recognizes values that are 'function' calls with
018: * their own parameter list (allowing to ignore any tokens within these lists when parsing).
019: *
020: * @application SCAN
021: * @code-conventions Some methods (Values, Value etc) have wrong names (and are duplicating Map methods btw)
022: * @author Daniel Ockeloen
023: * @author Pierre van Rooden
024: * @version $Id: StringTagger.java,v 1.17 2005/01/30 16:46:35 nico Exp $
025: */
026: public class StringTagger implements Map {
027:
028: /**
029: * The name-value pairs where the value is a single string
030: */
031: private Hashtable tokens;
032: /**
033: * The name-value pairs where the value is a list of strings
034: */
035: private Map multitokens;
036: /**
037: * Token used to separate tags (default a space).
038: */
039: private char tagStart;
040: /**
041: * Token used to separate the tag name from its value (default '=').
042: */
043: private char tagSeparator;
044: /**
045: * Token used to separate multiple values within a tag (default ',').
046: */
047: private char fieldSeparator;
048: /**
049: * Token used to indicate quoted values (default '\"').
050: */
051: private char quote;
052: /**
053: * Token used to indicate the start of a function parameter list (default '(').
054: */
055: private char functionOpen;
056: /**
057: * Token used to indicate the end of a function parameter list (default ')').
058: */
059: private char functionClose;
060:
061: /**
062: * The line that was parsed.
063: */
064: private String startline = "";
065:
066: /**
067: * Creates a StringTag for the given line.
068: * Example : StringTagger("cmd=lookup names='Daniel Ockeloen, Rico Jansen'",' ','=',','\'','('.')')
069: * @param line : to be tagged line
070: * @param tagStart : Seperator for the Tags
071: * @param tagSeparator : Seperator inside the Tag (between name and value)
072: * @param fieldSeparator : Seperator inside the value
073: * @param quote : Char used if a quoted value
074: * @param functionOpen char used to open a function parameter list
075: * @param functionClose char used to close a function parameter list
076: */
077: public StringTagger(String line, char tagStart, char tagSeparator,
078: char fieldSeparator, char quote, char functionOpen,
079: char functionClose) {
080: this .tagStart = tagStart;
081: this .startline = line;
082: this .tagSeparator = tagSeparator;
083: this .fieldSeparator = fieldSeparator;
084: this .quote = quote;
085: this .functionOpen = functionOpen;
086: this .functionClose = functionClose;
087: tokens = new Hashtable(); //needing elements(), keys()
088: multitokens = new HashMap();
089: createTagger(line);
090: }
091:
092: /**
093: * Creates a StringTag for the given line.
094: * Uses default characters for the function parameter list tokens.
095: * Example : StringTagger("cmd=lookup names='Daniel Ockeloen, Rico Jansen'",' ','=',','\'')
096: * @param line : to be tagged line
097: * @param tagStart : Seperator for the Tags
098: * @param tagSeparator : Seperator inside the Tag (between name and value)
099: * @param fieldSeparator : Seperator inside the value
100: * @param quote : Char used if a quoted value
101: */
102: public StringTagger(String line, char tagStart, char tagSeparator,
103: char fieldSeparator, char quote) {
104: this (line, tagStart, tagSeparator, fieldSeparator, quote, '(',
105: ')');
106: }
107:
108: /**
109: * Creates a StringTag for the given line.
110: * Uses default characters for all tokens.
111: * @param line : to be tagged line
112: */
113: public StringTagger(String line) {
114: this (line, ' ', '=', ',', '"', '(', ')');
115: }
116:
117: /**
118: * Parses the given line, and stores all value-pairs found in the
119: * tokens and multitokens fields.
120: * @param line : to be tagged line (why is this a parameter when it can eb retrieved from startline?)
121: * @since MMBase-1.7
122: */
123: protected void createTagger(String line) {
124: StringTokenizer tok2 = new StringTokenizer(line + tagStart, ""
125: + tagSeparator + tagStart, true);
126: String part, tag, prevtok, tok;
127: boolean isTag, isPart, isQuoted;
128:
129: isTag = true;
130: isPart = false;
131: isQuoted = false;
132: prevtok = "";
133: tag = part = ""; // should be StringBuffer
134: // log.debug("Tagger -> |"+tagStart+"|"+tagSeparator+"|"+quote+"|");
135: while (tok2.hasMoreTokens()) {
136: tok = tok2.nextToken();
137: // log.debug("tagger tok ("+isTag+","+isPart+","+isQuoted+") |"+tok+"|"+prevtok+"|");
138: if (tok.equals("" + tagSeparator)) {
139: if (isTag) {
140: tag = prevtok;
141: isTag = false;
142: } else {
143: if (!isQuoted) {
144: splitTag(tag + tagSeparator + part);
145: isTag = true;
146: isPart = false;
147: part = "";
148: } else {
149: part += tok;
150: }
151: }
152: } else if (tok.equals("" + tagStart)) {
153: if (isPart) {
154: if (isQuoted) {
155: part += tok;
156: } else {
157: if (!prevtok.equals("" + tagStart)) {
158: splitTag(tag + tagSeparator + part);
159: isTag = true;
160: isPart = false;
161: part = "";
162: }
163: }
164: prevtok = tok;
165: }
166: } else {
167: if (!isTag)
168: isPart = true;
169: // log.debug("isTag "+isTag+" "+isPart);
170: if (isPart) {
171: if (isQuoted) {
172: // Check end quote
173: if (tok.charAt(tok.length() - 1) == quote) {
174: isQuoted = false;
175: }
176: part += tok;
177: } else {
178: if (tok.charAt(0) == quote
179: && !(tok.charAt(tok.length() - 1) == quote)) {
180: isQuoted = true;
181: }
182: part += tok;
183: }
184: }
185: // log.debug("isTag "+isTag+" "+isPart+" "+isQuoted);
186: prevtok = tok;
187: }
188: }
189: }
190:
191: /**
192: * Handles and splits a tag in its component parts, and store the elemements in
193: * the tokens and multitokens fields.
194: * @param tag the string containing the tag
195: * @since MMBase-1.7
196: */
197: protected void splitTag(String tag) {
198: int tagPos = tag.indexOf(tagSeparator);
199: String name = tag.substring(0, tagPos);
200: String result = tag.substring(tagPos + 1);
201: // log.debug("SplitTag |"+name+"|"+result+"|");
202:
203: if (result.length() > 1 && result.charAt(0) == quote
204: && result.charAt(result.length() - 1) == quote) {
205: result = result.substring(1, result.length() - 1);
206: }
207: tokens.put(name, result);
208:
209: StringTokenizer toks = new StringTokenizer(result, ""
210: + fieldSeparator + functionOpen + functionClose, true);
211: // If quoted, strip the " " from beginning and end ?
212: Vector multi = new Vector();
213: if (toks.hasMoreTokens()) {
214: String tokvalue = "";
215: int nesting = 0;
216: while (toks.hasMoreTokens()) {
217: String tok = toks.nextToken();
218: if (tok.equals("" + fieldSeparator)) {
219: if (nesting == 0) {
220: multi.add(tokvalue);
221: tokvalue = "";
222: } else {
223: tokvalue += tok;
224: }
225: } else if (tok.equals("" + functionOpen)) {
226: nesting++;
227: tokvalue += tok;
228: } else if (tok.equals("" + functionClose)) {
229: nesting--;
230: tokvalue += tok;
231: } else {
232: tokvalue += tok;
233: }
234: }
235: multi.add(tokvalue);
236: }
237: multitokens.put(name, multi);
238: }
239:
240: // Map interface methods
241:
242: /**
243: * Clears all data
244: */
245: public void clear() {
246: tokens.clear();
247: multitokens.clear();
248: startline = "";
249: }
250:
251: /**
252: * Checks whether a key exits.
253: */
254: public boolean containsKey(Object ob) {
255: return tokens.containsKey(ob);
256: }
257:
258: /**
259: * Checks whether a value exits.
260: */
261: public boolean containsValue(Object ob) {
262: return tokens.containsValue(ob);
263: }
264:
265: /**
266: * returns all values
267: */
268: public Set entrySet() {
269: return tokens.entrySet();
270: }
271:
272: /**
273: * Returns whether two objects are the same
274: * @param ob the key of the value to retrieve
275: */
276: public boolean equals(Object ob) {
277: return (ob instanceof Map)
278: && (ob.hashCode() == this .hashCode());
279: }
280:
281: /**
282: * Returns the value of a key as an Object.
283: * The value returned is a single, unseparated, string.<br />
284: * Use {@link #Values} to get a list of multi-values as a <code>Vector</code>.<br />
285: * Use {@link #Value} to get the first value as a String
286: * @param ob the key of the value to retrieve
287: */
288: public Object get(Object ob) {
289: return tokens.get(ob);
290: }
291:
292: /**
293: * Hashcode for sorting and comparing
294: */
295: public int hashCode() {
296: return multitokens.hashCode();
297: }
298:
299: /**
300: * Checks whether the tagger is empty
301: */
302: public boolean isEmpty() {
303: return tokens.isEmpty();
304: }
305:
306: /**
307: * Returns a Set of the name keys.
308: */
309: public Set keySet() {
310: return tokens.keySet();
311: }
312:
313: /**
314: * sets a value (for the Map interface).
315: */
316: public Object put(Object key, Object value) {
317: Object res = tokens.get(key);
318: setValue((String) key, (String) value);
319: return res;
320: }
321:
322: /**
323: * Manually sets a set of values (for the Map interface).
324: */
325: public void putAll(Map map) {
326: throw new UnsupportedOperationException();
327: }
328:
329: /**
330: * remove a value (for the Map interface).
331: */
332: public Object remove(Object key) {
333: Object res = tokens.get(key);
334: tokens.remove(key);
335: multitokens.remove(key);
336: return res;
337: }
338:
339: /**
340: * sets a value (for the Map interface).
341: */
342: public int size() {
343: return tokens.size();
344: }
345:
346: /**
347: * returns all values
348: */
349: public Collection values() {
350: return tokens.values();
351: }
352:
353: // Custom methods
354:
355: /**
356: * Returns a Enumeration of the name keys.
357: */
358: public Enumeration keys() {
359: return tokens.keys();
360: }
361:
362: /**
363: * toString
364: */
365: public String toString() {
366: StringBuffer content = new StringBuffer("[");
367: for (Enumeration e = keys(); e.hasMoreElements();) {
368: String key = (String) e.nextElement();
369: content.append('<').append(key);
370: content.append('=').append(Values(key));
371: content.append('>');
372: }
373: content.append(']');
374: return content.toString();
375: }
376:
377: /**
378: * Returns a Enumeration of the values as String.
379: * The values returned are all single, unsepartated, strings.
380: * Use {@link #multiElements} to get a list of multi-values.
381: */
382: public Enumeration elements() {
383: return tokens.elements();
384: }
385:
386: /**
387: * Returns a Enumeration of the values as Vectors that contain
388: * the seperated values.
389: * Use {@link #elements} to get a list of single, unseparated, values.
390: */
391: public Enumeration multiElements(String token) {
392: Vector tmp = (Vector) multitokens.get(token);
393: if (tmp != null) {
394: return tmp.elements();
395: } else {
396: return null;
397: }
398: }
399:
400: /**
401: * Returns the values as a Vector that contains the separated values.
402: * <br />
403: * Use {@link #get} to get the list of values as a <code>String</code><br />
404: * Use {@link #Value} to get the first value as a String
405: * @param token the key of the value to retrieve
406: */
407: public Vector Values(String token) {
408: Vector tmp = (Vector) multitokens.get(token);
409: return tmp;
410: }
411:
412: /**
413: * Returns the original parsed line
414: * @param token unused
415: */
416: public String ValuesString(String token) {
417: return startline;
418: }
419:
420: /**
421: * Returns the first value as a <code>String</code>.
422: * In case of a single value, it returns that value. In case of multiple values,
423: * it returns the
424: * Use {@link #get} to get the list of values as a <code>String</code><br />
425: * Use {@link #Values} to get a list of multi-values as a <code>Vector</code>.<br />
426: * @param token the key of the value to retrieve
427: */
428: public String Value(String token) {
429: String val;
430: Vector tmp = (Vector) multitokens.get(token);
431: if (tmp != null && tmp.size() > 0) {
432: val = (String) tmp.elementAt(0);
433: if (val != null) {
434: val = Strip.DoubleQuote(val, Strip.BOTH); // added stripping daniel
435: return val;
436: } else {
437: return null;
438: }
439: } else {
440: return null;
441: }
442: }
443:
444: /**
445: * Manually sets a single value.
446: */
447: public void setValue(String token, String val) {
448: Vector newval = new Vector();
449: newval.addElement(val);
450: tokens.put(token, newval);
451: multitokens.put(token, newval);
452: }
453:
454: /**
455: * Manually sets a multi-value value.
456: */
457: public void setValues(String token, Vector values) {
458: tokens.put(token, values.toString());
459: multitokens.put(token, values);
460: }
461:
462: /**
463: * For testing
464: */
465: public static void main(String args[]) {
466: StringTagger tag = new StringTagger(args[0]);
467: }
468:
469: }
|