0001: /*
0002: * AbstractTokenizerProperties.java: Core implementation of TokenizerProperties
0003: *
0004: * Copyright (C) 2003 Heiko Blau
0005: *
0006: * This file belongs to the JTopas Library.
0007: * JTopas is free software; you can redistribute it and/or modify it
0008: * under the terms of the GNU Lesser General Public License as published by the
0009: * Free Software Foundation; either version 2.1 of the License, or (at your
0010: * option) any later version.
0011: *
0012: * This software is distributed in the hope that it will be useful, but WITHOUT
0013: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
0014: * FITNESS FOR A PARTICULAR PURPOSE.
0015: * See the GNU Lesser General Public License for more details.
0016: *
0017: * You should have received a copy of the GNU Lesser General Public License along
0018: * with JTopas. If not, write to the
0019: *
0020: * Free Software Foundation, Inc.
0021: * 59 Temple Place, Suite 330,
0022: * Boston, MA 02111-1307
0023: * USA
0024: *
0025: * or check the Internet: http://www.fsf.org
0026: *
0027: * Contact:
0028: * email: heiko@susebox.de
0029: */
0030:
0031: package de.susebox.jtopas;
0032:
0033: //-----------------------------------------------------------------------------
0034: // Imports
0035: //
0036: import java.lang.ref.WeakReference;
0037: import java.util.Iterator;
0038: import java.util.LinkedList;
0039: import java.util.Map;
0040: import java.util.TreeMap;
0041:
0042: import de.susebox.java.lang.ExtIllegalArgumentException;
0043:
0044: //-----------------------------------------------------------------------------
0045: // Class AbstractTokenizerProperties
0046: //
0047:
0048: /**<p>
0049: * The class <code>AbstractTokenizerProperties</code> provides the skeleton for
0050: * implementations of the {@link TokenizerProperties} interface. It leaves only
0051: * the more general methods to these implementations. For instance, the simple
0052: * method {@link #addKeyword(String)} can and should call the more complex
0053: * method {@link #addKeyword(String, String)} with the second parameter
0054: * <code>null</code>. That method in turn should call
0055: * {@link #addKeyword(String, String, int)} etc.
0056: *</p>
0057: *
0058: * @see TokenizerProperties
0059: * @see Tokenizer
0060: * @author Heiko Blau
0061: */
0062: public abstract class AbstractTokenizerProperties implements
0063: TokenizerProperties {
0064:
0065: //---------------------------------------------------------------------------
0066: // Abstract methods
0067: //
0068:
0069: /**
0070: * This method must be implemented by derived classes to register a
0071: * {@link TokenizerProperty}. When called, the given <code>property</code>
0072: * has already been checked for not being <code>null</code> or incomplete
0073: * (no leading image).
0074: *<br>
0075: * If the property to register is already known, perhaps with a different
0076: * type, different flags or a different companion, it is replaced and returned.
0077: *<br>
0078: * The notification of the registered {@link TokenizerPropertyListener} is
0079: * done by this abstract class, the implementations must not do it themselves.
0080: *<br>
0081: * The method is called in a thread-safe way. That means only one thread can
0082: * enter the method at a given time.
0083: *
0084: * @param property a non-null, complete token description
0085: * @return the old, replaced property or <code>null</code>
0086: */
0087: protected abstract TokenizerProperty doAddProperty(
0088: TokenizerProperty property);
0089:
0090: /**
0091: * This method must be implemented by derived classes to deregister a
0092: * {@link TokenizerProperty}. When called, the given <code>property</code>
0093: * has already been checked for not being <code>null</code> or incomplete
0094: * (no leading image).
0095: *<br>
0096: * According to the {@link TokenizerProperties} interface specification of the
0097: * {@link TokenizerProperties#removeProperty} method, this method does nothing
0098: * if the given property is unknown. In this case <code>null</code> is returned.
0099: *<br>
0100: * Otherwise the removed property is returned.
0101: *<br>
0102: * The notification of the registered {@link TokenizerPropertyListener} is
0103: * done by this abstract class, the implementations must not do it themselves.
0104: *<br>
0105: * The method is called in a thread-safe way. That means only one thread can
0106: * enter the method at a given time.
0107: *
0108: * @param property a non-null, complete token description
0109: * @return the removed property or <code>null</code>
0110: */
0111: protected abstract TokenizerProperty doRemoveProperty(
0112: TokenizerProperty property);
0113:
0114: /**
0115: * This method must be implemented by derived classes to retrieve the
0116: * {@link TokenizerProperty} for the given image. When called, the given
0117: * <code>image</code> has already been checked for not being <code>null</code>.
0118: *<br>
0119: * According to the {@link TokenizerProperties} interface specification of the
0120: * various get methods, this method must return <code>null</code> if there is
0121: * no property that matches the given image.
0122: *<br>
0123: * The method is called in a thread-safe way. That means only one thread can
0124: * enter the method at a given time.
0125: *
0126: * @param type the type the returned property should have
0127: * @param startImage the (starting) image
0128: * @return the token description for the image or <code>null</code>
0129: */
0130: protected abstract TokenizerProperty doGetProperty(int type,
0131: String startImage);
0132:
0133: /**
0134: * This method must be implemented by derived classes to register the given
0135: * simple whitespaces. When called, the calling method has already ensured that
0136: * the parameter is not <code>null</code> (but could be empty) and uppercase
0137: * if the {@link Flags#F_NO_CASE} flag is set.
0138: *<br>
0139: * The method should return the old whitespace set. When first called, this
0140: * is the default whitespace set. It is ok to return <code>null</code> if the
0141: * old set is empty.
0142: *<br>
0143: * The notification of the registered {@link TokenizerPropertyListener} is
0144: * done by this abstract class, the implementations must not do it themselves.
0145: *<br>
0146: * The method is called in a thread-safe way. That means only one thread can
0147: * enter the method at a given time.
0148: *
0149: * @param whitespaces the new whitespace set
0150: * @return the old whitespace set
0151: */
0152: protected abstract String doSetWhitespaces(String whitespaces);
0153:
0154: /**
0155: * This method must be implemented by derived classes to set the given
0156: * simple separators. When called, the calling method
0157: * {@link #setSeparators} has already ensured that the parameter is not
0158: * <code>null</code> (but empty) and uppercase if the {@link Flags#F_NO_CASE}
0159: * flag is set.
0160: *<br>
0161: * The method should return the old separator set. When first called, this
0162: * is the default separator set. It is ok to return <code>null</code> if the
0163: * old set is empty.
0164: *<br>
0165: * The notification of the registered {@link TokenizerPropertyListener} is
0166: * done by this abstract class, the implementations must not do it themselves.
0167: *<br>
0168: * The method is called in a thread-safe way. That means only one thread can
0169: * enter the method at a given time.
0170: *
0171: * @param separators the new separator set
0172: * @return the old separator set
0173: */
0174: protected abstract String doSetSeparators(String separators);
0175:
0176: //---------------------------------------------------------------------------
0177: // Methods of the TokenizerProperties interface
0178: //
0179:
0180: /**
0181: * See the method description in {@link TokenizerProperties}.
0182: *
0183: * @param flags the parser control flags
0184: * @see #getParseFlags
0185: */
0186: public void setParseFlags(int flags) {
0187: // normalize flags
0188: flags = normalizeFlags(flags, flags);
0189:
0190: // set flags
0191: synchronized (this ) {
0192: int oldFlags = _flags;
0193:
0194: _flags = flags;
0195: if (oldFlags != _flags) {
0196: notifyListeners(new TokenizerPropertyEvent(
0197: TokenizerPropertyEvent.PROPERTY_MODIFIED,
0198: new TokenizerProperty(
0199: TokenizerProperty.PARSE_FLAG_MASK,
0200: new String[] { Integer
0201: .toBinaryString(_flags) }),
0202: new TokenizerProperty(
0203: TokenizerProperty.PARSE_FLAG_MASK,
0204: new String[] { Integer
0205: .toBinaryString(oldFlags) })));
0206: }
0207: }
0208: }
0209:
0210: /**
0211: * See the method description in {@link TokenizerProperties}.
0212: *
0213: * @return the current parser control flags
0214: * @see #setParseFlags
0215: */
0216: public int getParseFlags() {
0217: return _flags;
0218: }
0219:
0220: /**
0221: * Returns <code>true</code> if a given flag is set in the current parse flags.
0222: * If the parameter contains more than one bit the method returns only
0223: * <code>true</code> if all bits are set.
0224: *
0225: * @param flag the flag to test
0226: * @return <code>true</code> if all bits in flag are set.
0227: * @see #setParseFlags
0228: */
0229: public boolean isFlagSet(int flag) {
0230: return (_flags & flag) == flag;
0231: }
0232:
0233: /**
0234: * Checks if a given flag (see the constants in {@link Flags}) is set for the
0235: * given {@link TokenizerProperty} in the context of this <code>TokenizerProperties</code>
0236: * instance.
0237: *
0238: * @param prop the {@link TokenizerProperty} concerned
0239: * @param flag the flag to check (may contain more than one bit)
0240: * @return <code>true</code> if the flag is set either explicit in the property
0241: * or globally for this <code>TokenizerProperties</code> object,
0242: * <code>false</code> otherwise
0243: * @throws NullPointerException if no property is given
0244: */
0245: public boolean isFlagSet(TokenizerProperty prop, int flag)
0246: throws NullPointerException {
0247: return prop.isFlagSet(flag, isFlagSet(flag));
0248: }
0249:
0250: //---------------------------------------------------------------------------
0251: // simple whitespaces and separators
0252: //
0253:
0254: /**
0255: * Setting the whitespace character set of the tokenizer.
0256: * See the method description in {@link TokenizerProperties}.
0257: *<br>
0258: * This method calls the abstract method {@link #doSetWhitespaces}. It guaranties
0259: * that the parameter passed to <code>doSetWhitespaces</code> is not null
0260: * (instead empty) and uppercase if the Flag {@link Flags#F_NO_CASE}
0261: * is set.
0262: *
0263: * @param whitespaces the whitespace set
0264: */
0265: public void setWhitespaces(String whitespaces) {
0266: // normalize whitespaces
0267: String newValue = (whitespaces != null) ? whitespaces : "";
0268: String oldValue;
0269:
0270: if ((_flags & Flags.F_NO_CASE) != 0) {
0271: newValue = newValue.toUpperCase();
0272: }
0273:
0274: // set new whitespaces
0275: synchronized (this ) {
0276: oldValue = doSetWhitespaces(newValue);
0277:
0278: // notify listeners
0279: handleEvent(Token.WHITESPACE, newValue, oldValue);
0280: }
0281: }
0282:
0283: /**
0284: * Adding new whitespaces to the existing set.
0285: *
0286: * @param additional whitespaces for the whitespace set
0287: * @throws IllegalArgumentException when <code>null</code> is passed or incomplete
0288: * ranges are specified (e.g. <code>"a-"</code>)
0289: */
0290: public void addWhitespaces(String whitespaces)
0291: throws IllegalArgumentException {
0292: try {
0293: if (whitespaces.length() > 0) {
0294: setWhitespaces(mergeSet(getWhitespaces(), whitespaces,
0295: false));
0296: }
0297: } catch (NullPointerException ex) {
0298: throw new IllegalArgumentException();
0299: }
0300: }
0301:
0302: /**
0303: * Removing whitespaces from the existing set.
0304: *
0305: * @param whitespaces whitespaces to remove from the whitespace set
0306: * @throws IllegalArgumentException when <code>null</code> is passed or incomplete
0307: * ranges are specified (e.g. <code>"a-"</code>)
0308: */
0309: public void removeWhitespaces(String whitespaces)
0310: throws IllegalArgumentException {
0311: try {
0312: if (whitespaces.length() > 0) {
0313: setWhitespaces(mergeSet(getWhitespaces(), whitespaces,
0314: true));
0315: }
0316: } catch (NullPointerException ex) {
0317: throw new IllegalArgumentException();
0318: }
0319: }
0320:
0321: /**
0322: * Setting the separator character set of the tokenizer.
0323: * See the method description in {@link TokenizerProperties}.
0324: *<br>
0325: * This method calls the abstract method {@link #doSetSeparators}. It guaranties
0326: * that the parameter passed to <code>doSetSeparators</code> is not null
0327: * (instead empty) and uppercase if the Flag {@link Flags#F_NO_CASE}
0328: * is set.
0329: *
0330: * @param separators the separator set
0331: */
0332: public void setSeparators(String separators) {
0333: // normalize separators
0334: String newValue = (separators != null) ? separators : "";
0335: String oldValue;
0336:
0337: if ((_flags & Flags.F_NO_CASE) != 0) {
0338: newValue = newValue.toUpperCase();
0339: }
0340:
0341: // set new separator set
0342: synchronized (this ) {
0343: oldValue = doSetSeparators(newValue);
0344:
0345: // notify listeners
0346: handleEvent(Token.SEPARATOR, newValue, oldValue);
0347: }
0348: }
0349:
0350: /**
0351: * Adding new separators to the existing set.
0352: *
0353: * @param separators additional separators for the separator set
0354: * @throws IllegalArgumentException when <code>null</code> is passed or incomplete
0355: * ranges are specified (e.g. <code>"a-"</code>)
0356: */
0357: public void addSeparators(String separators)
0358: throws IllegalArgumentException {
0359: try {
0360: if (separators.length() > 0) {
0361: setSeparators(mergeSet(getSeparators(), separators,
0362: false));
0363: }
0364: } catch (NullPointerException ex) {
0365: throw new IllegalArgumentException();
0366: }
0367: }
0368:
0369: /**
0370: * Removing separators from the existing set.
0371: *
0372: * @param separators separating characters to remove from the separator set
0373: * @throws IllegalArgumentException when <code>null</code> is passed or incomplete
0374: * ranges are specified (e.g. <code>"a-"</code>)
0375: */
0376: public void removeSeparators(String separators)
0377: throws IllegalArgumentException {
0378: try {
0379: if (separators.length() > 0) {
0380: setSeparators(mergeSet(getSeparators(), separators,
0381: true));
0382: }
0383: } catch (NullPointerException ex) {
0384: throw new IllegalArgumentException();
0385: }
0386: }
0387:
0388: //---------------------------------------------------------------------------
0389: // string properties
0390: //
0391:
0392: /**
0393: * Registering a string description. See the method description in the interface
0394: * {@link TokenizerProperties}.
0395: *
0396: * @param start the starting sequence of a string
0397: * @param end the finishing sequence of a string
0398: * @param escape the escape sequence inside the string
0399: * @throws IllegalArgumentException when <code>null</code> or an empty string
0400: * is passed for start or end
0401: * @see #removeString
0402: * @see #addString(String, String, String, Object)
0403: */
0404: public void addString(String start, String end, String escape)
0405: throws IllegalArgumentException {
0406: addString(start, end, escape, null);
0407: }
0408:
0409: /**
0410: * Registering a the sequences that are used for string-like text parts.
0411: * See the method description in {@link TokenizerProperties}.
0412: *
0413: * @param start the starting sequence of a string
0414: * @param end the finishing sequence of a string
0415: * @param escape the escape sequence inside the string
0416: * @param companion the associated information
0417: * @throws IllegalArgumentException when <code>null</code> or an empty string is passed for start or end
0418: *
0419: */
0420: public void addString(String start, String end, String escape,
0421: Object companion) throws IllegalArgumentException {
0422: addString(start, end, escape, companion, getParseFlags());
0423: }
0424:
0425: /**
0426: * Registering a the sequences that are used for string-like text parts.
0427: * See the method description in {@link TokenizerProperties}.
0428: *
0429: * @param start the starting sequence of a string
0430: * @param end the finishing sequence of a string
0431: * @param escape the escape sequence inside the string
0432: * @param companion the associated information
0433: * @param flags modification flags
0434: * @throws IllegalArgumentException when <code>null</code> or an empty string is passed for start or end
0435: */
0436: public void addString(String start, String end, String escape,
0437: Object companion, int flags)
0438: throws IllegalArgumentException {
0439: addString(start, end, escape, companion, flags, flags);
0440: }
0441:
0442: /**
0443: * Registering a string with a set of flags and an associated flag mask.
0444: *
0445: * @param start the starting sequence of a string
0446: * @param end the finishing sequence of a string
0447: * @param escape the escape sequence inside the string
0448: * @param companion the associated information
0449: * @param flags modification flags
0450: * @param flagMask flags that have valid values in the parameter <code>flags</code>
0451: * @throws IllegalArgumentException when <code>null</code> or an empty string
0452: * is passed for keyword
0453: */
0454: public void addString(String start, String end, String escape,
0455: Object companion, int flags, int flagMask)
0456: throws IllegalArgumentException {
0457: addProperty(new TokenizerProperty(Token.STRING, new String[] {
0458: start, end, escape }, companion, flags, flagMask));
0459: }
0460:
0461: /**
0462: * Removing a string description.
0463: * See the method description in {@link TokenizerProperties}.
0464: *
0465: * @param start the starting sequence of a string
0466: * @throws IllegalArgumentException when <code>null</code> or an empty string is passed for start
0467: */
0468: public void removeString(String start)
0469: throws IllegalArgumentException {
0470: TokenizerProperty prop = getString(start);
0471:
0472: if (prop != null) {
0473: removeProperty(prop);
0474: }
0475: }
0476:
0477: /**
0478: * Retrieving the information associated with a certain string. See the method
0479: * description in {@link TokenizerProperties}.
0480: *
0481: * @param start the starting sequence of a string
0482: * @return the associated information or <code>null</code>
0483: * @throws IllegalArgumentException when <code>null</code> or an empty string is passed for start
0484: */
0485: public Object getStringCompanion(String start)
0486: throws IllegalArgumentException {
0487: TokenizerProperty prop = getString(start);
0488:
0489: if (prop != null) {
0490: return prop.getCompanion();
0491: } else {
0492: return null;
0493: }
0494: }
0495:
0496: /**
0497: * Checks if the given starting sequence of the string is known to the parser.
0498: * See the method description in {@link TokenizerProperties}.
0499: *
0500: * @param start the starting sequence of a string
0501: * @return <code>true</code> if the string is registered,
0502: * <code>false</code> otherwise
0503: */
0504: public boolean stringExists(String start) {
0505: try {
0506: return getString(start) != null;
0507: } catch (IllegalArgumentException ex) {
0508: return false;
0509: }
0510: }
0511:
0512: /**
0513: * Get the full description of a string property.
0514: * See the method description in {@link TokenizerProperties}.
0515: *
0516: * @param start the starting sequence of a string
0517: * @return the full string description or <code>null</code>
0518: * @throws IllegalArgumentException if the given keyword is empty or null
0519: */
0520: public TokenizerProperty getString(String start)
0521: throws IllegalArgumentException {
0522: // check parameter
0523: checkArgument(start, "String");
0524:
0525: // get the real thing
0526: synchronized (this ) {
0527: return doGetProperty(Token.STRING, start);
0528: }
0529: }
0530:
0531: //---------------------------------------------------------------------------
0532: // line comment properties
0533: //
0534:
0535: /**
0536: * Registering a the starting sequence of a line comment.
0537: * See the method description in {@link TokenizerProperties}.
0538: *
0539: * @param lineComment the starting sequence of the line comment
0540: * @throws IllegalArgumentException when <code>null</code> or an empty string is passed for start sequence of the line comment
0541: */
0542: public void addLineComment(String lineComment)
0543: throws IllegalArgumentException {
0544: addLineComment(lineComment, null);
0545: }
0546:
0547: /**
0548: * Registering a the starting sequence of a line comment.
0549: *
0550: * See the method description in {@link TokenizerProperties}.
0551: * @param lineComment the starting sequence of a line comment
0552: * @param companion the associated information
0553: * @throws IllegalArgumentException when <code>null</code> or an empty string is passed for start sequence of the line comment
0554: */
0555: public void addLineComment(String lineComment, Object companion)
0556: throws IllegalArgumentException {
0557: addLineComment(lineComment, companion, getParseFlags());
0558: }
0559:
0560: /**
0561: * Registering a the starting sequence of a line comment.
0562: * See the method description in {@link TokenizerProperties}.
0563: *
0564: * @param lineComment the starting sequence of a line comment
0565: * @param companion the associated information
0566: * @param flags modification flags
0567: * @throws IllegalArgumentException when <code>null</code> or an empty string
0568: * is passed for start sequence of the line comment
0569: */
0570: public void addLineComment(String lineComment, Object companion,
0571: int flags) throws IllegalArgumentException {
0572: addLineComment(lineComment, companion, flags, flags);
0573: }
0574:
0575: /**
0576: * Registering a line comment with a set of flags and an associated flag mask.
0577: *
0578: * @param lineComment the starting sequence of a line comment
0579: * @param companion the associated information
0580: * @param flags modification flags
0581: * @param flagMask flags that have valid values in the parameter <code>flags</code>
0582: * @throws IllegalArgumentException when <code>null</code> or an empty string
0583: * is passed for keyword
0584: */
0585: public void addLineComment(String lineComment, Object companion,
0586: int flags, int flagMask) throws IllegalArgumentException {
0587: addProperty(new TokenizerProperty(Token.LINE_COMMENT,
0588: new String[] { lineComment }, companion, flags,
0589: flagMask));
0590: }
0591:
0592: /**
0593: * Removing a certain line comment.
0594: * See the method description in {@link TokenizerProperties}.
0595: *
0596: * @param lineComment the starting sequence of the line comment
0597: * @throws IllegalArgumentException when <code>null</code> or an empty string is passed for start sequence of the line comment
0598: */
0599: public void removeLineComment(String lineComment)
0600: throws IllegalArgumentException {
0601: TokenizerProperty prop = getLineComment(lineComment);
0602:
0603: if (prop != null) {
0604: removeProperty(prop);
0605: }
0606: }
0607:
0608: /**
0609: * Retrieving the associated object of a certain line comment.
0610: * See the method description in {@link TokenizerProperties}.
0611: *
0612: * @param lineComment the starting sequence of the line comment
0613: * @return the object associated with the line comment
0614: * @throws IllegalArgumentException when <code>null</code> or an empty string is passed for start sequence of the line comment
0615: */
0616: public Object getLineCommentCompanion(String lineComment)
0617: throws IllegalArgumentException {
0618: TokenizerProperty prop = getLineComment(lineComment);
0619:
0620: if (prop != null) {
0621: return prop.getCompanion();
0622: } else {
0623: return null;
0624: }
0625: }
0626:
0627: /**
0628: * Checks if the give line comment is known.
0629: * See the method description in {@link TokenizerProperties}.
0630: *
0631: * @param lineComment the starting sequence of the line comment
0632: * @return <code>true</code> if the line comment is known,
0633: * <code>false</code> otherwise
0634: */
0635: public boolean lineCommentExists(String lineComment) {
0636: try {
0637: return getLineComment(lineComment) != null;
0638: } catch (IllegalArgumentException ex) {
0639: return false;
0640: }
0641: }
0642:
0643: /**
0644: * Get the full description of a line comment property.
0645: * See the method description in {@link TokenizerProperties}.
0646: *
0647: * @param lineComment the starting sequence of the line comment
0648: * @return the full line comment description or <code>null</code>
0649: * @throws IllegalArgumentException if the given image is empty or null
0650: */
0651: public TokenizerProperty getLineComment(String lineComment)
0652: throws IllegalArgumentException {
0653: // check parameter
0654: checkArgument(lineComment, "Line comment");
0655:
0656: // get the real thing
0657: synchronized (this ) {
0658: return doGetProperty(Token.LINE_COMMENT, lineComment);
0659: }
0660: }
0661:
0662: //---------------------------------------------------------------------------
0663: // block comment properties
0664: //
0665:
0666: /**
0667: * Registering a block comment. See the method description in
0668: * {@link TokenizerProperties}.
0669: *
0670: * @param start the starting sequence of the block comment
0671: * @param end the finishing sequence of the block comment
0672: * @throws IllegalArgumentException when <code>null</code> or an empty string
0673: * is passed for start / end sequence of the block comment
0674: */
0675: public void addBlockComment(String start, String end)
0676: throws IllegalArgumentException {
0677: addBlockComment(start, end, null);
0678: }
0679:
0680: /**
0681: * Registering a block comment.
0682: * See the method description in {@link TokenizerProperties}.
0683: *
0684: * @param start the starting sequence of the block comment
0685: * @param end the finishing sequence of the block comment
0686: * @param companion information object associated with this block comment
0687: * @throws IllegalArgumentException when <code>null</code> or an empty string
0688: * is passed for start / end sequence of the block comment
0689: */
0690: public void addBlockComment(String start, String end,
0691: Object companion) throws IllegalArgumentException {
0692: addBlockComment(start, end, companion, getParseFlags());
0693: }
0694:
0695: /**
0696: * Registering a block comment.
0697: * See the method description in {@link TokenizerProperties}.
0698: *
0699: * @param start the starting sequence of the block comment
0700: * @param end the finishing sequence of the block comment
0701: * @param companion information object associated with this block comment
0702: * @param flags modification flags
0703: * @throws IllegalArgumentException when <code>null</code> or an empty string
0704: * is passed for start / end sequence of the block comment
0705: */
0706: public void addBlockComment(String start, String end,
0707: Object companion, int flags)
0708: throws IllegalArgumentException {
0709: addBlockComment(start, end, companion, flags, flags);
0710: }
0711:
0712: /**
0713: * Registering a block comment with a set of flags and an associated flag mask.
0714: *
0715: * @param start the starting sequence of the block comment
0716: * @param end the finishing sequence of the block comment
0717: * @param companion information object associated with this block comment
0718: * @param flags modification flags
0719: * @param flagMask flags that have valid values in the parameter <code>flags</code>
0720: * @throws IllegalArgumentException when <code>null</code> or an empty string
0721: * is passed for keyword
0722: */
0723: public void addBlockComment(String start, String end,
0724: Object companion, int flags, int flagMask)
0725: throws IllegalArgumentException {
0726: addProperty(new TokenizerProperty(Token.BLOCK_COMMENT,
0727: new String[] { start, end }, companion, flags, flagMask));
0728: }
0729:
0730: /**
0731: * Removing a certain block comment.
0732: * See the method description in {@link TokenizerProperties}.
0733: *
0734: * @param start the starting sequence of the block comment
0735: * @throws IllegalArgumentException when <code>null</code> or an empty string
0736: * is passed for start sequence of the block comment
0737: */
0738: public void removeBlockComment(String start)
0739: throws IllegalArgumentException {
0740: TokenizerProperty prop = getBlockComment(start);
0741:
0742: if (prop != null) {
0743: removeProperty(prop);
0744: }
0745: }
0746:
0747: /**
0748: * Retrieving a certain block comment.
0749: * See the method description in {@link TokenizerProperties}.
0750: *
0751: * @param start the starting sequence of the block comment
0752: * @return the associated object of the block comment
0753: * @throws IllegalArgumentException when <code>null</code> or an empty string
0754: * is passed for start sequence of the block comment
0755: */
0756: public Object getBlockCommentCompanion(String start)
0757: throws IllegalArgumentException {
0758: TokenizerProperty prop = getBlockComment(start);
0759:
0760: if (prop != null) {
0761: return prop.getCompanion();
0762: } else {
0763: return null;
0764: }
0765: }
0766:
0767: /**
0768: * Checks if the given block comment is known.
0769: * See the method description in {@link TokenizerProperties}.
0770: *
0771: * @param start the starting sequence of the block comment
0772: * @return <code>true</code> if the block comment is known,
0773: * <code>false</code> otherwise
0774: */
0775: public boolean blockCommentExists(String start) {
0776: try {
0777: return getBlockComment(start) != null;
0778: } catch (IllegalArgumentException ex) {
0779: return false;
0780: }
0781: }
0782:
0783: /**
0784: * Get the full description of a block comment property.
0785: * See the method description in {@link TokenizerProperties}.
0786: *
0787: * @param start the starting sequence of the block comment
0788: * @return the full block comment description or <code>null</code>
0789: * @throws IllegalArgumentException if the given image is empty or null
0790: */
0791: public TokenizerProperty getBlockComment(String start)
0792: throws IllegalArgumentException {
0793: // check parameter
0794: checkArgument(start, "Block comment");
0795:
0796: // get the real thing
0797: synchronized (this ) {
0798: return doGetProperty(Token.BLOCK_COMMENT, start);
0799: }
0800: }
0801:
0802: //---------------------------------------------------------------------------
0803: // special sequence properties
0804: //
0805:
0806: /**
0807: * Registering a special sequence of characters.
0808: * See the method description in {@link TokenizerProperties}.
0809: *
0810: * @param specSeq special sequence to register
0811: * @throws IllegalArgumentException if the given sequence is empty or null
0812: * @see #addKeyword
0813: * @see #setSeparators
0814: */
0815: public void addSpecialSequence(String specSeq)
0816: throws IllegalArgumentException {
0817: addSpecialSequence(specSeq, null);
0818: }
0819:
0820: /**
0821: * Registering a special sequence of characters.
0822: * See the method description in {@link TokenizerProperties}.
0823: *
0824: * @param specSeq special sequence to register
0825: * @param companion information object associated with this special sequence
0826: * @throws IllegalArgumentException if the given sequence is empty or null
0827: * @see #addKeyword
0828: * @see #setSeparators
0829: */
0830: public void addSpecialSequence(String specSeq, Object companion)
0831: throws IllegalArgumentException {
0832: addSpecialSequence(specSeq, companion, getParseFlags());
0833: }
0834:
0835: /**
0836: * Registering a special sequence of characters.
0837: * See the method description in {@link TokenizerProperties}.
0838: *
0839: * @param specSeq special sequence to register
0840: * @param companion information object associated with this special sequence
0841: * @param flags modification flags
0842: * @throws IllegalArgumentException if the given sequence is empty or null
0843: * @see #addKeyword
0844: * @see #setSeparators
0845: */
0846: public void addSpecialSequence(String specSeq, Object companion,
0847: int flags) throws IllegalArgumentException {
0848: addSpecialSequence(specSeq, companion, flags, flags);
0849: }
0850:
0851: /**
0852: * Registering a special sequence with a set of flags and an associated flag mask.
0853: *
0854: * @param specSeq special sequence to register
0855: * @param companion information object associated with this special sequence
0856: * @param flags modification flags
0857: * @param flagMask flags that have valid values in the parameter <code>flags</code>
0858: * @throws IllegalArgumentException when <code>null</code> or an empty string
0859: * is passed for keyword
0860: */
0861: public void addSpecialSequence(String specSeq, Object companion,
0862: int flags, int flagMask) throws IllegalArgumentException {
0863: addProperty(new TokenizerProperty(Token.SPECIAL_SEQUENCE,
0864: new String[] { specSeq }, companion, flags, flagMask));
0865: }
0866:
0867: /**
0868: * Deregistering a special sequence from the parser.
0869: * See the method description in {@link TokenizerProperties}.
0870: *
0871: * @param specSeq sequence to remove
0872: * @throws IllegalArgumentException if the given sequence is empty or null
0873: */
0874: public void removeSpecialSequence(String specSeq)
0875: throws IllegalArgumentException {
0876: TokenizerProperty prop = getSpecialSequence(specSeq);
0877:
0878: if (prop != null) {
0879: removeProperty(prop);
0880: }
0881: }
0882:
0883: /**
0884: * Retrieving the companion of the given special sequence.
0885: * See the method description in {@link TokenizerProperties}.
0886: *
0887: * @param specSeq sequence to remove
0888: * @return the object associated with the special sequence
0889: * @throws IllegalArgumentException if the given sequence is empty or null
0890: */
0891: public Object getSpecialSequenceCompanion(String specSeq)
0892: throws IllegalArgumentException {
0893: TokenizerProperty prop = getSpecialSequence(specSeq);
0894:
0895: if (prop != null) {
0896: return prop.getCompanion();
0897: } else {
0898: return null;
0899: }
0900: }
0901:
0902: /**
0903: * Checks if the given special sequence is known to the <code>Tokenizer</code>.
0904: * See the method description in {@link TokenizerProperties}.
0905: *
0906: * @param specSeq sequence to check
0907: * @return <code>true</code> if the block comment is known,
0908: * <code>false</code> otherwise
0909: */
0910: public boolean specialSequenceExists(String specSeq) {
0911: try {
0912: return getSpecialSequence(specSeq) != null;
0913: } catch (IllegalArgumentException ex) {
0914: return false;
0915: }
0916: }
0917:
0918: /**
0919: * Get the full description of a special sequence property.
0920: * See the method description in {@link TokenizerProperties}.
0921: *
0922: * @param specSeq sequence to search
0923: * @return the full sequence description or <code>null</code>
0924: * @throws IllegalArgumentException if the given keyword is empty or null
0925: */
0926: public TokenizerProperty getSpecialSequence(String specSeq)
0927: throws IllegalArgumentException {
0928: // check parameter
0929: checkArgument(specSeq, "Special sequence");
0930:
0931: // get the keyword
0932: synchronized (this ) {
0933: return doGetProperty(Token.SPECIAL_SEQUENCE, specSeq);
0934: }
0935: }
0936:
0937: //---------------------------------------------------------------------------
0938: // keyword properties
0939: //
0940:
0941: /**
0942: * Registering a keyword.
0943: * See the method description in {@link TokenizerProperties}.
0944: *
0945: * @param keyword keyword to register
0946: * @throws IllegalArgumentException if the given keyword is empty or null
0947: */
0948: public void addKeyword(String keyword)
0949: throws IllegalArgumentException {
0950: addKeyword(keyword, null);
0951: }
0952:
0953: /**
0954: * Registering a keyword.
0955: * See the method description in {@link TokenizerProperties}.
0956: *
0957: * @param keyword keyword to register
0958: * @param companion information object associated with this keyword
0959: * @throws IllegalArgumentException if the given keyword is empty or null
0960: */
0961: public void addKeyword(String keyword, Object companion)
0962: throws IllegalArgumentException {
0963: addKeyword(keyword, companion, getParseFlags());
0964: }
0965:
0966: /**
0967: * Registering a keyword.
0968: * See the method description in {@link TokenizerProperties}.
0969: *
0970: * @param keyword keyword to register
0971: * @param companion information object associated with this keyword
0972: * @throws IllegalArgumentException if the given keyword is empty or null
0973: */
0974: public void addKeyword(String keyword, Object companion, int flags)
0975: throws IllegalArgumentException {
0976: addKeyword(keyword, companion, flags, flags);
0977: }
0978:
0979: /**
0980: * Registering a keyword with a set of flags and an associated flag mask..
0981: *
0982: * @param keyword keyword to register
0983: * @param companion information object associated with this keyword
0984: * @param flags modification flags
0985: * @param flagMask flags that have valid values in the parameter <code>flags</code>
0986: * @throws IllegalArgumentException when <code>null</code> or an empty string
0987: * is passed for keyword
0988: */
0989: public void addKeyword(String keyword, Object companion, int flags,
0990: int flagMask) throws IllegalArgumentException {
0991: addProperty(new TokenizerProperty(Token.KEYWORD,
0992: new String[] { keyword }, companion, flags, flagMask));
0993: }
0994:
0995: /**
0996: * Deregistering a keyword.
0997: * See the method description in {@link TokenizerProperties}.
0998: *
0999: * @param keyword keyword to remove
1000: * @throws IllegalArgumentException if the given keyword is empty or null
1001: */
1002: public void removeKeyword(String keyword)
1003: throws IllegalArgumentException {
1004: TokenizerProperty prop = getKeyword(keyword);
1005:
1006: if (prop != null) {
1007: removeProperty(prop);
1008: }
1009: }
1010:
1011: /**
1012: * Retrieving the companion of the given keyword.
1013: * See the method description in {@link TokenizerProperties}.
1014: *
1015: * @param keyword keyword thats companion is sought
1016: * @return the object associated with the keyword
1017: * @throws IllegalArgumentException if the given keyword is empty or null
1018: */
1019: public Object getKeywordCompanion(String keyword)
1020: throws IllegalArgumentException {
1021: TokenizerProperty prop = getKeyword(keyword);
1022:
1023: if (prop != null) {
1024: return prop.getCompanion();
1025: } else {
1026: return null;
1027: }
1028: }
1029:
1030: /**
1031: * Checks if the given keyword is known to the <code>Tokenizer</code>.
1032: * See the method description in {@link TokenizerProperties}.
1033: *
1034: * @param keyword keyword to search
1035: * @return <code>true</code> if the keyword is known,
1036: * <code>false</code> otherwise
1037: */
1038: public boolean keywordExists(String keyword) {
1039: try {
1040: return getKeyword(keyword) != null;
1041: } catch (IllegalArgumentException ex) {
1042: return false;
1043: }
1044: }
1045:
1046: /**
1047: * Get the full description of a keyword property.
1048: * See the method description in {@link TokenizerProperties}.
1049: *
1050: * @param keyword keyword to search
1051: * @return the full keyword description or <code>null</code>
1052: * @throws IllegalArgumentException if the given keyword is empty or null
1053: */
1054: public TokenizerProperty getKeyword(String keyword)
1055: throws IllegalArgumentException {
1056: // check parameter
1057: checkArgument(keyword, "Keyword");
1058:
1059: // get the keyword
1060: synchronized (this ) {
1061: return doGetProperty(Token.KEYWORD, keyword);
1062: }
1063: }
1064:
1065: //---------------------------------------------------------------------------
1066: // pattern properties
1067: //
1068:
1069: /**
1070: * Registering a pattern. See the method description in {@link TokenizerProperties}.
1071: *
1072: * @param pattern the regular expression to be added
1073: * @throws IllegalArgumentException when <code>null</code> or an empty pattern
1074: * is passed
1075: * @see #removePattern
1076: * @see #addPattern(String, Object)
1077: * @see #addPattern(String, Object, int)
1078: */
1079: public void addPattern(String pattern)
1080: throws IllegalArgumentException {
1081: addPattern(pattern, null);
1082: }
1083:
1084: /**
1085: * Registering a pattern with an associated object. See the method description
1086: * in {@link TokenizerProperties}.
1087: *
1088: * @param pattern the regular expression to be added
1089: * @param companion information object associated with this pattern
1090: * @throws IllegalArgumentException when <code>null</code> or an empty pattern
1091: * is passed
1092: * @see #removePattern
1093: * @see #addPattern(String)
1094: * @see #addPattern(String, Object, int)
1095: */
1096: public void addPattern(String pattern, Object companion)
1097: throws IllegalArgumentException {
1098: addPattern(pattern, companion, getParseFlags());
1099: }
1100:
1101: /**
1102: * Registering a pattern with an associated object. See the method description
1103: * in {@link TokenizerProperties}.
1104: *
1105: * @param pattern the regular expression to be added
1106: * @param companion information object associated with this keyword
1107: * @param flags modification flags
1108: * @throws IllegalArgumentException when <code>null</code> or an empty pattern
1109: * is passed
1110: * @see #removePattern
1111: * @see #addPattern(String)
1112: * @see #addPattern(String, Object)
1113: */
1114: public void addPattern(String pattern, Object companion, int flags)
1115: throws IllegalArgumentException {
1116: addPattern(pattern, companion, flags, flags);
1117: }
1118:
1119: /**
1120: * Registering a pattern with an associated object and explicitely given flags.
1121: * See the description of the {@link #addPattern(String)} for details on pattern.
1122: *
1123: * @param pattern the regular expression to be added
1124: * @param companion information object associated with this keyword
1125: * @param flags values for modification flags
1126: * @param flagMask flags that have valid values in the parameter <code>flags</code>
1127: * @throws IllegalArgumentException when <code>null</code> or an empty pattern
1128: * is passed
1129: */
1130: public void addPattern(String pattern, Object companion, int flags,
1131: int flagMask) throws IllegalArgumentException {
1132: addProperty(new TokenizerProperty(Token.PATTERN,
1133: new String[] { pattern }, companion, flags, flagMask));
1134: }
1135:
1136: /**
1137: * Removing a pattern. See the method description in {@link TokenizerProperties}.
1138: *
1139: * @param pattern the regular expression to be removed
1140: * @throws IllegalArgumentException when <code>null</code> or an empty string
1141: * is passed
1142: */
1143: public void removePattern(String pattern)
1144: throws IllegalArgumentException {
1145: TokenizerProperty prop = getPattern(pattern);
1146:
1147: if (prop != null) {
1148: removeProperty(prop);
1149: }
1150: }
1151:
1152: /**
1153: * Retrieving the information associated with a given pattern. See the method
1154: * description in {@link TokenizerProperties}.
1155: *
1156: * @param pattern the regular expression to be removed
1157: * @return the associated information or <code>null</code>
1158: * @throws IllegalArgumentException when <code>null</code> or an emtpy pattern
1159: * is passed
1160: */
1161: public Object getPatternCompanion(String pattern)
1162: throws IllegalArgumentException {
1163: TokenizerProperty prop = getPattern(pattern);
1164:
1165: if (prop != null) {
1166: return prop.getCompanion();
1167: } else {
1168: return null;
1169: }
1170: }
1171:
1172: /**
1173: * Checks if the given pattern is known to the parser.
1174: * See the method description in {@link TokenizerProperties}.
1175: *
1176: * @param pattern the regular expression to be looked for
1177: * @return <code>true</code> if the pattern is registered,
1178: * <code>false</code> otherwise
1179: */
1180: public boolean patternExists(String pattern) {
1181: try {
1182: return getPattern(pattern) != null;
1183: } catch (IllegalArgumentException ex) {
1184: return false;
1185: }
1186: }
1187:
1188: /**
1189: * Get the full description of a string property starting with the given
1190: * prefix. The method returns <code>null</code> if the passed <code>start</code>
1191: * parameter cannot be mapped to a known string description ({@link #stringExists}
1192: * would return <code>false</code>).
1193: *
1194: * @param pattern the regular expression to be looked for
1195: * @return the full pattern description or <code>null</code>
1196: * @throws IllegalArgumentException when <code>null</code> or an emtpy pattern
1197: * is passed
1198: */
1199: public TokenizerProperty getPattern(String pattern)
1200: throws IllegalArgumentException {
1201: // check parameter
1202: checkArgument(pattern, "Pattern");
1203:
1204: // get the pattern
1205: synchronized (this ) {
1206: return doGetProperty(Token.PATTERN, pattern);
1207: }
1208: }
1209:
1210: //---------------------------------------------------------------------------
1211: // Common properties
1212: //
1213:
1214: /**
1215: * Registering a {@link TokenizerProperty}.
1216: * See the method description in {@link TokenizerProperties}.
1217: *
1218: * @param property property to register
1219: * @throws IllegalArgumentException when <code>null</code>, an incomplete or
1220: * otherwise unusable property is passed
1221: */
1222: public void addProperty(TokenizerProperty property)
1223: throws IllegalArgumentException {
1224: // check the parameter
1225: checkPropertyArgument(property);
1226:
1227: // check special cases
1228: String[] images = property.getImages();
1229:
1230: switch (property.getType()) {
1231: case Token.STRING:
1232: case Token.BLOCK_COMMENT:
1233: checkArgument((images.length < 2) ? null : images[1],
1234: "End sequence");
1235: break;
1236: }
1237:
1238: // add property according to type
1239: synchronized (this ) {
1240: TokenizerProperty oldProp = doAddProperty(property);
1241:
1242: if (oldProp == null) {
1243: notifyListeners(new TokenizerPropertyEvent(
1244: TokenizerPropertyEvent.PROPERTY_ADDED, property));
1245: } else if (!oldProp.equals(property)) {
1246: notifyListeners(new TokenizerPropertyEvent(
1247: TokenizerPropertyEvent.PROPERTY_MODIFIED,
1248: property, oldProp));
1249: }
1250: }
1251: }
1252:
1253: /**
1254: * Deregistering a {@link TokenizerProperty} from the store.
1255: * See the method description in {@link TokenizerProperties}.
1256: *
1257: * @param property property to register
1258: * @throws IllegalArgumentException when <code>null</code>, an incomplete or
1259: * otherwise unusable property is passed
1260: */
1261: public void removeProperty(TokenizerProperty property)
1262: throws IllegalArgumentException {
1263: // check the parameter
1264: checkPropertyArgument(property);
1265:
1266: // removing property according to type
1267: synchronized (this ) {
1268: TokenizerProperty removed = doRemoveProperty(property);
1269:
1270: if (removed != null) {
1271: notifyListeners(new TokenizerPropertyEvent(
1272: TokenizerPropertyEvent.PROPERTY_REMOVED,
1273: removed));
1274: }
1275: }
1276: }
1277:
1278: /**
1279: * Checks if the given {@link TokenizerProperty} is known.
1280: * See the method description in {@link TokenizerProperties}.
1281: *
1282: * @param property the property to search
1283: * @return <code>true</code> if the property is known,
1284: * <code>false</code> otherwise
1285: */
1286: public boolean propertyExists(TokenizerProperty property) {
1287: try {
1288: checkPropertyArgument(property);
1289: synchronized (this ) {
1290: return doGetProperty(property.getType(), property
1291: .getImages()[0]) != null;
1292: }
1293: } catch (IllegalArgumentException ex) {
1294: return false;
1295: }
1296: }
1297:
1298: //---------------------------------------------------------------------------
// Listener registration (see the TokenizerProperties interface)
1300: //
1301:
1302: /**
1303: * Registering a new {@link TokenizerPropertyListener}.
1304: * See the method description in {@link TokenizerProperties}.
1305: *
1306: * @param listener the new {@link TokenizerPropertyListener}
1307: * @see #removeTokenizerPropertyListener
1308: */
1309: public void addTokenizerPropertyListener(
1310: TokenizerPropertyListener listener) {
1311: if (listener != null) {
1312: synchronized (_listeners) {
1313: WeakReference ref = new WeakReference(listener);
1314: _listeners.add(ref);
1315: }
1316: }
1317: }
1318:
1319: /**
1320: * Removing a listener from the list of registered {@link TokenizerPropertyListener}
1321: * instances.
1322: * See the method description in {@link TokenizerProperties}.
1323: *
1324: * @param listener the {@link TokenizerPropertyListener} to deregister
1325: * @see #addTokenizerPropertyListener
1326: */
1327: public void removeTokenizerPropertyListener(
1328: TokenizerPropertyListener listener) {
1329: if (listener != null) {
1330: synchronized (_listeners) {
1331: Iterator iter = _listeners.iterator();
1332:
1333: while (iter.hasNext()) {
1334: WeakReference ref = (WeakReference) iter.next();
1335: Object elem = ref.get();
1336:
1337: if (elem == null) {
1338: // implicit cleanup
1339: iter.remove();
1340: } else if (listener.equals(elem)) {
1341: // found the real one
1342: iter.remove();
1343: break;
1344: }
1345: }
1346: }
1347: }
1348: }
1349:
1350: //---------------------------------------------------------------------------
1351: // Implementation
1352: //
1353:
1354: /**
1355: * Puts or removes all characters in the given set into or from a given
1356: * {@link java.util.Map}.
1357: *
1358: * @param map put the characters of the set into this map
1359: * @param set the character set to map
1360: * @param removeIt if <code>true</code> remove the characters of the set, otherwise add them
1361: * @throws IllegalArgumentException if the set contains incomplete ranges
1362: */
1363: private void mapCharacterSet(Map map, String set, boolean removeIt)
1364: throws IllegalArgumentException {
1365: for (int index = 0; index < set.length(); ++index) {
1366: char cc = set.charAt(index);
1367:
1368: switch (cc) {
1369: case '-':
1370: try {
1371: char start = set.charAt(index - 1);
1372: char end = set.charAt(index + 1);
1373: if (end == '\\') {
1374: end = set.charAt(index + 2);
1375: index += 2;
1376: } else {
1377: index++;
1378: }
1379: for (char rangeCC = start; rangeCC <= end; ++rangeCC) {
1380: if (removeIt) {
1381: map.remove(new Character(rangeCC));
1382: } else {
1383: map.put(new Character(rangeCC), null);
1384: }
1385: }
1386: } catch (Exception ex) {
1387: throw new IllegalArgumentException(set);
1388: }
1389: break;
1390:
1391: case '\\':
1392: index++;
1393: cc = set.charAt(index);
1394: /* no break; */
1395: default:
1396: if (index + 1 >= set.length()
1397: || set.charAt(index + 1) != '-') {
1398: if (removeIt) {
1399: map.remove(new Character(cc));
1400: } else {
1401: map.put(new Character(cc), null);
1402: }
1403: }
1404: }
1405: }
1406: }
1407:
1408: /**
1409: * Build the escape sequence for a character in a set if nessecary.
1410: *
1411: * @param cc the character to test
1412: * @return <code>true</code> if the given character must be escaped,
1413: * <code>false</code> otherwise
1414: */
1415: private boolean escapeChar(char cc) {
1416: switch (cc) {
1417: case '\\':
1418: case '-':
1419: return true;
1420: default:
1421: return false;
1422: }
1423: }
1424:
1425: /**
1426: * Add a character range to a string. The method checks if the given start
1427: * and end characters actually form a range.
1428: *
1429: * @param buffer add range to this buffer
1430: * @param rangeStart first character in range
1431: * @param rangeEnd last character in range
1432: */
1433: private void addRange(StringBuffer buffer, char rangeStart,
1434: char rangeEnd) {
1435: if (escapeChar(rangeStart)) {
1436: buffer.append('\\');
1437: }
1438: buffer.append((char) rangeStart);
1439: if (rangeStart < rangeEnd - 1) {
1440: buffer.append('-');
1441: }
1442: if (rangeStart != rangeEnd) {
1443: if (escapeChar(rangeEnd)) {
1444: buffer.append('\\');
1445: }
1446: buffer.append((char) rangeEnd);
1447: }
1448: }
1449:
1450: /**
1451: * Merges tho character set strings that may contain characters ranges like
1452: * "a-z". The result is united character set of both parameters.
1453: *
1454: * @param set1 first character set
1455: * @param set2 second character set
1456: * @param removeSet2 should the second set ber removed rather than added?
1457: * @return the characters of the first set + the characters of the second set
1458: * if not already present in the first set.
1459: * @throws IllegalArgumentException if the set contains incomplete ranges
1460: */
1461: private String mergeSet(String set1, String set2, boolean removeSet2)
1462: throws IllegalArgumentException {
1463: // merge the sets into a map
1464: TreeMap map = new TreeMap();
1465:
1466: mapCharacterSet(map, set1, false);
1467: mapCharacterSet(map, set2, removeSet2);
1468:
1469: // iterate through the map in a predefined order
1470: StringBuffer buffer = new StringBuffer(set1.length()
1471: + set2.length());
1472:
1473: if (map.size() > 0) {
1474: Iterator iter = map.keySet().iterator();
1475: char rangeStart = ((Character) map.firstKey()).charValue();
1476: char rangeEnd = rangeStart;
1477:
1478: while (iter.hasNext()) {
1479: char cc = ((Character) iter.next()).charValue();
1480:
1481: if (cc > rangeEnd + 1) {
1482: addRange(buffer, rangeStart, rangeEnd);
1483: rangeStart = rangeEnd = cc;
1484: } else {
1485: rangeEnd = cc;
1486: }
1487: }
1488: addRange(buffer, rangeStart, rangeEnd);
1489: }
1490:
1491: // ready
1492: return buffer.toString();
1493: }
1494:
1495: /**
1496: * Normalize flags. This is nessecary for the case-sensitivity flags
1497: * {@link Flags#F_CASE} and {@link Flags#F_NO_CASE}.
1498: * If neither <code>F_CASE</code> nor <code>F_NO_CASE</code> is set, <code>F_CASE</code>
1499: * is assumed. If both flags are set, <code>F_CASE</code> takes preceedence.
1500: *
1501: * @param flags not yet normalized flags
1502: * @param flagMask which flags should be handled
1503: * @return the normalized flags
1504: */
1505: private int normalizeFlags(int flags, int flagMask) {
1506: if ((flagMask & (Flags.F_CASE | Flags.F_NO_CASE)) == (Flags.F_CASE | Flags.F_NO_CASE)) {
1507: if ((flags & (Flags.F_CASE | Flags.F_NO_CASE)) == 0) {
1508: // none set: F_CASE is the default
1509: flags |= Flags.F_CASE;
1510: } else if ((flags & Flags.F_CASE) != 0) {
1511: // perhaps both set: F_CASE weights more
1512: flags &= ~Flags.F_NO_CASE;
1513: }
1514: }
1515: return flags;
1516: }
1517:
1518: /**
1519: * Checking a string parameter on null or emptiness. The method encapsulates
1520: * commonly used code (see {@link #addKeyword} or {@link #addSpecialSequence}
1521: * for example).
1522: *
1523: * @param arg the parameter to check
1524: * @param name a name for the <code>arg</code> parameter
1525: * @throws IllegalArgumentException if the given <code>arg</code> is null or empty
1526: */
1527: protected void checkArgument(String arg, String name)
1528: throws IllegalArgumentException {
1529: if (arg == null) {
1530: throw new ExtIllegalArgumentException("{0} is null.",
1531: new Object[] { name });
1532: } else if (arg.length() <= 0) {
1533: throw new ExtIllegalArgumentException("{0} is empty.",
1534: new Object[] { name });
1535: }
1536: }
1537:
1538: /**
1539: * Checking a {@link TokenizerProperty} parameter on null or missing nessecary
1540: * values. The method encapsulates commonly used code (see {@link #addProperty}
1541: * and {@link #removeProperty}).
1542: *
1543: * @param property the parameter to check
1544: * @throws IllegalArgumentException if the given <code>arg</code> is null or empty
1545: */
1546: protected void checkPropertyArgument(TokenizerProperty property)
1547: throws IllegalArgumentException {
1548: // check the parameter
1549: if (property == null) {
1550: throw new ExtIllegalArgumentException("Property is null.",
1551: null);
1552: } else if (property.getImages() == null) {
1553: throw new ExtIllegalArgumentException(
1554: "No image(s) given in property.", null);
1555: } else if (property.getImages()[0] == null) {
1556: throw new ExtIllegalArgumentException(
1557: "No (leading) image given in property.", null);
1558: }
1559: }
1560:
1561: /**
1562: * The method fires the nessecary events when whitespace or separator sets
1563: * change.
1564: *
1565: * @param type token type
1566: * @param newValue the newly set value
1567: * @param oldValue the old value with case-sensitive handling
1568: */
1569: protected void handleEvent(int type, String newValue,
1570: String oldValue) {
1571: if (newValue != null && newValue.length() > 0) {
1572: if (oldValue == null) {
1573: notifyListeners(new TokenizerPropertyEvent(
1574: TokenizerPropertyEvent.PROPERTY_ADDED,
1575: new TokenizerProperty(type,
1576: new String[] { newValue })));
1577: } else if (!oldValue.equals(newValue)) {
1578: notifyListeners(new TokenizerPropertyEvent(
1579: TokenizerPropertyEvent.PROPERTY_MODIFIED,
1580: new TokenizerProperty(type,
1581: new String[] { newValue }),
1582: new TokenizerProperty(type,
1583: new String[] { oldValue })));
1584: }
1585: } else if (oldValue != null && oldValue.length() > 0) {
1586: notifyListeners(new TokenizerPropertyEvent(
1587: TokenizerPropertyEvent.PROPERTY_REMOVED,
1588: new TokenizerProperty(type,
1589: new String[] { oldValue })));
1590: }
1591: }
1592:
1593: /**
1594: * Notifying the registered listeners about a change in the properties. Listeners
1595: * are called in the order of their registration (see {@link #addTokenizerPropertyListener}).
1596: *
1597: * @param event the {@link TokenizerPropertyEvent} to communicate to the listeners
1598: */
1599: protected void notifyListeners(TokenizerPropertyEvent event) {
1600: Iterator iter = _listeners.iterator();
1601:
1602: while (iter.hasNext()) {
1603: WeakReference ref = (WeakReference) iter.next();
1604: TokenizerPropertyListener listener = (TokenizerPropertyListener) ref
1605: .get();
1606:
1607: if (listener == null) {
1608: // implicit cleanup of unused listeners
1609: iter.remove();
1610: } else {
1611: // call listener
1612: listener.propertyChanged(event);
1613: }
1614: }
1615: }
1616:
1617: //---------------------------------------------------------------------------
1618: // Members
1619: //
1620:
/**
 * Overall tokenizer flags; initialized to <code>0</code> (no flag set).
 */
protected int _flags = 0;
1625:
/**
 * List of registered {@link TokenizerPropertyListener} instances. The
 * entries are {@link WeakReference} objects wrapping the listeners (see
 * {@link #addTokenizerPropertyListener}). The list object is also used as
 * the lock when listeners are registered or removed.
 */
private LinkedList _listeners = new LinkedList();
1630: }
|