001: /*
002: * TokenizerProperty.java: Various characteristics of Tokenizer.
003: *
004: * Copyright (C) 2002 Heiko Blau
005: *
006: * This file belongs to the JTopas Library.
007: * JTopas is free software; you can redistribute it and/or modify it
008: * under the terms of the GNU Lesser General Public License as published by the
009: * Free Software Foundation; either version 2.1 of the License, or (at your
010: * option) any later version.
011: *
012: * This software is distributed in the hope that it will be useful, but WITHOUT
013: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
014: * FITNESS FOR A PARTICULAR PURPOSE.
015: * See the GNU Lesser General Public License for more details.
016: *
017: * You should have received a copy of the GNU Lesser General Public License along
018: * with JTopas. If not, write to the
019: *
020: * Free Software Foundation, Inc.
021: * 59 Temple Place, Suite 330,
022: * Boston, MA 02111-1307
023: * USA
024: *
025: * or check the Internet: http://www.fsf.org
026: *
027: * Contact:
028: * email: heiko@susebox.de
029: */
030:
031: package de.susebox.jtopas;
032:
033: //-----------------------------------------------------------------------------
034: // Imports
035: //
036:
037: //-----------------------------------------------------------------------------
038: // Class TokenizerProperty
039: //
040:
041: /**<p>
042: * This class is mainly used by {@link TokenizerProperties} implementations to
043: * return {@link java.util.Iterator} objects of their various properties (keywords,
044: * special sequences etc.). Moreover, it can be used wherever information about
045: * a lexical element description is needed, for instance when firing a
046: * {@link TokenizerPropertyEvent}.
047: *</p><p>
048: * This class replaces the older {@link de.susebox.java.util.TokenizerProperty}
049: * class which is deprecated.
050: *</p>
051: *
052: * @see TokenizerProperties
053: * @author Heiko Blau
054: */
055: public class TokenizerProperty {
056:
057: //---------------------------------------------------------------------------
058: // special property types
059: //
060:
061: /**
062: * The property describes a parse flag mask. This type is nessecary for the
063: * "parse flag changed" event fired by {@link TokenizerProperties}
064: * implementations.
065: */
066: public static final byte PARSE_FLAG_MASK = 127;
067:
068: //---------------------------------------------------------------------------
069: // getter- and setter methods
070: //
071:
072: /**
073: * Setting the type of the <code>TokenizerProperty</code>. Usually, one of the
074: * constants defined in {@link Token} is passed to this method, for instance
075: * {@link Token#NORMAL}, {@link Token#KEYWORD} or {@link Token#WHITESPACE}.
076: *<br>
077: * However, implementations and subclasses of the interfaces and classes in the
078: * package {@link de.susebox.jtopas} may define, set and return their own type
079: * constants.
080: *
081: * @param type type of the tokenizer property (one of the {@link Token} constants)
082: * @see Token
083: * @see #getType
084: */
085: public void setType(int type) {
086: _type = type;
087: }
088:
089: /**
090: * Retrieving the type of this property. Usually, one of the constants defined
091: * in {@link Token} is returned, for instance {@link Token#NORMAL}, {@link Token#KEYWORD}
092: * or {@link Token#WHITESPACE}. However, implementations and subclasses of the
093: * interfaces and classes in the package {@link de.susebox.jtopas} may define,
094: * set and return their own type constants.
095: *
096: * @return type of the property
097: * @see #setType
098: */
099: public int getType() {
100: return _type;
101: }
102:
103: /**
104: * Setting flags. These flags are not specified here. Usually, a combination
105: * of {@link TokenizerProperties} <code>F_...</code> constants is used here.
106: *<br>
107: * The method is equivalent to <code>{@link #setFlags(int, int)}(flags, flags)</code>.
108: *
109: * @param flags a bitmask
110: * @see #getFlags
111: */
112: public void setFlags(int flags) {
113: setFlags(flags, flags);
114: }
115:
116: /**
117: * Setting the values for the flags contained in the flag mask. The flags
118: * are not specified here. Usually, a combination of {@link TokenizerProperties}
119: * <code>F_...</code> constants is used here.
120: *<br>
121: * The flag mask contains the set flags that are valid for this
122: * <code>TokenizerProperty</code>. Flags that are not contained in the flag mask,
123: * have an "unknown" value in this instance.
124: *<br>
125: * A flag mask value of -1 means that all flags should are valid. <code>flag = 0</code>
126: * and <code>flagMask = TokenizerProperties.F_CASE</code> means that the property
127: * is case-insensitive, but there are no other characteristics explicitely set
128: * for this property.
129: *<br>
130: *
131: * @param flags a bitmask of flags to be set or clear
132: * @param flagMask a bitmask containing the flags that are valid in <code>flags</code>
133: * @see #getFlags
134: */
135: public void setFlags(int flags, int flagMask) {
136: _flags = (flags & flagMask);
137: _flagMask = flagMask;
138: }
139:
140: /**
141: * Retrieving the flags of this property.
142: *
143: * @return flags of the property
144: * @see #setFlags
145: * @see #getFlags(int)
146: */
147: public int getFlags() {
148: return _flags;
149: }
150:
151: /**
152: * Retrieving the current flag mask containing the bits that are valid in the
153: * return value of {@link #getFlags}. The return value -1 means that all bits
154: * in <code>getFlags</code> are explicitely set for this property.
155: *
156: * @return a bitmask containing the flags that are explicitely set for this property
157: * @see #getFlags()
158: * @see #setFlags(int, int)
159: */
160: public int getFlagMask() {
161: return _flagMask;
162: }
163:
164: /**
165: * Checks wether a given flag is set or cleared for this property. The default
166: * value is returned if the current flag mask (see {@link #getFlagMask}) does
167: * not contain the given flag. This is the case if {@link #containsFlag} would
168: * return <code>false</code> for the given flag.
169: *<br>
170: * If <code>containsFlag</code> returns <code>true</code> for the given flag,
171: * <code>isFlagSet</code> returns <code>true</code> only if the all bits in the
172: * given flag are in the current flags.
173: *
174: * @param flag the flag to check
175: * @param defVal the default value if the flag is not contained in the
176: * current flag mask
177: * @return <code>true</code> if the given flag is explicitely set or the default
178: * value is <code>true</code>, <code>false</code> if the given flag is
179: * explicitely cleared or the default value is <code>false</code>.
180: */
181: public boolean isFlagSet(int flag, boolean defVal) {
182: if (containsFlag(flag)) {
183: return (getFlags() & flag) == flag;
184: } else {
185: return defVal;
186: }
187: }
188:
189: /**
190: * Checks wether a given flag is in the current flag mask. That means that the
191: * property contains its own explicit value for the flag.
192: *<br>
193: * The method can be used in combination with {@link #isFlagSet} to determine
194: * wether the return value of <code>isFlagSet</code> is actually the default
195: * value given to that method, or the flag value of the property itself.
196: *<br>
197: * The method returns <code>true</code> only if the all bits in the given flag
198: * are in the current flag mask.
199: *
200: * @param flag the flag to check
201: * @param defVal the default value if the flag is not contained in the
202: * current flag mask
203: * @return <code>true</code> if the given flag is explicitely set or the default
204: * value is <code>true</code>, <code>false</code> if the given flag is
205: * explicitely cleared or the default value is <code>false</code>.
206: */
207: public boolean containsFlag(int flag) {
208: return (getFlagMask() & flag) == flag;
209: }
210:
211: /**
212: * Images of lexical elements are quite different. Starting sequences of line
213: * comments, keywords and special sequences are strings representing only
214: * themselfes. Whitespaces and separators are represented as string consisting
215: * of the single whitespace and separator characters and / or character ranges.
216: *<br>
217: * A block comment is represented an array of two strings. The first is the
218: * starting sequence, the second the finishing sequence. The same is true for
219: * string elements. However, string elements usually have an escape sequence.
220: *
221: * @param images the characterising images of a lexical element
222: */
223: public void setImages(String[] images)
224: throws IllegalArgumentException {
225: _images = images;
226: }
227:
228: /**
229: * Retrieving the one or more images a lexical element description has.
230: *
231: * @return the array with images like string start and end sequences etc.
232: * @see #setImages
233: */
234: public String[] getImages() {
235: return _images;
236: }
237:
238: /**
239: * Some token may have associated informations for the user of the <code>Token</code>.
240: * A popular thing would be the association of an integer constant to a special
241: * sequence or keyword to be used in fast <code>switch</code> statetents.
242: *
243: * @param companion the associated information for the lexical element
244: */
245: public void setCompanion(Object companion) {
246: _companion = companion;
247: }
248:
249: /**
250: * Obtaining the associated information of the token. Can be <code>null</code>. See
251: * {@link #setCompanion} for details.
252: *
253: * @return the associated information of this token
254: */
255: public Object getCompanion() {
256: return _companion;
257: }
258:
259: //---------------------------------------------------------------------------
260: // construction
261: //
262:
263: /**
264: * Default constructor. Initializes a property with type {@link Token#UNKNOWN}
265: * and no images or flags.
266: */
267: public TokenizerProperty() {
268: this (Token.UNKNOWN);
269: }
270:
271: /**
272: * Constructs a <code>TokenizerProperty</code> where only the type is known so
273: * far. For the type, one of the constants defined in {@link Token} must be
274: * used.
275: *
276: * @param type the property type
277: */
278: public TokenizerProperty(int type) {
279: this (type, null);
280: }
281:
282: /**
283: * Constructs a <code>TokenizerProperty</code> with type and image(s). For the
284: * type, one of the constants defined in {@link Token} must be used.
285: *
286: * @param type the property type
287: * @param images the characterising images of a lexical element
288: */
289: public TokenizerProperty(int type, String[] images) {
290: this (type, images, null);
291: }
292:
293: /**
294: * Constructs a <code>TokenProperty</code> object with a set of type, image(s)
295: * and companion.
296: *
297: * @param type the property type
298: * @param images the characterising images of a lexical element
299: * @param companion the associated information for the lexical element
300: */
301: public TokenizerProperty(int type, String[] images, Object companion) {
302: this (type, images, companion, 0);
303: }
304:
305: /**
306: * Constructs a <code>TokenProperty</code> object with a set of type, image(s),
307: * companion object and flags.
308: *
309: * @param type the property type
310: * @param images the characterising images of a lexical element
311: * @param companion the associated information for the lexical element
312: * @param flags the specific parse flags for this lexical element
313: */
314: public TokenizerProperty(int type, String[] images,
315: Object companion, int flags) {
316: this (type, images, companion, flags, flags);
317: }
318:
319: /**
320: * Constructs a <code>TokenProperty</code> object with a set of type, image(s),
321: * companion object and flags with an associated flag mask ({@see #setFlags(int, int)}
322: * for details).
323: *
324: * @param type the property type
325: * @param images the characterising images of a lexical element
326: * @param companion the associated information for the lexical element
327: * @param flags the specific parse flags for this lexical element
328: * @param flagMask which bits of the parse flags are actually valid
329: */
330: public TokenizerProperty(int type, String[] images,
331: Object companion, int flags, int flagMask) {
332: setType(type);
333: setImages(images);
334: setCompanion(companion);
335: setFlags(flags, flagMask);
336: }
337:
338: //---------------------------------------------------------------------------
339: // overloaded methods
340: //
341:
342: /**
343: * Redefinition of the well-known {@link java.lang.Object#equals} method.
344: *
345: * @param that compare this instance with that object
346: * @return <code>true</code> if the two object describe the same property,
347: * <code>false</code> otherwise
348: */
349: public boolean equals(Object that) {
350: // primitive tests
351: if (that == null) {
352: return false;
353: } else if (that == this ) {
354: return true;
355: } else if (!(that.getClass() == getClass())) {
356: return false;
357: }
358:
359: // compare contents
360: TokenizerProperty thatProp = (TokenizerProperty) that;
361:
362: if (getType() == thatProp.getType()
363: && getCompanion() == thatProp.getCompanion()
364: && getFlags() == thatProp.getFlags()) {
365: // compare images
366: String[] this Img = getImages();
367: String[] thatImg = thatProp.getImages();
368:
369: if (this Img != thatImg) {
370: if (this Img == null || thatImg == null
371: || this Img.length != thatImg.length) {
372: return false;
373: }
374: for (int index = 0; index < this Img.length; ++index) {
375: if (!this Img[index].equals(thatImg[index])) {
376: return false;
377: }
378: }
379: }
380: return true;
381: } else {
382: return false;
383: }
384: }
385:
386: /**
387: * Redefinition of the well-known {@link java.lang.Object#toString} method.
388: *
389: * @return a string representation of this <code>TokenizerProperty</code>
390: */
391: public String toString() {
392: StringBuffer buffer = new StringBuffer();
393:
394: buffer.append(getClass().getName());
395: buffer.append(':');
396:
397: switch (getType()) {
398: case Token.NORMAL:
399: buffer.append(" NORMAL, ");
400: break;
401: case Token.BLOCK_COMMENT:
402: buffer.append(" BLOCK_COMMENT, ");
403: break;
404: case Token.LINE_COMMENT:
405: buffer.append(" LINE_COMMENT, ");
406: break;
407: case Token.STRING:
408: buffer.append(" STRING, ");
409: break;
410: case Token.PATTERN:
411: buffer.append(" PATTERN, ");
412: break;
413: case Token.KEYWORD:
414: buffer.append(" KEYWORD, ");
415: break;
416: case Token.WHITESPACE:
417: buffer.append(" WHITESPACE, ");
418: break;
419: case Token.SEPARATOR:
420: buffer.append(" SEPARATOR, ");
421: break;
422: case Token.SPECIAL_SEQUENCE:
423: buffer.append(" SPECIAL_SEQUENCE, ");
424: break;
425: case Token.EOF:
426: buffer.append(" EOF, ");
427: break;
428: case TokenizerProperty.PARSE_FLAG_MASK:
429: buffer.append(" PARSE FLAG MASK, ");
430: break;
431: default:
432: buffer.append(" UNKNOWN, ");
433: }
434:
435: // add the flags
436: buffer.append("flags/mask 0x");
437: buffer.append(Integer.toHexString(_flags));
438: buffer.append("/0x");
439: buffer.append(Integer.toHexString(_flagMask));
440:
441: // add images
442: if (_images != null) {
443: buffer.append(':');
444: for (int index = 0; index < _images.length; ++index) {
445: if (_images[index] != null) {
446: buffer.append(' ');
447: buffer.append(_images[index]);
448: } else {
449: break;
450: }
451: }
452: }
453:
454: // ready
455: return buffer.toString();
456: }
457:
458: //---------------------------------------------------------------------------
459: // implementation
460: //
461:
462: //---------------------------------------------------------------------------
463: // members
464: //
465: protected int _type;
466: protected int _flags;
467: protected int _flagMask;
468: protected String[] _images;
469: protected Object _companion;
470: }
|