001: /*
002: * SequenceHandler.java: string, comment and special sequence handling in tokenizers
003: *
004: * Copyright (C) 2002 Heiko Blau
005: *
006: * This file belongs to the JTopas Library.
007: * JTopas is free software; you can redistribute it and/or modify it
008: * under the terms of the GNU Lesser General Public License as published by the
009: * Free Software Foundation; either version 2.1 of the License, or (at your
010: * option) any later version.
011: *
012: * This software is distributed in the hope that it will be useful, but WITHOUT
013: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
014: * FITNESS FOR A PARTICULAR PURPOSE.
015: * See the GNU Lesser General Public License for more details.
016: *
017: * You should have received a copy of the GNU Lesser General Public License along
018: * with JTopas. If not, write to the
019: *
020: * Free Software Foundation, Inc.
021: * 59 Temple Place, Suite 330,
022: * Boston, MA 02111-1307
023: * USA
024: *
025: * or check the Internet: http://www.fsf.org
026: *
027: * Contact:
028: * email: heiko@susebox.de
029: */
030:
031: package de.susebox.jtopas.spi;
032:
033: //-----------------------------------------------------------------------------
034: // Imports
035: //
036: import de.susebox.jtopas.TokenizerProperty;
037: import de.susebox.jtopas.TokenizerException;
038:
039: //-----------------------------------------------------------------------------
040: // Interface SequenceHandler
041: //
042:
043: /**<p>
044: * This interface must be implemented by classes that should be used as a
045: * special sequence, string and comment start sequence checker for
046: * {@link de.susebox.jtopas.Tokenizer} implementations.
047: *</p>
048: *
049: * @see de.susebox.jtopas.Tokenizer
050: * @see de.susebox.jtopas.TokenizerProperties
051: * @see de.susebox.jtopas.spi.DataMapper
052: * @author Heiko Blau
053: */
054: public interface SequenceHandler {
055:
056: /**
057: * This method can be used by a {@link de.susebox.jtopas.Tokenizer} implementation
058: * for a fast detection if special sequence checking must be performed at all.
059: * If the method returns <code>false</code> time-consuming preparations can be
060: * skipped.
061: *
062: * @return <code>true</code> if there actually are pattern that can be tested
063: * for a match, <code>false</code> otherwise.
064: */
065: public boolean hasSequenceCommentOrString();
066:
067: /**
068: * This method checks if a given range of data starts with a special sequence,
069: * a comment or a string. These three types of token are tested together since
070: * both comment and string prefixes are ordinary special sequences. Only the
071: * actions preformed <strong>after</strong> a string or comment has been detected,
072: * are different.
073: *<br>
074: * The method returns <code>null</code> if no special sequence, comment or string
075: * could matches the the leading part of the data range given through the
076: * {@link DataProvider}.
077: *<br>
078: * In cases of strings or comments, the return value contains the description
079: * for the introducing character sequence, <strong>NOT</strong> the whole
080: * string or comment. The reading of the rest of the string or comment is done
081: * by the calling {@link de.susebox.jtopas.Tokenizer}.
082: *
083: * @param dataProvider the source to get the data range from
084: * @return a {@link de.susebox.jtopas.TokenizerProperty} if a special sequence,
085: * comment or string could be detected, <code>null</code> otherwise
086: * @throws TokenizerException generic exception
087: * @throws NullPointerException if no {@link DataProvider} is given
088: */
089: public TokenizerProperty startsWithSequenceCommentOrString(
090: DataProvider dataProvider) throws TokenizerException,
091: NullPointerException;
092:
093: /**
094: * This method returns the length of the longest special sequence, comment or
095: * string prefix that is known to this <code>SequenceHandler</code>. When
096: * calling {@link #startsWithSequenceCommentOrString}, the passed {@link DataProvider}
097: * parameter will supply at least this number of characters (see {@link DataProvider#getLength}).
098: * If less characters are provided, EOF is reached.
099: *<br>
100: * The method is an easy approach to the problem of how to provide more data
101: * in case a test runs out of characters. The invoking {@link de.susebox.jtopas.Tokenizer}
102: * (represented by the given {@link DataProvider}) can supply enough data for
103: * the {@link #startsWithSequenceCommentOrString} method.
104: *
105: * @return the number of characters needed in the worst case to identify a
106: * special sequence
107: */
108: public int getSequenceMaxLength();
109: }
|