001: /*
002: * Portions Copyright 2000-2007 Sun Microsystems, Inc. All Rights
003: * Reserved. Use is subject to license terms.
004: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER
005: *
006: * This program is free software; you can redistribute it and/or
007: * modify it under the terms of the GNU General Public License version
008: * 2 only, as published by the Free Software Foundation.
009: *
010: * This program is distributed in the hope that it will be useful, but
011: * WITHOUT ANY WARRANTY; without even the implied warranty of
012: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
013: * General Public License version 2 for more details (a copy is
014: * included at /legal/license.txt).
015: *
016: * You should have received a copy of the GNU General Public License
017: * version 2 along with this work; if not, write to the Free Software
018: * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
019: * 02110-1301 USA
020: *
021: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
022: * Clara, CA 95054 or visit www.sun.com if you need additional
023: * information or have any questions.
024: */
025: package gov.nist.core;
026:
027: import java.util.*;
028:
/**
 * Base string token splitter.
 *
 * <p>Keeps a text buffer together with a read offset and offers simple
 * character-, token- and line-oriented scanning on top of it, plus a set of
 * static character-classification helpers used by SIP/SDP parsing.
 *
 * <p>Instances are not synchronized. An instance created with the default
 * constructor has no buffer; the scanning methods dereference the buffer
 * without a null check.
 *
 * @version JAIN-SIP-1.1
 *
 *
 * <a href="{@docRoot}/uncopyright.html">This code is in the public domain.</a>
 *
 */
public class StringTokenizer {
    /** Extra characters (besides ASCII letters and digits) allowed in names. */
    private static final String VALID_NAME_CHARS = "-.!%*_+`'~";

    /** Current buffer to be parsed. */
    protected String buffer;
    /** Current offset into the input buffer. */
    protected int ptr;
    /** Saved pointer for peek operations. */
    protected int savedPtr;
    /** Current token delimiter. */
    char delimiter;

    /**
     * Default constructor.
     * Resets the buffer offset to 0 and selects the default
     * newline delimiter. No buffer is assigned.
     */
    public StringTokenizer() {
        this.delimiter = '\n';
        this.ptr = 0;
    }

    /**
     * Constructs a string tokenizer for the input buffer, using the
     * default newline delimiter.
     * @param buffer the text to be parsed
     */
    public StringTokenizer(String buffer) {
        this.buffer = buffer;
        this.ptr = 0;
        this.delimiter = '\n';
    }

    /**
     * Constructs a string tokenizer for the input buffer
     * and the specified field separator.
     * @param buffer the text to be parsed
     * @param delimiter the field separator character
     */
    public StringTokenizer(String buffer, char delimiter) {
        this.buffer = buffer;
        this.delimiter = delimiter;
        this.ptr = 0;
    }

    /**
     * Gets the next token and advances past it.
     * @return the characters from the current offset up to and
     * <em>including</em> the next delimiter; if no delimiter remains, the
     * rest of the buffer; an empty string when the buffer is exhausted
     */
    public String nextToken() {
        StringBuffer retval = new StringBuffer();

        while (ptr < buffer.length()) {
            char ch = buffer.charAt(ptr);
            ptr++;
            retval.append(ch);
            if (ch == delimiter) {
                // The delimiter is part of the returned token.
                break;
            }
        }

        return retval.toString();
    }

    /**
     * Checks if more characters are available.
     * @return true if more characters can be processed
     */
    public boolean hasMoreChars() {
        return ptr < buffer.length();
    }

    /**
     * Checks if character is part of a hexadecimal number.
     * @param ch character to be checked
     * @return true if the character is a digit (as defined by
     * {@link #isDigit(char)}) or one of the letters A-F in either case
     */
    public static boolean isHexDigit(char ch) {
        if (isDigit(ch)) {
            return true;
        }
        char upper = Character.toUpperCase(ch);
        return upper >= 'A' && upper <= 'F';
    }

    /**
     * Checks if the character is an alphabetic character.
     * @param ch the character to be checked.
     * @return true if the character is an upper-case or lower-case letter
     */
    public static boolean isAlpha(char ch) {
        return Character.isUpperCase(ch) || Character.isLowerCase(ch);
    }

    /**
     * Checks if the character is a numeric character.
     * @param ch the character to be checked.
     * @return true if the character is a decimal digit
     */
    public static boolean isDigit(char ch) {
        return Character.isDigit(ch);
    }

    /**
     * Checks if the string contains numeric characters only.
     * @param str the string to be checked.
     * @return true if the string is non-empty and contains numeric
     * characters only; false for the empty string
     */
    public static boolean isDigitString(String str) {
        int len = str.length();
        if (len == 0) { // empty string - return false
            return false;
        }
        for (int i = 0; i < len; i++) {
            if (!Character.isDigit(str.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Checks if the given character is allowed in method/header/parameter name.
     * The character is valid if it is: (1) an ASCII digit or (2) an ASCII
     * letter, or (3) is one of the characters on the next list: -.!%*_+`'~
     * @param ch the character to check
     * @return true if the character is valid, false otherwise
     */
    public static boolean isValidChar(char ch) {
        boolean alphanumeric = (ch >= '0' && ch <= '9')
                || (ch >= 'A' && ch <= 'Z')
                || (ch >= 'a' && ch <= 'z');
        return alphanumeric || VALID_NAME_CHARS.indexOf(ch) != -1;
    }

    /**
     * Checks if the given symbol belongs to the escaped group.
     * The character is escaped if it satisfies the next ABNF
     * (see RFC3261 p.220): <br>
     * escaped = "%" HEXDIG HEXDIG
     * <br>
     * @param name the string to be parsed for escaped value
     * @param index shift inside parsed string
     * @return true if an escaped value starts at <code>index</code>;
     * false otherwise, including when fewer than three characters are
     * available starting at <code>index</code>
     */
    public static boolean isEscaped(String name, int index) {
        // All three positions index, index + 1 and index + 2 must exist
        // before any of them is read.
        if (index < 0 || index > name.length() - 3) {
            return false;
        }

        return name.charAt(index) == '%'
                && isHexDigit(name.charAt(index + 1))
                && isHexDigit(name.charAt(index + 2));
    }

    /**
     * Checks if the given sequence is quoted pair.
     * <pre>
     * RFC3261 p.222
     * quoted-pair = "\" (%x00-09 / %x0B-0C / %x0E-7F)
     * </pre>
     *
     * @param name the string to be parsed for quoted pair
     * @param offset inside parsed string
     * @return true if quoted pair is placed at <code>name</code>
     * [<code>offset</code>], false otherwise, including when fewer than
     * two characters are available starting at <code>offset</code>
     */
    public static boolean isQuotedPair(String name, int offset) {
        // Both the backslash and the character after it must exist.
        if (offset < 0 || offset > name.length() - 2) {
            return false;
        }
        if (name.charAt(offset) != '\\') {
            return false;
        }

        char ch = name.charAt(offset + 1);
        // LF, CR and anything above 7-bit ASCII cannot be quoted.
        return !(ch == 0xA || ch == 0xD || ch > 0x7F);
    }

    /**
     * Gets the next line of text and advances past it.
     * @return characters up to and including the next newline, or the
     * rest of the buffer if no newline remains
     */
    public String getLine() {
        StringBuffer retval = new StringBuffer();
        while (ptr < buffer.length()) {
            char ch = buffer.charAt(ptr);
            ptr++;
            retval.append(ch);
            if (ch == '\n') {
                break;
            }
        }
        return retval.toString();
    }

    /**
     * Peeks at the next line without consuming the
     * characters.
     * @return the next line of text
     */
    public String peekLine() {
        int curPos = ptr;
        String retval = this.getLine();
        ptr = curPos; // undo the advance made by getLine()
        return retval;
    }

    /**
     * Looks ahead one character in the input buffer
     * without consuming the character.
     * @return the next character in the input buffer, or '\0' at the end
     * of the buffer
     * @throws ParseException if a parsing error occurs
     */
    public char lookAhead() throws ParseException {
        return lookAhead(0);
    }

    /**
     * Looks ahead a specified number of characters in the input buffer
     * without consuming the character.
     * @param k the number of characters to advance the
     * current buffer offset
     * @return the requested character in the input buffer, or '\0' if the
     * requested position is at or beyond the end of the buffer
     * @throws ParseException if a parsing error occurs
     */
    public char lookAhead(int k) throws ParseException {
        if (ptr + k < buffer.length()) {
            return buffer.charAt(ptr + k);
        }
        return '\0';
    }

    /**
     * Gets one character in the input buffer
     * and consumes the character.
     * @return the next character in the input buffer
     * @throws ParseException if the end of the buffer has been reached
     */
    public char getNextChar() throws ParseException {
        if (ptr >= buffer.length()) {
            throw new ParseException(buffer
                    + " getNextChar: End of buffer", ptr);
        }
        return buffer.charAt(ptr++);
    }

    /**
     * Advances the current pointer to the saved peek pointer
     * to consume the characters that were pending parsing
     * completion.
     */
    public void consume() {
        ptr = savedPtr;
    }

    /**
     * Consume the specified number of characters from the input
     * buffer. No bounds checking is performed.
     * @param k the number of characters to advance the
     * current buffer offset
     */
    public void consume(int k) {
        ptr += k;
    }

    /**
     * Gets a Vector of the remaining buffer tokenized by lines.
     * Each element is a String as returned by {@link #getLine()}.
     * @return vector of tokens
     */
    public Vector getLines() {
        Vector result = new Vector();
        while (hasMoreChars()) {
            result.addElement(getLine());
        }
        return result;
    }

    /**
     * Gets the next token from the buffer. The delimiter itself is
     * neither returned nor consumed.
     * @param delim the field separator
     * @return the text between the current offset and the next
     * occurrence of <code>delim</code>
     * @throws ParseException if a '\0' look-ahead (end of buffer) is hit
     * before the delimiter is found
     */
    public String getNextToken(char delim) throws ParseException {
        StringBuffer retval = new StringBuffer();
        while (true) {
            char la = lookAhead(0);
            if (la == delim) {
                break;
            }
            if (la == '\0') {
                throw new ParseException("EOL reached", 0);
            }
            retval.append(buffer.charAt(ptr));
            consume(1);
        }
        return retval.toString();
    }

    /**
     * Gets the SDP field name of the line, i.e. the text preceding the
     * first '=' character.
     * @param line the input buffer to be parsed
     * @return the SDP field name, or null if <code>line</code> is null or
     * contains no '='
     */
    public static String getSDPFieldName(String line) {
        if (line == null) {
            return null;
        }
        int begin = line.indexOf("=");
        if (begin < 0) {
            return null;
        }
        return line.substring(0, begin);
    }

    /**
     * According to the RFC 3261, section 7.3.1:
     *
     * Header fields can be extended over multiple lines by preceding each
     * extra line with at least one SP or horizontal tab (HT). The line
     * break and the whitespace at the beginning of the next line are
     * treated as a single SP character.
     *
     * This function converts all pairs of newline+space/tab in the
     * string 's' into single spaces. Leading whitespace (space, tab, CR,
     * LF) is dropped, and any CR or LF that is not followed by a space or
     * tab is removed.
     *
     * @param s string to handle.
     * @return processed string.
     */
    public static String convertNewLines(String s) {
        int len = s.length();
        if (len == 0) {
            return "";
        }

        // A buffer avoids re-copying the result on every appended character.
        StringBuffer result = new StringBuffer(len);

        // Eat leading spaces and carriage returns.
        int i = skipWhiteSpace(s, 0);

        while (i < len) {
            char chCurr = s.charAt(i);

            // Actually, the spec requires "<CRLF> <Space|Tab>" for multiline
            // header values, but we support also LFCR, LF and CR.
            if (chCurr != '\n' && chCurr != '\r') {
                result.append(chCurr);
            } else if (i < len - 1
                    && (s.charAt(i + 1) == '\t' || s.charAt(i + 1) == ' ')) {
                // Line break + one LWS character collapses to a single SP;
                // the extra increment skips the LWS character.
                result.append(' ');
                i++;
            }
            // Otherwise: RFC 3261, p. 221 allows a CRLF in TEXT-UTF8-TRIM
            // only as part of a header field continuation. It is not
            // clearly defined what to do when CRLF, CR or LF occurs
            // without following LWS, so it is simply skipped.

            i++;
        }

        return result.toString();
    }

    /**
     * Skip whitespace that starts at offset i in the string s.
     * Whitespace here means LF, CR, tab and space.
     * @param s string containing some text
     * @param i offset where the whitespace begins
     * @return offset of the text following the whitespace
     */
    private static int skipWhiteSpace(String s, int i) {
        int len = s.length();
        while (i < len) {
            char chCurr = s.charAt(i);
            if (chCurr != '\n' && chCurr != '\r' && chCurr != '\t'
                    && chCurr != ' ') {
                break;
            }
            i++;
        }
        return i;
    }

}
|