001: /*
002: * The contents of this file are subject to the terms
003: * of the Common Development and Distribution License
004: * (the "License"). You may not use this file except
005: * in compliance with the License.
006: *
007: * You can obtain a copy of the license at
008: * https://jwsdp.dev.java.net/CDDLv1.0.html
009: * See the License for the specific language governing
010: * permissions and limitations under the License.
011: *
012: * When distributing Covered Code, include this CDDL
013: * HEADER in each file and include the License file at
014: * https://jwsdp.dev.java.net/CDDLv1.0.html If applicable,
015: * add the following below this CDDL HEADER, with the
016: * fields enclosed by brackets "[]" replaced with your
017: * own identifying information: Portions Copyright [yyyy]
018: * [name of copyright owner]
019: */
020: /*
021: * @(#)HeaderTokenizer.java 1.9 02/03/27
022: */
023:
024: /*
025: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
026: *
027: * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
028: *
029: * The contents of this file are subject to the terms of either the GNU
030: * General Public License Version 2 only ("GPL") or the Common Development
031: * and Distribution License("CDDL") (collectively, the "License"). You
032: * may not use this file except in compliance with the License. You can obtain
033: * a copy of the License at https://glassfish.dev.java.net/public/CDDL+GPL.html
034: * or glassfish/bootstrap/legal/LICENSE.txt. See the License for the specific
035: * language governing permissions and limitations under the License.
036: *
037: * When distributing the software, include this License Header Notice in each
038: * file and include the License file at glassfish/bootstrap/legal/LICENSE.txt.
039: * Sun designates this particular file as subject to the "Classpath" exception
040: * as provided by Sun in the GPL Version 2 section of the License file that
041: * accompanied this code. If applicable, add the following below the License
042: * Header, with the fields enclosed by brackets [] replaced by your own
043: * identifying information: "Portions Copyrighted [year]
044: * [name of copyright owner]"
045: *
046: * Contributor(s):
047: *
048: * If you wish your version of this file to be governed by only the CDDL or
049: * only the GPL Version 2, indicate your decision by adding "[Contributor]
050: * elects to include this software in this distribution under the [CDDL or GPL
051: * Version 2] license." If you don't indicate a single choice of license, a
052: * recipient has the option to distribute your version of this file under
053: * either the CDDL, the GPL Version 2 or to extend the choice of license to
054: * its licensees as provided above. However, if you add GPL Version 2 code
055: * and therefore, elected the GPL Version 2 license, then the option applies
056: * only if the new code is made subject to such option by the copyright
057: * holder.
058: */
059:
060: package com.sun.xml.ws.encoding;
061:
062: import javax.xml.ws.WebServiceException;
063:
064: /**
065: * This class tokenizes RFC822 and MIME headers into the basic
066: * symbols specified by RFC822 and MIME. <p>
067: *
* This class handles folded headers (i.e., headers with embedded
069: * CRLF SPACE sequences). The folds are removed in the returned
070: * tokens.
071: *
072: * @version 1.9, 02/03/27
073: * @author John Mani
074: */
075:
076: class HeaderTokenizer {
077:
078: /**
079: * The Token class represents tokens returned by the
080: * HeaderTokenizer.
081: */
082: static class Token {
083:
084: private int type;
085: private String value;
086:
087: /**
088: * Token type indicating an ATOM.
089: */
090: public static final int ATOM = -1;
091:
092: /**
093: * Token type indicating a quoted string. The value
094: * field contains the string without the quotes.
095: */
096: public static final int QUOTEDSTRING = -2;
097:
098: /**
099: * Token type indicating a comment. The value field
100: * contains the comment string without the comment
101: * start and end symbols.
102: */
103: public static final int COMMENT = -3;
104:
105: /**
106: * Token type indicating end of input.
107: */
108: public static final int EOF = -4;
109:
110: /**
111: * Constructor.
112: * @param type Token type
113: * @param value Token value
114: */
115: public Token(int type, String value) {
116: this .type = type;
117: this .value = value;
118: }
119:
120: /**
121: * Return the type of the token. If the token represents a
122: * delimiter or a control character, the type is that character
123: * itself, converted to an integer. Otherwise, it's value is
124: * one of the following:
125: * <ul>
126: * <li><code>ATOM</code> A sequence of ASCII characters
127: * delimited by either SPACE, CTL, "(", <"> or the
128: * specified SPECIALS
129: * <li><code>QUOTEDSTRING</code> A sequence of ASCII characters
130: * within quotes
131: * <li><code>COMMENT</code> A sequence of ASCII characters
132: * within "(" and ")".
133: * <li><code>EOF</code> End of header
134: * </ul>
135: */
136: public int getType() {
137: return type;
138: }
139:
140: /**
141: * Returns the value of the token just read. When the current
142: * token is a quoted string, this field contains the body of the
143: * string, without the quotes. When the current token is a comment,
144: * this field contains the body of the comment.
145: *
146: * @return token value
147: */
148: public String getValue() {
149: return value;
150: }
151: }
152:
153: private String string; // the string to be tokenized
154: private boolean skipComments; // should comments be skipped ?
155: private String delimiters; // delimiter string
156: private int currentPos; // current parse position
157: private int maxPos; // string length
158: private int nextPos; // track start of next Token for next()
159: private int peekPos; // track start of next Token for peek()
160:
161: /**
162: * RFC822 specials
163: */
164: private final static String RFC822 = "()<>@,;:\\\"\t .[]";
165:
166: /**
167: * MIME specials
168: */
169: final static String MIME = "()<>@,;:\\\"\t []/?=";
170:
171: // The EOF Token
172: private final static Token EOFToken = new Token(Token.EOF, null);
173:
174: /**
175: * Constructor that takes a rfc822 style header.
176: *
177: * @param header The rfc822 header to be tokenized
178: * @param delimiters Set of delimiter characters
179: * to be used to delimit ATOMS. These
180: * are usually <code>RFC822</code> or
181: * <code>MIME</code>
182: * @param skipComments If true, comments are skipped and
183: * not returned as tokens
184: */
185: HeaderTokenizer(String header, String delimiters,
186: boolean skipComments) {
187: string = (header == null) ? "" : header; // paranoia ?!
188: this .skipComments = skipComments;
189: this .delimiters = delimiters;
190: currentPos = nextPos = peekPos = 0;
191: maxPos = string.length();
192: }
193:
194: /**
195: * Constructor. Comments are ignored and not returned as tokens
196: *
197: * @param header The header that is tokenized
198: * @param delimiters The delimiters to be used
199: */
200: HeaderTokenizer(String header, String delimiters) {
201: this (header, delimiters, true);
202: }
203:
204: /**
205: * Constructor. The RFC822 defined delimiters - RFC822 - are
206: * used to delimit ATOMS. Also comments are skipped and not
207: * returned as tokens
208: */
209: HeaderTokenizer(String header) {
210: this (header, RFC822);
211: }
212:
213: /**
214: * Parses the next token from this String. <p>
215: *
216: * Clients sit in a loop calling next() to parse successive
217: * tokens until an EOF Token is returned.
218: *
219: * @return the next Token
220: * @exception WebServiceException if the parse fails
221: */
222: Token next() throws WebServiceException {
223: Token tk;
224:
225: currentPos = nextPos; // setup currentPos
226: tk = getNext();
227: nextPos = peekPos = currentPos; // update currentPos and peekPos
228: return tk;
229: }
230:
231: /**
232: * Peek at the next token, without actually removing the token
233: * from the parse stream. Invoking this method multiple times
234: * will return successive tokens, until <code>next()</code> is
235: * called. <p>
236: *
237: * @return the next Token
238: * @exception WebServiceException if the parse fails
239: */
240: Token peek() throws WebServiceException {
241: Token tk;
242:
243: currentPos = peekPos; // setup currentPos
244: tk = getNext();
245: peekPos = currentPos; // update peekPos
246: return tk;
247: }
248:
249: /**
250: * Return the rest of the Header.
251: *
252: * @return String rest of header. null is returned if we are
253: * already at end of header
254: */
255: String getRemainder() {
256: return string.substring(nextPos);
257: }
258:
259: /*
260: * Return the next token starting from 'currentPos'. After the
261: * parse, 'currentPos' is updated to point to the start of the
262: * next token.
263: */
264: private Token getNext() throws WebServiceException {
265: // If we're already at end of string, return EOF
266: if (currentPos >= maxPos)
267: return EOFToken;
268:
269: // Skip white-space, position currentPos beyond the space
270: if (skipWhiteSpace() == Token.EOF)
271: return EOFToken;
272:
273: char c;
274: int start;
275: boolean filter = false;
276:
277: c = string.charAt(currentPos);
278:
279: // Check or Skip comments and position currentPos
280: // beyond the comment
281: while (c == '(') {
282: // Parsing comment ..
283: int nesting;
284: for (start = ++currentPos, nesting = 1; nesting > 0
285: && currentPos < maxPos; currentPos++) {
286: c = string.charAt(currentPos);
287: if (c == '\\') { // Escape sequence
288: currentPos++; // skip the escaped character
289: filter = true;
290: } else if (c == '\r')
291: filter = true;
292: else if (c == '(')
293: nesting++;
294: else if (c == ')')
295: nesting--;
296: }
297: if (nesting != 0)
298: throw new WebServiceException("Unbalanced comments");
299:
300: if (!skipComments) {
301: // Return the comment, if we are asked to.
302: // Note that the comment start & end markers are ignored.
303: String s;
304: if (filter) // need to go thru the token again.
305: s = filterToken(string, start, currentPos - 1);
306: else
307: s = string.substring(start, currentPos - 1);
308:
309: return new Token(Token.COMMENT, s);
310: }
311:
312: // Skip any whitespace after the comment.
313: if (skipWhiteSpace() == Token.EOF)
314: return EOFToken;
315: c = string.charAt(currentPos);
316: }
317:
318: // Check for quoted-string and position currentPos
319: // beyond the terminating quote
320: if (c == '"') {
321: for (start = ++currentPos; currentPos < maxPos; currentPos++) {
322: c = string.charAt(currentPos);
323: if (c == '\\') { // Escape sequence
324: currentPos++;
325: filter = true;
326: } else if (c == '\r')
327: filter = true;
328: else if (c == '"') {
329: currentPos++;
330: String s;
331:
332: if (filter)
333: s = filterToken(string, start, currentPos - 1);
334: else
335: s = string.substring(start, currentPos - 1);
336:
337: return new Token(Token.QUOTEDSTRING, s);
338: }
339: }
340: throw new WebServiceException("Unbalanced quoted string");
341: }
342:
343: // Check for SPECIAL or CTL
344: if (c < 040 || c >= 0177 || delimiters.indexOf(c) >= 0) {
345: currentPos++; // re-position currentPos
346: char ch[] = new char[1];
347: ch[0] = c;
348: return new Token((int) c, new String(ch));
349: }
350:
351: // Check for ATOM
352: for (start = currentPos; currentPos < maxPos; currentPos++) {
353: c = string.charAt(currentPos);
354: // ATOM is delimited by either SPACE, CTL, "(", <">
355: // or the specified SPECIALS
356: if (c < 040 || c >= 0177 || c == '(' || c == ' '
357: || c == '"' || delimiters.indexOf(c) >= 0)
358: break;
359: }
360: return new Token(Token.ATOM, string
361: .substring(start, currentPos));
362: }
363:
364: // Skip SPACE, HT, CR and NL
365: private int skipWhiteSpace() {
366: char c;
367: for (; currentPos < maxPos; currentPos++)
368: if (((c = string.charAt(currentPos)) != ' ') && (c != '\t')
369: && (c != '\r') && (c != '\n'))
370: return currentPos;
371: return Token.EOF;
372: }
373:
374: /* Process escape sequences and embedded LWSPs from a comment or
375: * quoted string.
376: */
377: private static String filterToken(String s, int start, int end) {
378: StringBuffer sb = new StringBuffer();
379: char c;
380: boolean gotEscape = false;
381: boolean gotCR = false;
382:
383: for (int i = start; i < end; i++) {
384: c = s.charAt(i);
385: if (c == '\n' && gotCR) {
386: // This LF is part of an unescaped
387: // CRLF sequence (i.e, LWSP). Skip it.
388: gotCR = false;
389: continue;
390: }
391:
392: gotCR = false;
393: if (!gotEscape) {
394: // Previous character was NOT '\'
395: if (c == '\\') // skip this character
396: gotEscape = true;
397: else if (c == '\r') // skip this character
398: gotCR = true;
399: else
400: // append this character
401: sb.append(c);
402: } else {
403: // Previous character was '\'. So no need to
404: // bother with any special processing, just
405: // append this character
406: sb.append(c);
407: gotEscape = false;
408: }
409: }
410: return sb.toString();
411: }
412: }
|