001: // HttpParser.java
002: // $Id: HttpParser.java,v 1.15 2003/07/01 14:13:43 ylafon Exp $$
003: // (c) COPYRIGHT MIT and INRIA, 1996.
004: // Please first read the full copyright statement in file COPYRIGHT.html
005:
006: package org.w3c.www.http;
007:
008: import java.util.Date;
009:
/**
 * A private class to help with the parsing.
 * Contains only some static methods, helping to parse various byte
 * buffers into Java objects (Yes, I am still and again trying to reduce
 * memory consumption).
 * <p>I don't know whether this sucks or not. On one hand I am sparing a
 * tremendous amount of String creation, on the other hand I am recoding a
 * number of parsers that are available on String instances.
 */
019:
020: public class HttpParser {
021: private static final boolean debug = false;
022:
023: /**
024: * Emit an error.
025: * @param mth The method trigerring the error.
026: * @param msg An associated message.
027: * @exception HttpInvalidValueException To indicate the error to caller.
028: */
029:
030: protected static void error(String mth, String msg)
031: throws HttpInvalidValueException {
032: throw new HttpInvalidValueException(mth + ": " + msg);
033: }
034:
035: /**
036: * Compare two byte arrays.
037: * I am not comfident about how the equality of byte arrays is performed
038: * by other means, sorry.
039: * @param b1 The first byte array.
040: * @param o1 The offset of the bytes to compare.
041: * @param l1 The number of bytes to compare.
042: * @param b2 What to compare against.
043: * @param o2 The offset of the bytes to compare.
044: * @param l2 The length of the bytes to compare.
045: * @return An integer, <strong><0</strong> if b1 is less than b2,
046: * <strong>0</strong> if equals, <strong>>0</strong>otherwise.
047: */
048:
049: public static final int compare(byte b1[], int o1, int l1,
050: byte b2[], int o2, int l2) {
051: while ((o1 < l1) && (o2 < l2)) {
052: int cmp = (((int) b1[o1]) & 0xff) - (((int) b2[o2]) & 0xff);
053: if (cmp != 0)
054: return cmp;
055: o1++;
056: o2++;
057: }
058: return ((o1 == l1) && (o2 == l2)) ? 0 : l2 - l1;
059: }
060:
061: /**
062: * Compare two byte arrays.
063: * Short-cut version of the above version.
064: * @param b1 The first byte array.
065: * @param o1 The offset of the bytes to compare.
066: * @param l1 The number of bytes to compare.
067: * @param b2 What to compare against.
068: * @return An integer, <strong><0</strong> if b1 is less than b2,
069: * <strong>0</strong> if equals, <strong>>0</strong>otherwise.
070: */
071:
072: public static final int compare(byte b1[], int o1, int l1,
073: byte b2[]) {
074: return compare(b1, o1, l1, b2, 0, b2.length);
075: }
076:
077: /**
078: * Parse an integer, and return an updated pointer.
079: */
080:
081: public static final int parseInt(byte buf[], int radix,
082: ParseState ps) {
083: // Skip spaces if needed
084: int off = -1;
085: if (ps.isSkipable)
086: ps.start = off = skipSpaces(buf, ps);
087: else
088: ps.start = off = ps.ioff;
089: // Parse the integer from byte[] straight (without creating Strings)
090: int len = (ps.bufend > 0) ? ps.bufend : buf.length;
091: int ret = 0;
092: int oldret = 0;
093: boolean neg = false;
094: if (buf[off] == (byte) '-') {
095: neg = true;
096: off++;
097: }
098: while (off < len) {
099: int digit = ((int) buf[off]) & 0xff;
100: if ((digit >= (byte) '0') && (digit <= (byte) '9')) {
101: ret = ret * radix + (digit - (byte) '0');
102: } else if (radix >= 10) {
103: if ((digit >= 'A') && (digit <= 'Z')) {
104: if ((digit - 'A') + 10 < radix)
105: ret = ret * radix + (digit - 'A' + 10);
106: else
107: break;
108: } else if ((digit >= 'a') && (digit <= 'z')) {
109: if ((digit - 'a') + 10 < radix)
110: ret = ret * radix + digit - 'a' + 10;
111: else
112: break;
113: } else {
114: break;
115: }
116: } else {
117: break;
118: }
119: if (ret < oldret) {
120: error("parseInt", "Integer overflow: "
121: + new String(buf, 0, ps.start, len));
122: } else {
123: oldret = ret;
124: }
125: off++;
126: }
127: if (ret < oldret) {
128: error("parseInt", "Integer overflow: "
129: + new String(buf, 0, ps.start, len));
130: }
131: // Return, after updating the parsing state:
132: ps.ooff = off;
133: ps.end = off;
134: if (ps.ooff == ps.ioff)
135: // We didn't get any number, err
136: error("parseInt", "No number available.");
137: return neg ? -ret : ret;
138: }
139:
140: public static final int parseInt(byte buf[], ParseState ps) {
141: return parseInt(buf, 10, ps);
142: }
143:
144: /**
145: * Parse an integer, and return an updated pointer.
146: */
147:
148: public static final long parseLong(byte buf[], int radix,
149: ParseState ps) {
150: // Skip spaces if needed
151: int off = -1;
152: if (ps.isSkipable)
153: ps.start = off = skipSpaces(buf, ps);
154: else
155: ps.start = off = ps.ioff;
156: // Parse the integer from byte[] straight (without creating Strings)
157: int len = (ps.bufend > 0) ? ps.bufend : buf.length;
158: long ret = 0;
159: long oldret = 0;
160: boolean neg = false;
161: if (buf[off] == (byte) '-') {
162: neg = true;
163: off++;
164: }
165: while (off < len) {
166: int digit = ((int) buf[off]) & 0xff;
167: if ((digit >= (byte) '0') && (digit <= (byte) '9')) {
168: ret = ret * radix + (digit - (byte) '0');
169: } else if (radix >= 10) {
170: if ((digit >= 'A') && (digit <= 'Z')) {
171: if ((digit - 'A') + 10 < radix)
172: ret = ret * radix + (digit - 'A' + 10);
173: else
174: break;
175: } else if ((digit >= 'a') && (digit <= 'z')) {
176: if ((digit - 'a') + 10 < radix)
177: ret = ret * radix + digit - 'a' + 10;
178: else
179: break;
180: } else {
181: break;
182: }
183: } else {
184: break;
185: }
186: if (ret < oldret) {
187: error("parseLong", "Long overflow: "
188: + new String(buf, 0, ps.start, len));
189: } else {
190: oldret = ret;
191: }
192: off++;
193: }
194: if (ret < oldret) {
195: error("parseLong", "Long overflow: "
196: + new String(buf, 0, ps.start, len));
197: }
198: // Return, after updating the parsing state:
199: ps.ooff = off;
200: ps.end = off;
201: if (ps.ooff == ps.ioff)
202: // We didn't get any number, err
203: error("parseLong", "No number available.");
204: return neg ? -ret : ret;
205: }
206:
207: public static final long parseLong(byte buf[], ParseState ps) {
208: return parseLong(buf, 10, ps);
209: }
210:
211: public static boolean unquote(byte buf[], ParseState ps) {
212: int off = -1;
213: int len = -1;
214: if (ps.isSkipable)
215: off = skipSpaces(buf, ps);
216: else
217: off = ps.ioff;
218: len = (ps.bufend > 0) ? ps.bufend : buf.length;
219: if ((off < len) && (buf[off] == (byte) '"')) {
220: ps.start = ps.ioff = ++off;
221: while (off < len) {
222: if (buf[off] == (byte) '"') {
223: ps.end = ps.bufend = off;
224: return true;
225: } else {
226: off++;
227: }
228: }
229: } else {
230: ps.start = off;
231: ps.end = len;
232: }
233: return false;
234: }
235:
236: /**
237: * Skip leading LWS, <em>not</em> including CR LF.
238: * Update the input offset, <em>after</em> any leading space.
239: * @param buf The buffer to be parsed.
240: * @param ptr The buffer pointer to be updated on return.
241: * @return The potentially advanced buffer input offset.
242: */
243:
244: public static final int skipSpaces(byte buf[], ParseState ps) {
245: int len = (ps.bufend > 0) ? ps.bufend : buf.length;
246: int off = ps.ioff;
247: while (off < len) {
248: if ((buf[off] != (byte) ' ') && (buf[off] != (byte) '\t')
249: && (buf[off] != (byte) ps.separator)) {
250: ps.ioff = off;
251: return off;
252: }
253: off++;
254: }
255: return off;
256: }
257:
258: /**
259: * Parse list of items, taking care of quotes and optional LWS.
260: * The output offset points to the <em>next</em> element of the list.
261: * @eturn The starting location (i.e. <code>ps.start</code> value), or
262: * <strong>-1</strong> if no item available (end of list).
263: */
264:
265: public static final int nextItem(byte buf[], ParseState ps) {
266: // Skip leading spaces, if needed:
267: int off = -1;
268: int len = -1;
269: if (ps.isSkipable)
270: ps.start = off = skipSpaces(buf, ps);
271: else
272: ps.start = off = ps.ioff;
273: len = (ps.bufend > 0) ? ps.bufend : buf.length;
274: if (debug)
275: System.out.println("parsing: ["
276: + new String(buf, 0, off, len - off) + "]");
277: // Parse !
278: if (off >= len)
279: return -1;
280: // Setup for parsing, and parse
281: ps.start = off;
282: loop: while (off < len) {
283: if (buf[off] == (byte) '"') {
284: // A quoted item, read as one chunk
285: off++;
286: while (off < len) {
287: if (buf[off] == (byte) '\\') {
288: off += 2;
289: } else if (buf[off] == (byte) '"') {
290: off++;
291: continue loop;
292: } else {
293: off++;
294: }
295: }
296: if (off == len)
297: error("nextItem", "Un-terminated quoted item.");
298: } else if ((buf[off] == ps.separator)
299: || (ps.spaceIsSep && ((buf[off] == ' ') || (buf[off] == '\t')))) {
300: break loop;
301: }
302: off++;
303: }
304: ps.end = off;
305: // Item start is set, we are right at the end of item
306: if (ps.isSkipable) {
307: ps.ioff = off;
308: ps.ooff = skipSpaces(buf, ps);
309: }
310: // Check for either the end of the list, or the separator:
311: if (ps.ooff < ps.bufend) {
312: if (buf[ps.ooff] == (byte) ps.separator)
313: ps.ooff++;
314: }
315: if (debug)
316: System.out.println("nextItem = ["
317: + new String(buf, 0, ps.start, ps.end - ps.start)
318: + "]");
319: return (ps.end > ps.start) ? ps.start : -1;
320: }
321:
322: /**
323: * Parse the name of a month.
324: * Monthes are parsed as their three letters format.
325: * @return An integer between <strong>0</strong> and <strong>11</strong>.
326: */
327:
328: private static byte monthes[][] = {
329: { (byte) 'J', (byte) 'a', (byte) 'n' },
330: { (byte) 'F', (byte) 'e', (byte) 'b' },
331: { (byte) 'M', (byte) 'a', (byte) 'r' },
332: { (byte) 'A', (byte) 'p', (byte) 'r' },
333: { (byte) 'M', (byte) 'a', (byte) 'y' },
334: { (byte) 'J', (byte) 'u', (byte) 'n' },
335: { (byte) 'J', (byte) 'u', (byte) 'l' },
336: { (byte) 'A', (byte) 'u', (byte) 'g' },
337: { (byte) 'S', (byte) 'e', (byte) 'p' },
338: { (byte) 'O', (byte) 'c', (byte) 't' },
339: { (byte) 'N', (byte) 'o', (byte) 'v' },
340: { (byte) 'D', (byte) 'e', (byte) 'c' } };
341:
342: private final static byte lowerCase(int x) {
343: if ((x >= 'A') && (x <= 'Z'))
344: x = (x - 'A' + 'a');
345: return (byte) (x & 0xff);
346: }
347:
348: public static int parseMonth(byte buf[], ParseState ps) {
349: int off = -1;
350: if (ps.isSkipable)
351: off = ps.start = skipSpaces(buf, ps);
352: else
353: off = ps.start = ps.ioff;
354: int len = (ps.bufend > 0) ? ps.bufend : buf.length;
355: if (len < 3) {
356: error("parseMonth", "Invalid month name (too short).");
357: // NOT REACHED
358: return -1;
359: }
360: // Compare to get the month:
361: for (int i = 0; i < monthes.length; i++) {
362: int mo = off;
363: byte m[] = monthes[i];
364: boolean ok = true;
365: month_loop: for (int j = 0; j < m.length; j++, mo++) {
366: if (lowerCase(m[j]) != lowerCase(buf[mo])) {
367: ok = false;
368: break month_loop;
369: }
370: }
371: if (ok) {
372: if (mo - off == m.length) {
373: // Skip remaining chars of month
374: off += 3;
375: while (off < len) {
376: byte l = lowerCase(buf[off++]);
377: if ((l < 'a') || (l > 'z'))
378: break;
379: }
380: ps.ooff = ps.end = off;
381: }
382: return i;
383: }
384: }
385: error("parseMonth", "Invalid month name (unknown).");
386: // NOT REACHED
387: return -1;
388: }
389:
390: /**
391: * Parse a delta-second value.
392: * @return A long giving the date at which to retry as a number of
393: * milliseconds since Java epoch.
394: */
395:
396: public static long parseDeltaSecond(byte buf[], ParseState ps) {
397: return parseInt(buf, ps);
398: }
399:
400: /**
401: * Parse the given byte array as an HTTP compliant date.
402: * @param buf The byte buffer to parse.
403: * @param sp The current parsing state.
404: * @return A long giving the date as a number of milliseconds since epoch.
405: */
406:
407: public static long parseDate(byte buf[], ParseState ps) {
408: int d = -1;
409: int m = -1;
410: int y = -1;
411: int hh = -1;
412: int mm = -1;
413: int ss = -1;
414: // My prefered argument as to why HTTP is broken
415: ParseState it = new ParseState();
416: it.ioff = ps.ioff;
417: it.bufend = ((ps.bufend > -1) ? ps.bufend : buf.length);
418: // Skip the day name:
419: if (nextItem(buf, ps) < 0)
420: error("parseDate", "Invalid date format (no day)");
421: ps.prepare();
422: int off = skipSpaces(buf, ps);
423: // First fork:
424: if ((buf[off] >= (byte) '0') && (buf[off] <= (byte) '9')) {
425: // rfc 1123, or rfc 1036
426: d = parseInt(buf, ps);
427: ps.prepare();
428: if (buf[ps.ioff] == (byte) ' ') {
429: // rfc 1123
430: m = parseMonth(buf, ps);
431: ps.prepare();
432: if ((y = parseInt(buf, ps) - 1900) < 0)
433: y += 1900;
434: ps.prepare();
435: ps.separator = (byte) ':';
436: hh = parseInt(buf, ps);
437: ps.prepare();
438: mm = parseInt(buf, ps);
439: ps.prepare();
440: ss = parseInt(buf, ps);
441: } else {
442: // rfc 1036
443: ps.separator = (byte) '-';
444: m = parseMonth(buf, ps);
445: ps.prepare();
446: y = parseInt(buf, ps);
447: ps.prepare();
448: ps.separator = (byte) ':';
449: hh = parseInt(buf, ps);
450: ps.prepare();
451: mm = parseInt(buf, ps);
452: ps.prepare();
453: ss = parseInt(buf, ps);
454: }
455: } else {
456: m = parseMonth(buf, ps);
457: ps.prepare();
458: d = parseInt(buf, ps);
459: ps.prepare();
460: ps.separator = (byte) ':';
461: hh = parseInt(buf, ps);
462: ps.prepare();
463: mm = parseInt(buf, ps);
464: ps.prepare();
465: ss = parseInt(buf, ps);
466: ps.prepare();
467: ps.separator = (byte) ' ';
468: y = parseInt(buf, ps) - 1900;
469: }
470: return Date.UTC(y, m, d, hh, mm, ss);
471: }
472:
473: /**
474: * Parse a date as either a delta-second value, or a date.
475: * In case of delta seconds, we use the current time (except if one
476: * is provided), to compute the date.
477: * @return A date encoded as the number of millisconds since Java epoch.
478: */
479:
480: public static long parseDateOrDeltaSeconds(byte buf[],
481: ParseState ps, long relto) {
482: int off = -1;
483: if (ps.isSkipable)
484: off = ps.start = skipSpaces(buf, ps);
485: else
486: off = ps.ioff;
487: int len = (ps.bufend >= 0) ? ps.bufend : buf.length;
488: // If all digits, delta secs, otherwise date:
489: for (int i = off; i < len; i++) {
490: if ((buf[i] > '9') || (buf[i] < '0'))
491: return parseDate(buf, ps);
492: }
493: // Delta seconds:
494: long secs = (long) parseInt(buf, ps);
495: return ((relto >= 0) ? relto + (secs * 1000) : System
496: .currentTimeMillis()
497: + (secs * 1000));
498: }
499:
500: public static long parseDateOrDeltaSeconds(byte buf[], ParseState ps) {
501: return parseDateOrDeltaSeconds(buf, ps, (long) -1);
502: }
503:
504: public static double parseQuality(byte buf[], ParseState ps) {
505: // Skip spaces if needed
506: int off = -1;
507: if (ps.isSkipable)
508: ps.start = off = skipSpaces(buf, ps);
509: else
510: ps.start = off = ps.ioff;
511: // Parse the integer from byte[] straight (without creating Strings)
512: int len = (ps.bufend > 0) ? ps.bufend : buf.length;
513: String str = new String(buf, 0, off, len - off);
514: try {
515: return Double.valueOf(str).doubleValue();
516: } catch (Exception ex) {
517: error("parseQuality", "Invalid floating point number.");
518: }
519: // Not reached:
520: return 1.0;
521: }
522:
523: }
|