001: /* ====================================================================
002: The Jicarilla Software License
003:
004: Copyright (c) 2003 Leo Simons.
005: All rights reserved.
006:
007: Permission is hereby granted, free of charge, to any person obtaining
008: a copy of this software and associated documentation files (the
009: "Software"), to deal in the Software without restriction, including
010: without limitation the rights to use, copy, modify, merge, publish,
011: distribute, sublicense, and/or sell copies of the Software, and to
012: permit persons to whom the Software is furnished to do so, subject to
013: the following conditions:
014:
015: The above copyright notice and this permission notice shall be
016: included in all copies or substantial portions of the Software.
017:
018: THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
019: EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
020: MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
021: IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
022: CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
023: TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
024: SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
025: ==================================================================== */
026: package org.jicarilla.http;
027:
028: import org.jicarilla.http.util.Iso646;
029:
030: import java.lang.reflect.Field;
031: import java.lang.reflect.Modifier;
032: import java.nio.ByteBuffer;
033: import java.util.Arrays;
034: import java.util.regex.Matcher;
035: import java.util.regex.Pattern;
036:
037: /**
038: * This class provides basic encoding information for the HTTP 1.1
039: * specification,
* <a href="http://www.ietf.org/rfc/rfc2616.txt">RFC 2616</a>.
041: * The HTTP specifications are published by the
042: * <a href="http://www.ietf.org/">Internet Engineering Taskforce</a>.
043: *
044: * Note that this class contains some documentation snippets from the
045: * mentioned standard.
046: *
047: * @todo chunked transfer coding specification
048: * @todo media types spec
049: * @todo product tokens spec
050: * @todo quality values spec
051: * @todo language tags spec
052: * @todo entity tags spec
053: * @todo range units
054: * @todo common headers
055: * @todo rest of the spec
056: * @todo clean up and document the utility methods
057: *
058: * @author <a href="mailto: lsimons at jicarilla dot org">Leo Simons</a>
059: * @version $Id: HTTPEncoding.java,v 1.5 2004/04/03 10:13:23 lsimons Exp $
060: */
061: public class HTTPEncoding {
062: //
063: // SIMPLE CHARACTER CLASS SHORTHANDS
064: //
065:
066: /** carriage return */
067: public final static char CR = Iso646.CARRIAGE_RETURN;
068: /** linefeed */
069: public final static char LF = Iso646.LINE_FEED;
070: /** space */
071: public final static char SP = Iso646.SPACE;
072: /** tab */
073: public final static char HT = Iso646.HORIZONTAL_TABULATION;
074: /** double quote (") */
075: public final static char DQ = Iso646.QUOTATION_MARK;
076:
077: /** all ASCII characters */
078: public final static char[] CHAR = Iso646.CHAR;
079: /** all ASCII uppercase letters */
080: public final static char[] UPALPHA = Iso646.UPALPHA;
081: /** all ASCII lowercase letters */
082: public final static char[] LOALPHA = Iso646.LOALPHA;
083: /** all ASCII letters */
084: public final static char[] ALPHA = Iso646.ALPHA;
085: /** all ASCII digits */
086: public final static char[] DIGIT = Iso646.DIGIT;
087: /** all ASCII control characters */
088: public final static char[] CTL = Iso646.CTL;
089:
090: //
091: // CHARACTER CLASSES CONTAINING THE ALLOWED CHARACTERS
092: // FOR THE VARIOUS ENTITIES IN RFC2616
093: //
094:
095: /**
096: * carriage return and line feed characters.
097: *
098: * HTTP/1.1 defines the sequence CR LF as the end-of-line
099: * marker for all protocol elements except the entity-body
100: * (see appendix 19.3 of RFC2616 for tolerant applications).
101: * The end-of-line marker within an entity-body is defined
102: * by its associated media type, as described in section 3.7
103: * of RFC2616.
104: *
105: * note: doesn't encode that a new line is only defined when
106: * CR & LF appear in sequence.
107: */
108: public final static char[] CRLF = new char[] { CR, LF };
109:
110: /**
111: * lineair whitespace characters. note: doesn't encode that
112: * CR & LF may only appear once,
113: * at the beginning, and in sequence.
114: *
115: * HTTP/1.1 header field values can be folded onto multiple
116: * lines if the continuation line begins with a space or
117: * horizontal tab. All linear white space, including folding,
118: * has the same semantics as SP. A recipient MAY replace any
119: * linear white space with a single SP before interpreting the
120: * field value or forwarding the message downstream.
121: */
122: public final static char[] LWS = new char[] { CR, LF, SP, HT };
123:
124: /**
125: * The TEXT rule is only used for descriptive field contents and
126: * values that are not intended to be interpreted by the message
127: * parser. Words of *TEXT MAY contain characters from character
128: * sets other than ISO-8859-1 only when encoded according
129: * to the rules of
130: * <a href="http://www.ietf.org/rfc/rfc2047.txt">RFC 2047</a>.
131: *
132: * A CRLF is allowed in the definition of TEXT only as part of a
133: * header field continuation. It is expected that the folding LWS
134: * will be replaced with a single SP before interpretation of the
135: * TEXT value.
136: *
137: * @see org.jicarilla.http.util.Iso8859_1
138: * @see http://www.ietf.org
139: */
140: public final static char[] TEXT;
141:
142: /**
143: * Hexadecimal numeric characters are used in several protocol
144: * elements.
145: */
146: public final static char[] HEX;
147:
148: /**
149: * Many HTTP/1.1 header field values consist of words separated by
150: * LWS or special characters. These special characters MUST be in a
151: * quoted string to be used within a parameter value (as defined in
152: * section 3.6 of RFC2616).
153: */
154: public final static char[] separators = new char[] {
155: Iso646.LEFT_PARENTHESIS, Iso646.RIGHT_PARENTHESIS,
156: Iso646.GREATER_THAN_SIGN, Iso646.LESS_THAN_SIGN,
157: Iso646.COMMERCIAL_AT, Iso646.COMMA, Iso646.SEMICOLON,
158: Iso646.COLON, Iso646.SOLIDUS, Iso646.LEFT_SQUARE_BRACKET,
159: Iso646.RIGHT_SQUARE_BRACKET, Iso646.QUESTION_MARK,
160: Iso646.EQUALS_SIGN, Iso646.LEFT_CURLY_BRACKET,
161: Iso646.RIGHT_CURLY_BRACKET, SP, HT };
162:
163: /**
164: * A token is any character or sequence of characters which is not
165: * a seperator or control character.
166: */
167: public final static char[] token;
168:
169: /**
170: * Text contained in a comment. Can be any TEXT except the
171: * comment boundary characters '(' and ')'.
172: */
173: public final static char[] ctext;
174:
175: /**
176: * Comments can be included in some HTTP header fields by surrounding
177: * the comment text with parentheses. Comments are only allowed in
178: * fields containing "comment" as part of their field value definition.
179: * In all other fields, parentheses are considered part of the field
180: * value.
181: *
182: * note: doesn't encode that the string must begin with '(' and
183: * end with ')' and cannot contain ')' anywhere but as the last
184: * character.
185: */
186: public final static char[] comment;
187:
188: /**
189: * Text contained in a quote. Can be any TEXT except the
190: * quote boundary character '"'.
191: */
192: public final static char[] qdtext;
193:
194: /**
195: * A string of text is parsed as a single word if it is quoted using
196: * double-quote marks.
197: *
198: * note: doesn't encode that the string must begin with '"' and
199: * end with '"', and cannot contain '"' anywhere but as the last
200: * character or when escaped by a '\', and can contain non-TEXT
201: * characters only when preceeded a '\'.
202: */
203: public final static char[] quoted_string;
204:
205: /**
206: * The backslash character ("\") MAY be used as a single-character
207: * quoting mechanism only within quoted-string and comment constructs.
208: *
209: * note: doesn't encode that the string must consist of a
210: * backslash as the first character and any other character as
211: * the second character.
212: */
213: public final static char[] quoted_pair;
214:
215: /**
216: * HTTP uses a "<major>.<minor>" numbering scheme to indicate
217: * versions of the protocol.
218: *
219: * note: doesn't encode the specific sequence of the HTTP
220: * version string.
221: */
222: public final static char[] VERSION;
223:
224: /**
225: * Note the absence of an URI property here. This is because the
226: * URI spec currently potentially allows *any* character (in any
227: * character set) to be a valid part of an URI.
228: */
229:
230: /**
231: * HTTP applications have historically allowed three different formats
232: * for the representation of date/time stamps.
233: *
234: * note: doesn't encode the actual requirment as to how the date
235: * is to be encoded, just lists the acceptable characters.
236: */
237: public final static char[] DATE;
238:
239: /**
240: * HTTP character sets are identified by case-insensitive tokens.
241: * The complete set of tokens is defined by the IANA Character Set
242: * registry.
243: */
244: public final static char[] charset;
245:
246: /**
247: * Content coding values indicate an encoding transformation that has
248: * been or can be applied to an entity. All content-coding values are
249: * case-insensitive.
250: */
251: public final static char[] content_coding;
252:
253: /**
254: * Transfer-coding values are used to indicate an encoding
255: * transformation that has been, can be, or may need to be applied to an
256: * entity-body in order to ensure "safe transport" through the network.
257: */
258: public final static char[] transfer_coding;
259:
260: public final static int OFFSET_OF_NORMAL_CHARACTERS_IN_ASCII = 0x20;
261: public final static int NUMBER_OF_TEXT_CHARACTERS = 127 - 32;
262: public final static int NUMBER_OF_HEX_CHARACTERS = 6 + 6 + 10;
263: public final static int NUMBER_OF_VERSION_CHARACTERS = 15;
264: public final static int NUMBER_OF_TOKEN_CHARACTERS = 80;
265: // fill in the above character classes
266: static {
267: int index;
268:
269: TEXT = new char[NUMBER_OF_TEXT_CHARACTERS];
270: for (int i = 0; i < TEXT.length; i++) {
271: TEXT[i] = (char) (i + OFFSET_OF_NORMAL_CHARACTERS_IN_ASCII);
272: }
273: Arrays.sort(TEXT);
274:
275: HEX = new char[NUMBER_OF_HEX_CHARACTERS];
276: for (int i = 0; i < 7; i++)
277: HEX[i] = UPALPHA[i];
278: for (int i = 0; i < 7; i++)
279: HEX[i + 6] = LOALPHA[i];
280: for (int i = 0; i < 10; i++)
281: HEX[i + 6 + 6] = DIGIT[i];
282: Arrays.sort(HEX);
283:
284: Arrays.sort(separators);
285:
286: // won't work:
287: // token = new char[(CHAR.length-CTL.length)-separators.length-2];
288: token = new char[NUMBER_OF_TOKEN_CHARACTERS];
289: index = 0;
290: for (int i = 0; i < CHAR.length; i++) {
291: if (Arrays.binarySearch(separators, CHAR[i]) >= 0)
292: continue;
293: if (Arrays.binarySearch(CTL, CHAR[i]) >= 0)
294: continue;
295:
296: token[index] = CHAR[i];
297: index++;
298: }
299: Arrays.sort(token);
300:
301: ctext = new char[TEXT.length - 2];
302: index = 0;
303: for (int i = 0; i < TEXT.length; i++) {
304: if (TEXT[i] == Iso646.LEFT_PARENTHESIS
305: || TEXT[i] == Iso646.RIGHT_PARENTHESIS)
306: continue;
307: ctext[index++] = TEXT[i];
308: }
309: Arrays.sort(ctext);
310:
311: qdtext = new char[TEXT.length - 1];
312: index = 0;
313: for (int i = 0; i < TEXT.length; i++) {
314: if (TEXT[i] == Iso646.QUOTATION_MARK)
315: continue;
316: qdtext[index++] = TEXT[i];
317: }
318: Arrays.sort(qdtext);
319:
320: comment = TEXT;
321: quoted_string = CHAR;
322: quoted_pair = CHAR;
323:
324: VERSION = new char[NUMBER_OF_VERSION_CHARACTERS];
325: VERSION[0] = Iso646.LATIN_CAPITAL_LETTER_H;
326: VERSION[1] = Iso646.LATIN_CAPITAL_LETTER_T;
327: VERSION[2] = Iso646.LATIN_CAPITAL_LETTER_P;
328: VERSION[3] = Iso646.SOLIDUS;
329: VERSION[4] = Iso646.FULL_STOP;
330:
331: for (int i = 0; i < DIGIT.length; i++)
332: VERSION[5 + i] = DIGIT[i];
333: Arrays.sort(VERSION);
334:
335: DATE = new char[] { Iso646.DIGIT_ONE, Iso646.DIGIT_TWO,
336: Iso646.DIGIT_THREE, Iso646.DIGIT_FOUR,
337: Iso646.DIGIT_FIVE, Iso646.DIGIT_SIX,
338: Iso646.DIGIT_SEVEN, Iso646.DIGIT_EIGHT,
339: Iso646.DIGIT_NINE, Iso646.DIGIT_ZERO,
340: Iso646.LATIN_CAPITAL_LETTER_M,
341: Iso646.LATIN_CAPITAL_LETTER_T,
342: Iso646.LATIN_CAPITAL_LETTER_W,
343: Iso646.LATIN_CAPITAL_LETTER_F,
344: Iso646.LATIN_CAPITAL_LETTER_S,
345: Iso646.LATIN_CAPITAL_LETTER_J,
346: Iso646.LATIN_CAPITAL_LETTER_A,
347: Iso646.LATIN_CAPITAL_LETTER_O,
348: Iso646.LATIN_CAPITAL_LETTER_N,
349: Iso646.LATIN_SMALL_LETTER_O,
350: Iso646.LATIN_SMALL_LETTER_N,
351: Iso646.LATIN_SMALL_LETTER_U,
352: Iso646.LATIN_SMALL_LETTER_E,
353: Iso646.LATIN_SMALL_LETTER_D,
354: Iso646.LATIN_SMALL_LETTER_H,
355: Iso646.LATIN_SMALL_LETTER_R,
356: Iso646.LATIN_SMALL_LETTER_I,
357: Iso646.LATIN_SMALL_LETTER_A,
358: Iso646.LATIN_SMALL_LETTER_T,
359: Iso646.LATIN_SMALL_LETTER_Y,
360: Iso646.LATIN_SMALL_LETTER_B,
361: Iso646.LATIN_SMALL_LETTER_P,
362: Iso646.LATIN_SMALL_LETTER_L,
363: Iso646.LATIN_SMALL_LETTER_G,
364: Iso646.LATIN_SMALL_LETTER_C,
365: Iso646.LATIN_SMALL_LETTER_V, Iso646.COLON,
366: Iso646.HYPHEN_MINUS, Iso646.SPACE,
367: Iso646.LATIN_CAPITAL_LETTER_G };
368: Arrays.sort(DATE);
369:
370: charset = token;
371: content_coding = token;
372: transfer_coding = CHAR;
373: }
374:
375: //
376: // COMMONLY USED HTTP VERSIONS
377: //
378: public final static String VERSION_10 = "HTTP/1.0";
379: public final static String VERSION_11 = "HTTP/1.1";
380:
381: //
382: // COMMONLY USED HTTP METHODS
383: //
384: public final static String METHOD_OPTIONS = "OPTIONS";
385: public final static String METHOD_GET = "GET";
386: public final static String METHOD_HEAD = "HEAD";
387: public final static String METHOD_POST = "POST";
388: public final static String METHOD_PUT = "PUT";
389: public final static String METHOD_DELETE = "DELETE";
390: public final static String METHOD_TRACE = "TRACE";
391: public final static String METHOD_CONNECT = "CONNECT";
392:
393: //
394: // COMMONLY USED CONTENT CODINGS
395: //
396: public final static String CONTENT_CODING = "content-coding";
397: public final static String CONTENT_CODING_GZIP = "gzip";
398: public final static String CONTENT_CODING_COMPRESS = "compress";
399: public final static String CONTENT_CODING_DEFLATE = "deflate";
400: public final static String CONTENT_CODING_IDENTITY = "identity";
401:
402: //
403: // COMMONLY USED TRANSFER CODINGS
404: //
405: public final static String TRANSFER_CODING = "transfer-coding";
406: public final static String TRANSFER_CODING_CHUNKED = "chunked";
407: public final static String TRANSFER_CODING_GZIP = "gzip";
408: public final static String TRANSFER_CODING_COMPRESS = "compress";
409: public final static String TRANSFER_CODING_DEFLATE = "deflate";
410: public final static String TRANSFER_CODING_IDENTITY = "identity";
411:
412: //
413: // HTTP ERROR CODES
414: //
415: public final static int STATUS_100_Continue = 100,
416: STATUS_101_Switching_Protocols = 101,
417: STATUS_102_Processing = 102, STATUS_200_OK = 200,
418: STATUS_201_Created = 201, STATUS_202_Accepted = 202,
419: STATUS_203_Non_Authoritative_Information = 203,
420: STATUS_204_No_Content = 204,
421: STATUS_205_Reset_Content = 205,
422: STATUS_206_Partial_Content = 206,
423: STATUS_207_Multi_Status = 207,
424: STATUS_300_Multiple_Choices = 300,
425: STATUS_301_Moved_Permanently = 301,
426: STATUS_302_Moved_Temporarily = 302,
427: STATUS_303_See_Other = 303, STATUS_304_Not_Modified = 304,
428: STATUS_305_Use_Proxy = 305, STATUS_400_Bad_Request = 400,
429: STATUS_401_Unauthorized = 401,
430: STATUS_402_Payment_Required = 402,
431: STATUS_403_Forbidden = 403, STATUS_404_Not_Found = 404,
432: STATUS_405_Method_Not_Allowed = 405,
433: STATUS_406_Not_Acceptable = 406,
434: STATUS_407_Proxy_Authentication_Required = 407,
435: STATUS_408_Request_Timeout = 408,
436: STATUS_409_Conflict = 409, STATUS_410_Gone = 410,
437: STATUS_411_Length_Required = 411,
438: STATUS_412_Precondition_Failed = 412,
439: STATUS_413_Request_Entity_Too_Large = 413,
440: STATUS_414_Request_URI_Too_Large = 414,
441: STATUS_415_Unsupported_Media_Type = 415,
442: STATUS_416_Requested_Range_Not_Satisfiable = 416,
443: STATUS_417_Expectation_Failed = 417,
444: STATUS_422_Unprocessable_Entity = 422,
445: STATUS_423_Locked = 423,
446: STATUS_424_Failed_Dependency = 424,
447: STATUS_500_Internal_Server_Error = 500,
448: STATUS_501_Not_Implemented = 501,
449: STATUS_502_Bad_Gateway = 502,
450: STATUS_503_Service_Unavailable = 503,
451: STATUS_504_Gateway_Timeout = 504,
452: STATUS_505_HTTP_Version_Not_Supported = 505,
453: STATUS_507_Insufficient_Storage = 507;
454:
455: public final static int STATUS_999_IO_Problem = 999;
456:
457: //
458: // HTTP ERROR MESSAGES
459: //
460: public final static String STATUS_100_MSG = "Continue",
461: STATUS_101_MSG = "Switching Protocols",
462: STATUS_102_MSG = "Processing", STATUS_200_MSG = "OK",
463: STATUS_201_MSG = "Created", STATUS_202_MSG = "Accepted",
464: STATUS_203_MSG = "Non Authoritative Information",
465: STATUS_204_MSG = "No Content",
466: STATUS_205_MSG = "Reset Content",
467: STATUS_206_MSG = "Partial Content",
468: STATUS_207_MSG = "Multi Status",
469: STATUS_300_MSG = "Multiple Choices",
470: STATUS_301_MSG = "Moved Permanently",
471: STATUS_302_MSG = "Moved Temporarily",
472: STATUS_303_MSG = "See Other",
473: STATUS_304_MSG = "Not Modified",
474: STATUS_305_MSG = "Use Proxy",
475: STATUS_400_MSG = "Bad Request",
476: STATUS_401_MSG = "Unauthorized",
477: STATUS_402_MSG = "Payment Required",
478: STATUS_403_MSG = "Forbidden", STATUS_404_MSG = "Not Found",
479: STATUS_405_MSG = "Method Not Allowed",
480: STATUS_406_MSG = "Not Acceptable",
481: STATUS_407_MSG = "Proxy Authentication Required",
482: STATUS_408_MSG = "Request Timeout",
483: STATUS_409_MSG = "Conflict", STATUS_410_MSG = "Gone",
484: STATUS_411_MSG = "Length Required",
485: STATUS_412_MSG = "Precondition Failed",
486: STATUS_413_MSG = "Request Entity Too Large",
487: STATUS_414_MSG = "Request URI Too Large",
488: STATUS_415_MSG = "Unsupported Media Type",
489: STATUS_416_MSG = "Requested Range Not Satisfiable",
490: STATUS_417_MSG = "Expectation Failed",
491: STATUS_422_MSG = "Unprocessable Entity",
492: STATUS_423_MSG = "Locked",
493: STATUS_424_MSG = "Failed Dependency",
494: STATUS_500_MSG = "Internal Server Error",
495: STATUS_501_MSG = "Not Implemented",
496: STATUS_502_MSG = "Bad Gateway",
497: STATUS_503_MSG = "Service Unavailable",
498: STATUS_504_MSG = "Gateway Timeout",
499: STATUS_505_MSG = "HTTP Version Not Supported",
500: STATUS_507_MSG = "Insufficient Storage";
501:
502: public final static String STATUS_999_MSG = "IO Problem";
503:
/** Length of {@link #STATUS_MSG}: one slot for every code from 0 to 999. */
public final static int NUMBER_OF_STATUS_MSG = 1000;

/**
 * Status messages indexed by status code. A slot is null when this
 * class declares no STATUS_&lt;code&gt;_MSG constant for that code.
 */
public final static String[] STATUS_MSG = new String[NUMBER_OF_STATUS_MSG];

static {
    // Use runtime introspection to copy every public static final
    // String constant named STATUS_<three digits>_MSG into the slot
    // of STATUS_MSG that its three digits select.
    final Pattern pattern = Pattern
            .compile("STATUS_([0-9][0-9][0-9])_MSG");
    final Field[] fields = HTTPEncoding.class.getFields();
    for (int i = 0; i < fields.length; i++) {
        final Field field = fields[i];
        final int modifiers = field.getModifiers();
        if (!Modifier.isStatic(modifiers)
                || !Modifier.isFinal(modifiers)
                || !Modifier.isPublic(modifiers)
                || !field.getType().equals(String.class)) {
            continue;
        }

        final Matcher matcher = pattern.matcher(field.getName());
        if (!matcher.matches()) {
            continue;
        }

        // The pattern guarantees exactly three decimal digits, so the
        // value is always a valid index into STATUS_MSG.
        final int num = Integer.parseInt(matcher.group(1));
        try {
            STATUS_MSG[num] = (String) field.get(null);
        } catch (IllegalAccessException ignored) {
            // Deliberately ignored: a constant that cannot be read
            // simply leaves its slot null, exactly like a code for
            // which no message is declared.
        }
    }

    // The remaining slots are intentionally left null rather than
    // filled with a default message: applications may use custom
    // codes with messages of their own.
}
541:
//
// CHARACTER CODINGS
//
/** Character set used to encode HTTP protocol elements */
public static final String USASCII_CHARSET = "US-ASCII";

/** Default content encoding charset */
public static final String ISO8859_1_CONTENT_CHARSET = "ISO-8859-1";

//
// COMMON HEADERS
//
/** Name of the content-length header, in lower case. */
public static final String HEADER_CONTENT_LENGTH = "content-length";
/** Name of the transfer-encoding header, in lower case. */
public static final String HEADER_TRANSFER_ENCODING = "transfer-encoding";
/** Name of the trailer header, in lower case. */
public static final String HEADER_TRAILER = "trailer";

/**
 * The bytes of {@link #HEADER_CONTENT_LENGTH}, encoded with
 * {@link #USASCII_CHARSET} and wrapped in a buffer.
 *
 * NOTE(review): a ByteBuffer carries its own position and limit, and
 * this instance is shared by everyone who reads the field; callers
 * presumably should work on a duplicate() - confirm against the
 * users of this constant.
 */
public static final ByteBuffer HEADER_CONTENT_LENGTH_BUFFER;

static {
    // Encode with an explicitly named charset. The no-argument
    // getBytes() uses the platform default charset, which is not
    // guaranteed to produce the US-ASCII bytes the protocol needs.
    final byte[] encoded;
    try {
        encoded = HEADER_CONTENT_LENGTH.getBytes(USASCII_CHARSET);
    } catch (java.io.UnsupportedEncodingException e) {
        // Every Java platform is required to support US-ASCII, so
        // this indicates a broken runtime; fail class initialization.
        throw new ExceptionInInitializerError(e);
    }
    HEADER_CONTENT_LENGTH_BUFFER = ByteBuffer.wrap(encoded);
}
564:
565: //
566: // UTILITY METHODS
567: //
568: /**
569: * Determine whether the specified character is a control
570: * character.
571: *
572: * @param c the character to test
573: * @return true if it is, false otherwise
574: */
575: public final static boolean isControlChar(final char c) {
576: if (Arrays.binarySearch(CTL, c) >= 0)
577: return true;
578:
579: return false;
580: }
581:
582: /**
583: * Determine whether the specified character is a separator
584: * character.
585: *
586: * @param c the character to test
587: * @return true if it is, false otherwise
588: */
589: public final static boolean isSeparatorChar(final char c) {
590: if (Arrays.binarySearch(separators, c) >= 0)
591: return true;
592:
593: return false;
594: }
595:
596: /**
597: * Determine whether the specified character is a token
598: * character.
599: *
600: * @param c the character to test
601: * @return true if it is, false otherwise
602: */
603: public final static boolean isTokenChar(final char c) {
604: if (!isSeparatorChar(c) && !isControlChar(c))
605: return true;
606:
607: return false;
608: }
609:
610: /**
611: * Determine whether the specified character is a text
612: * character.
613: *
614: * @param c the character to test
615: * @return true if it is, false otherwise
616: */
617: public final static boolean isTextChar(final char c) {
618: return !isControlChar(c);
619: }
620:
621: /**
622: * Determine whether the specified character is a method
623: * character.
624: *
625: * @param c the character to test
626: * @return true if it is, false otherwise
627: */
628: public final static boolean isMethodChar(final char c) {
629: return isTokenChar(c);
630: }
631:
632: /**
633: * Determine whether the specified character is a linear
634: * whitespace character.
635: *
636: * @param c the character to test
637: * @return true if it is, false otherwise
638: */
639: public final static boolean isLWSChar(final char c) {
640: return c == CR || c == LF || c == SP || c == HT;
641: }
642:
643: /**
644: * Determine whether the specified character is a status code
645: * character.
646: *
647: * @param c the character to test
648: * @return true if it is, false otherwise
649: */
650: public final static boolean isStatusCodeChar(final char c) {
651: if (Arrays.binarySearch(DIGIT, c) >= 0)
652: return true;
653:
654: return false;
655: }
656:
657: /**
658: * Determine whether the specified character is a reason phrase
659: * character.
660: *
661: * @param c the character to test
662: * @return true if it is, false otherwise
663: */
664: public final static boolean isReasonPhraseChar(final char c) {
665: if (Arrays.binarySearch(TEXT, c) < 0)
666: return false;
667: if (c == CR || c == LF)
668: return false;
669:
670: return true;
671: }
672:
673: /**
674: * Returns true if the character is allowed in a URI, false
675: * otherwise. Note that it <i>does</i> allow "#" and "%", but
676: * not " "!
677: *
678: * From RFC 2396:
679: *
680: 2.4.3. Excluded US-ASCII Characters
681:
682: Although they are disallowed within the URI syntax, we include here a
683: description of those US-ASCII characters that have been excluded and
684: the reasons for their exclusion.
685:
686: The control characters in the US-ASCII coded character set are not
687: used within a URI, both because they are non-printable and because
688: they are likely to be misinterpreted by some control mechanisms.
689:
690: control = <US-ASCII coded characters 00-1F and 7F hexadecimal>
691:
692: The space character is excluded because significant spaces may
693: disappear and insignificant spaces may be introduced when URI are
694: transcribed or typeset or subjected to the treatment of word-
695: processing programs. Whitespace is also used to delimit URI in many
696: contexts.
697:
698: space = <US-ASCII coded character 20 hexadecimal>
699:
700: The angle-bracket "<" and ">" and double-quote (") characters are
701: excluded because they are often used as the delimiters around URI in
702: text documents and protocol fields. The character "#" is excluded
703: because it is used to delimit a URI from a fragment identifier in URI
704: references (Section 4). The percent character "%" is excluded because
705: it is used for the encoding of escaped characters.
706:
707: delims = "<" | ">" | "#" | "%" | <">
708: *
709: * @param c the character to test
710: * @return true if it is a valid URI char, false otherwise
711: */
712: public final static boolean isURIChar(final char c) {
713: if (isControlChar(c))
714: return false;
715: if (c == Iso646.LESS_THAN_SIGN || c == Iso646.GREATER_THAN_SIGN
716: || c == Iso646.QUOTATION_MARK || c == SP)
717: return false;
718:
719: return true;
720: }
721:
722: /**
723: * Determine whether the specified character is a version specifier
724: * character.
725: *
726: * @param c the character to test
727: * @return true if it is, false otherwise
728: */
729: public final static boolean isVersionChar(final char c) {
730: final int contained = Arrays.binarySearch(VERSION, c);
731: if (contained >= 0)
732: return true;
733:
734: return false;
735: }
736:
737: /**
738: * Nearly fully compliant verification of HTTP version
739: * strings.
740: *
741: * @param s
742: * @return
743: */
744: public final static boolean isVersionString(final String s) {
745: if (s == null)
746: return false;
747:
748: if (s.length() < 7)
749: return false;
750:
751: if (s.charAt(0) != Iso646.LATIN_CAPITAL_LETTER_H
752: | s.charAt(1) != Iso646.LATIN_CAPITAL_LETTER_T
753: | s.charAt(2) != Iso646.LATIN_CAPITAL_LETTER_T
754: | s.charAt(3) != Iso646.LATIN_CAPITAL_LETTER_P)
755: return false;
756:
757: if (s.indexOf(Iso646.SOLIDUS) < 5)
758: return false;
759:
760: for (int i = 4; i < s.length(); i++)
761: if (!isVersionChar(s.charAt(i)))
762: return false;
763:
764: return true;
765: }
766:
767: public final static int getMajorVersion(final String versionString) {
768: if (!isVersionString(versionString))
769: throw new IllegalArgumentException(
770: "Not a valid HTTP version string!");
771:
772: final StringBuffer version = new StringBuffer();
773: int index = 4;
774: while (true) {
775: final char c = versionString.charAt(index++);
776: if (c == Iso646.SOLIDUS)
777: break;
778:
779: version.append(c);
780: }
781:
782: return new Integer(version.toString()).intValue();
783: }
784:
785: public final static int getMinorVersion(final String versionString) {
786: if (!isVersionString(versionString))
787: throw new IllegalArgumentException(
788: "Not a valid HTTP version string!");
789:
790: return new Integer(versionString.substring(versionString
791: .indexOf(Iso646.SOLIDUS))).intValue();
792: }
793:
794: public final static int NUMBER_OF_CHARS_TO_SHIFT_ASCII_UPPER_TO_LOWER = 32;
795:
796: public final static char tokenCharToLowerCase(final char token) {
797: if (token >= Iso646.LATIN_CAPITAL_LETTER_A
798: && token <= Iso646.LATIN_CAPITAL_LETTER_Z)
799: return (char) (token + NUMBER_OF_CHARS_TO_SHIFT_ASCII_UPPER_TO_LOWER);
800:
801: return token;
802: }
803: }
|