001: /*
002: * Portions Copyright 2000-2007 Sun Microsystems, Inc. All Rights
003: * Reserved. Use is subject to license terms.
004: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER
005: *
006: * This program is free software; you can redistribute it and/or
007: * modify it under the terms of the GNU General Public License version
008: * 2 only, as published by the Free Software Foundation.
009: *
010: * This program is distributed in the hope that it will be useful, but
011: * WITHOUT ANY WARRANTY; without even the implied warranty of
012: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
013: * General Public License version 2 for more details (a copy is
014: * included at /legal/license.txt).
015: *
016: * You should have received a copy of the GNU General Public License
017: * version 2 along with this work; if not, write to the Free Software
018: * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
019: * 02110-1301 USA
020: *
021: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
022: * Clara, CA 95054 or visit www.sun.com if you need additional
023: * information or have any questions.
024: */
025: package gov.nist.siplite.parser;
026:
027: import gov.nist.siplite.SIPConstants;
028: import gov.nist.siplite.message.Request;
029: import gov.nist.siplite.header.*;
030: import gov.nist.core.*;
031: import java.util.Hashtable;
032:
033: /**
034: * Lexer class for the parser.
035: *
036: *@version JAIN-SIP-1.1
037: *
038: *
039: *<a href="{@docRoot}/uncopyright.html">This code is in the public domain.</a>
040: *
041: */
042: public class Lexer extends LexerCore {
/**
 * Creates a lexer over the given text and selects the keyword table
 * registered under the given lexer name (see
 * {@link #selectLexer(String)}).
 *
 * @param lexerName name of the lexer (keyword table) to use
 * @param buffer data to be parsed
 */
public Lexer(String lexerName, String buffer) {
    super(lexerName, buffer);
    selectLexer(lexerName);
}
053:
054: /**
055: * Gets the header name of the line.
056: * @param line the text to be parsed
057: * @return the header name
058: */
059: public static String getHeaderName(String line) {
060: if (line == null) {
061: return null;
062: }
063: String headerName = null;
064: try {
065: int begin = line.indexOf(":");
066: headerName = null;
067: if (begin >= 1) {
068: headerName = line.substring(0, begin);
069: }
070: } catch (IndexOutOfBoundsException e) {
071: return null;
072: }
073: return headerName;
074: }
075:
076: /**
077: * Gets the header value of the line.
078: * @param line the text to be parsed
079: * @return the header value
080: */
081: public static String getHeaderValue(String line) {
082: if (line == null) {
083: return null;
084: }
085: String headerValue = null;
086: try {
087: int begin = line.indexOf(":");
088: headerValue = line.substring(begin + 1);
089: } catch (IndexOutOfBoundsException e) {
090: return null;
091: }
092: return headerValue;
093: }
094:
095: /**
096: * Checks if the given string is a valid method/header/parameter name.
097: * @param name the text to be parsed
098: * @return true if the string is a valid name, false otherwise
099: */
100: public static boolean isValidName(String name) {
101: // RFC 3261, p.p. 225, 221:
102: //
103: // Method = INVITEm / ACKm / OPTIONSm / BYEm
104: // / CANCELm / REGISTERm
105: // / extension-method
106: // extension-method = token
107: // token = 1*(alphanum / "-" / "." / "!" / "%" / "*"
108: // / "_" / "+" / "`" / "'" / "~" )
109: // alphanum = ALPHA / DIGIT
110: //
111: // p.227:
112: // generic-param = token [ EQUAL gen-value ]
113: //
114: // p. 232:
115: // extension-header = header-name HCOLON header-value
116: // header-name = token
117: //
118: if (name == null || name.length() == 0) {
119: return false;
120: }
121:
122: for (int i = 0; i < name.length(); i++) {
123: char ch = name.charAt(i);
124:
125: if (!isValidChar(ch)) {
126: return false;
127: }
128: }
129:
130: return true;
131: }
132:
133: /**
134: * Checks if the given string is a valid header/parameter value.
135: * @param value the text to be parsed
136: * @param isParameter true if the value to be checked is a parameter
137: * value, false otherwise
138: * @return true if the string is a valid value, false otherwise
139: */
140: protected static boolean isValidValue(String value,
141: boolean isParameter) {
142:
143: // System.out.println(">>> value = " + value);
144:
145: if (value == null) {
146: value = ""; // null is a valid parameter value
147: }
148:
149: // Check that the value doesn't contain unescaped semicolons
150: boolean isEscaped = false;
151: boolean isQuoteOn = false;
152: boolean isBracketOn = false;
153:
154: for (int i = 0; i < value.length(); i++) {
155: char ch = value.charAt(i);
156:
157: // Ignore escaped (with preceding '\') characters
158: if (isEscaped) {
159: isEscaped = false;
160: continue;
161: }
162:
163: // Ignore characters that are a part of the string (inside qoutes)
164: if (ch == '"') {
165: isQuoteOn = !isQuoteOn;
166: continue;
167: }
168:
169: if (isQuoteOn) {
170: continue;
171: }
172:
173: if (ch == '\\') {
174: isEscaped = true;
175: continue;
176: }
177:
178: // Ignore characters inside "<" and ">"
179: if (isBracketOn) {
180: if (ch == '>') {
181: isBracketOn = false;
182: continue;
183: }
184: } else {
185: if (ch == '<') {
186: isBracketOn = true;
187: continue;
188: }
189:
190: if (isParameter) {
191: // Restrictions on a parameter's value are more strict
192: // when header's value may be almost any text.
193: if (!isValidChar(ch)) {
194: return false;
195: }
196: } else {
197: if (ch == ';') {
198: return false;
199: }
200: }
201: }
202: }
203:
204: // System.out.println(">>> VALID");
205: return true;
206: }
207:
208: /**
209: * Checks if the given string is a valid header value.
210: * @param value the text to be parsed
211: * @return true if the string is a valid value, false otherwise
212: */
213: public static boolean isValidHeaderValue(String value) {
214: return isValidValue(value, false);
215: }
216:
217: /**
218: * Checks if the given string is a valid parameter value.
219: * @param value the text to be parsed
220: * @return true if the string is a valid value, false otherwise
221: */
222: public static boolean isValidParameterValue(String value) {
223: return isValidValue(value, true);
224: }
225:
226: /**
227: * Checks if the given string is valid as user part of a SIP(S)-URI.
228: *
229: * @param name the text to be parsed
230: * @return true if the string is a valid name, false otherwise
231: */
232: public static boolean isValidUserName(String name) {
233: // RFC3261 p.222
234: // user = 1*( unreserved / escaped / user-unreserved )
235: // user-unreserved = "&" / "=" / "+" / "$" / "," / ";" / "?" / "/"
236: // p.219
237: // alphanum = ALPHA / DIGIT
238: // p.220
239: // unreserved = alphanum / mark
240: // mark = "-" / "_" / "." / "!" / "~" / "*" / "'" / "(" / ")"
241: // escaped = "%" HEXDIG HEXDIG
242: //
243:
244: if (name == null) {
245: return true;
246: }
247: if (name.length() == 0) {
248: // Zerolength case causes wrong AT symbol appending,
249: // the name has to be null or nonempty
250: return false;
251: }
252:
253: for (int i = 0; i < name.length(); i++) {
254: char ch = name.charAt(i);
255:
256: if (URLParser.isUnreserved(ch) || isEscaped(name, i)
257: || URLParser.isUserUnreserved(ch)) {
258: continue;
259: } else {
260: return false;
261: }
262: }
263:
264: return true;
265: }
266:
267: /**
268: * Checks if the given string is valid display name.
269: *
270: * @param displayName the text to be parsed
271: * @return true if the string is a valid display name, false otherwise
272: */
273: public static boolean isValidDisplayName(String displayName) {
274: // RFC 3261 p.228
275: // display-name = *(token LWS)/ quoted-string
276: // p.220
277: // LWS = [*WSP CRLF] 1*WSP ; linear whitespace
278: // UTF8-NONASCII = %xC0-DF 1UTF8-CONT
279: // / %xE0-EF 2UTF8-CONT
280: // / %xF0-F7 3UTF8-CONT
281: // / %xF8-Fb 4UTF8-CONT
282: // / %xFC-FD 5UTF8-CONT
283: // UTF8-CONT = %x80-BF
284: // p.221
285: // token = 1*(alphanum / "-" / "." / "!" / "%" / "*"
286: // / "_" / "+" / "`" / "'" / "~" )
287: // p.222
288: // quoted-string = SWS DQUOTE *(qdtext / quoted-pair ) DQUOTE
289: // qdtext = LWS / %x21 / %x23-5B / %x5D-7E
290: // / UTF8-NONASCII
291: // quoted-pair = "\" (%x00-09 / %x0B-0C
292: // / %x0E-7F)
293: if (null == displayName) {
294: return false;
295: }
296: boolean quoted = false;
297: displayName = StringTokenizer.convertNewLines(displayName);
298: displayName = displayName.trim();
299: int i = 0;
300: if ('"' == displayName.charAt(0)) {
301: quoted = true;
302: i++;
303: }
304: while (i < displayName.length()) {
305: char ch = displayName.charAt(i);
306: if (!quoted) {
307: if (!isValidChar(ch) && ch != ' ' && ch != 0x09) {
308: return false;
309: }
310: } else {
311: // left UTF8-NONASCII proper converting on i18n subsystem
312: if (ch < 0x20
313: || (ch == '"' && i != displayName.length() - 1)
314: || (ch > 0x7E && ch < 0xC0)) {
315: return false;
316: }
317: if (ch == '\\') {
318: if (isQuotedPair(displayName, i)) {
319: i++;
320: } else {
321: return false;
322: }
323: }
324: }
325: i++;
326: }
327: return true;
328: }
329:
330: /**
331: * Checks if the given string is valid scheme name.
332: *
333: * @param scheme the text to be parsed
334: * @return true if the string is a valid scheme name, false otherwise
335: */
336: public static boolean isValidScheme(String scheme) {
337: // RFC3261 p.224
338: // scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
339: if (null == scheme || 0 == scheme.length()
340: || !isAlpha(scheme.charAt(0))) {
341: return false;
342: }
343: char ch;
344: for (int i = 1; i < scheme.length(); i++) {
345: ch = scheme.charAt(i);
346: if (!Character.isDigit(ch) && !isAlpha(ch) && ch != '+'
347: && ch != '-' && ch != '.') {
348: return false;
349: }
350: }
351: return true;
352: }
353:
354: /**
355: * Checks if the given string is valid IPv6Address.
356: *
357: * BNF (RFC3261, p. 223, 232):
358: * IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
359: * IPv6address = hexpart [ ":" IPv4address ]
360: * hexpart = hexseq / hexseq "::" [ hexseq ] / "::" [ hexseq ]
361: * hexseq = hex4 *( ":" hex4)
362: * hex4 = 1*4HEXDIG
363: *
364: * @param address the text to be parsed
365: * @return true if the string is a valid IPv6Address, false otherwise
366: */
367: public static boolean isValidIpv6Address(String address) {
368: char ch;
369: if (address == null || 0 == address.length()) {
370: return false;
371: }
372: int len = address.length();
373: int colonCount = 0, hexdigCount = 0;
374:
375: for (int i = 0; i < len; i++) {
376: ch = address.charAt(i);
377:
378: if (ch == ':') {
379: colonCount++;
380: continue;
381: }
382:
383: if (ch == '.') {
384: int colonPos = address.lastIndexOf(':', i);
385: if (colonPos > 0) {
386: return isValidIpv4Address(address
387: .substring(colonPos + 1));
388: } else {
389: return false;
390: }
391: }
392:
393: if (hexdigCount > 4 || colonCount > 2) {
394: return false;
395: }
396:
397: colonCount = 0;
398:
399: // Check for IP v6:
400: // hex digit?
401: if (isHexDigit(ch)) {
402: hexdigCount++;
403: continue;
404: }
405:
406: if (hexdigCount > 0) {
407: // Hex part must be followed by ":", "::" or by the end
408: // of address. '.' means IP v6 address.
409: if ((i < len - 1) && (ch != ':') && (ch != '.')) {
410: return false;
411: }
412: }
413:
414: hexdigCount = 0;
415:
416: // Check for IP v4.
417: if (!(Character.isDigit(ch) || (ch == '.'))) {
418: return false;
419: }
420: } // end for
421:
422: // report about wrong address "::::::"
423: // and "::44444
424: if (hexdigCount > 4 || colonCount > 2) {
425: return false;
426: }
427:
428: return true;
429: }
430:
431: /**
432: * Checks if the given string is valid IPv4Address.
433: *
434: * BNF (RFC3261, p. 223, 232):
435: * IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
436: *
437: * @param address the text to be parsed
438: * @return true if the string is a valid IPv4Address, false otherwise
439: */
440: public static boolean isValidIpv4Address(String address) {
441: char ch;
442: if (address == null || 0 == address.length()) {
443: return false;
444: }
445: int len = address.length();
446: int pointCount = 0, digitCount = 0;
447: int totalPoint = 0;
448: for (int i = 0; i < len; i++) {
449: ch = address.charAt(i);
450:
451: if (ch == '.') {
452: if (i == len - 1) {
453: return false;
454: }
455: pointCount++;
456: totalPoint++;
457: digitCount = 0;
458: continue;
459: }
460:
461: if (digitCount > 3 || pointCount > 1) {
462: return false;
463: }
464:
465: pointCount = 0;
466:
467: if (!isDigit(ch)) {
468: return false;
469: } else {
470: digitCount++;
471: }
472: } // end for
473:
474: if (totalPoint != 3) {
475: return false;
476: }
477: return true;
478: }
479:
480: /**
481: * Checks if the given string is valid hostname
482: *
483: * BNF(RFC3261 p.222)
484: * hostname = *( domainlabel "." ) toplabel [ "." ]
485: * domainlabel = alphanum
486: * / alphanum *( alphanum / "-" ) alphanum
487: * toplabel = ALPHA / ALPHA *( alphanum / "-" ) alphanum
488: *
489: * @param address the text to be parsed
490: * @return true if the string is a valid hostname,
491: * false otherwise
492: */
493: public static boolean isValidHostname(String address) {
494: if (address == null || 0 == address.length()) {
495: return false;
496: }
497: int pCount = 0;
498: boolean isHostname = false;
499: for (int i = 0; i < address.length(); i++) {
500: char c = address.charAt(i);
501: if (c == '.' || isAlpha(c) || c == '-' || isDigit(c)) {
502: continue;
503: } else {
504: return false;
505: }
506: }
507:
508: int lastPointPos = address.lastIndexOf('.');
509: String toplabel;
510: if (lastPointPos == address.length() - 1) {
511: if (0 == lastPointPos) {
512: // address is "."
513: return false;
514: }
515: // get the previous point position
516: // or -1
517: lastPointPos = address.lastIndexOf('.', lastPointPos - 1);
518: }
519: // if there is no previous point toplabel equals whole string
520: toplabel = address.substring(lastPointPos + 1);
521: if (!isAlpha(toplabel.charAt(0))) {
522: return false;
523: }
524: return true;
525: }
526:
527: /**
528: * Selects the lexer to used based
529: * on the current parsing context.
530: * @param lexerName the lexer engine
531: */
532: public void selectLexer(String lexerName) {
533: currentLexer = (Hashtable) lexerTables.get(lexerName);
534: this .currentLexerName = lexerName;
535:
536: /*
537: * 'SIP'/'SIPS' keywords are added to the keyword list
538: * for all lexers except "command_keywordLexer" and
539: * "method_keywordLexer" according to the RFC 3261:
540: *
541: * For "status_lineLexer" (p. 225):
542: *
543: * Response = Status-Line
544: * *( message-header )
545: * CRLF
546: * [ message-body ]
547: * Status-Line = SIP-Version SP Status-Code SP Reason-Phrase CRLF
548: *
549: * For "request_lineLexer" (p. 223):
550: *
551: * Request = Request-Line
552: * *( message-header )
553: * CRLF
554: * [ message-body ]
555: * Request-Line = Method SP Request-URI SP SIP-Version CRLF
556: *
557: * For "sip_urlLexer" (p. 222):
558: *
559: * SIP-URI = "sip:" [ userinfo ] hostport
560: * uri-parameters [ headers ]
561: */
562:
563: if (currentLexer == null) {
564: addLexer(lexerName);
565: if (lexerName.equals("method_keywordLexer")) {
566: addKeyword(Request.REGISTER.toUpperCase(),
567: TokenTypes.REGISTER);
568: addKeyword(Request.ACK.toUpperCase(), TokenTypes.ACK);
569: addKeyword(Request.OPTIONS.toUpperCase(),
570: TokenTypes.OPTIONS);
571: addKeyword(Request.BYE.toUpperCase(), TokenTypes.BYE);
572: addKeyword(Request.INVITE.toUpperCase(),
573: TokenTypes.INVITE);
574: addKeyword(Request.SUBSCRIBE.toUpperCase(),
575: TokenTypes.SUBSCRIBE);
576: addKeyword(Request.NOTIFY.toUpperCase(),
577: TokenTypes.NOTIFY);
578: addKeyword(Request.MESSAGE.toUpperCase(),
579: TokenTypes.MESSAGE);
580: addKeyword(Request.PUBLISH.toUpperCase(),
581: TokenTypes.PUBLISH);
582: addKeyword(Request.REFER.toUpperCase(),
583: TokenTypes.REFER);
584: addKeyword(Request.INFO.toUpperCase(), TokenTypes.INFO);
585: addKeyword(Request.UPDATE.toUpperCase(),
586: TokenTypes.UPDATE);
587: } else if (lexerName.equals("command_keywordLexer")) {
588: addKeyword(Header.FROM.toUpperCase(), TokenTypes.FROM); // 1
589: addKeyword(Header.TO.toUpperCase(), TokenTypes.TO); // 2
590: addKeyword(Header.VIA.toUpperCase(), TokenTypes.VIA); // 3
591: addKeyword(Header.ROUTE.toUpperCase(), TokenTypes.ROUTE); // 4
592: addKeyword(Header.MAX_FORWARDS.toUpperCase(),
593: TokenTypes.MAX_FORWARDS); // 5
594: addKeyword(Header.AUTHORIZATION.toUpperCase(),
595: TokenTypes.AUTHORIZATION); // 6
596: addKeyword(Header.PROXY_AUTHORIZATION.toUpperCase(),
597: TokenTypes.PROXY_AUTHORIZATION); // 7
598: addKeyword(Header.DATE.toUpperCase(), TokenTypes.DATE); // 8
599: addKeyword(Header.CONTENT_ENCODING.toUpperCase(),
600: TokenTypes.CONTENT_ENCODING); // 9
601: addKeyword(Header.CONTENT_LENGTH.toUpperCase(),
602: TokenTypes.CONTENT_LENGTH); // 10
603: addKeyword(Header.CONTENT_TYPE.toUpperCase(),
604: TokenTypes.CONTENT_TYPE); // 11
605: addKeyword(Header.CONTACT.toUpperCase(),
606: TokenTypes.CONTACT); // 12
607: addKeyword(Header.CALL_ID.toUpperCase(),
608: TokenTypes.CALL_ID); // 13
609: addKeyword(Header.EXPIRES.toUpperCase(),
610: TokenTypes.EXPIRES); // 14
611: addKeyword(Header.RECORD_ROUTE.toUpperCase(),
612: TokenTypes.RECORD_ROUTE); // 15
613: addKeyword(Header.CSEQ.toUpperCase(), TokenTypes.CSEQ); // 16
614: addKeyword(Header.WWW_AUTHENTICATE.toUpperCase(),
615: TokenTypes.WWW_AUTHENTICATE); // 17
616: addKeyword(Header.PROXY_AUTHENTICATE.toUpperCase(),
617: TokenTypes.PROXY_AUTHENTICATE); // 18
618: addKeyword(Header.EVENT.toUpperCase(), TokenTypes.EVENT); // 19
619: addKeyword(Header.SUBJECT.toUpperCase(),
620: TokenTypes.SUBJECT); // 20
621: addKeyword(Header.SUPPORTED.toUpperCase(),
622: TokenTypes.SUPPORTED); // 21
623: addKeyword(Header.ALLOW_EVENTS.toUpperCase(),
624: TokenTypes.ALLOW_EVENTS); // 22
625: addKeyword(Header.ACCEPT_CONTACT.toUpperCase(),
626: TokenTypes.ACCEPT_CONTACT); // 23
627: // And now the dreaded short forms....
628: addKeyword(SIPConstants.TOKEN_LETTER_C.toUpperCase(),
629: TokenTypes.CONTENT_TYPE);
630: // CR fix
631: addKeyword(SIPConstants.TOKEN_LETTER_F.toUpperCase(),
632: TokenTypes.FROM);
633: addKeyword(SIPConstants.TOKEN_LETTER_I.toUpperCase(),
634: TokenTypes.CALL_ID);
635: addKeyword(SIPConstants.TOKEN_LETTER_M.toUpperCase(),
636: TokenTypes.CONTACT);
637: addKeyword(SIPConstants.TOKEN_LETTER_E.toUpperCase(),
638: TokenTypes.CONTENT_ENCODING);
639: addKeyword(SIPConstants.TOKEN_LETTER_L.toUpperCase(),
640: TokenTypes.CONTENT_LENGTH);
641: addKeyword(SIPConstants.TOKEN_LETTER_C.toUpperCase(),
642: TokenTypes.CONTENT_TYPE);
643: addKeyword(SIPConstants.TOKEN_LETTER_T.toUpperCase(),
644: TokenTypes.TO);
645: addKeyword(SIPConstants.TOKEN_LETTER_V.toUpperCase(),
646: TokenTypes.VIA);
647: addKeyword(SIPConstants.TOKEN_LETTER_O.toUpperCase(),
648: TokenTypes.EVENT);
649: addKeyword(SIPConstants.TOKEN_LETTER_S.toUpperCase(),
650: TokenTypes.SUBJECT);
651: addKeyword(SIPConstants.TOKEN_LETTER_K.toUpperCase(),
652: TokenTypes.SUPPORTED);
653: addKeyword(SIPConstants.TOKEN_LETTER_U.toUpperCase(),
654: TokenTypes.ALLOW_EVENTS);
655: addKeyword(SIPConstants.TOKEN_LETTER_A.toUpperCase(),
656: TokenTypes.ACCEPT_CONTACT);
657: } else if (lexerName.equals("status_lineLexer")
658: || lexerName.equals("request_lineLexer")) {
659: addKeyword(SIPConstants.SCHEME_SIP.toUpperCase(),
660: TokenTypes.SIP);
661: addKeyword(SIPConstants.SCHEME_SIPS.toUpperCase(),
662: TokenTypes.SIPS);
663: } else if (lexerName.equals("sip_urlLexer")) {
664: addKeyword(SIPConstants.SCHEME_TEL.toUpperCase(),
665: TokenTypes.TEL);
666: addKeyword(SIPConstants.SCHEME_SIP.toUpperCase(),
667: TokenTypes.SIP);
668: addKeyword(SIPConstants.SCHEME_SIPS.toUpperCase(),
669: TokenTypes.SIPS);
670: }
671:
672: }
673:
674: }
675:
676: }
|