001: /*
002: * @(#)DateParser.java 1.11 06/10/10
003: *
004: * Copyright 1990-2006 Sun Microsystems, Inc. All Rights Reserved.
005: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER
006: *
007: * This program is free software; you can redistribute it and/or
008: * modify it under the terms of the GNU General Public License version
009: * 2 only, as published by the Free Software Foundation.
010: *
011: * This program is distributed in the hope that it will be useful, but
012: * WITHOUT ANY WARRANTY; without even the implied warranty of
013: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
014: * General Public License version 2 for more details (a copy is
015: * included at /legal/license.txt).
016: *
017: * You should have received a copy of the GNU General Public License
018: * version 2 along with this work; if not, write to the Free Software
019: * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
020: * 02110-1301 USA
021: *
022: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
023: * Clara, CA 95054 or visit www.sun.com if you need additional
024: * information or have any questions.
025: *
026: */
027:
028: package com.sun.cdc.io;
029:
030: import java.util.Hashtable;
031:
/**
 * A small date parser modelled on a subset of the J2SE
 * <code>java.util.Date</code> class.
 * <p>
 * It is a separate class (rather than a <code>Date</code> look-alike)
 * because the semantics of {@link #parse(String)} differ slightly: only
 * years from 1583 onward are accepted, and the "local" time zone is not
 * taken from the platform but is set explicitly with
 * {@link #setTimeZone(String)} (it defaults to GMT).
 * <p>
 * The local time zone is class-wide mutable state and is not synchronized.
 */
public class DateParser {

    // ------------------------------------------------------------------
    // Constants
    // ------------------------------------------------------------------

    private static final int MILLIS_PER_MINUTE = 60 * 1000;
    private static final int MILLIS_PER_HOUR = 60 * MILLIS_PER_MINUTE;
    private static final int MILLIS_PER_DAY = 24 * MILLIS_PER_HOUR;

    /** Julian day number that {@link #getTime()} maps to 0 ms (January 1, 1970). */
    private static final long JULIAN_DAY_OFFSET = 2440588;

    /** Julian day number of January 1, year 1 (Gregorian). */
    private static final int JAN_1_1_JULIAN_DAY = 1721426;

    /** Lowest year accepted by the constructors and by {@link #parse(String)}. */
    private static final int MIN_YEAR = 1583;

    /** Lengths of the months of a non-leap year, indexed by month (0-11). */
    private static final int[] DAYS_IN_MONTH = { 31, 28, 31, 30, 31, 30, 31,
            31, 30, 31, 30, 31 };

    /** Abbreviated month names, indexed by month (0-11). Currently unused. */
    private static final String[] MONTH_SHORTS = { "Jan", "Feb", "Mar",
            "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };

    /** Days preceding the first of each month in a non-leap year. */
    private static final int[] NUM_DAYS = { 0, 31, 59, 90, 120, 151, 181,
            212, 243, 273, 304, 334 };

    /** Days preceding the first of each month in a leap year. */
    private static final int[] LEAP_NUM_DAYS = { 0, 31, 60, 91, 121, 152,
            182, 213, 244, 274, 305, 335 };

    // Action codes stored in ttb[], parallel to wtb[].
    private static final int ACTION_IGNORE = 0;       // day-of-week names
    private static final int ACTION_PM = 1;
    private static final int ACTION_FIRST_MONTH = 2;  // 2..13 => month 0..11
    private static final int ACTION_LAST_MONTH = 13;
    private static final int ACTION_AM = 14;
    /** Codes above the month range are a zone: base + minutes west of GMT. */
    private static final int ACTION_ZONE_BASE = 10000;

    /** Words recognized by the parser (matched by prefix, ignoring case). */
    private static final String[] wtb = { "am", "pm", "monday", "tuesday",
            "wednesday", "thursday", "friday", "saturday", "sunday",
            "january", "february", "march", "april", "may", "june", "july",
            "august", "september", "october", "november", "december",
            "gmt", "ut", "utc", "est", "edt", "cst", "cdt", "mst", "mdt",
            "pst", "pdt"
            // this time zone table needs to be expanded
    };

    /** Action code for the word at the same index in {@link #wtb}. */
    private static final int[] ttb = {
            ACTION_AM, ACTION_PM,
            ACTION_IGNORE, ACTION_IGNORE, ACTION_IGNORE, ACTION_IGNORE,
            ACTION_IGNORE, ACTION_IGNORE, ACTION_IGNORE,
            2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
            ACTION_ZONE_BASE + 0, ACTION_ZONE_BASE + 0,
            ACTION_ZONE_BASE + 0,                                // GMT/UT/UTC
            ACTION_ZONE_BASE + 5 * 60, ACTION_ZONE_BASE + 4 * 60, // EST/EDT
            ACTION_ZONE_BASE + 6 * 60, ACTION_ZONE_BASE + 5 * 60, // CST/CDT
            ACTION_ZONE_BASE + 7 * 60, ACTION_ZONE_BASE + 6 * 60, // MST/MDT
            ACTION_ZONE_BASE + 8 * 60, ACTION_ZONE_BASE + 7 * 60  // PST/PDT
    };

    /**
     * Zones accepted by {@link #setTimeZone(String)}: abbreviation mapped to
     * an <code>Integer</code> offset in milliseconds, positive east of GMT.
     */
    private static final Hashtable timezones = new Hashtable();

    // ------------------------------------------------------------------
    // State
    // ------------------------------------------------------------------

    protected int year;
    protected int month;
    protected int day;
    protected int hour;
    protected int minute;
    protected int second;
    protected int milli;

    /** Offset of the local time zone in milliseconds, positive east of GMT. */
    protected static int tz_offset;

    /** True when the parsed string named a time zone or time-zone offset. */
    private boolean hasParsedZone;

    /** Parsed zone offset in minutes, positive west of GMT. */
    private int parsedZoneMinutes;

    static {
        timezones.put("GMT", new Integer(0 * MILLIS_PER_HOUR));
        timezones.put("UT", new Integer(0 * MILLIS_PER_HOUR));
        timezones.put("UTC", new Integer(0 * MILLIS_PER_HOUR));
        timezones.put("PST", new Integer(-8 * MILLIS_PER_HOUR));
        timezones.put("PDT", new Integer(-7 * MILLIS_PER_HOUR));
        timezones.put("JST", new Integer(9 * MILLIS_PER_HOUR));

        tz_offset = ((Integer) timezones.get("GMT")).intValue();
    }

    // ------------------------------------------------------------------
    // Construction
    // ------------------------------------------------------------------

    /**
     * Allocates a <code>DateParser</code> object and initializes it so that
     * it represents the instant at the start of the second specified
     * by the <code>year</code>, <code>month</code>, <code>day</code>,
     * <code>hour</code>, <code>minute</code>, and <code>second</code>
     * arguments, in the local time zone.
     *
     * @param year   the year, &gt;= 1583.
     * @param month  the month between 0-11.
     * @param day    the day of the month, between 1 and the length of
     *               that month (February 29 only in Gregorian leap years).
     * @param hour   the hours between 0-23.
     * @param minute the minutes between 0-59.
     * @param second the seconds between 0-59.
     * @throws IllegalArgumentException if any argument is out of range.
     */
    DateParser(int year, int month, int day, int hour, int minute,
            int second) {
        if (year < MIN_YEAR || month < 0 || month > 11) {
            throw new IllegalArgumentException();
        }
        // month is known to be a valid index from here on
        int lastDay = DAYS_IN_MONTH[month];
        if (month == 1 && isLeapYear(year)) {
            lastDay = 29;
        }
        if (day < 1 || day > lastDay) {
            throw new IllegalArgumentException();
        }
        if (hour < 0 || hour > 23 || minute < 0 || minute > 59
                || second < 0 || second > 59) {
            throw new IllegalArgumentException();
        }

        this.year = year;
        this.month = month;
        this.day = day;
        this.hour = hour;
        this.minute = minute;
        this.second = second;
        this.milli = 0;
    }

    /**
     * Allocates a <code>DateParser</code> object and initializes it so that
     * it represents the date and time indicated by the string
     * <code>s</code>, which is interpreted as described for
     * {@link #parse(String)}.
     *
     * @param s a string representation of the date.
     * @throws IllegalArgumentException if <code>s</code> is
     *         <code>null</code> or cannot be parsed.
     * @see #parse(String)
     */
    DateParser(String s) {
        internalParse(s);
    }

    // ------------------------------------------------------------------
    // Static API
    // ------------------------------------------------------------------

    /**
     * Set the local time zone for the DateParser class.
     * <code>tz</code> must be in abbreviated format, e.g. "PST"
     * for Pacific Standard Time. A <code>null</code> or unknown
     * abbreviation leaves the current setting unchanged.
     *
     * @param tz the abbreviated time-zone name.
     */
    static void setTimeZone(String tz) {
        if (tz == null) {
            return;
        }
        Integer offset = (Integer) timezones.get(tz);
        if (offset != null) {
            tz_offset = offset.intValue();
        }
    }

    /**
     * Attempts to interpret the string <code>s</code> as a representation
     * of a date and time. If the attempt is successful, the time
     * indicated is returned represented as the distance, measured in
     * milliseconds, of that time from the epoch (00:00:00 GMT on
     * January 1, 1970). If the attempt fails, an
     * <code>IllegalArgumentException</code> is thrown.
     * <p>
     * It accepts many syntaxes; in particular, it recognizes the IETF
     * standard date syntax: "Sat, 12 Aug 1995 13:30:00 GMT". It also
     * understands the continental U.S. time-zone abbreviations, but for
     * general use, a time-zone offset should be used: "Sat, 12 Aug 1995
     * 13:30:00 GMT+0430" (4 hours, 30 minutes east of the Greenwich
     * meridian). If no time zone is specified, the local time zone (see
     * {@link #setTimeZone(String)}) is assumed. GMT and UTC are considered
     * equivalent.
     * <p>
     * The string <code>s</code> is processed from left to right, looking
     * for data of interest. Any material in <code>s</code> that is within
     * the ASCII parenthesis characters <code>(</code> and <code>)</code>
     * is ignored. Parentheses may be nested. Otherwise, the only
     * characters permitted within <code>s</code> are these ASCII
     * characters:
     * <blockquote><pre>
     * abcdefghijklmnopqrstuvwxyz
     * ABCDEFGHIJKLMNOPQRSTUVWXYZ
     * 0123456789,+-:/</pre></blockquote>
     * and whitespace characters.<p>
     * A consecutive sequence of decimal digits is treated as a decimal
     * number:<ul>
     * <li>If a number is preceded by <code>+</code>, or by <code>-</code>
     *     after a year has been recognized, then the number is a time-zone
     *     offset. If the number is less than 24, it is an offset measured
     *     in hours. Otherwise, it is regarded as an offset in hours and
     *     minutes, expressed in 24-hour time format without punctuation.
     *     A preceding <code>-</code> means a westward offset. Time zone
     *     offsets are always relative to UTC (Greenwich). Thus, for
     *     example, <code>-5</code> occurring in the string would mean
     *     "five hours west of Greenwich" and <code>+0430</code> would mean
     *     "four hours and thirty minutes east of Greenwich." It is
     *     permitted for the string to specify <code>GMT</code>,
     *     <code>UT</code>, or <code>UTC</code> redundantly - for example,
     *     <code>GMT-5</code> or <code>utc+0430</code>.
     * <li>If a number is 70 or greater, it is regarded as a year number
     *     (two-digit values have 1900 added). It must be followed by a
     *     space, comma, slash, or end of string.
     * <li>If the number is followed by a colon, it is regarded as an hour,
     *     unless an hour has already been recognized, in which case it is
     *     regarded as a minute.
     * <li>If the number is followed by a slash, it is regarded as a month
     *     (it is decreased by 1 to produce a number in the range
     *     <code>0</code> to <code>11</code>), unless a month has already
     *     been recognized, in which case it is regarded as a day of the
     *     month.
     * <li>If the number is followed by whitespace, a comma, a hyphen, or
     *     end of string, then if an hour has been recognized but not a
     *     minute, it is regarded as a minute; otherwise, if a minute has
     *     been recognized but not a second, it is regarded as a second;
     *     otherwise, it is regarded as a day of the month. </ul><p>
     * A consecutive sequence of two or more letters is regarded as a word
     * and treated as follows (the word table is searched from its end, so
     * an ambiguous prefix resolves to the entry listed last):<ul>
     * <li>A word that matches <code>AM</code>, ignoring case, turns an
     *     hour of <code>12</code> into <code>0</code> (the parse fails if
     *     an hour has not been recognized or is less than <code>1</code>
     *     or greater than <code>12</code>).
     * <li>A word that matches <code>PM</code>, ignoring case, adds
     *     <code>12</code> to an hour below <code>12</code> (the parse
     *     fails if an hour has not been recognized or is less than
     *     <code>1</code> or greater than <code>12</code>).
     * <li>Any word that matches any prefix of <code>SUNDAY, MONDAY,
     *     TUESDAY, WEDNESDAY, THURSDAY, FRIDAY</code>, or
     *     <code>SATURDAY</code>, ignoring case, is ignored. For example,
     *     <code>sat, Friday, TUE</code>, and <code>Thurs</code> are
     *     ignored.
     * <li>Otherwise, any word that matches any prefix of <code>JANUARY,
     *     FEBRUARY, MARCH, APRIL, MAY, JUNE, JULY, AUGUST, SEPTEMBER,
     *     OCTOBER, NOVEMBER</code>, or <code>DECEMBER</code>, ignoring
     *     case, is recognized as specifying a month and is converted to a
     *     number (<code>0</code> to <code>11</code>). For example,
     *     <code>aug, Sept, april</code>, and <code>NOV</code> are
     *     recognized as months. So is <code>Ma</code>, which is recognized
     *     as <code>MAY</code>, not <code>MARCH</code>.
     * <li>Any word that matches <code>GMT, UT</code>, or <code>UTC</code>,
     *     ignoring case, is treated as referring to UTC.
     * <li>Any word that matches <code>EST, CST, MST</code>, or
     *     <code>PST</code>, ignoring case, is recognized as referring to
     *     the time zone in North America that is five, six, seven, or
     *     eight hours west of Greenwich, respectively. Any word that
     *     matches <code>EDT, CDT, MDT</code>, or <code>PDT</code>,
     *     ignoring case, is recognized as referring to the same time zone,
     *     respectively, during daylight saving time.</ul><p>
     * Once the entire string s has been scanned, it is converted to a time
     * result in one of two ways. If a time zone or time-zone offset has
     * been recognized, then the year, month, day of month, hour, minute,
     * and second are interpreted in UTC and then the time-zone offset is
     * applied. Otherwise, the year, month, day of month, hour, minute, and
     * second are interpreted in the local time zone.
     * <p>
     * Day, hour, minute and second values are not range-checked; values
     * past the end of their unit simply carry into the result.
     *
     * @param s a string to be parsed as a date.
     * @return the distance in milliseconds from January 1, 1970,
     *         00:00:00 GMT represented by the string argument.
     * @throws IllegalArgumentException if <code>s</code> is
     *         <code>null</code>, is not a recognizable date, names a
     *         month outside 1-12, or indicates a year less than 1583.
     */
    public static long parse(String s) {
        return (new DateParser(s)).getTime();
    }

    // ------------------------------------------------------------------
    // Accessors
    // ------------------------------------------------------------------

    /** Return the year represented by this date. */
    int getYear() {
        return year;
    }

    /** Return the month (0-11) represented by this date. */
    int getMonth() {
        return month;
    }

    /** Return the day of the month represented by this date. */
    int getDay() {
        return day;
    }

    /** Return the hour represented by this date. */
    int getHour() {
        return hour;
    }

    /** Return the minute represented by this date. */
    int getMinute() {
        return minute;
    }

    /** Return the second represented by this date. */
    int getSecond() {
        return second;
    }

    /**
     * Return the number of milliseconds since 1/1/1970 00:00:00 GMT
     * represented by this date.
     * <p>
     * If this object was parsed from a string that named a time zone or
     * offset, that zone is used; otherwise the fields are taken to be in
     * the local time zone current at the time of this call.
     */
    long getTime() {
        long dayMillis = julianDayToMillis(computeJulianDay(year, month, day));

        int millisInDay = ((hour * 60 + minute) * 60 + second) * 1000 + milli;

        long zoneCorrection;
        if (hasParsedZone) {
            // parsed offsets are minutes west of GMT: UTC = fields + offset
            zoneCorrection = parsedZoneMinutes * (long) MILLIS_PER_MINUTE;
        } else {
            // tz_offset is millis east of GMT: UTC = local - offset
            zoneCorrection = -(long) tz_offset;
        }
        return dayMillis + millisInDay + zoneCorrection;
    }

    // public String toString() {
    //     return "" + month + "/" + day + "/" + year
    //             + " " + hour + ":" + minute + ":" + second;
    // }

    // ------------------------------------------------------------------
    // Calendar arithmetic
    // ------------------------------------------------------------------

    /** Gregorian leap-year rule. */
    private static boolean isLeapYear(int year) {
        return year % 4 == 0 && (year % 100 != 0 || year % 400 == 0);
    }

    /**
     * Compute the Julian day number of a Gregorian calendar date.
     *
     * @param year  the year.
     * @param month the month, 0-11.
     * @param day   the day of the month (1-based).
     */
    private static long computeJulianDay(int year, int month, int day) {
        int y = year - 1;
        long julianDay = 365L * y + floorDivide(y, 4)
                + (JAN_1_1_JULIAN_DAY - 3);

        // Add 2 because Gregorian calendar starts 2 days after Julian calendar
        julianDay += floorDivide(y, 400) - floorDivide(y, 100) + 2;
        julianDay += isLeapYear(year) ? LEAP_NUM_DAYS[month] : NUM_DAYS[month];
        julianDay += day;

        return julianDay;
    }

    /**
     * Divide two long integers, returning the floor of the quotient.
     * <p>
     * Unlike the built-in division, this is mathematically well-behaved.
     * E.g., <code>-1/4</code> =&gt; 0
     * but <code>floorDivide(-1,4)</code> =&gt; -1.
     * @param numerator the numerator
     * @param denominator a divisor which must be &gt; 0
     * @return the floor of the quotient.
     */
    private static long floorDivide(long numerator, long denominator) {
        // We do this computation in order to handle
        // a numerator of Long.MIN_VALUE correctly
        return (numerator >= 0) ? numerator / denominator
                : ((numerator + 1) / denominator) - 1;
    }

    /** Convert a Julian day number to milliseconds since 1/1/1970. */
    private static long julianDayToMillis(long julian) {
        return (julian - JULIAN_DAY_OFFSET) * MILLIS_PER_DAY;
    }

    // ------------------------------------------------------------------
    // Parsing
    // ------------------------------------------------------------------

    /** Build the exception thrown for any unparseable input. */
    private static IllegalArgumentException syntaxError(String s) {
        return new IllegalArgumentException("Unparseable date: " + s);
    }

    /**
     * Scan <code>s</code> as described for {@link #parse(String)} and
     * store the recognized fields (and zone, if any) in this object.
     *
     * @throws IllegalArgumentException on any syntax error.
     */
    private void internalParse(String s) {
        if (s == null) {
            throw syntaxError(s);
        }

        // -1 marks a field that has not been recognized yet
        int yr = -1;
        int mon = -1;
        int mday = -1;
        int hr = -1;
        int min = -1;
        int sec = -1;
        int zoneMinutes = 0;      // minutes west of GMT
        boolean zoneSeen = false;
        int prevc = 0;            // last punctuation seen before a number

        int limit = s.length();
        int i = 0;
        while (i < limit) {
            int c = s.charAt(i);
            i++;
            if (c <= ' ' || c == ',') {
                continue;
            }

            if (c == '(') { // skip (possibly nested) comments
                int depth = 1;
                while (i < limit) {
                    c = s.charAt(i);
                    i++;
                    if (c == '(') {
                        depth++;
                    } else if (c == ')') {
                        if (--depth <= 0) {
                            break;
                        }
                    }
                }
                continue;
            }

            if ('0' <= c && c <= '9') {
                int n = c - '0';
                // After this loop c is the character following the number,
                // or the number's last digit if the string ended.
                while (i < limit && '0' <= (c = s.charAt(i)) && c <= '9') {
                    if (n > (Integer.MAX_VALUE - 9) / 10) {
                        throw syntaxError(s); // would overflow int
                    }
                    n = n * 10 + (c - '0');
                    i++;
                }

                if (prevc == '+' || (prevc == '-' && yr >= 0)) {
                    // timezone offset
                    if (n < 24) {
                        n = n * 60; // EG. "GMT-3"
                    } else {
                        n = n % 100 + n / 100 * 60; // eg "GMT-0430"
                    }
                    if (prevc == '+') { // plus means east of GMT
                        n = -n;
                    }
                    // only a zero offset (e.g. a preceding "GMT") may be
                    // refined by a numeric offset
                    if (zoneSeen && zoneMinutes != 0) {
                        throw syntaxError(s);
                    }
                    zoneMinutes = n;
                    zoneSeen = true;
                } else if (n >= 70) {
                    if (yr >= 0) {
                        throw syntaxError(s);
                    }
                    if (c <= ' ' || c == ',' || c == '/' || i >= limit) {
                        yr = n < 100 ? n + 1900 : n;
                    } else {
                        throw syntaxError(s);
                    }
                } else if (c == ':') {
                    if (hr < 0) {
                        hr = n;
                    } else if (min < 0) {
                        min = n;
                    } else {
                        throw syntaxError(s);
                    }
                } else if (c == '/') {
                    if (mon < 0) {
                        mon = n - 1;
                    } else if (mday < 0) {
                        mday = n;
                    } else {
                        throw syntaxError(s);
                    }
                } else if (i < limit && c != ',' && c > ' ' && c != '-') {
                    throw syntaxError(s);
                } else if (hr >= 0 && min < 0) {
                    min = n;
                } else if (min >= 0 && sec < 0) {
                    sec = n;
                } else if (mday < 0) {
                    mday = n;
                } else {
                    throw syntaxError(s);
                }
                prevc = 0;
                continue;
            }

            if (c == '/' || c == ':' || c == '+' || c == '-') {
                prevc = c;
                continue;
            }

            // Anything else must start a word of at least two letters.
            int st = i - 1;
            while (i < limit) {
                c = s.charAt(i);
                if (!(('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z'))) {
                    break;
                }
                i++;
            }
            if (i <= st + 1) {
                throw syntaxError(s);
            }

            // Search from the end so later entries win on ambiguous prefixes.
            int k = wtb.length;
            while (--k >= 0) {
                if (wtb[k].regionMatches(true, 0, s, st, i - st)) {
                    break;
                }
            }
            if (k < 0) {
                throw syntaxError(s);
            }

            int action = ttb[k];
            if (action == ACTION_PM) {
                if (hr > 12 || hr < 1) {
                    throw syntaxError(s);
                }
                if (hr < 12) {
                    hr += 12;
                }
            } else if (action == ACTION_AM) {
                if (hr > 12 || hr < 1) {
                    throw syntaxError(s);
                }
                if (hr == 12) {
                    hr = 0;
                }
            } else if (action != ACTION_IGNORE) {
                if (action <= ACTION_LAST_MONTH) { // month!
                    if (mon >= 0) {
                        throw syntaxError(s);
                    }
                    mon = action - ACTION_FIRST_MONTH;
                } else {
                    zoneMinutes = action - ACTION_ZONE_BASE;
                    zoneSeen = true;
                }
            }
            prevc = 0;
        }

        // A numeric month above 12 would otherwise index past the
        // cumulative-day tables in getTime().
        if (yr < MIN_YEAR || mon < 0 || mon > 11 || mday < 0) {
            throw syntaxError(s);
        }

        this.year = yr;
        this.month = mon;
        this.day = mday;
        this.hour = hr < 0 ? 0 : hr;
        this.minute = min < 0 ? 0 : min;
        this.second = sec < 0 ? 0 : sec;
        this.milli = 0;
        this.hasParsedZone = zoneSeen;
        this.parsedZoneMinutes = zoneMinutes;
    }
}
|