001: /*
002: * Copyright 2004 Sun Microsystems, Inc.
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: *
016: */
017: package com.sun.syndication.io.impl;
018:
019: import java.text.DateFormat;
020: import java.text.SimpleDateFormat;
021: import java.text.ParsePosition;
022: import java.util.Date;
023: import java.util.TimeZone;
024: import java.util.Locale;
025:
026: /**
027: * A helper class that parses Dates out of Strings with date time in RFC822 and W3CDateTime
028: * formats plus the variants Atom (0.3) and RSS (0.9, 0.91, 0.92, 0.93, 0.94, 1.0 and 2.0)
029: * specificators added to those formats.
030: * <p/>
031: * It uses the JDK java.text.SimpleDateFormat class attemtping the parse using a mask for
032: * each one of the possible formats.
033: * <p/>
034: *
035: * @author Alejandro Abdelnur
036: *
037: */
038: public class DateParser {
039:
040: private static String[] ADDITIONAL_MASKS;
041:
042: static {
043: ADDITIONAL_MASKS = PropertiesLoader.getPropertiesLoader()
044: .getTokenizedProperty("datetime.extra.masks", "|");
045: }
046:
047: // order is like this because the SimpleDateFormat.parse does not fail with exception
048: // if it can parse a valid date out of a substring of the full string given the mask
049: // so we have to check the most complete format first, then it fails with exception
050: private static final String[] RFC822_MASKS = {
051: "EEE, dd MMM yy HH:mm:ss z", "EEE, dd MMM yy HH:mm z",
052: "dd MMM yy HH:mm:ss z", "dd MMM yy HH:mm z" };
053:
054: // order is like this because the SimpleDateFormat.parse does not fail with exception
055: // if it can parse a valid date out of a substring of the full string given the mask
056: // so we have to check the most complete format first, then it fails with exception
057: private static final String[] W3CDATETIME_MASKS = {
058: "yyyy-MM-dd'T'HH:mm:ss.SSSz", "yyyy-MM-dd't'HH:mm:ss.SSSz",
059: "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'",
060: "yyyy-MM-dd't'HH:mm:ss.SSS'z'",
061: "yyyy-MM-dd'T'HH:mm:ssz",
062: "yyyy-MM-dd't'HH:mm:ssz",
063: "yyyy-MM-dd'T'HH:mm:ss'Z'",
064: "yyyy-MM-dd't'HH:mm:ss'z'",
065: "yyyy-MM-dd'T'HH:mmz", // together with logic in the parseW3CDateTime they
066: "yyyy-MM'T'HH:mmz", // handle W3C dates without time forcing them to be GMT
067: "yyyy'T'HH:mmz", "yyyy-MM-dd't'HH:mmz",
068: "yyyy-MM-dd'T'HH:mm'Z'", "yyyy-MM-dd't'HH:mm'z'",
069: "yyyy-MM-dd", "yyyy-MM", "yyyy" };
070:
071: /**
072: * The masks used to validate and parse the input to this Atom date.
073: * These are a lot more forgiving than what the Atom spec allows.
074: * The forms that are invalid according to the spec are indicated.
075: */
076: private static final String[] masks = {
077: "yyyy-MM-dd'T'HH:mm:ss.SSSz",
078: "yyyy-MM-dd't'HH:mm:ss.SSSz", // invalid
079: "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'",
080: "yyyy-MM-dd't'HH:mm:ss.SSS'z'", // invalid
081: "yyyy-MM-dd'T'HH:mm:ssz", "yyyy-MM-dd't'HH:mm:ssz", // invalid
082: "yyyy-MM-dd'T'HH:mm:ss'Z'", "yyyy-MM-dd't'HH:mm:ss'z'", // invalid
083: "yyyy-MM-dd'T'HH:mmz", // invalid
084: "yyyy-MM-dd't'HH:mmz", // invalid
085: "yyyy-MM-dd'T'HH:mm'Z'", // invalid
086: "yyyy-MM-dd't'HH:mm'z'", // invalid
087: "yyyy-MM-dd", "yyyy-MM", "yyyy" };
088:
089: /**
090: * Private constructor to avoid DateParser instances creation.
091: */
092: private DateParser() {
093: }
094:
095: /**
096: * Parses a Date out of a string using an array of masks.
097: * <p/>
098: * It uses the masks in order until one of them succedes or all fail.
099: * <p/>
100: *
101: * @param masks array of masks to use for parsing the string
102: * @param sDate string to parse for a date.
103: * @return the Date represented by the given string using one of the given masks.
104: * It returns <b>null</b> if it was not possible to parse the the string with any of the masks.
105: *
106: */
107: private static Date parseUsingMask(String[] masks, String sDate) {
108: sDate = (sDate != null) ? sDate.trim() : null;
109: ParsePosition pp = null;
110: Date d = null;
111: for (int i = 0; d == null && i < masks.length; i++) {
112: DateFormat df = new SimpleDateFormat(masks[i], Locale.US);
113: //df.setLenient(false);
114: df.setLenient(true);
115: try {
116: pp = new ParsePosition(0);
117: d = df.parse(sDate, pp);
118: if (pp.getIndex() != sDate.length()) {
119: d = null;
120: }
121: //System.out.println("pp["+pp.getIndex()+"] s["+sDate+" m["+masks[i]+"] d["+d+"]");
122: } catch (Exception ex1) {
123: //System.out.println("s: "+sDate+" m: "+masks[i]+" d: "+null);
124: }
125: }
126: return d;
127: }
128:
129: /**
130: * Parses a Date out of a String with a date in RFC822 format.
131: * <p/>
132: * It parsers the following formats:
133: * <ul>
134: * <li>"EEE, dd MMM yyyy HH:mm:ss z"</li>
135: * <li>"EEE, dd MMM yyyy HH:mm z"</li>
136: * <li>"EEE, dd MMM yy HH:mm:ss z"</li>
137: * <li>"EEE, dd MMM yy HH:mm z"</li>
138: * <li>"dd MMM yyyy HH:mm:ss z"</li>
139: * <li>"dd MMM yyyy HH:mm z"</li>
140: * <li>"dd MMM yy HH:mm:ss z"</li>
141: * <li>"dd MMM yy HH:mm z"</li>
142: * </ul>
143: * <p/>
144: * Refer to the java.text.SimpleDateFormat javadocs for details on the format of each element.
145: * <p/>
146: * @param sDate string to parse for a date.
147: * @return the Date represented by the given RFC822 string.
148: * It returns <b>null</b> if it was not possible to parse the given string into a Date.
149: *
150: */
151: public static Date parseRFC822(String sDate) {
152: int utIndex = sDate.indexOf(" UT");
153: if (utIndex > -1) {
154: String pre = sDate.substring(0, utIndex);
155: String post = sDate.substring(utIndex + 3);
156: sDate = pre + " GMT" + post;
157: }
158: return parseUsingMask(RFC822_MASKS, sDate);
159: }
160:
161: /**
162: * Parses a Date out of a String with a date in W3C date-time format.
163: * <p/>
164: * It parsers the following formats:
165: * <ul>
166: * <li>"yyyy-MM-dd'T'HH:mm:ssz"</li>
167: * <li>"yyyy-MM-dd'T'HH:mmz"</li>
168: * <li>"yyyy-MM-dd"</li>
169: * <li>"yyyy-MM"</li>
170: * <li>"yyyy"</li>
171: * </ul>
172: * <p/>
173: * Refer to the java.text.SimpleDateFormat javadocs for details on the format of each element.
174: * <p/>
175: * @param sDate string to parse for a date.
176: * @return the Date represented by the given W3C date-time string.
177: * It returns <b>null</b> if it was not possible to parse the given string into a Date.
178: *
179: */
180: public static Date parseW3CDateTime(String sDate) {
181: // if sDate has time on it, it injects 'GTM' before de TZ displacement to
182: // allow the SimpleDateFormat parser to parse it properly
183: int tIndex = sDate.indexOf("T");
184: if (tIndex > -1) {
185: if (sDate.endsWith("Z")) {
186: sDate = sDate.substring(0, sDate.length() - 1)
187: + "+00:00";
188: }
189: int tzdIndex = sDate.indexOf("+", tIndex);
190: if (tzdIndex == -1) {
191: tzdIndex = sDate.indexOf("-", tIndex);
192: }
193: if (tzdIndex > -1) {
194: String pre = sDate.substring(0, tzdIndex);
195: int secFraction = pre.indexOf(",");
196: if (secFraction > -1) {
197: pre = pre.substring(0, secFraction);
198: }
199: String post = sDate.substring(tzdIndex);
200: sDate = pre + "GMT" + post;
201: }
202: } else {
203: sDate += "T00:00GMT";
204: }
205: return parseUsingMask(W3CDATETIME_MASKS, sDate);
206: }
207:
208: /**
209: * Parses a Date out of a String with a date in W3C date-time format or
210: * in a RFC822 format.
211: * <p>
212: * @param sDate string to parse for a date.
213: * @return the Date represented by the given W3C date-time string.
214: * It returns <b>null</b> if it was not possible to parse the given string into a Date.
215: *
216: * */
217: public static Date parseDate(String sDate) {
218: Date d = parseW3CDateTime(sDate);
219: if (d == null) {
220: d = parseRFC822(sDate);
221: if (d == null && ADDITIONAL_MASKS.length > 0) {
222: d = parseUsingMask(ADDITIONAL_MASKS, sDate);
223: }
224: }
225: return d;
226: }
227:
228: /**
229: * create a RFC822 representation of a date.
230: * <p/>
231: * Refer to the java.text.SimpleDateFormat javadocs for details on the format of each element.
232: * <p/>
233: * @param date Date to parse
234: * @return the RFC822 represented by the given Date
235: * It returns <b>null</b> if it was not possible to parse the date.
236: *
237: */
238: public static String formatRFC822(Date date) {
239: SimpleDateFormat dateFormater = new SimpleDateFormat(
240: "EEE, dd MMM yyyy HH:mm:ss 'GMT'", Locale.US);
241: dateFormater.setTimeZone(TimeZone.getTimeZone("GMT"));
242: return dateFormater.format(date);
243: }
244:
245: /**
246: * create a W3C Date Time representation of a date.
247: * <p/>
248: * Refer to the java.text.SimpleDateFormat javadocs for details on the format of each element.
249: * <p/>
250: * @param date Date to parse
251: * @return the W3C Date Time represented by the given Date
252: * It returns <b>null</b> if it was not possible to parse the date.
253: *
254: */
255: public static String formatW3CDateTime(Date date) {
256: SimpleDateFormat dateFormater = new SimpleDateFormat(
257: "yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.US);
258: dateFormater.setTimeZone(TimeZone.getTimeZone("GMT"));
259: return dateFormater.format(date);
260: }
261:
262: }
|