001: //
002: // Informa -- RSS Library for Java
003: // Copyright (c) 2002 by Niko Schmuck
004: //
005: // Niko Schmuck
006: // http://sourceforge.net/projects/informa
007: // mailto:niko_schmuck@users.sourceforge.net
008: //
009: // This library is free software.
010: //
011: // You may redistribute it and/or modify it under the terms of the GNU
012: // Lesser General Public License as published by the Free Software Foundation.
013: //
014: // Version 2.1 of the license should be included with this distribution in
015: // the file LICENSE. If the license is not included with this distribution,
016: // you may find a copy at the FSF web site at 'www.gnu.org' or 'www.fsf.org',
017: // or you may write to the Free Software Foundation, 675 Mass Ave, Cambridge,
018: // MA 02139 USA.
019: //
020: // This library is distributed in the hope that it will be useful,
021: // but WITHOUT ANY WARRANTY; without even the implied warranty of
022: // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
023: // Lesser General Public License for more details.
024: //
025:
026: // $Id: ParserUtils.java,v 1.18 2006/12/04 23:43:27 italobb Exp $
027:
028: package de.nava.informa.utils;
029:
030: import java.net.URL;
031: import java.text.SimpleDateFormat;
032: import java.util.Calendar;
033: import java.util.Date;
034: import java.util.HashMap;
035: import java.util.List;
036: import java.util.Locale;
037: import java.util.Map;
038: import java.util.TimeZone;
039:
040: import org.apache.commons.logging.Log;
041: import org.apache.commons.logging.LogFactory;
042: import org.jdom.Element;
043: import org.jdom.Namespace;
044:
045: /**
046: * Utility class providing convenience methods to (XML) parsing mechanisms.
047: *
048: * @author Niko Schmuck (niko@nava.de)
049: */
050: public final class ParserUtils {
051:
052: private static Log logger = LogFactory.getLog(ParserUtils.class);
053:
054: private ParserUtils() {
055: }
056:
057: public static URL getURL(String toURL) {
058: URL result = null;
059: try {
060: if ((toURL != null) && (toURL.trim().length() > 0))
061: result = new URL(toURL);
062: } catch (java.net.MalformedURLException e) {
063: logger.warn("Invalid URL " + toURL + " given.");
064: }
065: return result;
066: }
067:
068: public static Namespace getDefaultNS(Element element) {
069: return getNamespace(element, "");
070: }
071:
072: public static Namespace getNamespace(Element element, String prefix) {
073: // Namespace ns = null;
074: // Iterator it = element.getAdditionalNamespaces().iterator();
075: // while (it.hasNext()) {
076: // Namespace curNS = (Namespace) it.next();
077: // if (curNS.getPrefix().equals(prefix)) {
078: // ns = curNS;
079: // break;
080: // }
081: // }
082: Namespace ns = (prefix == null) ? element.getNamespace("")
083: : element.getNamespace(prefix);
084: return ns;
085: }
086:
087: private static SimpleDateFormat[] dateFormats = null;
088:
089: static {
090: final String[] possibleDateFormats = {
091: "EEE, dd MMM yyyy HH:mm:ss z", //RFC_822
092: "EEE, dd MMM yyyy HH:mm zzzz",
093: "yyyy-MM-dd'T'HH:mm:ssZ",
094: "yyyy-MM-dd'T'HH:mm:ss.SSSzzzz", //Blogger Atom feed has millisecs also
095: "yyyy-MM-dd'T'HH:mm:sszzzz",
096: "yyyy-MM-dd'T'HH:mm:ss z",
097: "yyyy-MM-dd'T'HH:mm:ssz", //ISO_8601
098: "yyyy-MM-dd'T'HH:mm:ss", "yyyy-MM-dd'T'HHmmss.SSSz",
099: "yyyy-MM-dd'T'HH:mm:ss", "yyyy-MM-dd" };
100:
101: dateFormats = new SimpleDateFormat[possibleDateFormats.length];
102: TimeZone gmtTZ = TimeZone.getTimeZone("GMT");
103: for (int i = 0; i < possibleDateFormats.length; i++) {
104: dateFormats[i] = new SimpleDateFormat(
105: possibleDateFormats[i], Locale.ENGLISH);
106: dateFormats[i].setTimeZone(gmtTZ);
107: }
108:
109: }
110:
111: // Mon, 07 Oct 2002 03:16:15 GMT
112: private static SimpleDateFormat dfA = new SimpleDateFormat(
113: "EEE, dd MMM yyyy HH:mm:ss z", Locale.ENGLISH);
114:
115: // 2002-09-19T02:51:16+0200
116: private static SimpleDateFormat dfB = new SimpleDateFormat(
117: "yyyy-MM-dd'T'HH:mm:ssZ");
118:
119: // 2002-09-19T02:51:16
120: private static SimpleDateFormat dfC = new SimpleDateFormat(
121: "yyyy-MM-dd'T'HH:mm:ss");
122:
123: // 2002-09-19
124: private static SimpleDateFormat dfD = new SimpleDateFormat(
125: "yyyy-MM-dd");
126:
127: public static Date getDate(String strdate) {
128: Date result = null;
129: strdate = strdate.trim();
130: if (strdate.length() > 10) {
131:
132: // TODO deal with +4:00 (no zero before hour)
133: if ((strdate.substring(strdate.length() - 5).indexOf("+") == 0 || strdate
134: .substring(strdate.length() - 5).indexOf("-") == 0)
135: && strdate.substring(strdate.length() - 5).indexOf(
136: ":") == 2) {
137:
138: String sign = strdate.substring(strdate.length() - 5,
139: strdate.length() - 4);
140:
141: strdate = strdate.substring(0, strdate.length() - 5)
142: + sign + "0"
143: + strdate.substring(strdate.length() - 4);
144: // logger.debug("CASE1 : new date " + strdate + " ? "
145: // + strdate.substring(0, strdate.length() - 5));
146:
147: }
148:
149: String dateEnd = strdate.substring(strdate.length() - 6);
150:
151: // try to deal with -05:00 or +02:00 at end of date
152: // replace with -0500 or +0200
153: if ((dateEnd.indexOf("-") == 0 || dateEnd.indexOf("+") == 0)
154: && dateEnd.indexOf(":") == 3) {
155: // TODO deal with GMT-00:03
156: if ("GMT".equals(strdate.substring(
157: strdate.length() - 9, strdate.length() - 6))) {
158: logger
159: .debug("General time zone with offset, no change ");
160: } else {
161: // continue treatment
162: String oldDate = strdate;
163: String newEnd = dateEnd.substring(0, 3)
164: + dateEnd.substring(4);
165: strdate = oldDate
166: .substring(0, oldDate.length() - 6)
167: + newEnd;
168: // logger.debug("!!modifying string ->"+strdate);
169: }
170: }
171: }
172: int i = 0;
173: while (i < dateFormats.length) {
174: try {
175: result = dateFormats[i].parse(strdate);
176: // logger.debug("******Parsing Success "+strdate+"->"+result+" with
177: // "+dateFormats[i].toPattern());
178: break;
179: } catch (java.text.ParseException eA) {
180: logger.debug("parsing " + strdate + " ["
181: + dateFormats[i].toPattern()
182: + "] without success, trying again.");
183: i++;
184: }
185: }
186:
187: return result;
188: }
189:
190: /**
191: * Tries different date formats to parse against the given string
192: * representation to retrieve a valid Date object.
193: */
194: public static Date getDateOLD(String strdate) {
195: Date result = null;
196:
197: try {
198: result = dfA.parse(strdate);
199: } catch (java.text.ParseException eA) {
200: logger.warn("Error parsing date (A): " + eA.getMessage());
201: try {
202: result = dfB.parse(strdate);
203: } catch (java.text.ParseException eB) {
204: logger.warn("Error parsing date (B): "
205: + eB.getMessage());
206: try {
207: result = dfC.parse(strdate);
208: // try to retrieve the timezone anyway
209: result = extractTimeZone(strdate, result);
210: } catch (java.text.ParseException eC) {
211: logger.warn("Error parsing date (C): "
212: + eC.getMessage());
213: try {
214: result = dfD.parse(strdate);
215: } catch (java.text.ParseException eD) {
216: logger.warn("Error parsing date (D): "
217: + eD.getMessage());
218: eD.printStackTrace();
219: }
220: }
221: }
222: }
223: if (logger.isDebugEnabled()) {
224: logger.debug("Parsing date '" + strdate + "' resulted in: "
225: + result);
226: }
227: if (result == null) {
228: logger.warn("No appropiate date could be extracted from "
229: + strdate);
230:
231: }
232: return result;
233: }
234:
235: private static Date extractTimeZone(String strdate, Date thedate) {
236: // try to extract -06:00
237: String tzSign = strdate.substring(strdate.length() - 6, strdate
238: .length() - 5);
239: String tzHour = strdate.substring(strdate.length() - 5, strdate
240: .length() - 3);
241: String tzMin = strdate.substring(strdate.length() - 2);
242: if (tzSign.equals("-") || tzSign.equals("+")) {
243: int h = Integer.parseInt(tzHour);
244: int m = Integer.parseInt(tzMin);
245: // NOTE: this is really plus, since perspective is from GMT
246: if (tzSign.equals("+")) {
247: h = -1 * h;
248: m = -1 * m;
249: }
250: Calendar cal = Calendar.getInstance();
251: cal.setTime(thedate);
252: cal.add(Calendar.HOUR_OF_DAY, h);
253: cal.add(Calendar.MINUTE, m);
254: // calculate according the used timezone
255: cal.add(Calendar.MILLISECOND, localTimeDiff(cal
256: .getTimeZone(), thedate));
257: thedate = cal.getTime();
258: }
259: return thedate;
260: }
261:
262: private static int localTimeDiff(TimeZone tz, Date date) {
263: if (tz.inDaylightTime(date)) {
264: int dstSavings = 0;
265: if (tz.useDaylightTime()) {
266: dstSavings = 3600000; // shortcut, JDK 1.4 allows cleaner impl
267: }
268: return tz.getRawOffset() + dstSavings;
269: }
270: return tz.getRawOffset();
271: }
272:
273: public static String formatDate(Date aDate) {
274: return dfA.format(aDate);
275: }
276:
277: public static String decodeBase64(String s) {
278: //use private class
279: return Base64Decoder.decode(s);
280: }
281:
282: /**
283: * Unescapes the string by replacing some XML entities into plain symbols.
284: *
285: * @param value value to unescape.
286: *
287: * @return unescaped content.
288: */
289: public static String unEscape(String value) {
290: value = value.replaceAll("<", "<");
291: value = value.replaceAll(">", ">");
292: value = value.replaceAll("&", "&");
293: value = value.replaceAll(""", "\"");
294: value = value.replaceAll("'", "'");
295: return value;
296: }
297:
298: /**
299: * Escapes the string by replacing reserved symbols with their XML entities.
300: *
301: * @param value value to escape.
302: *
303: * @return escaped result.
304: */
305: public static String escape(String value) {
306: value = value.replaceAll("<", "<");
307: value = value.replaceAll(">", ">");
308: value = value.replaceAll("&", "&");
309: value = value.replaceAll("\"", """);
310: value = value.replaceAll("'", "'");
311: return value;
312: }
313:
314: /**
315: * Converts names of child-tags mentioned in <code>childrenNames</code> list
316: * to that given case.
317: *
318: * @param root root element.
319: * @param childrenNames names of child tags to convert.
320: */
321: public static void matchCaseOfChildren(Element root,
322: String[] childrenNames) {
323: if (root == null || childrenNames.length == 0)
324: return;
325:
326: // Prepare list of names
327: int namesCount = childrenNames.length;
328: Map<String, String> names = new HashMap<String, String>(
329: namesCount);
330: for (int i = 0; i < namesCount; i++) {
331: String childName = childrenNames[i];
332: if (childName != null) {
333: String lower = childName.toLowerCase();
334: if (!names.containsKey(lower))
335: names.put(lower, childName);
336: }
337: }
338:
339: // Walk through the children elements
340: List elements = root.getChildren();
341: for (int i = 0; i < elements.size(); i++) {
342: Element child = (Element) elements.get(i);
343: String childName = child.getName().toLowerCase();
344: if (names.containsKey(childName))
345: child.setName(names.get(childName));
346: }
347: }
348:
349: /**
350: * Converts names of child-tags mentioned in <code>childName</code> list
351: * to that given case.
352: *
353: * @param root root element.
354: * @param childName name of child tags to convert.
355: */
356: public static void matchCaseOfChildren(Element root,
357: String childName) {
358: if (root == null || childName == null)
359: return;
360:
361: // Walk through the children elements
362: List elements = root.getChildren();
363: for (int i = 0; i < elements.size(); i++) {
364: Element child = (Element) elements.get(i);
365: String name = child.getName().toLowerCase();
366: if (name.equalsIgnoreCase(childName))
367: child.setName(childName);
368: }
369: }
370: }
|