001: /**
002: * RSS framework and reader
003: * Copyright (C) 2004 Christian Robert
004: *
005: * This library is free software; you can redistribute it and/or
006: * modify it under the terms of the GNU Lesser General Public
007: * License as published by the Free Software Foundation; either
008: * version 2.1 of the License, or (at your option) any later version.
009: *
010: * This library is distributed in the hope that it will be useful,
011: * but WITHOUT ANY WARRANTY; without even the implied warranty of
012: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
013: * Lesser General Public License for more details.
014: *
015: * You should have received a copy of the GNU Lesser General Public
016: * License along with this library; if not, write to the Free Software
017: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
018: */package org.jperdian.rss2;
019:
020: import java.net.MalformedURLException;
021: import java.net.URL;
022: import java.text.DateFormat;
023: import java.text.ParseException;
024: import java.text.SimpleDateFormat;
025: import java.util.Date;
026: import java.util.Locale;
027:
028: import org.w3c.dom.Element;
029: import org.w3c.dom.NamedNodeMap;
030: import org.w3c.dom.Node;
031: import org.w3c.dom.NodeList;
032:
033: /**
034: * Some static methods for easy access to parse additions
035: *
036: * @author Christian Robert
037: */
038:
039: public class RssParseHelper {
040:
041: private static final DateFormat DF = new SimpleDateFormat(
042: "EEE, dd MMM yyyy HH:mm:ss Z", Locale.US);
043:
044: /**
045: * Parses the given source <code>String</code> into a <code>URL</code>. If
046: * no value has been entered <code>null</code> will be returned
047: * @exception RssParseException
048: * thrown if the given URL is not valid
049: */
050: public static URL parseURL(String sourceString)
051: throws RssParseException {
052: if (sourceString == null || sourceString.length() < 1) {
053: return null;
054: } else {
055: try {
056: return new URL(sourceString);
057: } catch (MalformedURLException e) {
058: throw new RssParseException("Illegal URL found: "
059: + sourceString);
060: }
061: }
062: }
063:
064: /**
065: * Parse the given node into the resulting content
066: * @param info
067: * the currently available <code>RuntimeInfo</code>
068: * @param node
069: * the node to be parsed
070: * @return
071: * the content that has been generated during the parse process
072: */
073: private static String parseContent(Node node) {
074:
075: StringBuffer buffer = new StringBuffer();
076: short nodeType = node.getNodeType();
077:
078: switch (nodeType) {
079:
080: case Node.CDATA_SECTION_NODE:
081: case Node.TEXT_NODE:
082: String value = node.getNodeValue();
083: if (value.length() > 0) {
084: buffer.append(value);
085: }
086: break;
087:
088: case Node.ELEMENT_NODE:
089: Element element = (Element) node;
090:
091: buffer.append("<").append(element.getNodeName());
092:
093: // Add all attributes
094: NamedNodeMap attributes = element.getAttributes();
095: for (int i = 0; i < attributes.getLength(); i++) {
096: buffer.append(" ").append(
097: attributes.item(i).getNodeName()).append("=\"");
098: buffer.append(attributes.item(i).getNodeValue())
099: .append("\"");
100: }
101:
102: if (element.hasChildNodes()) {
103: buffer.append(">");
104: buffer.append(RssParseHelper
105: .parseContentChildren(element));
106: buffer.append("</").append(element.getNodeName())
107: .append(">");
108: } else {
109:
110: /*
111: * Several browsers have two problems in interpreting correct XHTML:
112: * <br></br> is interpreted as a double <br> and two breaks are inserted
113: * where only one should be.
114: * On the other hand a <textarea /> is interpretated as only the opening
115: * tag <textarea> and everything standing after <textarea /> will be
116: * interpreted as if it was inside the textarea.
117: * So it has to be checked wheter the tag itself is an empty-tag
118: */
119:
120: String nodeName = element.getNodeName();
121: if (nodeName.equalsIgnoreCase("br")
122: || nodeName.equalsIgnoreCase("hr")
123: || nodeName.equalsIgnoreCase("input")
124: || nodeName.equalsIgnoreCase("meta")
125: || nodeName.equalsIgnoreCase("frame")) {
126: buffer.append(">");
127: } else {
128: buffer.append("></").append(element.getNodeName())
129: .append(">");
130: }
131: }
132: }
133: return buffer.toString();
134: }
135:
136: /**
137: * Parse the content of the children from the specified node and return
138: * it as String.
139: * @param node the content which children should be parsed
140: * @return the parse result content
141: */
142: public static String parseContentChildren(Node node) {
143: StringBuffer result = new StringBuffer();
144: if (node.hasChildNodes()) {
145: NodeList children = node.getChildNodes();
146: for (int i = 0; i < children.getLength(); i++) {
147: result.append(RssParseHelper.parseContent(children
148: .item(i)));
149: }
150: }
151: return result.toString().trim();
152: }
153:
154: /**
155: * Parses the content of the given element and formats it as date, or
156: * returns <code>null</code> if no content could be read
157: */
158: public static Date parseContentDate(Element node)
159: throws RssParseException {
160: String content = RssParseHelper.parseContentChildren(node);
161: if (content.length() < 1) {
162: return null;
163: } else {
164: try {
165: return DF.parse(content);
166: } catch (ParseException e) {
167: // throw new IllegalArgumentException("Illegal date: " + content);
168: return null;
169: }
170: }
171: }
172:
173: /**
174: * Parses the content of the given element and formats it as number, or
175: * returns <code>0</code> if no content could be read
176: */
177: public static int parseContentInt(Element node)
178: throws RssParseException {
179: return RssParseHelper.parseContentInt(node, 0);
180: }
181:
182: /**
183: * Parses the content of the given element and formats it as number, or
184: * returns the default value if no content could be read
185: */
186: public static int parseContentInt(Element node, int defaultValue)
187: throws RssParseException {
188: String content = RssParseHelper.parseContentChildren(node);
189: if (content.length() < 1) {
190: return defaultValue;
191: } else {
192: try {
193: return Integer.parseInt(content);
194: } catch (NumberFormatException e) {
195: throw new RssParseException(
196: "Illegal integer value found: " + content);
197: }
198: }
199: }
200:
201: /**
202: * Parses the content of the given element and formats it as URL
203: */
204: public static URL parseContentURL(Element node)
205: throws RssParseException {
206: String content = RssParseHelper.parseContentChildren(node);
207: return RssParseHelper.parseURL(content);
208: }
209:
210: }
|