001: /*
002: * Copyright 2004 Sun Microsystems, Inc.
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: *
016: */
017: package com.sun.syndication.io.impl;
018:
019: import com.sun.syndication.feed.WireFeed;
020: import com.sun.syndication.feed.rss.Channel;
021: import com.sun.syndication.feed.rss.Image;
022: import com.sun.syndication.feed.rss.Item;
023: import com.sun.syndication.feed.rss.TextInput;
024: import com.sun.syndication.io.FeedException;
025: import org.jdom.Document;
026: import org.jdom.Element;
027: import org.jdom.Namespace;
028:
029: import java.util.ArrayList;
030: import java.util.Collection;
031: import java.util.Iterator;
032: import java.util.List;
033:
034: /**
035: */
036: public class RSS090Parser extends BaseWireFeedParser {
037:
038: private static final String RDF_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
039: private static final String RSS_URI = "http://my.netscape.com/rdf/simple/0.9/";
040: private static final String CONTENT_URI = "http://purl.org/rss/1.0/modules/content/";
041:
042: private static final Namespace RDF_NS = Namespace
043: .getNamespace(RDF_URI);
044: private static final Namespace RSS_NS = Namespace
045: .getNamespace(RSS_URI);
046: private static final Namespace CONTENT_NS = Namespace
047: .getNamespace(CONTENT_URI);
048:
049: public RSS090Parser() {
050: this ("rss_0.9");
051: }
052:
053: protected RSS090Parser(String type) {
054: super (type);
055: }
056:
057: public boolean isMyType(Document document) {
058: boolean ok = false;
059:
060: Element rssRoot = document.getRootElement();
061: Namespace defaultNS = rssRoot.getNamespace();
062: List additionalNSs = rssRoot.getAdditionalNamespaces();
063:
064: ok = defaultNS != null && defaultNS.equals(getRDFNamespace());
065: if (ok) {
066: if (additionalNSs == null) {
067: ok = false;
068: } else {
069: ok = false;
070: for (int i = 0; !ok && i < additionalNSs.size(); i++) {
071: ok = getRSSNamespace().equals(additionalNSs.get(i));
072: }
073: }
074: }
075: return ok;
076: }
077:
078: public WireFeed parse(Document document, boolean validate)
079: throws IllegalArgumentException, FeedException {
080: if (validate) {
081: validateFeed(document);
082: }
083: Element rssRoot = document.getRootElement();
084: return parseChannel(rssRoot);
085: }
086:
087: protected void validateFeed(Document document) throws FeedException {
088: // TBD
089: // here we have to validate the Feed against a schema or whatever
090: // not sure how to do it
091: // one posibility would be to inject our own schema for the feed (they don't exist out there)
092: // to the document, produce an ouput and attempt to parse it again with validation turned on.
093: // otherwise will have to check the document elements by hand.
094: }
095:
096: /**
097: * Returns the namespace used by RSS elements in document of the RSS version the parser supports.
098: * <P>
099: * This implementation returns the EMTPY namespace.
100: * <p>
101: *
102: * @return returns the EMPTY namespace.
103: */
104: protected Namespace getRSSNamespace() {
105: return RSS_NS;
106: }
107:
108: /**
109: * Returns the namespace used by RDF elements in document of the RSS version the parser supports.
110: * <P>
111: * This implementation returns the EMTPY namespace.
112: * <p>
113: *
114: * @return returns the EMPTY namespace.
115: */
116: protected Namespace getRDFNamespace() {
117: return RDF_NS;
118: }
119:
120: /**
121: * Returns the namespace used by Content Module elements in document.
122: * <P>
123: * This implementation returns the EMTPY namespace.
124: * <p>
125: *
126: * @return returns the EMPTY namespace.
127: */
128: protected Namespace getContentNamespace() {
129: return CONTENT_NS;
130: }
131:
132: /**
133: * Parses the root element of an RSS document into a Channel bean.
134: * <p/>
135: * It reads title, link and description and delegates to parseImage, parseItems
136: * and parseTextInput. This delegation always passes the root element of the RSS
137: * document as different RSS version may have this information in different parts
138: * of the XML tree (no assumptions made thanks to the specs variaty)
139: * <p/>
140: *
141: * @param rssRoot the root element of the RSS document to parse.
142: * @return the parsed Channel bean.
143: */
144: protected WireFeed parseChannel(Element rssRoot) {
145: Element eChannel = rssRoot.getChild("channel",
146: getRSSNamespace());
147:
148: Channel channel = new Channel(getType());
149:
150: Element e = eChannel.getChild("title", getRSSNamespace());
151: if (e != null) {
152: channel.setTitle(e.getText());
153: }
154: e = eChannel.getChild("link", getRSSNamespace());
155: if (e != null) {
156: channel.setLink(e.getText());
157: }
158: e = eChannel.getChild("description", getRSSNamespace());
159: if (e != null) {
160: channel.setDescription(e.getText());
161: }
162:
163: channel.setImage(parseImage(rssRoot));
164:
165: channel.setTextInput(parseTextInput(rssRoot));
166:
167: // Unfortunately Microsoft's SSE extension has a special case of
168: // effectively putting the sharing channel module inside the RSS tag
169: // and not inside the channel itself. So we also need to look for
170: // channel modules from the root RSS element.
171: List allFeedModules = new ArrayList();
172: List rootModules = parseFeedModules(rssRoot);
173: List channelModules = parseFeedModules(eChannel);
174: if (rootModules != null) {
175: allFeedModules.addAll(rootModules);
176: }
177: if (channelModules != null) {
178: allFeedModules.addAll(channelModules);
179: }
180: channel.setModules(allFeedModules);
181: channel.setItems(parseItems(rssRoot));
182:
183: List foreignMarkup = extractForeignMarkup(eChannel, channel,
184: getRSSNamespace());
185: if (foreignMarkup.size() > 0) {
186: channel.setForeignMarkup(foreignMarkup);
187: }
188: return channel;
189: }
190:
191: /**
192: * This method exists because RSS0.90 and RSS1.0 have the 'item' elements under the root elemment.
193: * And RSS0.91, RSS0.02, RSS0.93, RSS0.94 and RSS2.0 have the item elements under the 'channel' element.
194: * <p/>
195: */
196: protected List getItems(Element rssRoot) {
197: return rssRoot.getChildren("item", getRSSNamespace());
198: }
199:
200: /**
201: * This method exists because RSS0.90 and RSS1.0 have the 'image' element under the root elemment.
202: * And RSS0.91, RSS0.02, RSS0.93, RSS0.94 and RSS2.0 have it under the 'channel' element.
203: * <p/>
204: */
205: protected Element getImage(Element rssRoot) {
206: return rssRoot.getChild("image", getRSSNamespace());
207: }
208:
209: /**
210: * This method exists because RSS0.90 and RSS1.0 have the 'textinput' element under the root elemment.
211: * And RSS0.91, RSS0.02, RSS0.93, RSS0.94 and RSS2.0 have it under the 'channel' element.
212: * <p/>
213: */
214: protected Element getTextInput(Element rssRoot) {
215: return rssRoot.getChild("textinput", getRSSNamespace());
216: }
217:
218: /**
219: * Parses the root element of an RSS document looking for image information.
220: * <p/>
221: * It reads title and url out of the 'image' element.
222: * <p/>
223: *
224: * @param rssRoot the root element of the RSS document to parse for image information.
225: * @return the parsed image bean.
226: */
227: protected Image parseImage(Element rssRoot) {
228: Image image = null;
229: Element eImage = getImage(rssRoot);
230: if (eImage != null) {
231: image = new Image();
232:
233: Element e = eImage.getChild("title", getRSSNamespace());
234: if (e != null) {
235: image.setTitle(e.getText());
236: }
237: e = eImage.getChild("url", getRSSNamespace());
238: if (e != null) {
239: image.setUrl(e.getText());
240: }
241: e = eImage.getChild("link", getRSSNamespace());
242: if (e != null) {
243: image.setLink(e.getText());
244: }
245: }
246: return image;
247: }
248:
249: /**
250: * Parses the root element of an RSS document looking for all items information.
251: * <p/>
252: * It iterates through the item elements list, obtained from the getItems() method, and invoke parseItem()
253: * for each item element. The resulting RSSItem of each item element is stored in a list.
254: * <p/>
255: *
256: * @param rssRoot the root element of the RSS document to parse for all items information.
257: * @return a list with all the parsed RSSItem beans.
258: */
259: protected List parseItems(Element rssRoot) {
260: Collection eItems = getItems(rssRoot);
261:
262: List items = new ArrayList();
263: for (Iterator i = eItems.iterator(); i.hasNext();) {
264: Element eItem = (Element) i.next();
265: items.add(parseItem(rssRoot, eItem));
266: }
267: return items;
268: }
269:
270: /**
271: * Parses an item element of an RSS document looking for item information.
272: * <p/>
273: * It reads title and link out of the 'item' element.
274: * <p/>
275: *
276: * @param rssRoot the root element of the RSS document in case it's needed for context.
277: * @param eItem the item element to parse.
278: * @return the parsed RSSItem bean.
279: */
280: protected Item parseItem(Element rssRoot, Element eItem) {
281: Item item = new Item();
282: Element e = eItem.getChild("title", getRSSNamespace());
283: if (e != null) {
284: item.setTitle(e.getText());
285: }
286: e = eItem.getChild("link", getRSSNamespace());
287: if (e != null) {
288: item.setLink(e.getText());
289: }
290:
291: item.setModules(parseItemModules(eItem));
292:
293: List foreignMarkup = extractForeignMarkup(eItem, item,
294: getRSSNamespace());
295: if (foreignMarkup.size() > 0) {
296: item.setForeignMarkup(foreignMarkup);
297: }
298: return item;
299: }
300:
301: /**
302: * Parses the root element of an RSS document looking for text-input information.
303: * <p/>
304: * It reads title, description, name and link out of the 'textinput' or 'textInput' element.
305: * <p/>
306: *
307: * @param rssRoot the root element of the RSS document to parse for text-input information.
308: * @return the parsed RSSTextInput bean.
309: */
310: protected TextInput parseTextInput(Element rssRoot) {
311: TextInput textInput = null;
312: Element eTextInput = getTextInput(rssRoot);
313: if (eTextInput != null) {
314: textInput = new TextInput();
315: Element e = eTextInput.getChild("title", getRSSNamespace());
316: if (e != null) {
317: textInput.setTitle(e.getText());
318: }
319: e = eTextInput.getChild("description", getRSSNamespace());
320: if (e != null) {
321: textInput.setDescription(e.getText());
322: }
323: e = eTextInput.getChild("name", getRSSNamespace());
324: if (e != null) {
325: textInput.setName(e.getText());
326: }
327: e = eTextInput.getChild("link", getRSSNamespace());
328: if (e != null) {
329: textInput.setLink(e.getText());
330: }
331: }
332: return textInput;
333: }
334:
335: }
|