001: //$Id: RSSHandler.java,v 1.8 2004/09/04 11:58:19 taganaka Exp $
002: package org.gnu.stealthp.rsslib;
003:
004: import javax.xml.parsers.*;
005: import org.xml.sax.*;
006: import org.xml.sax.helpers.*;
007:
008: /**
009: * Handler for SAX Parser.
010: * <p>
011: * These elements are <em>not</em> handled yet:<br><br>
012: * cloud<br>
013: * rating<br>
014: * skipHours<br>
015: * skipDays<br>
016: * category<br>
017: * </p>
018: *
019: * <blockquote>
020: * <em>This module, both source code and documentation, is in the
021: * Public Domain, and comes with <strong>NO WARRANTY</strong>.</em>
022: * </blockquote>
023: *
024: * @since RSSLIB4J 0.1
025: * @author Francesco aka 'StealthP' stealthp[@]stealthp.org
026: * @version 0.2
027: */
028:
029: public class RSSHandler extends DefaultHandler {
030:
031: private StringBuffer buff;
032: private String current_tag;
033: private RSSChannel chan;
034: private RSSItem itm;
035: private RSSImage img;
036: private RSSSequence seq;
037: private RSSSequenceElement seq_elem;
038: private RSSTextInput input;
039: private RSSSyndicationModule sy;
040:
041: private boolean reading_chan;
042: private boolean reading_item;
043: private boolean reading_image;
044: private boolean reading_seq;
045: private boolean reading_input;
046: private boolean have_dc;
047:
048: public static final String CHANNEL_TAG = "channel";
049: public static final String TITLE_TAG = "title";
050: public static final String LINK_TAG = "link";
051: public static final String DESCRIPTION_TAG = "description";
052: public static final String ITEM_TAG = "item";
053: public static final String IMAGE_TAG = "image";
054: public static final String IMAGE_W_TAG = "width";
055: public static final String IMAGE_H_TAG = "height";
056: public static final String URL_TAG = "url";
057: public static final String SEQ_TAG = "rdf:seq";
058: public static final String SEQ_ELEMENT_TAG = "rdf:li";
059: public static final String TEXTINPUT_TAG = "textinput";
060: public static final String NAME_TAG = "name";
061: public static final String LANGUAGE_TAG = "language";
062: public static final String MANAGING_TAG = "managingEditor";
063: public static final String WMASTER_TAG = "webMaster";
064: public static final String COPY_TAG = "copyright";
065: public static final String PUB_DATE_TAG = "pubDate";
066: public static final String LAST_B_DATE_TAG = "lastBuildDate";
067: public static final String GENERATOR_TAG = "generator";
068: public static final String DOCS_TAG = "docs";
069: public static final String TTL_TAG = "ttl";
070: public static final String AUTHOR_TAG = "author";
071: public static final String COMMENTS_TAG = "comments";
072: public static final String CLOUD_TAG = "cloud"; //TODO
073: public static final String RATING_TAG = "rating"; //TODO
074: public static final String SKIPH_TAG = "skipHours"; //TODO
075: public static final String SKIPD_TAG = "skipDays"; //TODO
076: public static final String CATEGORY_TAG = "category"; //TODO
077:
078: public static final String DC_TITLE_TAG = "dc:title";
079: public static final String DC_CREATOR_TAG = "dc:creator";
080: public static final String DC_SUBJECT_TAG = "dc:subject";
081: public static final String DC_DESCRIPTION_TAG = "dc:description";
082: public static final String DC_PUBLISHER_TAG = "dc:publisher";
083: public static final String DC_CONTRIBUTOR_TAG = "dc:contributor";
084: public static final String DC_DATE_TAG = "dc:date";
085: public static final String DC_TYPE_TAG = "dc:type";
086: public static final String DC_FORMAT_TAG = "dc:format";
087: public static final String DC_IDENTIFIER_TAG = "dc:identifier";
088: public static final String DC_SOURCE_TAG = "dc:source";
089: public static final String DC_LANGUAGE_TAG = "dc:language";
090: public static final String DC_RELATION_TAG = "dc:relation";
091: public static final String DC_COVERAGE_TAG = "dc:coverage";
092: public static final String DC_RIGHTS_TAG = "dc:rights";
093:
094: public static final String SY_PERIOD_TAG = "sy:updatePeriod";
095: public static final String SY_FREQ_TAG = "sy:updateFrequency";
096: public static final String SY_BASE_TAG = "sy:updateBase";
097:
098: public RSSHandler() {
099:
100: buff = new StringBuffer();
101: current_tag = null;
102: chan = new RSSChannel();
103: reading_chan = false;
104: reading_item = false;
105: reading_image = false;
106: reading_seq = false;
107: reading_input = false;
108: have_dc = false;
109:
110: }
111:
112: /**
113: * Receive notification of the start of an element.
114: * @param uri The Namespace URI, or the empty string if the element has no Namespace URI or if Namespace processing is not being performed.
115: * @param localName The local name (without prefix), or the empty string if Namespace processing is not being performed
116: * @param qName The qualified name (with prefix), or the empty string if qualified names are not available
117: * @param attributes The attributes attached to the element. If there are no attributes, it shall be an empty Attributes object
118: */
119: public void startElement(String uri, String localName,
120: String qName, Attributes attributes) {
121:
122: if (tagIsEqual(qName, CHANNEL_TAG)) {
123: reading_chan = true;
124: processChanAboutAttribute(attributes);
125: }
126:
127: if (tagIsEqual(qName, ITEM_TAG)) {
128: reading_item = true;
129: reading_chan = false;
130: itm = new RSSItem();
131: processItemAboutAttribute(attributes);
132: }
133:
134: if (tagIsEqual(qName, IMAGE_TAG)) {
135: reading_image = true;
136: reading_chan = false;
137: img = new RSSImage();
138: }
139:
140: if (tagIsEqual(qName, SEQ_TAG)) {
141: reading_seq = true;
142: seq = new RSSSequence();
143: }
144:
145: if (tagIsEqual(qName, TEXTINPUT_TAG)) {
146: reading_input = true;
147: reading_chan = false;
148: input = new RSSTextInput();
149: }
150:
151: if (tagIsEqual(qName, SEQ_ELEMENT_TAG))
152: processSeqElement(attributes);
153:
154: if (qName.toUpperCase().startsWith("SY:"))
155: sy = new RSSSyndicationModule();
156:
157: current_tag = qName;
158:
159: }
160:
161: /**
162: * Receive notification of the end of an element
163: * @param uri The Namespace URI, or the empty string if the element has no Namespace URI or if Namespace processing is not being performed.
164: * @param localName The local name (without prefix), or the empty string if Namespace processing is not being performed
165: * @param qName The qualified name (with prefix), or the empty string if qualified names are not available
166: */
167: public void endElement(String uri, String localName, String qName) {
168:
169: String data = buff.toString().trim();
170:
171: if (qName.equals(current_tag)) {
172: data = buff.toString().trim();
173: buff = new StringBuffer();
174: }
175:
176: if (reading_chan)
177: processChannel(qName, data);
178:
179: if (reading_item)
180: processItem(qName, data);
181:
182: if (reading_image)
183: processImage(qName, data);
184:
185: if (reading_input)
186: processTextInput(qName, data);
187:
188: if (tagIsEqual(qName, CHANNEL_TAG)) {
189: reading_chan = false;
190: chan.setSyndicationModule(sy);
191: }
192:
193: if (tagIsEqual(qName, ITEM_TAG)) {
194: reading_item = false;
195: chan.addItem(itm);
196: }
197:
198: if (tagIsEqual(qName, IMAGE_TAG)) {
199: reading_image = false;
200: chan.setRSSImage(img);
201: }
202:
203: if (tagIsEqual(qName, SEQ_TAG)) {
204: reading_seq = false;
205: chan.addRSSSequence(seq);
206: }
207:
208: if (tagIsEqual(qName, TEXTINPUT_TAG)) {
209: reading_input = false;
210: chan.setRSSTextInput(input);
211: }
212:
213: }
214:
215: /**
216: * Receive notification of character data inside an element
217: * @param ch The characters.
218: * @param start The start position in the character array.
219: * @param length The number of characters to use from the character array.
220: */
221: public void characters(char[] ch, int start, int length) {
222:
223: String data = new String(ch, start, length);
224:
225: //Jump blank chunk
226: if (data.trim().length() == 0)
227: return;
228:
229: buff.append(data);
230:
231: }
232:
233: /**
234: * Receive notification when parse are scannering an image
235: * @param qName The tag name
236: * @param data The tag Value
237: */
238: private void processImage(String qName, String data) {
239: //System.out.println("RSSHandler:processImage():: TAG: " + qName);
240: if (tagIsEqual(qName, TITLE_TAG))
241: img.setTitle(data);
242:
243: if (tagIsEqual(qName, LINK_TAG))
244: img.setLink(data);
245:
246: if (tagIsEqual(qName, URL_TAG))
247: img.setUrl(data);
248:
249: if (tagIsEqual(qName, IMAGE_W_TAG))
250: img.setWidth(data);
251:
252: if (tagIsEqual(qName, IMAGE_H_TAG))
253: img.setHeight(data);
254:
255: if (tagIsEqual(qName, DESCRIPTION_TAG))
256: img.setDescription(data);
257:
258: if (qName.toUpperCase().startsWith("DC:"))
259: processDoublinCoreTags(qName, data, img);
260:
261: }
262:
263: /**
264: * Receive notification when parse are scannering a textinput
265: * @param qName The tag name
266: * @param data The tag Value
267: */
268:
269: private void processTextInput(String qName, String data) {
270:
271: if (tagIsEqual(qName, TITLE_TAG))
272: input.setTitle(data);
273:
274: if (tagIsEqual(qName, LINK_TAG))
275: input.setLink(data);
276:
277: if (tagIsEqual(qName, NAME_TAG))
278: input.setInputName(data);
279:
280: if (tagIsEqual(qName, DESCRIPTION_TAG))
281: input.setDescription(data);
282:
283: if (qName.toUpperCase().startsWith("DC:"))
284: processDoublinCoreTags(qName, data, input);
285:
286: }
287:
288: /**
289: * Receive notification when parse are scannering an Item
290: * @param qName The tag name
291: * @param data The tag Value
292: */
293: private void processItem(String qName, String data) {
294:
295: if (tagIsEqual(qName, TITLE_TAG))
296: itm.setTitle(data);
297:
298: if (tagIsEqual(qName, LINK_TAG))
299: itm.setLink(data);
300:
301: if (tagIsEqual(qName, DESCRIPTION_TAG))
302: itm.setDescription(data);
303:
304: if (tagIsEqual(qName, PUB_DATE_TAG))
305: itm.setPubDate(data);
306:
307: if (tagIsEqual(qName, PUB_DATE_TAG))
308: itm.setPubDate(data);
309:
310: if (tagIsEqual(qName, AUTHOR_TAG))
311: itm.setAuthor(data);
312:
313: if (tagIsEqual(qName, COMMENTS_TAG))
314: itm.setComments(data);
315:
316: if (qName.toUpperCase().startsWith("DC:"))
317: processDoublinCoreTags(qName, data, itm);
318:
319: }
320:
321: /**
322: * Receive notification when parse are scannering the Channel
323: * @param qName The tag name
324: * @param data The tag Value
325: */
326: private void processChannel(String qName, String data) {
327:
328: if (tagIsEqual(qName, TITLE_TAG))
329: chan.setTitle(data);
330:
331: if (tagIsEqual(qName, LINK_TAG))
332: chan.setLink(data);
333:
334: if (tagIsEqual(qName, DESCRIPTION_TAG))
335: chan.setDescription(data);
336:
337: if (tagIsEqual(qName, COPY_TAG))
338: chan.setCopyright(data);
339:
340: if (tagIsEqual(qName, PUB_DATE_TAG))
341: chan.setPubDate(data);
342:
343: if (tagIsEqual(qName, LAST_B_DATE_TAG))
344: chan.setLastBuildDate(data);
345:
346: if (tagIsEqual(qName, GENERATOR_TAG))
347: chan.setGenerator(data);
348:
349: if (tagIsEqual(qName, DOCS_TAG))
350: chan.setDocs(data);
351:
352: if (tagIsEqual(qName, TTL_TAG))
353: chan.setTTL(data);
354:
355: if (tagIsEqual(qName, LANGUAGE_TAG))
356: chan.setLanguage(data);
357:
358: if (qName.toUpperCase().startsWith("DC:"))
359: processDoublinCoreTags(qName, data, chan);
360:
361: if (qName.toUpperCase().startsWith("SY:"))
362: processSyndicationTags(qName, data);
363:
364: }
365:
366: /**
367: * Receive notification when parse are scannering a doublin core element
368: * @param qName tag name
369: * @param data tag value
370: * @param o RSSObject
371: */
372: private void processDoublinCoreTags(String qName, String data,
373: RSSObject o) {
374: o.addDoublinCoreElement(qName.toLowerCase(), data);
375: }
376:
377: private void processSyndicationTags(String qName, String data) {
378:
379: if (tagIsEqual(qName, this .SY_BASE_TAG))
380: sy.setSyUpdateBase(data);
381:
382: if (tagIsEqual(qName, this .SY_FREQ_TAG))
383: sy.setSyUpdateFrequency(data);
384:
385: if (tagIsEqual(qName, this .SY_PERIOD_TAG))
386: sy.setSyUpdatePeriod(data);
387: }
388:
389: /**
390: * Receive notification when parse are scannering a Sequence Item
391: * @param a The Atrribute of the tag
392: */
393: private void processSeqElement(Attributes a) {
394:
395: String res = a.getValue(0);
396: seq_elem = new RSSSequenceElement();
397: seq_elem.setResource(res);
398: seq.addElement(seq_elem);
399:
400: }
401:
402: /**
403: * Receive notification when parse are scannering an Item attribute
404: * @param a the attribute
405: */
406: private void processItemAboutAttribute(Attributes a) {
407:
408: String res = a.getValue(0);
409: itm.setAboutAttribute(res);
410:
411: }
412:
413: /**
414: * Receive notification when parse are scannering a Chan attribute
415: * @param a the attribute
416: */
417: private void processChanAboutAttribute(Attributes a) {
418:
419: String res = a.getValue(0);
420: chan.setAboutAttribute(res);
421:
422: }
423:
424: /**
425: * Receive notification when parse are scannering an Inputtext attribute
426: * @param a the attribute
427: */
428: private void processInputAboutAttribute(Attributes a) {
429:
430: String res = a.getValue(0);
431: input.setAboutAttribute(res);
432:
433: }
434:
435: /**
436: * Check against non-casesentive tag name
437: * @param a The first tag
438: * @param b The tag to check
439: * @return True if the tags are the same
440: */
441: protected static boolean tagIsEqual(String a, String b) {
442:
443: return a.equalsIgnoreCase(b);
444:
445: }
446:
447: /**
448: * Get the RSSChannel Object back from the parser
449: * @return The RSSChannell Object
450: */
451: public RSSChannel getRSSChannel() {
452:
453: return this.chan;
454:
455: }
456:
457: }
|