001: /*---------------------------------------------------------------------------*\
002: $Id: RSSChannel.java 7041 2007-09-09 01:04:47Z bmc $
003: ---------------------------------------------------------------------------
004: This software is released under a BSD-style license:
005:
006: Copyright (c) 2004-2007 Brian M. Clapper. All rights reserved.
007:
008: Redistribution and use in source and binary forms, with or without
009: modification, are permitted provided that the following conditions are
010: met:
011:
012: 1. Redistributions of source code must retain the above copyright notice,
013: this list of conditions and the following disclaimer.
014:
015: 2. The end-user documentation included with the redistribution, if any,
016: must include the following acknowlegement:
017:
018: "This product includes software developed by Brian M. Clapper
019: (bmc@clapper.org, http://www.clapper.org/bmc/). That software is
020: copyright (c) 2004-2007 Brian M. Clapper."
021:
022: Alternately, this acknowlegement may appear in the software itself,
023: if wherever such third-party acknowlegements normally appear.
024:
025: 3. Neither the names "clapper.org", "curn", nor any of the names of the
026: project contributors may be used to endorse or promote products
027: derived from this software without prior written permission. For
028: written permission, please contact bmc@clapper.org.
029:
030: 4. Products derived from this software may not be called "curn", nor may
031: "clapper.org" appear in their names without prior written permission
032: of Brian M. Clapper.
033:
034: THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
035: WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
036: MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
037: NO EVENT SHALL BRIAN M. CLAPPER BE LIABLE FOR ANY DIRECT, INDIRECT,
038: INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
039: NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
040: DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
041: THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
042: (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
043: THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
044: \*---------------------------------------------------------------------------*/
045:
046: package org.clapper.curn.parser;
047:
048: import org.clapper.util.html.HTMLUtil;
049:
050: import java.util.ArrayList;
051: import java.util.Collection;
052: import java.util.Date;
053:
054: /**
055: * This abstract class defines a simplified view of an RSS channel,
056: * providing only the methods necessary for <i>curn</i> to work.
057: * <i>curn</i> uses the {@link RSSParserFactory} class to get a specific
058: * implementation of <tt>RSSParser</tt>, which returns an object that is a
059: * subclass of this class. This strategy isolates the bulk of the code from
060: * the underlying RSS parser, making it easier to substitute different
061: * parsers as more of them become available.
062: *
063: * @see RSSParserFactory
064: * @see RSSParser
065: * @see RSSItem
066: *
067: * @version <tt>$Revision: 7041 $</tt>
068: */
069: public abstract class RSSChannel extends RSSElement implements
070: Cloneable {
071: /*----------------------------------------------------------------------*\
072: Private Instance Data
073: \*----------------------------------------------------------------------*/
074:
075: private boolean htmlStripped = false;
076:
077: /*----------------------------------------------------------------------*\
078: Constructors
079: \*----------------------------------------------------------------------*/
080:
081: /**
082: * Default constructor.
083: */
084: protected RSSChannel() {
085: // Nothing to do
086: }
087:
088: /*----------------------------------------------------------------------*\
089: Public Methods
090: \*----------------------------------------------------------------------*/
091:
092: /**
093: * Clone this channel. This method simply calls the type-safe
094: * {@link #makeCopy} method. The clone is a deep-clone (i.e., the items
095: * are cloned, too).
096: *
097: * @return the cloned <tt>RSSChannel</tt>
098: *
099: * @throws CloneNotSupportedException doesn't, actually, but the
100: * <tt>Cloneable</tt> interface
101: * requires that this exception
102: * be declared
103: *
104: * @see #makeCopy
105: */
106: public Object clone() throws CloneNotSupportedException {
107: return makeCopy();
108: }
109:
110: /**
111: * Make a deep copy of this <tt>RSSChannel</tt> object.
112: *
113: * @return the copy
114: */
115: public RSSChannel makeCopy() {
116: RSSChannel newChannel = newInstance();
117:
118: newChannel.setTitle(this .getTitle());
119: newChannel.setDescription(this .getDescription());
120: newChannel.setLinks(this .getLinks());
121: newChannel.setPublicationDate(this .getPublicationDate());
122: newChannel.setCopyright(this .getCopyright());
123: newChannel.setNativeRSSFormat(this .getNativeRSSFormat());
124:
125: Collection<String> authors = this .getAuthors();
126: if (authors != null) {
127: for (String author : authors)
128: newChannel.addAuthor(author);
129: }
130:
131: Collection<RSSItem> itemCopies = new ArrayList<RSSItem>();
132:
133: for (RSSItem item : this .getItems())
134: itemCopies.add(item.makeCopy(newChannel));
135: newChannel.setItems(itemCopies);
136:
137: return newChannel;
138: }
139:
140: /**
141: * Strip all HTML and weird plain text from the channel and its items.
142: * Intended primarily for output handlers and plug-ins that produce
143: * plain text. This method edits the channel data directly; it does not
144: * produce a copy.
145: */
146: public synchronized void stripHTML() {
147: if (!htmlStripped) {
148: Collection<String> authors = getAuthors();
149: if (authors != null) {
150: Collection<String> newAuthors = new ArrayList<String>();
151: for (String author : authors) {
152: if (author != null)
153: newAuthors.add(HTMLUtil.textFromHTML(author));
154: }
155:
156: setAuthors(newAuthors);
157: }
158:
159: String title = getTitle();
160: if (title != null)
161: setTitle(HTMLUtil.textFromHTML(title));
162:
163: String desc = getDescription();
164: if (desc != null)
165: setDescription(HTMLUtil.textFromHTML(desc));
166:
167: String copyright = getCopyright();
168: if (copyright != null)
169: setCopyright(HTMLUtil.textFromHTML(copyright));
170:
171: Collection<RSSItem> items = getItems();
172: if ((items != null) && (items.size() > 0)) {
173: for (RSSItem item : items)
174: stripItemHTML(item);
175: }
176:
177: htmlStripped = true;
178: }
179: }
180:
181: /**
182: * Return a string representation of this channel.
183: *
184: * @return the string
185: */
186: public String toString() {
187: StringBuilder buf = new StringBuilder(32);
188:
189: buf.append("Channel ");
190:
191: Collection<RSSLink> links = getLinks();
192: String title;
193:
194: if (links.size() > 0)
195: buf.append(links.iterator().next().getURL().toString());
196: else if ((title = getTitle()) != null)
197: buf.append(title);
198: else
199: buf.append("???");
200:
201: buf.append(", ");
202: Collection<RSSItem> items = getItems();
203: int total = (items == null) ? 0 : items.size();
204: buf.append(String.valueOf(total));
205: buf.append(" item(s)");
206:
207: return buf.toString();
208: }
209:
210: /*----------------------------------------------------------------------*\
211: Abstract Public Methods
212: \*----------------------------------------------------------------------*/
213:
214: /**
215: * Create a new, empty instance of the underlying concrete
216: * class.
217: *
218: * @return the new instance
219: */
220: public abstract RSSChannel newInstance();
221:
222: /**
223: * Get a <tt>Collection</tt> of the items in this channel. All objects
224: * in the collection are of type <tt>RSSItem</tt>.
225: *
226: * @return a (new) <tt>Collection</tt> of <tt>RSSItem</tt> objects.
227: * The collection will be empty (never null) if there are no
228: * items. This <tt>Collection</tt> is expected to be a copy of
229: * whatever the channel is really storing. (That is, if the
230: * underlying implementation is using a <tt>Collection</tt> to
231: * store its <tt>RSSItem</tt> objects, it should not return
232: * that <tt>Collection</tt> directly; instead, it should return
233: * a copy.) The order of items in the returned collection
234: * is arbitrary and not guaranteed to be sorted, unless sorted
235: * by a plug-in.
236: */
237: public abstract Collection<RSSItem> getItems();
238:
239: /**
240: * Change the items the channel the ones in the specified collection.
241: * If the collection is empty, the items are cleared. The items are
242: * copied from the supplied collection. (A reference to the supplied
243: * collection is <i>not</i> saved in this object.)
244: *
245: * @param newItems new collection of <tt>RSSItem</tt> items.
246: */
247: public abstract void setItems(Collection<? extends RSSItem> newItems);
248:
249: /**
250: * Remove an item from the set of items.
251: *
252: * @param item the item to remove
253: *
254: * @return <tt>true</tt> if removed, <tt>false</tt> if not found
255: */
256: public abstract boolean removeItem(RSSItem item);
257:
258: /**
259: * Get the channel's title
260: *
261: * @return the channel's title, or null if there isn't one
262: *
263: * @see #setTitle(String)
264: */
265: public abstract String getTitle();
266:
267: /**
268: * Set the channel's title
269: *
270: * @param newTitle the channel's title, or null if there isn't one
271: *
272: * @see #getTitle()
273: */
274: public abstract void setTitle(String newTitle);
275:
276: /**
277: * Get the channel's description
278: *
279: * @return the channel's description, or null if there isn't one
280: *
281: * @see #setDescription
282: */
283: public abstract String getDescription();
284:
285: /**
286: * Set the channel's description
287: *
288: * @param desc the channel's description, or null if there isn't one
289: *
290: * @see #getDescription
291: */
292: public abstract void setDescription(String desc);
293:
294: /**
295: * Get the channel's list of published links (its URLs). Each
296: * element in the returned <tt>Collection</tt> is an
297: * {@link RSSLink} object.
298: *
299: * @return the collection of links, or an empty list if there are none.
300: * The result will never be null.
301: *
302: * @see #getLink
303: * @see #setLinks
304: */
305: public abstract Collection<RSSLink> getLinks();
306:
307: /**
308: * Set the channel's list of published links (its URLs).
309: *
310: * @param links the links
311: *
312: * @see #getLink
313: * @see #getLinks
314: */
315: public abstract void setLinks(Collection<RSSLink> links);
316:
317: /**
318: * Get the channel's publication date.
319: *
320: * @return the date, or null if not available
321: *
322: * @see #setPublicationDate
323: */
324: public abstract Date getPublicationDate();
325:
326: /**
327: * Set the channel's publication date.
328: *
329: * @param date the publication date, or null if not available
330: *
331: * @see #getPublicationDate
332: */
333: public abstract void setPublicationDate(Date date);
334:
335: /**
336: * Get the channel's copyright string
337: *
338: * @return the copyright string, or null if not available
339: *
340: * @see #setCopyright
341: */
342: public abstract String getCopyright();
343:
344: /**
345: * Set the channel's copyright string
346: *
347: * @param copyright the copyright string, or null if not available
348: *
349: * @see #getCopyright
350: */
351: public abstract void setCopyright(String copyright);
352:
353: /**
354: * Get the RSS format the channel is using, as a string
355: *
356: * @return the format, or null if not available
357: *
358: * @see #getNativeRSSFormat
359: * @see #setNativeRSSFormat
360: */
361: public abstract String getRSSFormat();
362:
363: /**
364: * Get the RSS format the channel is using, in native format. This
365: * method exists for underlying implementations that store the RSS
366: * format as something other than a string; the method allows the
367: * {@link #makeCopy} method to copy the RSS format without knowing
368: * how it's stored. The default implementation of this method
369: * simply calls {@link #getRSSFormat}.
370: *
371: * @return the format, or null if not available
372: *
373: * @see #getRSSFormat
374: * @see #setNativeRSSFormat
375: */
376: public Object getNativeRSSFormat() {
377: return getRSSFormat();
378: }
379:
380: /**
381: * Set the RSS format the channel is using.
382: *
383: * @param format the format, or null if not available
384: *
385: * @see #getRSSFormat
386: * @see #getNativeRSSFormat
387: */
388: public abstract void setNativeRSSFormat(Object format);
389:
390: /**
391: * Get the channel's author list.
392: *
393: * @return the authors, or null (or an empty <tt>Collection</tt>) if
394: * not available
395: *
396: * @see #addAuthor
397: * @see #clearAuthors
398: * @see #setAuthor
399: */
400: public abstract Collection<String> getAuthors();
401:
402: /**
403: * Add to the channel's author list.
404: *
405: * @param author another author string to add
406: *
407: * @see #getAuthors
408: * @see #clearAuthors
409: * @see #setAuthor
410: */
411: public abstract void addAuthor(String author);
412:
413: /**
414: * Clear the authors list.
415: *
416: * @see #getAuthors
417: * @see #addAuthor
418: * @see #setAuthor
419: */
420: public abstract void clearAuthors();
421:
422: /*----------------------------------------------------------------------*\
423: Private Methods
424: \*----------------------------------------------------------------------*/
425:
426: /**
427: ** Strip the HTML from an item.
428: *
429: * @param item the item
430: */
431: private void stripItemHTML(final RSSItem item) {
432: String title = item.getTitle();
433: if (title != null)
434: item.setTitle(HTMLUtil.textFromHTML(title));
435:
436: Collection<String> authors = item.getAuthors();
437: if (authors != null) {
438: Collection<String> newAuthors = new ArrayList<String>();
439: for (String author : authors)
440: newAuthors.add(HTMLUtil.textFromHTML(author));
441:
442: setAuthors(newAuthors);
443: }
444:
445: String summary = item.getSummary();
446: if (summary != null)
447: item.setSummary(HTMLUtil.textFromHTML(summary));
448: }
449: }
|