001: // Informa -- RSS Library for Java
002: // Copyright (c) 2002 by Niko Schmuck
003: //
004: // Niko Schmuck
005: // http://sourceforge.net/projects/informa
006: // mailto:niko_schmuck@users.sourceforge.net
007: //
008: // This library is free software.
009: //
010: // You may redistribute it and/or modify it under the terms of the GNU
011: // Lesser General Public License as published by the Free Software Foundation.
012: //
013: // Version 2.1 of the license should be included with this distribution in
014: // the file LICENSE. If the license is not included with this distribution,
015: // you may find a copy at the FSF web site at 'www.gnu.org' or 'www.fsf.org',
016: // or you may write to the Free Software Foundation, 675 Mass Ave, Cambridge,
017: // MA 02139 USA.
018: //
019: // This library is distributed in the hope that it will be useful,
020: // but WITHOUT ANY WARRANTY; without even the implied warranty of
021: // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
022: // Lesser General Public License for more details.
023: //
024:
025: // $Id: AtomParserUtils.java,v 1.6 2007/01/06 21:33:45 niko_schmuck Exp $
026:
027: package de.nava.informa.utils;
028:
029: import org.apache.commons.logging.Log;
030: import org.apache.commons.logging.LogFactory;
031:
032: import org.jdom.CDATA;
033: import org.jdom.Element;
034: import org.jdom.Namespace;
035: import org.jdom.Text;
036:
037: import org.jdom.output.XMLOutputter;
038:
039: import java.util.ArrayList;
040: import java.util.List;
041:
042: /**
043: * Common utility functions for Atom 0.3 and Atom 1.0 parser
044: */
045: public class AtomParserUtils {
046: private static final String[] ITEM_LINK_REL_PREFERRENCE_ORDER = {
047: "alternate", null };
048: private static final String[] ITEM_LINK_TYPES_PREFERRENCE_ORDER = {
049: "text/html", "text/plain" };
050: static public final Log LOGGER = LogFactory
051: .getLog(AtomParserUtils.class);
052: private static final XMLOutputter OUTPUTER = new XMLOutputter();
053:
054: /** Clears namespace signature from the element and all of the children. */
055: private static void clearNamespace(Element elt) {
056: if (elt == null) {
057: return;
058: }
059:
060: elt.setNamespace(null);
061:
062: List content = elt.getContent();
063:
064: for (int i = 0; i < content.size(); i++) {
065: Object item = content.get(i);
066:
067: if (item instanceof Element) {
068: clearNamespace((Element) item);
069: }
070: }
071: }
072:
073: /**
074: * Looks for link sub-elements of type "link" and selects the most preferred.
075: *
076: * @param item item element.
077: * @param defNS default namespace.
078: *
079: * @return link in string or <code>null</code>.
080: */
081: public static String getItemLink(Element item, Namespace defNS) {
082: String currentHref = null;
083: int currentOrder = Integer.MAX_VALUE;
084:
085: List links = item.getChildren("link", defNS);
086:
087: for (int i = 0; (currentOrder != 0) && (i < links.size()); i++) {
088: Element link = (Element) links.get(i);
089:
090: // get type of the link
091: String type = link.getAttributeValue("type");
092: String rel = link.getAttributeValue("rel");
093:
094: if (type != null) {
095: type = type.trim().toLowerCase();
096: }
097:
098: // if we prefer this type more than the one we already have then
099: // replace current href with new one and update preference order
100: // value.
101: int preferenceOrder = getPreferenceOrderForItemLinkType(
102: type, rel);
103:
104: System.out.println("Link " + link.getAttributeValue("href")
105: + " with pref "
106: + getPreferenceOrderForItemLinkType(type, rel)
107: + " " + type + " " + rel);
108:
109: if (preferenceOrder < currentOrder) {
110: String href = link.getAttributeValue("href");
111:
112: if (href != null) {
113: currentHref = href.trim();
114: currentOrder = preferenceOrder;
115: }
116: }
117: }
118:
119: LOGGER.debug("url read : " + currentHref);
120:
121: return currentHref;
122: }
123:
124: /**
125: * Returns order of item link type preference.
126: *
127: * @param type type ("text/html", "text/plain", "application/xml" ...).
128: *
129: * @return order (the lower the more preferred).
130: */
131: public static int getPreferenceOrderForItemLinkType(String type,
132: String rel) {
133: int orderType = -1;
134:
135: if (type != null) {
136: for (int i = 0; (orderType == -1)
137: && (i < ITEM_LINK_TYPES_PREFERRENCE_ORDER.length); i++) {
138: if (type.equals(ITEM_LINK_TYPES_PREFERRENCE_ORDER[i])) {
139: orderType = i;
140: }
141: }
142: }
143:
144: if (orderType == -1) {
145: orderType = ITEM_LINK_TYPES_PREFERRENCE_ORDER.length;
146: }
147:
148: int orderRel = -1;
149:
150: if (rel != null) {
151: for (int i = 0; (orderRel == -1)
152: && (i < ITEM_LINK_REL_PREFERRENCE_ORDER.length); i++) {
153: if (rel.equals(ITEM_LINK_REL_PREFERRENCE_ORDER[i])) {
154: orderRel = i;
155: }
156: }
157: }
158:
159: if (orderRel == -1) {
160: orderRel = ITEM_LINK_REL_PREFERRENCE_ORDER.length;
161: }
162:
163: return (orderRel * (ITEM_LINK_REL_PREFERRENCE_ORDER.length + 1))
164: + orderType;
165: }
166:
167: /**
168: * Returns value of the element.
169: * @param elt the element to retrieve the value from.
170: * @param mode can be one of "escaped", "base64" or "xml". If null, "xml" is assumed.
171: */
172: public static String getValue(Element elt, String mode) {
173: if (elt == null) {
174: return "";
175: }
176:
177: mode = (mode == null) ? "xml" : mode;
178:
179: clearNamespace(elt);
180:
181: String value;
182:
183: // Here we convert the contents of element into some readable text.
184: // If the contents (after removing leading and trailing spaces) is CDATA only,
185: // we need to treat it specially by unpacking the contents it has. Otherwise,
186: // we simply output what we have.
187: List content = elt.getContent();
188:
189: content = trimContents(content);
190:
191: if ((content.size() == 1) && content.get(0) instanceof CDATA) {
192: value = ((CDATA) content.get(0)).getValue();
193: } else {
194: value = OUTPUTER.outputString(content);
195: }
196:
197: // Unescape or decode stuff if necessary
198: if ("escaped".equals(mode)) {
199: value = ParserUtils.unEscape(value);
200: } else if ("base64".equals(mode)) {
201: value = ParserUtils.decodeBase64(value);
202: }
203:
204: return value;
205: }
206:
207: /**
208: * Cuts all empty (whitespace) text blocks from the head and tail of contents list.
209: *
210: * @param contents list of contents.
211: *
212: * @return trimmed version.
213: */
214: public static List trimContents(List contents) {
215: if (contents == null) {
216: return contents;
217: }
218:
219: int head = 0;
220: int count = contents.size();
221:
222: while ((head < count)
223: && (contents.get(head) instanceof Text && (((Text) contents
224: .get(head)).getTextTrim().length() == 0)))
225: head++;
226:
227: int tail = count - 1;
228:
229: while ((tail > head)
230: && (contents.get(tail) instanceof Text && (((Text) contents
231: .get(tail)).getTextTrim().length() == 0)))
232: tail--;
233:
234: return (tail >= head) ? contents.subList(head, tail + 1)
235: : new ArrayList();
236: }
237: }
|