001: package org.methodize.nntprss.util;
002:
003: /* -----------------------------------------------------------
004: * nntp//rss - a bridge between the RSS world and NNTP clients
005: * Copyright (c) 2002, 2003 Jason Brome. All Rights Reserved.
006: *
007: * email: nntprss@methodize.org
008: * mail: Methodize Solutions
009: * PO Box 3865
010: * Grand Central Station
011: * New York NY 10163
012: *
013: * This file is part of nntp//rss
014: *
015: * nntp//rss is free software; you can redistribute it
016: * and/or modify it under the terms of the GNU General
017: * Public License as published by the Free Software Foundation;
018: * either version 2 of the License, or (at your option) any
019: * later version.
020: *
021: * This program is distributed in the hope that it will be
022: * useful, but WITHOUT ANY WARRANTY; without even the implied
023: * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
024: * PURPOSE. See the GNU General Public License for more
025: * details.
026: *
027: * You should have received a copy of the GNU General Public
028: * License along with this program; if not, write to the
029: * Free Software Foundation, Inc., 59 Temple Place, Suite 330,
030: * Boston, MA 02111-1307 USA
031: * ----------------------------------------------------- */
032:
import java.io.StringReader;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.StringTokenizer;

import org.hsqldb.lib.StringInputStream;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
042:
043: /**
044: * @author Jason Brome <jason@methodize.org>
045: * @version $Id: XMLHelper.java,v 1.5 2003/03/22 16:36:11 jasonbrome Exp $
046: */
047: public class XMLHelper {
048:
049: public static String getChildElementValue(Element parentElm,
050: String elementName) {
051:
052: String elementValue = null;
053: NodeList elemList = parentElm.getElementsByTagName(elementName);
054: if (elemList != null && elemList.getLength() > 0) {
055: // Use the first matching child element
056: Element elm = (Element) elemList.item(0);
057: NodeList childNodes = elm.getChildNodes();
058: StringBuffer value = new StringBuffer();
059: for (int elemCount = 0; elemCount < childNodes.getLength(); elemCount++) {
060:
061: if (childNodes.item(elemCount) instanceof org.w3c.dom.Text) {
062: value.append(childNodes.item(elemCount)
063: .getNodeValue());
064: }
065: }
066: elementValue = value.toString();
067: }
068: return elementValue;
069:
070: }
071:
072: public static String getChildElementValue(Element parentElm,
073: String elementName, String defaultValue) {
074: String value = getChildElementValue(parentElm, elementName);
075: if (value == null) {
076: return defaultValue;
077: } else {
078: return value;
079: }
080: }
081:
082: public static String stripTags(String value) {
083: StringTokenizer strTok = new StringTokenizer(value, "<>", true);
084: StringBuffer strippedString = new StringBuffer();
085: boolean inTag = false;
086: while (strTok.hasMoreTokens()) {
087: String token = strTok.nextToken();
088: if (token.equals("<")) {
089: inTag = true;
090: } else if (token.equals(">")) {
091: inTag = false;
092: } else if (!inTag) {
093: strippedString.append(token);
094: }
095: }
096: return strippedString.toString();
097:
098: }
099:
100: private static String preprocessMarkup(String value) {
101: StringBuffer trimmedString = new StringBuffer();
102: boolean lastCharSpace = false;
103: for (int c = 0; c < value.length(); c++) {
104: char currentChar = value.charAt(c);
105: if (currentChar == '\n') {
106: trimmedString.append(currentChar);
107: } else if (currentChar < 32) {
108: continue;
109: } else if (currentChar == ' ') {
110: if (!lastCharSpace) {
111: trimmedString.append(currentChar);
112: lastCharSpace = true;
113: }
114: } else {
115: trimmedString.append(currentChar);
116: lastCharSpace = false;
117: }
118: }
119: return trimmedString.toString();
120: }
121:
122: public static String stripHtmlTags(String value) {
123: // Trim white space... Use html markup (p, br) as line breaks
124: value = preprocessMarkup(value);
125:
126: StringTokenizer strTok = new StringTokenizer(value, "<>\n",
127: true);
128: StringBuffer strippedString = new StringBuffer();
129: boolean inTag = false;
130: boolean startOfLine = true;
131: String lastURL = null;
132: while (strTok.hasMoreTokens()) {
133: String token = strTok.nextToken();
134: if (token.equals("<")) {
135: inTag = true;
136:
137: // Read entire tag... Tag contents might be split over multiple lines
138: StringBuffer concatToken = new StringBuffer();
139: while (strTok.hasMoreTokens()) {
140: token = strTok.nextToken();
141: if (token.equals(">")) {
142: inTag = false;
143: break;
144: } else {
145: if (!token.equals("\n")) {
146: concatToken.append(token);
147: }
148: }
149: }
150:
151: token = concatToken.toString();
152:
153: String upperToken = token.toUpperCase();
154: if (upperToken.startsWith("A ")) {
155: int hrefPos = upperToken.indexOf("HREF=");
156: if (hrefPos > -1) {
157: int quotePos = hrefPos + 5;
158:
159: while (quotePos < token.length()
160: && Character.isWhitespace(token
161: .charAt(quotePos))) {
162: quotePos++;
163: }
164:
165: char quote = upperToken.charAt(quotePos);
166:
167: int endPos;
168: if (quote == '"' || quote == '\'') {
169: // URL wrapped in quotes / apostrophes
170: endPos = token.indexOf(quote, quotePos + 1);
171: } else {
172: // URL not enclosed
173: endPos = quotePos + 1;
174: while (endPos < token.length()
175: && !Character.isWhitespace(token
176: .charAt(endPos))) {
177: endPos++;
178: }
179: }
180:
181: if (endPos != -1) {
182: lastURL = token.substring(quotePos + 1,
183: endPos);
184: if (upperToken.endsWith("/")) {
185: strippedString.append(" (");
186: strippedString.append(lastURL);
187: strippedString.append(')');
188: lastURL = null;
189: startOfLine = false;
190: }
191: }
192: }
193: } else if (upperToken.startsWith("/A")) {
194: if (lastURL != null) {
195: strippedString.append(" (");
196: strippedString.append(lastURL);
197: strippedString.append(')');
198: lastURL = null;
199: startOfLine = false;
200: }
201: } else if (upperToken.equals("P")
202: || upperToken.equals("P/")
203: || upperToken.equals("P /")
204: || upperToken.equals("UL")
205: || upperToken.equals("/UL")) {
206: strippedString.append("\r\n\r\n");
207: startOfLine = true;
208: } else if (upperToken.equals("BR")
209: || upperToken.equals("BR/")
210: || upperToken.equals("BR /")
211: || upperToken.equals("LI")) {
212: strippedString.append("\r\n");
213: startOfLine = true;
214: }
215:
216: } else if (token.equals(">")) {
217: inTag = false;
218: } else if (token.equals("\n")) {
219: if (!inTag && !startOfLine) {
220: strippedString.append(' ');
221: }
222: } else if (!inTag) {
223: strippedString.append(token);
224: startOfLine = false;
225: }
226: }
227: return strippedString.toString();
228:
229: }
230:
231: public static String escapeString(String value) {
232: StringBuffer escapedString = new StringBuffer();
233: for (int charCount = 0; charCount < value.length(); charCount++) {
234: char c = value.charAt(charCount);
235: switch (c) {
236: case '&':
237: escapedString.append("&");
238: break;
239: case '<':
240: escapedString.append("<");
241: break;
242: case '>':
243: escapedString.append(">");
244: break;
245: case '\"':
246: escapedString.append(""");
247: break;
248: case '\'':
249: escapedString.append("'");
250: break;
251: default:
252: escapedString.append(c);
253: }
254: }
255: return escapedString.toString();
256: }
257:
258: /**
259: * Some helper functions used to serialize String-based
260: * maps (i.e. where both key and value are strings) to
261: * an XML document.
262: */
263:
264: public static String stringMapToXML(Map stringMap) {
265: String mapXMLResult = null;
266: if (stringMap != null && stringMap.size() > 0) {
267: StringBuffer mapXML = new StringBuffer();
268: mapXML
269: .append("<?xml version='1.0' encoding='UTF-8'?>\n<map>\n");
270:
271: Iterator mapIter = stringMap.entrySet().iterator();
272:
273: while (mapIter.hasNext()) {
274: Map.Entry entry = (Map.Entry) mapIter.next();
275: mapXML.append("<entry key='");
276: mapXML.append(escapeString((String) entry.getKey()));
277: mapXML.append("' value='");
278: mapXML.append(escapeString((String) entry.getValue()));
279: mapXML.append("'/>\n");
280: }
281:
282: mapXML.append("</map>");
283:
284: mapXMLResult = mapXML.toString();
285: }
286: return mapXMLResult;
287: }
288:
289: public static Map xmlToStringHashMap(String xml) {
290: Map map = new HashMap();
291:
292: if (xml != null && xml.length() > 0) {
293: try {
294: Document doc = AppConstants.newDocumentBuilder().parse(
295: new StringInputStream(xml));
296: Element rootElm = doc.getDocumentElement();
297: NodeList entryList = rootElm
298: .getElementsByTagName("entry");
299: for (int elmCount = 0; elmCount < entryList.getLength(); elmCount++) {
300: Element entry = (Element) entryList.item(elmCount);
301: map.put(entry.getAttribute("key"), entry
302: .getAttribute("value"));
303: }
304: } catch (Exception e) {
305: // XXX do we need to handle this scenario?
306: }
307: }
308:
309: return map;
310:
311: }
312:
313: }
|