001: /*
002: * Extractor.java
003: *
004: *
005: * Copyright (c) 2003 Rimfaxe ApS (www.rimfaxe.com).
006: * All rights reserved.
007: *
008: * This package is written by Lars Andersen <lars@rimfaxe.com>
009: * and licensed by Rimfaxe ApS.
010: *
011: * Redistribution and use in source and binary forms, with or without
012: * modification, are permitted provided that the following conditions
013: * are met:
014: *
015: * 1. Redistributions of source code must retain the above copyright
016: * notice, this list of conditions and the following disclaimer.
017: *
018: * 2. Redistributions in binary form must reproduce the above copyright
019: * notice, this list of conditions and the following disclaimer in
020: * the documentation and/or other materials provided with the
021: * distribution.
022: *
023: * 3. The end-user documentation included with the redistribution, if
024: * any, must include the following acknowlegement:
025: * "This product includes software developed by Rimfaxe ApS
026: (www.rimfaxe.com)"
027: * Alternately, this acknowlegement may appear in the software itself,
028: * if and wherever such third-party acknowlegements normally appear.
029: *
030: * 4. The names "Rimfaxe", "Rimfaxe Software", "Lars Andersen" and
031: * "Rimfaxe WebServer" must not be used to endorse or promote products
032: * derived from this software without prior written permission. For written
033: * permission, please contact info@rimfaxe.com
034: *
035: * 5. Products derived from this software may not be called "Rimfaxe"
036: * nor may "Rimfaxe" appear in their names without prior written
037: * permission of the Rimfaxe ApS.
038: *
039: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
040: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
041: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
042: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
043: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
044: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
045: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
046: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
047: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
048: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
049: * SUCH DAMAGE.
050: *
051: */
052:
053: package com.rimfaxe.xml.extractor;
054:
055: import org.w3c.dom.Attr;
056: import org.w3c.dom.Document;
057: import org.w3c.dom.NamedNodeMap;
058: import org.w3c.dom.Node;
059: import org.w3c.dom.NodeList;
060:
061: import java.util.*;
062:
063: /**
064: *
065: * @author Lars Andersen
066: */
067: public class Extractor extends Object {
068: com.rimfaxe.util.RimfaxeVector simpletags = new com.rimfaxe.util.RimfaxeVector();
069: String xml = "";
070: String template = "";
071: Document document = null;
072: Stack stack = new Stack();
073:
074: Condition evaluator;
075:
076: /** Creates new Extractor
077: *
078: */
079: public Extractor() {
080: simpletags.addElement("br");
081: simpletags.addElement("img");
082: simpletags.addElement("hr");
083: simpletags.addElement("meta");
084: simpletags.addElement("input");
085: }
086:
087: /** Sets the template for this extractor
088: *
089: * @param template name of template (for caching)
090: * @param str xml with template content
091: */
092: public void setContent(String template, String str) {
093: this .xml = str;
094: this .template = template;
095:
096: com.rimfaxe.xml.ParserInterface parser = com.rimfaxe.xml.ParserFactory
097: .getInstance().checkoutParser();
098: parser.parse(xml, false);
099: Document document = parser.getDocument();
100: }
101:
102: /** Pushes emaildata on the stack
103: *
104: * @param emaildata
105: */
106: public void addData(com.rimfaxe.xml.datamodel.Container data) {
107: stack.push(data);
108: }
109:
110: /** Traverse the template
111: *
112: *
113: * @param node
114: * @return
115: */
116: public String traverse(Node node) {
117: StringBuffer str = new StringBuffer();
118:
119: int type = 0;
120:
121: try {
122: if (node == null) {
123: return "";
124: }
125: type = node.getNodeType();
126:
127: } catch (Exception e) {
128: com.rimfaxe.util.Log.log("Extractor",
129: "Traverse exception 1");
130:
131: }
132:
133: switch (type) {
134: case Node.DOCUMENT_NODE: {
135: try {
136: str.append(traverse(((Document) node)
137: .getDocumentElement()));
138: } catch (Exception e) {
139: com.rimfaxe.util.Log.log("Extractor",
140: "Traverse exception 2");
141:
142: }
143: break;
144: }
145:
146: case Node.ELEMENT_NODE: {
147: try {
148: NodeList children = node.getChildNodes();
149:
150: if (node.getNodeName().equalsIgnoreCase("include")) {
151: // get condition
152: Node id = node.getAttributes().getNamedItem(
153: "condition");
154: String condition = id.getNodeValue();
155:
156: // get the section we might want to include
157: StringBuffer incsection = new StringBuffer();
158:
159: if (children != null) {
160: int len = children.getLength();
161: for (int i = 0; i < len; i++)
162: incsection
163: .append(traverse(children.item(i)));
164: }
165:
166: if (evaluator.check(condition)) {
167: str.append("" + incsection);
168: }
169:
170: break;
171: }
172:
173: if (node.getNodeName().equalsIgnoreCase("check")) {
174: String name = node.getAttributes().getNamedItem(
175: "name").getNodeValue();
176: String value = node.getAttributes().getNamedItem(
177: "value").getNodeValue();
178: com.rimfaxe.xml.datamodel.Container container = (com.rimfaxe.xml.datamodel.Container) stack
179: .peek();
180: String cval = container.getLiteral(name);
181: StringBuffer incsection = new StringBuffer();
182:
183: if (children != null) {
184: int len = children.getLength();
185: for (int i = 0; i < len; i++)
186: incsection
187: .append(traverse(children.item(i)));
188: }
189:
190: if (value.equalsIgnoreCase(cval)) {
191: str.append("" + incsection);
192: }
193: break;
194: }
195:
196: if (node.getNodeName().equalsIgnoreCase("list")) {
197: Node id = node.getAttributes().getNamedItem("id");
198: String val = id.getNodeValue();
199:
200: resetList(val);
201: while (fetchList(val)) {
202: if (children != null) {
203: int len = children.getLength();
204: for (int i = 0; i < len; i++)
205: str.append(traverse(children.item(i)));
206: }
207: endList();
208: }
209: break;
210: }
211:
212: if (node.getNodeName().equalsIgnoreCase("front")) {
213:
214: Node id = node.getAttributes().getNamedItem("id");
215: String val = id.getNodeValue();
216:
217: resetList(val);
218: if (fetchList(val)) {
219: if (children != null) {
220: int len = children.getLength();
221: for (int i = 0; i < len; i++)
222: str.append(traverse(children.item(i)));
223: }
224: endList();
225: }
226: break;
227: }
228:
229: if (node.getNodeName().equalsIgnoreCase("tail")) {
230:
231: Node id = node.getAttributes().getNamedItem("id");
232: String val = id.getNodeValue();
233:
234: while (fetchList(val)) {
235: if (children != null) {
236: int len = children.getLength();
237: for (int i = 0; i < len; i++)
238: str.append(traverse(children.item(i)));
239: }
240: endList();
241: }
242: break;
243: }
244:
245: if (node.getNodeName().equalsIgnoreCase("subsection")) {
246:
247: Node id = node.getAttributes().getNamedItem("id");
248:
249: subsection(id.getNodeValue());
250:
251: if (children != null) {
252: int len = children.getLength();
253: for (int i = 0; i < len; i++)
254: str.append(traverse(children.item(i)));
255: }
256:
257: endsection();
258: break;
259: }
260:
261: if (node.getNodeName().equalsIgnoreCase("literal")) {
262: Node id = node.getAttributes().getNamedItem("id");
263: String lit = literal(id.getNodeValue(), "std");
264: str.append(lit);
265: break;
266: }
267:
268: if (node.getNodeName().equalsIgnoreCase("dbvalue")) {
269: Node id = node.getAttributes().getNamedItem("id");
270: Node level = node.getAttributes().getNamedItem(
271: "level");
272: String lit = literal(id.getNodeValue(), level
273: .getNodeValue());
274: str.append(lit);
275: break;
276: }
277:
278: StringBuffer body = new StringBuffer();
279: if (children != null) {
280: int len = children.getLength();
281: for (int i = 0; i < len; i++) {
282: body.append(traverse(children.item(i)));
283: }
284: }
285: boolean simple = false;
286: if (children == null)
287: simple = true;
288: str.append(processTag(node.getNodeName(), node
289: .getAttributes(), body.toString(), simple));
290:
291: } catch (Exception e) {
292: com.rimfaxe.util.Log.log("Extractor",
293: "Traverse exception 3");
294:
295: }
296:
297: break;
298: }
299: case Node.ENTITY_REFERENCE_NODE: {
300: str.append(processEntity("" + node.getNodeName()));
301: break;
302: }
303:
304: case Node.ENTITY_NODE: {
305: break;
306: }
307:
308: case Node.TEXT_NODE: {
309: try {
310:
311: str.append(cleantext("" + node.getNodeValue()));
312:
313: } catch (Exception e) {
314: // TODO
315: }
316: break;
317: }
318:
319: }
320:
321: return str.toString();
322: }
323:
324: // Methods to be overridden
325: public String processEntity(String name) {
326: return "&" + name + ";";
327: }
328:
329: /** Process a Tag
330: * This method is overridden by a concrete extractor (f.ex. HTMLExtractor)
331: *
332: * @param name name of tag
333: * @param attrs tag attributes
334: * @param body body of tag
335: * @return
336: */
337: public String processTag(String name, NamedNodeMap attrs,
338: String body, boolean simple) {
339: return body;
340: }
341:
342: // default methods
343:
344: /** Get a literal
345: *
346: *
347: * @param id
348: * @param level
349: * @return
350: */
351: protected String literal(String id, String level) {
352: com.rimfaxe.xml.datamodel.Container container = (com.rimfaxe.xml.datamodel.Container) stack
353: .peek();
354: return container.getLiteral(id);
355: }
356:
357: /**
358: *
359: *
360: * @param id
361: */
362: public void subsection(String id) {
363: com.rimfaxe.xml.datamodel.Container container = (com.rimfaxe.xml.datamodel.Container) stack
364: .peek();
365: com.rimfaxe.xml.datamodel.Container top = container
366: .getContainer(id);
367: if (top == null) {
368: com.rimfaxe.util.Log.log("Couldn't find container [" + id
369: + "] inside [" + container.getName() + "]");
370: stack.push(new com.rimfaxe.xml.datamodel.Container());
371: } else
372: stack.push(container.getContainer(id));
373: }
374:
375: /**
376: *
377: *
378: */
379: public void endsection() {
380: stack.pop();
381: }
382:
383: /**
384: *
385: *
386: * @param id
387: */
388: public void resetList(String id) {
389: com.rimfaxe.xml.datamodel.Container container = (com.rimfaxe.xml.datamodel.Container) stack
390: .peek();
391: com.rimfaxe.xml.datamodel.Iterator iterator = container
392: .getIterator(id);
393: if (iterator != null)
394: iterator.reset();
395: }
396:
397: /**
398: *
399: *
400: * @param id
401: * @return
402: */
403: public boolean fetchList(String id) {
404: boolean res = false;
405:
406: com.rimfaxe.xml.datamodel.Container container = (com.rimfaxe.xml.datamodel.Container) stack
407: .peek();
408: com.rimfaxe.xml.datamodel.Iterator iterator = container
409: .getIterator(id);
410: if (iterator == null)
411: return false;
412: if (iterator.fetch()) {
413: res = true;
414: stack.push(iterator.getContainer());
415: }
416: return res;
417: }
418:
419: /**
420: *
421: *
422: */
423: public void endList() {
424: stack.pop();
425: }
426:
427: // util
428:
429: /**
430: *
431: *
432: * @param in
433: * @return
434: */
435: protected String cleantext(String in) {
436: if (in.equalsIgnoreCase("null"))
437: return " ";
438: int countspaces = 0;
439: boolean firstChar = false;
440: StringBuffer res = new StringBuffer();
441: for (int i = 0; i < in.length(); i++) {
442: switch (in.charAt(i)) {
443: case ' ': {
444: }
445: case '\n': {
446: }
447: case '\r': {
448: if ((countspaces == 0) && (firstChar == true)) {
449: res.append(" ");
450: }
451: countspaces++;
452: break;
453: }
454: default: {
455: firstChar = true;
456: countspaces = 0;
457: res.append(in.charAt(i));
458: }
459: }
460: }
461: return res.toString();
462: }
463:
464: /**
465: *
466: *
467: * @param count
468: * @return
469: */
470: protected String blanks(int count) {
471: StringBuffer res = new StringBuffer();
472: for (int i = 0; i < count; i++) {
473: res.append(" ");
474: }
475: return res.toString();
476: }
477:
478: public String genericBodyTag(String name, NamedNodeMap attrs,
479: String body) {
480: //if (simpletags.contains(name)) return genericSimpleTag(name,attrs);
481:
482: StringBuffer att = new StringBuffer();
483: att.append("<" + name);
484:
485: for (int i = 0; i < attrs.getLength(); i++) {
486: Node node = attrs.item(i);
487: att.append(" " + node.getNodeName() + "=\""
488: + node.getNodeValue() + "\"");
489: }
490:
491: att.append(">");
492: att.append(body);
493: att.append("</" + name + ">\n");
494: return att.toString();
495: }
496:
497: public String genericSimpleTag(String name, NamedNodeMap attrs) {
498: StringBuffer att = new StringBuffer();
499: att.append("<" + name);
500:
501: for (int i = 0; i < attrs.getLength(); i++) {
502: Node node = attrs.item(i);
503: att.append(" " + node.getNodeName() + "=\""
504: + node.getNodeValue() + "\"");
505: }
506:
507: att.append("/>");
508:
509: return att.toString();
510: }
511:
512: /**
513: *
514: *
515: * @param name
516: * @param attrs
517: * @param body
518: * @return
519: */
520: public String genericTag(String name, NamedNodeMap attrs,
521: String body) {
522: StringBuffer att = new StringBuffer();
523: att.append("<" + name);
524:
525: for (int i = 0; i < attrs.getLength(); i++) {
526: Node node = attrs.item(i);
527: att.append(" " + node.getNodeName() + "=\""
528: + node.getNodeValue() + "\"");
529: }
530:
531: if (simpletags.contains(name)) {
532: att.append(">");
533: } else {
534: att.append(">");
535: att.append(body);
536: att.append("</" + name + ">\n");
537: }
538: return att.toString();
539: }
540:
541: /**
542: *
543: *
544: * @param in
545: * @return
546: */
547: public int csum(String in) {
548: int sum = 0;
549: for (int i = 0; i < in.length(); i++) {
550: Integer part = new Integer("" + in.charAt(i));
551: sum += part.intValue();
552: }
553: if (sum > 9)
554: return csum("" + sum);
555: else
556: return sum;
557: }
558:
559: /** Convert DOS string to UNIX
560: *
561: * @param in String to convert
562: * @return Converted string
563: */
564: public String dos2unix(String in) {
565: StringBuffer buf = new StringBuffer();
566:
567: com.rimfaxe.util.RimfaxeStringTokenizer tkz = new com.rimfaxe.util.RimfaxeStringTokenizer(
568: in, "\r", false);
569: while (tkz.hasMoreTokens())
570: buf.append(tkz.nextToken());
571: return buf.toString();
572: }
573: }
|