001: /*
002: * Copyright 2004 Paulo Soares
003: *
004: * The contents of this file are subject to the Mozilla Public License Version 1.1
005: * (the "License"); you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at http://www.mozilla.org/MPL/
007: *
008: * Software distributed under the License is distributed on an "AS IS" basis,
009: * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
010: * for the specific language governing rights and limitations under the License.
011: *
012: * The Original Code is 'iText, a free JAVA-PDF library'.
013: *
014: * The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
015: * the Initial Developer are Copyright (C) 1999, 2000, 2001, 2002 by Bruno Lowagie.
016: * All Rights Reserved.
017: * Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
018: * are Copyright (C) 2000, 2001, 2002 by Paulo Soares. All Rights Reserved.
019: *
020: * Contributor(s): all the names of the contributors are added in the source code
021: * where applicable.
022: *
023: * Alternatively, the contents of this file may be used under the terms of the
024: * LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
025: * provisions of LGPL are applicable instead of those above. If you wish to
026: * allow use of your version of this file only under the terms of the LGPL
027: * License and not to allow others to use your version of this file under
028: * the MPL, indicate your decision by deleting the provisions above and
029: * replace them with the notice and other provisions required by the LGPL.
030: * If you do not delete the provisions above, a recipient may use your version
031: * of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
032: *
033: * This library is free software; you can redistribute it and/or modify it
034: * under the terms of the MPL as stated above or under the terms of the GNU
035: * Library General Public License as published by the Free Software Foundation;
036: * either version 2 of the License, or any later version.
037: *
038: * This library is distributed in the hope that it will be useful, but WITHOUT
039: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
040: * FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
041: * details.
042: *
043: * If you didn't download this code from the following link, you should check if
044: * you aren't using an obsolete version:
045: * http://www.lowagie.com/iText/
046: */
047:
048: package com.lowagie.text.html.simpleparser;
049:
050: import java.io.File;
051: import java.io.IOException;
052: import java.io.Reader;
053: import java.util.ArrayList;
054: import java.util.HashMap;
055: import java.util.Stack;
056: import java.util.StringTokenizer;
057:
058: import com.lowagie.text.Chunk;
059: import com.lowagie.text.DocListener;
060: import com.lowagie.text.DocumentException;
061: import com.lowagie.text.Element;
062: import com.lowagie.text.ExceptionConverter;
063: import com.lowagie.text.FontFactoryImp;
064: import com.lowagie.text.HeaderFooter;
065: import com.lowagie.text.Image;
066: import com.lowagie.text.ListItem;
067: import com.lowagie.text.Paragraph;
068: import com.lowagie.text.Phrase;
069: import com.lowagie.text.Rectangle;
070: import com.lowagie.text.TextElementArray;
071: import com.lowagie.text.pdf.PdfPTable;
072: import com.lowagie.text.xml.simpleparser.SimpleXMLDocHandler;
073: import com.lowagie.text.xml.simpleparser.SimpleXMLParser;
074:
075: public class HTMLWorker implements SimpleXMLDocHandler, DocListener {
076:
077: protected ArrayList objectList; // elements received through add(Element); only assigned by parseToList in this file
078: protected DocListener document; // receives every finished element (parseToList points it at the worker itself)
079: private Paragraph currentParagraph; // paragraph currently being filled, or null when none is open
080: private ChainedProperties cprops = new ChainedProperties(); // tag properties, added and removed as tags open and close
081: private Stack stack = new Stack(); // open containers: paragraphs saved by <a>, lists, list items, table and cell builders
082: private boolean pendingTR = false; // a <tr> was started and has not been ended yet
083: private boolean pendingTD = false; // a <td>/<th> was started and has not been ended yet
084: private boolean pendingLI = false; // a <li> was started and has not been ended yet
085: private StyleSheet style = new StyleSheet(); // applied to the attributes of every handled tag
086: private boolean isPRE = false; // true between <pre> and </pre>; text() then adds content unmodified
087: private Stack tableState = new Stack(); // saved {pendingTR, pendingTD} pairs, one per open <table>
088: private boolean skipText = false; // when true, text() discards character data
089: private HashMap interfaceProps; // optional hooks; keys read in this class: font_factory, img_provider, img_static, img_baseurl, img_interface, alink_interface
090: private FactoryProperties factoryProperties = new FactoryProperties(); // creates the text chunks; receives the font factory from setInterfaceProps
091:
/**
 * Creates a worker that hands every element it builds to the given listener.
 *
 * @param document the listener that receives the generated elements
 */
public HTMLWorker(DocListener document) {
    this.document = document;
}
096:
097: public void setStyleSheet(StyleSheet style) {
098: this .style = style;
099: }
100:
101: public StyleSheet getStyleSheet() {
102: return style;
103: }
104:
105: public void setInterfaceProps(HashMap interfaceProps) {
106: this .interfaceProps = interfaceProps;
107: FontFactoryImp ff = null;
108: if (interfaceProps != null)
109: ff = (FontFactoryImp) interfaceProps.get("font_factory");
110: if (ff != null)
111: factoryProperties.setFontImp(ff);
112: }
113:
114: public HashMap getInterfaceProps() {
115: return interfaceProps;
116: }
117:
118: public void parse(Reader reader) throws IOException {
119: SimpleXMLParser.parse(this , null, reader, true);
120: }
121:
122: public static ArrayList parseToList(Reader reader, StyleSheet style)
123: throws IOException {
124: return parseToList(reader, style, null);
125: }
126:
127: public static ArrayList parseToList(Reader reader,
128: StyleSheet style, HashMap interfaceProps)
129: throws IOException {
130: HTMLWorker worker = new HTMLWorker(null);
131: if (style != null)
132: worker.style = style;
133: worker.document = worker;
134: worker.setInterfaceProps(interfaceProps);
135: worker.objectList = new ArrayList();
136: worker.parse(reader);
137: return worker.objectList;
138: }
139:
140: public void endDocument() {
141: try {
142: for (int k = 0; k < stack.size(); ++k)
143: document.add((Element) stack.elementAt(k));
144: if (currentParagraph != null)
145: document.add(currentParagraph);
146: currentParagraph = null;
147: } catch (Exception e) {
148: throw new ExceptionConverter(e);
149: }
150: }
151:
152: public void startDocument() {
153: HashMap h = new HashMap();
154: style.applyStyle("body", h);
155: cprops.addToChain("body", h);
156: }
157:
158: public void startElement(String tag, HashMap h) {
159: if (!tagsSupported.containsKey(tag))
160: return;
161: try {
162: style.applyStyle(tag, h);
163: String follow = (String) FactoryProperties.followTags
164: .get(tag);
165: if (follow != null) {
166: HashMap prop = new HashMap();
167: prop.put(follow, null);
168: cprops.addToChain(follow, prop);
169: return;
170: }
171: FactoryProperties.insertStyle(h);
172: if (tag.equals("a")) {
173: cprops.addToChain(tag, h);
174: if (currentParagraph == null)
175: currentParagraph = new Paragraph();
176: stack.push(currentParagraph);
177: currentParagraph = new Paragraph();
178: return;
179: }
180: if (tag.equals("br")) {
181: if (currentParagraph == null)
182: currentParagraph = new Paragraph();
183: currentParagraph.add(factoryProperties.createChunk(
184: "\n", cprops));
185: return;
186: }
187: if (tag.equals("font") || tag.equals("span")) {
188: cprops.addToChain(tag, h);
189: return;
190: }
191: if (tag.equals("img")) {
192: String src = (String) h.get("src");
193: if (src == null)
194: return;
195: cprops.addToChain(tag, h);
196: Image img = null;
197: if (interfaceProps != null) {
198: ImageProvider ip = (ImageProvider) interfaceProps
199: .get("img_provider");
200: if (ip != null)
201: img = ip.getImage(src, h, cprops, document);
202: if (img == null) {
203: HashMap images = (HashMap) interfaceProps
204: .get("img_static");
205: if (images != null) {
206: Image tim = (Image) images.get(src);
207: if (tim != null)
208: img = Image.getInstance(tim);
209: } else {
210: if (!src.startsWith("http")) { // relative src references only
211: String baseurl = (String) interfaceProps
212: .get("img_baseurl");
213: if (baseurl != null) {
214: src = baseurl + src;
215: img = Image.getInstance(src);
216: }
217: }
218: }
219: }
220: }
221: if (img == null) {
222: if (!src.startsWith("http")) {
223: String path = cprops.getProperty("image_path");
224: if (path == null)
225: path = "";
226: src = new File(path, src).getPath();
227: }
228: img = Image.getInstance(src);
229: }
230: String align = (String) h.get("align");
231: String width = (String) h.get("width");
232: String height = (String) h.get("height");
233: String before = cprops.getProperty("before");
234: String after = cprops.getProperty("after");
235: if (before != null)
236: img.setSpacingBefore(Float.parseFloat(before));
237: if (after != null)
238: img.setSpacingAfter(Float.parseFloat(after));
239: float wp = lengthParse(width, (int) img.getWidth());
240: float lp = lengthParse(height, (int) img.getHeight());
241: if (wp > 0 && lp > 0)
242: img.scalePercent(wp > lp ? lp : wp);
243: else if (wp > 0)
244: img.scalePercent(wp);
245: else if (lp > 0)
246: img.scalePercent(lp);
247: img.setWidthPercentage(0);
248: if (align != null) {
249: endElement("p");
250: int ralign = Image.MIDDLE;
251: if (align.equalsIgnoreCase("left"))
252: ralign = Image.LEFT;
253: else if (align.equalsIgnoreCase("right"))
254: ralign = Image.RIGHT;
255: img.setAlignment(ralign);
256: Img i = null;
257: boolean skip = false;
258: if (interfaceProps != null) {
259: i = (Img) interfaceProps.get("img_interface");
260: if (i != null)
261: skip = i.process(img, h, cprops, document);
262: }
263: if (!skip)
264: document.add(img);
265: cprops.removeChain(tag);
266: } else {
267: cprops.removeChain(tag);
268: if (currentParagraph == null)
269: currentParagraph = FactoryProperties
270: .createParagraph(cprops);
271: currentParagraph.add(new Chunk(img, 0, 0));
272: }
273: return;
274: }
275: endElement("p");
276: if (tag.equals("h1") || tag.equals("h2")
277: || tag.equals("h3") || tag.equals("h4")
278: || tag.equals("h5") || tag.equals("h6")) {
279: if (!h.containsKey("size")) {
280: int v = 7 - Integer.parseInt(tag.substring(1));
281: h.put("size", Integer.toString(v));
282: }
283: cprops.addToChain(tag, h);
284: return;
285: }
286: if (tag.equals("ul")) {
287: if (pendingLI)
288: endElement("li");
289: skipText = true;
290: cprops.addToChain(tag, h);
291: com.lowagie.text.List list = new com.lowagie.text.List(
292: false, 10);
293: list.setListSymbol("\u2022");
294: stack.push(list);
295: return;
296: }
297: if (tag.equals("ol")) {
298: if (pendingLI)
299: endElement("li");
300: skipText = true;
301: cprops.addToChain(tag, h);
302: com.lowagie.text.List list = new com.lowagie.text.List(
303: true, 10);
304: stack.push(list);
305: return;
306: }
307: if (tag.equals("li")) {
308: if (pendingLI)
309: endElement("li");
310: skipText = false;
311: pendingLI = true;
312: cprops.addToChain(tag, h);
313: stack.push(FactoryProperties.createListItem(cprops));
314: return;
315: }
316: if (tag.equals("div") || tag.equals("body")) {
317: cprops.addToChain(tag, h);
318: return;
319: }
320: if (tag.equals("pre")) {
321: if (!h.containsKey("face")) {
322: h.put("face", "Courier");
323: }
324: cprops.addToChain(tag, h);
325: isPRE = true;
326: return;
327: }
328: if (tag.equals("p")) {
329: cprops.addToChain(tag, h);
330: currentParagraph = FactoryProperties.createParagraph(h);
331: return;
332: }
333: if (tag.equals("tr")) {
334: if (pendingTR)
335: endElement("tr");
336: skipText = true;
337: pendingTR = true;
338: cprops.addToChain("tr", h);
339: return;
340: }
341: if (tag.equals("td") || tag.equals("th")) {
342: if (pendingTD)
343: endElement(tag);
344: skipText = false;
345: pendingTD = true;
346: cprops.addToChain("td", h);
347: stack.push(new IncCell(tag, cprops));
348: return;
349: }
350: if (tag.equals("table")) {
351: cprops.addToChain("table", h);
352: IncTable table = new IncTable(h);
353: stack.push(table);
354: tableState.push(new boolean[] { pendingTR, pendingTD });
355: pendingTR = pendingTD = false;
356: skipText = true;
357: return;
358: }
359: } catch (Exception e) {
360: throw new ExceptionConverter(e);
361: }
362: }
363:
364: public void endElement(String tag) {
365: if (!tagsSupported.containsKey(tag))
366: return;
367: try {
368: String follow = (String) FactoryProperties.followTags
369: .get(tag);
370: if (follow != null) {
371: cprops.removeChain(follow);
372: return;
373: }
374: if (tag.equals("font") || tag.equals("span")) {
375: cprops.removeChain(tag);
376: return;
377: }
378: if (tag.equals("a")) {
379: if (currentParagraph == null)
380: currentParagraph = new Paragraph();
381: ALink i = null;
382: boolean skip = false;
383: if (interfaceProps != null) {
384: i = (ALink) interfaceProps.get("alink_interface");
385: if (i != null)
386: skip = i.process(currentParagraph, cprops);
387: }
388: if (!skip) {
389: String href = cprops.getProperty("href");
390: if (href != null) {
391: ArrayList chunks = currentParagraph.getChunks();
392: for (int k = 0; k < chunks.size(); ++k) {
393: Chunk ck = (Chunk) chunks.get(k);
394: ck.setAnchor(href);
395: }
396: }
397: }
398: Paragraph tmp = (Paragraph) stack.pop();
399: Phrase tmp2 = new Phrase();
400: tmp2.add(currentParagraph);
401: tmp.add(tmp2);
402: currentParagraph = tmp;
403: cprops.removeChain("a");
404: return;
405: }
406: if (tag.equals("br")) {
407: return;
408: }
409: if (currentParagraph != null) {
410: if (stack.empty())
411: document.add(currentParagraph);
412: else {
413: Object obj = stack.pop();
414: if (obj instanceof TextElementArray) {
415: TextElementArray current = (TextElementArray) obj;
416: current.add(currentParagraph);
417: }
418: stack.push(obj);
419: }
420: }
421: currentParagraph = null;
422: if (tag.equals("ul") || tag.equals("ol")) {
423: if (pendingLI)
424: endElement("li");
425: skipText = false;
426: cprops.removeChain(tag);
427: if (stack.empty())
428: return;
429: Object obj = stack.pop();
430: if (!(obj instanceof com.lowagie.text.List)) {
431: stack.push(obj);
432: return;
433: }
434: if (stack.empty())
435: document.add((Element) obj);
436: else
437: ((TextElementArray) stack.peek()).add(obj);
438: return;
439: }
440: if (tag.equals("li")) {
441: pendingLI = false;
442: skipText = true;
443: cprops.removeChain(tag);
444: if (stack.empty())
445: return;
446: Object obj = stack.pop();
447: if (!(obj instanceof ListItem)) {
448: stack.push(obj);
449: return;
450: }
451: if (stack.empty()) {
452: document.add((Element) obj);
453: return;
454: }
455: Object list = stack.pop();
456: if (!(list instanceof com.lowagie.text.List)) {
457: stack.push(list);
458: return;
459: }
460: ListItem item = (ListItem) obj;
461: ((com.lowagie.text.List) list).add(item);
462: ArrayList cks = item.getChunks();
463: if (!cks.isEmpty())
464: item.getListSymbol().setFont(
465: ((Chunk) cks.get(0)).getFont());
466: stack.push(list);
467: return;
468: }
469: if (tag.equals("div") || tag.equals("body")) {
470: cprops.removeChain(tag);
471: return;
472: }
473: if (tag.equals("pre")) {
474: cprops.removeChain(tag);
475: isPRE = false;
476: return;
477: }
478: if (tag.equals("p")) {
479: cprops.removeChain(tag);
480: return;
481: }
482: if (tag.equals("h1") || tag.equals("h2")
483: || tag.equals("h3") || tag.equals("h4")
484: || tag.equals("h5") || tag.equals("h6")) {
485: cprops.removeChain(tag);
486: return;
487: }
488: if (tag.equals("table")) {
489: if (pendingTR)
490: endElement("tr");
491: cprops.removeChain("table");
492: IncTable table = (IncTable) stack.pop();
493: PdfPTable tb = table.buildTable();
494: tb.setSplitRows(true);
495: if (stack.empty())
496: document.add(tb);
497: else
498: ((TextElementArray) stack.peek()).add(tb);
499: boolean state[] = (boolean[]) tableState.pop();
500: pendingTR = state[0];
501: pendingTD = state[1];
502: skipText = false;
503: return;
504: }
505: if (tag.equals("tr")) {
506: if (pendingTD)
507: endElement("td");
508: pendingTR = false;
509: cprops.removeChain("tr");
510: ArrayList cells = new ArrayList();
511: IncTable table = null;
512: while (true) {
513: Object obj = stack.pop();
514: if (obj instanceof IncCell) {
515: cells.add(((IncCell) obj).getCell());
516: }
517: if (obj instanceof IncTable) {
518: table = (IncTable) obj;
519: break;
520: }
521: }
522: table.addCols(cells);
523: table.endRow();
524: stack.push(table);
525: skipText = true;
526: return;
527: }
528: if (tag.equals("td") || tag.equals("th")) {
529: pendingTD = false;
530: cprops.removeChain("td");
531: skipText = true;
532: return;
533: }
534: } catch (Exception e) {
535: throw new ExceptionConverter(e);
536: }
537: }
538:
539: public void text(String str) {
540: if (skipText)
541: return;
542: String content = str;
543: if (isPRE) {
544: if (currentParagraph == null)
545: currentParagraph = new Paragraph();
546: currentParagraph.add(factoryProperties.createChunk(content,
547: cprops));
548: return;
549: }
550: if (content.trim().length() == 0 && content.indexOf(' ') < 0) {
551: return;
552: }
553:
554: StringBuffer buf = new StringBuffer();
555: int len = content.length();
556: char character;
557: boolean newline = false;
558: for (int i = 0; i < len; i++) {
559: switch (character = content.charAt(i)) {
560: case ' ':
561: if (!newline) {
562: buf.append(character);
563: }
564: break;
565: case '\n':
566: if (i > 0) {
567: newline = true;
568: buf.append(' ');
569: }
570: break;
571: case '\r':
572: break;
573: case '\t':
574: break;
575: default:
576: newline = false;
577: buf.append(character);
578: }
579: }
580: if (currentParagraph == null)
581: currentParagraph = FactoryProperties
582: .createParagraph(cprops);
583: currentParagraph.add(factoryProperties.createChunk(buf
584: .toString(), cprops));
585: }
586:
587: public boolean add(Element element) throws DocumentException {
588: objectList.add(element);
589: return true;
590: }
591:
592: public void clearTextWrap() throws DocumentException {
593: }
594:
595: public void close() {
596: }
597:
598: public boolean newPage() {
599: return true;
600: }
601:
602: public void open() {
603: }
604:
605: public void resetFooter() {
606: }
607:
608: public void resetHeader() {
609: }
610:
611: public void resetPageCount() {
612: }
613:
614: public void setFooter(HeaderFooter footer) {
615: }
616:
617: public void setHeader(HeaderFooter header) {
618: }
619:
620: public boolean setMarginMirroring(boolean marginMirroring) {
621: return true;
622: }
623:
624: public boolean setMargins(float marginLeft, float marginRight,
625: float marginTop, float marginBottom) {
626: return true;
627: }
628:
629: public void setPageCount(int pageN) {
630: }
631:
632: public boolean setPageSize(Rectangle pageSize) {
633: return true;
634: }
635:
/** Space-separated names of the tags this worker reacts to. */
public static final String tagsSupportedString = "ol ul li a pre font span br p div body table td th tr i b u sub sup em strong s strike"
        + " h1 h2 h3 h4 h5 h6 img";

/** Lookup table with one key per name in tagsSupportedString; every value is null. */
public static final HashMap tagsSupported = new HashMap();

static {
    for (StringTokenizer names = new StringTokenizer(tagsSupportedString); names.hasMoreTokens();) {
        tagsSupported.put(names.nextToken(), null);
    }
}
646:
647: private static float lengthParse(String txt, int c) {
648: if (txt == null)
649: return -1;
650: if (txt.endsWith("%")) {
651: float vf = Float.parseFloat(txt.substring(0,
652: txt.length() - 1));
653: return vf;
654: }
655: int v = Integer.parseInt(txt);
656: return (float) v / c * 100f;
657: }
658: }
|