001: package fit;
002:
003: // Copyright (c) 2002 Cunningham & Cunningham, Inc.
004: // Released under the terms of the GNU General Public License version 2 or later.
005:
006: import java.io.*;
007: import java.text.ParseException;
008:
/**
 * A node in a tree of HTML tags (by default {@code table} / {@code tr} / {@code td}).
 *
 * <p>Each node keeps the text before its opening tag ({@link #leader}), the opening tag
 * itself ({@link #tag}), the text between the tags ({@link #body}), the closing tag
 * ({@link #end}) and the text after the closing tag ({@link #trailer}). {@link #parts}
 * points at the first child node and {@link #more} at the next sibling, so printing a
 * parsed tree with {@link #print(PrintWriter)} reproduces the text it was parsed from.
 */
public class Parse {

    /** Text preceding the opening tag. */
    public String leader;
    /** The opening tag, including angle brackets and any attributes. */
    public String tag;
    /** Text between the tags; set to {@code null} or {@code ""} by the parser when {@link #parts} is set. */
    public String body;
    /** The closing tag, including angle brackets. */
    public String end;
    /** Text following the closing tag; set to {@code null} by the parser when {@link #more} is set. */
    public String trailer;

    /** Next sibling at the same level, or {@code null}. */
    public Parse more;
    /** First child node, or {@code null}. */
    public Parse parts;

    /** Default tag hierarchy used by {@link #Parse(String)}. */
    public static String tags[] = { "table", "tr", "td" };

    /** Number of footnote files written so far by {@link #footnote()}. */
    public static int footnoteFiles = 0;

    /**
     * Builds a node directly, without parsing.
     *
     * @param tag   tag name without angle brackets, e.g. {@code "td"}
     * @param body  text between the tags
     * @param parts first child node, or {@code null}
     * @param more  next sibling node, or {@code null}
     */
    public Parse(String tag, String body, Parse parts, Parse more) {
        this.leader = "\n";
        this.tag = "<" + tag + ">";
        this.body = body;
        this.end = "</" + tag + ">";
        this.trailer = "";
        this.parts = parts;
        this.more = more;
    }

    /**
     * Parses {@code text} using the default {@link #tags} hierarchy.
     *
     * @throws ParseException if the outermost tag cannot be found or is malformed
     */
    public Parse(String text) throws ParseException {
        this(text, tags, 0, 0);
    }

    /**
     * Parses {@code text} using the given tag hierarchy, outermost tag first.
     *
     * @throws ParseException if a required tag cannot be found or is malformed
     */
    public Parse(String text, String tags[]) throws ParseException {
        this(text, tags, 0, 0);
    }

    /**
     * Parses the first {@code tags[level]} element in {@code text}, then recursively its
     * children (next level, or a nested {@code tags[0]} at the innermost level) and its
     * following siblings. Tag matching is done on a lower-cased copy of the text.
     *
     * @param text   text to parse
     * @param tags   tag hierarchy, outermost first
     * @param level  index into {@code tags} of the tag to look for
     * @param offset position of {@code text} within the whole document; reported as the
     *               error offset of any {@link ParseException}
     * @throws ParseException if the opening tag, its matching closing tag, or the
     *                        terminating {@code >} of either is missing
     */
    public Parse(String text, String tags[], int level, int offset)
            throws ParseException {
        String lc = text.toLowerCase();
        String openMarker = "<" + tags[level];

        int startTag = lc.indexOf(openMarker);
        if (startTag < 0) {
            throw tagNotFound(tags[level], offset);
        }
        // Validate before adding 1: indexOf(...) + 1 can never be negative, so a missing
        // '>' would otherwise slip through as index 0.
        int openTagClose = lc.indexOf(">", startTag);
        if (openTagClose < 0) {
            throw tagNotFound(tags[level], offset);
        }
        int endTag = openTagClose + 1;

        int startEnd = findMatchingEndTag(lc, endTag, tags[level], offset);
        int endTagClose = lc.indexOf(">", startEnd);
        if (endTagClose < 0) {
            throw tagNotFound(tags[level], offset);
        }
        int endEnd = endTagClose + 1;
        int startMore = lc.indexOf(openMarker, endEnd);

        leader = text.substring(0, startTag);
        tag = text.substring(startTag, endTag);
        body = text.substring(endTag, startEnd);
        end = text.substring(startEnd, endEnd);
        trailer = text.substring(endEnd);

        if (level + 1 < tags.length) {
            parts = new Parse(body, tags, level + 1, offset + endTag);
            body = null;
        } else {
            // Innermost level: look for a nested outermost tag. Search the lower-cased
            // text so the check is case-insensitive like the rest of the parser.
            String lowerBody = lc.substring(endTag, startEnd);
            if (lowerBody.indexOf("<" + tags[0]) >= 0) {
                parts = new Parse(body, tags, 0, offset + endTag);
                body = "";
            }
        }

        if (startMore >= 0) {
            more = new Parse(trailer, tags, level, offset + endEnd);
            trailer = null;
        }
    }

    /** Creates the exception reported whenever a tag (or its terminator) is missing. */
    private static ParseException tagNotFound(String tag, int offset) {
        return new ParseException("Can't find tag: " + tag, offset);
    }

    /**
     * Finds the start of the closing tag that matches an already-consumed opening tag,
     * skipping over balanced nested occurrences of the same tag.
     *
     * <p>Added by Rick Mugridge, Feb 2005.
     *
     * @param lc            lower-cased text to search
     * @param matchFromHere index just past the opening tag
     * @param tag           tag name without angle brackets
     * @param offset        error offset to report in a {@link ParseException}
     * @return index in {@code lc} of the {@code <} of the matching closing tag
     * @throws ParseException if no matching closing tag exists, or a tag encountered
     *                        during the scan has no terminating {@code >}
     */
    protected static int findMatchingEndTag(String lc,
            int matchFromHere, String tag, int offset)
            throws ParseException {
        String openMarker = "<" + tag;
        String closeMarker = "</" + tag;
        int fromHere = matchFromHere;
        int depth = 1;
        while (true) {
            int nextClose = lc.indexOf(closeMarker, fromHere);
            if (nextClose < 0) {
                // The scan only moves forward, so with no closing tag ahead the open
                // tags can never be balanced.
                throw tagNotFound(tag, offset);
            }
            int nextOpen = lc.indexOf(openMarker, fromHere);
            boolean nestedOpenFirst = nextOpen >= 0 && nextOpen < nextClose;
            int tagStart = nestedOpenFirst ? nextOpen : nextClose;

            int tagClose = lc.indexOf(">", tagStart);
            if (tagClose < 0) {
                // An unterminated tag would otherwise reset the scan to index 0 and
                // make the loop rescan the same tags over and over.
                throw tagNotFound(tag, offset);
            }

            if (nestedOpenFirst) {
                depth++;
            } else {
                depth--;
                if (depth == 0) {
                    return nextClose;
                }
            }
            fromHere = tagClose + 1;
        }
    }

    /** Returns the number of nodes in the sibling chain starting at this node. */
    public int size() {
        return more == null ? 1 : more.size() + 1;
    }

    /** Returns the last node in the sibling chain starting at this node. */
    public Parse last() {
        return more == null ? this : more.last();
    }

    /** Returns the node reached by following {@link #parts} until it is {@code null}. */
    public Parse leaf() {
        return parts == null ? this : parts.leaf();
    }

    /**
     * Returns the {@code i}-th sibling counting from this node. If the chain ends
     * before index {@code i} is reached, the last sibling is returned.
     */
    public Parse at(int i) {
        return i == 0 || more == null ? this : more.at(i - 1);
    }

    /**
     * Returns child {@code j} of sibling {@code i}.
     * Throws {@link NullPointerException} if that sibling has no {@link #parts}.
     */
    public Parse at(int i, int j) {
        return at(i).parts.at(j);
    }

    /**
     * Returns grandchild {@code k} of child {@code j} of sibling {@code i}.
     * Throws {@link NullPointerException} if a node on the way has no {@link #parts}.
     */
    public Parse at(int i, int j, int k) {
        return at(i, j).parts.at(k);
    }

    /** Returns {@link #body} converted to plain text by {@link #htmlToText(String)}. */
    public String text() {
        return htmlToText(body);
    }

    /**
     * Converts an HTML fragment to plain text: line-break markup is normalized, all
     * other tags are removed, whitespace is condensed, and finally entities, line
     * breaks and smart quotes are unescaped.
     */
    public static String htmlToText(String s) {
        s = normalizeLineBreaks(s);
        s = removeNonBreakTags(s);
        s = condenseWhitespace(s);
        s = unescape(s);
        return s;
    }

    /** Removes every {@code <...>} span except the canonical {@code <br />}. */
    private static String removeNonBreakTags(String s) {
        int i = 0, j;
        while ((i = s.indexOf('<', i)) >= 0) {
            if ((j = s.indexOf('>', i + 1)) > 0) {
                if (!(s.substring(i, j + 1).equals("<br />"))) {
                    s = s.substring(0, i) + s.substring(j + 1);
                } else {
                    // Keep the break and resume scanning just past its '<'.
                    i++;
                }
            } else {
                // Unterminated '<': leave the remainder untouched.
                break;
            }
        }
        return s;
    }

    /**
     * Replaces {@code <br />} with a newline, decodes the supported HTML entities and
     * converts typographic quotes to their ASCII equivalents.
     */
    public static String unescape(String s) {
        s = s.replaceAll("<br />", "\n");
        s = unescapeEntities(s);
        s = unescapeSmartQuotes(s);
        return s;
    }

    /** Maps left/right double and single typographic quotes to {@code "} and {@code '}. */
    private static String unescapeSmartQuotes(String s) {
        s = s.replace('\u201c', '"');
        s = s.replace('\u201d', '"');
        s = s.replace('\u2018', '\'');
        s = s.replace('\u2019', '\'');
        return s;
    }

    /**
     * Decodes {@code &lt;}, {@code &gt;}, {@code &nbsp;}, {@code &quot;} and
     * {@code &amp;}. {@code &amp;} is handled last so that an escaped entity such as
     * {@code &amp;lt;} yields the literal text {@code &lt;} rather than {@code <}.
     */
    private static String unescapeEntities(String s) {
        s = s.replaceAll("&lt;", "<");
        s = s.replaceAll("&gt;", ">");
        s = s.replaceAll("&nbsp;", " ");
        s = s.replaceAll("&quot;", "\"");
        s = s.replaceAll("&amp;", "&");
        return s;
    }

    /**
     * Rewrites every {@code <br>} variant, and every paragraph boundary
     * ({@code </p>} followed by {@code <p ...>}), as the canonical {@code <br />}.
     */
    private static String normalizeLineBreaks(String s) {
        s = s.replaceAll("<\\s*br\\s*/?\\s*>", "<br />");
        s = s.replaceAll("<\\s*/\\s*p\\s*>\\s*<\\s*p( .*?)?>", "<br />");
        return s;
    }

    /**
     * Collapses each run of whitespace to a single space, turns non-breaking spaces
     * (character 160 and the {@code &nbsp;} entity) into ordinary spaces, and trims
     * the result.
     */
    public static String condenseWhitespace(String s) {
        final char NON_BREAKING_SPACE = (char) 160;

        s = s.replaceAll("\\s+", " ");
        s = s.replace(NON_BREAKING_SPACE, ' ');
        s = s.replaceAll("&nbsp;", " ");
        s = s.trim();
        return s;
    }

    /** Inserts {@code text} just before the closing {@code >} of {@link #tag}. */
    public void addToTag(String text) {
        int last = tag.length() - 1;
        tag = tag.substring(0, last) + text + ">";
    }

    /** Appends {@code text} to {@link #body}. */
    public void addToBody(String text) {
        body = body + text;
    }

    /**
     * Writes this node, its children and its following siblings to {@code out}.
     * {@link #body} is written only when there are no {@link #parts}, and
     * {@link #trailer} only when there is no {@link #more}.
     */
    public void print(PrintWriter out) {
        out.print(leader);
        out.print(tag);
        if (parts != null) {
            parts.print(out);
        } else {
            out.print(body);
        }
        out.print(end);
        if (more != null) {
            more.print(out);
        } else {
            out.print(trailer);
        }
    }

    /**
     * Writes this parse tree to a numbered file under {@code Reports/footnotes/} and
     * returns an HTML link to it.
     *
     * @return the link markup; {@code "[-]"} once 25 footnote files have been
     *         produced; {@code "[!]"} if the file cannot be written
     */
    public String footnote() {
        if (footnoteFiles >= 25) {
            return "[-]";
        }
        try {
            int thisFootnote = ++footnoteFiles;
            String html = "footnotes/" + thisFootnote + ".html";
            File file = new File("Reports/" + html);
            file.delete();
            PrintWriter output = new PrintWriter(
                    new BufferedWriter(new FileWriter(file)));
            try {
                print(output);
            } finally {
                // Release the file handle even if printing fails part-way.
                output.close();
            }
            return "<a href=/fit/Release/Reports/" + html + "> ["
                    + thisFootnote + "]</a>";
        } catch (IOException e) {
            return "[!]";
        }
    }
}
|