001: // TableToken.java
002: // ---------
003: // part of YaCy
004: // (C) by Michael Peter Christen; mc@anomic.de
005: // first published on http://www.anomic.de
006: // Frankfurt, Germany, 2007
007: // Created 22.02.2007
008: //
009: // This file is contributed by Franz Brausze
010: //
011: // $LastChangedDate: $
012: // $LastChangedRevision: $
013: // $LastChangedBy: $
014: //
015: // This program is free software; you can redistribute it and/or modify
016: // it under the terms of the GNU General Public License as published by
017: // the Free Software Foundation; either version 2 of the License, or
018: // (at your option) any later version.
019: //
020: // This program is distributed in the hope that it will be useful,
021: // but WITHOUT ANY WARRANTY; without even the implied warranty of
022: // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
023: // GNU General Public License for more details.
024: //
025: // You should have received a copy of the GNU General Public License
026: // along with this program; if not, write to the Free Software
027: // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
028: //
029: // Using this software in any meaning (reading, learning, copying, compiling,
030: // running) means that you agree that the Author(s) is (are) not responsible
031: // for cost, loss of data or any harm that may be caused directly or indirectly
032: // by usage of this softare or this documentation. The usage of this software
033: // is on your own risk. The installation and usage (starting/running) of this
034: // software may allow other people or application to access your computer and
035: // any attached devices and is highly dependent on the configuration of the
036: // software which must be done by the user of the software; the author(s) is
037: // (are) also not responsible for proper configuration and usage of the
038: // software, even if provoked by documentation provided together with
039: // the software.
040: //
041: // Any changes to this file according to the GPL as documented in the file
042: // gpl.txt aside this file in the shipment you received can be done to the
043: // lines that follows this copyright notice here, but changes must not be
044: // done inside the copyright notive above. A re-distribution must contain
045: // the intact and unchanged copyright notice.
046: // Contributions and changes to the program code must be marked as such.
047:
048: package de.anomic.data.wiki.tokens;
049:
050: import java.util.Arrays;
051: import java.util.HashMap;
052: import java.util.regex.Pattern;
053:
054: public class TableToken extends AbstractToken {
055:
056: private static final Pattern[] pattern = new Pattern[] { Pattern
057: .compile("\\{\\|" + // "{|"
058: "([^\n]|\n\\|[|-])*\n" + // new line must start with "||" or "|-"
059: "\\|\\}") // "|}"
060: };
061: private static final String[] blockElementNames = new String[] {
062: "table", "tr", "td" };
063:
064: protected void parse() {
065: String[] t = text.split("\n");
066: String[] tds;
067: StringBuffer sb = new StringBuffer();
068: sb.append("<table");
069: if (t[0].length() > 2)
070: sb.append(parseTableProperties(t[0].substring(2)));
071: sb.append(">\n");
072: boolean trOpen = false;
073: for (int i = 1, j, a; i < t.length - 1; i++) {
074: if (t[i].startsWith("|-")) {
075: if (trOpen)
076: sb.append("\t</tr>\n");
077: if (trOpen = (i < t.length - 2))
078: sb.append("\t<tr>\n");
079: } else if (t[i].startsWith("||")) {
080: tds = t[i].split("\\|\\|");
081: for (j = 0; j < tds.length; j++) {
082: if (tds[j].length() > (a = tds[j].indexOf('|')) + 1) { // don't print empty td's
083: sb.append("\t\t<td");
084: if (a > -1)
085: sb.append(parseTableProperties(tds[j]
086: .substring(0, a)));
087: sb.append(">").append(tds[j].substring(a + 1))
088: .append("</td>\n");
089: }
090: }
091: }
092: }
093: if (trOpen)
094: sb.append("\t</tr>\n");
095: this .markup = new String(sb.append("</table>"));
096: this .parsed = true;
097: }
098:
099: // from de.anomic.data.wikiCode.java.parseTableProperties, modified by [FB]
100: private static final String[] tps = { "rowspan", "colspan",
101: "vspace", "hspace", "cellspacing", "cellpadding", "border" };
102: private static final HashMap<String, String[]> ps = new HashMap<String, String[]>();
103: static {
104: Arrays.sort(tps);
105: String[] array;
106: Arrays.sort(array = new String[] { "void", "above", "below",
107: "hsides", "lhs", "rhs", "vsides", "box", "border" });
108: ps.put("frame", array);
109: Arrays.sort(array = new String[] { "none", "groups", "rows",
110: "cols", "all" });
111: ps.put("rules", array);
112: Arrays.sort(array = new String[] { "top", "middle", "bottom",
113: "baseline" });
114: ps.put("valign", array);
115: Arrays.sort(array = new String[] { "left", "right", "center" });
116: ps.put("align", array);
117: }
118:
119: // contributed by [MN]
120: /** This method takes possible table properties and tests if they are valid.
121: * Valid in this case means if they are a property for the table, tr or td
122: * tag as stated in the HTML Pocket Reference by Jennifer Niederst (1st edition)
123: * The method is important to avoid XSS attacks on the wiki via table properties.
124: * @param properties A string that may contain several table properties and/or junk.
125: * @return A string that only contains table properties.
126: */
127: private static StringBuffer parseTableProperties(
128: final String properties) {
129: String[] values = properties.replaceAll(""", "").split(
130: "[= ]"); //splitting the string at = and blanks
131: StringBuffer sb = new StringBuffer(properties.length());
132: String key, value;
133: String[] posVals;
134: int numberofvalues = values.length;
135: for (int i = 0; i < numberofvalues; i++) {
136: key = values[i].trim();
137: if (key.equals("nowrap")) {
138: addPair("nowrap", "nowrap", sb);
139: } else if (i + 1 < numberofvalues) {
140: value = values[++i].trim();
141: if ((key.equals("summary"))
142: || (key.equals("bgcolor") && value
143: .matches("#{0,1}[0-9a-fA-F]{1,6}|[a-zA-Z]{3,}"))
144: || ((key.equals("width") || key
145: .equals("height")) && value
146: .matches("\\d+%{0,1}"))
147: || ((posVals = (String[]) ps.get(key)) != null && Arrays
148: .binarySearch(posVals, value) >= 0)
149: || (Arrays.binarySearch(tps, key) >= 0 && value
150: .matches("\\d+"))) {
151: addPair(key, value, sb);
152: }
153: }
154: }
155: return sb;
156: }
157:
158: private static StringBuffer addPair(String key, String value,
159: StringBuffer sb) {
160: return sb.append(" ").append(key).append("=\"").append(value)
161: .append("\"");
162: }
163:
164: public Pattern[] getRegex() {
165: return pattern;
166: }
167:
168: public String[] getBlockElementNames() {
169: return blockElementNames;
170: }
171:
172: public boolean setText(String text, int patternNr) {
173: this .text = text;
174: this .parsed = false;
175: this .markup = null;
176: return true;
177: }
178: }
|