001: // SimpleToken.java
002: // ---------
003: // part of YaCy
004: // (C) by Michael Peter Christen; mc@anomic.de
005: // first published on http://www.anomic.de
006: // Frankfurt, Germany, 2007
007: // Created 22.02.2007
008: //
009: // This file is contributed by Franz Brausze
010: //
011: // $LastChangedDate: $
012: // $LastChangedRevision: $
013: // $LastChangedBy: $
014: //
015: // This program is free software; you can redistribute it and/or modify
016: // it under the terms of the GNU General Public License as published by
017: // the Free Software Foundation; either version 2 of the License, or
018: // (at your option) any later version.
019: //
020: // This program is distributed in the hope that it will be useful,
021: // but WITHOUT ANY WARRANTY; without even the implied warranty of
022: // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
023: // GNU General Public License for more details.
024: //
025: // You should have received a copy of the GNU General Public License
026: // along with this program; if not, write to the Free Software
027: // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
028: //
029: // Using this software in any meaning (reading, learning, copying, compiling,
030: // running) means that you agree that the Author(s) is (are) not responsible
031: // for cost, loss of data or any harm that may be caused directly or indirectly
032: // by usage of this softare or this documentation. The usage of this software
033: // is on your own risk. The installation and usage (starting/running) of this
034: // software may allow other people or application to access your computer and
035: // any attached devices and is highly dependent on the configuration of the
036: // software which must be done by the user of the software; the author(s) is
037: // (are) also not responsible for proper configuration and usage of the
038: // software, even if provoked by documentation provided together with
039: // the software.
040: //
041: // Any changes to this file according to the GPL as documented in the file
042: // gpl.txt aside this file in the shipment you received can be done to the
043: // lines that follows this copyright notice here, but changes must not be
044: // done inside the copyright notive above. A re-distribution must contain
045: // the intact and unchanged copyright notice.
046: // Contributions and changes to the program code must be marked as such.
047:
048: package de.anomic.data.wiki.tokens;
049:
050: import java.util.ArrayList;
051: import java.util.regex.Matcher;
052: import java.util.regex.Pattern;
053:
054: import de.anomic.data.wiki.wikiParserException;
055:
056: public class SimpleToken extends AbstractToken {
057:
058: protected String content = null;
059: protected int grade = 0;
060:
061: protected final Pattern[] pattern;
062: private final String[][] definitionList;
063: private final String[] blockElements;
064:
065: public SimpleToken(char firstChar, char lastChar,
066: String[][] definitionList, boolean isBlockElements) {
067: this .definitionList = definitionList;
068: int i;
069: if (isBlockElements) {
070: ArrayList<String> r = new ArrayList<String>();
071: int j;
072: for (i = 0; i < definitionList.length; i++)
073: if (definitionList[i] != null)
074: for (j = 0; j < definitionList[i].length; j++)
075: r.add(definitionList[i][j]);
076: this .blockElements = (String[]) r.toArray(new String[r
077: .size()]);
078: } else {
079: this .blockElements = null;
080: }
081:
082: for (i = 0; i < definitionList.length; i++)
083: if (definitionList[i] != null) {
084: i++;
085: break;
086: }
087: this .pattern = new Pattern[] { Pattern.compile("([\\"
088: + firstChar + "]{" + i + "," + definitionList.length
089: + "})" + "(.*?)" + "([\\" + lastChar + "]{" + i + ","
090: + definitionList.length + "})") };
091: }
092:
093: public String getMarkup() throws wikiParserException {
094: if (this .content == null) {
095: if (this .text == null) {
096: throw new IllegalArgumentException();
097: } else {
098: setText(this .text, 0);
099: }
100: }
101: if (!this .parsed)
102: parse();
103: return this .markup;
104: }
105:
106: protected void parse() throws wikiParserException {
107: String[] e;
108: if (this .grade >= this .definitionList.length
109: || (e = this .definitionList[this .grade]) == null)
110: throw new wikiParserException(
111: "Token not defined for grade: " + this .grade);
112: this .markup = getMarkup(e);
113: this .parsed = true;
114: }
115:
116: protected String getMarkup(String[] es) {
117: return getMarkup(es, false) + this .content
118: + getMarkup(es, true);
119: }
120:
121: protected String getMarkup(String[] es, boolean closing) {
122: StringBuffer result = new StringBuffer();
123: // backwards if closing
124: for (int i = (closing) ? es.length - 1 : 0, j; (closing && i >= 0)
125: ^ (!closing && i < es.length); i += (closing) ? -1 : +1) {
126: result.append("<");
127: if (closing) {
128: result.append("/");
129: if ((j = es[i].indexOf(' ')) > -1) {
130: result.append(es[i].substring(0, j));
131: } else {
132: result.append(es[i]);
133: }
134: } else {
135: result.append(es[i]);
136: }
137: result.append(">");
138: }
139: return new String(result);
140: }
141:
142: public boolean setText(String text, int patternNr) {
143: this .text = text;
144: this .markup = null;
145: this .parsed = false;
146: if (text != null) {
147: Matcher m = getRegex()[0].matcher(text);
148: if ((m.matches())
149: && (m.group(1).length() == m.group(3).length())
150: && (definitionList.length >= m.group(1).length())
151: && (definitionList[m.group(1).length() - 1] != null)) {
152: this .grade = m.group(1).length() - 1;
153: this .content = m.group(2);
154: return true;
155: }
156: }
157: return false;
158: }
159:
160: public Pattern[] getRegex() {
161: return this .pattern;
162: }
163:
164: public String[] getBlockElementNames() {
165: return this.blockElements;
166: }
167: }
|