001: /*
002: * Copyright 2005 by Paulo Soares.
003: *
004: * The contents of this file are subject to the Mozilla Public License Version 1.1
005: * (the "License"); you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at http://www.mozilla.org/MPL/
007: *
008: * Software distributed under the License is distributed on an "AS IS" basis,
009: * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
010: * for the specific language governing rights and limitations under the License.
011: *
012: * The Original Code is 'iText, a free JAVA-PDF library'.
013: *
014: * The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
015: * the Initial Developer are Copyright (C) 1999, 2000, 2001, 2002 by Bruno Lowagie.
016: * All Rights Reserved.
017: * Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
018: * are Copyright (C) 2000, 2001, 2002 by Paulo Soares. All Rights Reserved.
019: *
020: * Contributor(s): all the names of the contributors are added in the source code
021: * where applicable.
022: *
023: * Alternatively, the contents of this file may be used under the terms of the
024: * LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
025: * provisions of LGPL are applicable instead of those above. If you wish to
026: * allow use of your version of this file only under the terms of the LGPL
027: * License and not to allow others to use your version of this file under
028: * the MPL, indicate your decision by deleting the provisions above and
029: * replace them with the notice and other provisions required by the LGPL.
030: * If you do not delete the provisions above, a recipient may use your version
031: * of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
032: *
033: * This library is free software; you can redistribute it and/or modify it
034: * under the terms of the MPL as stated above or under the terms of the GNU
035: * Library General Public License as published by the Free Software Foundation;
036: * either version 2 of the License, or any later version.
037: *
038: * This library is distributed in the hope that it will be useful, but WITHOUT
039: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
040: * FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
041: * details.
042: *
043: * If you didn't download this code from the following link, you should check if
044: * you aren't using an obsolete version:
045: * http://www.lowagie.com/iText/
046: */
047:
048: package com.lowagie.text.pdf.hyphenation;
049:
050: import java.io.FileInputStream;
051: import java.io.IOException;
052: import java.io.InputStream;
053: import java.util.ArrayList;
054: import java.util.StringTokenizer;
055:
056: import com.lowagie.text.ExceptionConverter;
057: import com.lowagie.text.xml.simpleparser.SimpleXMLDocHandler;
058: import com.lowagie.text.xml.simpleparser.SimpleXMLParser;
059:
060: /** Parses the xml hyphenation pattern.
061: *
062: * @author Paulo Soares (psoares@consiste.pt)
063: */
064: public class SimplePatternParser implements SimpleXMLDocHandler,
065: PatternConsumer {
066: int currElement;
067: PatternConsumer consumer;
068: StringBuffer token;
069: ArrayList exception;
070: char hyphenChar;
071: SimpleXMLParser parser;
072:
073: static final int ELEM_CLASSES = 1;
074: static final int ELEM_EXCEPTIONS = 2;
075: static final int ELEM_PATTERNS = 3;
076: static final int ELEM_HYPHEN = 4;
077:
078: /** Creates a new instance of PatternParser2 */
079: public SimplePatternParser() {
080: token = new StringBuffer();
081: hyphenChar = '-'; // default
082: }
083:
084: public void parse(InputStream stream, PatternConsumer consumer) {
085: this .consumer = consumer;
086: try {
087: SimpleXMLParser.parse(this , stream);
088: } catch (IOException e) {
089: throw new ExceptionConverter(e);
090: } finally {
091: try {
092: stream.close();
093: } catch (Exception e) {
094: }
095: }
096: }
097:
098: protected static String getPattern(String word) {
099: StringBuffer pat = new StringBuffer();
100: int len = word.length();
101: for (int i = 0; i < len; i++) {
102: if (!Character.isDigit(word.charAt(i))) {
103: pat.append(word.charAt(i));
104: }
105: }
106: return pat.toString();
107: }
108:
109: protected ArrayList normalizeException(ArrayList ex) {
110: ArrayList res = new ArrayList();
111: for (int i = 0; i < ex.size(); i++) {
112: Object item = ex.get(i);
113: if (item instanceof String) {
114: String str = (String) item;
115: StringBuffer buf = new StringBuffer();
116: for (int j = 0; j < str.length(); j++) {
117: char c = str.charAt(j);
118: if (c != hyphenChar) {
119: buf.append(c);
120: } else {
121: res.add(buf.toString());
122: buf.setLength(0);
123: char[] h = new char[1];
124: h[0] = hyphenChar;
125: // we use here hyphenChar which is not necessarily
126: // the one to be printed
127: res.add(new Hyphen(new String(h), null, null));
128: }
129: }
130: if (buf.length() > 0) {
131: res.add(buf.toString());
132: }
133: } else {
134: res.add(item);
135: }
136: }
137: return res;
138: }
139:
140: protected String getExceptionWord(ArrayList ex) {
141: StringBuffer res = new StringBuffer();
142: for (int i = 0; i < ex.size(); i++) {
143: Object item = ex.get(i);
144: if (item instanceof String) {
145: res.append((String) item);
146: } else {
147: if (((Hyphen) item).noBreak != null) {
148: res.append(((Hyphen) item).noBreak);
149: }
150: }
151: }
152: return res.toString();
153: }
154:
155: protected static String getInterletterValues(String pat) {
156: StringBuffer il = new StringBuffer();
157: String word = pat + "a"; // add dummy letter to serve as sentinel
158: int len = word.length();
159: for (int i = 0; i < len; i++) {
160: char c = word.charAt(i);
161: if (Character.isDigit(c)) {
162: il.append(c);
163: i++;
164: } else {
165: il.append('0');
166: }
167: }
168: return il.toString();
169: }
170:
171: public void endDocument() {
172: }
173:
174: public void endElement(String tag) {
175: if (token.length() > 0) {
176: String word = token.toString();
177: switch (currElement) {
178: case ELEM_CLASSES:
179: consumer.addClass(word);
180: break;
181: case ELEM_EXCEPTIONS:
182: exception.add(word);
183: exception = normalizeException(exception);
184: consumer.addException(getExceptionWord(exception),
185: (ArrayList) exception.clone());
186: break;
187: case ELEM_PATTERNS:
188: consumer.addPattern(getPattern(word),
189: getInterletterValues(word));
190: break;
191: case ELEM_HYPHEN:
192: // nothing to do
193: break;
194: }
195: if (currElement != ELEM_HYPHEN) {
196: token.setLength(0);
197: }
198: }
199: if (currElement == ELEM_HYPHEN) {
200: currElement = ELEM_EXCEPTIONS;
201: } else {
202: currElement = 0;
203: }
204: }
205:
206: public void startDocument() {
207: }
208:
209: public void startElement(String tag, java.util.HashMap h) {
210: if (tag.equals("hyphen-char")) {
211: String hh = (String) h.get("value");
212: if (hh != null && hh.length() == 1) {
213: hyphenChar = hh.charAt(0);
214: }
215: } else if (tag.equals("classes")) {
216: currElement = ELEM_CLASSES;
217: } else if (tag.equals("patterns")) {
218: currElement = ELEM_PATTERNS;
219: } else if (tag.equals("exceptions")) {
220: currElement = ELEM_EXCEPTIONS;
221: exception = new ArrayList();
222: } else if (tag.equals("hyphen")) {
223: if (token.length() > 0) {
224: exception.add(token.toString());
225: }
226: exception.add(new Hyphen((String) h.get("pre"), (String) h
227: .get("no"), (String) h.get("post")));
228: currElement = ELEM_HYPHEN;
229: }
230: token.setLength(0);
231: }
232:
233: public void text(String str) {
234: StringTokenizer tk = new StringTokenizer(str);
235: while (tk.hasMoreTokens()) {
236: String word = tk.nextToken();
237: // System.out.println("\"" + word + "\"");
238: switch (currElement) {
239: case ELEM_CLASSES:
240: consumer.addClass(word);
241: break;
242: case ELEM_EXCEPTIONS:
243: exception.add(word);
244: exception = normalizeException(exception);
245: consumer.addException(getExceptionWord(exception),
246: (ArrayList) exception.clone());
247: exception.clear();
248: break;
249: case ELEM_PATTERNS:
250: consumer.addPattern(getPattern(word),
251: getInterletterValues(word));
252: break;
253: }
254: }
255: }
256:
257: // PatternConsumer implementation for testing purposes
258: public void addClass(String c) {
259: System.out.println("class: " + c);
260: }
261:
262: public void addException(String w, ArrayList e) {
263: System.out.println("exception: " + w + " : " + e.toString());
264: }
265:
266: public void addPattern(String p, String v) {
267: System.out.println("pattern: " + p + " : " + v);
268: }
269:
270: public static void main(String[] args) throws Exception {
271: try {
272: if (args.length > 0) {
273: SimplePatternParser pp = new SimplePatternParser();
274: pp.parse(new FileInputStream(args[0]), pp);
275: }
276: } catch (Exception e) {
277: e.printStackTrace();
278: }
279: }
280: }
|