01: /* Copyright 2004 Ryan Ackley
02: *
03: * Licensed under the Apache License, Version 2.0 (the "License");
04: * you may not use this file except in compliance with the License.
05: * You may obtain a copy of the License at
06: *
07: * http://www.apache.org/licenses/LICENSE-2.0
08: *
09: * Unless required by applicable law or agreed to in writing, software
10: * distributed under the License is distributed on an "AS IS" BASIS,
11: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12: * See the License for the specific language governing permissions and
13: * limitations under the License.
14: */
15: package org.textmining.text.extraction;
16:
/**
 * Accumulates text extracted from a Word document while filtering out the
 * field markup embedded in the character stream.
 *
 * <p>Characters are fed in through {@link #append(String)} and are inspected
 * one by one before they reach the output. Three control characters drive the
 * filtering: 0x13 (field begin), 0x14 (field separator) and 0x15 (field end).
 * Text between a field begin and its separator is dropped; text between the
 * separator and the field end is collected in a pending buffer and copied to
 * the output when the outermost field closes. When a nested field closes, the
 * pending buffer is discarded rather than copied.
 *
 * @author Ryan Ackley
 * @version 1.0
 */
public class WordTextBuffer {

    /** Control character that opens a field. */
    private static final char FIELD_BEGIN = 0x13;

    /** Control character that ends the held part of a field. */
    private static final char FIELD_SEPARATOR = 0x14;

    /** Control character that closes a field. */
    private static final char FIELD_END = 0x15;

    /** Text that has been accepted for output. */
    StringBuffer _buf;

    /**
     * True from a field begin until the next field separator, or until a field
     * end brings the nesting depth back to zero or below.
     */
    public boolean _hold;

    /** The most recently seen field control character. */
    char lastDelimiter = FIELD_SEPARATOR;

    /** Pending text collected inside a field and not yet copied to {@link #_buf}. */
    StringBuffer last = new StringBuffer();

    /** Current field nesting depth: incremented on field begin, decremented on field end. */
    int inside = 0;

    /** Creates an empty buffer that is not inside any field. */
    public WordTextBuffer() {
        _buf = new StringBuffer();
        _hold = false;
    }

    /**
     * Feeds a run of document characters through the field filter.
     *
     * <p>A carriage return is always written to the output as {@code "\r\n"},
     * regardless of the current field state.
     *
     * @param text the characters to process; {@code null} is ignored
     */
    public void append(String text) {
        if (text == null) {
            return;
        }

        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
                case '\r':
                    _buf.append("\r\n");
                    break;
                case FIELD_BEGIN:
                    _hold = true;
                    inside++;
                    lastDelimiter = FIELD_BEGIN;
                    break;
                case FIELD_SEPARATOR:
                    _hold = false;
                    last = new StringBuffer();
                    lastDelimiter = FIELD_SEPARATOR;
                    break;
                case FIELD_END:
                    inside--;
                    if (inside == 0) {
                        _buf.append(last);
                    }
                    if (inside <= 0) {
                        // A field that ends without a separator never clears
                        // the hold flag on its own. Clear it here so the text
                        // that follows the field goes straight to the output
                        // instead of being parked in the pending buffer, where
                        // the next separator would discard it.
                        _hold = false;
                    }
                    last = new StringBuffer();
                    lastDelimiter = FIELD_END;
                    break;
                default:
                    if (!_hold) {
                        if (inside <= 0) {
                            // Not inside any field: plain document text.
                            _buf.append(c);
                        } else if (lastDelimiter != FIELD_END) {
                            // Inside a field, after its separator.
                            last.append(c);
                        }
                        // Otherwise a nested field has just closed: dropped.
                    } else if (lastDelimiter == FIELD_END) {
                        // Still held after a nested field closed without a
                        // separator: keep the text as pending.
                        last.append(c);
                    }
                    break;
            }
        }
    }

    /**
     * Returns the text accepted so far.
     *
     * <p>Note that this call has a side effect: any pending text is first
     * moved into the output buffer and the pending buffer is reset.
     *
     * @return the accumulated text
     */
    public String toString() {
        if (last != null && last.length() > 0) {
            _buf.append(last.toString());
            last = new StringBuffer();
        }

        return _buf.toString();
    }

    /**
     * Overrides the current field nesting depth.
     *
     * @param i the new nesting depth
     */
    public void setInside(int i) {
        inside = i;
    }

}
|