001: /* Copyright 2002-2005 Elliotte Rusty Harold
002:
003: This library is free software; you can redistribute it and/or modify
004: it under the terms of version 2.1 of the GNU Lesser General Public
005: License as published by the Free Software Foundation.
006:
007: This library is distributed in the hope that it will be useful,
008: but WITHOUT ANY WARRANTY; without even the implied warranty of
009: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
010: GNU Lesser General Public License for more details.
011:
012: You should have received a copy of the GNU Lesser General Public
013: License along with this library; if not, write to the
014: Free Software Foundation, Inc., 59 Temple Place, Suite 330,
015: Boston, MA 02111-1307 USA
016:
017: You can contact Elliotte Rusty Harold by sending e-mail to
018: elharo@metalab.unc.edu. Please include the word "XOM" in the
019: subject line. The XOM home page is located at http://www.xom.nu/
020: */
021:
022: package nu.xom;
023:
024: import java.io.UnsupportedEncodingException;
025:
026: /**
027: * <p>
028: * This class represents a run of text.
029: * CDATA sections are not treated differently than
030: * normal text. <code>Text</code> objects may be adjacent to other
031: * <code>Text</code> objects.
032: * </p>
033:
034: * @author Elliotte Rusty Harold
035: * @version 1.1b3
036: *
037: */
038: public class Text extends Node {
039:
040: private byte[] data;
041:
042: /**
043: * <p>
044: * This constructor creates a new <code>Text</code> object.
045: * The data is checked for legality according to XML 1.0 rules.
046: * Characters that can be serialized by escaping them
047: * such as < and & are allowed. However, characters
048: * such as the form feed, null, vertical tab,
049: * unmatched halves of surrogate pairs,
050: * and 0xFFFE and 0xFFFF are not allowed.
051: * </p>
052: *
053: * @param data the initial text of the object
054: *
055: * @throws IllegalCharacterDataException if data contains any
056: * characters which are illegal in well-formed XML 1.0 such as
057: * null, vertical tab, or unmatched halves of surrogate pairs
058: */
059: public Text(String data) {
060: _setValue(data);
061: }
062:
063: /**
064: * <p>
065: * Creates a copy of the specified <code>Text</code> object.
066: * </p>
067: *
068: * @param text the <code>Text</code> object to copy
069: */
070: public Text(Text text) {
071: // I'm relying here on the data array being immutable.
072: // If this ever changes, e.g. by adding an append method,
073: // this method needs to change too.
074: this .data = text.data;
075: }
076:
077: private Text() {
078: }
079:
080: static Text build(String data) {
081:
082: Text result = new Text();
083: try {
084: result.data = data.getBytes("UTF8");
085: } catch (UnsupportedEncodingException ex) {
086: throw new RuntimeException("Bad VM! Does not support UTF-8");
087: }
088: return result;
089:
090: }
091:
092: /**
093: * <p>
094: * Sets the content of the <code>Text</code> object
095: * to the specified data. The data is checked for
096: * legality according to XML 1.0 rules. Characters that
097: * can be serialized such as < and & are allowed.
098: * However, characters such as the form feed, null,
099: * vertical tab, unmatched halves of surrogate pairs,
100: * and 0xFFFE and 0xFFFF are not allowed. Passing null is the same
101: * as passing the empty string.
102: * </p>
103: *
104: * @param data the text to install in the object
105: *
106: * @throws IllegalCharacterDataException if data contains any
107: * characters which are illegal in well-formed XML 1.0 such as
108: * null, vertical tab, or unmatched halves of surrogate pairs
109: */
110: public void setValue(String data) {
111: _setValue(data);
112: }
113:
114: private void _setValue(String data) {
115:
116: if (data == null)
117: data = "";
118: else
119: Verifier.checkPCDATA(data);
120: try {
121: this .data = data.getBytes("UTF8");
122: } catch (UnsupportedEncodingException ex) {
123: throw new RuntimeException("Bad VM! Does not support UTF-8");
124: }
125:
126: }
127:
128: /**
129: * <p>
130: * Returns the XPath 1.0 string-value of this <code>Text</code>
131: * node. The XPath string-value of a text node is the same as
132: * the text of the node.
133: * </p>
134: *
135: * @return the content of the node
136: */
137: public final String getValue() {
138:
139: try {
140: return new String(data, "UTF8");
141: } catch (UnsupportedEncodingException ex) {
142: throw new RuntimeException("Bad VM! Does not support UTF-8");
143: }
144:
145: }
146:
147: /**
148: * <p>
149: * Throws <code>IndexOutOfBoundsException</code> because
150: * texts do not have children.
151: * </p>
152: *
153: * @return never returns because texts do not have children;
154: * always throws an exception.
155: *
156: * @param position the index of the child node to return
157: *
158: * @throws IndexOutOfBoundsException because texts
159: * do not have children
160: */
161: public final Node getChild(int position) {
162: throw new IndexOutOfBoundsException(
163: "LeafNodes do not have children");
164: }
165:
166: /**
167: * <p>
168: * Returns 0 because texts do not have children.
169: * </p>
170: *
171: * @return zero
172: */
173: public final int getChildCount() {
174: return 0;
175: }
176:
177: /**
178: * <p>
179: * Returns a deep copy of this <code>Text</code> with no parent,
180: * that can be added to this document or a different one.
181: * </p>
182: *
183: * @return a deep copy of this text node with no parent
184: */
185: public Node copy() {
186:
187: if (isCDATASection()) {
188: return new CDATASection(this );
189: } else {
190: return new Text(this );
191: }
192:
193: }
194:
195: /**
196: * <p>
197: * Returns a string containing the XML serialization of this text
198: * node. Unlike <code>getValue</code>, this method escapes
199: * characters such as & and < using entity references such
200: * as <code>&amp;</code> and <code>&lt;</code>.
201: * It escapes the carriage return (\r) as <code>&#x0D;</code>.
202: * If this text node is a CDATA section, then it may wrap the value
203: * in CDATA section delimiters instead of escaping.
204: * </p>
205: *
206: * @return the string form of this text node
207: */
208: public final String toXML() {
209: return escapeText();
210: }
211:
212: String escapeText() {
213:
214: String s = getValue();
215: int length = s.length();
216: // Give the string buffer enough room for a couple of escaped characters
217: StringBuffer result = new StringBuffer(length + 12);
218: for (int i = 0; i < length; i++) {
219: char c = s.charAt(i);
220: switch (c) {
221: case '\r':
222: result.append("
");
223: break;
224: case 14:
225: // impossible
226: break;
227: case 15:
228: // impossible
229: break;
230: case 16:
231: // impossible
232: break;
233: case 17:
234: // impossible
235: break;
236: case 18:
237: // impossible
238: break;
239: case 19:
240: // impossible
241: break;
242: case 20:
243: // impossible
244: break;
245: case 21:
246: // impossible
247: break;
248: case 22:
249: // impossible
250: break;
251: case 23:
252: // impossible
253: break;
254: case 24:
255: // impossible
256: break;
257: case 25:
258: // impossible
259: break;
260: case 26:
261: // impossible
262: break;
263: case 27:
264: // impossible
265: break;
266: case 28:
267: // impossible
268: break;
269: case 29:
270: // impossible
271: break;
272: case 30:
273: // impossible
274: break;
275: case 31:
276: // impossible
277: break;
278: case ' ':
279: result.append(' ');
280: break;
281: case '!':
282: result.append('!');
283: break;
284: case '"':
285: result.append('"');
286: break;
287: case '#':
288: result.append('#');
289: break;
290: case '$':
291: result.append('$');
292: break;
293: case '%':
294: result.append('%');
295: break;
296: case '&':
297: result.append("&");
298: break;
299: case '\'':
300: result.append('\'');
301: break;
302: case '(':
303: result.append('(');
304: break;
305: case ')':
306: result.append(')');
307: break;
308: case '*':
309: result.append('*');
310: break;
311: case '+':
312: result.append('+');
313: break;
314: case ',':
315: result.append(',');
316: break;
317: case '-':
318: result.append('-');
319: break;
320: case '.':
321: result.append('.');
322: break;
323: case '/':
324: result.append('/');
325: break;
326: case '0':
327: result.append('0');
328: break;
329: case '1':
330: result.append('1');
331: break;
332: case '2':
333: result.append('2');
334: break;
335: case '3':
336: result.append('3');
337: break;
338: case '4':
339: result.append('4');
340: break;
341: case '5':
342: result.append('5');
343: break;
344: case '6':
345: result.append('6');
346: break;
347: case '7':
348: result.append('7');
349: break;
350: case '8':
351: result.append('8');
352: break;
353: case '9':
354: result.append('9');
355: break;
356: case ':':
357: result.append(':');
358: break;
359: case ';':
360: result.append(';');
361: break;
362: case '<':
363: result.append("<");
364: break;
365: case '=':
366: result.append('=');
367: break;
368: case '>':
369: result.append(">");
370: break;
371: default:
372: result.append(c);
373: }
374: }
375:
376: return result.toString();
377:
378: }
379:
380: boolean isText() {
381: return true;
382: }
383:
384: /**
385: * <p>
386: * Returns a <code>String</code>
387: * representation of this <code>Text</code> suitable for
388: * debugging and diagnosis. This is <em>not</em>
389: * the XML representation of this <code>Text</code> node.
390: * </p>
391: *
392: * @return a non-XML string representation of this node
393: */
394: public final String toString() {
395:
396: return "[" + getClass().getName() + ": "
397: + escapeLineBreaksAndTruncate(getValue()) + "]";
398:
399: }
400:
401: static String escapeLineBreaksAndTruncate(String s) {
402:
403: int length = s.length();
404: boolean tooLong = length > 40;
405: if (length > 40) {
406: length = 35;
407: s = s.substring(0, 35);
408: }
409:
410: StringBuffer result = new StringBuffer(length);
411: for (int i = 0; i < length; i++) {
412: char c = s.charAt(i);
413: switch (c) {
414: case '\n':
415: result.append("\\n");
416: break;
417: case '\r':
418: result.append("\\r");
419: break;
420: case '\t':
421: result.append("\\t");
422: break;
423: default:
424: result.append(c);
425: }
426: }
427: if (tooLong)
428: result.append("...");
429:
430: return result.toString();
431:
432: }
433:
434: boolean isCDATASection() {
435: return false;
436: }
437:
438: boolean isEmpty() {
439: return this .data.length == 0;
440: }
441:
442: }
|