001: /*--
002:
003: $Id: Text.java,v 1.1 2005/04/27 09:32:40 wittek Exp $
004:
005: Copyright (C) 2000-2004 Jason Hunter & Brett McLaughlin.
006: All rights reserved.
007:
008: Redistribution and use in source and binary forms, with or without
009: modification, are permitted provided that the following conditions
010: are met:
011:
012: 1. Redistributions of source code must retain the above copyright
013: notice, this list of conditions, and the following disclaimer.
014:
015: 2. Redistributions in binary form must reproduce the above copyright
016: notice, this list of conditions, and the disclaimer that follows
017: these conditions in the documentation and/or other materials
018: provided with the distribution.
019:
020: 3. The name "JDOM" must not be used to endorse or promote products
021: derived from this software without prior written permission. For
022: written permission, please contact <request_AT_jdom_DOT_org>.
023:
024: 4. Products derived from this software may not be called "JDOM", nor
025: may "JDOM" appear in their name, without prior written permission
026: from the JDOM Project Management <request_AT_jdom_DOT_org>.
027:
028: In addition, we request (but do not require) that you include in the
029: end-user documentation provided with the redistribution and/or in the
030: software itself an acknowledgement equivalent to the following:
031: "This product includes software developed by the
032: JDOM Project (http://www.jdom.org/)."
033: Alternatively, the acknowledgment may be graphical using the logos
034: available at http://www.jdom.org/images/logos.
035:
036: THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
037: WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
038: OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
039: DISCLAIMED. IN NO EVENT SHALL THE JDOM AUTHORS OR THE PROJECT
040: CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
041: SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
042: LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
043: USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
044: ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
045: OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
046: OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
047: SUCH DAMAGE.
048:
049: This software consists of voluntary contributions made by many
050: individuals on behalf of the JDOM Project and was originally
051: created by Jason Hunter <jhunter_AT_jdom_DOT_org> and
052: Brett McLaughlin <brett_AT_jdom_DOT_org>. For more information
053: on the JDOM Project, please see <http://www.jdom.org/>.
054:
055: */
056:
057: package org.jdom;
058:
059: /**
060: * Character-based XML content. Provides a modular, parentable method of
061: * representing text. Text makes no guarantees about the underlying textual
062: * representation of character data, but does expose that data as a Java String.
063: *
064: * @version $Revision: 1.1 $, $Date: 2005/04/27 09:32:40 $
065: * @author Brett McLaughlin
066: * @author Jason Hunter
067: * @author Bradley S. Huffman
068: */
069: public class Text extends Content {
070:
071: private static final String CVS_ID = "@(#) $RCSfile: Text.java,v $ $Revision: 1.1 $ $Date: 2005/04/27 09:32:40 $ $Name: $";
072:
073: static final String EMPTY_STRING = "";
074:
075: /** The actual character content */
076: // XXX See http://www.servlets.com/archive/servlet/ReadMsg?msgId=8612
077: // from elharo for a description of why Java characters may not suffice
078: // long term
079: protected String value;
080:
081: /**
082: * This is the protected, no-args constructor standard in all JDOM
083: * classes. It allows subclassers to get a raw instance with no
084: * initialization.
085: */
086: protected Text() {
087: }
088:
089: /**
090: * This constructor creates a new <code>Text</code> node, with the
091: * supplied string value as it's character content.
092: *
093: * @param str the node's character content.
094: * @throws IllegalDataException if <code>str</code> contains an
095: * illegal character such as a vertical tab (as determined
096: * by {@link org.jdom.Verifier#checkCharacterData})
097: */
098: public Text(String str) {
099: setText(str);
100: }
101:
102: /**
103: * This returns the value of this <code>Text</code> node as a Java
104: * <code>String</code>.
105: *
106: * @return <code>String</code> - character content of this node.
107: */
108: public String getText() {
109: return value;
110: }
111:
112: /**
113: * This returns the textual content with all surrounding whitespace
114: * removed. If only whitespace exists, the empty string is returned.
115: *
116: * @return trimmed text content or empty string
117: */
118: public String getTextTrim() {
119: return getText().trim();
120: }
121:
122: /**
123: * This returns the textual content with all surrounding whitespace
124: * removed and internal whitespace normalized to a single space. If
125: * only whitespace exists, the empty string is returned.
126: *
127: * @return normalized text content or empty string
128: */
129: public String getTextNormalize() {
130: return normalizeString(getText());
131: }
132:
133: /**
134: * This returns a new string with all surrounding whitespace
135: * removed and internal whitespace normalized to a single space. If
136: * only whitespace exists, the empty string is returned.
137: * <p>
138: * Per XML 1.0 Production 3 whitespace includes: #x20, #x9, #xD, #xA
139: * </p>
140: *
141: * @param str string to be normalized.
142: * @return normalized string or empty string
143: */
144: public static String normalizeString(String str) {
145: if (str == null)
146: return EMPTY_STRING;
147:
148: char[] c = str.toCharArray();
149: char[] n = new char[c.length];
150: boolean white = true;
151: int pos = 0;
152: for (int i = 0; i < c.length; i++) {
153: if (" \t\n\r".indexOf(c[i]) != -1) {
154: if (!white) {
155: n[pos++] = ' ';
156: white = true;
157: }
158: } else {
159: n[pos++] = c[i];
160: white = false;
161: }
162: }
163: if (white && pos > 0) {
164: pos--;
165: }
166: return new String(n, 0, pos);
167: }
168:
169: /**
170: * This will set the value of this <code>Text</code> node.
171: *
172: * @param str value for node's content.
173: * @return the object on which the method was invoked
174: * @throws IllegalDataException if <code>str</code> contains an
175: * illegal character such as a vertical tab (as determined
176: * by {@link org.jdom.Verifier#checkCharacterData})
177: */
178: public Text setText(String str) {
179: String reason;
180:
181: if (str == null) {
182: value = EMPTY_STRING;
183: return this ;
184: }
185:
186: if ((reason = Verifier.checkCharacterData(str)) != null) {
187: throw new IllegalDataException(str, "character content",
188: reason);
189: }
190: value = str;
191: return this ;
192: }
193:
194: /**
195: * This will append character content to whatever content already
196: * exists within this <code>Text</code> node.
197: *
198: * @param str character content to append.
199: * @throws IllegalDataException if <code>str</code> contains an
200: * illegal character such as a vertical tab (as determined
201: * by {@link org.jdom.Verifier#checkCharacterData})
202: */
203: public void append(String str) {
204: String reason;
205:
206: if (str == null) {
207: return;
208: }
209: if ((reason = Verifier.checkCharacterData(str)) != null) {
210: throw new IllegalDataException(str, "character content",
211: reason);
212: }
213:
214: if (str == EMPTY_STRING)
215: value = str;
216: else
217: value += str;
218: }
219:
220: /**
221: * This will append the content of another <code>Text</code> node
222: * to this node.
223: *
224: * @param text Text node to append.
225: */
226: public void append(Text text) {
227: if (text == null) {
228: return;
229: }
230: value += text.getText();
231: }
232:
233: /**
234: * Returns the XPath 1.0 string value of this element, which is the
235: * text itself.
236: *
237: * @return the text
238: */
239: public String getValue() {
240: return value;
241: }
242:
243: /**
244: * This returns a <code>String</code> representation of the
245: * <code>Text</code> node, suitable for debugging. If the XML
246: * representation of the <code>Text</code> node is desired,
247: * either <code>{@link #getText}</code> or
248: * {@link org.jdom.output.XMLOutputter#outputString(Text)}</code>
249: * should be used.
250: *
251: * @return <code>String</code> - information about this node.
252: */
253: public String toString() {
254: return new StringBuffer(64).append("[Text: ").append(getText())
255: .append("]").toString();
256: }
257:
258: /**
259: * This will return a clone of this <code>Text</code> node, with the
260: * same character content, but no parent.
261: *
262: * @return <code>Text</code> - cloned node.
263: */
264: public Object clone() {
265: Text text = (Text) super.clone();
266: text.value = value;
267: return text;
268: }
269:
270: }
|