001: /*--
002:
003: $Id: Format.java,v 1.1 2005/04/27 09:32:42 wittek Exp $
004:
005: Copyright (C) 2000-2004 Jason Hunter & Brett McLaughlin.
006: All rights reserved.
007:
008: Redistribution and use in source and binary forms, with or without
009: modification, are permitted provided that the following conditions
010: are met:
011:
012: 1. Redistributions of source code must retain the above copyright
013: notice, this list of conditions, and the following disclaimer.
014:
015: 2. Redistributions in binary form must reproduce the above copyright
016: notice, this list of conditions, and the disclaimer that follows
017: these conditions in the documentation and/or other materials
018: provided with the distribution.
019:
020: 3. The name "JDOM" must not be used to endorse or promote products
021: derived from this software without prior written permission. For
022: written permission, please contact <request_AT_jdom_DOT_org>.
023:
024: 4. Products derived from this software may not be called "JDOM", nor
025: may "JDOM" appear in their name, without prior written permission
026: from the JDOM Project Management <request_AT_jdom_DOT_org>.
027:
028: In addition, we request (but do not require) that you include in the
029: end-user documentation provided with the redistribution and/or in the
030: software itself an acknowledgement equivalent to the following:
031: "This product includes software developed by the
032: JDOM Project (http://www.jdom.org/)."
033: Alternatively, the acknowledgment may be graphical using the logos
034: available at http://www.jdom.org/images/logos.
035:
036: THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
037: WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
038: OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
039: DISCLAIMED. IN NO EVENT SHALL THE JDOM AUTHORS OR THE PROJECT
040: CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
041: SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
042: LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
043: USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
044: ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
045: OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
046: OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
047: SUCH DAMAGE.
048:
049: This software consists of voluntary contributions made by many
050: individuals on behalf of the JDOM Project and was originally
051: created by Jason Hunter <jhunter_AT_jdom_DOT_org> and
052: Brett McLaughlin <brett_AT_jdom_DOT_org>. For more information
053: on the JDOM Project, please see <http://www.jdom.org/>.
054:
055: */
056:
057: package org.jdom.output;
058:
059: import java.lang.reflect.Method;
060:
061: /**
062: * Class to encapsulate XMLOutputter format options.
063: * Typical users can use the standard format configurations obtained by
064: * {@link #getRawFormat} (no whitespace changes),
065: * {@link #getPrettyFormat} (whitespace beautification), and
066: * {@link #getCompactFormat} (whitespace normalization).
067: * <p>
068: * Several modes are available to effect the way textual content is printed.
069: * See the documentation for {@link TextMode} for details.
070: *
071: * @version $Revision: 1.1 $, $Date: 2005/04/27 09:32:42 $
072: * @author Jason Hunter
073: */
074: public class Format implements Cloneable {
075:
076: private static final String CVS_ID = "@(#) $RCSfile: Format.java,v $ $Revision: 1.1 $ $Date: 2005/04/27 09:32:42 $ $Name: $";
077:
078: /**
079: * Returns a new Format object that performs no whitespace changes, uses
080: * the UTF-8 encoding, doesn't expand empty elements, includes the
081: * declaration and encoding, and uses the default entity escape strategy.
082: * Tweaks can be made to the returned Format instance without affecting
083: * other instances.
084:
085: * @return a Format with no whitespace changes
086: */
087: public static Format getRawFormat() {
088: return new Format();
089: }
090:
091: /**
092: * Returns a new Format object that performs whitespace beautification with
093: * 2-space indents, uses the UTF-8 encoding, doesn't expand empty elements,
094: * includes the declaration and encoding, and uses the default entity
095: * escape strategy.
096: * Tweaks can be made to the returned Format instance without affecting
097: * other instances.
098: *
099: * @return a Format with whitespace beautification
100: */
101: public static Format getPrettyFormat() {
102: Format f = new Format();
103: f.setIndent(STANDARD_INDENT);
104: f.setTextMode(TextMode.TRIM);
105: return f;
106: }
107:
108: /**
109: * Returns a new Format object that performs whitespace normalization, uses
110: * the UTF-8 encoding, doesn't expand empty elements, includes the
111: * declaration and encoding, and uses the default entity escape strategy.
112: * Tweaks can be made to the returned Format instance without affecting
113: * other instances.
114: *
115: * @return a Format with whitespace normalization
116: */
117: public static Format getCompactFormat() {
118: Format f = new Format();
119: f.setTextMode(TextMode.NORMALIZE);
120: return f;
121: }
122:
123: /** standard value to indent by, if we are indenting */
124: private static final String STANDARD_INDENT = " ";
125:
126: /** standard string with which to end a line */
127: private static final String STANDARD_LINE_SEPARATOR = "\r\n";
128:
129: /** standard encoding */
130: private static final String STANDARD_ENCODING = "UTF-8";
131:
132: /** The default indent is no spaces (as original document) */
133: String indent = null;
134:
135: /** New line separator */
136: String lineSeparator = STANDARD_LINE_SEPARATOR;
137:
138: /** The encoding format */
139: String encoding = STANDARD_ENCODING;
140:
141: /** Whether or not to output the XML declaration
142: * - default is <code>false</code> */
143: boolean omitDeclaration = false;
144:
145: /** Whether or not to output the encoding in the XML declaration
146: * - default is <code>false</code> */
147: boolean omitEncoding = false;
148:
149: /** Whether or not to expand empty elements to
150: * <tagName></tagName> - default is <code>false</code> */
151: boolean expandEmptyElements = false;
152:
153: /** Whether TrAX output escaping disabling/enabling PIs are ignored
154: * or processed - default is <code>false</code> */
155: boolean ignoreTrAXEscapingPIs = false;
156:
157: /** text handling mode */
158: TextMode mode = TextMode.PRESERVE;
159:
160: /** entity escape logic */
161: EscapeStrategy escapeStrategy = new DefaultEscapeStrategy(encoding);
162:
163: /**
164: * Creates a new Format instance with default (raw) behavior.
165: */
166: private Format() {
167: }
168:
169: /**
170: * Sets the {@link EscapeStrategy} to use for character escaping.
171: *
172: * @param strategy the EscapeStrategy to use
173: * @return a pointer to this Format for chaining
174: */
175: public Format setEscapeStrategy(EscapeStrategy strategy) {
176: escapeStrategy = strategy;
177: return this ;
178: }
179:
180: /**
181: * Returns the current escape strategy
182: *
183: * @return the current escape strategy
184: */
185: public EscapeStrategy getEscapeStrategy() {
186: return escapeStrategy;
187: }
188:
189: /**
190: * This will set the newline separator (<code>lineSeparator</code>).
191: * The default is <code>\r\n</code>. Note that if the "newlines"
192: * property is false, this value is irrelevant. To make it output
193: * the system default line ending string, call
194: * <code>setLineSeparator(System.getProperty("line.separator"))</code>
195: *
196: * <p>
197: * To output "UNIX-style" documents, call
198: * <code>setLineSeparator("\n")</code>. To output "Mac-style"
199: * documents, call <code>setLineSeparator("\r")</code>. DOS-style
200: * documents use CR-LF ("\r\n"), which is the default.
201: * </p>
202: *
203: * <p>
204: * Note that this only applies to newlines generated by the
205: * outputter. If you parse an XML document that contains newlines
206: * embedded inside a text node, and you do not set TextMode.NORMALIZE,
207: * then the newlines will be output
208: * verbatim, as "\n" which is how parsers normalize them.
209: * </p>
210: *
211: * @see #setTextMode
212: *
213: * @param separator <code>String</code> line separator to use.
214: * @return a pointer to this Format for chaining
215: */
216: public Format setLineSeparator(String separator) {
217: this .lineSeparator = separator;
218: return this ;
219: }
220:
221: /**
222: * Returns the current line separator.
223: *
224: * @return the current line separator
225: */
226: public String getLineSeparator() {
227: return lineSeparator;
228: }
229:
230: /**
231: * This will set whether the XML declaration
232: * (<code><?xml version="1.0"
233: * encoding="UTF-8"?></code>)
234: * includes the encoding of the document. It is common to omit
235: * this in uses such as WML and other wireless device protocols.
236: *
237: * @param omitEncoding <code>boolean</code> indicating whether or not
238: * the XML declaration should indicate the document encoding.
239: * @return a pointer to this Format for chaining
240: */
241: public Format setOmitEncoding(boolean omitEncoding) {
242: this .omitEncoding = omitEncoding;
243: return this ;
244: }
245:
246: /**
247: * Returns whether the XML declaration encoding will be omitted.
248: *
249: * @return whether the XML declaration encoding will be omitted
250: */
251: public boolean getOmitEncoding() {
252: return omitEncoding;
253: }
254:
255: /**
256: * This will set whether the XML declaration
257: * (<code><?xml version="1.0"?gt;</code>)
258: * will be omitted or not. It is common to omit this in uses such
259: * as SOAP and XML-RPC calls.
260: *
261: * @param omitDeclaration <code>boolean</code> indicating whether or not
262: * the XML declaration should be omitted.
263: * @return a pointer to this Format for chaining
264: */
265: public Format setOmitDeclaration(boolean omitDeclaration) {
266: this .omitDeclaration = omitDeclaration;
267: return this ;
268: }
269:
270: /**
271: * Returns whether the XML declaration will be omitted.
272: *
273: * @return whether the XML declaration will be omitted
274: */
275: public boolean getOmitDeclaration() {
276: return omitDeclaration;
277: }
278:
279: /**
280: * This will set whether empty elements are expanded from
281: * <code><tagName/></code> to
282: * <code><tagName></tagName></code>.
283: *
284: * @param expandEmptyElements <code>boolean</code> indicating whether or not
285: * empty elements should be expanded.
286: * @return a pointer to this Format for chaining
287: */
288: public Format setExpandEmptyElements(boolean expandEmptyElements) {
289: this .expandEmptyElements = expandEmptyElements;
290: return this ;
291: }
292:
293: /**
294: * Returns whether empty elements are expanded.
295: *
296: * @return whether empty elements are expanded
297: */
298: public boolean getExpandEmptyElements() {
299: return expandEmptyElements;
300: }
301:
302: /**
303: * This will set whether JAXP TrAX processing instructions for
304: * disabling/enabling output escaping are ignored. Disabling
305: * output escaping allows using XML text as element content and
306: * outputing it verbatim, i.e. as element children would be.
307: * <p>
308: * When processed, these processing instructions are removed from
309: * the generated XML text and control whether the element text
310: * content is output verbatim or with escaping of the pre-defined
311: * entities in XML 1.0. The text to be output verbatim shall be
312: * surrounded by the
313: * <code><?javax.xml.transform.disable-output-escaping ?></code>
314: * and <code><?javax.xml.transform.enable-output-escaping ?></code>
315: * PIs.</p>
316: * <p>
317: * When ignored, the processing instructions are present in the
318: * generated XML text and the pre-defined entities in XML 1.0 are
319: * escaped.
320: * <p>
321: * Default: <code>false</code>.</p>
322: *
323: * @param ignoreTrAXEscapingPIs <code>boolean</code> indicating
324: * whether or not TrAX ouput escaping PIs are ignored.
325: *
326: * @see javax.xml.transform.Result#PI_ENABLE_OUTPUT_ESCAPING
327: * @see javax.xml.transform.Result#PI_DISABLE_OUTPUT_ESCAPING
328: */
329: public void setIgnoreTrAXEscapingPIs(boolean ignoreTrAXEscapingPIs) {
330: this .ignoreTrAXEscapingPIs = ignoreTrAXEscapingPIs;
331: }
332:
333: /**
334: * Returns whether JAXP TrAX processing instructions for
335: * disabling/enabling output escaping are ignored.
336: *
337: * @return whether or not TrAX ouput escaping PIs are ignored.
338: */
339: public boolean getIgnoreTrAXEscapingPIs() {
340: return ignoreTrAXEscapingPIs;
341: }
342:
343: /**
344: * This sets the text output style. Options are available as static
345: * {@link TextMode} instances. The default is {@link TextMode#PRESERVE}.
346: *
347: * @return a pointer to this Format for chaining
348: */
349: public Format setTextMode(Format.TextMode mode) {
350: this .mode = mode;
351: return this ;
352: }
353:
354: /**
355: * Returns the current text output style.
356: *
357: * @return the current text output style
358: */
359: public Format.TextMode getTextMode() {
360: return mode;
361: }
362:
363: /**
364: * This will set the indent <code>String</code> to use; this
365: * is usually a <code>String</code> of empty spaces. If you pass
366: * null, or the empty string (""), then no indentation will
367: * happen. Default: none (null)
368: *
369: * @param indent <code>String</code> to use for indentation.
370: * @return a pointer to this Format for chaining
371: */
372: public Format setIndent(String indent) {
373: // if passed the empty string, change it to null, for marginal
374: // performance gains later (can compare to null first instead
375: // of calling equals())
376: if ("".equals(indent)) {
377: indent = null;
378: }
379: this .indent = indent;
380: return this ;
381: }
382:
383: /**
384: * Returns the indent string in use.
385: *
386: * @return the indent string in use
387: */
388: public String getIndent() {
389: return indent;
390: }
391:
392: /**
393: * Sets the output encoding. The name should be an accepted XML
394: * encoding.
395: *
396: * @param encoding the encoding format. Use XML-style names like
397: * "UTF-8" or "ISO-8859-1" or "US-ASCII"
398: * @return a pointer to this Format for chaining
399: */
400: public Format setEncoding(String encoding) {
401: this .encoding = encoding;
402: escapeStrategy = new DefaultEscapeStrategy(encoding);
403: return this ;
404: }
405:
406: /**
407: * Returns the configured output encoding.
408: *
409: * @return the output encoding
410: */
411: public String getEncoding() {
412: return encoding;
413: }
414:
415: protected Object clone() {
416: Format format = null;
417:
418: try {
419: format = (Format) super .clone();
420: } catch (CloneNotSupportedException ce) {
421: }
422:
423: return format;
424: }
425:
426: /**
427: * Handle common charsets quickly and easily. Use reflection
428: * to query the JDK 1.4 CharsetEncoder class for unknown charsets.
429: * If JDK 1.4 isn't around, default to no special encoding.
430: */
431: class DefaultEscapeStrategy implements EscapeStrategy {
432: private int bits;
433: Object encoder;
434: Method canEncode;
435:
436: public DefaultEscapeStrategy(String encoding) {
437: if ("UTF-8".equalsIgnoreCase(encoding)
438: || "UTF-16".equalsIgnoreCase(encoding)) {
439: bits = 16;
440: } else if ("ISO-8859-1".equalsIgnoreCase(encoding)
441: || "Latin1".equalsIgnoreCase(encoding)) {
442: bits = 8;
443: } else if ("US-ASCII".equalsIgnoreCase(encoding)
444: || "ASCII".equalsIgnoreCase(encoding)) {
445: bits = 7;
446: } else {
447: bits = 0;
448: //encoder = Charset.forName(encoding).newEncoder();
449: try {
450: Class charsetClass = Class
451: .forName("java.nio.charset.Charset");
452: Class encoderClass = Class
453: .forName("java.nio.charset.CharsetEncoder");
454: Method forName = charsetClass.getMethod("forName",
455: new Class[] { String.class });
456: Object charsetObj = forName.invoke(null,
457: new Object[] { encoding });
458: Method newEncoder = charsetClass.getMethod(
459: "newEncoder", null);
460: encoder = newEncoder.invoke(charsetObj, null);
461: canEncode = encoderClass.getMethod("canEncode",
462: new Class[] { char.class });
463: } catch (Exception ignored) {
464: }
465: }
466: }
467:
468: public boolean shouldEscape(char ch) {
469: if (bits == 16) {
470: return false;
471: }
472: if (bits == 8) {
473: if ((int) ch > 255)
474: return true;
475: else
476: return false;
477: }
478: if (bits == 7) {
479: if ((int) ch > 127)
480: return true;
481: else
482: return false;
483: } else {
484: if (canEncode != null && encoder != null) {
485: try {
486: Boolean val = (Boolean) canEncode.invoke(
487: encoder, new Object[] { new Character(
488: ch) });
489: return !val.booleanValue();
490: } catch (Exception ignored) {
491: }
492: }
493: // Return false if we don't know. This risks not escaping
494: // things which should be escaped, but also means people won't
495: // start getting loads of unnecessary escapes.
496: return false;
497: }
498: }
499: }
500:
501: /**
502: * Class to signify how text should be handled on output. The following
503: * table provides details.
504: *
505: * <table>
506: * <tr>
507: * <th align="left">
508: * Text Mode
509: * </th>
510: * <th>
511: * Resulting behavior.
512: * </th>
513: * </tr>
514: *
515: * <tr valign="top">
516: * <td>
517: * <i>PRESERVE (Default)</i>
518: * </td>
519: * <td>
520: * All content is printed in the format it was created, no whitespace
521: * or line separators are are added or removed.
522: * </td>
523: * </tr>
524: *
525: * <tr valign="top">
526: * <td>
527: * TRIM_FULL_WHITE
528: * </td>
529: * <td>
530: * Content between tags consisting of all whitespace is not printed.
531: * If the content contains even one non-whitespace character, it is
532: * printed verbatim, whitespace and all.
533: * </td>
534: * </tr>
535: *
536: * <tr valign="top">
537: * <td>
538: * TRIM
539: * </td>
540: * <td>
541: * Same as TrimAllWhite, plus leading/trailing whitespace are
542: * trimmed.
543: * </td>
544: * </tr>
545: *
546: * <tr valign="top">
547: * <td>
548: * NORMALIZE
549: * </td>
550: * <td>
551: * Same as TextTrim, plus addition interior whitespace is compressed
552: * to a single space.
553: * </td>
554: * </tr>
555: * </table>
556: *
557: * In most cases textual content is aligned with the surrounding tags
558: * (after the appropriate text mode is applied). In the case where the only
559: * content between the start and end tags is textual, the start tag, text,
560: * and end tag are all printed on the same line. If the document being
561: * output already has whitespace, it's wise to turn on TRIM mode so the
562: * pre-existing whitespace can be trimmed before adding new whitespace.
563: * <p>
564: * When a element has a xml:space attribute with the value of "preserve",
565: * all formating is turned off and reverts back to the default until the
566: * element and its contents have been printed. If a nested element contains
567: * another xml:space with the value "default" formatting is turned back on
568: * for the child element and then off for the remainder of the parent
569: * element.
570: */
571: public static class TextMode {
572: /**
573: * Mode for literal text preservation.
574: */
575: public static final TextMode PRESERVE = new TextMode("PRESERVE");
576:
577: /**
578: * Mode for text trimming (left and right trim).
579: */
580: public static final TextMode TRIM = new TextMode("TRIM");
581:
582: /**
583: * Mode for text normalization (left and right trim plus internal
584: * whitespace is normalized to a single space.
585: * @see org.jdom.Element#getTextNormalize
586: */
587: public static final TextMode NORMALIZE = new TextMode(
588: "NORMALIZE");
589:
590: /**
591: * Mode for text trimming of content consisting of nothing but
592: * whitespace but otherwise not changing output.
593: */
594: public static final TextMode TRIM_FULL_WHITE = new TextMode(
595: "TRIM_FULL_WHITE");
596:
597: private final String name;
598:
599: private TextMode(String name) {
600: this .name = name;
601: }
602:
603: public String toString() {
604: return name;
605: }
606: }
607: }
|