Source Code Cross Referenced for Serializer.java in » XML » xom » nu » xom » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Servlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI
Java
Java Tutorial
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » XML » xom » nu.xom
Source Cross Referenced Class Diagram Java Document (Java Doc)
0001:        /* Copyright 2002-2005 Elliotte Rusty Harold
0002:           
0003:           This library is free software; you can redistribute it and/or modify
0004:           it under the terms of version 2.1 of the GNU Lesser General Public 
0005:           License as published by the Free Software Foundation.
0006:           
0007:           This library is distributed in the hope that it will be useful,
0008:           but WITHOUT ANY WARRANTY; without even the implied warranty of
0009:           MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
0010:           GNU Lesser General Public License for more details.
0011:           
0012:           You should have received a copy of the GNU Lesser General Public
0013:           License along with this library; if not, write to the 
0014:           Free Software Foundation, Inc., 59 Temple Place, Suite 330, 
0015:           Boston, MA 02111-1307  USA
0016:           
0017:           You can contact Elliotte Rusty Harold by sending e-mail to
0018:           elharo@metalab.unc.edu. Please include the word "XOM" in the
0019:           subject line. The XOM home page is located at http://www.xom.nu/
0020:         */
0021:
0022:        package nu.xom;
0023:
0024:        import java.io.IOException;
0025:        import java.io.OutputStream;
0026:        import java.io.OutputStreamWriter;
0027:        import java.io.UnsupportedEncodingException;
0028:        import java.io.Writer;
0029:        import java.util.Iterator;
0030:        import java.util.Locale;
0031:        import java.util.Map;
0032:
0033:        /**
0034:         * <p>
0035:         *  Outputs a <code>Document</code> object in a specific encoding using
0036:         *  various options for controlling white space, normalization,
0037:         *  indenting, line breaking, and base URIs. However, in general these 
0038:         *  options do affect the document's infoset. In particular, if you set 
0039:         *  either the maximum line length or the indent size to a positive  
0040:         *  value, then the serializer will not respect input white space. It 
0041:         *  may trim leading and trailing space, condense runs of white 
0042:         *  space to a single space, convert carriage returns and linefeeds
0043:         *  to spaces, add extra space where none was present before, 
0044:         *  and otherwise muck with the document's white space. 
0045:         *  The defaults, however, preserve all significant white space
0046:         *  including ignorable white space and boundary white space.
0047:         * </p>
0048:         * 
0049:         * @author Elliotte Rusty Harold
0050:         * @version 1.1b7
0051:         * 
0052:         */
0053:        public class Serializer {
0054:
0055:            private TextWriter escaper; // sink for all serialized output; assigned in setOutputStream(OutputStream, String)
0056:            private boolean preserveBaseURI = false; // when true, writeAttributes may emit an extra xml:base attribute
0057:
0058:            /**
0059:             * <p>
0060:             * Create a new serializer that uses the UTF-8 encoding.
0061:             * </p>
0062:             * 
0063:             * @param out the output stream to write the document on
0064:             * 
0065:             * @throws NullPointerException if <code>out</code> is null
0066:             */
0067:            public Serializer(OutputStream out) {
0068:
0069:                try {
0070:                    this .setOutputStream(out, "UTF-8");
0071:                } catch (UnsupportedEncodingException ex) {
0072:                    throw new RuntimeException(
0073:                            "The VM is broken. It does not understand UTF-8.");
0074:                }
0075:
0076:            }
0077:
0078:            /**
0079:             * <p>
0080:             * Create a new serializer that uses the specified encoding.
0081:             * The encoding must be recognized by the Java virtual machine. If 
0082:             * you attempt to use an encoding that the local Java virtual 
0083:             * machine does not support, the constructor will throw an 
0084:             * <code>UnsupportedEncodingException</code>.
0085:             * Currently the following encodings are recognized by XOM:
0086:             * </p>
0087:             * 
0088:             * <ul>
0089:             *   <li>UTF-8</li>
0090:             *   <li>UTF-16</li>
0091:             *   <li>UTF-16BE</li>
0092:             *   <li>UTF-16LE</li>
0093:             *   <li>ISO-10646-UCS-2</li>
0094:             *   <li>ISO-8859-1</li>
0095:             *   <li>ISO-8859-2</li>
0096:             *   <li>ISO-8859-3</li>
0097:             *   <li>ISO-8859-4</li>
0098:             *   <li>ISO-8859-5</li>
0099:             *   <li>ISO-8859-6</li>
0100:             *   <li>ISO-8859-7</li>
0101:             *   <li>ISO-8859-8</li>
0102:             *   <li>ISO-8859-9</li>
0103:             *   <li>ISO-8859-10</li>
0104:             *   <li>ISO-8859-11 (a.k.a. TIS-620)</li>
0105:             *   <li>ISO-8859-13</li>
0106:             *   <li>ISO-8859-14</li>
0107:             *   <li>ISO-8859-15</li>
0108:             *   <li>ISO-8859-16</li>
0109:             *   <li>IBM037 (a.k.a. CP037, EBCDIC-CP-US, EBCDIC-CP-CA, 
0110:             *         EBCDIC-CP-WA, EBCDIC-CP-NL, and CSIBM037)</li>
0111:             *   <li>GB18030</li>
0112:             * </ul>
0113:             * 
0114:             * <p>
0115:             * You can use encodings not in this list if the virtual
0116:             * machine supports them. However, they may be
0117:             * significantly slower than the encodings in this list.
0118:             * </p>
0119:             * 
0120:             * <p>
0121:             * I've noticed Java has significant bugs in its handling of some
0122:             * of these encodings. In some cases such as 0x80 in Big5, XOM
0123:             * will escape a character that should not need to be escaped
0124:             * because Java can't output that character in the specified 
0125:             * encoding, even though the output character set does contain it.
0126:             * :-(
0127:             * </p>
0128:             * 
0129:             * @param out the output stream to write the document on
0130:             * @param encoding the character encoding for the serialization
0131:
0132:             * @throws NullPointerException if <code>out</code> 
0133:             *     or <code>encoding</code> is null
0134:             * @throws UnsupportedEncodingException if the VM does not 
0135:             *     support the requested encoding
0136:             *  
0137:             */
public Serializer(OutputStream out, String encoding)
        throws UnsupportedEncodingException {

    // Reject a missing encoding first so the failure carries its own message;
    // the null check on the stream happens inside setOutputStream.
    if (null == encoding) {
        throw new NullPointerException("Null encoding");
    }

    setOutputStream(out, encoding);

}
0147:
0148:            /**
0149:             * <p>
0150:             * Flushes the previous output stream and 
0151:             * redirects further output to the new output stream.
0152:             * </p>
0153:             * 
0154:             * 
0155:             * @param out the output stream to write the document on
0156:
0157:             * @throws NullPointerException if <code>out</code> is null
0158:             * @throws IOException if the previous output stream 
0159:             *     encounters an I/O error when flushed
0160:             *  
0161:             */
0162:            public void setOutputStream(OutputStream out) throws IOException {
0163:
0164:                // flush any data onto the old output stream
0165:                this .flush();
0166:                int maxLength = getMaxLength();
0167:                int indent = this .getIndent();
0168:                String lineSeparator = getLineSeparator();
0169:                boolean nfc = getUnicodeNormalizationFormC();
0170:                String encoding = escaper.getEncoding();
0171:                boolean lineSeparatorSet = escaper.lineSeparatorSet;
0172:                setOutputStream(out, encoding);
0173:                setIndent(indent);
0174:                setMaxLength(maxLength);
0175:                setUnicodeNormalizationFormC(nfc);
0176:                if (lineSeparatorSet)
0177:                    setLineSeparator(lineSeparator);
0178:
0179:            }
0180:
0181:            private void setOutputStream(OutputStream out, String encoding)
0182:                    throws UnsupportedEncodingException {
0183:
0184:                if (out == null) {
0185:                    throw new NullPointerException("Null OutputStream");
0186:                }
0187:                Writer writer;
0188:                String encodingUpperCase = encoding.toUpperCase(Locale.ENGLISH);
0189:                if (encodingUpperCase.equals("UTF-8")) {
0190:                    writer = new OutputStreamWriter(out, "UTF-8");
0191:                } else if (encodingUpperCase.equals("UTF-16")
0192:                        || encodingUpperCase.equals("ISO-10646-UCS-2")) {
0193:                    // For compatibility with Java 1.2 and earlier
0194:                    writer = new OutputStreamWriter(out, "UnicodeBig");
0195:                }
0196:                // Java's Cp037 encoding is broken, so we have to
0197:                // provide our own.
0198:                else if (encodingUpperCase.equals("IBM037")
0199:                        || encodingUpperCase.equals("CP037")
0200:                        || encodingUpperCase.equals("EBCDIC-CP-US")
0201:                        || encodingUpperCase.equals("EBCDIC-CP-CA")
0202:                        || encodingUpperCase.equals("EBCDIC-CP-WA")
0203:                        || encodingUpperCase.equals("EBCDIC-CP-NL")
0204:                        || encodingUpperCase.equals("CSIBM037")) {
0205:                    writer = new EBCDICWriter(out);
0206:                } else if (encodingUpperCase.equals("ISO-8859-11")
0207:                        || encodingUpperCase.equals("TIS-620")) {
0208:                    // Java doesn't recognize the name ISO-8859-11 and 
0209:                    // Java 1.3 and earlier don't recognize TIS-620
0210:                    writer = new OutputStreamWriter(out, "TIS620");
0211:                } else
0212:                    writer = new OutputStreamWriter(out, encoding);
0213:
0214:                writer = new UnsynchronizedBufferedWriter(writer);
0215:                this .escaper = TextWriterFactory
0216:                        .getTextWriter(writer, encoding);
0217:
0218:            }
0219:
0220:            /**
0221:             * <p>
0222:             * Serializes a document onto the output 
0223:             * stream using the current options.
0224:             * </p>
0225:             * 
0226:             * @param doc the <code>Document</code> to serialize
0227:             * 
0228:             * @throws IOException if the underlying output stream
0229:             *      encounters an I/O error
0230:             * @throws NullPointerException if <code>doc</code> is null
0231:             * @throws UnavailableCharacterException if the document contains 
0232:             *     an unescapable character (e.g. in an element name) that is 
0233:             *     not available in the current encoding
0234:             */
0235:            public void write(Document doc) throws IOException {
0236:
0237:                escaper.reset();
0238:                // The OutputStreamWriter automatically inserts
0239:                // the byte order mark if necessary.
0240:                writeXMLDeclaration();
0241:                int childCount = doc.getChildCount();
0242:                for (int i = 0; i < childCount; i++) {
0243:                    writeChild(doc.getChild(i));
0244:
0245:                    // Might want to remove this line break in a 
0246:                    // non-XML serializer where it's not guaranteed to be 
0247:                    // OK to add extra line breaks in the prolog
0248:                    escaper.breakLine();
0249:                }
0250:                escaper.flush();
0251:
0252:            }
0253:
0254:            /**
0255:             * <p>
0256:             * Writes the XML declaration onto the output stream,
0257:             * followed by a line break.
0258:             * </p>
0259:             * 
0260:             * @throws IOException if the underlying output stream
0261:             *      encounters an I/O error
0262:             */
0263:            protected void writeXMLDeclaration() throws IOException {
0264:
0265:                escaper.writeMarkup("<?xml version=\"1.0\" encoding=\"");
0266:                escaper.writeMarkup(escaper.getEncoding());
0267:                escaper.writeMarkup("\"?>");
0268:                escaper.breakLine();
0269:
0270:            }
0271:
0272:            /**
0273:             * <p>
0274:             * Serializes an element onto the output stream using the current
0275:             * options. The result is guaranteed to be well-formed. If 
0276:             * <code>element</code> does not have a parent element, the output  
0277:             * will also be namespace well-formed.
0278:             * </p>
0279:             * 
0280:             * <p>
0281:             * If the element is empty, this method invokes 
0282:             * <code>writeEmptyElementTag</code>. If the element is not 
0283:             * empty, then: 
0284:             * </p>
0285:             * 
0286:             * <ol>
0287:             *   <li>It calls <code>writeStartTag</code>.</li>
0288:             *   <li>It passes each of the element's children to 
0289:             *       <code>writeChild</code> in order.</li>
0290:             *   <li>It calls <code>writeEndTag</code>.</li>
0291:             * </ol>
0292:             * 
0293:             * <p>
0294:             *   It may break lines or add white space if the serializer has
0295:             *   been configured to indent or use a maximum line length.
0296:             * </p>
0297:             * 
0298:             * @param element the <code>Element</code> to serialize
0299:             * 
0300:             * @throws IOException if the underlying output stream
0301:             *     encounters an I/O error
0302:             * @throws UnavailableCharacterException if the element name   
0303:             *     contains a character that is not available in the 
0304:             *     current encoding
0305:             */
0306:            protected void write(Element element) throws IOException {
0307:
                     // Remember the white-space mode in effect on entry so it can be
                     // restored after this element's own xml:space attribute (if any)
                     // has been applied to its content.
0308:                boolean wasPreservingWhiteSpace = escaper.isPreserveSpace();
                     // When indenting, start the tag on a fresh line unless white space
                     // is being preserved or a line break was just written.
0309:                if (escaper.isIndenting() && !wasPreservingWhiteSpace
0310:                        && !escaper.justBroke()) {
0311:                    escaper.breakLine();
0312:                }
0313:
0314:                // workaround for case where only children are empty text nodes
                     // An element whose children are all empty Text nodes is treated as
                     // having no content and gets an empty-element tag below.
0315:                boolean hasRealChildren = false;
0316:                int childCount = element.getChildCount();
0317:                for (int i = 0; i < childCount; i++) {
0318:                    Node child = element.getChild(i);
0319:                    if (child.isText()) {
0320:                        Text t = (Text) child;
0321:                        if (t.isEmpty())
0322:                            continue;
0323:                    }
                         // First non-text child or non-empty text child: stop scanning.
0324:                    hasRealChildren = true;
0325:                    break;
0326:                }
0327:
0328:                if (hasRealChildren) {
0329:                    writeStartTag(element);
0330:                    // adjust for xml:space
                         // "preserve" turns preservation on and "default" turns it off; any
                         // other value leaves the current mode unchanged.
0331:                    String newXMLSpaceValue = element.getAttributeValue(
0332:                            "space", "http://www.w3.org/XML/1998/namespace");
0333:                    if (newXMLSpaceValue != null) {
0334:                        if ("preserve".equals(newXMLSpaceValue)) {
0335:                            escaper.setPreserveSpace(true);
0336:                        } else if ("default".equals(newXMLSpaceValue)) {
0337:                            escaper.setPreserveSpace(false);
0338:                        }
0339:                    }
0340:
0341:                    escaper.incrementIndent();
0342:                    // children
0343:                    for (int i = 0; i < childCount; i++) {
0344:                        Node child = element.getChild(i);
0345:                        // need to work around a very tricky case here where
0346:                        // denormalized characters cross boundaries of
0347:                        // consecutive text nodes
                             // With NFC on, a run of adjacent text nodes is concatenated into
                             // one temporary Text and written through a single writeChild
                             // call; i is advanced past every node that was absorbed.
0348:                        if (escaper.getNFC() && child.isText()) {
0349:                            Text t = (Text) child;
0350:                            while (i < childCount - 1) { // not the last node
0351:                                Node next = element.getChild(i + 1);
0352:                                if (next.isText()) {
0353:                                    t = new Text(t.getValue() + next.getValue());
0354:                                    i++;
0355:                                } else
0356:                                    break;
0357:                            }
0358:                            writeChild(t);
0359:                        } else {
0360:                            writeChild(child);
0361:                        }
0362:                    }
0363:                    escaper.decrementIndent();
                         // Put the end-tag on its own line only when indenting is active,
                         // white space is not preserved, and the element has at least one
                         // non-text child.
0364:                    if (escaper.getIndent() > 0 && !escaper.isPreserveSpace()) {
0365:                        if (hasNonTextChildren(element)) {
0366:                            escaper.breakLine();
0367:                        }
0368:                    }
0369:                    writeEndTag(element);
0370:
0371:                    // restore parent value
0372:                    if (newXMLSpaceValue != null) {
0373:                        escaper.setPreserveSpace(wasPreservingWhiteSpace);
0374:                    }
0375:
0376:                } else {
0377:                    writeEmptyElementTag(element);
0378:                }
0379:
0380:            }
0381:
0382:            private boolean hasNonTextChildren(Element element) {
0383:
0384:                int childCount = element.getChildCount();
0385:                for (int i = 0; i < childCount; i++) {
0386:                    if (!element.getChild(i).isText())
0387:                        return true;
0388:                }
0389:                return false;
0390:
0391:            }
0392:
0393:            // writeEndTag should not normally throw UnavailableCharacterException 
0394:            // because that would already have been thrown for the
0395:            // corresponding start-tag.
0396:            /**
0397:             * <p>
0398:             *   Writes the end-tag for an element in the form
0399:             *   <code>&lt;/<i>name</i>&gt;</code>.
0400:             * </p>
0401:             * 
0402:             * @param element the element whose end-tag is written
0403:             * 
0404:             * @throws IOException if the underlying output stream
0405:             *     encounters an I/O error
0406:             */
0407:            protected void writeEndTag(Element element) throws IOException {
0408:                escaper.writeMarkup("</");
0409:                escaper.writeMarkup(element.getQualifiedName());
0410:                escaper.writeMarkup('>');
0411:            }
0412:
0413:            /**
0414:             * 
0415:             * <p>
0416:             *  Writes the start-tag for the element including
0417:             *  all its namespace declarations and attributes.
0418:             * </p>
0419:             * 
0420:             * <p>
0421:             *   The <code>writeAttributes</code> method is called to write
0422:             *   all the non-namespace-declaration attributes. 
0423:             *   The <code>writeNamespaceDeclarations</code> method
0424:             *   is called to write all the namespace declaration attributes.
0425:             * </p>
0426:             * 
0427:             * @param element the element whose start-tag is written
0428:             * 
0429:             * @throws IOException if the underlying output stream
0430:             *     encounters an I/O error
0431:             * @throws UnavailableCharacterException if the name of the element
0432:             *     or the name of any of its attributes contains a character  
0433:             *     that is not available in the current encoding
0434:             */
0435:            protected void writeStartTag(Element element) throws IOException {
0436:                writeTagBeginning(element);
0437:                escaper.writeMarkup('>');
0438:            }
0439:
0440:            /**
0441:             * 
0442:             * <p>
0443:             *  Writes an empty-element tag for the element 
0444:             *  including all its namespace declarations and attributes.
0445:             * </p>
0446:             * 
0447:             * <p>
0448:             *   The <code>writeAttributes</code> method is called to write
0449:             *   all the non-namespace-declaration attributes. 
0450:             *   The <code>writeNamespaceDeclarations</code> method
0451:             *   is called to write all the namespace declaration attributes.
0452:             * </p>
0453:             * 
0454:             * <p>
0455:             *   If subclasses don't wish empty-element tags to be used,
0456:             *   they can override this method to simply invoke 
0457:             *   <code>writeStartTag</code> followed by 
0458:             *   <code>writeEndTag</code>.
0459:             * </p>
0460:             * 
0461:             * @param element the element whose empty-element tag is written
0462:             * 
0463:             * @throws IOException if the underlying output stream
0464:             *     encounters an I/O error
0465:             * @throws UnavailableCharacterException if the name of the element or the name of
0466:             *     any of its attributes contains a character that is not 
0467:             *     available in the current encoding
0468:             */
0469:            protected void writeEmptyElementTag(Element element)
0470:                    throws IOException {
0471:                writeTagBeginning(element);
0472:                escaper.writeMarkup("/>");
0473:            }
0474:
0475:            // This just extracts the commonality between writeStartTag  
0476:            // and writeEmptyElementTag
0477:            private void writeTagBeginning(Element element) throws IOException {
0478:                escaper.writeMarkup('<');
0479:                escaper.writeMarkup(element.getQualifiedName());
0480:                writeAttributes(element);
0481:                writeNamespaceDeclarations(element);
0482:            }
0483:
0484:            /**
0485:             * <p>
0486:             *   Writes all the attributes of the specified
0487:             *   element onto the output stream, one at a time, separated
0488:             *   by white space. If preserveBaseURI is true, and it is
0489:             *   necessary to add an <code>xml:base</code> attribute
0490:             *   to the element in order to preserve the base URI, then 
0491:             *   that attribute is also written here.
0492:             *   Each individual attribute is written by invoking
0493:             *   <code>write(Attribute)</code>.
0494:             * </p>
0495:             * 
0496:             * @param element the <code>Element</code> whose attributes are 
0497:             *     written
0498:             * @throws IOException if the underlying output stream
0499:             *     encounters an I/O error
0500:             * @throws UnavailableCharacterException if the name of any of
0501:             *     the element's attributes contains a character that is not 
0502:             *     available in the current encoding
0503:             */
0504:            protected void writeAttributes(Element element) throws IOException {
0505:
0506:                // check to see if we need an xml:base attribute
0507:                if (preserveBaseURI) {
0508:                    ParentNode parent = element.getParent();
0509:                    if (element.getAttribute("base",
0510:                            "http://www.w3.org/XML/1998/namespace") == null) {
0511:                        String baseValue = element.getBaseURI();
0512:                        if (parent == null
0513:                                || parent.isDocument()
0514:                                || !element.getBaseURI().equals(
0515:                                        parent.getBaseURI())) {
0516:
0517:                            escaper.writeMarkup(' ');
0518:                            Attribute baseAttribute = new Attribute("xml:base",
0519:                                    "http://www.w3.org/XML/1998/namespace",
0520:                                    baseValue);
0521:                            write(baseAttribute);
0522:                        }
0523:                    }
0524:                }
0525:
0526:                int attributeCount = element.getAttributeCount();
0527:                for (int i = 0; i < attributeCount; i++) {
0528:                    Attribute attribute = element.getAttribute(i);
0529:                    escaper.writeMarkup(' ');
0530:                    write(attribute);
0531:                }
0532:            }
0533:
0534:            /**
0535:             * <p>
0536:             *   Writes all the namespace declaration
0537:             *   attributes of the specified element onto the output stream,
0538:             *   one at a time, separated by white space. Each individual 
0539:             *   declaration is written by invoking 
0540:             *   <code>writeNamespaceDeclaration</code>.
0541:             * </p>
0542:             * 
0543:             * @param element the <code>Element</code> whose namespace
0544:             *     declarations are written
0545:             * @throws IOException if the underlying output stream
0546:             *     encounters an I/O error
0547:             * @throws UnavailableCharacterException if any of the element's 
0548:             *     namespace prefixes contains a character that is not 
0549:             *     available in the current encoding
0550:             */
0551:            protected void writeNamespaceDeclarations(Element element)
0552:                    throws IOException {
0553:
0554:                ParentNode parent = element.getParent();
0555:
0556:                Map prefixes = element.getNamespacePrefixesInScope();
0557:                Iterator iterator = prefixes.entrySet().iterator();
0558:                while (iterator.hasNext()) {
0559:                    Map.Entry entry = (Map.Entry) iterator.next();
0560:                    String additionalPrefix = (String) entry.getKey();
0561:                    String uri = (String) entry.getValue();
0562:                    if (parent != null && parent.isElement()) {
0563:                        Element parentElement = (Element) parent;
0564:                        if (uri.equals(parentElement
0565:                                .getNamespaceURI(additionalPrefix))) {
0566:                            continue;
0567:                        }
0568:                    } else if (uri.equals("")) {
0569:                        continue; // no need to say xmlns=""   
0570:                    }
0571:
0572:                    // XXX replace with a writeSpace method????
0573:                    escaper.writeMarkup(' ');
0574:                    writeNamespaceDeclaration(additionalPrefix, uri);
0575:                }
0576:
0577:            }
0578:
0579:            /**
0580:             * <p>
0581:             *   Writes a namespace declaration in the form
0582:             *   <code>xmlns:<i>prefix</i>="<i>uri</i>"</code> or 
0583:             *   <code>xmlns="<i>uri</i>"</code>. It does not write
0584:             *   the spaces on either side of the namespace declaration.
0585:             *   These are written by <code>writeNamespaceDeclarations</code>.
0586:             * </p>
0587:             * 
0588:             * @param prefix the namespace prefix; the empty string for the
0589:             *     default namespace
0590:             * @param uri the namespace URI
0591:             * 
0592:             * @throws IOException if the underlying output stream
0593:             *     encounters an I/O error
0594:             * @throws UnavailableCharacterException if the namespace prefix contains a 
0595:             *     character that is not available in the current encoding
0596:             */
0597:            protected void writeNamespaceDeclaration(String prefix, String uri)
0598:                    throws IOException {
0599:
0600:                if ("".equals(prefix)) {
0601:                    escaper.writeMarkup("xmlns");
0602:                } else {
0603:                    escaper.writeMarkup("xmlns:");
0604:                    escaper.writeMarkup(prefix);
0605:                }
0606:                escaper.writeMarkup("=\"");
0607:                escaper.writePCDATA(uri);
0608:                escaper.writeMarkup('\"');
0609:
0610:            }
0611:
0612:            /**
0613:             * <p>
0614:             *   Writes an attribute in the form 
0615:             *   <code><i>name</i>="<i>value</i>"</code>.
0616:             *   Characters in the attribute value are escaped as necessary.
0617:             * </p>
0618:             * 
0619:             * @param attribute the <code>Attribute</code> to write
0620:             * 
0621:             * @throws IOException if the underlying output stream
0622:             *     encounters an I/O error
0623:             * @throws UnavailableCharacterException if the attribute name contains a character 
0624:             *     that is not available in the current encoding
0625:             * 
0626:             */
0627:            protected void write(Attribute attribute) throws IOException {
0628:                escaper.writeMarkup(attribute.getQualifiedName());
0629:                escaper.writeMarkup("=\"");
0630:                escaper.writeAttributeValue(attribute.getValue());
0631:                escaper.writeMarkup('\"');
0632:            }
0633:
0634:            /**
0635:             * <p>
0636:             * Writes a comment onto the output stream using the current 
0637:             * options. Since character and entity references are not resolved
0638:             * in comments, comments can only be serialized when all
0639:             * characters they contain are available in the current 
0640:             * encoding.
0641:             * </p>
0642:             * 
0643:             * @param comment the <code>Comment</code> to serialize
0644:             * 
0645:             * @throws IOException if the underlying output stream 
0646:             *     encounters an I/O error
0647:             * @throws UnavailableCharacterException if the comment contains a 
0648:             *     character that is not available in the current encoding
0649:             */
0650:            protected void write(Comment comment) throws IOException {
0651:                if (escaper.isIndenting())
0652:                    escaper.breakLine();
0653:                escaper.writeMarkup("<!--");
0654:                escaper.writeMarkup(comment.getValue());
0655:                escaper.writeMarkup("-->");
0656:            }
0657:
0658:            /**
0659:             * <p>
0660:             * Writes a processing instruction
0661:             * onto the output stream using the current options.
0662:             * Since character and entity references are not resolved
0663:             * in processing instructions, processing instructions
0664:             * can only be serialized when all
0665:             * characters they contain are available in the current 
0666:             * encoding.
0667:             * </p>
0668:             * 
0669:             * @param instruction the <code>ProcessingInstruction</code> 
0670:             *     to serialize
0671:             * 
0672:             * @throws IOException if the underlying output stream
0673:             *     encounters an I/O error
0674:             * @throws UnavailableCharacterException if the comment contains a 
0675:             *     character that is not available in the current encoding
0676:             */
0677:            protected void write(ProcessingInstruction instruction)
0678:                    throws IOException {
0679:
0680:                if (escaper.isIndenting())
0681:                    escaper.breakLine();
0682:                escaper.writeMarkup("<?");
0683:                escaper.writeMarkup(instruction.getTarget());
0684:                String value = instruction.getValue();
0685:                // for canonical XML, only output a space after the target
0686:                // if there is a value
0687:                if (!"".equals(value)) {
0688:                    escaper.writeMarkup(' ');
0689:                    escaper.writeMarkup(value);
0690:                }
0691:                escaper.writeMarkup("?>");
0692:
0693:            }
0694:
0695:            /**
0696:             * <p>
0697:             * Writes a <code>Text</code> object
0698:             * onto the output stream using the current options.
0699:             * Reserved characters such as &lt;, &gt; and "
0700:             * are escaped using the standard entity references 
0701:             * such as <code>&amp;lt;</code>, <code>&amp;gt;</code>, 
0702:             * and <code>&amp;quot;</code>.
0703:             * </p>
0704:             * 
0705:             * <p>
0706:             * Characters which cannot be encoded in the current character set
0707:             * (for example, &Omega; in ISO-8859-1) are encoded using 
0708:             * character references. 
0709:             * </p> 
0710:             * 
0711:             * @param text the <code>Text</code> to serialize
0712:             * 
0713:             * @throws IOException if the underlying output stream
0714:             *     encounters an I/O error
0715:             */
0716:            protected void write(Text text) throws IOException {
0717:
0718:                // XXX Is there a shortcut that takes advantage of the
0719:                // data being stored in UTF-8 here? perhaps even if only
0720:                // when serializing to UTF-8?
0721:                String value = text.getValue();
0722:                if (text.isCDATASection() && value.indexOf("]]>") == -1) {
0723:                    if (!(escaper instanceof  UnicodeWriter)) {
0724:                        int length = value.length();
0725:                        for (int i = 0; i < length; i++) {
0726:                            if (escaper.needsEscaping(value.charAt(i))) {
0727:                                // can't use CDATA section
0728:                                escaper.writePCDATA(value);
0729:                                return;
0730:                            }
0731:                        }
0732:                    }
0733:                    escaper.writeMarkup("<![CDATA[");
0734:                    escaper.writeMarkup(value);
0735:                    escaper.writeMarkup("]]>");
0736:                }
0737:                // is this boundary whitespace we can ignore?
0738:                else if (isBoundaryWhitespace(text, value)) {
0739:                    return; // without writing node
0740:                } else {
0741:                    escaper.writePCDATA(value);
0742:                }
0743:
0744:            }
0745:
0746:            private boolean isBoundaryWhitespace(Text text, String value) {
0747:
0748:                if (getIndent() <= 0)
0749:                    return false;
0750:
0751:                ParentNode parent = text.getParent();
0752:                if (parent == null) {
0753:                    return "".equals(value.trim());
0754:                }
0755:
0756:                // ???? cutting next line only breaks a few tests; and what it does
0757:                // break might be better off if the breakage is accepted as correct behavior
0758:                int childCount = parent.getChildCount();
0759:                if (childCount == 1)
0760:                    return false;
0761:                if (!"".equals(value.trim()))
0762:                    return false;
0763:
0764:                // ???? This is a huge Hotspot. maybe 12% of serialization time
0765:                // when indenting. Is there any way to eliminate this?
0766:                // We only actually need to test a couple of positions, 0 and
0767:                // parent.getChildCount()-1
0768:                // Instead of getting position we could get those two elements and compare
0769:                // to the text. But you still need the previous and next
0770:                int position = parent.indexOf(text);
0771:
0772:                Node previous = null;
0773:                Node next = null;
0774:
0775:                if (position != 0)
0776:                    previous = parent.getChild(position - 1);
0777:                if (position != childCount - 1) {
0778:                    next = parent.getChild(position + 1);
0779:                }
0780:                if (previous == null || !previous.isText()) {
0781:                    if (next == null || !next.isText()) {
0782:                        return true;
0783:                    }
0784:                }
0785:
0786:                return false;
0787:
0788:            }
0789:
0790:            /**
0791:             * <p>
0792:             * Writes a <code>DocType</code> object
0793:             * onto the output stream using the current options.
0794:             * </p>
0795:             * 
0796:             * @param doctype the document type declaration to serialize
0797:             * 
0798:             * @throws IOException if the underlying 
0799:             *     output stream encounters an I/O error
0800:             * @throws UnavailableCharacterException if the document type   
0801:             *     declaration contains a character that is not available 
0802:             *     in the current encoding
0803:             */
0804:            protected void write(DocType doctype) throws IOException {
0805:
0806:                escaper.writeMarkup("<!DOCTYPE ");
0807:                escaper.writeMarkup(doctype.getRootElementName());
0808:                if (doctype.getPublicID() != null) {
0809:                    escaper.writeMarkup(" PUBLIC \"" + doctype.getPublicID()
0810:                            + "\" \"" + doctype.getSystemID() + "\"");
0811:                } else if (doctype.getSystemID() != null) {
0812:                    escaper.writeMarkup(" SYSTEM \"" + doctype.getSystemID()
0813:                            + "\"");
0814:                }
0815:
0816:                String internalDTDSubset = doctype.getInternalDTDSubset();
0817:                if (!internalDTDSubset.equals("")) {
0818:                    escaper.writeMarkup(" [");
0819:                    escaper.breakLine();
0820:                    escaper.setInDocType(true);
0821:                    escaper.writeMarkup(internalDTDSubset);
0822:                    escaper.setInDocType(false);
0823:                    escaper.writeMarkup("]");
0824:                }
0825:
0826:                escaper.writeMarkup(">");
0827:
0828:            }
0829:
0830:            /**
0831:             * <p>
0832:             * Writes a child node onto the output stream using the  
0833:             * current options. It is invoked when walking the tree to
0834:             * serialize the entire document. It is not called, and indeed
0835:             * should not be called, for either the <code>Document</code> 
0836:             * node or for attributes. 
0837:             * </p>
0838:             * 
0839:             * @param node the <code>Node</code> to serialize
0840:             * 
0841:             * @throws IOException if the underlying output stream
0842:             *     encounters an I/O error
0843:             * @throws XMLException if an <code>Attribute</code>, a 
0844:             *     <code>Document</code>, or <code>Namespace</code>
0845:             *     is passed to this method
0846:             */
0847:            protected void writeChild(Node node) throws IOException {
0848:
0849:                if (node.isElement()) {
0850:                    write((Element) node);
0851:                } else if (node.isText()) {
0852:                    write((Text) node);
0853:                } else if (node.isComment()) {
0854:                    write((Comment) node);
0855:                } else if (node.isProcessingInstruction()) {
0856:                    write((ProcessingInstruction) node);
0857:                } else if (node.isDocType()) {
0858:                    write((DocType) node);
0859:                } else {
0860:                    throw new XMLException("Cannot write a "
0861:                            + node.getClass().getName()
0862:                            + " from the writeChild() method");
0863:                }
0864:
0865:            }
0866:
0867:            /** <p>
0868:             * Writes a string onto the underlying output stream.
0869:             * Non-ASCII characters that are not available in the
0870:             * current character set are encoded with numeric character
0871:             * references. The three reserved characters &lt;, &gt;, and &amp; 
0872:             * are escaped using the standard entity references 
0873:             * <code>&amp;lt;</code>, <code>&amp;gt;</code>, 
0874:             * and <code>&amp;amp;</code>.
0875:             * Double and single quotes are not escaped.
0876:             * </p> 
0877:             * 
0878:             * @param text the parsed character data to serialize
0879:             * 
0880:             * @throws IOException if the underlying output stream 
0881:             *     encounters an I/O error
0882:             */
0883:            protected final void writeEscaped(String text) throws IOException {
0884:                escaper.writePCDATA(text);
0885:            }
0886:
0887:            /** <p>
0888:             *   Writes a string onto the underlying output stream.
0889:             *   Non-ASCII characters that are not available in the
0890:             *   current character set are escaped using hexadecimal numeric
0891:             *   character references. Carriage returns, line feeds, and tabs
0892:             *   are also escaped using hexadecimal numeric character 
0893:             *   references in order to ensure their preservation on a round
0894:             *   trip. The four reserved characters &lt;, &gt;, &amp;,  
0895:             *   and &quot; are escaped using the standard entity references 
0896:             *   <code>&amp;lt;</code>, <code>&amp;gt;</code>, 
0897:             *   <code>&amp;amp;</code>, and <code>&amp;quot;</code>. 
0898:             *   The single quote is not escaped. 
0899:             * </p> 
0900:             * 
0901:             * @param value the attribute value to serialize
0902:             * 
0903:             * @throws IOException if the underlying output stream 
0904:             *     encounters an I/O error
0905:             */
0906:            protected final void writeAttributeValue(String value)
0907:                    throws IOException {
0908:                escaper.writeAttributeValue(value);
0909:            }
0910:
0911:            /** <p>
0912:             *   Writes a string onto the underlying output stream.
0913:             *   without escaping any characters.
0914:             *   Non-ASCII characters that are not available in the
0915:             *   current character set cause an <code>IOException</code>.
0916:             * </p> 
0917:             * 
0918:             * @param text the <code>String</code> to serialize
0919:             * 
0920:             * @throws IOException if the underlying output stream
0921:             *     encounters an I/O error or <code>text</code> contains 
0922:             *     characters not available in the current character set
0923:             */
0924:            protected final void writeRaw(String text) throws IOException {
0925:                escaper.writeMarkup(text);
0926:            }
0927:
0928:            /** <p>
0929:             *   Writes the current line break string
0930:             *   onto the underlying output stream and indents
0931:             *   as specified by the current level and the indent property.
0932:             * </p> 
0933:             * 
0934:             * @throws IOException if the underlying output stream 
0935:             *     encounters an I/O error
0936:             */
0937:            protected final void breakLine() throws IOException {
0938:                escaper.breakLine();
0939:            }
0940:
0941:            /**
0942:             * <p>
0943:             * Flushes the data onto the output stream.
0944:             * It is not enough to flush the output stream.
0945:             * You must flush the serializer object itself because it
0946:             * uses some internal buffering.
0947:             * The serializer will flush the underlying output stream.
0948:             * </p>
0949:             * 
0950:             * @throws IOException  if the underlying  
0951:             *     output stream encounters an I/O error
0952:             */
0953:            public void flush() throws IOException {
0954:                escaper.flush();
0955:            }
0956:
0957:            /**
0958:             * <p>
0959:             * Returns the number of spaces this serializer indents.
0960:             * </p>
0961:             * 
0962:             * @return the number of spaces this serializer indents
0963:             *     each successive level beyond the previous one
0964:             */
0965:            public int getIndent() {
0966:                return escaper.getIndent();
0967:            }
0968:
0969:            /**
0970:             * <p>
0971:             * Sets the number of additional spaces to add to each successive
0972:             * level in the hierarchy. Use 0 for no extra indenting. The 
0973:             * maximum indentation is in limited to approximately half the
0974:             * maximum line length. The serializer will not indent further 
0975:             * than that no matter how many levels deep the hierarchy is.
0976:             * </p>
0977:             * 
0978:             * <p>
0979:             *   When this variable is set to a value greater than 0,
0980:             *   the serializer does not preserve white space. Spaces,
0981:             *   tabs, carriage returns, and line feeds can all be 
0982:             *   interchanged at the serializer's discretion, and additional
0983:             *   white space may be added before and after tags.
0984:             *   Carriage returns, line feeds, and tabs will not be 
0985:             *   escaped with numeric character references.
0986:             * </p>
0987:             * 
0988:             * <p>
0989:             *   Inside elements with an <code>xml:space="preserve"</code> 
0990:             *   attribute, white space is preserved and no indenting 
0991:             *   takes place, regardless of the setting of the indent
0992:             *   property, unless, of course, an 
0993:             *   <code>xml:space="default"</code> attribute overrides the
0994:             *   <code>xml:space="preserve"</code> attribute.
0995:             * </p>
0996:             * 
0997:             * <p>
0998:             *   The default value for indent is 0; that is, the default is
0999:             *   not to add or subtract any white space from the source
1000:             *   document.  
1001:             * </p>
1002:             * 
1003:             * @param indent the number of spaces to indent 
1004:             *      each successive level of the hierarchy
1005:             * 
1006:             * @throws IllegalArgumentException if indent is less than zero
1007:             * 
1008:             */
1009:            public void setIndent(int indent) {
1010:                if (indent < 0) {
1011:                    throw new IllegalArgumentException(
1012:                            "Indent cannot be negative");
1013:                }
1014:                escaper.setIndent(indent);
1015:            }
1016:
1017:            /**
1018:             * <p>
1019:             * Returns the string used as a line separator.
1020:             * This is always <code>"\n"</code>, <code>"\r"</code>, 
1021:             * or <code>"\r\n"</code>.
1022:             * </p>
1023:             * 
1024:             * @return the line separator
1025:             */
1026:            public String getLineSeparator() {
1027:                return escaper.getLineSeparator();
1028:            }
1029:
1030:            /**
1031:             * <p>
1032:             * Sets the line separator. This can only be one of the 
1033:             * three strings <code>"\n"</code>, <code>"\r"</code>, 
1034:             * or <code>"\r\n"</code>. All other values are forbidden.
1035:             * If this method is invoked, then 
1036:             * line separators in the character data will be changed to this
1037:             * string. Line separators in attribute values will be changed
1038:             * to the hexadecimal numeric character references corresponding
1039:             * to this string.
1040:             * </p>
1041:             * 
1042:             * <p>
1043:             *  The default line separator is <code>"\r\n"</code>. However, 
1044:             *  line separators in character data and attribute values are not 
1045:             *  changed to this string, unless this method is called first.
1046:             * </p>
1047:             * 
1048:             * @param lineSeparator the line separator to set
1049:             * 
1050:             * @throws IllegalArgumentException if you attempt to use any line
1051:             *    separator other than <code>"\n"</code>, <code>"\r"</code>, 
1052:             *    or <code>"\r\n"</code>.
1053:             * 
1054:             */
1055:            public void setLineSeparator(String lineSeparator) {
1056:                escaper.setLineSeparator(lineSeparator);
1057:            }
1058:
1059:            /**
1060:             * <p>
1061:             * Returns the preferred maximum line length.
1062:             * </p>
1063:             * 
1064:             * @return the preferred maximum line length.
1065:             */
1066:            public int getMaxLength() {
1067:                return escaper.getMaxLength();
1068:            }
1069:
1070:            /**
1071:             * <p>
1072:             * Sets the suggested maximum line length for this serializer.
1073:             * Setting this to 0 indicates that no automatic wrapping is to be
1074:             * performed. When a line approaches this length, the serializer 
1075:             * begins looking for opportunities to break the line. Generally 
1076:             * it will break on any ASCII white space character (tab, carriage 
1077:             * return, linefeed, and space). In some circumstances the 
1078:             * serializer may not be able to break the line before the maximum
1079:             * length is reached. For instance, if an element name is longer 
1080:             * than the maximum line length the only way to correctly 
1081:             * serialize it is to exceed the maximum line length. In this case,
1082:             *  the serializer will exceed the maximum line length.
1083:             * </p>
1084:             * 
1085:             * <p>
1086:             * The default value for maximum line length is 0, which is  
1087:             * interpreted as no maximum line length. 
1088:             * Setting this to a negative value just sets it to 0. 
1089:             * </p>
1090:             * 
1091:             * <p>
1092:             *   When this variable is set to a value greater than 0,
1093:             *   the serializer does not preserve white space. Spaces,
1094:             *   tabs, carriage returns, and line feeds can all be 
1095:             *   interchanged at the serializer's discretion.
1096:             *   Carriage returns, line feeds, and tabs will not be 
1097:             *   escaped with numeric character references.
1098:             * </p>
1099:             * 
1100:             * <p>
1101:             *   Inside elements with an <code>xml:space="preserve"</code> 
1102:             *   attribute, the maximum line length is not enforced, 
1103:             *   regardless of the setting of the this property, unless,  
1104:             *   of course, an <code>xml:space="default"</code> attribute 
1105:             *   overrides the <code>xml:space="preserve"</code> attribute.
1106:             * </p>
1107:             * 
1108:             * @param maxLength the preferred maximum line length
1109:             */
1110:            public void setMaxLength(int maxLength) {
1111:                escaper.setMaxLength(maxLength);
1112:            }
1113:
1114:            /**
1115:             * <p>
1116:             * Returns true if this serializer preserves the original
1117:             * base URIs by inserting extra <code>xml:base</code> attributes.
1118:             * </p>
1119:             * 
1120:             * @return true if this <code>Serializer</code> inserts
1121:             *    extra <code>xml:base</code> attributes to attempt to 
1122:             *    preserve base URI information from the document.
1123:             */
1124:            public boolean getPreserveBaseURI() {
1125:                return preserveBaseURI;
1126:            }
1127:
1128:            /**
1129:             * <p>
1130:             * Determines whether this serializer inserts
1131:             * extra <code>xml:base</code> attributes to attempt to 
1132:             * preserve base URI information from the document.
1133:             * The default is false, do not preserve base URI information.
1134:             * <code>xml:base</code> attributes that have been explicitly
1135:             * added to an element are always output. This property only  
1136:             * determines whether or not extra <code>xml:base</code> 
1137:             * attributes are added.
1138:             * </p>
1139:             * 
1140:             * @param preserve true if <code>xml:base</code> 
1141:             *     attributes should be added as necessary
1142:             *     to preserve base URI information 
1143:             */
1144:            public void setPreserveBaseURI(boolean preserve) {
1145:                this .preserveBaseURI = preserve;
1146:            }
1147:
1148:            /**
1149:             * <p>
1150:             *   Returns the name of the character encoding used by 
1151:             *   this serializer.
1152:             * </p>
1153:             * 
1154:             * @return the encoding used for the output document
1155:             */
1156:            public String getEncoding() {
1157:                return escaper.getEncoding();
1158:            }
1159:
1160:            /**
1161:             * <p>
1162:             *   If true, this property indicates serialization will
1163:             *   perform Unicode normalization on all data using normalization
1164:             *   form C (NFC). Performing Unicode normalization may change the
1165:             *   document's infoset. The default is false; do not normalize.
1166:             *   This version is based on Unicode 4.0. 
1167:             * </p>
1168:             * 
1169:             * <p>
1170:             *   This feature has not yet been benchmarked or optimized.
1171:             *   It may result in substantially slower code. 
1172:             * </p>
1173:             * 
1174:             * <p>
1175:             *   If all your data is in the first 256 code points of Unicode
1176:             *   (i.e. the ISO-8859-1, Latin-1 character set), then it's 
1177:             *   already in normalization form C and normalizing won't change
1178:             *   anything.
1179:             * </p>
1180:             * 
1181:             * @param normalize true if normalization is performed; 
1182:             *     false if it isn't
1183:             */
1184:            public void setUnicodeNormalizationFormC(boolean normalize) {
1185:                escaper.setNFC(normalize);
1186:            }
1187:
1188:            /**
1189:             * <p>
1190:             *   Indicates whether serialization will
1191:             *   perform Unicode normalization on all data using normalization
1192:             *   form C (NFC). The default is false; do not normalize.
1193:             * </p>
1194:             * 
1195:             * @return true if this serializer performs Unicode 
1196:             *     normalization; false if it doesn't
1197:             */
1198:            public boolean getUnicodeNormalizationFormC() {
1199:                return escaper.getNFC();
1200:            }
1201:
1202:            /**
1203:             * <p>
1204:             *   Returns the current column number of the output stream. This 
1205:             *   method useful for subclasses that implement their own pretty
1206:             *   printing strategies by inserting white space and line breaks 
1207:             *   at appropriate points.
1208:             * </p>
1209:             * 
1210:             * <p>
1211:             *   Columns are counted based on Unicode characters, not Java
1212:             *   chars. A surrogate pair counts as one character in this 
1213:             *   context, not two. However, a character followed by a 
1214:             *   combining character (e.g. e followed by combining accent
1215:             *   acute) counts as two characters. This latter choice
1216:             *   (treating combining characters like regular characters)
1217:             *   is under review, and may change in the future if it's not
1218:             *   too big a performance hit.
1219:             * </p>
1220:             * 
1221:             * @return the current column number
1222:             */
1223:            protected final int getColumnNumber() {
1224:                return escaper.getColumnNumber();
1225:            }
1226:
1227:        }
www.java2java.com | Contact Us
All other trademarks are property of their respective owners.