001: /*
002: * Copyright 2002-2008 Andy Clark
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016:
017: package org.cyberneko.html;
018:
019: import org.cyberneko.html.filters.DefaultFilter;
020:
021: import java.io.OutputStream;
022: import java.io.OutputStreamWriter;
023: import java.io.PrintWriter;
024: import java.io.UnsupportedEncodingException;
025:
026: import org.apache.xerces.util.XMLStringBuffer;
027: import org.apache.xerces.xni.Augmentations;
028: import org.apache.xerces.xni.NamespaceContext;
029: import org.apache.xerces.xni.QName;
030: import org.apache.xerces.xni.XMLAttributes;
031: import org.apache.xerces.xni.XMLLocator;
032: import org.apache.xerces.xni.XMLString;
033: import org.apache.xerces.xni.XNIException;
034:
035: /**
036: * This class implements an filter to output "canonical" files for
037: * regression testing.
038: *
039: * @author Andy Clark
040: */
041: public class Writer extends DefaultFilter {
042:
043: //
044: // Data
045: //
046:
047: /** Writer. */
048: protected PrintWriter out = new PrintWriter(System.out);
049:
050: // temp vars
051:
052: /** String buffer for collecting text content. */
053: private final XMLStringBuffer fStringBuffer = new XMLStringBuffer();
054:
055: //
056: // Constructors
057: //
058:
059: /**
060: * Creates a writer to the standard output stream using UTF-8
061: * encoding.
062: */
063: public Writer() {
064: this (System.out);
065: } // <init>()
066:
067: /**
068: * Creates a writer with the specified output stream using UTF-8
069: * encoding.
070: */
071: public Writer(OutputStream stream) {
072: this (stream, "UTF8");
073: } // <init>(OutputStream)
074:
075: /** Creates a writer with the specified output stream and encoding. */
076: public Writer(OutputStream stream, String encoding) {
077: try {
078: out = new PrintWriter(new OutputStreamWriter(stream,
079: encoding), true);
080: } catch (UnsupportedEncodingException e) {
081: throw new RuntimeException("JVM must have " + encoding
082: + " decoder");
083: }
084: } // <init>(OutputStream,String)
085:
086: /** Creates a writer with the specified Java Writer. */
087: public Writer(java.io.Writer writer) {
088: out = new PrintWriter(writer);
089: } // <init>(java.io.Writer)
090:
091: //
092: // XMLDocumentHandler methods
093: //
094:
095: // since Xerces-J 2.2.0
096:
097: /** Start document. */
098: public void startDocument(XMLLocator locator, String encoding,
099: NamespaceContext nscontext, Augmentations augs)
100: throws XNIException {
101: fStringBuffer.clear();
102: } // startDocument(XMLLocator,String,NamespaceContext,Augmentations)
103:
104: // old methods
105:
106: /** Start document. */
107: public void startDocument(XMLLocator locator, String encoding,
108: Augmentations augs) throws XNIException {
109: startDocument(locator, encoding, null, augs);
110: } // startDocument(XMLLocator,String,Augmentations)
111:
112: /** XML declaration. */
113: public void xmlDecl(String version, String encoding,
114: String standalone, Augmentations augs) throws XNIException {
115: if (version != null) {
116: out.print("xversion ");
117: out.println(version);
118: }
119: if (encoding != null) {
120: out.print("xencoding ");
121: out.println(encoding);
122: }
123: if (standalone != null) {
124: out.print("xstandalone ");
125: out.println(standalone);
126: }
127: out.flush();
128: } // xmlDecl(String,String,String,Augmentations)
129:
130: /** Doctype declaration. */
131: public void doctypeDecl(String root, String pubid, String sysid,
132: Augmentations augs) throws XNIException {
133: chars();
134: out.print('!');
135: if (root != null) {
136: out.print(root);
137: }
138: out.println();
139: if (pubid != null) {
140: out.print('p');
141: out.print(pubid);
142: out.println();
143: }
144: if (sysid != null) {
145: out.print('s');
146: out.print(sysid);
147: out.println();
148: }
149: out.flush();
150: } // doctypeDecl(String,String,String,Augmentations)
151:
152: /** Processing instruction. */
153: public void processingInstruction(String target, XMLString data,
154: Augmentations augs) throws XNIException {
155: chars();
156: out.print('?');
157: out.print(target);
158: if (data != null && data.length > 0) {
159: out.print(' ');
160: print(data.toString());
161: }
162: out.println();
163: out.flush();
164: } // processingInstruction(String,XMLString,Augmentations)
165:
166: /** Comment. */
167: public void comment(XMLString text, Augmentations augs)
168: throws XNIException {
169: chars();
170: out.print('#');
171: print(text.toString());
172: out.println();
173: out.flush();
174: } // comment(XMLString,Augmentations)
175:
176: /** Start element. */
177: public void startElement(QName element, XMLAttributes attrs,
178: Augmentations augs) throws XNIException {
179: chars();
180: out.print('(');
181: out.print(element.rawname);
182: int acount = attrs != null ? attrs.getLength() : 0;
183: if (acount > 0) {
184: String[] anames = new String[acount];
185: String[] auris = new String[acount];
186: sortAttrNames(attrs, anames, auris);
187: for (int i = 0; i < acount; i++) {
188: String aname = anames[i];
189: out.println();
190: out.flush();
191: out.print('A');
192: if (auris[i] != null) {
193: out.print('{');
194: out.print(auris[i]);
195: out.print('}');
196: }
197: out.print(aname);
198: out.print(' ');
199: print(attrs.getValue(aname));
200: }
201: }
202: out.println();
203: out.flush();
204: } // startElement(QName,XMLAttributes,Augmentations)
205:
206: /** End element. */
207: public void endElement(QName element, Augmentations augs)
208: throws XNIException {
209: chars();
210: out.print(')');
211: out.print(element.rawname);
212: out.println();
213: out.flush();
214: } // endElement(QName,Augmentations)
215:
216: /** Empty element. */
217: public void emptyElement(QName element, XMLAttributes attrs,
218: Augmentations augs) throws XNIException {
219: startElement(element, attrs, augs);
220: endElement(element, augs);
221: } // emptyElement(QName,XMLAttributes,Augmentations)
222:
223: /** Characters. */
224: public void characters(XMLString text, Augmentations augs)
225: throws XNIException {
226: fStringBuffer.append(text);
227: } // characters(XMLString,Augmentations)
228:
229: /** Ignorable whitespace. */
230: public void ignorableWhitespace(XMLString text, Augmentations augs)
231: throws XNIException {
232: characters(text, augs);
233: } // ignorableWhitespace(XMLString,Augmentations)
234:
235: //
236: // Protected methods
237: //
238:
239: /** Prints collected characters. */
240: protected void chars() {
241: if (fStringBuffer.length == 0) {
242: return;
243: }
244: out.print('"');
245: print(fStringBuffer.toString());
246: out.println();
247: out.flush();
248: fStringBuffer.clear();
249: } // chars()
250:
251: /** Prints the specified string. */
252: protected void print(String s) {
253: int length = s != null ? s.length() : 0;
254: for (int i = 0; i < length; i++) {
255: char c = s.charAt(i);
256: switch (c) {
257: case '\n': {
258: out.print("\\n");
259: break;
260: }
261: case '\r': {
262: out.print("\\r");
263: break;
264: }
265: case '\t': {
266: out.print("\\t");
267: break;
268: }
269: case '\\': {
270: out.print("\\\\");
271: break;
272: }
273: default: {
274: out.print(c);
275: }
276: }
277: }
278: } // print(String)
279:
280: //
281: // Protected static methods
282: //
283:
284: /** Sorts the attribute names. */
285: protected static void sortAttrNames(XMLAttributes attrs,
286: String[] anames, String[] auris) {
287: for (int i = 0; i < anames.length; i++) {
288: anames[i] = attrs.getQName(i);
289: auris[i] = attrs.getURI(i);
290: }
291: // NOTE: This is super inefficient but it doesn't really matter. -Ac
292: for (int i = 0; i < anames.length - 1; i++) {
293: int index = i;
294: for (int j = i + 1; j < anames.length; j++) {
295: if (anames[j].compareTo(anames[index]) < 0) {
296: index = j;
297: }
298: }
299: if (index != i) {
300: String tn = anames[i];
301: anames[i] = anames[index];
302: anames[index] = tn;
303: String tu = auris[i];
304: auris[i] = auris[index];
305: auris[index] = tu;
306: }
307: }
308: } // sortAttrNames(XMLAttributes,String[])
309:
310: //
311: // MAIN
312: //
313:
314: /** Main program. */
315: public static void main(String[] argv) throws Exception {
316: org.apache.xerces.xni.parser.XMLDocumentFilter[] filters = { new Writer(), };
317: org.apache.xerces.xni.parser.XMLParserConfiguration parser = new org.cyberneko.html.HTMLConfiguration();
318: parser
319: .setProperty(
320: "http://cyberneko.org/html/properties/filters",
321: filters);
322: for (int i = 0; i < argv.length; i++) {
323: org.apache.xerces.xni.parser.XMLInputSource source = new org.apache.xerces.xni.parser.XMLInputSource(
324: null, argv[i], null);
325: parser.parse(source);
326: }
327: } // main(String[])
328:
329: } // class Writer
|