001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017: package org.apache.wicket.util.io;
018:
019: import java.io.BufferedReader;
020: import java.io.IOException;
021: import java.io.InputStream;
022: import java.io.InputStreamReader;
023: import java.io.Reader;
024: import java.util.regex.Matcher;
025: import java.util.regex.Pattern;
026:
027: import org.apache.wicket.util.string.AppendingStringBuffer;
028:
/**
 * A simple XML-aware {@link Reader}. Its only purpose is to inspect the very
 * beginning of an input stream for an XML declaration
 * ({@code <?xml ... ?>}), pick the character encoding named there, and then
 * decode all subsequent bytes with that encoding. The XML declaration itself
 * is consumed and is not part of the characters returned by this reader; it
 * remains available through {@link #getXmlDeclaration()}.
 *
 * @author Juergen Donnerstag
 */
public final class XmlReader extends Reader {
    /**
     * Number of bytes the underlying stream is asked to remember (via
     * {@link InputStream#mark(int)}) while looking for the XML declaration.
     */
    private static final int READ_AHEAD_SIZE = 80;

    /** Regex matching a complete XML declaration, optionally preceded by whitespace */
    private static final Pattern xmlDecl = Pattern
            .compile("[\\s\\n\\r]*<\\?xml(\\s+.*)?\\?>");

    /** Regex extracting the (quoted or unquoted) encoding attribute of an XML declaration */
    private static final Pattern encodingPattern = Pattern
            .compile("\\s+encoding\\s*=\\s*([\"\'](.*?)[\"\']|(\\S*)).*\\?>");

    /**
     * The encoding used to decode the stream: the one named in the XML
     * declaration if present, else the default given to the constructor.
     * Null means JVM default.
     */
    private String encoding;

    /** Null or, if found in the markup, the whole (trimmed) XML declaration */
    private String xmlDeclarationString;

    /** The input stream to read the data from */
    private final InputStream inputStream;

    /** The reader which does the character decoding */
    private Reader reader;

    /**
     * Construct.
     *
     * @param inputStream
     *            The InputStream to read the xml data from. Must support
     *            mark/reset.
     * @param defaultEncoding
     *            Default character encoding to use when not specified in the
     *            XML declaration, specify null to use JVM default
     * @throws IllegalArgumentException
     *             If inputStream is null
     * @throws IOException
     *             In case something went wrong while reading the data
     */
    public XmlReader(final InputStream inputStream,
            final String defaultEncoding) throws IOException {
        // The xml parser does not have a parent filter
        super();

        if (inputStream == null) {
            throw new IllegalArgumentException(
                    "Parameter 'inputStream' must not be null");
        }

        this.inputStream = inputStream;
        this.encoding = defaultEncoding;

        init();
    }

    /**
     * Return the encoding used while reading the markup file.
     *
     * @return if null, then JVM default
     */
    public String getEncoding() {
        return encoding;
    }

    /**
     * Return the XML declaration string, in case it was found in the markup.
     *
     * @return Null, if not found.
     */
    public String getXmlDeclaration() {
        return this.xmlDeclarationString;
    }

    /**
     * Looks for an XML declaration at the current position of the input
     * stream, determines the encoding to use and creates the decoding reader.
     * If no declaration is found the stream is reset, so that nothing is lost.
     *
     * @throws IOException
     *             If the stream does not support mark/reset, cannot be read,
     *             or the encoding is not supported
     */
    public void init() throws IOException {
        if (!this.inputStream.markSupported()) {
            throw new IOException(
                    "The InputStream must support mark/reset");
        }

        // Remember the current position so the read-ahead can be undone
        this.inputStream.mark(READ_AHEAD_SIZE);

        // read-ahead the input stream and check if it starts with <?xml..?>.
        if (getXmlDeclaration(this.inputStream, READ_AHEAD_SIZE)) {
            // Only an encoding that is actually declared replaces the
            // default; a declaration such as <?xml version="1.0"?> must not
            // silently discard the encoding requested by the caller.
            final String declaredEncoding = determineEncoding(this.xmlDeclarationString);
            if (declaredEncoding != null) {
                this.encoding = declaredEncoding;
            }
        } else {
            // No declaration: rewind to where we started
            this.inputStream.reset();
        }

        if (this.encoding == null) {
            // Use JVM default
            this.reader = new BufferedReader(new InputStreamReader(
                    this.inputStream));
        } else {
            // Use the encoding provided
            this.reader = new BufferedReader(new InputStreamReader(
                    this.inputStream, this.encoding));
        }
    }

    /**
     * Determine the encoding from the xml decl.
     *
     * @param string
     *            The xmlDecl string
     * @return The encoding. Null, if the declaration has no encoding
     *         attribute or the attribute value is empty
     */
    private final String determineEncoding(final String string) {
        // Does the declaration contain an encoding attribute at all?
        final Matcher matcher = encodingPattern.matcher(string);
        if (!matcher.find()) {
            return null;
        }

        // Group 2 holds a quoted value, group 3 an unquoted one
        String encoding = matcher.group(2);
        if ((encoding == null) || (encoding.length() == 0)) {
            encoding = matcher.group(3);
        }

        if (encoding != null) {
            encoding = encoding.trim();
            if (encoding.length() == 0) {
                // An empty name is not a usable charset; treat as unspecified
                encoding = null;
            }
        }

        return encoding;
    }

    /**
     * Read-ahead the input stream (markup file). If it starts with an XML
     * declaration (leading whitespace, including line breaks, is tolerated),
     * remember the declaration for later to determine the encoding.
     * <p>
     * The xml decl will not be forwarded to the user.
     *
     * @param in
     *            The markup file
     * @param readAheadSize
     *            The read ahead buffer available to read the xml encoding
     *            information
     * @return true, if an XML declaration has been found
     * @throws IOException
     *             If reading from the stream fails
     */
    private final boolean getXmlDeclaration(final InputStream in,
            final int readAheadSize) throws IOException {
        // Holds at most readAheadSize - 1 characters; a plain JDK buffer
        // is sufficient for such a small, method-local read-ahead.
        final StringBuffer pushBack = new StringBuffer(readAheadSize);

        // True once anything other than whitespace has been read
        boolean contentSeen = false;

        // The current byte from the markup file, treated as one character
        int value;
        while ((value = in.read()) != -1) {
            pushBack.append((char) value);

            if (!isWhitespace(value)) {
                contentSeen = true;
            }

            // Stop at the end of the first tag or at the end of the first
            // line that has content. Line breaks before any content are
            // skipped so that the leading whitespace permitted by the
            // pattern can actually occur. If it is HTML without newlines,
            // stop after X bytes (= characters).
            final boolean lineBreak = (value == '\n') || (value == '\r');
            if ((value == '>') || (lineBreak && contentSeen)
                    || (pushBack.length() >= (readAheadSize - 1))) {
                break;
            }
        }

        // Does the string match the <?xml .. ?> pattern
        final Matcher matcher = xmlDecl.matcher(pushBack);
        if (!matcher.matches()) {
            return false;
        }

        // Save the whole <?xml ..> string for later
        this.xmlDeclarationString = pushBack.toString().trim();
        return true;
    }

    /**
     * Tells whether the given byte is one of the characters matched by the
     * regex class {@code \s} (space, tab, line feed, vertical tab, form feed,
     * carriage return).
     *
     * @param value
     *            The byte read from the stream
     * @return true, if the byte is a whitespace character
     */
    private static boolean isWhitespace(final int value) {
        return (value == ' ') || (value == '\t') || (value == '\n')
                || (value == '\r') || (value == '\f') || (value == 0x0B);
    }

    /**
     * Closes the decoding reader and the underlying input stream. The input
     * stream is closed even if closing the reader fails.
     *
     * @see java.io.Reader#close()
     */
    public void close() throws IOException {
        try {
            this.reader.close();
        } finally {
            this.inputStream.close();
        }
    }

    /**
     * Delegates to the decoding reader.
     *
     * @param buf
     *            Destination buffer
     * @param from
     *            Offset in buf at which to start storing characters
     * @param to
     *            Maximum number of characters to read (passed on as the
     *            length argument of the delegate)
     * @return The number of characters read, or -1 at the end of the stream
     * @see java.io.Reader#read(char[], int, int)
     */
    public int read(char[] buf, int from, int to) throws IOException {
        return this.reader.read(buf, from, to);
    }

    /**
     * @return A description of the underlying input stream and the encoding
     *         in use
     */
    public String toString() {
        return this.inputStream.toString() + " (" + this.encoding + ")";
    }
}
|