001: /*
002: * Copyright 2002-2007 the original author or authors.
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016:
017: package org.springframework.util.xml;
018:
019: import java.io.BufferedReader;
020: import java.io.CharConversionException;
021: import java.io.IOException;
022: import java.io.InputStream;
023: import java.io.InputStreamReader;
024:
025: import org.springframework.util.StringUtils;
026:
027: /**
028: * Detects whether an XML stream is using DTD- or XSD-based validation.
029: *
030: * @author Rob Harrop
031: * @author Juergen Hoeller
032: * @since 2.0
033: */
034: public class XmlValidationModeDetector {
035:
036: /**
037: * Indicates that the validation should be disabled.
038: */
039: public static final int VALIDATION_NONE = 0;
040:
041: /**
042: * Indicates that the validation mode should be auto-guessed, since we cannot find
043: * a clear indication (probably choked on some special characters, or the like).
044: */
045: public static final int VALIDATION_AUTO = 1;
046:
047: /**
048: * Indicates that DTD validation should be used (we found a "DOCTYPE" declaration).
049: */
050: public static final int VALIDATION_DTD = 2;
051:
052: /**
053: * Indicates that XSD validation should be used (found no "DOCTYPE" declaration).
054: */
055: public static final int VALIDATION_XSD = 3;
056:
057: /**
058: * The token in a XML document that declares the DTD to use for validation
059: * and thus that DTD validation is being used.
060: */
061: private static final String DOCTYPE = "DOCTYPE";
062:
063: /**
064: * The token that indicates the start of an XML comment.
065: */
066: private static final String START_COMMENT = "<!--";
067:
068: /**
069: * The token that indicates the end of an XML comment.
070: */
071: private static final String END_COMMENT = "-->";
072:
073: /**
074: * Indicates whether or not the current parse position is inside an XML comment.
075: */
076: private boolean inComment;
077:
078: /**
079: * Detect the validation mode for the XML document in the supplied {@link InputStream}.
080: * Note that the supplied {@link InputStream} is closed by this method before returning.
081: * @param inputStream the InputStream to parse
082: * @throws IOException in case of I/O failure
083: * @see #VALIDATION_DTD
084: * @see #VALIDATION_XSD
085: */
086: public int detectValidationMode(InputStream inputStream)
087: throws IOException {
088: // Peek into the file to look for DOCTYPE.
089: BufferedReader reader = new BufferedReader(
090: new InputStreamReader(inputStream));
091: try {
092: boolean isDtdValidated = false;
093: String content;
094: while ((content = reader.readLine()) != null) {
095: content = consumeCommentTokens(content);
096: if (this .inComment || !StringUtils.hasText(content)) {
097: continue;
098: }
099: if (hasDoctype(content)) {
100: isDtdValidated = true;
101: break;
102: }
103: if (hasOpeningTag(content)) {
104: // End of meaningful data...
105: break;
106: }
107: }
108: return (isDtdValidated ? VALIDATION_DTD : VALIDATION_XSD);
109: } catch (CharConversionException ex) {
110: // Choked on some character encoding...
111: // Leave the decision up to the caller.
112: return VALIDATION_AUTO;
113: } finally {
114: reader.close();
115: }
116: }
117:
118: /**
119: * Does the content contain the the DTD DOCTYPE declaration?
120: */
121: private boolean hasDoctype(String content) {
122: return (content.indexOf(DOCTYPE) > -1);
123: }
124:
125: /**
126: * Does the supplied content contain an XML opening tag. If the parse state is currently
127: * in an XML comment then this method always returns false. It is expected that all comment
128: * tokens will have consumed for the supplied content before passing the remainder to this method.
129: */
130: private boolean hasOpeningTag(String content) {
131: if (this .inComment) {
132: return false;
133: }
134: int openTagIndex = content.indexOf('<');
135: return (openTagIndex > -1 && content.length() > openTagIndex && Character
136: .isLetter(content.charAt(openTagIndex + 1)));
137: }
138:
139: /**
140: * Consumes all the leading comment data in the given String and returns the remaining content, which
141: * may be empty since the supplied content might be all comment data. For our purposes it is only important
142: * to strip leading comment content on a line since the first piece of non comment content will be either
143: * the DOCTYPE declaration or the root element of the document.
144: */
145: private String consumeCommentTokens(String line) {
146: if (line.indexOf(START_COMMENT) == -1
147: && line.indexOf(END_COMMENT) == -1) {
148: return line;
149: }
150: while ((line = consume(line)) != null) {
151: if (!this .inComment
152: && !line.trim().startsWith(START_COMMENT)) {
153: return line;
154: }
155: }
156: return line;
157: }
158:
159: /**
160: * Consume the next comment token, update the "inComment" flag
161: * and return the remaining content.
162: */
163: private String consume(String line) {
164: int index = (this .inComment ? endComment(line)
165: : startComment(line));
166: return (index == -1 ? null : line.substring(index));
167: }
168:
169: /**
170: * Try to consume the {@link #START_COMMENT} token.
171: * @see #commentToken(String, String, boolean)
172: */
173: private int startComment(String line) {
174: return commentToken(line, START_COMMENT, true);
175: }
176:
177: private int endComment(String line) {
178: return commentToken(line, END_COMMENT, false);
179: }
180:
181: /**
182: * Try to consume the supplied token against the supplied content and update the
183: * in comment parse state to the supplied value. Returns the index into the content
184: * which is after the token or -1 if the token is not found.
185: */
186: private int commentToken(String line, String token,
187: boolean inCommentIfPresent) {
188: int index = line.indexOf(token);
189: if (index > -1) {
190: this .inComment = inCommentIfPresent;
191: }
192: return (index == -1 ? index : index + token.length());
193: }
194:
195: }
|