001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017:
018: package org.apache.xerces.impl;
019:
020: import java.io.IOException;
021:
022: import org.apache.xerces.util.SymbolTable;
023: import org.apache.xerces.util.XML11Char;
024: import org.apache.xerces.util.XMLChar;
025: import org.apache.xerces.util.XMLStringBuffer;
026: import org.apache.xerces.xni.XMLString;
027: import org.apache.xerces.xni.XNIException;
028:
029: /**
030: * This class is responsible for scanning the declarations found
031: * in the internal and external subsets of a DTD in an XML document.
032: * The scanner acts as the sources for the DTD information which is
033: * communicated to the DTD handlers.
034: * <p>
035: * This component requires the following features and properties from the
036: * component manager that uses it:
037: * <ul>
038: * <li>http://xml.org/sax/features/validation</li>
039: * <li>http://apache.org/xml/features/scanner/notify-char-refs</li>
040: * <li>http://apache.org/xml/properties/internal/symbol-table</li>
041: * <li>http://apache.org/xml/properties/internal/error-reporter</li>
042: * <li>http://apache.org/xml/properties/internal/entity-manager</li>
043: * </ul>
044: *
045: * @xerces.internal
046: *
047: * @author Arnaud Le Hors, IBM
048: * @author Andy Clark, IBM
049: * @author Glenn Marcy, IBM
050: * @author Eric Ye, IBM
051: *
052: * @version $Id: XML11DTDScannerImpl.java 572055 2007-09-02 17:55:43Z mrglavas $
053: */
054: public class XML11DTDScannerImpl extends XMLDTDScannerImpl {
055:
056: /** String buffer. */
057: private final XMLStringBuffer fStringBuffer = new XMLStringBuffer();
058:
059: //
060: // Constructors
061: //
062:
063: /** Default constructor. */
064: public XML11DTDScannerImpl() {
065: super ();
066: } // <init>()
067:
068: /** Constructor for he use of non-XMLComponentManagers. */
069: public XML11DTDScannerImpl(SymbolTable symbolTable,
070: XMLErrorReporter errorReporter,
071: XMLEntityManager entityManager) {
072: super (symbolTable, errorReporter, entityManager);
073: }
074:
075: //
076: // XMLDTDScanner methods
077: //
078:
079: //
080: // XMLScanner methods
081: //
082: // NOTE: this is a carbon copy of the code in XML11DocumentScannerImpl;
083: // we need to override these methods in both places. Ah for
084: // multiple inheritance...
085: // This needs to be refactored!!! - NG
086: /**
087: * Scans public ID literal.
088: *
089: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
090: * [13] PubidChar::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
091: *
092: * The returned string is normalized according to the following rule,
093: * from http://www.w3.org/TR/REC-xml#dt-pubid:
094: *
095: * Before a match is attempted, all strings of white space in the public
096: * identifier must be normalized to single space characters (#x20), and
097: * leading and trailing white space must be removed.
098: *
099: * @param literal The string to fill in with the public ID literal.
100: * @return True on success.
101: *
102: * <strong>Note:</strong> This method uses fStringBuffer, anything in it at
103: * the time of calling is lost.
104: */
105: protected boolean scanPubidLiteral(XMLString literal)
106: throws IOException, XNIException {
107: int quote = fEntityScanner.scanChar();
108: if (quote != '\'' && quote != '"') {
109: reportFatalError("QuoteRequiredInPublicID", null);
110: return false;
111: }
112:
113: fStringBuffer.clear();
114: // skip leading whitespace
115: boolean skipSpace = true;
116: boolean dataok = true;
117: while (true) {
118: int c = fEntityScanner.scanChar();
119: // REVISIT: it could really only be \n or 0x20; all else is normalized, no? - neilg
120: if (c == ' ' || c == '\n' || c == '\r' || c == 0x85
121: || c == 0x2028) {
122: if (!skipSpace) {
123: // take the first whitespace as a space and skip the others
124: fStringBuffer.append(' ');
125: skipSpace = true;
126: }
127: } else if (c == quote) {
128: if (skipSpace) {
129: // if we finished on a space let's trim it
130: fStringBuffer.length--;
131: }
132: literal.setValues(fStringBuffer);
133: break;
134: } else if (XMLChar.isPubid(c)) {
135: fStringBuffer.append((char) c);
136: skipSpace = false;
137: } else if (c == -1) {
138: reportFatalError("PublicIDUnterminated", null);
139: return false;
140: } else {
141: dataok = false;
142: reportFatalError("InvalidCharInPublicID",
143: new Object[] { Integer.toHexString(c) });
144: }
145: }
146: return dataok;
147: }
148:
149: /**
150: * Normalize whitespace in an XMLString converting all whitespace
151: * characters to space characters.
152: */
153: protected void normalizeWhitespace(XMLString value) {
154: int end = value.offset + value.length;
155: for (int i = value.offset; i < end; ++i) {
156: int c = value.ch[i];
157: if (XMLChar.isSpace(c)) {
158: value.ch[i] = ' ';
159: }
160: }
161: }
162:
163: /**
164: * Normalize whitespace in an XMLString converting all whitespace
165: * characters to space characters.
166: */
167: protected void normalizeWhitespace(XMLString value, int fromIndex) {
168: int end = value.offset + value.length;
169: for (int i = value.offset + fromIndex; i < end; ++i) {
170: int c = value.ch[i];
171: if (XMLChar.isSpace(c)) {
172: value.ch[i] = ' ';
173: }
174: }
175: }
176:
177: /**
178: * Checks whether this string would be unchanged by normalization.
179: *
180: * @return -1 if the value would be unchanged by normalization,
181: * otherwise the index of the first whitespace character which
182: * would be transformed.
183: */
184: protected int isUnchangedByNormalization(XMLString value) {
185: int end = value.offset + value.length;
186: for (int i = value.offset; i < end; ++i) {
187: int c = value.ch[i];
188: if (XMLChar.isSpace(c)) {
189: return i - value.offset;
190: }
191: }
192: return -1;
193: }
194:
195: // returns true if the given character is not
196: // valid with respect to the version of
197: // XML understood by this scanner.
198: protected boolean isInvalid(int value) {
199: return (!XML11Char.isXML11Valid(value));
200: } // isInvalid(int): boolean
201:
202: // returns true if the given character is not
203: // valid or may not be used outside a character reference
204: // with respect to the version of XML understood by this scanner.
205: protected boolean isInvalidLiteral(int value) {
206: return (!XML11Char.isXML11ValidLiteral(value));
207: } // isInvalidLiteral(int): boolean
208:
209: // returns true if the given character is
210: // a valid nameChar with respect to the version of
211: // XML understood by this scanner.
212: protected boolean isValidNameChar(int value) {
213: return (XML11Char.isXML11Name(value));
214: } // isValidNameChar(int): boolean
215:
216: // returns true if the given character is
217: // a valid nameStartChar with respect to the version of
218: // XML understood by this scanner.
219: protected boolean isValidNameStartChar(int value) {
220: return (XML11Char.isXML11NameStart(value));
221: } // isValidNameStartChar(int): boolean
222:
223: // returns true if the given character is
224: // a valid NCName character with respect to the version of
225: // XML understood by this scanner.
226: protected boolean isValidNCName(int value) {
227: return (XML11Char.isXML11NCName(value));
228: } // isValidNCName(int): boolean
229:
230: // returns true if the given character is
231: // a valid high surrogate for a nameStartChar
232: // with respect to the version of XML understood
233: // by this scanner.
234: protected boolean isValidNameStartHighSurrogate(int value) {
235: return XML11Char.isXML11NameHighSurrogate(value);
236: } // isValidNameStartHighSurrogate(int): boolean
237:
238: // note that, according to 4.3.4 of the XML 1.1 spec, XML 1.1
239: // documents may invoke 1.0 entities; thus either version decl (or none!)
240: // is allowed to appear in this context
241: protected boolean versionSupported(String version) {
242: return version.equals("1.1") || version.equals("1.0");
243: } // versionSupported(String): boolean
244:
245: // returns the error message key for unsupported
246: // versions of XML with respect to the version of
247: // XML understood by this scanner.
248: protected String getVersionNotSupportedKey() {
249: return "VersionNotSupported11";
250: } // getVersionNotSupportedKey: String
251:
252: } // class XML11DTDScannerImpl
|