001: /*
002: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
003: *
004: * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
005: *
006: * The contents of this file are subject to the terms of either the GNU
007: * General Public License Version 2 only ("GPL") or the Common
008: * Development and Distribution License("CDDL") (collectively, the
009: * "License"). You may not use this file except in compliance with the
010: * License. You can obtain a copy of the License at
011: * http://www.netbeans.org/cddl-gplv2.html
012: * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
013: * specific language governing permissions and limitations under the
014: * License. When distributing the software, include this License Header
015: * Notice in each file and include the License file at
016: * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this
017: * particular file as subject to the "Classpath" exception as provided
018: * by Sun in the GPL Version 2 section of the License file that
019: * accompanied this code. If applicable, add the following below the
020: * License Header, with the fields enclosed by brackets [] replaced by
021: * your own identifying information:
022: * "Portions Copyrighted [year] [name of copyright owner]"
023: *
024: * Contributor(s):
025: *
026: * The Original Software is NetBeans. The Initial Developer of the Original
027: * Software is Sun Microsystems, Inc. Portions Copyright 1997-2006 Sun
028: * Microsystems, Inc. All Rights Reserved.
029: *
030: * If you wish your version of this file to be governed by only the CDDL
031: * or only the GPL Version 2, indicate your decision by adding
032: * "[Contributor] elects to include this software in this distribution
033: * under the [CDDL or GPL Version 2] license." If you do not indicate a
034: * single choice of license, a recipient has the option to distribute
035: * your version of this file under either the CDDL, the GPL Version 2 or
036: * to extend the choice of license to its licensees as provided above.
037: * However, if you add GPL Version 2 code and therefore, elected the GPL
038: * Version 2 license, then the option applies only if the new code is
039: * made subject to such option by the copyright holder.
040: */
041:
042: package org.netbeans.modules.xml.xdm.nodes;
043:
044: import java.io.IOException;
045: import java.util.List;
046: import java.util.ArrayList;
047: import java.util.Stack;
048: import javax.swing.text.BadLocationException;
049: import org.netbeans.editor.BaseDocument;
050: import org.netbeans.api.lexer.TokenSequence;
051: import org.netbeans.api.lexer.TokenHierarchy;
052: import org.netbeans.api.xml.lexer.XMLTokenId;
053:
054: public class XMLSyntaxParser {
055:
056: public Document parse(BaseDocument basedoc) throws IOException,
057: BadLocationException {
058: // create the core model
059: Stack<NodeImpl> stack = new Stack<NodeImpl>();
060: Document doc = new Document();
061: stack.push(doc);
062: NodeImpl currentNode = doc;
063: List<Token> currentTokens = new ArrayList<Token>();
064:
065: TokenHierarchy th = TokenHierarchy.get(basedoc);
066: TokenSequence<XMLTokenId> tokenSequence = th.tokenSequence();
067: org.netbeans.api.lexer.Token<XMLTokenId> token = tokenSequence
068: .token();
069: // Add the text token, if any, before xml decalration to document node
070: if (token != null && token.id() == XMLTokenId.TEXT) {
071: currentTokens.add(Token.create(token.text().toString(),
072: TokenType.TOKEN_CHARACTER_DATA));
073: if (tokenSequence.moveNext()) {
074: token = tokenSequence.token();
075: }
076: // if the xml decalration is not there assign this token to document
077: if (token.id() != XMLTokenId.PI_START) {
078: currentNode.setTokens(new ArrayList<Token>(
079: currentTokens));
080: currentTokens.clear();
081: }
082: }
083:
084: while (tokenSequence.moveNext()) {
085: token = tokenSequence.token();
086: XMLTokenId tokenId = token.id();
087: String image = token.text().toString();
088: TokenType tokenType = TokenType.TOKEN_WHITESPACE;
089: switch (tokenId) {
090: case PI_START: {
091: tokenType = TokenType.TOKEN_PI_START_TAG;
092: currentTokens.add(Token.create(image, tokenType));
093: break;
094: }
095: case PI_TARGET: {
096: tokenType = TokenType.TOKEN_PI_NAME;
097: currentTokens.add(Token.create(image, tokenType));
098: break;
099: }
100: case PI_CONTENT: {
101: tokenType = TokenType.TOKEN_PI_VAL;
102: currentTokens.add(Token.create(image, tokenType));
103: break;
104: }
105: case PI_END: {
106: tokenType = TokenType.TOKEN_PI_END_TAG;
107: currentTokens.add(Token.create(image, tokenType));
108: if (currentNode instanceof Document) {
109: if (tokenSequence.moveNext()) {
110: org.netbeans.api.lexer.Token t = tokenSequence
111: .token();
112: if (t.id() == XMLTokenId.TEXT) {
113: currentTokens.add(Token.create(t.text()
114: .toString(),
115: TokenType.TOKEN_CHARACTER_DATA));
116: } else {
117: tokenSequence.movePrevious();
118: }
119: }
120: if (stack.peek() != currentNode)
121: stack.push(currentNode);
122: }
123: List<Token> list = new ArrayList<Token>(currentNode
124: .getTokens());
125: list.addAll(currentTokens);
126: currentNode.setTokens(list);
127: currentTokens.clear();
128: break;
129: }
130: case TAG: {
131: int len = image.length();
132: if (image.charAt(len - 1) == '>') {
133: Token endToken = Token.create(image,
134: TokenType.TOKEN_ELEMENT_END_TAG);
135: if (len == 2) {
136: currentNode = stack.pop();
137: endToken = Token.create(image,
138: TokenType.TOKEN_ELEMENT_END_TAG);
139: } else if (!(currentNode instanceof Element)) {
140: currentNode = stack.peek();
141: }
142: currentTokens.add(endToken);
143: currentNode.getTokensForWrite().addAll(
144: currentTokens);
145: currentTokens.clear();
146: } else {
147: tokenType = TokenType.TOKEN_ELEMENT_START_TAG;
148: if (image.startsWith("</")) {
149: currentNode = stack.pop();
150: if (!currentNode.getTokens().get(0).getValue()
151: .substring(1)
152: .equals(image.substring(2))) {
153: throw new IOException(
154: "Invalid token '"
155: + image
156: + "' found in document: "
157: + "Please use the text editor to resolve the issues...");
158: } else {//check for invalid endtag: <a></a
159: String saveTokenImage = image;
160: currentTokens.add(Token.create(image,
161: tokenType));
162: tokenSequence.moveNext();
163: org.netbeans.api.lexer.Token<XMLTokenId> t = tokenSequence
164: .token();
165: while (t != null) {
166: if (t.id() != XMLTokenId.WS) {
167: tokenSequence.movePrevious();
168: break;
169: }
170: tokenType = TokenType.TOKEN_WHITESPACE;
171: currentTokens.add(Token.create(t.text()
172: .toString(), tokenType));
173: if (!tokenSequence.moveNext())
174: break;
175: t = tokenSequence.token();
176: }
177: if (t == null
178: || !t.text().toString().equals(">"))
179: throw new IOException(
180: "Invalid token '"
181: + saveTokenImage
182: + "' does not end with '>': Please use the "
183: + "text editor to resolve the issues...");
184: continue;
185: }
186: } else {
187: currentNode = new Element();
188: Node parent = stack.peek();
189: parent.appendChild(currentNode);
190: stack.push(currentNode);
191: currentTokens.add(Token
192: .create(image, tokenType));
193: currentNode.setTokens(new ArrayList<Token>(
194: currentTokens));
195: currentTokens.clear();
196: }
197: }
198: break;
199: }
200: case ARGUMENT: //attribute of an element
201: {
202: tokenType = TokenType.TOKEN_ATTR_NAME;
203: currentNode = new Attribute();
204: Element parent = (Element) stack.peek();
205: parent.appendAttribute((Attribute) currentNode);
206: currentTokens.add(Token.create(image, tokenType));
207: break;
208: }
209: case VALUE: {
210: // tokenSequence.moveNext();
211: // org.netbeans.api.lexer.Token t = tokenSequence.token();
212: // while(t.id() == XMLTokenId.VALUE || t.id() == XMLTokenId.CHARACTER) {
213: // image = image.concat(t.text().toString());
214: // tokenSequence.moveNext();
215: // t = tokenSequence.token();
216: // }
217: tokenType = TokenType.TOKEN_ATTR_VAL;
218: while (tokenSequence.moveNext()) {
219: org.netbeans.api.lexer.Token t = tokenSequence
220: .token();
221: if (t.id() != XMLTokenId.VALUE
222: && t.id() != XMLTokenId.CHARACTER) {
223: tokenSequence.movePrevious();
224: break;
225: }
226: image = image.concat(t.text().toString());
227: }
228: currentTokens.add(Token.create(image, tokenType));
229: currentNode.setTokens(new ArrayList<Token>(
230: currentTokens));
231: currentTokens.clear();
232: break;
233: }
234: case BLOCK_COMMENT: {
235: tokenType = TokenType.TOKEN_COMMENT;
236: Node parent = stack.peek();
237: currentTokens.add(Token.create(image, tokenType));
238: if (image.endsWith(Token.COMMENT_END.getValue())) {
239: String combinedString = combineString(currentTokens);
240: Comment comment = new Comment(combinedString);
241: if (parent instanceof Element) {
242: ((Element) parent).appendChild(comment, false);
243: } else {//parent is Document
244: if (token.id() != XMLTokenId.BLOCK_COMMENT
245: && token.text().toString().trim()
246: .length() > 0) {
247: throw new IOException(
248: "Invalid token '"
249: + token.text()
250: + "' found in document: "
251: + "Please use the text editor to resolve the issues...");
252: }
253: parent.appendChild(comment);
254: }
255: currentTokens.clear();
256: }
257: break;
258: }
259: case TEXT:
260: case CHARACTER: {
261: tokenType = TokenType.TOKEN_CHARACTER_DATA;
262: currentNode = new Text();
263: currentTokens.add(Token.create(image, tokenType));
264: if (token.id() == XMLTokenId.TEXT) {
265: while (tokenSequence.moveNext()) {
266: org.netbeans.api.lexer.Token t = tokenSequence
267: .token();
268: if (t.id() != XMLTokenId.TEXT
269: && t.id() != XMLTokenId.CHARACTER) {
270: tokenSequence.movePrevious();
271: break;
272: }
273: currentTokens.add(Token.create(t.text()
274: .toString(), tokenType));
275: }
276: }
277: currentNode.setTokens(new ArrayList<Token>(
278: currentTokens));
279: Node parent = stack.peek();
280: if (parent instanceof Element) {
281: ((Element) parent).appendChild(currentNode, false);
282: } else {//parent is Document
283: if (token.id() != XMLTokenId.BLOCK_COMMENT
284: && token.text().toString().trim().length() > 0) {
285: throw new IOException(
286: "Invalid token '"
287: + token.text()
288: + "' found in document: "
289: + "Please use the text editor to resolve the issues...");
290: }
291: parent.appendChild(currentNode);
292: }
293: currentTokens.clear();
294: break;
295: }
296: case WS: {
297: tokenType = TokenType.TOKEN_WHITESPACE;
298: currentTokens.add(Token.create(image, tokenType));
299: break;
300: }
301: case OPERATOR: {
302: tokenType = TokenType.TOKEN_ATTR_EQUAL;
303: currentTokens.add(Token.create(image, tokenType));
304: break;
305: }
306: case DECLARATION: {
307: tokenType = TokenType.TOKEN_DTD_VAL;
308: currentTokens.add(Token.create(image, tokenType));
309: while (tokenSequence.moveNext()) {
310: org.netbeans.api.lexer.Token<XMLTokenId> t = tokenSequence
311: .token();
312: XMLTokenId id = t.id();
313: if (id != XMLTokenId.DECLARATION
314: && id != XMLTokenId.VALUE) {
315: tokenSequence.movePrevious();
316: break;
317: }
318: currentTokens.add(Token.create(t.text().toString(),
319: tokenType));
320: }
321: break;
322: }
323: case CDATA_SECTION: {
324: Node parent = stack.peek();
325: CData cdata = new CData(image);
326: if (parent instanceof Element) {
327: ((Element) parent).appendChild(cdata, false);
328: } else {//parent is Document
329: throw new IOException(
330: "CDATA is not valid as direct child of document"
331: + "Please use the text editor to resolve the issues...");
332: }
333: tokenType = TokenType.TOKEN_CDATA_VAL;
334: break;
335: }
336: case ERROR:
337: case EOL:
338: default:
339: throw new IOException(
340: "Invalid token found in document: "
341: + "Please use the text editor to resolve the issues...");
342: }
343: }
344: Node result = stack.pop();
345: if (result instanceof Document) {
346: return (Document) result;
347: } else {
348: throw new IOException(
349: "Document not well formed/Invalid: "
350: + "Please use the text editor to resolve the issues...");
351: }
352: }
353:
354: private boolean isValid(org.netbeans.api.lexer.Token token)
355: throws IOException {
356: if (token != null && token.id() != null)
357: return true;
358: else
359: throw new IOException(
360: "Document parsed is invalid: Please use the text "
361: + "editor to resolve the issues...");
362: }
363:
364: private String combineString(List<Token> tokens) {
365: StringBuilder sb = new StringBuilder();
366: for (Token t : tokens) {
367: sb.append(t.getValue());
368: }
369: return sb.toString();
370: }
371: }
|