001: /*
002: * XmlFormatter.java
003: *
004: * Copyright (C) 1998-2003 Peter Graves
005: * $Id: XmlFormatter.java,v 1.2 2003/06/29 17:34:01 piso Exp $
006: *
007: * This program is free software; you can redistribute it and/or
008: * modify it under the terms of the GNU General Public License
009: * as published by the Free Software Foundation; either version 2
010: * of the License, or (at your option) any later version.
011: *
012: * This program is distributed in the hope that it will be useful,
013: * but WITHOUT ANY WARRANTY; without even the implied warranty of
014: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
015: * GNU General Public License for more details.
016: *
017: * You should have received a copy of the GNU General Public License
018: * along with this program; if not, write to the Free Software
019: * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
020: */
021:
022: package org.armedbear.j;
023:
024: public final class XmlFormatter extends Formatter {
025: private static final byte XML_FORMAT_TEXT = 0;
026: private static final byte XML_FORMAT_COMMENT = 1;
027: private static final byte XML_FORMAT_DELIMITER = 2;
028: private static final byte XML_FORMAT_NAMESPACE = 3;
029: private static final byte XML_FORMAT_TAG = 4;
030: private static final byte XML_FORMAT_ATTRIBUTE = 5;
031: private static final byte XML_FORMAT_EQUALS = 6;
032: private static final byte XML_FORMAT_QUOTE = 7;
033:
034: private static final byte STATE_NAMESPACE = STATE_LAST + 1;
035: private static final byte STATE_TAG_STARTING = STATE_LAST + 2;
036: private static final byte STATE_TAG_ENDING = STATE_LAST + 3;
037: private static final byte STATE_ATTRIBUTE = STATE_LAST + 4;
038: private static final byte STATE_EQUALS = STATE_LAST + 5;
039:
040: private FastStringBuffer sb = new FastStringBuffer();
041:
042: public XmlFormatter(Buffer buffer) {
043: this .buffer = buffer;
044: }
045:
046: private void endToken(int state) {
047: if (sb.length() > 0) {
048: byte format;
049: switch (state) {
050: case STATE_COMMENT:
051: format = XML_FORMAT_COMMENT;
052: break;
053: case STATE_TAG_STARTING:
054: case STATE_TAG_ENDING:
055: format = XML_FORMAT_DELIMITER;
056: break;
057: case STATE_EQUALS:
058: format = XML_FORMAT_EQUALS;
059: break;
060: case STATE_NAMESPACE:
061: format = XML_FORMAT_NAMESPACE;
062: break;
063: case STATE_TAG:
064: format = XML_FORMAT_TAG;
065: break;
066: case STATE_ATTRIBUTE:
067: format = XML_FORMAT_ATTRIBUTE;
068: break;
069: case STATE_QUOTE:
070: case STATE_SINGLEQUOTE:
071: format = XML_FORMAT_QUOTE;
072: break;
073: case STATE_NEUTRAL:
074: default:
075: format = XML_FORMAT_TEXT;
076: break;
077: }
078: addSegment(sb.toString(), format);
079: sb.setLength(0);
080: }
081: }
082:
083: public LineSegmentList formatLine(Line line) {
084: clearSegmentList();
085: if (line != null)
086: parseLine(line);
087: else
088: addSegment("", XML_FORMAT_TEXT);
089: return segmentList;
090: }
091:
092: private void parseLine(Line line) {
093: final String text = getDetabbedText(line);
094: int state = line.flags();
095: sb.setLength(0);
096: int i = 0;
097: final int limit = text.length();
098: while (i < limit) {
099: char c = text.charAt(i);
100: if (state == STATE_COMMENT) {
101: if (i < limit - 2
102: && text.substring(i, i + 3).equals("-->")) {
103: sb.append("-->");
104: endToken(state);
105: state = STATE_NEUTRAL;
106: i += 3;
107: } else {
108: sb.append(c);
109: ++i;
110: }
111: continue;
112: }
113: if (state == STATE_CDATA) {
114: if (c == ']') {
115: if (text.regionMatches(i, "]]>", 0, 3)) {
116: endToken(state);
117: sb.append("]]");
118: endToken(STATE_TAG);
119: sb.append('>');
120: endToken(STATE_TAG_ENDING);
121: state = STATE_NEUTRAL;
122: i += 3;
123: continue;
124: }
125: }
126: sb.append(c);
127: ++i;
128: continue;
129: }
130: if (state == STATE_TAG_STARTING) {
131: if (c == '/' || c == '?') {
132: sb.append(c);
133: endToken(state);
134: state = STATE_NAMESPACE;
135: ++i;
136: continue;
137: }
138: if (c == '!') {
139: if (text.regionMatches(i, "![CDATA[", 0, 8)) {
140: sb.append(c);
141: endToken(state);
142: sb.append("[CDATA[");
143: endToken(STATE_TAG);
144: state = STATE_CDATA;
145: i += 8;
146: continue;
147: }
148: if (text.regionMatches(i, "!DOCTYPE", 0, 8)) {
149: sb.append(c);
150: endToken(state);
151: sb.append("DOCTYPE");
152: endToken(STATE_TAG);
153: state = STATE_NEUTRAL;
154: i += 8;
155: continue;
156: }
157: sb.append(c);
158: endToken(state);
159: state = STATE_TAG;
160: ++i;
161: continue;
162: }
163: endToken(state);
164: state = STATE_NAMESPACE;
165: sb.append(c);
166: ++i;
167: continue;
168: }
169: if (state == STATE_NAMESPACE) {
170: if (c == '/' && text.regionMatches(i, "/>", 0, 2)) {
171: // It wasn't really a namespace.
172: endToken(STATE_TAG);
173: state = STATE_TAG_ENDING;
174: sb.append("/>");
175: endToken(state);
176: state = STATE_NEUTRAL;
177: i += 2;
178: continue;
179: }
180: if (c == '?' && text.regionMatches(i, "?>", 0, 2)) {
181: // Processing instruction.
182: endToken(STATE_TAG);
183: state = STATE_TAG_ENDING;
184: sb.append("?>");
185: endToken(state);
186: state = STATE_NEUTRAL;
187: i += 2;
188: continue;
189: }
190: if (c == ':') {
191: sb.append(c);
192: endToken(state);
193: state = STATE_TAG;
194: } else if (isWhitespace(c)) {
195: // It wasn't really a namespace.
196: endToken(STATE_TAG);
197: state = STATE_ATTRIBUTE;
198: sb.append(c);
199: } else if (c == '>') {
200: // It wasn't really a namespace.
201: endToken(STATE_TAG);
202: state = STATE_TAG_ENDING;
203: sb.append(c);
204: endToken(state);
205: state = STATE_NEUTRAL;
206: } else
207: sb.append(c);
208: ++i;
209: continue;
210: }
211: if (state == STATE_TAG) {
212: if (c == '/' && text.regionMatches(i, "/>", 0, 2)) {
213: endToken(state);
214: state = STATE_TAG_ENDING;
215: sb.append("/>");
216: endToken(state);
217: state = STATE_NEUTRAL;
218: i += 2;
219: continue;
220: }
221: if (c == '?' && text.regionMatches(i, "?>", 0, 2)) {
222: // Processing instruction.
223: endToken(STATE_TAG);
224: state = STATE_TAG_ENDING;
225: sb.append("?>");
226: endToken(state);
227: state = STATE_NEUTRAL;
228: i += 2;
229: continue;
230: }
231: if (isWhitespace(c)) {
232: endToken(state);
233: state = STATE_ATTRIBUTE;
234: sb.append(c);
235: } else if (c == '>') {
236: endToken(state);
237: state = STATE_TAG_ENDING;
238: sb.append(c);
239: endToken(state);
240: state = STATE_NEUTRAL;
241: } else
242: sb.append(c);
243: ++i;
244: continue;
245: }
246: if (state == STATE_ATTRIBUTE) {
247: if (c == '/' && text.regionMatches(i, "/>", 0, 2)) {
248: endToken(state);
249: state = STATE_TAG_ENDING;
250: sb.append("/>");
251: endToken(state);
252: state = STATE_NEUTRAL;
253: i += 2;
254: continue;
255: }
256: if (c == '?' && text.regionMatches(i, "?>", 0, 2)) {
257: // Processing instruction.
258: endToken(state);
259: state = STATE_TAG_ENDING;
260: sb.append("?>");
261: endToken(state);
262: state = STATE_NEUTRAL;
263: i += 2;
264: continue;
265: }
266: if (c == '>') {
267: endToken(state);
268: state = STATE_TAG_ENDING;
269: sb.append(c);
270: endToken(state);
271: state = STATE_NEUTRAL;
272: ++i;
273: continue;
274: }
275: if (c == '=') {
276: endToken(state);
277: state = STATE_EQUALS;
278: sb.append(c);
279: endToken(state);
280: state = STATE_ATTRIBUTE;
281: ++i;
282: continue;
283: }
284: if (c == '"') {
285: endToken(state);
286: state = STATE_QUOTE;
287: sb.append(c);
288: ++i;
289: continue;
290: }
291: if (c == '\'') {
292: endToken(state);
293: state = STATE_SINGLEQUOTE;
294: sb.append(c);
295: ++i;
296: continue;
297: }
298: sb.append(c);
299: ++i;
300: continue;
301: }
302: if (state == STATE_QUOTE) {
303: sb.append(c);
304: if (c == '"') {
305: endToken(state);
306: state = STATE_ATTRIBUTE;
307: }
308: ++i;
309: continue;
310: }
311: if (state == STATE_SINGLEQUOTE) {
312: sb.append(c);
313: if (c == '\'') {
314: endToken(state);
315: state = STATE_ATTRIBUTE;
316: }
317: ++i;
318: continue;
319: }
320: // Not in comment or tag.
321: if (c == '<') {
322: endToken(state);
323: if (text.regionMatches(i, "<!--", 0, 4)) {
324: state = STATE_COMMENT;
325: sb.append("<!--");
326: i += 4;
327: continue;
328: }
329: state = STATE_TAG_STARTING;
330: sb.append(c);
331: } else
332: sb.append(c);
333: ++i;
334: }
335: // Reached end of line.
336: if (state == STATE_NAMESPACE)
337: // It wasn't really a namespace.
338: endToken(STATE_TAG);
339: else
340: endToken(state);
341: }
342:
343: public boolean parseBuffer() {
344: int state = STATE_NEUTRAL;
345: Line line = buffer.getFirstLine();
346: Position pos = new Position(line, 0);
347: boolean changed = false;
348: while (line != null) {
349: int oldflags = line.flags();
350: if (state != oldflags) {
351: line.setFlags(state);
352: changed = true;
353: }
354: final int limit = line.length();
355: for (int i = 0; i < limit; i++) {
356: char c = line.charAt(i);
357: if (state == STATE_COMMENT) {
358: if (c == '-') {
359: pos.moveTo(line, i);
360: if (pos.lookingAt("-->")) {
361: state = STATE_NEUTRAL;
362: i += 2;
363: continue;
364: }
365: }
366: continue;
367: }
368: if (state == STATE_CDATA) {
369: if (c == ']') {
370: pos.moveTo(line, i);
371: if (pos.lookingAt("]]>")) {
372: state = STATE_NEUTRAL;
373: i += 2;
374: continue;
375: }
376: }
377: continue;
378: }
379: if (state == STATE_TAG) {
380: if (!isWhitespace(c)) {
381: // OK, we shouldn't really be in STATE_ATTRIBUTE just
382: // because we've seen one non-whitespace character
383: // after the opening '<'. But if the line ends before
384: // the '>', we don't want the next line to start in
385: // STATE_TAG.
386: state = STATE_ATTRIBUTE;
387: continue;
388: }
389: }
390: if (state == STATE_ATTRIBUTE) {
391: if (c == '>')
392: state = STATE_NEUTRAL;
393: else if (c == '"')
394: state = STATE_QUOTE;
395: else if (c == '\'')
396: state = STATE_SINGLEQUOTE;
397: continue;
398: }
399: if (state == STATE_QUOTE) {
400: if (c == '"')
401: state = STATE_ATTRIBUTE;
402: continue;
403: }
404: if (state == STATE_SINGLEQUOTE) {
405: if (c == '\'')
406: state = STATE_ATTRIBUTE;
407: continue;
408: }
409: // Neutral state.
410: if (c == '<') {
411: pos.moveTo(line, i);
412: if (pos.lookingAt("<!--")) {
413: state = STATE_COMMENT;
414: i += 3;
415: continue;
416: }
417: if (pos.lookingAt("<![CDATA[")) {
418: state = STATE_CDATA;
419: i += 8;
420: continue;
421: }
422: if (pos.lookingAt("<!DOCTYPE")) {
423: // There is no STATE_DOCTYPE...
424: state = STATE_NEUTRAL;
425: i += 8;
426: continue;
427: }
428: state = STATE_TAG;
429: continue;
430: }
431: }
432: line = line.next();
433: }
434: buffer.setNeedsParsing(false);
435: return changed;
436: }
437:
438: private static final boolean isWhitespace(char c) {
439: return c <= ' ';
440: }
441:
442: public FormatTable getFormatTable() {
443: if (formatTable == null) {
444: formatTable = new FormatTable("XmlMode");
445: formatTable.addEntryFromPrefs(XML_FORMAT_TEXT, "text");
446: formatTable
447: .addEntryFromPrefs(XML_FORMAT_COMMENT, "comment");
448: formatTable.addEntryFromPrefs(XML_FORMAT_DELIMITER,
449: "delimiter");
450: formatTable.addEntryFromPrefs(XML_FORMAT_NAMESPACE,
451: "namespace");
452: formatTable.addEntryFromPrefs(XML_FORMAT_TAG, "tag");
453: formatTable.addEntryFromPrefs(XML_FORMAT_ATTRIBUTE,
454: "attribute");
455: formatTable.addEntryFromPrefs(XML_FORMAT_EQUALS, "equals",
456: "delimiter");
457: formatTable.addEntryFromPrefs(XML_FORMAT_QUOTE, "string");
458: }
459: return formatTable;
460: }
461: }
|