0001: /***** BEGIN LICENSE BLOCK *****
0002: * Version: CPL 1.0/GPL 2.0/LGPL 2.1
0003: *
0004: * The contents of this file are subject to the Common Public
0005: * License Version 1.0 (the "License"); you may not use this file
0006: * except in compliance with the License. You may obtain a copy of
0007: * the License at http://www.eclipse.org/legal/cpl-v10.html
0008: *
0009: * Software distributed under the License is distributed on an "AS
0010: * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
0011: * implied. See the License for the specific language governing
0012: * rights and limitations under the License.
0013: *
0014: * Copyright (C) 2007 Ola Bini <ola@ologix.com>
0015: *
0016: * Alternatively, the contents of this file may be used under the terms of
0017: * either of the GNU General Public License Version 2 or later (the "GPL"),
0018: * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
0019: * in which case the provisions of the GPL or the LGPL are applicable instead
0020: * of those above. If you wish to allow use of your version of this file only
0021: * under the terms of either the GPL or the LGPL, and not to allow others to
0022: * use your version of this file under the terms of the CPL, indicate your
0023: * decision by deleting the provisions above and replace them with the notice
0024: * and other provisions required by the GPL or the LGPL. If you do not delete
0025: * the provisions above, a recipient may use your version of this file under
0026: * the terms of any one of the CPL, the GPL or the LGPL.
0027: ***** END LICENSE BLOCK *****/package org.jvyamlb;
0028:
0029: import java.io.FileInputStream;
0030: import java.io.IOException;
0031: import java.io.InputStream;
0032: import java.util.Arrays;
0033: import java.util.HashMap;
0034: import java.util.Iterator;
0035: import java.util.LinkedList;
0036: import java.util.List;
0037: import java.util.Map;
0038:
0039: import org.jruby.util.ByteList;
0040: import org.jvyamlb.tokens.AliasToken;
0041: import org.jvyamlb.tokens.AnchorToken;
0042: import org.jvyamlb.tokens.DirectiveToken;
0043: import org.jvyamlb.tokens.ScalarToken;
0044: import org.jvyamlb.tokens.TagToken;
0045: import org.jvyamlb.tokens.Token;
0046:
0047: /**
0048: * <p>A Java implementation of the RbYAML scanner.</p>
0049: *
0050: * @author <a href="mailto:ola.bini@ki.se">Ola Bini</a>
0051: */
0052: public class ScannerImpl implements Scanner {
0053: private final static byte[] EMPTY = new byte[0];
0054: private final static byte[] NN = new byte[] { '\n' };
0055: private final static ByteList BANG = new ByteList(
0056: new byte[] { '!' }, false);
0057: private final static ByteList SPACE = new ByteList(
0058: new byte[] { ' ' }, false);
0059:
0060: private final static boolean[] ALL_FALSE = new boolean[256];
0061: private final static boolean[] ALL_TRUE = new boolean[256];
0062: private final static boolean[] LINEBR = new boolean[256];
0063: private final static boolean[] NULL_BL_LINEBR = new boolean[256];
0064: private final static boolean[] NULL_BL_T_LINEBR = new boolean[256];
0065: private final static boolean[] NULL_OR_LINEBR = new boolean[256];
0066: private final static boolean[] FULL_LINEBR = new boolean[256];
0067: private final static boolean[] BLANK_OR_LINEBR = new boolean[256];
0068: private final static boolean[] S4 = new boolean[256];
0069: private final static boolean[] ALPHA = new boolean[256];
0070: private final static boolean[] DIGIT = new boolean[256];
0071: private final static boolean[] HEXA = new boolean[256];
0072: private final static boolean[] STRANGE_CHAR = new boolean[256];
0073: private final static int[] RN = new int[] { '\r', '\n' };
0074: private final static boolean[] BLANK_T = new boolean[256];
0075: private final static boolean[] SPACES_AND_STUFF = new boolean[256];
0076: private final static boolean[] DOUBLE_ESC = new boolean[256];
0077: private final static boolean[] NON_ALPHA_OR_NUM = new boolean[256];
0078: private final static boolean[] NON_PRINTABLE = new boolean[256];
0079: private final static boolean[] STUPID_CHAR = new boolean[256];
0080: private final static boolean[] R_FLOWZERO = NULL_BL_T_LINEBR;
0081: private final static boolean[] R_FLOWZERO1 = new boolean[256];
0082: private final static boolean[] R_FLOWNONZERO = new boolean[256];
0083:
0084: private final static byte[] ESCAPE_REPLACEMENTS = new byte[256];
0085: private final static boolean[] IS_ESCAPE_REPLACEMENT = new boolean[256];
0086: private final static Map ESCAPE_CODES = new HashMap();
0087:
0088: static {
0089: Arrays.fill(ALL_TRUE, true);
0090: LINEBR['\n'] = true;
0091: NULL_BL_LINEBR['\0'] = true;
0092: NULL_BL_LINEBR[' '] = true;
0093: NULL_BL_LINEBR['\r'] = true;
0094: NULL_BL_LINEBR['\n'] = true;
0095: NULL_BL_T_LINEBR['\0'] = true;
0096: NULL_BL_T_LINEBR[' '] = true;
0097: NULL_BL_T_LINEBR['\t'] = true;
0098: NULL_BL_T_LINEBR['\r'] = true;
0099: NULL_BL_T_LINEBR['\n'] = true;
0100: NULL_OR_LINEBR['\0'] = true;
0101: NULL_OR_LINEBR['\r'] = true;
0102: NULL_OR_LINEBR['\n'] = true;
0103: FULL_LINEBR['\r'] = true;
0104: FULL_LINEBR['\n'] = true;
0105: BLANK_OR_LINEBR[' '] = true;
0106: BLANK_OR_LINEBR['\r'] = true;
0107: BLANK_OR_LINEBR['\n'] = true;
0108: S4['\0'] = true;
0109: S4[' '] = true;
0110: S4['\t'] = true;
0111: S4['\r'] = true;
0112: S4['\n'] = true;
0113: S4['['] = true;
0114: S4[']'] = true;
0115: S4['{'] = true;
0116: S4['}'] = true;
0117: for (char c = 'a'; c <= 'z'; c++) {
0118: ALPHA[c] = true;
0119: STRANGE_CHAR[c] = true;
0120: }
0121: for (char c = 'A'; c <= 'Z'; c++) {
0122: ALPHA[c] = true;
0123: STRANGE_CHAR[c] = true;
0124: }
0125: for (char c = '0'; c <= '9'; c++) {
0126: ALPHA[c] = true;
0127: STRANGE_CHAR[c] = true;
0128: HEXA[c] = true;
0129: DIGIT[c] = true;
0130: }
0131: for (char c = 'a'; c <= 'f'; c++) {
0132: HEXA[c] = true;
0133: }
0134: for (char c = 'A'; c <= 'F'; c++) {
0135: HEXA[c] = true;
0136: }
0137: ALPHA['-'] = true;
0138: ALPHA['_'] = true;
0139: STRANGE_CHAR['-'] = true;
0140: STRANGE_CHAR['_'] = true;
0141: STRANGE_CHAR['['] = true;
0142: STRANGE_CHAR[']'] = true;
0143: STRANGE_CHAR['('] = true;
0144: STRANGE_CHAR[')'] = true;
0145: STRANGE_CHAR['\''] = true;
0146: STRANGE_CHAR[';'] = true;
0147: STRANGE_CHAR['/'] = true;
0148: STRANGE_CHAR['?'] = true;
0149: STRANGE_CHAR[':'] = true;
0150: STRANGE_CHAR['@'] = true;
0151: STRANGE_CHAR['&'] = true;
0152: STRANGE_CHAR['='] = true;
0153: STRANGE_CHAR['+'] = true;
0154: STRANGE_CHAR['$'] = true;
0155: STRANGE_CHAR[','] = true;
0156: STRANGE_CHAR['.'] = true;
0157: STRANGE_CHAR['!'] = true;
0158: STRANGE_CHAR['~'] = true;
0159: STRANGE_CHAR['*'] = true;
0160: STRANGE_CHAR['%'] = true;
0161: STRANGE_CHAR['^'] = true;
0162: BLANK_T[' '] = true;
0163: BLANK_T['\t'] = true;
0164: SPACES_AND_STUFF['\0'] = true;
0165: SPACES_AND_STUFF[' '] = true;
0166: SPACES_AND_STUFF['\t'] = true;
0167: SPACES_AND_STUFF['\r'] = true;
0168: SPACES_AND_STUFF['\n'] = true;
0169: SPACES_AND_STUFF['\\'] = true;
0170: SPACES_AND_STUFF['\''] = true;
0171: SPACES_AND_STUFF['"'] = true;
0172: DOUBLE_ESC['\\'] = true;
0173: DOUBLE_ESC['"'] = true;
0174: NON_ALPHA_OR_NUM['\0'] = true;
0175: NON_ALPHA_OR_NUM[' '] = true;
0176: NON_ALPHA_OR_NUM['\t'] = true;
0177: NON_ALPHA_OR_NUM['\r'] = true;
0178: NON_ALPHA_OR_NUM['\n'] = true;
0179: NON_ALPHA_OR_NUM['?'] = true;
0180: NON_ALPHA_OR_NUM[':'] = true;
0181: NON_ALPHA_OR_NUM[','] = true;
0182: NON_ALPHA_OR_NUM[']'] = true;
0183: NON_ALPHA_OR_NUM['}'] = true;
0184: NON_ALPHA_OR_NUM['%'] = true;
0185: NON_ALPHA_OR_NUM['@'] = true;
0186: NON_ALPHA_OR_NUM['`'] = true;
0187:
0188: Arrays.fill(ESCAPE_REPLACEMENTS, (byte) 0);
0189: ESCAPE_REPLACEMENTS['0'] = 0;
0190: ESCAPE_REPLACEMENTS['a'] = 7;
0191: ESCAPE_REPLACEMENTS['b'] = 8;
0192: ESCAPE_REPLACEMENTS['t'] = 9;
0193: ESCAPE_REPLACEMENTS['\t'] = 9;
0194: ESCAPE_REPLACEMENTS['n'] = 10;
0195: ESCAPE_REPLACEMENTS['v'] = 11;
0196: ESCAPE_REPLACEMENTS['f'] = 12;
0197: ESCAPE_REPLACEMENTS['r'] = 13;
0198: ESCAPE_REPLACEMENTS['e'] = 27;
0199: // ESCAPE_REPLACEMENTS[' '] = 32;
0200: ESCAPE_REPLACEMENTS['"'] = (byte) '"';
0201: ESCAPE_REPLACEMENTS['\\'] = (byte) '\\';
0202: ESCAPE_REPLACEMENTS['N'] = (byte) 133;
0203: ESCAPE_REPLACEMENTS['_'] = (byte) 160;
0204: IS_ESCAPE_REPLACEMENT['0'] = true;
0205: IS_ESCAPE_REPLACEMENT['a'] = true;
0206: IS_ESCAPE_REPLACEMENT['b'] = true;
0207: IS_ESCAPE_REPLACEMENT['t'] = true;
0208: IS_ESCAPE_REPLACEMENT['\t'] = true;
0209: IS_ESCAPE_REPLACEMENT['n'] = true;
0210: IS_ESCAPE_REPLACEMENT['v'] = true;
0211: IS_ESCAPE_REPLACEMENT['f'] = true;
0212: IS_ESCAPE_REPLACEMENT['r'] = true;
0213: IS_ESCAPE_REPLACEMENT['e'] = true;
0214: // IS_ESCAPE_REPLACEMENT[' '] = true;
0215: IS_ESCAPE_REPLACEMENT['"'] = true;
0216: IS_ESCAPE_REPLACEMENT['\\'] = true;
0217: IS_ESCAPE_REPLACEMENT['N'] = true;
0218: IS_ESCAPE_REPLACEMENT['_'] = true;
0219:
0220: ESCAPE_CODES.put(new Character('x'), new Integer(2));
0221: ESCAPE_CODES.put(new Character('u'), new Integer(4));
0222: ESCAPE_CODES.put(new Character('U'), new Integer(8));
0223:
0224: Arrays.fill(STUPID_CHAR, true);
0225: STUPID_CHAR['\0'] = false;
0226: STUPID_CHAR[' '] = false;
0227: STUPID_CHAR['\t'] = false;
0228: STUPID_CHAR['\r'] = false;
0229: STUPID_CHAR['\n'] = false;
0230: STUPID_CHAR['-'] = false;
0231: STUPID_CHAR['?'] = false;
0232: STUPID_CHAR[':'] = false;
0233: STUPID_CHAR[','] = false;
0234: STUPID_CHAR['['] = false;
0235: STUPID_CHAR[']'] = false;
0236: STUPID_CHAR['{'] = false;
0237: STUPID_CHAR['#'] = false;
0238: STUPID_CHAR['&'] = false;
0239: STUPID_CHAR['*'] = false;
0240: STUPID_CHAR['!'] = false;
0241: STUPID_CHAR['|'] = false;
0242: STUPID_CHAR['>'] = false;
0243: STUPID_CHAR['\''] = false;
0244: STUPID_CHAR['"'] = false;
0245: STUPID_CHAR['@'] = false;
0246: R_FLOWZERO1[':'] = true;
0247: R_FLOWNONZERO['\0'] = true;
0248: R_FLOWNONZERO[' '] = true;
0249: R_FLOWNONZERO['\t'] = true;
0250: R_FLOWNONZERO['\r'] = true;
0251: R_FLOWNONZERO['\n'] = true;
0252: R_FLOWNONZERO['['] = true;
0253: R_FLOWNONZERO[']'] = true;
0254: R_FLOWNONZERO['{'] = true;
0255: R_FLOWNONZERO['}'] = true;
0256: R_FLOWNONZERO[','] = true;
0257: R_FLOWNONZERO[':'] = true;
0258: R_FLOWNONZERO['?'] = true;
0259: }
0260:
0261: private boolean done = false;
0262: private int flowLevel = 0;
0263: private int tokensTaken = 0;
0264: private int indent = -1;
0265: private boolean allowSimpleKey = true;
0266: private boolean eof = true;
0267: private int column = 0;
0268: private int pointer = 0;
0269: private ByteList buffer;
0270: private InputStream stream;
0271: private List tokens;
0272: private List indents;
0273: private Map possibleSimpleKeys;
0274:
0275: private boolean docStart = false;
0276:
/**
 * Creates a scanner that reads its input on demand from a stream.
 * Starts with an empty buffer and immediately queues the stream-start token.
 *
 * @param stream the source of the bytes to scan
 */
public ScannerImpl(final InputStream stream) {
    this.tokens = new LinkedList();
    this.indents = new LinkedList();
    this.possibleSimpleKeys = new HashMap();
    this.buffer = new ByteList(100);
    this.stream = stream;
    // Unlike the in-memory constructors, there is more input to pull.
    this.eof = false;
    fetchStreamStart();
}
0286:
/**
 * Creates a scanner over bytes that are already in memory.
 * No stream is attached, so the scanner stays in its end-of-input state
 * and only works from the buffer built here.
 *
 * @param stream the bytes to scan
 */
public ScannerImpl(final ByteList stream) {
    this.stream = null;
    this.buffer = new ByteList(stream.bytes, stream.begin, stream.realSize);
    this.tokens = new LinkedList();
    this.indents = new LinkedList();
    this.possibleSimpleKeys = new HashMap();
    fetchStreamStart();
}
0296:
/**
 * Creates a scanner over the contents of a string.
 * The string is converted with {@code ByteList.plain} and scanned from
 * memory; no stream is attached.
 *
 * @param stream the text to scan
 * @throws RuntimeException if the string cannot be converted; the original
 *         failure is attached as the cause
 */
public ScannerImpl(final String stream) {
    try {
        this.buffer = new ByteList(ByteList.plain(stream), false);
    } catch (Exception e) {
        // Keep the same message as before, but chain the cause so the
        // original stack trace is not lost.
        throw new RuntimeException(e.getMessage(), e);
    }
    this.stream = null;
    this.tokens = new LinkedList();
    this.indents = new LinkedList();
    this.possibleSimpleKeys = new HashMap();
    fetchStreamStart();
}
0309:
0310: private void update(final int length, final boolean reset) {
0311: if (!eof && reset) {
0312: this .buffer.delete(0, this .pointer);
0313: this .pointer = 0;
0314: }
0315: while (this .buffer.realSize < (this .pointer + length)) {
0316: byte[] rawData = ByteList.NULL_ARRAY;
0317: int converted = -2;
0318: if (!this .eof) {
0319: byte[] data = new byte[1024];
0320: try {
0321: converted = this .stream.read(data);
0322: } catch (final IOException ioe) {
0323: throw new YAMLException(ioe);
0324: }
0325: if (converted == -1) {
0326: this .eof = true;
0327: } else {
0328: rawData = data;
0329: }
0330: }
0331: if (this .eof) {
0332: this .buffer.append('\0');
0333: break;
0334: } else {
0335: checkPrintable(rawData, converted);
0336: this .buffer.append(rawData, 0, converted);
0337: }
0338: }
0339: }
0340:
0341: private void checkPrintable(final byte[] b, final int len) {
0342: for (int i = 0; i < len; i++) {
0343: if (NON_PRINTABLE[((int) b[i] & 0xFF)]) {
0344: final int position = this .buffer.length()
0345: - this .pointer + i;
0346: throw new YAMLException("At " + position
0347: + " we found: " + (char) ((int) b[i] & 0xFF)
0348: + ". Special characters are not allowed");
0349: }
0350: }
0351: }
0352:
0353: private boolean ensure(final int len, final boolean reset) {
0354: if (this .pointer + len >= this .buffer.realSize) {
0355: update(len, reset);
0356: }
0357: return true;
0358: }
0359:
0360: private char peek() {
0361: ensure(1, false);
0362: return (char) ((char) (buffer.bytes[this .pointer]) & 0xFF);
0363: }
0364:
0365: private char peek(final int index) {
0366: ensure(index + 1, false);
0367: return (char) ((char) this .buffer.bytes[this .pointer + index] & 0xFF);
0368: }
0369:
0370: private void forward() {
0371: ensure(2, true);
0372: final char ch1 = (char) ((int) this .buffer.bytes[this .pointer++] & 0xFF);
0373: if (ch1 == '\n'
0374: || (ch1 == '\r' && (((int) this .buffer.bytes[this .pointer] & 0xFF) != '\n'))) {
0375: this .possibleSimpleKeys.clear();
0376: this .column = 0;
0377: } else {
0378: this .column++;
0379: }
0380: }
0381:
0382: private void forward(final int length) {
0383: ensure(length + 1, true);
0384: int ch = 0;
0385: for (int i = 0; i < length; i++) {
0386: ch = this .buffer.bytes[this .pointer] & 0xFF;
0387: this .pointer++;
0388: if (LINEBR[ch]
0389: || (ch == '\r' && (this .buffer.bytes[this .pointer] & 0xFF) != '\n')) {
0390: this .possibleSimpleKeys.clear();
0391: this .column = 0;
0392: } else {
0393: this .column++;
0394: }
0395: }
0396: }
0397:
0398: public boolean checkToken(final Class[] choices) {
0399: while (needMoreTokens()) {
0400: fetchMoreTokens();
0401: }
0402: if (!this .tokens.isEmpty()) {
0403: if (choices.length == 0) {
0404: return true;
0405: }
0406: final Object first = this .tokens.get(0);
0407: for (int i = 0, j = choices.length; i < j; i++) {
0408: if (choices[i].isInstance(first)) {
0409: return true;
0410: }
0411: }
0412: }
0413: return false;
0414: }
0415:
0416: public Token peekToken() {
0417: while (needMoreTokens()) {
0418: fetchMoreTokens();
0419: }
0420: return (Token) (this .tokens.isEmpty() ? null : this .tokens
0421: .get(0));
0422: }
0423:
0424: public Token getToken() {
0425: while (needMoreTokens()) {
0426: fetchMoreTokens();
0427: }
0428: if (!this .tokens.isEmpty()) {
0429: this .tokensTaken++;
0430: return (Token) this .tokens.remove(0);
0431: }
0432: return null;
0433: }
0434:
0435: private class TokenIterator implements Iterator {
0436: public boolean hasNext() {
0437: return null != peekToken();
0438: }
0439:
0440: public Object next() {
0441: return getToken();
0442: }
0443:
0444: public void remove() {
0445: }
0446: }
0447:
0448: public Iterator eachToken() {
0449: return new TokenIterator();
0450: }
0451:
0452: public Iterator iterator() {
0453: return eachToken();
0454: }
0455:
0456: private boolean needMoreTokens() {
0457: if (this .done) {
0458: return false;
0459: }
0460: return this .tokens.isEmpty()
0461: || nextPossibleSimpleKey() == this .tokensTaken;
0462: }
0463:
0464: private boolean isEnding() {
0465: ensure(4, false);
0466: return (this .buffer.bytes[this .pointer] & 0xFF) == '-'
0467: && (this .buffer.bytes[this .pointer + 1] & 0xFF) == '-'
0468: && (this .buffer.bytes[this .pointer + 2] & 0xFF) == '-'
0469: && (this .buffer.bytes[this .pointer + 3] != 0)
0470: && !(this .buffer.realSize <= (this .pointer + 4) || ((this .buffer.bytes[this .pointer + 3] == '\n') && (this .buffer.bytes[this .pointer + 4] == 0)))
0471: && (NULL_BL_T_LINEBR[this .buffer.bytes[this .pointer + 3]]);
0472: }
0473:
0474: private boolean isStart() {
0475: ensure(4, false);
0476: return (this .buffer.bytes[this .pointer] & 0xFF) == '.'
0477: && (this .buffer.bytes[this .pointer + 1] & 0xFF) == '.'
0478: && (this .buffer.bytes[this .pointer + 2] & 0xFF) == '.'
0479: && (NULL_BL_T_LINEBR[this .buffer.bytes[this .pointer + 3]]);
0480: }
0481:
0482: private boolean isEndOrStart() {
0483: ensure(4, false);
0484: return (((this .buffer.bytes[this .pointer] & 0xFF) == '-'
0485: && (this .buffer.bytes[this .pointer + 1] & 0xFF) == '-' && (this .buffer.bytes[this .pointer + 2] & 0xFF) == '-') || ((this .buffer.bytes[this .pointer] & 0xFF) == '.'
0486: && (this .buffer.bytes[this .pointer + 1] & 0xFF) == '.' && (this .buffer.bytes[this .pointer + 2] & 0xFF) == '.'))
0487: && (NULL_BL_T_LINEBR[this .buffer.bytes[this .pointer + 3]]);
0488: }
0489:
0490: private Token fetchMoreTokens() {
0491: scanToNextToken();
0492: unwindIndent(this .column);
0493: final char ch = peek();
0494: final boolean colz = this .column == 0;
0495: switch (ch) {
0496: case '\0':
0497: return fetchStreamEnd();
0498: case '\'':
0499: return fetchSingle();
0500: case '"':
0501: return fetchDouble();
0502: case '?':
0503: if (this .flowLevel != 0 || NULL_BL_T_LINEBR[peek(1)]) {
0504: return fetchKey();
0505: }
0506: break;
0507: case ':':
0508: if (this .flowLevel != 0 || NULL_BL_T_LINEBR[peek(1)]) {
0509: return fetchValue();
0510: }
0511: break;
0512: case '%':
0513: if (colz) {
0514: return fetchDirective();
0515: }
0516: break;
0517: case '-':
0518: if ((colz || docStart) && isEnding()) {
0519: return fetchDocumentStart();
0520: } else if (NULL_BL_T_LINEBR[peek(1)]) {
0521: return fetchBlockEntry();
0522: }
0523: break;
0524: case '.':
0525: if (colz && isStart()) {
0526: return fetchDocumentEnd();
0527: }
0528: break;
0529: case '[':
0530: return fetchFlowSequenceStart();
0531: case '{':
0532: return fetchFlowMappingStart();
0533: case ']':
0534: return fetchFlowSequenceEnd();
0535: case '}':
0536: return fetchFlowMappingEnd();
0537: case ',':
0538: return fetchFlowEntry();
0539: case '*':
0540: return fetchAlias();
0541: case '&':
0542: return fetchAnchor();
0543: case '!':
0544: return fetchTag();
0545: case '|':
0546: if (this .flowLevel == 0) {
0547: return fetchLiteral();
0548: }
0549: break;
0550: case '>':
0551: if (this .flowLevel == 0) {
0552: return fetchFolded();
0553: }
0554: break;
0555: }
0556:
0557: //TODO: this is probably incorrect...
0558: if (STUPID_CHAR[this .buffer.bytes[this .pointer] & 0xFF]
0559: || (ensure(1, false)
0560: && (this .buffer.bytes[this .pointer] == '-'
0561: || this .buffer.bytes[this .pointer] == '?' || this .buffer.bytes[this .pointer] == ':') && !NULL_BL_T_LINEBR[this .buffer.bytes[this .pointer + 1] & 0xFF])) {
0562: return fetchPlain();
0563: }
0564:
0565: throw new ScannerException("while scanning for the next token",
0566: "found character " + ch + "(" + (int) ch
0567: + ") that cannot start any token", null);
0568: }
0569:
0570: private Token fetchStreamStart() {
0571: this .docStart = true;
0572: this .tokens.add(Token.STREAM_START);
0573: return Token.STREAM_START;
0574: }
0575:
0576: private Token fetchStreamEnd() {
0577: unwindIndent(-1);
0578: this .allowSimpleKey = false;
0579: this .possibleSimpleKeys = new HashMap();
0580: this .tokens.add(Token.STREAM_END);
0581: this .done = true;
0582: this .docStart = false;
0583: return Token.STREAM_END;
0584: }
0585:
0586: private void scanToNextToken() {
0587: for (;;) {
0588: while (peek() == ' ') {
0589: forward();
0590: }
0591: if (peek() == '#') {
0592: forward();
0593: while (!NULL_OR_LINEBR[peek()]) {
0594: forward();
0595: }
0596: }
0597: if (scanLineBreak().length != 0) {
0598: if (this .flowLevel == 0) {
0599: this .allowSimpleKey = true;
0600: }
0601: } else {
0602: break;
0603: }
0604: }
0605: }
0606:
0607: private byte[] scanLineBreak() {
0608: // Transforms:
0609: // '\r\n' : '\n'
0610: // '\r' : '\n'
0611: // '\n' : '\n'
0612: // '\x85' : '\n'
0613: // default : ''
0614: final int val = peek();
0615: if (FULL_LINEBR[val]) {
0616: ensure(2, false);
0617: if (RN[0] == buffer.bytes[this .pointer]
0618: && RN[1] == buffer.bytes[this .pointer + 1]) {
0619: forward(2);
0620: } else {
0621: forward();
0622: }
0623: return NN;
0624: } else {
0625: return EMPTY;
0626: }
0627: }
0628:
0629: private void unwindIndent(final int col) {
0630: if (this .flowLevel != 0) {
0631: return;
0632: }
0633:
0634: while (this .indent > col) {
0635: this .indent = ((Integer) (this .indents.remove(0)))
0636: .intValue();
0637: this .tokens.add(Token.BLOCK_END);
0638: }
0639: }
0640:
0641: private Token fetchDocumentStart() {
0642: this .docStart = false;
0643: return fetchDocumentIndicator(Token.DOCUMENT_START);
0644: }
0645:
0646: private Token fetchDocumentIndicator(final Token tok) {
0647: unwindIndent(-1);
0648: removePossibleSimpleKey();
0649: this .allowSimpleKey = false;
0650: forward(3);
0651: this .tokens.add(tok);
0652: return tok;
0653: }
0654:
0655: private Token fetchBlockEntry() {
0656: this .docStart = false;
0657: if (this .flowLevel == 0) {
0658: if (!this .allowSimpleKey) {
0659: throw new ScannerException(null,
0660: "sequence entries are not allowed here", null);
0661: }
0662: if (addIndent(this .column)) {
0663: this .tokens.add(Token.BLOCK_SEQUENCE_START);
0664: }
0665: }
0666: this .allowSimpleKey = true;
0667: removePossibleSimpleKey();
0668: forward();
0669: this .tokens.add(Token.BLOCK_ENTRY);
0670: return Token.BLOCK_ENTRY;
0671: }
0672:
0673: private boolean addIndent(final int col) {
0674: if (this .indent < col) {
0675: this .indents.add(0, new Integer(this .indent));
0676: this .indent = col;
0677: return true;
0678: }
0679: return false;
0680: }
0681:
0682: private Token fetchTag() {
0683: this .docStart = false;
0684: savePossibleSimpleKey();
0685: this .allowSimpleKey = false;
0686: final Token tok = scanTag();
0687: this .tokens.add(tok);
0688: return tok;
0689: }
0690:
0691: private void removePossibleSimpleKey() {
0692: SimpleKey key = (SimpleKey) this .possibleSimpleKeys
0693: .remove(new Integer(this .flowLevel));
0694: if (key != null) {
0695: if (key.isRequired()) {
0696: throw new ScannerException(
0697: "while scanning a simple key",
0698: "could not find expected ':'", null);
0699: }
0700: }
0701: }
0702:
0703: private void savePossibleSimpleKey() {
0704: if (this .allowSimpleKey) {
0705: this .removePossibleSimpleKey();
0706: this .possibleSimpleKeys.put(new Integer(this .flowLevel),
0707: new SimpleKey(
0708: this .tokensTaken + this .tokens.size(),
0709: (this .flowLevel == 0)
0710: && this .indent == this .column, -1,
0711: -1, this .column));
0712: }
0713: }
0714:
0715: private Token scanTag() {
0716: char ch = peek(1);
0717: ByteList handle = null;
0718: ByteList suffix = null;
0719: if (ch == '<') {
0720: forward(2);
0721: suffix = scanTagUri("tag");
0722: if (peek() != '>') {
0723: throw new ScannerException("while scanning a tag",
0724: "expected '>', but found " + peek() + "("
0725: + (int) peek() + ")", null);
0726: }
0727: forward();
0728: } else if (NULL_BL_T_LINEBR[ch]) {
0729: suffix = BANG;
0730: forward();
0731: } else {
0732: int length = 1;
0733: boolean useHandle = false;
0734: while (!NULL_BL_T_LINEBR[ch]) {
0735: if (ch == '!') {
0736: useHandle = true;
0737: break;
0738: }
0739: length++;
0740: ch = peek(length);
0741: }
0742: handle = BANG;
0743: if (useHandle) {
0744: handle = scanTagHandle("tag");
0745: } else {
0746: handle = BANG;
0747: forward();
0748: }
0749: suffix = scanTagUri("tag");
0750: }
0751: if (!NULL_BL_LINEBR[peek()]) {
0752: throw new ScannerException("while scanning a tag",
0753: "expected ' ', but found " + peek() + "("
0754: + (int) peek() + ")", null);
0755: }
0756: return new TagToken(new ByteList[] { handle, suffix });
0757: }
0758:
0759: private ByteList scanTagUri(final String name) {
0760: final ByteList chunks = new ByteList(10);
0761: int length = 0;
0762: char ch = peek(length);
0763: while (STRANGE_CHAR[ch]) {
0764: if ('%' == ch) {
0765: ensure(length, false);
0766: chunks.append(this .buffer.bytes, this .pointer, length);
0767: length = 0;
0768: chunks.append(scanUriEscapes(name));
0769: } else {
0770: length++;
0771: }
0772: ch = peek(length);
0773: }
0774: if (length != 0) {
0775: ensure(length, false);
0776: chunks.append(this .buffer.bytes, this .pointer, length);
0777: forward(length);
0778: }
0779: if (chunks.length() == 0) {
0780: throw new ScannerException("while scanning a " + name,
0781: "expected URI, but found " + ch + "(" + (int) ch
0782: + ")", null);
0783: }
0784: return chunks;
0785: }
0786:
0787: private ByteList scanTagHandle(final String name) {
0788: char ch = peek();
0789: if (ch != '!') {
0790: throw new ScannerException("while scanning a " + name,
0791: "expected '!', but found " + ch + "(" + (int) ch
0792: + ")", null);
0793: }
0794: int length = 1;
0795: ch = peek(length);
0796: if (ch != ' ') {
0797: while (ALPHA[ch]) {
0798: length++;
0799: ch = peek(length);
0800: }
0801: if ('!' != ch) {
0802: forward(length);
0803: throw new ScannerException("while scanning a " + name,
0804: "expected '!', but found " + ch + "("
0805: + ((int) ch) + ")", null);
0806: }
0807: length++;
0808: }
0809: ensure(length, false);
0810: final ByteList value = new ByteList(this .buffer.bytes,
0811: this .pointer, length, false);
0812: forward(length);
0813: return value;
0814: }
0815:
0816: private ByteList scanUriEscapes(final String name) {
0817: final ByteList bytes = new ByteList();
0818: while (peek() == '%') {
0819: forward();
0820: try {
0821: ensure(2, false);
0822: bytes.append(Integer
0823: .parseInt(new String(ByteList.plain(
0824: this .buffer.bytes, this .pointer, 2)),
0825: 16));
0826: } catch (final NumberFormatException nfe) {
0827: throw new ScannerException("while scanning a " + name,
0828: "expected URI escape sequence of 2 hexadecimal numbers, but found "
0829: + peek(1) + "(" + ((int) peek(1))
0830: + ") and " + peek(2) + "("
0831: + ((int) peek(2)) + ")", null);
0832: }
0833: forward(2);
0834: }
0835: return bytes;
0836: }
0837:
0838: private Token fetchPlain() {
0839: this .docStart = false;
0840: savePossibleSimpleKey();
0841: this .allowSimpleKey = false;
0842: final Token tok = scanPlain();
0843: this .tokens.add(tok);
0844: return tok;
0845: }
0846:
0847: private Token scanPlain() {
0848: final ByteList chunks = new ByteList(7);
0849: final int ind = this .indent + 1;
0850: ByteList spaces = new ByteList(0);
0851: boolean f_nzero = true;
0852: boolean[] r_check = R_FLOWNONZERO;
0853: boolean[] r_check2 = ALL_FALSE;
0854: boolean[] r_check3 = ALL_FALSE;
0855: if (this .flowLevel == 0) {
0856: f_nzero = false;
0857: r_check = R_FLOWZERO;
0858: r_check2 = R_FLOWZERO1;
0859: r_check3 = R_FLOWZERO;
0860: }
0861: while (peek() != '#') {
0862: int length = 0;
0863: int i = 0;
0864: for (;; i++) {
0865: ensure(i + 2, false);
0866: if (r_check[this .buffer.bytes[this .pointer + i] & 0xFF]
0867: || (r_check2[this .buffer.bytes[this .pointer + i] & 0xFF] && r_check3[this .buffer.bytes[this .pointer
0868: + i + 1] & 0xFF])) {
0869: length = i;
0870: break;
0871: }
0872: }
0873:
0874: final char ch = peek(length);
0875: if (f_nzero && ch == ':' && !S4[peek(length + 1)]) {
0876: forward(length);
0877: throw new ScannerException(
0878: "while scanning a plain scalar",
0879: "found unexpected ':'",
0880: "Please check http://pyyaml.org/wiki/YAMLColonInFlowContext for details.");
0881: }
0882: if (length == 0) {
0883: break;
0884: }
0885: this .allowSimpleKey = false;
0886: chunks.append(spaces);
0887: ensure(length, false);
0888: chunks.append(this .buffer.bytes, this .pointer, length);
0889: forward(length);
0890: spaces = scanPlainSpaces(ind);
0891: if (spaces == null
0892: || (this .flowLevel == 0 && this .column < ind)) {
0893: break;
0894: }
0895: }
0896: return new ScalarToken(chunks, true);
0897: }
0898:
0899: private int nextPossibleSimpleKey() {
0900: for (final Iterator iter = this .possibleSimpleKeys.values()
0901: .iterator(); iter.hasNext();) {
0902: final SimpleKey key = (SimpleKey) iter.next();
0903: if (key.getTokenNumber() > 0) {
0904: return key.getTokenNumber();
0905: }
0906: }
0907: return -1;
0908: }
0909:
0910: private ByteList scanPlainSpaces(final int indent) {
0911: final ByteList chunks = new ByteList();
0912: int length = 0;
0913: while (peek(length) == ' ') {
0914: length++;
0915: }
0916: final byte[] whitespaces = new byte[length];
0917: Arrays.fill(whitespaces, (byte) ' ');
0918: forward(length);
0919: char ch = peek();
0920: if (FULL_LINEBR[ch]) {
0921: final byte[] lineBreak = scanLineBreak();
0922: this .allowSimpleKey = true;
0923: if (isEndOrStart()) {
0924: return new ByteList(0);
0925: }
0926: final ByteList breaks = new ByteList();
0927: while (BLANK_OR_LINEBR[peek()]) {
0928: if (' ' == peek()) {
0929: forward();
0930: } else {
0931: breaks.append(scanLineBreak());
0932: if (isEndOrStart()) {
0933: return new ByteList(0);
0934: }
0935: }
0936: }
0937: if (!(lineBreak.length == 1 && lineBreak[0] == '\n')) {
0938: chunks.append(lineBreak);
0939: } else if (breaks == null || breaks.realSize == 0) {
0940: chunks.append(SPACE);
0941: }
0942: chunks.append(breaks);
0943: } else {
0944: chunks.append(whitespaces);
0945: }
0946: return chunks;
0947: }
0948:
0949: private Token fetchSingle() {
0950: return fetchFlowScalar('\'');
0951: }
0952:
0953: private Token fetchDouble() {
0954: return fetchFlowScalar('"');
0955: }
0956:
0957: private Token fetchFlowScalar(final char style) {
0958: this .docStart = false;
0959: savePossibleSimpleKey();
0960: this .allowSimpleKey = false;
0961: final Token tok = scanFlowScalar(style);
0962: this .tokens.add(tok);
0963: return tok;
0964: }
0965:
0966: private Token scanFlowScalar(final char style) {
0967: final boolean dbl = style == '"';
0968: final ByteList chunks = new ByteList();
0969: final char quote = peek();
0970: forward();
0971: chunks.append(scanFlowScalarNonSpaces(dbl));
0972: while (peek() != quote) {
0973: chunks.append(scanFlowScalarSpaces());
0974: chunks.append(scanFlowScalarNonSpaces(dbl));
0975: }
0976: forward();
0977: return new ScalarToken(chunks, false, style);
0978: }
0979:
// Numeric value of each hexadecimal digit character, indexed by unsigned
// byte value; every other slot holds -1.
private static final byte[] HEXA_VALUES = new byte[256];
static {
    Arrays.fill(HEXA_VALUES, (byte) -1);
    for (int digit = 0; digit < 10; digit++) {
        HEXA_VALUES['0' + digit] = (byte) digit;
    }
    // Upper- and lower-case letters share the values 10..15.
    for (int letter = 0; letter < 6; letter++) {
        final byte value = (byte) (10 + letter);
        HEXA_VALUES['A' + letter] = value;
        HEXA_VALUES['a' + letter] = value;
    }
}
1006:
1007: private ByteList parseHexa(int length) {
1008: ensure(length, false);
1009: ByteList chunks = new ByteList(length / 2);
1010: for (int i = 0; i < length; i += 2) {
1011: byte val = HEXA_VALUES[this .buffer.bytes[this .pointer + i] & 0xFF];
1012: if (val == -1) {
1013: throw new ScannerException(
1014: "while scanning a double-quoted scalar",
1015: "expected escape sequence of "
1016: + length
1017: + " hexadecimal numbers, but found something else: "
1018: + (char) (this .buffer.bytes[this .pointer
1019: + i] & 0xFF), null);
1020: }
1021: if (i + 1 < length) {
1022: val = (byte) (val << 4);
1023: byte v2 = HEXA_VALUES[this .buffer.bytes[this .pointer
1024: + i + 1] & 0xFF];
1025: if (v2 == -1) {
1026: throw new ScannerException(
1027: "while scanning a double-quoted scalar",
1028: "expected escape sequence of "
1029: + length
1030: + " hexadecimal numbers, but found something else: "
1031: + (char) (this .buffer.bytes[this .pointer
1032: + i + 1] & 0xFF), null);
1033: }
1034: val += v2;
1035: }
1036: chunks.append(val);
1037: }
1038: forward(length);
1039: return chunks;
1040: }
1041:
1042: private ByteList scanFlowScalarNonSpaces(final boolean dbl) {
1043: final ByteList chunks = new ByteList();
1044: for (;;) {
1045: int length = 0;
1046: while (!SPACES_AND_STUFF[peek(length)]) {
1047: length++;
1048: }
1049: if (length != 0) {
1050: ensure(length, false);
1051: chunks.append(this .buffer.bytes, this .pointer, length);
1052: forward(length);
1053: }
1054: char ch = peek();
1055: if (!dbl && ch == '\'' && peek(1) == '\'') {
1056: chunks.append('\'');
1057: forward(2);
1058: } else if ((dbl && ch == '\'') || (!dbl && DOUBLE_ESC[ch])) {
1059: chunks.append(ch);
1060: forward();
1061: } else if (dbl && ch == '\\') {
1062: forward();
1063: ch = peek();
1064: if (IS_ESCAPE_REPLACEMENT[ch]) {
1065: chunks.append(ESCAPE_REPLACEMENTS[ch]);
1066: forward();
1067: } else if (ESCAPE_CODES.containsKey(new Character(ch))) {
1068: length = ((Integer) ESCAPE_CODES.get(new Character(
1069: ch))).intValue();
1070: forward();
1071: chunks.append(parseHexa(length));
1072: } else if (FULL_LINEBR[ch]) {
1073: scanLineBreak();
1074: ByteList ss = scanFlowScalarBreaks();
1075: chunks.append(ss);
1076: } else {
1077: chunks.append('\\');
1078: }
1079: } else {
1080: return chunks;
1081: }
1082: }
1083: }
1084:
1085: private ByteList scanFlowScalarSpaces() {
1086: final ByteList chunks = new ByteList();
1087: int length = 0;
1088: while (BLANK_T[peek(length)]) {
1089: length++;
1090: }
1091: ensure(length, false);
1092: ByteList whitespaces = new ByteList(this .buffer, this .pointer,
1093: length);
1094: forward(length);
1095: char ch = peek();
1096: if (ch == '\0') {
1097: throw new ScannerException(
1098: "while scanning a quoted scalar",
1099: "found unexpected end of stream", null);
1100: } else if (FULL_LINEBR[ch]) {
1101: final byte[] lineBreak = scanLineBreak();
1102: final ByteList breaks = scanFlowScalarBreaks();
1103: if (!(lineBreak.length == 1 && lineBreak[0] == '\n')) {
1104: chunks.append(lineBreak);
1105: } else if (breaks.length() == 0) {
1106: chunks.append(SPACE);
1107: }
1108: chunks.append(breaks);
1109: } else {
1110: chunks.append(whitespaces);
1111: }
1112: return chunks;
1113: }
1114:
1115: private ByteList scanFlowScalarBreaks() {
1116: final ByteList chunks = new ByteList();
1117: for (;;) {
1118: if (isEndOrStart()) {
1119: throw new ScannerException(
1120: "while scanning a quoted scalar",
1121: "found unexpected document separator", null);
1122: }
1123: while (BLANK_T[peek()]) {
1124: forward();
1125: }
1126: if (FULL_LINEBR[peek()]) {
1127: chunks.append(scanLineBreak());
1128: } else if ('\\' == peek() && BLANK_T[peek(1)]) {
1129: forward();
1130: chunks.append(scanFlowScalarSpaces());
1131: } else {
1132: return chunks;
1133: }
1134: }
1135: }
1136:
1137: private Token fetchValue() {
1138: this .docStart = false;
1139: final SimpleKey key = (SimpleKey) this .possibleSimpleKeys
1140: .get(new Integer(this .flowLevel));
1141: if (null == key) {
1142: if (this .flowLevel == 0 && !this .allowSimpleKey) {
1143: throw new ScannerException(null,
1144: "mapping values are not allowed here", null);
1145: }
1146: this .allowSimpleKey = this .flowLevel == 0;
1147: removePossibleSimpleKey();
1148: } else {
1149: this .possibleSimpleKeys.remove(new Integer(this .flowLevel));
1150: this .tokens.add(key.getTokenNumber() - this .tokensTaken,
1151: Token.KEY);
1152: if (this .flowLevel == 0 && addIndent(key.getColumn())) {
1153: this .tokens.add(
1154: key.getTokenNumber() - this .tokensTaken,
1155: Token.BLOCK_MAPPING_START);
1156: }
1157: this .allowSimpleKey = false;
1158: }
1159: forward();
1160: this .tokens.add(Token.VALUE);
1161: return Token.VALUE;
1162: }
1163:
1164: private Token fetchFlowSequenceStart() {
1165: return fetchFlowCollectionStart(Token.FLOW_SEQUENCE_START);
1166: }
1167:
1168: private Token fetchFlowMappingStart() {
1169: return fetchFlowCollectionStart(Token.FLOW_MAPPING_START);
1170: }
1171:
1172: private Token fetchFlowCollectionStart(final Token tok) {
1173: this .docStart = false;
1174: savePossibleSimpleKey();
1175: this .flowLevel++;
1176: this .allowSimpleKey = true;
1177: forward(1);
1178: this .tokens.add(tok);
1179: return tok;
1180: }
1181:
1182: private Token fetchDocumentEnd() {
1183: return fetchDocumentIndicator(Token.DOCUMENT_END);
1184: }
1185:
1186: private Token fetchFlowSequenceEnd() {
1187: return fetchFlowCollectionEnd(Token.FLOW_SEQUENCE_END);
1188: }
1189:
1190: private Token fetchFlowMappingEnd() {
1191: return fetchFlowCollectionEnd(Token.FLOW_MAPPING_END);
1192: }
1193:
1194: private Token fetchFlowCollectionEnd(final Token tok) {
1195: removePossibleSimpleKey();
1196: this .flowLevel--;
1197: this .allowSimpleKey = false;
1198: forward(1);
1199: this .tokens.add(tok);
1200: return tok;
1201: }
1202:
1203: private Token fetchFlowEntry() {
1204: this .allowSimpleKey = true;
1205: removePossibleSimpleKey();
1206: forward(1);
1207: this .tokens.add(Token.FLOW_ENTRY);
1208: return Token.FLOW_ENTRY;
1209: }
1210:
1211: private Token fetchLiteral() {
1212: return fetchBlockScalar('|');
1213: }
1214:
1215: private Token fetchFolded() {
1216: return fetchBlockScalar('>');
1217: }
1218:
1219: private Token fetchBlockScalar(final char style) {
1220: this .docStart = false;
1221: this .allowSimpleKey = true;
1222: this .removePossibleSimpleKey();
1223: final Token tok = scanBlockScalar(style);
1224: this .tokens.add(tok);
1225: return tok;
1226: }
1227:
1228: private Token scanBlockScalar(final char style) {
1229: final boolean folded = style == '>';
1230: final ByteList chunks = new ByteList();
1231: forward();
1232: final Object[] chompi = scanBlockScalarIndicators();
1233: final Boolean chomping = (Boolean) chompi[0];
1234: final int increment = ((Integer) chompi[1]).intValue();
1235:
1236: boolean sameLine = scanBlockScalarIgnoredLine();
1237:
1238: int minIndent = this .indent + 1;
1239: if (minIndent < 0) {
1240: minIndent = 0;
1241: }
1242: ByteList breaks = null;
1243: int maxIndent = 0;
1244: int ind = 0;
1245: if (sameLine) {
1246: final boolean leadingNonSpace = !BLANK_T[peek()];
1247: int length = 0;
1248: while (!NULL_OR_LINEBR[peek(length)]) {
1249: length++;
1250: }
1251: ensure(length, false);
1252: chunks.append(this .buffer.bytes, this .pointer, length);
1253: forward(length);
1254: }
1255: if (increment == -1) {
1256: final Object[] brme = scanBlockScalarIndentation();
1257: breaks = (ByteList) brme[0];
1258: maxIndent = ((Integer) brme[1]).intValue();
1259: if (minIndent > maxIndent) {
1260: ind = minIndent;
1261: } else {
1262: ind = maxIndent;
1263: }
1264: } else {
1265: ind = minIndent + increment - 1;
1266: breaks = scanBlockScalarBreaks(ind);
1267: }
1268:
1269: byte[] lineBreak = ByteList.NULL_ARRAY;
1270: while (this .column == ind && peek() != '\0') {
1271: chunks.append(breaks);
1272: final boolean leadingNonSpace = !BLANK_T[peek()];
1273: int length = 0;
1274: while (!NULL_OR_LINEBR[peek(length)]) {
1275: length++;
1276: }
1277: ensure(length, false);
1278: chunks.append(this .buffer.bytes, this .pointer, length);
1279: forward(length);
1280: lineBreak = scanLineBreak();
1281: breaks = scanBlockScalarBreaks(ind);
1282: if (this .column == ind && peek() != '\0') {
1283: if (folded && lineBreak.length == 1
1284: && lineBreak[0] == '\n' && leadingNonSpace
1285: && !BLANK_T[peek()]) {
1286: if (breaks.length() == 0) {
1287: chunks.append(SPACE);
1288: }
1289: } else {
1290: chunks.append(lineBreak);
1291: }
1292: } else {
1293: break;
1294: }
1295: }
1296:
1297: if (chomping != Boolean.FALSE) {
1298: chunks.append(lineBreak);
1299: }
1300: if (chomping == Boolean.TRUE) {
1301: chunks.append(breaks);
1302: }
1303:
1304: return new ScalarToken(chunks, false, style);
1305: }
1306:
1307: private ByteList scanBlockScalarBreaks(final int indent) {
1308: final ByteList chunks = new ByteList();
1309: while (this .column < indent && peek() == ' ') {
1310: forward();
1311: }
1312: while (FULL_LINEBR[peek()]) {
1313: chunks.append(scanLineBreak());
1314: while (this .column < indent && peek() == ' ') {
1315: forward();
1316: }
1317: }
1318: return chunks;
1319: }
1320:
1321: private Object[] scanBlockScalarIndentation() {
1322: final ByteList chunks = new ByteList();
1323: int maxIndent = 0;
1324: while (BLANK_OR_LINEBR[peek()]) {
1325: if (peek() != ' ') {
1326: chunks.append(scanLineBreak());
1327: } else {
1328: forward();
1329: if (this .column > maxIndent) {
1330: maxIndent = column;
1331: }
1332: }
1333: }
1334: return new Object[] { chunks, new Integer(maxIndent) };
1335: }
1336:
1337: private Object[] scanBlockScalarIndicators() {
1338: Boolean chomping = null;
1339: int increment = -1;
1340: char ch = peek();
1341: if (ch == '-' || ch == '+') {
1342: chomping = ch == '+' ? Boolean.TRUE : Boolean.FALSE;
1343: forward();
1344: ch = peek();
1345: if (DIGIT[ch]) {
1346: increment = ch - '0';
1347: if (increment == 0) {
1348: throw new ScannerException(
1349: "while scanning a block scalar",
1350: "expected indentation indicator in the range 1-9, but found 0",
1351: null);
1352: }
1353: forward();
1354: }
1355: } else if (DIGIT[ch]) {
1356: increment = ch - '0';
1357: if (increment == 0) {
1358: throw new ScannerException(
1359: "while scanning a block scalar",
1360: "expected indentation indicator in the range 1-9, but found 0",
1361: null);
1362: }
1363: forward();
1364: ch = peek();
1365: if (ch == '-' || ch == '+') {
1366: chomping = ch == '+' ? Boolean.TRUE : Boolean.FALSE;
1367: forward();
1368: }
1369: }
1370: if (!NULL_BL_LINEBR[peek()]) {
1371: throw new ScannerException("while scanning a block scalar",
1372: "expected chomping or indentation indicators, but found "
1373: + peek() + "(" + ((int) peek()) + ")", null);
1374: }
1375: return new Object[] { chomping, new Integer(increment) };
1376: }
1377:
1378: private boolean scanBlockScalarIgnoredLine() {
1379: boolean same = true;
1380: while (peek() == ' ') {
1381: forward();
1382: }
1383: if (peek() == '#') {
1384: while (!NULL_OR_LINEBR[peek()]) {
1385: forward();
1386: }
1387: same = false;
1388: }
1389: if (NULL_OR_LINEBR[peek()]) {
1390: scanLineBreak();
1391: return false;
1392: }
1393: return same;
1394: }
1395:
1396: private Token fetchDirective() {
1397: unwindIndent(-1);
1398: removePossibleSimpleKey();
1399: this .allowSimpleKey = false;
1400: final Token tok = scanDirective();
1401: this .tokens.add(tok);
1402: return tok;
1403: }
1404:
1405: private Token fetchKey() {
1406: if (this .flowLevel == 0) {
1407: if (!this .allowSimpleKey) {
1408: throw new ScannerException(null,
1409: "mapping keys are not allowed here", null);
1410: }
1411: if (addIndent(this .column)) {
1412: this .tokens.add(Token.BLOCK_MAPPING_START);
1413: }
1414: }
1415: this .allowSimpleKey = this .flowLevel == 0;
1416: removePossibleSimpleKey();
1417: forward();
1418: this .tokens.add(Token.KEY);
1419: return Token.KEY;
1420: }
1421:
1422: private Token fetchAlias() {
1423: savePossibleSimpleKey();
1424: this .allowSimpleKey = false;
1425: final Token tok = scanAnchor(new AliasToken());
1426: this .tokens.add(tok);
1427: return tok;
1428: }
1429:
1430: private Token fetchAnchor() {
1431: savePossibleSimpleKey();
1432: this .allowSimpleKey = false;
1433: final Token tok = scanAnchor(new AnchorToken());
1434: this .tokens.add(tok);
1435: return tok;
1436: }
1437:
1438: private Token scanDirective() {
1439: forward();
1440: final String name = scanDirectiveName();
1441: String[] value = null;
1442: if (name.equals("YAML")) {
1443: value = scanYamlDirectiveValue();
1444: } else if (name.equals("TAG")) {
1445: value = scanTagDirectiveValue();
1446: } else {
1447: while (!NULL_OR_LINEBR[peek()]) {
1448: forward();
1449: }
1450: }
1451: scanDirectiveIgnoredLine();
1452: return new DirectiveToken(name, value);
1453: }
1454:
1455: private String scanDirectiveName() {
1456: int length = 0;
1457: char ch = peek(length);
1458: boolean zlen = true;
1459: while (ALPHA[ch]) {
1460: zlen = false;
1461: length++;
1462: ch = peek(length);
1463: }
1464: if (zlen) {
1465: throw new ScannerException("while scanning a directive",
1466: "expected alphabetic or numeric character, but found "
1467: + ch + "(" + ((int) ch) + ")", null);
1468: }
1469: String value = null;
1470: try {
1471: ensure(length, false);
1472: value = new String(this .buffer.bytes, this .pointer, length,
1473: "ISO8859-1");
1474: } catch (Exception e) {
1475: }
1476: forward(length);
1477: if (!NULL_BL_LINEBR[peek()]) {
1478: throw new ScannerException("while scanning a directive",
1479: "expected alphabetic or numeric character, but found "
1480: + ch + "(" + ((int) ch) + ")", null);
1481: }
1482: return value;
1483: }
1484:
1485: private byte[] scanDirectiveIgnoredLine() {
1486: while (peek() == ' ') {
1487: forward();
1488: }
1489: if (peek() == '"') {
1490: while (!NULL_OR_LINEBR[peek()]) {
1491: forward();
1492: }
1493: }
1494: final char ch = peek();
1495: if (!NULL_OR_LINEBR[ch]) {
1496: throw new ScannerException("while scanning a directive",
1497: "expected a comment or a line break, but found "
1498: + peek() + "(" + ((int) peek()) + ")", null);
1499: }
1500: return scanLineBreak();
1501: }
1502:
1503: private Token scanAnchor(final Token tok) {
1504: final char indicator = peek();
1505: final String name = indicator == '*' ? "alias" : "anchor";
1506: forward();
1507: int length = 0;
1508: while (ALPHA[peek(length)]) {
1509: length++;
1510: }
1511: if (length == 0) {
1512: throw new ScannerException(
1513: "while scanning an " + name,
1514: "expected alphabetic or numeric character, but found something else...",
1515: null);
1516: }
1517: String value = null;
1518: try {
1519: ensure(length, false);
1520: value = new String(this .buffer.bytes, this .pointer, length,
1521: "ISO8859-1");
1522: } catch (Exception e) {
1523: }
1524: forward(length);
1525: if (!NON_ALPHA_OR_NUM[peek()]) {
1526: throw new ScannerException("while scanning an " + name,
1527: "expected alphabetic or numeric character, but found "
1528: + peek() + "(" + ((int) peek()) + ")", null);
1529:
1530: }
1531: tok.setValue(value);
1532: return tok;
1533: }
1534:
1535: private String[] scanYamlDirectiveValue() {
1536: while (peek() == ' ') {
1537: forward();
1538: }
1539: final String major = scanYamlDirectiveNumber();
1540: if (peek() != '.') {
1541: throw new ScannerException("while scanning a directive",
1542: "expected a digit or '.', but found " + peek()
1543: + "(" + ((int) peek()) + ")", null);
1544: }
1545: forward();
1546: final String minor = scanYamlDirectiveNumber();
1547: if (!NULL_BL_LINEBR[peek()]) {
1548: throw new ScannerException("while scanning a directive",
1549: "expected a digit or ' ', but found " + peek()
1550: + "(" + ((int) peek()) + ")", null);
1551: }
1552: return new String[] { major, minor };
1553: }
1554:
1555: private String scanYamlDirectiveNumber() {
1556: final char ch = peek();
1557: if (!Character.isDigit(ch)) {
1558: throw new ScannerException("while scanning a directive",
1559: "expected a digit, but found " + ch + "("
1560: + ((int) ch) + ")", null);
1561: }
1562: int length = 0;
1563: StringBuffer sb = new StringBuffer();
1564: while (Character.isDigit(peek(length))) {
1565: sb.append(peek(length));
1566: length++;
1567: }
1568: forward(length);
1569: return sb.toString();
1570: }
1571:
1572: public static String into(ByteList b) {
1573: try {
1574: return new String(b.bytes, 0, b.realSize, "ISO8859-1");
1575: } catch (Exception e) {
1576: return null; // Shouldn't happen
1577: }
1578: }
1579:
1580: private String[] scanTagDirectiveValue() {
1581: while (peek() == ' ') {
1582: forward();
1583: }
1584: final String handle = into(scanTagDirectiveHandle());
1585: while (peek() == ' ') {
1586: forward();
1587: }
1588: final String prefix = into(scanTagDirectivePrefix());
1589: return new String[] { handle, prefix };
1590: }
1591:
1592: private ByteList scanTagDirectiveHandle() {
1593: final ByteList value = scanTagHandle("directive");
1594: if (peek() != ' ') {
1595: throw new ScannerException("while scanning a directive",
1596: "expected ' ', but found " + peek() + "("
1597: + ((int) peek()) + ")", null);
1598: }
1599: return value;
1600: }
1601:
1602: private ByteList scanTagDirectivePrefix() {
1603: final ByteList value = scanTagUri("directive");
1604: if (!NULL_BL_LINEBR[peek()]) {
1605: throw new ScannerException("while scanning a directive",
1606: "expected ' ', but found " + peek() + "("
1607: + ((int) peek()) + ")", null);
1608: }
1609: return value;
1610: }
1611:
1612: /*
1613: private final static Pattern NON_PRINTABLE = Pattern.compile("[^\u0009\n\r\u0020-\u007E\u0085\u00A0-\u00FF]");
1614: private final static Pattern R_FLOWZERO = Pattern.compile("[\0 \t\r\n\u0085]|(:[\0 \t\r\n\u0085])");
1615: private final static Pattern R_FLOWNONZERO = Pattern.compile("[\0 \t\r\n\u0085\\[\\]{},:?]");
1616: private final static Pattern LINE_BR_REG = Pattern.compile("[\n\u0085]|(?:\r[^\n])");
1617: private final static Pattern END_OR_START = Pattern.compile("^(---|\\.\\.\\.)[\0 \t\r\n\u0085]$");
1618: private final static Pattern ENDING = Pattern.compile("^---[\0 \t\r\n\u0085]$");
1619: private final static Pattern START = Pattern.compile("^\\.\\.\\.[\0 \t\r\n\u0085]$");
1620: private final static Pattern BEG = Pattern.compile("^([^\0 \t\r\n\u0085\\-?:,\\[\\]{}#&*!|>'\"%@]|([\\-?:][^\0 \t\r\n\u0085]))");
1621:
1622: public static void main(final String[] args) throws Exception {
1623: // final String test1 = "--- \nA: b\nc: 3.14\n";
1624: final String filename = args[0];
1625: System.out.println("Reading of file: \"" + filename + "\"");
1626:
1627: final StringBuffer input = new StringBuffer();
1628: final Reader reader = new FileReader(filename);
1629: char[] buff = new char[1024];
1630: int read = 0;
1631: while(true) {
1632: read = reader.read(buff);
1633: input.append(buff,0,read);
1634: if(read < 1024) {
1635: break;
1636: }
1637: }
1638: reader.close();
1639: final String str = input.toString();
1640: final long before = System.currentTimeMillis();
1641: int tokens = 0;
1642: for(int i=0;i<1;i++) {
1643: final Scanner sce2 = new ScannerImpl(str);
1644: for(final Iterator iter = sce2.eachToken();iter.hasNext();) {
1645: tokens++;iter.next();
1646: // System.out.println(iter.next());
1647: }
1648: }
1649: final long after = System.currentTimeMillis();
1650: final long time = after-before;
1651: final double timeS = (after-before)/1000.0;
1652: System.out.println("Walking through the " + tokens + " tokens took " + time + "ms, or " + timeS + " seconds");
1653: }
1654: */
1655:
1656: public static void main(final String[] args) throws Exception {
1657: final String filename = args[0];
1658: System.out.println("Reading of file: \"" + filename + "\"");
1659:
1660: final ByteList input = new ByteList(1024);
1661: final InputStream reader = new FileInputStream(filename);
1662: byte[] buff = new byte[1024];
1663: int read = 0;
1664: while (true) {
1665: read = reader.read(buff);
1666: input.append(buff, 0, read);
1667: if (read < 1024) {
1668: break;
1669: }
1670: }
1671: reader.close();
1672: final long before = System.currentTimeMillis();
1673: int tokens = 0;
1674: for (int i = 0; i < 1; i++) {
1675: final Scanner sce2 = new ScannerImpl(input);
1676: for (final Iterator iter = sce2.eachToken(); iter.hasNext();) {
1677: tokens++;//iter.next();
1678: System.out.println(iter.next());
1679: }
1680: }
1681: final long after = System.currentTimeMillis();
1682: final long time = after - before;
1683: final double timeS = (after - before) / 1000.0;
1684: System.out.println("Walking through the " + tokens
1685: + " tokens took " + time + "ms, or " + timeS
1686: + " seconds");
1687: }
1688:
1689: public static void tmain(final String[] args) throws Exception {
1690: final String filename = args[0];
1691: System.out.println("Reading of file: \"" + filename + "\"");
1692:
1693: final InputStream reader = new FileInputStream(filename);
1694: final long before = System.currentTimeMillis();
1695: int tokens = 0;
1696: for (int i = 0; i < 1; i++) {
1697: final Scanner sce2 = new ScannerImpl(reader);
1698: for (final Iterator iter = sce2.eachToken(); iter.hasNext();) {
1699: tokens++;
1700: iter.next();
1701: //System.out.println(iter.next());
1702: }
1703: }
1704: reader.close();
1705: final long after = System.currentTimeMillis();
1706: final long time = after - before;
1707: final double timeS = (after - before) / 1000.0;
1708: System.out.println("Walking through the " + tokens
1709: + " tokens took " + time + "ms, or " + timeS
1710: + " seconds");
1711: }
1712: }// Scanner
|