001: /***** BEGIN LICENSE BLOCK *****
002: * Version: CPL 1.0/GPL 2.0/LGPL 2.1
003: *
004: * The contents of this file are subject to the Common Public
005: * License Version 1.0 (the "License"); you may not use this file
006: * except in compliance with the License. You may obtain a copy of
007: * the License at http://www.eclipse.org/legal/cpl-v10.html
008: *
009: * Software distributed under the License is distributed on an "AS
010: * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
011: * implied. See the License for the specific language governing
012: * rights and limitations under the License.
013: *
014: * Copyright (C) 2004 Jan Arne Petersen <jpetersen@uni-bonn.de>
015: *
016: * Alternatively, the contents of this file may be used under the terms of
017: * either of the GNU General Public License Version 2 or later (the "GPL"),
018: * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
019: * in which case the provisions of the GPL or the LGPL are applicable instead
020: * of those above. If you wish to allow use of your version of this file only
021: * under the terms of either the GPL or the LGPL, and not to allow others to
022: * use your version of this file under the terms of the CPL, indicate your
023: * decision by deleting the provisions above and replace them with the notice
024: * and other provisions required by the GPL or the LGPL. If you do not delete
025: * the provisions above, a recipient may use your version of this file under
026: * the terms of any one of the CPL, the GPL or the LGPL.
027: ***** END LICENSE BLOCK *****/package org.jruby.lexer.yacc;
028:
029: import org.jruby.ast.RegexpNode;
030: import org.jruby.ast.StrNode;
031: import org.jruby.parser.ReOptions;
032: import org.jruby.parser.Tokens;
033: import org.jruby.util.ByteList;
034:
035: public class StringTerm extends StrTerm {
/**
 * Bit flags ({@code RubyYaccLexer.STR_FUNC_*}) describing the kind of string
 * being lexed. {@link #parseString} overwrites this with {@code -1} once the
 * terminator of a word list has been consumed.
 */
private int func;

/** Character that closes the string. */
private final char term;

/**
 * Opening delimiter that nests against {@link #term}, or {@code '\0'} when
 * the string has no nesting delimiter.
 */
private final char paren;

/** Current nesting depth of delimiters seen while scanning the string body. */
private int nest = 0;

/**
 * Creates a terminator description for one string literal.
 *
 * @param func  string type flags ({@code RubyYaccLexer.STR_FUNC_*})
 * @param term  closing delimiter
 * @param paren nesting opening delimiter, or {@code '\0'} for none
 */
public StringTerm(int func, char term, char paren) {
    this.paren = paren;
    this.term = term;
    this.func = func;
}
052:
053: public int parseString(final RubyYaccLexer lexer, LexerSource src)
054: throws java.io.IOException {
055: char c;
056: int space = 0;
057:
058: if (func == -1) {
059: lexer.setValue(new Token("\"", lexer.getPosition()));
060: return Tokens.tSTRING_END;
061: }
062:
063: c = src.read();
064: if ((func & RubyYaccLexer.STR_FUNC_QWORDS) != 0
065: && Character.isWhitespace(c)) {
066: do {
067: c = src.read();
068: } while (Character.isWhitespace(c));
069: space = 1;
070: }
071:
072: // BEGIN NETBEANS MODIFICATIONS
073: //if (c == term && nest == 0) {
074: if ((processingEmbedded == IGNORE_EMBEDDED || processingEmbedded == LOOKING_FOR_EMBEDDED)
075: && (c == term) && (nest == 0)) {
076: // END NETBEANS MODIFICATIONS
077: if ((func & RubyYaccLexer.STR_FUNC_QWORDS) != 0) {
078: // BEGIN NETBEANS MODIFICATIONS
079: if (processingEmbedded == LOOKING_FOR_EMBEDDED) { // Only make this change when lexing, not parsing
080: // I want the terminating ")" to be passed as a string closure token,
081: // not as a plain rparen, since I want it to match up with the
082: // string opening tag (and I don't want an unbalanced right paren)
083: lexer.setValue(new Token("" + term, lexer
084: .getPosition()));
085: return Tokens.tSTRING_END;
086: }
087: // END NETBEANS MODIFICATIONS
088: func = -1;
089: lexer.getPosition();
090: return ' ';
091: }
092: if ((func & RubyYaccLexer.STR_FUNC_REGEXP) != 0) {
093: lexer.setValue(new RegexpNode(src.getPosition(),
094: ByteList.create(""), parseRegexpFlags(src)));
095: return Tokens.tREGEXP_END;
096: }
097: lexer.setValue(new Token("\"", lexer.getPosition()));
098: return Tokens.tSTRING_END;
099: }
100: if (space != 0) {
101: src.unread(c);
102: lexer.getPosition();
103: return ' ';
104: }
105: ByteList buffer = new ByteList();
106:
107: // BEGIN NETBEANS MODIFICATIONS
108: if ((processingEmbedded == EMBEDDED_DEXPR) && (c == '}')) {
109: processingEmbedded = LOOKING_FOR_EMBEDDED;
110: lexer.setValue(new Token("}", lexer.getPosition()));
111: return Tokens.tSTRING_CONTENT;
112: }
113: // END NETBEANS MODIFICATIONS
114:
115: if ((func & RubyYaccLexer.STR_FUNC_EXPAND) != 0 && c == '#') {
116: c = src.read();
117: switch (c) {
118: case '$':
119: case '@':
120: // BEGIN NETBEANS MODIFICATIONS
121: if (processingEmbedded == LOOKING_FOR_EMBEDDED) {
122: processingEmbedded = EMBEDDED_DVAR;
123: }
124: // END NETBEANS MODIFICATIONS
125: src.unread(c);
126: lexer.setValue(new Token("#" + c, lexer.getPosition()));
127: return Tokens.tSTRING_DVAR;
128: case '{':
129: // BEGIN NETBEANS MODIFICATIONS
130: if (processingEmbedded == LOOKING_FOR_EMBEDDED) {
131: processingEmbedded = EMBEDDED_DEXPR;
132: }
133: // END NETBEANS MODIFICATIONS
134: lexer.setValue(new Token("#" + c, lexer.getPosition()));
135: return Tokens.tSTRING_DBEG;
136: }
137: buffer.append('#');
138: }
139: src.unread(c);
140: // BEGIN NETBEANS MODIFICATIONS
141: //if (parseStringIntoBuffer(src, buffer) == 0) {
142: int parsed;
143: if (processingEmbedded == EMBEDDED_DEXPR) {
144: parsed = parseDExprIntoBuffer(src, buffer);
145: } else {
146: parsed = parseStringIntoBuffer(src, buffer);
147: }
148: if (parsed == 0) {
149: // END NETBEANS MODIFICATIONS
150: // BEGIN NETBEANS MODIFICATIONS
151: // We've read to the end of input and haven't found a corresponding String
152: // terminator. However, we don't always want to return the rest of the input as
153: // erroneous; in lexing mode, we want to stop at the first newline
154: // (at least or normal quoted strings, possibly not for heredocs etc.)
155: // and resume parsing from there, since it's likely that we're in the middle
156: // of typing a string.
157: // We've gotta push the "unused portion" of the string back into the input;
158: // the unused portion is the portion after the first newline.
159: // int n = buffer.length();
160: // for (int j = 0; j < n; j++) {
161: // if (buffer.charAt(j) == '\n') {
162: // // Found it.
163: // j++; // Include at least one
164: // for (int k = n-1; k >= j; k--) {
165: // // push input back in reverse order
166: // src.unread(buffer.charAt(k));
167: // }
168: // // Fall through outer loop and throw SyntaxException
169: // break;
170: // }
171: // }
172: //throw new SyntaxException(src.getPosition(), "unterminated string meets end of file");
173: throw new UnterminatedStringException(src.getPosition(),
174: "unterminated string meets end of file");
175: // END NETBEANS MODIFICATIONS
176: }
177:
178: lexer.setValue(new StrNode(lexer.getPosition(), buffer));
179:
180: // BEGIN NETBEANS MODIFICATIONS
181: // DVARs last only for a single string token so shut if off here.
182: if (processingEmbedded == EMBEDDED_DVAR) {
183: processingEmbedded = LOOKING_FOR_EMBEDDED;
184: } else if ((processingEmbedded == EMBEDDED_DEXPR)
185: && (buffer.length() == 0)) {
186: // Unbalanced expression - see #96485
187: processingEmbedded = LOOKING_FOR_EMBEDDED;
188: }
189: // END NETBEANS MODIFICATIONS
190:
191: return Tokens.tSTRING_CONTENT;
192: }
193:
194: private int parseRegexpFlags(final LexerSource src)
195: throws java.io.IOException {
196: char kcode = 0;
197: int options = 0;
198: char c;
199: StringBuffer unknownFlags = new StringBuffer(10);
200:
201: for (c = src.read(); c != RubyYaccLexer.EOF
202: && Character.isLetter(c); c = src.read()) {
203: switch (c) {
204: case 'i':
205: options |= ReOptions.RE_OPTION_IGNORECASE;
206: break;
207: case 'x':
208: options |= ReOptions.RE_OPTION_EXTENDED;
209: break;
210: case 'm':
211: options |= ReOptions.RE_OPTION_MULTILINE;
212: break;
213: case 'o':
214: options |= ReOptions.RE_OPTION_ONCE;
215: break;
216: case 'n':
217: kcode = 16;
218: break;
219: case 'e':
220: kcode = 32;
221: break;
222: case 's':
223: kcode = 48;
224: break;
225: case 'u':
226: kcode = 64;
227: break;
228: default:
229: unknownFlags.append(c);
230: break;
231: }
232: }
233: src.unread(c);
234: if (unknownFlags.length() != 0) {
235: throw new SyntaxException(src.getPosition(),
236: "unknown regexp option"
237: + (unknownFlags.length() > 1 ? "s" : "")
238: + " - " + unknownFlags.toString());
239: }
240: return options | kcode;
241: }
242:
243: public char parseStringIntoBuffer(LexerSource src, ByteList buffer)
244: throws java.io.IOException {
245: char c;
246:
247: while ((c = src.read()) != RubyYaccLexer.EOF) {
248: if (paren != '\0' && c == paren) {
249: nest++;
250: // BEGIN NETBEANS MODIFICATIONS
251: } else if (processingEmbedded == EMBEDDED_DEXPR && c == '}') {
252: src.unread(c);
253: break;
254: } else if (processingEmbedded == EMBEDDED_DVAR
255: && !((c == '_') || c == '$' || c == '@' || Character
256: .isLetter(c))) {
257: src.unread(c);
258: break;
259: // END NETBEANS MODIFICATIONS
260: } else if (c == term) {
261: if (nest == 0) {
262: src.unread(c);
263: break;
264: }
265: nest--;
266: } else if ((func & RubyYaccLexer.STR_FUNC_EXPAND) != 0
267: && c == '#' && !src.peek('\n')) {
268: char c2 = src.read();
269:
270: if (c2 == '$' || c2 == '@' || c2 == '{') {
271: src.unread(c2);
272: src.unread(c);
273: break;
274: }
275: src.unread(c2);
276: } else if (c == '\\') {
277: c = src.read();
278: switch (c) {
279: case '\n':
280: if ((func & RubyYaccLexer.STR_FUNC_QWORDS) != 0) {
281: break;
282: }
283: if ((func & RubyYaccLexer.STR_FUNC_EXPAND) != 0) {
284: continue;
285: }
286: buffer.append('\\');
287: break;
288:
289: case '\\':
290: if ((func & RubyYaccLexer.STR_FUNC_ESCAPE) != 0) {
291: buffer.append(c);
292: }
293: break;
294:
295: default:
296: if ((func & RubyYaccLexer.STR_FUNC_REGEXP) != 0) {
297: src.unread(c);
298: parseEscapeIntoBuffer(src, buffer);
299: continue;
300: } else if ((func & RubyYaccLexer.STR_FUNC_EXPAND) != 0) {
301: src.unread(c);
302: if ((func & RubyYaccLexer.STR_FUNC_ESCAPE) != 0) {
303: buffer.append('\\');
304: }
305: c = src.readEscape();
306: } else if ((func & RubyYaccLexer.STR_FUNC_QWORDS) != 0
307: && Character.isWhitespace(c)) {
308: /* ignore backslashed spaces in %w */
309: } else if (c != term
310: && !(paren != '\0' && c == paren)) {
311: buffer.append('\\');
312: }
313: }
314: } else if ((func & RubyYaccLexer.STR_FUNC_QWORDS) != 0
315: && Character.isWhitespace(c)) {
316: src.unread(c);
317: break;
318: }
319: if (c == '\0'
320: && (func & RubyYaccLexer.STR_FUNC_SYMBOL) != 0) {
321: throw new SyntaxException(src.getPosition(),
322: "symbol cannot contain '\\0'");
323: }
324: buffer.append(c);
325: }
326: return c;
327: }
328:
329: // BEGIN NETBEANS MODIFICATIONS
330: public char parseDExprIntoBuffer(LexerSource src, ByteList buffer)
331: throws java.io.IOException {
332: char c;
333:
334: assert processingEmbedded == EMBEDDED_DEXPR;
335:
336: while ((c = src.read()) != RubyYaccLexer.EOF) {
337: if (c == '{') {
338: nest++;
339: } else if (c == '}') {
340: if (nest == 0) {
341: src.unread(c);
342: break;
343: }
344: nest--;
345: } else if (c == '\\') {
346: c = src.read();
347: switch (c) {
348: case '\n':
349: if ((func & RubyYaccLexer.STR_FUNC_QWORDS) != 0) {
350: break;
351: }
352: if ((func & RubyYaccLexer.STR_FUNC_EXPAND) != 0) {
353: continue;
354: }
355: buffer.append('\\');
356: break;
357:
358: case '\\':
359: if ((func & RubyYaccLexer.STR_FUNC_ESCAPE) != 0) {
360: buffer.append(c);
361: }
362: break;
363:
364: default:
365: if ((func & RubyYaccLexer.STR_FUNC_REGEXP) != 0) {
366: src.unread(c);
367: parseEscapeIntoBuffer(src, buffer);
368: continue;
369: } else if ((func & RubyYaccLexer.STR_FUNC_EXPAND) != 0) {
370: src.unread(c);
371: if ((func & RubyYaccLexer.STR_FUNC_ESCAPE) != 0) {
372: buffer.append('\\');
373: }
374: c = src.readEscape();
375: } else if ((func & RubyYaccLexer.STR_FUNC_QWORDS) != 0
376: && Character.isWhitespace(c)) {
377: /* ignore backslashed spaces in %w */
378: } else if (c != term
379: && !(paren != '\0' && c == paren)) {
380: buffer.append('\\');
381: }
382: }
383: } else if ((func & RubyYaccLexer.STR_FUNC_QWORDS) != 0
384: && Character.isWhitespace(c)) {
385: src.unread(c);
386: break;
387: }
388: if (c == '\0'
389: && (func & RubyYaccLexer.STR_FUNC_SYMBOL) != 0) {
390: throw new SyntaxException(src.getPosition(),
391: "symbol cannot contain '\\0'");
392: }
393: buffer.append(c);
394: }
395: return c;
396: }
397:
398: // END NETBEANS MODIFICATIONS
399:
400: // Was a goto in original ruby lexer
401: private void escaped(LexerSource src, ByteList buffer)
402: throws java.io.IOException {
403: char c;
404:
405: switch (c = src.read()) {
406: case '\\':
407: parseEscapeIntoBuffer(src, buffer);
408: break;
409: case RubyYaccLexer.EOF:
410: throw new SyntaxException(src.getPosition(),
411: "Invalid escape character syntax");
412: default:
413: buffer.append(c);
414: }
415: }
416:
417: private void parseEscapeIntoBuffer(LexerSource src, ByteList buffer)
418: throws java.io.IOException {
419: char c;
420:
421: switch (c = src.read()) {
422: case '\n':
423: break; /* just ignore */
424: case '0':
425: case '1':
426: case '2':
427: case '3': /* octal constant */
428: case '4':
429: case '5':
430: case '6':
431: case '7':
432: buffer.append('\\');
433: buffer.append(c);
434: for (int i = 0; i < 2; i++) {
435: c = src.read();
436: if (c == RubyYaccLexer.EOF) {
437: throw new SyntaxException(src.getPosition(),
438: "Invalid escape character syntax");
439: }
440: if (!RubyYaccLexer.isOctChar(c)) {
441: src.unread(c);
442: break;
443: }
444: buffer.append(c);
445: }
446: break;
447: case 'x': /* hex constant */
448: buffer.append('\\');
449: buffer.append(c);
450: c = src.read();
451: if (!RubyYaccLexer.isHexChar(c)) {
452: throw new SyntaxException(src.getPosition(),
453: "Invalid escape character syntax");
454: }
455: buffer.append(c);
456: c = src.read();
457: if (RubyYaccLexer.isHexChar(c)) {
458: buffer.append(c);
459: } else {
460: src.unread(c);
461: }
462: break;
463: case 'M':
464: if ((c = src.read()) != '-') {
465: throw new SyntaxException(src.getPosition(),
466: "Invalid escape character syntax");
467: }
468: buffer.append(new byte[] { '\\', 'M', '-' });
469: escaped(src, buffer);
470: break;
471: case 'C':
472: if ((c = src.read()) != '-') {
473: throw new SyntaxException(src.getPosition(),
474: "Invalid escape character syntax");
475: }
476: buffer.append(new byte[] { '\\', 'C', '-' });
477: escaped(src, buffer);
478: break;
479: case 'c':
480: buffer.append(new byte[] { '\\', 'c' });
481: escaped(src, buffer);
482: break;
483: case 0:
484: throw new SyntaxException(src.getPosition(),
485: "Invalid escape character syntax");
486: default:
487: if (c != '\\' || c != term) {
488: buffer.append('\\');
489: }
490: buffer.append(c);
491: }
492: }
493:
494: // BEGIN NETBEANS MODIFICATIONS
495: public boolean isSubstituting() {
496: return (func & RubyYaccLexer.STR_FUNC_EXPAND) != 0;
497: }
498:
499: public Object getMutableState() {
500: return new MutableTermState(processingEmbedded, nest);
501: }
502:
503: public void setMutableState(Object o) {
504: MutableTermState state = (MutableTermState) o;
505: if (state != null) {
506: this .processingEmbedded = state.processingEmbedded;
507: this .nest = state.nest;
508: }
509: }
510:
511: public void splitEmbeddedTokens() {
512: if (processingEmbedded == IGNORE_EMBEDDED) {
513: processingEmbedded = LOOKING_FOR_EMBEDDED;
514: }
515: }
516:
517: private class MutableTermState {
518: private MutableTermState(int embeddedCode, int nest) {
519: this .processingEmbedded = embeddedCode;
520: this .nest = nest;
521: }
522:
523: public boolean equals(Object obj) {
524: if (obj == null)
525: return false;
526: if (getClass() != obj.getClass())
527: return false;
528: final MutableTermState other = (MutableTermState) obj;
529:
530: if (this .nest != other.nest)
531: return false;
532: if (this .processingEmbedded != other.processingEmbedded)
533: return false;
534: return true;
535: }
536:
537: public int hashCode() {
538: int hash = 7;
539:
540: hash = 83 * hash + this .nest;
541: hash = 83 * hash + this .processingEmbedded;
542: return hash;
543: }
544:
545: public String toString() {
546: return "StringTermState[nest=" + nest + ",embed="
547: + processingEmbedded + "]";
548: }
549:
550: private int nest;
551: private int processingEmbedded;
552: }
553:
554: // Equals - primarily for unit testing (incremental lexing tests
555: // where we do full-file-lexing and compare state to incremental lexing)
556: public boolean equals(Object obj) {
557: if (obj == null)
558: return false;
559: if (getClass() != obj.getClass())
560: return false;
561: final StringTerm other = (StringTerm) obj;
562:
563: if (this .func != other.func)
564: return false;
565: if (this .term != other.term)
566: return false;
567: if (this .processingEmbedded != other.processingEmbedded)
568: return false;
569: if (this .paren != other.paren)
570: return false;
571: if (this .nest != other.nest)
572: return false;
573: return true;
574: }
575:
576: private static String toFuncString(int func) {
577: StringBuilder sb = new StringBuilder();
578: if ((func & RubyYaccLexer.STR_FUNC_ESCAPE) != 0) {
579: sb.append("escape|");
580: }
581: if ((func & RubyYaccLexer.STR_FUNC_EXPAND) != 0) {
582: sb.append("expand|");
583: }
584: if ((func & RubyYaccLexer.STR_FUNC_REGEXP) != 0) {
585: sb.append("regexp|");
586: }
587: if ((func & RubyYaccLexer.STR_FUNC_QWORDS) != 0) {
588: sb.append("qwords|");
589: }
590: if ((func & RubyYaccLexer.STR_FUNC_SYMBOL) != 0) {
591: sb.append("symbol|");
592: }
593: if ((func & RubyYaccLexer.STR_FUNC_INDENT) != 0) {
594: sb.append("indent|");
595: }
596:
597: String s = sb.toString();
598:
599: if (s.endsWith("|")) {
600: s = s.substring(0, s.length() - 1);
601: } else if (s.length() == 0) {
602: s = "-";
603: }
604:
605: return s;
606: }
607:
608: public String toString() {
609: return "StringTerm[func=" + toFuncString(func) + ",term="
610: + term + ",paren=" + (int) paren + ",nest=" + nest
611: + ",embed=" + processingEmbedded + "]";
612: }
613:
614: public int hashCode() {
615: int hash = 7;
616:
617: hash = 13 * hash + this .func;
618: hash = 13 * hash + this .term;
619: hash = 13 * hash + this .paren;
620: hash = 13 * hash + this .nest;
621: hash = 13 * hash + this .processingEmbedded;
622: return hash;
623: }
624:
625: public static class UnterminatedStringException extends
626: SyntaxException {
627: public UnterminatedStringException(ISourcePosition pos,
628: String message) {
629: super (pos, message);
630: }
631: }
632: // END NETBEANS MODIFICATIONS
633: }
|