001: /*
002: * xtc - The eXTensible Compiler
003: * Copyright (C) 2004 Robert Grimm
004: *
005: * This program is free software; you can redistribute it and/or
006: * modify it under the terms of the GNU General Public License
007: * as published by the Free Software Foundation; either version 2
008: * of the License, or (at your option) any later version.
009: *
010: * This program is distributed in the hope that it will be useful,
011: * but WITHOUT ANY WARRANTY; without even the implied warranty of
012: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
013: * GNU General Public License for more details.
014: *
015: * You should have received a copy of the GNU General Public License
016: * along with this program; if not, write to the Free Software
017: * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
018: */
019: package xtc.util;
020:
021: import java.io.IOException;
022: import java.io.Reader;
023: import java.io.Writer;
024:
025: import java.util.HashMap;
026: import java.util.Iterator;
027: import java.util.List;
028:
029: import java.util.regex.Pattern;
030:
031: import xtc.tree.Location;
032:
033: /**
034: * Implmentation of utilities for language processors, focusing on
035: * handling of I/O.
036: *
037: * @author Robert Grimm
038: * @version $Revision: 1.1 $
039: */
040: public final class Utilities {
041:
042: /** Hide constructor. */
043: private Utilities() {
044: }
045:
046: /** The buffer size for pumping data. */
047: private static final int BUFFER_SIZE = 4096;
048:
049: /** The buffer for pumping data. */
050: private static char[] charBuffer = null;
051:
052: /**
053: * The regular expression pattern used to split source code along
054: * newlines.
055: */
056: public static final Pattern SPLIT = Pattern.compile("\r\n|\r|\n");
057:
058: /**
059: * Flag for using doubled escape sequences. Doubled escape
060: * sequences will still read as an escape sequence, even if they are
061: * included in a program source character or string.
062: */
063: public static final int ESCAPE_DOUBLE = 0x01;
064:
065: /**
066: * Flag for escaping the '<code>[</code>', '<code>-</code>', and
067: * '<code>]</code>' characters.
068: */
069: public static final int ESCAPE_REGEX = 0x02;
070:
071: /**
072: * Flag for escaping non-printable ASCII characters using Unicode
073: * escapes.
074: */
075: public static final int ESCAPE_UNICODE = 0x04;
076:
077: /** The escape flags for C/C++ escapes. */
078: public static final int C_ESCAPES = 0x00;
079:
080: /** The escape flags for Java escapes. */
081: public static final int JAVA_ESCAPES = ESCAPE_UNICODE;
082:
083: /** The escape flags for Java and regex escapes. */
084: public static final int FULL_ESCAPES = ESCAPE_UNICODE
085: | ESCAPE_REGEX;
086:
087: /**
088: * Pump all data from the specified reader to the specified writer.
089: *
090: * @param in The reader.
091: * @param out The writer.
092: * @throws IOException Signals an exceptinal condition during I/O.
093: */
094: public static void pump(Reader in, Writer out) throws IOException {
095: if (null == charBuffer)
096: charBuffer = new char[BUFFER_SIZE];
097:
098: int number = in.read(charBuffer);
099: while (-1 != number) {
100: out.write(charBuffer, 0, number);
101: number = in.read(charBuffer);
102: }
103: }
104:
105: /**
106: * Determine whether the double escapes flag is set.
107: *
108: * @see #ESCAPE_DOUBLE
109: *
110: * @param flags The escape flags.
111: * @return <code>true</code> if the double escapes flag is set.
112: */
113: public static boolean useDoubleEscapes(int flags) {
114: return (0 != (ESCAPE_DOUBLE & flags));
115: }
116:
117: /**
118: * Determine whether the regex escapes flag is set.
119: *
120: * @see #ESCAPE_REGEX
121: *
122: * @param flags The escape flags.
123: * @return <code>true</code> if the regex escape flag is set.
124: */
125: public static boolean useRegexEscapes(int flags) {
126: return (0 != (ESCAPE_REGEX & flags));
127: }
128:
129: /**
130: * Determine whether the Unicode escapes flag is set.
131: *
132: * @see #ESCAPE_UNICODE
133: *
134: * @param flags The escape flags.
135: * @return <code>true</code> if the Unicode escapes flag is set.
136: */
137: public static boolean useUnicodeEscapes(int flags) {
138: return (0 != (ESCAPE_UNICODE & flags));
139: }
140:
141: /**
142: * Escape the specified character into the specified string buffer.
143: *
144: * @param c The character.
145: * @param buf The string buffer.
146: * @param flags The escape flags.
147: */
148: public static void escape(char c, StringBuffer buf, int flags) {
149: boolean useDouble = useDoubleEscapes(flags);
150: boolean useRegex = useRegexEscapes(flags);
151: boolean useUnicode = useUnicodeEscapes(flags);
152:
153: switch (c) {
154: case '\b':
155: if (useDouble) {
156: buf.append("\\\\b");
157: } else {
158: buf.append("\\b");
159: }
160: break;
161: case '\t':
162: if (useDouble) {
163: buf.append("\\\\t");
164: } else {
165: buf.append("\\t");
166: }
167: break;
168: case '\n':
169: if (useDouble) {
170: buf.append("\\\\n");
171: } else {
172: buf.append("\\n");
173: }
174: break;
175: case '\f':
176: if (useDouble) {
177: buf.append("\\\\f");
178: } else {
179: buf.append("\\f");
180: }
181: break;
182: case '\r':
183: if (useDouble) {
184: buf.append("\\\\r");
185: } else {
186: buf.append("\\r");
187: }
188: break;
189: case '\"':
190: if (useDouble) {
191: buf.append("\\\\\\\"");
192: } else {
193: buf.append("\\\"");
194: }
195: break;
196: case '\'':
197: if (useDouble) {
198: buf.append("\\\\\\\'");
199: } else {
200: buf.append("\\\'");
201: }
202: break;
203: case '-':
204: if (useRegex) {
205: if (useDouble) {
206: buf.append("\\\\-");
207: } else {
208: buf.append("\\-");
209: }
210: } else {
211: buf.append('-');
212: }
213: break;
214: case '[':
215: if (useRegex) {
216: if (useDouble) {
217: buf.append("\\\\[");
218: } else {
219: buf.append("\\[");
220: }
221: } else {
222: buf.append('[');
223: }
224: break;
225: case '\\':
226: if (useDouble) {
227: buf.append("\\\\\\\\");
228: } else {
229: buf.append("\\\\");
230: }
231: break;
232: case ']':
233: if (useRegex) {
234: if (useDouble) {
235: buf.append("\\\\]");
236: } else {
237: buf.append("\\]");
238: }
239: } else {
240: buf.append(']');
241: }
242: break;
243: default:
244: if (useUnicode && ((' ' > c) || ('~' < c))) {
245: String hex = Integer.toHexString(c);
246: String padding;
247: switch (hex.length()) {
248: case 1:
249: padding = "000";
250: break;
251: case 2:
252: padding = "00";
253: break;
254: case 3:
255: padding = "0";
256: break;
257: default:
258: padding = "";
259: }
260: if (useDouble) {
261: buf.append("\\\\");
262: } else {
263: buf.append("\\");
264: }
265: buf.append('u');
266: buf.append(padding);
267: buf.append(hex);
268:
269: } else {
270: buf.append(c);
271: }
272: }
273: }
274:
275: /**
276: * Escape the specified string into the specified string buffer.
277: *
278: * @param s The string.
279: * @param buf The string buffer.
280: * @param flags The escape flags.
281: */
282: public static void escape(String s, StringBuffer buf, int flags) {
283: final int length = s.length();
284: for (int i = 0; i < length; i++) {
285: escape(s.charAt(i), buf, flags);
286: }
287: }
288:
289: /**
290: * Escape the specified character.
291: *
292: * @param c The character.
293: * @param flags The escape flags.
294: * @return The escape character as a string.
295: */
296: public static String escape(char c, int flags) {
297: StringBuffer buf = new StringBuffer(1);
298: escape(c, buf, flags);
299: return buf.toString();
300: }
301:
302: /**
303: * Escape the specified string.
304: *
305: * @param s The string.
306: * @param flags The escape flags.
307: * @return The escaped string.
308: */
309: public static String escape(String s, int flags) {
310: StringBuffer buf = new StringBuffer(s.length());
311: escape(s, buf, flags);
312: return buf.toString();
313: }
314:
315: /**
316: * Unescape the specified string. This method unescapes standard
317: * C-style escapes ('<code>\b</code>', '<code>\t</code>',
318: * '<code>\n</code>', '<code>\f</code>', <code>'\r</code>',
319: * '<code>\"</code>', '<code>\'</code>', and '<code>\\</code>') as
320: * well as Java Unicode escapes. To support regex-like character
321: * classes, it also unescapes '<code>\-</code>', '<code>\[</code>',
322: * and '<code>\]</code>'.
323: *
324: * @param s The string to unescape.
325: * @return The unescaped string.
326: */
327: public static String unescape(String s) {
328: if (-1 == s.indexOf('\\')) {
329: return s;
330: }
331:
332: final int length = s.length();
333: StringBuffer buf = new StringBuffer(length);
334:
335: for (int i = 0; i < length; i++) {
336: char c = s.charAt(i);
337: if ('\\' != c) {
338: buf.append(c);
339: } else {
340: i++;
341: c = s.charAt(i);
342:
343: switch (c) {
344: case 'b':
345: buf.append('\b');
346: break;
347: case 't':
348: buf.append('\t');
349: break;
350: case 'n':
351: buf.append('\n');
352: break;
353: case 'f':
354: buf.append('\f');
355: break;
356: case 'r':
357: buf.append('\r');
358: break;
359: case '"':
360: buf.append('"');
361: break;
362: case '\'':
363: buf.append('\'');
364: break;
365: case '-':
366: buf.append('-');
367: break;
368: case '[':
369: buf.append('[');
370: break;
371: case '\\':
372: buf.append('\\');
373: break;
374: case ']':
375: buf.append(']');
376: break;
377: case 'u':
378: i += 4;
379: int n;
380:
381: try {
382: n = Integer.parseInt(s.substring(i - 3, i + 1),
383: 16);
384: } catch (NumberFormatException x) {
385: throw new IllegalArgumentException(
386: "Illegal Unicode escape (\'\\u"
387: + s.substring(i - 3, i + 1)
388: + "\')");
389: }
390: buf.append((char) n);
391: break;
392: default:
393: throw new IllegalArgumentException(
394: "Illegal escaped character (\'\\" + c
395: + "\')");
396: }
397: }
398: }
399: return buf.toString();
400: }
401:
402: /**
403: * Convert the specified list to a human-readable representation.
404: * This method uses <code>toString()</code> for each element in the
405: * specified list to generate a human-readable representation.
406: *
407: * @param l The list.
408: * @return The human-readable representation.
409: */
410: public static String format(List l) {
411: final int length = l.size();
412:
413: if (0 == length) {
414: return "";
415: }
416:
417: StringBuffer buf = new StringBuffer();
418:
419: if (2 == length) {
420: buf.append(l.get(0));
421: buf.append(" and ");
422: buf.append(l.get(1));
423:
424: } else {
425: boolean first = true;
426: Iterator iter = l.iterator();
427: while (iter.hasNext()) {
428: Object el = iter.next();
429: if (first) {
430: first = false;
431: } else if (iter.hasNext()) {
432: buf.append(", ");
433: } else {
434: buf.append(", and ");
435: }
436: buf.append(el);
437: }
438: }
439:
440: return buf.toString();
441: }
442:
443: /**
444: * Convert the specified identifier into a human-readable
445: * description. This method breaks identifiers using an upper case
446: * character for each word component into a string of space
447: * separated lower case words.
448: *
449: * @param id The identifier.
450: * @return The corresponding description.
451: */
452: public static String toDescription(String id) {
453: // Drop any suffixes.
454: int idx = id.indexOf('$');
455: if (-1 != idx) {
456: id = id.substring(0, idx);
457: }
458:
459: // Count the number of upper case characters.
460: final int length = id.length();
461: int upperCount = 0;
462:
463: for (int i = 0; i < length; i++) {
464: if (Character.isUpperCase(id.charAt(i))) {
465: upperCount++;
466: }
467: }
468:
469: // No conversion is necessary if all characters are either lower
470: // or upper case.
471: if ((0 == upperCount) || (length == upperCount)) {
472: return id;
473: }
474:
475: // Do the actual conversion.
476: StringBuffer buf = new StringBuffer(length + upperCount);
477:
478: for (int i = 0; i < length; i++) {
479: char c = id.charAt(i);
480:
481: if (Character.isUpperCase(c)) {
482: if (0 != i) {
483: buf.append(' ');
484: }
485: buf.append(Character.toLowerCase(c));
486: } else {
487: buf.append(c);
488: }
489: }
490:
491: return buf.toString();
492: }
493:
494: /**
495: * Print the specified (error or warning) message to the error
496: * console. The message is prefixed with the specified location
497: * information and followed by the corresponding source line, with a
498: * caret ('<code>^</code>') indicating the column.
499: *
500: * @see #msg(String,Location,String,String[])
501: *
502: * @param msg The message.
503: * @param loc The source location.
504: * @param source The source file, one line per array entry.
505: */
506: public static void msg(String msg, Location loc, String[] source) {
507: msg(msg, loc, null, source);
508: }
509:
510: /**
511: * Print the specified (error or warning) message to the error
512: * console. If the specified location is non-null, the message is
513: * prefixed with the location information. Otherwise, if the
514: * specified context is non-null, the message is prefixed with the
515: * context. If both the specified location and source are non-null,
516: * the message is followed by the corresponding source line and a
517: * caret ('<code>^</code>') to indicate the column.
518: *
519: * @param msg The message.
520: * @param loc The source location.
521: * @param context The alternative context.
522: * @param source The source file, one line per array entry.
523: */
524: public static void msg(String msg, Location loc, String context,
525: String[] source) {
526: if (null != loc) {
527: System.err.print(loc.toString());
528: System.err.print(": ");
529: } else if (null != context) {
530: System.err.print(context);
531: System.err.print(": ");
532: }
533:
534: System.err.println(msg);
535:
536: if ((null != loc) && (null != source)) {
537: String line;
538: try {
539: line = source[loc.line - 1];
540: } catch (ArrayIndexOutOfBoundsException x) {
541: line = "";
542: }
543: System.err.println(line);
544: for (int i = 0; i < loc.column; i++) {
545: System.err.print(' ');
546: }
547: System.err.println('^');
548: }
549: }
550:
551: }
|