001: /* JavaLiterals
002: *
003: * Created on Dec 31, 2003
004: *
005: * Copyright (C) 2004 Internet Archive.
006: *
007: * This file is part of the Heritrix web crawler (crawler.archive.org).
008: *
009: * Heritrix is free software; you can redistribute it and/or modify
010: * it under the terms of the GNU Lesser Public License as published by
011: * the Free Software Foundation; either version 2.1 of the License, or
012: * any later version.
013: *
014: * Heritrix is distributed in the hope that it will be useful,
015: * but WITHOUT ANY WARRANTY; without even the implied warranty of
016: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
017: * GNU Lesser Public License for more details.
018: *
019: * You should have received a copy of the GNU Lesser Public License
020: * along with Heritrix; if not, write to the Free Software
021: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
022: */
023: package org.archive.util;
024:
/**
 * Utility functions to escape or unescape Java literal strings.
 *
 * <p>{@link #escape(String)} turns raw text into the form it would take
 * between the quotes of a Java string literal; {@link #unescape(String)}
 * performs the reverse transformation. Both methods are stateless.
 *
 * @author gojomo
 *
 */
public class JavaLiterals {

    /**
     * Escapes a raw string so it can be embedded in a Java string literal.
     *
     * <p>Backspace, tab, newline, form feed, carriage return, double quote,
     * single quote and backslash are written as their two-character escapes.
     * Any other character whose general category is
     * {@link Character#CONTROL} is written as a backslash, the letter
     * {@code u} and four lowercase hexadecimal digits. Every other character
     * is copied through unchanged.
     *
     * @param raw the text to escape; must not be {@code null}
     * @return the escaped text
     * @throws NullPointerException if {@code raw} is {@code null}
     */
    public static String escape(String raw) {
        StringBuffer escaped = new StringBuffer(raw.length() + 16);
        for (int i = 0; i < raw.length(); i++) {
            char c = raw.charAt(i);
            switch (c) {
            case '\b':
                escaped.append("\\b");
                break;
            case '\t':
                escaped.append("\\t");
                break;
            case '\n':
                escaped.append("\\n");
                break;
            case '\f':
                escaped.append("\\f");
                break;
            case '\r':
                escaped.append("\\r");
                break;
            case '\"':
                escaped.append("\\\"");
                break;
            case '\'':
                escaped.append("\\'");
                break;
            case '\\':
                escaped.append("\\\\");
                break;
            default:
                if (Character.getType(c) == Character.CONTROL) {
                    // Remaining control characters become a zero-padded,
                    // four-digit hexadecimal unicode escape.
                    String hex = Integer.toHexString(c);
                    escaped.append("\\u");
                    for (int pad = hex.length(); pad < 4; pad++) {
                        escaped.append('0');
                    }
                    escaped.append(hex);
                } else {
                    escaped.append(c);
                }
                break;
            }
        }
        return escaped.toString();
    }

    /**
     * Reverses {@link #escape(String)}: replaces Java escape sequences in the
     * given text with the characters they denote.
     *
     * <p>Recognised sequences are the single-letter escapes for backspace,
     * tab, newline, form feed and carriage return, the escaped double quote,
     * single quote and backslash, the unicode escape (backslash, {@code u},
     * exactly four hexadecimal digits) and octal escapes (one to three octal
     * digits, three only when the first digit is 0-3).
     *
     * <p>Malformed input never raises an exception. A trailing lone
     * backslash, a unicode escape without four hexadecimal digits, and any
     * unrecognised escape are copied to the output verbatim.
     *
     * @param escaped the text containing escape sequences; must not be
     *        {@code null}
     * @return the unescaped text
     * @throws NullPointerException if {@code escaped} is {@code null}
     */
    public static String unescape(String escaped) {
        int length = escaped.length();
        StringBuffer raw = new StringBuffer(length);
        for (int i = 0; i < length; i++) {
            char c = escaped.charAt(i);
            if (c != '\\') {
                raw.append(c);
                continue;
            }
            i++;
            if (i >= length) {
                // trailing backslash with nothing after it: keep it as-is
                raw.append('\\');
                break;
            }
            c = escaped.charAt(i);
            switch (c) {
            case 'b':
                raw.append('\b');
                break;
            case 't':
                raw.append('\t');
                break;
            case 'n':
                raw.append('\n');
                break;
            case 'f':
                raw.append('\f');
                break;
            case 'r':
                raw.append('\r');
                break;
            case '"':
                raw.append('\"');
                break;
            case '\'':
                raw.append('\'');
                break;
            case '\\':
                raw.append('\\');
                break;
            case 'u': {
                // unicode hex escape: exactly four hex digits must follow
                int unicode = parseHex4(escaped, i + 1);
                if (unicode >= 0) {
                    raw.append((char) unicode);
                    i += 4;
                } else {
                    // malformed: keep the marker; the characters after it
                    // are then processed as ordinary input
                    raw.append("\\u");
                }
                break;
            }
            default:
                if (c >= '0' && c <= '7') {
                    // octal escape: up to three digits when the first is
                    // 0-3, otherwise up to two, so the value is at most 255
                    int maxDigits = (c <= '3') ? 3 : 2;
                    int end = i;
                    int octal = 0;
                    while (end < length && end - i < maxDigits) {
                        char digit = escaped.charAt(end);
                        if (digit < '0' || digit > '7') {
                            break;
                        }
                        octal = (octal << 3) | (digit - '0');
                        end++;
                    }
                    raw.append((char) octal);
                    i = end - 1;
                } else {
                    // unrecognised escape: preserve it rather than drop it
                    raw.append('\\');
                    raw.append(c);
                }
                break;
            }
        }
        return raw.toString();
    }

    /**
     * Parses exactly four ASCII hexadecimal digits of {@code s} beginning at
     * {@code start}.
     *
     * @return the parsed value (0 to 0xFFFF), or -1 if fewer than four
     *         characters remain or any of them is not a hexadecimal digit
     */
    private static int parseHex4(String s, int start) {
        if (start + 4 > s.length()) {
            return -1;
        }
        int value = 0;
        for (int k = start; k < start + 4; k++) {
            int digit = hexValue(s.charAt(k));
            if (digit < 0) {
                return -1;
            }
            value = (value << 4) | digit;
        }
        return value;
    }

    /**
     * Returns the numeric value of an ASCII hexadecimal digit, or -1 if the
     * character is not one.
     */
    private static int hexValue(char ch) {
        if (ch >= '0' && ch <= '9') {
            return ch - '0';
        }
        if (ch >= 'a' && ch <= 'f') {
            return ch - 'a' + 10;
        }
        if (ch >= 'A' && ch <= 'F') {
            return ch - 'A' + 10;
        }
        return -1;
    }
}
|