001: /*BEGIN_COPYRIGHT_BLOCK
002: *
003: * Copyright (c) 2001-2007, JavaPLT group at Rice University (javaplt@rice.edu)
004: * All rights reserved.
005: *
006: * Redistribution and use in source and binary forms, with or without
007: * modification, are permitted provided that the following conditions are met:
008: * * Redistributions of source code must retain the above copyright
009: * notice, this list of conditions and the following disclaimer.
010: * * Redistributions in binary form must reproduce the above copyright
011: * notice, this list of conditions and the following disclaimer in the
012: * documentation and/or other materials provided with the distribution.
013: * * Neither the names of DrJava, the JavaPLT group, Rice University, nor the
014: * names of its contributors may be used to endorse or promote products
015: * derived from this software without specific prior written permission.
016: *
017: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
018: * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
019: * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
020: * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
021: * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
022: * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
023: * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
024: * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
025: * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
026: * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
027: * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028: *
029: * This software is Open Source Initiative approved Open Source Software.
030: * Open Source Initative Approved is a trademark of the Open Source Initiative.
031: *
032: * This file is part of DrJava. Download the current version of this project
033: * from http://www.drjava.org/ or http://sourceforge.net/projects/drjava/
034: *
035: * END_COPYRIGHT_BLOCK*/
036:
037: package edu.rice.cs.util;
038:
039: import java.util.List;
040: import java.util.LinkedList;
041:
/**
 * Utility class which can tokenize a String into a list of String arguments,
 * with behavior similar to parsing command line arguments to a program.
 * Quoted Strings are treated as single arguments, and escaped characters
 * are translated so that the tokenized arguments have the same meaning.
 * Since all methods are static, the class is declared abstract to prevent
 * instantiation.
 *
 * <p>Tokenizing rules implemented by {@link #tokenize(String, boolean)}:
 * <ul>
 * <li>Outside of quotes, whitespace separates arguments and a backslash makes
 *     the following character literal.</li>
 * <li>Inside single quotes every character up to the closing quote is taken
 *     literally; a backslash has no special meaning there.</li>
 * <li>Inside double quotes a backslash only escapes a double quote or another
 *     backslash; before any other character the backslash is kept.</li>
 * <li>A closing quote does not end the argument; the argument continues until
 *     unquoted whitespace or the end of the input is reached.</li>
 * <li>An unterminated quote or a trailing backslash does not cause an error;
 *     whatever has been collected so far becomes the last argument.</li>
 * </ul>
 * @version $Id: ArgumentTokenizer.java 4255 2007-08-28 19:17:37Z mgricken $
 */
public abstract class ArgumentTokenizer {
  /** Between arguments: no token is currently being collected. */
  private static final int NO_TOKEN_STATE = 0;
  /** Inside an unquoted (or no longer quoted) part of an argument. */
  private static final int NORMAL_TOKEN_STATE = 1;
  /** Inside a single-quoted section of an argument. */
  private static final int SINGLE_QUOTE_STATE = 2;
  /** Inside a double-quoted section of an argument. */
  private static final int DOUBLE_QUOTE_STATE = 3;

  /** Tokenizes the given String into String tokens, without re-quoting the results.
   * Equivalent to {@code tokenize(arguments, false)}.
   * @param arguments A String containing one or more command-line style arguments to be tokenized.
   * @return A list of parsed arguments with quoting and escaping already resolved.
   * @throws NullPointerException if {@code arguments} is null
   */
  public static List<String> tokenize(String arguments) {
    return tokenize(arguments, false);
  }

  /** Tokenizes the given String into String tokens.
   * @param arguments A String containing one or more command-line style arguments to be tokenized.
   * @param stringify if true, every resulting argument is wrapped in double quotes and its
   *        quotes, backslashes and the newline, tab, carriage return, backspace and form feed
   *        characters are replaced by backslash escape sequences; if false, the arguments are
   *        returned as parsed.
   * @return A list of parsed arguments, in the order they appear in {@code arguments}.
   * @throws NullPointerException if {@code arguments} is null
   */
  public static List<String> tokenize(String arguments,
                                      boolean stringify) {

    LinkedList<String> argList = new LinkedList<String>();
    StringBuilder currArg = new StringBuilder();
    boolean escaped = false;      // true right after an unquoted backslash
    int state = NO_TOKEN_STATE;   // start in the NO_TOKEN_STATE
    int len = arguments.length();

    // Loop over each character in the string
    for (int i = 0; i < len; i++) {
      char c = arguments.charAt(i);
      if (escaped) {
        // Escaped state: just append the next character to the current arg.
        escaped = false;
        currArg.append(c);
      } else {
        switch (state) {
          case SINGLE_QUOTE_STATE:
            if (c == '\'') {
              // Seen the close quote; continue this arg until whitespace is seen
              state = NORMAL_TOKEN_STATE;
            } else {
              currArg.append(c);
            }
            break;
          case DOUBLE_QUOTE_STATE:
            if (c == '"') {
              // Seen the close quote; continue this arg until whitespace is seen
              state = NORMAL_TOKEN_STATE;
            } else if (c == '\\') {
              if (i + 1 < len) {
                // Look ahead, and only escape quotes or backslashes
                i++;
                char next = arguments.charAt(i);
                if (next == '"' || next == '\\') {
                  currArg.append(next);
                } else {
                  currArg.append(c);
                  currArg.append(next);
                }
              } else {
                // Backslash is the very last character of the input: there is
                // nothing to look ahead at, so keep the backslash literally
                // (same treatment as a trailing backslash outside of quotes).
                currArg.append(c);
              }
            } else {
              currArg.append(c);
            }
            break;
          case NO_TOKEN_STATE:
          case NORMAL_TOKEN_STATE:
            switch (c) {
              case '\\':
                // Backslash outside quotes: the next character is taken literally
                escaped = true;
                state = NORMAL_TOKEN_STATE;
                break;
              case '\'':
                state = SINGLE_QUOTE_STATE;
                break;
              case '"':
                state = DOUBLE_QUOTE_STATE;
                break;
              default:
                if (!Character.isWhitespace(c)) {
                  currArg.append(c);
                  state = NORMAL_TOKEN_STATE;
                } else if (state == NORMAL_TOKEN_STATE) {
                  // Whitespace ends the token; start a new one
                  argList.add(currArg.toString());
                  currArg = new StringBuilder();
                  state = NO_TOKEN_STATE;
                }
                // Whitespace while in NO_TOKEN_STATE is simply skipped.
            }
            break;
          default:
            throw new IllegalStateException(
              "ArgumentTokenizer state " + state
              + " is invalid!");
        }
      }
    }

    // If we're still escaped, put in the backslash
    if (escaped) {
      currArg.append('\\');
      argList.add(currArg.toString());
    }
    // Close the last argument if we haven't yet (this also covers unterminated quotes)
    else if (state != NO_TOKEN_STATE) {
      argList.add(currArg.toString());
    }

    if (!stringify) {
      return argList;
    }

    // Format each argument if we've been told to stringify them. A single pass
    // into a fresh list avoids indexed get/set calls on a linked list.
    List<String> quoted = new LinkedList<String>();
    for (String arg : argList) {
      quoted.add("\"" + _escapeQuotesAndBackslashes(arg) + "\"");
    }
    return quoted;
  }

  /**
   * Inserts backslashes before any occurrences of a backslash or
   * double quote in the given string. Also replaces the newline, tab,
   * carriage return, backspace and form feed characters with their
   * two-character backslash escape sequences. All other characters,
   * including single quotes, are copied unchanged.
   * @param s the string to escape
   * @return the escaped copy of {@code s}
   * @throws NullPointerException if {@code s} is null
   */
  protected static String _escapeQuotesAndBackslashes(String s) {
    final int n = s.length();
    final StringBuilder buf = new StringBuilder(n + 8);

    // Build the result front to back, expanding special characters as we go.
    for (int i = 0; i < n; i++) {
      char c = s.charAt(i);
      switch (c) {
        case '\\':
        case '"':
          buf.append('\\').append(c);
          break;
        case '\n':
          buf.append("\\n");
          break;
        case '\t':
          buf.append("\\t");
          break;
        case '\r':
          buf.append("\\r");
          break;
        case '\b':
          buf.append("\\b");
          break;
        case '\f':
          buf.append("\\f");
          break;
        default:
          buf.append(c);
      }
    }
    return buf.toString();
  }
}
|