001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017: package org.apache.commons.vfs.provider;
018:
019: import org.apache.commons.vfs.FileName;
020: import org.apache.commons.vfs.FileSystemException;
021: import org.apache.commons.vfs.FileType;
022: import org.apache.commons.vfs.VFS;
023:
024: /**
025: * Utilities for dealing with URIs. See RFC 2396 for details.
026: *
027: * @author <a href="mailto:adammurdoch@apache.org">Adam Murdoch</a>
028: * @version $Revision: 480428 $ $Date: 2005-10-13 21:11:33 +0200 (Do, 13 Okt
029: * 2005) $
030: */
031: public final class UriParser {
032: /**
033: * The normalised separator to use.
034: */
035: private static final char SEPARATOR_CHAR = FileName.SEPARATOR_CHAR;
036:
037: /**
038: * The set of valid separators. These are all converted to the normalised
039: * one. Does <i>not</i> contain the normalised separator
040: */
041: // public static final char[] separators = {'\\'};
042: public static final char TRANS_SEPARATOR = '\\';
043:
044: private UriParser() {
045: }
046:
047: /**
048: * Extracts the first element of a path.
049: */
050: public static String extractFirstElement(final StringBuffer name) {
051: final int len = name.length();
052: if (len < 1) {
053: return null;
054: }
055: int startPos = 0;
056: if (name.charAt(0) == SEPARATOR_CHAR) {
057: startPos = 1;
058: }
059: for (int pos = startPos; pos < len; pos++) {
060: if (name.charAt(pos) == SEPARATOR_CHAR) {
061: // Found a separator
062: final String elem = name.substring(startPos, pos);
063: name.delete(startPos, pos + 1);
064: return elem;
065: }
066: }
067:
068: // No separator
069: final String elem = name.substring(startPos);
070: name.setLength(0);
071: return elem;
072: }
073:
074: /**
075: * Normalises a path. Does the following:
076: * <ul>
077: * <li>Removes empty path elements.
078: * <li>Handles '.' and '..' elements.
079: * <li>Removes trailing separator.
080: * </ul>
081: *
082: * Its assumed that the separators are already fixed.
083: *
084: * @see #fixSeparators
085: */
086: public static FileType normalisePath(final StringBuffer path)
087: throws FileSystemException {
088: FileType fileType = FileType.FOLDER;
089: if (path.length() == 0) {
090: return fileType;
091: }
092:
093: if (path.charAt(path.length() - 1) != '/') {
094: fileType = FileType.FILE;
095: }
096:
097: // Adjust separators
098: // fixSeparators(path);
099:
100: // Determine the start of the first element
101: int startFirstElem = 0;
102: if (path.charAt(0) == SEPARATOR_CHAR) {
103: if (path.length() == 1) {
104: return fileType;
105: }
106: startFirstElem = 1;
107: }
108:
109: // Iterate over each element
110: int startElem = startFirstElem;
111: int maxlen = path.length();
112: while (startElem < maxlen) {
113: // Find the end of the element
114: int endElem = startElem;
115: for (; endElem < maxlen
116: && path.charAt(endElem) != SEPARATOR_CHAR; endElem++) {
117: }
118:
119: final int elemLen = endElem - startElem;
120: if (elemLen == 0) {
121: // An empty element - axe it
122: path.delete(endElem, endElem + 1);
123: maxlen = path.length();
124: continue;
125: }
126: if (elemLen == 1 && path.charAt(startElem) == '.') {
127: // A '.' element - axe it
128: path.delete(startElem, endElem + 1);
129: maxlen = path.length();
130: continue;
131: }
132: if (elemLen == 2 && path.charAt(startElem) == '.'
133: && path.charAt(startElem + 1) == '.') {
134: // A '..' element - remove the previous element
135: if (startElem == startFirstElem) {
136: // Previous element is missing
137: throw new FileSystemException(
138: "vfs.provider/invalid-relative-path.error");
139: }
140:
141: // Find start of previous element
142: int pos = startElem - 2;
143: for (; pos >= 0 && path.charAt(pos) != SEPARATOR_CHAR; pos--) {
144: }
145: startElem = pos + 1;
146:
147: path.delete(startElem, endElem + 1);
148: maxlen = path.length();
149: continue;
150: }
151:
152: // A regular element
153: startElem = endElem + 1;
154: }
155:
156: // Remove trailing separator
157: if (!VFS.isUriStyle()) {
158: if (maxlen > 0 && path.charAt(maxlen - 1) == SEPARATOR_CHAR
159: && maxlen > 1) {
160: path.delete(maxlen - 1, maxlen);
161: }
162: }
163:
164: return fileType;
165: }
166:
167: /**
168: * Normalises the separators in a name.
169: */
170: public static boolean fixSeparators(final StringBuffer name) {
171: boolean changed = false;
172: final int maxlen = name.length();
173: for (int i = 0; i < maxlen; i++) {
174: final char ch = name.charAt(i);
175: if (ch == TRANS_SEPARATOR) {
176: name.setCharAt(i, SEPARATOR_CHAR);
177: changed = true;
178: }
179: }
180: return changed;
181: }
182:
183: /**
184: * Extracts the scheme from a URI.
185: *
186: * @param uri
187: * The URI.
188: * @return The scheme name. Returns null if there is no scheme.
189: */
190: public static String extractScheme(final String uri) {
191: return extractScheme(uri, null);
192: }
193:
194: /**
195: * Extracts the scheme from a URI. Removes the scheme and ':' delimiter from
196: * the front of the URI.
197: *
198: * @param uri
199: * The URI.
200: * @param buffer
201: * Returns the remainder of the URI.
202: * @return The scheme name. Returns null if there is no scheme.
203: */
204: public static String extractScheme(final String uri,
205: final StringBuffer buffer) {
206: if (buffer != null) {
207: buffer.setLength(0);
208: buffer.append(uri);
209: }
210:
211: final int maxPos = uri.length();
212: for (int pos = 0; pos < maxPos; pos++) {
213: final char ch = uri.charAt(pos);
214:
215: if (ch == ':') {
216: // Found the end of the scheme
217: final String scheme = uri.substring(0, pos);
218: if (buffer != null) {
219: buffer.delete(0, pos + 1);
220: }
221: return scheme.intern();
222: }
223:
224: if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
225: // A scheme character
226: continue;
227: }
228: if (pos > 0
229: && ((ch >= '0' && ch <= '9') || ch == '+'
230: || ch == '-' || ch == '.')) {
231: // A scheme character (these are not allowed as the first
232: // character of the scheme, but can be used as subsequent
233: // characters.
234: continue;
235: }
236:
237: // Not a scheme character
238: break;
239: }
240:
241: // No scheme in URI
242: return null;
243: }
244:
245: /**
246: * Removes %nn encodings from a string.
247: */
248: public static String decode(final String encodedStr)
249: throws FileSystemException {
250: if (encodedStr == null) {
251: return null;
252: }
253: if (encodedStr.indexOf('%') < 0) {
254: return encodedStr;
255: }
256: final StringBuffer buffer = new StringBuffer(encodedStr);
257: decode(buffer, 0, buffer.length());
258: return buffer.toString();
259: }
260:
261: /**
262: * Removes %nn encodings from a string.
263: */
264: public static void decode(final StringBuffer buffer,
265: final int offset, final int length)
266: throws FileSystemException {
267: int index = offset;
268: int count = length;
269: for (; count > 0; count--, index++) {
270: final char ch = buffer.charAt(index);
271: if (ch != '%') {
272: continue;
273: }
274: if (count < 3) {
275: throw new FileSystemException(
276: "vfs.provider/invalid-escape-sequence.error",
277: buffer.substring(index, index + count));
278: }
279:
280: // Decode
281: int dig1 = Character.digit(buffer.charAt(index + 1), 16);
282: int dig2 = Character.digit(buffer.charAt(index + 2), 16);
283: if (dig1 == -1 || dig2 == -1) {
284: throw new FileSystemException(
285: "vfs.provider/invalid-escape-sequence.error",
286: buffer.substring(index, index + 3));
287: }
288: char value = (char) (dig1 << 4 | dig2);
289:
290: // Replace
291: buffer.setCharAt(index, value);
292: buffer.delete(index + 1, index + 3);
293: count -= 2;
294: }
295: }
296:
297: /**
298: * Encodes and appends a string to a StringBuffer.
299: */
300: public static void appendEncoded(final StringBuffer buffer,
301: final String unencodedValue, final char[] reserved) {
302: final int offset = buffer.length();
303: buffer.append(unencodedValue);
304: encode(buffer, offset, unencodedValue.length(), reserved);
305: }
306:
307: /**
308: * Encodes a set of reserved characters in a StringBuffer, using the URI %nn
309: * encoding. Always encodes % characters.
310: */
311: public static void encode(final StringBuffer buffer,
312: final int offset, final int length, final char[] reserved) {
313: int index = offset;
314: int count = length;
315: for (; count > 0; index++, count--) {
316: final char ch = buffer.charAt(index);
317: boolean match = (ch == '%');
318: if (reserved != null) {
319: for (int i = 0; !match && i < reserved.length; i++) {
320: if (ch == reserved[i]) {
321: match = true;
322: }
323: }
324: }
325: if (match) {
326: // Encode
327: char[] digits = {
328: Character.forDigit(((ch >> 4) & 0xF), 16),
329: Character.forDigit((ch & 0xF), 16) };
330: buffer.setCharAt(index, '%');
331: buffer.insert(index + 1, digits);
332: index += 2;
333: }
334: }
335: }
336:
337: /**
338: * Removes %nn encodings from a string.
339: */
340: public static String encode(final String decodedStr) {
341: return encode(decodedStr, null);
342: }
343:
344: public static String encode(final String decodedStr,
345: final char[] reserved) {
346: if (decodedStr == null) {
347: return null;
348: }
349: final StringBuffer buffer = new StringBuffer(decodedStr);
350: encode(buffer, 0, buffer.length(), reserved);
351: return buffer.toString();
352: }
353:
354: public static String[] encode(String[] strings) {
355: if (strings == null) {
356: return null;
357: }
358: for (int i = 0; i < strings.length; i++) {
359: strings[i] = encode(strings[i]);
360: }
361: return strings;
362: }
363:
364: public static void checkUriEncoding(String uri)
365: throws FileSystemException {
366: decode(uri);
367: }
368:
369: public static void canonicalizePath(StringBuffer buffer,
370: int offset, int length, FileNameParser fileNameParser)
371: throws FileSystemException {
372: int index = offset;
373: int count = length;
374: for (; count > 0; count--, index++) {
375: final char ch = buffer.charAt(index);
376: if (ch == '%') {
377: if (count < 3) {
378: throw new FileSystemException(
379: "vfs.provider/invalid-escape-sequence.error",
380: buffer.substring(index, index + count));
381: }
382:
383: // Decode
384: int dig1 = Character
385: .digit(buffer.charAt(index + 1), 16);
386: int dig2 = Character
387: .digit(buffer.charAt(index + 2), 16);
388: if (dig1 == -1 || dig2 == -1) {
389: throw new FileSystemException(
390: "vfs.provider/invalid-escape-sequence.error",
391: buffer.substring(index, index + 3));
392: }
393: char value = (char) (dig1 << 4 | dig2);
394:
395: boolean match = (value == '%')
396: || (fileNameParser != null && fileNameParser
397: .encodeCharacter(value));
398:
399: if (match) {
400: // this is a reserved character, not allowed to decode
401: index += 2;
402: count -= 2;
403: continue;
404: }
405:
406: // Replace
407: buffer.setCharAt(index, value);
408: buffer.delete(index + 1, index + 3);
409: count -= 2;
410: } else if (fileNameParser.encodeCharacter(ch)) {
411: // Encode
412: char[] digits = {
413: Character.forDigit(((ch >> 4) & 0xF), 16),
414: Character.forDigit((ch & 0xF), 16) };
415: buffer.setCharAt(index, '%');
416: buffer.insert(index + 1, digits);
417: index += 2;
418: }
419: }
420: }
421:
422: public static String extractQueryString(StringBuffer name) {
423: for (int pos = 0; pos < name.length(); pos++) {
424: if (name.charAt(pos) == '?') {
425: String queryString = name.substring(pos + 1);
426: name.delete(pos, name.length());
427: return queryString;
428: }
429: }
430:
431: return null;
432: }
433: }
|