001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017: package org.apache.cocoon.util;
018:
019: import org.apache.cocoon.environment.Request;
020: import org.apache.commons.lang.StringUtils;
021: import org.apache.commons.lang.SystemUtils;
022: import org.apache.excalibur.source.SourceParameters;
023:
024: import java.io.ByteArrayOutputStream;
025: import java.io.IOException;
026: import java.io.OutputStreamWriter;
027: import java.io.UnsupportedEncodingException;
028: import java.lang.reflect.InvocationTargetException;
029: import java.lang.reflect.Method;
030: import java.net.URLDecoder;
031: import java.net.URLEncoder;
032: import java.util.BitSet;
033: import java.util.Enumeration;
034: import java.util.Iterator;
035: import java.util.LinkedList;
036: import java.util.Map;
037: import java.util.StringTokenizer;
038:
039: /**
040: * A collection of <code>File</code>, <code>URL</code> and filename
041: * utility methods
042: *
043: * @author <a href="mailto:stefano@apache.org">Stefano Mazzocchi</a>
044: * @version CVS $Id: NetUtils.java 433543 2006-08-22 06:22:54Z crossley $
045: */
046: public class NetUtils {
047:
/**
 * Set of characters that are copied through unescaped when a path is
 * encoded: the safe characters as defined by RFC 1738. A bit is set at
 * the index equal to the character's code.
 */
private static BitSet safeCharacters;

/** Upper-case hexadecimal digits, indexed by the value (0-15) they represent. */
private static final char[] hexadecimal = "0123456789ABCDEF".toCharArray();

static {
    final String unescaped =
            // 'lowalpha', 'hialpha' and 'digit' rules
            "abcdefghijklmnopqrstuvwxyz"
            + "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
            + "0123456789"
            // 'safe' rule
            + "$-_.+"
            // 'extra' rule
            + "!*'(),"
            // special characters common to http: file: and ftp: URLs
            // ('fsegment' and 'hsegment' rules)
            + "/:@&=";

    final BitSet table = new BitSet(256);
    for (int k = 0; k < unescaped.length(); k++) {
        table.set(unescaped.charAt(k));
    }
    safeCharacters = table;
}
094:
095: /**
096: * Decode a path.
097: *
098: * <p>Interprets %XX (where XX is hexadecimal number) as UTF-8 encoded bytes.
099: * <p>The validity of the input path is not checked (i.e. characters that were not encoded will
100: * not be reported as errors).
101: * <p>This method differs from URLDecoder.decode in that it always uses UTF-8 (while URLDecoder
102: * uses the platform default encoding, often ISO-8859-1), and doesn't translate + characters to spaces.
103: *
104: * @param path the path to decode
105: * @return the decoded path
106: */
107: public static String decodePath(String path) {
108: StringBuffer translatedPath = new StringBuffer(path.length());
109: byte[] encodedchars = new byte[path.length() / 3];
110: int i = 0;
111: int length = path.length();
112: int encodedcharsLength = 0;
113: while (i < length) {
114: if (path.charAt(i) == '%') {
115: // we must process all consecutive %-encoded characters in one go, because they represent
116: // an UTF-8 encoded string, and in UTF-8 one character can be encoded as multiple bytes
117: while (i < length && path.charAt(i) == '%') {
118: if (i + 2 < length) {
119: try {
120: byte x = (byte) Integer.parseInt(path
121: .substring(i + 1, i + 3), 16);
122: encodedchars[encodedcharsLength] = x;
123: } catch (NumberFormatException e) {
124: throw new IllegalArgumentException(
125: "NetUtils.decodePath: "
126: + "Illegal hex characters in pattern %"
127: + path.substring(i + 1,
128: i + 3));
129: }
130: encodedcharsLength++;
131: i += 3;
132: } else {
133: throw new IllegalArgumentException(
134: "NetUtils.decodePath: "
135: + "% character should be followed by 2 hexadecimal characters.");
136: }
137: }
138: try {
139: String translatedPart = new String(encodedchars, 0,
140: encodedcharsLength, "UTF-8");
141: translatedPath.append(translatedPart);
142: } catch (UnsupportedEncodingException e) {
143: // the situation that UTF-8 is not supported is quite theoretical, so throw a runtime exception
144: throw new RuntimeException(
145: "Problem in decodePath: UTF-8 encoding not supported.");
146: }
147: encodedcharsLength = 0;
148: } else {
149: // a normal character
150: translatedPath.append(path.charAt(i));
151: i++;
152: }
153: }
154: return translatedPath.toString();
155: }
156:
157: /**
158: * Encode a path as required by the URL specification (<a href="http://www.ietf.org/rfc/rfc1738.txt">
159: * RFC 1738</a>). This differs from <code>java.net.URLEncoder.encode()</code> which encodes according
160: * to the <code>x-www-form-urlencoded</code> MIME format.
161: *
162: * @param path the path to encode
163: * @return the encoded path
164: */
165: public static String encodePath(String path) {
166: // stolen from org.apache.catalina.servlets.DefaultServlet ;)
167:
168: /**
169: * Note: This code portion is very similar to URLEncoder.encode.
170: * Unfortunately, there is no way to specify to the URLEncoder which
171: * characters should be encoded. Here, ' ' should be encoded as "%20"
172: * and '/' shouldn't be encoded.
173: */
174:
175: int maxBytesPerChar = 10;
176: StringBuffer rewrittenPath = new StringBuffer(path.length());
177: ByteArrayOutputStream buf = new ByteArrayOutputStream(
178: maxBytesPerChar);
179: OutputStreamWriter writer = null;
180: try {
181: writer = new OutputStreamWriter(buf, "UTF8");
182: } catch (Exception e) {
183: e.printStackTrace();
184: writer = new OutputStreamWriter(buf);
185: }
186:
187: for (int i = 0; i < path.length(); i++) {
188: int c = path.charAt(i);
189: if (safeCharacters.get(c)) {
190: rewrittenPath.append((char) c);
191: } else {
192: // convert to external encoding before hex conversion
193: try {
194: writer.write(c);
195: writer.flush();
196: } catch (IOException e) {
197: buf.reset();
198: continue;
199: }
200: byte[] ba = buf.toByteArray();
201: for (int j = 0; j < ba.length; j++) {
202: // Converting each byte in the buffer
203: byte toEncode = ba[j];
204: rewrittenPath.append('%');
205: int low = (toEncode & 0x0f);
206: int high = ((toEncode & 0xf0) >> 4);
207: rewrittenPath.append(hexadecimal[high]);
208: rewrittenPath.append(hexadecimal[low]);
209: }
210: buf.reset();
211: }
212: }
213: return rewrittenPath.toString();
214: }
215:
216: /**
217: * Returns the path of the given resource.
218: *
219: * @param uri The URI of the resource
220: * @return the resource path
221: */
222: public static String getPath(String uri) {
223: int i = uri.lastIndexOf('/');
224: if (i > -1) {
225: return uri.substring(0, i);
226: }
227: i = uri.indexOf(':');
228: return (i > -1) ? uri.substring(i + 1, uri.length()) : "";
229: }
230:
231: /**
232: * Remove path and file information from a filename returning only its
233: * extension component
234: *
235: * @param uri The filename
236: * @return The filename extension (with starting dot!) or null if filename extension is not found
237: */
238: public static String getExtension(String uri) {
239: int dot = uri.lastIndexOf('.');
240: if (dot > -1) {
241: uri = uri.substring(dot);
242: int slash = uri.lastIndexOf('/');
243: if (slash > -1) {
244: return null;
245: } else {
246: int sharp = uri.lastIndexOf('#');
247: if (sharp > -1) {
248: // uri starts with dot already
249: return uri.substring(0, sharp);
250: } else {
251: int mark = uri.lastIndexOf('?');
252: if (mark > -1) {
253: // uri starts with dot already
254: return uri.substring(0, mark);
255: } else {
256: return uri;
257: }
258: }
259: }
260: } else {
261: return null;
262: }
263: }
264:
265: /**
266: * Absolutize a relative resource path on the given absolute base path.
267: *
268: * @param path The absolute base path
269: * @param resource The relative resource path
270: * @return The absolutized resource path
271: */
272: public static String absolutize(String path, String resource) {
273: if (StringUtils.isEmpty(path)) {
274: return resource;
275: } else if (StringUtils.isEmpty(resource)) {
276: return path;
277: } else if (resource.charAt(0) == '/') {
278: // Resource path is already absolute
279: return resource;
280: }
281:
282: boolean slash = (path.charAt(path.length() - 1) == '/');
283:
284: StringBuffer b = new StringBuffer(path.length() + 1
285: + resource.length());
286: b.append(path);
287: if (!slash) {
288: b.append('/');
289: }
290: b.append(resource);
291: return b.toString();
292: }
293:
294: /**
295: * Relativize an absolute resource on a given absolute path.
296: *
297: * @param path The absolute path
298: * @param absoluteResource The absolute resource
299: * @return the resource relative to the given path
300: */
301: public static String relativize(String path, String absoluteResource) {
302: if (StringUtils.isEmpty(path)) {
303: return absoluteResource;
304: }
305:
306: if (path.charAt(path.length() - 1) != '/') {
307: path += "/";
308: }
309:
310: if (absoluteResource.startsWith(path)) {
311: // resource is direct descentant
312: return absoluteResource.substring(path.length());
313: } else {
314: // resource is not direct descendant
315: int index = StringUtils.indexOfDifference(path,
316: absoluteResource);
317: if (index > 0 && path.charAt(index - 1) != '/') {
318: index = path.substring(0, index).lastIndexOf('/');
319: index++;
320: }
321: String pathDiff = path.substring(index);
322: String resource = absoluteResource.substring(index);
323: int levels = StringUtils.countMatches(pathDiff, "/");
324: StringBuffer b = new StringBuffer(levels * 3
325: + resource.length());
326: for (int i = 0; i < levels; i++) {
327: b.append("../");
328: }
329: b.append(resource);
330: return b.toString();
331: }
332: }
333:
334: /**
335: * Normalize a uri containing ../ and ./ paths.
336: *
337: * @param uri The uri path to normalize
338: * @return The normalized uri
339: */
340: public static String normalize(String uri) {
341: if ("".equals(uri)) {
342: return uri;
343: }
344: int leadingSlashes = 0;
345: for (leadingSlashes = 0; leadingSlashes < uri.length()
346: && uri.charAt(leadingSlashes) == '/'; ++leadingSlashes) {
347: }
348: boolean isDir = (uri.charAt(uri.length() - 1) == '/');
349: StringTokenizer st = new StringTokenizer(uri, "/");
350: LinkedList clean = new LinkedList();
351: while (st.hasMoreTokens()) {
352: String token = st.nextToken();
353: if ("..".equals(token)) {
354: if (!clean.isEmpty() && !"..".equals(clean.getLast())) {
355: clean.removeLast();
356: if (!st.hasMoreTokens()) {
357: isDir = true;
358: }
359: } else {
360: clean.add("..");
361: }
362: } else if (!".".equals(token) && !"".equals(token)) {
363: clean.add(token);
364: }
365: }
366: StringBuffer sb = new StringBuffer();
367: while (leadingSlashes-- > 0) {
368: sb.append('/');
369: }
370: for (Iterator it = clean.iterator(); it.hasNext();) {
371: sb.append(it.next());
372: if (it.hasNext()) {
373: sb.append('/');
374: }
375: }
376: if (isDir && sb.length() > 0
377: && sb.charAt(sb.length() - 1) != '/') {
378: sb.append('/');
379: }
380: return sb.toString();
381: }
382:
383: /**
384: * Remove parameters from a uri.
385: * Resulting Map will have either String for single value attributes,
386: * or String arrays for multivalue attributes.
387: *
388: * @param uri The uri path to deparameterize.
389: * @param parameters The map that collects parameters.
390: * @return The cleaned uri
391: */
392: public static String deparameterize(String uri, Map parameters) {
393: int i = uri.lastIndexOf('?');
394: if (i == -1) {
395: return uri;
396: }
397:
398: String[] params = StringUtils.split(uri.substring(i + 1), '&');
399: for (int j = 0; j < params.length; j++) {
400: String p = params[j];
401: int k = p.indexOf('=');
402: if (k == -1) {
403: break;
404: }
405: String name = p.substring(0, k);
406: String value = p.substring(k + 1);
407: Object values = parameters.get(name);
408: if (values == null) {
409: parameters.put(name, value);
410: } else if (values.getClass().isArray()) {
411: String[] v1 = (String[]) values;
412: String[] v2 = new String[v1.length + 1];
413: System.arraycopy(v1, 0, v2, 0, v1.length);
414: v2[v1.length] = value;
415: parameters.put(name, v2);
416: } else {
417: parameters.put(name, new String[] { values.toString(),
418: value });
419: }
420: }
421: return uri.substring(0, i);
422: }
423:
424: /**
425: * Add parameters stored in the Map to the uri string.
426: * Map can contain Object values which will be converted to the string,
427: * or Object arrays, which will be treated as multivalue attributes.
428: *
429: * @param uri The uri to add parameters into
430: * @param parameters The map containing parameters to be added
431: * @return The uri with added parameters
432: */
433: public static String parameterize(String uri, Map parameters) {
434: if (parameters.size() == 0) {
435: return uri;
436: }
437:
438: StringBuffer buffer = new StringBuffer(uri);
439: if (uri.indexOf('?') == -1) {
440: buffer.append('?');
441: } else {
442: buffer.append('&');
443: }
444:
445: for (Iterator i = parameters.entrySet().iterator(); i.hasNext();) {
446: Map.Entry entry = (Map.Entry) i.next();
447: if (entry.getValue().getClass().isArray()) {
448: Object[] value = (Object[]) entry.getValue();
449: for (int j = 0; j < value.length; j++) {
450: if (j > 0) {
451: buffer.append('&');
452: }
453: buffer.append(entry.getKey());
454: buffer.append('=');
455: buffer.append(value[j]);
456: }
457: } else {
458: buffer.append(entry.getKey());
459: buffer.append('=');
460: buffer.append(entry.getValue());
461: }
462: if (i.hasNext()) {
463: buffer.append('&');
464: }
465: }
466: return buffer.toString();
467: }
468:
469: /**
470: * Create new <code>SourceParameters</code> with the same
471: * parameters as the current request
472: */
473: public static SourceParameters createParameters(Request request) {
474: final SourceParameters pars = new SourceParameters();
475:
476: if (null != request) {
477: final Enumeration names = request.getParameterNames();
478: while (names.hasMoreElements()) {
479: final String current = (String) names.nextElement();
480: final String[] values = request
481: .getParameterValues(current);
482: if (null != values) {
483: for (int i = 0; i < values.length; i++) {
484: pars.setParameter(current, values[i]);
485: }
486: }
487: }
488: }
489:
490: return pars;
491: }
492:
493: /**
494: * Remove any authorisation details from a URI
495: */
496: public static String removeAuthorisation(String uri) {
497: if (uri.indexOf("@") != -1
498: && (uri.startsWith("ftp://") || uri
499: .startsWith("http://"))) {
500: return uri.substring(0, uri.indexOf(":") + 2)
501: + uri.substring(uri.indexOf("@") + 1);
502: }
503: return uri;
504: }
505:
506: // FIXME Remove when JDK1.3 support is removed.
507: private static Method urlEncode;
508: private static Method urlDecode;
509:
510: static {
511: if (SystemUtils.isJavaVersionAtLeast(140)) {
512: try {
513: urlEncode = URLEncoder.class.getMethod("encode",
514: new Class[] { String.class, String.class });
515: urlDecode = URLDecoder.class.getMethod("decode",
516: new Class[] { String.class, String.class });
517: } catch (NoSuchMethodException e) {
518: // EMPTY
519: }
520: } else {
521: urlEncode = null;
522: urlDecode = null;
523: }
524: }
525:
526: /**
527: * Pass through to the {@link java.net.URLEncoder}. If running under JDK < 1.4,
528: * default encoding will always be used.
529: */
530: public static String encode(String s, String enc)
531: throws UnsupportedEncodingException {
532: if (urlEncode != null) {
533: try {
534: return (String) urlEncode.invoke(s, new Object[] { s,
535: enc });
536: } catch (IllegalAccessException e) {
537: // EMPTY
538: } catch (InvocationTargetException e) {
539: if (e.getTargetException() instanceof UnsupportedEncodingException) {
540: throw (UnsupportedEncodingException) e
541: .getTargetException();
542: } else if (e.getTargetException() instanceof RuntimeException) {
543: throw (RuntimeException) e.getTargetException();
544: }
545: }
546: }
547: return URLEncoder.encode(s);
548: }
549:
550: /**
551: * Pass through to the {@link java.net.URLDecoder}. If running under JDK < 1.4,
552: * default encoding will always be used.
553: */
554: public static String decode(String s, String enc)
555: throws UnsupportedEncodingException {
556: if (urlDecode != null) {
557: try {
558: return (String) urlDecode.invoke(s, new Object[] { s,
559: enc });
560: } catch (IllegalAccessException e) {
561: // EMPTY
562: } catch (InvocationTargetException e) {
563: if (e.getTargetException() instanceof UnsupportedEncodingException) {
564: throw (UnsupportedEncodingException) e
565: .getTargetException();
566: } else if (e.getTargetException() instanceof RuntimeException) {
567: throw (RuntimeException) e.getTargetException();
568: }
569: }
570: }
571: return URLDecoder.decode(s);
572: }
573: }
|