001: /*
002: * Copyright 2001 Sun Microsystems, Inc. All rights reserved.
003: * PROPRIETARY/CONFIDENTIAL. Use of this product is subject to license terms.
004: */
005:
006: package com.sun.portal.search.util;
007:
008: import java.io.*;
009: import java.util.*;
010: import java.net.*;
011:
/**
 * Static helpers for decoding URL-encoded text, decimal HTML numeric
 * entities (<code>&amp;#NNN;</code>) and query strings.
 */
public class Decoder {

    /** Largest valid Unicode code point; larger entity values are left undecoded. */
    private static final int MAX_CODE_POINT = 0x10FFFF;

    /**
     * Decodes a URL encoded string. Also decodes html entities.
     * <p>
     * Processing happens in three steps:
     * <ol>
     * <li><code>+</code> becomes a space and every <code>%XX</code> (exactly
     *     two ASCII hex digits) becomes the character with that value. A
     *     <code>%</code> that is not followed by two hex digits is kept
     *     literally.</li>
     * <li>If <code>charset</code> is not <code>null</code>, the characters of
     *     the intermediate string are turned back into bytes via ISO-8859-1
     *     and those bytes are decoded with <code>charset</code>.</li>
     * <li>The result is passed through {@link #htmlEntityDecode(String)}
     *     (allows 16 bit input from 8 bit forms).</li>
     * </ol>
     * NB (original author's note): java.net.URLDecoder can only handle
     * (encoded) ASCII strings.
     *
     * @param s the <code>String</code> to decode; <code>null</code> yields
     *          <code>null</code>
     * @param charset name of the charset the percent-encoded bytes are in, or
     *          <code>null</code> to keep each decoded byte as one character
     * @return the newly decoded <code>String</code>
     * @throws UnsupportedEncodingException if <code>charset</code> is not a
     *          supported charset name
     */
    public static String urlDecode(String s, String charset)
            throws UnsupportedEncodingException {
        if (s == null) {
            return null;
        }
        int len = s.length();
        StringBuffer sb = new StringBuffer(len);
        for (int i = 0; i < len; i++) {
            char c = s.charAt(i);
            if (c == '+') {
                sb.append(' ');
            } else if (c == '%') {
                // Only a '%' followed by exactly two hex digits is an escape.
                // Validating the digits explicitly (instead of relying on
                // Integer.parseInt) rejects signed forms such as "%-1".
                int hi = (i + 2 < len) ? hexValue(s.charAt(i + 1)) : -1;
                int lo = (i + 2 < len) ? hexValue(s.charAt(i + 2)) : -1;
                if (hi >= 0 && lo >= 0) {
                    sb.append((char) ((hi << 4) | lo));
                    i += 2;
                } else {
                    sb.append(c); // malformed escape: keep the '%' as-is
                }
            } else {
                sb.append(c);
            }
        }

        // Undo conversion to external encoding
        String result = sb.toString();
        if (charset != null) {
            result = new String(result.getBytes("ISO-8859-1"), charset);
        }

        // handle html entities in urls (allows 16 bit input from 8 bit forms)
        return htmlEntityDecode(result);
    }

    /**
     * Decodes html entities of the decimal numeric form
     * <code>&amp;#NNN;</code>. All other text, including named and
     * hexadecimal entities, is copied through unchanged.
     * <p>
     * An entity is left undecoded (copied literally) when it has no digits,
     * contains a non-digit before the terminating <code>;</code>, or its
     * value is greater than U+10FFFF. Values above U+FFFF are emitted as a
     * surrogate pair.
     *
     * @param s the <code>String</code> to decode; <code>null</code> yields
     *          <code>null</code>
     * @return the newly decoded <code>String</code>, or <code>s</code> itself
     *         when it contains no <code>&amp;#</code> sequence
     */
    public static String htmlEntityDecode(String s) {
        if (s == null) {
            return null;
        }
        int start = s.indexOf("&#");
        if (start == -1) {
            return s; // nothing that could be an entity
        }

        StringBuffer sb = new StringBuffer(s.length());
        int pos = 0; // first character of s not yet copied to sb
        while (start != -1) {
            int semi = s.indexOf(';', start + 2);
            if (semi == -1) {
                break; // no terminator left, so no further entity can match
            }
            // Copy the plain text that precedes this candidate entity.
            sb.append(s.substring(pos, start));

            int codePoint = parseDecimalCodePoint(s, start + 2, semi);
            if (codePoint != -1) {
                appendCodePoint(sb, codePoint);
                pos = semi + 1; // skip ';'
            } else {
                // Malformed: keep the "&#" literally and rescan right after
                // it, so an entity starting inside the bad span is still seen.
                sb.append("&#");
                pos = start + 2;
            }
            start = s.indexOf("&#", pos);
        }
        sb.append(s.substring(pos));
        return sb.toString();
    }

    /**
     * Splits a query string into name/value pairs, URL-decodes both parts with
     * {@link #urlDecode(String, String)} and stores them in <code>m</code>.
     * <p>
     * Pairs are separated by <code>&amp;</code>; empty pairs are skipped. Each
     * pair is split at its first <code>=</code> only, so a value may itself
     * contain <code>=</code>. A pair without <code>=</code> is stored with an
     * empty-string value. A later pair with the same name overwrites an
     * earlier one.
     *
     * @param qs the raw query string; nothing is done when it is
     *          <code>null</code>
     * @param charset charset name passed to <code>urlDecode</code>, may be
     *          <code>null</code>
     * @param m map receiving decoded <code>String</code> names mapped to
     *          decoded <code>String</code> values
     * @throws UnsupportedEncodingException if <code>charset</code> is not a
     *          supported charset name
     */
    public static void decodeQueryString(String qs, String charset,
            Map m) throws UnsupportedEncodingException {

        if (qs == null) {
            return;
        }

        StringTokenizer st = new StringTokenizer(qs, "&");
        while (st.hasMoreTokens()) {
            String param = st.nextToken();
            int eq = param.indexOf('=');
            String rawName = (eq == -1) ? param : param.substring(0, eq);
            String rawVal = (eq == -1) ? "" : param.substring(eq + 1);
            m.put(urlDecode(rawName, charset), urlDecode(rawVal, charset));
        }
    }

    /** Returns the value of an ASCII hex digit, or -1 if <code>c</code> is not one. */
    private static int hexValue(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        return -1;
    }

    /**
     * Parses <code>s[from, to)</code> as an unsigned decimal code point.
     * Returns -1 when the range is empty, contains a non-digit, or the value
     * exceeds U+10FFFF.
     */
    private static int parseDecimalCodePoint(String s, int from, int to) {
        if (from >= to) {
            return -1;
        }
        int value = 0;
        for (int i = from; i < to; i++) {
            char c = s.charAt(i);
            if (c < '0' || c > '9') {
                return -1;
            }
            value = value * 10 + (c - '0');
            // Checking on every digit keeps value small enough that the
            // multiplication above can never overflow an int.
            if (value > MAX_CODE_POINT) {
                return -1;
            }
        }
        return value;
    }

    /**
     * Appends a code point to <code>sb</code>, as a surrogate pair when it is
     * above U+FFFF. Done by hand so the class needs no Java 5 API.
     */
    private static void appendCodePoint(StringBuffer sb, int codePoint) {
        if (codePoint <= 0xFFFF) {
            sb.append((char) codePoint);
        } else {
            int offset = codePoint - 0x10000;
            sb.append((char) (0xD800 + (offset >> 10)));
            sb.append((char) (0xDC00 + (offset & 0x3FF)));
        }
    }

}
|