001: /*
002: *******************************************************************************
003: * Copyright (C) 2003-2005, International Business Machines Corporation and *
004: * others. All Rights Reserved. *
005: *******************************************************************************
006: */
007: package com.ibm.icu.dev.test.stringprep;
008:
009: import com.ibm.icu.text.StringPrepParseException;
010: import com.ibm.icu.text.UCharacterIterator;
011:
012: /**
013: * @author ram
014: *
015: * To change the template for this generated type comment go to
016: * Window>Preferences>Java>Code Generation>Code and Comments
017: */
018: public class IDNAReference {
019:
020: private static char[] ACE_PREFIX = new char[] { 0x0078, 0x006E,
021: 0x002d, 0x002d };
022: private static final int ACE_PREFIX_LENGTH = 4;
023:
024: private static final int MAX_LABEL_LENGTH = 63;
025: private static final int HYPHEN = 0x002D;
026: private static final int CAPITAL_A = 0x0041;
027: private static final int CAPITAL_Z = 0x005A;
028: private static final int LOWER_CASE_DELTA = 0x0020;
029: private static final int FULL_STOP = 0x002E;
030:
031: public static final int DEFAULT = 0x0000;
032: public static final int ALLOW_UNASSIGNED = 0x0001;
033: public static final int USE_STD3_RULES = 0x0002;
034: public static final NamePrepTransform transform = NamePrepTransform
035: .getInstance();
036:
037: private static boolean startsWithPrefix(StringBuffer src) {
038: boolean startsWithPrefix = true;
039:
040: if (src.length() < ACE_PREFIX_LENGTH) {
041: return false;
042: }
043: for (int i = 0; i < ACE_PREFIX_LENGTH; i++) {
044: if (toASCIILower(src.charAt(i)) != ACE_PREFIX[i]) {
045: startsWithPrefix = false;
046: }
047: }
048: return startsWithPrefix;
049: }
050:
051: private static char toASCIILower(char ch) {
052: if (CAPITAL_A <= ch && ch <= CAPITAL_Z) {
053: return (char) (ch + LOWER_CASE_DELTA);
054: }
055: return ch;
056: }
057:
058: private static StringBuffer toASCIILower(StringBuffer src) {
059: StringBuffer dest = new StringBuffer();
060: for (int i = 0; i < src.length(); i++) {
061: dest.append(toASCIILower(src.charAt(i)));
062: }
063: return dest;
064: }
065:
066: private static int compareCaseInsensitiveASCII(StringBuffer s1,
067: StringBuffer s2) {
068: char c1, c2;
069: int rc;
070: for (int i = 0;/* no condition */; i++) {
071: /* If we reach the ends of both strings then they match */
072: if (i == s1.length()) {
073: return 0;
074: }
075:
076: c1 = s1.charAt(i);
077: c2 = s2.charAt(i);
078:
079: /* Case-insensitive comparison */
080: if (c1 != c2) {
081: rc = toASCIILower(c1) - toASCIILower(c2);
082: if (rc != 0) {
083: return rc;
084: }
085: }
086: }
087: }
088:
089: private static int getSeparatorIndex(char[] src, int start,
090: int limit) {
091: for (; start < limit; start++) {
092: if (NamePrepTransform.isLabelSeparator(src[start])) {
093: return start;
094: }
095: }
096: // we have not found the separator just return length
097: return start;
098: }
099:
100: private static boolean isLDHChar(int ch) {
101: // high runner case
102: if (ch > 0x007A) {
103: return false;
104: }
105: //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A]
106: if ((ch == 0x002D) || (0x0030 <= ch && ch <= 0x0039)
107: || (0x0041 <= ch && ch <= 0x005A)
108: || (0x0061 <= ch && ch <= 0x007A)) {
109: return true;
110: }
111: return false;
112: }
113:
114: public static StringBuffer convertToASCII(String src, int options)
115: throws StringPrepParseException {
116: UCharacterIterator iter = UCharacterIterator.getInstance(src);
117: return convertToASCII(iter, options);
118: }
119:
120: public static StringBuffer convertToASCII(StringBuffer src,
121: int options) throws StringPrepParseException {
122: UCharacterIterator iter = UCharacterIterator.getInstance(src);
123: return convertToASCII(iter, options);
124: }
125:
126: public static StringBuffer convertToASCII(
127: UCharacterIterator srcIter, int options)
128: throws StringPrepParseException {
129:
130: char[] caseFlags = null;
131:
132: // the source contains all ascii codepoints
133: boolean srcIsASCII = true;
134: // assume the source contains all LDH codepoints
135: boolean srcIsLDH = true;
136:
137: //get the options
138: boolean useSTD3ASCIIRules = ((options & USE_STD3_RULES) != 0);
139:
140: int ch;
141: // step 1
142: while ((ch = srcIter.next()) != UCharacterIterator.DONE) {
143: if (ch > 0x7f) {
144: srcIsASCII = false;
145: }
146: }
147: int failPos = -1;
148: srcIter.setToStart();
149: StringBuffer processOut = null;
150: // step 2 is performed only if the source contains non ASCII
151: if (!srcIsASCII) {
152: // step 2
153: processOut = transform.prepare(srcIter, options);
154: } else {
155: processOut = new StringBuffer(srcIter.getText());
156: }
157: int poLen = processOut.length();
158: if (poLen == 0) {
159: throw new StringPrepParseException(
160: "Found zero length lable after NamePrep.",
161: StringPrepParseException.ZERO_LENGTH_LABEL);
162: }
163: StringBuffer dest = new StringBuffer();
164:
165: // reset the variable to verify if output of prepare is ASCII or not
166: srcIsASCII = true;
167:
168: // step 3 & 4
169: for (int j = 0; j < poLen; j++) {
170: ch = processOut.charAt(j);
171: if (ch > 0x7F) {
172: srcIsASCII = false;
173: } else if (isLDHChar(ch) == false) {
174: // here we do not assemble surrogates
175: // since we know that LDH code points
176: // are in the ASCII range only
177: srcIsLDH = false;
178: failPos = j;
179: }
180: }
181:
182: if (useSTD3ASCIIRules == true) {
183: // verify 3a and 3b
184: if (srcIsLDH == false /* source contains some non-LDH characters */
185: || processOut.charAt(0) == HYPHEN
186: || processOut.charAt(processOut.length() - 1) == HYPHEN) {
187:
188: /* populate the parseError struct */
189: if (srcIsLDH == false) {
190: throw new StringPrepParseException(
191: "The input does not conform to the STD 3 ASCII rules",
192: StringPrepParseException.STD3_ASCII_RULES_ERROR,
193: processOut.toString(),
194: (failPos > 0) ? (failPos - 1) : failPos);
195: } else if (processOut.charAt(0) == HYPHEN) {
196: throw new StringPrepParseException(
197: "The input does not conform to the STD 3 ASCII rules",
198: StringPrepParseException.STD3_ASCII_RULES_ERROR,
199: processOut.toString(), 0);
200:
201: } else {
202: throw new StringPrepParseException(
203: "The input does not conform to the STD 3 ASCII rules",
204: StringPrepParseException.STD3_ASCII_RULES_ERROR,
205: processOut.toString(),
206: (poLen > 0) ? poLen - 1 : poLen);
207:
208: }
209: }
210: }
211: if (srcIsASCII) {
212: dest = processOut;
213: } else {
214: // step 5 : verify the sequence does not begin with ACE prefix
215: if (!startsWithPrefix(processOut)) {
216:
217: //step 6: encode the sequence with punycode
218: StringBuffer punyout = PunycodeReference.encode(
219: processOut, caseFlags);
220:
221: // convert all codepoints to lower case ASCII
222: StringBuffer lowerOut = toASCIILower(punyout);
223:
224: //Step 7: prepend the ACE prefix
225: dest.append(ACE_PREFIX, 0, ACE_PREFIX_LENGTH);
226: //Step 6: copy the contents in b2 into dest
227: dest.append(lowerOut);
228: } else {
229: throw new StringPrepParseException(
230: "The input does not start with the ACE Prefix.",
231: StringPrepParseException.ACE_PREFIX_ERROR,
232: processOut.toString(), 0);
233: }
234: }
235: if (dest.length() > MAX_LABEL_LENGTH) {
236: throw new StringPrepParseException(
237: "The labels in the input are too long. Length > 64.",
238: StringPrepParseException.LABEL_TOO_LONG_ERROR, dest
239: .toString(), 0);
240: }
241: return dest;
242: }
243:
244: public static StringBuffer convertIDNtoASCII(
245: UCharacterIterator iter, int options)
246: throws StringPrepParseException {
247: return convertIDNToASCII(iter.getText(), options);
248: }
249:
250: public static StringBuffer convertIDNtoASCII(StringBuffer str,
251: int options) throws StringPrepParseException {
252: return convertIDNToASCII(str.toString(), options);
253: }
254:
255: public static StringBuffer convertIDNToASCII(String src, int options)
256: throws StringPrepParseException {
257: char[] srcArr = src.toCharArray();
258: StringBuffer result = new StringBuffer();
259: int sepIndex = 0;
260: int oldSepIndex = 0;
261: for (;;) {
262: sepIndex = getSeparatorIndex(srcArr, sepIndex,
263: srcArr.length);
264: String label = new String(srcArr, oldSepIndex, sepIndex
265: - oldSepIndex);
266: //make sure this is not a root label separator.
267: if (!(label.length() == 0 && sepIndex == srcArr.length)) {
268: UCharacterIterator iter = UCharacterIterator
269: .getInstance(label);
270: result.append(convertToASCII(iter, options));
271: }
272: if (sepIndex == srcArr.length) {
273: break;
274: }
275: // increment the sepIndex to skip past the separator
276: sepIndex++;
277: oldSepIndex = sepIndex;
278: result.append((char) FULL_STOP);
279: }
280: return result;
281: }
282:
283: public static StringBuffer convertToUnicode(String src, int options)
284: throws StringPrepParseException {
285: UCharacterIterator iter = UCharacterIterator.getInstance(src);
286: return convertToUnicode(iter, options);
287: }
288:
289: public static StringBuffer convertToUnicode(StringBuffer src,
290: int options) throws StringPrepParseException {
291: UCharacterIterator iter = UCharacterIterator.getInstance(src);
292: return convertToUnicode(iter, options);
293: }
294:
295: public static StringBuffer convertToUnicode(
296: UCharacterIterator iter, int options)
297: throws StringPrepParseException {
298:
299: char[] caseFlags = null;
300:
301: //get the options
302: boolean useSTD3ASCIIRules = ((options & USE_STD3_RULES) != 0);
303:
304: // the source contains all ascii codepoints
305: boolean srcIsASCII = true;
306: // assume the source contains all LDH codepoints
307: boolean srcIsLDH = true;
308:
309: int failPos = -1;
310: int ch;
311: int saveIndex = iter.getIndex();
312: // step 1: find out if all the codepoints in src are ASCII
313: while ((ch = iter.next()) != UCharacterIterator.DONE) {
314: if (ch > 0x7F) {
315: srcIsASCII = false;
316: } else if (isLDHChar(ch) == false) {
317: failPos = iter.getIndex();
318: srcIsLDH = false;
319: }
320: }
321: StringBuffer processOut;
322:
323: if (srcIsASCII == false) {
324: // step 2: process the string
325: iter.setIndex(saveIndex);
326: processOut = transform.prepare(iter, options);
327:
328: } else {
329: //just point to source
330: processOut = new StringBuffer(iter.getText());
331: }
332: // TODO:
333: // The RFC states that
334: // <quote>
335: // ToUnicode never fails. If any step fails, then the original input
336: // is returned immediately in that step.
337: // </quote>
338:
339: //step 3: verify ACE Prefix
340: if (startsWithPrefix(processOut)) {
341:
342: //step 4: Remove the ACE Prefix
343: String temp = processOut.substring(ACE_PREFIX_LENGTH,
344: processOut.length());
345:
346: //step 5: Decode using punycode
347: StringBuffer decodeOut = PunycodeReference.decode(
348: new StringBuffer(temp), caseFlags);
349:
350: //step 6:Apply toASCII
351: StringBuffer toASCIIOut = convertToASCII(decodeOut, options);
352:
353: //step 7: verify
354: if (compareCaseInsensitiveASCII(processOut, toASCIIOut) != 0) {
355: throw new StringPrepParseException(
356: "The verification step prescribed by the RFC 3491 failed",
357: StringPrepParseException.VERIFICATION_ERROR);
358: }
359:
360: //step 8: return output of step 5
361: return decodeOut;
362:
363: } else {
364: // verify that STD3 ASCII rules are satisfied
365: if (useSTD3ASCIIRules == true) {
366: if (srcIsLDH == false /* source contains some non-LDH characters */
367: || processOut.charAt(0) == HYPHEN
368: || processOut.charAt(processOut.length() - 1) == HYPHEN) {
369:
370: if (srcIsLDH == false) {
371: throw new StringPrepParseException(
372: "The input does not conform to the STD 3 ASCII rules",
373: StringPrepParseException.STD3_ASCII_RULES_ERROR,
374: processOut.toString(),
375: (failPos > 0) ? (failPos - 1) : failPos);
376: } else if (processOut.charAt(0) == HYPHEN) {
377: throw new StringPrepParseException(
378: "The input does not conform to the STD 3 ASCII rules",
379: StringPrepParseException.STD3_ASCII_RULES_ERROR,
380: processOut.toString(), 0);
381:
382: } else {
383: throw new StringPrepParseException(
384: "The input does not conform to the STD 3 ASCII rules",
385: StringPrepParseException.STD3_ASCII_RULES_ERROR,
386: processOut.toString(), processOut
387: .length());
388:
389: }
390: }
391: }
392: // just return the source
393: return new StringBuffer(iter.getText());
394: }
395: }
396:
397: public static StringBuffer convertIDNToUnicode(
398: UCharacterIterator iter, int options)
399: throws StringPrepParseException {
400: return convertIDNToUnicode(iter.getText(), options);
401: }
402:
403: public static StringBuffer convertIDNToUnicode(StringBuffer str,
404: int options) throws StringPrepParseException {
405: return convertIDNToUnicode(str.toString(), options);
406: }
407:
408: public static StringBuffer convertIDNToUnicode(String src,
409: int options) throws StringPrepParseException {
410:
411: char[] srcArr = src.toCharArray();
412: StringBuffer result = new StringBuffer();
413: int sepIndex = 0;
414: int oldSepIndex = 0;
415: for (;;) {
416: sepIndex = getSeparatorIndex(srcArr, sepIndex,
417: srcArr.length);
418: String label = new String(srcArr, oldSepIndex, sepIndex
419: - oldSepIndex);
420: if (label.length() == 0 && sepIndex != srcArr.length) {
421: throw new StringPrepParseException(
422: "Found zero length lable after NamePrep.",
423: StringPrepParseException.ZERO_LENGTH_LABEL);
424: }
425: UCharacterIterator iter = UCharacterIterator
426: .getInstance(label);
427: result.append(convertToUnicode(iter, options));
428: if (sepIndex == srcArr.length) {
429: break;
430: }
431: // increment the sepIndex to skip past the separator
432: sepIndex++;
433: oldSepIndex = sepIndex;
434: result.append((char) FULL_STOP);
435: }
436: return result;
437: }
438:
439: // TODO: optimize
440: public static int compare(StringBuffer s1, StringBuffer s2,
441: int options) throws StringPrepParseException {
442: if (s1 == null || s2 == null) {
443: throw new IllegalArgumentException(
444: "One of the source buffers is null");
445: }
446: StringBuffer s1Out = convertIDNToASCII(s1.toString(), options);
447: StringBuffer s2Out = convertIDNToASCII(s2.toString(), options);
448: return compareCaseInsensitiveASCII(s1Out, s2Out);
449: }
450:
451: // TODO: optimize
452: public static int compare(String s1, String s2, int options)
453: throws StringPrepParseException {
454: if (s1 == null || s2 == null) {
455: throw new IllegalArgumentException(
456: "One of the source buffers is null");
457: }
458: StringBuffer s1Out = convertIDNToASCII(s1, options);
459: StringBuffer s2Out = convertIDNToASCII(s2, options);
460: return compareCaseInsensitiveASCII(s1Out, s2Out);
461: }
462:
463: // TODO: optimize
464: public static int compare(UCharacterIterator i1,
465: UCharacterIterator i2, int options)
466: throws StringPrepParseException {
467: if (i1 == null || i2 == null) {
468: throw new IllegalArgumentException(
469: "One of the source buffers is null");
470: }
471: StringBuffer s1Out = convertIDNToASCII(i1.getText(), options);
472: StringBuffer s2Out = convertIDNToASCII(i2.getText(), options);
473: return compareCaseInsensitiveASCII(s1Out, s2Out);
474: }
475:
476: }
|