001: /*
002: *******************************************************************************
003: * Copyright (C) 2003-2004, International Business Machines Corporation and *
004: * others. All Rights Reserved. *
005: *******************************************************************************
006: */
007: package com.ibm.icu.dev.test.stringprep;
008:
009: //import com.ibm.icu.impl.ICULocaleData;
010: import com.ibm.icu.impl.ICUResourceBundle;
011: import com.ibm.icu.lang.UCharacter;
012: import com.ibm.icu.lang.UCharacterDirection;
013: import com.ibm.icu.text.StringPrepParseException;
014: import com.ibm.icu.text.UCharacterIterator;
015: import com.ibm.icu.text.UnicodeSet;
016: import com.ibm.icu.text.Transliterator;
017:
018: /**
019: * @author ram
020: *
021: * To change the template for this generated type comment go to
022: * Window>Preferences>Java>Code Generation>Code and Comments
023: */
024: public class NamePrepTransform {
025:
026: private static final NamePrepTransform transform = new NamePrepTransform();
027:
028: private UnicodeSet labelSeparatorSet;
029: private UnicodeSet prohibitedSet;
030: private UnicodeSet unassignedSet;
031: private Transliterator mapTransform;
032: public static final int NONE = 0;
033: public static final int ALLOW_UNASSIGNED = 1;
034:
035: private NamePrepTransform() {
036: // load the resource bundle
037: // ICUResourceBundle bundle = (ICUResourceBundle)ICUResourceBundle.getBundleInstance("com/ibm/icu/dev/data/testdata","idna_rules", true);
038: ICUResourceBundle bundle = (ICUResourceBundle) ICUResourceBundle
039: .createBundle("com/ibm/icu/dev/data/testdata",
040: "idna_rules", NamePrepTransform.class
041: .getClassLoader());
042: String mapRules = bundle.getString("MapNoNormalization");
043: mapRules += bundle.getString("MapNFKC");
044: mapTransform = Transliterator.createFromRules("CaseMap",
045: mapRules, Transliterator.FORWARD);
046: labelSeparatorSet = new UnicodeSet(bundle
047: .getString("LabelSeparatorSet"));
048: prohibitedSet = new UnicodeSet(bundle
049: .getString("ProhibitedSet"));
050: unassignedSet = new UnicodeSet(bundle
051: .getString("UnassignedSet"));
052: }
053:
054: public static final NamePrepTransform getInstance() {
055: return transform;
056: }
057:
058: public static boolean isLabelSeparator(int ch) {
059: return transform.labelSeparatorSet.contains(ch);
060: }
061:
062: /*
063: 1) Map -- For each character in the input, check if it has a mapping
064: and, if so, replace it with its mapping.
065:
066: 2) Normalize -- Possibly normalize the result of step 1 using Unicode
067: normalization.
068:
069: 3) Prohibit -- Check for any characters that are not allowed in the
070: output. If any are found, return an error.
071:
072: 4) Check bidi -- Possibly check for right-to-left characters, and if
073: any are found, make sure that the whole string satisfies the
074: requirements for bidirectional strings. If the string does not
075: satisfy the requirements for bidirectional strings, return an
076: error.
077: [Unicode3.2] defines several bidirectional categories; each character
078: has one bidirectional category assigned to it. For the purposes of
079: the requirements below, an "RandALCat character" is a character that
080: has Unicode bidirectional categories "R" or "AL"; an "LCat character"
081: is a character that has Unicode bidirectional category "L". Note
082:
083:
084: that there are many characters which fall in neither of the above
085: definitions; Latin digits (<U+0030> through <U+0039>) are examples of
086: this because they have bidirectional category "EN".
087:
088: In any profile that specifies bidirectional character handling, all
089: three of the following requirements MUST be met:
090:
091: 1) The characters in section 5.8 MUST be prohibited.
092:
093: 2) If a string contains any RandALCat character, the string MUST NOT
094: contain any LCat character.
095:
096: 3) If a string contains any RandALCat character, a RandALCat
097: character MUST be the first character of the string, and a
098: RandALCat character MUST be the last character of the string.
099: */
100: public StringBuffer prepare(UCharacterIterator src, int options)
101: throws StringPrepParseException {
102: return prepare(src.getText(), options);
103: }
104:
105: private String map(String src, int options)
106: throws StringPrepParseException {
107: // map
108: boolean allowUnassigned = ((options & ALLOW_UNASSIGNED) > 0);
109: String caseMapOut = transform.mapTransform.transliterate(src);
110: UCharacterIterator iter = UCharacterIterator
111: .getInstance(caseMapOut);
112: int ch;
113: while ((ch = iter.nextCodePoint()) != UCharacterIterator.DONE) {
114: if (transform.unassignedSet.contains(ch) == true
115: && allowUnassigned == false) {
116: throw new StringPrepParseException(
117: "An unassigned code point was found in the input",
118: StringPrepParseException.UNASSIGNED_ERROR);
119: }
120: }
121: return caseMapOut;
122: }
123:
124: public StringBuffer prepare(String src, int options)
125: throws StringPrepParseException {
126:
127: int ch;
128: String mapOut = map(src, options);
129: UCharacterIterator iter = UCharacterIterator
130: .getInstance(mapOut);
131:
132: int direction = UCharacterDirection.CHAR_DIRECTION_COUNT, firstCharDir = UCharacterDirection.CHAR_DIRECTION_COUNT;
133: int rtlPos = -1, ltrPos = -1;
134: boolean rightToLeft = false, leftToRight = false;
135:
136: while ((ch = iter.nextCodePoint()) != UCharacterIterator.DONE) {
137:
138: if (transform.prohibitedSet.contains(ch) == true
139: && ch != 0x0020) {
140: throw new StringPrepParseException(
141: "A prohibited code point was found in the input",
142: StringPrepParseException.PROHIBITED_ERROR, iter
143: .getText(), iter.getIndex());
144: }
145:
146: direction = UCharacter.getDirection(ch);
147: if (firstCharDir == UCharacterDirection.CHAR_DIRECTION_COUNT) {
148: firstCharDir = direction;
149: }
150: if (direction == UCharacterDirection.LEFT_TO_RIGHT) {
151: leftToRight = true;
152: ltrPos = iter.getIndex() - 1;
153: }
154: if (direction == UCharacterDirection.RIGHT_TO_LEFT
155: || direction == UCharacterDirection.RIGHT_TO_LEFT_ARABIC) {
156: rightToLeft = true;
157: rtlPos = iter.getIndex() - 1;
158: }
159: }
160:
161: // satisfy 2
162: if (leftToRight == true && rightToLeft == true) {
163: throw new StringPrepParseException(
164: "The input does not conform to the rules for BiDi code points.",
165: StringPrepParseException.CHECK_BIDI_ERROR, iter
166: .getText(), (rtlPos > ltrPos) ? rtlPos
167: : ltrPos);
168: }
169:
170: //satisfy 3
171: if (rightToLeft == true
172: && !((firstCharDir == UCharacterDirection.RIGHT_TO_LEFT || firstCharDir == UCharacterDirection.RIGHT_TO_LEFT_ARABIC) && (direction == UCharacterDirection.RIGHT_TO_LEFT || direction == UCharacterDirection.RIGHT_TO_LEFT_ARABIC))) {
173: throw new StringPrepParseException(
174: "The input does not conform to the rules for BiDi code points.",
175: StringPrepParseException.CHECK_BIDI_ERROR, iter
176: .getText(), (rtlPos > ltrPos) ? rtlPos
177: : ltrPos);
178: }
179:
180: return new StringBuffer(mapOut);
181:
182: }
183:
184: }
|