001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017:
018: package java.util.regex;
019:
020: /**
021: * Represents canonical decomposition of
022: * Hangul syllable. Is used when
023: * CANON_EQ flag of Pattern class
024: * is specified.
025: */
026: class HangulDecomposedCharSet extends JointSet {
027:
028: /**
029: * Decomposed Hangul syllable.
030: */
031: private char[] decomposedChar;
032:
033: /**
034: * String representing syllable
035: */
036: private String decomposedCharUTF16 = null;
037:
038: /**
039: * Length of useful part of decomposedChar
040: * decomposedCharLength <= decomposedChar.length
041: */
042: private int decomposedCharLength;
043:
044: public HangulDecomposedCharSet(char[] decomposedChar,
045: int decomposedCharLength) {
046: this .decomposedChar = decomposedChar;
047: this .decomposedCharLength = decomposedCharLength;
048: }
049:
050: /**
051: * Returns the next.
052: */
053: public AbstractSet getNext() {
054: return this .next;
055: }
056:
057: /**
058: * Sets next abstract set.
059: * @param next
060: * The next to set.
061: */
062: public void setNext(AbstractSet next) {
063: this .next = next;
064: }
065:
066: /**
067: * Give string representation of this.
068: *
069: * @return - string representation.
070: */
071: private String getDecomposedChar() {
072: return (decomposedCharUTF16 == null) ? (decomposedCharUTF16 = new String(
073: decomposedChar))
074: : decomposedCharUTF16;
075: }
076:
077: protected String getName() {
078: return "decomposed Hangul syllable:" + getDecomposedChar(); //$NON-NLS-1$
079: }
080:
081: public int matches(int strIndex, CharSequence testString,
082: MatchResultImpl matchResult) {
083:
084: /*
085: * All decompositions for Hangul syllables have length that
086: * is less or equal Lexer.MAX_DECOMPOSITION_LENGTH
087: */
088: int rightBound = matchResult.getRightBound();
089: int SyllIndex = 0;
090: int[] decompSyllable = new int[Lexer.MAX_HANGUL_DECOMPOSITION_LENGTH];
091: int[] decompCurSymb;
092: char curSymb;
093:
094: /*
095: * For details about Hangul composition and decomposition see
096: * http://www.unicode.org/versions/Unicode4.0.0/ch03.pdf
097: * "3.12 Conjoining Jamo Behavior"
098: */
099: int LIndex = -1;
100: int VIndex = -1;
101: int TIndex = -1;
102:
103: if (strIndex >= rightBound) {
104: return -1;
105: }
106: curSymb = testString.charAt(strIndex++);
107: decompCurSymb = Lexer.getHangulDecomposition(curSymb);
108:
109: if (decompCurSymb == null) {
110:
111: /*
112: * We deal with ordinary letter or sequence of jamos
113: * at strIndex at testString.
114: */
115: decompSyllable[SyllIndex++] = curSymb;
116: LIndex = curSymb - Lexer.LBase;
117:
118: if ((LIndex < 0) || (LIndex >= Lexer.LCount)) {
119:
120: /*
121: * Ordinary letter, that doesn't match this
122: */
123: return -1;
124: }
125:
126: if (strIndex < rightBound) {
127: curSymb = testString.charAt(strIndex);
128: VIndex = curSymb - Lexer.VBase;
129: }
130:
131: if ((VIndex < 0) || (VIndex >= Lexer.VCount)) {
132:
133: /*
134: * Single L jamo doesn't compose Hangul syllable,
135: * so doesn't match
136: */
137: return -1;
138: }
139: strIndex++;
140: decompSyllable[SyllIndex++] = curSymb;
141:
142: if (strIndex < rightBound) {
143: curSymb = testString.charAt(strIndex);
144: TIndex = curSymb - Lexer.TBase;
145: }
146:
147: if ((TIndex < 0) || (TIndex >= Lexer.TCount)) {
148:
149: /*
150: * We deal with LV syllable at testString, so
151: * compare it to this
152: */
153: return ((decomposedCharLength == 2)
154: && (decompSyllable[0] == decomposedChar[0]) && (decompSyllable[1] == decomposedChar[1])) ? next
155: .matches(strIndex, testString, matchResult)
156: : -1;
157: }
158: strIndex++;
159: decompSyllable[SyllIndex++] = curSymb;
160:
161: /*
162: * We deal with LVT syllable at testString, so
163: * compare it to this
164: */
165: return ((decomposedCharLength == 3)
166: && (decompSyllable[0] == decomposedChar[0])
167: && (decompSyllable[1] == decomposedChar[1]) && (decompSyllable[2] == decomposedChar[2])) ? next
168: .matches(strIndex, testString, matchResult)
169: : -1;
170: } else {
171:
172: /*
173: * We deal with Hangul syllable at strIndex at testString.
174: * So we decomposed it to compare with this.
175: */
176: int i = 0;
177:
178: if (decompCurSymb.length != decomposedCharLength) {
179: return -1;
180: }
181:
182: for (; i < decomposedCharLength; i++) {
183: if (decompCurSymb[i] != decomposedChar[i]) {
184: return -1;
185: }
186: }
187: return next.matches(strIndex, testString, matchResult);
188: }
189: }
190:
191: public boolean first(AbstractSet set) {
192: return (set instanceof HangulDecomposedCharSet) ? ((HangulDecomposedCharSet) set)
193: .getDecomposedChar().equals(getDecomposedChar())
194: : true;
195: }
196:
197: public boolean hasConsumed(MatchResultImpl matchResult) {
198: return true;
199: }
200: }
|