001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017:
018: /*
019: *
020: * Portions, Copyright © 1991-2005 Unicode, Inc. The following applies to Unicode.
021: *
022: * COPYRIGHT AND PERMISSION NOTICE
023: *
024: * Copyright © 1991-2005 Unicode, Inc. All rights reserved. Distributed under
025: * the Terms of Use in http://www.unicode.org/copyright.html. Permission is
026: * hereby granted, free of charge, to any person obtaining a copy of the
027: * Unicode data files and any associated documentation (the "Data Files")
028: * or Unicode software and any associated documentation (the "Software")
029: * to deal in the Data Files or Software without restriction, including without
030: * limitation the rights to use, copy, modify, merge, publish, distribute,
031: * and/or sell copies of the Data Files or Software, and to permit persons
032: * to whom the Data Files or Software are furnished to do so, provided that
033: * (a) the above copyright notice(s) and this permission notice appear with
034: * all copies of the Data Files or Software, (b) both the above copyright
035: * notice(s) and this permission notice appear in associated documentation,
036: * and (c) there is clear notice in each modified Data File or in the Software
037: * as well as in the documentation associated with the Data File(s) or Software
038: * that the data or software has been modified.
039:
040: * THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
041: * KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
042: * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
043: * OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS
044: * INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT
045: * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
046: * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
047: * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
048: * PERFORMANCE OF THE DATA FILES OR SOFTWARE.
049: *
050: * Except as contained in this notice, the name of a copyright holder shall
051: * not be used in advertising or otherwise to promote the sale, use or other
052: * dealings in these Data Files or Software without prior written
053: * authorization of the copyright holder.
054: *
055: * 2. Additional terms from the Database:
056: *
057: * Copyright © 1995-1999 Unicode, Inc. All Rights reserved.
058: *
059: * Disclaimer
060: *
061: * The Unicode Character Database is provided as is by Unicode, Inc.
062: * No claims are made as to fitness for any particular purpose. No warranties
063: * of any kind are expressed or implied. The recipient agrees to determine
064: * applicability of information provided. If this file has been purchased
065: * on magnetic or optical media from Unicode, Inc., the sole remedy for any claim
066: * will be exchange of defective media within 90 days of receipt. This disclaimer
067: * is applicable for all other data files accompanying the Unicode Character Database,
068: * some of which have been compiled by the Unicode Consortium, and some of which
069: * have been supplied by other sources.
070: *
071: * Limitations on Rights to Redistribute This Data
072: *
073: * Recipient is granted the right to make copies in any form for internal
074: * distribution and to freely use the information supplied in the creation of
075: * products supporting the UnicodeTM Standard. The files in
076: * the Unicode Character Database can be redistributed to third parties or other
077: * organizations (whether for profit or not) as long as this notice and the disclaimer
078: * notice are retained. Information can be extracted from these files and used
079: * in documentation or programs, as long as there is an accompanying notice
080: * indicating the source.
081: */
082:
083: package java.util.regex;
084:
085: /**
086: * This class represents high surrogate character.
087: */
088: class HighSurrogateCharSet extends JointSet {
089:
090: /*
091: * Note that we can use high and low surrogate characters
092: * that don't combine into supplementary code point.
093: * See http://www.unicode.org/reports/tr18/#Supplementary_Characters
094: */
095:
096: private char high;
097:
098: public HighSurrogateCharSet(char high) {
099: this .high = high;
100: }
101:
102: /**
103: * Returns the next.
104: */
105: public AbstractSet getNext() {
106: return this .next;
107: }
108:
109: /**
110: * Sets next abstract set.
111: * @param next
112: * The next to set.
113: */
114: public void setNext(AbstractSet next) {
115: this .next = next;
116: }
117:
118: public int matches(int stringIndex, CharSequence testString,
119: MatchResultImpl matchResult) {
120: int strLength = matchResult.getRightBound();
121:
122: if (stringIndex + 1 > strLength) {
123: matchResult.hitEnd = true;
124: return -1;
125: }
126:
127: char high = testString.charAt(stringIndex);
128:
129: if (stringIndex + 1 < strLength) {
130: char low = testString.charAt(stringIndex + 1);
131:
132: /*
133: * we consider high surrogate followed by
134: * low surrogate as a codepoint
135: */
136: if (Character.isLowSurrogate(low)) {
137: return -1;
138: }
139: }
140:
141: if (this .high == high) {
142: return next.matches(stringIndex + 1, testString,
143: matchResult);
144: }
145:
146: return -1;
147: }
148:
149: public int find(int strIndex, CharSequence testString,
150: MatchResultImpl matchResult) {
151: if (testString instanceof String) {
152: String testStr = (String) testString;
153: int strLength = matchResult.getRightBound();
154:
155: while (strIndex < strLength) {
156:
157: strIndex = testStr.indexOf(high, strIndex);
158: if (strIndex < 0)
159: return -1;
160:
161: if (strIndex + 1 < strLength) {
162:
163: /*
164: * we consider high surrogate followed by
165: * low surrogate as a codepoint
166: */
167: if (Character.isLowSurrogate(testStr
168: .charAt(strIndex + 1))) {
169: strIndex += 2;
170: continue;
171: }
172: }
173:
174: if (next.matches(strIndex + 1, testString, matchResult) >= 0) {
175: return strIndex;
176: }
177: strIndex++;
178: }
179:
180: return -1;
181: }
182:
183: return super .find(strIndex, testString, matchResult);
184: }
185:
186: public int findBack(int strIndex, int lastIndex,
187: CharSequence testString, MatchResultImpl matchResult) {
188: if (testString instanceof String) {
189: String testStr = (String) testString;
190: int strLength = matchResult.getRightBound();
191:
192: while (lastIndex >= strIndex) {
193: lastIndex = testStr.lastIndexOf(high, lastIndex);
194: if (lastIndex < 0 || lastIndex < strIndex) {
195: return -1;
196: }
197:
198: if (lastIndex + 1 < strLength) {
199:
200: /*
201: * we consider high surrogate followed by
202: * low surrogate as a codepoint
203: */
204: if (Character.isLowSurrogate(testStr
205: .charAt(lastIndex + 1))) {
206: lastIndex--;
207: continue;
208: }
209: }
210:
211: if (next
212: .matches(lastIndex + 1, testString, matchResult) >= 0) {
213: return lastIndex;
214: }
215:
216: lastIndex--;
217: }
218:
219: return -1;
220: }
221:
222: return super .findBack(strIndex, lastIndex, testString,
223: matchResult);
224: }
225:
226: protected String getName() {
227: return "" + high;
228: }
229:
230: protected int getChar() {
231: return high;
232: }
233:
234: public boolean first(AbstractSet set) {
235: if (set instanceof CharSet) {
236: return false;
237: } else if (set instanceof RangeSet) {
238: return false;
239: } else if (set instanceof SupplRangeSet) {
240: return false;
241: } else if (set instanceof SupplCharSet) {
242: return false;
243: } else if (set instanceof LowSurrogateCharSet) {
244: return false;
245: } else if (set instanceof HighSurrogateCharSet) {
246: return ((HighSurrogateCharSet) set).high == this .high;
247: }
248:
249: return true;
250: }
251:
252: public boolean hasConsumed(MatchResultImpl matchResult) {
253: return true;
254: }
255: }
|