001: /*
002: * (c) Copyright 2002, 2003, 2004, 2005, 2006, 2007, 2008 Hewlett-Packard Development Company, LP
003: * See end of file.
004: */
005:
006: package com.hp.hpl.jena.rdf.arp.impl;
007:
008: import com.ibm.icu.lang.UCharacter;
009: import com.ibm.icu.text.Normalizer;
010:
011: /**
012: * Some support for the Character Model Recommendation
013: * from the W3C (currently in second last call working
014: * draft).
015: *
016: * @author Jeremy Carroll
017: *
018: *
019: */
020: public class CharacterModel {
021: static private final boolean SWITCH_OFF = false;
022:
023: /** Is this string in Unicode Normal Form C.
024: * @param str The string to be tested.
025: */
026: static public boolean isNormalFormC(String str) {
027: // try {
028: return SWITCH_OFF
029: || Normalizer.isNormalized(str, Normalizer.NFC, 0);
030: // }
031: // catch (ArrayIndexOutOfBoundsException e) {
032: // // false below means "NFC" see javadoc for compose().
033: // String normalized = Normalizer.compose(str,false);
034: // return normalized.equals(str);
035: // }
036: }
037:
038: /* Does this string start with a composing character as defined
039: * by the
040: * <a href="http://www.w3.org/TR/charmod">
041: * Character Model 2nd Last Call Working Draft</a>.
042: * @param str The string to be tested.
043: */
044: static public boolean startsWithComposingCharacter(String str) {
045: return SWITCH_OFF ? false : (str.length() == 0 ? false
046: : isComposingChar(str.charAt(0)));
047: }
048:
049: /** Is this string fully normalized as defined
050: * by the
051: * <a href="http://www.w3.org/TR/charmod">
052: * Character Model 2nd Last Call Working Draft</a>.
053: * @param str The string to be tested.
054: */
055: static public boolean isFullyNormalizedConstruct(String str) {
056: return SWITCH_OFF
057: || (isNormalFormC(str) && !startsWithComposingCharacter(str));
058: }
059:
060: /** Is the character a composing character as defined
061: * by the
062: * <a href="http://www.w3.org/TR/charmod">
063: * Character Model 2nd Last Call Working Draft</a>.
064: * @param x The character to be tested.
065: */
066: static public boolean isComposingChar(char x) {
067: if (SWITCH_OFF)
068: return false;
069: switch (x) {
070: // Brahmi-derived scripts
071: case 0X09BE: // BENGALI VOWEL SIGN AA
072: case 0X09D7: // BENGALI AU LENGTH MARK
073: case 0X0B3E: // ORIYA VOWEL SIGN AA
074: case 0X0B56: // ORIYA AI LENGTH MARK
075: case 0X0B57: // ORIYA AU LENGTH MARK
076: case 0X0BBE: // TAMIL VOWEL SIGN AA
077: case 0X0BD7: // TAMIL AU LENGTH MARK
078: case 0X0CC2: // KANNADA VOWEL SIGN UU
079: case 0X0CD5: // KANNADA LENGTH MARK
080: case 0X0CD6: // KANNADA AI LENGTH MARK
081: case 0X0D3E: // MALAYALAM VOWEL SIGN AA
082: case 0X0D57: // MALAYALAM AU LENGTH MARK
083: case 0X0DCF: // SINHALA VOWEL SIGN AELA-PILLA
084: case 0X0DDF: // SINHALA VOWEL SING GAYANUKITTA
085: case 0X0FB5: // TIBETAN SUBJOINED LETTER SSA
086: case 0X0FB7: // TIBETAN SUBJOINED LETTER HA
087: case 0X102E: // MYANMAR VOWEL SIGN II
088: // Hangul vowels
089: case 0X1161: // HANGUL JUNGSEONG A
090: case 0X1162: // HANGUL JUNGSEONG AE
091: case 0X1163: // HANGUL JUNGSEONG YA
092: case 0X1164: // HANGUL JUNGSEONG YAE
093: case 0X1165: // HANGUL JUNGSEONG EO
094: case 0X1166: // HANGUL JUNGSEONG E
095: case 0X1167: // HANGUL JUNGSEONG YEO
096: case 0X1168: // HANGUL JUNGSEONG YE
097: case 0X1169: // HANGUL JUNGSEONG O
098: case 0X116A: // HANGUL JUNGSEONG WA
099: case 0X116B: // HANGUL JUNGSEONG WAE
100: case 0X116C: // HANGUL JUNGSEONG OE
101: case 0X116D: // HANGUL JUNGSEONG YO
102: case 0X116E: // HANGUL JUNGSEONG U
103: case 0X116F: // HANGUL JUNGSEONG WEO
104: case 0X1170: // HANGUL JUNGSEONG WE
105: case 0X1171: // HANGUL JUNGSEONG WI
106: case 0X1172: // HANGUL JUNGSEONG YU
107: case 0X1173: // HANGUL JUNGSEONG EU
108: case 0X1174: // HANGUL JUNGSEONG YI
109: case 0X1175: // HANGUL JUNGSEONG I
110: // Hangul trailing consonants
111: case 0X11A8: // HANGUL JONGSEONG KIYEOK
112: case 0X11A9: // HANGUL JONGSEONG SSANGKIYEOK
113: case 0X11AA: // HANGUL JONGSEONG KIYEOK-SIOS
114: case 0X11AB: // HANGUL JONGSEONG NIEUN
115: case 0X11AC: // HANGUL JONGSEONG NIEUN-CIEUC
116: case 0X11AD: // HANGUL JONGSEONG NIEUN-HIEUH
117: case 0X11AE: // HANGUL JONGSEONG TIKEUT
118: case 0X11AF: // HANGUL JONGSEONG RIEUL
119: case 0X11B0: // HANGUL JONGSEONG RIEUL-KIYEOK
120: case 0X11B1: // HANGUL JONGSEONG RIEUL-MIEUM
121: case 0X11B2: // HANGUL JONGSEONG RIEUL-PIEUP
122: case 0X11B3: // HANGUL JONGSEONG RIEUL-SIOS
123: case 0X11B4: // HANGUL JONGSEONG RIEUL-THIEUTH
124: case 0X11B5: // HANGUL JONGSEONG RIEUL-PHIEUPH
125: case 0X11B6: // HANGUL JONGSEONG RIEUL-HIEUH
126: case 0X11B7: // HANGUL JONGSEONG MIEUM
127: case 0X11B8: // HANGUL JONGSEONG PIEUP
128: case 0X11B9: // HANGUL JONGSEONG PIEUP-SIOS
129: case 0X11BA: // HANGUL JONGSEONG SIOS
130: case 0X11BB: // HANGUL JONGSEONG SSANGSIOS
131: case 0X11BC: // HANGUL JONGSEONG IEUNG
132: case 0X11BD: // HANGUL JONGSEONG CIEUC
133: case 0X11BE: // HANGUL JONGSEONG CHIEUCH
134: case 0X11BF: // HANGUL JONGSEONG KHIEUKH
135: case 0X11C0: // HANGUL JONGSEONG THIEUTH
136: case 0X11C1: // HANGUL JONGSEONG PHIEUPH
137: case 0X11C2: // HANGUL JONGSEONG HIEUH
138: return true;
139: default:
140: return UCharacter.getCombiningClass(x) != 0;
141: }
142: }
143: /*
144: static public void main(String args[]) {
145: int ch = Integer.parseInt(args[0],16);
146: System.out.println(UCharacter.getCombiningClass(ch));
147: }
148: */
149: }
150:
151: /*
152: * (c) Copyright 2002, 2003, 2004, 2005, 2006, 2007, 2008 Hewlett-Packard Development Company, LP
153: * All rights reserved.
154: *
155: * Redistribution and use in source and binary forms, with or without
156: * modification, are permitted provided that the following conditions
157: * are met:
158: * 1. Redistributions of source code must retain the above copyright
159: * notice, this list of conditions and the following disclaimer.
160: * 2. Redistributions in binary form must reproduce the above copyright
161: * notice, this list of conditions and the following disclaimer in the
162: * documentation and/or other materials provided with the distribution.
163: * 3. The name of the author may not be used to endorse or promote products
164: * derived from this software without specific prior written permission.
165:
166: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
167: * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
168: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
169: * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
170: * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
171: * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
172: * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
173: * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
174: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
175: * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
176: */
|