001: package net.sf.saxon.charcode;
002:
003: /**
004: * This class defines properties of the CP1251 Cyrillic character set,
005: * as defined at <a href="http://www.microsoft.com/globaldev/reference/sbcs/1251.htm">http://www.microsoft.com/globaldev/reference/sbcs/1251.htm</a>.
006: */
007:
008: public class CP1251CharacterSet implements CharacterSet {
009:
010: public static CP1251CharacterSet theInstance = new CP1251CharacterSet();
011:
012: private CP1251CharacterSet() {
013: }
014:
015: public static CP1251CharacterSet getInstance() {
016: return theInstance;
017: }
018:
019: public final boolean inCharset(int c) {
020: return (c <= 0x7f) || (c >= 0x0401 && c <= 0x044F)
021: || (c >= 0x0451 && c <= 0x045f) || (c == 0x0490)
022: || (c == 0x0491) || (c == 0x2013) || (c == 0x2014)
023: || (c == 0x2018) || (c == 0x2019) || (c == 0x201A)
024: || (c == 0x201C) || (c == 0x201D) || (c == 0x201E)
025: || (c == 0x2020) || (c == 0x2021) || (c == 0x2022)
026: || (c == 0x2026) || (c == 0x2030) || (c == 0x2039)
027: || (c == 0x203A) || (c == 0x20AC) || (c == 0x2116)
028: || (c == 0x2122);
029: }
030:
031: }
032:
033: // 80 = U+0402 : CYRILLIC CAPITAL LETTER DJE
034: // 81 = U+0403 : CYRILLIC CAPITAL LETTER GJE
035: // 82 = U+201A : SINGLE LOW-9 QUOTATION MARK
036: // 83 = U+0453 : CYRILLIC SMALL LETTER GJE
037: // 84 = U+201E : DOUBLE LOW-9 QUOTATION MARK
038: // 85 = U+2026 : HORIZONTAL ELLIPSIS
039: // 86 = U+2020 : DAGGER
040: // 87 = U+2021 : DOUBLE DAGGER
041: // 88 = U+20AC : EURO SIGN
042: // 89 = U+2030 : PER MILLE SIGN
043: // 8A = U+0409 : CYRILLIC CAPITAL LETTER LJE
044: // 8B = U+2039 : SINGLE LEFT-POINTING ANGLE QUOTATION MARK
045: // 8C = U+040A : CYRILLIC CAPITAL LETTER NJE
046: // 8D = U+040C : CYRILLIC CAPITAL LETTER KJE
047: // 8E = U+040B : CYRILLIC CAPITAL LETTER TSHE
048: // 8F = U+040F : CYRILLIC CAPITAL LETTER DZHE
049: // 90 = U+0452 : CYRILLIC SMALL LETTER DJE
050: // 91 = U+2018 : LEFT SINGLE QUOTATION MARK
051: // 92 = U+2019 : RIGHT SINGLE QUOTATION MARK
052: // 93 = U+201C : LEFT DOUBLE QUOTATION MARK
053: // 94 = U+201D : RIGHT DOUBLE QUOTATION MARK
054: // 95 = U+2022 : BULLET
055: // 96 = U+2013 : EN DASH
056: // 97 = U+2014 : EM DASH
057: // 99 = U+2122 : TRADE MARK SIGN
058: // 9A = U+0459 : CYRILLIC SMALL LETTER LJE
059: // 9B = U+203A : SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
060: // 9C = U+045A : CYRILLIC SMALL LETTER NJE
061: // 9D = U+045C : CYRILLIC SMALL LETTER KJE
062: // 9E = U+045B : CYRILLIC SMALL LETTER TSHE
063: // 9F = U+045F : CYRILLIC SMALL LETTER DZHE
064: // A0 = U+00A0 : NO-BREAK SPACE
065: // A1 = U+040E : CYRILLIC CAPITAL LETTER SHORT U
066: // A2 = U+045E : CYRILLIC SMALL LETTER SHORT U
067: // A3 = U+0408 : CYRILLIC CAPITAL LETTER JE
068: // A4 = U+00A4 : CURRENCY SIGN
069: // A5 = U+0490 : CYRILLIC CAPITAL LETTER GHE WITH UPTURN
070: // A6 = U+00A6 : BROKEN BAR
071: // A7 = U+00A7 : SECTION SIGN
072: // A8 = U+0401 : CYRILLIC CAPITAL LETTER IO
073: // A9 = U+00A9 : COPYRIGHT SIGN
074: // AA = U+0404 : CYRILLIC CAPITAL LETTER UKRAINIAN IE
075: // AB = U+00AB : LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
076: // AC = U+00AC : NOT SIGN
077: // AD = U+00AD : SOFT HYPHEN
078: // AE = U+00AE : REGISTERED SIGN
079: // AF = U+0407 : CYRILLIC CAPITAL LETTER YI
080: // B0 = U+00B0 : DEGREE SIGN
081: // B1 = U+00B1 : PLUS-MINUS SIGN
082: // B2 = U+0406 : CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
083: // B3 = U+0456 : CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
084: // B4 = U+0491 : CYRILLIC SMALL LETTER GHE WITH UPTURN
085: // B5 = U+00B5 : MICRO SIGN
086: // B6 = U+00B6 : PILCROW SIGN
087: // B7 = U+00B7 : MIDDLE DOT
088: // B8 = U+0451 : CYRILLIC SMALL LETTER IO
089: // B9 = U+2116 : NUMERO SIGN
090: // BA = U+0454 : CYRILLIC SMALL LETTER UKRAINIAN IE
091: // BB = U+00BB : RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
092: // BC = U+0458 : CYRILLIC SMALL LETTER JE
093: // BD = U+0405 : CYRILLIC CAPITAL LETTER DZE
094: // BE = U+0455 : CYRILLIC SMALL LETTER DZE
095: // BF = U+0457 : CYRILLIC SMALL LETTER YI
096: // C0 = U+0410 : CYRILLIC CAPITAL LETTER A
097: // C1 = U+0411 : CYRILLIC CAPITAL LETTER BE
098: // C2 = U+0412 : CYRILLIC CAPITAL LETTER VE
099: // C3 = U+0413 : CYRILLIC CAPITAL LETTER GHE
100: // C4 = U+0414 : CYRILLIC CAPITAL LETTER DE
101: // C5 = U+0415 : CYRILLIC CAPITAL LETTER IE
102: // C6 = U+0416 : CYRILLIC CAPITAL LETTER ZHE
103: // C7 = U+0417 : CYRILLIC CAPITAL LETTER ZE
104: // C8 = U+0418 : CYRILLIC CAPITAL LETTER I
105: // C9 = U+0419 : CYRILLIC CAPITAL LETTER SHORT I
106: // CA = U+041A : CYRILLIC CAPITAL LETTER KA
107: // CB = U+041B : CYRILLIC CAPITAL LETTER EL
108: // CC = U+041C : CYRILLIC CAPITAL LETTER EM
109: // CD = U+041D : CYRILLIC CAPITAL LETTER EN
110: // CE = U+041E : CYRILLIC CAPITAL LETTER O
111: // CF = U+041F : CYRILLIC CAPITAL LETTER PE
112: // D0 = U+0420 : CYRILLIC CAPITAL LETTER ER
113: // D1 = U+0421 : CYRILLIC CAPITAL LETTER ES
114: // D2 = U+0422 : CYRILLIC CAPITAL LETTER TE
115: // D3 = U+0423 : CYRILLIC CAPITAL LETTER U
116: // D4 = U+0424 : CYRILLIC CAPITAL LETTER EF
117: // D5 = U+0425 : CYRILLIC CAPITAL LETTER HA
118: // D6 = U+0426 : CYRILLIC CAPITAL LETTER TSE
119: // D7 = U+0427 : CYRILLIC CAPITAL LETTER CHE
120: // D8 = U+0428 : CYRILLIC CAPITAL LETTER SHA
121: // D9 = U+0429 : CYRILLIC CAPITAL LETTER SHCHA
122: // DA = U+042A : CYRILLIC CAPITAL LETTER HARD SIGN
123: // DB = U+042B : CYRILLIC CAPITAL LETTER YERU
124: // DC = U+042C : CYRILLIC CAPITAL LETTER SOFT SIGN
125: // DD = U+042D : CYRILLIC CAPITAL LETTER E
126: // DE = U+042E : CYRILLIC CAPITAL LETTER YU
127: // DF = U+042F : CYRILLIC CAPITAL LETTER YA
128: // E0 = U+0430 : CYRILLIC SMALL LETTER A
129: // E1 = U+0431 : CYRILLIC SMALL LETTER BE
130: // E2 = U+0432 : CYRILLIC SMALL LETTER VE
131: // E3 = U+0433 : CYRILLIC SMALL LETTER GHE
132: // E4 = U+0434 : CYRILLIC SMALL LETTER DE
133: // E5 = U+0435 : CYRILLIC SMALL LETTER IE
134: // E6 = U+0436 : CYRILLIC SMALL LETTER ZHE
135: // E7 = U+0437 : CYRILLIC SMALL LETTER ZE
136: // E8 = U+0438 : CYRILLIC SMALL LETTER I
137: // E9 = U+0439 : CYRILLIC SMALL LETTER SHORT I
138: // EA = U+043A : CYRILLIC SMALL LETTER KA
139: // EB = U+043B : CYRILLIC SMALL LETTER EL
140: // EC = U+043C : CYRILLIC SMALL LETTER EM
141: // ED = U+043D : CYRILLIC SMALL LETTER EN
142: // EE = U+043E : CYRILLIC SMALL LETTER O
143: // EF = U+043F : CYRILLIC SMALL LETTER PE
144: // F0 = U+0440 : CYRILLIC SMALL LETTER ER
145: // F1 = U+0441 : CYRILLIC SMALL LETTER ES
146: // F2 = U+0442 : CYRILLIC SMALL LETTER TE
147: // F3 = U+0443 : CYRILLIC SMALL LETTER U
148: // F4 = U+0444 : CYRILLIC SMALL LETTER EF
149: // F5 = U+0445 : CYRILLIC SMALL LETTER HA
150: // F6 = U+0446 : CYRILLIC SMALL LETTER TSE
151: // F7 = U+0447 : CYRILLIC SMALL LETTER CHE
152: // F8 = U+0448 : CYRILLIC SMALL LETTER SHA
153: // F9 = U+0449 : CYRILLIC SMALL LETTER SHCHA
154: // FA = U+044A : CYRILLIC SMALL LETTER HARD SIGN
155: // FB = U+044B : CYRILLIC SMALL LETTER YERU
156: // FC = U+044C : CYRILLIC SMALL LETTER SOFT SIGN
157: // FD = U+044D : CYRILLIC SMALL LETTER E
158: // FE = U+044E : CYRILLIC SMALL LETTER YU
159: // FF = U+044F : CYRILLIC SMALL LETTER YA
160:
161: //
162: // The contents of this file are subject to the Mozilla Public License Version 1.0 (the "License");
163: // you may not use this file except in compliance with the License. You may obtain a copy of the
164: // License at http://www.mozilla.org/MPL/
165: //
166: // Software distributed under the License is distributed on an "AS IS" basis,
167: // WITHOUT WARRANTY OF ANY KIND, either express or implied.
168: // See the License for the specific language governing rights and limitations under the License.
169: //
170: // The Original Code is: all this file.
171: //
172: // The Initial Developer of the Original Code is
173: // Aleksei Makarov [makarov@iitam.omsk.net.ru]
174: //
175: // Portions created by (your name) are Copyright (C) (your legal entity). All Rights Reserved.
176: //
177: // Contributor(s): none.
178: //
|