001: package net.sf.saxon.charcode;
002:
003: /**
004: * This class defines properties of the CP1252 (Latin 1) character set,
005: * as defined at <a ref="http://www.microsoft.com/globaldev/reference/sbcs/1252.htm">http://www.microsoft.com/globaldev/reference/sbcs/1252.htm</a>.
006: *
007: * <p>This file was kindly provided by Sectra AB, Sweden to the DocBook community.
008: * Author: Pontus Haglund</p>
009: */
010:
011: public class CP1252CharacterSet implements CharacterSet {
012:
013: public static CP1252CharacterSet theInstance = new CP1252CharacterSet();
014:
015: private CP1252CharacterSet() {
016: }
017:
018: public static CP1252CharacterSet getInstance() {
019: return theInstance;
020: }
021:
022: public final boolean inCharset(int c) {
023:
024: return (c >= 0x00 && c <= 0x7F) || (c >= 0xA0 && c <= 0xFF)
025: || (c == 0x20AC) || (c == 0x201A) || (c == 0x0192)
026: || (c == 0x201E) || (c == 0x2026) || (c == 0x2020)
027: || (c == 0x2021) || (c == 0x02C6) || (c == 0x2030)
028: || (c == 0x0160) || (c == 0x2039) || (c == 0x0152)
029: || (c == 0x017D) || (c == 0x2018) || (c == 0x2019)
030: || (c == 0x201C) || (c == 0x201D) || (c == 0x2022)
031: || (c == 0x2013) || (c == 0x2014) || (c == 0x02DC)
032: || (c == 0x2122) || (c == 0x0161) || (c == 0x203A)
033: || (c == 0x0153) || (c == 0x017E) || (c == 0x0178);
034:
035: }
036: }
037:
038: //00 = U+0000 : NULL
039: //01 = U+0001 : START OF HEADING
040: //02 = U+0002 : START OF TEXT
041: //03 = U+0003 : END OF TEXT
042: //04 = U+0004 : END OF TRANSMISSION
043: //05 = U+0005 : ENQUIRY
044: //06 = U+0006 : ACKNOWLEDGE
045: //07 = U+0007 : BELL
046: //08 = U+0008 : BACKSPACE
047: //09 = U+0009 : HORIZONTAL TABULATION
048: //0A = U+000A : LINE FEED
049: //0B = U+000B : VERTICAL TABULATION
050: //0C = U+000C : FORM FEED
051: //0D = U+000D : CARRIAGE RETURN
052: //0E = U+000E : SHIFT OUT
053: //0F = U+000F : SHIFT IN
054: //10 = U+0010 : DATA LINK ESCAPE
055: //11 = U+0011 : DEVICE CONTROL ONE
056: //12 = U+0012 : DEVICE CONTROL TWO
057: //13 = U+0013 : DEVICE CONTROL THREE
058: //14 = U+0014 : DEVICE CONTROL FOUR
059: //15 = U+0015 : NEGATIVE ACKNOWLEDGE
060: //16 = U+0016 : SYNCHRONOUS IDLE
061: //17 = U+0017 : END OF TRANSMISSION BLOCK
062: //18 = U+0018 : CANCEL
063: //19 = U+0019 : END OF MEDIUM
064: //1A = U+001A : SUBSTITUTE
065: //1B = U+001B : ESCAPE
066: //1C = U+001C : FILE SEPARATOR
067: //1D = U+001D : GROUP SEPARATOR
068: //1E = U+001E : RECORD SEPARATOR
069: //1F = U+001F : UNIT SEPARATOR
070: //20 = U+0020 : SPACE
071: //21 = U+0021 : EXCLAMATION MARK
072: //22 = U+0022 : QUOTATION MARK
073: //23 = U+0023 : NUMBER SIGN
074: //24 = U+0024 : DOLLAR SIGN
075: //25 = U+0025 : PERCENT SIGN
076: //26 = U+0026 : AMPERSAND
077: //27 = U+0027 : APOSTROPHE
078: //28 = U+0028 : LEFT PARENTHESIS
079: //29 = U+0029 : RIGHT PARENTHESIS
080: //2A = U+002A : ASTERISK
081: //2B = U+002B : PLUS SIGN
082: //2C = U+002C : COMMA
083: //2D = U+002D : HYPHEN-MINUS
084: //2E = U+002E : FULL STOP
085: //2F = U+002F : SOLIDUS
086: //30 = U+0030 : DIGIT ZERO
087: //31 = U+0031 : DIGIT ONE
088: //32 = U+0032 : DIGIT TWO
089: //33 = U+0033 : DIGIT THREE
090: //34 = U+0034 : DIGIT FOUR
091: //35 = U+0035 : DIGIT FIVE
092: //36 = U+0036 : DIGIT SIX
093: //37 = U+0037 : DIGIT SEVEN
094: //38 = U+0038 : DIGIT EIGHT
095: //39 = U+0039 : DIGIT NINE
096: //3A = U+003A : COLON
097: //3B = U+003B : SEMICOLON
098: //3C = U+003C : LESS-THAN SIGN
099: //3D = U+003D : EQUALS SIGN
100: //3E = U+003E : GREATER-THAN SIGN
101: //3F = U+003F : QUESTION MARK
102: //40 = U+0040 : COMMERCIAL AT
103: //41 = U+0041 : LATIN CAPITAL LETTER A
104: //42 = U+0042 : LATIN CAPITAL LETTER B
105: //43 = U+0043 : LATIN CAPITAL LETTER C
106: //44 = U+0044 : LATIN CAPITAL LETTER D
107: //45 = U+0045 : LATIN CAPITAL LETTER E
108: //46 = U+0046 : LATIN CAPITAL LETTER F
109: //47 = U+0047 : LATIN CAPITAL LETTER G
110: //48 = U+0048 : LATIN CAPITAL LETTER H
111: //49 = U+0049 : LATIN CAPITAL LETTER I
112: //4A = U+004A : LATIN CAPITAL LETTER J
113: //4B = U+004B : LATIN CAPITAL LETTER K
114: //4C = U+004C : LATIN CAPITAL LETTER L
115: //4D = U+004D : LATIN CAPITAL LETTER M
116: //4E = U+004E : LATIN CAPITAL LETTER N
117: //4F = U+004F : LATIN CAPITAL LETTER O
118: //50 = U+0050 : LATIN CAPITAL LETTER P
119: //51 = U+0051 : LATIN CAPITAL LETTER Q
120: //52 = U+0052 : LATIN CAPITAL LETTER R
121: //53 = U+0053 : LATIN CAPITAL LETTER S
122: //54 = U+0054 : LATIN CAPITAL LETTER T
123: //55 = U+0055 : LATIN CAPITAL LETTER U
124: //56 = U+0056 : LATIN CAPITAL LETTER V
125: //57 = U+0057 : LATIN CAPITAL LETTER W
126: //58 = U+0058 : LATIN CAPITAL LETTER X
127: //59 = U+0059 : LATIN CAPITAL LETTER Y
128: //5A = U+005A : LATIN CAPITAL LETTER Z
129: //5B = U+005B : LEFT SQUARE BRACKET
130: //5C = U+005C : REVERSE SOLIDUS
131: //5D = U+005D : RIGHT SQUARE BRACKET
132: //5E = U+005E : CIRCUMFLEX ACCENT
133: //5F = U+005F : LOW LINE
134: //60 = U+0060 : GRAVE ACCENT
135: //61 = U+0061 : LATIN SMALL LETTER A
136: //62 = U+0062 : LATIN SMALL LETTER B
137: //63 = U+0063 : LATIN SMALL LETTER C
138: //64 = U+0064 : LATIN SMALL LETTER D
139: //65 = U+0065 : LATIN SMALL LETTER E
140: //66 = U+0066 : LATIN SMALL LETTER F
141: //67 = U+0067 : LATIN SMALL LETTER G
142: //68 = U+0068 : LATIN SMALL LETTER H
143: //69 = U+0069 : LATIN SMALL LETTER I
144: //6A = U+006A : LATIN SMALL LETTER J
145: //6B = U+006B : LATIN SMALL LETTER K
146: //6C = U+006C : LATIN SMALL LETTER L
147: //6D = U+006D : LATIN SMALL LETTER M
148: //6E = U+006E : LATIN SMALL LETTER N
149: //6F = U+006F : LATIN SMALL LETTER O
150: //70 = U+0070 : LATIN SMALL LETTER P
151: //71 = U+0071 : LATIN SMALL LETTER Q
152: //72 = U+0072 : LATIN SMALL LETTER R
153: //73 = U+0073 : LATIN SMALL LETTER S
154: //74 = U+0074 : LATIN SMALL LETTER T
155: //75 = U+0075 : LATIN SMALL LETTER U
156: //76 = U+0076 : LATIN SMALL LETTER V
157: //77 = U+0077 : LATIN SMALL LETTER W
158: //78 = U+0078 : LATIN SMALL LETTER X
159: //79 = U+0079 : LATIN SMALL LETTER Y
160: //7A = U+007A : LATIN SMALL LETTER Z
161: //7B = U+007B : LEFT CURLY BRACKET
162: //7C = U+007C : VERTICAL LINE
163: //7D = U+007D : RIGHT CURLY BRACKET
164: //7E = U+007E : TILDE
165: //7F = U+007F : DELETE
166: //80 = U+20AC : EURO SIGN
167: //82 = U+201A : SINGLE LOW-9 QUOTATION MARK
168: //83 = U+0192 : LATIN SMALL LETTER F WITH HOOK
169: //84 = U+201E : DOUBLE LOW-9 QUOTATION MARK
170: //85 = U+2026 : HORIZONTAL ELLIPSIS
171: //86 = U+2020 : DAGGER
172: //87 = U+2021 : DOUBLE DAGGER
173: //88 = U+02C6 : MODIFIER LETTER CIRCUMFLEX ACCENT
174: //89 = U+2030 : PER MILLE SIGN
175: //8A = U+0160 : LATIN CAPITAL LETTER S WITH CARON
176: //8B = U+2039 : SINGLE LEFT-POINTING ANGLE QUOTATION MARK
177: //8C = U+0152 : LATIN CAPITAL LIGATURE OE
178: //8E = U+017D : LATIN CAPITAL LETTER Z WITH CARON
179: //91 = U+2018 : LEFT SINGLE QUOTATION MARK
180: //92 = U+2019 : RIGHT SINGLE QUOTATION MARK
181: //93 = U+201C : LEFT DOUBLE QUOTATION MARK
182: //94 = U+201D : RIGHT DOUBLE QUOTATION MARK
183: //95 = U+2022 : BULLET
184: //96 = U+2013 : EN DASH
185: //97 = U+2014 : EM DASH
186: //98 = U+02DC : SMALL TILDE
187: //99 = U+2122 : TRADE MARK SIGN
188: //9A = U+0161 : LATIN SMALL LETTER S WITH CARON
189: //9B = U+203A : SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
190: //9C = U+0153 : LATIN SMALL LIGATURE OE
191: //9E = U+017E : LATIN SMALL LETTER Z WITH CARON
192: //9F = U+0178 : LATIN CAPITAL LETTER Y WITH DIAERESIS
193: //A0 = U+00A0 : NO-BREAK SPACE
194: //A1 = U+00A1 : INVERTED EXCLAMATION MARK
195: //A2 = U+00A2 : CENT SIGN
196: //A3 = U+00A3 : POUND SIGN
197: //A4 = U+00A4 : CURRENCY SIGN
198: //A5 = U+00A5 : YEN SIGN
199: //A6 = U+00A6 : BROKEN BAR
200: //A7 = U+00A7 : SECTION SIGN
201: //A8 = U+00A8 : DIAERESIS
202: //A9 = U+00A9 : COPYRIGHT SIGN
203: //AA = U+00AA : FEMININE ORDINAL INDICATOR
204: //AB = U+00AB : LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
205: //AC = U+00AC : NOT SIGN
206: //AD = U+00AD : SOFT HYPHEN
207: //AE = U+00AE : REGISTERED SIGN
208: //AF = U+00AF : MACRON
209: //B0 = U+00B0 : DEGREE SIGN
210: //B1 = U+00B1 : PLUS-MINUS SIGN
211: //B2 = U+00B2 : SUPERSCRIPT TWO
212: //B3 = U+00B3 : SUPERSCRIPT THREE
213: //B4 = U+00B4 : ACUTE ACCENT
214: //B5 = U+00B5 : MICRO SIGN
215: //B6 = U+00B6 : PILCROW SIGN
216: //B7 = U+00B7 : MIDDLE DOT
217: //B8 = U+00B8 : CEDILLA
218: //B9 = U+00B9 : SUPERSCRIPT ONE
219: //BA = U+00BA : MASCULINE ORDINAL INDICATOR
220: //BB = U+00BB : RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
221: //BC = U+00BC : VULGAR FRACTION ONE QUARTER
222: //BD = U+00BD : VULGAR FRACTION ONE HALF
223: //BE = U+00BE : VULGAR FRACTION THREE QUARTERS
224: //BF = U+00BF : INVERTED QUESTION MARK
225: //C0 = U+00C0 : LATIN CAPITAL LETTER A WITH GRAVE
226: //C1 = U+00C1 : LATIN CAPITAL LETTER A WITH ACUTE
227: //C2 = U+00C2 : LATIN CAPITAL LETTER A WITH CIRCUMFLEX
228: //C3 = U+00C3 : LATIN CAPITAL LETTER A WITH TILDE
229: //C4 = U+00C4 : LATIN CAPITAL LETTER A WITH DIAERESIS
230: //C5 = U+00C5 : LATIN CAPITAL LETTER A WITH RING ABOVE
231: //C6 = U+00C6 : LATIN CAPITAL LETTER AE
232: //C7 = U+00C7 : LATIN CAPITAL LETTER C WITH CEDILLA
233: //C8 = U+00C8 : LATIN CAPITAL LETTER E WITH GRAVE
234: //C9 = U+00C9 : LATIN CAPITAL LETTER E WITH ACUTE
235: //CA = U+00CA : LATIN CAPITAL LETTER E WITH CIRCUMFLEX
236: //CB = U+00CB : LATIN CAPITAL LETTER E WITH DIAERESIS
237: //CC = U+00CC : LATIN CAPITAL LETTER I WITH GRAVE
238: //CD = U+00CD : LATIN CAPITAL LETTER I WITH ACUTE
239: //CE = U+00CE : LATIN CAPITAL LETTER I WITH CIRCUMFLEX
240: //CF = U+00CF : LATIN CAPITAL LETTER I WITH DIAERESIS
241: //D0 = U+00D0 : LATIN CAPITAL LETTER ETH
242: //D1 = U+00D1 : LATIN CAPITAL LETTER N WITH TILDE
243: //D2 = U+00D2 : LATIN CAPITAL LETTER O WITH GRAVE
244: //D3 = U+00D3 : LATIN CAPITAL LETTER O WITH ACUTE
245: //D4 = U+00D4 : LATIN CAPITAL LETTER O WITH CIRCUMFLEX
246: //D5 = U+00D5 : LATIN CAPITAL LETTER O WITH TILDE
247: //D6 = U+00D6 : LATIN CAPITAL LETTER O WITH DIAERESIS
248: //D7 = U+00D7 : MULTIPLICATION SIGN
249: //D8 = U+00D8 : LATIN CAPITAL LETTER O WITH STROKE
250: //D9 = U+00D9 : LATIN CAPITAL LETTER U WITH GRAVE
251: //DA = U+00DA : LATIN CAPITAL LETTER U WITH ACUTE
252: //DB = U+00DB : LATIN CAPITAL LETTER U WITH CIRCUMFLEX
253: //DC = U+00DC : LATIN CAPITAL LETTER U WITH DIAERESIS
254: //DD = U+00DD : LATIN CAPITAL LETTER Y WITH ACUTE
255: //DE = U+00DE : LATIN CAPITAL LETTER THORN
256: //DF = U+00DF : LATIN SMALL LETTER SHARP S
257: //E0 = U+00E0 : LATIN SMALL LETTER A WITH GRAVE
258: //E1 = U+00E1 : LATIN SMALL LETTER A WITH ACUTE
259: //E2 = U+00E2 : LATIN SMALL LETTER A WITH CIRCUMFLEX
260: //E3 = U+00E3 : LATIN SMALL LETTER A WITH TILDE
261: //E4 = U+00E4 : LATIN SMALL LETTER A WITH DIAERESIS
262: //E5 = U+00E5 : LATIN SMALL LETTER A WITH RING ABOVE
263: //E6 = U+00E6 : LATIN SMALL LETTER AE
264: //E7 = U+00E7 : LATIN SMALL LETTER C WITH CEDILLA
265: //E8 = U+00E8 : LATIN SMALL LETTER E WITH GRAVE
266: //E9 = U+00E9 : LATIN SMALL LETTER E WITH ACUTE
267: //EA = U+00EA : LATIN SMALL LETTER E WITH CIRCUMFLEX
268: //EB = U+00EB : LATIN SMALL LETTER E WITH DIAERESIS
269: //EC = U+00EC : LATIN SMALL LETTER I WITH GRAVE
270: //ED = U+00ED : LATIN SMALL LETTER I WITH ACUTE
271: //EE = U+00EE : LATIN SMALL LETTER I WITH CIRCUMFLEX
272: //EF = U+00EF : LATIN SMALL LETTER I WITH DIAERESIS
273: //F0 = U+00F0 : LATIN SMALL LETTER ETH
274: //F1 = U+00F1 : LATIN SMALL LETTER N WITH TILDE
275: //F2 = U+00F2 : LATIN SMALL LETTER O WITH GRAVE
276: //F3 = U+00F3 : LATIN SMALL LETTER O WITH ACUTE
277: //F4 = U+00F4 : LATIN SMALL LETTER O WITH CIRCUMFLEX
278: //F5 = U+00F5 : LATIN SMALL LETTER O WITH TILDE
279: //F6 = U+00F6 : LATIN SMALL LETTER O WITH DIAERESIS
280: //F7 = U+00F7 : DIVISION SIGN
281: //F8 = U+00F8 : LATIN SMALL LETTER O WITH STROKE
282: //F9 = U+00F9 : LATIN SMALL LETTER U WITH GRAVE
283: //FA = U+00FA : LATIN SMALL LETTER U WITH ACUTE
284: //FB = U+00FB : LATIN SMALL LETTER U WITH CIRCUMFLEX
285: //FC = U+00FC : LATIN SMALL LETTER U WITH DIAERESIS
286: //FD = U+00FD : LATIN SMALL LETTER Y WITH ACUTE
287: //FE = U+00FE : LATIN SMALL LETTER THORN
288: //FF = U+00FF : LATIN SMALL LETTER Y WITH DIAERESIS
289:
290: //
291: // The contents of this file are subject to the Mozilla Public License Version 1.0 (the "License");
292: // you may not use this file except in compliance with the License. You may obtain a copy of the
293: // License at http://www.mozilla.org/MPL/
294: //
295: // Software distributed under the License is distributed on an "AS IS" basis,
296: // WITHOUT WARRANTY OF ANY KIND, either express or implied.
297: // See the License for the specific language governing rights and limitations under the License.
298: //
299: // The Original Code is: all this file.
300: //
301: // The Initial Developer of the Original Code is Pontus Haglund
302: //
303: // Portions created by (your name) are Copyright (C) (your legal entity). All Rights Reserved.
304: //
305: // Contributor(s): none.
306: //
|