001: /*
002: * Copyright (c) 2007, intarsys consulting GmbH
003: *
004: * Redistribution and use in source and binary forms, with or without
005: * modification, are permitted provided that the following conditions are met:
006: *
007: * - Redistributions of source code must retain the above copyright notice,
008: * this list of conditions and the following disclaimer.
009: *
010: * - Redistributions in binary form must reproduce the above copyright notice,
011: * this list of conditions and the following disclaimer in the documentation
012: * and/or other materials provided with the distribution.
013: *
014: * - Neither the name of intarsys nor the names of its contributors may be used
015: * to endorse or promote products derived from this software without specific
016: * prior written permission.
017: *
018: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
019: * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
020: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
021: * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
022: * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
023: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
024: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
025: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
026: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
027: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
028: * POSSIBILITY OF SUCH DAMAGE.
029: */
030: package de.intarsys.pdf.encoding;
031:
032: import java.util.HashMap;
033: import java.util.Map;
034:
/**
 * A bidirectional mapping between Adobe glyph names and Unicode code points.
 *
 * <p>
 * Currently only the "Latin" glyph names are included.
 * </p>
 *
 * <p>
 * Lookups by code point in the range 0..255 are served from a directly
 * indexed array, all other code points are looked up in a hash map.
 * </p>
 *
 * TODO read the mappings from an externalized representation provided by
 * Adobe instead of hard coding them here.
 */
public class GlyphNameMap {
    /**
     * The size of the directly indexed lookup table from code point to glyph
     * name.
     *
     * <p>
     * Code points from 0 (inclusive) up to this value (exclusive) are resolved
     * by array access instead of a map lookup, as these are expected to be
     * the most often used characters.
     * </p>
     */
    static private final int ARRAY_MAPPING_SIZE = 256;

    /** The glyph name reported for a code point without a known mapping. */
    static public final String GLYPH_NOTDEF = ".notdef"; //$NON-NLS-1$

    /** The shared instance holding the built in mappings. */
    static public final GlyphNameMap Standard = new GlyphNameMap();

    /** Glyph name -> code point. */
    private final Map<String, Integer> glyphNameToUnicode = new HashMap<String, Integer>();

    /** Code point -> glyph name, holds every registered code point. */
    private final Map<Integer, String> unicodeToGlyphName = new HashMap<Integer, String>();

    /**
     * Code point to glyph name optimization. Despite its name the table
     * covers all code points below {@link #ARRAY_MAPPING_SIZE} (0..255), not
     * only the ASCII range. Slots without a mapping are <code>null</code>.
     */
    private final String[] asciiToGlyphName = new String[ARRAY_MAPPING_SIZE];

    /**
     * Create a GlyphNameMap populated with the built in "Latin" mappings.
     */
    protected GlyphNameMap() {
        initialize();
    }

    /**
     * Get the adobe glyph name for a unicode code point or ".notdef" if not
     * available.
     *
     * @param unicode
     *            The unicode code point to look up. Negative values are
     *            looked up in the map like any other value outside 0..255.
     *
     * @return The adobe glyph name or {@link #GLYPH_NOTDEF}, never
     *         <code>null</code>.
     */
    public String getGlyphName(int unicode) {
        String glyphName;
        if ((unicode >= 0) && (unicode < ARRAY_MAPPING_SIZE)) {
            // fast path: direct index, no boxing required
            glyphName = asciiToGlyphName[unicode];
        } else {
            glyphName = getUnicodeToGlyphName().get(Integer.valueOf(unicode));
        }
        return (glyphName == null) ? GLYPH_NOTDEF : glyphName;
    }

    /**
     * Get the unicode code point for an Adobe glyph name.
     *
     * <p>
     * If the glyph name is unknown (or <code>null</code>), -1 is returned.
     * </p>
     *
     * @param glyphName
     *            An adobe glyph name.
     *
     * @return The unicode code point for an Adobe glyph name or -1.
     */
    public int getUnicode(String glyphName) {
        Integer result = getGlyphNameToUnicode().get(glyphName);
        if (result == null) {
            return -1;
        }
        return result.intValue();
    }

    /**
     * The internal representation of the map from glyph names to unicode.
     *
     * <p>
     * The returned map is the live instance, not a copy.
     * </p>
     *
     * @return The internal representation of the map from glyph names to
     *         unicode.
     */
    protected Map<String, Integer> getGlyphNameToUnicode() {
        return glyphNameToUnicode;
    }

    /**
     * The internal representation of the map from unicode to glyph names.
     *
     * <p>
     * The returned map is the live instance, not a copy.
     * </p>
     *
     * @return The internal representation of the map from unicode to glyph
     *         names.
     */
    protected Map<Integer, String> getUnicodeToGlyphName() {
        return unicodeToGlyphName;
    }

    /**
     * Add an entry to the collection of known mappings.
     *
     * <p>
     * Both directions are registered. An existing mapping for the same glyph
     * name or the same code point is replaced.
     * </p>
     *
     * @param glyphName
     *            The adobe glyph name.
     * @param unicode
     *            The unicode code point.
     */
    protected void addEntry(String glyphName, int unicode) {
        Integer codePoint = Integer.valueOf(unicode);
        getGlyphNameToUnicode().put(glyphName, codePoint);
        getUnicodeToGlyphName().put(codePoint, glyphName);
        if ((unicode >= 0) && (unicode < ARRAY_MAPPING_SIZE)) {
            // keep the fast lookup table in sync with the map
            asciiToGlyphName[unicode] = glyphName;
        }
    }

    /**
     * Initialize the GlyphNameMap with the required mappings.
     */
    private void initialize() {
        // Latin
        addEntry("A", 0x0041); //$NON-NLS-1$
        addEntry("AE", 0x00C6); //$NON-NLS-1$
        addEntry("Aacute", 0x00C1); //$NON-NLS-1$
        addEntry("Acircumflex", 0x00C2); //$NON-NLS-1$
        addEntry("Adieresis", 0x00C4); //$NON-NLS-1$
        addEntry("Agrave", 0x00C0); //$NON-NLS-1$
        addEntry("Aring", 0x00C5); //$NON-NLS-1$
        addEntry("Atilde", 0x00C3); //$NON-NLS-1$
        addEntry("B", 0x0042); //$NON-NLS-1$
        addEntry("C", 0x0043); //$NON-NLS-1$
        addEntry("Ccedilla", 0x00C7); //$NON-NLS-1$
        addEntry("D", 0x0044); //$NON-NLS-1$
        addEntry("E", 0x0045); //$NON-NLS-1$
        addEntry("Eacute", 0x00C9); //$NON-NLS-1$
        addEntry("Ecircumflex", 0x00CA); //$NON-NLS-1$
        addEntry("Edieresis", 0x00CB); //$NON-NLS-1$
        addEntry("Egrave", 0x00C8); //$NON-NLS-1$
        addEntry("Eth", 0x00D0); //$NON-NLS-1$
        addEntry("Euro", 0x20AC); //$NON-NLS-1$
        addEntry("F", 0x0046); //$NON-NLS-1$
        addEntry("G", 0x0047); //$NON-NLS-1$
        addEntry("H", 0x0048); //$NON-NLS-1$
        addEntry("I", 0x0049); //$NON-NLS-1$
        addEntry("Iacute", 0x00CD); //$NON-NLS-1$
        addEntry("Icircumflex", 0x00CE); //$NON-NLS-1$
        addEntry("Idieresis", 0x00CF); //$NON-NLS-1$
        addEntry("Igrave", 0x00CC); //$NON-NLS-1$
        addEntry("J", 0x004A); //$NON-NLS-1$
        addEntry("K", 0x004B); //$NON-NLS-1$
        addEntry("L", 0x004C); //$NON-NLS-1$
        addEntry("Lslash", 0x0141); //$NON-NLS-1$
        addEntry("M", 0x004D); //$NON-NLS-1$
        addEntry("N", 0x004E); //$NON-NLS-1$
        addEntry("Ntilde", 0x00D1); //$NON-NLS-1$
        addEntry("O", 0x004F); //$NON-NLS-1$
        addEntry("OE", 0x0152); //$NON-NLS-1$
        addEntry("Oacute", 0x00D3); //$NON-NLS-1$
        addEntry("Ocircumflex", 0x00D4); //$NON-NLS-1$
        addEntry("Odieresis", 0x00D6); //$NON-NLS-1$
        addEntry("Ograve", 0x00D2); //$NON-NLS-1$
        addEntry("Oslash", 0x00D8); //$NON-NLS-1$
        addEntry("Otilde", 0x00D5); //$NON-NLS-1$
        addEntry("P", 0x0050); //$NON-NLS-1$
        addEntry("Q", 0x0051); //$NON-NLS-1$
        addEntry("R", 0x0052); //$NON-NLS-1$
        addEntry("S", 0x0053); //$NON-NLS-1$
        addEntry("Scaron", 0x0160); //$NON-NLS-1$
        addEntry("T", 0x0054); //$NON-NLS-1$
        addEntry("Thorn", 0x00DE); //$NON-NLS-1$
        addEntry("U", 0x0055); //$NON-NLS-1$
        addEntry("Uacute", 0x00DA); //$NON-NLS-1$
        addEntry("Ucircumflex", 0x00DB); //$NON-NLS-1$
        addEntry("Udieresis", 0x00DC); //$NON-NLS-1$
        addEntry("Ugrave", 0x00D9); //$NON-NLS-1$
        addEntry("V", 0x0056); //$NON-NLS-1$
        addEntry("W", 0x0057); //$NON-NLS-1$
        addEntry("X", 0x0058); //$NON-NLS-1$
        addEntry("Y", 0x0059); //$NON-NLS-1$
        addEntry("Yacute", 0x00DD); //$NON-NLS-1$
        addEntry("Ydieresis", 0x0178); //$NON-NLS-1$
        addEntry("Z", 0x005A); //$NON-NLS-1$
        addEntry("Zcaron", 0x017D); //$NON-NLS-1$
        addEntry("a", 0x0061); //$NON-NLS-1$
        addEntry("aacute", 0x00E1); //$NON-NLS-1$
        addEntry("acircumflex", 0x00E2); //$NON-NLS-1$
        addEntry("acute", 0x00B4); //$NON-NLS-1$
        addEntry("adieresis", 0x00E4); //$NON-NLS-1$
        addEntry("ae", 0x00E6); //$NON-NLS-1$
        addEntry("agrave", 0x00E0); //$NON-NLS-1$
        addEntry("ampersand", 0x0026); //$NON-NLS-1$
        addEntry("aring", 0x00E5); //$NON-NLS-1$
        addEntry("asciicircum", 0x005E); //$NON-NLS-1$
        addEntry("asciitilde", 0x007E); //$NON-NLS-1$
        addEntry("asterisk", 0x002A); //$NON-NLS-1$
        addEntry("at", 0x0040); //$NON-NLS-1$
        addEntry("atilde", 0x00E3); //$NON-NLS-1$
        addEntry("b", 0x0062); //$NON-NLS-1$
        addEntry("backslash", 0x005C); //$NON-NLS-1$
        addEntry("bar", 0x007C); //$NON-NLS-1$
        addEntry("braceleft", 0x007B); //$NON-NLS-1$
        addEntry("braceright", 0x007D); //$NON-NLS-1$
        addEntry("bracketleft", 0x005B); //$NON-NLS-1$
        addEntry("bracketright", 0x005D); //$NON-NLS-1$
        addEntry("breve", 0x02D8); //$NON-NLS-1$
        addEntry("brokenbar", 0x00A6); //$NON-NLS-1$
        addEntry("bullet", 0x2022); //$NON-NLS-1$
        addEntry("c", 0x0063); //$NON-NLS-1$
        addEntry("caron", 0x02C7); //$NON-NLS-1$
        addEntry("ccedilla", 0x00E7); //$NON-NLS-1$
        addEntry("cedilla", 0x00B8); //$NON-NLS-1$
        addEntry("cent", 0x00A2); //$NON-NLS-1$
        addEntry("circumflex", 0x02C6); //$NON-NLS-1$
        addEntry("colon", 0x003A); //$NON-NLS-1$
        addEntry("comma", 0x002C); //$NON-NLS-1$
        addEntry("copyright", 0x00A9); //$NON-NLS-1$
        addEntry("currency", 0x00A4); //$NON-NLS-1$
        addEntry("d", 0x0064); //$NON-NLS-1$
        addEntry("dagger", 0x2020); //$NON-NLS-1$
        addEntry("daggerdbl", 0x2021); //$NON-NLS-1$
        addEntry("degree", 0x00B0); //$NON-NLS-1$
        addEntry("dieresis", 0x00A8); //$NON-NLS-1$
        addEntry("divide", 0x00F7); //$NON-NLS-1$
        addEntry("dollar", 0x0024); //$NON-NLS-1$
        addEntry("dotaccent", 0x02D9); //$NON-NLS-1$
        addEntry("dotlessi", 0x0131); //$NON-NLS-1$
        addEntry("e", 0x0065); //$NON-NLS-1$
        addEntry("eacute", 0x00E9); //$NON-NLS-1$
        addEntry("ecircumflex", 0x00EA); //$NON-NLS-1$
        addEntry("edieresis", 0x00EB); //$NON-NLS-1$
        addEntry("egrave", 0x00E8); //$NON-NLS-1$
        addEntry("eight", 0x0038); //$NON-NLS-1$
        addEntry("ellipsis", 0x2026); //$NON-NLS-1$
        addEntry("emdash", 0x2014); //$NON-NLS-1$
        addEntry("endash", 0x2013); //$NON-NLS-1$
        addEntry("equal", 0x003D); //$NON-NLS-1$
        addEntry("eth", 0x00F0); //$NON-NLS-1$
        addEntry("exclam", 0x0021); //$NON-NLS-1$
        addEntry("exclamdown", 0x00A1); //$NON-NLS-1$
        addEntry("f", 0x0066); //$NON-NLS-1$
        addEntry("fi", 0xFB01); //$NON-NLS-1$
        addEntry("five", 0x0035); //$NON-NLS-1$
        addEntry("fl", 0xFB02); //$NON-NLS-1$
        addEntry("florin", 0x0192); //$NON-NLS-1$
        addEntry("four", 0x0034); //$NON-NLS-1$
        addEntry("fraction", 0x2044); //$NON-NLS-1$
        addEntry("g", 0x0067); //$NON-NLS-1$
        addEntry("germandbls", 0x00DF); //$NON-NLS-1$
        addEntry("grave", 0x0060); //$NON-NLS-1$
        addEntry("greater", 0x003E); //$NON-NLS-1$
        addEntry("guillemotleft", 0x00AB); //$NON-NLS-1$
        addEntry("guillemotright", 0x00BB); //$NON-NLS-1$
        addEntry("guilsinglleft", 0x2039); //$NON-NLS-1$
        addEntry("guilsinglright", 0x203A); //$NON-NLS-1$
        addEntry("h", 0x0068); //$NON-NLS-1$
        addEntry("hungarumlaut", 0x02DD); //$NON-NLS-1$
        addEntry("hyphen", 0x002D); //$NON-NLS-1$
        addEntry("i", 0x0069); //$NON-NLS-1$
        addEntry("iacute", 0x00ED); //$NON-NLS-1$
        addEntry("icircumflex", 0x00EE); //$NON-NLS-1$
        addEntry("idieresis", 0x00EF); //$NON-NLS-1$
        addEntry("igrave", 0x00EC); //$NON-NLS-1$
        addEntry("j", 0x006A); //$NON-NLS-1$
        addEntry("k", 0x006B); //$NON-NLS-1$
        addEntry("l", 0x006C); //$NON-NLS-1$
        addEntry("less", 0x003C); //$NON-NLS-1$
        addEntry("logicalnot", 0x00AC); //$NON-NLS-1$
        addEntry("lslash", 0x0142); //$NON-NLS-1$
        addEntry("m", 0x006D); //$NON-NLS-1$
        addEntry("macron", 0x00AF); //$NON-NLS-1$
        addEntry("minus", 0x2212); //$NON-NLS-1$
        addEntry("mu", 0x00B5); //$NON-NLS-1$
        addEntry("multiply", 0x00D7); //$NON-NLS-1$
        addEntry("n", 0x006E); //$NON-NLS-1$
        addEntry("nine", 0x0039); //$NON-NLS-1$
        addEntry("ntilde", 0x00F1); //$NON-NLS-1$
        addEntry("numbersign", 0x0023); //$NON-NLS-1$
        addEntry("o", 0x006F); //$NON-NLS-1$
        addEntry("oacute", 0x00F3); //$NON-NLS-1$
        addEntry("ocircumflex", 0x00F4); //$NON-NLS-1$
        addEntry("odieresis", 0x00F6); //$NON-NLS-1$
        addEntry("oe", 0x0153); //$NON-NLS-1$
        addEntry("ogonek", 0x02DB); //$NON-NLS-1$
        addEntry("ograve", 0x00F2); //$NON-NLS-1$
        addEntry("one", 0x0031); //$NON-NLS-1$
        addEntry("onehalf", 0x00BD); //$NON-NLS-1$
        addEntry("onequarter", 0x00BC); //$NON-NLS-1$
        addEntry("onesuperior", 0x00B9); //$NON-NLS-1$
        addEntry("ordfeminine", 0x00AA); //$NON-NLS-1$
        addEntry("ordmasculine", 0x00BA); //$NON-NLS-1$
        addEntry("oslash", 0x00F8); //$NON-NLS-1$
        addEntry("otilde", 0x00F5); //$NON-NLS-1$
        addEntry("p", 0x0070); //$NON-NLS-1$
        addEntry("paragraph", 0x00B6); //$NON-NLS-1$
        addEntry("parenleft", 0x0028); //$NON-NLS-1$
        addEntry("parenright", 0x0029); //$NON-NLS-1$
        addEntry("percent", 0x0025); //$NON-NLS-1$
        addEntry("period", 0x002E); //$NON-NLS-1$
        addEntry("periodcentered", 0x00B7); //$NON-NLS-1$
        addEntry("perthousand", 0x2030); //$NON-NLS-1$
        addEntry("plus", 0x002B); //$NON-NLS-1$
        addEntry("plusminus", 0x00B1); //$NON-NLS-1$
        addEntry("q", 0x0071); //$NON-NLS-1$
        addEntry("question", 0x003F); //$NON-NLS-1$
        addEntry("questiondown", 0x00BF); //$NON-NLS-1$
        addEntry("quotedbl", 0x0022); //$NON-NLS-1$
        addEntry("quotedblbase", 0x201E); //$NON-NLS-1$
        addEntry("quotedblleft", 0x201C); //$NON-NLS-1$
        addEntry("quotedblright", 0x201D); //$NON-NLS-1$
        addEntry("quoteleft", 0x2018); //$NON-NLS-1$
        addEntry("quoteright", 0x2019); //$NON-NLS-1$
        addEntry("quotesinglbase", 0x201A); //$NON-NLS-1$
        addEntry("quotesingle", 0x0027); //$NON-NLS-1$
        addEntry("r", 0x0072); //$NON-NLS-1$
        addEntry("registered", 0x00AE); //$NON-NLS-1$
        addEntry("ring", 0x02DA); //$NON-NLS-1$
        addEntry("s", 0x0073); //$NON-NLS-1$
        addEntry("scaron", 0x0161); //$NON-NLS-1$
        addEntry("section", 0x00A7); //$NON-NLS-1$
        addEntry("semicolon", 0x003B); //$NON-NLS-1$
        addEntry("seven", 0x0037); //$NON-NLS-1$
        addEntry("six", 0x0036); //$NON-NLS-1$
        addEntry("slash", 0x002F); //$NON-NLS-1$
        addEntry("space", 0x0020); //$NON-NLS-1$
        addEntry("sterling", 0x00A3); //$NON-NLS-1$
        addEntry("t", 0x0074); //$NON-NLS-1$
        addEntry("thorn", 0x00FE); //$NON-NLS-1$
        addEntry("three", 0x0033); //$NON-NLS-1$
        addEntry("threequarters", 0x00BE); //$NON-NLS-1$
        addEntry("threesuperior", 0x00B3); //$NON-NLS-1$
        addEntry("tilde", 0x02DC); //$NON-NLS-1$
        addEntry("trademark", 0x2122); //$NON-NLS-1$
        addEntry("two", 0x0032); //$NON-NLS-1$
        addEntry("twosuperior", 0x00B2); //$NON-NLS-1$
        addEntry("u", 0x0075); //$NON-NLS-1$
        addEntry("uacute", 0x00FA); //$NON-NLS-1$
        addEntry("ucircumflex", 0x00FB); //$NON-NLS-1$
        addEntry("udieresis", 0x00FC); //$NON-NLS-1$
        addEntry("ugrave", 0x00F9); //$NON-NLS-1$
        addEntry("underscore", 0x005F); //$NON-NLS-1$
        addEntry("v", 0x0076); //$NON-NLS-1$
        addEntry("w", 0x0077); //$NON-NLS-1$
        addEntry("x", 0x0078); //$NON-NLS-1$
        addEntry("y", 0x0079); //$NON-NLS-1$
        addEntry("yacute", 0x00FD); //$NON-NLS-1$
        addEntry("ydieresis", 0x00FF); //$NON-NLS-1$
        addEntry("yen", 0x00A5); //$NON-NLS-1$
        addEntry("z", 0x007A); //$NON-NLS-1$
        addEntry("zcaron", 0x017E); //$NON-NLS-1$
        addEntry("zero", 0x0030); //$NON-NLS-1$
    }
}
|