001: /**
002: * Copyright (c) 2003-2004, www.pdfbox.org
003: * All rights reserved.
004: *
005: * Redistribution and use in source and binary forms, with or without
006: * modification, are permitted provided that the following conditions are met:
007: *
008: * 1. Redistributions of source code must retain the above copyright notice,
009: * this list of conditions and the following disclaimer.
010: * 2. Redistributions in binary form must reproduce the above copyright notice,
011: * this list of conditions and the following disclaimer in the documentation
012: * and/or other materials provided with the distribution.
013: * 3. Neither the name of pdfbox; nor the names of its
014: * contributors may be used to endorse or promote products derived from this
015: * software without specific prior written permission.
016: *
017: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
018: * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
019: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
020: * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
021: * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
022: * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
023: * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
024: * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
025: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
026: * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
027: *
028: * http://www.pdfbox.org
029: *
030: */package org.pdfbox.encoding;
031:
032: import java.io.BufferedReader;
033: import java.io.InputStream;
034: import java.io.InputStreamReader;
035: import java.io.IOException;
036:
037: import java.util.HashMap;
038: import java.util.Iterator;
039: import java.util.Map;
040: import java.util.StringTokenizer;
041:
042: import org.pdfbox.cos.COSName;
043:
044: import org.pdfbox.util.ResourceLoader;
045:
046: import org.pdfbox.pdmodel.common.COSObjectable;
047:
048: /**
049: * This is an interface to a text encoder.
050: *
051: * @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
052: * @version $Revision: 1.15 $
053: */
054: public abstract class Encoding implements COSObjectable {
055:
056: /**
057: * This is a mapping from a character code to a character name.
058: */
059: protected Map codeToName = new HashMap();
060: /**
061: * This is a mapping from a character name to a character code.
062: */
063: protected Map nameToCode = new HashMap();
064:
065: private static final Map NAME_TO_CHARACTER = new HashMap();
066: private static final Map CHARACTER_TO_NAME = new HashMap();
067:
068: static {
069: BufferedReader glyphStream = null;
070: try {
071: InputStream resource = ResourceLoader
072: .loadResource("Resources/glyphlist.txt");
073: glyphStream = new BufferedReader(new InputStreamReader(
074: resource));
075: String line = null;
076: while ((line = glyphStream.readLine()) != null) {
077: line = line.trim();
078: //lines starting with # are comments which we can ignore.
079: if (!line.startsWith("#")) {
080: int semicolonIndex = line.indexOf(';');
081: if (semicolonIndex >= 0) {
082: try {
083: String characterName = line.substring(0,
084: semicolonIndex);
085: String unicodeValue = line.substring(
086: semicolonIndex + 1, line.length());
087: StringTokenizer tokenizer = new StringTokenizer(
088: unicodeValue, " ", false);
089: String value = "";
090: while (tokenizer.hasMoreTokens()) {
091: int characterCode = Integer.parseInt(
092: tokenizer.nextToken(), 16);
093: value += (char) characterCode;
094: }
095:
096: NAME_TO_CHARACTER.put(COSName
097: .getPDFName(characterName), value);
098: } catch (NumberFormatException nfe) {
099: nfe.printStackTrace();
100: }
101: }
102: }
103: }
104: } catch (IOException io) {
105: io.printStackTrace();
106: } finally {
107: if (glyphStream != null) {
108: try {
109: glyphStream.close();
110: } catch (IOException e) {
111: e.printStackTrace();
112: }
113:
114: }
115: }
116:
117: NAME_TO_CHARACTER.put(COSName.getPDFName(".notdef"), "");
118: NAME_TO_CHARACTER.put(COSName.getPDFName("fi"), "fi");
119: NAME_TO_CHARACTER.put(COSName.getPDFName("fl"), "fl");
120: NAME_TO_CHARACTER.put(COSName.getPDFName("ffi"), "ffi");
121: NAME_TO_CHARACTER.put(COSName.getPDFName("ff"), "ff");
122: NAME_TO_CHARACTER.put(COSName.getPDFName("pi"), "pi");
123:
124: Iterator keys = NAME_TO_CHARACTER.keySet().iterator();
125: while (keys.hasNext()) {
126: Object key = keys.next();
127: Object value = NAME_TO_CHARACTER.get(key);
128: CHARACTER_TO_NAME.put(value, key);
129: }
130: }
131:
132: /**
133: * This will add a character encoding.
134: *
135: * @param code The character code that matches the character.
136: * @param name The name of the character.
137: */
138: protected void addCharacterEncoding(int code, COSName name) {
139: Integer intCode = new Integer(code);
140: codeToName.put(intCode, name);
141: nameToCode.put(name, intCode);
142: }
143:
144: /**
145: * This will get the character code for the name.
146: *
147: * @param name The name of the character.
148: *
149: * @return The code for the character.
150: *
151: * @throws IOException If there is no character code for the name.
152: */
153: public int getCode(COSName name) throws IOException {
154: Integer code = (Integer) nameToCode.get(name);
155: if (code == null) {
156: throw new IOException(
157: "No character code for character name '"
158: + name.getName() + "'");
159: }
160: return code.intValue();
161: }
162:
163: /**
164: * This will take a character code and get the name from the code.
165: *
166: * @param code The character code.
167: *
168: * @return The name of the character.
169: *
170: * @throws IOException If there is no name for the code.
171: */
172: public COSName getName(int code) throws IOException {
173: COSName name = (COSName) codeToName.get(new Integer(code));
174: if (name == null) {
175: //lets be forgiving for now
176: name = COSName.getPDFName("space");
177: //throw new IOException( getClass().getName() +
178: // ": No name for character code '" + code + "'" );
179: }
180: return name;
181: }
182:
183: /**
184: * This will take a character code and get the name from the code.
185: *
186: * @param c The character.
187: *
188: * @return The name of the character.
189: *
190: * @throws IOException If there is no name for the character.
191: */
192: public COSName getNameFromCharacter(char c) throws IOException {
193: COSName name = (COSName) CHARACTER_TO_NAME.get("" + c);
194: if (name == null) {
195: throw new IOException("No name for character '" + c + "'");
196: }
197: return name;
198: }
199:
200: /**
201: * This will get the character from the code.
202: *
203: * @param code The character code.
204: *
205: * @return The printable character for the code.
206: *
207: * @throws IOException If there is not name for the character.
208: */
209: public String getCharacter(int code) throws IOException {
210: String character = getCharacter(getName(code));
211: return character;
212: }
213:
214: /**
215: * This will get the character from the name.
216: *
217: * @param name The name of the character.
218: *
219: * @return The printable character for the code.
220: */
221: public static String getCharacter(COSName name) {
222: String character = (String) NAME_TO_CHARACTER.get(name);
223: if (character == null) {
224: character = name.getName();
225: }
226: return character;
227: }
228: }
|