001: /*
002: ***********************************************************************
003: * Copyright (C) 2005, International Business Machines Corporation and *
004: * others. All Rights Reserved. *
005: ***********************************************************************
006: *
007: */
008:
009: package com.ibm.icu.dev.tool.charsetdet.sbcs;
010:
011: import java.io.File;
012: import java.io.FileInputStream;
013: import java.io.InputStreamReader;
014: import java.nio.ByteBuffer;
015: import java.nio.CharBuffer;
016: import java.nio.charset.CharacterCodingException;
017: import java.nio.charset.Charset;
018: import java.nio.charset.CharsetDecoder;
019: import java.nio.charset.CharsetEncoder;
020: import java.nio.charset.CodingErrorAction;
021:
022: /**
023: * @author emader
024: *
025: * TODO To change the template for this generated type comment go to
026: * Window - Preferences - Java - Code Style - Code Templates
027: */
028: public class InputFile implements NGramList.NGramKeyMapper {
029:
030: private File file;
031: private FileInputStream fileStream;
032: private InputStreamReader inputStream;
033:
034: private Charset charset;
035: private CharsetDecoder decoder;
036: private CharsetEncoder encoder;
037:
038: private boolean visualOrder;
039:
040: private static void exceptionError(Exception e) {
041: System.err.println("ioError: " + e.toString());
042: }
043:
044: /**
045: *
046: */
047: public InputFile(String filename, String encoding, boolean visual) {
048: file = new File(filename);
049: setEncoding(encoding);
050: visualOrder = visual;
051: }
052:
053: public boolean open() {
054: try {
055: fileStream = new FileInputStream(file);
056: inputStream = new InputStreamReader(fileStream, "UTF8");
057: } catch (Exception e) {
058: exceptionError(e);
059: return false;
060: }
061:
062: return true;
063: }
064:
065: public void close() {
066: try {
067: inputStream.close();
068: fileStream.close();
069: } catch (Exception e) {
070: // don't really care if this fails...
071: }
072: }
073:
074: public String getFilename() {
075: return file.getName();
076: }
077:
078: public String getParent() {
079: return file.getParent();
080: }
081:
082: public String getPath() {
083: return file.getPath();
084: }
085:
086: public int read(char[] buffer) {
087: int charsRead = -1;
088:
089: try {
090: charsRead = inputStream.read(buffer, 0, buffer.length);
091: } catch (Exception e) {
092: exceptionError(e);
093: }
094:
095: return charsRead;
096: }
097:
098: public void setEncoding(String encoding) {
099: charset = Charset.forName(encoding);
100: decoder = charset.newDecoder();
101: encoder = charset.newEncoder();
102:
103: encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
104: encoder.onMalformedInput(CodingErrorAction.REPLACE);
105:
106: decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
107: decoder.onMalformedInput(CodingErrorAction.REPLACE);
108: }
109:
110: public String getEncoding() {
111: return charset.displayName();
112: }
113:
114: public boolean getVisualOrder() {
115: return visualOrder;
116: }
117:
118: public Object mapKey(String key) {
119: byte[] bytes = encode(key.toCharArray());
120: int length = key.length();
121: int value = 0;
122:
123: for (int b = 0; b < length; b += 1) {
124: value <<= 8;
125: value += (bytes[b] & 0xFF);
126: }
127:
128: return new Integer(value);
129: }
130:
131: public byte[] encode(char[] chars) {
132: int length = chars.length;
133: CharBuffer cb = CharBuffer.wrap(chars);
134: ByteBuffer bb;
135:
136: try {
137: bb = encoder.encode(cb);
138: } catch (CharacterCodingException e) {
139: // don't expect to get any exceptions in normal usage...
140: return null;
141: }
142:
143: return bb.array();
144: }
145:
146: public char[] decode(byte[] bytes) {
147: int length = bytes.length;
148: ByteBuffer bb = ByteBuffer.wrap(bytes);
149: CharBuffer cb;
150:
151: try {
152: cb = decoder.decode(bb);
153: } catch (CharacterCodingException e) {
154: // don't expect to get any exceptions in normal usage...
155: return null;
156: }
157:
158: return cb.array();
159: }
160: }
|