001: /*
002: **************************************************************************
003: * Copyright (C) 2005, International Business Machines Corporation and *
004: * others. All Rights Reserved. *
005: **************************************************************************
006: *
007: */
008:
009: package com.ibm.icu.dev.demo.charsetdet;
010:
011: import java.awt.event.*;
012: import java.awt.*;
013: import java.io.*;
014: import java.net.URL;
015:
016: import javax.swing.*;
017:
018: import com.ibm.icu.impl.UTF32;
019: import com.ibm.icu.text.CharsetDetector;
020: import com.ibm.icu.text.CharsetMatch;
021:
022: /**
023: * This simple application demonstrates how to use the CharsetDetector API. It
024: * opens a file or web page, detects the encoding, and then displays it using that
025: * encoding.
026: */
027: public class DetectingViewer extends JFrame implements ActionListener {
028:
029: private JTextPane text;
030: private JFileChooser fileChooser;
031:
032: /**
033: * @throws java.awt.HeadlessException
034: */
035: public DetectingViewer() {
036: super ();
037:
038: fileChooser = new JFileChooser();
039:
040: setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
041: setSize(800, 800);
042:
043: setJMenuBar(makeMenus());
044: text = new JTextPane();
045: text.setContentType("text/plain");
046: text.setText("");
047: text.setSize(800, 800);
048:
049: Font font = new Font("Arial Unicode MS", Font.PLAIN, 24);
050: text.setFont(font);
051:
052: JScrollPane scrollPane = new JScrollPane(text);
053:
054: getContentPane().add(scrollPane);
055: setVisible(true);
056: }
057:
058: public void actionPerformed(ActionEvent event) {
059: String cmd = event.getActionCommand();
060:
061: if (cmd.equals("New...")) {
062: doNew();
063: } else if (cmd.equals("Open File...")) {
064: doOpenFile();
065: } else if (cmd.equals("Open URL...")) {
066: doOpenURL();
067: } else if (cmd.equals("Quit")) {
068: doQuit();
069: }
070: }
071:
072: public static void main(String[] args) {
073: new DetectingViewer();
074: }
075:
076: private void errorDialog(String title, String msg) {
077: JOptionPane.showMessageDialog(this , msg, title,
078: JOptionPane.ERROR_MESSAGE);
079: }
080:
081: private BufferedInputStream openFile(File file) {
082: FileInputStream fileStream = null;
083:
084: try {
085: fileStream = new FileInputStream(file);
086: } catch (Exception e) {
087: errorDialog("Error Opening File", e.getMessage());
088: return null;
089: }
090:
091: return new BufferedInputStream(fileStream);
092: }
093:
094: private void openFile(String directory, String filename) {
095: openFile(new File(directory, filename));
096: }
097:
098: private BufferedInputStream openURL(String url) {
099: InputStream s = null;
100:
101: try {
102: URL aURL = new URL(url);
103: s = aURL.openStream();
104: } catch (Exception e) {
105: errorDialog("Error Opening URL", e.getMessage());
106: return null;
107: }
108:
109: return new BufferedInputStream(s);
110: }
111:
112: private String encodingName(CharsetMatch match) {
113: return match.getName() + " (" + match.getLanguage() + ")";
114: }
115:
116: private void setMatchMenu(CharsetMatch[] matches) {
117: JMenu menu = getJMenuBar().getMenu(1);
118: JMenuItem menuItem;
119:
120: menu.removeAll();
121:
122: for (int i = 0; i < matches.length; i += 1) {
123: CharsetMatch match = matches[i];
124:
125: menuItem = new JMenuItem(encodingName(match) + " "
126: + match.getConfidence());
127:
128: menu.add(menuItem);
129: }
130: }
131:
132: private byte[] scriptTag = { (byte) 's', (byte) 'c', (byte) 'r',
133: (byte) 'i', (byte) 'p', (byte) 't' };
134: private byte[] styleTag = { (byte) 's', (byte) 't', (byte) 'y',
135: (byte) 'l', (byte) 'e' };
136: private static int BUFFER_SIZE = 100000;
137:
138: private boolean openTag(byte[] buffer, int offset, int length,
139: byte[] tag) {
140: int tagLen = tag.length;
141: int bufRem = length - offset;
142: int b;
143:
144: for (b = 0; b < tagLen && b < bufRem; b += 1) {
145: if (buffer[b + offset] != tag[b]) {
146: return false;
147: }
148: }
149:
150: return b == tagLen;
151: }
152:
153: private boolean closedTag(byte[] buffer, int offset, int length,
154: byte[] tag) {
155: if (buffer[offset] != (byte) '/') {
156: return false;
157: }
158:
159: return openTag(buffer, offset + 1, length, tag);
160: }
161:
162: private byte[] filter(InputStream in) {
163: byte[] buffer = new byte[BUFFER_SIZE];
164: int bytesRemaining = BUFFER_SIZE;
165: int bufLen = 0;
166:
167: in.mark(BUFFER_SIZE);
168:
169: try {
170: while (bytesRemaining > 0) {
171: int bytesRead = in.read(buffer, bufLen, bytesRemaining);
172:
173: if (bytesRead <= 0) {
174: break;
175: }
176:
177: bufLen += bytesRead;
178: bytesRemaining -= bytesRead;
179: }
180: } catch (Exception e) {
181: // TODO: error handling?
182: return null;
183: }
184:
185: boolean inTag = false;
186: boolean skip = false;
187: int out = 0;
188:
189: for (int i = 0; i < bufLen; i += 1) {
190: byte b = buffer[i];
191:
192: if (b == (byte) '<') {
193: inTag = true;
194:
195: if (openTag(buffer, i + 1, bufLen, scriptTag)
196: || openTag(buffer, i + 1, bufLen, styleTag)) {
197: skip = true;
198: } else if (closedTag(buffer, i + 1, bufLen, scriptTag)
199: || closedTag(buffer, i + 1, bufLen, styleTag)) {
200: skip = false;
201: }
202: } else if (b == (byte) '>') {
203: inTag = false;
204: } else if (!(inTag || skip)) {
205: buffer[out++] = b;
206: }
207: }
208:
209: byte[] filtered = new byte[out];
210:
211: System.arraycopy(buffer, 0, filtered, 0, out);
212: return filtered;
213: }
214:
215: private CharsetMatch[] detect(byte[] bytes) {
216: CharsetDetector det = new CharsetDetector();
217:
218: det.setText(bytes);
219:
220: return det.detectAll();
221: }
222:
223: private CharsetMatch[] detect(BufferedInputStream inputStream) {
224: CharsetDetector det = new CharsetDetector();
225:
226: try {
227: det.setText(inputStream);
228:
229: return det.detectAll();
230: } catch (Exception e) {
231: // TODO: error message?
232: return null;
233: }
234: }
235:
236: private void show(InputStream inputStream, CharsetMatch[] matches,
237: String title) {
238: InputStreamReader isr;
239: char[] buffer = new char[1024];
240: int bytesRead = 0;
241:
242: if (matches == null || matches.length == 0) {
243: errorDialog("Match Error", "No matches!");
244: return;
245: }
246:
247: try {
248: StringBuffer sb = new StringBuffer();
249: String encoding = matches[0].getName();
250:
251: inputStream.reset();
252:
253: if (encoding.startsWith("UTF-32")) {
254: byte[] bytes = new byte[1024];
255: int offset = 0;
256: int chBytes = 0;
257: UTF32 utf32 = UTF32.getInstance(encoding);
258:
259: while ((bytesRead = inputStream.read(bytes, offset,
260: 1024)) >= 0) {
261: offset = bytesRead % 4;
262: chBytes = bytesRead - offset;
263:
264: sb.append(utf32.fromBytes(bytes, 0, chBytes));
265:
266: if (offset != 0) {
267: for (int i = 0; i < offset; i += 1) {
268: bytes[i] = bytes[chBytes + i];
269: }
270: }
271: }
272: } else {
273: isr = new InputStreamReader(inputStream, encoding);
274:
275: while ((bytesRead = isr.read(buffer, 0, 1024)) >= 0) {
276: sb.append(buffer, 0, bytesRead);
277: }
278:
279: isr.close();
280: }
281:
282: this .setTitle(title + " - " + encodingName(matches[0]));
283:
284: setMatchMenu(matches);
285: text.setText(sb.toString());
286: } catch (IOException e) {
287: errorDialog("IO Error", e.getMessage());
288: } catch (Exception e) {
289: errorDialog("Internal Error", e.getMessage());
290: }
291: }
292:
293: private void doNew() {
294: // open a new window...
295: }
296:
297: private void doOpenFile() {
298: int retVal = fileChooser.showOpenDialog(this );
299:
300: if (retVal == JFileChooser.APPROVE_OPTION) {
301: File file = fileChooser.getSelectedFile();
302: BufferedInputStream inputStream = openFile(file);
303:
304: if (inputStream != null) {
305: CharsetMatch[] matches = detect(inputStream);
306:
307: show(inputStream, matches, file.getName());
308: }
309: }
310: }
311:
312: private void doOpenURL() {
313: String url = (String) JOptionPane.showInputDialog(this ,
314: "URL to open:", "Open URL", JOptionPane.PLAIN_MESSAGE,
315: null, null, null);
316:
317: if (url != null && url.length() > 0) {
318: BufferedInputStream inputStream = openURL(url);
319:
320: if (inputStream != null) {
321: byte[] filtered = filter(inputStream);
322: CharsetMatch[] matches = detect(filtered);
323:
324: show(inputStream, matches, url);
325: }
326: }
327: }
328:
329: private void doQuit() {
330: System.exit(0);
331: }
332:
333: private JMenuBar makeMenus() {
334: JMenu menu = new JMenu("File");
335: JMenuItem mi;
336:
337: mi = new JMenuItem("Open File...");
338: mi.setAccelerator((KeyStroke.getKeyStroke(KeyEvent.VK_O,
339: ActionEvent.CTRL_MASK)));
340: mi.addActionListener(this );
341: menu.add(mi);
342:
343: mi = new JMenuItem("Open URL...");
344: mi.setAccelerator((KeyStroke.getKeyStroke(KeyEvent.VK_U,
345: ActionEvent.CTRL_MASK)));
346: mi.addActionListener(this );
347: menu.add(mi);
348:
349: mi = new JMenuItem("Quit");
350: mi.setAccelerator((KeyStroke.getKeyStroke(KeyEvent.VK_Q,
351: ActionEvent.CTRL_MASK)));
352: mi.addActionListener(this );
353: menu.add(mi);
354:
355: JMenuBar mbar = new JMenuBar();
356: mbar.add(menu);
357:
358: menu = new JMenu("Detected Encodings");
359: mbar.add(menu);
360:
361: return mbar;
362: }
363: }
|