| /* |
| ************************************************************************** |
| * Copyright (C) 2005, International Business Machines Corporation and * |
| * others. All Rights Reserved. * |
| ************************************************************************** |
| * |
| */ |
| |
| package com.ibm.icu.dev.demo.charsetdet; |
| |
| import java.awt.event.*; |
| import java.awt.*; |
| import java.io.*; |
| import java.net.URL; |
| |
| import javax.swing.*; |
| |
| import com.ibm.icu.impl.UTF32; |
| import com.ibm.icu.text.CharsetDetector; |
| import com.ibm.icu.text.CharsetMatch; |
| |
| /** |
| * This simple application demonstrates how to use the CharsetDetector API. It |
| * opens a file or web page, detects the encoding, and then displays it using that |
| * encoding. |
| */ |
| public class DetectingViewer extends JFrame implements ActionListener |
| { |
| |
| private JTextPane text; |
| private JFileChooser fileChooser; |
| |
| /** |
| * @throws java.awt.HeadlessException |
| */ |
| public DetectingViewer() |
| { |
| super(); |
| |
| fileChooser = new JFileChooser(); |
| |
| setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); |
| setSize(800, 800); |
| |
| setJMenuBar(makeMenus()); |
| text = new JTextPane(); |
| text.setContentType("text/plain"); |
| text.setText(""); |
| text.setSize(800, 800); |
| |
| Font font = new Font("Arial Unicode MS", Font.PLAIN, 24); |
| text.setFont(font); |
| |
| JScrollPane scrollPane = new JScrollPane(text); |
| |
| getContentPane().add(scrollPane); |
| setVisible(true); |
| } |
| |
| public void actionPerformed(ActionEvent event) |
| { |
| String cmd = event.getActionCommand(); |
| |
| if (cmd.equals("New...")) { |
| doNew(); |
| } else if (cmd.equals("Open File...")) { |
| doOpenFile(); |
| } else if (cmd.equals("Open URL...")) { |
| doOpenURL(); |
| } else if (cmd.equals("Quit")) { |
| doQuit(); |
| } |
| } |
| |
| public static void main(String[] args) |
| { |
| new DetectingViewer(); |
| } |
| |
| private void errorDialog(String title, String msg) |
| { |
| JOptionPane.showMessageDialog(this, msg, title, JOptionPane.ERROR_MESSAGE); |
| } |
| |
| private BufferedInputStream openFile(File file) |
| { |
| FileInputStream fileStream = null; |
| |
| try { |
| fileStream = new FileInputStream(file); |
| } catch (Exception e) { |
| errorDialog("Error Opening File", e.getMessage()); |
| return null; |
| } |
| |
| return new BufferedInputStream(fileStream); |
| } |
| |
| private void openFile(String directory, String filename) |
| { |
| openFile(new File(directory, filename)); |
| } |
| |
| |
| private BufferedInputStream openURL(String url) |
| { |
| InputStream s = null; |
| |
| try { |
| URL aURL = new URL(url); |
| s = aURL.openStream(); |
| } catch (Exception e) { |
| errorDialog("Error Opening URL", e.getMessage()); |
| return null; |
| } |
| |
| return new BufferedInputStream(s); |
| } |
| |
| private String encodingName(CharsetMatch match) |
| { |
| return match.getName() + " (" + match.getLanguage() + ")"; |
| } |
| |
| private void setMatchMenu(CharsetMatch[] matches) |
| { |
| JMenu menu = getJMenuBar().getMenu(1); |
| JMenuItem menuItem; |
| |
| menu.removeAll(); |
| |
| for (int i = 0; i < matches.length; i += 1) { |
| CharsetMatch match = matches[i]; |
| |
| menuItem = new JMenuItem(encodingName(match) + " " + match.getConfidence()); |
| |
| menu.add(menuItem); |
| } |
| } |
| |
| private byte[] scriptTag = {(byte) 's', (byte) 'c', (byte) 'r', (byte) 'i', (byte) 'p', (byte) 't'}; |
| private byte[] styleTag = {(byte) 's', (byte) 't', (byte) 'y', (byte) 'l', (byte) 'e'}; |
| private static int BUFFER_SIZE = 100000; |
| |
| private boolean openTag(byte[] buffer, int offset, int length, byte[] tag) |
| { |
| int tagLen = tag.length; |
| int bufRem = length - offset; |
| int b; |
| |
| for (b = 0; b < tagLen && b < bufRem; b += 1) { |
| if (buffer[b + offset] != tag[b]) { |
| return false; |
| } |
| } |
| |
| return b == tagLen; |
| } |
| |
| private boolean closedTag(byte[] buffer, int offset, int length, byte[] tag) |
| { |
| if (buffer[offset] != (byte) '/') { |
| return false; |
| } |
| |
| return openTag(buffer, offset + 1, length, tag); |
| } |
| |
| private byte[] filter(InputStream in) |
| { |
| byte[] buffer = new byte[BUFFER_SIZE]; |
| int bytesRemaining = BUFFER_SIZE; |
| int bufLen = 0; |
| |
| in.mark(BUFFER_SIZE); |
| |
| try { |
| while (bytesRemaining > 0) { |
| int bytesRead = in.read(buffer, bufLen, bytesRemaining); |
| |
| if (bytesRead <= 0) { |
| break; |
| } |
| |
| bufLen += bytesRead; |
| bytesRemaining -= bytesRead; |
| } |
| } catch (Exception e) { |
| // TODO: error handling? |
| return null; |
| } |
| |
| boolean inTag = false; |
| boolean skip = false; |
| int out = 0; |
| |
| for (int i = 0; i < bufLen; i += 1) { |
| byte b = buffer[i]; |
| |
| if (b == (byte) '<') { |
| inTag = true; |
| |
| if (openTag(buffer, i + 1, bufLen, scriptTag) || |
| openTag(buffer, i + 1, bufLen, styleTag)) { |
| skip = true; |
| } else if (closedTag(buffer, i + 1, bufLen, scriptTag) || |
| closedTag(buffer, i + 1, bufLen, styleTag)) { |
| skip = false; |
| } |
| } else if (b == (byte) '>') { |
| inTag = false; |
| } else if (! (inTag || skip)) { |
| buffer[out++] = b; |
| } |
| } |
| |
| byte[] filtered = new byte[out]; |
| |
| System.arraycopy(buffer, 0, filtered, 0, out); |
| return filtered; |
| } |
| |
| private CharsetMatch[] detect(byte[] bytes) |
| { |
| CharsetDetector det = new CharsetDetector(); |
| |
| det.setText(bytes); |
| |
| return det.detectAll(); |
| } |
| |
| private CharsetMatch[] detect(BufferedInputStream inputStream) |
| { |
| CharsetDetector det = new CharsetDetector(); |
| |
| try { |
| det.setText(inputStream); |
| |
| return det.detectAll(); |
| } catch (Exception e) { |
| // TODO: error message? |
| return null; |
| } |
| } |
| |
| private void show(InputStream inputStream, CharsetMatch[] matches, String title) |
| { |
| InputStreamReader isr; |
| char[] buffer = new char[1024]; |
| int bytesRead = 0; |
| |
| if (matches == null || matches.length == 0) { |
| errorDialog("Match Error", "No matches!"); |
| return; |
| } |
| |
| try { |
| StringBuffer sb = new StringBuffer(); |
| String encoding = matches[0].getName(); |
| |
| inputStream.reset(); |
| |
| if (encoding.startsWith("UTF-32")) { |
| byte[] bytes = new byte[1024]; |
| int offset = 0; |
| int chBytes = 0; |
| UTF32 utf32 = UTF32.getInstance(encoding); |
| |
| while ((bytesRead = inputStream.read(bytes, offset, 1024)) >= 0) { |
| offset = bytesRead % 4; |
| chBytes = bytesRead - offset; |
| |
| sb.append(utf32.fromBytes(bytes, 0, chBytes)); |
| |
| if (offset != 0) { |
| for (int i = 0; i < offset; i += 1) { |
| bytes[i] = bytes[chBytes + i]; |
| } |
| } |
| } |
| } else { |
| isr = new InputStreamReader(inputStream, encoding); |
| |
| while ((bytesRead = isr.read(buffer, 0, 1024)) >= 0) { |
| sb.append(buffer, 0, bytesRead); |
| } |
| |
| isr.close(); |
| } |
| |
| this.setTitle(title + " - " + encodingName(matches[0])); |
| |
| setMatchMenu(matches); |
| text.setText(sb.toString()); |
| } catch (IOException e) { |
| errorDialog("IO Error", e.getMessage()); |
| } catch (Exception e) { |
| errorDialog("Internal Error", e.getMessage()); |
| } |
| } |
| |
| private void doNew() |
| { |
| // open a new window... |
| } |
| |
| private void doOpenFile() |
| { |
| int retVal = fileChooser.showOpenDialog(this); |
| |
| if (retVal == JFileChooser.APPROVE_OPTION) { |
| File file = fileChooser.getSelectedFile(); |
| BufferedInputStream inputStream = openFile(file); |
| |
| if (inputStream != null) { |
| CharsetMatch[] matches = detect(inputStream); |
| |
| show(inputStream, matches, file.getName()); |
| } |
| } |
| } |
| |
| private void doOpenURL() |
| { |
| String url = (String) JOptionPane.showInputDialog(this, "URL to open:", "Open URL", JOptionPane.PLAIN_MESSAGE, |
| null, null, null); |
| |
| if (url != null && url.length() > 0) { |
| BufferedInputStream inputStream = openURL(url); |
| |
| if (inputStream != null) { |
| byte[] filtered = filter(inputStream); |
| CharsetMatch[] matches = detect(filtered); |
| |
| show(inputStream, matches, url); |
| } |
| } |
| } |
| |
| private void doQuit() |
| { |
| System.exit(0); |
| } |
| |
| private JMenuBar makeMenus() |
| { |
| JMenu menu = new JMenu("File"); |
| JMenuItem mi; |
| |
| mi = new JMenuItem("Open File..."); |
| mi.setAccelerator((KeyStroke.getKeyStroke(KeyEvent.VK_O, ActionEvent.CTRL_MASK))); |
| mi.addActionListener(this); |
| menu.add(mi); |
| |
| mi = new JMenuItem("Open URL..."); |
| mi.setAccelerator((KeyStroke.getKeyStroke(KeyEvent.VK_U, ActionEvent.CTRL_MASK))); |
| mi.addActionListener(this); |
| menu.add(mi); |
| |
| mi = new JMenuItem("Quit"); |
| mi.setAccelerator((KeyStroke.getKeyStroke(KeyEvent.VK_Q, ActionEvent.CTRL_MASK))); |
| mi.addActionListener(this); |
| menu.add(mi); |
| |
| JMenuBar mbar = new JMenuBar(); |
| mbar.add(menu); |
| |
| menu = new JMenu("Detected Encodings"); |
| mbar.add(menu); |
| |
| return mbar; |
| } |
| } |