blob: c55e81173edc75d35903134fe4be872caee920ae [file] [log] [blame]
/*
**********************************************************************
* Copyright (c) 2001-2010, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Date Name Description
* 11/29/2001 aliu Creation.
* 06/26/2002 aliu Moved to com.ibm.icu.dev.tool.translit
**********************************************************************
*/
package com.ibm.icu.dev.tool.translit;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.Enumeration;
import com.ibm.icu.text.Normalizer;
import com.ibm.icu.text.Transliterator;
import com.ibm.icu.text.UnicodeSet;
/**
 * Class that generates source set information for a transliterator.
 *
 * <p>To run, use:
 *
 * <pre>
 *   java com.ibm.icu.dev.tool.translit.SourceSet Latin-Katakana NFD lower
 * </pre>
 *
 * <p>When run with no arguments, the source sets of all available system
 * transliterators are processed, with no normalization and no lowercasing.
 *
 * <p>Output is produced in the command console, and a file with more detail
 * (<code>UnicodeSetClosure.txt</code>, resolved against the current directory)
 * is also written.
 *
 * <p>To see if it works, use:
 *
 * <pre>
 *   java com.ibm.icu.dev.test.translit.TransliteratorTest -v -nothrow TestIncrementalProgress
 * </pre>
 *
 * and
 *
 * <pre>
 *   java com.ibm.icu.dev.demo.translit.Demo
 * </pre>
 */
public class SourceSet {

    /** Name of the detail file that accompanies the console output. */
    private static final String OUTPUT_FILE_NAME = "UnicodeSetClosure.txt";

    /**
     * Writer for the detail file. Assigned by <code>showSourceSet</code> and
     * read by <code>showSourceSetAux</code>; it refers to a closed writer
     * once <code>showSourceSet</code> has returned.
     */
    static PrintWriter out;

    /**
     * Command-line entry point.
     *
     * @param args either empty (process every available transliterator ID),
     *             or <code>ID [ NFD|NFKD [lower] ]</code>; anything else
     *             prints the usage message and exits with status 1
     * @throws IOException if the detail file cannot be opened or written
     */
    public static void main(String[] args) throws IOException {
        if (args.length == 0) {
            // Compute and display the source sets for all system
            // transliterators. Only the first ID truncates the detail file;
            // later IDs append, so the file ends up holding every entry
            // rather than just the last one.
            boolean append = false;
            for (Enumeration e = Transliterator.getAvailableIDs(); e.hasMoreElements(); ) {
                String ID = (String) e.nextElement();
                showSourceSet(ID, Normalizer.NONE, false, append);
                append = true;
            }
            return;
        }

        // Usage: ID [NFKD | NFD] [lower]
        Normalizer.Mode m = Normalizer.NONE;
        boolean lowerFirst = false;
        if (args.length >= 2) {
            if (args[1].equalsIgnoreCase("NFD")) {
                m = Normalizer.NFD;
            } else if (args[1].equalsIgnoreCase("NFKD")) {
                m = Normalizer.NFKD;
            } else {
                usage();
            }
        }
        if (args.length >= 3) {
            if (args[2].equalsIgnoreCase("lower")) {
                lowerFirst = true;
            } else {
                usage();
            }
        }
        if (args.length > 3) {
            usage();
        }
        showSourceSet(args[0], m, lowerFirst);
    }

    /**
     * Prints the source sets of the transliterator <code>ID</code> and of its
     * inverse to the console, and writes the detailed form to a freshly
     * truncated <code>UnicodeSetClosure.txt</code>.
     *
     * @param ID         transliterator ID passed to <code>Transliterator.getInstance</code>
     * @param m          normalization mode forwarded to <code>UnicodeSetClosure.close</code>;
     *                   <code>Normalizer.NONE</code> together with
     *                   <code>lowerFirst == false</code> skips that call
     * @param lowerFirst flag forwarded to <code>UnicodeSetClosure.close</code>
     * @throws IOException if the detail file cannot be opened or written
     */
    static void showSourceSet(String ID, Normalizer.Mode m, boolean lowerFirst) throws IOException {
        showSourceSet(ID, m, lowerFirst, false);
    }

    /**
     * Implementation of <code>showSourceSet</code> that can either start a new
     * detail file or extend the existing one.
     *
     * @param append <code>false</code> to truncate the file and start it with
     *               a BOM; <code>true</code> to add to the end of the file
     *               without writing another BOM
     * @throws IOException if the detail file cannot be opened or written
     */
    private static void showSourceSet(String ID, Normalizer.Mode m, boolean lowerFirst,
            boolean append) throws IOException {
        String filename = new File(OUTPUT_FILE_NAME).getCanonicalFile().toString();
        PrintWriter writer = new PrintWriter(
                new OutputStreamWriter(
                        new FileOutputStream(filename, append), "UTF-8"));
        out = writer; // showSourceSetAux writes through this shared field
        try {
            if (!append) {
                writer.print('\uFEFF'); // BOM, only valid at the very start of the file
            }
            System.out.println();
            System.out.println("Writing " + filename);
            Transliterator t = Transliterator.getInstance(ID);
            showSourceSetAux(t, m, lowerFirst, true);
            showSourceSetAux(t.getInverse(), m, lowerFirst, false);
            // PrintWriter never throws on write failure; checkError() flushes
            // and reports whether any earlier write went wrong.
            if (writer.checkError()) {
                throw new IOException("Error writing " + filename);
            }
        } finally {
            // Release the file even when a transliterator lookup or write fails.
            writer.close();
        }
    }

    /**
     * Prints one transliterator's source set to the console and writes the
     * corresponding filter entry to <code>out</code>.
     *
     * <p>The file entry consists of a <code># MINIMAL FILTER GENERATED FOR:</code>
     * header, a <code>:: pattern ;</code> line (the pattern is wrapped in
     * <code>( ... )</code> when <code>forward</code> is false), a
     * <code># Unicode:</code> line with the second pattern form, and a blank line.
     *
     * @param t          transliterator whose source set is reported
     * @param m          normalization mode forwarded to <code>UnicodeSetClosure.close</code>
     * @param lowerFirst flag forwarded to <code>UnicodeSetClosure.close</code>
     * @param forward    <code>true</code> for the forward direction; <code>false</code>
     *                   marks the entry as <code>REVERSE</code>
     */
    static void showSourceSetAux(Transliterator t, Normalizer.Mode m, boolean lowerFirst, boolean forward) {
        UnicodeSet sourceSet = t.getSourceSet();
        if (m != Normalizer.NONE || lowerFirst) {
            // NOTE(review): presumably expands sourceSet in place under the
            // requested normalization/lowercasing -- confirm in UnicodeSetClosure.
            UnicodeSetClosure.close(sourceSet, m, lowerFirst);
        }
        // Same pattern string goes to both the console and the filter line.
        String filterPattern = sourceSet.toPattern(true);
        String id = t.getID();
        System.out.println(id + ": " + filterPattern);
        out.println("# MINIMAL FILTER GENERATED FOR: " + id + (forward ? "" : " REVERSE"));
        out.println(":: "
                + (forward ? "" : "( ")
                + filterPattern
                + (forward ? "" : " )")
                + " ;");
        out.println("# Unicode: " + sourceSet.toPattern(false));
        out.println();
    }

    /** Prints the command-line usage to standard error and exits with status 1. */
    static void usage() {
        System.err.println("Usage: ID [ NFD|NFKD [lower] ]");
        System.exit(1);
    }
}