blob: 27a03e9cf2c05ae630f19b3cc95b30361929fa8e [file] [log] [blame]
/*
*******************************************************************************
* Copyright (C) 1998-2003, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* Created on Dec 3, 2003
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/tool/layout/Attic/ArabicGSUBBuilder.java,v $
* $Date: 2003/12/17 04:54:40 $
* $Revision: 1.2 $
*
*******************************************************************************
*/
package com.ibm.icu.dev.tool.layout;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UTF16;
public class ArabicGSUBBuilder
{
/**
 * Produces the string that represents a contextual ligature: the ligature's component
 * characters followed by the ligature code point itself.
 *
 * <p>When {@code type} is the isolated, final, initial or medial decomposition type, the
 * characters of {@code decomp} are first run through {@code ArabicShaping.shape}, with the
 * left and right neighbour types selected from {@code type}. For every other decomposition
 * type the decomposition is used as-is.
 *
 * @param type a {@code UCharacter.DecompositionType} value describing the ligature
 * @param ligature the code point of the ligature
 * @param decomp the decomposition of the ligature
 * @param isolClassTable class table passed through to {@code ArabicShaping.shape}
 * @return the (possibly shaped) components with the ligature code point appended
 */
public static String convertString(int type, int ligature, String decomp, ClassTable isolClassTable)
{
    final boolean medial = type == UCharacter.DecompositionType.MEDIAL;
    final boolean setsRightType = medial || type == UCharacter.DecompositionType.FINAL;
    final boolean setsLeftType = medial || type == UCharacter.DecompositionType.INITIAL;
    final boolean contextual = setsRightType || setsLeftType
        || type == UCharacter.DecompositionType.ISOLATED;

    // Anything that is not a contextual decomposition is emitted without shaping.
    if (!contextual) {
        return decomp + UCharacter.toString(ligature);
    }

    final int leftType = setsLeftType ? ArabicShaping.VALUE_RIGHT : ArabicShaping.VALUE_NONE;
    final int rightType = setsRightType ? ArabicShaping.VALUE_LEFT : ArabicShaping.VALUE_NONE;

    // shape() rewrites the array in place.
    char[] components = decomp.toCharArray();
    ArabicShaping.shape(components, leftType, rightType, isolClassTable);

    return new String(components) + UCharacter.toString(ligature);
}
/**
 * Fills the four contextual-form class tables from the character data.
 *
 * <p>Every record whose decomposition is exactly one {@code char} long contributes a
 * mapping from the decomposed character to the record's own code point. The mapping goes
 * into the table that matches the record's decomposition type; records with any other
 * decomposition type are ignored.
 *
 * @param data the character records to scan
 * @param initClassTable receives mappings for the INITIAL decomposition type
 * @param mediClassTable receives mappings for the MEDIAL decomposition type
 * @param finaClassTable receives mappings for the FINAL decomposition type
 * @param isolClassTable receives mappings for the ISOLATED decomposition type
 */
static void buildContextualForms(CharacterData data, ClassTable initClassTable, ClassTable mediClassTable,
        ClassTable finaClassTable, ClassTable isolClassTable)
{
    System.out.print("Finding contextual forms... ");

    for (int index = 0; index < data.countRecords(); index++) {
        CharacterData.Record entry = data.getRecord(index);
        String decomp = entry.getDecomposition();

        // Only single-character decompositions are handled here.
        if (decomp == null || decomp.length() != 1) {
            continue;
        }

        int presentationForm = entry.getCodePoint();
        int baseCharacter = UTF16.charAt(entry.getDecomposition(), 0);
        int decompType = entry.getDecompositionType();

        if (decompType == UCharacter.DecompositionType.INITIAL) {
            initClassTable.addMapping(baseCharacter, presentationForm);
        } else if (decompType == UCharacter.DecompositionType.MEDIAL) {
            mediClassTable.addMapping(baseCharacter, presentationForm);
        } else if (decompType == UCharacter.DecompositionType.FINAL) {
            finaClassTable.addMapping(baseCharacter, presentationForm);
        } else if (decompType == UCharacter.DecompositionType.ISOLATED) {
            isolClassTable.addMapping(baseCharacter, presentationForm);
        }
        // Any other decomposition type is skipped silently (issue some error message?).
    }

    System.out.println("Done.");
}
/**
 * Inserts every ligature found in the character data into the appropriate ligature tree.
 *
 * <p>A record is treated as a ligature when its decomposition is longer than one
 * {@code char}. Ligatures with a FINAL, INITIAL, MEDIAL or ISOLATED decomposition type are
 * converted with {@link #convertString} and inserted into {@code contextualTree}; ligatures
 * with a CANONICAL decomposition type are inserted into {@code cannonicalTree} as the
 * decomposition followed by the ligature code point. All other decomposition types are
 * ignored.
 *
 * <p>Either tree may be {@code null}, meaning the caller is not interested in that kind of
 * ligature; matching records are then skipped instead of causing a
 * {@code NullPointerException}. When {@code contextualTree} is {@code null},
 * {@code isolClassTable} is never used and may be {@code null} as well.
 *
 * @param data the character records to scan
 * @param isolClassTable class table handed to {@link #convertString} for contextual ligatures
 * @param contextualTree destination for contextual ligatures, or {@code null} to skip them
 * @param cannonicalTree destination for canonical ligatures, or {@code null} to skip them
 */
static void buildLigatureTrees(CharacterData data, ClassTable isolClassTable,
        LigatureTree contextualTree, LigatureTree cannonicalTree)
{
    System.out.print("Building ligature trees... ");

    for (int i = 0; i < data.countRecords(); i += 1) {
        CharacterData.Record record = data.getRecord(i);
        String decomposition = record.getDecomposition();

        // Single-character (or missing) decompositions are not ligatures.
        if (decomposition == null || decomposition.length() <= 1) {
            continue;
        }

        int ligature = record.getCodePoint();
        int decompType = record.getDecompositionType();

        switch (decompType) {
        case UCharacter.DecompositionType.FINAL:
        case UCharacter.DecompositionType.INITIAL:
        case UCharacter.DecompositionType.MEDIAL:
        case UCharacter.DecompositionType.ISOLATED:
            // The null check comes first so that convertString (and the shaping it
            // performs with isolClassTable) is never reached without a destination.
            if (contextualTree != null) {
                contextualTree.insert(convertString(decompType, ligature, decomposition, isolClassTable));
            }
            break;

        case UCharacter.DecompositionType.CANONICAL:
            if (cannonicalTree != null) {
                cannonicalTree.insert(decomposition + UCharacter.toString(ligature));
            }
            break;

        default:
            // Other decomposition types do not produce ligature entries.
            break;
        }
    }

    System.out.println("Done.");
}
static final int SIMPLE_GLYPH = 1;
static final int LIGATURE_GLYPH = 2;
static final int MARK_GLYPH = 3;
static final int COMPONENT_GLYPH = 4;
static final int categoryClassMap[] = {
0, // UNASSIGNED
SIMPLE_GLYPH, // UPPERCASE_LETTER
SIMPLE_GLYPH, // LOWERCASE_LETTER
SIMPLE_GLYPH, // TITLECASE_LETTER
SIMPLE_GLYPH, // MODIFIER_LETTER
SIMPLE_GLYPH, // OTHER_LETTER
MARK_GLYPH, // NON_SPACING_MARK
MARK_GLYPH, // ENCLOSING_MARK ??
MARK_GLYPH, // COMBINING_SPACING_MARK ??
SIMPLE_GLYPH, // DECIMAL_NUMBER
SIMPLE_GLYPH, // LETTER_NUMBER
SIMPLE_GLYPH, // OTHER_NUMBER;
0, // SPACE_SEPARATOR
0, // LINE_SEPARATOR
0, // PARAGRAPH_SEPARATOR
0, // CONTROL
0, // FORMAT
0, // PRIVATE_USE
0, // SURROGATE
SIMPLE_GLYPH, // DASH_PUNCTUATION
SIMPLE_GLYPH, // START_PUNCTUATION
SIMPLE_GLYPH, // END_PUNCTUATION
SIMPLE_GLYPH, // CONNECTOR_PUNCTUATION
SIMPLE_GLYPH, // OTHER_PUNCTUATION
SIMPLE_GLYPH, // MATH_SYMBOL;
SIMPLE_GLYPH, // CURRENCY_SYMBOL
SIMPLE_GLYPH, // MODIFIER_SYMBOL
SIMPLE_GLYPH, // OTHER_SYMBOL
SIMPLE_GLYPH, // INITIAL_PUNCTUATION
SIMPLE_GLYPH // FINAL_PUNCTUATION
};
/**
 * Determines the glyph class of a character record.
 *
 * <p>A record whose decomposition is longer than one {@code char} is classified as
 * {@code LIGATURE_GLYPH}; every other record is classified by looking up its general
 * category in {@code categoryClassMap}.
 *
 * @param record the character record to classify
 * @return the glyph class value for the record
 */
static int getGlyphClass(CharacterData.Record record)
{
    String decomposition = record.getDecomposition();
    boolean multiCharacter = decomposition != null && decomposition.length() > 1;

    return multiCharacter ? LIGATURE_GLYPH : categoryClassMap[record.getGeneralCategory()];
}
/**
 * Creates a class table that maps the code point of every record in {@code data} to the
 * glyph class computed by {@link #getGlyphClass}. Progress is reported on standard output.
 *
 * @param data the character records to classify
 * @return a new class table holding one mapping per record
 */
static ClassTable buildGlyphClassTable(CharacterData data)
{
    System.out.print("Building glyph class table... ");

    ClassTable glyphClasses = new ClassTable();
    int index = 0;

    while (index < data.countRecords()) {
        CharacterData.Record entry = data.getRecord(index);
        int codePoint = entry.getCodePoint();
        int glyphClass = getGlyphClass(entry);

        glyphClasses.addMapping(codePoint, glyphClass);
        index += 1;
    }

    System.out.println("Done.");

    return glyphClasses;
}
/**
 * Builds the Arabic GSUB and GDEF tables and writes them to {@code fileName}.
 *
 * <p>The steps are: collect the character data, build the glyph class table, build the
 * contextual-form tables, build the ligature trees (canonical ligatures feed the "ccmp"
 * lookup, contextual ligatures feed the "liga" lookup), register the lookups and features
 * for the "arab" script, and finally write both tables through a
 * {@code LigatureModuleWriter}.
 *
 * <p>The output file is closed even if one of the write steps throws.
 *
 * @param fileName the file the generated tables are written to
 */
private static void buildArabicTables(String fileName) {
    // TODO: Might want to have the ligature table builder explicitly check for ligatures
    // which start with space and tatweel rather than pulling them out here...
    UnicodeSet arabicBlock = new UnicodeSet("[[\\p{block=Arabic}] & [[:Cf:][:Po:][:So:][:Mn:][:Nd:][:Lm:]]]");
    UnicodeSet oddLigatures = new UnicodeSet("[\\uFC5E-\\uFC63\\uFCF2-\\uFCF4\\uFE70-\\uFE7F]");
    UnicodeSet arabicLetters = new UnicodeSet("[\\p{Arabic}]");

    // Arabic-script characters plus selected Arabic-block characters, minus the odd ligatures.
    CharacterData arabicData = CharacterData.factory(arabicLetters.addAll(arabicBlock).removeAll(oddLigatures));

    ClassTable classTable = buildGlyphClassTable(arabicData);

    ClassTable initClassTable = new ClassTable();
    ClassTable mediClassTable = new ClassTable();
    ClassTable finaClassTable = new ClassTable();
    ClassTable isolClassTable = new ClassTable();

    buildContextualForms(arabicData, initClassTable, mediClassTable, finaClassTable, isolClassTable);

    // NOTE(review): snapshot() is called before the table is used for shaping the
    // ligature components; its exact effect is defined in ClassTable - confirm there.
    isolClassTable.snapshot();

    LigatureTree ccmpTree = new LigatureTree();
    LigatureTree ligaTree = new LigatureTree();

    // Contextual ligatures go into ligaTree, canonical ones into ccmpTree.
    buildLigatureTrees(arabicData, isolClassTable, ligaTree, ccmpTree);

    LigatureTreeWalker ccmpWalker = new LigatureTreeWalker();
    LigatureTreeWalker ligaWalker = new LigatureTreeWalker();

    ccmpTree.walk(ccmpWalker);
    ligaTree.walk(ligaWalker);

    LookupList lookupList = new LookupList();
    FeatureList featureList = new FeatureList();
    ScriptList scriptList = new ScriptList();

    Lookup ccmpLookup = new Lookup(Lookup.GSST_Ligature, 0);
    ccmpLookup.addSubtable(ccmpWalker);

    Lookup initLookup = new Lookup(Lookup.GSST_Single, 0);
    initLookup.addSubtable(initClassTable);

    Lookup mediLookup = new Lookup(Lookup.GSST_Single, 0);
    mediLookup.addSubtable(mediClassTable);

    Lookup finaLookup = new Lookup(Lookup.GSST_Single, 0);
    finaLookup.addSubtable(finaClassTable);

    Lookup ligaLookup = new Lookup(Lookup.GSST_Ligature, Lookup.LF_IgnoreMarks);
    ligaLookup.addSubtable(ligaWalker);

    // The order of registration determines the lookup indices, so keep it stable.
    int ccmpLookupIndex = lookupList.addLookup(ccmpLookup);
    int initLookupIndex = lookupList.addLookup(initLookup);
    int mediLookupIndex = lookupList.addLookup(mediLookup);
    int finaLookupIndex = lookupList.addLookup(finaLookup);
    int ligaLookupIndex = lookupList.addLookup(ligaLookup);

    featureList.addLookup("ccmp", ccmpLookupIndex);
    featureList.addLookup("init", initLookupIndex);
    featureList.addLookup("medi", mediLookupIndex);
    featureList.addLookup("fina", finaLookupIndex);
    featureList.addLookup("liga", ligaLookupIndex);

    featureList.finalizeFeatureList();

    scriptList.addFeature("arab", "(default)", featureList.getFeatureIndex("ccmp"));
    scriptList.addFeature("arab", "(default)", featureList.getFeatureIndex("init"));
    scriptList.addFeature("arab", "(default)", featureList.getFeatureIndex("medi"));
    scriptList.addFeature("arab", "(default)", featureList.getFeatureIndex("fina"));
    scriptList.addFeature("arab", "(default)", featureList.getFeatureIndex("liga"));

    GSUBWriter gsubWriter = new GSUBWriter("Arabic", scriptList, featureList, lookupList);
    GDEFWriter gdefWriter = new GDEFWriter("Arabic", classTable);
    String[] includeFiles = {"LETypes.h", "ArabicShaping.h"};

    LigatureModuleWriter writer = new LigatureModuleWriter();

    writer.openFile(fileName);
    try {
        writer.writeHeader(null, includeFiles);
        writer.writeTable(gsubWriter);
        writer.writeTable(gdefWriter);
        writer.writeTrailer();
    } finally {
        // Release the output file even when writing fails part-way through.
        writer.closeFile();
    }
}
/**
 * Builds the Hebrew GSUB and GDEF tables and writes them to {@code fileName}.
 *
 * <p>Only canonical ligatures are collected: {@link #buildLigatureTrees} is called with no
 * contextual tree and no isolated-form class table. The resulting tree feeds a single
 * "liga" lookup registered for the "hebr" script.
 *
 * <p>The output file is closed even if one of the write steps throws.
 *
 * @param fileName the file the generated tables are written to
 */
private static void buildHebrewTables(String fileName)
{
    UnicodeSet hebrewBlock = new UnicodeSet("[[\\p{block=Hebrew}] & [[:Cf:][:Po:][:So:][:Mn:][:Nd:][:Lm:]]]");
    // NOTE(review): these are the same ranges buildArabicTables removes; they look like
    // a copy from the Arabic builder and are presumably a no-op for Hebrew - confirm.
    UnicodeSet oddLigatures = new UnicodeSet("[\\uFC5E-\\uFC63\\uFCF2-\\uFCF4\\uFE70-\\uFE7F]");
    UnicodeSet hebrewLetters = new UnicodeSet("[\\p{Hebrew}]");

    CharacterData hebrewData = CharacterData.factory(hebrewLetters.addAll(hebrewBlock).removeAll(oddLigatures));

    ClassTable classTable = buildGlyphClassTable(hebrewData);

    LigatureTree ligaTree = new LigatureTree();

    // No contextual tree and no isolated-form table: only canonical ligatures are wanted.
    buildLigatureTrees(hebrewData, null, null, ligaTree);

    LigatureTreeWalker ligaWalker = new LigatureTreeWalker();

    ligaTree.walk(ligaWalker);

    LookupList lookupList = new LookupList();
    FeatureList featureList = new FeatureList();
    ScriptList scriptList = new ScriptList();

    Lookup ligaLookup = new Lookup(Lookup.GSST_Ligature, 0);
    ligaLookup.addSubtable(ligaWalker);

    int ligaLookupIndex = lookupList.addLookup(ligaLookup);

    featureList.addLookup("liga", ligaLookupIndex);

    featureList.finalizeFeatureList();

    scriptList.addFeature("hebr", "(default)", featureList.getFeatureIndex("liga"));

    GSUBWriter gsubWriter = new GSUBWriter("Hebrew", scriptList, featureList, lookupList);
    GDEFWriter gdefWriter = new GDEFWriter("Hebrew", classTable);
    String[] includeFiles = {"LETypes.h", "HebrewShaping.h"};

    LigatureModuleWriter writer = new LigatureModuleWriter();

    writer.openFile(fileName);
    try {
        writer.writeHeader(null, includeFiles);
        writer.writeTable(gsubWriter);
        writer.writeTable(gdefWriter);
        writer.writeTrailer();
    } finally {
        // Release the output file even when writing fails part-way through.
        writer.closeFile();
    }
}
/*
* Conversion notes:
*
* Use a UnicodeSet of [\p{Arab}] to get all the Arabic letters. (Might want to
* subtract [\uFBE8\uFBE9] (Uighur Alef Maksura forms) and [\uFBF9-\uFBFB] (Uighur
* ligatures which decompose to the same characters as the corresponding "normal"
* ones).)
*
* Use UCharacter.getType(ch) to get the general category. Values are defined in
* UCharacterCategory.
*
* Use (something like) [\p{DecompositionType=INITIAL}] to get initial, medial,
* final (and isolated?) forms.
*
* Use the normalizer to decompose the characters: if the decomposition is
* a single letter, it's an initial, medial or final form, otherwise it's a
* ligature.
*
* Use ArabicShaping to convert the decomposed ligature back into shaped
* presentation forms. Need to add kashidas on the front and / or back to
* get it to generate the correct forms. (Could add either a kashida or a
* non-joiner so that we never need to look at the first or last character.)
*
* Can do contextual forms and ligatures in a single pass, since we have
* to actually normalize the character to figure out if it's a ligature or
* not. Also need the ligature-ness of the character to compute its glyph
* class... might work to keep a class table which says contextual or ligature
* for each character. Build it while building contextual and ligature tables,
* then use it to generate the actual glyph class table... (this is backwards
* to how it's done now ;-)
*/
/**
 * Command-line entry point: generates the Arabic tables into the first file named on the
 * command line and the Hebrew tables into the second. Additional arguments are ignored.
 *
 * @param args {@code args[0]} is the Arabic output file, {@code args[1]} the Hebrew one
 * @throws IllegalArgumentException if fewer than two file names are supplied
 */
public static void main(String[] args)
{
    // Validate up front so that a missing second argument does not leave a
    // half-finished run (Arabic file written, then a bare index exception).
    if (args == null || args.length < 2) {
        throw new IllegalArgumentException(
            "usage: ArabicGSUBBuilder <arabic-output-file> <hebrew-output-file>");
    }

    buildArabicTables(args[0]);
    buildHebrewTables(args[1]);
}
}