blob: 6811313282864c68e2191cbece964c8f97e4d0d6 [file] [log] [blame]
#/**
# *******************************************************************************
# * Copyright (C) 2002-2004, International Business Machines Corporation and *
# * others. All Rights Reserved. *
# *******************************************************************************
# */
@rem = '--*-Perl-*--
@echo off
if "%OS%" == "Windows_NT" goto WinNT
perl -W -x -S "%0" %1 %2 %3 %4 %5 %6 %7 %8 %9
goto endofperl
:WinNT
perl -W -x -S "%0" %*
if NOT "%COMSPEC%" == "%SystemRoot%\system32\cmd.exe" goto endofperl
if %errorlevel% == 9009 echo You do not have Perl in your PATH.
goto endofperl
@rem ';
#!perl
#line 14
# This perl script updates the filters in the transliterator index file.
# It does so in a dumb way:
#
# Latin-X NFD lower
# X-Latin NFD
#
# For transliterators using NFKD, or not using Lower in this way, you
# will have to hand-edit the index file.
#
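# This script writes a new index file (Transliterator_index.txt.new, in
# the same directory as the original). The new file has to then be
# hand-edited and checked before use. NOTE: replaced lines are overwritten
# in place; no comments marking the old lines are written, so diff the new
# file against the original to see what changed.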
#
# Alan Liu 11/29/01
use Getopt::Long;
# Directory holding Transliterator_index.txt, and the classpath handed to
# java.  Both are relative paths, so they are resolved against the
# directory the script is run from.
my $DIR = "../../text/resources";
my $CLASSES = "../../../../../classes";
# Command-line option parsing is currently disabled.
#GetOptions('dir=s' => \$DIR,
# 'id=s' => \$ID,
# '<>' => \&usage) || die;
#usage() if (@ARGV);
#$ID =~ s/-/_/;
# Stop early if the resource directory is missing.  usage() is commented
# out below, so calling it here would only die with an "Undefined
# subroutine" error; report the real problem and exit non-zero instead.
if (! -d $DIR) {
    die "$DIR is not a directory\n";
}
#sub usage {
# my $me = $0;
# $me =~ s|.+[/\\]||;
# print "Usage: $me [-dir <dir>] [-id <id>]\n";
# print " --dir <dir> Specify the directory containing the\n";
# print " Transliterator_*.txt files\n";
# print " --id <id> Specify a single ID to transform, e.g.\n";
# print " Fullwidth-Halfwidth\n";
# die;
#}
convertIndex();
######################################################################
# Rewrite the filter on every NF* alias line of the index file.
#
# Reads $DIR/Transliterator_index.txt and writes
# $DIR/Transliterator_index.txt.new.
# Assume lines are of the form:
# <ID>:alias:<FILTER>;<REMAINDER>
# <REMAINDER> can be
# Lower;NFX;...
# NFX;Lower;...
# NFX;...
# On each matching line <FILTER> is replaced by the set returned from
# getSourceSet(); every other line is copied through unchanged.
# Takes no arguments; dies if either file cannot be opened or if the
# output file cannot be closed cleanly.
sub convertIndex {
    my $in_name  = "Transliterator_index.txt";
    my $out_name = "$in_name.new";
    my $in_path  = "$DIR/$in_name";
    my $out_path = "$DIR/$out_name";

    open(my $in, '<', $in_path)
        or die "Cannot open $in_path for reading: $!\n";
    open(my $out, '>', $out_path)
        or die "Cannot open $out_path for writing: $!\n";

    while (my $line = <$in>) {
        # Look for lines that are aliases with NF*
        if ($line =~ /^([^:]+):alias:(\[.+?);\s*((NF[^\s]*?)\s*;.+)$/i) {
            # Save the captures now; the Lower match below overwrites them.
            my ($id, $oldset, $remainder, $NFXD) = ($1, $2, $3, $4);
            my $lower = '';

            # Check for Lower
            # If it comes before NF* then adjust accordingly
            if ($line =~ /^([^:]+):alias:(\[.+?);\s*(Lower\s*;.+)$/i) {
                $lower = 'lower';
                # A shorter filter capture means Lower precedes NF*, so
                # the remainder has to start at Lower instead.
                if (length($2) < length($oldset)) {
                    $oldset = $2;
                    $remainder = $3;
                }
            }

            print STDERR "$id $NFXD $lower\n";
            my $set = getSourceSet($id, $NFXD, $lower);
            if (defined($set) && length($set)) {
                $line = "$id:alias:$set;$remainder\n";
            }
            else {
                # An empty filter would corrupt the entry; keep the old
                # line so the problem is visible when the file is reviewed.
                print STDERR "Warning: no source set returned for $id; line left unchanged\n";
            }
        }
        print {$out} $line;
    }

    close($in);
    # Buffered write errors only surface at close, so check it.
    close($out) or die "Error closing $out_path: $!\n";
    print STDERR "Wrote $out_path\n";
}
######################################################################
# Get the source set (call out to Java), optionally with a closure.
#
# Arguments, in order:
#   the transliterator ID
#   the normalization name matched on the index line (NF*)
#   'lower' or '' (an empty value simply vanishes from the command line)
# Returns whatever the java tool wrote to standard output, with one
# trailing newline removed, or '' if the command produced nothing.
#
# NOTE(review): the arguments are interpolated into a shell command line
# unquoted, so this relies on IDs in the index file being free of shell
# metacharacters -- confirm against the index contents.
sub getSourceSet {
    my ($ID, $NFXD, $lower) = @_;

    my $set = `java -classpath $CLASSES com.ibm.tools.translit.genIndexFilters $ID $NFXD $lower`;

    # Report a failed or unlaunchable java run instead of silently
    # handing back whatever (possibly nothing) was captured.
    if ($? != 0) {
        print STDERR "Warning: genIndexFilters failed for " . $ID
            . " (wait status " . $? . ")\n";
    }

    # Backticks yield undef when the command cannot be started at all.
    $set = '' unless defined $set;
    chomp($set);
    return $set;
}
__END__
:endofperl