blob: 4d614ba7a8f20a8745647a2e9121d44e656af5f0 [file] [log] [blame]
#/**
# *******************************************************************************
# * Copyright (C) 2000-2004, International Business Machines Corporation and *
# * others. All Rights Reserved. *
# *******************************************************************************
# */
#!perl
# Usage - $0 <remap file>
# e.g. - indic indic.txt
# The input file should be a subset of the Unicode data file containing
# the blocks of interest.
#
# The remap file should have lines of the form
# "\u0D01>\u0D02;"
# including the quotes. These will be interpreted as saying that the
# undefined code point U+D01 (derived via mapping from InterIndic)
# can be remapped to U+D02.
#
# The purpose of this script is to process the Indic script data into
# a form usable by the IndicTransliterator, that is, the Indic-Indic
# transliterator. The transliterator needs two things: A mapping of
# the code points in common, and a list of the exceptions.
# Assume we are located in icu4j/src/com/ibm/tools/translit/.
# We want the Unicode DB in icu4j/src/data/unicode/.
$UNICODE_DB = "../../../../data/unicode/UnicodeData.txt";
$EXCEPTIONS_FILE = shift;
# Assume we are located in icu4j/src/com/ibm/tools/translit/.
# We want to output files to icu4j/src/com/ibm/text/resources/.
# Output directory
$OUTDIR = "../../text/resources";
# The template file should contain java code that can be used
# to generate RuleBasedTransliterator resource files. The template
# should contain the following embedded symbols, which this script
# will replace:
# $TOOL - name of generating tool
# $DATE - date of generation
# $SCRIPTFROM - name of source script
# $SCRIPTTO - name of target script
# $RULES - rules
$RBT_TEMPLATE = 'rbtTemplate.txt';
# Name of this tool in generated RBT files
$RBT_GEN_TOOL = 'icu4j/src/com/ibm/tools/translit/indic.pl';
$DUMP = 0; # If 1, dump out internal data
$DO_HEURISTIC_REMAP = 0; # If 1, do automatic heuristic remapping
$DO_DECOMP_REMAP = 0; # If 1, do decomp remapping
open(UNICODE_DB);
while (<UNICODE_DB>) {
next if (m|^0[0-8]|); # Skip up to Devanagari block (0900)
last if (m|^0D[8-F]|i); # Bail out after Malayam block (0D00)
# 0D39;MALAYALAM LETTER HA;Lo;0;L;;;;;N;;;;;
my @data = split(/;/);
my $fullCode = hex($data[0]); # e.g., 0x093F
my $code = $fullCode & 0x7F; # e.g., 0x3F
my ($script, $name) = ($data[1] =~ /(\w+)\s+(.+)/);
die "Can't parse $_" unless ($name);
# e.g., $code/$script/$name = 3F/MALAYALAM/VOWEL SIGN I
# Titlecase the script
$script = ucfirst(lc($script));
# Fix a couple inconsistencies in the 3.0 data
# REVISIT: Is this okay to do?
if ($DO_HEURISTIC_REMAP) {
if ($script eq 'Gujarati' && $code >= 5 && $code <= 0x14) {
$name =~ s/^VOWEL/LETTER/;
}
}
# Keep track of all script names we encounter. We also note the
# base of the block.
my $base = $fullCode & ~0x7F; # e.g., 0x900;
if (exists $SCRIPT_TO_BASE{$script}) {
die "Script base mismatch for $script: $base vs. $SCRIPT_TO_BASE{$script}"
if ($SCRIPT_TO_BASE{$script} ne $base);
} else {
$SCRIPT_TO_BASE{$script} = $base;
}
# Build up a mapping by name. For each name, keep a hash keyed by
# code point. For each code point, keep an array of script names.
# Also keep a total use count for each name.
push @{$NAME_CODE_TO_SCRIPTS{$name}{$code}}, $script;
++$NAME_CODE_TO_SCRIPTS{$name}{count};
# Build a map that looks like this:
# $SCRIPT_NAME_TO_CODE{<script>}{<name>} = <code>
# or undef if there is no mapping.
$SCRIPT_NAME_TO_CODE{$script}{$name} = $code;
# Build a map that looks like this:
$SCRIPT_CODE_TO_NAME{$script}{$code} = $name;
# And a map from the fullCode point to the name
$FULLCODE_TO_NAME{$fullCode} = $name;
# Map code (0..7F) to name. This is usually a 1-1 mapping, but
# is 1-n in a few cases.
if (exists $CODE_TO_NAME{$code}) {
if ($name ne $CODE_TO_NAME{$code}) {
# For multiple names on a code offset, use the format
# (a/b), (a/b/c), etc.
local $_ = $CODE_TO_NAME{$code};
if (m|^\(|) {
if (!m|[\(\)/]$name[\(\)/]|) {
s|\)$|/$name\)|;
}
} else {
$_ = "($_/$name)";
}
$CODE_TO_NAME{$code} = $_;
}
} else {
$CODE_TO_NAME{$code} = $name;
}
}
close(UNICODE_DB);
# Read and parse the manual remapping file. This contains lines
# of the form:
# |"\u0956>\u0948;" // AI Length Mark -> Devanagari Vowel Sign AI
# The left hand side contains a non-existent full code value. It
# should be a single value. The right hand side contains one or more
# real full code values. The idea is that when a mapping from another
# script ends up at the non-existent code point on the left, the
# sequence on the right should be substituted. In this example,
# Devanagari has no AI Length Mark. So, if transliterating from
# Oriya, then the character 0B56 (Oriya AI Length Mark) will remap to
# the non-existent 0956, and that remaps to 0948, our chosen
# Devanagari equivalent. For our purposes, the left hand side should
# be taken to mean its equivalent point in the InterIndic range. In
# this example, what it really says is E056>0948 in the
# InterIndic-Devanagari transliterator.
if ($EXCEPTIONS_FILE) {
open(EXCEPTIONS_FILE) or die;
while (<EXCEPTIONS_FILE>) {
if (m|^\s*\"([^\"]*?)\"|) {
my $line = $_;
$_ = $1;
if (/^(.*)>(.*);$/) {
my ($rawFrom, $rawTo) = ($1, $2);
my @from = parseUnicodeEscape($rawFrom);
my @to = parseUnicodeEscape($rawTo);
my $from = hexArray(@from);
# Some entries look like this:
# |"\u0955>\u0955;"
# these do nothing; ignore them.
if (intArraysEqual(\@from, \@to)) {
#print STDERR "Ignoring NOOP remap of $from\n";
} elsif (exists $EXCEPTIONS{$from}) {
print STDERR "ERROR in $EXCEPTIONS_FILE - Duplicate remap entries for $from\n";
} elsif (scalar @from > 1) {
print STDERR "ERROR in $EXCEPTIONS_FILE - Ignoring multichar remap: ", hexArray(@from), "->", hexArray(@to), "\n";
} else {
# Check this for validity. Full code on the left
# should NOT exist. Full code seq on the right should.
if (exists $FULLCODE_TO_NAME{$from[0]}) {
print STDERR "ERROR in $EXCEPTIONS_FILE - Invalid remap; left side defined: ", hexArray(@from), "->", hexArray(@to), "\n";
} elsif (grep(! exists $FULLCODE_TO_NAME{$_}, @to)) {
print STDERR "ERROR in $EXCEPTIONS_FILE - Invalid remap; right side undefined: ", hexArray(@from), "->", hexArray(@to), "\n";
} else {
$EXCEPTIONS{$from[0]} = \@to;
}
}
} else { die "ERROR in $EXCEPTIONS_FILE - Can't parse \"$_\" in line $line"; }
}
}
close(EXCEPTIONS_FILE);
print STDERR "$EXCEPTIONS_FILE: Loaded ", scalar keys %EXCEPTIONS, " remappings\n";
}
if ($DO_DECOMP_REMAP) {
# Read the NamesList.txt file. This contains decomposition data.
# Gather these into %DECOMP, which maps a name to n1.n2..., where n1
# etc. are decomposed names. E.g. $DECOMP{'LETTER RRA'} -> 'LETTER
# RA.SIGN NUKTA'. There may be different mappings in different script
# blocks (LETTER RRA is mapped differently in Devanagari and Bengali),
# in which case the name goes into %DECOMP_MISMATCH, and is removed
# from %DECOMP.
$NAMES = "NamesList.txt";
open(NAMES);
while (<NAMES>) {
# Skip to start of DEVANAGARI block
last if (/^\@\@\s+0900/);
}
while (<NAMES>) {
# Continue until start of SINHALA block
last if (/^\@\@\s+0D80/);
if (/^([0-9A-Z]{4})/i) {
$code = $1;
} elsif (/^\s+:\s*(.+)/) {
# We've found a mapping of the form:
# 0929 DEVANAGARI LETTER NNNA
# * for transcribing Dravidian alveolar n
# : 0928 093C
my $from = $FULLCODE_TO_NAME{hex($code)};
my @to = map($FULLCODE_TO_NAME{hex($_)}, split(/\s+/, $1));
if (exists $DECOMP{$from}) {
my $aref = $DECOMP{$from};
if (join(".", @$aref) ne join(".", @to)) {
print STDERR "ERROR: Decomp mismatch for $from\n";
print STDERR " : $from = ", join(".", @$aref), "\n";
print STDERR " : $from = ", join(".", @to), "\n";
$DECOMP_MISMATCH{$from} = 1;
}
} else {
$DECOMP{$from} = \@to;
}
}
}
close(NAMES);
# Remove mismatches
foreach (keys %DECOMP_MISMATCH) {
delete $DECOMP{$_};
}
if ($DUMP) {
foreach (keys %DECOMP) {
print "$_ = ", join(" + ", @{$DECOMP{$_}}), "\n";
}
}
}
# Count the total number of scripts
$SCRIPT_COUNT = scalar keys %SCRIPT_TO_BASE;
#print join("\n", sort keys %SCRIPT_TO_BASE), "\n";
# Dump out the %NAME_CODE_TO_SCRIPTS map.
if ($DUMP) {
print "\nBY NAME:\n";
foreach my $pass ((1, 2)) {
print "\nBY NAME - SINGLETONS:\n" if ($pass eq 2);
foreach my $name (sort keys %NAME_CODE_TO_SCRIPTS) {
if ($pass eq 1) {
next if (1 >= $NAME_CODE_TO_SCRIPTS{$name}{count});
} else {
next if (1 < $NAME_CODE_TO_SCRIPTS{$name}{count});
}
print "$name:";
my $href = $NAME_CODE_TO_SCRIPTS{$name};
foreach my $code (sort {$a <=> $b} keys %$href) {
next if ($code eq 'count');
my $aref = $href->{$code};
print " ", hex2($code), " (", formatScriptList($aref), ")";
}
print "\n";
}
}
}
# Create some transliterators, based on the scripts and the %NAME_CODE_TO_SCRIPTS
# map. Only use %NAME_CODE_TO_SCRIPTS entries with a count of 2 or more, that is,
# names that occur in two or more scripts. For those scripts where
# the names occur, map both up to the InterIndic range, and down to
# the target script.
$INTERINDIC = 0xE000;
$INTERINDIC_EXTRA = 0xE080;
$INTERINDIC_EXTRA_NEXT = $INTERINDIC_EXTRA;
# For each script, create a hash. The hash has a key for each
# code point, either within its block, or in the InterIndic block.
# the value of the key is the mapping.
# The script hashes are named %DEVANAGARI, etc., and referenced
# with symbolic refs.
@REMAP = ('s/\bSHORT\s+//i',
's/\bCANDRA\s+//i',
's/\bQA$/KA/i',
's/\bKHHA$/KHA/i',
's/\bGHHA$/GA/i',
's/\bZA$/JA/i',
's/\bFA$/PHA/i',
's/\bVA$/BA/i',
's/\bNNNA$/NA/i',
's/\bRRA$/RA/i',
's/\bLLLA$/LLA/i',
's/\bLLLA$/LA/i',
's/\bLLA$/LA/i',
's/^A(.) LENGTH MARK$/VOWEL SIGN A$1/i',
's/CANDRABINDU/BINDI/i',
's/BINDI/CANDRABINDU/i',
);
# Do this so we see zero counts:
foreach my $remap (@REMAP) { $REMAP{$remap} = 0; }
# This loop iterates over the names in the NAME_CODE_TO_SCRIPTS hash.
# These names are things like "LETTER NNNA". For each name, it then
# creates script mappings up to the InterIndic area, and back down
# to the script areas. If a name maps to more than one offset,
# then it uses the InterIndic extra range. Either way, it picks
# a single InterIndic point, either an offset point or something in
# the extra range, and maps up and down from that point.
foreach my $name (sort keys %NAME_CODE_TO_SCRIPTS) {
next if (1 >= $NAME_CODE_TO_SCRIPTS{$name}{count});
my $href = $NAME_CODE_TO_SCRIPTS{$name};
# Count the number of different codes assigned to this name.
# Usually 1, but 2 for a handful of names.
my $codeCount = (keys %{$NAME_CODE_TO_SCRIPTS{$name}}) - 1; # less 1: {count}
# If $codeCount is 1, then map directly up to the $INTERINDIC
# base. If $codeCount is 2, then map into unused spots starting
# at $INTERINDIC_EXTRA.
my $interIndicCode;
if ($codeCount > 1) {
# Map into the InterIndic extra range
$interIndicCode = $INTERINDIC_EXTRA_NEXT++;
}
my %seen;
foreach my $code (sort {$a ne 'count' && $b ne 'count' && $a <=> $b} keys %$href) {
next if ($code eq 'count');
my $aref = $href->{$code}; # Ref to array of scripts
if ($codeCount == 1) {
# Map directly
$interIndicCode = $INTERINDIC + $code;
}
# Keep track of the names of the extra InterIndic points
$INTERINDIC_NAME_TO_FULLCODE{$name} = $interIndicCode;
foreach my $scr (@$aref) {
$seen{$scr} = 1;
my $fullCode = $SCRIPT_TO_BASE{$scr} + $code;
$ {$scr}{$fullCode} = hex4($interIndicCode) . "; // $name";
$ {$scr}{$interIndicCode} = hex4($fullCode) . "; // $name";
}
}
# Now handle InterIndic->Script unmapped points. For each name,
# some of the scripts will be left out -- will have no mappings
# to that name. For these scripts, we can either leave them
# unmapped (so the InterIndic->Local mapping is empty), or
# try to remap.
unmappedScript:
foreach my $scr (keys %SCRIPT_TO_BASE) {
next if ($seen{$scr});
if ($DO_HEURISTIC_REMAP) {
# Try to remap through the known equivalences in our
# remapping table
foreach my $remapRE (@REMAP) {
local $_ = $name;
if (eval($remapRE)) {
if (exists $SCRIPT_NAME_TO_CODE{$scr}{$_}) {
$ {$scr}{$interIndicCode} =
hex4($SCRIPT_TO_BASE{$scr} + $SCRIPT_NAME_TO_CODE{$scr}{$_}) .
"; // REMAP: $name -> $_";
++$REMAP{$remapRE};
next unmappedScript;
}
}
}
}
# Try to remap through the file. This contains remappings of
# the form 0991->0993. That is, it contains local remappings
# that we can substitute and try again with.
#|GURMUKHI-InterIndic ------------------------------
#|// 0A02>; // UNMAPPED INTERNAL: SIGN BINDI
#|InterIndic-GURMUKHI ------------------------------
#|// E001>; // UNMAPPED EXTERNAL: SIGN CANDRABINDU
#|"\u0A01>\u0A02;"
# In this example, the remapping tells us that the non-existent
# character A01 should be considered equivalent to the real
# character A02.
# We implement this by adding two mappings; one from
# the InterIndic equivalent of A01, that is, E001, to A02,
# and one from A02, which otherwise has no mapping, to E001.
if ($EXCEPTIONS_FILE && $interIndicCode < $INTERINDIC_EXTRA) {
# Try to map this InterIndic character back to a the spot
# it would occupy in this script if it had a mapping.
my $code = $interIndicCode & 0x7F;
my $pseudoFullCode = $SCRIPT_TO_BASE{$scr} + $code;
if (exists $EXCEPTIONS{$pseudoFullCode}) {
my $fullCodeArray = $EXCEPTIONS{$pseudoFullCode};
my $comment;
foreach my $c (@$fullCodeArray) {
$comment .= "." if ($comment);
$comment .= $FULLCODE_TO_NAME{$c};
}
$comment = "; // REMAP ($EXCEPTIONS_FILE): " .
hex4($pseudoFullCode) . ">" . hexArray(@$fullCodeArray) . " = " .
$CODE_TO_NAME{$code} . ">" . $comment;
$ {$scr}{$interIndicCode} = hexArray(@$fullCodeArray) . $comment;
if (scalar @$fullCodeArray == 1) {
if (exists $ {$scr}{$fullCodeArray->[0]}) {
# There's already a proper mapping; no need to fill
# in reverse
} else {
$ {$scr}{$fullCodeArray->[0]} = hex4($interIndicCode) . $comment;
}
}
next unmappedScript;
}
}
$SCRIPT_FULLCODE_TO_IS_UNMAPPED{$scr}{$interIndicCode} = 1;
local $_ = "; // UNMAPPED InterIndic-$scr: $name";
if (exists $SCRIPT_CODE_TO_NAME{$scr}{$interIndicCode & 0x7F}) {
my $fullCode = $SCRIPT_TO_BASE{$scr} + ($interIndicCode & 0x7F);
$_ .= " (" . hex4($fullCode) . " = " . $FULLCODE_TO_NAME{$fullCode} . ")";
}
$ {$scr}{$interIndicCode} = $_;
}
}
# Add in unmapped entries for each script
foreach my $scr (keys %SCRIPT_TO_BASE) {
my $base = $SCRIPT_TO_BASE{$scr};
unmappedInt:
foreach my $code (keys %{$SCRIPT_CODE_TO_NAME{$scr}}) {
my $fullCode = $code + $base;
next if (exists $ {$scr}{$fullCode});
my $name = $SCRIPT_CODE_TO_NAME{$scr}{$code};
if ($DO_HEURISTIC_REMAP) {
foreach my $remapRE (@REMAP) {
local $_ = $name;
if (eval($remapRE)) {
if (exists $INTERINDIC_NAME_TO_FULLCODE{$_}) {
$ {$scr}{$fullCode} =
hex4($INTERINDIC_NAME_TO_FULLCODE{$_}) .
"; // REMAP: $name -> $_";
++$REMAP{$remapRE};
next unmappedInt;
}
}
}
}
# Now try the decomp table
if ($DO_DECOMP_REMAP && exists $DECOMP{$name}) {
my $x;
my $cmt = "; // DECOMP: $name -> ";
foreach my $n (@{$DECOMP{$name}}) {
if (exists $SCRIPT_NAME_TO_CODE{$scr}{$n}) {
$x .= hex4($SCRIPT_TO_BASE{$scr} + $SCRIPT_NAME_TO_CODE{$scr}{$n});
$cmt .= $n . " + ";
} else {
$cmt = 0;
last;
}
}
if ($cmt) {
$ {$scr}{$fullCode} = $x . $cmt;
next unmappedInt;
}
}
$SCRIPT_FULLCODE_TO_IS_UNMAPPED{$scr}{$fullCode} = 1;
$ {$scr}{$fullCode} = "; // UNMAPPED $scr-InterIndic: $name";
}
}
# GUR
# E00B>; // UNMAPPED EXTERNAL: LETTER VOCALIC R "\u0A0B>\u0A30\u0A3F;"
# E00C>; // UNMAPPED EXTERNAL: LETTER VOCALIC L "\u0A0C>\u0A07;"
# E00D>; // UNMAPPED EXTERNAL: LETTER CANDRA E "\u0A0D>\u0A10;"
# E011>; // UNMAPPED EXTERNAL: LETTER CANDRA O "\u0A11>\u0A14;"
# E037>; // UNMAPPED EXTERNAL: LETTER SSA "\u0A37>\u0A36;"
# E045>; // UNMAPPED EXTERNAL: VOWEL SIGN CANDRA E "\u0A45>\u0A48;"
# E049>; // UNMAPPED EXTERNAL: VOWEL SIGN CANDRA O "\u0A49>\u0A4C;"
# Fix QA too
# Dump out script maps
foreach my $scr (sort keys %SCRIPT_TO_BASE) {
## next unless ($scr eq 'TELUGU'); # Debugging
my @rules;
my $flag = 1;
foreach my $fullCode (sort {$a <=> $b} keys %{$scr}) {
if ($flag && $fullCode >= $INTERINDIC) {
# We have the complete <scr>-InterIndic rules; dump
# them out.
generateRBT($scr, "InterIndic", \@rules, $OUTDIR);
@rules = ();
$flag = 0;
}
if (exists $SCRIPT_FULLCODE_TO_IS_UNMAPPED{$scr}{$fullCode}) {
push @rules, "// " . hex4($fullCode) . ">" . $ {$scr}{$fullCode};
} else {
push @rules, hex4($fullCode) . ">" . $ {$scr}{$fullCode};
}
}
# Now generate the InterIndic-<scr> rules.
generateRBT("InterIndic", $scr, \@rules, $OUTDIR);
# print "$scr-InterIndic ------------------------------\n";
# my $flag = 1;
# foreach my $fullCode (sort {$a <=> $b} keys %{$scr}) {
# if ($flag && $fullCode >= $INTERINDIC) {
# print "InterIndic-$scr ------------------------------\n";
# $flag = 0;
# }
# if (exists $SCRIPT_FULLCODE_TO_IS_UNMAPPED{$scr}{$fullCode}) {
# print "// ", hex4($fullCode), ">", $ {$scr}{$fullCode}, "\n";
# } else {
# print hex4($fullCode), ">", $ {$scr}{$fullCode}, "\n";
# }
# }
}
# List successful remappings
if ($DO_HEURISTIC_REMAP) {
foreach my $remap (sort keys %REMAP) {
print STDERR "REMAP ", $REMAP{$remap}, " x $remap\n";
}
}
#----------------------------------------------------------------------
# SUBROUTINES
# Return a listing of an array of scripts
# Param: array ref
sub formatScriptList {
my $aref = shift;
if ($SCRIPT_COUNT == @$aref) {
return "all";
} elsif (($SCRIPT_COUNT - 3) <= @$aref) {
my $s = "all but";
my %temp;
foreach (@$aref) { $temp{$_} = 1; }
foreach (sort keys %SCRIPT_TO_BASE) {
$s .= " $_" unless exists $temp{$_};
}
return $s;
} else {
return join(" ", @$aref);
}
}
# Format as %02X hex
sub hex2 {
sprintf("%02X", $_[0]);
}
# Format as %04X hex
sub hex4 {
sprintf("\\u%04X", $_[0]);
}
# Format an array as %04X hex, delimited by "."s
sub hexArray {
join("", map { hex4($_); } @_);
}
# Parse a string of the form "\u0D01" to an array of integers.
# Must ONLY contain escapes.
# Return the array.
sub parseUnicodeEscape {
local $_ = shift;
my $orig = $_;
my @result;
while (length($_)) {
if (/^\\u([0-9a-f]{4})(.*)/i) {
push @result, hex($1);
$_ = $2;
} else {
die "Can't parse Unicode escape $orig\n";
}
}
if (0 == @result) {
die "Can't parse Unicode escape $orig\n";
}
@result;
}
# Return 1 if the two arrays of ints are equal.
# Param: ref to array of ints
# Param: ref to array of ints
sub intArraysEqual {
my $a = shift;
my $b = shift;
if (scalar @$a == scalar @$b) {
for (my $i=0; $i<@$a; ++$i) {
if ($a->[$i] != $b->[$i]) {
return 0;
}
}
return 1;
}
return 0;
}
# Given a rule, possibly with trailing // comment,
# quote the rule part and add a trailing "+" after
# it.
sub quoteRule {
my $cmt;
$cmt = $1 if (s|(\s*//.*)||); # isolate trailing // comment
s/^(.*;)/\"$1\"+/;
s/$/$cmt/;
$_;
}
# Given the name of the source script, name of the target script,
# and array of rule strings, return a string containing the source
# for a RuleBasedTransliterator file.
# Param: source script name
# Param: target script name
# Param: ref to array of rules. These rules are unquoted, without
# concatenators between them, but do have trailing ';' separators.
# Param: name of output directory
sub generateRBT {
# $TOOL - name of generating tool
# $DATE - date of generation
# $SCRIPTFROM - name of source script
# $SCRIPTTO - name of target script
# $RULES - rules
my ($source, $target, $rules, $outdir) = @_;
my $text;
$outdir =~ s|[/\\]$||; # Delete trailing / or \
my $OUT = "$outdir/TransliterationRule_${source}_$target.java";
open(RBT_TEMPLATE) or die;
open(OUT, ">$OUT") or die;
while (<RBT_TEMPLATE>) {
while (/\$([A-Za-z0-9]+)/) {
my $tag = $1;
my $sub;
if ($tag eq 'TOOL') {
$sub = $RBT_GEN_TOOL;
} elsif ($tag eq 'DATE') {
$sub = localtime;
} elsif ($tag eq 'SCRIPTFROM') {
$sub = $source;
} elsif ($tag eq 'SCRIPTTO') {
$sub = $target;
} elsif ($tag eq 'RULES') {
# Get any whitespace-only indent off the front of this tag
my $indent;
$indent = $1 if (/^(\s+)\$$tag/);
# The rules in the array are not quoted. We need to quote
# them and add '+' operators between them. We do NOT need
# to add ';' separators. We DO need to separate trailing
# // comments and handle them.
$sub = join("\n$indent", map(&quoteRule, @$rules)) .
"\n$indent\"\"";
} else {
print STDERR "ERROR in $RBT_TEMPLATE: Unknown tag $tag\n";
$sub = "[ERROR:Unknown tag \$$tag]";
}
s/\$$tag/$sub/;
}
print OUT;
}
close(OUT);
close(RBT_TEMPLATE);
print STDERR "Written: $OUT\n";
}
__END__