| #!/usr/bin/perl |
| #* |
| #******************************************************************************* |
| #* Copyright (C) 2006, International Business Machines |
| #* Corporation and others. All Rights Reserved. |
| #******************************************************************************* |
| #* |
| #* file name: genspva.pl |
| #* encoding: US-ASCII |
| #* tab size: 8 (not used) |
| #* indentation:4 |
| #* |
| #* Created by: Ram Viswanadha |
| #* |
| #* This file filters iso15924-utf8-<date>.txt |
| #* |
| |
| use File::Find; |
| use File::Basename; |
| use IO::File; |
| use Cwd; |
| use File::Copy; |
| use Getopt::Long; |
| use File::Path; |
| use File::Copy; |
| |
| #run the program |
| main(); |
| |
| #--------------------------------------------------------------------- |
| # The main program |
| |
| sub main(){ |
| GetOptions( |
| "--destdir=s" => \$destdir, |
| "--iso15924=s" => \$iso, |
| "--prop=s" => \$prop, |
| "--code-start=s" => \$code, |
| ); |
| usage() unless defined $destdir; |
| usage() unless defined $iso; |
| usage() unless defined $prop; |
| |
| $outfile = "$destdir/SyntheticPropertyValueAliases.txt"; |
| $propFH = IO::File->new($prop,"r") |
| or die "could not open the file $prop for reading: $! \n"; |
| $isoFH = IO::File->new($iso,"r") |
| or die "could not open the file $iso for reading: $! \n"; |
| $outFH = IO::File->new($outfile,"w") |
| or die "could not open the file $outfile for reading: $! \n"; |
| my @propLines; |
| while (<$propFH>) { |
| next if(!($_ =~/sc ; /)); |
| push(@propLines, $_); |
| } |
| printHeader($outFH); |
| if(defined $code){ |
| print "Please add the following to UScriptCode enum in uscript.h.\n"; |
| print "#ifndef U_HIDE_DRAFT_API\n"; |
| } |
| while (<$isoFH>) { |
| next if($_=~/^#/);#skip if the line starts with a comment char |
| ($script, $t, $name, $rest) = split(/;/,$_,4); |
| #sc ; Arab |
| $outstr = "sc ; $script"; |
| $encoded = 0; #false |
| |
| # seach the propLines to make sure that this scipt code is not |
| # encoded in Unicode |
| foreach $key (@propLines){ |
| if($key =~ /$outstr/){ |
| $encoded = 1; |
| } |
| } |
| next if($encoded == 1); |
| #ignore private use codes |
| next if($script =~ /Qa[ab][a-z]/); |
| |
| #if($script eq "Qaaa"){ |
| # $outstr = $outstr." ; Private_Use_Start\n"; |
| #}elsif($script eq "Qabx"){ |
| # $outstr = $outstr." ; Private_Use_End\n"; |
| #}else{ |
| # $outstr = $outstr." ; $script \n"; |
| #} |
| |
| $outstr = $outstr." ; $script \n"; |
| print $outFH $outstr; |
| |
| #print to console |
| if(defined $code){ |
| if($name =~ /[(\s,\x80-\xFF]/){ |
| $name = $script; |
| } |
| $name =~s/-/_/g; |
| |
| $scriptcode = "USCRIPT_".uc($name); |
| print " $scriptcode = $code, /* $script */\n"; |
| $code++; |
| } |
| |
| } |
| if(defined $code){ |
| print "#endif /* U_HIDE_DRAFT_API */\n"; |
| } |
| for($i=0; $i<2; $i++){ |
| |
| } |
| close($isoFH); |
| close($propFH); |
| close($outFH); |
| } |
| #----------------------------------------------------------------------- |
| sub printHeader{ |
| ($outFH) = @_; |
| ($DAY, $MONTH, $YEAR) = (localtime)[3,4,5]; |
| $YEAR += 1900; |
| #We will print our copyright here + warnings |
| print $outFH <<END_HEADER_COMMENT; |
| ######################################################################## |
| # Copyright (c) 2006-$YEAR, International Business Machines |
| # Corporation and others. All Rights Reserved. |
| ######################################################################## |
| # file name: SyntheticPropertyValueAliases.txt |
| # encoding: US-ASCII |
| # tab size: 8 (not used) |
| # indentation: 4 |
| # created by: gensvpa.pl |
| ######################################################################## |
| |
| # This file follows the format of PropertyValueAliases.txt |
| # It contains synthetic property value aliases not present |
| # in the UCD. Unlike PropertyValueAliases.txt, it should |
| # NOT contain a version number. |
| |
| ######################################################################## |
| # THIS FILE IS MACHINE-GENERATED, DON'T PLAY WITH IT IF YOU DON'T KNOW |
| # WHAT YOU ARE DOING, OTHERWISE VERY BAD THINGS WILL HAPPEN! |
| ######################################################################## |
| |
| # set the same names as short and long names to fit the syntax without |
| # inventing names that we would have to support forever |
| |
| # Script (sc) |
| |
| END_HEADER_COMMENT |
| } |
| #----------------------------------------------------------------------- |
| sub usage { |
| print << "END"; |
| Usage: |
| gensvpa.pl |
| Options: |
| --destdir=<directory> |
| --iso15924=<file name> |
| --prop=<PropertyValueAliases.txt> |
| --code-start=s |
| e.g.: gensvpa.pl --destdir=<icu>/source/tools/genpname --iso15924=iso15924-utf8-20041025.txt --prop=<icu>/source/data/unidata --code-start=60 |
| END |
| exit(0); |
| } |