src/com/ibm/icu/dev/tool/translit/dumpICUrules.bat - external/github.com/unicode-org/icu - Git at Google

 @rem = '--*-Perl-*--
 @echo off
 if "%OS%" == "Windows_NT" goto WinNT
 perl -x -S "%0" %1 %2 %3 %4 %5 %6 %7 %8 %9
 goto endofperl
 :WinNT
 perl -x -S "%0" %*
 if NOT "%COMSPEC%" == "%SystemRoot%\system32\cmd.exe" goto endofperl
 if %errorlevel% == 9009 echo You do not have Perl in your PATH.
 goto endofperl
 @rem ';
 #!perl
 #line 14

 # This perl script creates ICU transliterator data files, that live
 # in icu/data, from ICU4J UTF8 transliterator data files, in
 # icu4j/src/com/ibm/text/resources.
 #
 # The transformation that is done is very minimal.  The script assumes
 # that the input files use only # comments
 # and that they follow a rigid format.
 #
 # The output files are named according to ICU conventions (see NAME_MAP
 # below) and created in the current directory.	They should be manually
 # checked and then copied into the icu/data directory.  An ICU build must
 # then be initiated, and the standard suite of ICU transliterator tests
 # should be run after that.
 #
 # Alan Liu 5/19/00 2/27/01

 use Getopt::Long;

 my $DIR = "../../../impl/data";
 my $ID = '';

 GetOptions('dir=s' => \$DIR,
            'id=s' => \$ID,
            '<>' => \&usage) || die;

 usage() if (@ARGV);

 $ID =~ s/-/_/;
 if (! -d $DIR) {
     print STDERR "$DIR is not a directory\n";
     usage();
 }

 sub usage {
     my $me = $0;
     $me =~ s|.+[/\\]||;
     print "Usage: $me [-dir <dir>] [-id <id>]\n";
     print " --dir <dir> Specify the directory containing the\n";
     print " 	Transliterator_*.txt files\n";
     print " --id <id>	Specify a single ID to transform, e.g.\n";
     print " 	Fullwidth-Halfwidth\n";
     die;
 }

 $JAVA_ONLY = '-';

 $OUTDIR = "icu4c";
 mkdir($OUTDIR,0777);

 # Mapping from Java file names to ICU file names
 %NAME_MAP = (
 	 # An ICU name of "" means the ICU name == the ID

 	 "Any_Accents" => "",
 	 "Any_Publishing" => "",
 	 "Bengali_InterIndic" => "Beng_InterIndic",
 	 "Cyrillic_Latin" => "Cyrl_Latn",
 	 "Devanagari_InterIndic" => "Deva_InterIndic",
 	 "Fullwidth_Halfwidth" => "FWidth_HWidth",
 	 "Greek_Latin" => "Grek_Latn",
 	 "Gujarati_InterIndic" => "Gujr_InterIndic",
 	 "Gurmukhi_InterIndic" => "Guru_InterIndic",
 	 "Hiragana_Katakana" => "Hira_Kana",
 	 "Hiragana_Latin" => "Hira_Latn",
 	 "InterIndic_Bengali" => "InterIndic_Beng",
 	 "InterIndic_Devanagari" => "InterIndic_Deva",
 	 "InterIndic_Gujarati" => "InterIndic_Gujr",
 	 "InterIndic_Gurmukhi" => "InterIndic_Guru",
 	 "InterIndic_Kannada" => "InterIndic_Knda",
 	 "InterIndic_Latin" => "InterIndic_Latn",
 	 "InterIndic_Malayalam" => "InterIndic_Mlym",
 	 "InterIndic_Oriya" => "InterIndic_Orya",
 	 "InterIndic_Tamil" => "InterIndic_Taml",
 	 "InterIndic_Telugu" => "InterIndic_Telu",
 	 "Kannada_InterIndic" => "Knda_InterIndic",
 	 "Latin_InterIndic" => "Latn_InterIndic",
 	 "Latin_Jamo" => "Latn_Jamo",
 	 "Latin_Katakana" => "Latn_Kana",
 	 "Malayalam_InterIndic" => "Mlym_InterIndic",
 	 "Oriya_InterIndic" => "Orya_InterIndic",
 	 "Tamil_InterIndic" => "Taml_InterIndic",
 	 "Telugu_InterIndic" => "Telu_InterIndic",

 	 "Han_Pinyin" => $JAVA_ONLY,
 	 "Kanji_English" => $JAVA_ONLY,
 	 "Kanji_OnRomaji" => $JAVA_ONLY,
 	 );

 # Header blocks of text written at start of ICU output files
 $HEADER1 = <<END;
 //--------------------------------------------------------------------
 // Copyright (c) 1999-2004, International Business Machines
 // Corporation and others.  All Rights Reserved.
 //--------------------------------------------------------------------
 // THIS IS A MACHINE-GENERATED FILE
 END
 $HEADER2 = <<END;
 //--------------------------------------------------------------------
 END

 $TOOL = $0;

 # Iterate over all Java RBT rule files
 foreach (<$DIR/Transliterator_*.txt>) {
     next if (/~$/);
     next if (/_index\.txt$/);
     next if ($ID && !/$ID/);
     my ($out, $id) = convertFileName($_);
     if ($out) {
         if ($out eq $JAVA_ONLY) {
 	print STDERR "$id: Java only\n";
 	next;
         }
         file($id, $_, $out);
     }
 }

 convertIndex();

 ######################################################################
 # Convert a Java file name to C
 # Param: Java file name of the form m|Transliterator_(.+)\.utf8\.txt$|
 # Return: A C file name (e.g., ldevan.txt) or the empty string,
 #  if there is no mapping, or $JAVA_ONLY if the given file isn't
 #  intended to be incorporated into C.
 sub convertFileName {
     local $_ = shift;
     my $id;
     if (m|Transliterator_(.+)\.utf8\.txt$| ||
         m|Transliterator_(.+)\.txt$|) {
         $id = $1;
     } else { die "Can't parse Java file name $_"; }
     if (!exists $NAME_MAP{$id}) {
         print STDERR "ERROR: $id not in map; please update $0\n";
         return '';
     }
     my $out = $NAME_MAP{$id};
     if ($out eq '') {
         $out = $id;
     }
     if ($out ne $JAVA_ONLY) {
         $out = 't_' . $out;
     }
     return ($out, $id);
 }

 ######################################################################
 # Convert the index file from Java to C format
 sub convertIndex {
     $JAVA_INDEX = "Transliterator_index.txt";
     $C_INDEX = "translit_index.txt";
     open(JAVA_INDEX, "$DIR/$JAVA_INDEX") or die;
     open(C_INDEX, ">$OUTDIR/$C_INDEX") or die;

     header(\*C_INDEX, $JAVA_INDEX);

     print C_INDEX <<END;
 //--------------------------------------------------------------------
 // N.B.: This file has been generated mechanically from the
 // corresponding ICU4J file, which is the master file that receives
 // primary updates.  The colon-delimited fields have been split into
 // separate strings.  For 'file' and 'internal' lines, the encoding
 // field has been deleted, since the encoding is processed at build
 // time in ICU4C.  Certain large rule sets not intended for general
 // use have been commented out with the notation "Java only".
 //--------------------------------------------------------------------

 translit_index {
   RuleBasedTransliteratorIDs {
 END

     while (<JAVA_INDEX>) {
         # ignore $Source $Revision $Date CVS keyword substitutions
         next if /\$Source/ ;
         next if /\$Revision/ ;
         next if /\$Date/ ;

         # we have printed out the copyright info ... ignore one in Java version
         next if /Copyright/ ;
         next if /Corporation/;

         # Comments; change # to //
         if (s|^(\s*)\#|$1//|) {
 	print C_INDEX;
 	next;
         }
         # Blank lines
         if (!/\S/) {
 	print C_INDEX;
 	next;
         }
         # Content lines
         chomp;
         my $prefix = '';
         #replace \p with \\p
         $_=~ s/\\p/\\\\p/g;
         my @a = split(':', $_);
         if ($a[1] eq 'file' || $a[1] eq 'internal') {
 	# Convert the file name
 	my $id;
 	($a[2], $id) = convertFileName($a[2]);
 	if ($a[2] eq $JAVA_ONLY) {
 	    $prefix = '// Java only: ';
 	}

 	# Delete the encoding field
 	splice(@a, 3, 1);
         } elsif ($a[1] eq 'alias') {
 	# Pad out with extra blank fields to make the
 	# 2-d array square
 	push @a, "";
         } else {
 	die "Can't parse $_";
         }
         print C_INDEX
 	$prefix, "{ ",
 	join(", ", map("\"$_\"", @a)),
 	" },\n";
     }

     print C_INDEX <<END;
   }
 }
 END

     close(C_INDEX);
     close(JAVA_INDEX);
     print STDERR "$JAVA_INDEX -> $C_INDEX\n";
 }

 ######################################################################
 # Output a header
 # Param: Filehandle
 sub header {
     my $out = shift;
     my $in = shift;
     print $out $HEADER1;
     print $out "// Tool: $TOOL\n// Source: $in\n";
     print $out "// Date: ", scalar localtime, "\n";
     print $out $HEADER2;
     print $out "\n";
 }

 ######################################################################
 # Process one file
 # Param: ID, e.g. Fullwidth-Halfwidth
 # Param: Java input file name, e.g.
 #  f:/icu4j/src/com/ibm/text/resources/Transliterator_Fullwidth_Halfwidth.utf8.txt
 # Param: ICU output file name, e.g. fullhalf
 sub file {
     my $id = shift;
     my $IN = shift;
     my $out = shift;

     my $OUT = "$out.txt";

     # Show input size. Show output size later -- useful for quick sanity check.
     print "$id (", -s $IN, ") -> $OUT (";

     # Open file, write UTF8 marker, close it, and reopen in text mode
     open(OUT, ">$OUTDIR/$OUT") or die;
     binmode OUT; # Must do this so we can write our UTF8 marker
     print OUT pack("C3", 0xEF, 0xBB, 0xBF); # Write UTF8 marker
     close(OUT);

     open(OUT, ">>$OUTDIR/$OUT") or die;
     print OUT " // -*- Coding: utf-8; -*-\n";

     header(\*OUT, $IN);
     print OUT "// $id\n";
     print OUT "\n";
     print OUT "$out {\n";
     print OUT "  Rule {\n";

     open(IN, $IN) or die;
     binmode IN; # IN is a UTF8 file

     my $first = 1;
     my $BOM = pack("C3", 239, 187, 191); # a UTF8 byte order mark

     # Process each line by changing # comments to // comments
     # and taking other text and enclosing it in double quotes
     while (<IN>) {
         my $raw = $_;
 		# ignore $Source $Revision $Date CVS keyword substitutions
 		next if /\$Source/ ;
 		next if /\$Revision/ ;
 		next if /\$Date/ ;

 		# we have printed out the copyright info ... ignore one in Java version
 		next if /Copyright/ ;
 		next if /Corporation/;

         # Look for and delete BOM
         if ($first) {
 	s/^$BOM//;
 	$first = 0;
         }

         # Clean the eol junk up
         s/[\x0D\x0A]+$//;

         # If there is a trailing backslash, then delete it -- we don't
         # need line continuation in C, since adjacent strings are
         # concatenated.  Count trailing backslashes; if they are odd,
         # one is trailing.
         if (m|(\\+)$|) {
 	if ((length($1) % 2) == 1) {
 	    s|\\$||;
 	}
         }

         # Transform escaped characters
         hideEscapes();

         if (/^(\s*)(\#.*)$/) {
 	# Comment-only line
 	my ($white, $cmt) = ($1, $2);
 	$cmt =~ s|\#|//|;
 	$_ = $white . $cmt;

         } elsif (!/\S/) {
 	# Blank line -- leave as-is

         } else {
 	# Remove single-quoted matter
 	my @quotes;
 	my $nquotes = 0;
 	my $x = $_;
 	while (s/^([^\']*)(\'[^\']*\')/$1<<x$nquotes>>/) {
 	    push @quotes, $2;
 	    ++$nquotes;
 	}

 	# Extract comment
 	my $cmt = '';
 	if (s|\#(.*)||) {
 	    $cmt = '//' . $1;
 	}

 	# Add quotes
 	s|^(\s*)(\S.*?)(\s*)$|$1\"$2\"$3|;

 	# Restore single-quoted matter
 	for (my $i=0; $i<$nquotes; ++$i) {
 	    s|<<x$i>>|$quotes[$i]|;
 	}

 	# Restore comment
 	$_ .= $cmt;
         }

         # Restore escaped characters
         restoreEscapes();

         print OUT $_, "\n";
     }

     # Finish up
     close(IN);
     print OUT "  }\n";
     print OUT "}\n";
     close(OUT);

     # Write output file size for sanity check
     print -s "$OUTDIR/$OUT", ")\n";
 }

 ######################################################################
 sub hideEscapes {
     # Transform escaped characters
     s|\\\\|<<bs>>|g; # DO THIS FIRST Transform backslashes
     s|\\u([a-zA-Z0-9]{4})|<<u$1>>|g; # Transform Unicode escapes
     s|\\\"|<<dq>>|g; # Transform backslash double quote
     s|\\\'|<<sq>>|g; # Transform backslash single quote
     s|\\\#|<<lb>>|g; # Transform backslash pound
     s|\\(.)|<<q$1>>|g; # Transform backslash escapes
 }

 ######################################################################
 sub restoreEscapes {
     # Restore escaped characters
     s|<<bs>>|\\\\|g;
     s|<<dq>>|\\\\\\\"|g;
     s|<<sq>>|\\\\\\\'|g;
     s|<<lb>>|\\\\\\\#|g;
     s|<<q(.)>>|\\\\\\$1|g;
     s|<<u0000>>|\\\\u0000|g; # Double escape U+0000
     s|<<u(....)>>|\\u$1|g;
 }

 __END__
 :endofperl
	@rem = '---Perl---
	@echo off
	if "%OS%" == "Windows_NT" goto WinNT
	perl -x -S "%0" %1 %2 %3 %4 %5 %6 %7 %8 %9
	goto endofperl
	:WinNT
	perl -x -S "%0" %*
	if NOT "%COMSPEC%" == "%SystemRoot%\system32\cmd.exe" goto endofperl
	if %errorlevel% == 9009 echo You do not have Perl in your PATH.
	goto endofperl
	@rem ';
	#!perl
	#line 14

	# This perl script creates ICU transliterator data files, that live
	# in icu/data, from ICU4J UTF8 transliterator data files, in
	# icu4j/src/com/ibm/text/resources.
	#
	# The transformation that is done is very minimal. The script assumes
	# that the input files use only # comments
	# and that they follow a rigid format.
	#
	# The output files are named according to ICU conventions (see NAME_MAP
	# below) and created in the current directory. They should be manually
	# checked and then copied into the icu/data directory. An ICU build must
	# then be initiated, and the standard suite of ICU transliterator tests
	# should be run after that.
	#
	# Alan Liu 5/19/00 2/27/01

	use Getopt::Long;

	my $DIR = "../../../impl/data";
	my $ID = '';

	GetOptions('dir=s' => \$DIR,
	'id=s' => \$ID,
	'<>' => \&usage) \|\| die;

	usage() if (@ARGV);

	$ID =~ s/-/_/;
	if (! -d $DIR) {
	print STDERR "$DIR is not a directory\n";
	usage();
	}

	sub usage {
	my $me = $0;
	$me =~ s\|.+[/\\]\|\|;
	print "Usage: $me [-dir <dir>] [-id <id>]\n";
	print " --dir <dir> Specify the directory containing the\n";
	print " Transliterator_*.txt files\n";
	print " --id <id> Specify a single ID to transform, e.g.\n";
	print " Fullwidth-Halfwidth\n";
	die;
	}

	$JAVA_ONLY = '-';

	$OUTDIR = "icu4c";
	mkdir($OUTDIR,0777);

	# Mapping from Java file names to ICU file names
	%NAME_MAP = (
	# An ICU name of "" means the ICU name == the ID

	"Any_Accents" => "",
	"Any_Publishing" => "",
	"Bengali_InterIndic" => "Beng_InterIndic",
	"Cyrillic_Latin" => "Cyrl_Latn",
	"Devanagari_InterIndic" => "Deva_InterIndic",
	"Fullwidth_Halfwidth" => "FWidth_HWidth",
	"Greek_Latin" => "Grek_Latn",
	"Gujarati_InterIndic" => "Gujr_InterIndic",
	"Gurmukhi_InterIndic" => "Guru_InterIndic",
	"Hiragana_Katakana" => "Hira_Kana",
	"Hiragana_Latin" => "Hira_Latn",
	"InterIndic_Bengali" => "InterIndic_Beng",
	"InterIndic_Devanagari" => "InterIndic_Deva",
	"InterIndic_Gujarati" => "InterIndic_Gujr",
	"InterIndic_Gurmukhi" => "InterIndic_Guru",
	"InterIndic_Kannada" => "InterIndic_Knda",
	"InterIndic_Latin" => "InterIndic_Latn",
	"InterIndic_Malayalam" => "InterIndic_Mlym",
	"InterIndic_Oriya" => "InterIndic_Orya",
	"InterIndic_Tamil" => "InterIndic_Taml",
	"InterIndic_Telugu" => "InterIndic_Telu",
	"Kannada_InterIndic" => "Knda_InterIndic",
	"Latin_InterIndic" => "Latn_InterIndic",
	"Latin_Jamo" => "Latn_Jamo",
	"Latin_Katakana" => "Latn_Kana",
	"Malayalam_InterIndic" => "Mlym_InterIndic",
	"Oriya_InterIndic" => "Orya_InterIndic",
	"Tamil_InterIndic" => "Taml_InterIndic",
	"Telugu_InterIndic" => "Telu_InterIndic",

	"Han_Pinyin" => $JAVA_ONLY,
	"Kanji_English" => $JAVA_ONLY,
	"Kanji_OnRomaji" => $JAVA_ONLY,
	);

	# Header blocks of text written at start of ICU output files
	$HEADER1 = <<END;
	//--------------------------------------------------------------------
	// Copyright (c) 1999-2004, International Business Machines
	// Corporation and others. All Rights Reserved.
	//--------------------------------------------------------------------
	// THIS IS A MACHINE-GENERATED FILE
	END
	$HEADER2 = <<END;
	//--------------------------------------------------------------------
	END

	$TOOL = $0;

	# Iterate over all Java RBT rule files
	foreach (<$DIR/Transliterator_*.txt>) {
	next if (/~$/);
	next if (/_index\.txt$/);
	next if ($ID && !/$ID/);
	my ($out, $id) = convertFileName($_);
	if ($out) {
	if ($out eq $JAVA_ONLY) {
	print STDERR "$id: Java only\n";
	next;
	}
	file($id, $_, $out);
	}
	}

	convertIndex();

	######################################################################
	# Convert a Java file name to C
	# Param: Java file name of the form m\|Transliterator_(.+)\.utf8\.txt$\|
	# Return: A C file name (e.g., ldevan.txt) or the empty string,
	# if there is no mapping, or $JAVA_ONLY if the given file isn't
	# intended to be incorporated into C.
	sub convertFileName {
	local $_ = shift;
	my $id;
	if (m\|Transliterator_(.+)\.utf8\.txt$\| \|\|
	m\|Transliterator_(.+)\.txt$\|) {
	$id = $1;
	} else { die "Can't parse Java file name $_"; }
	if (!exists $NAME_MAP{$id}) {
	print STDERR "ERROR: $id not in map; please update $0\n";
	return '';
	}
	my $out = $NAME_MAP{$id};
	if ($out eq '') {
	$out = $id;
	}
	if ($out ne $JAVA_ONLY) {
	$out = 't_' . $out;
	}
	return ($out, $id);
	}

	######################################################################
	# Convert the index file from Java to C format
	sub convertIndex {
	$JAVA_INDEX = "Transliterator_index.txt";
	$C_INDEX = "translit_index.txt";
	open(JAVA_INDEX, "$DIR/$JAVA_INDEX") or die;
	open(C_INDEX, ">$OUTDIR/$C_INDEX") or die;

	header(\*C_INDEX, $JAVA_INDEX);

	print C_INDEX <<END;
	//--------------------------------------------------------------------
	// N.B.: This file has been generated mechanically from the
	// corresponding ICU4J file, which is the master file that receives
	// primary updates. The colon-delimited fields have been split into
	// separate strings. For 'file' and 'internal' lines, the encoding
	// field has been deleted, since the encoding is processed at build
	// time in ICU4C. Certain large rule sets not intended for general
	// use have been commented out with the notation "Java only".
	//--------------------------------------------------------------------

	translit_index {
	RuleBasedTransliteratorIDs {
	END

	while (<JAVA_INDEX>) {
	# ignore $Source $Revision $Date CVS keyword substitutions
	next if /\$Source/ ;
	next if /\$Revision/ ;
	next if /\$Date/ ;

	# we have printed out the copyright info ... ignore one in Java version
	next if /Copyright/ ;
	next if /Corporation/;

	# Comments; change # to //
	if (s\|^(\s*)\#\|$1//\|) {
	print C_INDEX;
	next;
	}
	# Blank lines
	if (!/\S/) {
	print C_INDEX;
	next;
	}
	# Content lines
	chomp;
	my $prefix = '';
	#replace \p with \\p
	$_=~ s/\\p/\\\\p/g;
	my @a = split(':', $_);
	if ($a[1] eq 'file' \|\| $a[1] eq 'internal') {
	# Convert the file name
	my $id;
	($a[2], $id) = convertFileName($a[2]);
	if ($a[2] eq $JAVA_ONLY) {
	$prefix = '// Java only: ';
	}

	# Delete the encoding field
	splice(@a, 3, 1);
	} elsif ($a[1] eq 'alias') {
	# Pad out with extra blank fields to make the
	# 2-d array square
	push @a, "";
	} else {
	die "Can't parse $_";
	}
	print C_INDEX
	$prefix, "{ ",
	join(", ", map("\"$_\"", @a)),
	" },\n";
	}

	print C_INDEX <<END;
	}
	}
	END

	close(C_INDEX);
	close(JAVA_INDEX);
	print STDERR "$JAVA_INDEX -> $C_INDEX\n";
	}

	######################################################################
	# Output a header
	# Param: Filehandle
	sub header {
	my $out = shift;
	my $in = shift;
	print $out $HEADER1;
	print $out "// Tool: $TOOL\n// Source: $in\n";
	print $out "// Date: ", scalar localtime, "\n";
	print $out $HEADER2;
	print $out "\n";
	}

	######################################################################
	# Process one file
	# Param: ID, e.g. Fullwidth-Halfwidth
	# Param: Java input file name, e.g.
	# f:/icu4j/src/com/ibm/text/resources/Transliterator_Fullwidth_Halfwidth.utf8.txt
	# Param: ICU output file name, e.g. fullhalf
	sub file {
	my $id = shift;
	my $IN = shift;
	my $out = shift;

	my $OUT = "$out.txt";

	# Show input size. Show output size later -- useful for quick sanity check.
	print "$id (", -s $IN, ") -> $OUT (";

	# Open file, write UTF8 marker, close it, and reopen in text mode
	open(OUT, ">$OUTDIR/$OUT") or die;
	binmode OUT; # Must do this so we can write our UTF8 marker
	print OUT pack("C3", 0xEF, 0xBB, 0xBF); # Write UTF8 marker
	close(OUT);

	open(OUT, ">>$OUTDIR/$OUT") or die;
	print OUT " // -- Coding: utf-8; --\n";

	header(\*OUT, $IN);
	print OUT "// $id\n";
	print OUT "\n";
	print OUT "$out {\n";
	print OUT " Rule {\n";

	open(IN, $IN) or die;
	binmode IN; # IN is a UTF8 file

	my $first = 1;
	my $BOM = pack("C3", 239, 187, 191); # a UTF8 byte order mark

	# Process each line by changing # comments to // comments
	# and taking other text and enclosing it in double quotes
	while (<IN>) {
	my $raw = $_;
	# ignore $Source $Revision $Date CVS keyword substitutions
	next if /\$Source/ ;
	next if /\$Revision/ ;
	next if /\$Date/ ;

	# we have printed out the copyright info ... ignore one in Java version
	next if /Copyright/ ;
	next if /Corporation/;

	# Look for and delete BOM
	if ($first) {
	s/^$BOM//;
	$first = 0;
	}

	# Clean the eol junk up
	s/[\x0D\x0A]+$//;

	# If there is a trailing backslash, then delete it -- we don't
	# need line continuation in C, since adjacent strings are
	# concatenated. Count trailing backslashes; if they are odd,
	# one is trailing.
	if (m\|(\\+)$\|) {
	if ((length($1) % 2) == 1) {
	s\|\\$\|\|;
	}
	}

	# Transform escaped characters
	hideEscapes();

	if (/^(\s)(\#.)$/) {
	# Comment-only line
	my ($white, $cmt) = ($1, $2);
	$cmt =~ s\|\#\|//\|;
	$_ = $white . $cmt;

	} elsif (!/\S/) {
	# Blank line -- leave as-is

	} else {
	# Remove single-quoted matter
	my @quotes;
	my $nquotes = 0;
	my $x = $_;
	while (s/^([^\'])(\'[^\']\')/$1<<x$nquotes>>/) {
	push @quotes, $2;
	++$nquotes;
	}

	# Extract comment
	my $cmt = '';
	if (s\|\#(.*)\|\|) {
	$cmt = '//' . $1;
	}

	# Add quotes
	s\|^(\s)(\S.?)(\s*)$\|$1\"$2\"$3\|;

	# Restore single-quoted matter
	for (my $i=0; $i<$nquotes; ++$i) {
	s\|<<x$i>>\|$quotes[$i]\|;
	}

	# Restore comment
	$_ .= $cmt;
	}

	# Restore escaped characters
	restoreEscapes();

	print OUT $_, "\n";
	}

	# Finish up
	close(IN);
	print OUT " }\n";
	print OUT "}\n";
	close(OUT);

	# Write output file size for sanity check
	print -s "$OUTDIR/$OUT", ")\n";
	}

	######################################################################
	sub hideEscapes {
	# Transform escaped characters
	s\|\\\\\|<<bs>>\|g; # DO THIS FIRST Transform backslashes
	s\|\\u([a-zA-Z0-9]{4})\|<<u$1>>\|g; # Transform Unicode escapes
	s\|\\\"\|<<dq>>\|g; # Transform backslash double quote
	s\|\\\'\|<<sq>>\|g; # Transform backslash single quote
	s\|\\\#\|<<lb>>\|g; # Transform backslash pound
	s\|\\(.)\|<<q$1>>\|g; # Transform backslash escapes
	}

	######################################################################
	sub restoreEscapes {
	# Restore escaped characters
	s\|<<bs>>\|\\\\\|g;
	s\|<<dq>>\|\\\\\\\"\|g;
	s\|<<sq>>\|\\\\\\\'\|g;
	s\|<<lb>>\|\\\\\\\#\|g;
	s\|<<q(.)>>\|\\\\\\$1\|g;
	s\|<<u0000>>\|\\\\u0000\|g; # Double escape U+0000
	s\|<<u(....)>>\|\\u$1\|g;
	}

	__END__
	:endofperl