main/tests/translit/src/com/ibm/icu/dev/test/util/TransliteratorUtilities.java - external/github.com/unicode-org/icu - Git at Google

 /*
  *******************************************************************************
  * Copyright (C) 2002-2009, International Business Machines Corporation and    *
  * others. All Rights Reserved.                                                *
  *******************************************************************************
  */
 package com.ibm.icu.dev.test.util;

 import java.io.BufferedReader;
 import java.io.IOException;

 import com.ibm.icu.text.Transliterator;

 /**
  * Helpers for creating and registering rule-based {@link Transliterator}s from
  * rule files, plus ready-made transliterators that escape and unescape the
  * XML/HTML markup characters.
  */
 public class TransliteratorUtilities {
     /** When true, {@link #registerTransliteratorFromFile} prints the IDs it registered to System.out. */
     public static boolean DEBUG = false;

     /**
      * Reads a transliterator rule file and registers both the forward and the
      * reverse transliterator built from it.
      *
      * <p>The file name is <code>id</code> with every '-' replaced by '_', followed
      * by ".txt". If <code>id</code> contains no '-', the forward ID is "Any-" + id
      * and the reverse ID is id + "-Any"; otherwise the reverse ID swaps the parts
      * on either side of the first '-'. Each ID is unregistered before the newly
      * created instance is registered under it.
      *
      * @param dir directory handed to {@link #getFileContents}
      * @param id transliterator ID, also used to derive the rule file name
      * @throws IllegalArgumentException if the rule file cannot be read; the
      *         underlying IOException is attached as the cause
      */
     public static void registerTransliteratorFromFile(String dir, String id) {
         try {
             // The file name is derived from the ID as passed in, before any "Any-" prefix is added.
             String filename = id.replace('-', '_') +  ".txt";
             String rules = getFileContents(dir, filename);
             Transliterator t;
             int pos = id.indexOf('-');
             String rid;
             if (pos < 0) {
                 rid = id + "-Any";
                 id = "Any-" + id;
             } else {
                 rid = id.substring(pos+1) + "-" + id.substring(0, pos);
             }
             t = Transliterator.createFromRules(id, rules, Transliterator.FORWARD);
             Transliterator.unregister(id);
             Transliterator.registerInstance(t);

             /* Debugging aid:
             String test = "\u049A\u0430\u0437\u0430\u049B";
             System.out.println(t.transliterate(test));
             t = Transliterator.getInstance(id);
             System.out.println(t.transliterate(test));
             */

             // Same rules, compiled in the reverse direction under the swapped ID.
             t = Transliterator.createFromRules(rid, rules, Transliterator.REVERSE);
             Transliterator.unregister(rid);
             Transliterator.registerInstance(t);
             if (DEBUG) System.out.println("Registered new Transliterator: " + id + ", " + rid);
         } catch (IOException e) {
 //#if defined(FOUNDATION10) || defined(J2SE13)
 //##        throw (IllegalArgumentException) new IllegalArgumentException("Can't open " + dir + ", " + id+" "+ e.getMessage());
 //#else
             throw (IllegalArgumentException) new IllegalArgumentException("Can't open " + dir + ", " + id).initCause(e);
 //#endif
         }
     }

     /**
      * Reads a whole text file into a string.
      *
      * <p>A leading U+FEFF (byte order mark) is stripped from any line that starts
      * with one, and every line is terminated with "\r\n" in the result regardless
      * of the line terminator used in the file.
      *
      * @param dir directory passed to the reader factory
      * @param filename file name passed to the reader factory
      * @return the file contents, one "\r\n"-terminated line per input line
      * @throws IOException if opening, reading or closing the reader fails
      */
     public static String getFileContents(String dir, String filename) throws IOException {
 //#if defined(FOUNDATION10) || defined(J2SE13)
 //##        BufferedReader br = TestUtil.openUTF8Reader(dir, filename);
 //#else
         BufferedReader br = BagFormatter.openUTF8Reader(dir, filename);
 //#endif
         try {
             StringBuffer buffer = new StringBuffer();
             while (true) {
                 String line = br.readLine();
                 if (line == null) {
                     break;
                 }
                 if (line.length() > 0 && line.charAt(0) == '\uFEFF') {
                     line = line.substring(1);
                 }
                 buffer.append(line).append("\r\n");
             }
             return buffer.toString();
         } finally {
             // Close on every path so a failing readLine() does not leak the reader.
             br.close();
         }
     }

     // Rules shared by all the XML/HTML transliterators below. In the rule syntax
     // "a > b" is a forward-only rule and "a < b" a reverse-only rule; the
     // parenthesized ":: (...)" transforms are likewise only used in reverse.
     // The reverse rules accept the entity names in any letter case.
     private static final String BASE_RULES =
         ":: (hex-any/xml);" +
         ":: (hex-any/xml10);" +
         "'<' > '&lt;' ;" +
         "'<' < '&'[lL][Tt]';' ;" +
         "'&' > '&amp;' ;" +
         "'&' < '&'[aA][mM][pP]';' ;" +
         "'>' < '&'[gG][tT]';' ;" +
         "'\"' < '&'[qQ][uU][oO][tT]';' ; " +
         "'' < '&'[aA][pP][oO][sS]';' ; ";

     // Forward escaping of '>' (the reverse rule is already in BASE_RULES).
     private static final String CONTENT_RULES =
         "'>' > '&gt;' ;";

     // BASE_RULES + CONTENT_RULES, plus forward escaping of the double quote.
     private static final String HTML_RULES = BASE_RULES + CONTENT_RULES +
         "'\"' > '&quot;' ; ";

     // HTML_RULES followed by a filtered hex/unicode transform over control,
     // separator, whitespace and default-ignorable characters.
     private static final String HTML_RULES_CONTROLS = HTML_RULES +
         ":: [[:C:][:Z:][:whitespace:][:Default_Ignorable_Code_Point:]] hex/unicode ; ";

     // HTML_RULES followed by a filtered any-hex/xml transform over control and
     // non-ASCII characters.
     private static final String HTML_RULES_ASCII = HTML_RULES +
         ":: [[:C:][:^ASCII:]] any-hex/xml ; ";

     // HTML_RULES plus forward escaping of the apostrophe ('' is a quoted ').
     private static final String XML_RULES = HTML_RULES +
         "'' > '&apos;' ; "
 ;

     /*
      * Background, quoted from the XML specification (Character Data and Markup):
      *
      * The ampersand character (&) and the left angle bracket (<) MUST NOT appear
      * in their literal form, except when used as markup delimiters, or within a
      * comment, a processing instruction, or a CDATA section. If they are needed
      * elsewhere, they MUST be escaped using either numeric character references or
      * the strings "&amp;" and "&lt;" respectively. The right angle bracket (>) MAY
      * be represented using the string "&gt;", and MUST, for compatibility, be
      * escaped using either "&gt;" or a character reference when it appears in the string
      * "]]>" in content, when that string is not marking the end of a CDATA section.
      *
      * In the content of elements, character data is any string of characters which does
      * not contain the start-delimiter of any markup and does not include the
      * CDATA-section-close delimiter, "]]>". In a CDATA section, character data is
      * any string of characters not including the CDATA-section-close delimiter,
      * "]]>".
      *
      * To allow attribute values to contain both single and double quotes, the
      * apostrophe or single-quote character (') MAY be represented as "&apos;", and
      * the double-quote character (") as "&quot;".
      */

     // None of the instances below is registered by this class. Note that toHTML,
     // toHTMLControl and toHTMLAscii are all created with the same ID, "any-html".

     /** Forward transliterator built from the XML rules (ID "any-xml"). */
     public static final Transliterator toXML = Transliterator.createFromRules(
             "any-xml", XML_RULES, Transliterator.FORWARD);
     /** Reverse transliterator built from the XML rules (ID "xml-any"). */
     public static final Transliterator fromXML = Transliterator.createFromRules(
             "xml-any", XML_RULES, Transliterator.REVERSE);
     /** Forward transliterator built from the HTML rules (ID "any-html"). */
     public static final Transliterator toHTML = Transliterator.createFromRules(
             "any-html", HTML_RULES, Transliterator.FORWARD);
     /** Forward transliterator built from the HTML rules plus the hex/unicode step (ID "any-html"). */
     public static final Transliterator toHTMLControl = Transliterator.createFromRules(
             "any-html", HTML_RULES_CONTROLS, Transliterator.FORWARD);
     /** Forward transliterator built from the HTML rules plus the any-hex/xml step (ID "any-html"). */
     public static final Transliterator toHTMLAscii = Transliterator.createFromRules(
             "any-html", HTML_RULES_ASCII, Transliterator.FORWARD);
     /** Reverse transliterator built from the HTML rules (ID "html-any"). */
     public static final Transliterator fromHTML = Transliterator.createFromRules(
             "html-any", HTML_RULES, Transliterator.REVERSE);
 }
	/*
	*******************************************************************************
	* Copyright (C) 2002-2009, International Business Machines Corporation and *
	* others. All Rights Reserved. *
	*******************************************************************************
	*/
	package com.ibm.icu.dev.test.util;

	import java.io.BufferedReader;
	import java.io.IOException;

	import com.ibm.icu.text.Transliterator;

	public class TransliteratorUtilities {
	public static boolean DEBUG = false;

	public static void registerTransliteratorFromFile(String dir, String id) {
	try {
	String filename = id.replace('-', '_') + ".txt";
	String rules = getFileContents(dir, filename);
	Transliterator t;
	int pos = id.indexOf('-');
	String rid;
	if (pos < 0) {
	rid = id + "-Any";
	id = "Any-" + id;
	} else {
	rid = id.substring(pos+1) + "-" + id.substring(0, pos);
	}
	t = Transliterator.createFromRules(id, rules, Transliterator.FORWARD);
	Transliterator.unregister(id);
	Transliterator.registerInstance(t);

	/*String test = "\u049A\u0430\u0437\u0430\u049B";
	System.out.println(t.transliterate(test));
	t = Transliterator.getInstance(id);
	System.out.println(t.transliterate(test));
	*/

	t = Transliterator.createFromRules(rid, rules, Transliterator.REVERSE);
	Transliterator.unregister(rid);
	Transliterator.registerInstance(t);
	if (DEBUG) System.out.println("Registered new Transliterator: " + id + ", " + rid);
	} catch (IOException e) {
	//#if defined(FOUNDATION10) \|\| defined(J2SE13)
	//## throw (IllegalArgumentException) new IllegalArgumentException("Can't open " + dir + ", " + id+" "+ e.getMessage());
	//#else
	throw (IllegalArgumentException) new IllegalArgumentException("Can't open " + dir + ", " + id).initCause(e);
	//#endif
	}
	}

	/**
	*
	*/
	public static String getFileContents(String dir, String filename) throws IOException {
	//#if defined(FOUNDATION10) \|\| defined(J2SE13)
	//## BufferedReader br = TestUtil.openUTF8Reader(dir, filename);
	//#else
	BufferedReader br = BagFormatter.openUTF8Reader(dir, filename);
	//#endif
	StringBuffer buffer = new StringBuffer();
	while (true) {
	String line = br.readLine();
	if (line == null) break;
	if (line.length() > 0 && line.charAt(0) == '\uFEFF') line = line.substring(1);
	buffer.append(line).append("\r\n");
	}
	br.close();
	return buffer.toString();

	}

	private static final String BASE_RULES =
	":: (hex-any/xml);" +
	":: (hex-any/xml10);" +
	"'<' > '<' ;" +
	"'<' < '&'[lL][Tt]';' ;" +
	"'&' > '&' ;" +
	"'&' < '&'[aA][mM][pP]';' ;" +
	"'>' < '&'[gG][tT]';' ;" +
	"'\"' < '&'[qQ][uU][oO][tT]';' ; " +
	"'' < '&'[aA][pP][oO][sS]';' ; ";

	private static final String CONTENT_RULES =
	"'>' > '>' ;";

	private static final String HTML_RULES = BASE_RULES + CONTENT_RULES +
	"'\"' > '"' ; ";

	private static final String HTML_RULES_CONTROLS = HTML_RULES +
	":: [[:C:][:Z:][:whitespace:][:Default_Ignorable_Code_Point:]] hex/unicode ; ";

	private static final String HTML_RULES_ASCII = HTML_RULES +
	":: [[:C:][:^ASCII:]] any-hex/xml ; ";

	private static final String XML_RULES = HTML_RULES +
	"'' > ''' ; "
	;

	/*
	The ampersand character (&) and the left angle bracket (<) MUST NOT appear

	in their literal form, except when used as markup delimiters, or within a

	comment, a processing instruction, or a CDATA section. If they are needed

	elsewhere, they MUST be escaped using either numeric character references or

	the strings "&" and "<" respectively. The right angle bracket (>) MAY

	be represented using the string ">", and MUST, for compatibility, be

	escaped using either ">" or a character reference when it appears in the string

	"]]>" in content, when that string is not marking the end of a CDATA section.

	In the content of elements, character data is any string of characters which does

	not contain the start-delimiter of any markup and does not include the

	CDATA-section-close delimiter, "]]>". In a CDATA section, character data is

	any string of characters not including the CDATA-section-close delimiter,

	"]]>".

	To allow attribute values to contain both single and double quotes, the

	apostrophe or single-quote character (') MAY be represented as "'", and

	the double-quote character (") as """.


	*/

	public static final Transliterator toXML = Transliterator.createFromRules(
	"any-xml", XML_RULES, Transliterator.FORWARD);
	public static final Transliterator fromXML = Transliterator.createFromRules(
	"xml-any", XML_RULES, Transliterator.REVERSE);
	public static final Transliterator toHTML = Transliterator.createFromRules(
	"any-html", HTML_RULES, Transliterator.FORWARD);
	public static final Transliterator toHTMLControl = Transliterator.createFromRules(
	"any-html", HTML_RULES_CONTROLS, Transliterator.FORWARD);
	public static final Transliterator toHTMLAscii = Transliterator.createFromRules(
	"any-html", HTML_RULES_ASCII, Transliterator.FORWARD);
	public static final Transliterator fromHTML = Transliterator.createFromRules(
	"html-any", HTML_RULES, Transliterator.REVERSE);
	}