tools/cldr/cldr-to-icu/build-icu-data.xml - external/github.com/unicode-org/icu - Git at Google

 <!-- © 2019 and later: Unicode, Inc. and others.
      License & terms of use: http://www.unicode.org/copyright.html -->

 <!--================================================================================
     Setup:
     Follow the installation instructions in README.txt in this directory.

     To build ICU data files:
     1: Determine the CLDR data directory and set the CLDR_DATA_DIR environment variable
        (or pass -DcldrDataDir=<dir> on the command line).
     2: Determine the flags required (see the list of properties below).
     3: Run: ant -f build-icu-data.xml -D<flag-name>=<flag-value>...
     ================================================================================-->
 <!-- TODO: Add things like copying of a template directory and deleting previous files
      (perhaps always generate into a temporary directory and copy back to avoid having
       inconsistent state when the conversion is cancelled). -->
 <project name="Convert" default="all" basedir=".">

     <!-- Default target of this project: has no body of its own and simply pulls in, in
          order, the property setup, the JAR build, the cleaning of the output directories
          and finally the conversion. -->
     <target name="all" depends="init-args, prepare-jar, clean, convert"/>

     <!-- Initialize the properties which were not already set on the command line. -->
     <target name="init-args">
         <!-- Expose the process environment as "env.*" properties. -->
         <property environment="env"/>
         <!-- Inherit properties from environment variable unless specified. As usual
              with Ant, this is messier than it should be. All we are saying here is:
              "Use the property if explicitly set, otherwise use the environment variable."
              We cannot just set the property to the environment variable, since expansion
              fails for non-existent properties, and you are left with a literal value of
              "${env.CLDR_DATA_DIR}". -->
         <condition property="cldrDataDir" value="${env.CLDR_DATA_DIR}">
             <isset property="env.CLDR_DATA_DIR"/>
         </condition>
         <fail unless="cldrDataDir"
               message="Set the CLDR_DATA_DIR environment variable (or cldrDataDir property) to the CLDR data directory (typically ending in '/production')"/>

         <!-- Ant does not inherit this from the user's environment (and it can matter).
              This is only needed because we have to "exec" a new Ant task below. -->
         <condition property="javaHome" value="${env.JAVA_HOME}">
             <isset property="env.JAVA_HOME"/>
         </condition>
         <!-- Fall back to the home of the JVM running this Ant process so that "javaHome"
              is always defined. Without this, an unset JAVA_HOME would leave the property
              undefined and the "convert" target would export the unexpanded text
              "${javaHome}" as JAVA_HOME to the child process. Properties are immutable, so
              this line has no effect when the value was already set above or via -D. -->
         <property name="javaHome" value="${java.home}"/>

         <!-- The output directory into which to write the converted ICU data. By default
              this will overwrite (without deletion) the ICU data files in this ICU release,
              so it is recommended that for testing, it be set to another value.  -->
         <property name="outDir" value="${basedir}/../../../icu4c/source/data/"/>

         <!-- The directory in which the additional ICU XML data is stored. -->
         <property name="specialsDir" value="${basedir}/../../../icu4c/source/data/xml"/>

         <!-- Default value for ICU version (icuver.txt). Update this for each release. -->
         <property name="icuVersion" value="69.1.0.0"/>

         <!-- Default value for ICU data version (icuver.txt). Update this for each release. -->
         <property name="icuDataVersion" value="69.1.0.0"/>

         <!-- An override for the CLDR version string (icuver.txt and others). This will be
              extracted from the CLDR library used for building the data if not set here. -->
         <property name="cldrVersion" value=""/>

         <!-- The minimum draft status for CLDR data to be used in the conversion. See
              CldrDraftStatus for more details. -->
         <property name="minDraftStatus" value="contributed"/>

         <!-- A regular expression to match the locale IDs to be generated (useful for
              debugging specific regions). This is applied after locale ID specifications
              have been expanded into full locale IDs, so the value "en" will NOT match
              "en_GB" or "en_001" etc. -->
         <property name="localeIdFilter" value=""/>

         <!-- Whether to synthetically generate "pseudo locale" data ("en_XA" and "ar_XB"). -->
         <property name="includePseudoLocales" value="false"/>

         <!-- Whether to emit a debug report containing some possibly useful information after
              the conversion has finished. -->
         <!-- TODO: Currently this isn't hugely useful, so find out what people want. -->
         <property name="emitReport" value="false"/>

         <!-- List of output "types" to be generated (e.g. "rbnf,plurals,locales"); an empty
              list means "build everything".

              Note that the grouping of types is based on the legacy converter behaviour and
              is not always directly associated with an output directory (e.g. "locales"
              produces locale data for curr/, lang/, main/, region/, unit/, zone/ but NOT
              coll/, brkitr/ or rbnf/).

              Pass in the value "HELP" (or any invalid value) to see the full list of types. -->
         <!-- TODO: Find out what common use cases are and use them. -->
         <property name="outputTypes" value=""/>

         <!-- Override to force the 'clean' task to delete files it cannot determine to be
              auto-generated by this tool. This is useful if the file header changes since
              the heading is what's used to recognize auto-generated files. -->
         <property name="forceDelete" value="false"/>
     </target>

     <!-- Build a standalone JAR which is called by Ant (and which avoids needing to mess
          about making Ant know the Maven class-path). -->
     <target name="prepare-jar" depends="init-args">
         <!-- Invoke "mvn compile", locating the Maven executable via the PATH; a failing
              Maven run aborts this build. -->
         <exec failonerror="true" searchpath="true" executable="mvn">
             <arg line="compile"/>
         </exec>
     </target>

     <!-- Somewhat hacky wrapper target which invokes the real conversion task.
          This is done so we can set the environment variable of the new process and
          effectively overwrite the CLDR_DIR value. If ever the CLDR library doesn't
          need to use CLDR_DIR at runtime to find the production data, this can all be
          removed. -->
     <target name="convert" depends="init-args, prepare-jar">
         <!-- Re-run this build file in a child Ant process, executing only "convert-impl".
              A failure of the child process fails this build too. -->
         <exec executable="ant" searchpath="true" failonerror="true">
             <!-- The CLDR library wants CLDR_DIR set, to the data directory. -->
             <env key="CLDR_DIR" value="${cldrDataDir}" />
             <!-- Force inherit JAVA_HOME (this can be important). -->
             <env key="JAVA_HOME" value="${javaHome}" />
             <!-- Initial Ant command line with all the "interesting" bits in. Each argument
                  is passed via its own "value" attribute (rather than one space-separated
                  "line") so that a CLDR data directory containing spaces stays a single
                  argument. -->
             <arg value="-f"/>
             <arg value="build-icu-data.xml"/>
             <arg value="convert-impl"/>
             <arg value="-DcldrDir=${cldrDataDir}"/>
             <!-- List all properties in the "convert-impl" task (except cldrDir). -->
             <arg value="-DoutDir=${outDir}"/>
             <arg value="-DspecialsDir=${specialsDir}"/>
             <arg value="-DoutputTypes=${outputTypes}"/>
             <arg value="-DicuVersion=${icuVersion}"/>
             <arg value="-DicuDataVersion=${icuDataVersion}"/>
             <arg value="-DcldrVersion=${cldrVersion}"/>
             <arg value="-DminDraftStatus=${minDraftStatus}"/>
             <arg value="-DlocaleIdFilter=${localeIdFilter}"/>
             <arg value="-DincludePseudoLocales=${includePseudoLocales}"/>
             <arg value="-DemitReport=${emitReport}"/>
         </exec>
     </target>

     <!-- Do the actual CLDR data conversion, based on the command line arguments, built in
          default properties and the configuration in the "<convert>" element below. -->
     <target name="convert-impl">
         <!-- Register the "convert" task, loading its implementation class from the
              standalone JAR under target/. -->
         <taskdef name="convert"
                  classname="org.unicode.icu.tool.cldrtoicu.ant.ConvertIcuDataTask"
                  classpath="target/cldr-to-icu-1.0-SNAPSHOT-jar-with-dependencies.jar"/>
         <convert cldrDir="${cldrDir}" outputDir="${outDir}" specialsDir="${specialsDir}"
                  outputTypes="${outputTypes}" cldrVersion="${cldrVersion}"
                  icuVersion="${icuVersion}" icuDataVersion="${icuDataVersion}"
                  minimalDraftStatus="${minDraftStatus}" localeIdFilter="${localeIdFilter}"
                  includePseudoLocales="${includePseudoLocales}" emitReport="${emitReport}">

             <!-- The primary set of locale IDs to be generated by default. The IDs in this list are
                  automatically expanded to include default scripts and all available regions. The
                  rules are:

                  1) Base languages are expanded to include default scripts (e.g. "en" -> "en_Latn").
                  2) All region and variant subtags are added for any base language or language+script
                     (e.g. "en" -> "en_GB" or "shi_Latn" -> "shi_Latn_MA").

                  If a non-default script is desired it should be listed explicitly (e.g. "sr_Latn").

                  Locale IDs with deprecated subtags (which become aliases) must still be listed in
                  full (e.g. "en_RH" or "sr_Latn_YU").
             -->
             <localeIds>
                 // A
                 af, agq, ak, am, ar, ars, as, asa, ast, az, az_AZ, az_Cyrl

                 // B
                 bas, be, bem, bez, bg, bm, bn, bo, br, brx, bs, bs_BA, bs_Cyrl

                 // C
                 ca, ccp, ce, ceb, cgg, chr, ckb, cs, cy

                 // D
                 da, dav, de, dje, doi, dsb, dua, dyo, dz

                 // E
                 ebu, ee, el, en, en_NH, en_RH, eo, es, et, eu, ewo

                 // F
                 fa, ff, ff_Adlm, ff_CM, ff_GN, ff_MR, ff_SN, fi, fil, fo, fr, fur, fy

                 // G
                 ga, gd, gl, gsw, gu, guz, gv

                 // H
                 ha, haw, he, hi, hr, hsb, hu, hy

                 // I
                 ia, id, ig, ii, in, in_ID, is, it, iw, iw_IL

                 // J
                 ja, jgo, jmc, jv

                 // K
                 ka, kab, kam, kde, kea, khq, ki, kk, kkj, kl, kln, km, kn, ko, kok, ks
                 ks_IN, ksb, ksf, ksh, ku, kw, ky

                 // L
                 lag, lb, lg, lkt, ln, lo, lrc, lt, lu, luo, luy, lv

                 // M
                 mai, mas, mer, mfe, mg, mgh, mgo, mi, mk, ml, mn, mni, mni_IN, mo, mr, ms
                 mt, mua, my, mzn

                 // N
                 naq, nb, nb_NO, nb_SJ, nd, ne, nl, nmg, nn, nnh, no, no_NO_NY, nus, nyn

                 // O
                 om, or, os

                 // P
                 pa, pa_Arab, pa_IN, pa_PK, pcm, pl, ps, pt

                 // Q
                 qu

                 // R
                 rm, rn, ro, rof, ru, rw, rwk

                 // S
                 sa, sah, saq, sat, sat_IN, sbp, sd, sd_Deva, sd_PK, se, seh, ses, sg, sh, sh_BA, sh_CS, sh_YU
                 shi, shi_Latn, shi_MA, si, sk, sl, smn, sn, so, sq, sr, sr_BA, sr_CS, sr_Cyrl_CS, sr_Cyrl_YU, sr_Latn
                 sr_Latn_CS, sr_Latn_YU, sr_ME, sr_RS, sr_XK, sr_YU, su, su_ID, sv, sw

                 // T
                 ta, te, teo, tg, th, ti, tk, tl, tl_PH, to, tr, tt, twq, tzm

                 // U
                 ug, uk, ur, uz, uz_AF, uz_Arab, uz_Cyrl, uz_UZ

                 // V
                 vai, vai_LR, vai_Latn, vi, vun

                 // W
                 wae, wo

                 // X
                 xh, xog

                 // Y
                 yav, yi, yo, yue, yue_CN, yue_HK, yue_Hans

                 // Z
                 zgh, zh, zh_CN, zh_HK, zh_Hant, zh_MO, zh_SG, zh_TW, zu
             </localeIds>

             <!-- The following elements configure directories in which a subset of the available
                  locale IDs should be generated. Unlike the main <localeIds> element, these
                  filters must specify all locale IDs in full (but since they mostly select base
                  languages, this isn't a big deal).

                  As well as allowing some data directories to have a subset of available data (via
                  the <localeIds> element) there are also mechanisms for controlling aliasing and
                  the locale parent relation which allows the sharing of some ICU data in cases
                  where it would otherwise need to be copied. The two mechanisms are:

                  1: inheritLanguageSubtag: Used to rewrite the parent of a locale ID from "root" to
                     its language subtag (e.g. "zh_Hant" has a natural parent of "root", but to allow
                     some base language data to be shared it can be made to have a parent of "zh").

                  2: forcedAlias: Used to add aliases for specific directories in order to affect the
                     ICU behaviour in special cases.

                  Between them these mechanisms are known as "tailorings" of the affected locales. -->
             <!-- TODO: Explain why these special cases are needed/different. -->

             <!-- Collation data is large, but also more sharable than other data, which is why there
                  are a number of aliases and parent remappings for this directory. -->
             <directory dir="coll" inheritLanguageSubtag="bs_Cyrl, sr_Latn, zh_Hant">
                 <!-- These aliases are to avoid needing to copy and maintain the same collation data
                      for "zh" and "yue". The maximized version of "yue_Hans" is "yue_Hans_CN" (vs
                      "zh_Hans_CN"), and for "yue" it's "yue_Hant_HK" (vs "zh_Hant_HK"), so the
                      aliases are effectively just rewriting the base language. -->
                 <forcedAlias source="yue" target="zh_Hant"/>
                 <forcedAlias source="yue_Hant" target="zh_Hant"/>
                 <forcedAlias source="yue_CN" target="zh_Hans"/>
                 <forcedAlias source="yue_Hans" target="zh_Hans"/>
                 <forcedAlias source="yue_Hans_CN" target="zh_Hans"/>

                 <!-- TODO: Find out and document this properly. -->
                 <forcedAlias source="sr_ME" target="sr_Cyrl_ME"/>

                 <!-- The full set of locale IDs for which collation data is generated. Every
                      line that is followed by further IDs ends with a comma, so that an entry
                      never depends on a line break alone to separate it from the next one. -->
                 <localeIds>
                     root,

                     // A-B
                     af, am, ars, ar, as, az, be, bg, bn, bo, br, bs_Cyrl, bs,

                     // C-F
                     ca, ceb, chr, cs, cy, da, de_AT, de, dsb, dz, ee, el, en,
                     en_US_POSIX, en_US, eo, es, et, fa_AF, fa, ff_Adlm, ff, fil, fi, fo, fr_CA, fr,

                     // G-J
                     ga, gl, gu, ha, haw, he, hi, hr, hsb, hu, hy,
                     id_ID, id, ig, in, in_ID, is, it, iw_IL, iw, ja,

                     // K-P
                     ka, kk, kl, km, kn, kok, ko, ku, ky, lb, lkt, ln, lo, lt, lv,
                     mk, ml, mn, mo, mr, ms, mt, my, nb, nb_NO, ne, nl, nn, no, no_NO,
                     om, or, pa_IN, pa, pa_Guru, pl, ps, pt,

                     // R-T
                     ro, ru, sa, se, sh_BA, sh_CS, sh, sh_YU, si, sk, sl, smn, sq,
                     sr_BA, sr_Cyrl_ME, sr_Latn, sr_ME, sr_RS, sr, sv, sw,
                     ta, te, th, tk, to, tr,

                     // U-Z
                     ug, uk, ur, uz, vi, wae, wo, xh, yi, yo, yue_CN, yue_Hans_CN, yue_Hans,
                     yue_Hant, yue, zh_CN, zh_Hans, zh_Hant, zh_HK, zh_MO, zh_SG, zh_TW, zh, zu
                 </localeIds>
             </directory>

             <directory dir="rbnf">
                 <!-- Configuration for the "rbnf" output directory: one forced alias plus the
                      explicit list of locale IDs to be generated there. -->
                 <!-- It is not at all clear why this is being done. It's certainly not exactly the
                      same as above, since (a) the alias is reversed (b) "zh_Hant" does exist, with
                      different data than "yue", so this alias is not just rewriting the base
                      language. -->
                 <!-- TODO: Find out and document this properly. -->
                 <forcedAlias source="zh_Hant_HK" target="yue"/>

                 <localeIds>
                     root,

                     // A-E
                     af, ak, am, ars, ar, az, be, bg, bs, ca, ccp, chr, cs, cy,
                     da, de_CH, de, ee, el, en_001, en_IN, en, eo, es_419, es_DO,
                     es_GT, es_HN, es_MX, es_NI, es_PA, es_PR, es_SV, es, es_US, et,

                     // F-P
                     fa_AF, fa, ff, fil, fi, fo, fr_BE, fr_CH, fr, ga, he, hi, hr,
                     hu, hy, id, in, is, it, iw, ja, ka, kl, km, ko, ky, lb,
                     lo, lrc, lt, lv, mk, ms, mt, my, nb, nl, nn, no, pl, pt_PT, pt,

                     // Q-Z
                     qu, ro, ru, se, sh, sk, sl, sq, sr_Latn, sr, su, sv, sw, ta, th, tr,
                     uk, vi, yue_Hans, yue, zh_Hant_HK, zh_Hant, zh_HK, zh_MO, zh_TW, zh
                 </localeIds>
             </directory>

             <!-- Configuration for the "brkitr" output directory: only the locale IDs listed
                  here are generated, and "zh_Hant" is given the parent "zh" instead of "root"
                  via inheritLanguageSubtag. -->
             <directory dir="brkitr" inheritLanguageSubtag="zh_Hant">
                 <localeIds>
                     root,
                     de, el, en, en_US_POSIX, en_US, es, fr, it, ja, pt, ru, zh_Hant, zh
                 </localeIds>
             </directory>

             <!-- GLOBAL ALIASES -->

             <!-- Some spoken languages (e.g. "ars") inherit all their data from a written language
                  (e.g. "ar_SA"). However CLDR doesn't currently support a way to represent that
                  relationship. Unlike deprecated languages for which an alias can be inferred from
                  the "languageAlias" CLDR data, there's no way in CLDR to represent the fact that
                  we want "ars" (a non-deprecated language) to inherit the data of "ar_SA".

                  This alias is the first example of potentially many cases where ICU needs to
                  generate an alias in order to affect "sideways inheritance" for spoken languages,
                  and at some stage it should probably be supported properly in the CLDR data. -->
             <forcedAlias source="ars" target="ar_SA"/>

             <!-- A legacy global alias (note that "no_NO_NY" is not even structurally valid). -->
             <forcedAlias source="no_NO_NY" target="nn_NO"/>

             <!-- ALTERNATE VALUES -->

             <!-- The following elements configure alternate values for some special case paths.
                  The target path will only be replaced if both it, and the source path, exist in
                  the CLDR data (paths will not be modified if only the source path exists).

                  Since the paths must represent the same semantic type of data, they must be in the
                  same "namespace" (same element names) and must not contain value attributes. Thus
                  they can only differ by distinguishing attributes (either added or modified).

                  This feature is typically used to select alternate translations (e.g. short forms)
                  for certain paths. -->
             <!-- <altPath target="//path/to/value[@attr='foo']"
                           source="//path/to/value[@attr='bar']"
                           locales="xx,yy_ZZ"/> -->
         </convert>
     </target>

     <target name="clean" depends="init-args, prepare-jar">
         <!-- Register the clean-up task, loading its implementation class from the
              standalone JAR under target/. -->
         <taskdef name="outputDirectories"
                  classname="org.unicode.icu.tool.cldrtoicu.ant.CleanOutputDirectoryTask"
                  classpath="target/cldr-to-icu-1.0-SNAPSHOT-jar-with-dependencies.jar"/>

         <!-- Every file in a directory listed below is assumed to have been generated
              automatically by the conversion tool, unless it is named explicitly in a <retain>
              element. Before deleting a file, the tool checks that it carries the expected
              header, indicating that it really was generated automatically.

              If unexpected files are found, the "clean" task fails without deleting anything
              (unless 'forceDelete' is set to override this). Even when 'forceDelete' is set,
              the files listed explicitly below are never deleted by this process.

              This two-step approach minimizes the risk that the conversion process will ever
              accidentally delete a manually maintained file.
              -->
         <outputDirectories forceDelete="${forceDelete}" root="${outDir}">
             <dir name="brkitr">
                 <retain path="dictionaries"/>
                 <retain path="rules"/>
             </dir>
             <dir name="coll">
                 <!-- Legacy files with names that automatic generation does not support.
                      They are simple to maintain by hand and unlikely to change again. -->
                 <retain path="de__PHONEBOOK.txt"/>
                 <retain path="de_.txt"/>
                 <retain path="es__TRADITIONAL.txt"/>
                 <retain path="es_.txt"/>
             </dir>
             <dir name="curr"/>
             <dir name="lang"/>
             <dir name="locales"/>
             <dir name="misc">
                 <!-- Machine generated files which are produced by different tools.
                      Possibly worth moving into the new LDML conversion tool one day. -->
                 <retain path="currencyNumericCodes.txt"/>
                 <retain path="zoneinfo64.txt"/>
                 <!-- Project file (not ICU data), unlikely to ever be auto-generated. -->
                 <retain path="icudata.rc"/>
                 <!-- Small high-level metadata file, stable and easy to maintain manually. -->
                 <retain path="icustd.txt"/>
             </dir>
             <dir name="rbnf"/>
             <dir name="region"/>
             <dir name="translit">
                 <!-- Small, easy to maintain, special case top-level files. -->
                 <retain path="en.txt"/>
                 <retain path="el.txt"/>
             </dir>
             <dir name="unit"/>
             <dir name="zone">
                 <!-- Manually edited to support TZ database name compatibility. -->
                 <retain path="tzdbNames.txt"/>
             </dir>
         </outputDirectories>
     </target>
 </project>
	<!-- © 2019 and later: Unicode, Inc. and others.
	License & terms of use: http://www.unicode.org/copyright.html -->

	<!--================================================================================
	Setup:
	Follow the installation instructions in README.txt in this directory.

	To build ICU data files:
	1: Determine the CLDR data directory and set the CLDR_DATA_DIR environment variable
	(or pass -DcldrDataDir=<dir> on the command line).
	2: Determine the flags required (see the list of properties below).
	3: Run: ant -f build-icu-data.xml -D<flag-name>=<flag-value>...
	================================================================================-->
	<!-- TODO: Add things like copying of a template directory and deleting previous files
	(perhaps always generate into a temporary directory and copy back to avoid having
	inconsistent state when the conversion is cancelled). -->
	<project name="Convert" default="all" basedir=".">

	<!-- Default target of this project: has no body of its own and simply pulls in, in
	order, the property setup, the JAR build, the cleaning of the output directories
	and finally the conversion. -->
	<target name="all" depends="init-args, prepare-jar, clean, convert"/>

	<!-- Initialize the properties which were not already set on the command line. -->
	<target name="init-args">
		<!-- Expose the process environment as "env.*" properties. -->
		<property environment="env"/>
		<!-- Inherit properties from environment variable unless specified. As usual
		with Ant, this is messier than it should be. All we are saying here is:
		"Use the property if explicitly set, otherwise use the environment variable."
		We cannot just set the property to the environment variable, since expansion
		fails for non-existent properties, and you are left with a literal value of
		"${env.CLDR_DATA_DIR}". -->
		<condition property="cldrDataDir" value="${env.CLDR_DATA_DIR}">
			<isset property="env.CLDR_DATA_DIR"/>
		</condition>
		<fail unless="cldrDataDir"
			message="Set the CLDR_DATA_DIR environment variable (or cldrDataDir property) to the CLDR data directory (typically ending in '/production')"/>

		<!-- Ant does not inherit this from the user's environment (and it can matter).
		This is only needed because we have to "exec" a new Ant task below. -->
		<condition property="javaHome" value="${env.JAVA_HOME}">
			<isset property="env.JAVA_HOME"/>
		</condition>
		<!-- Fall back to the home of the JVM running this Ant process so that "javaHome"
		is always defined. Without this, an unset JAVA_HOME would leave the property
		undefined and the "convert" target would export the unexpanded text
		"${javaHome}" as JAVA_HOME to the child process. Properties are immutable, so
		this line has no effect when the value was already set above or via -D. -->
		<property name="javaHome" value="${java.home}"/>

		<!-- The output directory into which to write the converted ICU data. By default
		this will overwrite (without deletion) the ICU data files in this ICU release,
		so it is recommended that for testing, it be set to another value. -->
		<property name="outDir" value="${basedir}/../../../icu4c/source/data/"/>

		<!-- The directory in which the additional ICU XML data is stored. -->
		<property name="specialsDir" value="${basedir}/../../../icu4c/source/data/xml"/>

		<!-- Default value for ICU version (icuver.txt). Update this for each release. -->
		<property name="icuVersion" value="69.1.0.0"/>

		<!-- Default value for ICU data version (icuver.txt). Update this for each release. -->
		<property name="icuDataVersion" value="69.1.0.0"/>

		<!-- An override for the CLDR version string (icuver.txt and others). This will be
		extracted from the CLDR library used for building the data if not set here. -->
		<property name="cldrVersion" value=""/>

		<!-- The minimum draft status for CLDR data to be used in the conversion. See
		CldrDraftStatus for more details. -->
		<property name="minDraftStatus" value="contributed"/>

		<!-- A regular expression to match the locale IDs to be generated (useful for
		debugging specific regions). This is applied after locale ID specifications
		have been expanded into full locale IDs, so the value "en" will NOT match
		"en_GB" or "en_001" etc. -->
		<property name="localeIdFilter" value=""/>

		<!-- Whether to synthetically generate "pseudo locale" data ("en_XA" and "ar_XB"). -->
		<property name="includePseudoLocales" value="false"/>

		<!-- Whether to emit a debug report containing some possibly useful information after
		the conversion has finished. -->
		<!-- TODO: Currently this isn't hugely useful, so find out what people want. -->
		<property name="emitReport" value="false"/>

		<!-- List of output "types" to be generated (e.g. "rbnf,plurals,locales"); an empty
		list means "build everything".

		Note that the grouping of types is based on the legacy converter behaviour and
		is not always directly associated with an output directory (e.g. "locales"
		produces locale data for curr/, lang/, main/, region/, unit/, zone/ but NOT
		coll/, brkitr/ or rbnf/).

		Pass in the value "HELP" (or any invalid value) to see the full list of types. -->
		<!-- TODO: Find out what common use cases are and use them. -->
		<property name="outputTypes" value=""/>

		<!-- Override to force the 'clean' task to delete files it cannot determine to be
		auto-generated by this tool. This is useful if the file header changes since
		the heading is what's used to recognize auto-generated files. -->
		<property name="forceDelete" value="false"/>
	</target>

	<!-- Build a standalone JAR which is called by Ant (and which avoids needing to mess
	about making Ant know the Maven class-path). -->
	<target name="prepare-jar" depends="init-args">
		<!-- Invoke "mvn compile", locating the Maven executable via the PATH; a failing
		Maven run aborts this build. -->
		<exec failonerror="true" searchpath="true" executable="mvn">
			<arg line="compile"/>
		</exec>
	</target>

	<!-- Somewhat hacky wrapper target which invokes the real conversion task.
	This is done so we can set the environment variable of the new process and
	effectively overwrite the CLDR_DIR value. If ever the CLDR library doesn't
	need to use CLDR_DIR at runtime to find the production data, this can all be
	removed. -->
	<target name="convert" depends="init-args, prepare-jar">
		<!-- Re-run this build file in a child Ant process, executing only "convert-impl".
		A failure of the child process fails this build too. -->
		<exec executable="ant" searchpath="true" failonerror="true">
			<!-- The CLDR library wants CLDR_DIR set, to the data directory. -->
			<env key="CLDR_DIR" value="${cldrDataDir}" />
			<!-- Force inherit JAVA_HOME (this can be important). -->
			<env key="JAVA_HOME" value="${javaHome}" />
			<!-- Initial Ant command line with all the "interesting" bits in. Each argument
			is passed via its own "value" attribute (rather than one space-separated
			"line") so that a CLDR data directory containing spaces stays a single
			argument. -->
			<arg value="-f"/>
			<arg value="build-icu-data.xml"/>
			<arg value="convert-impl"/>
			<arg value="-DcldrDir=${cldrDataDir}"/>
			<!-- List all properties in the "convert-impl" task (except cldrDir). -->
			<arg value="-DoutDir=${outDir}"/>
			<arg value="-DspecialsDir=${specialsDir}"/>
			<arg value="-DoutputTypes=${outputTypes}"/>
			<arg value="-DicuVersion=${icuVersion}"/>
			<arg value="-DicuDataVersion=${icuDataVersion}"/>
			<arg value="-DcldrVersion=${cldrVersion}"/>
			<arg value="-DminDraftStatus=${minDraftStatus}"/>
			<arg value="-DlocaleIdFilter=${localeIdFilter}"/>
			<arg value="-DincludePseudoLocales=${includePseudoLocales}"/>
			<arg value="-DemitReport=${emitReport}"/>
		</exec>
	</target>

	<!-- Do the actual CLDR data conversion, based on the command line arguments, built in
	default properties and the configuration in the "<convert>" element below. -->
	<target name="convert-impl">
	<taskdef name="convert" classname="org.unicode.icu.tool.cldrtoicu.ant.ConvertIcuDataTask">
	<classpath>
	<pathelement path="target/cldr-to-icu-1.0-SNAPSHOT-jar-with-dependencies.jar"/>
	</classpath>
	</taskdef>
	<convert cldrDir="${cldrDir}" outputDir="${outDir}" specialsDir="${specialsDir}"
	outputTypes="${outputTypes}" cldrVersion="${cldrVersion}"
	icuVersion="${icuVersion}" icuDataVersion="${icuDataVersion}"
	minimalDraftStatus="${minDraftStatus}" localeIdFilter="${localeIdFilter}"
	includePseudoLocales="${includePseudoLocales}" emitReport="${emitReport}">

	<!-- The primary set of locale IDs to be generated by default. The IDs in this list are
	automatically expanded to include default scripts and all available regions. The
	rules are:

	1) Base languages are expanded to include default scripts (e.g. "en" -> "en_Latn").
	2) All region and variant subtags are added for any base language or language+script
	(e.g. "en" -> "en_GB" or "shi_Latn" -> "shi_Latn_MA").

	If a non-default script is desired it should be listed explicitly (e.g. "sr_Latn").

	Locale IDs with deprecated subtags (which become aliases) must still be listed in
	full (e.g. "en_RH" or "sr_Latn_YU").
	-->
	<localeIds>
	// A
	af, agq, ak, am, ar, ars, as, asa, ast, az, az_AZ, az_Cyrl

	// B
	bas, be, bem, bez, bg, bm, bn, bo, br, brx, bs, bs_BA, bs_Cyrl

	// C
	ca, ccp, ce, ceb, cgg, chr, ckb, cs, cy

	// D
	da, dav, de, dje, doi, dsb, dua, dyo, dz

	// E
	ebu, ee, el, en, en_NH, en_RH, eo, es, et, eu, ewo

	// F
	fa, ff, ff_Adlm, ff_CM, ff_GN, ff_MR, ff_SN, fi, fil, fo, fr, fur, fy

	// G
	ga, gd, gl, gsw, gu, guz, gv

	// H
	ha, haw, he, hi, hr, hsb, hu, hy

	// I
	ia, id, ig, ii, in, in_ID, is, it, iw, iw_IL

	// J
	ja, jgo, jmc, jv

	// K
	ka, kab, kam, kde, kea, khq, ki, kk, kkj, kl, kln, km, kn, ko, kok, ks
	ks_IN, ksb, ksf, ksh, ku, kw, ky

	// L
	lag, lb, lg, lkt, ln, lo, lrc, lt, lu, luo, luy, lv

	// M
	mai, mas, mer, mfe, mg, mgh, mgo, mi, mk, ml, mn, mni, mni_IN, mo, mr, ms
	mt, mua, my, mzn

	// N
	naq, nb, nb_NO, nb_SJ, nd, ne, nl, nmg, nn, nnh, no, no_NO_NY, nus, nyn

	// O
	om, or, os

	// P
	pa, pa_Arab, pa_IN, pa_PK, pcm, pl, ps, pt

	// Q
	qu

	// R
	rm, rn, ro, rof, ru, rw, rwk

	// S
	sa, sah, saq, sat, sat_IN, sbp, sd, sd_Deva, sd_PK, se, seh, ses, sg, sh, sh_BA, sh_CS, sh_YU
	shi, shi_Latn, shi_MA, si, sk, sl, smn, sn, so, sq, sr, sr_BA, sr_CS, sr_Cyrl_CS, sr_Cyrl_YU, sr_Latn
	sr_Latn_CS, sr_Latn_YU, sr_ME, sr_RS, sr_XK, sr_YU, su, su_ID, sv, sw

	// T
	ta, te, teo, tg, th, ti, tk, tl, tl_PH, to, tr, tt, twq, tzm

	// U
	ug, uk, ur, uz, uz_AF, uz_Arab, uz_Cyrl, uz_UZ

	// V
	vai, vai_LR, vai_Latn, vi, vun

	// W
	wae, wo

	// X
	xh, xog

	// Y
	yav, yi, yo, yue, yue_CN, yue_HK, yue_Hans

	// Z
	zgh, zh, zh_CN, zh_HK, zh_Hant, zh_MO, zh_SG, zh_TW, zu
	</localeIds>

	<!-- The following elements configure directories in which a subset of the available
	locales IDs should be generated. Unlike the main <localeId> element, these
	filters must specify all locale IDs in full (but since they mostly select base
	languages, this isn't a big deal).

	As well as allowing some data directories to have a subset of available data (via
	the <localeIds> element) there are also mechanisms for controlling aliasing and
	the locale parent relation which allows the sharing of some ICU data in cases
	where it would otherwise need to be copied. The two mechanisms are:

	1: inheritLanguageSubtag: Used to rewrite the parent of a locale ID from "root" to
	its language subtag (e.g. "zh_Hant" has a natural parent of "root", but to allow
	some base language data to be shared it can be made to have a parent of "zh").

	2: forcedAlias: Used to add aliases for specific directories in order to affect the
	ICU behaviour in special cases.

	Between them these mechanisms are known as "tailorings" of the affected locales. -->
	<!-- TODO: Explain why these special cases are needed/different. -->

	<!-- Collation data is large, but also more sharable than other data, which is why there
	are a number of aliases and parent remappings for this directory. -->
	<directory dir="coll" inheritLanguageSubtag="bs_Cyrl, sr_Latn, zh_Hant">
	<!-- These aliases are to avoid needing to copy and maintain the same collation data
	for "zh" and "yue". The maximized version of "yue_Hans" is "yue_Hans_CN" (vs
	"zh_Hans_CN"), and for "yue" it's "yue_Hant_HK" (vs "zh_Hant_HK"), so the
	aliases are effectively just rewriting the base language. -->
	<forcedAlias source="yue" target="zh_Hant"/>
	<forcedAlias source="yue_Hant" target="zh_Hant"/>
	<forcedAlias source="yue_CN" target="zh_Hans"/>
	<forcedAlias source="yue_Hans" target="zh_Hans"/>
	<forcedAlias source="yue_Hans_CN" target="zh_Hans"/>

	<!-- TODO: Find out and document why "sr_ME" is aliased to "sr_Cyrl_ME" in this directory. -->
	<forcedAlias source="sr_ME" target="sr_Cyrl_ME"/>

	<!-- Locale IDs generated for this directory, listed in full and grouped by letter range. -->
	<localeIds>
	root,

	// A-B
	af, am, ars, ar, as, az, be, bg, bn, bo, br, bs_Cyrl, bs,

	// C-F
	ca, ceb, chr, cs, cy, da, de_AT, de, dsb, dz, ee, el, en,
	en_US_POSIX, en_US, eo, es, et, fa_AF, fa, ff_Adlm, ff, fil, fi, fo, fr_CA, fr,

	// G-J
	ga, gl, gu, ha, haw, he, hi, hr, hsb, hu, hy,
	id_ID, id, ig, in, in_ID, is, it, iw_IL, iw, ja,

	// K-P
	ka, kk, kl, km, kn, kok, ko, ku, ky, lb, lkt, ln, lo, lt, lv,
	mk, ml, mn, mo, mr, ms, mt, my, nb, nb_NO, ne, nl, nn, no, no_NO,
	om, or, pa_IN, pa, pa_Guru, pl, ps, pt,

	// R-T
	ro, ru, sa, se, sh_BA, sh_CS, sh, sh_YU, si, sk, sl, smn, sq,
	sr_BA, sr_Cyrl_ME, sr_Latn, sr_ME, sr_RS, sr, sv, sw,
	ta, te, th, tk, to, tr,

	// U-Z
	ug, uk, ur, uz, vi, wae, wo, xh, yi, yo, yue_CN, yue_Hans_CN, yue_Hans
	yue_Hant, yue, zh_CN, zh_Hans, zh_Hant, zh_HK, zh_MO, zh_SG, zh_TW, zh, zu
	</localeIds>
	</directory>

	<directory dir="rbnf">
	<!-- It is not at all clear why this alias exists. It is certainly not the same case as
	the "yue" aliases in the "coll" directory above, since (a) the alias is reversed
	(here "yue" is the target, not the source) and (b) "zh_Hant" does exist, with
	different data than "yue", so this alias is not just rewriting the base
	language. -->
	<!-- TODO: Find out and document why "zh_Hant_HK" is aliased to "yue" here. -->
	<forcedAlias source="zh_Hant_HK" target="yue"/>

	<!-- Locale IDs generated for this directory, listed in full and grouped by letter range. -->
	<localeIds>
	root,

	// A-E
	af, ak, am, ars, ar, az, be, bg, bs, ca, ccp, chr, cs, cy,
	da, de_CH, de, ee, el, en_001, en_IN, en, eo, es_419, es_DO,
	es_GT, es_HN, es_MX, es_NI, es_PA, es_PR, es_SV, es, es_US, et,

	// F-P
	fa_AF, fa, ff, fil, fi, fo, fr_BE, fr_CH, fr, ga, he, hi, hr,
	hu, hy, id, in, is, it, iw, ja, ka, kl, km, ko, ky, lb,
	lo, lrc, lt, lv, mk, ms, mt, my, nb, nl, nn, no, pl, pt_PT, pt,

	// Q-Z
	qu, ro, ru, se, sh, sk, sl, sq, sr_Latn, sr, su, sv, sw, ta, th, tr,
	uk, vi, yue_Hans, yue, zh_Hant_HK, zh_Hant, zh_HK, zh_MO, zh_TW, zh
	</localeIds>
	</directory>

	<!-- Only the locale IDs listed here are generated for the "brkitr" directory. The parent of
	"zh_Hant" is rewritten from "root" to "zh" (see the description of inheritLanguageSubtag
	above) so that base language data can be shared. -->
	<directory dir="brkitr" inheritLanguageSubtag="zh_Hant">
	<localeIds>
	root,
	de, el, en, en_US_POSIX, en_US, es, fr, it, ja, pt, ru, zh_Hant, zh
	</localeIds>
	</directory>

	<!-- GLOBAL ALIASES -->

	<!-- Some spoken languages (e.g. "ars") inherit all their data from a written language
	(e.g. "ar_SA"). However CLDR doesn't currently support a way to represent that
	relationship. Unlike deprecated languages for which an alias can be inferred from
	the "languageAlias" CLDR data, there's no way in CLDR to represent the fact that
	we want "ars" (a non-deprecated language) to inherit the data of "ar_SA".

	This alias is the first example of potentially many cases where ICU needs to
	generate an alias in order to effect "sideways inheritance" for spoken languages,
	and at some stage it should probably be supported properly in the CLDR data. -->
	<forcedAlias source="ars" target="ar_SA"/>

	<!-- A legacy global alias (note that "no_NO_NY" is not even structurally valid). -->
	<forcedAlias source="no_NO_NY" target="nn_NO"/>

	<!-- ALTERNATE VALUES -->

	<!-- The following elements configure alternate values for some special case paths.
	The target path will only be replaced if both it, and the source path, exist in
	the CLDR data (paths will not be modified if only the source path exists).

	Since the paths must represent the same semantic type of data, they must be in the
	same "namespace" (same element names) and must not contain value attributes. Thus
	they can only differ by distinguishing attributes (either added or modified).

	This feature is typically used to select alternate translations (e.g. short forms)
	for certain paths. -->
	<!-- <altPath target="//path/to/value[@attr='foo']"
	source="//path/to/value[@attr='bar']"
	locales="xx,yy_ZZ"/> -->
	</convert>
	</target>

	<target name="clean" depends="init-args, prepare-jar">
		<!-- Make the clean-up task from the tool's jar available under the element name
			used below. -->
		<taskdef name="outputDirectories"
			classname="org.unicode.icu.tool.cldrtoicu.ant.CleanOutputDirectoryTask">
			<classpath>
				<pathelement path="target/cldr-to-icu-1.0-SNAPSHOT-jar-with-dependencies.jar"/>
			</classpath>
		</taskdef>

		<!-- Each <dir> below names an output directory whose contents are assumed to be
			produced entirely by the conversion tool, apart from any path named in a nested
			<retain> element. Before deleting a file, the tool checks that it carries the
			expected header indicating that it was automatically generated.

			If any unexpected file is found, the "clean" task fails without deleting anything
			(unless 'forceDelete' is set to override this). Even when 'forceDelete' is set,
			the files explicitly retained below are never deleted by this process.

			Checking first and deleting second minimizes the risk that the conversion
			process will ever accidentally delete a manually maintained file.
		-->
		<outputDirectories root="${outDir}" forceDelete="${forceDelete}">
			<dir name="brkitr">
				<retain path="dictionaries"/>
				<retain path="rules"/>
			</dir>

			<dir name="coll">
				<!-- Legacy files with names that automatic generation does not support.
					They are simple to maintain by hand and unlikely to change again. -->
				<retain path="de__PHONEBOOK.txt"/>
				<retain path="de_.txt"/>
				<retain path="es__TRADITIONAL.txt"/>
				<retain path="es_.txt"/>
			</dir>

			<dir name="curr"/>
			<dir name="lang"/>
			<dir name="locales"/>

			<dir name="misc">
				<!-- Machine generated, but by other tools. It may be worth moving these
					into the new LDML conversion tool one day. -->
				<retain path="currencyNumericCodes.txt"/>
				<retain path="zoneinfo64.txt"/>
				<!-- A project file rather than ICU data; unlikely to ever be auto-generated. -->
				<retain path="icudata.rc"/>
				<!-- A small, stable, high-level metadata file that is easy to maintain manually. -->
				<retain path="icustd.txt"/>
			</dir>

			<dir name="rbnf"/>
			<dir name="region"/>

			<dir name="translit">
				<!-- Special case top-level files; small and easy to maintain. -->
				<retain path="en.txt"/>
				<retain path="el.txt"/>
			</dir>

			<dir name="unit"/>

			<dir name="zone">
				<!-- Edited by hand to support TZ database name compatibility. -->
				<retain path="tzdbNames.txt"/>
			</dir>
		</outputDirectories>
	</target>
	</project>