| <!-- © 2019 and later: Unicode, Inc. and others. |
| License & terms of use: http://www.unicode.org/copyright.html --> |
| |
| <!--================================================================================ |
| Setup: |
| Follow the installation instructions in README.txt in this directory. |
| |
| To build ICU data files: |
| 1: Determine the CLDR data directory (typically ending in '/production') and set the CLDR_DATA_DIR environment variable (or pass -DcldrDataDir=<dir>). |
| 2: Determine the flags required (see the list of properties below). |
| 3: Run: ant -f build-icu-data.xml -D<flag-name>=<flag-value>... |
| ================================================================================--> |
| <!-- TODO: Add things like copying of a template directory and deleting previous files |
| (perhaps always generate into a temporary directory and copy back to avoid having |
| inconsistent state when the conversion is cancelled). --> |
| <project name="Convert" default="all" basedir="."> |
| |
| <!-- Default target: initializes the arguments, builds the tool JAR, cleans the output |
| directories and then runs the conversion. --> |
| <target name="all" |
| depends="init-args, prepare-jar, clean, convert"/> |
| |
| <!-- Initialize the properties which were not already set on the command line. |
| Ant properties are immutable once set, so every default below only takes effect |
| when the property was not already supplied (e.g. via -D<name>=<value>). --> |
| <target name="init-args"> |
| <property environment="env"/> |
| <!-- Inherit properties from environment variable unless specified. As usual |
| with Ant, this is messier than it should be. All we are saying here is: |
| "Use the property if explicitly set, otherwise use the environment variable." |
| We cannot just set the property to the environment variable, since expansion |
| fails for nonexistent properties, and you are left with a literal value of |
| "${env.CLDR_DATA_DIR}". --> |
| <condition property="cldrDataDir" value="${env.CLDR_DATA_DIR}"> |
| <isset property="env.CLDR_DATA_DIR"/> |
| </condition> |
| <fail unless="cldrDataDir" |
| message="Set the CLDR_DATA_DIR environment variable (or cldrDataDir property) to the CLDR data directory (typically ending in '/production')"/> |
| |
| <!-- Ant does not inherit this from the user's environment (and it can matter). |
| This is only needed because we have to "exec" a new Ant task below. --> |
| <condition property="javaHome" value="${env.JAVA_HOME}"> |
| <isset property="env.JAVA_HOME"/> |
| </condition> |
| <!-- Fallback for when JAVA_HOME is not set in the environment: use the home of the |
| JVM that is running this Ant process (the standard "java.home" system property). |
| Without this, "javaHome" would stay undefined and the literal text "${javaHome}" |
| would be exported as JAVA_HOME to the exec'ed Ant process. This line is a no-op |
| when "javaHome" was already set above or on the command line. --> |
| <property name="javaHome" value="${java.home}"/> |
| |
| <!-- The output directory into which to write the converted ICU data. By default |
| this will overwrite (without deletion) the ICU data files in this ICU release, |
| so it is recommended that for testing, it be set to another value. --> |
| <property name="outDir" value="${basedir}/../../../icu4c/source/data/"/> |
| |
| <!-- The directory in which the additional ICU XML data is stored. --> |
| <property name="specialsDir" value="${basedir}/../../../icu4c/source/data/xml"/> |
| |
| <!-- Default value for ICU version (icuver.txt). Update this for each release. --> |
| <property name="icuVersion" value="69.1.0.0"/> |
| |
| <!-- Default value for ICU data version (icuver.txt). Update this for each release. --> |
| <property name="icuDataVersion" value="69.1.0.0"/> |
| |
| <!-- An override for the CLDR version string (icuver.txt and others). This will be |
| extracted from the CLDR library used for building the data if not set here. --> |
| <property name="cldrVersion" value=""/> |
| |
| <!-- The minimum draft status for CLDR data to be used in the conversion. See |
| CldrDraftStatus for more details. --> |
| <property name="minDraftStatus" value="contributed"/> |
| |
| <!-- A regular expression to match the locale IDs to be generated (useful for |
| debugging specific regions). This is applied after locale ID specifications |
| have been expanded into full locale IDs, so the value "en" will NOT match |
| "en_GB" or "en_001" etc. --> |
| <property name="localeIdFilter" value=""/> |
| |
| <!-- Whether to synthetically generate "pseudo locale" data ("en_XA" and "ar_XB"). --> |
| <property name="includePseudoLocales" value="false"/> |
| |
| <!-- Whether to emit a debug report containing some possibly useful information after |
| the conversion has finished. --> |
| <!-- TODO: Currently this isn't hugely useful, so find out what people want. --> |
| <property name="emitReport" value="false"/> |
| |
| <!-- List of output "types" to be generated (e.g. "rbnf,plurals,locales"); an empty |
| list means "build everything". |
| |
| Note that the grouping of types is based on the legacy converter behaviour and |
| is not always directly associated with an output directory (e.g. "locales" |
| produces locale data for curr/, lang/, main/, region/, unit/, zone/ but NOT |
| coll/, brkitr/ or rbnf/). |
| |
| Pass in the value "HELP" (or any invalid value) to see the full list of types. --> |
| <!-- TODO: Find out what common use cases are and use them. --> |
| <property name="outputTypes" value=""/> |
| |
| <!-- Override to force the 'clean' task to delete files it cannot determine to be |
| auto-generated by this tool. This is useful if the file header changes since |
| the heading is what's used to recognize auto-generated files. --> |
| <property name="forceDelete" value="false"/> |
| </target> |
| |
| <!-- Runs "mvn compile" (Maven is looked up on the PATH) to build the standalone JAR |
| that the custom tasks in this file are loaded from; using a single JAR means Ant |
| never has to be taught the Maven class-path. The build fails if Maven fails. |
| NOTE(review): presumably the POM produces the JAR during the compile phase; confirm. --> |
| <target depends="init-args" name="prepare-jar"> |
| <exec failonerror="true" searchpath="true" executable="mvn"> |
| <arg value="compile"/> |
| </exec> |
| </target> |
| |
| <!-- Somewhat hacky wrapper target which invokes the real conversion task ("convert-impl") |
| in a newly exec'ed Ant process. This is done so we can set the environment variable |
| of the new process and effectively overwrite the CLDR_DIR value. If ever the CLDR |
| library doesn't need to use CLDR_DIR at runtime to find the production data, this |
| can all be removed. --> |
| <target name="convert" depends="init-args, prepare-jar"> |
| <exec executable="ant" searchpath="true" failonerror="true"> |
| <!-- The CLDR library wants CLDR_DIR set, to the data directory. --> |
| <env key="CLDR_DIR" value="${cldrDataDir}" /> |
| <!-- Force inherit JAVA_HOME (this can be important). --> |
| <env key="JAVA_HOME" value="${javaHome}" /> |
| <!-- Initial Ant command line with all the "interesting" bit in. Every argument is |
| passed as its own "value" (rather than one whitespace-split "line") so that a |
| CLDR data directory whose path contains spaces stays a single argument. --> |
| <arg value="-f"/> |
| <arg value="build-icu-data.xml"/> |
| <arg value="convert-impl"/> |
| <arg value="-DcldrDir=${cldrDataDir}"/> |
| <!-- List all properties in the "convert-impl" task (except cldrDir). --> |
| <arg value="-DoutDir=${outDir}"/> |
| <arg value="-DspecialsDir=${specialsDir}"/> |
| <arg value="-DoutputTypes=${outputTypes}"/> |
| <arg value="-DicuVersion=${icuVersion}"/> |
| <arg value="-DicuDataVersion=${icuDataVersion}"/> |
| <arg value="-DcldrVersion=${cldrVersion}"/> |
| <arg value="-DminDraftStatus=${minDraftStatus}"/> |
| <arg value="-DlocaleIdFilter=${localeIdFilter}"/> |
| <arg value="-DincludePseudoLocales=${includePseudoLocales}"/> |
| <arg value="-DemitReport=${emitReport}"/> |
| </exec> |
| </target> |
| |
| <!-- Do the actual CLDR data conversion, based on the command line arguments, built in |
| default properties and the configuration in the "<convert>" element below. |
| This target declares no dependencies: the "convert" target runs it in a separate |
| Ant process and passes every property it reads (cldrDir, outDir, specialsDir, ...) |
| on that process's command line. --> |
| <target name="convert-impl"> |
| <!-- The custom conversion task is loaded from the standalone tool JAR. --> |
| <taskdef name="convert" classname="org.unicode.icu.tool.cldrtoicu.ant.ConvertIcuDataTask"> |
| <classpath> |
| <pathelement path="target/cldr-to-icu-1.0-SNAPSHOT-jar-with-dependencies.jar"/> |
| </classpath> |
| </taskdef> |
| <convert cldrDir="${cldrDir}" outputDir="${outDir}" specialsDir="${specialsDir}" |
| outputTypes="${outputTypes}" cldrVersion="${cldrVersion}" |
| icuVersion="${icuVersion}" icuDataVersion="${icuDataVersion}" |
| minimalDraftStatus="${minDraftStatus}" localeIdFilter="${localeIdFilter}" |
| includePseudoLocales="${includePseudoLocales}" emitReport="${emitReport}"> |
| |
| <!-- The primary set of locale IDs to be generated by default. The IDs in this list are |
| automatically expanded to include default scripts and all available regions. The |
| rules are: |
| |
| 1) Base languages are expanded to include default scripts (e.g. "en" -> "en_Latn"). |
| 2) All region and variant subtags are added for any base language or language+script |
| (e.g. "en" -> "en_GB" or "shi_Latn" -> "shi_Latn_MA"). |
| |
| If a non-default script is desired it should be listed explicitly (e.g. "sr_Latn"). |
| |
| Locale IDs with deprecated subtags (which become aliases) must still be listed in |
| full (e.g. "en_RH" or "sr_Latn_YU"). |
| --> |
| <localeIds> |
| // A |
| af, agq, ak, am, ar, ars, as, asa, ast, az, az_AZ, az_Cyrl |
| |
| // B |
| bas, be, bem, bez, bg, bm, bn, bo, br, brx, bs, bs_BA, bs_Cyrl |
| |
| // C |
| ca, ccp, ce, ceb, cgg, chr, ckb, cs, cy |
| |
| // D |
| da, dav, de, dje, doi, dsb, dua, dyo, dz |
| |
| // E |
| ebu, ee, el, en, en_NH, en_RH, eo, es, et, eu, ewo |
| |
| // F |
| fa, ff, ff_Adlm, ff_CM, ff_GN, ff_MR, ff_SN, fi, fil, fo, fr, fur, fy |
| |
| // G |
| ga, gd, gl, gsw, gu, guz, gv |
| |
| // H |
| ha, haw, he, hi, hr, hsb, hu, hy |
| |
| // I |
| ia, id, ig, ii, in, in_ID, is, it, iw, iw_IL |
| |
| // J |
| ja, jgo, jmc, jv |
| |
| // K |
| ka, kab, kam, kde, kea, khq, ki, kk, kkj, kl, kln, km, kn, ko, kok, ks |
| ks_IN, ksb, ksf, ksh, ku, kw, ky |
| |
| // L |
| lag, lb, lg, lkt, ln, lo, lrc, lt, lu, luo, luy, lv |
| |
| // M |
| mai, mas, mer, mfe, mg, mgh, mgo, mi, mk, ml, mn, mni, mni_IN, mo, mr, ms |
| mt, mua, my, mzn |
| |
| // N |
| naq, nb, nd, ne, nl, nmg, nn, nnh, no, no_NO, no_NO_NY, nus, nyn |
| |
| // O |
| om, or, os |
| |
| // P |
| pa, pa_Arab, pa_IN, pa_PK, pcm, pl, ps, pt |
| |
| // Q |
| qu |
| |
| // R |
| rm, rn, ro, rof, ru, rw, rwk |
| |
| // S |
| sa, sah, saq, sat, sat_IN, sbp, sd, sd_Deva, sd_PK, se, seh, ses, sg, sh, sh_BA, sh_CS, sh_YU |
| shi, shi_Latn, shi_MA, si, sk, sl, smn, sn, so, sq, sr, sr_BA, sr_CS, sr_Cyrl_CS, sr_Cyrl_YU, sr_Latn |
| sr_Latn_CS, sr_Latn_YU, sr_ME, sr_RS, sr_XK, sr_YU, su, su_ID, sv, sw |
| |
| // T |
| ta, te, teo, tg, th, ti, tk, tl, tl_PH, to, tr, tt, twq, tzm |
| |
| // U |
| ug, uk, ur, uz, uz_AF, uz_Arab, uz_Cyrl, uz_UZ |
| |
| // V |
| vai, vai_LR, vai_Latn, vi, vun |
| |
| // W |
| wae, wo |
| |
| // X |
| xh, xog |
| |
| // Y |
| yav, yi, yo, yue, yue_CN, yue_HK, yue_Hans |
| |
| // Z |
| zgh, zh, zh_CN, zh_HK, zh_Hant, zh_MO, zh_SG, zh_TW, zu |
| </localeIds> |
| |
| <!-- The following elements configure directories in which a subset of the available |
| locales IDs should be generated. Unlike the main <localeIds> element, these |
| filters must specify all locale IDs in full (but since they mostly select base |
| languages, this isn't a big deal). |
| |
| As well as allowing some data directories to have a subset of available data (via |
| the <localeIds> element) there are also mechanisms for controlling aliasing and |
| the locale parent relation which allows the sharing of some ICU data in cases |
| where it would otherwise need to be copied. The two mechanisms are: |
| |
| 1: inheritLanguageSubtag: Used to rewrite the parent of a locale ID from "root" to |
| its language subtag (e.g. "zh_Hant" has a natural parent of "root", but to allow |
| some base language data to be shared it can be made to have a parent of "zh"). |
| |
| 2: forcedAlias: Used to add aliases for specific directories in order to affect the |
| ICU behaviour in special cases. |
| |
| Between them these mechanisms are known as "tailorings" of the affected locales. --> |
| <!-- TODO: Explain why these special cases are needed/different. --> |
| |
| <!-- Collation data is large, but also more sharable than other data, which is why there |
| are a number of aliases and parent remappings for this directory. --> |
| <directory dir="coll" inheritLanguageSubtag="bs_Cyrl, sr_Latn, zh_Hant"> |
| <!-- These aliases are to avoid needing to copy and maintain the same collation data |
| for "zh" and "yue". The maximized version of "yue_Hans" is "yue_Hans_CN" (vs |
| "zh_Hans_CN"), and for "yue" it's "yue_Hant_HK" (vs "zh_Hant_HK"), so the |
| aliases are effectively just rewriting the base language. --> |
| <forcedAlias source="yue" target="zh_Hant"/> |
| <forcedAlias source="yue_Hant" target="zh_Hant"/> |
| <forcedAlias source="yue_CN" target="zh_Hans"/> |
| <forcedAlias source="yue_Hans" target="zh_Hans"/> |
| <forcedAlias source="yue_Hans_CN" target="zh_Hans"/> |
| |
| <!-- TODO: Find out and document this properly. --> |
| <forcedAlias source="sr_ME" target="sr_Cyrl_ME"/> |
| |
| <localeIds> |
| root, |
| |
| // A-B |
| af, am, ars, ar, as, az, be, bg, bn, bo, br, bs_Cyrl, bs, |
| |
| // C-F |
| ca, ceb, chr, cs, cy, da, de_AT, de, dsb, dz, ee, el, en, |
| en_US_POSIX, en_US, eo, es, et, fa_AF, fa, ff_Adlm, ff, fil, fi, fo, fr_CA, fr, |
| |
| // G-J |
| ga, gl, gu, ha, haw, he, hi, hr, hsb, hu, hy, |
| id_ID, id, ig, in, in_ID, is, it, iw_IL, iw, ja, |
| |
| // K-P |
| ka, kk, kl, km, kn, kok, ko, ku, ky, lb, lkt, ln, lo, lt, lv, |
| mk, ml, mn, mo, mr, ms, mt, my, nb, nb_NO, ne, nl, nn, no, no_NO, |
| om, or, pa_IN, pa, pa_Guru, pl, ps, pt, |
| |
| // R-T |
| ro, ru, sa, se, sh_BA, sh_CS, sh, sh_YU, si, sk, sl, smn, sq, |
| sr_BA, sr_Cyrl_ME, sr_Latn, sr_ME, sr_RS, sr, sv, sw, |
| ta, te, th, tk, to, tr, |
| |
| // U-Z |
| ug, uk, ur, uz, vi, wae, wo, xh, yi, yo, yue_CN, yue_Hans_CN, yue_Hans, |
| yue_Hant, yue, zh_CN, zh_Hans, zh_Hant, zh_HK, zh_MO, zh_SG, zh_TW, zh, zu |
| </localeIds> |
| </directory> |
| |
| <directory dir="rbnf"> |
| <!-- It is not at all clear why this is being done. It's certainly not exactly the |
| same as above, since (a) the alias is reversed (b) "zh_Hant" does exist, with |
| different data than "yue", so this alias is not just rewriting the base |
| language. --> |
| <!-- TODO: Find out and document this properly. --> |
| <forcedAlias source="zh_Hant_HK" target="yue"/> |
| |
| <localeIds> |
| root, |
| |
| // A-E |
| af, ak, am, ars, ar, az, be, bg, bs, ca, ccp, chr, cs, cy, |
| da, de_CH, de, ee, el, en_001, en_IN, en, eo, es_419, es_DO, |
| es_GT, es_HN, es_MX, es_NI, es_PA, es_PR, es_SV, es, es_US, et, |
| |
| // F-P |
| fa_AF, fa, ff, fil, fi, fo, fr_BE, fr_CH, fr, ga, he, hi, hr, |
| hu, hy, id, in, is, it, iw, ja, ka, kl, km, ko, ky, lb, |
| lo, lrc, lt, lv, mk, ms, mt, my, nb, ne, nl, nn, no, pl, pt_PT, pt, |
| |
| // Q-Z |
| qu, ro, ru, se, sh, sk, sl, sq, sr_Latn, sr, su, sv, sw, ta, th, tr, |
| uk, vi, yue_Hans, yue, zh_Hant_HK, zh_Hant, zh_HK, zh_MO, zh_TW, zh |
| </localeIds> |
| </directory> |
| |
| <directory dir="brkitr" inheritLanguageSubtag="zh_Hant"> |
| <localeIds> |
| root, |
| de, el, en, en_US_POSIX, en_US, es, fr, it, ja, pt, ru, zh_Hant, zh |
| </localeIds> |
| </directory> |
| |
| <!-- GLOBAL ALIASES --> |
| |
| <!-- Some spoken languages (e.g. "ars") inherit all their data from a written language |
| (e.g. "ar_SA"). However CLDR doesn't currently support a way to represent that |
| relationship. Unlike deprecated languages for which an alias can be inferred from |
| the "languageAlias" CLDR data, there's no way in CLDR to represent the fact that |
| we want "ars" (a non-deprecated language) to inherit the data of "ar_SA". |
| |
| This alias is the first example of potentially many cases where ICU needs to |
| generate an alias in order to affect "sideways inheritance" for spoken languages, |
| and at some stage it should probably be supported properly in the CLDR data. --> |
| <forcedAlias source="ars" target="ar_SA"/> |
| |
| <!-- A legacy global alias (note that "no_NO_NY" is not even structurally valid). --> |
| <forcedAlias source="no_NO_NY" target="nn_NO"/> |
| |
| <!-- This one is a bit silly, it is just to generate a stub for no_NO, which is |
| not in CLDR. If we do not do this, then including it in localeIds will generate |
| empty no_Latn and no_Latn_NO and then no_NO aliasing to no_Latn_NO. --> |
| <forcedAlias source="no_NO" target="no"/> |
| |
| <!-- ALTERNATE VALUES --> |
| |
| <!-- The following elements configure alternate values for some special case paths. |
| The target path will only be replaced if both it, and the source path, exist in |
| the CLDR data (paths will not be modified if only the source path exists). |
| |
| Since the paths must represent the same semantic type of data, they must be in the |
| same "namespace" (same element names) and must not contain value attributes. Thus |
| they can only differ by distinguishing attributes (either added or modified). |
| |
| This feature is typically used to select alternate translations (e.g. short forms) |
| for certain paths. --> |
| <!-- <altPath target="//path/to/value[@attr='foo']" |
| source="//path/to/value[@attr='bar']" |
| locales="xx,yy_ZZ"/> --> |
| </convert> |
| </target> |
| |
| <!-- Cleans the output directories under ${outDir} using the custom |
| "outputDirectories" task loaded from the standalone tool JAR. --> |
| <target depends="init-args, prepare-jar" name="clean"> |
| <taskdef classname="org.unicode.icu.tool.cldrtoicu.ant.CleanOutputDirectoryTask" |
| name="outputDirectories"> |
| <classpath> |
| <pathelement path="target/cldr-to-icu-1.0-SNAPSHOT-jar-with-dependencies.jar"/> |
| </classpath> |
| </taskdef> |
| |
| <!-- Every file in a directory listed here is assumed to have been generated |
| automatically by the conversion tool, unless a <retain> element names it |
| explicitly. Before deleting a file, the tool checks that it carries the expected |
| header, which indicates that it was automatically generated. |
| |
| If unexpected files are found, the "clean" task fails without deleting anything |
| (unless 'forceDelete' is set to override this). Even when 'forceDelete' is set, |
| the files explicitly listed below are never deleted by this process. |
| |
| This two-step approach minimizes the risk of the conversion process ever |
| accidentally deleting a manually maintained file. |
| --> |
| <outputDirectories forceDelete="${forceDelete}" root="${outDir}"> |
| <dir name="brkitr"> |
| <retain path="dictionaries"/> |
| <retain path="rules"/> |
| </dir> |
| <dir name="coll"> |
| <!-- Legacy files with names that automatic generation does not support. |
| They are simple to maintain by hand and unlikely to ever change again. --> |
| <retain path="de__PHONEBOOK.txt"/> |
| <retain path="de_.txt"/> |
| <retain path="es__TRADITIONAL.txt"/> |
| <retain path="es_.txt"/> |
| </dir> |
| <dir name="curr"/> |
| <dir name="lang"/> |
| <dir name="locales"/> |
| <dir name="misc"> |
| <!-- Machine generated files, but produced by other tools. It may be worth |
| moving them into the new LDML conversion tool one day. --> |
| <retain path="currencyNumericCodes.txt"/> |
| <retain path="zoneinfo64.txt"/> |
| <!-- A project file rather than ICU data; unlikely to ever be auto-generated. --> |
| <retain path="icudata.rc"/> |
| <!-- A small, stable, high-level metadata file that is easy to maintain by hand. --> |
| <retain path="icustd.txt"/> |
| </dir> |
| <dir name="rbnf"/> |
| <dir name="region"/> |
| <dir name="translit"> |
| <!-- Special case top-level files: small and easy to maintain. --> |
| <retain path="en.txt"/> |
| <retain path="el.txt"/> |
| </dir> |
| <dir name="unit"/> |
| <dir name="zone"> |
| <!-- Edited by hand to support TZ database name compatibility. --> |
| <retain path="tzdbNames.txt"/> |
| </dir> |
| </outputDirectories> |
| </target> |
| </project> |
| |