| // © 2017 and later: Unicode, Inc. and others. |
| // License & terms of use: http://www.unicode.org/copyright.html#License |
| package com.ibm.icu.dev.test.util; |
| |
| import java.io.BufferedReader; |
| import java.util.ArrayList; |
| import java.util.HashMap; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.Set; |
| import java.util.TreeSet; |
| |
| import org.junit.Test; |
| import org.junit.runner.RunWith; |
| |
| import com.ibm.icu.dev.test.TestFmwk; |
| import com.ibm.icu.impl.locale.LocaleDistance; |
| import com.ibm.icu.impl.locale.XCldrStub.FileUtilities; |
| import com.ibm.icu.impl.locale.XLikelySubtags; |
| import com.ibm.icu.impl.locale.XLocaleMatcher; |
| import com.ibm.icu.impl.locale.XLocaleMatcher.FavorSubtag; |
| import com.ibm.icu.util.LocaleMatcher; |
| import com.ibm.icu.util.LocalePriorityList; |
| import com.ibm.icu.util.ULocale; |
| |
| import junitparams.JUnitParamsRunner; |
| import junitparams.Parameters; |
| |
| /** |
| * Test the XLocaleMatcher. |
| * |
| * @author markdavis |
| */ |
| @RunWith(JUnitParamsRunner.class) |
| public class XLocaleMatcherTest extends TestFmwk { |
| private static final int REGION_DISTANCE = 4; |
| |
| private static final LocaleDistance LANGUAGE_MATCHER_DATA = LocaleDistance.INSTANCE; |
| |
| private XLocaleMatcher newXLocaleMatcher() { |
| return new XLocaleMatcher(""); |
| } |
| |
| private XLocaleMatcher newXLocaleMatcher(LocalePriorityList build) { |
| return new XLocaleMatcher(build); |
| } |
| |
| private XLocaleMatcher newXLocaleMatcher(String string) { |
| return new XLocaleMatcher(LocalePriorityList.add(string).build()); |
| } |
| |
| @SuppressWarnings("unused") |
| private XLocaleMatcher newXLocaleMatcher(LocalePriorityList list, int d) { |
| return XLocaleMatcher.builder().setSupportedULocales(list.getULocales()). |
| internalSetThresholdDistance(d).build(); |
| } |
| |
| // public void testParentLocales() { |
| // // find all the regions that have a closer relation because of an explicit parent |
| // Set<String> explicitParents = new HashSet<>(INFO.getExplicitParents()); |
| // explicitParents.remove("root"); |
| // Set<String> otherParents = new HashSet<>(INFO.getExplicitParents()); |
| // for (String locale : explicitParents) { |
| // while (true) { |
| // locale = LocaleIDParser.getParent(locale); |
| // if (locale == null || locale.equals("root")) { |
| // break; |
| // } |
| // otherParents.add(locale); |
| // } |
| // } |
| // otherParents.remove("root"); |
| // |
| // for (String locale : CONFIG.getCldrFactory().getAvailable()) { |
| // String parentId = LocaleIDParser.getParent(locale); |
| // String parentIdSimple = LocaleIDParser.getSimpleParent(locale); |
| // if (!explicitParents.contains(parentId) && !otherParents.contains(parentIdSimple)) { |
| // continue; |
| // } |
| // System.out.println(locale + "\t" + CONFIG.getEnglish().getName(locale) + "\t" + parentId + "\t" + parentIdSimple); |
| // } |
| // } |
| |
| |
| // TBD reenable with override data |
| // public void testOverrideData() { |
| // double threshold = 0.05; |
| // XLocaleDistance XLocaleMatcherData = new XLocaleDistance() |
| // .addDistance("br", "fr", 10, true) |
| // .addDistance("es", "cy", 10, true); |
| // logln(XLocaleMatcherData.toString()); |
| // |
| // final XLocaleMatcher matcher = newXLocaleMatcher( |
| // LocalePriorityList |
| // .add(ULocale.ENGLISH) |
| // .add(ULocale.FRENCH) |
| // .add(ULocale.UK) |
| // .build(), XLocaleMatcherData, threshold); |
| // logln(matcher.toString()); |
| // |
| // assertEquals(ULocale.FRENCH, matcher.getBestMatch(new ULocale("br"))); |
| // assertEquals(ULocale.ENGLISH, matcher.getBestMatch(new ULocale("es"))); // one |
| // // way |
| // } |
| |
| |
| /** |
| * If all the base languages are the same, then each sublocale matches |
| * itself most closely |
| */ |
| @Test |
| public void testExactMatches() { |
| String lastBase = ""; |
| TreeSet<ULocale> sorted = new TreeSet<>(); |
| for (ULocale loc : ULocale.getAvailableLocales()) { |
| String language = loc.getLanguage(); |
| if (!lastBase.equals(language)) { |
| check(sorted); |
| sorted.clear(); |
| lastBase = language; |
| } |
| sorted.add(loc); |
| } |
| check(sorted); |
| } |
| |
| private void check(Set<ULocale> sorted) { |
| if (sorted.isEmpty()) { |
| return; |
| } |
| check2(sorted); |
| ULocale first = sorted.iterator().next(); |
| ULocale max = ULocale.addLikelySubtags(first); |
| sorted.add(max); |
| check2(sorted); |
| } |
| |
| private static final ULocale posix = new ULocale("en_US_POSIX"); |
| |
| /** |
| * @param sorted |
| */ |
| private void check2(Set<ULocale> sorted) { |
| logln("Checking: " + sorted); |
| XLocaleMatcher matcher = newXLocaleMatcher( |
| LocalePriorityList.add( |
| sorted.toArray(new ULocale[sorted.size()])) |
| .build()); |
| for (ULocale loc : sorted) { |
| // The result may not be the exact same locale, but it must be equivalent. |
| // Variants and extensions are ignored. |
| if (loc.equals(posix)) { continue; } |
| ULocale max = ULocale.addLikelySubtags(loc); |
| ULocale best = matcher.getBestMatch(loc); |
| ULocale maxBest = ULocale.addLikelySubtags(best); |
| assertEquals(loc.toString(), max, maxBest); |
| } |
| } |
| |
| @Test |
| public void testDemotion() { |
| LocalePriorityList supported = LocalePriorityList.add("fr, de-CH, it").build(); |
| LocalePriorityList desired = LocalePriorityList.add("fr-CH, de-CH, it").build(); |
| XLocaleMatcher noDemotion = XLocaleMatcher.builder(). |
| setSupportedULocales(supported.getULocales()). |
| setDemotionPerDesiredLocale(XLocaleMatcher.Demotion.NONE).build(); |
| assertEquals("no demotion", new ULocale("de-CH"), noDemotion.getBestMatch(desired)); |
| |
| XLocaleMatcher regionDemotion = XLocaleMatcher.builder(). |
| setSupportedULocales(supported.getULocales()). |
| setDemotionPerDesiredLocale(XLocaleMatcher.Demotion.REGION).build(); |
| assertEquals("region demotion", ULocale.FRENCH, regionDemotion.getBestMatch(desired)); |
| } |
| |
| private static final class PerfCase { |
| ULocale desired; |
| ULocale expectedShort; |
| ULocale expectedLong; |
| ULocale expectedVeryLong; |
| |
| PerfCase(String des, String expShort, String expLong, String expVeryLong) { |
| desired = new ULocale(des); |
| expectedShort = new ULocale(expShort); |
| expectedLong = new ULocale(expLong); |
| expectedVeryLong = new ULocale(expVeryLong); |
| } |
| } |
| |
| private static final int WARM_UP_ITERATIONS = 1000; |
| private static final int BENCHMARK_ITERATIONS = 20000; |
| private static final int AVG_PCT_MEDIUM_NEW_OLD = 33; |
| private static final int AVG_PCT_LONG_NEW_OLD = 80; |
| |
| @Test |
| public void testPerf() { |
| if (LANGUAGE_MATCHER_DATA == null) { |
| return; // skip except when testing data |
| } |
| |
| final String shortList = "en, sv"; |
| final String longList = "af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, " + |
| "el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, " + |
| "hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, " + |
| "mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, " + |
| "si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, " + |
| "zh-CN, zh-TW, zu"; |
| final String veryLongList = "af, af_NA, af_ZA, agq, agq_CM, ak, ak_GH, am, am_ET, " + |
| "ar, ar_001, ar_AE, ar_BH, ar_DJ, ar_DZ, ar_EG, ar_EH, ar_ER, ar_IL, ar_IQ, " + |
| "ar_JO, ar_KM, ar_KW, ar_LB, ar_LY, ar_MA, ar_MR, ar_OM, ar_PS, ar_QA, " + |
| "ar_SA, ar_SD, ar_SO, ar_SS, ar_SY, ar_TD, ar_TN, ar_YE, as, as_IN, asa, asa_TZ, " + |
| "ast, ast_ES, az, az_Cyrl, az_Cyrl_AZ, az_Latn, az_Latn_AZ, " + |
| "bas, bas_CM, be, be_BY, bem, bem_ZM, bez, bez_TZ, bg, bg_BG, bm, bm_ML, " + |
| "bn, bn_BD, bn_IN, bo, bo_CN, bo_IN, br, br_FR, brx, brx_IN, " + |
| "bs, bs_Cyrl, bs_Cyrl_BA, bs_Latn, bs_Latn_BA, ca, ca_AD, ca_ES, ca_ES_VALENCIA, " + |
| "ca_FR, ca_IT, ce, ce_RU, cgg, cgg_UG, chr, chr_US, ckb, ckb_IQ, ckb_IR, cs, cs_CZ, " + |
| "cu, cu_RU, cy, cy_GB, da, da_DK, da_GL, dav, dav_KE, de, de_AT, de_BE, de_CH, " + |
| "de_DE, de_LI, de_LU, dje, dje_NE, dsb, dsb_DE, dua, dua_CM, dyo, dyo_SN, dz, dz_BT, " + |
| // removed en_001 to avoid exact match |
| "ebu, ebu_KE, ee, ee_GH, ee_TG, el, el_CY, el_GR, en, en_150, " + |
| "en_AG, en_AI, en_AS, en_AT, en_AU, en_BB, en_BE, en_BI, en_BM, en_BS, en_BW, " + |
| "en_BZ, en_CA, en_CC, en_CH, en_CK, en_CM, en_CX, en_CY, en_DE, en_DG, en_DK, " + |
| "en_DM, en_ER, en_FI, en_FJ, en_FK, en_FM, en_GB, en_GD, en_GG, en_GH, en_GI, " + |
| "en_GM, en_GU, en_GY, en_HK, en_IE, en_IL, en_IM, en_IN, en_IO, en_JE, en_JM, " + |
| "en_KE, en_KI, en_KN, en_KY, en_LC, en_LR, en_LS, en_MG, en_MH, en_MO, en_MP, " + |
| "en_MS, en_MT, en_MU, en_MW, en_MY, en_NA, en_NF, en_NG, en_NL, en_NR, en_NU, " + |
| "en_NZ, en_PG, en_PH, en_PK, en_PN, en_PR, en_PW, en_RW, en_SB, en_SC, en_SD, " + |
| "en_SE, en_SG, en_SH, en_SI, en_SL, en_SS, en_SX, en_SZ, en_TC, en_TK, en_TO, " + |
| "en_TT, en_TV, en_TZ, en_UG, en_UM, en_US, en_US_POSIX, en_VC, en_VG, en_VI, " + |
| "en_VU, en_WS, en_ZA, en_ZM, en_ZW, eo, eo_001, es, es_419, es_AR, es_BO, es_CL, " + |
| "es_CO, es_CR, es_CU, es_DO, es_EA, es_EC, es_ES, es_GQ, es_GT, es_HN, es_IC, " + |
| "es_MX, es_NI, es_PA, es_PE, es_PH, es_PR, es_PY, es_SV, es_US, es_UY, es_VE, " + |
| "et, et_EE, eu, eu_ES, ewo, ewo_CM, fa, fa_AF, fa_IR, ff, ff_CM, ff_GN, ff_MR, " + |
| "ff_SN, fi, fi_FI, fil, fil_PH, fo, fo_DK, fo_FO, fr, fr_BE, fr_BF, fr_BI, fr_BJ, " + |
| "fr_BL, fr_CA, fr_CD, fr_CF, fr_CG, fr_CH, fr_CI, fr_CM, fr_DJ, fr_DZ, " + |
| "fr_FR, fr_GA, fr_GF, fr_GN, fr_GP, fr_GQ, fr_HT, fr_KM, fr_LU, fr_MA, fr_MC, " + |
| "fr_MF, fr_MG, fr_ML, fr_MQ, fr_MR, fr_MU, fr_NC, fr_NE, fr_PF, fr_PM, fr_RE, " + |
| "fr_RW, fr_SC, fr_SN, fr_SY, fr_TD, fr_TG, fr_TN, fr_VU, fr_WF, fr_YT, " + |
| "fur, fur_IT, fy, fy_NL, ga, ga_IE, gd, gd_GB, gl, gl_ES, gsw, gsw_CH, gsw_FR, " + |
| "gsw_LI, gu, gu_IN, guz, guz_KE, gv, gv_IM, ha, ha_GH, ha_NE, ha_NG, haw, haw_US, " + |
| "he, he_IL, hi, hi_IN, hr, hr_BA, hr_HR, hsb, hsb_DE, hu, hu_HU, hy, hy_AM, " + |
| "id, id_ID, ig, ig_NG, ii, ii_CN, is, is_IS, it, it_CH, it_IT, it_SM, ja, ja_JP, " + |
| "jgo, jgo_CM, jmc, jmc_TZ, ka, ka_GE, kab, kab_DZ, kam, kam_KE, kde, kde_TZ, " + |
| "kea, kea_CV, khq, khq_ML, ki, ki_KE, kk, kk_KZ, kkj, kkj_CM, kl, kl_GL, " + |
| "kln, kln_KE, km, km_KH, kn, kn_IN, ko, ko_KP, ko_KR, kok, kok_IN, " + |
| "ks, ks_IN, ksb, ksb_TZ, ksf, ksf_CM, ksh, ksh_DE, kw, kw_GB, ky, ky_KG, " + |
| "lag, lag_TZ, lb, lb_LU, lg, lg_UG, lkt, lkt_US, ln, ln_AO, ln_CD, ln_CF, ln_CG, " + |
| "lo, lo_LA, lrc, lrc_IQ, lrc_IR, lt, lt_LT, lu, lu_CD, luo, luo_KE, luy, luy_KE, " + |
| "lv, lv_LV, mas, mas_KE, mas_TZ, mer, mer_KE, mfe, mfe_MU, mg, mg_MG, " + |
| "mgh, mgh_MZ, mgo, mgo_CM, mk, mk_MK, ml, ml_IN, mn, mn_MN, mr, mr_IN, ms, ms_BN, " + |
| "ms_MY, ms_SG, mt, mt_MT, mua, mua_CM, my, my_MM, mzn, mzn_IR, naq, naq_NA, " + |
| "nb, nb_NO, nb_SJ, nd, nd_ZW, ne, ne_IN, ne_NP, nl, nl_AW, nl_BE, nl_BQ, nl_CW, " + |
| "nl_NL, nl_SR, nl_SX, nmg, nmg_CM, nn, nn_NO, nnh, nnh_CM, nus, nus_SS, nyn, " + |
| "nyn_UG, om, om_ET, om_KE, or, or_IN, os, os_GE, os_RU, pa, pa_Arab, pa_Arab_PK, " + |
| "pa_Guru, pa_Guru_IN, pl, pl_PL, prg, prg_001, ps, ps_AF, pt, pt_AO, pt_BR, " + |
| "pt_CV, pt_GW, pt_MO, pt_MZ, pt_PT, pt_ST, pt_TL, qu, qu_BO, qu_EC, qu_PE, rm, " + |
| "rm_CH, rn, rn_BI, ro, ro_MD, ro_RO, rof, rof_TZ, root, ru, ru_BY, ru_KG, ru_KZ, " + |
| "ru_MD, ru_RU, ru_UA, rw, rw_RW, rwk, rwk_TZ, sah, sah_RU, saq, saq_KE, sbp, " + |
| "sbp_TZ, se, se_FI, se_NO, se_SE, seh, seh_MZ, ses, ses_ML, sg, sg_CF, shi, " + |
| "shi_Latn, shi_Latn_MA, shi_Tfng, shi_Tfng_MA, si, si_LK, sk, sk_SK, sl, sl_SI, " + |
| "smn, smn_FI, sn, sn_ZW, so, so_DJ, so_ET, so_KE, so_SO, sq, sq_AL, sq_MK, sq_XK, " + |
| "sr, sr_Cyrl, sr_Cyrl_BA, sr_Cyrl_ME, sr_Cyrl_RS, sr_Cyrl_XK, sr_Latn, " + |
| "sr_Latn_BA, sr_Latn_ME, sr_Latn_RS, sr_Latn_XK, sv, sv_AX, sv_FI, sv_SE, sw, " + |
| "sw_CD, sw_KE, sw_TZ, sw_UG, ta, ta_IN, ta_LK, ta_MY, ta_SG, te, te_IN, teo, " + |
| "teo_KE, teo_UG, th, th_TH, ti, ti_ER, ti_ET, tk, tk_TM, to, to_TO, tr, tr_CY, " + |
| "tr_TR, twq, twq_NE, tzm, tzm_MA, ug, ug_CN, uk, uk_UA, ur, ur_IN, ur_PK, uz, " + |
| "uz_Arab, uz_Arab_AF, uz_Cyrl, uz_Cyrl_UZ, uz_Latn, uz_Latn_UZ, vai, vai_Latn, " + |
| "vai_Latn_LR, vai_Vaii, vai_Vaii_LR, vi, vi_VN, vo, vo_001, vun, vun_TZ, wae, " + |
| "wae_CH, xog, xog_UG, yav, yav_CM, yi, yi_001, yo, yo_BJ, yo_NG, zgh, zgh_MA, " + |
| "zh, zh_Hans, zh_Hans_CN, zh_Hans_HK, zh_Hans_MO, zh_Hans_SG, zh_Hant, " + |
| "zh_Hant_HK, zh_Hant_MO, zh_Hant_TW, zu, zu_ZA"; |
| |
| final XLocaleMatcher matcherShort = newXLocaleMatcher(shortList); |
| final XLocaleMatcher matcherLong = newXLocaleMatcher(longList); |
| final XLocaleMatcher matcherVeryLong = newXLocaleMatcher(veryLongList); |
| |
| final LocaleMatcher matcherShortOld = new LocaleMatcher(shortList); |
| final LocaleMatcher matcherLongOld = new LocaleMatcher(longList); |
| final LocaleMatcher matcherVeryLongOld = new LocaleMatcher(veryLongList); |
| |
| long timeShortNew=0; |
| long timeMediumNew=0; |
| long timeLongNew=0; |
| |
| long timeShortOld=0; |
| long timeMediumOld=0; |
| long timeLongOld=0; |
| |
| PerfCase[] pcs = new PerfCase[] { |
| // Exact match in all matchers. |
| new PerfCase("sv", "sv", "sv", "sv"), |
| // Common locale, exact match only in very long list. |
| new PerfCase("fr_CA", "en", "fr", "fr_CA"), |
| // Unusual locale, no exact match. |
| new PerfCase("de_CA", "en", "de", "de"), |
| // World English maps to several region partitions. |
| new PerfCase("en_001", "en", "en", "en"), |
| // Ancient language with interesting subtags. |
| new PerfCase("egy_Copt_CY", "en", "af", "af") |
| }; |
| |
| for (PerfCase pc : pcs) { |
| final ULocale desired = pc.desired; |
| |
| assertEquals(desired.toString(), pc.expectedShort, matcherShort.getBestMatch(desired)); |
| assertEquals(desired.toString(), pc.expectedLong, matcherLong.getBestMatch(desired)); |
| assertEquals(desired.toString(), pc.expectedVeryLong, matcherVeryLong.getBestMatch(desired)); |
| |
| timeXLocaleMatcher(desired, matcherShort, WARM_UP_ITERATIONS); |
| timeXLocaleMatcher(desired, matcherLong, WARM_UP_ITERATIONS); |
| timeXLocaleMatcher(desired, matcherVeryLong, WARM_UP_ITERATIONS); |
| long tns = timeXLocaleMatcher(desired, matcherShort, BENCHMARK_ITERATIONS); |
| System.out.format("New Duration (few supported):\t%s\t%d\tnanos\n", desired, tns); |
| timeShortNew += tns; |
| long tnl = timeXLocaleMatcher(desired, matcherLong, BENCHMARK_ITERATIONS); |
| System.out.format("New Duration (med. supported):\t%s\t%d\tnanos\n", desired, tnl); |
| timeMediumNew += tnl; |
| long tnv = timeXLocaleMatcher(desired, matcherVeryLong, BENCHMARK_ITERATIONS); |
| System.out.format("New Duration (many supported):\t%s\t%d\tnanos\n", desired, tnv); |
| timeLongNew += tnv; |
| |
| timeLocaleMatcher(desired, matcherShortOld, WARM_UP_ITERATIONS); |
| timeLocaleMatcher(desired, matcherLongOld, WARM_UP_ITERATIONS); |
| timeLocaleMatcher(desired, matcherVeryLongOld, WARM_UP_ITERATIONS); |
| long tos = timeLocaleMatcher(desired, matcherShortOld, BENCHMARK_ITERATIONS); |
| System.out.format("Old Duration (few supported):\t%s\t%d\tnanos new/old=%d%%\n", |
| desired, tos, (100 * tns) / tos); |
| timeShortOld += tos; |
| long tol = timeLocaleMatcher(desired, matcherLongOld, BENCHMARK_ITERATIONS); |
| System.out.format("Old Duration (med. supported):\t%s\t%d\tnanos new/old=%d%%\n", |
| desired, tol, (100 * tnl) / tol); |
| timeMediumOld += tol; |
| long tov = timeLocaleMatcher(desired, matcherVeryLongOld, BENCHMARK_ITERATIONS); |
| System.out.format("Old Duration (many supported):\t%s\t%d\tnanos new/old=%d%%\n", |
| desired, tov, (100 * tnv) / tov); |
| timeLongOld += tov; |
| } |
| |
| assertTrue( |
| String.format("timeShortNew=%d < %d%% of timeShortOld=%d", |
| timeShortNew, AVG_PCT_MEDIUM_NEW_OLD, timeShortOld), |
| timeShortNew * 100 < timeShortOld * AVG_PCT_MEDIUM_NEW_OLD); |
| assertTrue( |
| String.format("timeMediumNew=%d < %d%% of timeMediumOld=%d", |
| timeMediumNew, AVG_PCT_MEDIUM_NEW_OLD, timeMediumOld), |
| timeMediumNew * 100 < timeMediumOld * AVG_PCT_MEDIUM_NEW_OLD); |
| assertTrue( |
| String.format("timeLongNew=%d < %d%% of timeLongOld=%d", |
| timeLongNew, AVG_PCT_LONG_NEW_OLD, timeLongOld), |
| timeLongNew * 100 < timeLongOld * AVG_PCT_LONG_NEW_OLD); |
| |
| maximizePerf(); |
| } |
| |
| private static long timeXLocaleMatcher(ULocale desired, XLocaleMatcher matcher, int iterations) { |
| long start = System.nanoTime(); |
| for (int i = iterations; i > 0; --i) { |
| matcher.getBestMatch(desired); |
| } |
| long delta = System.nanoTime() - start; |
| return (delta / iterations); |
| } |
| |
| private static long timeLocaleMatcher(ULocale desired, LocaleMatcher matcher, int iterations) { |
| long start = System.nanoTime(); |
| for (int i = iterations; i > 0; --i) { |
| matcher.getBestMatch(desired); |
| } |
| long delta = System.nanoTime() - start; |
| return (delta / iterations); |
| } |
| |
| private void maximizePerf() { |
| final String tags = "af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, " + |
| "el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, " + |
| "hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, " + |
| "mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, " + |
| "si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, " + |
| "zh-CN, zh-TW, zu"; |
| LocalePriorityList list = LocalePriorityList.add(tags).build(); |
| int few = 1000; |
| long t = timeMaximize(list, few); // warm up |
| t = timeMaximize(list, few); // measure for scale |
| long targetTime = 100000000L; // 10^8 ns = 0.1s |
| int iterations = (int)((targetTime * few) / t); |
| t = timeMaximize(list, iterations); |
| int length = 0; |
| for (@SuppressWarnings("unused") ULocale locale : list) { ++length; } |
| System.out.println("maximize: " + (t / iterations / length) + " ns/locale: " + |
| t + " ns / " + iterations + " iterations / " + length + " locales"); |
| } |
| |
| // returns total ns not per iteration |
| private static long timeMaximize(Iterable<ULocale> list, int iterations) { |
| long start = System.nanoTime(); |
| for (int i = iterations; i > 0; --i) { |
| for (ULocale locale : list) { |
| XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(locale); |
| } |
| } |
| return System.nanoTime() - start; |
| } |
| |
| private static final class TestCase implements Cloneable { |
| private static final String ENDL = System.getProperties().getProperty("line.separator"); |
| |
| int lineNr = 0; |
| |
| String nameLine = ""; |
| String supportedLine = ""; |
| String defaultLine = ""; |
| String distanceLine = ""; |
| String thresholdLine = ""; |
| String matchLine = ""; |
| |
| String supported = ""; |
| String def = ""; |
| String favor = ""; |
| String threshold = ""; |
| String desired = ""; |
| String expMatch = ""; |
| String expDesired = ""; |
| String expCombined = ""; |
| |
| @Override |
| public TestCase clone() throws CloneNotSupportedException { |
| return (TestCase) super.clone(); |
| } |
| |
| void reset(String newNameLine) { |
| nameLine = newNameLine; |
| supportedLine = ""; |
| defaultLine = ""; |
| distanceLine = ""; |
| thresholdLine = ""; |
| |
| supported = ""; |
| def = ""; |
| favor = ""; |
| threshold = ""; |
| } |
| |
| String toInputsKey() { |
| return supported + '+' + def + '+' + favor + '+' + threshold + '+' + desired; |
| } |
| |
| private static void appendLine(StringBuilder sb, String line) { |
| if (!line.isEmpty()) { |
| sb.append(ENDL).append(line); |
| } |
| } |
| |
| @Override |
| public String toString() { |
| StringBuilder sb = new StringBuilder(nameLine); |
| appendLine(sb, supportedLine); |
| appendLine(sb, defaultLine); |
| appendLine(sb, distanceLine); |
| appendLine(sb, thresholdLine); |
| sb.append(ENDL).append("line ").append(lineNr).append(':'); |
| appendLine(sb, matchLine); |
| return sb.toString(); |
| } |
| } |
| |
| private static String getSuffixAfterPrefix(String s, int limit, String prefix) { |
| if (prefix.length() <= limit && s.startsWith(prefix)) { |
| return s.substring(prefix.length(), limit); |
| } else { |
| return null; |
| } |
| } |
| |
| // UsedReflectively, not private to avoid unused-warning |
| static List<TestCase> readTestCases() throws Exception { |
| List<TestCase> tests = new ArrayList<>(); |
| Map<String, Integer> uniqueTests = new HashMap<>(); |
| TestCase test = new TestCase(); |
| String filename = "data/localeMatcherTest.txt"; |
| try (BufferedReader in = FileUtilities.openFile(XLocaleMatcherTest.class, filename)) { |
| String line; |
| while ((line = in.readLine()) != null) { |
| ++test.lineNr; |
| // Start of comment, or end of line, minus trailing spaces. |
| int limit = line.indexOf('#'); |
| if (limit < 0) { |
| limit = line.length(); |
| } |
| char c; |
| while (limit > 0 && ((c = line.charAt(limit - 1)) == ' ' || c == '\t')) { |
| --limit; |
| } |
| if (limit == 0) { // empty line |
| continue; |
| } |
| String suffix; |
| if (line.startsWith("** test: ")) { |
| test.reset(line); |
| } else if ((suffix = getSuffixAfterPrefix(line, limit, "@supported=")) != null) { |
| test.supportedLine = line; |
| test.supported = suffix; |
| } else if ((suffix = getSuffixAfterPrefix(line, limit, "@default=")) != null) { |
| test.defaultLine = line; |
| test.def = suffix; |
| } else if ((suffix = getSuffixAfterPrefix(line, limit, "@favor=")) != null) { |
| test.distanceLine = line; |
| test.favor = suffix; |
| } else if ((suffix = getSuffixAfterPrefix(line, limit, "@threshold=")) != null) { |
| test.thresholdLine = line; |
| test.threshold = suffix; |
| } else { |
| int matchSep = line.indexOf(">>"); |
| // >> before an inline comment, and followed by more than white space. |
| if (0 <= matchSep && (matchSep + 2) < limit) { |
| test.matchLine = line; |
| test.desired = line.substring(0, matchSep).trim(); |
| test.expDesired = test.expCombined = ""; |
| int start = matchSep + 2; |
| int expLimit = line.indexOf('|', start); |
| if (expLimit < 0) { |
| test.expMatch = line.substring(start, limit).trim(); |
| } else { |
| test.expMatch = line.substring(start, expLimit).trim(); |
| start = expLimit + 1; |
| expLimit = line.indexOf('|', start); |
| if (expLimit < 0) { |
| test.expDesired = line.substring(start, limit).trim(); |
| } else { |
| test.expDesired = line.substring(start, expLimit).trim(); |
| test.expCombined = line.substring(expLimit + 1, limit).trim(); |
| } |
| } |
| String inputs = test.toInputsKey(); |
| Integer prevIndex = uniqueTests.get(inputs); |
| if (prevIndex == null) { |
| uniqueTests.put(inputs, tests.size()); |
| } else { |
| System.out.println("Locale matcher test case on line " + test.lineNr |
| + " is a duplicate of line " + tests.get(prevIndex).lineNr); |
| } |
| tests.add(test.clone()); |
| } else { |
| throw new IllegalArgumentException("test data syntax error on line " |
| + test.lineNr + "\n" + line); |
| } |
| } |
| } |
| } |
| System.out.println("Number of duplicate locale matcher test cases: " + (tests.size() - uniqueTests.size())); |
| return tests; |
| } |
| |
| private static ULocale getULocaleOrNull(String s) { |
| if (s.equals("null")) { |
| return null; |
| } else { |
| return new ULocale(s); |
| } |
| } |
| |
| @Test |
| @Parameters(method = "readTestCases") |
| public void dataDriven(TestCase test) { |
| XLocaleMatcher matcher; |
| if (test.def.isEmpty() && test.favor.isEmpty() && test.threshold.isEmpty()) { |
| matcher = new XLocaleMatcher(test.supported); |
| } else { |
| XLocaleMatcher.Builder builder = XLocaleMatcher.builder(); |
| builder.setSupportedLocales(test.supported); |
| if (!test.def.isEmpty()) { |
| builder.setDefaultULocale(new ULocale(test.def)); |
| } |
| if (!test.favor.isEmpty()) { |
| FavorSubtag favor; |
| switch (test.favor) { |
| case "normal": |
| favor = FavorSubtag.LANGUAGE; |
| break; |
| case "script": |
| favor = FavorSubtag.SCRIPT; |
| break; |
| default: |
| throw new IllegalArgumentException("unsupported FavorSubtag value " + test.favor); |
| } |
| builder.setFavorSubtag(favor); |
| } |
| if (!test.threshold.isEmpty()) { |
| int threshold = Integer.valueOf(test.threshold); |
| builder.internalSetThresholdDistance(threshold); |
| } |
| matcher = builder.build(); |
| } |
| |
| ULocale expMatch = getULocaleOrNull(test.expMatch); |
| if (test.expDesired.isEmpty() && test.expCombined.isEmpty()) { |
| ULocale bestSupported = matcher.getBestMatch(test.desired); |
| assertEquals("bestSupported", expMatch, bestSupported); |
| } else { |
| LocalePriorityList desired = LocalePriorityList.add(test.desired).build(); |
| XLocaleMatcher.Result result = matcher.getBestMatchResult(desired); |
| assertEquals("bestSupported", expMatch, result.getSupportedULocale()); |
| if (!test.expDesired.isEmpty()) { |
| ULocale expDesired = getULocaleOrNull(test.expDesired); |
| assertEquals("bestDesired", expDesired, result.getDesiredULocale()); |
| } |
| if (!test.expCombined.isEmpty()) { |
| ULocale expCombined = getULocaleOrNull(test.expCombined); |
| ULocale combined = result.makeServiceULocale(); |
| assertEquals("combined", expCombined, combined); |
| } |
| } |
| } |
| } |