ICU-21184 rephrase docs/comments using the term grandfathered
diff --git a/icu4c/source/common/locid.cpp b/icu4c/source/common/locid.cpp
index 19b61cd..a2c38a9 100644
--- a/icu4c/source/common/locid.cpp
+++ b/icu4c/source/common/locid.cpp
@@ -1025,13 +1025,14 @@
return result;
}
- // If a BCP-47 language tag is passed as the language parameter to the
+ // If a BCP 47 language tag is passed as the language parameter to the
// normal Locale constructor, it will actually fall back to invoking
// uloc_forLanguageTag() to parse it if it somehow is able to detect that
- // the string actually is BCP-47. This works well for things like strings
- // using BCP-47 extensions, but it does not at all work for things like
- // BCP-47 grandfathered tags (eg. "en-GB-oed") which are possible to also
- // interpret as ICU locale IDs and because of that won't trigger the BCP-47
+ // the string actually is BCP 47. This works well for things like strings
+ // using BCP 47 extensions, but it does not at all work for things like
+ // legacy language tags (marked as “Type: grandfathered” in BCP 47,
+ // e.g., "en-GB-oed") which can also be
+ // interpreted as ICU locale IDs and because of that won't trigger the BCP 47
// parsing. Therefore the code here explicitly calls uloc_forLanguageTag()
// and then Locale::init(), instead of just calling the normal constructor.
diff --git a/icu4c/source/common/uloc_tag.cpp b/icu4c/source/common/uloc_tag.cpp
index ad5dd64..5eed02c 100644
--- a/icu4c/source/common/uloc_tag.cpp
+++ b/icu4c/source/common/uloc_tag.cpp
@@ -53,7 +53,7 @@
VariantListEntry *variants;
ExtensionListEntry *extensions;
const char *privateuse;
- const char *grandfathered;
+ const char *legacy;
} ULanguageTag;
#define MINLEN 2
@@ -85,8 +85,9 @@
Updated on 2018-09-12 from
https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .
- This table has 2 parts. The parts for Grandfathered tags is generated by the
- following scripts from the IANA language tag registry.
+ This table has 2 parts. The part for
+ legacy language tags (marked as “Type: grandfathered” in BCP 47)
+ is generated by the following scripts from the IANA language tag registry.
curl https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry |\
egrep -A 7 'Type: grandfathered' | \
@@ -100,8 +101,8 @@
values. They may have to be removed for the strict BCP 47 compliance.
*/
-static const char* const GRANDFATHERED[] = {
-/* grandfathered preferred */
+static const char* const LEGACY[] = {
+/* legacy preferred */
"art-lojban", "jbo",
"en-gb-oed", "en-gb-oxendict",
"i-ami", "ami",
@@ -124,7 +125,7 @@
"zh-min-nan", "nan",
"zh-xiang", "hsn",
- // Grandfathered tags with no preferred value in the IANA
+ // Legacy tags with no preferred value in the IANA
// registry. Kept for now for the backward compatibility
// because ICU has mapped them this way.
"cel-gaulish", "xtg-x-cel-gaulish",
@@ -346,7 +347,7 @@
#if 0
static const char*
-ultag_getGrandfathered(const ULanguageTag* langtag);
+ultag_getLegacy(const ULanguageTag* langtag);
#endif
U_NAMESPACE_BEGIN
@@ -986,7 +987,7 @@
langtag->variants = NULL;
langtag->extensions = NULL;
- langtag->grandfathered = EMPTY;
+ langtag->legacy = EMPTY;
langtag->privateuse = EMPTY;
}
@@ -2042,7 +2043,7 @@
char *pExtValueSubtag, *pExtValueSubtagEnd;
int32_t i;
UBool privateuseVar = FALSE;
- int32_t grandfatheredLen = 0;
+ int32_t legacyLen = 0;
if (parsedLen != NULL) {
*parsedLen = 0;
@@ -2082,25 +2083,25 @@
}
size_t parsedLenDelta = 0;
- // Grandfathered tag will be consider together. Grandfathered tag with intervening
+ // Legacy tags will be considered together. A legacy tag with intervening
// script and region such as art-DE-lojban or art-Latn-lojban won't be
// matched.
- /* check if the tag is grandfathered */
- for (i = 0; i < UPRV_LENGTHOF(GRANDFATHERED); i += 2) {
- int32_t checkGrandfatheredLen = static_cast<int32_t>(uprv_strlen(GRANDFATHERED[i]));
- if (tagLen < checkGrandfatheredLen) {
+ /* check if the tag is legacy */
+ for (i = 0; i < UPRV_LENGTHOF(LEGACY); i += 2) {
+ int32_t checkLegacyLen = static_cast<int32_t>(uprv_strlen(LEGACY[i]));
+ if (tagLen < checkLegacyLen) {
continue;
}
- if (tagLen > checkGrandfatheredLen && tagBuf[checkGrandfatheredLen] != '-') {
+ if (tagLen > checkLegacyLen && tagBuf[checkLegacyLen] != '-') {
// make sure next char is '-'.
continue;
}
- if (uprv_strnicmp(GRANDFATHERED[i], tagBuf, checkGrandfatheredLen) == 0) {
+ if (uprv_strnicmp(LEGACY[i], tagBuf, checkLegacyLen) == 0) {
int32_t newTagLength;
- grandfatheredLen = checkGrandfatheredLen; /* back up for output parsedLen */
- int32_t replacementLen = static_cast<int32_t>(uprv_strlen(GRANDFATHERED[i+1]));
- newTagLength = replacementLen + tagLen - checkGrandfatheredLen;
+ legacyLen = checkLegacyLen; /* back up for output parsedLen */
+ int32_t replacementLen = static_cast<int32_t>(uprv_strlen(LEGACY[i+1]));
+ newTagLength = replacementLen + tagLen - checkLegacyLen;
if (tagLen < newTagLength) {
uprv_free(tagBuf);
tagBuf = (char*)uprv_malloc(newTagLength + 1);
@@ -2111,16 +2112,16 @@
t->buf = tagBuf;
tagLen = newTagLength;
}
- parsedLenDelta = checkGrandfatheredLen - replacementLen;
- uprv_strcpy(t->buf, GRANDFATHERED[i + 1]);
- if (checkGrandfatheredLen != tagLen) {
- uprv_strcpy(t->buf + replacementLen, tag + checkGrandfatheredLen);
+ parsedLenDelta = checkLegacyLen - replacementLen;
+ uprv_strcpy(t->buf, LEGACY[i + 1]);
+ if (checkLegacyLen != tagLen) {
+ uprv_strcpy(t->buf + replacementLen, tag + checkLegacyLen);
}
break;
}
}
- if (grandfatheredLen == 0) {
+ if (legacyLen == 0) {
for (i = 0; i < UPRV_LENGTHOF(REDUNDANT); i += 2) {
const char* redundantTag = REDUNDANT[i];
size_t redundantTagLen = uprv_strlen(redundantTag);
@@ -2608,8 +2609,8 @@
#if 0
static const char*
-ultag_getGrandfathered(const ULanguageTag* langtag) {
- return langtag->grandfathered;
+ultag_getLegacy(const ULanguageTag* langtag) {
+ return langtag->legacy;
}
#endif
diff --git a/icu4c/source/common/ulocimp.h b/icu4c/source/common/ulocimp.h
index b4d64c5..cd5a357 100644
--- a/icu4c/source/common/ulocimp.h
+++ b/icu4c/source/common/ulocimp.h
@@ -109,13 +109,17 @@
* If the specified language tag contains any ill-formed subtags,
* the first such subtag and all following subtags are ignored.
* <p>
- * This implements the 'Language-Tag' production of BCP47, and so
- * supports grandfathered (regular and irregular) as well as private
- * use language tags. Private use tags are represented as 'x-whatever',
- * and grandfathered tags are converted to their canonical replacements
- * where they exist. Note that a few grandfathered tags have no modern
- * replacement, these will be converted using the fallback described in
+ * This implements the 'Language-Tag' production of BCP 47, and so
+ * supports legacy language tags (marked as “Type: grandfathered” in BCP 47)
+ * (regular and irregular) as well as private use language tags.
+ *
+ * Private use tags are represented as 'x-whatever',
+ * and legacy tags are converted to their canonical replacements where they exist.
+ *
+ * Note that a few legacy tags have no modern replacement;
+ * these will be converted using the fallback described in
* the first paragraph, so some information might be lost.
+ *
* @param langtag the input BCP47 language tag.
* @param tagLen the length of langtag, or -1 to call uprv_strlen().
* @param sink the output sink receiving a locale ID for the
diff --git a/icu4c/source/common/unicode/localebuilder.h b/icu4c/source/common/unicode/localebuilder.h
index c5836fe..664ee6a 100644
--- a/icu4c/source/common/unicode/localebuilder.h
+++ b/icu4c/source/common/unicode/localebuilder.h
@@ -92,11 +92,12 @@
/**
* Resets the LocaleBuilder to match the provided
* [Unicode Locale Identifier](http://www.unicode.org/reports/tr35/tr35.html#unicode_locale_id) .
- * Discards the existing state. the empty string cause the builder to be
- * reset, like {@link #clear}. Grandfathered tags are converted to their
- * canonical form before being processed. Otherwise, the <code>language
- * tag</code> must be well-formed, or else the build() method will later
- * report an U_ILLEGAL_ARGUMENT_ERROR.
+ * Discards the existing state.
+ * The empty string causes the builder to be reset, like {@link #clear}.
+ * Legacy language tags (marked as “Type: grandfathered” in BCP 47)
+ * are converted to their canonical form before being processed.
+ * Otherwise, the <code>language tag</code> must be well-formed,
+ * or else the build() method will later report a U_ILLEGAL_ARGUMENT_ERROR.
*
* <p>This method clears the internal UErrorCode.
*
diff --git a/icu4c/source/common/unicode/locid.h b/icu4c/source/common/unicode/locid.h
index 1d031da..f955743 100644
--- a/icu4c/source/common/unicode/locid.h
+++ b/icu4c/source/common/unicode/locid.h
@@ -393,13 +393,17 @@
* If the specified language tag contains any ill-formed subtags,
* the first such subtag and all following subtags are ignored.
* <p>
- * This implements the 'Language-Tag' production of BCP47, and so
- * supports grandfathered (regular and irregular) as well as private
- * use language tags. Private use tags are represented as 'x-whatever',
- * and grandfathered tags are converted to their canonical replacements
- * where they exist. Note that a few grandfathered tags have no modern
- * replacement, these will be converted using the fallback described in
+ * This implements the 'Language-Tag' production of BCP 47, and so
+ * supports legacy language tags (marked as “Type: grandfathered” in BCP 47)
+ * (regular and irregular) as well as private use language tags.
+ *
+ * Private use tags are represented as 'x-whatever',
+ * and legacy tags are converted to their canonical replacements where they exist.
+ *
+ * Note that a few legacy tags have no modern replacement;
+ * these will be converted using the fallback described in
* the first paragraph, so some information might be lost.
+ *
* @param tag the input BCP47 language tag.
* @param status error information if creating the Locale failed.
* @return the Locale for the specified BCP47 language tag.
diff --git a/icu4c/source/common/unicode/uloc.h b/icu4c/source/common/unicode/uloc.h
index d3de89f..fa38092 100644
--- a/icu4c/source/common/unicode/uloc.h
+++ b/icu4c/source/common/unicode/uloc.h
@@ -1237,14 +1237,18 @@
* Returns a locale ID for the specified BCP47 language tag string.
* If the specified language tag contains any ill-formed subtags,
* the first such subtag and all following subtags are ignored.
- * <p>
- * This implements the 'Language-Tag' production of BCP47, and so
- * supports grandfathered (regular and irregular) as well as private
- * use language tags. Private use tags are represented as 'x-whatever',
- * and grandfathered tags are converted to their canonical replacements
- * where they exist. Note that a few grandfathered tags have no modern
- * replacement, these will be converted using the fallback described in
+ * <p>
+ * This implements the 'Language-Tag' production of BCP 47, and so
+ * supports legacy language tags (marked as “Type: grandfathered” in BCP 47)
+ * (regular and irregular) as well as private use language tags.
+ *
+ * Private use tags are represented as 'x-whatever',
+ * and legacy tags are converted to their canonical replacements where they exist.
+ *
+ * Note that a few legacy tags have no modern replacement;
+ * these will be converted using the fallback described in
* the first paragraph, so some information might be lost.
+ *
* @param langtag the input BCP47 language tag.
* @param localeID the output buffer receiving a locale ID for the
* specified BCP47 language tag.
diff --git a/icu4c/source/i18n/calendar.cpp b/icu4c/source/i18n/calendar.cpp
index 9497a85..c3e5e8c 100644
--- a/icu4c/source/i18n/calendar.cpp
+++ b/icu4c/source/i18n/calendar.cpp
@@ -266,7 +266,7 @@
//TODO: ULOC_FULL_NAME is out of date and too small..
char canonicalName[256];
- // canonicalize, so grandfathered variant will be transformed to keywords
+ // Canonicalize, so that an old-style variant will be transformed to keywords.
// e.g ja_JP_TRADITIONAL -> ja_JP@calendar=japanese
// NOTE: Since ICU-20187, ja_JP_TRADITIONAL no longer canonicalizes, and
// the Gregorian calendar is returned instead.
diff --git a/icu4c/source/i18n/fmtable_cnv.cpp b/icu4c/source/i18n/fmtable_cnv.cpp
index 9a64792..bc3847b 100644
--- a/icu4c/source/i18n/fmtable_cnv.cpp
+++ b/icu4c/source/i18n/fmtable_cnv.cpp
@@ -30,8 +30,6 @@
// -------------------------------------
// Creates a formattable object with a char* string.
// This API is useless. The API that takes a UnicodeString is actually just as good.
-// This is just a grandfathered API.
-
Formattable::Formattable(const char* stringToCopy)
{
init();
diff --git a/icu4c/source/test/testdata/localeMatcherTest.txt b/icu4c/source/test/testdata/localeMatcherTest.txt
index 7a10986..70afcd0 100644
--- a/icu4c/source/test/testdata/localeMatcherTest.txt
+++ b/icu4c/source/test/testdata/localeMatcherTest.txt
@@ -279,7 +279,7 @@
zh-Hant >> und-TW
zh >> und-TW
-** test: testMatchGrandfatheredCode
+** test: testMatchLegacyCode
@supported=fr, i-klingon, en-Latn-US
en-GB-oed >> en-Latn-US
@@ -984,7 +984,7 @@
x-piglatin >> x-bork
x-bork >> x-bork
-** test: MatchGrandfatheredCode
+** test: MatchLegacyCode
@supported=fr, i-klingon, en-Latn-US
en-GB-oed >> en-Latn-US
i-klingon >> tlh
@@ -1525,7 +1525,7 @@
x-piglatin >> fr
x-bork >> x-bork
-** test: grandfathered codes
+** test: legacy codes
@supported=fr, i-klingon, en-Latn-US
en-GB-oed >> en-Latn-US
i-klingon >> tlh
diff --git a/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/LanguageTag.java b/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/LanguageTag.java
index c5cf38e..3adf08a 100644
--- a/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/LanguageTag.java
+++ b/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/LanguageTag.java
@@ -38,13 +38,13 @@
private List<String> _variants = Collections.emptyList(); // variant subtags
private List<String> _extensions = Collections.emptyList(); // extensions
- // Map contains grandfathered tags and its preferred mappings from
- // http://www.ietf.org/rfc/rfc5646.txt
- private static final Map<AsciiUtil.CaseInsensitiveKey, String[]> GRANDFATHERED =
+ // The Map contains legacy language tags (marked as “Type: grandfathered” in BCP 47)
+ // and their preferred mappings from BCP 47.
+ private static final Map<AsciiUtil.CaseInsensitiveKey, String[]> LEGACY =
new HashMap<AsciiUtil.CaseInsensitiveKey, String[]>();
static {
- // grandfathered = irregular ; non-redundant tags registered
+ // legacy = irregular ; non-redundant tags registered
// / regular ; during the RFC 3066 era
//
// irregular = "en-GB-oed" ; irregular tags do not match
@@ -105,57 +105,17 @@
{"zh-xiang", "hsn"},
};
for (String[] e : entries) {
- GRANDFATHERED.put(new AsciiUtil.CaseInsensitiveKey(e[0]), e);
+ LEGACY.put(new AsciiUtil.CaseInsensitiveKey(e[0]), e);
}
}
private LanguageTag() {
}
- /*
- * BNF in RFC5464
- *
- * Language-Tag = langtag ; normal language tags
- * / privateuse ; private use tag
- * / grandfathered ; grandfathered tags
- *
- *
- * langtag = language
- * ["-" script]
- * ["-" region]
- * *("-" variant)
- * *("-" extension)
- * ["-" privateuse]
- *
- * language = 2*3ALPHA ; shortest ISO 639 code
- * ["-" extlang] ; sometimes followed by
- * ; extended language subtags
- * / 4ALPHA ; or reserved for future use
- * / 5*8ALPHA ; or registered language subtag
- *
- * extlang = 3ALPHA ; selected ISO 639 codes
- * *2("-" 3ALPHA) ; permanently reserved
- *
- * script = 4ALPHA ; ISO 15924 code
- *
- * region = 2ALPHA ; ISO 3166-1 code
- * / 3DIGIT ; UN M.49 code
- *
- * variant = 5*8alphanum ; registered variants
- * / (DIGIT 3alphanum)
- *
- * extension = singleton 1*("-" (2*8alphanum))
- *
- * ; Single alphanumerics
- * ; "x" reserved for private use
- * singleton = DIGIT ; 0 - 9
- * / %x41-57 ; A - W
- * / %x59-5A ; Y - Z
- * / %x61-77 ; a - w
- * / %x79-7A ; y - z
- *
- * privateuse = "x" 1*("-" (1*8alphanum))
- *
+ /**
+ * See BCP 47 “Tags for Identifying Languages”:
+ * https://www.rfc-editor.org/info/bcp47 -->
+ * https://www.rfc-editor.org/rfc/rfc5646.html#section-2.1
*/
public static LanguageTag parse(String languageTag, ParseStatus sts) {
if (sts == null) {
@@ -166,8 +126,7 @@
StringTokenIterator itr;
- // Check if the tag is grandfathered
- String[] gfmap = GRANDFATHERED.get(new AsciiUtil.CaseInsensitiveKey(languageTag));
+ String[] gfmap = LEGACY.get(new AsciiUtil.CaseInsensitiveKey(languageTag));
if (gfmap != null) {
// use preferred mapping
itr = new StringTokenIterator(gfmap[1], SEP);
diff --git a/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/util/ULocale.java b/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/util/ULocale.java
index 5081767..b811d50 100644
--- a/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/util/ULocale.java
+++ b/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/util/ULocale.java
@@ -70,7 +70,8 @@
* Canonicalization additionally performs the following:
* <ul>
* <li>POSIX ids are converted to ICU format IDs</li>
- * <li>'grandfathered' 3066 ids are converted to ICU standard form</li>
+ * <li>Legacy language tags (marked as “Type: grandfathered” in BCP 47)
+ * are converted to ICU standard form</li>
* <li>'PREEURO' and 'EURO' variants are converted to currency keyword form,
* with the currency
* id appropriate to the country of the locale (for PREEURO) or EUR (for EURO).
@@ -1033,7 +1034,7 @@
/**
* {@icu} Returns the canonical name for the specified locale ID. This is used to
- * convert POSIX and other grandfathered IDs to standard ICU form.
+ * convert POSIX and other legacy IDs to standard ICU form.
* @param localeID the locale id
* @return the canonicalized id
* @stable ICU 3.0
@@ -2666,60 +2667,18 @@
* script to title case, country to upper case, variant to upper case,
* and extensions to lower case.
*
- * <p>This implements the 'Language-Tag' production of BCP47, and
- * so supports grandfathered (regular and irregular) as well as
- * private use language tags. Stand alone private use tags are
- * represented as empty language and extension 'x-whatever',
- * and grandfathered tags are converted to their canonical replacements
- * where they exist.
+ * <p>This implements the 'Language-Tag' production of BCP 47, and so
+ * supports legacy language tags (marked as “Type: grandfathered” in BCP 47)
+ * (regular and irregular) as well as private use language tags.
*
- * <p>Grandfathered tags with canonical replacements are as follows:
+ * <p>Stand-alone private use tags are represented as empty language and extension 'x-whatever',
+ * and legacy tags are converted to their canonical replacements where they exist.
*
- * <table>
- * <tbody align="center">
- * <tr><th>grandfathered tag</th><th> </th><th>modern replacement</th></tr>
- * <tr><td>art-lojban</td><td> </td><td>jbo</td></tr>
- * <tr><td>i-ami</td><td> </td><td>ami</td></tr>
- * <tr><td>i-bnn</td><td> </td><td>bnn</td></tr>
- * <tr><td>i-hak</td><td> </td><td>hak</td></tr>
- * <tr><td>i-klingon</td><td> </td><td>tlh</td></tr>
- * <tr><td>i-lux</td><td> </td><td>lb</td></tr>
- * <tr><td>i-navajo</td><td> </td><td>nv</td></tr>
- * <tr><td>i-pwn</td><td> </td><td>pwn</td></tr>
- * <tr><td>i-tao</td><td> </td><td>tao</td></tr>
- * <tr><td>i-tay</td><td> </td><td>tay</td></tr>
- * <tr><td>i-tsu</td><td> </td><td>tsu</td></tr>
- * <tr><td>no-bok</td><td> </td><td>nb</td></tr>
- * <tr><td>no-nyn</td><td> </td><td>nn</td></tr>
- * <tr><td>sgn-BE-FR</td><td> </td><td>sfb</td></tr>
- * <tr><td>sgn-BE-NL</td><td> </td><td>vgt</td></tr>
- * <tr><td>sgn-CH-DE</td><td> </td><td>sgg</td></tr>
- * <tr><td>zh-guoyu</td><td> </td><td>cmn</td></tr>
- * <tr><td>zh-hakka</td><td> </td><td>hak</td></tr>
- * <tr><td>zh-min-nan</td><td> </td><td>nan</td></tr>
- * <tr><td>zh-xiang</td><td> </td><td>hsn</td></tr>
- * </tbody>
- * </table>
+ * <p>Note that a few legacy tags have no modern replacement;
+ * these will be converted using the fallback described in
+ * the first paragraph, so some information might be lost.
*
- * <p>Grandfathered tags with no modern replacement will be
- * converted as follows:
- *
- * <table>
- * <tbody align="center">
- * <tr><th>grandfathered tag</th><th> </th><th>converts to</th></tr>
- * <tr><td>cel-gaulish</td><td> </td><td>xtg-x-cel-gaulish</td></tr>
- * <tr><td>en-GB-oed</td><td> </td><td>en-GB-x-oed</td></tr>
- * <tr><td>i-default</td><td> </td><td>en-x-i-default</td></tr>
- * <tr><td>i-enochian</td><td> </td><td>und-x-i-enochian</td></tr>
- * <tr><td>i-mingo</td><td> </td><td>see-x-i-mingo</td></tr>
- * <tr><td>zh-min</td><td> </td><td>nan-x-zh-min</td></tr>
- * </tbody>
- * </table>
- *
- * <p>For a list of all grandfathered tags, see the
- * IANA Language Subtag Registry (search for "Type: grandfathered").
- *
- * <p><b>Note</b>: there is no guarantee that <code>toLanguageTag</code>
+ * <p><b>Note</b>: There is no guarantee that <code>toLanguageTag</code>
* and <code>forLanguageTag</code> will round-trip.
*
* @param languageTag the language tag
@@ -2821,7 +2780,7 @@
* Resets the Builder to match the provided IETF BCP 47
* language tag. Discards the existing state. Null and the
* empty string cause the builder to be reset, like {@link
- * #clear}. Grandfathered tags (see {@link
+ * #clear}. Legacy tags (see {@link
* ULocale#forLanguageTag}) are converted to their canonical
* form before being processed. Otherwise, the language tag
* must be well-formed (see {@link ULocale}) or an exception is
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/CalendarUtil.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/CalendarUtil.java
index 83da8ce..8ceb8d1 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/CalendarUtil.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/CalendarUtil.java
@@ -46,7 +46,7 @@
return calType.toLowerCase(Locale.ROOT);
}
- // Canonicalize, so grandfathered variant will be transformed to keywords
+ // Canonicalize, so that an old-style variant will be transformed to keywords.
ULocale canonical = ULocale.createCanonical(loc.toString());
calType = canonical.getKeywordValue(CALKEY);
if (calType != null) {
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LanguageTag.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LanguageTag.java
index 14d1a94..53f9879 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LanguageTag.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LanguageTag.java
@@ -38,13 +38,13 @@
private List<String> _variants = Collections.emptyList(); // variant subtags
private List<String> _extensions = Collections.emptyList(); // extensions
- // Map contains grandfathered tags and its preferred mappings from
- // http://www.ietf.org/rfc/rfc5646.txt
- private static final Map<AsciiUtil.CaseInsensitiveKey, String[]> GRANDFATHERED =
+ // The Map contains legacy language tags (marked as “Type: grandfathered” in BCP 47)
+ // and their preferred mappings from BCP 47.
+ private static final Map<AsciiUtil.CaseInsensitiveKey, String[]> LEGACY =
new HashMap<AsciiUtil.CaseInsensitiveKey, String[]>();
static {
- // grandfathered = irregular ; non-redundant tags registered
+ // legacy = irregular ; non-redundant tags registered
// / regular ; during the RFC 3066 era
//
// irregular = "en-GB-oed" ; irregular tags do not match
@@ -105,57 +105,17 @@
{"zh-xiang", "hsn"},
};
for (String[] e : entries) {
- GRANDFATHERED.put(new AsciiUtil.CaseInsensitiveKey(e[0]), e);
+ LEGACY.put(new AsciiUtil.CaseInsensitiveKey(e[0]), e);
}
}
private LanguageTag() {
}
- /*
- * BNF in RFC5464
- *
- * Language-Tag = langtag ; normal language tags
- * / privateuse ; private use tag
- * / grandfathered ; grandfathered tags
- *
- *
- * langtag = language
- * ["-" script]
- * ["-" region]
- * *("-" variant)
- * *("-" extension)
- * ["-" privateuse]
- *
- * language = 2*3ALPHA ; shortest ISO 639 code
- * ["-" extlang] ; sometimes followed by
- * ; extended language subtags
- * / 4ALPHA ; or reserved for future use
- * / 5*8ALPHA ; or registered language subtag
- *
- * extlang = 3ALPHA ; selected ISO 639 codes
- * *2("-" 3ALPHA) ; permanently reserved
- *
- * script = 4ALPHA ; ISO 15924 code
- *
- * region = 2ALPHA ; ISO 3166-1 code
- * / 3DIGIT ; UN M.49 code
- *
- * variant = 5*8alphanum ; registered variants
- * / (DIGIT 3alphanum)
- *
- * extension = singleton 1*("-" (2*8alphanum))
- *
- * ; Single alphanumerics
- * ; "x" reserved for private use
- * singleton = DIGIT ; 0 - 9
- * / %x41-57 ; A - W
- * / %x59-5A ; Y - Z
- * / %x61-77 ; a - w
- * / %x79-7A ; y - z
- *
- * privateuse = "x" 1*("-" (1*8alphanum))
- *
+ /**
+ * See BCP 47 “Tags for Identifying Languages”:
+ * https://www.rfc-editor.org/info/bcp47 -->
+ * https://www.rfc-editor.org/rfc/rfc5646.html#section-2.1
*/
public static LanguageTag parse(String languageTag, ParseStatus sts) {
if (sts == null) {
@@ -165,14 +125,13 @@
}
StringTokenIterator itr;
- boolean isGrandfathered = false;
+ boolean isLegacy = false;
- // Check if the tag is grandfathered
- String[] gfmap = GRANDFATHERED.get(new AsciiUtil.CaseInsensitiveKey(languageTag));
+ String[] gfmap = LEGACY.get(new AsciiUtil.CaseInsensitiveKey(languageTag));
// Language tag is at least 2 alpha so we can skip searching the first 2 chars.
int dash = 2;
while (gfmap == null && (dash = languageTag.indexOf('-', dash + 1)) != -1) {
- gfmap = GRANDFATHERED.get(new AsciiUtil.CaseInsensitiveKey(languageTag.substring(0, dash)));
+ gfmap = LEGACY.get(new AsciiUtil.CaseInsensitiveKey(languageTag.substring(0, dash)));
}
if (gfmap != null) {
@@ -183,7 +142,7 @@
// append the rest of the tag.
itr = new StringTokenIterator(gfmap[1] + languageTag.substring(dash), SEP);
}
- isGrandfathered = true;
+ isLegacy = true;
} else {
itr = new StringTokenIterator(languageTag, SEP);
}
@@ -202,8 +161,8 @@
}
tag.parsePrivateuse(itr, sts);
- if (isGrandfathered) {
- // Grandfathered tag is replaced with a well-formed tag above.
+ if (isLegacy) {
+ // A legacy tag is replaced with a well-formed tag above.
// However, the parsed length must be the original tag length.
assert (itr.isDone());
assert (!sts.isError());
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java b/icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java
index ac8746f..8c3a4eb 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java
@@ -80,7 +80,8 @@
* Canonicalization additionally performs the following:
* <ul>
* <li>POSIX ids are converted to ICU format IDs</li>
- * <li>'grandfathered' 3066 ids are converted to ICU standard form</li>
+ * <li>Legacy language tags (marked as “Type: grandfathered” in BCP 47)
+ * are converted to ICU standard form</li>
* </ul>
* All ULocale constructors automatically normalize the locale id. To handle
* POSIX ids, <code>canonicalize</code> can be called to convert the id
@@ -1204,7 +1205,7 @@
/**
* {@icu} Returns the canonical name according to CLDR for the specified locale ID.
- * This is used to convert POSIX and other grandfathered IDs to standard ICU form.
+ * This is used to convert POSIX and other legacy IDs to standard ICU form.
* @param localeID the locale id
* @return the canonicalized id
* @stable ICU 3.0
@@ -1242,7 +1243,7 @@
// element in Supplemental Data, replace the language subtag with the replacement value.
// If there are additional subtags in the replacement value, add them to the result, but
// only if there is no corresponding subtag already in the tag.
- // Five special deprecated grandfathered codes (such as i-default) are in type attributes, and are also replaced.
+ // Five special deprecated codes (such as i-default) are in type attributes, and are also replaced.
try {
UResourceBundle languageAlias = UResourceBundle.getBundleInstance(ICUData.ICU_BASE_NAME,
"metadata", ICUResourceBundle.ICU_DATA_CLASS_LOADER)
@@ -3201,58 +3202,16 @@
*
* </ul>
*
- * <p>This implements the 'Language-Tag' production of BCP47, and
- * so supports grandfathered (regular and irregular) as well as
- * private use language tags. Stand alone private use tags are
- * represented as empty language and extension 'x-whatever',
- * and grandfathered tags are converted to their canonical replacements
- * where they exist.
+ * <p>This implements the 'Language-Tag' production of BCP 47, and so
+ * supports legacy language tags (marked as “Type: grandfathered” in BCP 47)
+ * (regular and irregular) as well as private use language tags.
*
- * <p>Grandfathered tags with canonical replacements are as follows:
+ * <p>Stand-alone private use tags are represented as empty language and extension 'x-whatever',
+ * and legacy tags are converted to their canonical replacements where they exist.
*
- * <table>
- * <tbody align="center">
- * <tr><th>grandfathered tag</th><th> </th><th>modern replacement</th></tr>
- * <tr><td>art-lojban</td><td> </td><td>jbo</td></tr>
- * <tr><td>i-ami</td><td> </td><td>ami</td></tr>
- * <tr><td>i-bnn</td><td> </td><td>bnn</td></tr>
- * <tr><td>i-hak</td><td> </td><td>hak</td></tr>
- * <tr><td>i-klingon</td><td> </td><td>tlh</td></tr>
- * <tr><td>i-lux</td><td> </td><td>lb</td></tr>
- * <tr><td>i-navajo</td><td> </td><td>nv</td></tr>
- * <tr><td>i-pwn</td><td> </td><td>pwn</td></tr>
- * <tr><td>i-tao</td><td> </td><td>tao</td></tr>
- * <tr><td>i-tay</td><td> </td><td>tay</td></tr>
- * <tr><td>i-tsu</td><td> </td><td>tsu</td></tr>
- * <tr><td>no-bok</td><td> </td><td>nb</td></tr>
- * <tr><td>no-nyn</td><td> </td><td>nn</td></tr>
- * <tr><td>sgn-BE-FR</td><td> </td><td>sfb</td></tr>
- * <tr><td>sgn-BE-NL</td><td> </td><td>vgt</td></tr>
- * <tr><td>sgn-CH-DE</td><td> </td><td>sgg</td></tr>
- * <tr><td>zh-guoyu</td><td> </td><td>cmn</td></tr>
- * <tr><td>zh-hakka</td><td> </td><td>hak</td></tr>
- * <tr><td>zh-min-nan</td><td> </td><td>nan</td></tr>
- * <tr><td>zh-xiang</td><td> </td><td>hsn</td></tr>
- * </tbody>
- * </table>
- *
- * <p>Grandfathered tags with no modern replacement will be
- * converted as follows:
- *
- * <table>
- * <tbody align="center">
- * <tr><th>grandfathered tag</th><th> </th><th>converts to</th></tr>
- * <tr><td>cel-gaulish</td><td> </td><td>xtg-x-cel-gaulish</td></tr>
- * <tr><td>en-GB-oed</td><td> </td><td>en-GB-x-oed</td></tr>
- * <tr><td>i-default</td><td> </td><td>en-x-i-default</td></tr>
- * <tr><td>i-enochian</td><td> </td><td>und-x-i-enochian</td></tr>
- * <tr><td>i-mingo</td><td> </td><td>see-x-i-mingo</td></tr>
- * <tr><td>zh-min</td><td> </td><td>nan-x-zh-min</td></tr>
- * </tbody>
- * </table>
- *
- * <p>For a list of all grandfathered tags, see the
- * IANA Language Subtag Registry (search for "Type: grandfathered").
+ * <p>Note that a few legacy tags have no modern replacement;
+ * these will be converted using the fallback described in
+ * the first paragraph, so some information might be lost.
*
* <p><b>Note</b>: there is no guarantee that <code>toLanguageTag</code>
* and <code>forLanguageTag</code> will round-trip.
@@ -3491,7 +3450,7 @@
* Resets the Builder to match the provided IETF BCP 47
* language tag. Discards the existing state. Null and the
* empty string cause the builder to be reset, like {@link
- * #clear}. Grandfathered tags (see {@link
+ * #clear}. Legacy tags (see {@link
* ULocale#forLanguageTag}) are converted to their canonical
* form before being processed. Otherwise, the language tag
* must be well-formed (see {@link ULocale}) or an exception is
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherTest.java
index 97e2dab..deaf486 100644
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherTest.java
@@ -365,7 +365,7 @@
}
@Test
- public void testMatchGrandfatheredCode() {
+ public void testMatchLegacyCode() {
final LocaleMatcher matcher = newLocaleMatcher("fr, i_klingon, en_Latn_US");
assertEquals("en_Latn_US", matcher.getBestMatch("en_GB_oed").toString());
// assertEquals("tlh", matcher.getBestMatch("i_klingon").toString());
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/data/localeMatcherTest.txt b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/data/localeMatcherTest.txt
index 7a10986..70afcd0 100644
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/data/localeMatcherTest.txt
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/data/localeMatcherTest.txt
@@ -279,7 +279,7 @@
zh-Hant >> und-TW
zh >> und-TW
-** test: testMatchGrandfatheredCode
+** test: testMatchLegacyCode
@supported=fr, i-klingon, en-Latn-US
en-GB-oed >> en-Latn-US
@@ -984,7 +984,7 @@
x-piglatin >> x-bork
x-bork >> x-bork
-** test: MatchGrandfatheredCode
+** test: MatchLegacyCode
@supported=fr, i-klingon, en-Latn-US
en-GB-oed >> en-Latn-US
i-klingon >> tlh
@@ -1525,7 +1525,7 @@
x-piglatin >> fr
x-bork >> x-bork
-** test: grandfathered codes
+** test: legacy codes
@supported=fr, i-klingon, en-Latn-US
en-GB-oed >> en-Latn-US
i-klingon >> tlh
diff --git a/icu4j/main/tests/translit/src/com/ibm/icu/dev/test/translit/langtagRegex.txt b/icu4j/main/tests/translit/src/com/ibm/icu/dev/test/translit/langtagRegex.txt
index 696750a..26ba85b 100644
--- a/icu4j/main/tests/translit/src/com/ibm/icu/dev/test/translit/langtagRegex.txt
+++ b/icu4j/main/tests/translit/src/com/ibm/icu/dev/test/translit/langtagRegex.txt
@@ -38,14 +38,15 @@
$privateUse = $x (?: $s $alphanum{1,8} )+ ; # "x" 1*("-" (1*8alphanum))
-# Define certain grandfathered codes, since otherwise the regex is pretty useless.
+# Define certain legacy language tags (marked as “Type: grandfathered” in BCP 47),
+# since otherwise the regex is pretty useless.
# Since these are limited, this is safe even with later changes to the registry --
# the only oddity is that it might change the type of the tag, and thus
# the results from the capturing groups.
# http://www.iana.org/assignments/language-subtag-registry
# Note that these have to be compared case insensitively, requiring (?i) below.
-$grandfathered = en $s GB $s oed
+$legacy = en $s GB $s oed
| i $s (?: ami | bnn | default | enochian | hak | klingon | lux | mingo | navajo | pwn | tao | tay | tsu )
| no $s (?: bok | nyn )
| sgn $s (?: BE $s (?: fr | nl) | CH $s de )
@@ -55,7 +56,7 @@
# For well-formedness, we don't need the ones that would otherwise pass.
# For validity, they need to be checked.
-# $grandfatheredWellFormed = (?:
+# $legacyWellFormed = (?:
# art $s lojban
# | cel $s gaulish
# | zh $s (?: guoyu | hakka | xiang )
@@ -78,12 +79,12 @@
(?: $s ( $privateUse ) )? 5%);
# Here is the final breakdown, with capturing groups for each of these components
-# The variants, extensions, grandfathered, and private-use may have interior '-'
+# The variants, extensions, legacy, and private-use may have interior '-'
$root = (?i) # case-insensitive
(?:
$langtag 90%
| ( $privateUse ) 5%
- | ( $grandfathered ) 5%)
+ | ( $legacy ) 5%)
# (?: \@ $keywords )? 5%
;
diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/SupplementalData.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/SupplementalData.java
index ddc6478..57c6df8 100644
--- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/SupplementalData.java
+++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/SupplementalData.java
@@ -527,7 +527,8 @@
// ...
// Remove the script code 'Zzzz' and the region code 'ZZ' if they occur.
//
- // Note that this implementation does not need to handle "grandfathered" tags.
+ // Note that this implementation does not need to handle
+ // legacy language tags (marked as “Type: grandfathered” in BCP 47).
private Optional<LocaleId> addLikelySubtags(String localeId) {
if (localeId.equals("root")) {
return Optional.empty();