ICU-21184 rephrase docs/comments using the term grandfathered

commit: 39da689d30512148dc47d2169910d6b7c1c09cd5 [log] [tgz]
author: Markus Scherer <markus.icu@gmail.com> Tue Aug 18 15:05:22 2020 -0700
committer: Markus Scherer <markus.icu@gmail.com> Fri Aug 21 14:13:03 2020 -0700
tree: ac1271cc89cb78c6fb006108e21f1825a60cff73
parent: cde54fc5ba1581061cc31c158967ab6b074df3ab [diff]
diff --git a/icu4c/source/common/locid.cpp b/icu4c/source/common/locid.cpp
index 19b61cd..a2c38a9 100644
--- a/icu4c/source/common/locid.cpp
+++ b/icu4c/source/common/locid.cpp

@@ -1025,13 +1025,14 @@
         return result;
     }
 
-    // If a BCP-47 language tag is passed as the language parameter to the
+    // If a BCP 47 language tag is passed as the language parameter to the
     // normal Locale constructor, it will actually fall back to invoking
     // uloc_forLanguageTag() to parse it if it somehow is able to detect that
-    // the string actually is BCP-47. This works well for things like strings
-    // using BCP-47 extensions, but it does not at all work for things like
-    // BCP-47 grandfathered tags (eg. "en-GB-oed") which are possible to also
-    // interpret as ICU locale IDs and because of that won't trigger the BCP-47
+    // the string actually is BCP 47. This works well for things like strings
+    // using BCP 47 extensions, but it does not at all work for things like
+    // legacy language tags (marked as “Type: grandfathered” in BCP 47,
+    // e.g., "en-GB-oed") which can also be
+    // interpreted as ICU locale IDs and because of that won't trigger the BCP 47
     // parsing. Therefore the code here explicitly calls uloc_forLanguageTag()
     // and then Locale::init(), instead of just calling the normal constructor.
 

diff --git a/icu4c/source/common/uloc_tag.cpp b/icu4c/source/common/uloc_tag.cpp
index ad5dd64..5eed02c 100644
--- a/icu4c/source/common/uloc_tag.cpp
+++ b/icu4c/source/common/uloc_tag.cpp

@@ -53,7 +53,7 @@
     VariantListEntry    *variants;
     ExtensionListEntry  *extensions;
     const char          *privateuse;
-    const char          *grandfathered;
+    const char          *legacy;
 } ULanguageTag;
 
 #define MINLEN 2
@@ -85,8 +85,9 @@
  Updated on 2018-09-12 from
  https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .
 
- This table has 2 parts. The parts for Grandfathered tags is generated by the
- following scripts from the IANA language tag registry.
+ This table has 2 parts. The part for
+ legacy language tags (marked as “Type: grandfathered” in BCP 47)
+ is generated by the following scripts from the IANA language tag registry.
 
  curl  https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry |\
  egrep -A 7 'Type: grandfathered' | \
@@ -100,8 +101,8 @@
  values. They may have to be removed for the strict BCP 47 compliance.
 
 */
-static const char* const GRANDFATHERED[] = {
-/*  grandfathered   preferred */
+static const char* const LEGACY[] = {
+/*  legacy          preferred */
     "art-lojban",   "jbo",
     "en-gb-oed",    "en-gb-oxendict",
     "i-ami",        "ami",
@@ -124,7 +125,7 @@
     "zh-min-nan",   "nan",
     "zh-xiang",     "hsn",
 
-    // Grandfathered tags with no preferred value in the IANA
+    // Legacy tags with no preferred value in the IANA
     // registry. Kept for now for the backward compatibility
     // because ICU has mapped them this way.
     "cel-gaulish",  "xtg-x-cel-gaulish",
@@ -346,7 +347,7 @@
 
 #if 0
 static const char*
-ultag_getGrandfathered(const ULanguageTag* langtag);
+ultag_getLegacy(const ULanguageTag* langtag);
 #endif
 
 U_NAMESPACE_BEGIN
@@ -986,7 +987,7 @@
     langtag->variants = NULL;
     langtag->extensions = NULL;
 
-    langtag->grandfathered = EMPTY;
+    langtag->legacy = EMPTY;
     langtag->privateuse = EMPTY;
 }
 
@@ -2042,7 +2043,7 @@
     char *pExtValueSubtag, *pExtValueSubtagEnd;
     int32_t i;
     UBool privateuseVar = FALSE;
-    int32_t grandfatheredLen = 0;
+    int32_t legacyLen = 0;
 
     if (parsedLen != NULL) {
         *parsedLen = 0;
@@ -2082,25 +2083,25 @@
     }
 
     size_t parsedLenDelta = 0;
-    // Grandfathered tag will be consider together. Grandfathered tag with intervening
+    // A legacy tag will be considered as a whole. A legacy tag with intervening
     // script and region such as art-DE-lojban or art-Latn-lojban won't be
     // matched.
-    /* check if the tag is grandfathered */
-    for (i = 0; i < UPRV_LENGTHOF(GRANDFATHERED); i += 2) {
-        int32_t checkGrandfatheredLen = static_cast<int32_t>(uprv_strlen(GRANDFATHERED[i]));
-        if (tagLen < checkGrandfatheredLen) {
+    /* check if the tag is legacy */
+    for (i = 0; i < UPRV_LENGTHOF(LEGACY); i += 2) {
+        int32_t checkLegacyLen = static_cast<int32_t>(uprv_strlen(LEGACY[i]));
+        if (tagLen < checkLegacyLen) {
             continue;
         }
-        if (tagLen > checkGrandfatheredLen && tagBuf[checkGrandfatheredLen] != '-') {
+        if (tagLen > checkLegacyLen && tagBuf[checkLegacyLen] != '-') {
             // make sure next char is '-'.
             continue;
         }
-        if (uprv_strnicmp(GRANDFATHERED[i], tagBuf, checkGrandfatheredLen) == 0) {
+        if (uprv_strnicmp(LEGACY[i], tagBuf, checkLegacyLen) == 0) {
             int32_t newTagLength;
 
-            grandfatheredLen = checkGrandfatheredLen;  /* back up for output parsedLen */
-            int32_t replacementLen = static_cast<int32_t>(uprv_strlen(GRANDFATHERED[i+1]));
-            newTagLength = replacementLen + tagLen - checkGrandfatheredLen;
+            legacyLen = checkLegacyLen;  /* back up for output parsedLen */
+            int32_t replacementLen = static_cast<int32_t>(uprv_strlen(LEGACY[i+1]));
+            newTagLength = replacementLen + tagLen - checkLegacyLen;
             if (tagLen < newTagLength) {
                 uprv_free(tagBuf);
                 tagBuf = (char*)uprv_malloc(newTagLength + 1);
@@ -2111,16 +2112,16 @@
                 t->buf = tagBuf;
                 tagLen = newTagLength;
             }
-            parsedLenDelta = checkGrandfatheredLen - replacementLen;
-            uprv_strcpy(t->buf, GRANDFATHERED[i + 1]);
-            if (checkGrandfatheredLen != tagLen) {
-                uprv_strcpy(t->buf + replacementLen, tag + checkGrandfatheredLen);
+            parsedLenDelta = checkLegacyLen - replacementLen;
+            uprv_strcpy(t->buf, LEGACY[i + 1]);
+            if (checkLegacyLen != tagLen) {
+                uprv_strcpy(t->buf + replacementLen, tag + checkLegacyLen);
             }
             break;
         }
     }
 
-    if (grandfatheredLen == 0) {
+    if (legacyLen == 0) {
         for (i = 0; i < UPRV_LENGTHOF(REDUNDANT); i += 2) {
             const char* redundantTag = REDUNDANT[i];
             size_t redundantTagLen = uprv_strlen(redundantTag);
@@ -2608,8 +2609,8 @@
 
 #if 0
 static const char*
-ultag_getGrandfathered(const ULanguageTag* langtag) {
-    return langtag->grandfathered;
+ultag_getLegacy(const ULanguageTag* langtag) {
+    return langtag->legacy;
 }
 #endif
 

diff --git a/icu4c/source/common/ulocimp.h b/icu4c/source/common/ulocimp.h
index b4d64c5..cd5a357 100644
--- a/icu4c/source/common/ulocimp.h
+++ b/icu4c/source/common/ulocimp.h

@@ -109,13 +109,17 @@
  * If the specified language tag contains any ill-formed subtags,
  * the first such subtag and all following subtags are ignored.
  * <p>
- * This implements the 'Language-Tag' production of BCP47, and so
- * supports grandfathered (regular and irregular) as well as private
- * use language tags.  Private use tags are represented as 'x-whatever',
- * and grandfathered tags are converted to their canonical replacements
- * where they exist.  Note that a few grandfathered tags have no modern
- * replacement, these will be converted using the fallback described in
+ * This implements the 'Language-Tag' production of BCP 47, and so
+ * supports legacy language tags (marked as “Type: grandfathered” in BCP 47)
+ * (regular and irregular) as well as private use language tags.
+ *
+ * Private use tags are represented as 'x-whatever',
+ * and legacy tags are converted to their canonical replacements where they exist.
+ *
+ * Note that a few legacy tags have no modern replacement;
+ * these will be converted using the fallback described in
  * the first paragraph, so some information might be lost.
+ *
  * @param langtag   the input BCP47 language tag.
  * @param tagLen    the length of langtag, or -1 to call uprv_strlen().
  * @param sink      the output sink receiving a locale ID for the

diff --git a/icu4c/source/common/unicode/localebuilder.h b/icu4c/source/common/unicode/localebuilder.h
index c5836fe..664ee6a 100644
--- a/icu4c/source/common/unicode/localebuilder.h
+++ b/icu4c/source/common/unicode/localebuilder.h

@@ -92,11 +92,12 @@
     /**
      * Resets the LocaleBuilder to match the provided
      * [Unicode Locale Identifier](http://www.unicode.org/reports/tr35/tr35.html#unicode_locale_id) .
-     * Discards the existing state. the empty string cause the builder to be
-     * reset, like {@link #clear}.  Grandfathered tags are converted to their
-     * canonical form before being processed.  Otherwise, the <code>language
-     * tag</code> must be well-formed, or else the build() method will later
-     * report an U_ILLEGAL_ARGUMENT_ERROR.
+     * Discards the existing state.
+     * The empty string causes the builder to be reset, like {@link #clear}.
+     * Legacy language tags (marked as “Type: grandfathered” in BCP 47)
+     * are converted to their canonical form before being processed.
+     * Otherwise, the <code>language tag</code> must be well-formed,
+     * or else the build() method will later report a U_ILLEGAL_ARGUMENT_ERROR.
      *
      * <p>This method clears the internal UErrorCode.
      *

diff --git a/icu4c/source/common/unicode/locid.h b/icu4c/source/common/unicode/locid.h
index 1d031da..f955743 100644
--- a/icu4c/source/common/unicode/locid.h
+++ b/icu4c/source/common/unicode/locid.h

@@ -393,13 +393,17 @@
      * If the specified language tag contains any ill-formed subtags,
      * the first such subtag and all following subtags are ignored.
      * <p>
-     * This implements the 'Language-Tag' production of BCP47, and so
-     * supports grandfathered (regular and irregular) as well as private
-     * use language tags.  Private use tags are represented as 'x-whatever',
-     * and grandfathered tags are converted to their canonical replacements
-     * where they exist.  Note that a few grandfathered tags have no modern
-     * replacement, these will be converted using the fallback described in
+     * This implements the 'Language-Tag' production of BCP 47, and so
+     * supports legacy language tags (marked as “Type: grandfathered” in BCP 47)
+     * (regular and irregular) as well as private use language tags.
+     *
+     * Private use tags are represented as 'x-whatever',
+     * and legacy tags are converted to their canonical replacements where they exist.
+     *
+     * Note that a few legacy tags have no modern replacement;
+     * these will be converted using the fallback described in
      * the first paragraph, so some information might be lost.
+     *
      * @param tag     the input BCP47 language tag.
      * @param status  error information if creating the Locale failed.
      * @return        the Locale for the specified BCP47 language tag.

diff --git a/icu4c/source/common/unicode/uloc.h b/icu4c/source/common/unicode/uloc.h
index d3de89f..fa38092 100644
--- a/icu4c/source/common/unicode/uloc.h
+++ b/icu4c/source/common/unicode/uloc.h

@@ -1237,14 +1237,18 @@
  * Returns a locale ID for the specified BCP47 language tag string.
  * If the specified language tag contains any ill-formed subtags,
  * the first such subtag and all following subtags are ignored.
- * <p> 
- * This implements the 'Language-Tag' production of BCP47, and so
- * supports grandfathered (regular and irregular) as well as private
- * use language tags.  Private use tags are represented as 'x-whatever',
- * and grandfathered tags are converted to their canonical replacements
- * where they exist.  Note that a few grandfathered tags have no modern
- * replacement, these will be converted using the fallback described in
+ * <p>
+ * This implements the 'Language-Tag' production of BCP 47, and so
+ * supports legacy language tags (marked as “Type: grandfathered” in BCP 47)
+ * (regular and irregular) as well as private use language tags.
+ *
+ * Private use tags are represented as 'x-whatever',
+ * and legacy tags are converted to their canonical replacements where they exist.
+ *
+ * Note that a few legacy tags have no modern replacement;
+ * these will be converted using the fallback described in
  * the first paragraph, so some information might be lost.
+ *
  * @param langtag   the input BCP47 language tag.
  * @param localeID  the output buffer receiving a locale ID for the
  *                  specified BCP47 language tag.

diff --git a/icu4c/source/i18n/calendar.cpp b/icu4c/source/i18n/calendar.cpp
index 9497a85..c3e5e8c 100644
--- a/icu4c/source/i18n/calendar.cpp
+++ b/icu4c/source/i18n/calendar.cpp

@@ -266,7 +266,7 @@
     //TODO: ULOC_FULL_NAME is out of date and too small..
     char canonicalName[256];
 
-    // canonicalize, so grandfathered variant will be transformed to keywords
+    // Canonicalize, so that an old-style variant will be transformed to keywords.
     // e.g ja_JP_TRADITIONAL -> ja_JP@calendar=japanese
     // NOTE: Since ICU-20187, ja_JP_TRADITIONAL no longer canonicalizes, and
     // the Gregorian calendar is returned instead.

diff --git a/icu4c/source/i18n/fmtable_cnv.cpp b/icu4c/source/i18n/fmtable_cnv.cpp
index 9a64792..bc3847b 100644
--- a/icu4c/source/i18n/fmtable_cnv.cpp
+++ b/icu4c/source/i18n/fmtable_cnv.cpp

@@ -30,8 +30,6 @@
 // -------------------------------------
 // Creates a formattable object with a char* string.
 // This API is useless. The API that takes a UnicodeString is actually just as good.
-// This is just a grandfathered API.
-
 Formattable::Formattable(const char* stringToCopy)
 {
     init();

diff --git a/icu4c/source/test/testdata/localeMatcherTest.txt b/icu4c/source/test/testdata/localeMatcherTest.txt
index 7a10986..70afcd0 100644
--- a/icu4c/source/test/testdata/localeMatcherTest.txt
+++ b/icu4c/source/test/testdata/localeMatcherTest.txt

@@ -279,7 +279,7 @@
 zh-Hant >> und-TW
 zh >> und-TW
 
-** test: testMatchGrandfatheredCode
+** test: testMatchLegacyCode
 
 @supported=fr, i-klingon, en-Latn-US
 en-GB-oed >> en-Latn-US
@@ -984,7 +984,7 @@
 x-piglatin >> x-bork
 x-bork >> x-bork
 
-** test: MatchGrandfatheredCode
+** test: MatchLegacyCode
 @supported=fr, i-klingon, en-Latn-US
 en-GB-oed >> en-Latn-US
 i-klingon >> tlh
@@ -1525,7 +1525,7 @@
 x-piglatin >> fr
 x-bork >> x-bork
 
-** test: grandfathered codes
+** test: legacy codes
 @supported=fr, i-klingon, en-Latn-US
 en-GB-oed >> en-Latn-US
 i-klingon >> tlh

diff --git a/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/LanguageTag.java b/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/LanguageTag.java
index c5cf38e..3adf08a 100644
--- a/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/LanguageTag.java
+++ b/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/LanguageTag.java

@@ -38,13 +38,13 @@
     private List<String> _variants = Collections.emptyList();   // variant subtags
     private List<String> _extensions = Collections.emptyList(); // extensions
 
-    // Map contains grandfathered tags and its preferred mappings from
-    // http://www.ietf.org/rfc/rfc5646.txt
-    private static final Map<AsciiUtil.CaseInsensitiveKey, String[]> GRANDFATHERED =
+    // The Map contains legacy language tags (marked as “Type: grandfathered” in BCP 47)
+    // and their preferred mappings from BCP 47.
+    private static final Map<AsciiUtil.CaseInsensitiveKey, String[]> LEGACY =
         new HashMap<AsciiUtil.CaseInsensitiveKey, String[]>();
 
     static {
-        // grandfathered = irregular           ; non-redundant tags registered
+        // legacy        = irregular           ; non-redundant tags registered
         //               / regular             ; during the RFC 3066 era
         //
         // irregular     = "en-GB-oed"         ; irregular tags do not match
@@ -105,57 +105,17 @@
             {"zh-xiang",    "hsn"},
         };
         for (String[] e : entries) {
-            GRANDFATHERED.put(new AsciiUtil.CaseInsensitiveKey(e[0]), e);
+            LEGACY.put(new AsciiUtil.CaseInsensitiveKey(e[0]), e);
         }
     }
 
     private LanguageTag() {
     }
 
-    /*
-     * BNF in RFC5464
-     *  
-     * Language-Tag  = langtag             ; normal language tags
-     *               / privateuse          ; private use tag
-     *               / grandfathered       ; grandfathered tags
-     *
-     * 
-     * langtag       = language
-     *                 ["-" script]
-     *                 ["-" region]
-     *                 *("-" variant)
-     *                 *("-" extension)
-     *                 ["-" privateuse]
-     * 
-     * language      = 2*3ALPHA            ; shortest ISO 639 code
-     *                 ["-" extlang]       ; sometimes followed by
-     *                                     ; extended language subtags
-     *               / 4ALPHA              ; or reserved for future use
-     *               / 5*8ALPHA            ; or registered language subtag
-     * 
-     * extlang       = 3ALPHA              ; selected ISO 639 codes
-     *                 *2("-" 3ALPHA)      ; permanently reserved
-     * 
-     * script        = 4ALPHA              ; ISO 15924 code
-     * 
-     * region        = 2ALPHA              ; ISO 3166-1 code
-     *               / 3DIGIT              ; UN M.49 code
-     * 
-     * variant       = 5*8alphanum         ; registered variants
-     *               / (DIGIT 3alphanum)
-     * 
-     * extension     = singleton 1*("-" (2*8alphanum))
-     * 
-     *                                     ; Single alphanumerics
-     *                                     ; "x" reserved for private use
-     * singleton     = DIGIT               ; 0 - 9
-     *               / %x41-57             ; A - W
-     *               / %x59-5A             ; Y - Z
-     *               / %x61-77             ; a - w
-     *               / %x79-7A             ; y - z
-     * 
-     * privateuse    = "x" 1*("-" (1*8alphanum))
-     * 
+    /**
+     * See BCP 47 “Tags for Identifying Languages”:
+     * https://www.rfc-editor.org/info/bcp47 -->
+     * https://www.rfc-editor.org/rfc/rfc5646.html#section-2.1
      */
     public static LanguageTag parse(String languageTag, ParseStatus sts) {
         if (sts == null) {
@@ -166,8 +126,7 @@
 
         StringTokenIterator itr;
 
-        // Check if the tag is grandfathered
-        String[] gfmap = GRANDFATHERED.get(new AsciiUtil.CaseInsensitiveKey(languageTag));
+        String[] gfmap = LEGACY.get(new AsciiUtil.CaseInsensitiveKey(languageTag));
         if (gfmap != null) {
             // use preferred mapping
             itr = new StringTokenIterator(gfmap[1], SEP);

diff --git a/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/util/ULocale.java b/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/util/ULocale.java
index 5081767..b811d50 100644
--- a/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/util/ULocale.java
+++ b/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/util/ULocale.java

@@ -70,7 +70,8 @@
  * Canonicalization additionally performs the following:
  * <ul>
  * <li>POSIX ids are converted to ICU format IDs</li>
- * <li>'grandfathered' 3066 ids are converted to ICU standard form</li>
+ * <li>Legacy language tags (marked as “Type: grandfathered” in BCP 47)
+ * are converted to ICU standard form</li>
  * <li>'PREEURO' and 'EURO' variants are converted to currency keyword form,
  * with the currency
  * id appropriate to the country of the locale (for PREEURO) or EUR (for EURO).
@@ -1033,7 +1034,7 @@
 
     /**
      * {@icu} Returns the canonical name for the specified locale ID.  This is used to
-     * convert POSIX and other grandfathered IDs to standard ICU form.
+     * convert POSIX and other legacy IDs to standard ICU form.
      * @param localeID the locale id
      * @return the canonicalized id
      * @stable ICU 3.0
@@ -2666,60 +2667,18 @@
      * script to title case, country to upper case, variant to upper case,
      * and extensions to lower case.
      *
-     * <p>This implements the 'Language-Tag' production of BCP47, and
-     * so supports grandfathered (regular and irregular) as well as
-     * private use language tags.  Stand alone private use tags are
-     * represented as empty language and extension 'x-whatever',
-     * and grandfathered tags are converted to their canonical replacements
-     * where they exist.  
+     * <p>This implements the 'Language-Tag' production of BCP 47, and so
+     * supports legacy language tags (marked as “Type: grandfathered” in BCP 47)
+     * (regular and irregular) as well as private use language tags.
      *
-     * <p>Grandfathered tags with canonical replacements are as follows:
+     * <p>Stand-alone private use tags are represented as empty language and extension 'x-whatever',
+     * and legacy tags are converted to their canonical replacements where they exist.
      *
-     * <table>
-     * <tbody align="center">
-     * <tr><th>grandfathered tag</th><th>&nbsp;</th><th>modern replacement</th></tr>
-     * <tr><td>art-lojban</td><td>&nbsp;</td><td>jbo</td></tr>
-     * <tr><td>i-ami</td><td>&nbsp;</td><td>ami</td></tr>
-     * <tr><td>i-bnn</td><td>&nbsp;</td><td>bnn</td></tr>
-     * <tr><td>i-hak</td><td>&nbsp;</td><td>hak</td></tr>
-     * <tr><td>i-klingon</td><td>&nbsp;</td><td>tlh</td></tr>
-     * <tr><td>i-lux</td><td>&nbsp;</td><td>lb</td></tr>
-     * <tr><td>i-navajo</td><td>&nbsp;</td><td>nv</td></tr>
-     * <tr><td>i-pwn</td><td>&nbsp;</td><td>pwn</td></tr>
-     * <tr><td>i-tao</td><td>&nbsp;</td><td>tao</td></tr>
-     * <tr><td>i-tay</td><td>&nbsp;</td><td>tay</td></tr>
-     * <tr><td>i-tsu</td><td>&nbsp;</td><td>tsu</td></tr>
-     * <tr><td>no-bok</td><td>&nbsp;</td><td>nb</td></tr>
-     * <tr><td>no-nyn</td><td>&nbsp;</td><td>nn</td></tr>
-     * <tr><td>sgn-BE-FR</td><td>&nbsp;</td><td>sfb</td></tr>
-     * <tr><td>sgn-BE-NL</td><td>&nbsp;</td><td>vgt</td></tr>
-     * <tr><td>sgn-CH-DE</td><td>&nbsp;</td><td>sgg</td></tr>
-     * <tr><td>zh-guoyu</td><td>&nbsp;</td><td>cmn</td></tr>
-     * <tr><td>zh-hakka</td><td>&nbsp;</td><td>hak</td></tr>
-     * <tr><td>zh-min-nan</td><td>&nbsp;</td><td>nan</td></tr>
-     * <tr><td>zh-xiang</td><td>&nbsp;</td><td>hsn</td></tr>
-     * </tbody>
-     * </table>
+     * <p>Note that a few legacy tags have no modern replacement;
+     * these will be converted using the fallback described in
+     * the first paragraph, so some information might be lost.
      *
-     * <p>Grandfathered tags with no modern replacement will be
-     * converted as follows:
-     *
-     * <table>
-     * <tbody align="center">
-     * <tr><th>grandfathered tag</th><th>&nbsp;</th><th>converts to</th></tr>
-     * <tr><td>cel-gaulish</td><td>&nbsp;</td><td>xtg-x-cel-gaulish</td></tr>
-     * <tr><td>en-GB-oed</td><td>&nbsp;</td><td>en-GB-x-oed</td></tr>
-     * <tr><td>i-default</td><td>&nbsp;</td><td>en-x-i-default</td></tr>
-     * <tr><td>i-enochian</td><td>&nbsp;</td><td>und-x-i-enochian</td></tr>
-     * <tr><td>i-mingo</td><td>&nbsp;</td><td>see-x-i-mingo</td></tr>
-     * <tr><td>zh-min</td><td>&nbsp;</td><td>nan-x-zh-min</td></tr>
-     * </tbody>
-     * </table>
-     *
-     * <p>For a list of all grandfathered tags, see the
-     * IANA Language Subtag Registry (search for "Type: grandfathered").
-     *
-     * <p><b>Note</b>: there is no guarantee that <code>toLanguageTag</code>
+     * <p><b>Note</b>: There is no guarantee that <code>toLanguageTag</code>
      * and <code>forLanguageTag</code> will round-trip.
      *
      * @param languageTag the language tag
@@ -2821,7 +2780,7 @@
          * Resets the Builder to match the provided IETF BCP 47
          * language tag.  Discards the existing state.  Null and the
          * empty string cause the builder to be reset, like {@link
-         * #clear}.  Grandfathered tags (see {@link
+         * #clear}.  Legacy tags (see {@link
          * ULocale#forLanguageTag}) are converted to their canonical
          * form before being processed.  Otherwise, the language tag
          * must be well-formed (see {@link ULocale}) or an exception is

diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/CalendarUtil.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/CalendarUtil.java
index 83da8ce..8ceb8d1 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/CalendarUtil.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/CalendarUtil.java

@@ -46,7 +46,7 @@
             return calType.toLowerCase(Locale.ROOT);
         }
 
-        // Canonicalize, so grandfathered variant will be transformed to keywords
+        // Canonicalize, so that an old-style variant will be transformed to keywords.
         ULocale canonical = ULocale.createCanonical(loc.toString());
         calType = canonical.getKeywordValue(CALKEY);
         if (calType != null) {

diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LanguageTag.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LanguageTag.java
index 14d1a94..53f9879 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LanguageTag.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LanguageTag.java

@@ -38,13 +38,13 @@
     private List<String> _variants = Collections.emptyList();   // variant subtags
     private List<String> _extensions = Collections.emptyList(); // extensions
 
-    // Map contains grandfathered tags and its preferred mappings from
-    // http://www.ietf.org/rfc/rfc5646.txt
-    private static final Map<AsciiUtil.CaseInsensitiveKey, String[]> GRANDFATHERED =
+    // The Map contains legacy language tags (marked as “Type: grandfathered” in BCP 47)
+    // and their preferred mappings from BCP 47.
+    private static final Map<AsciiUtil.CaseInsensitiveKey, String[]> LEGACY =
         new HashMap<AsciiUtil.CaseInsensitiveKey, String[]>();
 
     static {
-        // grandfathered = irregular           ; non-redundant tags registered
+        // legacy        = irregular           ; non-redundant tags registered
         //               / regular             ; during the RFC 3066 era
         //
         // irregular     = "en-GB-oed"         ; irregular tags do not match
@@ -105,57 +105,17 @@
             {"zh-xiang",    "hsn"},
         };
         for (String[] e : entries) {
-            GRANDFATHERED.put(new AsciiUtil.CaseInsensitiveKey(e[0]), e);
+            LEGACY.put(new AsciiUtil.CaseInsensitiveKey(e[0]), e);
         }
     }
 
     private LanguageTag() {
     }
 
-    /*
-     * BNF in RFC5464
-     *
-     * Language-Tag  = langtag             ; normal language tags
-     *               / privateuse          ; private use tag
-     *               / grandfathered       ; grandfathered tags
-     *
-     *
-     * langtag       = language
-     *                 ["-" script]
-     *                 ["-" region]
-     *                 *("-" variant)
-     *                 *("-" extension)
-     *                 ["-" privateuse]
-     *
-     * language      = 2*3ALPHA            ; shortest ISO 639 code
-     *                 ["-" extlang]       ; sometimes followed by
-     *                                     ; extended language subtags
-     *               / 4ALPHA              ; or reserved for future use
-     *               / 5*8ALPHA            ; or registered language subtag
-     *
-     * extlang       = 3ALPHA              ; selected ISO 639 codes
-     *                 *2("-" 3ALPHA)      ; permanently reserved
-     *
-     * script        = 4ALPHA              ; ISO 15924 code
-     *
-     * region        = 2ALPHA              ; ISO 3166-1 code
-     *               / 3DIGIT              ; UN M.49 code
-     *
-     * variant       = 5*8alphanum         ; registered variants
-     *               / (DIGIT 3alphanum)
-     *
-     * extension     = singleton 1*("-" (2*8alphanum))
-     *
-     *                                     ; Single alphanumerics
-     *                                     ; "x" reserved for private use
-     * singleton     = DIGIT               ; 0 - 9
-     *               / %x41-57             ; A - W
-     *               / %x59-5A             ; Y - Z
-     *               / %x61-77             ; a - w
-     *               / %x79-7A             ; y - z
-     *
-     * privateuse    = "x" 1*("-" (1*8alphanum))
-     *
+    /**
+     * See BCP 47 “Tags for Identifying Languages”:
+     * https://www.rfc-editor.org/info/bcp47 -->
+     * https://www.rfc-editor.org/rfc/rfc5646.html#section-2.1
      */
     public static LanguageTag parse(String languageTag, ParseStatus sts) {
         if (sts == null) {
@@ -165,14 +125,13 @@
         }
 
         StringTokenIterator itr;
-        boolean isGrandfathered = false;
+        boolean isLegacy = false;
 
-        // Check if the tag is grandfathered
-        String[] gfmap = GRANDFATHERED.get(new AsciiUtil.CaseInsensitiveKey(languageTag));
+        String[] gfmap = LEGACY.get(new AsciiUtil.CaseInsensitiveKey(languageTag));
         // Language tag is at least 2 alpha so we can skip searching the first 2 chars.
         int dash = 2;
         while (gfmap == null && (dash = languageTag.indexOf('-', dash + 1)) != -1) {
-            gfmap = GRANDFATHERED.get(new AsciiUtil.CaseInsensitiveKey(languageTag.substring(0, dash)));
+            gfmap = LEGACY.get(new AsciiUtil.CaseInsensitiveKey(languageTag.substring(0, dash)));
         }
 
         if (gfmap != null) {
@@ -183,7 +142,7 @@
                 // append the rest of the tag.
                 itr = new StringTokenIterator(gfmap[1] + languageTag.substring(dash), SEP);
             }
-            isGrandfathered = true;
+            isLegacy = true;
         } else {
             itr = new StringTokenIterator(languageTag, SEP);
         }
@@ -202,8 +161,8 @@
         }
         tag.parsePrivateuse(itr, sts);
 
-        if (isGrandfathered) {
-            // Grandfathered tag is replaced with a well-formed tag above.
+        if (isLegacy) {
+            // A legacy tag is replaced with a well-formed tag above.
             // However, the parsed length must be the original tag length.
             assert (itr.isDone());
             assert (!sts.isError());

diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java b/icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java
index ac8746f..8c3a4eb 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java

@@ -80,7 +80,8 @@
  * Canonicalization additionally performs the following:
  * <ul>
  * <li>POSIX ids are converted to ICU format IDs</li>
- * <li>'grandfathered' 3066 ids are converted to ICU standard form</li>
+ * <li>Legacy language tags (marked as “Type: grandfathered” in BCP 47)
+ * are converted to ICU standard form</li>
  * </ul>
  * All ULocale constructors automatically normalize the locale id.  To handle
  * POSIX ids, <code>canonicalize</code> can be called to convert the id
@@ -1204,7 +1205,7 @@
 
     /**
      * {@icu} Returns the canonical name according to CLDR for the specified locale ID.
-     * This is used to convert POSIX and other grandfathered IDs to standard ICU form.
+     * This is used to convert POSIX and other legacy IDs to standard ICU form.
      * @param localeID the locale id
      * @return the canonicalized id
      * @stable ICU 3.0
@@ -1242,7 +1243,7 @@
         // element in Supplemental Data, replace the language subtag with the replacement value.
         // If there are additional subtags in the replacement value, add them to the result, but
         // only if there is no corresponding subtag already in the tag.
-        // Five special deprecated grandfathered codes (such as i-default) are in type attributes, and are also replaced.
+        // Five special deprecated codes (such as i-default) are in type attributes, and are also replaced.
         try {
             UResourceBundle languageAlias = UResourceBundle.getBundleInstance(ICUData.ICU_BASE_NAME,
                 "metadata", ICUResourceBundle.ICU_DATA_CLASS_LOADER)
@@ -3201,58 +3202,16 @@
      *
      * </ul>
      *
-     * <p>This implements the 'Language-Tag' production of BCP47, and
-     * so supports grandfathered (regular and irregular) as well as
-     * private use language tags.  Stand alone private use tags are
-     * represented as empty language and extension 'x-whatever',
-     * and grandfathered tags are converted to their canonical replacements
-     * where they exist.
+     * <p>This implements the 'Language-Tag' production of BCP 47, and so
+     * supports legacy language tags, both regular and irregular (marked as
+     * “Type: grandfathered” in BCP 47), as well as private use language tags.
      *
-     * <p>Grandfathered tags with canonical replacements are as follows:
+     * <p>Stand-alone private use tags are represented as empty language and extension 'x-whatever',
+     * and legacy tags are converted to their canonical replacements where they exist.
      *
-     * <table>
-     * <tbody align="center">
-     * <tr><th>grandfathered tag</th><th>&nbsp;</th><th>modern replacement</th></tr>
-     * <tr><td>art-lojban</td><td>&nbsp;</td><td>jbo</td></tr>
-     * <tr><td>i-ami</td><td>&nbsp;</td><td>ami</td></tr>
-     * <tr><td>i-bnn</td><td>&nbsp;</td><td>bnn</td></tr>
-     * <tr><td>i-hak</td><td>&nbsp;</td><td>hak</td></tr>
-     * <tr><td>i-klingon</td><td>&nbsp;</td><td>tlh</td></tr>
-     * <tr><td>i-lux</td><td>&nbsp;</td><td>lb</td></tr>
-     * <tr><td>i-navajo</td><td>&nbsp;</td><td>nv</td></tr>
-     * <tr><td>i-pwn</td><td>&nbsp;</td><td>pwn</td></tr>
-     * <tr><td>i-tao</td><td>&nbsp;</td><td>tao</td></tr>
-     * <tr><td>i-tay</td><td>&nbsp;</td><td>tay</td></tr>
-     * <tr><td>i-tsu</td><td>&nbsp;</td><td>tsu</td></tr>
-     * <tr><td>no-bok</td><td>&nbsp;</td><td>nb</td></tr>
-     * <tr><td>no-nyn</td><td>&nbsp;</td><td>nn</td></tr>
-     * <tr><td>sgn-BE-FR</td><td>&nbsp;</td><td>sfb</td></tr>
-     * <tr><td>sgn-BE-NL</td><td>&nbsp;</td><td>vgt</td></tr>
-     * <tr><td>sgn-CH-DE</td><td>&nbsp;</td><td>sgg</td></tr>
-     * <tr><td>zh-guoyu</td><td>&nbsp;</td><td>cmn</td></tr>
-     * <tr><td>zh-hakka</td><td>&nbsp;</td><td>hak</td></tr>
-     * <tr><td>zh-min-nan</td><td>&nbsp;</td><td>nan</td></tr>
-     * <tr><td>zh-xiang</td><td>&nbsp;</td><td>hsn</td></tr>
-     * </tbody>
-     * </table>
-     *
-     * <p>Grandfathered tags with no modern replacement will be
-     * converted as follows:
-     *
-     * <table>
-     * <tbody align="center">
-     * <tr><th>grandfathered tag</th><th>&nbsp;</th><th>converts to</th></tr>
-     * <tr><td>cel-gaulish</td><td>&nbsp;</td><td>xtg-x-cel-gaulish</td></tr>
-     * <tr><td>en-GB-oed</td><td>&nbsp;</td><td>en-GB-x-oed</td></tr>
-     * <tr><td>i-default</td><td>&nbsp;</td><td>en-x-i-default</td></tr>
-     * <tr><td>i-enochian</td><td>&nbsp;</td><td>und-x-i-enochian</td></tr>
-     * <tr><td>i-mingo</td><td>&nbsp;</td><td>see-x-i-mingo</td></tr>
-     * <tr><td>zh-min</td><td>&nbsp;</td><td>nan-x-zh-min</td></tr>
-     * </tbody>
-     * </table>
-     *
-     * <p>For a list of all grandfathered tags, see the
-     * IANA Language Subtag Registry (search for "Type: grandfathered").
+     * <p>Note that a few legacy tags have no modern replacement;
+     * these will be converted using the fallback described in
+     * the first paragraph, so some information might be lost.
      *
      * <p><b>Note</b>: there is no guarantee that <code>toLanguageTag</code>
      * and <code>forLanguageTag</code> will round-trip.
@@ -3491,7 +3450,7 @@
          * Resets the Builder to match the provided IETF BCP 47
          * language tag.  Discards the existing state.  Null and the
          * empty string cause the builder to be reset, like {@link
-         * #clear}.  Grandfathered tags (see {@link
+         * #clear}.  Legacy tags (see {@link
          * ULocale#forLanguageTag}) are converted to their canonical
          * form before being processed.  Otherwise, the language tag
          * must be well-formed (see {@link ULocale}) or an exception is

diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherTest.java
index 97e2dab..deaf486 100644
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherTest.java

@@ -365,7 +365,7 @@
     }
 
     @Test
-    public void testMatchGrandfatheredCode() {
+    public void testMatchLegacyCode() {
         final LocaleMatcher matcher = newLocaleMatcher("fr, i_klingon, en_Latn_US");
         assertEquals("en_Latn_US", matcher.getBestMatch("en_GB_oed").toString());
         // assertEquals("tlh", matcher.getBestMatch("i_klingon").toString());

diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/data/localeMatcherTest.txt b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/data/localeMatcherTest.txt
index 7a10986..70afcd0 100644
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/data/localeMatcherTest.txt
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/data/localeMatcherTest.txt

@@ -279,7 +279,7 @@
 zh-Hant >> und-TW
 zh >> und-TW
 
-** test: testMatchGrandfatheredCode
+** test: testMatchLegacyCode
 
 @supported=fr, i-klingon, en-Latn-US
 en-GB-oed >> en-Latn-US
@@ -984,7 +984,7 @@
 x-piglatin >> x-bork
 x-bork >> x-bork
 
-** test: MatchGrandfatheredCode
+** test: MatchLegacyCode
 @supported=fr, i-klingon, en-Latn-US
 en-GB-oed >> en-Latn-US
 i-klingon >> tlh
@@ -1525,7 +1525,7 @@
 x-piglatin >> fr
 x-bork >> x-bork
 
-** test: grandfathered codes
+** test: legacy codes
 @supported=fr, i-klingon, en-Latn-US
 en-GB-oed >> en-Latn-US
 i-klingon >> tlh

diff --git a/icu4j/main/tests/translit/src/com/ibm/icu/dev/test/translit/langtagRegex.txt b/icu4j/main/tests/translit/src/com/ibm/icu/dev/test/translit/langtagRegex.txt
index 696750a..26ba85b 100644
--- a/icu4j/main/tests/translit/src/com/ibm/icu/dev/test/translit/langtagRegex.txt
+++ b/icu4j/main/tests/translit/src/com/ibm/icu/dev/test/translit/langtagRegex.txt

@@ -38,14 +38,15 @@
 
 $privateUse = $x (?: $s $alphanum{1,8} )+ ; # "x" 1*("-" (1*8alphanum))
 
-# Define certain grandfathered codes, since otherwise the regex is pretty useless.
+# Define certain legacy language tags (marked as “Type: grandfathered” in BCP 47),
+# since otherwise the regex is pretty useless.
 # Since these are limited, this is safe even with later changes to the registry --
 # the only oddity is that it might change the type of the tag, and thus
 # the results from the capturing groups.
 # http://www.iana.org/assignments/language-subtag-registry
 # Note that these have to be compared case insensitively, requiring (?i) below.
 
-$grandfathered  = en $s GB $s oed
+$legacy  = en $s GB $s oed
       | i $s (?: ami | bnn | default | enochian | hak | klingon | lux | mingo | navajo | pwn | tao | tay | tsu )
       | no $s (?: bok | nyn )
       | sgn $s (?: BE $s (?: fr | nl) | CH $s de )
@@ -55,7 +56,7 @@
 # For well-formedness, we don't need the ones that would otherwise pass.
 # For validity, they need to be checked.
 
-# $grandfatheredWellFormed = (?:
+# $legacyWellFormed = (?:
 #         art $s lojban
 #     | cel $s gaulish
 #     | zh $s (?: guoyu | hakka | xiang )
@@ -78,12 +79,12 @@
       (?: $s ( $privateUse ) )? 5%);
 
 # Here is the final breakdown, with capturing groups for each of these components
-# The variants, extensions, grandfathered, and private-use may have interior '-'
+# The variants, extensions, legacy, and private-use may have interior '-'
  
 $root = (?i) # case-insensitive
   (?:
       $langtag 90%
     | ( $privateUse ) 5%
-    | ( $grandfathered ) 5%)
+    | ( $legacy ) 5%)
 #    (?: \@ $keywords )? 5%
     ;

diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/SupplementalData.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/SupplementalData.java
index ddc6478..57c6df8 100644
--- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/SupplementalData.java
+++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/SupplementalData.java

@@ -527,7 +527,8 @@
     // ...
     // Remove the script code 'Zzzz' and the region code 'ZZ' if they occur.
     //
-    // Note that this implementation does not need to handle "grandfathered" tags.
+    // Note that this implementation does not need to handle
+    // legacy language tags (marked as “Type: grandfathered” in BCP 47).
     private Optional<LocaleId> addLikelySubtags(String localeId) {
         if (localeId.equals("root")) {
             return Optional.empty();
commit	39da689d30512148dc47d2169910d6b7c1c09cd5	[log] [tgz]
author	Markus Scherer <markus.icu@gmail.com>	Tue Aug 18 15:05:22 2020 -0700
committer	Markus Scherer <markus.icu@gmail.com>	Fri Aug 21 14:13:03 2020 -0700
tree	ac1271cc89cb78c6fb006108e21f1825a60cff73
parent	cde54fc5ba1581061cc31c158967ab6b074df3ab [diff]