ICU-20328 Implement LocaleBuilder
Design Doc: https://goo.gl/Qf12p3
diff --git a/icu4c/source/common/Makefile.in b/icu4c/source/common/Makefile.in
index e10d3a2..e663cb8 100644
--- a/icu4c/source/common/Makefile.in
+++ b/icu4c/source/common/Makefile.in
@@ -88,6 +88,7 @@
 ucnv_ext.o ucnvmbcs.o ucnv2022.o ucnvhz.o ucnv_lmb.o ucnvisci.o ucnvdisp.o ucnv_set.o ucnv_ct.o \
 resource.o uresbund.o ures_cnv.o uresdata.o resbund.o resbund_cnv.o \
 ucurr.o \
+localebuilder.o \
 messagepattern.o ucat.o locmap.o uloc.o locid.o locutil.o locavailable.o locdispnames.o locdspnm.o loclikely.o locresdata.o \
 bytestream.o stringpiece.o bytesinkutil.o \
 stringtriebuilder.o bytestriebuilder.o \
diff --git a/icu4c/source/common/common.vcxproj b/icu4c/source/common/common.vcxproj
index eb9b456..14d6e6c 100644
--- a/icu4c/source/common/common.vcxproj
+++ b/icu4c/source/common/common.vcxproj
@@ -256,6 +256,7 @@
     <ClCompile Include="uresdata.cpp" />
     <ClCompile Include="resource.cpp" />
     <ClCompile Include="ucurr.cpp" />
+    <ClCompile Include="localebuilder.cpp" />
     <ClCompile Include="caniter.cpp" />
     <ClCompile Include="filterednormalizer2.cpp" />
     <ClCompile Include="loadednormalizer2impl.cpp" />
@@ -445,6 +446,7 @@
     <ClInclude Include="ustr_imp.h" />
     <ClInclude Include="static_unicode_sets.h" />
     <ClInclude Include="capi_helper.h" />
+    <ClInclude Include="unicode\localebuilder.h" />
   </ItemGroup>
   <ItemGroup>
     <ResourceCompile Include="common.rc" />
diff --git a/icu4c/source/common/common.vcxproj.filters b/icu4c/source/common/common.vcxproj.filters
index 85d0d9b..72fef1b 100644
--- a/icu4c/source/common/common.vcxproj.filters
+++ b/icu4c/source/common/common.vcxproj.filters
@@ -361,6 +361,9 @@
     <ClCompile Include="resource.cpp">
       <Filter>locales &amp; resources</Filter>
     </ClCompile>
+    <ClCompile Include="localebuilder.cpp">
+      <Filter>locales &amp; resources</Filter>
+    </ClCompile>
     <ClCompile Include="caniter.cpp">
       <Filter>normalization</Filter>
     </ClCompile>
@@ -1225,5 +1228,8 @@
     <CustomBuild Include="unicode\stringoptions.h">
       <Filter>strings</Filter>
     </CustomBuild>
+    <CustomBuild Include="unicode\localebuilder.h">
+      <Filter>locales &amp; resources</Filter>
+    </CustomBuild>
   </ItemGroup>
 </Project>
diff --git a/icu4c/source/common/common_uwp.vcxproj b/icu4c/source/common/common_uwp.vcxproj
index 1265b67..af030c4 100644
--- a/icu4c/source/common/common_uwp.vcxproj
+++ b/icu4c/source/common/common_uwp.vcxproj
@@ -383,6 +383,7 @@
     <ClCompile Include="uresdata.cpp" />
     <ClCompile Include="resource.cpp" />
     <ClCompile Include="ucurr.cpp" />
+    <ClCompile Include="localebuilder.cpp" />
     <ClCompile Include="caniter.cpp" />
     <ClCompile Include="filterednormalizer2.cpp" />
     <ClCompile Include="loadednormalizer2impl.cpp" />
@@ -572,6 +573,7 @@
     <ClInclude Include="ustr_imp.h" />
     <ClInclude Include="static_unicode_sets.h" />
     <ClInclude Include="capi_helper.h" />
+    <ClInclude Include="unicode\localebuilder.h" />
   </ItemGroup>
   <ItemGroup>
     <ResourceCompile Include="common.rc" />
diff --git a/icu4c/source/common/localebuilder.cpp b/icu4c/source/common/localebuilder.cpp
new file mode 100644
index 0000000..fe931fc
--- /dev/null
+++ b/icu4c/source/common/localebuilder.cpp
@@ -0,0 +1,436 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include <utility>
+
+#include "bytesinkutil.h"  // CharStringByteSink
+#include "charstr.h"
+#include "cstring.h"
+#include "ulocimp.h"
+#include "unicode/localebuilder.h"
+#include "unicode/locid.h"
+
+U_NAMESPACE_BEGIN
+
+#define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9'))
+#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) )
+// Keyword name under which Unicode locale attributes are stored; file-local and immutable.
+static const char* const kAttributeKey = "attribute";
+
+// Validates s with the checker chosen by key: 'u', 't', 'x' (any case) or generic.
+static bool _isExtensionSubtags(char key, const char* s, int32_t len) {
+    const char singleton = uprv_tolower(key);
+    if (singleton == 'u') {
+        return ultag_isUnicodeExtensionSubtags(s, len);
+    }
+    if (singleton == 't') {
+        return ultag_isTransformedExtensionSubtags(s, len);
+    }
+    if (singleton == 'x') { return ultag_isPrivateuseValueSubtags(s, len); }
+    return ultag_isExtensionSubtags(s, len);
+}
+
+LocaleBuilder::LocaleBuilder() : UObject(), status_(U_ZERO_ERROR), language_(),
+    script_(), region_(), variant_(nullptr), extensions_(nullptr)
+{
+    language_[0] = 0;  // start with an empty language string
+    script_[0] = 0;    // start with an empty script string
+    region_[0] = 0;    // start with an empty region string
+}
+
+LocaleBuilder::~LocaleBuilder()
+{
+    delete variant_;     // owned CharString; may be nullptr
+    delete extensions_;  // owned Locale holding the keywords; may be nullptr
+}
+
+LocaleBuilder& LocaleBuilder::setLocale(const Locale& locale)
+{
+    clear();  // drops every previous field and resets status_ to U_ZERO_ERROR
+    setLanguage(locale.getLanguage());  // each setter validates and may set status_
+    setScript(locale.getScript());
+    setRegion(locale.getCountry());
+    setVariant(locale.getVariant());
+    extensions_ = locale.clone();  // whole-locale copy; build() only reads its keywords
+    if (extensions_ == nullptr) {
+        status_ = U_MEMORY_ALLOCATION_ERROR;
+    }
+    return *this;
+}
+
+// Replaces the builder state with the locale Locale::forLanguageTag() parses from tag.
+LocaleBuilder& LocaleBuilder::setLanguageTag(StringPiece tag)
+{
+    const Locale parsed = Locale::forLanguageTag(tag, status_);
+    // setLocale() starts by clearing status_, so it must only run when
+    // forLanguageTag() succeeded; otherwise the parse error would be lost.
+    if (U_SUCCESS(status_)) { setLocale(parsed); }
+    return *this;
+}
+
+static void setField(StringPiece input, char* dest, UErrorCode& errorCode,
+                     UBool (*test)(const char*, int32_t)) {
+    if (U_FAILURE(errorCode)) { return; }
+    if (input.empty()) {
+        dest[0] = '\0';
+    } else if (test(input.data(), input.length())) {
+        uprv_memcpy(dest, input.data(), input.length());
+        dest[input.length()] = '\0';
+    } else {
+        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+    }
+}
+
+LocaleBuilder& LocaleBuilder::setLanguage(StringPiece language)
+{
+    setField(language, language_, status_, &ultag_isLanguageSubtag);  // empty input clears the field
+    return *this;
+}
+
+LocaleBuilder& LocaleBuilder::setScript(StringPiece script)
+{
+    setField(script, script_, status_, &ultag_isScriptSubtag);  // empty input clears the field
+    return *this;
+}
+
+LocaleBuilder& LocaleBuilder::setRegion(StringPiece region)
+{
+    setField(region, region_, status_, &ultag_isRegionSubtag);  // empty input clears the field
+    return *this;
+}
+
+// Normalizes len characters of data in place: every '_' becomes '-', and
+// every other character is passed through uprv_tolower(). Nothing is written
+// when len is zero or negative.
+static void transform(char* data, int32_t len) {
+    for (int32_t pos = 0; pos < len; ++pos) {
+        const char c = data[pos];
+        data[pos] = (c == '_') ? '-' : uprv_tolower(c);
+    }
+}
+
+// Sets the variant; an empty value clears it. '_' is mapped to '-' and the
+// text is lowercased before validation. Invalid input sets status_ and keeps
+// the previously stored variant.
+LocaleBuilder& LocaleBuilder::setVariant(StringPiece variant)
+{
+    if (U_FAILURE(status_)) { return *this; }
+    if (variant.empty()) {
+        delete variant_;
+        variant_ = nullptr;
+        return *this;
+    }
+    LocalPointer<CharString> new_variant(new CharString(variant, status_), status_);
+    // LocalPointer frees the copy on every early return (it used to leak when
+    // the CharString constructor failed) and flags a failed allocation itself.
+    if (U_FAILURE(status_)) { return *this; }
+    transform(new_variant->data(), new_variant->length());
+    if (!ultag_isVariantSubtags(new_variant->data(), new_variant->length())) {
+        status_ = U_ILLEGAL_ARGUMENT_ERROR;
+        return *this;
+    }
+    delete variant_;
+    variant_ = new_variant.orphan();
+    return *this;
+}
+
+static bool
+_isKeywordValue(const char* key, const char* value, int32_t value_len)
+{
+    if (key[1] == '\0') {
+        // one char key: treated as an extension key and checked by its own validator
+        return (UPRV_ISALPHANUM(uprv_tolower(key[0])) &&
+                _isExtensionSubtags(key[0], value, value_len));
+    } else if (uprv_strcmp(key, kAttributeKey) == 0) {
+        // unicode attributes
+        return ultag_isUnicodeLocaleAttributes(value, value_len);
+    }
+    // otherwise: unicode extension value
+    // We need to convert from legacy key/value to unicode
+    // key/value
+    const char* unicode_locale_key = uloc_toUnicodeLocaleKey(key);
+    const char* unicode_locale_type = uloc_toUnicodeLocaleType(key, value);
+
+    return unicode_locale_key && unicode_locale_type &&  // either conversion may yield nullptr
+           ultag_isUnicodeLocaleKey(unicode_locale_key, -1) &&  // -1: NUL-terminated input
+           ultag_isUnicodeLocaleType(unicode_locale_type, -1);
+}
+
+static void
+_copyExtensions(const Locale& from, Locale* to, bool validate, UErrorCode& errorCode)
+{
+    if (U_FAILURE(errorCode)) { return; }
+    LocalPointer<icu::StringEnumeration> iter(from.createKeywords(errorCode));
+    if (U_FAILURE(errorCode) || iter.isNull()) { return; }  // error, or no enumeration returned
+    const char* key;
+    while ((key = iter->next(nullptr, errorCode)) != nullptr) {
+        CharString value;
+        CharStringByteSink sink(&value);
+        from.getKeywordValue(key, sink, errorCode);
+        if (U_FAILURE(errorCode)) { return; }
+        if (uprv_strcmp(key, kAttributeKey) == 0) {
+            transform(value.data(), value.length());  // attributes only: '_' -> '-', lowercase
+        }
+        if (validate &&
+            !_isKeywordValue(key, value.data(), value.length())) {
+            errorCode = U_ILLEGAL_ARGUMENT_ERROR;  // stop at the first invalid keyword value
+            return;
+        }
+        to->setKeywordValue(key, value.data(), errorCode);
+        if (U_FAILURE(errorCode)) { return; }
+    }
+}
+
+static void
+_clearUAttributesAndKeyType(Locale* locale, UErrorCode& errorCode)
+{
+    // Step 1: reset the Unicode attributes keyword to an empty value.
+    locale->setKeywordValue(kAttributeKey, "", errorCode);
+
+    // Step 2: walk the Unicode keywords and set each one to nullptr.
+    LocalPointer<icu::StringEnumeration> keys(locale->createUnicodeKeywords(errorCode));
+    if (keys.isNull() || U_FAILURE(errorCode)) { return; }
+    for (const char* key = keys->next(nullptr, errorCode); key != nullptr;
+             key = keys->next(nullptr, errorCode)) {
+        locale->setUnicodeKeywordValue(key, nullptr, errorCode);
+    }
+}
+
+static void
+_setUnicodeExtensions(Locale* locale, const CharString& value, UErrorCode& errorCode)
+{
+    // Wrap the value in a minimal tag ("und-u-" + value) so the language tag
+    // parser turns it into keywords, then copy those over without validation.
+    CharString tag("und-u-", errorCode);
+    tag.append(value, errorCode);
+    const Locale parsed = Locale::forLanguageTag(tag.data(), errorCode);
+    _copyExtensions(parsed, locale, false, errorCode);
+}
+
+LocaleBuilder& LocaleBuilder::setExtension(char key, StringPiece value)
+{
+    if (U_FAILURE(status_)) { return *this; }
+    if (!UPRV_ISALPHANUM(key)) {  // key must be a single ASCII letter or digit
+        status_ = U_ILLEGAL_ARGUMENT_ERROR;
+        return *this;
+    }
+    CharString value_str(value, status_);
+    if (U_FAILURE(status_)) { return *this; }
+    transform(value_str.data(), value_str.length());  // '_' -> '-', lowercase
+    if (!value_str.isEmpty() &&  // an empty value is accepted without validation
+            !_isExtensionSubtags(key, value_str.data(), value_str.length())) {
+        status_ = U_ILLEGAL_ARGUMENT_ERROR;
+        return *this;
+    }
+    if (extensions_ == nullptr) {
+        extensions_ = new Locale();
+        if (extensions_ == nullptr) {
+            status_ = U_MEMORY_ALLOCATION_ERROR;
+            return *this;
+        }
+    }
+    if (uprv_tolower(key) != 'u') {
+        // for t, x and others extension.
+        extensions_->setKeywordValue(StringPiece(&key, 1), value_str.data(),
+                                     status_);
+        return *this;
+    }
+    _clearUAttributesAndKeyType(extensions_, status_);  // 'u': drop old attributes and keywords first
+    if (U_FAILURE(status_)) { return *this; }
+    if (!value.empty()) {
+        _setUnicodeExtensions(extensions_, value_str, status_);
+    }
+    return *this;
+}
+
+LocaleBuilder& LocaleBuilder::setUnicodeLocaleKeyword(
+      StringPiece key, StringPiece type)
+{
+    if (U_FAILURE(status_)) { return *this; }
+    if (!ultag_isUnicodeLocaleKey(key.data(), key.length()) ||
+            (!type.empty() &&  // an empty type skips type validation
+                 !ultag_isUnicodeLocaleType(type.data(), type.length()))) {
+      status_ = U_ILLEGAL_ARGUMENT_ERROR;
+      return *this;
+    }
+    if (extensions_ == nullptr) {
+        extensions_ = new Locale();  // created lazily on first use
+    }
+    if (extensions_ == nullptr) {
+        status_ = U_MEMORY_ALLOCATION_ERROR;
+        return *this;
+    }
+    extensions_->setUnicodeKeywordValue(key, type, status_);
+    return *this;
+}
+
+// Adds one attribute to the stored list (no duplicates); input is normalized first.
+LocaleBuilder& LocaleBuilder::addUnicodeLocaleAttribute(
+    StringPiece value)
+{
+    CharString value_str(value, status_);
+    if (U_FAILURE(status_)) { return *this; }
+    transform(value_str.data(), value_str.length());
+    if (!ultag_isUnicodeLocaleAttribute(value_str.data(), value_str.length())) {
+        status_ = U_ILLEGAL_ARGUMENT_ERROR;
+        return *this;
+    }
+    if (extensions_ == nullptr) {
+        extensions_ = new Locale();
+        if (extensions_ == nullptr) {
+            status_ = U_MEMORY_ALLOCATION_ERROR;
+            return *this;
+        }
+        extensions_->setKeywordValue(kAttributeKey, value_str.data(), status_);
+        return *this;
+    }
+
+    CharString attributes;
+    CharStringByteSink sink(&attributes);
+    UErrorCode localErrorCode = U_ZERO_ERROR;
+    extensions_->getKeywordValue(kAttributeKey, sink, localErrorCode);
+    if (U_FAILURE(localErrorCode)) {
+        // No attributes, set the attribute.
+        extensions_->setKeywordValue(kAttributeKey, value_str.data(), status_);
+        return *this;
+    }
+
+    // Turn every '_' / '-' separator into a NUL (lowercasing the rest) so each
+    // stored attribute is its own C string for uprv_strcmp / uprv_strlen below.
+    char* p = attributes.data();
+    for (int32_t i = 0; i < attributes.length(); i++, p++) {
+        *p = (*p == '_' || *p == '-') ? '\0' : uprv_tolower(*p);
+    }
+    const char* start = attributes.data();
+    const char* limit = attributes.data() + attributes.length();
+    CharString new_attributes;
+    bool inserted = false;
+    while (start < limit) {
+        if (!inserted) {
+            int cmp = uprv_strcmp(start, value_str.data());
+            if (cmp == 0) { return *this; }  // Found it in attributes: Just return
+            if (cmp > 0) {
+                if (!new_attributes.isEmpty()) new_attributes.append('_', status_);
+                new_attributes.append(value_str.data(), status_);
+                inserted = true;
+            }
+        }
+        if (!new_attributes.isEmpty()) { new_attributes.append('_', status_); }
+        new_attributes.append(start, status_);
+        start += uprv_strlen(start) + 1;
+    }
+    if (!inserted) {
+        if (!new_attributes.isEmpty()) { new_attributes.append('_', status_); }
+        new_attributes.append(value_str.data(), status_);
+    }
+    extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_);
+    return *this;
+}
+
+LocaleBuilder& LocaleBuilder::removeUnicodeLocaleAttribute(
+    StringPiece value)
+{
+    CharString value_str(value, status_);
+    if (U_FAILURE(status_)) { return *this; }
+    transform(value_str.data(), value_str.length());  // '_' -> '-', lowercase
+    if (!ultag_isUnicodeLocaleAttribute(value_str.data(), value_str.length())) {
+        status_ = U_ILLEGAL_ARGUMENT_ERROR;
+        return *this;
+    }
+    if (extensions_ == nullptr) { return *this; }
+    UErrorCode localErrorCode = U_ZERO_ERROR;  // local so a lookup failure does not poison status_
+    CharString attributes;
+    CharStringByteSink sink(&attributes);
+    extensions_->getKeywordValue(kAttributeKey, sink, localErrorCode);
+    // get failure, just return
+    if (U_FAILURE(localErrorCode)) { return *this; }
+    // Do not have any attributes, just return.
+    if (attributes.isEmpty()) { return *this; }
+
+    char* p = attributes.data();
+    // Replace each '_' and '-' separator in place with a null terminator
+    // (lowercasing everything else) so later we can use uprv_strcmp to compare.
+    for (int32_t i = 0; i < attributes.length(); i++, p++) {
+        *p = (*p == '_' || *p == '-') ? '\0' : uprv_tolower(*p);
+    }
+
+    const char* start = attributes.data();
+    const char* limit = attributes.data() + attributes.length();
+    CharString new_attributes;  // rebuilt list without the removed attribute, joined by '_'
+    bool found = false;
+    while (start < limit) {
+        if (uprv_strcmp(start, value_str.data()) == 0) {
+            found = true;
+        } else {
+            if (!new_attributes.isEmpty()) {
+                new_attributes.append('_', status_);
+            }
+            new_attributes.append(start, status_);
+        }
+        start += uprv_strlen(start) + 1;  // skip this token and its terminator
+    }
+    // Found the value in attributes, set the attribute.
+    if (found) {
+        extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_);
+    }
+    return *this;
+}
+
+LocaleBuilder& LocaleBuilder::clear()
+{
+    status_ = U_ZERO_ERROR;  // also discards any error recorded by earlier setters
+    language_[0] = 0;
+    script_[0] = 0;
+    region_[0] = 0;
+    delete variant_;
+    variant_ = nullptr;
+    clearExtensions();  // frees extensions_ and sets it to nullptr
+    return *this;
+}
+
+LocaleBuilder& LocaleBuilder::clearExtensions()
+{
+    delete extensions_;  // leaves language, script, region, variant and status_ untouched
+    extensions_ = nullptr;
+    return *this;
+}
+
+Locale makeBogusLocale() {
+  Locale bogus;
+  bogus.setToBogus();  // mark the default-constructed locale as bogus before returning it
+  return bogus;
+}
+
+Locale LocaleBuilder::build(UErrorCode& errorCode)
+{
+    if (U_FAILURE(errorCode)) {
+        return makeBogusLocale();
+    }
+    if (U_FAILURE(status_)) {
+        errorCode = status_;  // report the error recorded by an earlier setter
+        return makeBogusLocale();
+    }
+    CharString locale_str(language_, errorCode);  // language[-script][-region][-variant]
+    if (uprv_strlen(script_) > 0) {
+        locale_str.append('-', errorCode).append(StringPiece(script_), errorCode);
+    }
+    if (uprv_strlen(region_) > 0) {
+        locale_str.append('-', errorCode).append(StringPiece(region_), errorCode);
+    }
+    if (variant_ != nullptr) {
+        locale_str.append('-', errorCode).append(StringPiece(variant_->data()), errorCode);
+    }
+    if (U_FAILURE(errorCode)) {
+        return makeBogusLocale();
+    }
+    Locale product(locale_str.data());
+    if (extensions_ != nullptr) {
+        _copyExtensions(*extensions_, &product, true, errorCode);  // true: validate every keyword value
+    }
+    if (U_FAILURE(errorCode)) {
+        return makeBogusLocale();
+    }
+    return product;
+}
+
+U_NAMESPACE_END
diff --git a/icu4c/source/common/uloc_tag.cpp b/icu4c/source/common/uloc_tag.cpp
index 9b5de7f..063efd4 100644
--- a/icu4c/source/common/uloc_tag.cpp
+++ b/icu4c/source/common/uloc_tag.cpp
@@ -406,13 +406,22 @@
 }
 
 static UBool
-_isLanguageSubtag(const char* s, int32_t len) {
+_isAlphaNumericStringLimitedLength(const char* s, int32_t len, int32_t min, int32_t max) {
+    if (len < 0) {
+        len = (int32_t)uprv_strlen(s);  // negative len: s is NUL-terminated
+    }
+    if (len >= min && len <= max && _isAlphaNumericString(s, len)) {  // bounds are inclusive
+        return TRUE;
+    }
+    return FALSE;
+}
+
+U_CFUNC UBool
+ultag_isLanguageSubtag(const char* s, int32_t len) {
     /*
-     * language      = 2*3ALPHA            ; shortest ISO 639 code
-     *                 ["-" extlang]       ; sometimes followed by
-     *                                     ;   extended language subtags
-     *               / 4ALPHA              ; or reserved for future use
-     *               / 5*8ALPHA            ; or registered language subtag
+     * unicode_language_subtag = alpha{2,3} | alpha{5,8};
+     * NOTE: Per ICUTC 2019/01/23- accepting alpha 4
+     * See ICU-20372
      */
     if (len < 0) {
         len = (int32_t)uprv_strlen(s);
@@ -438,8 +447,8 @@
     return FALSE;
 }
 
-static UBool
-_isScriptSubtag(const char* s, int32_t len) {
+U_CFUNC UBool
+ultag_isScriptSubtag(const char* s, int32_t len) {
     /*
      * script        = 4ALPHA              ; ISO 15924 code
      */
@@ -452,8 +461,8 @@
     return FALSE;
 }
 
-static UBool
-_isRegionSubtag(const char* s, int32_t len) {
+U_CFUNC UBool
+ultag_isRegionSubtag(const char* s, int32_t len) {
     /*
      * region        = 2ALPHA              ; ISO 3166-1 code
      *               / 3DIGIT              ; UN M.49 code
@@ -479,7 +488,7 @@
     if (len < 0) {
         len = (int32_t)uprv_strlen(s);
     }
-    if (len >= 5 && len <= 8 && _isAlphaNumericString(s, len)) {
+    if (_isAlphaNumericStringLimitedLength(s, len, 5, 8)) {
         return TRUE;
     }
     if (len == 4 && ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) {
@@ -489,18 +498,47 @@
 }
 
 static UBool
+_isSepListOf(UBool (*test)(const char*, int32_t), const char* s, int32_t len) {
+    const char *p = s;
+    const char *pSubtag = NULL;  // start of the current subtag; NULL right after a separator
+
+    if (len < 0) {
+        len = (int32_t)uprv_strlen(s);  // negative len: s is NUL-terminated
+    }
+
+    while ((p - s) < len) {
+        if (*p == SEP) {
+            if (pSubtag == NULL) {
+                return FALSE;  // leading or doubled separator means an empty subtag
+            }
+            if (!test(pSubtag, (int32_t)(p - pSubtag))) {
+                return FALSE;
+            }
+            pSubtag = NULL;
+        } else if (pSubtag == NULL) {
+            pSubtag = p;
+        }
+        p++;
+    }
+    if (pSubtag == NULL) {
+        return FALSE;  // empty input or trailing separator
+    }
+    return test(pSubtag, (int32_t)(p - pSubtag));  // the last subtag has no closing separator
+}
+
+U_CFUNC UBool
+ultag_isVariantSubtags(const char* s, int32_t len) {
+    return _isSepListOf(&_isVariantSubtag, s, len);  // every SEP-separated subtag must pass _isVariantSubtag
+}
+
+// This is for the ICU-specific "lvariant" handling.
+static UBool
 _isPrivateuseVariantSubtag(const char* s, int32_t len) {
     /*
      * variant       = 1*8alphanum         ; registered variants
      *               / (DIGIT 3alphanum)
      */
-    if (len < 0) {
-        len = (int32_t)uprv_strlen(s);
-    }
-    if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) {
-        return TRUE;
-    }
-    return FALSE;
+    return _isAlphaNumericStringLimitedLength(s, len , 1, 8);
 }
 
 static UBool
@@ -528,42 +566,12 @@
     /*
      * extension     = singleton 1*("-" (2*8alphanum))
      */
-    if (len < 0) {
-        len = (int32_t)uprv_strlen(s);
-    }
-    if (len >= 2 && len <= 8 && _isAlphaNumericString(s, len)) {
-        return TRUE;
-    }
-    return FALSE;
+    return _isAlphaNumericStringLimitedLength(s, len, 2, 8);
 }
 
-static UBool
-_isExtensionSubtags(const char* s, int32_t len) {
-    const char *p = s;
-    const char *pSubtag = NULL;
-
-    if (len < 0) {
-        len = (int32_t)uprv_strlen(s);
-    }
-
-    while ((p - s) < len) {
-        if (*p == SEP) {
-            if (pSubtag == NULL) {
-                return FALSE;
-            }
-            if (!_isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag))) {
-                return FALSE;
-            }
-            pSubtag = NULL;
-        } else if (pSubtag == NULL) {
-            pSubtag = p;
-        }
-        p++;
-    }
-    if (pSubtag == NULL) {
-        return FALSE;
-    }
-    return _isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag));
+U_CFUNC UBool
+ultag_isExtensionSubtags(const char* s, int32_t len) {
+    return _isSepListOf(&_isExtensionSubtag, s, len);  // every SEP-separated subtag must pass _isExtensionSubtag
+}
 
 static UBool
@@ -571,46 +579,32 @@
     /*
      * privateuse    = "x" 1*("-" (1*8alphanum))
      */
-    if (len < 0) {
-        len = (int32_t)uprv_strlen(s);
-    }
-    if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) {
-        return TRUE;
-    }
-    return FALSE;
+    return _isAlphaNumericStringLimitedLength(s, len, 1, 8);
 }
 
-static UBool
-_isPrivateuseValueSubtags(const char* s, int32_t len) {
-    const char *p = s;
-    const char *pSubtag = NULL;
+U_CFUNC UBool
+ultag_isPrivateuseValueSubtags(const char* s, int32_t len) {
+    return _isSepListOf(&_isPrivateuseValueSubtag, s, len);  // every subtag must pass _isPrivateuseValueSubtag
+}
 
-    if (len < 0) {
-        len = (int32_t)uprv_strlen(s);
-    }
+U_CFUNC UBool
+ultag_isUnicodeLocaleAttribute(const char* s, int32_t len) {
+    /*
+     * attribute = alphanum{3,8} ;
+     */
+    return _isAlphaNumericStringLimitedLength(s, len , 3, 8);  // a single attribute, no separators
+}
 
-    while ((p - s) < len) {
-        if (*p == SEP) {
-            if (pSubtag == NULL) {
-                return FALSE;
-            }
-            if (!_isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag))) {
-                return FALSE;
-            }
-            pSubtag = NULL;
-        } else if (pSubtag == NULL) {
-            pSubtag = p;
-        }
-        p++;
-    }
-    if (pSubtag == NULL) {
-        return FALSE;
-    }
-    return _isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag));
+U_CFUNC UBool
+ultag_isUnicodeLocaleAttributes(const char* s, int32_t len) {
+    return _isSepListOf(&ultag_isUnicodeLocaleAttribute, s, len);  // SEP-separated list of single attributes
+}
 
 U_CFUNC UBool
 ultag_isUnicodeLocaleKey(const char* s, int32_t len) {
+    /*
+     * key = alphanum alpha ;
+     */
     if (len < 0) {
         len = (int32_t)uprv_strlen(s);
     }
@@ -621,8 +615,159 @@
 }
 
 U_CFUNC UBool
+_isUnicodeLocaleTypeSubtag(const char*s, int32_t len) {
+    /*
+     * alphanum{3,8}
+     * NOTE(review): helper is only used in this view of the file; confirm whether it should be static.
+     */
+    return _isAlphaNumericStringLimitedLength(s, len , 3, 8);
+}
+
+U_CFUNC UBool
 ultag_isUnicodeLocaleType(const char*s, int32_t len) {
+    /*
+     * type = alphanum{3,8} (sep alphanum{3,8})* ;
+     */
+    return _isSepListOf(&_isUnicodeLocaleTypeSubtag, s, len);  // each subtag is checked individually
+}
+
+static UBool
+_isTKey(const char* s, int32_t len)
+{
+    /*
+     * tkey = alpha digit ;
+     * A negative len means s is NUL-terminated and is measured here.
+     */
+    const int32_t n = (len < 0) ? (int32_t)uprv_strlen(s) : len;
+    if (n != 2) {
+        return FALSE;
+    }
+    // Exactly two characters: a letter followed by a digit.
+    return (ISALPHA(s[0]) && ISNUMERIC(s[1])) ? TRUE : FALSE;
+}
+
+static UBool
+_isTValue(const char* s, int32_t len)
+{
+    /*
+     * tvalue = (sep alphanum{3,8})+ ;
+     */
+    return _isAlphaNumericStringLimitedLength(s, len , 3, 8);  // checks one alphanum{3,8} subtag only
+}
+
+static UBool
+_isTransformedExtensionSubtag(int32_t& state, const char* s, int32_t len)
+{
+    const int32_t kStart = 0;       // Start, wait for unicode_language_subtag, tkey or end
+    const int32_t kGotLanguage = 1; // Got unicode_language_subtag, wait for unicode_script_subtag,
+                                    // unicode_region_subtag, unicode_variant_subtag, tkey or end
+    const int32_t kGotScript = 2;   // Got unicode_script_subtag, wait for unicode_region_subtag,
+                                    // unicode_variant_subtag, tkey, or end
+    const int32_t kGotRegion = 3;   // Got unicode_region_subtag, wait for unicode_variant_subtag,
+                                    // tkey, or end.
+    const int32_t kGotVariant = 4;  // Got unicode_variant_subtag, wait for unicode_variant_subtag
+                                    // tkey or end.
+    const int32_t kGotTKey = -1;    // Got tkey, wait for tvalue. ERROR if stop here.
+    const int32_t kGotTValue = 6;   // Got tvalue, wait for tkey, tvalue or end
+
+    switch (state) {
+        case kStart:
+            if (ultag_isLanguageSubtag(s, len)) {
+                state = kGotLanguage;
+                return TRUE;
+            }
+            if (_isTKey(s, len)) {
+                state = kGotTKey;
+                return TRUE;
+            }
+            return FALSE;
+        case kGotLanguage:
+            if (ultag_isScriptSubtag(s, len)) {
+                state = kGotScript;
+                return TRUE;
+            }
+            U_FALLTHROUGH;  // not a script: try the checks of the later states
+        case kGotScript:
+            if (ultag_isRegionSubtag(s, len)) {
+                state = kGotRegion;
+                return TRUE;
+            }
+            U_FALLTHROUGH;  // not a region: try variant or tkey
+        case kGotRegion:
+            U_FALLTHROUGH;
+        case kGotVariant:
+            if (_isVariantSubtag(s, len)) {
+                state = kGotVariant;
+                return TRUE;
+            }
+            if (_isTKey(s, len)) {
+                state = kGotTKey;
+                return TRUE;
+            }
+            return FALSE;
+        case kGotTKey:
+            if (_isTValue(s, len)) {
+                state = kGotTValue;
+                return TRUE;
+            }
+            return FALSE;
+        case kGotTValue:
+            if (_isTKey(s, len)) {
+                state = kGotTKey;
+                return TRUE;
+            }
+            if (_isTValue(s, len)) {
+                return TRUE;  // another tvalue subtag for the same tkey; state unchanged
+            }
+            return FALSE;
+    }
+    return FALSE;  // unknown state value
+}
+
+static UBool
+_isUnicodeExtensionSubtag(int32_t& state, const char* s, int32_t len)
+{
+    const int32_t kStart = 0;         // Start, wait for a key or attribute or end
+    const int32_t kGotKey = 1;        // Got a key, wait for type or key or end
+    const int32_t kGotType = 2;       // Got a type, wait for key or end
+
+    switch (state) {
+        case kStart:
+            if (ultag_isUnicodeLocaleKey(s, len)) {
+                state = kGotKey;
+                return TRUE;
+            }
+            if (ultag_isUnicodeLocaleAttribute(s, len)) {
+                return TRUE;  // attribute accepted; state stays kStart so more may follow
+            }
+            return FALSE;
+        case kGotKey:
+            if (ultag_isUnicodeLocaleKey(s, len)) {
+                return TRUE;  // a key directly followed by another key; state stays kGotKey
+            }
+            if (_isUnicodeLocaleTypeSubtag(s, len)) {
+                state = kGotType;
+                return TRUE;
+            }
+            return FALSE;
+        case kGotType:
+            if (ultag_isUnicodeLocaleKey(s, len)) {
+                state = kGotKey;
+                return TRUE;
+            }
+            if (_isUnicodeLocaleTypeSubtag(s, len)) {
+                return TRUE;  // another type subtag for the same key
+            }
+            return FALSE;
+    }
+    return FALSE;  // unknown state value
+}
+
+static UBool
+_isStatefulSepListOf(UBool (*test)(int32_t&, const char*, int32_t), const char* s, int32_t len)
+{
+    int32_t state = 0;
     const char* p;
+    const char* start = s;
     int32_t subtagLen = 0;
 
     if (len < 0) {
@@ -631,22 +776,34 @@
 
     for (p = s; len > 0; p++, len--) {
         if (*p == SEP) {
-            if (subtagLen < 3) {
+            if (!test(state, start, subtagLen)) {
                 return FALSE;
             }
             subtagLen = 0;
-        } else if (ISALPHA(*p) || ISNUMERIC(*p)) {
-            subtagLen++;
-            if (subtagLen > 8) {
-                return FALSE;
-            }
+            start = p + 1;
         } else {
-            return FALSE;
+            subtagLen++;
         }
     }
 
-    return (subtagLen >= 3);
+    if (test(state, start, subtagLen) && state >= 0) {
+        return TRUE;
+    }
+    return FALSE;
 }
+
+U_CFUNC UBool
+ultag_isTransformedExtensionSubtags(const char* s, int32_t len)
+{
+    return _isStatefulSepListOf(&_isTransformedExtensionSubtag, s, len);  // subtags are fed through the state machine in order
+}
+
+U_CFUNC UBool
+ultag_isUnicodeExtensionSubtags(const char* s, int32_t len) {
+    return _isStatefulSepListOf(&_isUnicodeExtensionSubtag, s, len);  // subtags are fed through the state machine in order
+}
+
+
 /*
 * -------------------------------------------------
 *
@@ -856,7 +1013,7 @@
 
     if (len == 0) {
         sink.Append(LANG_UND, LANG_UND_LEN);
-    } else if (!_isLanguageSubtag(buf, len)) {
+    } else if (!ultag_isLanguageSubtag(buf, len)) {
             /* invalid language code */
         if (strict) {
             *status = U_ILLEGAL_ARGUMENT_ERROR;
@@ -900,7 +1057,7 @@
     }
 
     if (len > 0) {
-        if (!_isScriptSubtag(buf, len)) {
+        if (!ultag_isScriptSubtag(buf, len)) {
             /* invalid script code */
             if (strict) {
                 *status = U_ILLEGAL_ARGUMENT_ERROR;
@@ -932,7 +1089,7 @@
     }
 
     if (len > 0) {
-        if (!_isRegionSubtag(buf, len)) {
+        if (!ultag_isRegionSubtag(buf, len)) {
             /* invalid region code */
             if (strict) {
                 *status = U_ILLEGAL_ARGUMENT_ERROR;
@@ -1252,7 +1409,7 @@
                 }
             } else {
                 if (*key == PRIVATEUSE) {
-                    if (!_isPrivateuseValueSubtags(buf.data(), len)) {
+                    if (!ultag_isPrivateuseValueSubtags(buf.data(), len)) {
                         if (strict) {
                             *status = U_ILLEGAL_ARGUMENT_ERROR;
                             break;
@@ -1260,7 +1417,7 @@
                         continue;
                     }
                 } else {
-                    if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf.data(), len)) {
+                    if (!_isExtensionSingleton(key, keylen) || !ultag_isExtensionSubtags(buf.data(), len)) {
                         if (strict) {
                             *status = U_ILLEGAL_ARGUMENT_ERROR;
                             break;
@@ -1997,7 +2154,7 @@
         subtagLen = (int32_t)(pSep - pSubtag);
 
         if (next & LANG) {
-            if (_isLanguageSubtag(pSubtag, subtagLen)) {
+            if (ultag_isLanguageSubtag(pSubtag, subtagLen)) {
                 *pSep = 0;  /* terminate */
                 // TODO: move deprecated language code handling here.
                 t->language = T_CString_toLowerCase(pSubtag);
@@ -2024,7 +2181,7 @@
             }
         }
         if (next & SCRT) {
-            if (_isScriptSubtag(pSubtag, subtagLen)) {
+            if (ultag_isScriptSubtag(pSubtag, subtagLen)) {
                 char *p = pSubtag;
 
                 *pSep = 0;
@@ -2044,7 +2201,7 @@
             }
         }
         if (next & REGN) {
-            if (_isRegionSubtag(pSubtag, subtagLen)) {
+            if (ultag_isRegionSubtag(pSubtag, subtagLen)) {
                 *pSep = 0;
                 // TODO: move deprecated region code handling here.
                 t->region = T_CString_toUpperCase(pSubtag);
@@ -2535,7 +2692,7 @@
                     buf[1] = SEP;
                     len = uloc_getKeywordValue(localeID, key, &buf[2], sizeof(buf) - 2, &tmpStatus);
                     if (U_SUCCESS(tmpStatus)) {
-                        if (_isPrivateuseValueSubtags(&buf[2], len)) {
+                        if (ultag_isPrivateuseValueSubtags(&buf[2], len)) {
                             /* return private use only tag */
                             sink.Append(buf, len + 2);
                             done = TRUE;
diff --git a/icu4c/source/common/ulocimp.h b/icu4c/source/common/ulocimp.h
index f268f89..fd16af5 100644
--- a/icu4c/source/common/ulocimp.h
+++ b/icu4c/source/common/ulocimp.h
@@ -148,6 +148,32 @@
 U_CAPI const char * U_EXPORT2
 locale_getKeywordsStart(const char *localeID);
 
+U_CFUNC UBool
+ultag_isExtensionSubtags(const char* s, int32_t len);
+
+U_CFUNC UBool
+ultag_isLanguageSubtag(const char* s, int32_t len);
+
+U_CFUNC UBool
+ultag_isPrivateuseValueSubtags(const char* s, int32_t len);
+
+U_CFUNC UBool
+ultag_isRegionSubtag(const char* s, int32_t len);
+
+U_CFUNC UBool
+ultag_isScriptSubtag(const char* s, int32_t len);
+
+U_CFUNC UBool
+ultag_isTransformedExtensionSubtags(const char* s, int32_t len);
+
+U_CFUNC UBool
+ultag_isUnicodeExtensionSubtags(const char* s, int32_t len);
+
+U_CFUNC UBool
+ultag_isUnicodeLocaleAttribute(const char* s, int32_t len);
+
+U_CFUNC UBool
+ultag_isUnicodeLocaleAttributes(const char* s, int32_t len);
 
 U_CFUNC UBool
 ultag_isUnicodeLocaleKey(const char* s, int32_t len);
@@ -155,6 +181,9 @@
 U_CFUNC UBool
 ultag_isUnicodeLocaleType(const char* s, int32_t len);
 
+U_CFUNC UBool
+ultag_isVariantSubtags(const char* s, int32_t len);
+
 U_CFUNC const char*
 ulocimp_toBcpKey(const char* key);
 
diff --git a/icu4c/source/common/unicode/localebuilder.h b/icu4c/source/common/unicode/localebuilder.h
new file mode 100644
index 0000000..8cd2039
--- /dev/null
+++ b/icu4c/source/common/unicode/localebuilder.h
@@ -0,0 +1,288 @@
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+#ifndef __LOCALEBUILDER_H__
+#define __LOCALEBUILDER_H__
+
+#include "unicode/locid.h"
+#include "unicode/stringpiece.h"
+#include "unicode/uobject.h"
+#include "unicode/utypes.h"
+
+
+/**
+ * \file
+ * \brief C++ API: Builder API for Locale
+ */
+
+U_NAMESPACE_BEGIN
+class CharString;
+
+#ifndef U_HIDE_DRAFT_API
+/**
+ * <code>LocaleBuilder</code> is used to build instances of <code>Locale</code>
+ * from values configured by the setters.  Unlike the <code>Locale</code>
+ * constructors, the <code>LocaleBuilder</code> checks if a value configured by a
+ * setter satisfies the syntax requirements defined by the <code>Locale</code>
+ * class.  A <code>Locale</code> object created by a <code>LocaleBuilder</code> is
+ * well-formed and can be transformed to a well-formed IETF BCP 47 language tag
+ * without losing information.
+ *
+ * <p>The following example shows how to create a <code>Locale</code> object
+ * with the <code>LocaleBuilder</code>.
+ * <blockquote>
+ * <pre>
+ *     UErrorCode status = U_ZERO_ERROR;
+ *     Locale aLocale = LocaleBuilder()
+ *                          .setLanguage("sr")
+ *                          .setScript("Latn")
+ *                          .setRegion("RS")
+ *                          .build(status);
+ *     if (U_SUCCESS(status)) {
+ *       // ...
+ *     }
+ * </pre>
+ * </blockquote>
+ *
+ * <p>LocaleBuilders can be reused; <code>clear()</code> resets all
+ * fields to their default values.
+ *
+ * <p>LocaleBuilder tracks errors in an internal UErrorCode. For all setters,
+ * except setLanguageTag and setLocale, LocaleBuilder will return immediately
+ * if the internal UErrorCode is in error state.
+ * To reset the internal state and error code, call the clear() method.
+ * The setLanguageTag and setLocale methods will first clear the internal
+ * UErrorCode, then record any error from validating the input parameter
+ * in the internal UErrorCode.
+ *
+ * @draft ICU 64
+ */
+class U_COMMON_API LocaleBuilder : public UObject {
+public:
+    /**
+     * Constructs an empty LocaleBuilder. The default value of all
+     * fields, extensions, and private use information is the
+     * empty string.
+     *
+     * @draft ICU 64
+     */
+    LocaleBuilder();
+
+    virtual ~LocaleBuilder();
+
+    /**
+     * Resets the <code>LocaleBuilder</code> to match the provided
+     * <code>locale</code>.  Existing state is discarded.
+     *
+     * <p>All fields of the locale must be well-formed.
+     * <p>This method clears the internal UErrorCode.
+     *
+     * @param locale the locale
+     * @return This builder.
+     *
+     * @draft ICU 64
+     */
+    LocaleBuilder& setLocale(const Locale& locale);
+
+    /**
+     * Resets the LocaleBuilder to match the provided
+     * [Unicode Locale Identifier](http://www.unicode.org/reports/tr35/tr35.html#unicode_locale_id) .
+     * Discards the existing state. The empty string causes the builder to be
+     * reset, like {@link #clear}.  Grandfathered tags are converted to their
+     * canonical form before being processed.  Otherwise, the <code>language
+     * tag</code> must be well-formed, or else the build() method will later
+     * report an U_ILLEGAL_ARGUMENT_ERROR.
+     *
+     * <p>This method clears the internal UErrorCode.
+     *
+     * @param tag the language tag, defined as
+     *   [unicode_locale_id](http://www.unicode.org/reports/tr35/tr35.html#unicode_locale_id).
+     * @return This builder.
+     * @draft ICU 64
+     */
+    LocaleBuilder& setLanguageTag(StringPiece tag);
+
+    /**
+     * Sets the language.  If <code>language</code> is the empty string, the
+     * language in this <code>LocaleBuilder</code> is removed. Otherwise, the
+     * <code>language</code> must be well-formed, or else the build() method will
+     * later report an U_ILLEGAL_ARGUMENT_ERROR.
+     *
+     * <p>The syntax of language value is defined as
+     * [unicode_language_subtag](http://www.unicode.org/reports/tr35/tr35.html#unicode_language_subtag).
+     *
+     * @param language the language
+     * @return This builder.
+     * @draft ICU 64
+     */
+    LocaleBuilder& setLanguage(StringPiece language);
+
+    /**
+     * Sets the script. If <code>script</code> is the empty string, the script in
+     * this <code>LocaleBuilder</code> is removed.
+     * Otherwise, the <code>script</code> must be well-formed, or else the build()
+     * method will later report an U_ILLEGAL_ARGUMENT_ERROR.
+     *
+     * <p>The script value is a four-letter script code, as
+     * [unicode_script_subtag](http://www.unicode.org/reports/tr35/tr35.html#unicode_script_subtag)
+     * defined by ISO 15924.
+     *
+     * @param script the script
+     * @return This builder.
+     * @draft ICU 64
+     */
+    LocaleBuilder& setScript(StringPiece script);
+
+    /**
+     * Sets the region.  If region is the empty string, the region in this
+     * <code>LocaleBuilder</code> is removed. Otherwise, the <code>region</code>
+     * must be well-formed, or else the build() method will later report an
+     * U_ILLEGAL_ARGUMENT_ERROR.
+     *
+     * <p>The region value is defined by
+     *  [unicode_region_subtag](http://www.unicode.org/reports/tr35/tr35.html#unicode_region_subtag)
+     * as a two-letter ISO 3166 code or a three-digit UN M.49 area code.
+     *
+     * <p>The region value in the <code>Locale</code> created by the
+     * <code>LocaleBuilder</code> is always normalized to upper case.
+     *
+     * @param region the region
+     * @return This builder.
+     * @draft ICU 64
+     */
+    LocaleBuilder& setRegion(StringPiece region);
+
+    /**
+     * Sets the variant.  If variant is the empty string, the variant in this
+     * <code>LocaleBuilder</code> is removed.  Otherwise, the <code>variant</code>
+     * must be well-formed, or else the build() method will later report an
+     * U_ILLEGAL_ARGUMENT_ERROR.
+     *
+     * <p><b>Note:</b> This method checks if <code>variant</code>
+     * satisfies the
+     * [unicode_variant_subtag](http://www.unicode.org/reports/tr35/tr35.html#unicode_variant_subtag)
+     * syntax requirements, and normalizes the value to lowercase letters. However,
+     * the <code>Locale</code> class does not impose any syntactic
+     * restriction on variant. To set an ill-formed variant, use a Locale constructor.
+     * If there are multiple unicode_variant_subtag values, the caller must
+     * concatenate them with '-' as separator (ex: "foobar-fibar").
+     *
+     * @param variant the variant
+     * @return This builder.
+     * @draft ICU 64
+     */
+    LocaleBuilder& setVariant(StringPiece variant);
+
+    /**
+     * Sets the extension for the given key. If the value is the empty string,
+     * the extension is removed.  Otherwise, the <code>key</code> and
+     * <code>value</code> must be well-formed, or else the build() method will
+     * later report an U_ILLEGAL_ARGUMENT_ERROR.
+     *
+     * <p><b>Note:</b> The key ('u') is used for the Unicode locale extension.
+     * Setting a value for this key replaces any existing Unicode locale key/type
+     * pairs with those defined in the extension.
+     *
+     * <p><b>Note:</b> The key ('x') is used for the private use code. To be
+     * well-formed, the value for this key needs only to have subtags of one to
+     * eight alphanumeric characters, not two to eight as in the general case.
+     *
+     * @param key the extension key
+     * @param value the extension value
+     * @return This builder.
+     * @draft ICU 64
+     */
+    LocaleBuilder& setExtension(char key, StringPiece value);
+
+    /**
+     * Sets the Unicode locale keyword type for the given key. If the type
+     * StringPiece is constructed with a nullptr, the keyword is removed.
+     * If the type is the empty string, the keyword is set without type subtags.
+     * Otherwise, the key and type must be well-formed, or else the build()
+     * method will later report an U_ILLEGAL_ARGUMENT_ERROR.
+     *
+     * <p>Keys and types are converted to lower case.
+     *
+     * <p><b>Note:</b> Setting the 'u' extension via {@link #setExtension}
+     * replaces all Unicode locale keywords with those defined in the
+     * extension.
+     *
+     * @param key the Unicode locale key
+     * @param type the Unicode locale type
+     * @return This builder.
+     * @draft ICU 64
+     */
+    LocaleBuilder& setUnicodeLocaleKeyword(
+        StringPiece key, StringPiece type);
+
+    /**
+     * Adds a unicode locale attribute, if not already present, otherwise
+     * has no effect.  The attribute must not be empty string and must be
+     * well-formed or U_ILLEGAL_ARGUMENT_ERROR will be set to status
+     * during the build() call.
+     *
+     * @param attribute the attribute
+     * @return This builder.
+     * @draft ICU 64
+     */
+    LocaleBuilder& addUnicodeLocaleAttribute(StringPiece attribute);
+
+    /**
+     * Removes a unicode locale attribute, if present, otherwise has no
+     * effect.  The attribute must not be empty string and must be well-formed
+     * or U_ILLEGAL_ARGUMENT_ERROR will be set to status during the build() call.
+     *
+     * <p>Attribute comparison for removal is case-insensitive.
+     *
+     * @param attribute the attribute
+     * @return This builder.
+     * @draft ICU 64
+     */
+    LocaleBuilder& removeUnicodeLocaleAttribute(StringPiece attribute);
+
+    /**
+     * Resets the builder to its initial, empty state.
+     * <p>This method clears the internal UErrorCode.
+     *
+     * @return this builder
+     * @draft ICU 64
+     */
+    LocaleBuilder& clear();
+
+    /**
+     * Resets the extensions to their initial, empty state.
+     * Language, script, region and variant are unchanged.
+     *
+     * @return this builder
+     * @draft ICU 64
+     */
+    LocaleBuilder& clearExtensions();
+
+    /**
+     * Returns an instance of <code>Locale</code> created from the fields set
+     * on this builder.
+     * If a memory allocation required by any setter or by the build() call
+     * itself fails, status will be set to U_MEMORY_ALLOCATION_ERROR.
+     * If any of the fields set by the setters are not well-formed, the status
+     * will be set to U_ILLEGAL_ARGUMENT_ERROR. The state of the builder will
+     * not change after the build() call and the caller is free to keep using
+     * the same builder to build more locales.
+     *
+     * @return a new Locale
+     * @draft ICU 64
+     */
+    Locale build(UErrorCode& status);
+
+private:
+    UErrorCode status_;
+    char language_[9];
+    char script_[5];
+    char region_[4];
+    CharString *variant_;  // Pointer not object so we need not #include internal charstr.h.
+    icu::Locale *extensions_;  // Pointer not object. Storage for all other fields.
+
+};
+#endif  // U_HIDE_DRAFT_API
+
+U_NAMESPACE_END
+
+#endif  // __LOCALEBUILDER_H__
diff --git a/icu4c/source/common/unicode/urename.h b/icu4c/source/common/unicode/urename.h
index 0512be3..cea3be4 100644
--- a/icu4c/source/common/unicode/urename.h
+++ b/icu4c/source/common/unicode/urename.h
@@ -1109,6 +1109,16 @@
 #define ulocimp_toLegacyType U_ICU_ENTRY_POINT_RENAME(ulocimp_toLegacyType)
 #define ultag_isUnicodeLocaleKey U_ICU_ENTRY_POINT_RENAME(ultag_isUnicodeLocaleKey)
 #define ultag_isUnicodeLocaleType U_ICU_ENTRY_POINT_RENAME(ultag_isUnicodeLocaleType)
+#define ultag_isExtensionSubtags U_ICU_ENTRY_POINT_RENAME(ultag_isExtensionSubtags)
+#define ultag_isLanguageSubtag U_ICU_ENTRY_POINT_RENAME(ultag_isLanguageSubtag)
+#define ultag_isPrivateuseValueSubtags U_ICU_ENTRY_POINT_RENAME(ultag_isPrivateuseValueSubtags)
+#define ultag_isRegionSubtag U_ICU_ENTRY_POINT_RENAME(ultag_isRegionSubtag)
+#define ultag_isScriptSubtag U_ICU_ENTRY_POINT_RENAME(ultag_isScriptSubtag)
+#define ultag_isTransformedExtensionSubtags U_ICU_ENTRY_POINT_RENAME(ultag_isTransformedExtensionSubtags)
+#define ultag_isUnicodeExtensionSubtags U_ICU_ENTRY_POINT_RENAME(ultag_isUnicodeExtensionSubtags)
+#define ultag_isUnicodeLocaleAttribute U_ICU_ENTRY_POINT_RENAME(ultag_isUnicodeLocaleAttribute)
+#define ultag_isUnicodeLocaleAttributes U_ICU_ENTRY_POINT_RENAME(ultag_isUnicodeLocaleAttributes)
+#define ultag_isVariantSubtags U_ICU_ENTRY_POINT_RENAME(ultag_isVariantSubtags)
 #define umsg_applyPattern U_ICU_ENTRY_POINT_RENAME(umsg_applyPattern)
 #define umsg_autoQuoteApostrophe U_ICU_ENTRY_POINT_RENAME(umsg_autoQuoteApostrophe)
 #define umsg_clone U_ICU_ENTRY_POINT_RENAME(umsg_clone)
diff --git a/icu4c/source/test/depstest/dependencies.txt b/icu4c/source/test/depstest/dependencies.txt
index d2682ab..1e51980 100644
--- a/icu4c/source/test/depstest/dependencies.txt
+++ b/icu4c/source/test/depstest/dependencies.txt
@@ -188,6 +188,7 @@
     uinit utypes errorcode
     icuplug
     platform
+    localebuilder
 
 group: pluralmap
     # TODO: Move to i18n library, ticket #11926.
@@ -643,6 +644,11 @@
     uscript_props propname
     bytesinkutil
 
+group: localebuilder
+    localebuilder.o
+  deps
+    resourcebundle
+
 group: udata
     udata.o ucmndata.o udatamem.o
     umapfile.o
diff --git a/icu4c/source/test/intltest/Makefile.in b/icu4c/source/test/intltest/Makefile.in
index c049a5c..b4cf918 100644
--- a/icu4c/source/test/intltest/Makefile.in
+++ b/icu4c/source/test/intltest/Makefile.in
@@ -44,7 +44,7 @@
 fldset.o dadrfmt.o dadrcal.o dcfmapts.o decoll.o dtfmapts.o dtfmrgts.o dtfmtrtts.o dtfmttst.o	\
 dtptngts.o encoll.o escoll.o ficoll.o frcoll.o g7coll.o intltest.o	\
 itercoll.o itformat.o itmajor.o itutil.o jacoll.o lcukocol.o	\
-loctest.o miscdtfm.o mnkytst.o msfmrgts.o nmfmapts.o nmfmtrt.o		\
+loctest.o localebuildertest.o miscdtfm.o mnkytst.o msfmrgts.o nmfmapts.o nmfmtrt.o		\
 numfmtst.o numrgts.o  plurults.o plurfmts.o pptest.o regcoll.o restest.o restsnew.o \
 sdtfmtts.o svccoll.o tchcfmt.o	selfmts.o \
 tfsmalls.o tmsgfmt.o trcoll.o tscoll.o tsdate.o tsdcfmsy.o tsdtfmsy.o	\
diff --git a/icu4c/source/test/intltest/intltest.vcxproj b/icu4c/source/test/intltest/intltest.vcxproj
index 298a7f5..5e82ef3 100644
--- a/icu4c/source/test/intltest/intltest.vcxproj
+++ b/icu4c/source/test/intltest/intltest.vcxproj
@@ -364,6 +364,7 @@
     <ClCompile Include="bidiconf.cpp" />
     <ClCompile Include="listformattertest.cpp" />
     <ClCompile Include="formattedvaluetest.cpp" />
+    <ClCompile Include="localebuildertest.cpp" />
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="colldata.h" />
@@ -494,8 +495,9 @@
     <ClInclude Include="convtest.h" />
     <ClInclude Include="csdetest.h" />
     <ClInclude Include="listformattertest.h" />
+    <ClInclude Include="localebuildertest.h" />
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>
-</Project>
\ No newline at end of file
+</Project>
diff --git a/icu4c/source/test/intltest/intltest.vcxproj.filters b/icu4c/source/test/intltest/intltest.vcxproj.filters
index d707727..bed26bc 100644
--- a/icu4c/source/test/intltest/intltest.vcxproj.filters
+++ b/icu4c/source/test/intltest/intltest.vcxproj.filters
@@ -540,6 +540,9 @@
     </ClCompile>
     <ClCompile Include="formattedvaluetest.cpp">
       <Filter>formatting</Filter>
+    </ClCompile>
+    <ClCompile Include="localebuildertest.cpp">
+      <Filter>locales &amp; resources</Filter>
     </ClCompile>
   </ItemGroup>
   <ItemGroup>
@@ -927,5 +930,8 @@
     <ClInclude Include="erarulestest.h">
       <Filter>formatting</Filter>
     </ClInclude>
+    <ClInclude Include="localebuildertest.h">
+      <Filter>locales &amp; resources</Filter>
+    </ClInclude>
   </ItemGroup>
-</Project>
\ No newline at end of file
+</Project>
diff --git a/icu4c/source/test/intltest/itutil.cpp b/icu4c/source/test/intltest/itutil.cpp
index 91d81d0..3cda39d 100644
--- a/icu4c/source/test/intltest/itutil.cpp
+++ b/icu4c/source/test/intltest/itutil.cpp
@@ -19,6 +19,7 @@
 #include "itutil.h"
 #include "strtest.h"
 #include "loctest.h"
+#include "localebuildertest.h"
 #include "citrtest.h"
 #include "ustrtest.h"
 #include "ucdtest.h"
@@ -149,6 +150,7 @@
             }
 #endif
             break;
+        CASE(25, LocaleBuilderTest);
         default: name = ""; break; //needed to end loop
     }
 }
diff --git a/icu4c/source/test/intltest/localebuildertest.cpp b/icu4c/source/test/intltest/localebuildertest.cpp
new file mode 100644
index 0000000..f99057f
--- /dev/null
+++ b/icu4c/source/test/intltest/localebuildertest.cpp
@@ -0,0 +1,1627 @@
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include <memory>
+
+#include "cmemory.h"
+#include "cstring.h"
+#include "localebuildertest.h"
+#include "unicode/localebuilder.h"
+#include "unicode/strenum.h"
+
+LocaleBuilderTest::LocaleBuilderTest()
+{   // Empty body: no initialization is performed here.
+}
+
+LocaleBuilderTest::~LocaleBuilderTest()
+{   // Empty body: no cleanup is performed here.
+}
+
+void LocaleBuilderTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
+{   // Test-suite dispatcher for this class; the last parameter is unnamed and unused.
+    TESTCASE_AUTO_BEGIN;  // NOTE(review): macros come from outside this patch; presumably they map index to a test and set name -- confirm
+    TESTCASE_AUTO(TestAddRemoveUnicodeLocaleAttribute);
+    TESTCASE_AUTO(TestAddRemoveUnicodeLocaleAttributeWellFormed);
+    TESTCASE_AUTO(TestAddUnicodeLocaleAttributeIllFormed);
+    TESTCASE_AUTO(TestLocaleBuilder);
+    TESTCASE_AUTO(TestLocaleBuilderBasic);
+    TESTCASE_AUTO(TestPosixCases);
+    TESTCASE_AUTO(TestSetExtensionOthers);
+    TESTCASE_AUTO(TestSetExtensionPU);
+    TESTCASE_AUTO(TestSetExtensionT);
+    TESTCASE_AUTO(TestSetExtensionU);
+    TESTCASE_AUTO(TestSetExtensionValidateOthersIllFormed);
+    TESTCASE_AUTO(TestSetExtensionValidateOthersWellFormed);
+    TESTCASE_AUTO(TestSetExtensionValidatePUIllFormed);
+    TESTCASE_AUTO(TestSetExtensionValidatePUWellFormed);
+    TESTCASE_AUTO(TestSetExtensionValidateTIllFormed);
+    TESTCASE_AUTO(TestSetExtensionValidateTWellFormed);
+    TESTCASE_AUTO(TestSetExtensionValidateUIllFormed);
+    TESTCASE_AUTO(TestSetExtensionValidateUWellFormed);
+    TESTCASE_AUTO(TestSetLanguageIllFormed);
+    TESTCASE_AUTO(TestSetLanguageWellFormed);
+    TESTCASE_AUTO(TestSetLocale);
+    TESTCASE_AUTO(TestSetRegionIllFormed);
+    TESTCASE_AUTO(TestSetRegionWellFormed);
+    TESTCASE_AUTO(TestSetScriptIllFormed);
+    TESTCASE_AUTO(TestSetScriptWellFormed);
+    TESTCASE_AUTO(TestSetUnicodeLocaleKeywordIllFormedKey);
+    TESTCASE_AUTO(TestSetUnicodeLocaleKeywordIllFormedValue);
+    TESTCASE_AUTO(TestSetUnicodeLocaleKeywordWellFormed);
+    TESTCASE_AUTO(TestSetVariantIllFormed);
+    TESTCASE_AUTO(TestSetVariantWellFormed);
+    TESTCASE_AUTO_END;
+}
+
+void LocaleBuilderTest::Verify(LocaleBuilder& bld, const char* expected, const char* msg) {
+    UErrorCode status = U_ZERO_ERROR;  // Verify(): build bld, report failures, compare its BCP 47 tag with expected
+    Locale loc = bld.build(status);
+    if (U_FAILURE(status)) {
+        errln(msg, u_errorName(status));  // msg is used as the format string and receives the error name
+    }
+    std::string tag = loc.toLanguageTag<std::string>(status);  // status is not reset after a build() failure
+    if (U_FAILURE(status)) {
+        errln("loc.toLanguageTag() got Error: %s\n",
+              u_errorName(status));
+    }
+    if (tag != expected) {  // std::string vs const char* comparison: compares contents, not pointers
+        errln("should get \"%s\", but got \"%s\"\n", expected, tag.c_str());
+    }
+}
+
+void LocaleBuilderTest::TestLocaleBuilder() {
+    // The following test data are copied from
+    // icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleBuilderTest.java
+    // "L": +1 = language
+    // "S": +1 = script
+    // "R": +1 = region
+    // "V": +1 = variant
+    // "K": +1 = Unicode locale key / +2 = Unicode locale type
+    // "A": +1 = Unicode locale attribute
+    // "E": +1 = extension letter / +2 = extension value
+    // "P": +1 = private use
+    // "U": +1 = ULocale
+    // "B": +1 = BCP47 language tag
+    // "C": Clear all
+    // "N": Clear extensions
+    // "D": +1 = Unicode locale attribute to be removed
+    // "X": indicates an error must be reported (an exception in the ICU4J original)
+    // "T": +1 = expected language tag / +2 = expected locale string
+    const char* TESTCASES[][14] = {
+        {"L", "en", "R", "us", "T", "en-US", "en_US"},
+        {"L", "en", "R", "CA", "L", nullptr, "T", "und-CA", "_CA"},
+        {"L", "en", "R", "CA", "L", "", "T", "und-CA", "_CA"},
+        {"L", "en", "R", "FR", "L", "fr", "T", "fr-FR", "fr_FR"},
+        {"L", "123", "X"},
+        {"R", "us", "T", "und-US", "_US"},
+        {"R", "usa", "X"},
+        {"R", "123", "L", "it", "R", nullptr, "T", "it", "it"},
+        {"R", "123", "L", "it", "R", "", "T", "it", "it"},
+        {"R", "123", "L", "en", "T", "en-123", "en_123"},
+        {"S", "LATN", "L", "DE", "T", "de-Latn", "de_Latn"},
+        {"L", "De", "S", "latn", "R", "de", "S", "", "T", "de-DE", "de_DE"},
+        {"L", "De", "S", "Arab", "R", "de", "S", nullptr, "T", "de-DE", "de_DE"},
+        {"S", "latin", "X"},
+        {"V", "1234", "L", "en", "T", "en-1234", "en__1234"},
+        {"V", "1234", "L", "en", "V", "5678", "T", "en-5678", "en__5678"},
+        {"V", "1234", "L", "en", "V", nullptr, "T", "en", "en"},
+        {"V", "1234", "L", "en", "V", "", "T", "en", "en"},
+        {"V", "123", "X"},
+        {"U", "en_US", "T", "en-US", "en_US"},
+        {"U", "en_US_WIN", "X"},
+        {"B", "fr-FR-1606nict-u-ca-gregory-x-test", "T",
+          "fr-FR-1606nict-u-ca-gregory-x-test",
+          "fr_FR_1606NICT@calendar=gregorian;x=test"},
+        {"B", "ab-cde-fghij", "T", "cde-fghij", "cde__FGHIJ"},
+        {"B", "und-CA", "T", "und-CA", "_CA"},
+        // Blocked by ICU-20327
+        // {"B", "en-US-x-test-lvariant-var", "T", "en-US-x-test-lvariant-var",
+        // "en_US_VAR@x=test"},
+        {"B", "en-US-VAR", "X"},
+        {"U", "ja_JP@calendar=japanese;currency=JPY", "L", "ko", "T",
+          "ko-JP-u-ca-japanese-cu-jpy", "ko_JP@calendar=japanese;currency=JPY"},
+        {"U", "ja_JP@calendar=japanese;currency=JPY", "K", "ca", nullptr, "T",
+          "ja-JP-u-cu-jpy", "ja_JP@currency=JPY"},
+        {"U", "ja_JP@calendar=japanese;currency=JPY", "E", "u",
+          "attr1-ca-gregory", "T", "ja-JP-u-attr1-ca-gregory",
+          "ja_JP@attribute=attr1;calendar=gregorian"},
+        {"U", "en@colnumeric=yes", "K", "kn", "true", "T", "en-u-kn-true",
+          "en@colnumeric=yes"},
+        {"L", "th", "R", "th", "K", "nu", "thai", "T", "th-TH-u-nu-thai",
+          "th_TH@numbers=thai"},
+        {"U", "zh_Hans", "R", "sg", "K", "ca", "badcalendar", "X"},
+        {"U", "zh_Hans", "R", "sg", "K", "cal", "gregory", "X"},
+        {"E", "z", "ExtZ", "L", "en", "T", "en-z-extz", "en@z=extz"},
+        {"E", "z", "ExtZ", "L", "en", "E", "z", "", "T", "en", "en"},
+        {"E", "z", "ExtZ", "L", "en", "E", "z", nullptr, "T", "en", "en"},
+        {"E", "a", "x", "X"},
+        {"E", "a", "abc_def", "T", "und-a-abc-def", "@a=abc-def"},
+        // Design limitation - typeless u extension keyword 0a below is interpreted as a boolean value true/yes.
+        // With the legacy keyword syntax, "yes" is used for such boolean value instead of "true".
+        // However, once the legacy keyword is translated back to BCP 47 u extension, key "0a" is unknown,
+        // so "yes" is preserved - not mapped to "true". We could change the code to automatically transform
+        // key = alphanum alpha
+        {"L", "en", "E", "u", "bbb-aaa-0a", "T", "en-u-aaa-bbb-0a-yes",
+         "en@0a=yes;attribute=aaa-bbb"},
+        {"L", "fr", "R", "FR", "P", "Yoshito-ICU", "T", "fr-FR-x-yoshito-icu",
+          "fr_FR@x=yoshito-icu"},
+        {"L", "ja", "R", "jp", "K", "ca", "japanese", "T", "ja-JP-u-ca-japanese",
+          "ja_JP@calendar=japanese"},
+        {"K", "co", "PHONEBK", "K", "ca", "gregory", "L", "De", "T",
+          "de-u-ca-gregory-co-phonebk", "de@calendar=gregorian;collation=phonebook"},
+        {"E", "o", "OPQR", "E", "a", "aBcD", "T", "und-a-abcd-o-opqr", "@a=abcd;o=opqr"},
+        {"E", "u", "nu-thai-ca-gregory", "L", "TH", "T", "th-u-ca-gregory-nu-thai",
+          "th@calendar=gregorian;numbers=thai"},
+        {"L", "en", "K", "tz", "usnyc", "R", "US", "T", "en-US-u-tz-usnyc",
+          "en_US@timezone=America/New_York"},
+        {"L", "de", "K", "co", "phonebk", "K", "ks", "level1", "K", "kk",
+          "true", "T", "de-u-co-phonebk-kk-true-ks-level1",
+          "de@collation=phonebook;colnormalization=yes;colstrength=primary"},
+        {"L", "en", "R", "US", "K", "ca", "gregory", "T", "en-US-u-ca-gregory",
+          "en_US@calendar=gregorian"},
+        {"L", "en", "R", "US", "K", "cal", "gregory", "X"},
+        {"L", "en", "R", "US", "K", "ca", "gregorian", "X"},
+        {"L", "en", "R", "US", "K", "kn", "true", "T", "en-US-u-kn-true",
+          "en_US@colnumeric=yes"},
+        {"B", "de-DE-u-co-phonebk", "C", "L", "pt", "T", "pt", "pt"},
+        {"B", "ja-jp-u-ca-japanese", "N", "T", "ja-JP", "ja_JP"},
+        {"B", "es-u-def-abc-co-trad", "A", "hij", "D", "def", "T",
+          "es-u-abc-hij-co-trad", "es@attribute=abc-hij;collation=traditional"},
+        {"B", "es-u-def-abc-co-trad", "A", "hij", "D", "def", "D", "def", "T",
+          "es-u-abc-hij-co-trad", "es@attribute=abc-hij;collation=traditional"},
+        {"L", "en", "A", "aa", "X"},
+        {"B", "fr-u-attr1-cu-eur", "D", "attribute1", "X"},
+    };
+    UErrorCode status = U_ZERO_ERROR;
+    LocaleBuilder bld;
+    for (int tidx = 0; tidx < UPRV_LENGTHOF(TESTCASES); tidx++) {
+        const char* (&testCase)[14] = TESTCASES[tidx];
+        std::string actions;
+        for (int p = 0; p < UPRV_LENGTHOF(testCase); p++) {
+             if (testCase[p] == nullptr) {
+                 actions += " (nullptr)";
+                 break;
+             }
+             if (p > 0) actions += " ";
+             actions += testCase[p];
+        }
+        int i = 0;
+        const char* method;
+        status = U_ZERO_ERROR;
+        bld.clear();
+        while (true) {
+            method = testCase[i++];
+            if (strcmp("L", method) == 0) {
+                bld.setLanguage(testCase[i++]).build(status);
+            } else if (strcmp("S", method) == 0) {
+                bld.setScript(testCase[i++]).build(status);
+            } else if (strcmp("R", method) == 0) {
+                bld.setRegion(testCase[i++]).build(status);
+            } else if (strcmp("V", method) == 0) {
+                bld.setVariant(testCase[i++]).build(status);
+            } else if (strcmp("K", method) == 0) {
+                const char* key = testCase[i++];
+                const char* type = testCase[i++];
+                bld.setUnicodeLocaleKeyword(key, type).build(status);
+            } else if (strcmp("A", method) == 0) {
+                bld.addUnicodeLocaleAttribute(testCase[i++]).build(status);
+            } else if (strcmp("E", method) == 0) {
+                const char* key = testCase[i++];
+                const char* value = testCase[i++];
+                bld.setExtension(key[0], value).build(status);
+            } else if (strcmp("P", method) == 0) {
+                bld.setExtension('x', testCase[i++]).build(status);
+            } else if (strcmp("U", method) == 0) {
+                bld.setLocale(Locale(testCase[i++])).build(status);
+            } else if (strcmp("B", method) == 0) {
+                bld.setLanguageTag(testCase[i++]).build(status);
+            }
+            // clear / remove
+            else if (strcmp("C", method) == 0) {
+                bld.clear().build(status);
+            } else if (strcmp("N", method) == 0) {
+                bld.clearExtensions().build(status);
+            } else if (strcmp("D", method) == 0) {
+                bld.removeUnicodeLocaleAttribute(testCase[i++]).build(status);
+            }
+            // result
+            else if (strcmp("X", method) == 0) {
+                // Reached only when no earlier step failed, so the expected error is missing.
+                errln("FAIL: No error return - test case: %s", actions.c_str());
+                break;  // "X" ends the test case; the next entry is nullptr and must not reach strcmp().
+            } else if (strcmp("T", method) == 0) {
+                status = U_ZERO_ERROR;
+                Locale loc = bld.build(status);
+                if (U_FAILURE(status) ||
+                    strcmp(loc.getName(), testCase[i + 1]) != 0) {
+                    errln("FAIL: Wrong locale ID - %s %s %s", loc.getName(),
+                            " for test case: ", actions.c_str());
+                }
+                std::string langtag = loc.toLanguageTag<std::string>(status);
+                if (U_FAILURE(status) || langtag != testCase[i]) {
+                    errln("FAIL: Wrong language tag - %s %s %s", langtag.c_str(),
+                            " for test case: ", actions.c_str());
+                }
+                break;
+            } else {
+                // Unknown test method
+                errln("Unknown test case method: There is an error in the test case data.");
+                break;
+            }
+            if (U_FAILURE(status)) {
+                if (strcmp("X", testCase[i]) == 0) {
+                    // This failure is expected
+                    break;
+                } else {
+                    errln("FAIL: U_ILLEGAL_ARGUMENT_ERROR at offset %d %s %s", i,
+                          " in test case: ", actions.c_str());
+                    break;
+                }
+            }
+            if (strcmp("T", method) == 0) {
+                break;
+            }
+        }  // while(true)
+    }  // for TESTCASES
+}
+
+void LocaleBuilderTest::TestLocaleBuilderBasic() {
+    LocaleBuilder bld;  // One builder is reused for every step; each Verify() checks the resulting BCP 47 tag.
+    bld.setLanguage("zh");
+    Verify(bld, "zh", "setLanguage('zh') got Error: %s\n");
+
+    bld.setScript("Hant");
+    Verify(bld, "zh-Hant", "setScript('Hant') got Error: %s\n");
+
+    bld.setRegion("SG");
+    Verify(bld, "zh-Hant-SG", "setRegion('SG') got Error: %s\n");
+
+    bld.setRegion("HK");
+    bld.setScript("Hans");
+    Verify(bld, "zh-Hans-HK",
+           "setRegion('HK') and setScript('Hans') got Error: %s\n");
+
+    bld.setVariant("revised");
+    Verify(bld, "zh-Hans-HK-revised",
+           "setVariant('revised') got Error: %s\n");
+
+    bld.setUnicodeLocaleKeyword("nu", "thai");
+    Verify(bld, "zh-Hans-HK-revised-u-nu-thai",
+           "setUnicodeLocaleKeyword('nu', 'thai') got Error: %s\n");
+
+    bld.setUnicodeLocaleKeyword("co", "pinyin");
+    Verify(bld, "zh-Hans-HK-revised-u-co-pinyin-nu-thai",
+           "setUnicodeLocaleKeyword('co', 'pinyin') got Error: %s\n");
+
+    bld.setUnicodeLocaleKeyword("nu", "latn");
+    Verify(bld, "zh-Hans-HK-revised-u-co-pinyin-nu-latn",
+           "setUnicodeLocaleKeyword('nu', 'latn') got Error: %s\n");
+
+    bld.setUnicodeLocaleKeyword("nu", nullptr);  // a nullptr type removes the keyword
+    Verify(bld, "zh-Hans-HK-revised-u-co-pinyin",
+           "setUnicodeLocaleKeyword('nu', nullptr) got Error: %s\n");
+
+    bld.setUnicodeLocaleKeyword("co", nullptr);
+    Verify(bld, "zh-Hans-HK-revised",
+           "setUnicodeLocaleKeyword('co', nullptr) got Error: %s\n");
+
+    bld.setScript("");  // an empty string removes the field
+    Verify(bld, "zh-HK-revised",
+           "setScript('') got Error: %s\n");
+
+    bld.setVariant("");
+    Verify(bld, "zh-HK",
+           "setVariant('') got Error: %s\n");
+
+    bld.setRegion("");
+    Verify(bld, "zh",
+           "setRegion('') got Error: %s\n");
+}
+
+void LocaleBuilderTest::TestSetLanguageWellFormed() {
+    // Grammar: http://www.unicode.org/reports/tr35/tr35.html#unicode_language_subtag
+    //   unicode_language_subtag = alpha{2,3} | alpha{5,8};
+    // ICUTC decided to support alpha{4} as well.
+    static const char* wellFormedLanguages[] = {
+        "",  // the empty string must be accepted too
+
+        // 2 letters
+        "en",
+        "NE",
+        "eN",
+        "Ne",
+
+        // 3 letters
+        "aNe",
+        "zzz",
+        "AAA",
+
+        // 4 letters
+        "ABCD",
+        "abcd",
+
+        // 5 letters
+        "efgij",
+        "AbCAD",
+        "ZAASD",
+
+        // 6 letters
+        "efgijk",
+        "AADGFE",
+        "AkDfFz",
+
+        // 7 letters
+        "asdfads",
+        "ADSFADF",
+        "piSFkDk",
+
+        // 8 letters
+        "oieradfz",
+        "IADSFJKR",
+        "kkDSFJkR",
+    };
+    for (const char* subtag : wellFormedLanguages) {
+        LocaleBuilder builder;
+        builder.setLanguage(subtag);
+        // The setter takes no UErrorCode; the outcome is observed through build().
+        UErrorCode errorCode = U_ZERO_ERROR;
+        Locale built = builder.build(errorCode);
+        if (U_SUCCESS(errorCode)) { continue; }
+        errln("setLanguage(\"%s\") got Error: %s\n",
+              subtag, u_errorName(errorCode));
+    }
+}
+
+void LocaleBuilderTest::TestSetLanguageIllFormed() {  // each entry must be rejected at build()
+    static const char* illFormed[] = {
+        "a",  // single characters
+        "z",
+        "A",
+        "F",
+        "2",
+        "0",
+        "9",  // comma required: adjacent literals would otherwise merge into "9{"
+        "{",
+        ".",
+        "[",
+        "]",
+        "\\",
+
+        "e1",
+        "N2",
+        "3N",
+        "4e",
+        "e:",
+        "43",
+        "a9",
+
+        "aN0",
+        "z1z",
+        "2zz",
+        "3A3",
+        "456",
+        "af)",
+
+        // Per 2019-01-23 ICUTC, we still accept 4alpha as tlang. see ICU-20321.
+        // "latn",
+        // "Arab",
+        // "LATN",
+
+        "e)gij",
+        "Ab3AD",
+        "ZAAS8",
+
+        "efgi[]",
+        "AA9GFE",
+        "7kD3Fz",
+        "as8fads",
+        "0DSFADF",
+        "'iSFkDk",
+
+        "oieradf+",
+        "IADSFJK-",
+        "kkDSFJk0",
+
+        // alpha{9}
+        "oieradfab",
+        "IADSFJKDE",
+        "kkDSFJkzf",
+    };
+    for (const char* ill : illFormed) {
+        UErrorCode status = U_ZERO_ERROR;
+        LocaleBuilder bld;
+        bld.setLanguage(ill);  // takes no UErrorCode; the result is checked via build()
+        Locale loc = bld.build(status);
+        if (status != U_ILLEGAL_ARGUMENT_ERROR) {  // any other status is a test failure
+            errln("setLanguage(\"%s\") should fail but has no Error\n", ill);
+        }
+    }
+}
+
+void LocaleBuilderTest::TestSetScriptWellFormed() {
+    // Grammar: http://www.unicode.org/reports/tr35/tr35.html#unicode_script_subtag
+    //   unicode_script_subtag = alpha{4} ;
+    static const char* wellFormedScripts[] = {
+        "",  // the empty string must be accepted too
+
+        "Latn",
+        "latn",
+        "lATN",
+        "laTN",
+        "arBN",
+        "ARbn",
+        "adsf",
+        "aADF",
+        "BSVS",
+        "LATn",
+    };
+    for (const char* subtag : wellFormedScripts) {
+        LocaleBuilder builder;
+        builder.setScript(subtag);
+        // The setter takes no UErrorCode; the outcome is observed through build().
+        UErrorCode errorCode = U_ZERO_ERROR;
+        Locale built = builder.build(errorCode);
+        if (U_SUCCESS(errorCode)) { continue; }
+        errln("setScript(\"%s\") got Error: %s\n",
+              subtag, u_errorName(errorCode));
+    }
+}
+
+void LocaleBuilderTest::TestSetScriptIllFormed() {  // each entry must be rejected at build()
+    static const char* illFormed[] = {
+        "a",  // single characters
+        "z",
+        "A",
+        "F",
+        "2",
+        "0",
+        "9",  // comma required: adjacent literals would otherwise merge into "9{"
+        "{",
+        ".",
+        "[",
+        "]",
+        "\\",
+
+        "e1",
+        "N2",
+        "3N",
+        "4e",
+        "e:",
+        "43",
+        "a9",
+
+        "aN0",
+        "z1z",
+        "2zz",
+        "3A3",
+        "456",
+        "af)",
+
+        "0atn",  // four characters, but not all letters
+        "l1tn",
+        "lA2N",
+        "la4N",
+        "arB5",
+        "1234",
+
+        "e)gij",
+        "Ab3AD",
+        "ZAAS8",
+
+        "efgi[]",
+        "AA9GFE",
+        "7kD3Fz",
+
+        "as8fads",
+        "0DSFADF",
+        "'iSFkDk",
+
+        "oieradf+",
+        "IADSFJK-",
+        "kkDSFJk0",
+
+        // alpha{9}
+        "oieradfab",
+        "IADSFJKDE",
+        "kkDSFJkzf",
+    };
+    for (const char* ill : illFormed) {
+        UErrorCode status = U_ZERO_ERROR;
+        LocaleBuilder bld;
+        bld.setScript(ill);  // takes no UErrorCode; the result is checked via build()
+        Locale loc = bld.build(status);
+        if (status != U_ILLEGAL_ARGUMENT_ERROR) {  // any other status is a test failure
+            errln("setScript(\"%s\") should fail but has no Error\n", ill);
+        }
+    }
+}
+
+void LocaleBuilderTest::TestSetRegionWellFormed() {
+    // Grammar: http://www.unicode.org/reports/tr35/tr35.html#unicode_region_subtag
+    //   unicode_region_subtag = (alpha{2} | digit{3})
+    static const char* wellFormedRegions[] = {
+        "",  // the empty string must be accepted too
+
+        // 2 letters
+        "en",
+        "NE",
+        "eN",
+        "Ne",
+
+        // 3 digits
+        "000",
+        "999",
+        "123",
+        "987"
+    };
+    for (const char* subtag : wellFormedRegions) {
+        LocaleBuilder builder;
+        builder.setRegion(subtag);
+        // The setter takes no UErrorCode; the outcome is observed through build().
+        UErrorCode errorCode = U_ZERO_ERROR;
+        Locale built = builder.build(errorCode);
+        if (U_SUCCESS(errorCode)) { continue; }
+        errln("setRegion(\"%s\") got Error: %s\n",
+              subtag, u_errorName(errorCode));
+    }
+}
+
+void LocaleBuilderTest::TestSetRegionIllFormed() {  // each entry must be rejected at build()
+    static const char* illFormed[] = {
+        "a",  // single characters
+        "z",
+        "A",
+        "F",
+        "2",
+        "0",
+        "9",  // comma required: adjacent literals would otherwise merge into "9{"
+        "{",
+        ".",
+        "[",
+        "]",
+        "\\",
+
+        "e1",
+        "N2",
+        "3N",
+        "4e",
+        "e:",
+        "43",
+        "a9",
+
+        "aN0",
+        "z1z",
+        "2zz",
+        "3A3",
+        "4.6",
+        "af)",
+
+        "0atn",
+        "l1tn",
+        "lA2N",
+        "la4N",
+        "arB5",
+        "1234",
+
+        "e)gij",
+        "Ab3AD",
+        "ZAAS8",
+
+        "efgi[]",
+        "AA9GFE",
+        "7kD3Fz",
+
+        "as8fads",
+        "0DSFADF",
+        "'iSFkDk",
+
+        "oieradf+",
+        "IADSFJK-",
+        "kkDSFJk0",
+
+        // alpha{9}
+        "oieradfab",
+        "IADSFJKDE",
+        "kkDSFJkzf",
+    };
+    for (const char* ill : illFormed) {
+        UErrorCode status = U_ZERO_ERROR;
+        LocaleBuilder bld;
+        bld.setRegion(ill);  // takes no UErrorCode; the result is checked via build()
+        Locale loc = bld.build(status);
+        if (status != U_ILLEGAL_ARGUMENT_ERROR) {  // any other status is a test failure
+            errln("setRegion(\"%s\") should fail but has no Error\n", ill);
+        }
+    }
+}
+
+void LocaleBuilderTest::TestSetVariantWellFormed() {
+    // Grammar: http://www.unicode.org/reports/tr35/tr35.html#unicode_variant_subtag
+    //   (sep unicode_variant_subtag)*
+    //   unicode_variant_subtag = (alphanum{5,8} | digit alphanum{3}) ;
+    static const char* wellFormedVariants[] = {
+        "",  // the empty string must be accepted too
+
+        // 5 alphanumerics
+        "efgij",
+        "AbCAD",
+        "ZAASD",
+        "0AASD",
+        "A1CAD",
+        "ef2ij",
+        "ads3X",
+        "owqF4",
+
+        // 6 alphanumerics
+        "efgijk",
+        "AADGFE",
+        "AkDfFz",
+        "0ADGFE",
+        "A9DfFz",
+        "AADG7E",
+
+        // 7 alphanumerics
+        "asdfads",
+        "ADSFADF",
+        "piSFkDk",
+        "a0dfads",
+        "ADSF3DF",
+        "piSFkD9",
+
+        // 8 alphanumerics
+        "oieradfz",
+        "IADSFJKR",
+        "kkDSFJkR",
+        "0ADSFJKR",
+        "12345679",
+
+        // a digit followed by 3 alphanumerics
+        "0123",
+        "1abc",
+        "20EF",
+        "30EF",
+        "8A03",
+        "3Ax3",
+        "9Axy",
+
+        // several variant subtags joined by '-'
+        "0123-4567",
+        "0ab3-ABCDE",
+        "9ax3-xByD9",
+        "9ax3-xByD9-adfk934a",
+        // several variant subtags joined by '_'
+        "0123_4567",
+        "0ab3_ABCDE",
+        "9ax3_xByD9",
+        "9ax3_xByD9_adfk934a",
+        // both separators mixed in one value
+        "9ax3-xByD9_adfk934a",
+        "9ax3_xByD9-adfk934a",
+    };
+    for (const char* value : wellFormedVariants) {
+        LocaleBuilder builder;
+        builder.setVariant(value);
+        // The setter takes no UErrorCode; the outcome is observed through build().
+        UErrorCode errorCode = U_ZERO_ERROR;
+        Locale built = builder.build(errorCode);
+        if (U_SUCCESS(errorCode)) { continue; }
+        errln("setVariant(\"%s\") got Error: %s\n",
+              value, u_errorName(errorCode));
+    }
+}
+
+void LocaleBuilderTest::TestSetVariantIllFormed() {  // each entry must be rejected at build()
+    static const char* illFormed[] = {
+        "a",  // single characters
+        "z",
+        "A",
+        "F",
+        "2",
+        "0",
+        "9",  // comma required: adjacent literals would otherwise merge into "9{"
+        "{",
+        ".",
+        "[",
+        "]",
+        "\\",
+
+        "e1",  // two characters
+        "N2",
+        "3N",
+        "4e",
+        "e:",
+        "43",
+        "a9",
+        "en",
+        "NE",
+        "eN",
+        "Ne",
+
+        "aNe",  // three characters
+        "zzz",
+        "AAA",
+        "aN0",
+        "z1z",
+        "2zz",
+        "3A3",
+        "4.6",
+        "af)",
+        "345",
+        "923",
+
+        "Latn",  // four characters that do not start with a digit
+        "latn",
+        "lATN",
+        "laTN",
+        "arBN",
+        "ARbn",
+        "adsf",
+        "aADF",
+        "BSVS",
+        "LATn",
+        "l1tn",
+        "lA2N",
+        "la4N",
+        "arB5",
+        "abc3",
+        "A3BC",
+
+        "e)gij",  // five to eight characters including punctuation
+        "A+3AD",
+        "ZAA=8",
+
+        "efgi[]",
+        "AA9]FE",
+        "7k[3Fz",
+
+        "as8f/ds",
+        "0DSFAD{",
+        "'iSFkDk",
+
+        "oieradf+",
+        "IADSFJK-",
+        "k}DSFJk0",
+
+        // alphanum{9}
+        "oieradfab",
+        "IADSFJKDE",
+        "kkDSFJkzf",
+        "123456789",
+
+        "-0123",  // leading or trailing separator, or a three-character piece
+        "-0123-4567",
+        "0123-4567-",
+        "-123-4567",
+        "_0123",
+        "_0123_4567",
+        "0123_4567_",
+        "_123_4567",
+
+        "-abcde-figjk",
+        "abcde-figjk-",
+        "-abcde-figjk-",
+        "_abcde_figjk",
+        "abcde_figjk_",
+        "_abcde_figjk_",
+    };
+    for (const char* ill : illFormed) {
+        UErrorCode status = U_ZERO_ERROR;
+        LocaleBuilder bld;
+        bld.setVariant(ill);  // takes no UErrorCode; the result is checked via build()
+        Locale loc = bld.build(status);
+        if (status != U_ILLEGAL_ARGUMENT_ERROR) {  // any other status is a test failure
+            errln("setVariant(\"%s\") should fail but has no Error\n", ill);
+        }
+    }
+}
+
+void LocaleBuilderTest::TestSetUnicodeLocaleKeywordWellFormed() {
+    // Grammar: http://www.unicode.org/reports/tr35/tr35.html#unicode_locale_extensions
+    //   keyword = key (sep type)? ;
+    //   key = alphanum alpha ;
+    //   type = alphanum{3,8} (sep alphanum{3,8})* ;
+    static const char* wellFormed_key_value[] = {
+        "aa", "123",
+        "3b", "zyzbcdef",
+        "0Z", "1ZB30zk9-abc",
+        "cZ", "2ck30zfZ-adsf023-234kcZ",
+        "ZZ", "Lant",
+        "ko", "",
+    };
+    for (int k = 0; k < UPRV_LENGTHOF(wellFormed_key_value); k += 2) {
+        // Entries come in pairs: the key at k, its type at k + 1.
+        const char* key = wellFormed_key_value[k];
+        const char* type = wellFormed_key_value[k + 1];
+        LocaleBuilder builder;
+        builder.setUnicodeLocaleKeyword(key, type);
+        UErrorCode errorCode = U_ZERO_ERROR;
+        Locale built = builder.build(errorCode);
+        if (U_FAILURE(errorCode)) {
+            errln("setUnicodeLocaleKeyword(\"%s\", \"%s\") got Error: %s\n",
+                  key, type, u_errorName(errorCode));
+        }
+    }
+}
+
+void LocaleBuilderTest::TestSetUnicodeLocaleKeywordIllFormedKey() {
+    static const char* illFormed[] = {  // used as keys, always with the value "abc"
+        "34",
+        "ab-cde",
+        "123",
+        "b3",
+        "zyzabcdef",
+        "Z0",
+    };
+    for (const char* badKey : illFormed) {
+        LocaleBuilder builder;
+        builder.setUnicodeLocaleKeyword(badKey, "abc");
+        UErrorCode errorCode = U_ZERO_ERROR;
+        Locale built = builder.build(errorCode);
+        // Any status other than U_ILLEGAL_ARGUMENT_ERROR is reported as a failure.
+        if (errorCode == U_ILLEGAL_ARGUMENT_ERROR) { continue; }
+        errln("setUnicodeLocaleKeyword(\"%s\", \"abc\") should fail but has no Error\n",
+              badKey);
+    }
+}
+
+void LocaleBuilderTest::TestSetUnicodeLocaleKeywordIllFormedValue() {
+    static const char* illFormed[] = {  // used as values, always with the key "ab"
+        "34",
+        "ab-",
+        "-cd",
+        "-ef-",
+        "zyzabcdef",
+        "ab-abc",
+        "1ZB30zfk9-abc",
+        "2ck30zfk9-adsf023-234kcZ",
+    };
+    for (const char* badValue : illFormed) {
+        LocaleBuilder builder;
+        builder.setUnicodeLocaleKeyword("ab", badValue);
+        UErrorCode errorCode = U_ZERO_ERROR;
+        Locale built = builder.build(errorCode);
+        // Any status other than U_ILLEGAL_ARGUMENT_ERROR is reported as a failure.
+        if (errorCode == U_ILLEGAL_ARGUMENT_ERROR) { continue; }
+        errln("setUnicodeLocaleKeyword(\"ab\", \"%s\") should fail but has no Error\n",
+              badValue);
+    }
+}
+
+void LocaleBuilderTest::TestAddRemoveUnicodeLocaleAttribute() {
+    UErrorCode errorCode = U_ZERO_ERROR;
+    LocaleBuilder builder;
+    builder.setLanguage("fr");
+    builder.addUnicodeLocaleAttribute("abc");
+    builder.addUnicodeLocaleAttribute("aBc");
+    builder.addUnicodeLocaleAttribute("EFG");
+    builder.addUnicodeLocaleAttribute("efghi");
+    builder.addUnicodeLocaleAttribute("efgh");
+    builder.addUnicodeLocaleAttribute("efGhi");
+    builder.addUnicodeLocaleAttribute("EFg");
+    builder.addUnicodeLocaleAttribute("hijk");
+    builder.addUnicodeLocaleAttribute("EFG");
+    builder.addUnicodeLocaleAttribute("HiJK");
+    builder.addUnicodeLocaleAttribute("aBc");
+    Locale locale = builder.build(errorCode);
+    if (U_FAILURE(errorCode)) {
+        errln("addUnicodeLocaleAttribute() got Error: %s\n",
+              u_errorName(errorCode));
+    }
+    std::string wantTag("fr-u-abc-efg-efgh-efghi-hijk");
+    std::string gotTag = locale.toLanguageTag<std::string>(errorCode);
+    if (U_FAILURE(errorCode) || wantTag != gotTag) {
+        errln("Should get \"%s\" but get \"%s\"\n", wantTag.c_str(), gotTag.c_str());
+    }
+
+    // Remove "efgh" from the middle, spelled with different casing.
+    locale = builder.removeUnicodeLocaleAttribute("eFgH").build(errorCode);
+    if (U_FAILURE(errorCode)) {
+        errln("removeUnicodeLocaleAttribute() got Error: %s\n",
+              u_errorName(errorCode));
+    }
+    wantTag = "fr-u-abc-efg-efghi-hijk";
+    gotTag = locale.toLanguageTag<std::string>(errorCode);
+    if (U_FAILURE(errorCode) || wantTag != gotTag) {
+        errln("Should get \"%s\" but get \"%s\"\n", wantTag.c_str(), gotTag.c_str());
+    }
+
+    // Remove an attribute that is no longer present; the tag must not change.
+    locale = builder.removeUnicodeLocaleAttribute("efgh").build(errorCode);
+    if (U_FAILURE(errorCode)) {
+        errln("removeUnicodeLocaleAttribute() got Error: %s\n",
+              u_errorName(errorCode));
+    }
+    gotTag = locale.toLanguageTag<std::string>(errorCode);
+    if (U_FAILURE(errorCode) || wantTag != gotTag) {
+        errln("Should get \"%s\" but get \"%s\"\n", wantTag.c_str(), gotTag.c_str());
+    }
+
+    // Remove "abc" from the beginning, spelled with different casing.
+    locale = builder.removeUnicodeLocaleAttribute("ABC").build(errorCode);
+    if (U_FAILURE(errorCode)) {
+        errln("removeUnicodeLocaleAttribute() got Error: %s\n",
+              u_errorName(errorCode));
+    }
+    wantTag = "fr-u-efg-efghi-hijk";
+    gotTag = locale.toLanguageTag<std::string>(errorCode);
+    if (U_FAILURE(errorCode) || wantTag != gotTag) {
+        errln("Should get \"%s\" but get \"%s\"\n", wantTag.c_str(), gotTag.c_str());
+    }
+
+    // Remove "hij", a mere prefix of the last attribute; the tag must not change.
+    locale = builder.removeUnicodeLocaleAttribute("hij").build(errorCode);
+    if (U_FAILURE(errorCode)) {
+        errln("removeUnicodeLocaleAttribute() got Error: %s\n",
+              u_errorName(errorCode));
+    }
+    gotTag = locale.toLanguageTag<std::string>(errorCode);
+    if (U_FAILURE(errorCode) || wantTag != gotTag) {
+        errln("Should get \"%s\" but get \"%s\"\n", wantTag.c_str(), gotTag.c_str());
+    }
+
+    // Remove "hijk" from the end, spelled with different casing.
+    locale = builder.removeUnicodeLocaleAttribute("hIJK").build(errorCode);
+    if (U_FAILURE(errorCode)) {
+        errln("removeUnicodeLocaleAttribute() got Error: %s\n",
+              u_errorName(errorCode));
+    }
+    wantTag = "fr-u-efg-efghi";
+    gotTag = locale.toLanguageTag<std::string>(errorCode);
+    if (U_FAILURE(errorCode) || wantTag != gotTag) {
+        errln("Should get \"%s\" but get \"%s\"\n", wantTag.c_str(), gotTag.c_str());
+    }
+
+    // Remove "efghi", now the last attribute, spelled with different casing.
+    locale = builder.removeUnicodeLocaleAttribute("EFGhi").build(errorCode);
+    if (U_FAILURE(errorCode)) {
+        errln("removeUnicodeLocaleAttribute() got Error: %s\n",
+              u_errorName(errorCode));
+    }
+    wantTag = "fr-u-efg";
+    gotTag = locale.toLanguageTag<std::string>(errorCode);
+    if (U_FAILURE(errorCode) || wantTag != gotTag) {
+        errln("Should get \"%s\" but get \"%s\"\n", wantTag.c_str(), gotTag.c_str());
+    }
+
+    // Remove "efg", the only attribute left, spelled with different casing.
+    locale = builder.removeUnicodeLocaleAttribute("EFG").build(errorCode);
+    if (U_FAILURE(errorCode)) {
+        errln("removeUnicodeLocaleAttribute() got Error: %s\n",
+              u_errorName(errorCode));
+    }
+    wantTag = "fr";
+    gotTag = locale.toLanguageTag<std::string>(errorCode);
+    if (U_FAILURE(errorCode) || wantTag != gotTag) {
+        errln("Should get \"%s\" but get \"%s\"\n", wantTag.c_str(), gotTag.c_str());
+    }
+
+}
+
+void LocaleBuilderTest::TestAddRemoveUnicodeLocaleAttributeWellFormed() {
+    // Grammar: http://www.unicode.org/reports/tr35/tr35.html#unicode_locale_extensions
+    //   attribute = alphanum{3,8} ;
+    static const char* wellFormedAttributes[] = {
+        // 3 alphanumerics
+        "AbC",
+        "ZAA",
+        "0AA",
+        "x3A",
+        "xa8",
+
+        // 4 alphanumerics
+        "AbCA",
+        "ZASD",
+        "0ASD",
+        "A3a4",
+        "zK90",
+
+        // 5 alphanumerics
+        "efgij",
+        "AbCAD",
+        "ZAASD",
+        "0AASD",
+        "A1CAD",
+        "ef2ij",
+        "ads3X",
+        "owqF4",
+
+        // 6 alphanumerics
+        "efgijk",
+        "AADGFE",
+        "AkDfFz",
+        "0ADGFE",
+        "A9DfFz",
+        "AADG7E",
+
+        // 7 alphanumerics
+        "asdfads",
+        "ADSFADF",
+        "piSFkDk",
+        "a0dfads",
+        "ADSF3DF",
+        "piSFkD9",
+
+        // 8 alphanumerics
+        "oieradfz",
+        "IADSFJKR",
+        "kkDSFJkR",
+    };
+    LocaleBuilder builder;
+    for (int idx = 0; idx < UPRV_LENGTHOF(wellFormedAttributes); ++idx) {
+        if (idx % 5 == 0) { builder.clear(); }  // start over on every fifth attribute
+        const char* current = wellFormedAttributes[idx];
+        UErrorCode errorCode = U_ZERO_ERROR;
+        builder.addUnicodeLocaleAttribute(current);
+        Locale built = builder.build(errorCode);
+        if (U_FAILURE(errorCode)) {
+            errln("addUnicodeLocaleAttribute(\"%s\") got Error: %s\n",
+                  current, u_errorName(errorCode));
+        }
+        if (idx <= 2) { continue; }  // the removals below index up to three entries back
+        const char* previous = wellFormedAttributes[idx - 1];
+        builder.removeUnicodeLocaleAttribute(previous);
+        built = builder.build(errorCode);
+        if (U_FAILURE(errorCode)) {
+            errln("removeUnicodeLocaleAttribute(\"%s\") got Error: %s\n",
+                  previous, u_errorName(errorCode));
+        }
+        const char* older = wellFormedAttributes[idx - 3];
+        builder.removeUnicodeLocaleAttribute(older);
+        built = builder.build(errorCode);
+        if (U_FAILURE(errorCode)) {
+            errln("removeUnicodeLocaleAttribute(\"%s\") got Error: %s\n",
+                  older, u_errorName(errorCode));
+        }
+    }
+}
+
+void LocaleBuilderTest::TestAddUnicodeLocaleAttributeIllFormed() {
+    static const char* illFormed[] = {  // each entry must be rejected as an attribute
+        "aa",
+        "34",
+        "ab-",
+        "-cd",
+        "-ef-",
+        "zyzabcdef",
+        "123456789",
+        "ab-abc",
+        "1ZB30zfk9-abc",
+        "2ck30zfk9-adsf023-234kcZ",
+    };
+    for (const char* badAttribute : illFormed) {
+        LocaleBuilder builder;
+        builder.addUnicodeLocaleAttribute(badAttribute);
+        UErrorCode errorCode = U_ZERO_ERROR;
+        Locale built = builder.build(errorCode);
+        // Any status other than U_ILLEGAL_ARGUMENT_ERROR is reported as a failure.
+        if (errorCode == U_ILLEGAL_ARGUMENT_ERROR) { continue; }
+        errln("addUnicodeLocaleAttribute(\"%s\") should fail but has no Error\n",
+              badAttribute);
+    }
+}
+
+void LocaleBuilderTest::TestSetExtensionU() {  // 'u' extension set, replaced and cleared
+    LocaleBuilder bld;
+    bld.setLanguage("zh");
+    Verify(bld, "zh",
+           "setLanguage(\"zh\") got Error: %s\n");
+
+    bld.setExtension('u', "co-stroke");
+    Verify(bld, "zh-u-co-stroke",
+           "setExtension('u', \"co-stroke\") got Error: %s\n");
+    // Upper-case 'U' addresses the same extension; the old value is expected to be gone.
+    bld.setExtension('U', "ca-islamic");
+    Verify(bld, "zh-u-ca-islamic",
+           "setExtension('U', \"ca-islamic\") got Error: %s\n");
+
+    bld.setExtension('u', "ca-chinese");
+    Verify(bld, "zh-u-ca-chinese",
+           "setExtension('u', \"ca-chinese\") got Error: %s\n");
+
+    bld.setExtension('U', "co-pinyin");
+    Verify(bld, "zh-u-co-pinyin",
+           "setExtension('U', \"co-pinyin\") got Error: %s\n");
+    // Changing the region must leave the extension in place.
+    bld.setRegion("TW");
+    Verify(bld, "zh-TW-u-co-pinyin",
+           "setRegion(\"TW\") got Error: %s\n");
+    // An empty value is expected to remove the extension.
+    bld.setExtension('U', "");
+    Verify(bld, "zh-TW",
+           "setExtension('U', \"\") got Error: %s\n");
+
+    bld.setExtension('u', "abc-defg-kr-face");
+    Verify(bld, "zh-TW-u-abc-defg-kr-face",
+           "setExtension('u', \"abc-defg-kr-face\") got Error: %s\n");
+
+    bld.setExtension('U', "ca-japanese");
+    Verify(bld, "zh-TW-u-ca-japanese",
+           "setExtension('U', \"ca-japanese\") got Error: %s\n");
+
+}
+
+void LocaleBuilderTest::TestSetExtensionValidateUWellFormed() {
+    static const char* wellFormedExtensions[] = {
+        // A single keyword, where
+        //   keyword = key (sep type)? ;
+        //   key = alphanum alpha ;
+        //   type = alphanum{3,8} (sep alphanum{3,8})* ;
+        "3A",
+        "ZA",
+        "az-abc",
+        "zz-123",
+        "7z-12345678",
+        "kb-A234567Z",
+        // several keywords: (sep keyword)+
+        "1z-ZZ",
+        "2z-ZZ-123",
+        "3z-ZZ-123-cd",
+        "0z-ZZ-123-cd-efghijkl",
+        // a single attribute
+        "abc",
+        "456",
+        "87654321",
+        "ZABADFSD",
+        // several attributes: (sep attribute)+
+        "abc-ZABADFSD",
+        "123-ZABADFSD",
+        "K2K-12345678",
+        "K2K-12345678-zzz",
+        // attributes followed by keywords: (sep attribute)+ (sep keyword)*
+        "K2K-12345678-zz",
+        "K2K-12345678-zz-0z",
+        "K2K-12345678-9z-AZ-abc",
+        "K2K-12345678-zz-9A-234",
+        "K2K-12345678-zk0-abc-efg-zz-9k-234",
+    };
+    for (const char* value : wellFormedExtensions) {
+        LocaleBuilder builder;
+        builder.setExtension('u', value);
+        // The setter takes no UErrorCode; the outcome is observed through build().
+        UErrorCode errorCode = U_ZERO_ERROR;
+        Locale built = builder.build(errorCode);
+        if (U_SUCCESS(errorCode)) { continue; }
+        errln("setExtension('u', \"%s\") got Error: %s\n",
+              value, u_errorName(errorCode));
+    }
+}
+
+void LocaleBuilderTest::TestSetExtensionValidateUIllFormed() {
+    static const char* illFormed[] = {
+        // malformed key
+        "-",
+        "-ab",
+        "ab-",
+        "abc-",
+        "-abc",
+        "0",
+        "a",
+        "A0",
+        "z9",
+        "09",
+        "90",
+        // malformed keyword
+        "AB-A0",
+        "AB-efg-A0",
+        "xy-123456789",
+        "AB-Aa-",
+        "AB-Aac-",
+        // malformed attribute
+        "abcdefghi",
+        "abcdefgh-",
+        "abcdefgh-abcdefghi",
+        "abcdefgh-1",
+        "abcdefgh-a",
+        "abcdefgh-a2345678z",
+    };
+    for (const char* candidate : illFormed) {
+        LocaleBuilder builder;
+        builder.setExtension('u', candidate);
+        UErrorCode errorCode = U_ZERO_ERROR;
+        Locale built = builder.build(errorCode);
+        // Any status other than U_ILLEGAL_ARGUMENT_ERROR is reported as a failure.
+        if (errorCode == U_ILLEGAL_ARGUMENT_ERROR) { continue; }
+        errln("setExtension('u', \"%s\") should fail but has no Error\n",
+              candidate);
+    }
+}
+
+void LocaleBuilderTest::TestSetExtensionT() {
+    LocaleBuilder builder;
+    builder.setLanguage("fr");
+    Verify(builder,
+           "fr", "setLanguage(\"fr\") got Error: %s\n");
+    // The extension letter is passed in upper case; the expected tag uses "-t-".
+    builder.setExtension('T', "zh");
+    Verify(builder,
+           "fr-t-zh", "setExtension('T', \"zh\") got Error: %s\n");
+    // A mixed-case value is expected back in lower case.
+    builder.setExtension('t', "zh-Hant-TW-1234-A9-123-456ABCDE");
+    Verify(builder, "fr-t-zh-hant-tw-1234-a9-123-456abcde",
+           "setExtension('t', \"zh-Hant-TW-1234-A9-123-456ABCDE\") got Error: %s\n");
+    // Setting the extension again is expected to replace the previous value.
+    builder.setExtension('T', "a9-123");
+    Verify(builder,
+           "fr-t-a9-123", "setExtension('T', \"a9-123\") got Error: %s\n");
+    // Region, script and variant changes must leave the extension in place.
+    builder.setRegion("MX");
+    Verify(builder,
+           "fr-MX-t-a9-123", "setRegion(\"MX\") got Error: %s\n");
+
+    builder.setScript("Hans");
+    Verify(builder,
+           "fr-Hans-MX-t-a9-123", "setScript(\"Hans\") got Error: %s\n");
+
+    builder.setVariant("9abc-abcde");
+    Verify(builder, "fr-Hans-MX-9abc-abcde-t-a9-123",
+           "setVariant(\"9abc-abcde\") got Error: %s\n");
+    // An empty value is expected to remove the extension.
+    builder.setExtension('T', "");
+    Verify(builder, "fr-Hans-MX-9abc-abcde",
+           "bld.setExtension('T', \"\") got Error: %s\n");
+}
+
+void LocaleBuilderTest::TestSetExtensionValidateTWellFormed() {
+    // Overall shape: ((sep tlang (sep tfield)*) | (sep tfield)+)
+    static const char* wellFormedExtensions[] = {
+        // tlang only, where
+        //  tlang = unicode_language_subtag (sep unicode_script_subtag)?
+        //          (sep unicode_region_subtag)?  (sep unicode_variant_subtag)* ;
+        // language
+        "en",
+        "abc",
+        "abcde",
+        "ABCDEFGH",
+        // language, script
+        "en-latn",
+        "abc-arab",
+        "ABCDEFGH-Thai",
+        // language, script, region
+        "en-latn-ME",
+        "abc-arab-RU",
+        "ABCDEFGH-Thai-TH",
+        "en-latn-409",
+        "abc-arab-123",
+        "ABCDEFGH-Thai-456",
+        // language, region
+        "en-ME",
+        "abc-RU",
+        "ABCDEFGH-TH",
+        "en-409",
+        "abc-123",
+        "ABCDEFGH-456",
+        // language, script, region,
+        // followed by one or more variants
+        "en-latn-ME-abcde",
+        "abc-arab-RU-3abc-abcdef",
+        "ABCDEFGH-Thai-TH-ADSFS-9xyz-abcdef",
+        "en-latn-409-xafsa",
+        "abc-arab-123-ADASDF",
+        "ABCDEFGH-Thai-456-9sdf-ADASFAS",
+        // tfields only: (sep tfield)+
+        "A0-abcde",
+        "z9-abcde123",
+        "z9-abcde123-a1-abcde",
+        // tlang followed by tfields: tlang (sep tfield)*
+        "fr-A0-abcde",
+        "fr-FR-A0-abcde",
+        "fr-123-z9-abcde123-a1-abcde",
+        "fr-Latn-FR-z9-abcde123-a1-abcde",
+        "gab-Thai-TH-abcde-z9-abcde123-a1-abcde",
+        "gab-Thai-TH-0bde-z9-abcde123-a1-abcde",
+    };
+    for (const char* value : wellFormedExtensions) {
+        LocaleBuilder builder;
+        builder.setExtension('t', value);
+        // The setter takes no UErrorCode; the outcome is observed through build().
+        UErrorCode errorCode = U_ZERO_ERROR;
+        Locale built = builder.build(errorCode);
+        if (U_SUCCESS(errorCode)) { continue; }
+        errln("setExtension('t', \"%s\") got Error: %s\n",
+              value, u_errorName(errorCode));
+    }
+}
+
+void LocaleBuilderTest::TestSetExtensionValidateTIllFormed() {
+    static const char* illFormed[] = {  // each entry must be rejected as a 't' value
+        "a",
+        "a-",
+        "0",
+        "9-",
+        "-9",
+        "-z",
+        // "Latn" is not listed: per 2019-01-23 ICUTC, 4alpha is still accepted. See ICU-20321
+        "Latn-",
+        "en-",
+        "nob-",
+        "-z9",
+        "a3",
+        "a3-",
+        "3a",
+        "0z-",
+        "en-123-a1",
+        "en-TH-a1",
+        "gab-TH-a1",
+        "gab-Thai-a1",
+        "gab-Thai-TH-a1",
+        "gab-Thai-TH-0bde-a1",
+        "gab-Thai-TH-0bde-3b",
+        "gab-Thai-TH-0bde-z9-a1",
+        "gab-Thai-TH-0bde-z9-3b",
+        "gab-Thai-TH-0bde-z9-abcde123-3b",
+        "gab-Thai-TH-0bde-z9-abcde123-ab",
+        "gab-Thai-TH-0bde-z9-abcde123-ab",
+        "gab-Thai-TH-0bde-z9-abcde123-a1",
+        "gab-Thai-TH-0bde-z9-abcde123-a1-",
+        "gab-Thai-TH-0bde-z9-abcde123-a1-a",
+        "gab-Thai-TH-0bde-z9-abcde123-a1-ab",
+    };
+    for (const char* candidate : illFormed) {
+        LocaleBuilder builder;
+        builder.setExtension('t', candidate);
+        UErrorCode errorCode = U_ZERO_ERROR;
+        Locale built = builder.build(errorCode);
+        // Any status other than U_ILLEGAL_ARGUMENT_ERROR is reported as a failure.
+        if (errorCode == U_ILLEGAL_ARGUMENT_ERROR) { continue; }
+        errln("setExtension('t', \"%s\") should fail but has no Error\n",
+              candidate);
+    }
+}
+
+void LocaleBuilderTest::TestSetExtensionPU() {  // private-use 'x' extension set, replaced, cleared
+    LocaleBuilder bld;
+    bld.setLanguage("ar");
+    Verify(bld, "ar",
+           "setLanguage(\"ar\") got Error: %s\n");
+
+    bld.setExtension('X', "a-b-c-d-e");
+    Verify(bld, "ar-x-a-b-c-d-e",
+           "setExtension('X', \"a-b-c-d-e\") got Error: %s\n");
+    // Setting the extension again is expected to replace the previous value.
+    bld.setExtension('x', "0-1-2-3");
+    Verify(bld, "ar-x-0-1-2-3",
+           "setExtension('x', \"0-1-2-3\") got Error: %s\n");
+
+    bld.setExtension('X', "0-12345678-x-x");
+    Verify(bld, "ar-x-0-12345678-x-x",
+           "setExtension('X', \"0-12345678-x-x\") got Error: %s\n");
+    // Changing the region must leave the extension in place.
+    bld.setRegion("TH");
+    Verify(bld, "ar-TH-x-0-12345678-x-x",
+           "setRegion(\"TH\") got Error: %s\n");
+    // An empty value is expected to remove the extension.
+    bld.setExtension('X', "");
+    Verify(bld, "ar-TH",
+           "setExtension('X', \"\") got Error: %s\n");
+}
+
+void LocaleBuilderTest::TestSetExtensionValidatePUWellFormed() {
+    // Private-use values: one or more subtags of 1 to 8 alphanumerics each.
+    static const char* wellFormedExtensions[] = {
+        "a",  // one short subtag
+        "z",  // one short subtag
+        "0",  // one short subtag, a digit
+        "9",  // one short subtag, a digit
+        "a-0",  // two short subtags, letter then digit
+        "9-z",  // two short subtags, digit then letter
+        "ab",
+        "abc",
+        "abcefghi",  // one long subtag
+        "87654321",
+        "01",
+        "234",
+        "0a-ab-87654321",  // three subtags
+        "87654321-ab-00-3A",  // four subtags
+        "a-9-87654321",  // three subtags, short and long mixed
+        "87654321-ab-0-3A",
+    };
+    for (const char* value : wellFormedExtensions) {
+        LocaleBuilder builder;
+        builder.setExtension('x', value);
+        // The setter takes no UErrorCode; the outcome is observed through build().
+        UErrorCode errorCode = U_ZERO_ERROR;
+        Locale built = builder.build(errorCode);
+        if (U_SUCCESS(errorCode)) { continue; }
+        errln("setExtension('x', \"%s\") got Error: %s\n",
+              value, u_errorName(errorCode));
+    }
+}
+
+void LocaleBuilderTest::TestSetExtensionValidatePUIllFormed() {
+    static const char* illFormed[] = {
+        "123456789",  // over-long subtag
+        "abcdefghi",  // over-long subtag
+        "ab-123456789",  // over-long second subtag
+        "abcdefghi-12",  // over-long first subtag
+        "a-ab-987654321",  // over-long third subtag
+        "987654321-a-0-3",  // over-long first subtag
+    };
+    for (const char* candidate : illFormed) {
+        LocaleBuilder builder;
+        builder.setExtension('x', candidate);
+        UErrorCode errorCode = U_ZERO_ERROR;
+        Locale built = builder.build(errorCode);
+        // Any status other than U_ILLEGAL_ARGUMENT_ERROR is reported as a failure.
+        if (errorCode == U_ILLEGAL_ARGUMENT_ERROR) { continue; }
+        errln("setExtension('x', \"%s\") should fail but has no Error\n",
+              candidate);
+    }
+}
+
+void LocaleBuilderTest::TestSetExtensionOthers() {
+    // Exercises extensions whose singleton is not 't', 'u' or 'x', checking the tag after each step.
+    LocaleBuilder bld;
+    bld.setLanguage("fr");
+    Verify(bld, "fr", "setLanguage(\"fr\") got Error: %s\n");
+
+    bld.setExtension('Z', "ab");  // upper-case singleton is expected in lower case in the tag
+    Verify(bld, "fr-z-ab",
+           "setExtension('Z', \"ab\") got Error: %s\n");
+
+    bld.setExtension('0', "xyz12345-abcdefg");  // expected to be emitted before the 'z' extension
+    Verify(bld, "fr-0-xyz12345-abcdefg-z-ab",
+           "setExtension('0', \"xyz12345-abcdefg\") got Error: %s\n");
+
+    bld.setExtension('a', "01-12345678-ABcdef");  // value is expected in lower case in the tag
+    Verify(bld, "fr-0-xyz12345-abcdefg-a-01-12345678-abcdef-z-ab",
+           "setExtension('a', \"01-12345678-ABcdef\") got Error: %s\n");
+
+    bld.setRegion("TH");
+    Verify(bld, "fr-TH-0-xyz12345-abcdefg-a-01-12345678-abcdef-z-ab",
+           "setRegion(\"TH\") got Error: %s\n");
+
+    bld.setScript("Arab");
+    Verify(bld, "fr-Arab-TH-0-xyz12345-abcdefg-a-01-12345678-abcdef-z-ab",
+           "setScript(\"Arab\") got Error: %s\n");
+
+    bld.setExtension('A', "97");  // expected to replace the value previously set for 'a'
+    Verify(bld, "fr-Arab-TH-0-xyz12345-abcdefg-a-97-z-ab",
+           "setExtension('A', \"97\") got Error: %s\n");
+
+    bld.setExtension('a', "");  // an empty value is expected to remove the extension
+    Verify(bld, "fr-Arab-TH-0-xyz12345-abcdefg-z-ab",
+           "setExtension('a', \"\") got Error: %s\n");
+
+    bld.setExtension('0', "");
+    Verify(bld, "fr-Arab-TH-z-ab",
+           "setExtension('0', \"\") got Error: %s\n");
+}
+
+void LocaleBuilderTest::TestSetExtensionValidateOthersWellFormed() {
+    // Every value consists only of alphanumeric subtags that are 2 to 8 characters long.
+    static const char* wellFormedExtensions[] = {
+        "ab",
+        "abc",
+        "abcefghi",
+        "01",
+        "234",
+        "87654321",
+        "0a-ab-87654321",
+        "87654321-ab-00-3A",
+    };
+    // Part 1: pair each value with a singleton letter, cycling through 'a'..'z'; all must build.
+    const char * aToZ = "abcdefghijklmnopqrstuvwxyz";
+    const int32_t aToZLen = static_cast<int32_t>(uprv_strlen(aToZ));
+    int32_t letterIndex = 0;
+    for (const char* extension : wellFormedExtensions) {
+        char ch = aToZ[letterIndex];
+        letterIndex = (letterIndex + 1) % aToZLen;
+        UErrorCode status = U_ZERO_ERROR;
+        LocaleBuilder bld;
+        bld.setExtension(ch, extension);
+        Locale loc = bld.build(status);
+        if (U_FAILURE(status)) {
+            errln("setExtension('%c', \"%s\") got Error: %s\n", ch, extension, u_errorName(status));
+        }
+    }
+    // Part 2: try many keys; ASCII letters and digits must be accepted, anything else rejected.
+    const char* someChars =
+        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789`~!@#$%^&*()-_=+;:,.<>?";
+    const int32_t someCharsLen = static_cast<int32_t>(uprv_strlen(someChars));
+    for (int32_t charIndex = 0; charIndex < someCharsLen; charIndex++) {
+        char ch = someChars[charIndex];
+        const char* extension = wellFormedExtensions[ch % UPRV_LENGTHOF(wellFormedExtensions)];
+        UErrorCode status = U_ZERO_ERROR;
+        LocaleBuilder bld;
+        bld.setExtension(ch, extension);
+        Locale loc = bld.build(status);
+        if (uprv_isASCIILetter(ch) || ('0' <= ch && ch <= '9')) {
+            // 't', 'u' and 'x' are not checked here; presumably their value rules differ (see T/U/PU tests).
+            if (ch != 't' && ch != 'T' && ch != 'u' && ch != 'U' && ch != 'x' && ch != 'X') {
+                if (U_FAILURE(status)) {
+                    errln("setExtension('%c', \"%s\") got Error: %s\n",
+                          ch, extension, u_errorName(status));
+                }
+            }
+        } else {
+            if (status != U_ILLEGAL_ARGUMENT_ERROR) {
+                errln("setExtension('%c', \"%s\") should fail but has no Error\n", ch, extension);
+            }
+        }
+    }
+}
+
+void LocaleBuilderTest::TestSetExtensionValidateOthersIllFormed() {
+    static const char* illFormed[] = {  // each value has a subtag shorter than 2 or longer than 8
+        "0",  // Too short
+        "a",  // Too short
+        "123456789",  // Too long
+        "abcdefghi",  // Too long
+        "ab-123456789",  // Second subtag too long
+        "abcdefghi-12",  // First subtag too long
+        "a-ab-87654321",  // First subtag too short
+        "87654321-a-0-3",  // Second to fourth subtags too short
+        "ab-cd-987654321",  // Third subtag too long
+        "987654321-ab-00-3A",  // First subtag too long
+    };
+    static const char aToZ[] = "abcdefghijklmnopqrstuvwxyz";
+    int32_t i = 0;
+    for (const char* ill : illFormed) {
+        char ch = aToZ[i];
+        i = (i + 1) % (UPRV_LENGTHOF(aToZ) - 1);  // -1 excludes the terminating NUL from the cycle
+        UErrorCode status = U_ZERO_ERROR;
+        LocaleBuilder bld;
+        bld.setExtension(ch, ill);
+        Locale loc = bld.build(status);
+        if (status != U_ILLEGAL_ARGUMENT_ERROR) {
+            errln("setExtension('%c', \"%s\") should fail but has no Error\n", ch, ill);
+        }
+    }
+}
+
+void LocaleBuilderTest::TestSetLocale() {
+    // Builds a locale field by field, then checks that setLocale() on a fresh builder reproduces it.
+    LocaleBuilder bld1, bld2;
+    UErrorCode status = U_ZERO_ERROR;
+    Locale l1 = bld1.setLanguage("en").setScript("Latn").setRegion("MX")
+        .setVariant("3456-abcde")
+        .addUnicodeLocaleAttribute("456")
+        .addUnicodeLocaleAttribute("123")
+        .setUnicodeLocaleKeyword("nu", "thai")
+        .setUnicodeLocaleKeyword("co", "stroke")
+        .setUnicodeLocaleKeyword("ca", "chinese")
+        .build(status);
+    if (U_FAILURE(status) || l1.isBogus()) {
+        errln("build got Error: %s\n", u_errorName(status));
+    }
+    status = U_ZERO_ERROR;
+    // Use the untouched bld2 so that l2 can only come from what setLocale() copies out of l1.
+    Locale l2 = bld2.setLocale(l1).build(status);
+    if (U_FAILURE(status) || l2.isBogus()) {
+        errln("build got Error: %s\n", u_errorName(status));
+    }
+
+    if (l1 != l2) {
+        errln("Two locales should be the same, but one is '%s' and the other is '%s'",
+              l1.getName(), l2.getName());
+    }
+}
+
+void LocaleBuilderTest::TestPosixCases() {
+    // Checks that setLocale() with a "va-posix" locale overrides all state previously set on the builder.
+    UErrorCode status = U_ZERO_ERROR;
+    Locale l1 = Locale::forLanguageTag("en-US-u-va-posix", status);
+    if (U_FAILURE(status) || l1.isBogus()) {
+        errln("forLanguageTag got Error: %s\n", u_errorName(status));
+    }
+    LocaleBuilder bld;
+    bld.setLanguage("en").setRegion("MX").setScript("Arab")
+        .setUnicodeLocaleKeyword("nu", "Thai")
+        .setExtension('x', "1");
+    // All of above should be cleared by the setLocale call.
+    Locale l2 = bld.setLocale(l1).build(status);
+    if (U_FAILURE(status) || l2.isBogus()) {
+        errln("build got Error: %s\n", u_errorName(status));
+    }
+    if (l1 != l2) {
+        errln("The result locale should be the same as the setLocale argument %s but got %s\n",
+              l1.toLanguageTag<std::string>(status).c_str(),
+              l2.toLanguageTag<std::string>(status).c_str());
+    }
+    // The result is also expected to equal the Locale constructed from the ID "en-US-POSIX".
+    Locale posix("en-US-POSIX");
+    if (posix != l2) {
+        errln("The result locale should be the same as %s but got %s\n",
+              posix.getName(), l2.getName());
+    }
+}
diff --git a/icu4c/source/test/intltest/localebuildertest.h b/icu4c/source/test/intltest/localebuildertest.h
new file mode 100644
index 0000000..41f3730
--- /dev/null
+++ b/icu4c/source/test/intltest/localebuildertest.h
@@ -0,0 +1,51 @@
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include "intltest.h"
+#include "unicode/localebuilder.h"
+
+
+/**
+ * Test suite for the LocaleBuilder class; each Test* member function is one individual test case.
+ **/
+class LocaleBuilderTest: public IntlTest {
+ public:
+    LocaleBuilderTest();
+    ~LocaleBuilderTest() override;
+
+    void runIndexedTest( int32_t index, UBool exec, const char* &name, char* par = nullptr ) override;
+
+    void TestAddRemoveUnicodeLocaleAttribute();
+    void TestAddRemoveUnicodeLocaleAttributeWellFormed();
+    void TestAddUnicodeLocaleAttributeIllFormed();
+    void TestLocaleBuilder();
+    void TestLocaleBuilderBasic();
+    void TestPosixCases();
+    void TestSetExtensionOthers();
+    void TestSetExtensionPU();
+    void TestSetExtensionT();
+    void TestSetExtensionU();
+    void TestSetExtensionValidateOthersIllFormed();
+    void TestSetExtensionValidateOthersWellFormed();
+    void TestSetExtensionValidatePUIllFormed();
+    void TestSetExtensionValidatePUWellFormed();
+    void TestSetExtensionValidateTIllFormed();
+    void TestSetExtensionValidateTWellFormed();
+    void TestSetExtensionValidateUIllFormed();
+    void TestSetExtensionValidateUWellFormed();
+    void TestSetLanguageIllFormed();
+    void TestSetLanguageWellFormed();
+    void TestSetLocale();
+    void TestSetRegionIllFormed();
+    void TestSetRegionWellFormed();
+    void TestSetScriptIllFormed();
+    void TestSetScriptWellFormed();
+    void TestSetUnicodeLocaleKeywordIllFormedKey();
+    void TestSetUnicodeLocaleKeywordIllFormedValue();
+    void TestSetUnicodeLocaleKeywordWellFormed();
+    void TestSetVariantIllFormed();
+    void TestSetVariantWellFormed();
+
+ private:
+    void Verify(LocaleBuilder& bld, const char* expected, const char* msg);  // msg: format with one %s
+};