ICU-20328 Implement LocaleBuilder
Design Doc: https://goo.gl/Qf12p3
diff --git a/icu4c/source/common/Makefile.in b/icu4c/source/common/Makefile.in
index e10d3a2..e663cb8 100644
--- a/icu4c/source/common/Makefile.in
+++ b/icu4c/source/common/Makefile.in
@@ -88,6 +88,7 @@
ucnv_ext.o ucnvmbcs.o ucnv2022.o ucnvhz.o ucnv_lmb.o ucnvisci.o ucnvdisp.o ucnv_set.o ucnv_ct.o \
resource.o uresbund.o ures_cnv.o uresdata.o resbund.o resbund_cnv.o \
ucurr.o \
+localebuilder.o \
messagepattern.o ucat.o locmap.o uloc.o locid.o locutil.o locavailable.o locdispnames.o locdspnm.o loclikely.o locresdata.o \
bytestream.o stringpiece.o bytesinkutil.o \
stringtriebuilder.o bytestriebuilder.o \
diff --git a/icu4c/source/common/common.vcxproj b/icu4c/source/common/common.vcxproj
index eb9b456..14d6e6c 100644
--- a/icu4c/source/common/common.vcxproj
+++ b/icu4c/source/common/common.vcxproj
@@ -256,6 +256,7 @@
<ClCompile Include="uresdata.cpp" />
<ClCompile Include="resource.cpp" />
<ClCompile Include="ucurr.cpp" />
+ <ClCompile Include="localebuilder.cpp" />
<ClCompile Include="caniter.cpp" />
<ClCompile Include="filterednormalizer2.cpp" />
<ClCompile Include="loadednormalizer2impl.cpp" />
@@ -445,6 +446,7 @@
<ClInclude Include="ustr_imp.h" />
<ClInclude Include="static_unicode_sets.h" />
<ClInclude Include="capi_helper.h" />
+ <ClInclude Include="unicode\localebuilder.h" />
</ItemGroup>
<ItemGroup>
<ResourceCompile Include="common.rc" />
diff --git a/icu4c/source/common/common.vcxproj.filters b/icu4c/source/common/common.vcxproj.filters
index 85d0d9b..72fef1b 100644
--- a/icu4c/source/common/common.vcxproj.filters
+++ b/icu4c/source/common/common.vcxproj.filters
@@ -361,6 +361,9 @@
<ClCompile Include="resource.cpp">
<Filter>locales &amp; resources</Filter>
</ClCompile>
+ <ClCompile Include="localebuilder.cpp">
+ <Filter>locales &amp; resources</Filter>
+ </ClCompile>
<ClCompile Include="caniter.cpp">
<Filter>normalization</Filter>
</ClCompile>
@@ -1225,5 +1228,8 @@
<CustomBuild Include="unicode\stringoptions.h">
<Filter>strings</Filter>
</CustomBuild>
+ <CustomBuild Include="unicode\localebuilder.h">
+ <Filter>locales &amp; resources</Filter>
+ </CustomBuild>
</ItemGroup>
</Project>
diff --git a/icu4c/source/common/common_uwp.vcxproj b/icu4c/source/common/common_uwp.vcxproj
index 1265b67..af030c4 100644
--- a/icu4c/source/common/common_uwp.vcxproj
+++ b/icu4c/source/common/common_uwp.vcxproj
@@ -383,6 +383,7 @@
<ClCompile Include="uresdata.cpp" />
<ClCompile Include="resource.cpp" />
<ClCompile Include="ucurr.cpp" />
+ <ClCompile Include="localebuilder.cpp" />
<ClCompile Include="caniter.cpp" />
<ClCompile Include="filterednormalizer2.cpp" />
<ClCompile Include="loadednormalizer2impl.cpp" />
@@ -572,6 +573,7 @@
<ClInclude Include="ustr_imp.h" />
<ClInclude Include="static_unicode_sets.h" />
<ClInclude Include="capi_helper.h" />
+ <ClInclude Include="unicode\localebuilder.h" />
</ItemGroup>
<ItemGroup>
<ResourceCompile Include="common.rc" />
diff --git a/icu4c/source/common/localebuilder.cpp b/icu4c/source/common/localebuilder.cpp
new file mode 100644
index 0000000..fe931fc
--- /dev/null
+++ b/icu4c/source/common/localebuilder.cpp
@@ -0,0 +1,436 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include <utility>
+
+#include "bytesinkutil.h" // CharStringByteSink
+#include "charstr.h"
+#include "cstring.h"
+#include "ulocimp.h"
+#include "unicode/localebuilder.h"
+#include "unicode/locid.h"
+
+U_NAMESPACE_BEGIN
+
+// ASCII-only classification helpers used to validate extension singleton keys.
+#define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9'))
+#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) )
+
+// Keyword name under which Unicode locale attributes are stored on a Locale.
+// Both the pointer and the pointee are const and the symbol has internal
+// linkage, so it can neither be reassigned at runtime nor clash with a
+// same-named symbol defined in another translation unit.
+static const char* const kAttributeKey = "attribute";
+
+// Validates the value of an extension according to its singleton key:
+// 'u' (Unicode), 't' (transformed) and 'x' (private use) each have a dedicated
+// checker; every other singleton uses the generic extension-subtag rule.
+// The key is compared case-insensitively.
+static bool _isExtensionSubtags(char key, const char* s, int32_t len) {
+    const char singleton = uprv_tolower(key);
+    if (singleton == 'u') {
+        return ultag_isUnicodeExtensionSubtags(s, len);
+    }
+    if (singleton == 't') {
+        return ultag_isTransformedExtensionSubtags(s, len);
+    }
+    if (singleton == 'x') {
+        return ultag_isPrivateuseValueSubtags(s, len);
+    }
+    return ultag_isExtensionSubtags(s, len);
+}
+
+// Creates an empty builder: no error recorded, empty language/script/region
+// buffers, and no variant or extensions allocated yet.
+LocaleBuilder::LocaleBuilder()
+    : UObject(),
+      status_(U_ZERO_ERROR),
+      language_(),
+      script_(),
+      region_(),
+      variant_(nullptr),
+      extensions_(nullptr)
+{
+    // Each subtag buffer starts out as an empty C string.
+    region_[0] = 0;
+    script_[0] = 0;
+    language_[0] = 0;
+}
+
+// Releases the heap-allocated variant and extensions, if any.
+LocaleBuilder::~LocaleBuilder()
+{
+    // Deleting a null pointer is a no-op, so no guards are needed.
+    delete extensions_;
+    delete variant_;
+}
+
+// Replaces the whole builder state with the fields of `locale`.
+// clear() runs first, so any previously recorded error is discarded; errors
+// raised while validating the individual fields are recorded in status_.
+LocaleBuilder& LocaleBuilder::setLocale(const Locale& locale)
+{
+    clear();
+    setLanguage(locale.getLanguage())
+        .setScript(locale.getScript())
+        .setRegion(locale.getCountry())
+        .setVariant(locale.getVariant());
+    // A full copy of the locale is kept as the source of keywords/extensions.
+    extensions_ = locale.clone();
+    if (extensions_ == nullptr) {
+        status_ = U_MEMORY_ALLOCATION_ERROR;
+    }
+    return *this;
+}
+
+// Parses a BCP 47 language tag and, on success, loads the resulting Locale
+// into the builder through setLocale().
+//
+// setLocale() resets status_ (via clear()), so a parse failure has to be
+// reported by returning before setLocale() is reached.
+//
+// NOTE(review): status_ is handed to forLanguageTag() without being reset
+// first, while the class comment says setLanguageTag clears the internal
+// error code. Confirm the intended behavior when an earlier setter failed.
+LocaleBuilder& LocaleBuilder::setLanguageTag(StringPiece tag)
+{
+    const Locale parsed = Locale::forLanguageTag(tag, status_);
+    if (U_SUCCESS(status_)) {
+        setLocale(parsed);
+    }
+    return *this;
+}
+
+// Copies `input` into the NUL-terminated buffer `dest` after validating it
+// with `test`. An empty input clears the field. Invalid input leaves `dest`
+// untouched and sets U_ILLEGAL_ARGUMENT_ERROR. Nothing happens when
+// `errorCode` already indicates a failure.
+//
+// No buffer size is passed in: this assumes `test` only accepts inputs that
+// fit into `dest` including the terminator (TODO confirm against the sizes
+// of the member buffers declared in the header).
+static void setField(StringPiece input, char* dest, UErrorCode& errorCode,
+                     UBool (*test)(const char*, int32_t)) {
+    if (U_FAILURE(errorCode)) { return; }
+    const int32_t length = input.length();
+    if (length == 0) {
+        dest[0] = '\0';
+        return;
+    }
+    if (!test(input.data(), length)) {
+        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+    uprv_memcpy(dest, input.data(), length);
+    dest[length] = '\0';
+}
+
+// Sets the language subtag; an empty value clears it. An ill-formed value
+// records U_ILLEGAL_ARGUMENT_ERROR in status_.
+LocaleBuilder& LocaleBuilder::setLanguage(StringPiece language)
+{
+    setField(language, language_, status_, ultag_isLanguageSubtag);
+    return *this;
+}
+
+// Sets the script subtag; an empty value clears it. An ill-formed value
+// records U_ILLEGAL_ARGUMENT_ERROR in status_.
+LocaleBuilder& LocaleBuilder::setScript(StringPiece script)
+{
+    setField(script, script_, status_, ultag_isScriptSubtag);
+    return *this;
+}
+
+// Sets the region subtag; an empty value clears it. An ill-formed value
+// records U_ILLEGAL_ARGUMENT_ERROR in status_.
+LocaleBuilder& LocaleBuilder::setRegion(StringPiece region)
+{
+    setField(region, region_, status_, ultag_isRegionSubtag);
+    return *this;
+}
+
+// Normalizes `len` characters of `data` in place: every '_' becomes '-' and
+// every other character is lowercased with uprv_tolower().
+static void transform(char* data, int32_t len) {
+    for (int32_t i = 0; i < len; ++i) {
+        char& c = data[i];
+        c = (c == '_') ? '-' : uprv_tolower(c);
+    }
+}
+
+// Sets the variant subtag(s); an empty value removes the current variant.
+// The input is normalized ('_' -> '-', lowercase) before validation. An
+// ill-formed value records U_ILLEGAL_ARGUMENT_ERROR in status_ and leaves the
+// previous variant in place. Does nothing if status_ already holds an error.
+LocaleBuilder& LocaleBuilder::setVariant(StringPiece variant)
+{
+    if (U_FAILURE(status_)) { return *this; }
+    if (variant.empty()) {
+        delete variant_;
+        variant_ = nullptr;
+        return *this;
+    }
+    // LocalPointer owns the new string until it is handed over to variant_,
+    // so it is released on every early return, including when the CharString
+    // constructor itself reports a failure. It also turns a null allocation
+    // into U_MEMORY_ALLOCATION_ERROR.
+    LocalPointer<CharString> new_variant(new CharString(variant, status_), status_);
+    if (U_FAILURE(status_)) { return *this; }
+    transform(new_variant->data(), new_variant->length());
+    if (!ultag_isVariantSubtags(new_variant->data(), new_variant->length())) {
+        status_ = U_ILLEGAL_ARGUMENT_ERROR;
+        return *this;
+    }
+    delete variant_;
+    variant_ = new_variant.orphan();
+    return *this;
+}
+
+// Checks whether `value` is acceptable for the legacy keyword `key`.
+//  - one-character key: treated as an extension singleton;
+//  - the attribute key: value must be a list of Unicode locale attributes;
+//  - anything else: key and value are converted to their Unicode locale
+//    key/type forms, and both conversions and both results must be valid.
+static bool
+_isKeywordValue(const char* key, const char* value, int32_t value_len)
+{
+    const bool isSingleCharKey = (key[1] == '\0');
+    if (isSingleCharKey) {
+        return UPRV_ISALPHANUM(uprv_tolower(key[0])) &&
+               _isExtensionSubtags(key[0], value, value_len);
+    }
+    if (uprv_strcmp(key, kAttributeKey) == 0) {
+        return ultag_isUnicodeLocaleAttributes(value, value_len);
+    }
+    // Legacy key/value -> Unicode key/type; either conversion may yield null.
+    const char* const bcpKey = uloc_toUnicodeLocaleKey(key);
+    const char* const bcpType = uloc_toUnicodeLocaleType(key, value);
+    if (bcpKey == nullptr || bcpType == nullptr) {
+        return false;
+    }
+    return ultag_isUnicodeLocaleKey(bcpKey, -1) &&
+           ultag_isUnicodeLocaleType(bcpType, -1);
+}
+
+// Copies every keyword of `from` onto `to`.
+// Attribute values are normalized with transform() before being copied.
+// With `validate` set, the first keyword whose value fails _isKeywordValue()
+// stops the copy with U_ILLEGAL_ARGUMENT_ERROR; keywords copied before that
+// point stay on `to`.
+static void
+_copyExtensions(const Locale& from, Locale* to, bool validate, UErrorCode& errorCode)
+{
+    if (U_FAILURE(errorCode)) { return; }
+    LocalPointer<icu::StringEnumeration> keywords(from.createKeywords(errorCode));
+    if (U_FAILURE(errorCode) || keywords.isNull()) { return; }
+    for (const char* key = keywords->next(nullptr, errorCode);
+         key != nullptr;
+         key = keywords->next(nullptr, errorCode)) {
+        CharString value;
+        CharStringByteSink sink(&value);
+        from.getKeywordValue(key, sink, errorCode);
+        if (U_FAILURE(errorCode)) { return; }
+        const bool isAttribute = (uprv_strcmp(key, kAttributeKey) == 0);
+        if (isAttribute) {
+            transform(value.data(), value.length());
+        }
+        if (validate && !_isKeywordValue(key, value.data(), value.length())) {
+            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+            return;
+        }
+        to->setKeywordValue(key, value.data(), errorCode);
+        if (U_FAILURE(errorCode)) { return; }
+    }
+}
+
+// Clears the Unicode attributes and every Unicode keyword value of `locale`.
+static void
+_clearUAttributesAndKeyType(Locale* locale, UErrorCode& errorCode)
+{
+    // Attributes are cleared by assigning them an empty value.
+    locale->setKeywordValue(kAttributeKey, "", errorCode);
+
+    // Each Unicode keyword is then cleared by assigning it a null value.
+    LocalPointer<icu::StringEnumeration> keys(locale->createUnicodeKeywords(errorCode));
+    if (U_FAILURE(errorCode) || keys.isNull()) { return; }
+    for (const char* key = keys->next(nullptr, errorCode);
+         key != nullptr;
+         key = keys->next(nullptr, errorCode)) {
+        locale->setUnicodeKeywordValue(key, nullptr, errorCode);
+    }
+}
+
+// Applies the Unicode extension string `value` (the part after "-u-") to
+// `locale`: the string is parsed as part of the tag "und-u-<value>" and the
+// resulting keywords are copied over without further validation.
+static void
+_setUnicodeExtensions(Locale* locale, const CharString& value, UErrorCode& errorCode)
+{
+    CharString tag("und-u-", errorCode);
+    tag.append(value, errorCode);
+    const Locale parsed = Locale::forLanguageTag(tag.data(), errorCode);
+    _copyExtensions(parsed, locale, false, errorCode);
+}
+
+// Sets the extension identified by the singleton `key`.
+// The key must be an ASCII letter or digit. The value is normalized
+// ('_' -> '-', lowercase) and, unless empty, validated for that singleton.
+// For 'u' the existing Unicode attributes and keywords are cleared first and
+// then replaced by the ones parsed from the value; other singletons are
+// stored as a plain keyword. Does nothing if status_ already holds an error.
+LocaleBuilder& LocaleBuilder::setExtension(char key, StringPiece value)
+{
+    if (U_FAILURE(status_)) { return *this; }
+    if (!UPRV_ISALPHANUM(key)) {
+        status_ = U_ILLEGAL_ARGUMENT_ERROR;
+        return *this;
+    }
+    CharString normalized(value, status_);
+    if (U_FAILURE(status_)) { return *this; }
+    transform(normalized.data(), normalized.length());
+    const bool hasValue = !normalized.isEmpty();
+    if (hasValue &&
+        !_isExtensionSubtags(key, normalized.data(), normalized.length())) {
+        status_ = U_ILLEGAL_ARGUMENT_ERROR;
+        return *this;
+    }
+    // Lazily create the Locale that carries the extensions.
+    if (extensions_ == nullptr) {
+        extensions_ = new Locale();
+        if (extensions_ == nullptr) {
+            status_ = U_MEMORY_ALLOCATION_ERROR;
+            return *this;
+        }
+    }
+    const bool isUnicodeExtension = (uprv_tolower(key) == 'u');
+    if (!isUnicodeExtension) {
+        // t, x and all other singletons are stored under the one-char key.
+        extensions_->setKeywordValue(StringPiece(&key, 1), normalized.data(),
+                                     status_);
+        return *this;
+    }
+    _clearUAttributesAndKeyType(extensions_, status_);
+    if (U_FAILURE(status_)) { return *this; }
+    if (!value.empty()) {
+        _setUnicodeExtensions(extensions_, normalized, status_);
+    }
+    return *this;
+}
+
+// Sets the Unicode locale keyword `key` to `type`.
+// The key must be a well-formed Unicode locale key; the type must be empty
+// or a well-formed Unicode locale type. Otherwise U_ILLEGAL_ARGUMENT_ERROR
+// is recorded. Does nothing if status_ already holds an error.
+LocaleBuilder& LocaleBuilder::setUnicodeLocaleKeyword(
+      StringPiece key, StringPiece type)
+{
+    if (U_FAILURE(status_)) { return *this; }
+    if (!ultag_isUnicodeLocaleKey(key.data(), key.length())) {
+        status_ = U_ILLEGAL_ARGUMENT_ERROR;
+        return *this;
+    }
+    if (!type.empty() &&
+        !ultag_isUnicodeLocaleType(type.data(), type.length())) {
+        status_ = U_ILLEGAL_ARGUMENT_ERROR;
+        return *this;
+    }
+    // Lazily create the Locale that carries the extensions.
+    if (extensions_ == nullptr) {
+        extensions_ = new Locale();
+        if (extensions_ == nullptr) {
+            status_ = U_MEMORY_ALLOCATION_ERROR;
+            return *this;
+        }
+    }
+    extensions_->setUnicodeKeywordValue(key, type, status_);
+    return *this;
+}
+
+// Adds one Unicode locale attribute to the builder.
+// The value is normalized ('_' -> '-', lowercase) and must be a single
+// well-formed attribute, otherwise U_ILLEGAL_ARGUMENT_ERROR is recorded.
+// The attribute is merged into the existing attribute list at the first
+// position where it compares lower than an existing entry (strcmp order);
+// if it is already present the list is left unchanged. Does nothing if
+// status_ already holds an error.
+LocaleBuilder& LocaleBuilder::addUnicodeLocaleAttribute(
+    StringPiece value)
+{
+    CharString value_str(value, status_);
+    if (U_FAILURE(status_)) { return *this; }
+    transform(value_str.data(), value_str.length());
+    if (!ultag_isUnicodeLocaleAttribute(value_str.data(), value_str.length())) {
+        status_ = U_ILLEGAL_ARGUMENT_ERROR;
+        return *this;
+    }
+    if (extensions_ == nullptr) {
+        // No extensions yet: the new attribute becomes the whole list.
+        extensions_ = new Locale();
+        if (extensions_ == nullptr) {
+            status_ = U_MEMORY_ALLOCATION_ERROR;
+            return *this;
+        }
+        extensions_->setKeywordValue(kAttributeKey, value_str.data(), status_);
+        return *this;
+    }
+
+    CharString attributes;
+    CharStringByteSink sink(&attributes);
+    UErrorCode localErrorCode = U_ZERO_ERROR;
+    extensions_->getKeywordValue(kAttributeKey, sink, localErrorCode);
+    if (U_FAILURE(localErrorCode)) {
+        // The current attributes could not be read: just set the new one.
+        extensions_->setKeywordValue(kAttributeKey, value_str.data(), status_);
+        return *this;
+    }
+
+    // Split the list in place: every separator ('_' or '-') becomes a NUL
+    // terminator and everything else is lowercased, so that each attribute
+    // can be compared with uprv_strcmp() and measured with uprv_strlen().
+    // Without this split the whole list would be compared as one token and
+    // an attribute already present in a multi-entry list would be added twice.
+    char* p = attributes.data();
+    for (int32_t i = 0; i < attributes.length(); i++, p++) {
+        *p = (*p == '_' || *p == '-') ? '\0' : uprv_tolower(*p);
+    }
+
+    const char* start = attributes.data();
+    const char* limit = attributes.data() + attributes.length();
+    CharString new_attributes;
+    bool inserted = false;
+    while (start < limit) {
+        if (!inserted) {
+            int cmp = uprv_strcmp(start, value_str.data());
+            if (cmp == 0) { return *this; }  // Already present: nothing to do.
+            if (cmp > 0) {
+                // First existing entry that sorts after the new attribute:
+                // the new attribute goes right before it.
+                if (!new_attributes.isEmpty()) {
+                    new_attributes.append('_', status_);
+                }
+                new_attributes.append(value_str.data(), status_);
+                inserted = true;
+            }
+        }
+        if (!new_attributes.isEmpty()) {
+            new_attributes.append('_', status_);
+        }
+        new_attributes.append(start, status_);
+        start += uprv_strlen(start) + 1;
+    }
+    if (!inserted) {
+        // Sorts after every existing entry (or the list was empty).
+        if (!new_attributes.isEmpty()) {
+            new_attributes.append('_', status_);
+        }
+        new_attributes.append(value_str.data(), status_);
+    }
+    // Not yet in the attributes, set the merged list.
+    extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_);
+    return *this;
+}
+
+// Removes one Unicode locale attribute from the builder.
+// The value is normalized ('_' -> '-', lowercase) and must be a single
+// well-formed attribute, otherwise U_ILLEGAL_ARGUMENT_ERROR is recorded.
+// If the attribute is not present, or there are no extensions/attributes at
+// all, the builder is left unchanged.
+LocaleBuilder& LocaleBuilder::removeUnicodeLocaleAttribute(
+    StringPiece value)
+{
+    CharString target(value, status_);
+    if (U_FAILURE(status_)) { return *this; }
+    transform(target.data(), target.length());
+    if (!ultag_isUnicodeLocaleAttribute(target.data(), target.length())) {
+        status_ = U_ILLEGAL_ARGUMENT_ERROR;
+        return *this;
+    }
+    if (extensions_ == nullptr) { return *this; }
+
+    UErrorCode localErrorCode = U_ZERO_ERROR;
+    CharString attributes;
+    CharStringByteSink sink(&attributes);
+    extensions_->getKeywordValue(kAttributeKey, sink, localErrorCode);
+    // Nothing to remove if the lookup failed or the list is empty.
+    if (U_FAILURE(localErrorCode) || attributes.isEmpty()) { return *this; }
+
+    // Replace each separator ('_' or '-') with a null terminator in place and
+    // lowercase everything else, so that every attribute can be compared with
+    // uprv_strcmp() below.
+    char* const buffer = attributes.data();
+    const int32_t length = attributes.length();
+    for (int32_t i = 0; i < length; ++i) {
+        const char c = buffer[i];
+        buffer[i] = (c == '_' || c == '-') ? '\0' : uprv_tolower(c);
+    }
+
+    // Rebuild the list without the entries equal to the target.
+    const char* const limit = buffer + length;
+    CharString remaining;
+    bool found = false;
+    for (const char* token = buffer; token < limit;
+         token += uprv_strlen(token) + 1) {
+        if (uprv_strcmp(token, target.data()) == 0) {
+            found = true;
+            continue;
+        }
+        if (!remaining.isEmpty()) {
+            remaining.append('_', status_);
+        }
+        remaining.append(token, status_);
+    }
+    // Only touch the locale when something was actually removed.
+    if (found) {
+        extensions_->setKeywordValue(kAttributeKey, remaining.data(), status_);
+    }
+    return *this;
+}
+
+// Resets the builder to its initial state: no error, empty language, script
+// and region, no variant and no extensions.
+LocaleBuilder& LocaleBuilder::clear()
+{
+    status_ = U_ZERO_ERROR;
+    region_[0] = 0;
+    script_[0] = 0;
+    language_[0] = 0;
+    delete variant_;
+    variant_ = nullptr;
+    return clearExtensions();
+}
+
+// Discards all extensions; the other fields and the error state are kept.
+LocaleBuilder& LocaleBuilder::clearExtensions()
+{
+    // Deleting a null pointer is a no-op, so no guard is needed.
+    delete extensions_;
+    extensions_ = nullptr;
+    return *this;
+}
+
+// Returns a Locale that has been marked bogus; used by build() as the result
+// for every failure path.
+// NOTE(review): this helper has external linkage although it looks
+// file-private; consider making it static if nothing else refers to it.
+Locale makeBogusLocale() {
+    Locale result;
+    result.setToBogus();
+    return result;
+}
+
+// Builds a Locale from the current builder state.
+// Returns a bogus Locale when `errorCode` is already a failure on entry, when
+// an earlier setter recorded an error (which is then copied to `errorCode`),
+// or when assembling the locale or copying/validating the extensions fails.
+Locale LocaleBuilder::build(UErrorCode& errorCode)
+{
+    if (U_FAILURE(errorCode)) {
+        return makeBogusLocale();
+    }
+    if (U_FAILURE(status_)) {
+        errorCode = status_;
+        return makeBogusLocale();
+    }
+    // Assemble "language[-script][-region][-variant]".
+    CharString id(language_, errorCode);
+    if (script_[0] != '\0') {
+        id.append('-', errorCode).append(StringPiece(script_), errorCode);
+    }
+    if (region_[0] != '\0') {
+        id.append('-', errorCode).append(StringPiece(region_), errorCode);
+    }
+    if (variant_ != nullptr) {
+        id.append('-', errorCode).append(StringPiece(variant_->data()), errorCode);
+    }
+    if (U_FAILURE(errorCode)) {
+        return makeBogusLocale();
+    }
+    Locale result(id.data());
+    if (extensions_ != nullptr) {
+        // Extensions are validated while they are copied onto the result.
+        _copyExtensions(*extensions_, &result, true, errorCode);
+    }
+    if (U_FAILURE(errorCode)) {
+        return makeBogusLocale();
+    }
+    return result;
+}
+
+U_NAMESPACE_END
diff --git a/icu4c/source/common/uloc_tag.cpp b/icu4c/source/common/uloc_tag.cpp
index 9b5de7f..063efd4 100644
--- a/icu4c/source/common/uloc_tag.cpp
+++ b/icu4c/source/common/uloc_tag.cpp
@@ -406,13 +406,22 @@
}
static UBool
-_isLanguageSubtag(const char* s, int32_t len) {
+_isAlphaNumericStringLimitedLength(const char* s, int32_t len, int32_t min, int32_t max) {
+ if (len < 0) {
+ len = (int32_t)uprv_strlen(s);
+ }
+ if (len >= min && len <= max && _isAlphaNumericString(s, len)) {
+ return TRUE;
+ }
+ return FALSE;
+}
+
+U_CFUNC UBool
+ultag_isLanguageSubtag(const char* s, int32_t len) {
/*
- * language = 2*3ALPHA ; shortest ISO 639 code
- * ["-" extlang] ; sometimes followed by
- * ; extended language subtags
- * / 4ALPHA ; or reserved for future use
- * / 5*8ALPHA ; or registered language subtag
+ * unicode_language_subtag = alpha{2,3} | alpha{5,8};
+ * NOTE: Per ICUTC 2019/01/23- accepting alpha 4
+ * See ICU-20372
*/
if (len < 0) {
len = (int32_t)uprv_strlen(s);
@@ -438,8 +447,8 @@
return FALSE;
}
-static UBool
-_isScriptSubtag(const char* s, int32_t len) {
+U_CFUNC UBool
+ultag_isScriptSubtag(const char* s, int32_t len) {
/*
* script = 4ALPHA ; ISO 15924 code
*/
@@ -452,8 +461,8 @@
return FALSE;
}
-static UBool
-_isRegionSubtag(const char* s, int32_t len) {
+U_CFUNC UBool
+ultag_isRegionSubtag(const char* s, int32_t len) {
/*
* region = 2ALPHA ; ISO 3166-1 code
* / 3DIGIT ; UN M.49 code
@@ -479,7 +488,7 @@
if (len < 0) {
len = (int32_t)uprv_strlen(s);
}
- if (len >= 5 && len <= 8 && _isAlphaNumericString(s, len)) {
+ if (_isAlphaNumericStringLimitedLength(s, len, 5, 8)) {
return TRUE;
}
if (len == 4 && ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) {
@@ -489,18 +498,47 @@
}
static UBool
+_isSepListOf(UBool (*test)(const char*, int32_t), const char* s, int32_t len) {
+ // Returns TRUE when s consists of one or more SEP-delimited subtags and
+ // `test` accepts every one of them. A negative len means s is
+ // NUL-terminated. Empty input, a leading or trailing SEP, and two
+ // consecutive SEPs are all rejected because they produce an empty subtag.
+ const char *p = s;
+ // Start of the subtag currently being scanned, or NULL right after a SEP
+ // (and at the very beginning).
+ const char *pSubtag = NULL;
+
+ if (len < 0) {
+ len = (int32_t)uprv_strlen(s);
+ }
+
+ while ((p - s) < len) {
+ if (*p == SEP) {
+ // A SEP with no subtag in progress means an empty subtag.
+ if (pSubtag == NULL) {
+ return FALSE;
+ }
+ if (!test(pSubtag, (int32_t)(p - pSubtag))) {
+ return FALSE;
+ }
+ pSubtag = NULL;
+ } else if (pSubtag == NULL) {
+ pSubtag = p;
+ }
+ p++;
+ }
+ // Nothing pending at the end: the input was empty or ended with SEP.
+ if (pSubtag == NULL) {
+ return FALSE;
+ }
+ // Check the last subtag, which is not followed by a SEP.
+ return test(pSubtag, (int32_t)(p - pSubtag));
+}
+
+U_CFUNC UBool
+ultag_isVariantSubtags(const char* s, int32_t len) {
+    // Well-formed when every SEP-delimited piece is a variant subtag.
+    return _isSepListOf(_isVariantSubtag, s, len);
+}
+
+// This is for the ICU-specific "lvariant" handling.
+static UBool
_isPrivateuseVariantSubtag(const char* s, int32_t len) {
/*
* variant = 1*8alphanum ; registered variants
* / (DIGIT 3alphanum)
*/
- if (len < 0) {
- len = (int32_t)uprv_strlen(s);
- }
- if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) {
- return TRUE;
- }
- return FALSE;
+ return _isAlphaNumericStringLimitedLength(s, len , 1, 8);
}
static UBool
@@ -528,42 +566,12 @@
/*
* extension = singleton 1*("-" (2*8alphanum))
*/
- if (len < 0) {
- len = (int32_t)uprv_strlen(s);
- }
- if (len >= 2 && len <= 8 && _isAlphaNumericString(s, len)) {
- return TRUE;
- }
- return FALSE;
+ return _isAlphaNumericStringLimitedLength(s, len, 2, 8);
}
-static UBool
-_isExtensionSubtags(const char* s, int32_t len) {
- const char *p = s;
- const char *pSubtag = NULL;
-
- if (len < 0) {
- len = (int32_t)uprv_strlen(s);
- }
-
- while ((p - s) < len) {
- if (*p == SEP) {
- if (pSubtag == NULL) {
- return FALSE;
- }
- if (!_isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag))) {
- return FALSE;
- }
- pSubtag = NULL;
- } else if (pSubtag == NULL) {
- pSubtag = p;
- }
- p++;
- }
- if (pSubtag == NULL) {
- return FALSE;
- }
- return _isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag));
+U_CFUNC UBool
+ultag_isExtensionSubtags(const char* s, int32_t len) {
+    // Well-formed when every SEP-delimited piece is an extension subtag.
+    return _isSepListOf(_isExtensionSubtag, s, len);
}
static UBool
@@ -571,46 +579,32 @@
/*
* privateuse = "x" 1*("-" (1*8alphanum))
*/
- if (len < 0) {
- len = (int32_t)uprv_strlen(s);
- }
- if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) {
- return TRUE;
- }
- return FALSE;
+ return _isAlphaNumericStringLimitedLength(s, len, 1, 8);
}
-static UBool
-_isPrivateuseValueSubtags(const char* s, int32_t len) {
- const char *p = s;
- const char *pSubtag = NULL;
+U_CFUNC UBool
+ultag_isPrivateuseValueSubtags(const char* s, int32_t len) {
+    // Well-formed when every SEP-delimited piece is a private-use subtag.
+    return _isSepListOf(_isPrivateuseValueSubtag, s, len);
+}
- if (len < 0) {
- len = (int32_t)uprv_strlen(s);
- }
+U_CFUNC UBool
+ultag_isUnicodeLocaleAttribute(const char* s, int32_t len) {
+    /*
+     * attribute = alphanum{3,8} ;
+     * i.e. a single subtag of 3 to 8 alphanumeric characters.
+     */
+    const int32_t kMinLength = 3;
+    const int32_t kMaxLength = 8;
+    return _isAlphaNumericStringLimitedLength(s, len, kMinLength, kMaxLength);
+}
- while ((p - s) < len) {
- if (*p == SEP) {
- if (pSubtag == NULL) {
- return FALSE;
- }
- if (!_isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag))) {
- return FALSE;
- }
- pSubtag = NULL;
- } else if (pSubtag == NULL) {
- pSubtag = p;
- }
- p++;
- }
- if (pSubtag == NULL) {
- return FALSE;
- }
- return _isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag));
+U_CFUNC UBool
+ultag_isUnicodeLocaleAttributes(const char* s, int32_t len) {
+    // Well-formed when every SEP-delimited piece is a single attribute.
+    return _isSepListOf(ultag_isUnicodeLocaleAttribute, s, len);
}
U_CFUNC UBool
ultag_isUnicodeLocaleKey(const char* s, int32_t len) {
+ /*
+ * key = alphanum alpha ;
+ */
if (len < 0) {
len = (int32_t)uprv_strlen(s);
}
@@ -621,8 +615,159 @@
}
U_CFUNC UBool
+_isUnicodeLocaleTypeSubtag(const char*s, int32_t len) {
+ // Checks ONE subtag of a Unicode locale type (not a whole SEP-delimited
+ // list): 3 to 8 alphanumeric characters.
+ // NOTE(review): declared U_CFUNC although the underscore-prefixed name
+ // suggests a file-private helper; confirm whether it should be static.
+ /*
+ * alphanum{3,8}
+ */
+ return _isAlphaNumericStringLimitedLength(s, len , 3, 8);
+}
+
+U_CFUNC UBool
ultag_isUnicodeLocaleType(const char*s, int32_t len) {
+    /*
+     * type = alphanum{3,8} (sep alphanum{3,8})* ;
+     * Every SEP-delimited piece is checked as one type subtag.
+     */
+    return _isSepListOf(_isUnicodeLocaleTypeSubtag, s, len);
+}
+
+static UBool
+_isTKey(const char* s, int32_t len)
+{
+    /*
+     * tkey = alpha digit ;
+     * A negative len means s is NUL-terminated.
+     */
+    const int32_t length = (len < 0) ? (int32_t)uprv_strlen(s) : len;
+    return (length == 2 && ISALPHA(s[0]) && ISNUMERIC(s[1])) ? TRUE : FALSE;
+}
+
+static UBool
+_isTValue(const char* s, int32_t len)
+{
+    /*
+     * tvalue = (sep alphanum{3,8})+ ;
+     * This checks a single alphanum{3,8} subtag of a tvalue; the separators
+     * are not part of what is passed in here.
+     */
+    const int32_t kMinLength = 3;
+    const int32_t kMaxLength = 8;
+    return _isAlphaNumericStringLimitedLength(s, len, kMinLength, kMaxLength);
+}
+
+static UBool
+_isTransformedExtensionSubtag(int32_t& state, const char* s, int32_t len)
+{
+ // One step of a state machine that validates the subtags of a transformed
+ // ("t") extension in order. `state` is the machine state carried between
+ // calls (0 at the start); it is updated when the subtag is accepted.
+ // Returns TRUE if the subtag (s, len) is allowed in the current state.
+ const int32_t kStart = 0; // Start, wait for unicode_language_subtag, tkey or end
+ const int32_t kGotLanguage = 1; // Got unicode_language_subtag, wait for unicode_script_subtag,
+ // unicode_region_subtag, unicode_variant_subtag, tkey or end
+ const int32_t kGotScript = 2; // Got unicode_script_subtag, wait for unicode_region_subtag,
+ // unicode_variant_subtag, tkey, or end
+ const int32_t kGotRegion = 3; // Got unicode_region_subtag, wait for unicode_variant_subtag,
+ // tkey, or end.
+ const int32_t kGotVariant = 4; // Got unicode_variant_subtag, wait for unicode_variant_subtag
+ // tkey or end.
+ // kGotTKey is the only negative state: _isStatefulSepListOf requires
+ // state >= 0 after the last subtag, so input ending on a tkey is rejected.
+ const int32_t kGotTKey = -1; // Got tkey, wait for tvalue. ERROR if stop here.
+ const int32_t kGotTValue = 6; // Got tvalue, wait for tkey, tvalue or end
+
+ switch (state) {
+ case kStart:
+ if (ultag_isLanguageSubtag(s, len)) {
+ state = kGotLanguage;
+ return TRUE;
+ }
+ if (_isTKey(s, len)) {
+ state = kGotTKey;
+ return TRUE;
+ }
+ return FALSE;
+ case kGotLanguage:
+ if (ultag_isScriptSubtag(s, len)) {
+ state = kGotScript;
+ return TRUE;
+ }
+ // Not a script: the subtag may still be anything a later state accepts.
+ U_FALLTHROUGH;
+ case kGotScript:
+ if (ultag_isRegionSubtag(s, len)) {
+ state = kGotRegion;
+ return TRUE;
+ }
+ // Not a region: try variant or tkey below.
+ U_FALLTHROUGH;
+ case kGotRegion:
+ U_FALLTHROUGH;
+ case kGotVariant:
+ if (_isVariantSubtag(s, len)) {
+ state = kGotVariant;
+ return TRUE;
+ }
+ if (_isTKey(s, len)) {
+ state = kGotTKey;
+ return TRUE;
+ }
+ return FALSE;
+ case kGotTKey:
+ // A tkey must be followed by at least one tvalue subtag.
+ if (_isTValue(s, len)) {
+ state = kGotTValue;
+ return TRUE;
+ }
+ return FALSE;
+ case kGotTValue:
+ if (_isTKey(s, len)) {
+ state = kGotTKey;
+ return TRUE;
+ }
+ // Another tvalue subtag for the same tkey; the state does not change.
+ if (_isTValue(s, len)) {
+ return TRUE;
+ }
+ return FALSE;
+ }
+ // Unknown state value.
+ return FALSE;
+}
+
+// One step of a state machine that validates the subtags of a Unicode ("u")
+// extension in order. `state` is carried between calls (0 at the start) and
+// is updated when the subtag is accepted. Attributes are only accepted
+// before the first key; after a key, further keys and type subtags are
+// accepted in any number.
+static UBool
+_isUnicodeExtensionSubtag(int32_t& state, const char* s, int32_t len)
+{
+    const int32_t kStart = 0;         // Start, wait for a key or attribute or end
+    const int32_t kGotKey = 1;        // Got a key, wait for type or key or end
+    const int32_t kGotType = 2;       // Got a type, wait for key or end
+
+    if (state == kStart) {
+        if (ultag_isUnicodeLocaleKey(s, len)) {
+            state = kGotKey;
+            return TRUE;
+        }
+        // An attribute keeps the machine in the start state.
+        return ultag_isUnicodeLocaleAttribute(s, len) ? TRUE : FALSE;
+    }
+    if (state == kGotKey || state == kGotType) {
+        // Both states accept the same subtags; only the resulting state
+        // depends on what was matched.
+        if (ultag_isUnicodeLocaleKey(s, len)) {
+            state = kGotKey;
+            return TRUE;
+        }
+        if (_isUnicodeLocaleTypeSubtag(s, len)) {
+            state = kGotType;
+            return TRUE;
+        }
+        return FALSE;
+    }
+    // Unknown state value.
+    return FALSE;
+}
+
+static UBool
+_isStatefulSepListOf(UBool (*test)(int32_t&, const char*, int32_t), const char* s, int32_t len)
+{
+ int32_t state = 0;
const char* p;
+ const char* start = s;
int32_t subtagLen = 0;
if (len < 0) {
@@ -631,22 +776,34 @@
for (p = s; len > 0; p++, len--) {
if (*p == SEP) {
- if (subtagLen < 3) {
+ if (!test(state, start, subtagLen)) {
return FALSE;
}
subtagLen = 0;
- } else if (ISALPHA(*p) || ISNUMERIC(*p)) {
- subtagLen++;
- if (subtagLen > 8) {
- return FALSE;
- }
+ start = p + 1;
} else {
- return FALSE;
+ subtagLen++;
}
}
- return (subtagLen >= 3);
+ if (test(state, start, subtagLen) && state >= 0) {
+ return TRUE;
+ }
+ return FALSE;
}
+
+U_CFUNC UBool
+ultag_isTransformedExtensionSubtags(const char* s, int32_t len)
+{
+    // Runs the transformed-extension state machine over every SEP-delimited subtag.
+    return _isStatefulSepListOf(_isTransformedExtensionSubtag, s, len);
+}
+
+U_CFUNC UBool
+ultag_isUnicodeExtensionSubtags(const char* s, int32_t len) {
+    // Runs the Unicode-extension state machine over every SEP-delimited subtag.
+    return _isStatefulSepListOf(_isUnicodeExtensionSubtag, s, len);
+}
+
+
/*
* -------------------------------------------------
*
@@ -856,7 +1013,7 @@
if (len == 0) {
sink.Append(LANG_UND, LANG_UND_LEN);
- } else if (!_isLanguageSubtag(buf, len)) {
+ } else if (!ultag_isLanguageSubtag(buf, len)) {
/* invalid language code */
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
@@ -900,7 +1057,7 @@
}
if (len > 0) {
- if (!_isScriptSubtag(buf, len)) {
+ if (!ultag_isScriptSubtag(buf, len)) {
/* invalid script code */
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
@@ -932,7 +1089,7 @@
}
if (len > 0) {
- if (!_isRegionSubtag(buf, len)) {
+ if (!ultag_isRegionSubtag(buf, len)) {
/* invalid region code */
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
@@ -1252,7 +1409,7 @@
}
} else {
if (*key == PRIVATEUSE) {
- if (!_isPrivateuseValueSubtags(buf.data(), len)) {
+ if (!ultag_isPrivateuseValueSubtags(buf.data(), len)) {
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
break;
@@ -1260,7 +1417,7 @@
continue;
}
} else {
- if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf.data(), len)) {
+ if (!_isExtensionSingleton(key, keylen) || !ultag_isExtensionSubtags(buf.data(), len)) {
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
break;
@@ -1997,7 +2154,7 @@
subtagLen = (int32_t)(pSep - pSubtag);
if (next & LANG) {
- if (_isLanguageSubtag(pSubtag, subtagLen)) {
+ if (ultag_isLanguageSubtag(pSubtag, subtagLen)) {
*pSep = 0; /* terminate */
// TODO: move deprecated language code handling here.
t->language = T_CString_toLowerCase(pSubtag);
@@ -2024,7 +2181,7 @@
}
}
if (next & SCRT) {
- if (_isScriptSubtag(pSubtag, subtagLen)) {
+ if (ultag_isScriptSubtag(pSubtag, subtagLen)) {
char *p = pSubtag;
*pSep = 0;
@@ -2044,7 +2201,7 @@
}
}
if (next & REGN) {
- if (_isRegionSubtag(pSubtag, subtagLen)) {
+ if (ultag_isRegionSubtag(pSubtag, subtagLen)) {
*pSep = 0;
// TODO: move deprecated region code handling here.
t->region = T_CString_toUpperCase(pSubtag);
@@ -2535,7 +2692,7 @@
buf[1] = SEP;
len = uloc_getKeywordValue(localeID, key, &buf[2], sizeof(buf) - 2, &tmpStatus);
if (U_SUCCESS(tmpStatus)) {
- if (_isPrivateuseValueSubtags(&buf[2], len)) {
+ if (ultag_isPrivateuseValueSubtags(&buf[2], len)) {
/* return private use only tag */
sink.Append(buf, len + 2);
done = TRUE;
diff --git a/icu4c/source/common/ulocimp.h b/icu4c/source/common/ulocimp.h
index f268f89..fd16af5 100644
--- a/icu4c/source/common/ulocimp.h
+++ b/icu4c/source/common/ulocimp.h
@@ -148,6 +148,32 @@
U_CAPI const char * U_EXPORT2
locale_getKeywordsStart(const char *localeID);
+U_CFUNC UBool
+ultag_isExtensionSubtags(const char* s, int32_t len);
+
+U_CFUNC UBool
+ultag_isLanguageSubtag(const char* s, int32_t len);
+
+U_CFUNC UBool
+ultag_isPrivateuseValueSubtags(const char* s, int32_t len);
+
+U_CFUNC UBool
+ultag_isRegionSubtag(const char* s, int32_t len);
+
+U_CFUNC UBool
+ultag_isScriptSubtag(const char* s, int32_t len);
+
+U_CFUNC UBool
+ultag_isTransformedExtensionSubtags(const char* s, int32_t len);
+
+U_CFUNC UBool
+ultag_isUnicodeExtensionSubtags(const char* s, int32_t len);
+
+U_CFUNC UBool
+ultag_isUnicodeLocaleAttribute(const char* s, int32_t len);
+
+U_CFUNC UBool
+ultag_isUnicodeLocaleAttributes(const char* s, int32_t len);
U_CFUNC UBool
ultag_isUnicodeLocaleKey(const char* s, int32_t len);
@@ -155,6 +181,9 @@
U_CFUNC UBool
ultag_isUnicodeLocaleType(const char* s, int32_t len);
+U_CFUNC UBool
+ultag_isVariantSubtags(const char* s, int32_t len);
+
U_CFUNC const char*
ulocimp_toBcpKey(const char* key);
diff --git a/icu4c/source/common/unicode/localebuilder.h b/icu4c/source/common/unicode/localebuilder.h
new file mode 100644
index 0000000..8cd2039
--- /dev/null
+++ b/icu4c/source/common/unicode/localebuilder.h
@@ -0,0 +1,288 @@
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+#ifndef __LOCALEBUILDER_H__
+#define __LOCALEBUILDER_H__
+
+#include "unicode/locid.h"
+#include "unicode/stringpiece.h"
+#include "unicode/uobject.h"
+#include "unicode/utypes.h"
+
+
+/**
+ * \file
+ * \brief C++ API: Builder API for Locale
+ */
+
+U_NAMESPACE_BEGIN
+class CharString;
+
+#ifndef U_HIDE_DRAFT_API
+/**
+ * <code>LocaleBuilder</code> is used to build instances of <code>Locale</code>
+ * from values configured by the setters. Unlike the <code>Locale</code>
+ * constructors, the <code>LocaleBuilder</code> checks if a value configured by a
+ * setter satisfies the syntax requirements defined by the <code>Locale</code>
+ * class. A <code>Locale</code> object created by a <code>LocaleBuilder</code> is
+ * well-formed and can be transformed to a well-formed IETF BCP 47 language tag
+ * without losing information.
+ *
+ * <p>The following example shows how to create a <code>Locale</code> object
+ * with the <code>LocaleBuilder</code>.
+ * <blockquote>
+ * <pre>
+ * UErrorCode status = U_ZERO_ERROR;
+ * Locale aLocale = LocaleBuilder()
+ * .setLanguage("sr")
+ * .setScript("Latn")
+ * .setRegion("RS")
+ * .build(status);
+ * if (U_SUCCESS(status)) {
+ * // ...
+ * }
+ * </pre>
+ * </blockquote>
+ *
+ * <p>LocaleBuilders can be reused; <code>clear()</code> resets all
+ * fields to their default values.
+ *
+ * <p>LocaleBuilder tracks errors in an internal UErrorCode. For all setters,
+ * except setLanguageTag and setLocale, LocaleBuilder will return immediately
+ * if the internal UErrorCode is in an error state.
+ * To reset the internal state and error code, call the clear() method.
+ * The setLanguageTag and setLocale methods will first clear the internal
+ * UErrorCode, then record any error found while validating the input
+ * parameter in the internal UErrorCode.
+ *
+ * @draft ICU 64
+ */
+class U_COMMON_API LocaleBuilder : public UObject {
+public:
+ /**
+ * Constructs an empty LocaleBuilder. The default value of all
+ * fields, extensions, and private use information is the
+ * empty string.
+ *
+ * @draft ICU 64
+ */
+ LocaleBuilder();
+
+ virtual ~LocaleBuilder();
+
+ /**
+ * Resets the <code>LocaleBuilder</code> to match the provided
+ * <code>locale</code>. Existing state is discarded.
+ *
+ * <p>All fields of the locale must be well-formed.
+ * <p>This method clears the internal UErrorCode.
+ *
+ * @param locale the locale
+ * @return This builder.
+ *
+ * @draft ICU 64
+ */
+ LocaleBuilder& setLocale(const Locale& locale);
+
+ /**
+ * Resets the LocaleBuilder to match the provided
+ * [Unicode Locale Identifier](http://www.unicode.org/reports/tr35/tr35.html#unicode_locale_id) .
+ * Discards the existing state. The empty string causes the builder to be
+ * reset, like {@link #clear}. Grandfathered tags are converted to their
+ * canonical form before being processed. Otherwise, the <code>language
+ * tag</code> must be well-formed, or else the build() method will later
+ * report an U_ILLEGAL_ARGUMENT_ERROR.
+ *
+ * <p>This method clears the internal UErrorCode.
+ *
+ * @param tag the language tag, defined as
+ * [unicode_locale_id](http://www.unicode.org/reports/tr35/tr35.html#unicode_locale_id).
+ * @return This builder.
+ * @draft ICU 64
+ */
+ LocaleBuilder& setLanguageTag(StringPiece tag);
+
+ /**
+ * Sets the language. If <code>language</code> is the empty string, the
+ * language in this <code>LocaleBuilder</code> is removed. Otherwise, the
+ * <code>language</code> must be well-formed, or else the build() method will
+ * later report an U_ILLEGAL_ARGUMENT_ERROR.
+ *
+ * <p>The syntax of language value is defined as
+ * [unicode_language_subtag](http://www.unicode.org/reports/tr35/tr35.html#unicode_language_subtag).
+ *
+ * @param language the language
+ * @return This builder.
+ * @draft ICU 64
+ */
+ LocaleBuilder& setLanguage(StringPiece language);
+
+ /**
+ * Sets the script. If <code>script</code> is the empty string, the script in
+ * this <code>LocaleBuilder</code> is removed.
+ * Otherwise, the <code>script</code> must be well-formed, or else the build()
+ * method will later report an U_ILLEGAL_ARGUMENT_ERROR.
+ *
+ * <p>The script value is a four-letter script code as
+ * [unicode_script_subtag](http://www.unicode.org/reports/tr35/tr35.html#unicode_script_subtag)
+ * defined by ISO 15924
+ *
+ * @param script the script
+ * @return This builder.
+ * @draft ICU 64
+ */
+ LocaleBuilder& setScript(StringPiece script);
+
+ /**
+ * Sets the region. If region is the empty string, the region in this
+ * <code>LocaleBuilder</code> is removed. Otherwise, the <code>region</code>
+ * must be well-formed, or else the build() method will later report an
+ * U_ILLEGAL_ARGUMENT_ERROR.
+ *
+ * <p>The region value is defined by
+ * [unicode_region_subtag](http://www.unicode.org/reports/tr35/tr35.html#unicode_region_subtag)
+ * as a two-letter ISO 3166 code or a three-digit UN M.49 area code.
+ *
+ * <p>The region value in the <code>Locale</code> created by the
+ * <code>LocaleBuilder</code> is always normalized to upper case.
+ *
+ * @param region the region
+ * @return This builder.
+ * @draft ICU 64
+ */
+ LocaleBuilder& setRegion(StringPiece region);
+
+ /**
+ * Sets the variant. If variant is the empty string, the variant in this
+ * <code>LocaleBuilder</code> is removed. Otherwise, the <code>variant</code>
+ * must be well-formed, or else the build() method will later report an
+ * U_ILLEGAL_ARGUMENT_ERROR.
+ *
+ * <p><b>Note:</b> This method checks if <code>variant</code>
+ * satisfies the
+ * [unicode_variant_subtag](http://www.unicode.org/reports/tr35/tr35.html#unicode_variant_subtag)
+ * syntax requirements, and normalizes the value to lowercase letters. However,
+ * the <code>Locale</code> class does not impose any syntactic
+ * restriction on variant. To set an ill-formed variant, use a Locale constructor.
+ * If there are multiple unicode_variant_subtag, the caller must concatenate
+ * them with '-' as separator (ex: "foobar-fibar").
+ *
+ * @param variant the variant
+ * @return This builder.
+ * @draft ICU 64
+ */
+ LocaleBuilder& setVariant(StringPiece variant);
+
+ /**
+ * Sets the extension for the given key. If the value is the empty string,
+ * the extension is removed. Otherwise, the <code>key</code> and
+ * <code>value</code> must be well-formed, or else the build() method will
+ * later report an U_ILLEGAL_ARGUMENT_ERROR.
+ *
+ * <p><b>Note:</b> The key ('u') is used for the Unicode locale extension.
+ * Setting a value for this key replaces any existing Unicode locale key/type
+ * pairs with those defined in the extension.
+ *
+ * <p><b>Note:</b> The key ('x') is used for the private use code. To be
+ * well-formed, the value for this key needs only to have subtags of one to
+ * eight alphanumeric characters, not two to eight as in the general case.
+ *
+ * @param key the extension key
+ * @param value the extension value
+ * @return This builder.
+ * @draft ICU 64
+ */
+ LocaleBuilder& setExtension(char key, StringPiece value);
+
+ /**
+ * Sets the Unicode locale keyword type for the given key. If the type
+ * StringPiece is constructed with a nullptr, the keyword is removed.
+ * If the type is the empty string, the keyword is set without type subtags.
+ * Otherwise, the key and type must be well-formed, or else the build()
+ * method will later report an U_ILLEGAL_ARGUMENT_ERROR.
+ *
+ * <p>Keys and types are converted to lower case.
+ *
+ * <p><b>Note:</b> Setting the 'u' extension via {@link #setExtension}
+ * replaces all Unicode locale keywords with those defined in the
+ * extension.
+ *
+ * @param key the Unicode locale key
+ * @param type the Unicode locale type
+ * @return This builder.
+ * @draft ICU 64
+ */
+ LocaleBuilder& setUnicodeLocaleKeyword(
+ StringPiece key, StringPiece type);
+
+ /**
+ * Adds a unicode locale attribute, if not already present, otherwise
+ * has no effect. The attribute must not be the empty string and must be
+ * well-formed, or else the build() method will later report an
+ * U_ILLEGAL_ARGUMENT_ERROR.
+ *
+ * @param attribute the attribute
+ * @return This builder.
+ * @draft ICU 64
+ */
+ LocaleBuilder& addUnicodeLocaleAttribute(StringPiece attribute);
+
+ /**
+ * Removes a unicode locale attribute, if present, otherwise has no
+ * effect. The attribute must not be the empty string and must be well-formed,
+ * or else the build() method will later report an U_ILLEGAL_ARGUMENT_ERROR.
+ *
+ * <p>Attribute comparison for removal is case-insensitive.
+ *
+ * @param attribute the attribute
+ * @return This builder.
+ * @draft ICU 64
+ */
+ LocaleBuilder& removeUnicodeLocaleAttribute(StringPiece attribute);
+
+ /**
+ * Resets the builder to its initial, empty state.
+ * <p>This method clears the internal UErrorCode.
+ *
+ * @return this builder
+ * @draft ICU 64
+ */
+ LocaleBuilder& clear();
+
+ /**
+ * Resets the extensions to their initial, empty state.
+ * Language, script, region and variant are unchanged.
+ *
+ * @return this builder
+ * @draft ICU 64
+ */
+ LocaleBuilder& clearExtensions();
+
+ /**
+ * Returns an instance of <code>Locale</code> created from the fields set
+ * on this builder.
+ * If a setter or the build() call itself needs to allocate memory and the
+ * allocation fails, the status will be set to U_MEMORY_ALLOCATION_ERROR.
+ * If any of the fields set by the setters are not well-formed, the status
+ * will be set to U_ILLEGAL_ARGUMENT_ERROR. The state of the builder will
+ * not change after the build() call and the caller is free to keep using
+ * the same builder to build more locales.
+ *
+ * @return a new Locale
+ * @draft ICU 64
+ */
+ Locale build(UErrorCode& status);
+
+private:
+ UErrorCode status_;
+ char language_[9];
+ char script_[5];
+ char region_[4];
+ CharString *variant_; // Pointer not object so we need not #include internal charstr.h.
+ icu::Locale *extensions_; // Pointer not object. Storage for all other fields.
+
+};
+#endif // U_HIDE_DRAFT_API
+
+U_NAMESPACE_END
+
+#endif // __LOCALEBUILDER_H__
diff --git a/icu4c/source/common/unicode/urename.h b/icu4c/source/common/unicode/urename.h
index 0512be3..cea3be4 100644
--- a/icu4c/source/common/unicode/urename.h
+++ b/icu4c/source/common/unicode/urename.h
@@ -1109,6 +1109,16 @@
#define ulocimp_toLegacyType U_ICU_ENTRY_POINT_RENAME(ulocimp_toLegacyType)
#define ultag_isUnicodeLocaleKey U_ICU_ENTRY_POINT_RENAME(ultag_isUnicodeLocaleKey)
#define ultag_isUnicodeLocaleType U_ICU_ENTRY_POINT_RENAME(ultag_isUnicodeLocaleType)
+#define ultag_isExtensionSubtags U_ICU_ENTRY_POINT_RENAME(ultag_isExtensionSubtags)
+#define ultag_isLanguageSubtag U_ICU_ENTRY_POINT_RENAME(ultag_isLanguageSubtag)
+#define ultag_isPrivateuseValueSubtags U_ICU_ENTRY_POINT_RENAME(ultag_isPrivateuseValueSubtags)
+#define ultag_isRegionSubtag U_ICU_ENTRY_POINT_RENAME(ultag_isRegionSubtag)
+#define ultag_isScriptSubtag U_ICU_ENTRY_POINT_RENAME(ultag_isScriptSubtag)
+#define ultag_isTransformedExtensionSubtags U_ICU_ENTRY_POINT_RENAME(ultag_isTransformedExtensionSubtags)
+#define ultag_isUnicodeExtensionSubtags U_ICU_ENTRY_POINT_RENAME(ultag_isUnicodeExtensionSubtags)
+#define ultag_isUnicodeLocaleAttribute U_ICU_ENTRY_POINT_RENAME(ultag_isUnicodeLocaleAttribute)
+#define ultag_isUnicodeLocaleAttributes U_ICU_ENTRY_POINT_RENAME(ultag_isUnicodeLocaleAttributes)
+#define ultag_isVariantSubtags U_ICU_ENTRY_POINT_RENAME(ultag_isVariantSubtags)
#define umsg_applyPattern U_ICU_ENTRY_POINT_RENAME(umsg_applyPattern)
#define umsg_autoQuoteApostrophe U_ICU_ENTRY_POINT_RENAME(umsg_autoQuoteApostrophe)
#define umsg_clone U_ICU_ENTRY_POINT_RENAME(umsg_clone)
diff --git a/icu4c/source/test/depstest/dependencies.txt b/icu4c/source/test/depstest/dependencies.txt
index d2682ab..1e51980 100644
--- a/icu4c/source/test/depstest/dependencies.txt
+++ b/icu4c/source/test/depstest/dependencies.txt
@@ -188,6 +188,7 @@
uinit utypes errorcode
icuplug
platform
+ localebuilder
group: pluralmap
# TODO: Move to i18n library, ticket #11926.
@@ -643,6 +644,11 @@
uscript_props propname
bytesinkutil
+group: localebuilder
+ localebuilder.o
+ deps
+ resourcebundle
+
group: udata
udata.o ucmndata.o udatamem.o
umapfile.o
diff --git a/icu4c/source/test/intltest/Makefile.in b/icu4c/source/test/intltest/Makefile.in
index c049a5c..b4cf918 100644
--- a/icu4c/source/test/intltest/Makefile.in
+++ b/icu4c/source/test/intltest/Makefile.in
@@ -44,7 +44,7 @@
fldset.o dadrfmt.o dadrcal.o dcfmapts.o decoll.o dtfmapts.o dtfmrgts.o dtfmtrtts.o dtfmttst.o \
dtptngts.o encoll.o escoll.o ficoll.o frcoll.o g7coll.o intltest.o \
itercoll.o itformat.o itmajor.o itutil.o jacoll.o lcukocol.o \
-loctest.o miscdtfm.o mnkytst.o msfmrgts.o nmfmapts.o nmfmtrt.o \
+loctest.o localebuildertest.o miscdtfm.o mnkytst.o msfmrgts.o nmfmapts.o nmfmtrt.o \
numfmtst.o numrgts.o plurults.o plurfmts.o pptest.o regcoll.o restest.o restsnew.o \
sdtfmtts.o svccoll.o tchcfmt.o selfmts.o \
tfsmalls.o tmsgfmt.o trcoll.o tscoll.o tsdate.o tsdcfmsy.o tsdtfmsy.o \
diff --git a/icu4c/source/test/intltest/intltest.vcxproj b/icu4c/source/test/intltest/intltest.vcxproj
index 298a7f5..5e82ef3 100644
--- a/icu4c/source/test/intltest/intltest.vcxproj
+++ b/icu4c/source/test/intltest/intltest.vcxproj
@@ -364,6 +364,7 @@
<ClCompile Include="bidiconf.cpp" />
<ClCompile Include="listformattertest.cpp" />
<ClCompile Include="formattedvaluetest.cpp" />
+ <ClCompile Include="localebuildertest.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="colldata.h" />
@@ -494,8 +495,9 @@
<ClInclude Include="convtest.h" />
<ClInclude Include="csdetest.h" />
<ClInclude Include="listformattertest.h" />
+ <ClInclude Include="localebuildertest.h" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
-</Project>
\ No newline at end of file
+</Project>
diff --git a/icu4c/source/test/intltest/intltest.vcxproj.filters b/icu4c/source/test/intltest/intltest.vcxproj.filters
index d707727..bed26bc 100644
--- a/icu4c/source/test/intltest/intltest.vcxproj.filters
+++ b/icu4c/source/test/intltest/intltest.vcxproj.filters
@@ -540,6 +540,9 @@
</ClCompile>
<ClCompile Include="formattedvaluetest.cpp">
<Filter>formatting</Filter>
</ClCompile>
+ <ClCompile Include="localebuildertest.cpp">
+ <Filter>locales & resources</Filter>
+ </ClCompile>
</ItemGroup>
<ItemGroup>
@@ -927,5 +930,8 @@
<ClInclude Include="erarulestest.h">
<Filter>formatting</Filter>
</ClInclude>
+ <ClInclude Include="localebuildertest.h">
+ <Filter>locales & resources</Filter>
+ </ClInclude>
</ItemGroup>
-</Project>
\ No newline at end of file
+</Project>
diff --git a/icu4c/source/test/intltest/itutil.cpp b/icu4c/source/test/intltest/itutil.cpp
index 91d81d0..3cda39d 100644
--- a/icu4c/source/test/intltest/itutil.cpp
+++ b/icu4c/source/test/intltest/itutil.cpp
@@ -19,6 +19,7 @@
#include "itutil.h"
#include "strtest.h"
#include "loctest.h"
+#include "localebuildertest.h"
#include "citrtest.h"
#include "ustrtest.h"
#include "ucdtest.h"
@@ -149,6 +150,7 @@
}
#endif
break;
+ CASE(25, LocaleBuilderTest);
default: name = ""; break; //needed to end loop
}
}
diff --git a/icu4c/source/test/intltest/localebuildertest.cpp b/icu4c/source/test/intltest/localebuildertest.cpp
new file mode 100644
index 0000000..f99057f
--- /dev/null
+++ b/icu4c/source/test/intltest/localebuildertest.cpp
@@ -0,0 +1,1627 @@
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include <memory>
+
+#include "cmemory.h"
+#include "cstring.h"
+#include "localebuildertest.h"
+#include "unicode/localebuilder.h"
+#include "unicode/strenum.h"
+
+LocaleBuilderTest::LocaleBuilderTest()
+{
+}
+
+LocaleBuilderTest::~LocaleBuilderTest()
+{
+}
+
+void LocaleBuilderTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
+{
+ TESTCASE_AUTO_BEGIN;
+ TESTCASE_AUTO(TestAddRemoveUnicodeLocaleAttribute);
+ TESTCASE_AUTO(TestAddRemoveUnicodeLocaleAttributeWellFormed);
+ TESTCASE_AUTO(TestAddUnicodeLocaleAttributeIllFormed);
+ TESTCASE_AUTO(TestLocaleBuilder);
+ TESTCASE_AUTO(TestLocaleBuilderBasic);
+ TESTCASE_AUTO(TestPosixCases);
+ TESTCASE_AUTO(TestSetExtensionOthers);
+ TESTCASE_AUTO(TestSetExtensionPU);
+ TESTCASE_AUTO(TestSetExtensionT);
+ TESTCASE_AUTO(TestSetExtensionU);
+ TESTCASE_AUTO(TestSetExtensionValidateOthersIllFormed);
+ TESTCASE_AUTO(TestSetExtensionValidateOthersWellFormed);
+ TESTCASE_AUTO(TestSetExtensionValidatePUIllFormed);
+ TESTCASE_AUTO(TestSetExtensionValidatePUWellFormed);
+ TESTCASE_AUTO(TestSetExtensionValidateTIllFormed);
+ TESTCASE_AUTO(TestSetExtensionValidateTWellFormed);
+ TESTCASE_AUTO(TestSetExtensionValidateUIllFormed);
+ TESTCASE_AUTO(TestSetExtensionValidateUWellFormed);
+ TESTCASE_AUTO(TestSetLanguageIllFormed);
+ TESTCASE_AUTO(TestSetLanguageWellFormed);
+ TESTCASE_AUTO(TestSetLocale);
+ TESTCASE_AUTO(TestSetRegionIllFormed);
+ TESTCASE_AUTO(TestSetRegionWellFormed);
+ TESTCASE_AUTO(TestSetScriptIllFormed);
+ TESTCASE_AUTO(TestSetScriptWellFormed);
+ TESTCASE_AUTO(TestSetUnicodeLocaleKeywordIllFormedKey);
+ TESTCASE_AUTO(TestSetUnicodeLocaleKeywordIllFormedValue);
+ TESTCASE_AUTO(TestSetUnicodeLocaleKeywordWellFormed);
+ TESTCASE_AUTO(TestSetVariantIllFormed);
+ TESTCASE_AUTO(TestSetVariantWellFormed);
+ TESTCASE_AUTO_END;
+}
+
+void LocaleBuilderTest::Verify(LocaleBuilder& bld, const char* expected, const char* msg) {
+ UErrorCode status = U_ZERO_ERROR;
+ Locale loc = bld.build(status);
+ if (U_FAILURE(status)) {
+ errln(msg, u_errorName(status));
+ }
+ std::string tag = loc.toLanguageTag<std::string>(status);
+ if (U_FAILURE(status)) {
+ errln("loc.toLanguageTag() got Error: %s\n",
+ u_errorName(status));
+ }
+ if (tag != expected) {
+ errln("should get \"%s\", but got \"%s\"\n", expected, tag.c_str());
+ }
+}
+
+void LocaleBuilderTest::TestLocaleBuilder() {
+ // The following test data are copied from
+ // icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleBuilderTest.java
+ // "L": +1 = language
+ // "S": +1 = script
+ // "R": +1 = region
+ // "V": +1 = variant
+ // "K": +1 = Unicode locale key / +2 = Unicode locale type
+ // "A": +1 = Unicode locale attribute
+ // "E": +1 = extension letter / +2 = extension value
+ // "P": +1 = private use
+ // "U": +1 = ULocale
+ // "B": +1 = BCP47 language tag
+ // "C": Clear all
+ // "N": Clear extensions
+ // "D": +1 = Unicode locale attribute to be removed
+ // "X": indicates the preceding action must have reported an error status
+ // "T": +1 = expected language tag / +2 = expected locale string
+ const char* TESTCASES[][14] = {
+ {"L", "en", "R", "us", "T", "en-US", "en_US"},
+ {"L", "en", "R", "CA", "L", nullptr, "T", "und-CA", "_CA"},
+ {"L", "en", "R", "CA", "L", "", "T", "und-CA", "_CA"},
+ {"L", "en", "R", "FR", "L", "fr", "T", "fr-FR", "fr_FR"},
+ {"L", "123", "X"},
+ {"R", "us", "T", "und-US", "_US"},
+ {"R", "usa", "X"},
+ {"R", "123", "L", "it", "R", nullptr, "T", "it", "it"},
+ {"R", "123", "L", "it", "R", "", "T", "it", "it"},
+ {"R", "123", "L", "en", "T", "en-123", "en_123"},
+ {"S", "LATN", "L", "DE", "T", "de-Latn", "de_Latn"},
+ {"L", "De", "S", "latn", "R", "de", "S", "", "T", "de-DE", "de_DE"},
+ {"L", "De", "S", "Arab", "R", "de", "S", nullptr, "T", "de-DE", "de_DE"},
+ {"S", "latin", "X"},
+ {"V", "1234", "L", "en", "T", "en-1234", "en__1234"},
+ {"V", "1234", "L", "en", "V", "5678", "T", "en-5678", "en__5678"},
+ {"V", "1234", "L", "en", "V", nullptr, "T", "en", "en"},
+ {"V", "1234", "L", "en", "V", "", "T", "en", "en"},
+ {"V", "123", "X"},
+ {"U", "en_US", "T", "en-US", "en_US"},
+ {"U", "en_US_WIN", "X"},
+ {"B", "fr-FR-1606nict-u-ca-gregory-x-test", "T",
+ "fr-FR-1606nict-u-ca-gregory-x-test",
+ "fr_FR_1606NICT@calendar=gregorian;x=test"},
+ {"B", "ab-cde-fghij", "T", "cde-fghij", "cde__FGHIJ"},
+ {"B", "und-CA", "T", "und-CA", "_CA"},
+ // Blocked by ICU-20327
+ // {"B", "en-US-x-test-lvariant-var", "T", "en-US-x-test-lvariant-var",
+ // "en_US_VAR@x=test"},
+ {"B", "en-US-VAR", "X"},
+ {"U", "ja_JP@calendar=japanese;currency=JPY", "L", "ko", "T",
+ "ko-JP-u-ca-japanese-cu-jpy", "ko_JP@calendar=japanese;currency=JPY"},
+ {"U", "ja_JP@calendar=japanese;currency=JPY", "K", "ca", nullptr, "T",
+ "ja-JP-u-cu-jpy", "ja_JP@currency=JPY"},
+ {"U", "ja_JP@calendar=japanese;currency=JPY", "E", "u",
+ "attr1-ca-gregory", "T", "ja-JP-u-attr1-ca-gregory",
+ "ja_JP@attribute=attr1;calendar=gregorian"},
+ {"U", "en@colnumeric=yes", "K", "kn", "true", "T", "en-u-kn-true",
+ "en@colnumeric=yes"},
+ {"L", "th", "R", "th", "K", "nu", "thai", "T", "th-TH-u-nu-thai",
+ "th_TH@numbers=thai"},
+ {"U", "zh_Hans", "R", "sg", "K", "ca", "badcalendar", "X"},
+ {"U", "zh_Hans", "R", "sg", "K", "cal", "gregory", "X"},
+ {"E", "z", "ExtZ", "L", "en", "T", "en-z-extz", "en@z=extz"},
+ {"E", "z", "ExtZ", "L", "en", "E", "z", "", "T", "en", "en"},
+ {"E", "z", "ExtZ", "L", "en", "E", "z", nullptr, "T", "en", "en"},
+ {"E", "a", "x", "X"},
+ {"E", "a", "abc_def", "T", "und-a-abc-def", "@a=abc-def"},
+ // Design limitation - typeless u extension keyword 0a below is interpreted as a boolean value true/yes.
+ // With the legacy keyword syntax, "yes" is used for such boolean value instead of "true".
+ // However, once the legacy keyword is translated back to BCP 47 u extension, key "0a" is unknown,
+ // so "yes" is preserved - not mapped to "true". We could change the code to automatically transform
+ // key = alphanum alpha
+ {"L", "en", "E", "u", "bbb-aaa-0a", "T", "en-u-aaa-bbb-0a-yes",
+ "en@0a=yes;attribute=aaa-bbb"},
+ {"L", "fr", "R", "FR", "P", "Yoshito-ICU", "T", "fr-FR-x-yoshito-icu",
+ "fr_FR@x=yoshito-icu"},
+ {"L", "ja", "R", "jp", "K", "ca", "japanese", "T", "ja-JP-u-ca-japanese",
+ "ja_JP@calendar=japanese"},
+ {"K", "co", "PHONEBK", "K", "ca", "gregory", "L", "De", "T",
+ "de-u-ca-gregory-co-phonebk", "de@calendar=gregorian;collation=phonebook"},
+ {"E", "o", "OPQR", "E", "a", "aBcD", "T", "und-a-abcd-o-opqr", "@a=abcd;o=opqr"},
+ {"E", "u", "nu-thai-ca-gregory", "L", "TH", "T", "th-u-ca-gregory-nu-thai",
+ "th@calendar=gregorian;numbers=thai"},
+ {"L", "en", "K", "tz", "usnyc", "R", "US", "T", "en-US-u-tz-usnyc",
+ "en_US@timezone=America/New_York"},
+ {"L", "de", "K", "co", "phonebk", "K", "ks", "level1", "K", "kk",
+ "true", "T", "de-u-co-phonebk-kk-true-ks-level1",
+ "de@collation=phonebook;colnormalization=yes;colstrength=primary"},
+ {"L", "en", "R", "US", "K", "ca", "gregory", "T", "en-US-u-ca-gregory",
+ "en_US@calendar=gregorian"},
+ {"L", "en", "R", "US", "K", "cal", "gregory", "X"},
+ {"L", "en", "R", "US", "K", "ca", "gregorian", "X"},
+ {"L", "en", "R", "US", "K", "kn", "true", "T", "en-US-u-kn-true",
+ "en_US@colnumeric=yes"},
+ {"B", "de-DE-u-co-phonebk", "C", "L", "pt", "T", "pt", "pt"},
+ {"B", "ja-jp-u-ca-japanese", "N", "T", "ja-JP", "ja_JP"},
+ {"B", "es-u-def-abc-co-trad", "A", "hij", "D", "def", "T",
+ "es-u-abc-hij-co-trad", "es@attribute=abc-hij;collation=traditional"},
+ {"B", "es-u-def-abc-co-trad", "A", "hij", "D", "def", "D", "def", "T",
+ "es-u-abc-hij-co-trad", "es@attribute=abc-hij;collation=traditional"},
+ {"L", "en", "A", "aa", "X"},
+ {"B", "fr-u-attr1-cu-eur", "D", "attribute1", "X"},
+ };
+ UErrorCode status = U_ZERO_ERROR;
+ LocaleBuilder bld;
+ for (int tidx = 0; tidx < UPRV_LENGTHOF(TESTCASES); tidx++) {
+ const char* (&testCase)[14] = TESTCASES[tidx];
+ std::string actions; // printable form of the row, used only in failure messages
+ for (int p = 0; p < UPRV_LENGTHOF(testCase); p++) {
+ if (testCase[p] == nullptr) {
+ actions += " (nullptr)";
+ break;
+ }
+ if (p > 0) actions += " ";
+ actions += testCase[p];
+ }
+ int i = 0;
+ const char* method;
+ status = U_ZERO_ERROR;
+ bld.clear();
+ while (true) { // run the row's actions until "T", "X" or an error status ends it
+ method = testCase[i++];
+ if (strcmp("L", method) == 0) {
+ bld.setLanguage(testCase[i++]).build(status);
+ } else if (strcmp("S", method) == 0) {
+ bld.setScript(testCase[i++]).build(status);
+ } else if (strcmp("R", method) == 0) {
+ bld.setRegion(testCase[i++]).build(status);
+ } else if (strcmp("V", method) == 0) {
+ bld.setVariant(testCase[i++]).build(status);
+ } else if (strcmp("K", method) == 0) {
+ const char* key = testCase[i++];
+ const char* type = testCase[i++];
+ bld.setUnicodeLocaleKeyword(key, type).build(status);
+ } else if (strcmp("A", method) == 0) {
+ bld.addUnicodeLocaleAttribute(testCase[i++]).build(status);
+ } else if (strcmp("E", method) == 0) {
+ const char* key = testCase[i++];
+ const char* value = testCase[i++];
+ bld.setExtension(key[0], value).build(status);
+ } else if (strcmp("P", method) == 0) {
+ bld.setExtension('x', testCase[i++]).build(status);
+ } else if (strcmp("U", method) == 0) {
+ bld.setLocale(Locale(testCase[i++])).build(status);
+ } else if (strcmp("B", method) == 0) {
+ bld.setLanguageTag(testCase[i++]).build(status);
+ }
+ // clear / remove
+ else if (strcmp("C", method) == 0) {
+ bld.clear().build(status);
+ } else if (strcmp("N", method) == 0) {
+ bld.clearExtensions().build(status);
+ } else if (strcmp("D", method) == 0) {
+ bld.removeUnicodeLocaleAttribute(testCase[i++]).build(status);
+ }
+ // result
+ else if (strcmp("X", method) == 0) {
+ if (U_SUCCESS(status)) {
+ errln("FAIL: No error return - test case: %s", actions.c_str());
+ }
+ } else if (strcmp("T", method) == 0) {
+ status = U_ZERO_ERROR;
+ Locale loc = bld.build(status);
+ if (U_FAILURE(status) ||
+ strcmp(loc.getName(), testCase[i + 1]) != 0) {
+ errln("FAIL: Wrong locale ID - %s %s %s", loc.getName(),
+ " for test case: ", actions.c_str());
+ }
+ std::string langtag = loc.toLanguageTag<std::string>(status);
+ if (U_FAILURE(status) || langtag != testCase[i]) {
+ errln("FAIL: Wrong language tag - %s %s %s", langtag.c_str(),
+ " for test case: ", actions.c_str());
+ }
+ break;
+ } else {
+ // Unknown test method
+ errln("Unknown test case method: There is an error in the test case data.");
+ break;
+ }
+ if (U_FAILURE(status)) {
+ if (strcmp("X", testCase[i]) == 0) {
+ // This failure is expected
+ break;
+ } else {
+ errln("FAIL: U_ILLEGAL_ARGUMENT_ERROR at offset %d %s %s", i,
+ " in test case: ", actions.c_str());
+ break;
+ }
+ }
+ if (strcmp("X", method) == 0) { // "X" ends the row; reading on would pass a null slot to strcmp
+ break;
+ }
+ } // while(true)
+ } // for TESTCASES
+}
+
+void LocaleBuilderTest::TestLocaleBuilderBasic() {
+ LocaleBuilder bld; // one builder is mutated step by step; Verify() checks the tag after each setter
+ bld.setLanguage("zh");
+ Verify(bld, "zh", "setLanguage('zh') got Error: %s\n");
+
+ bld.setScript("Hant");
+ Verify(bld, "zh-Hant", "setScript('Hant') got Error: %s\n");
+
+ bld.setRegion("SG");
+ Verify(bld, "zh-Hant-SG", "setRegion('SG') got Error: %s\n");
+
+ bld.setRegion("HK");
+ bld.setScript("Hans");
+ Verify(bld, "zh-Hans-HK",
+ "setRegion('HK') and setScript('Hans') got Error: %s\n");
+
+ bld.setVariant("revised");
+ Verify(bld, "zh-Hans-HK-revised",
+ "setVariant('revised') got Error: %s\n");
+
+ bld.setUnicodeLocaleKeyword("nu", "thai");
+ Verify(bld, "zh-Hans-HK-revised-u-nu-thai",
+ "setUnicodeLocaleKeyword('nu', 'thai') got Error: %s\n");
+
+ bld.setUnicodeLocaleKeyword("co", "pinyin");
+ Verify(bld, "zh-Hans-HK-revised-u-co-pinyin-nu-thai",
+ "setUnicodeLocaleKeyword('co', 'pinyin') got Error: %s\n");
+
+ bld.setUnicodeLocaleKeyword("nu", "latn");
+ Verify(bld, "zh-Hans-HK-revised-u-co-pinyin-nu-latn",
+ "setUnicodeLocaleKeyword('nu', 'latn') got Error: %s\n");
+
+ bld.setUnicodeLocaleKeyword("nu", nullptr);
+ Verify(bld, "zh-Hans-HK-revised-u-co-pinyin",
+ "setUnicodeLocaleKeyword('nu', nullptr) got Error: %s\n");
+
+ bld.setUnicodeLocaleKeyword("co", nullptr);
+ Verify(bld, "zh-Hans-HK-revised",
+ "setUnicodeLocaleKeyword('co', nullptr) got Error: %s\n");
+
+ bld.setScript("");
+ Verify(bld, "zh-HK-revised",
+ "setScript('') got Error: %s\n");
+
+ bld.setVariant("");
+ Verify(bld, "zh-HK",
+ "setVariant('') got Error: %s\n");
+
+ bld.setRegion("");
+ Verify(bld, "zh",
+ "setRegion('') got Error: %s\n");
+}
+
+void LocaleBuilderTest::TestSetLanguageWellFormed() {
+ // http://www.unicode.org/reports/tr35/tr35.html#unicode_language_subtag
+ // unicode_language_subtag = alpha{2,3} | alpha{5,8};
+ // ICUTC decided also support alpha{4}
+ static const char* wellFormedLanguages[] = {
+ "",
+
+ // alpha{2}
+ "en",
+ "NE",
+ "eN",
+ "Ne",
+
+ // alpha{3}
+ "aNe",
+ "zzz",
+ "AAA",
+
+ // alpha{4}
+ "ABCD",
+ "abcd",
+
+ // alpha{5}
+ "efgij",
+ "AbCAD",
+ "ZAASD",
+
+ // alpha{6}
+ "efgijk",
+ "AADGFE",
+ "AkDfFz",
+
+ // alpha{7}
+ "asdfads",
+ "ADSFADF",
+ "piSFkDk",
+
+ // alpha{8}
+ "oieradfz",
+ "IADSFJKR",
+ "kkDSFJkR",
+ };
+ for (const char* lang : wellFormedLanguages) {
+ UErrorCode status = U_ZERO_ERROR;
+ LocaleBuilder bld;
+ bld.setLanguage(lang);
+ Locale loc = bld.build(status);
+ if (U_FAILURE(status)) {
+ errln("setLanguage(\"%s\") got Error: %s\n",
+ lang, u_errorName(status));
+ }
+ }
+}
+
+void LocaleBuilderTest::TestSetLanguageIllFormed() {
+ static const char* illFormed[] = { // each entry must make build() report U_ILLEGAL_ARGUMENT_ERROR
+ "a",
+ "z",
+ "A",
+ "F",
+ "2",
+ "0",
+ "9", // the comma matters: without it "9" and "{" merge into the single literal "9{"
+ "{",
+ ".",
+ "[",
+ "]",
+ "\\",
+
+ "e1",
+ "N2",
+ "3N",
+ "4e",
+ "e:",
+ "43",
+ "a9",
+
+ "aN0",
+ "z1z",
+ "2zz",
+ "3A3",
+ "456",
+ "af)",
+
+ // Per 2019-01-23 ICUTC, we still accept 4alpha as tlang. see ICU-20321.
+ // "latn",
+ // "Arab",
+ // "LATN",
+
+ "e)gij",
+ "Ab3AD",
+ "ZAAS8",
+
+ "efgi[]",
+ "AA9GFE",
+ "7kD3Fz",
+ "as8fads",
+ "0DSFADF",
+ "'iSFkDk",
+
+ "oieradf+",
+ "IADSFJK-",
+ "kkDSFJk0",
+
+ // alpha{9}
+ "oieradfab",
+ "IADSFJKDE",
+ "kkDSFJkzf",
+ };
+ for (const char* ill : illFormed) {
+ UErrorCode status = U_ZERO_ERROR;
+ LocaleBuilder bld;
+ bld.setLanguage(ill);
+ Locale loc = bld.build(status);
+ if (status != U_ILLEGAL_ARGUMENT_ERROR) {
+ errln("setLanguage(\"%s\") should fail but has no Error\n", ill);
+ }
+ }
+}
+
+void LocaleBuilderTest::TestSetScriptWellFormed() {
+ // http://www.unicode.org/reports/tr35/tr35.html#unicode_script_subtag
+ // unicode_script_subtag = alpha{4} ;
+ static const char* wellFormedScripts[] = {
+ "",
+
+ "Latn",
+ "latn",
+ "lATN",
+ "laTN",
+ "arBN",
+ "ARbn",
+ "adsf",
+ "aADF",
+ "BSVS",
+ "LATn",
+ };
+ for (const char* script : wellFormedScripts) {
+ UErrorCode status = U_ZERO_ERROR;
+ LocaleBuilder bld;
+ bld.setScript(script);
+ Locale loc = bld.build(status);
+ if (U_FAILURE(status)) {
+ errln("setScript(\"%s\") got Error: %s\n",
+ script, u_errorName(status));
+ }
+ }
+}
+
+void LocaleBuilderTest::TestSetScriptIllFormed() {
+ static const char* illFormed[] = { // each entry must make build() report U_ILLEGAL_ARGUMENT_ERROR
+ "a",
+ "z",
+ "A",
+ "F",
+ "2",
+ "0",
+ "9", // the comma matters: without it "9" and "{" merge into the single literal "9{"
+ "{",
+ ".",
+ "[",
+ "]",
+ "\\",
+
+ "e1",
+ "N2",
+ "3N",
+ "4e",
+ "e:",
+ "43",
+ "a9",
+
+ "aN0",
+ "z1z",
+ "2zz",
+ "3A3",
+ "456",
+ "af)",
+
+ "0atn",
+ "l1tn",
+ "lA2N",
+ "la4N",
+ "arB5",
+ "1234",
+
+ "e)gij",
+ "Ab3AD",
+ "ZAAS8",
+
+ "efgi[]",
+ "AA9GFE",
+ "7kD3Fz",
+
+ "as8fads",
+ "0DSFADF",
+ "'iSFkDk",
+
+ "oieradf+",
+ "IADSFJK-",
+ "kkDSFJk0",
+
+ // alpha{9}
+ "oieradfab",
+ "IADSFJKDE",
+ "kkDSFJkzf",
+ };
+ for (const char* ill : illFormed) {
+ UErrorCode status = U_ZERO_ERROR;
+ LocaleBuilder bld;
+ bld.setScript(ill);
+ Locale loc = bld.build(status);
+ if (status != U_ILLEGAL_ARGUMENT_ERROR) {
+ errln("setScript(\"%s\") should fail but has no Error\n", ill);
+ }
+ }
+}
+
+void LocaleBuilderTest::TestSetRegionWellFormed() {
+ // http://www.unicode.org/reports/tr35/tr35.html#unicode_region_subtag
+ // unicode_region_subtag = (alpha{2} | digit{3})
+ static const char* wellFormedRegions[] = {
+ "",
+
+ // alpha{2}
+ "en",
+ "NE",
+ "eN",
+ "Ne",
+
+ // digit{3}
+ "000",
+ "999",
+ "123",
+ "987"
+ };
+ for (const char* region : wellFormedRegions) {
+ UErrorCode status = U_ZERO_ERROR;
+ LocaleBuilder bld;
+ bld.setRegion(region);
+ Locale loc = bld.build(status);
+ if (U_FAILURE(status)) {
+ errln("setRegion(\"%s\") got Error: %s\n",
+ region, u_errorName(status));
+ }
+ }
+}
+
+void LocaleBuilderTest::TestSetRegionIllFormed() {
+ static const char* illFormed[] = { // each entry must make build() report U_ILLEGAL_ARGUMENT_ERROR
+ "a",
+ "z",
+ "A",
+ "F",
+ "2",
+ "0",
+ "9", // the comma matters: without it "9" and "{" merge into the single literal "9{"
+ "{",
+ ".",
+ "[",
+ "]",
+ "\\",
+
+ "e1",
+ "N2",
+ "3N",
+ "4e",
+ "e:",
+ "43",
+ "a9",
+
+ "aN0",
+ "z1z",
+ "2zz",
+ "3A3",
+ "4.6",
+ "af)",
+
+ "0atn",
+ "l1tn",
+ "lA2N",
+ "la4N",
+ "arB5",
+ "1234",
+
+ "e)gij",
+ "Ab3AD",
+ "ZAAS8",
+
+ "efgi[]",
+ "AA9GFE",
+ "7kD3Fz",
+
+ "as8fads",
+ "0DSFADF",
+ "'iSFkDk",
+
+ "oieradf+",
+ "IADSFJK-",
+ "kkDSFJk0",
+
+ // alpha{9}
+ "oieradfab",
+ "IADSFJKDE",
+ "kkDSFJkzf",
+ };
+ for (const char* ill : illFormed) {
+ UErrorCode status = U_ZERO_ERROR;
+ LocaleBuilder bld;
+ bld.setRegion(ill);
+ Locale loc = bld.build(status);
+ if (status != U_ILLEGAL_ARGUMENT_ERROR) {
+ errln("setRegion(\"%s\") should fail but has no Error\n", ill);
+ }
+ }
+}
+
+void LocaleBuilderTest::TestSetVariantWellFormed() {
+    // Positive test for LocaleBuilder::setVariant().
+    // Grammar (UTS #35):
+    //   http://www.unicode.org/reports/tr35/tr35.html#unicode_variant_subtag
+    //   (sep unicode_variant_subtag)*
+    //   unicode_variant_subtag = (alphanum{5,8} | digit alphanum{3}) ;
+    static const char* wellFormedVariants[] = {
+        // the empty string must be accepted as well
+        "",
+
+        // five alphanumerics
+        "efgij",
+        "AbCAD",
+        "ZAASD",
+        "0AASD",
+        "A1CAD",
+        "ef2ij",
+        "ads3X",
+        "owqF4",
+
+        // six alphanumerics
+        "efgijk",
+        "AADGFE",
+        "AkDfFz",
+        "0ADGFE",
+        "A9DfFz",
+        "AADG7E",
+
+        // seven alphanumerics
+        "asdfads",
+        "ADSFADF",
+        "piSFkDk",
+        "a0dfads",
+        "ADSF3DF",
+        "piSFkD9",
+
+        // eight alphanumerics
+        "oieradfz",
+        "IADSFJKR",
+        "kkDSFJkR",
+        "0ADSFJKR",
+        "12345679",
+
+        // a digit followed by three alphanumerics
+        "0123",
+        "1abc",
+        "20EF",
+        "30EF",
+        "8A03",
+        "3Ax3",
+        "9Axy",
+
+        // several subtags joined with '-'
+        "0123-4567",
+        "0ab3-ABCDE",
+        "9ax3-xByD9",
+        "9ax3-xByD9-adfk934a",
+
+        // several subtags joined with '_'
+        "0123_4567",
+        "0ab3_ABCDE",
+        "9ax3_xByD9",
+        "9ax3_xByD9_adfk934a",
+
+        // both separators mixed in one value
+        "9ax3-xByD9_adfk934a",
+        "9ax3_xByD9-adfk934a",
+    };
+    for (const char* candidate : wellFormedVariants) {
+        LocaleBuilder builder;
+        UErrorCode errorCode = U_ZERO_ERROR;
+        builder.setVariant(candidate);
+        Locale built = builder.build(errorCode);
+        if (!U_FAILURE(errorCode)) {
+            continue;
+        }
+        errln("setVariant(\"%s\") got Error: %s\n",
+              candidate, u_errorName(errorCode));
+    }
+}
+
+void LocaleBuilderTest::TestSetVariantIllFormed() {
+    // Negative test for LocaleBuilder::setVariant().
+    // Per UTS #35, unicode_variant_subtag = (alphanum{5,8} | digit alphanum{3})
+    // and several subtags may be joined by a separator. Every entry below
+    // violates that, so build() is expected to report
+    // U_ILLEGAL_ARGUMENT_ERROR for each of them.
+    //
+    // NOTE: every literal needs its own trailing comma. Adjacent string
+    // literals are concatenated by the compiler, which would silently merge
+    // two test cases into one.
+    static const char* illFormed[] = {
+        "a",
+        "z",
+        "A",
+        "F",
+        "2",
+        "0",
+        "9",
+        "{",
+        ".",
+        "[",
+        "]",
+        "\\",
+
+        "e1",
+        "N2",
+        "3N",
+        "4e",
+        "e:",
+        "43",
+        "a9",
+        "en",
+        "NE",
+        "eN",
+        "Ne",
+
+        "aNe",
+        "zzz",
+        "AAA",
+        "aN0",
+        "z1z",
+        "2zz",
+        "3A3",
+        "4.6",
+        "af)",
+        "345",
+        "923",
+
+        // four characters that do not start with a digit
+        "Latn",
+        "latn",
+        "lATN",
+        "laTN",
+        "arBN",
+        "ARbn",
+        "adsf",
+        "aADF",
+        "BSVS",
+        "LATn",
+        "l1tn",
+        "lA2N",
+        "la4N",
+        "arB5",
+        "abc3",
+        "A3BC",
+
+        "e)gij",
+        "A+3AD",
+        "ZAA=8",
+
+        "efgi[]",
+        "AA9]FE",
+        "7k[3Fz",
+
+        "as8f/ds",
+        "0DSFAD{",
+        "'iSFkDk",
+
+        "oieradf+",
+        "IADSFJK-",
+        "k}DSFJk0",
+
+        // nine alphanumerics: one too many
+        "oieradfab",
+        "IADSFJKDE",
+        "kkDSFJkzf",
+        "123456789",
+
+        // leading/trailing separators or a too-short subtag
+        "-0123",
+        "-0123-4567",
+        "0123-4567-",
+        "-123-4567",
+        "_0123",
+        "_0123_4567",
+        "0123_4567_",
+        "_123_4567",
+
+        "-abcde-figjk",
+        "abcde-figjk-",
+        "-abcde-figjk-",
+        "_abcde_figjk",
+        "abcde_figjk_",
+        "_abcde_figjk_",
+    };
+    for (const char* ill : illFormed) {
+        // Fresh builder and status per entry so one case cannot affect the next.
+        UErrorCode status = U_ZERO_ERROR;
+        LocaleBuilder bld;
+        bld.setVariant(ill);
+        // The error is only observed through the status passed to build().
+        Locale loc = bld.build(status);
+        if (status != U_ILLEGAL_ARGUMENT_ERROR) {
+            errln("setVariant(\"%s\") should fail but has no Error\n", ill);
+        }
+    }
+}
+
+void LocaleBuilderTest::TestSetUnicodeLocaleKeywordWellFormed() {
+    // Positive test for LocaleBuilder::setUnicodeLocaleKeyword().
+    // Grammar (UTS #35):
+    //   http://www.unicode.org/reports/tr35/tr35.html#unicode_locale_extensions
+    //   keyword = key (sep type)? ;
+    //   key = alphanum alpha ;
+    //   type = alphanum{3,8} (sep alphanum{3,8})* ;
+    //
+    // Flat table of pairs: even slots hold the key, odd slots the type.
+    static const char* keyTypePairs[] = {
+        "aa", "123",
+        "3b", "zyzbcdef",
+        "0Z", "1ZB30zk9-abc",
+        "cZ", "2ck30zfZ-adsf023-234kcZ",
+        "ZZ", "Lant",
+        "ko", "",
+    };
+    const int slotCount = UPRV_LENGTHOF(keyTypePairs);
+    for (int slot = 0; slot < slotCount; slot += 2) {
+        const char* key = keyTypePairs[slot];
+        const char* type = keyTypePairs[slot + 1];
+        LocaleBuilder builder;
+        UErrorCode errorCode = U_ZERO_ERROR;
+        builder.setUnicodeLocaleKeyword(key, type);
+        Locale built = builder.build(errorCode);
+        if (!U_FAILURE(errorCode)) {
+            continue;
+        }
+        errln("setUnicodeLocaleKeyword(\"%s\", \"%s\") got Error: %s\n",
+              key,
+              type,
+              u_errorName(errorCode));
+    }
+}
+
+void LocaleBuilderTest::TestSetUnicodeLocaleKeywordIllFormedKey() {
+    // Negative test: each key below is paired with the fixed type "abc" and
+    // build() must then report U_ILLEGAL_ARGUMENT_ERROR.
+    static const char* badKeys[] = {
+        "34",
+        "ab-cde",
+        "123",
+        "b3",
+        "zyzabcdef",
+        "Z0",
+    };
+    for (int32_t idx = 0; idx < UPRV_LENGTHOF(badKeys); ++idx) {
+        const char* key = badKeys[idx];
+        LocaleBuilder builder;
+        UErrorCode errorCode = U_ZERO_ERROR;
+        builder.setUnicodeLocaleKeyword(key, "abc");
+        Locale built = builder.build(errorCode);
+        if (errorCode == U_ILLEGAL_ARGUMENT_ERROR) {
+            continue;
+        }
+        errln("setUnicodeLocaleKeyword(\"%s\", \"abc\") should fail but has no Error\n",
+              key);
+    }
+}
+
+void LocaleBuilderTest::TestSetUnicodeLocaleKeywordIllFormedValue() {
+    // Negative test: each type below is paired with the fixed key "ab" and
+    // build() must then report U_ILLEGAL_ARGUMENT_ERROR.
+    static const char* badTypes[] = {
+        "34",
+        "ab-",
+        "-cd",
+        "-ef-",
+        "zyzabcdef",
+        "ab-abc",
+        "1ZB30zfk9-abc",
+        "2ck30zfk9-adsf023-234kcZ",
+    };
+    for (int32_t idx = 0; idx < UPRV_LENGTHOF(badTypes); ++idx) {
+        const char* type = badTypes[idx];
+        LocaleBuilder builder;
+        UErrorCode errorCode = U_ZERO_ERROR;
+        builder.setUnicodeLocaleKeyword("ab", type);
+        Locale built = builder.build(errorCode);
+        if (errorCode == U_ILLEGAL_ARGUMENT_ERROR) {
+            continue;
+        }
+        errln("setUnicodeLocaleKeyword(\"ab\", \"%s\") should fail but has no Error\n",
+              type);
+    }
+}
+
+void LocaleBuilderTest::TestAddRemoveUnicodeLocaleAttribute() {
+    // Attributes are added in mixed case, out of order and with repeats; the
+    // expected tag shows them lower-cased, sorted and without duplicates.
+    // They are then removed one at a time, checking the tag after each step.
+    // The error code is deliberately never reset between steps.
+    UErrorCode errorCode = U_ZERO_ERROR;
+    LocaleBuilder builder;
+    Locale built = builder.setLanguage("fr")
+                          .addUnicodeLocaleAttribute("abc")
+                          .addUnicodeLocaleAttribute("aBc")
+                          .addUnicodeLocaleAttribute("EFG")
+                          .addUnicodeLocaleAttribute("efghi")
+                          .addUnicodeLocaleAttribute("efgh")
+                          .addUnicodeLocaleAttribute("efGhi")
+                          .addUnicodeLocaleAttribute("EFg")
+                          .addUnicodeLocaleAttribute("hijk")
+                          .addUnicodeLocaleAttribute("EFG")
+                          .addUnicodeLocaleAttribute("HiJK")
+                          .addUnicodeLocaleAttribute("aBc")
+                          .build(errorCode);
+    if (U_FAILURE(errorCode)) {
+        errln("addUnicodeLocaleAttribute() got Error: %s\n",
+              u_errorName(errorCode));
+    }
+    std::string expected("fr-u-abc-efg-efgh-efghi-hijk");
+    std::string actual = built.toLanguageTag<std::string>(errorCode);
+    if (expected != actual || U_FAILURE(errorCode)) {
+        errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
+    }
+
+    // Step 1: drop "efgh" from the middle, spelled with different casing.
+    built = builder.removeUnicodeLocaleAttribute("eFgH").build(errorCode);
+    if (U_FAILURE(errorCode)) {
+        errln("removeUnicodeLocaleAttribute() got Error: %s\n",
+              u_errorName(errorCode));
+    }
+    expected = "fr-u-abc-efg-efghi-hijk";
+    actual = built.toLanguageTag<std::string>(errorCode);
+    if (expected != actual || U_FAILURE(errorCode)) {
+        errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
+    }
+
+    // Step 2: removing an attribute that is no longer present changes nothing.
+    built = builder.removeUnicodeLocaleAttribute("efgh").build(errorCode);
+    if (U_FAILURE(errorCode)) {
+        errln("removeUnicodeLocaleAttribute() got Error: %s\n",
+              u_errorName(errorCode));
+    }
+    actual = built.toLanguageTag<std::string>(errorCode);
+    if (expected != actual || U_FAILURE(errorCode)) {
+        errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
+    }
+
+    // Step 3: drop the first attribute "abc", spelled with different casing.
+    built = builder.removeUnicodeLocaleAttribute("ABC").build(errorCode);
+    if (U_FAILURE(errorCode)) {
+        errln("removeUnicodeLocaleAttribute() got Error: %s\n",
+              u_errorName(errorCode));
+    }
+    expected = "fr-u-efg-efghi-hijk";
+    actual = built.toLanguageTag<std::string>(errorCode);
+    if (expected != actual || U_FAILURE(errorCode)) {
+        errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
+    }
+
+    // Step 4: "hij" is only a prefix of the last attribute, so nothing changes.
+    built = builder.removeUnicodeLocaleAttribute("hij").build(errorCode);
+    if (U_FAILURE(errorCode)) {
+        errln("removeUnicodeLocaleAttribute() got Error: %s\n",
+              u_errorName(errorCode));
+    }
+    actual = built.toLanguageTag<std::string>(errorCode);
+    if (expected != actual || U_FAILURE(errorCode)) {
+        errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
+    }
+
+    // Step 5: drop the last attribute "hijk", spelled with different casing.
+    built = builder.removeUnicodeLocaleAttribute("hIJK").build(errorCode);
+    if (U_FAILURE(errorCode)) {
+        errln("removeUnicodeLocaleAttribute() got Error: %s\n",
+              u_errorName(errorCode));
+    }
+    expected = "fr-u-efg-efghi";
+    actual = built.toLanguageTag<std::string>(errorCode);
+    if (expected != actual || U_FAILURE(errorCode)) {
+        errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
+    }
+
+    // Step 6: drop "efghi", now the last one, spelled with different casing.
+    built = builder.removeUnicodeLocaleAttribute("EFGhi").build(errorCode);
+    if (U_FAILURE(errorCode)) {
+        errln("removeUnicodeLocaleAttribute() got Error: %s\n",
+              u_errorName(errorCode));
+    }
+    expected = "fr-u-efg";
+    actual = built.toLanguageTag<std::string>(errorCode);
+    if (expected != actual || U_FAILURE(errorCode)) {
+        errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
+    }
+
+    // Step 7: drop the only remaining attribute; the "-u-" part disappears.
+    built = builder.removeUnicodeLocaleAttribute("EFG").build(errorCode);
+    if (U_FAILURE(errorCode)) {
+        errln("removeUnicodeLocaleAttribute() got Error: %s\n",
+              u_errorName(errorCode));
+    }
+    expected = "fr";
+    actual = built.toLanguageTag<std::string>(errorCode);
+    if (expected != actual || U_FAILURE(errorCode)) {
+        errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
+    }
+
+}
+
+void LocaleBuilderTest::TestAddRemoveUnicodeLocaleAttributeWellFormed() {
+    // Positive test for add/removeUnicodeLocaleAttribute().
+    // Grammar (UTS #35):
+    //   http://www.unicode.org/reports/tr35/tr35.html#unicode_locale_extensions
+    //   attribute = alphanum{3,8} ;
+    static const char* wellFormedAttributes[] = {
+        // three alphanumerics
+        "AbC",
+        "ZAA",
+        "0AA",
+        "x3A",
+        "xa8",
+
+        // four alphanumerics
+        "AbCA",
+        "ZASD",
+        "0ASD",
+        "A3a4",
+        "zK90",
+
+        // five alphanumerics
+        "efgij",
+        "AbCAD",
+        "ZAASD",
+        "0AASD",
+        "A1CAD",
+        "ef2ij",
+        "ads3X",
+        "owqF4",
+
+        // six alphanumerics
+        "efgijk",
+        "AADGFE",
+        "AkDfFz",
+        "0ADGFE",
+        "A9DfFz",
+        "AADG7E",
+
+        // seven alphanumerics
+        "asdfads",
+        "ADSFADF",
+        "piSFkDk",
+        "a0dfads",
+        "ADSF3DF",
+        "piSFkD9",
+
+        // eight alphanumerics
+        "oieradfz",
+        "IADSFJKR",
+        "kkDSFJkR",
+    };
+    // A single builder is shared across iterations so attributes accumulate.
+    LocaleBuilder builder;
+    const int total = UPRV_LENGTHOF(wellFormedAttributes);
+    for (int pos = 0; pos < total; pos++) {
+        // Start over from an empty builder every fifth attribute.
+        if (pos % 5 == 0) {
+            builder.clear();
+        }
+        UErrorCode errorCode = U_ZERO_ERROR;
+        const char* added = wellFormedAttributes[pos];
+        builder.addUnicodeLocaleAttribute(added);
+        Locale built = builder.build(errorCode);
+        if (U_FAILURE(errorCode)) {
+            errln("addUnicodeLocaleAttribute(\"%s\") got Error: %s\n",
+                  added, u_errorName(errorCode));
+        }
+        if (pos <= 2) {
+            continue;
+        }
+
+        // From the fourth entry on, also remove the entries one and three
+        // positions back, rebuilding after each removal.
+        const char* oneBack = wellFormedAttributes[pos - 1];
+        builder.removeUnicodeLocaleAttribute(oneBack);
+        built = builder.build(errorCode);
+        if (U_FAILURE(errorCode)) {
+            errln("removeUnicodeLocaleAttribute(\"%s\") got Error: %s\n",
+                  oneBack, u_errorName(errorCode));
+        }
+
+        const char* threeBack = wellFormedAttributes[pos - 3];
+        builder.removeUnicodeLocaleAttribute(threeBack);
+        built = builder.build(errorCode);
+        if (U_FAILURE(errorCode)) {
+            errln("removeUnicodeLocaleAttribute(\"%s\") got Error: %s\n",
+                  threeBack, u_errorName(errorCode));
+        }
+    }
+}
+
+void LocaleBuilderTest::TestAddUnicodeLocaleAttributeIllFormed() {
+    // Negative test: adding any of these attributes must make build() report
+    // U_ILLEGAL_ARGUMENT_ERROR.
+    static const char* badAttributes[] = {
+        "aa",
+        "34",
+        "ab-",
+        "-cd",
+        "-ef-",
+        "zyzabcdef",
+        "123456789",
+        "ab-abc",
+        "1ZB30zfk9-abc",
+        "2ck30zfk9-adsf023-234kcZ",
+    };
+    for (int32_t idx = 0; idx < UPRV_LENGTHOF(badAttributes); ++idx) {
+        const char* attribute = badAttributes[idx];
+        LocaleBuilder builder;
+        UErrorCode errorCode = U_ZERO_ERROR;
+        builder.addUnicodeLocaleAttribute(attribute);
+        Locale built = builder.build(errorCode);
+        if (errorCode == U_ILLEGAL_ARGUMENT_ERROR) {
+            continue;
+        }
+        errln("addUnicodeLocaleAttribute(\"%s\") should fail but has no Error\n",
+              attribute);
+    }
+}
+
+void LocaleBuilderTest::TestSetExtensionU() {
+    // Walks one builder through a series of 'u' extension updates. After
+    // each setter, Verify() is handed the builder, the language tag expected
+    // at that point, and the message to report on failure. Each message
+    // spells out the call that was just made so a failure is easy to locate.
+    //
+    // As the expected tags show: the singleton is accepted in either case, a
+    // new value replaces the previous 'u' extension as a whole, and an empty
+    // value removes it.
+    LocaleBuilder bld;
+    bld.setLanguage("zh");
+    Verify(bld, "zh",
+           "setLanguage(\"zh\") got Error: %s\n");
+
+    bld.setExtension('u', "co-stroke");
+    Verify(bld, "zh-u-co-stroke",
+           "setExtension('u', \"co-stroke\") got Error: %s\n");
+
+    bld.setExtension('U', "ca-islamic");
+    Verify(bld, "zh-u-ca-islamic",
+           "setExtension('U', \"ca-islamic\") got Error: %s\n");
+
+    bld.setExtension('u', "ca-chinese");
+    Verify(bld, "zh-u-ca-chinese",
+           "setExtension('u', \"ca-chinese\") got Error: %s\n");
+
+    bld.setExtension('U', "co-pinyin");
+    Verify(bld, "zh-u-co-pinyin",
+           "setExtension('U', \"co-pinyin\") got Error: %s\n");
+
+    // Changing an unrelated field keeps the extension.
+    bld.setRegion("TW");
+    Verify(bld, "zh-TW-u-co-pinyin",
+           "setRegion(\"TW\") got Error: %s\n");
+
+    // Empty value: the extension is dropped.
+    bld.setExtension('U', "");
+    Verify(bld, "zh-TW",
+           "setExtension('U', \"\") got Error: %s\n");
+
+    // Attributes followed by a keyword.
+    bld.setExtension('u', "abc-defg-kr-face");
+    Verify(bld, "zh-TW-u-abc-defg-kr-face",
+           "setExtension('u', \"abc-defg-kr-face\") got Error: %s\n");
+
+    bld.setExtension('U', "ca-japanese");
+    Verify(bld, "zh-TW-u-ca-japanese",
+           "setExtension('U', \"ca-japanese\") got Error: %s\n");
+
+}
+
+void LocaleBuilderTest::TestSetExtensionValidateUWellFormed() {
+    // Positive test for setExtension('u', ...). Grammar pieces exercised:
+    //   keyword = key (sep type)? ;
+    //   key = alphanum alpha ;
+    //   type = alphanum{3,8} (sep alphanum{3,8})* ;
+    static const char* uValues[] = {
+        // a single keyword, with and without a type
+        "3A",
+        "ZA",
+        "az-abc",
+        "zz-123",
+        "7z-12345678",
+        "kb-A234567Z",
+        // several keywords in a row
+        "1z-ZZ",
+        "2z-ZZ-123",
+        "3z-ZZ-123-cd",
+        "0z-ZZ-123-cd-efghijkl",
+        // a single attribute
+        "abc",
+        "456",
+        "87654321",
+        "ZABADFSD",
+        // several attributes in a row
+        "abc-ZABADFSD",
+        "123-ZABADFSD",
+        "K2K-12345678",
+        "K2K-12345678-zzz",
+        // attributes first, keywords after
+        "K2K-12345678-zz",
+        "K2K-12345678-zz-0z",
+        "K2K-12345678-9z-AZ-abc",
+        "K2K-12345678-zz-9A-234",
+        "K2K-12345678-zk0-abc-efg-zz-9k-234",
+    };
+    for (int32_t idx = 0; idx < UPRV_LENGTHOF(uValues); ++idx) {
+        const char* value = uValues[idx];
+        LocaleBuilder builder;
+        UErrorCode errorCode = U_ZERO_ERROR;
+        builder.setExtension('u', value);
+        Locale built = builder.build(errorCode);
+        if (!U_FAILURE(errorCode)) {
+            continue;
+        }
+        errln("setExtension('u', \"%s\") got Error: %s\n",
+              value, u_errorName(errorCode));
+    }
+}
+
+void LocaleBuilderTest::TestSetExtensionValidateUIllFormed() {
+    // Negative test: each value handed to setExtension('u', ...) must make
+    // build() report U_ILLEGAL_ARGUMENT_ERROR.
+    static const char* badUValues[] = {
+        // malformed key
+        "-",
+        "-ab",
+        "ab-",
+        "abc-",
+        "-abc",
+        "0",
+        "a",
+        "A0",
+        "z9",
+        "09",
+        "90",
+        // malformed keyword
+        "AB-A0",
+        "AB-efg-A0",
+        "xy-123456789",
+        "AB-Aa-",
+        "AB-Aac-",
+        // malformed attribute
+        "abcdefghi",
+        "abcdefgh-",
+        "abcdefgh-abcdefghi",
+        "abcdefgh-1",
+        "abcdefgh-a",
+        "abcdefgh-a2345678z",
+    };
+    for (int32_t idx = 0; idx < UPRV_LENGTHOF(badUValues); ++idx) {
+        const char* value = badUValues[idx];
+        LocaleBuilder builder;
+        UErrorCode errorCode = U_ZERO_ERROR;
+        builder.setExtension('u', value);
+        Locale built = builder.build(errorCode);
+        if (errorCode == U_ILLEGAL_ARGUMENT_ERROR) {
+            continue;
+        }
+        errln("setExtension('u', \"%s\") should fail but has no Error\n",
+              value);
+    }
+}
+
+void LocaleBuilderTest::TestSetExtensionT() {
+    // Drives one builder through a sequence of 't' extension updates. After
+    // each setter, Verify() receives the builder, the tag expected at that
+    // point, and the failure message.
+    LocaleBuilder builder;
+
+    builder.setLanguage("fr");
+    Verify(builder, "fr", "setLanguage(\"fr\") got Error: %s\n");
+
+    // Upper-case singleton; expected tag shows it lower-cased.
+    builder.setExtension('T', "zh");
+    Verify(builder, "fr-t-zh", "setExtension('T', \"zh\") got Error: %s\n");
+
+    // Full tlang plus a tfield; expected tag shows the value lower-cased.
+    builder.setExtension('t', "zh-Hant-TW-1234-A9-123-456ABCDE");
+    Verify(builder, "fr-t-zh-hant-tw-1234-a9-123-456abcde",
+           "setExtension('t', \"zh-Hant-TW-1234-A9-123-456ABCDE\") got Error: %s\n");
+
+    // A new value replaces the old one entirely.
+    builder.setExtension('T', "a9-123");
+    Verify(builder, "fr-t-a9-123", "setExtension('T', \"a9-123\") got Error: %s\n");
+
+    // Region, script and variant changes leave the extension in place.
+    builder.setRegion("MX");
+    Verify(builder, "fr-MX-t-a9-123", "setRegion(\"MX\") got Error: %s\n");
+
+    builder.setScript("Hans");
+    Verify(builder, "fr-Hans-MX-t-a9-123", "setScript(\"Hans\") got Error: %s\n");
+
+    builder.setVariant("9abc-abcde");
+    Verify(builder, "fr-Hans-MX-9abc-abcde-t-a9-123",
+           "setVariant(\"9abc-abcde\") got Error: %s\n");
+
+    // An empty value removes the extension.
+    builder.setExtension('T', "");
+    Verify(builder, "fr-Hans-MX-9abc-abcde",
+           "bld.setExtension('T', \"\") got Error: %s\n");
+}
+
+void LocaleBuilderTest::TestSetExtensionValidateTWellFormed() {
+    // Positive test for setExtension('t', ...). Grammar:
+    //   ((sep tlang (sep tfield)*) | (sep tfield)+)
+    //   tlang = unicode_language_subtag (sep unicode_script_subtag)?
+    //           (sep unicode_region_subtag)? (sep unicode_variant_subtag)* ;
+    static const char* tValues[] = {
+        // tlang: language only
+        "en",
+        "abc",
+        "abcde",
+        "ABCDEFGH",
+        // tlang: language + script
+        "en-latn",
+        "abc-arab",
+        "ABCDEFGH-Thai",
+        // tlang: language + script + region
+        "en-latn-ME",
+        "abc-arab-RU",
+        "ABCDEFGH-Thai-TH",
+        "en-latn-409",
+        "abc-arab-123",
+        "ABCDEFGH-Thai-456",
+        // tlang: language + region
+        "en-ME",
+        "abc-RU",
+        "ABCDEFGH-TH",
+        "en-409",
+        "abc-123",
+        "ABCDEFGH-456",
+        // tlang: language + script + region + one or more variants
+        "en-latn-ME-abcde",
+        "abc-arab-RU-3abc-abcdef",
+        "ABCDEFGH-Thai-TH-ADSFS-9xyz-abcdef",
+        "en-latn-409-xafsa",
+        "abc-arab-123-ADASDF",
+        "ABCDEFGH-Thai-456-9sdf-ADASFAS",
+        // tfields only
+        "A0-abcde",
+        "z9-abcde123",
+        "z9-abcde123-a1-abcde",
+        // tlang followed by tfields
+        "fr-A0-abcde",
+        "fr-FR-A0-abcde",
+        "fr-123-z9-abcde123-a1-abcde",
+        "fr-Latn-FR-z9-abcde123-a1-abcde",
+        "gab-Thai-TH-abcde-z9-abcde123-a1-abcde",
+        "gab-Thai-TH-0bde-z9-abcde123-a1-abcde",
+    };
+    for (int32_t idx = 0; idx < UPRV_LENGTHOF(tValues); ++idx) {
+        const char* value = tValues[idx];
+        LocaleBuilder builder;
+        UErrorCode errorCode = U_ZERO_ERROR;
+        builder.setExtension('t', value);
+        Locale built = builder.build(errorCode);
+        if (!U_FAILURE(errorCode)) {
+            continue;
+        }
+        errln("setExtension('t', \"%s\") got Error: %s\n",
+              value, u_errorName(errorCode));
+    }
+}
+
+void LocaleBuilderTest::TestSetExtensionValidateTIllFormed() {
+    // Negative test: each value handed to setExtension('t', ...) must make
+    // build() report U_ILLEGAL_ARGUMENT_ERROR.
+    static const char* badTValues[] = {
+        "a",
+        "a-",
+        "0",
+        "9-",
+        "-9",
+        "-z",
+        // "Latn", // Per 2019-01-23 ICUTC, still accept 4alpha. See ICU-20321
+        "Latn-",
+        "en-",
+        "nob-",
+        "-z9",
+        "a3",
+        "a3-",
+        "3a",
+        "0z-",
+        "en-123-a1",
+        "en-TH-a1",
+        "gab-TH-a1",
+        "gab-Thai-a1",
+        "gab-Thai-TH-a1",
+        "gab-Thai-TH-0bde-a1",
+        "gab-Thai-TH-0bde-3b",
+        "gab-Thai-TH-0bde-z9-a1",
+        "gab-Thai-TH-0bde-z9-3b",
+        "gab-Thai-TH-0bde-z9-abcde123-3b",
+        "gab-Thai-TH-0bde-z9-abcde123-ab",
+        "gab-Thai-TH-0bde-z9-abcde123-ab",
+        "gab-Thai-TH-0bde-z9-abcde123-a1",
+        "gab-Thai-TH-0bde-z9-abcde123-a1-",
+        "gab-Thai-TH-0bde-z9-abcde123-a1-a",
+        "gab-Thai-TH-0bde-z9-abcde123-a1-ab",
+    };
+    for (int32_t idx = 0; idx < UPRV_LENGTHOF(badTValues); ++idx) {
+        const char* value = badTValues[idx];
+        LocaleBuilder builder;
+        UErrorCode errorCode = U_ZERO_ERROR;
+        builder.setExtension('t', value);
+        Locale built = builder.build(errorCode);
+        if (errorCode == U_ILLEGAL_ARGUMENT_ERROR) {
+            continue;
+        }
+        errln("setExtension('t', \"%s\") should fail but has no Error\n",
+              value);
+    }
+}
+
+void LocaleBuilderTest::TestSetExtensionPU() {
+    // Walks one builder through a series of private-use ('x') extension
+    // updates. After each setter, Verify() is handed the builder, the
+    // language tag expected at that point, and the message to report on
+    // failure. Each message spells out the call that was just made.
+    //
+    // As the expected tags show: the singleton is accepted in either case, a
+    // new value replaces the previous one, and an empty value removes it.
+    LocaleBuilder bld;
+    bld.setLanguage("ar");
+    Verify(bld, "ar",
+           "setLanguage(\"ar\") got Error: %s\n");
+
+    bld.setExtension('X', "a-b-c-d-e");
+    Verify(bld, "ar-x-a-b-c-d-e",
+           "setExtension('X', \"a-b-c-d-e\") got Error: %s\n");
+
+    bld.setExtension('x', "0-1-2-3");
+    Verify(bld, "ar-x-0-1-2-3",
+           "setExtension('x', \"0-1-2-3\") got Error: %s\n");
+
+    bld.setExtension('X', "0-12345678-x-x");
+    Verify(bld, "ar-x-0-12345678-x-x",
+           "setExtension('X', \"0-12345678-x-x\") got Error: %s\n");
+
+    // Changing an unrelated field keeps the extension.
+    bld.setRegion("TH");
+    Verify(bld, "ar-TH-x-0-12345678-x-x",
+           "setRegion(\"TH\") got Error: %s\n");
+
+    // Empty value: the extension is dropped.
+    bld.setExtension('X', "");
+    Verify(bld, "ar-TH",
+           "setExtension('X', \"\") got Error: %s\n");
+}
+
+void LocaleBuilderTest::TestSetExtensionValidatePUWellFormed() {
+    // Positive test for setExtension('x', ...): one or more subtags of one
+    // to eight alphanumerics each, joined by '-'.
+    static const char* puValues[] = {
+        "a",  // one-character subtag
+        "z",  // one-character subtag
+        "0",  // one-character subtag, digit
+        "9",  // one-character subtag, digit
+        "a-0",  // two one-character subtags, letter then digit
+        "9-z",  // two one-character subtags, digit then letter
+        "ab",
+        "abc",
+        "abcefghi",  // eight characters
+        "87654321",
+        "01",
+        "234",
+        "0a-ab-87654321",  // three subtags
+        "87654321-ab-00-3A",  // four subtags
+        "a-9-87654321",  // three subtags, short ones followed by a long one
+        "87654321-ab-0-3A",
+    };
+    for (int32_t idx = 0; idx < UPRV_LENGTHOF(puValues); ++idx) {
+        const char* value = puValues[idx];
+        LocaleBuilder builder;
+        UErrorCode errorCode = U_ZERO_ERROR;
+        builder.setExtension('x', value);
+        Locale built = builder.build(errorCode);
+        if (!U_FAILURE(errorCode)) {
+            continue;
+        }
+        errln("setExtension('x', \"%s\") got Error: %s\n",
+              value, u_errorName(errorCode));
+    }
+}
+
+void LocaleBuilderTest::TestSetExtensionValidatePUIllFormed() {
+    // Negative test: every value contains a nine-character subtag, so
+    // setExtension('x', ...) must make build() report
+    // U_ILLEGAL_ARGUMENT_ERROR.
+    static const char* badPuValues[] = {
+        "123456789",  // nine digits
+        "abcdefghi",  // nine letters
+        "ab-123456789",  // second subtag has nine characters
+        "abcdefghi-12",  // first subtag has nine characters
+        "a-ab-987654321",  // third subtag has nine characters
+        "987654321-a-0-3",  // first subtag has nine characters
+    };
+    for (int32_t idx = 0; idx < UPRV_LENGTHOF(badPuValues); ++idx) {
+        const char* value = badPuValues[idx];
+        LocaleBuilder builder;
+        UErrorCode errorCode = U_ZERO_ERROR;
+        builder.setExtension('x', value);
+        Locale built = builder.build(errorCode);
+        if (errorCode == U_ILLEGAL_ARGUMENT_ERROR) {
+            continue;
+        }
+        errln("setExtension('x', \"%s\") should fail but has no Error\n",
+              value);
+    }
+}
+
+void LocaleBuilderTest::TestSetExtensionOthers() {
+    // Walks one builder through updates of extensions whose singleton is
+    // neither 't', 'u' nor 'x'. After each setter, Verify() is handed the
+    // builder, the language tag expected at that point, and the message to
+    // report on failure. Each message spells out the call that was just made.
+    //
+    // As the expected tags show: extensions appear ordered by singleton,
+    // singleton and value are lower-cased, and an empty value removes only
+    // the extension for that singleton.
+    LocaleBuilder bld;
+    bld.setLanguage("fr");
+    Verify(bld, "fr",
+           "setLanguage(\"fr\") got Error: %s\n");
+
+    bld.setExtension('Z', "ab");
+    Verify(bld, "fr-z-ab",
+           "setExtension('Z', \"ab\") got Error: %s\n");
+
+    bld.setExtension('0', "xyz12345-abcdefg");
+    Verify(bld, "fr-0-xyz12345-abcdefg-z-ab",
+           "setExtension('0', \"xyz12345-abcdefg\") got Error: %s\n");
+
+    bld.setExtension('a', "01-12345678-ABcdef");
+    Verify(bld, "fr-0-xyz12345-abcdefg-a-01-12345678-abcdef-z-ab",
+           "setExtension('a', \"01-12345678-ABcdef\") got Error: %s\n");
+
+    // Region and script changes leave all extensions in place.
+    bld.setRegion("TH");
+    Verify(bld, "fr-TH-0-xyz12345-abcdefg-a-01-12345678-abcdef-z-ab",
+           "setRegion(\"TH\") got Error: %s\n");
+
+    bld.setScript("Arab");
+    Verify(bld, "fr-Arab-TH-0-xyz12345-abcdefg-a-01-12345678-abcdef-z-ab",
+           "setScript(\"Arab\") got Error: %s\n");
+
+    // 'A' addresses the same extension as 'a' and replaces its value.
+    bld.setExtension('A', "97");
+    Verify(bld, "fr-Arab-TH-0-xyz12345-abcdefg-a-97-z-ab",
+           "setExtension('A', \"97\") got Error: %s\n");
+
+    bld.setExtension('a', "");
+    Verify(bld, "fr-Arab-TH-0-xyz12345-abcdefg-z-ab",
+           "setExtension('a', \"\") got Error: %s\n");
+
+    bld.setExtension('0', "");
+    Verify(bld, "fr-Arab-TH-z-ab",
+           "setExtension('0', \"\") got Error: %s\n");
+}
+
+void LocaleBuilderTest::TestSetExtensionValidateOthersWellFormed() {
+    // Values that are well-formed for an extension whose singleton is not
+    // 't', 'u' or 'x'.
+    static const char* wellFormedExtensions[] = {
+        "ab",
+        "abc",
+        "abcefghi",
+        "01",
+        "234",
+        "87654321",
+        "0a-ab-87654321",
+        "87654321-ab-00-3A",
+    };
+    const int32_t extensionCount = UPRV_LENGTHOF(wellFormedExtensions);
+
+    // Part 1: pair each value with a lower-case singleton, cycling through
+    // 'a'..'z', and expect build() to succeed.
+    const char* aToZ = "abcdefghijklmnopqrstuvwxyz";
+    const int32_t aToZLen = static_cast<int32_t>(uprv_strlen(aToZ));
+    int32_t letterIndex = 0;
+    for (const char* extension : wellFormedExtensions) {
+        char ch = aToZ[letterIndex];
+        letterIndex = (letterIndex + 1) % aToZLen;
+        UErrorCode status = U_ZERO_ERROR;
+        LocaleBuilder bld;
+        bld.setExtension(ch, extension);
+        Locale loc = bld.build(status);
+        if (U_FAILURE(status)) {
+            errln("setExtension('%c', \"%s\") got Error: %s\n",
+                  ch, extension, u_errorName(status));
+        }
+    }
+
+    // Part 2: try a range of singleton characters. ASCII letters and digits
+    // must be accepted; anything else must yield U_ILLEGAL_ARGUMENT_ERROR.
+    const char* someChars =
+        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789`~!@#$%^&*()-_=+;:,.<>?";
+    const int32_t someCharsLen = static_cast<int32_t>(uprv_strlen(someChars));
+    for (int32_t charIndex = 0; charIndex < someCharsLen; charIndex++) {
+        char ch = someChars[charIndex];
+        // Pick a value based on the character code. Going through uint8_t
+        // keeps the index non-negative even where plain char is signed.
+        const char* extension =
+            wellFormedExtensions[static_cast<uint8_t>(ch) % extensionCount];
+        UErrorCode status = U_ZERO_ERROR;
+        LocaleBuilder bld;
+        bld.setExtension(ch, extension);
+        Locale loc = bld.build(status);
+        if (uprv_isASCIILetter(ch) || ('0' <= ch && ch <= '9')) {
+            // 't', 'u' and 'x' have their own value grammars and their own
+            // tests, so the result is not checked for them here.
+            if (ch != 't' && ch != 'T' && ch != 'u' && ch != 'U' && ch != 'x' && ch != 'X') {
+                if (U_FAILURE(status)) {
+                    errln("setExtension('%c', \"%s\") got Error: %s\n",
+                          ch, extension, u_errorName(status));
+                }
+            }
+        } else {
+            if (status != U_ILLEGAL_ARGUMENT_ERROR) {
+                errln("setExtension('%c', \"%s\") should fail but has no Error\n",
+                      ch, extension);
+            }
+        }
+
+    }
+}
+
+void LocaleBuilderTest::TestSetExtensionValidateOthersIllFormed() {
+    // Negative test for extensions whose singleton is not 't', 'u' or 'x':
+    // every value below must make build() report U_ILLEGAL_ARGUMENT_ERROR.
+    static const char* badValues[] = {
+        "0",  // one character: too short
+        "a",  // one character: too short
+        "123456789",  // nine characters: too long
+        "abcdefghi",  // nine characters: too long
+        "ab-123456789",  // second subtag has nine characters
+        "abcdefghi-12",  // first subtag has nine characters
+        "a-ab-87654321",  // first subtag has only one character
+        "87654321-a-0-3",  // subtags after the first have only one character
+    };
+    // Singletons are taken from 'a'..'z' in order, one per value.
+    const char* alphabet = "abcdefghijklmnopqrstuvwxyz";
+    const int32_t alphabetLen = uprv_strlen(alphabet);
+    int32_t letterIndex = 0;
+    for (const char* value : badValues) {
+        const char singleton = alphabet[letterIndex];
+        letterIndex = (letterIndex + 1) % alphabetLen;
+        LocaleBuilder builder;
+        UErrorCode errorCode = U_ZERO_ERROR;
+        builder.setExtension(singleton, value);
+        Locale built = builder.build(errorCode);
+        if (errorCode == U_ILLEGAL_ARGUMENT_ERROR) {
+            continue;
+        }
+        errln("setExtension('%c', \"%s\") should fail but has no Error\n",
+              singleton, value);
+    }
+}
+
+void LocaleBuilderTest::TestSetLocale() {
+    // Round trip: build a locale field by field with one builder, feed the
+    // result into a second, untouched builder through setLocale(), and
+    // require both builders to produce equal locales.
+    LocaleBuilder bld1, bld2;
+    UErrorCode status = U_ZERO_ERROR;
+    Locale l1 = bld1.setLanguage("en")
+                    .setScript("Latn")
+                    .setRegion("MX")
+                    .setVariant("3456-abcde")
+                    .addUnicodeLocaleAttribute("456")
+                    .addUnicodeLocaleAttribute("123")
+                    .setUnicodeLocaleKeyword("nu", "thai")
+                    .setUnicodeLocaleKeyword("co", "stroke")
+                    .setUnicodeLocaleKeyword("ca", "chinese")
+                    .build(status);
+    if (U_FAILURE(status) || l1.isBogus()) {
+        errln("build got Error: %s\n", u_errorName(status));
+    }
+    // Start the second half from a clean status so an earlier failure does
+    // not mask the outcome of setLocale().
+    status = U_ZERO_ERROR;
+    // Use the independent builder here: going back through bld1 would only
+    // re-apply the state bld1 already holds.
+    Locale l2 = bld2.setLocale(l1).build(status);
+    if (U_FAILURE(status) || l2.isBogus()) {
+        errln("build got Error: %s\n", u_errorName(status));
+    }
+
+    if (l1 != l2) {
+        errln("Two locales should be the same, but one is '%s' and the other is '%s'",
+              l1.getName(), l2.getName());
+    }
+}
+
+void LocaleBuilderTest::TestPosixCases() {
+    // A locale parsed from "en-US-u-va-posix" is pushed through
+    // setLocale()/build() and must come back unchanged, and must also equal
+    // the locale constructed from "en-US-POSIX".
+    UErrorCode errorCode = U_ZERO_ERROR;
+    Locale fromTag = Locale::forLanguageTag("en-US-u-va-posix", errorCode);
+    if (U_FAILURE(errorCode) || fromTag.isBogus()) {
+        errln("build got Error: %s\n", u_errorName(errorCode));
+    }
+
+    // Pre-load the builder with unrelated state.
+    LocaleBuilder builder;
+    builder.setLanguage("en")
+           .setRegion("MX")
+           .setScript("Arab")
+           .setUnicodeLocaleKeyword("nu", "Thai")
+           .setExtension('x', "1");
+
+    // setLocale() is expected to discard everything configured above.
+    Locale rebuilt = builder.setLocale(fromTag).build(errorCode);
+    if (U_FAILURE(errorCode) || rebuilt.isBogus()) {
+        errln("build got Error: %s\n", u_errorName(errorCode));
+    }
+    if (fromTag != rebuilt) {
+        errln("The result locale should be the set as the setLocale %s but got %s\n",
+              fromTag.toLanguageTag<std::string>(errorCode).c_str(),
+              rebuilt.toLanguageTag<std::string>(errorCode).c_str());
+    }
+
+    Locale posix("en-US-POSIX");
+    if (posix != rebuilt) {
+        errln("The result locale should be the set as %s but got %s\n",
+              posix.getName(), rebuilt.getName());
+    }
+}
diff --git a/icu4c/source/test/intltest/localebuildertest.h b/icu4c/source/test/intltest/localebuildertest.h
new file mode 100644
index 0000000..41f3730
--- /dev/null
+++ b/icu4c/source/test/intltest/localebuildertest.h
@@ -0,0 +1,51 @@
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include "intltest.h"
+#include "unicode/localebuilder.h"
+
+
+/**
+ * IntlTest-derived suite containing the test cases for LocaleBuilder.
+ **/
+class LocaleBuilderTest: public IntlTest {
+ public:
+    LocaleBuilderTest();
+    virtual ~LocaleBuilderTest();
+
+    void runIndexedTest(int32_t index, UBool exec, const char*& name, char* par = nullptr);
+
+    // Individual test cases, in alphabetical order.
+    void TestAddRemoveUnicodeLocaleAttribute();
+    void TestAddRemoveUnicodeLocaleAttributeWellFormed();
+    void TestAddUnicodeLocaleAttributeIllFormed();
+    void TestLocaleBuilder();
+    void TestLocaleBuilderBasic();
+    void TestPosixCases();
+    void TestSetExtensionOthers();
+    void TestSetExtensionPU();
+    void TestSetExtensionT();
+    void TestSetExtensionU();
+    void TestSetExtensionValidateOthersIllFormed();
+    void TestSetExtensionValidateOthersWellFormed();
+    void TestSetExtensionValidatePUIllFormed();
+    void TestSetExtensionValidatePUWellFormed();
+    void TestSetExtensionValidateTIllFormed();
+    void TestSetExtensionValidateTWellFormed();
+    void TestSetExtensionValidateUIllFormed();
+    void TestSetExtensionValidateUWellFormed();
+    void TestSetLanguageIllFormed();
+    void TestSetLanguageWellFormed();
+    void TestSetLocale();
+    void TestSetRegionIllFormed();
+    void TestSetRegionWellFormed();
+    void TestSetScriptIllFormed();
+    void TestSetScriptWellFormed();
+    void TestSetUnicodeLocaleKeywordIllFormedKey();
+    void TestSetUnicodeLocaleKeywordIllFormedValue();
+    void TestSetUnicodeLocaleKeywordWellFormed();
+    void TestSetVariantIllFormed();
+    void TestSetVariantWellFormed();
+
+ private:
+    // Shared helper taking a builder, an expected string and a message
+    // format; presumably it builds the locale and compares its language tag
+    // with `expected` (defined in localebuildertest.cpp - confirm there).
+    void Verify(LocaleBuilder& bld, const char* expected, const char* msg);
+};