blob: 4d5c1f8db056ec72874e5d230cdab882c64f87fb [file] [log] [blame]
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
* Copyright (C) 2009-2015, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*/
#include "unicode/bytestream.h"
#include "unicode/utypes.h"
#include "unicode/ures.h"
#include "unicode/localpointer.h"
#include "unicode/putil.h"
#include "unicode/uenum.h"
#include "unicode/uloc.h"
#include "ustr_imp.h"
#include "bytesinkutil.h"
#include "charstr.h"
#include "cmemory.h"
#include "cstring.h"
#include "putilimp.h"
#include "uinvchar.h"
#include "ulocimp.h"
#include "uassert.h"
/*
 * Node of a singly linked list holding one variant subtag.
 * In the code visible in this file the node itself is allocated with
 * uprv_malloc()/released with uprv_free(), while `variant` merely points
 * into a buffer owned by the caller and is never freed through the node.
 */
typedef struct VariantListEntry {
const char *variant; /* the variant subtag text (NUL-terminated) */
struct VariantListEntry *next; /* next node, or nullptr at the tail */
} VariantListEntry;
/*
 * Node of a singly linked list holding one Unicode locale attribute value.
 * Derives from icu::UMemory; the visible callers create nodes through an
 * icu::MemoryPool<AttributeListEntry>.
 */
struct AttributeListEntry : public icu::UMemory {
const char *attribute; /* the attribute text (NUL-terminated) */
struct AttributeListEntry *next; /* next node, or nullptr at the tail */
};
/*
 * Node of a singly linked list holding one extension as a key/value pair.
 * Derives from icu::UMemory; the visible callers create nodes through an
 * icu::MemoryPool<ExtensionListEntry>.
 */
struct ExtensionListEntry : public icu::UMemory {
const char *key; /* extension key (singleton, -u- key, or the "attribute" placeholder) */
const char *value; /* extension value; nullptr for the "attribute" placeholder entry */
struct ExtensionListEntry *next; /* next node, or nullptr at the tail */
};
/* Maximum number of extlang subtags stored in a ULanguageTag. */
#define MAXEXTLANG 3
/*
 * Parsed representation of a language tag.
 * _initializeULanguageTag() sets language/script/region/privateuse/legacy
 * to EMPTY and every other member to nullptr.
 */
typedef struct ULanguageTag {
char *buf; /* holding parsed subtags */
const char *language; /* language subtag */
const char *extlang[MAXEXTLANG]; /* extlang subtags; unused slots are nullptr */
const char *script; /* script subtag */
const char *region; /* region subtag */
VariantListEntry *variants; /* head of the variant list, or nullptr */
ExtensionListEntry *extensions; /* head of the extension list, or nullptr */
const char *privateuse; /* private use value */
const char *legacy; /* legacy tag text */
} ULanguageTag;
/* Minimum length constant. */
#define MINLEN 2
/* Subtag separator used in BCP 47 language tags. */
#define SEP '-'
/* Singleton that introduces a private use sequence. */
#define PRIVATEUSE 'x'
/* Singleton that introduces the Unicode locale (LDML) extension. */
#define LDMLEXT 'u'
/* Separators used in ICU locale IDs: between fields, before keywords,
   between keywords, and between a keyword and its value. */
#define LOCALE_SEP '_'
#define LOCALE_EXT_SEP '@'
#define LOCALE_KEYWORD_SEP ';'
#define LOCALE_KEY_TYPE_SEP '='
/* ASCII-only character classification helpers.
   Note: ISNUMERIC evaluates its argument twice. */
#define ISALPHA(c) uprv_isASCIILetter(c)
#define ISNUMERIC(c) ((c)>='0' && (c)<='9')
/* Shared string constants. */
static const char EMPTY[] = "";
static const char LANG_UND[] = "und";
static const char PRIVATEUSE_KEY[] = "x";
static const char _POSIX[] = "_POSIX";
/* Key/value emitted for the POSIX variant when it is converted to an extension. */
static const char POSIX_KEY[] = "va";
static const char POSIX_VALUE[] = "posix";
/* Special locale keyword that carries Unicode locale attributes. */
static const char LOCALE_ATTRIBUTE_KEY[] = "attribute";
static const char PRIVUSE_VARIANT_PREFIX[] = "lvariant";
static const char LOCALE_TYPE_YES[] = "yes";
/* Length of LANG_UND, excluding the terminating NUL. */
#define LANG_UND_LEN 3
/*
Updated on 2018-09-12 from
https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .
This table has 2 parts. The part for
legacy language tags (marked as “Type: grandfathered” in BCP 47)
is generated by the following scripts from the IANA language tag registry.
curl https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry |\
egrep -A 7 'Type: grandfathered' | \
egrep 'Tag|Prefe' | grep -B1 'Preferred' | grep -v '^--' | \
awk -n '/Tag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' |\
tr 'A-Z' 'a-z'
The 2nd part is made of four ICU-specific entries. They are kept for
backward compatibility for now, even though they have no preferred
values. They may have to be removed for strict BCP 47 compliance.
*/
/*
 * Flat list of pairs: each even index holds a legacy tag (lower case) and
 * the following odd index holds its replacement.
 */
static const char* const LEGACY[] = {
/* legacy preferred */
"art-lojban", "jbo",
"en-gb-oed", "en-gb-oxendict",
"i-ami", "ami",
"i-bnn", "bnn",
"i-hak", "hak",
"i-klingon", "tlh",
"i-lux", "lb",
"i-navajo", "nv",
"i-pwn", "pwn",
"i-tao", "tao",
"i-tay", "tay",
"i-tsu", "tsu",
"no-bok", "nb",
"no-nyn", "nn",
"sgn-be-fr", "sfb",
"sgn-be-nl", "vgt",
"sgn-ch-de", "sgg",
"zh-guoyu", "cmn",
"zh-hakka", "hak",
"zh-min-nan", "nan",
"zh-xiang", "hsn",
// Legacy tags with no preferred value in the IANA
// registry. Kept for now for backward compatibility
// because ICU has mapped them this way.
"i-default", "en-x-i-default",
"i-enochian", "und-x-i-enochian",
"i-mingo", "see-x-i-mingo",
"zh-min", "nan-x-zh-min",
};
/*
Updated on 2018-09-12 from
https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .
The table lists redundant tags with preferred value in the IANA language tag registry.
It's generated with the following command:
curl https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry |\
grep 'Type: redundant' -A 5 | egrep '^(Tag:|Prefer)' | grep -B1 'Preferred' | \
awk -n '/Tag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' | \
tr 'A-Z' 'a-z'
In addition, ja-latn-hepburn-heploc is mapped to ja-latn-alalc97 because
the variant sequence 'hepburn-heploc' has the preferred value 'alalc97'.
*/
/*
 * Flat list of pairs: each even index holds a redundant tag (lower case)
 * and the following odd index holds its preferred replacement.
 */
static const char* const REDUNDANT[] = {
// redundant preferred
"sgn-br", "bzs",
"sgn-co", "csn",
"sgn-de", "gsg",
"sgn-dk", "dsl",
"sgn-es", "ssp",
"sgn-fr", "fsl",
"sgn-gb", "bfi",
"sgn-gr", "gss",
"sgn-ie", "isg",
"sgn-it", "ise",
"sgn-jp", "jsl",
"sgn-mx", "mfs",
"sgn-ni", "ncs",
"sgn-nl", "dse",
"sgn-no", "nsl",
"sgn-pt", "psr",
"sgn-se", "swl",
"sgn-us", "ase",
"sgn-za", "sfs",
"zh-cmn", "cmn",
"zh-cmn-hans", "cmn-hans",
"zh-cmn-hant", "cmn-hant",
"zh-gan", "gan",
"zh-wuu", "wuu",
"zh-yue", "yue",
// variant tag with preferred value
"ja-latn-hepburn-heploc", "ja-latn-alalc97",
};
/*
Updated on 2018-09-12 from
https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .
grep 'Type: language' -A 7 language-subtag-registry | egrep 'Subtag|Prefe' | \
grep -B1 'Preferred' | grep -v '^--' | \
awk -n '/Subtag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}'
Make sure that 2-letter language subtags come before 3-letter subtags.
*/
/*
 * Flat list of pairs: each even index holds a deprecated language subtag and
 * the following odd index holds its replacement. Each element is at most
 * three characters plus the terminating NUL.
 * The 2-letter entries must stay ahead of the 3-letter ones:
 * _appendLanguageToLanguageTag() stops scanning as soon as it meets an entry
 * longer than the code it is looking up.
 */
static const char DEPRECATEDLANGS[][4] = {
/* deprecated new */
"in", "id",
"iw", "he",
"ji", "yi",
"jw", "jv",
"mo", "ro",
"aam", "aas",
"adp", "dz",
"aue", "ktz",
"ayx", "nun",
"bgm", "bcg",
"bjd", "drl",
"ccq", "rki",
"cjr", "mom",
"cka", "cmr",
"cmk", "xch",
"coy", "pij",
"cqu", "quh",
"drh", "khk",
"drw", "prs",
"gav", "dev",
"gfx", "vaj",
"ggn", "gvr",
"gti", "nyc",
"guv", "duz",
"hrr", "jal",
"ibi", "opa",
"ilw", "gal",
"jeg", "oyb",
"kgc", "tdf",
"kgh", "kml",
"koj", "kwv",
"krm", "bmf",
"ktr", "dtp",
"kvs", "gdj",
"kwq", "yam",
"kxe", "tvd",
"kzj", "dtp",
"kzt", "dtp",
"lii", "raq",
"lmm", "rmx",
"meg", "cir",
"mst", "mry",
"mwj", "vaj",
"myt", "mry",
"nad", "xny",
"ncp", "kdz",
"nnx", "ngv",
"nts", "pij",
"oun", "vaj",
"pcr", "adx",
"pmc", "huw",
"pmu", "phr",
"ppa", "bfy",
"ppr", "lcq",
"pry", "prt",
"puz", "pub",
"sca", "hle",
"skk", "oyb",
"tdu", "dtp",
"thc", "tpo",
"thx", "oyb",
"tie", "ras",
"tkk", "twm",
"tlw", "weo",
"tmp", "tyj",
"tne", "kak",
"tnf", "prs",
"tsf", "taj",
"uok", "ema",
"xba", "cax",
"xia", "acn",
"xkh", "waw",
"xsj", "suj",
"ybd", "rki",
"yma", "lrr",
"ymt", "mtm",
"yos", "zom",
"yuu", "yug",
};
/*
Updated on 2018-04-24 from
curl https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry | \
grep 'Type: region' -A 7 | egrep 'Subtag|Prefe' | \
grep -B1 'Preferred' | \
awk -n '/Subtag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}'
*/
/*
 * Flat list of pairs: each even index holds a deprecated 2-letter region
 * subtag and the following odd index holds its replacement.
 * _appendRegionToLanguageTag() scans the whole table linearly.
 */
static const char DEPRECATEDREGIONS[][3] = {
/* deprecated new */
"BU", "MM",
"DD", "DE",
"FX", "FR",
"TP", "TL",
"YD", "YE",
"ZR", "CD",
};
/*
* -------------------------------------------------
*
* These ultag_ functions may be exposed as APIs later
*
* -------------------------------------------------
*/
/*
 * Forward declarations of the file-local ULanguageTag parser, destructor and
 * accessors. The two declarations inside "#if 0" are compiled out.
 */
/* Parses `tag` (tagLen chars); takes an out-parameter for the parsed length and reports errors via `status`. */
static ULanguageTag*
ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status);
/* Releases a ULanguageTag; also used as the closer of LocalULanguageTagPointer. */
static void
ultag_close(ULanguageTag* langtag);
static const char*
ultag_getLanguage(const ULanguageTag* langtag);
#if 0
static const char*
ultag_getJDKLanguage(const ULanguageTag* langtag);
#endif
/* Indexed accessors take a zero-based idx; the matching *Size function gives the count. */
static const char*
ultag_getExtlang(const ULanguageTag* langtag, int32_t idx);
static int32_t
ultag_getExtlangSize(const ULanguageTag* langtag);
static const char*
ultag_getScript(const ULanguageTag* langtag);
static const char*
ultag_getRegion(const ULanguageTag* langtag);
static const char*
ultag_getVariant(const ULanguageTag* langtag, int32_t idx);
static int32_t
ultag_getVariantsSize(const ULanguageTag* langtag);
static const char*
ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx);
static const char*
ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx);
static int32_t
ultag_getExtensionsSize(const ULanguageTag* langtag);
static const char*
ultag_getPrivateUse(const ULanguageTag* langtag);
#if 0
static const char*
ultag_getLegacy(const ULanguageTag* langtag);
#endif
U_NAMESPACE_BEGIN
/**
* \class LocalULanguageTagPointer
* "Smart pointer" class, closes a ULanguageTag via ultag_close().
* For most methods see the LocalPointerBase base class.
* The class itself is generated by the U_DEFINE_LOCAL_OPEN_POINTER macro
* and lives inside the ICU namespace opened by U_NAMESPACE_BEGIN.
*
* @see LocalPointerBase
* @see LocalPointer
* @internal
*/
U_DEFINE_LOCAL_OPEN_POINTER(LocalULanguageTagPointer, ULanguageTag, ultag_close);
U_NAMESPACE_END
/*
* -------------------------------------------------
*
* Language subtag syntax validation functions
*
* -------------------------------------------------
*/
/*
 * Returns true when every one of the first `len` characters of `s` is an
 * ASCII letter. A non-positive `len` inspects nothing and yields true.
 */
static UBool
_isAlphaString(const char* s, int32_t len) {
    for (; len > 0; --len, ++s) {
        if (!ISALPHA(*s)) {
            return false;
        }
    }
    return true;
}
/*
 * Returns true when every one of the first `len` characters of `s` is an
 * ASCII digit. A non-positive `len` inspects nothing and yields true.
 */
static UBool
_isNumericString(const char* s, int32_t len) {
    for (; len > 0; --len, ++s) {
        const char ch = *s;
        if (!ISNUMERIC(ch)) {
            return false;
        }
    }
    return true;
}
/*
 * Returns true when every one of the first `len` characters of `s` is an
 * ASCII letter or an ASCII digit. A non-positive `len` yields true.
 */
static UBool
_isAlphaNumericString(const char* s, int32_t len) {
    for (; len > 0; --len, ++s) {
        const char ch = *s;
        if (!(ISALPHA(ch) || ISNUMERIC(ch))) {
            return false;
        }
    }
    return true;
}
/*
 * Returns true when `s` is an ASCII alphanumeric string whose length lies in
 * the inclusive range [min, max]. A negative `len` means `s` is
 * NUL-terminated and its length is measured here.
 */
static UBool
_isAlphaNumericStringLimitedLength(const char* s, int32_t len, int32_t min, int32_t max) {
    const int32_t actualLen = (len < 0) ? (int32_t)uprv_strlen(s) : len;
    if (actualLen < min || actualLen > max) {
        return false;
    }
    return _isAlphaNumericString(s, actualLen);
}
/*
 * Tests whether `s` is a well-formed language subtag.
 *
 *   unicode_language_subtag = alpha{2,3} | alpha{5,8};
 *
 * NOTE: Per ICUTC 2019/01/23 a 4-letter subtag is accepted as well
 * (see ICU-20372), so any all-letter string of length 2..8 passes.
 * A negative `len` means `s` is NUL-terminated.
 */
U_CFUNC UBool
ultag_isLanguageSubtag(const char* s, int32_t len) {
    const int32_t subtagLen = (len < 0) ? (int32_t)uprv_strlen(s) : len;
    if (subtagLen < 2 || subtagLen > 8) {
        return false;
    }
    return _isAlphaString(s, subtagLen);
}
/*
 * Tests whether `s` is a well-formed extlang subtag.
 *
 *   extlang = 3ALPHA            ; selected ISO 639 codes
 *             *2("-" 3ALPHA)    ; permanently reserved
 *
 * Only a single 3-letter subtag is checked here.
 * A negative `len` means `s` is NUL-terminated.
 */
static UBool
_isExtlangSubtag(const char* s, int32_t len) {
    const int32_t subtagLen = (len < 0) ? (int32_t)uprv_strlen(s) : len;
    if (subtagLen != 3) {
        return false;
    }
    return _isAlphaString(s, subtagLen);
}
/*
 * Tests whether `s` is a well-formed script subtag.
 *
 *   script = 4ALPHA ; ISO 15924 code
 *
 * A negative `len` means `s` is NUL-terminated.
 */
U_CFUNC UBool
ultag_isScriptSubtag(const char* s, int32_t len) {
    const int32_t subtagLen = (len < 0) ? (int32_t)uprv_strlen(s) : len;
    if (subtagLen != 4) {
        return false;
    }
    return _isAlphaString(s, subtagLen);
}
/*
 * Tests whether `s` is a well-formed region subtag.
 *
 *   region = 2ALPHA ; ISO 3166-1 code
 *          / 3DIGIT ; UN M.49 code
 *
 * A negative `len` means `s` is NUL-terminated.
 */
U_CFUNC UBool
ultag_isRegionSubtag(const char* s, int32_t len) {
    const int32_t subtagLen = (len < 0) ? (int32_t)uprv_strlen(s) : len;
    switch (subtagLen) {
    case 2:
        return _isAlphaString(s, subtagLen);
    case 3:
        return _isNumericString(s, subtagLen);
    default:
        return false;
    }
}
/*
 * Tests whether `s` is a well-formed variant subtag.
 *
 *   variant = 5*8alphanum      ; registered variants
 *           / (DIGIT 3alphanum)
 *
 * A negative `len` means `s` is NUL-terminated.
 */
static UBool
_isVariantSubtag(const char* s, int32_t len) {
    const int32_t subtagLen = (len < 0) ? (int32_t)uprv_strlen(s) : len;
    if (subtagLen == 4) {
        /* The 4-character form must start with a digit. */
        return ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3);
    }
    return _isAlphaNumericStringLimitedLength(s, subtagLen, 5, 8);
}
/*
 * Tests whether `s` is a non-empty list of subtags separated by single SEP
 * characters, where each subtag satisfies `test`.
 *
 * Returns false for an empty string, a leading or trailing separator, two
 * adjacent separators, or any subtag rejected by `test`.
 * A negative `len` means `s` is NUL-terminated.
 */
static UBool
_isSepListOf(UBool (*test)(const char*, int32_t), const char* s, int32_t len) {
    if (len < 0) {
        len = (int32_t)uprv_strlen(s);
    }
    const char* const limit = s + len;
    const char* subtagStart = nullptr; /* nullptr while between subtags */
    for (const char* cur = s; cur < limit; ++cur) {
        if (*cur != SEP) {
            if (subtagStart == nullptr) {
                subtagStart = cur;
            }
            continue;
        }
        /* Separator: it must close a subtag, and that subtag must be valid. */
        if (subtagStart == nullptr) {
            return false;
        }
        if (!test(subtagStart, (int32_t)(cur - subtagStart))) {
            return false;
        }
        subtagStart = nullptr;
    }
    if (subtagStart == nullptr) {
        return false;
    }
    return test(subtagStart, (int32_t)(limit - subtagStart));
}
/*
 * Tests whether `s` is a SEP-separated list of one or more variant subtags.
 * A negative `len` means `s` is NUL-terminated.
 */
U_CFUNC UBool
ultag_isVariantSubtags(const char* s, int32_t len) {
    UBool (* const subtagTest)(const char*, int32_t) = _isVariantSubtag;
    return _isSepListOf(subtagTest, s, len);
}
// Used for the ICU-specific "lvariant" handling.
// Accepts any ASCII alphanumeric subtag of 1 to 8 characters, which is looser
// than the regular variant syntax (5*8alphanum / DIGIT 3alphanum).
// A negative `len` means `s` is NUL-terminated.
static UBool
_isPrivateuseVariantSubtag(const char* s, int32_t len) {
    const int32_t kMinLength = 1;
    const int32_t kMaxLength = 8;
    return _isAlphaNumericStringLimitedLength(s, len, kMinLength, kMaxLength);
}
/*
 * Tests whether `s` is an extension singleton: exactly one ASCII letter or
 * digit other than 'x'/'X' (which is reserved for private use).
 *
 *   extension = singleton 1*("-" (2*8alphanum))
 *
 *   singleton = DIGIT     ; 0 - 9
 *             / %x41-57   ; A - W
 *             / %x59-5A   ; Y - Z
 *             / %x61-77   ; a - w
 *             / %x79-7A   ; y - z
 *
 * A negative `len` means `s` is NUL-terminated.
 */
static UBool
_isExtensionSingleton(const char* s, int32_t len) {
    const int32_t subtagLen = (len < 0) ? (int32_t)uprv_strlen(s) : len;
    if (subtagLen != 1) {
        return false;
    }
    const char singleton = *s;
    if (!ISALPHA(singleton) && !ISNUMERIC(singleton)) {
        return false;
    }
    return uprv_tolower(singleton) != PRIVATEUSE;
}
/*
 * Tests whether `s` is a single extension subtag (2 to 8 alphanumerics).
 *
 *   extension = singleton 1*("-" (2*8alphanum))
 *
 * A negative `len` means `s` is NUL-terminated.
 */
static UBool
_isExtensionSubtag(const char* s, int32_t len) {
    const int32_t kMinLength = 2;
    const int32_t kMaxLength = 8;
    return _isAlphaNumericStringLimitedLength(s, len, kMinLength, kMaxLength);
}
/*
 * Tests whether `s` is a SEP-separated list of one or more extension subtags.
 * A negative `len` means `s` is NUL-terminated.
 */
U_CFUNC UBool
ultag_isExtensionSubtags(const char* s, int32_t len) {
    UBool (* const subtagTest)(const char*, int32_t) = _isExtensionSubtag;
    return _isSepListOf(subtagTest, s, len);
}
/*
 * Tests whether `s` is a single private use subtag (1 to 8 alphanumerics).
 *
 *   privateuse = "x" 1*("-" (1*8alphanum))
 *
 * A negative `len` means `s` is NUL-terminated.
 */
static UBool
_isPrivateuseValueSubtag(const char* s, int32_t len) {
    const int32_t kMinLength = 1;
    const int32_t kMaxLength = 8;
    return _isAlphaNumericStringLimitedLength(s, len, kMinLength, kMaxLength);
}
/*
 * Tests whether `s` is a SEP-separated list of one or more private use
 * subtags. A negative `len` means `s` is NUL-terminated.
 */
U_CFUNC UBool
ultag_isPrivateuseValueSubtags(const char* s, int32_t len) {
    UBool (* const subtagTest)(const char*, int32_t) = _isPrivateuseValueSubtag;
    return _isSepListOf(subtagTest, s, len);
}
/*
 * Tests whether `s` is a single Unicode locale attribute.
 *
 *   attribute = alphanum{3,8} ;
 *
 * A negative `len` means `s` is NUL-terminated.
 */
U_CFUNC UBool
ultag_isUnicodeLocaleAttribute(const char* s, int32_t len) {
    const int32_t kMinLength = 3;
    const int32_t kMaxLength = 8;
    return _isAlphaNumericStringLimitedLength(s, len, kMinLength, kMaxLength);
}
/*
 * Tests whether `s` is a SEP-separated list of one or more Unicode locale
 * attributes. A negative `len` means `s` is NUL-terminated.
 */
U_CFUNC UBool
ultag_isUnicodeLocaleAttributes(const char* s, int32_t len) {
    UBool (* const subtagTest)(const char*, int32_t) = ultag_isUnicodeLocaleAttribute;
    return _isSepListOf(subtagTest, s, len);
}
/*
 * Tests whether `s` is a Unicode locale key: exactly two characters, an
 * alphanumeric followed by a letter.
 *
 *   key = alphanum alpha ;
 *
 * A negative `len` means `s` is NUL-terminated.
 */
U_CFUNC UBool
ultag_isUnicodeLocaleKey(const char* s, int32_t len) {
    const int32_t keyLen = (len < 0) ? (int32_t)uprv_strlen(s) : len;
    if (keyLen != 2) {
        return false;
    }
    const char lead = s[0];
    if (!ISALPHA(lead) && !ISNUMERIC(lead)) {
        return false;
    }
    return ISALPHA(s[1]);
}
/*
 * Tests whether `s` is one subtag of a Unicode locale type: alphanum{3,8}.
 * A negative `len` means `s` is NUL-terminated.
 */
U_CFUNC UBool
_isUnicodeLocaleTypeSubtag(const char*s, int32_t len) {
    const int32_t kMinLength = 3;
    const int32_t kMaxLength = 8;
    return _isAlphaNumericStringLimitedLength(s, len, kMinLength, kMaxLength);
}
/*
 * Tests whether `s` is a Unicode locale type.
 *
 *   type = alphanum{3,8} (sep alphanum{3,8})* ;
 *
 * A negative `len` means `s` is NUL-terminated.
 */
U_CFUNC UBool
ultag_isUnicodeLocaleType(const char*s, int32_t len) {
    UBool (* const subtagTest)(const char*, int32_t) = _isUnicodeLocaleTypeSubtag;
    return _isSepListOf(subtagTest, s, len);
}
/*
 * Tests whether `s` is a transformed-extension key: exactly two characters,
 * a letter followed by a digit.
 *
 *   tkey = alpha digit ;
 *
 * A negative `len` means `s` is NUL-terminated.
 */
static UBool
_isTKey(const char* s, int32_t len)
{
    const int32_t keyLen = (len < 0) ? (int32_t)uprv_strlen(s) : len;
    if (keyLen != 2) {
        return false;
    }
    const char digit = s[1];
    return ISALPHA(s[0]) && ISNUMERIC(digit);
}
/*
 * Scans the SEP-separated subtags of `localeID` from left to right and
 * returns a pointer to the first one that is a tkey (see _isTKey), or
 * nullptr when none is found. `localeID` must be NUL-terminated.
 */
U_CAPI const char * U_EXPORT2
ultag_getTKeyStart(const char *localeID) {
    const char *subtag = localeID;
    for (;;) {
        const char *dash = uprv_strchr(subtag, SEP);
        if (dash == nullptr) {
            /* Last subtag: it runs up to the terminating NUL. */
            return _isTKey(subtag, -1) ? subtag : nullptr;
        }
        if (_isTKey(subtag, static_cast<int32_t>(dash - subtag))) {
            return subtag;
        }
        subtag = dash + 1;
    }
}
/*
 * Tests whether `s` is one subtag of a transformed-extension value.
 *
 *   tvalue = (sep alphanum{3,8})+ ;
 *
 * Only a single alphanum{3,8} subtag is checked here.
 * A negative `len` means `s` is NUL-terminated.
 */
static UBool
_isTValue(const char* s, int32_t len)
{
    const int32_t kMinLength = 3;
    const int32_t kMaxLength = 8;
    return _isAlphaNumericStringLimitedLength(s, len, kMinLength, kMaxLength);
}
/*
 * One step of the state machine that validates the subtags of a transformed
 * ("t") extension. `state` is the current state and is updated in place when
 * the subtag `s` (length `len`, or NUL-terminated when len < 0) is accepted.
 * Returns true when the subtag is acceptable in the current state, false
 * otherwise (including for an unknown state value).
 *
 * The caller starts with state 0 (kStart). kGotTKey is deliberately the only
 * negative state so that a caller can detect "ended right after a tkey" with
 * a simple `state >= 0` check, as _isStatefulSepListOf() does.
 */
static UBool
_isTransformedExtensionSubtag(int32_t& state, const char* s, int32_t len)
{
const int32_t kStart = 0; // Start, wait for unicode_language_subtag, tkey or end
const int32_t kGotLanguage = 1; // Got unicode_language_subtag, wait for unicode_script_subtag,
// unicode_region_subtag, unicode_variant_subtag, tkey or end
const int32_t kGotScript = 2; // Got unicode_script_subtag, wait for unicode_region_subtag,
// unicode_variant_subtag, tkey, or end
const int32_t kGotRegion = 3; // Got unicode_region_subtag, wait for unicode_variant_subtag,
// tkey, or end.
const int32_t kGotVariant = 4; // Got unicode_variant_subtag, wait for unicode_variant_subtag
// tkey or end.
const int32_t kGotTKey = -1; // Got tkey, wait for tvalue. ERROR if stop here.
const int32_t kGotTValue = 6; // Got tvalue, wait for tkey, tvalue or end
if (len < 0) {
len = (int32_t)uprv_strlen(s);
}
switch (state) {
case kStart:
// A 4-letter language subtag is rejected here even though
// ultag_isLanguageSubtag() accepts it.
if (ultag_isLanguageSubtag(s, len) && len != 4) {
state = kGotLanguage;
return true;
}
if (_isTKey(s, len)) {
state = kGotTKey;
return true;
}
return false;
case kGotLanguage:
if (ultag_isScriptSubtag(s, len)) {
state = kGotScript;
return true;
}
// Not a script: fall through and try region, then variant, then tkey.
U_FALLTHROUGH;
case kGotScript:
if (ultag_isRegionSubtag(s, len)) {
state = kGotRegion;
return true;
}
// Not a region: fall through and try variant, then tkey.
U_FALLTHROUGH;
case kGotRegion:
U_FALLTHROUGH;
case kGotVariant:
if (_isVariantSubtag(s, len)) {
state = kGotVariant;
return true;
}
if (_isTKey(s, len)) {
state = kGotTKey;
return true;
}
return false;
case kGotTKey:
// A tkey must be followed by at least one tvalue subtag.
if (_isTValue(s, len)) {
state = kGotTValue;
return true;
}
return false;
case kGotTValue:
// The tkey test runs first; otherwise another tvalue subtag keeps the state.
if (_isTKey(s, len)) {
state = kGotTKey;
return true;
}
if (_isTValue(s, len)) {
return true;
}
return false;
}
return false;
}
/*
 * One step of the state machine that validates the subtags of a Unicode
 * ("u") extension. `state` is the current state and is updated in place when
 * the subtag `s` is accepted. Returns true when the subtag is acceptable in
 * the current state, false otherwise (including for an unknown state value).
 *
 * Attributes are accepted only before the first key; after a key, any number
 * of type subtags may follow before the next key.
 */
static UBool
_isUnicodeExtensionSubtag(int32_t& state, const char* s, int32_t len)
{
    const int32_t kStart = 0;   // Start, wait for a key or attribute or end
    const int32_t kGotKey = 1;  // Got a key, wait for type or key or end
    const int32_t kGotType = 2; // Got a type, wait for key or end

    if (state != kStart && state != kGotKey && state != kGotType) {
        return false;
    }
    // A key is acceptable in every valid state and always leads to kGotKey.
    if (ultag_isUnicodeLocaleKey(s, len)) {
        state = kGotKey;
        return true;
    }
    if (state == kStart) {
        // Before the first key only attributes are allowed; the state stays put.
        if (ultag_isUnicodeLocaleAttribute(s, len)) {
            return true;
        }
        return false;
    }
    // After a key (or a previous type subtag) a type subtag is allowed.
    if (_isUnicodeLocaleTypeSubtag(s, len)) {
        state = kGotType;
        return true;
    }
    return false;
}
/*
 * Splits `s` at every SEP and feeds each piece, in order, to the stateful
 * validator `test`, starting from state 0. Empty pieces (from a leading,
 * trailing or doubled separator, or an empty input) are passed to `test`
 * with length 0 rather than being rejected here.
 *
 * Returns true only when every piece is accepted and the final state is
 * non-negative. A negative `len` means `s` is NUL-terminated.
 */
static UBool
_isStatefulSepListOf(UBool (*test)(int32_t&, const char*, int32_t), const char* s, int32_t len)
{
    if (len < 0) {
        len = (int32_t)uprv_strlen(s);
    }
    int32_t state = 0;
    const char* const limit = s + len;
    const char* subtag = s; /* start of the piece currently being scanned */
    for (const char* cur = s; cur < limit; ++cur) {
        if (*cur != SEP) {
            continue;
        }
        if (!test(state, subtag, (int32_t)(cur - subtag))) {
            return false;
        }
        subtag = cur + 1;
    }
    /* Validate the last piece first, then check that we did not stop in an
       "incomplete" (negative) state. */
    if (!test(state, subtag, (int32_t)(limit - subtag))) {
        return false;
    }
    return state >= 0;
}
/*
 * Tests whether `s` is a well-formed sequence of transformed ("t") extension
 * subtags. A negative `len` means `s` is NUL-terminated.
 */
U_CFUNC UBool
ultag_isTransformedExtensionSubtags(const char* s, int32_t len)
{
    UBool (* const stepTest)(int32_t&, const char*, int32_t) = _isTransformedExtensionSubtag;
    return _isStatefulSepListOf(stepTest, s, len);
}
/*
 * Tests whether `s` is a well-formed sequence of Unicode ("u") extension
 * subtags. A negative `len` means `s` is NUL-terminated.
 */
U_CFUNC UBool
ultag_isUnicodeExtensionSubtags(const char* s, int32_t len) {
    UBool (* const stepTest)(int32_t&, const char*, int32_t) = _isUnicodeExtensionSubtag;
    return _isStatefulSepListOf(stepTest, s, len);
}
/*
* -------------------------------------------------
*
* Helper functions
*
* -------------------------------------------------
*/
/*
 * Appends `var` to the end of the list headed by `*first`, preserving the
 * existing order. If an entry with the same variant text is already in the
 * list, nothing is changed and false is returned; otherwise `var` becomes
 * the new tail and true is returned.
 */
static UBool
_addVariantToList(VariantListEntry **first, VariantListEntry *var) {
    /* `link` always addresses the pointer that would have to be rewritten
       to splice a node in at the current position. */
    VariantListEntry **link = first;
    while (*link != nullptr) {
        if (uprv_compareInvCharsAsAscii(var->variant, (*link)->variant) == 0) {
            /* duplicated variant */
            return false;
        }
        link = &(*link)->next;
    }
    var->next = nullptr;
    *link = var;
    return true;
}
/*
 * Inserts `attr` into the list headed by `*first`, keeping the list sorted
 * in ascending order of attribute text. If an entry with the same text is
 * already present, nothing is changed and false is returned; otherwise the
 * entry is linked in and true is returned.
 */
static UBool
_addAttributeToList(AttributeListEntry **first, AttributeListEntry *attr) {
    /* `link` always addresses the pointer that would have to be rewritten
       to splice a node in at the current position. */
    AttributeListEntry **link = first;
    while (*link != nullptr) {
        const int32_t order = uprv_compareInvCharsAsAscii(attr->attribute, (*link)->attribute);
        if (order == 0) {
            /* duplicated attribute */
            return false;
        }
        if (order < 0) {
            /* `attr` sorts before the current node: insert here. */
            break;
        }
        link = &(*link)->next;
    }
    attr->next = *link;
    *link = attr;
    return true;
}
/*
 * Inserts `ext` into the list headed by `*first`, keeping the list sorted by
 * key. If an entry that compares equal is already present, nothing is
 * changed and false is returned; otherwise the entry is linked in and true
 * is returned.
 *
 * With localeToBCP == false, keys are ordered by plain invariant-character
 * comparison. With localeToBCP == true, the ordering is:
 *   - among one-character keys (singletons): by character, except that the
 *     private use singleton 'x' sorts after every other singleton;
 *   - a one-character key against a longer key: the longer key is treated as
 *     if it were the 'u' singleton;
 *   - among longer keys: by invariant-character comparison, except that the
 *     special "attribute" key sorts before every other longer key.
 */
static UBool
_addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool localeToBCP) {
    /* `link` always addresses the pointer that would have to be rewritten
       to splice a node in at the current position. */
    ExtensionListEntry **link = first;
    for (; *link != nullptr; link = &(*link)->next) {
        const ExtensionListEntry *cur = *link;
        int32_t order;
        if (!localeToBCP) {
            order = uprv_compareInvCharsAsAscii(ext->key, cur->key);
        } else {
            /* special handling for locale to bcp conversion */
            const int32_t newLen = (int32_t)uprv_strlen(ext->key);
            const int32_t curLen = (int32_t)uprv_strlen(cur->key);
            if (newLen == 1 && curLen == 1) {
                if (*(ext->key) == *(cur->key)) {
                    order = 0;
                } else if (*(ext->key) == PRIVATEUSE) {
                    order = 1;
                } else if (*(cur->key) == PRIVATEUSE) {
                    order = -1;
                } else {
                    order = *(ext->key) - *(cur->key);
                }
            } else if (newLen == 1) {
                order = *(ext->key) - LDMLEXT;
            } else if (curLen == 1) {
                order = LDMLEXT - *(cur->key);
            } else {
                order = uprv_compareInvCharsAsAscii(ext->key, cur->key);
                /* Both are u extension keys - 'attribute' needs special handling */
                if (order != 0) {
                    if (uprv_strcmp(cur->key, LOCALE_ATTRIBUTE_KEY) == 0) {
                        order = 1;
                    } else if (uprv_strcmp(ext->key, LOCALE_ATTRIBUTE_KEY) == 0) {
                        order = -1;
                    }
                }
            }
        }
        if (order == 0) {
            /* duplicated extension key */
            return false;
        }
        if (order < 0) {
            /* `ext` sorts before the current node: insert here. */
            break;
        }
    }
    ext->next = *link;
    *link = ext;
    return true;
}
/*
 * Puts every member of `langtag` into its "nothing parsed yet" state:
 * the string fields point at the shared EMPTY string, while the buffer,
 * the extlang slots and the two list heads are nullptr.
 */
static void
_initializeULanguageTag(ULanguageTag* langtag) {
    /* Members for which nullptr means "absent". */
    langtag->buf = nullptr;
    langtag->variants = nullptr;
    langtag->extensions = nullptr;
    for (int32_t slot = 0; slot < MAXEXTLANG; ++slot) {
        langtag->extlang[slot] = nullptr;
    }
    /* String members default to the empty string rather than nullptr. */
    langtag->language = EMPTY;
    langtag->script = EMPTY;
    langtag->region = EMPTY;
    langtag->privateuse = EMPTY;
    langtag->legacy = EMPTY;
}
static void
_appendLanguageToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool strict, UErrorCode* status) {
char buf[ULOC_LANG_CAPACITY];
UErrorCode tmpStatus = U_ZERO_ERROR;
int32_t len, i;
if (U_FAILURE(*status)) {
return;
}
len = uloc_getLanguage(localeID, buf, sizeof(buf), &tmpStatus);
if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
len = 0;
}
/* Note: returned language code is in lower case letters */
if (len == 0) {
sink.Append(LANG_UND, LANG_UND_LEN);
} else if (!ultag_isLanguageSubtag(buf, len)) {
/* invalid language code */
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
sink.Append(LANG_UND, LANG_UND_LEN);
} else {
/* resolve deprecated */
for (i = 0; i < UPRV_LENGTHOF(DEPRECATEDLANGS); i += 2) {
// 2-letter deprecated subtags are listede before 3-letter
// ones in DEPRECATEDLANGS[]. Get out of loop on coming
// across the 1st 3-letter subtag, if the input is a 2-letter code.
// to avoid continuing to try when there's no match.
if (uprv_strlen(buf) < uprv_strlen(DEPRECATEDLANGS[i])) break;
if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDLANGS[i]) == 0) {
uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]);
len = (int32_t)uprv_strlen(buf);
break;
}
}
sink.Append(buf, len);
}
}
static void
_appendScriptToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool strict, UErrorCode* status) {
char buf[ULOC_SCRIPT_CAPACITY];
UErrorCode tmpStatus = U_ZERO_ERROR;
int32_t len;
if (U_FAILURE(*status)) {
return;
}
len = uloc_getScript(localeID, buf, sizeof(buf), &tmpStatus);
if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
}
return;
}
if (len > 0) {
if (!ultag_isScriptSubtag(buf, len)) {
/* invalid script code */
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
}
return;
} else {
sink.Append("-", 1);
sink.Append(buf, len);
}
}
}
static void
_appendRegionToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool strict, UErrorCode* status) {
char buf[ULOC_COUNTRY_CAPACITY];
UErrorCode tmpStatus = U_ZERO_ERROR;
int32_t len;
if (U_FAILURE(*status)) {
return;
}
len = uloc_getCountry(localeID, buf, sizeof(buf), &tmpStatus);
if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
}
return;
}
if (len > 0) {
if (!ultag_isRegionSubtag(buf, len)) {
/* invalid region code */
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
}
return;
} else {
sink.Append("-", 1);
/* resolve deprecated */
for (int i = 0; i < UPRV_LENGTHOF(DEPRECATEDREGIONS); i += 2) {
if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDREGIONS[i]) == 0) {
uprv_strcpy(buf, DEPRECATEDREGIONS[i + 1]);
len = (int32_t)uprv_strlen(buf);
break;
}
}
sink.Append(buf, len);
}
}
}
/*
 * Sorts the variant list headed by `first` into ascending order of variant
 * text. The nodes stay where they are; only their `variant` pointers are
 * exchanged. A nullptr list is left untouched.
 */
static void _sortVariants(VariantListEntry* first) {
    VariantListEntry* anchor = first;
    while (anchor != nullptr) {
        /* After this pass `anchor` holds the smallest remaining variant. */
        VariantListEntry* probe = anchor->next;
        while (probe != nullptr) {
            if (uprv_compareInvCharsAsAscii(anchor->variant, probe->variant) > 0) {
                const char* larger = anchor->variant;
                anchor->variant = probe->variant;
                probe->variant = larger;
            }
            probe = probe->next;
        }
        anchor = anchor->next;
    }
}
/*
 * Appends the variant subtags of `localeID` to `sink`, each preceded by "-".
 *
 * The variant field is split in place at '-' and '_', each piece is
 * lower-cased and validated, duplicates are dropped, and the surviving
 * variants are sorted before being written.
 *
 * - Does nothing when *status already indicates failure.
 * - If the variant field cannot be extracted, nothing is appended; in strict
 *   mode *status is set to U_ILLEGAL_ARGUMENT_ERROR.
 * - In strict mode an empty, malformed or duplicated variant sets *status to
 *   U_ILLEGAL_ARGUMENT_ERROR and nothing is appended.
 * - In non-strict mode empty, malformed and duplicated variants are skipped,
 *   and scanning stops at the first malformed piece that is still a valid
 *   private use subtag.
 * - *hadPosix is set to true (it is never cleared here) when the whole
 *   variant field is exactly "posix" (case-insensitively); that variant is
 *   not written.
 * - *status is set to U_MEMORY_ALLOCATION_ERROR if a list node cannot be
 *   allocated.
 */
static void
_appendVariantsToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool strict, UBool *hadPosix, UErrorCode* status) {
char buf[ULOC_FULLNAME_CAPACITY];
UErrorCode tmpStatus = U_ZERO_ERROR;
int32_t len, i;
if (U_FAILURE(*status)) {
return;
}
len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
}
return;
}
if (len > 0) {
char *p, *pVar;
UBool bNext = true;
VariantListEntry *var;
VariantListEntry *varFirst = nullptr;
/* pVar marks the start of the piece being scanned; nullptr between pieces */
pVar = nullptr;
p = buf;
while (bNext) {
if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
if (*p == 0) {
bNext = false;
} else {
*p = 0; /* terminate */
}
if (pVar == nullptr) {
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
break;
}
/* ignore empty variant */
} else {
/* ICU uses upper case letters for variants, but
the canonical format is lowercase in BCP47 */
for (i = 0; *(pVar + i) != 0; i++) {
*(pVar + i) = uprv_tolower(*(pVar + i));
}
/* validate */
if (_isVariantSubtag(pVar, -1)) {
/* `len` is the length of the whole variant field, so the POSIX
branch below is taken only when "posix" is the sole variant */
if (uprv_strcmp(pVar,POSIX_VALUE) || len != (int32_t)uprv_strlen(POSIX_VALUE)) {
/* emit the variant to the list */
var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
if (var == nullptr) {
*status = U_MEMORY_ALLOCATION_ERROR;
break;
}
/* the node borrows the text from buf; it is not copied */
var->variant = pVar;
if (!_addVariantToList(&varFirst, var)) {
/* duplicated variant */
uprv_free(var);
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
break;
}
}
} else {
/* Special handling for POSIX variant, need to remember that we had it and then */
/* treat it like an extension later. */
*hadPosix = true;
}
} else if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
break;
} else if (_isPrivateuseValueSubtag(pVar, -1)) {
/* Handle private use subtags separately */
break;
}
}
/* reset variant starting position */
pVar = nullptr;
} else if (pVar == nullptr) {
pVar = p;
}
p++;
}
if (U_SUCCESS(*status)) {
if (varFirst != nullptr) {
int32_t varLen;
/* per UTS35, we should sort the variants */
_sortVariants(varFirst);
/* write out validated/normalized variants to the target */
var = varFirst;
while (var != nullptr) {
sink.Append("-", 1);
varLen = (int32_t)uprv_strlen(var->variant);
sink.Append(var->variant, varLen);
var = var->next;
}
}
}
/* clean up: free the list nodes on both the success and the error path */
var = varFirst;
while (var != nullptr) {
VariantListEntry *tmpVar = var->next;
uprv_free(var);
var = tmpVar;
}
if (U_FAILURE(*status)) {
return;
}
}
}
/*
 * Converts the keywords of `localeID` into BCP 47 extensions and appends
 * them to `sink`.
 *
 * Each keyword is turned into an ExtensionListEntry and inserted into a list
 * ordered by _addExtensionToList(..., true):
 *   - the special keyword "attribute" has its '-'-separated value split into
 *     a sorted AttributeListEntry list, and a placeholder extension entry
 *     (key "attribute", value nullptr) marks where the attributes go;
 *   - keywords longer than one character are mapped through
 *     uloc_toUnicodeLocaleKey()/uloc_toUnicodeLocaleType();
 *   - one-character keywords are validated as private use ('x') or as another
 *     extension singleton and copied as is.
 * If `hadPosix` is true, an extra va=posix entry is added.
 *
 * On output, "-u" is written once before the first entry whose key is longer
 * than one character; values equal to "true" or "yes" are omitted.
 *
 * In strict mode any keyword that cannot be converted, and any duplicate,
 * sets *status to U_ILLEGAL_ARGUMENT_ERROR; in non-strict mode such keywords
 * are skipped. Nothing is appended to `sink` when *status indicates failure
 * once the list has been built.
 */
static void
_appendKeywordsToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool strict, UBool hadPosix, UErrorCode* status) {
char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY] = { 0 };
int32_t attrBufLength = 0;
/* The pools own the list nodes and strings; they are released when the pools go out of scope. */
icu::MemoryPool<AttributeListEntry> attrPool;
icu::MemoryPool<ExtensionListEntry> extPool;
icu::MemoryPool<icu::CharString> strPool;
icu::LocalUEnumerationPointer keywordEnum(uloc_openKeywords(localeID, status));
/* NOTE(review): when hadPosix is true the function carries on even if
*status already indicates failure - confirm this is intended. */
if (U_FAILURE(*status) && !hadPosix) {
return;
}
if (keywordEnum.isValid() || hadPosix) {
/* reorder extensions */
int32_t len;
const char *key;
ExtensionListEntry *firstExt = nullptr;
ExtensionListEntry *ext;
AttributeListEntry *firstAttr = nullptr;
AttributeListEntry *attr;
icu::MemoryPool<icu::CharString> extBufPool;
const char *bcpKey=nullptr, *bcpValue=nullptr;
UErrorCode tmpStatus = U_ZERO_ERROR;
int32_t keylen;
UBool isBcpUExt;
while (true) {
key = uenum_next(keywordEnum.getAlias(), nullptr, status);
if (key == nullptr) {
break;
}
icu::CharString buf;
{
/* this local `sink` shadows the function parameter of the same name */
icu::CharStringByteSink sink(&buf);
ulocimp_getKeywordValue(localeID, key, sink, &tmpStatus);
}
len = buf.length();
if (U_FAILURE(tmpStatus)) {
if (tmpStatus == U_MEMORY_ALLOCATION_ERROR) {
*status = U_MEMORY_ALLOCATION_ERROR;
break;
}
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
break;
}
/* ignore this keyword */
tmpStatus = U_ZERO_ERROR;
continue;
}
keylen = (int32_t)uprv_strlen(key);
isBcpUExt = (keylen > 1);
/* special keyword used for representing Unicode locale attributes */
if (uprv_strcmp(key, LOCALE_ATTRIBUTE_KEY) == 0) {
/* NOTE(review): when len == 0, bcpKey/bcpValue keep whatever the
previous iteration left in them (nullptr on the first one), yet an
extension entry is still created below - confirm whether an empty
attribute value can reach this point. */
if (len > 0) {
int32_t i = 0;
while (true) {
/* copy the next '-'-delimited piece of the value into attrBuf */
attrBufLength = 0;
for (; i < len; i++) {
if (buf[i] != '-') {
if (static_cast<size_t>(attrBufLength) < sizeof(attrBuf)) {
attrBuf[attrBufLength++] = buf[i];
} else {
*status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
} else {
i++;
break;
}
}
if (attrBufLength > 0) {
if (static_cast<size_t>(attrBufLength) < sizeof(attrBuf)) {
attrBuf[attrBufLength] = 0;
} else {
/* NOTE(review): this stores a warning code in *status and
processing continues - confirm this is intended. */
*status = U_STRING_NOT_TERMINATED_WARNING;
}
} else if (i >= len){
break;
}
/* create AttributeListEntry */
attr = attrPool.create();
if (attr == nullptr) {
*status = U_MEMORY_ALLOCATION_ERROR;
break;
}
icu::CharString* attrValue =
strPool.create(attrBuf, attrBufLength, *status);
if (attrValue == nullptr) {
*status = U_MEMORY_ALLOCATION_ERROR;
break;
}
if (U_FAILURE(*status)) {
break;
}
attr->attribute = attrValue->data();
if (!_addAttributeToList(&firstAttr, attr)) {
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
break;
}
}
}
/* for a place holder ExtensionListEntry */
bcpKey = LOCALE_ATTRIBUTE_KEY;
bcpValue = nullptr;
}
} else if (isBcpUExt) {
bcpKey = uloc_toUnicodeLocaleKey(key);
if (bcpKey == nullptr) {
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
break;
}
continue;
}
/* we've checked buf is null-terminated above */
bcpValue = uloc_toUnicodeLocaleType(key, buf.data());
if (bcpValue == nullptr) {
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
break;
}
continue;
}
if (bcpValue == buf.data()) {
/*
When uloc_toUnicodeLocaleType(key, buf) returns the
input value as is, the value is well-formed, but has
no known mapping. This implementation normalizes the
value to lower case
*/
/* copy into the pool: buf is local to this loop iteration */
icu::CharString* extBuf = extBufPool.create(buf, tmpStatus);
if (extBuf == nullptr) {
*status = U_MEMORY_ALLOCATION_ERROR;
break;
}
if (U_FAILURE(tmpStatus)) {
*status = tmpStatus;
break;
}
T_CString_toLowerCase(extBuf->data());
bcpValue = extBuf->data();
}
} else {
/* one-character key: private use or another extension singleton */
if (*key == PRIVATEUSE) {
if (!ultag_isPrivateuseValueSubtags(buf.data(), len)) {
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
break;
}
continue;
}
} else {
if (!_isExtensionSingleton(key, keylen) || !ultag_isExtensionSubtags(buf.data(), len)) {
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
break;
}
continue;
}
}
bcpKey = key;
icu::CharString* extBuf =
extBufPool.create(buf.data(), len, tmpStatus);
if (extBuf == nullptr) {
*status = U_MEMORY_ALLOCATION_ERROR;
break;
}
if (U_FAILURE(tmpStatus)) {
*status = tmpStatus;
break;
}
bcpValue = extBuf->data();
}
/* create ExtensionListEntry */
ext = extPool.create();
if (ext == nullptr) {
*status = U_MEMORY_ALLOCATION_ERROR;
break;
}
ext->key = bcpKey;
ext->value = bcpValue;
if (!_addExtensionToList(&firstExt, ext, true)) {
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
break;
}
}
}
/* Special handling for POSIX variant - add the keywords for POSIX */
if (hadPosix) {
/* create ExtensionListEntry for POSIX */
ext = extPool.create();
if (ext == nullptr) {
*status = U_MEMORY_ALLOCATION_ERROR;
return;
}
ext->key = POSIX_KEY;
ext->value = POSIX_VALUE;
if (!_addExtensionToList(&firstExt, ext, true)) {
// Silently ignore errors.
}
}
if (U_SUCCESS(*status) && (firstExt != nullptr || firstAttr != nullptr)) {
UBool startLDMLExtension = false;
for (ext = firstExt; ext; ext = ext->next) {
if (!startLDMLExtension && uprv_strlen(ext->key) > 1) {
/* first LDML u singleton extension */
sink.Append("-u", 2);
startLDMLExtension = true;
}
/* write out the sorted BCP47 attributes, extensions and private use */
if (uprv_strcmp(ext->key, LOCALE_ATTRIBUTE_KEY) == 0) {
/* write the value for the attributes */
for (attr = firstAttr; attr; attr = attr->next) {
sink.Append("-", 1);
sink.Append(
attr->attribute, static_cast<int32_t>(uprv_strlen(attr->attribute)));
}
} else {
sink.Append("-", 1);
sink.Append(ext->key, static_cast<int32_t>(uprv_strlen(ext->key)));
/* "true" and "yes" values are implied by the key alone and are not written */
if (uprv_strcmp(ext->value, "true") != 0 &&
uprv_strcmp(ext->value, "yes") != 0) {
sink.Append("-", 1);
sink.Append(ext->value, static_cast<int32_t>(uprv_strlen(ext->value)));
}
}
}
}
}
}
/**
 * Append keywords parsed from an LDML ("u") extension value to the list at *appendTo.
 * e.g. the extension "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional}
 *
 * The caller in this file (_appendKeywords) passes the extension *value*, i.e. the
 * subtags that follow the "u" singleton.
 *
 * Leading subtags that are not Unicode locale keys are collected as attributes and
 * emitted as one keyword whose key is LOCALE_ATTRIBUTE_KEY and whose value is the
 * attributes joined with '-'. The remaining subtags are read as key/type pairs and
 * mapped through uloc_toLegacyKey() / uloc_toLegacyType().
 *
 * @param ldmlext      NUL-terminated extension value; subtags are separated by SEP.
 * @param appendTo     list that receives the resulting entries via _addExtensionToList().
 * @param extPool      pool from which the ExtensionListEntry objects are allocated.
 * @param kwdBuf       pool of CharStrings that back keys/values which had to be copied.
 * @param posixVariant in: true if a variant already exists; out: true only if the
 *                     POSIX_KEY/POSIX_VALUE keyword was found while no variant existed,
 *                     in which case that keyword is not added to the list.
 * @param status       set to U_MEMORY_ALLOCATION_ERROR or U_ILLEGAL_ARGUMENT_ERROR on
 *                     failure; nothing is appended to *appendTo in that case.
 */
static void
_appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, icu::MemoryPool<ExtensionListEntry>& extPool, icu::MemoryPool<icu::CharString>& kwdBuf, UBool *posixVariant, UErrorCode *status) {
const char *pTag; /* beginning of current subtag */
const char *pKwds; /* beginning of key-type pairs */
UBool variantExists = *posixVariant;
ExtensionListEntry *kwdFirst = nullptr; /* first LDML keyword */
ExtensionListEntry *kwd, *nextKwd;
int32_t len;
/* Reset the posixVariant value */
*posixVariant = false;
pTag = ldmlext;
pKwds = nullptr;
/* Phase 1: attributes. The scope below bounds the lifetime of attrBuf and attrPool;
   attribute text is copied into a pooled CharString before the scope ends. */
{
AttributeListEntry *attrFirst = nullptr; /* first attribute */
AttributeListEntry *attr, *nextAttr;
char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
int32_t attrBufIdx = 0;
icu::MemoryPool<AttributeListEntry> attrPool;
/* Iterate through u extension attributes */
while (*pTag) {
/* locate next separator char */
for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
if (ultag_isUnicodeLocaleKey(pTag, len)) {
/* first key subtag ends the attribute section */
pKwds = pTag;
break;
}
/* add this attribute to the list */
attr = attrPool.create();
if (attr == nullptr) {
*status = U_MEMORY_ALLOCATION_ERROR;
return;
}
/* copy the subtag plus a NUL terminator into attrBuf if it still fits */
if (len < (int32_t)sizeof(attrBuf) - attrBufIdx) {
uprv_memcpy(&attrBuf[attrBufIdx], pTag, len);
attrBuf[attrBufIdx + len] = 0;
attr->attribute = &attrBuf[attrBufIdx];
attrBufIdx += (len + 1);
} else {
*status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
// duplicate attribute is ignored, causes no error.
_addAttributeToList(&attrFirst, attr);
/* next tag */
pTag += len;
if (*pTag) {
/* next to the separator */
pTag++;
}
}
if (attrFirst) {
/* emit attributes as an LDML keyword, e.g. attribute=attr1-attr2 */
kwd = extPool.create();
if (kwd == nullptr) {
*status = U_MEMORY_ALLOCATION_ERROR;
return;
}
icu::CharString* value = kwdBuf.create();
if (value == nullptr) {
*status = U_MEMORY_ALLOCATION_ERROR;
return;
}
/* join the attribute subtags, in list order, with '-' as the keyword type */
attr = attrFirst;
while (attr != nullptr) {
nextAttr = attr->next;
if (attr != attrFirst) {
value->append('-', *status);
}
value->append(attr->attribute, *status);
attr = nextAttr;
}
if (U_FAILURE(*status)) {
return;
}
kwd->key = LOCALE_ATTRIBUTE_KEY;
kwd->value = value->data();
if (!_addExtensionToList(&kwdFirst, kwd, false)) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
}
}
/* Phase 2: key/type pairs, starting at the first key subtag found above. */
if (pKwds) {
const char *pBcpKey = nullptr; /* u extension key subtag */
const char *pBcpType = nullptr; /* beginning of u extension type subtag(s) */
int32_t bcpKeyLen = 0;
int32_t bcpTypeLen = 0;
UBool isDone = false;
pTag = pKwds;
/* BCP47 representation of LDML key/type pairs */
while (!isDone) {
const char *pNextBcpKey = nullptr;
int32_t nextBcpKeyLen = 0;
UBool emitKeyword = false;
if (*pTag) {
/* locate next separator char */
for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
if (ultag_isUnicodeLocaleKey(pTag, len)) {
if (pBcpKey) {
/* a new key ends the pending pair: emit it and remember this key for the next round */
emitKeyword = true;
pNextBcpKey = pTag;
nextBcpKeyLen = len;
} else {
pBcpKey = pTag;
bcpKeyLen = len;
}
} else {
U_ASSERT(pBcpKey != nullptr);
/* within LDML type subtags */
if (pBcpType) {
/* extend the type over the separator and this subtag */
bcpTypeLen += (len + 1);
} else {
pBcpType = pTag;
bcpTypeLen = len;
}
}
/* next tag */
pTag += len;
if (*pTag) {
/* next to the separator */
pTag++;
}
} else {
/* processing last one */
emitKeyword = true;
isDone = true;
}
if (emitKeyword) {
const char *pKey = nullptr; /* LDML key */
const char *pType = nullptr; /* LDML type */
char bcpKeyBuf[3]; /* BCP key length is always 2 for now */
U_ASSERT(pBcpKey != nullptr);
if (bcpKeyLen >= (int32_t)sizeof(bcpKeyBuf)) {
/* the BCP key is invalid */
*status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
U_ASSERT(bcpKeyLen <= 2);
uprv_strncpy(bcpKeyBuf, pBcpKey, bcpKeyLen);
bcpKeyBuf[bcpKeyLen] = 0;
/* u extension key to LDML key */
pKey = uloc_toLegacyKey(bcpKeyBuf);
if (pKey == nullptr) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
if (pKey == bcpKeyBuf) {
/*
The key returned by toLegacyKey points to the input buffer.
We normalize the result key to lower case.
The lowercased key is copied into kwdBuf because bcpKeyBuf is a
local of this block.
*/
T_CString_toLowerCase(bcpKeyBuf);
icu::CharString* key = kwdBuf.create(bcpKeyBuf, bcpKeyLen, *status);
if (key == nullptr) {
*status = U_MEMORY_ALLOCATION_ERROR;
return;
}
if (U_FAILURE(*status)) {
return;
}
pKey = key->data();
}
if (pBcpType) {
char bcpTypeBuf[128]; /* practically long enough even considering multiple subtag type */
if (bcpTypeLen >= (int32_t)sizeof(bcpTypeBuf)) {
/* the BCP type is too long */
*status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
uprv_strncpy(bcpTypeBuf, pBcpType, bcpTypeLen);
bcpTypeBuf[bcpTypeLen] = 0;
/* BCP type to locale type */
pType = uloc_toLegacyType(pKey, bcpTypeBuf);
if (pType == nullptr) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
if (pType == bcpTypeBuf) {
/*
The type returned by toLegacyType points to the input buffer.
We normalize the result type to lower case.
The lowercased type is copied into kwdBuf because bcpTypeBuf is a
local of this block.
*/
/* normalize to lower case */
T_CString_toLowerCase(bcpTypeBuf);
icu::CharString* type = kwdBuf.create(bcpTypeBuf, bcpTypeLen, *status);
if (type == nullptr) {
*status = U_MEMORY_ALLOCATION_ERROR;
return;
}
if (U_FAILURE(*status)) {
return;
}
pType = type->data();
}
} else {
/* typeless - default type value is "yes" */
pType = LOCALE_TYPE_YES;
}
/* Special handling for u-va-posix, since we want to treat this as a variant,
not as a keyword */
if (!variantExists && !uprv_strcmp(pKey, POSIX_KEY) && !uprv_strcmp(pType, POSIX_VALUE) ) {
*posixVariant = true;
} else {
/* create an ExtensionListEntry for this keyword */
kwd = extPool.create();
if (kwd == nullptr) {
*status = U_MEMORY_ALLOCATION_ERROR;
return;
}
kwd->key = pKey;
kwd->value = pType;
if (!_addExtensionToList(&kwdFirst, kwd, false)) {
// duplicate keyword is allowed, Only the first
// is honored.
}
}
/* start the next pair with the key that triggered this emit (nullptr after the last one) */
pBcpKey = pNextBcpKey;
bcpKeyLen = pNextBcpKey != nullptr ? nextBcpKeyLen : 0;
pBcpType = nullptr;
bcpTypeLen = 0;
}
}
}
/* Move every collected keyword into the caller's list; the next pointer is saved
   first because it is read again after _addExtensionToList() has handled the entry.
   The return value is deliberately not checked here. */
kwd = kwdFirst;
while (kwd != nullptr) {
nextKwd = kwd->next;
_addExtensionToList(appendTo, kwd, false);
kwd = nextKwd;
}
}
/*
 * Writes the extensions and private use part of a parsed language tag to sink as
 * legacy locale keywords ("@key=value;key=value").
 *
 * "u" extensions are expanded through _appendLDMLExtensionAsKeywords(); every other
 * extension, and a non-empty private use value (under PRIVATEUSE_KEY), becomes one
 * keyword. If the "u" extension asked for the POSIX variant, _POSIX is written
 * before the keywords. Nothing is written when *status is, or becomes, a failure.
 */
static void
_appendKeywords(ULanguageTag* langtag, icu::ByteSink& sink, UErrorCode* status) {
    if (U_FAILURE(*status)) {
        return;
    }

    icu::MemoryPool<ExtensionListEntry> extPool;
    icu::MemoryPool<icu::CharString> kwdBuf;
    ExtensionListEntry *sortedHead = nullptr;
    UBool posixVariant = false;

    /* resolve locale keywords and reordering keys */
    const int32_t extCount = ultag_getExtensionsSize(langtag);
    for (int32_t idx = 0; idx < extCount; idx++) {
        const char *extKey = ultag_getExtensionKey(langtag, idx);
        const char *extValue = ultag_getExtensionValue(langtag, idx);

        if (*extKey != LDMLEXT) {
            /* any other extension maps 1:1 to a keyword */
            ExtensionListEntry *entry = extPool.create();
            if (entry == nullptr) {
                *status = U_MEMORY_ALLOCATION_ERROR;
                break;
            }
            entry->key = extKey;
            entry->value = extValue;
            if (!_addExtensionToList(&sortedHead, entry, false)) {
                *status = U_ILLEGAL_ARGUMENT_ERROR;
                break;
            }
            continue;
        }

        /* Tell the LDML conversion whether variants already exist */
        if (ultag_getVariantsSize(langtag)) {
            posixVariant = true;
        }
        _appendLDMLExtensionAsKeywords(extValue, &sortedHead, extPool, kwdBuf, &posixVariant, status);
        if (U_FAILURE(*status)) {
            break;
        }
    }

    if (U_SUCCESS(*status)) {
        const char *privateUse = ultag_getPrivateUse(langtag);
        if ((int32_t)uprv_strlen(privateUse) > 0) {
            /* add private use as a keyword */
            ExtensionListEntry *entry = extPool.create();
            if (entry == nullptr) {
                *status = U_MEMORY_ALLOCATION_ERROR;
            } else {
                entry->key = PRIVATEUSE_KEY;
                entry->value = privateUse;
                if (!_addExtensionToList(&sortedHead, entry, false)) {
                    *status = U_ILLEGAL_ARGUMENT_ERROR;
                }
            }
        }
    }

    /* From here on only output happens, and only for a successful conversion. */
    if (U_FAILURE(*status)) {
        return;
    }

    /* If a POSIX variant was in the extensions, write it out before writing the keywords. */
    if (posixVariant) {
        sink.Append(_POSIX, (int32_t) uprv_strlen(_POSIX));
    }

    /* write out the sorted keywords: '@' before the first, ';' before the others */
    const char *lead = "@";
    for (const ExtensionListEntry *entry = sortedHead; entry != nullptr; entry = entry->next) {
        sink.Append(lead, 1);
        lead = ";";
        sink.Append(entry->key, (int32_t)uprv_strlen(entry->key));
        sink.Append("=", 1);
        sink.Append(entry->value, (int32_t)uprv_strlen(entry->value));
    }
}
static void
_appendPrivateuseToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool strict, UBool hadPosix, UErrorCode* status) {
(void)hadPosix;
char buf[ULOC_FULLNAME_CAPACITY];
UErrorCode tmpStatus = U_ZERO_ERROR;
int32_t len, i;
if (U_FAILURE(*status)) {
return;
}
len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
}
return;
}
if (len > 0) {
char *p, *pPriv;
UBool bNext = true;
UBool firstValue = true;
UBool writeValue;
pPriv = nullptr;
p = buf;
while (bNext) {
writeValue = false;
if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
if (*p == 0) {
bNext = false;
} else {
*p = 0; /* terminate */
}
if (pPriv != nullptr) {
/* Private use in the canonical format is lowercase in BCP47 */
for (i = 0; *(pPriv + i) != 0; i++) {
*(pPriv + i) = uprv_tolower(*(pPriv + i));
}
/* validate */
if (_isPrivateuseValueSubtag(pPriv, -1)) {
if (firstValue) {
if (!_isVariantSubtag(pPriv, -1)) {
writeValue = true;
}
} else {
writeValue = true;
}
} else if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
break;
} else {
break;
}
if (writeValue) {
sink.Append("-", 1);
if (firstValue) {
sink.Append(PRIVATEUSE_KEY, UPRV_LENGTHOF(PRIVATEUSE_KEY) - 1);
sink.Append("-", 1);
sink.Append(PRIVUSE_VARIANT_PREFIX, UPRV_LENGTHOF(PRIVUSE_VARIANT_PREFIX) - 1);
sink.Append("-", 1);
firstValue = false;
}
len = (int32_t)uprv_strlen(pPriv);
sink.Append(pPriv, len);
}
}
/* reset private use starting position */
pPriv = nullptr;
} else if (pPriv == nullptr) {
pPriv = p;
}
p++;
}
}
}
/*
* -------------------------------------------------
*
* ultag_ functions
*
* -------------------------------------------------
*/
/* Bit flags used by the parser */
#define LANG 0x0001
#define EXTL 0x0002
#define SCRT 0x0004
#define REGN 0x0008
#define VART 0x0010
#define EXTS 0x0020
#define EXTV 0x0040
#define PRIV 0x0080
/**
* Ticket #12705 - The optimizer in Visual Studio 2015 Update 3 has problems optimizing this function.
* As a work-around, optimization is disabled for this function on VS2015 and VS2017.
* This work-around should be removed once the following versions of Visual Studio are no
* longer supported: All versions of VS2015/VS2017, and versions of VS2019 below 16.4.
*/
#if defined(_MSC_VER) && (_MSC_VER >= 1900) && (_MSC_VER < 1924)
#pragma optimize( "", off )
#endif
/*
 * Parses a language tag into a newly allocated ULanguageTag.
 *
 * tagLen chars of tag are copied into a private buffer (t->buf). Subtags are
 * NUL-terminated and case-normalized in place, and the fields of the result point
 * into that buffer. A tag that starts with an entry of the LEGACY or REDUNDANT table
 * has that prefix replaced by the table's replacement before parsing.
 *
 * Parsing stops at the first subtag that is not acceptable in the current state;
 * that is not reported as an error, the caller can detect it through *parsedLen.
 *
 * @param tag       the language tag text.
 * @param tagLen    number of chars in tag, or a negative value to use uprv_strlen(tag).
 * @param parsedLen if not nullptr: set to 0 on entry and, on normal return, to the
 *                  number of chars of the original input that were accepted.
 * @param status    in/out error code. On an incoming failure nothing is done;
 *                  set to U_MEMORY_ALLOCATION_ERROR when an allocation fails.
 * @return the parsed tag, released from the local smart pointer with orphan()
 *         (see ultag_close() for releasing it), or nullptr on failure.
 */
static ULanguageTag*
ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) {
char *tagBuf;
/* bit set (LANG, EXTL, ...) of the subtag kinds acceptable at the current position */
int16_t next;
char *pSubtag, *pNext, *pLastGoodPosition;
int32_t subtagLen;
int32_t extlangIdx;
/* extension whose value subtags are still being collected; not yet in t->extensions */
ExtensionListEntry *pExtension;
char *pExtValueSubtag, *pExtValueSubtagEnd;
int32_t i;
UBool privateuseVar = false;
int32_t legacyLen = 0;
if (parsedLen != nullptr) {
*parsedLen = 0;
}
if (U_FAILURE(*status)) {
return nullptr;
}
if (tagLen < 0) {
tagLen = (int32_t)uprv_strlen(tag);
}
/* copy the entire string */
tagBuf = (char*)uprv_malloc(tagLen + 1);
if (tagBuf == nullptr) {
*status = U_MEMORY_ALLOCATION_ERROR;
return nullptr;
}
if (tagLen > 0) {
uprv_memcpy(tagBuf, tag, tagLen);
}
*(tagBuf + tagLen) = 0;
/* create a ULanguageTag */
icu::LocalULanguageTagPointer t(
(ULanguageTag*)uprv_malloc(sizeof(ULanguageTag)));
if (t.isNull()) {
uprv_free(tagBuf);
*status = U_MEMORY_ALLOCATION_ERROR;
return nullptr;
}
_initializeULanguageTag(t.getAlias());
/* from here on the buffer is reachable through t */
t->buf = tagBuf;
if (tagLen < MINLEN) {
/* the input tag is too short - return empty ULanguageTag */
return t.orphan();
}
/* Length difference between a replaced legacy/redundant prefix and its replacement;
   added back at the end so that *parsedLen refers to the original input. */
size_t parsedLenDelta = 0;
// Legacy tags are matched as a whole prefix. A legacy tag with an intervening
// script or region, such as art-DE-lojban or art-Latn-lojban, won't be
// matched.
/* check if the tag is legacy; LEGACY holds (tag, replacement) pairs */
for (i = 0; i < UPRV_LENGTHOF(LEGACY); i += 2) {
int32_t checkLegacyLen = static_cast<int32_t>(uprv_strlen(LEGACY[i]));
if (tagLen < checkLegacyLen) {
continue;
}
if (tagLen > checkLegacyLen && tagBuf[checkLegacyLen] != '-') {
// make sure next char is '-'.
continue;
}
if (uprv_strnicmp(LEGACY[i], tagBuf, checkLegacyLen) == 0) {
int32_t newTagLength;
legacyLen = checkLegacyLen; /* back up for output parsedLen */
int32_t replacementLen = static_cast<int32_t>(uprv_strlen(LEGACY[i+1]));
newTagLength = replacementLen + tagLen - checkLegacyLen;
int32_t oldTagLength = tagLen;
/* the replacement makes the tag longer: a bigger buffer is needed */
if (tagLen < newTagLength) {
uprv_free(tagBuf);
// Update t->buf right after the free and before any return, so that the
// destructor of t does not free the old buffer a second time.
t->buf = tagBuf = (char*)uprv_malloc(newTagLength + 1);
if (tagBuf == nullptr) {
*status = U_MEMORY_ALLOCATION_ERROR;
return nullptr;
}
tagLen = newTagLength;
}
parsedLenDelta = checkLegacyLen - replacementLen;
uprv_strcpy(t->buf, LEGACY[i + 1]);
if (checkLegacyLen != tagLen) {
/* append whatever followed the legacy prefix, taken from the caller's input */
uprv_memcpy(t->buf + replacementLen, tag + checkLegacyLen,
oldTagLength - checkLegacyLen);
// NUL-terminate after memcpy().
t->buf[replacementLen + oldTagLength - checkLegacyLen] = 0;
}
break;
}
}
/* redundant tags are only considered when no legacy tag matched */
if (legacyLen == 0) {
for (i = 0; i < UPRV_LENGTHOF(REDUNDANT); i += 2) {
const char* redundantTag = REDUNDANT[i];
size_t redundantTagLen = uprv_strlen(redundantTag);
// The preferred tag for a redundant tag is always shorter than redundant
// tag. A redundant tag may or may not be followed by other subtags.
// (i.e. "zh-yue" or "zh-yue-u-co-pinyin").
if (uprv_strnicmp(redundantTag, tagBuf, static_cast<uint32_t>(redundantTagLen)) == 0) {
const char* redundantTagEnd = tagBuf + redundantTagLen;
if (*redundantTagEnd == '\0' || *redundantTagEnd == SEP) {
const char* preferredTag = REDUNDANT[i + 1];
size_t preferredTagLen = uprv_strlen(preferredTag);
uprv_memcpy(t->buf, preferredTag, preferredTagLen);
if (*redundantTagEnd == SEP) {
/* shift the remaining subtags, including the terminating NUL, down */
uprv_memmove(tagBuf + preferredTagLen,
redundantTagEnd,
tagLen - redundantTagLen + 1);
} else {
tagBuf[preferredTagLen] = '\0';
}
// parsedLen should be the length of the input
// before redundantTag is replaced by preferredTag.
// Save the delta to add it back later.
parsedLenDelta = redundantTagLen - preferredTagLen;
break;
}
}
}
}
/*
* langtag = language
* ["-" script]
* ["-" region]
* *("-" variant)
* *("-" extension)
* ["-" privateuse]
*/
next = LANG | PRIV;
pNext = pLastGoodPosition = tagBuf;
extlangIdx = 0;
pExtension = nullptr;
pExtValueSubtag = nullptr;
pExtValueSubtagEnd = nullptr;
/* One subtag per iteration. Each accepted subtag ends its branch with "continue";
   falling out of all branches reaches the "break" at the bottom of the loop. */
while (pNext) {
char *pSep;
pSubtag = pNext;
/* locate next separator char */
pSep = pSubtag;
while (*pSep) {
if (*pSep == SEP) {
break;
}
pSep++;
}
if (*pSep == 0) {
/* last subtag */
pNext = nullptr;
} else {
pNext = pSep + 1;
}
subtagLen = (int32_t)(pSep - pSubtag);
if (next & LANG) {
if (ultag_isLanguageSubtag(pSubtag, subtagLen)) {
*pSep = 0; /* terminate */
// TODO: move deprecated language code handling here.
t->language = T_CString_toLowerCase(pSubtag);
pLastGoodPosition = pSep;
next = SCRT | REGN | VART | EXTS | PRIV;
/* extlang is only allowed after a language subtag of at most 3 chars */
if (subtagLen <= 3)
next |= EXTL;
continue;
}
}
if (next & EXTL) {
if (_isExtlangSubtag(pSubtag, subtagLen)) {
*pSep = 0;
t->extlang[extlangIdx++] = T_CString_toLowerCase(pSubtag);
pLastGoodPosition = pSep;
/* at most 3 extlang subtags are accepted */
if (extlangIdx < 3) {
next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
} else {
next = SCRT | REGN | VART | EXTS | PRIV;
}
continue;
}
}
if (next & SCRT) {
if (ultag_isScriptSubtag(pSubtag, subtagLen)) {
char *p = pSubtag;
*pSep = 0;
/* to title case */
*p = uprv_toupper(*p);
p++;
for (; *p; p++) {
*p = uprv_tolower(*p);
}
t->script = pSubtag;
pLastGoodPosition = pSep;
next = REGN | VART | EXTS | PRIV;
continue;
}
}
if (next & REGN) {
if (ultag_isRegionSubtag(pSubtag, subtagLen)) {
*pSep = 0;
// TODO: move deprecated region code handling here.
t->region = T_CString_toUpperCase(pSubtag);
pLastGoodPosition = pSep;
next = VART | EXTS | PRIV;
continue;
}
}
if (next & VART) {
/* privateuseVar is set once the private use variant prefix has been seen (see PRIV below) */
if (_isVariantSubtag(pSubtag, subtagLen) ||
(privateuseVar && _isPrivateuseVariantSubtag(pSubtag, subtagLen))) {
VariantListEntry *var;
UBool isAdded;
var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
if (var == nullptr) {
*status = U_MEMORY_ALLOCATION_ERROR;
return nullptr;
}
*pSep = 0;
var->variant = T_CString_toUpperCase(pSubtag);
isAdded = _addVariantToList(&(t->variants), var);
if (!isAdded) {
/* duplicated variant entry */
uprv_free(var);
break;
}
pLastGoodPosition = pSep;
next = VART | EXTS | PRIV;
continue;
}
}
if (next & EXTS) {
if (_isExtensionSingleton(pSubtag, subtagLen)) {
/* a new singleton first completes the extension collected so far */
if (pExtension != nullptr) {
if (pExtValueSubtag == nullptr || pExtValueSubtagEnd == nullptr) {
/* the previous extension is incomplete */
uprv_free(pExtension);
pExtension = nullptr;
break;
}
/* terminate the previous extension value */
*pExtValueSubtagEnd = 0;
pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
/* insert the extension to the list */
if (_addExtensionToList(&(t->extensions), pExtension, false)) {
pLastGoodPosition = pExtValueSubtagEnd;
} else {
/* stop parsing here */
uprv_free(pExtension);
pExtension = nullptr;
break;
}
}
/* create a new extension */
pExtension = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
if (pExtension == nullptr) {
*status = U_MEMORY_ALLOCATION_ERROR;
return nullptr;
}
*pSep = 0;
pExtension->key = T_CString_toLowerCase(pSubtag);
pExtension->value = nullptr; /* will be set later */
/*
* reset the start and the end location of extension value
* subtags for this extension
*/
pExtValueSubtag = nullptr;
pExtValueSubtagEnd = nullptr;
/* a singleton must be followed by at least one value subtag */
next = EXTV;
continue;
}
}
if (next & EXTV) {
if (_isExtensionSubtag(pSubtag, subtagLen)) {
if (pExtValueSubtag == nullptr) {
/* if the start position of this extension's value is not yet set,
this one is the first value subtag */
pExtValueSubtag = pSubtag;
}
/* Mark the end of this subtag; separators inside the value are left in place */
pExtValueSubtagEnd = pSep;
next = EXTS | EXTV | PRIV;
continue;
}
}
if (next & PRIV) {
if (uprv_tolower(*pSubtag) == PRIVATEUSE && subtagLen == 1) {
char *pPrivuseVal;
if (pExtension != nullptr) {
/* Process the last extension */
if (pExtValueSubtag == nullptr || pExtValueSubtagEnd == nullptr) {
/* the previous extension is incomplete */
uprv_free(pExtension);
pExtension = nullptr;
break;
} else {
/* terminate the previous extension value */
*pExtValueSubtagEnd = 0;
pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
/* insert the extension to the list */
if (_addExtensionToList(&(t->extensions), pExtension, false)) {
pLastGoodPosition = pExtValueSubtagEnd;
pExtension = nullptr;
} else {
/* stop parsing here */
uprv_free(pExtension);
pExtension = nullptr;
break;
}
}
}
/* The rest of part will be private use value subtags */
if (pNext == nullptr) {
/* empty private use subtag */
break;
}
/* back up the private use value start position */
pPrivuseVal = pNext;
/* validate private use value subtags */
while (pNext) {
pSubtag = pNext;
pSep = pSubtag;
while (*pSep) {
if (*pSep == SEP) {
break;
}
pSep++;
}
if (*pSep == 0) {
/* last subtag */
pNext = nullptr;
} else {
pNext = pSep + 1;
}
subtagLen = (int32_t)(pSep - pSubtag);
/* a subtag starting with the private use variant prefix switches the
   outer loop to reading the following subtags as variants */
if (uprv_strncmp(pSubtag, PRIVUSE_VARIANT_PREFIX, uprv_strlen(PRIVUSE_VARIANT_PREFIX)) == 0) {
*pSep = 0;
next = VART;
privateuseVar = true;
break;
} else if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) {
pLastGoodPosition = pSep;
} else {
break;
}
}
if (next == VART) {
continue;
}
/* only store a private use value if at least one value subtag was accepted */
if (pLastGoodPosition - pPrivuseVal > 0) {
*pLastGoodPosition = 0;
t->privateuse = T_CString_toLowerCase(pPrivuseVal);
}
/* No more subtags, exiting the parse loop */
break;
}
break;
}
/* If we fell through here, it means this subtag is illegal - quit parsing */
break;
}
/* The loop may end while an extension is still pending: add it or discard it. */
if (pExtension != nullptr) {
/* Process the last extension */
if (pExtValueSubtag == nullptr || pExtValueSubtagEnd == nullptr) {
/* the previous extension is incomplete */
uprv_free(pExtension);
} else {
/* terminate the previous extension value */
*pExtValueSubtagEnd = 0;
pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
/* insert the extension to the list */
if (_addExtensionToList(&(t->extensions), pExtension, false)) {
pLastGoodPosition = pExtValueSubtagEnd;
} else {
uprv_free(pExtension);
}
}
}
if (parsedLen != nullptr) {
/* offset inside the (possibly rewritten) buffer, corrected back to input coordinates */
*parsedLen = (int32_t)(pLastGoodPosition - t->buf + parsedLenDelta);
}
return t.orphan();
}
// Ticket #12705 - Turn optimization back on.
#if defined(_MSC_VER) && (_MSC_VER >= 1900) && (_MSC_VER < 1924)
#pragma optimize( "", on )
#endif
/*
 * Releases a ULanguageTag: its subtag buffer, every node of the variants and
 * extensions lists, and finally the struct itself, all with uprv_free().
 * A nullptr argument is ignored.
 */
static void
ultag_close(ULanguageTag* langtag) {
    if (langtag == nullptr) {
        return;
    }

    uprv_free(langtag->buf);

    /* free the variant list, reading each link before its node goes away */
    for (VariantListEntry *var = langtag->variants; var != nullptr; ) {
        VariantListEntry *doomed = var;
        var = var->next;
        uprv_free(doomed);
    }

    /* same for the extension list */
    for (ExtensionListEntry *ext = langtag->extensions; ext != nullptr; ) {
        ExtensionListEntry *doomed = ext;
        ext = ext->next;
        uprv_free(doomed);
    }

    uprv_free(langtag);
}
/* Returns the language field of the parsed tag. */
static const char*
ultag_getLanguage(const ULanguageTag* langtag) {
    const char* const language = langtag->language;
    return language;
}
#if 0
/*
 * Currently compiled out.
 * Looks the language up in DEPRECATEDLANGS, a nullptr-terminated array of pairs, and
 * returns the second element of the matching pair, or the language itself if no
 * pair matches.
 */
static const char*
ultag_getJDKLanguage(const ULanguageTag* langtag) {
    for (int32_t idx = 0; DEPRECATEDLANGS[idx] != nullptr; idx += 2) {
        if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS[idx], langtag->language) != 0) {
            continue;
        }
        return DEPRECATEDLANGS[idx + 1];
    }
    return langtag->language;
}
#endif
/* Returns the extlang subtag at idx, or nullptr if idx is outside [0, MAXEXTLANG). */
static const char*
ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) {
    if (idx < 0 || idx >= MAXEXTLANG) {
        return nullptr;
    }
    return langtag->extlang[idx];
}
/* Returns how many of the MAXEXTLANG extlang slots of the tag are set (non-null). */
static int32_t
ultag_getExtlangSize(const ULanguageTag* langtag) {
    int32_t count = 0;
    for (const char* slot : langtag->extlang) {
        if (slot) {
            ++count;
        }
    }
    return count;
}
/* Returns the script field of the parsed tag. */
static const char*
ultag_getScript(const ULanguageTag* langtag) {
    const char* const script = langtag->script;
    return script;
}
/* Returns the region field of the parsed tag. */
static const char*
ultag_getRegion(const ULanguageTag* langtag) {
    const char* const region = langtag->region;
    return region;
}
static const char*
ultag_getVariant(const ULanguageTag* langtag, int32_t idx) {
const char *var = nullptr;
VariantListEntry *cur = langtag->variants;
int32_t i = 0;
while (cur) {
if (i == idx) {
var = cur->variant;
break;
}
cur = cur->next;
i++;
}
return var;
}
/* Returns the number of entries in the tag's variant list. */
static int32_t
ultag_getVariantsSize(const ULanguageTag* langtag) {
    int32_t count = 0;
    for (const VariantListEntry *node = langtag->variants; node != nullptr; node = node->next) {
        count++;
    }
    return count;
}
static const char*
ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) {
const char *key = nullptr;
ExtensionListEntry *cur = langtag->extensions;
int32_t i = 0;
while (cur) {
if (i == idx) {
key = cur->key;
break;
}
cur = cur->next;
i++;
}
return key;
}
static const char*
ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) {
const char *val = nullptr;
ExtensionListEntry *cur = langtag->extensions;
int32_t i = 0;
while (cur) {
if (i == idx) {
val = cur->value;
break;
}
cur = cur->next;
i++;
}
return val;
}
/* Returns the number of entries in the tag's extension list. */
static int32_t
ultag_getExtensionsSize(const ULanguageTag* langtag) {
    int32_t count = 0;
    for (const ExtensionListEntry *node = langtag->extensions; node != nullptr; node = node->next) {
        count++;
    }
    return count;
}
/* Returns the privateuse field of the parsed tag. */
static const char*
ultag_getPrivateUse(const ULanguageTag* langtag) {
    const char* const privateUse = langtag->privateuse;
    return privateUse;
}
#if 0
/* Currently compiled out. Returns the legacy field of the parsed tag. */
static const char*
ultag_getLegacy(const ULanguageTag* langtag) {
    const char* const legacy = langtag->legacy;
    return legacy;
}
#endif
/*
* -------------------------------------------------
*
* Locale/BCP47 conversion APIs, exposed as uloc_*
*
* -------------------------------------------------
*/
/*
 * C API wrapper around ulocimp_toLanguageTag() that writes into a caller-supplied
 * char buffer.
 *
 * Returns the number of bytes the conversion appended to the sink (0 if *status was
 * already a failure on entry). If the output did not fit, *status is set to
 * U_BUFFER_OVERFLOW_ERROR; otherwise the result is passed to u_terminateChars().
 */
U_CAPI int32_t U_EXPORT2
uloc_toLanguageTag(const char* localeID,
                   char* langtag,
                   int32_t langtagCapacity,
                   UBool strict,
                   UErrorCode* status) {
    if (U_FAILURE(*status)) {
        return 0;
    }

    icu::CheckedArrayByteSink sink(langtag, langtagCapacity);
    ulocimp_toLanguageTag(localeID, sink, strict, status);

    const int32_t written = sink.NumberOfBytesAppended();
    if (U_SUCCESS(*status)) {
        if (sink.Overflowed()) {
            *status = U_BUFFER_OVERFLOW_ERROR;
        } else {
            u_terminateChars(langtag, langtagCapacity, written, status);
        }
    }
    return written;
}
/*
 * Converts an ICU locale ID to a BCP 47 language tag and appends it to sink.
 *
 * The locale ID is canonicalized first. A locale ID that consists only of the single
 * keyword "x" is written as the private-use-only tag "und-x-<value>". Otherwise the
 * language, script, region, variants, keywords and private use parts are appended
 * in that order by the _append*ToLanguageTag helpers.
 *
 * @param localeID the locale ID to convert.
 * @param sink     receives the language tag.
 * @param strict   passed on to the helpers; for the private-use-only case an
 *                 ill-formed value is an error only in strict mode.
 * @param status   in/out error code. If it is already a failure on entry the function
 *                 returns immediately without writing to sink.
 */
U_CAPI void U_EXPORT2
ulocimp_toLanguageTag(const char* localeID,
                      icu::ByteSink& sink,
                      UBool strict,
                      UErrorCode* status) {
    /* Do nothing on an incoming failure; without this guard the private-use-only
       path below could still write to sink. */
    if (U_FAILURE(*status)) {
        return;
    }

    icu::CharString canonical;
    int32_t reslen = 0;
    UErrorCode tmpStatus = U_ZERO_ERROR;
    UBool hadPosix = false;
    const char* pKeywordStart;

    /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "".  See #6835 */
    int32_t resultCapacity = static_cast<int32_t>(uprv_strlen(localeID));
    if (resultCapacity > 0) {
        char* buffer;

        /* Retry with the length reported by uloc_canonicalize until the buffer is big enough. */
        for (;;) {
            buffer = canonical.getAppendBuffer(
                    /*minCapacity=*/resultCapacity,
                    /*desiredCapacityHint=*/resultCapacity,
                    resultCapacity,
                    tmpStatus);

            if (U_FAILURE(tmpStatus)) {
                *status = tmpStatus;
                return;
            }

            reslen =
                uloc_canonicalize(localeID, buffer, resultCapacity, &tmpStatus);

            if (tmpStatus != U_BUFFER_OVERFLOW_ERROR) {
                break;
            }

            resultCapacity = reslen;
            tmpStatus = U_ZERO_ERROR;
        }

        if (U_FAILURE(tmpStatus)) {
            *status = U_ILLEGAL_ARGUMENT_ERROR;
            return;
        }

        canonical.append(buffer, reslen, tmpStatus);
        if (tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
            tmpStatus = U_ZERO_ERROR;  // Terminators provided by CharString.
        }

        if (U_FAILURE(tmpStatus)) {
            *status = tmpStatus;
            return;
        }
    }

    /* For handling special case - private use only tag */
    pKeywordStart = locale_getKeywordsStart(canonical.data());
    if (pKeywordStart == canonical.data()) {
        /* the canonical ID starts with the keyword section, i.e. it has no base name */
        int kwdCnt = 0;
        UBool done = false;

        icu::LocalUEnumerationPointer kwdEnum(uloc_openKeywords(canonical.data(), &tmpStatus));
        if (U_SUCCESS(tmpStatus)) {
            kwdCnt = uenum_count(kwdEnum.getAlias(), &tmpStatus);
            if (kwdCnt == 1) {
                const char *key;
                int32_t len = 0;

                key = uenum_next(kwdEnum.getAlias(), &len, &tmpStatus);
                /* len stays 0 if no key was returned, so key is not dereferenced then */
                if (len == 1 && *key == PRIVATEUSE) {
                    icu::CharString buf;
                    {
                        /* named differently from the parameter to avoid shadowing it */
                        icu::CharStringByteSink valueSink(&buf);
                        ulocimp_getKeywordValue(localeID, key, valueSink, &tmpStatus);
                    }
                    if (U_SUCCESS(tmpStatus)) {
                        if (ultag_isPrivateuseValueSubtags(buf.data(), buf.length())) {
                            /* return private use only tag */
                            sink.Append("und-x-", 6);
                            sink.Append(buf.data(), buf.length());
                            done = true;
                        } else if (strict) {
                            *status = U_ILLEGAL_ARGUMENT_ERROR;
                            done = true;
                        }
                        /* if not strict mode, then "und" will be returned */
                    } else {
                        *status = U_ILLEGAL_ARGUMENT_ERROR;
                        done = true;
                    }
                }
            }
            if (done) {
                return;
            }
        }
    }

    _appendLanguageToLanguageTag(canonical.data(), sink, strict, status);
    _appendScriptToLanguageTag(canonical.data(), sink, strict, status);
    _appendRegionToLanguageTag(canonical.data(), sink, strict, status);
    _appendVariantsToLanguageTag(canonical.data(), sink, strict, &hadPosix, status);
    _appendKeywordsToLanguageTag(canonical.data(), sink, strict, hadPosix, status);
    _appendPrivateuseToLanguageTag(canonical.data(), sink, strict, hadPosix, status);
}
/*
 * C API wrapper around ulocimp_forLanguageTag() that writes into a caller-supplied
 * char buffer. The tag is passed with length -1.
 *
 * Returns the number of bytes the conversion appended to the sink (0 if *status was
 * already a failure on entry). If the output did not fit, *status is set to
 * U_BUFFER_OVERFLOW_ERROR; otherwise the result is passed to u_terminateChars().
 */
U_CAPI int32_t U_EXPORT2
uloc_forLanguageTag(const char* langtag,
                    char* localeID,
                    int32_t localeIDCapacity,
                    int32_t* parsedLength,
                    UErrorCode* status) {
    if (U_FAILURE(*status)) {
        return 0;
    }

    icu::CheckedArrayByteSink sink(localeID, localeIDCapacity);
    ulocimp_forLanguageTag(langtag, -1, sink, parsedLength, status);

    const int32_t written = sink.NumberOfBytesAppended();
    if (U_SUCCESS(*status)) {
        if (sink.Overflowed()) {
            *status = U_BUFFER_OVERFLOW_ERROR;
        } else {
            u_terminateChars(localeID, localeIDCapacity, written, status);
        }
    }
    return written;
}
/*
 * Parses a language tag with ultag_parse() and appends the corresponding ICU locale
 * ID to sink: language (the first extlang if there is one; LANG_UND is omitted),
 * "_" script, "_" region, the sorted variants, and finally the extensions and
 * private use as keywords via _appendKeywords().
 *
 * parsedLength is handed to ultag_parse() unchanged. Nothing is written if parsing
 * leaves *status in a failure state.
 */
U_CAPI void U_EXPORT2
ulocimp_forLanguageTag(const char* langtag,
                       int32_t tagLen,
                       icu::ByteSink& sink,
                       int32_t* parsedLength,
                       UErrorCode* status) {
    icu::LocalULanguageTagPointer lt(ultag_parse(langtag, tagLen, parsedLength, status));
    if (U_FAILURE(*status)) {
        return;
    }
    ULanguageTag* const parsed = lt.getAlias();

    /* appends a NUL-terminated string one char at a time, uppercased */
    auto appendUpper = [&sink](const char* text) {
        for (; *text; text++) {
            char upper = uprv_toupper(*text);
            sink.Append(&upper, 1);
        }
    };

    UBool nothingWritten = true;
    UBool haveRegion = false;

    /* language */
    const char* language = ultag_getExtlangSize(parsed) > 0
            ? ultag_getExtlang(parsed, 0)
            : ultag_getLanguage(parsed);
    if (uprv_compareInvCharsAsAscii(language, LANG_UND) != 0) {
        const int32_t languageLen = (int32_t)uprv_strlen(language);
        if (languageLen > 0) {
            sink.Append(language, languageLen);
            nothingWritten = false;
        }
    }

    /* script */
    const char* script = ultag_getScript(parsed);
    const int32_t scriptLen = (int32_t)uprv_strlen(script);
    if (scriptLen > 0) {
        sink.Append("_", 1);
        nothingWritten = false;
        /* write out the script in title case */
        char initial = uprv_toupper(*script);
        sink.Append(&initial, 1);
        sink.Append(script + 1, scriptLen - 1);
    }

    /* region */
    const char* region = ultag_getRegion(parsed);
    if ((int32_t)uprv_strlen(region) > 0) {
        sink.Append("_", 1);
        nothingWritten = false;
        /* write out the region in upper case */
        appendUpper(region);
        haveRegion = true;
    }

    /* variants */
    _sortVariants(parsed->variants);
    const int32_t variantCount = ultag_getVariantsSize(parsed);
    if (variantCount > 0) {
        if (!haveRegion) {
            /* keep the region position empty: an extra separator precedes the variants */
            sink.Append("_", 1);
            nothingWritten = false;
        }
        for (int32_t idx = 0; idx < variantCount; idx++) {
            sink.Append("_", 1);
            /* write out the variant in upper case */
            appendUpper(ultag_getVariant(parsed, idx));
        }
    }

    /* keywords */
    const int32_t extensionCount = ultag_getExtensionsSize(parsed);
    const char* privateUse = ultag_getPrivateUse(parsed);
    if (extensionCount <= 0 && uprv_strlen(privateUse) == 0) {
        return;
    }
    if (nothingWritten && extensionCount > 0) {
        /* need a language */
        sink.Append(LANG_UND, LANG_UND_LEN);
    }
    _appendKeywords(parsed, sink, status);
}