| // © 2016 and later: Unicode, Inc. and others. |
| // License & terms of use: http://www.unicode.org/copyright.html |
| /* |
| ******************************************************************************* |
| * |
| * Copyright (C) 1997-2016, International Business Machines |
| * Corporation and others. All Rights Reserved. |
| * |
| ******************************************************************************* |
| * file name: loclikely.cpp |
| * encoding: UTF-8 |
| * tab size: 8 (not used) |
| * indentation:4 |
| * |
| * created on: 2010feb25 |
| * created by: Markus W. Scherer |
| * |
| * Code for likely and minimized locale subtags, separated out from other .cpp files |
| * that then do not depend on resource bundle code and likely-subtags data. |
| */ |
| |
| #include "unicode/bytestream.h" |
| #include "unicode/utypes.h" |
| #include "unicode/locid.h" |
| #include "unicode/putil.h" |
| #include "unicode/uchar.h" |
| #include "unicode/uloc.h" |
| #include "unicode/ures.h" |
| #include "unicode/uscript.h" |
| #include "bytesinkutil.h" |
| #include "charstr.h" |
| #include "cmemory.h" |
| #include "cstring.h" |
| #include "ulocimp.h" |
| #include "ustr_imp.h" |
| |
/**
 * Canonical spellings of the "unknown" subtags: the unknown language,
 * the unknown script and the unknown region, in that order.
 **/
static const char unknownLanguage[] = "und";
static const char unknownScript[] = "Zzzz";
static const char unknownRegion[] = "ZZ";
| |
| /** |
| * This function looks for the localeID in the likelySubtags resource. |
| * |
| * @param localeID The tag to find. |
| * @param buffer A buffer to hold the matching entry |
| * @param bufferLength The length of the output buffer |
| * @return A pointer to "buffer" if found, or a null pointer if not. |
| */ |
| static const char* U_CALLCONV |
| findLikelySubtags(const char* localeID, |
| char* buffer, |
| int32_t bufferLength, |
| UErrorCode* err) { |
| const char* result = NULL; |
| |
| if (!U_FAILURE(*err)) { |
| int32_t resLen = 0; |
| const UChar* s = NULL; |
| UErrorCode tmpErr = U_ZERO_ERROR; |
| icu::LocalUResourceBundlePointer subtags(ures_openDirect(NULL, "likelySubtags", &tmpErr)); |
| if (U_SUCCESS(tmpErr)) { |
| icu::CharString und; |
| if (localeID != NULL) { |
| if (*localeID == '\0') { |
| localeID = unknownLanguage; |
| } else if (*localeID == '_') { |
| und.append(unknownLanguage, *err); |
| und.append(localeID, *err); |
| if (U_FAILURE(*err)) { |
| return NULL; |
| } |
| localeID = und.data(); |
| } |
| } |
| s = ures_getStringByKey(subtags.getAlias(), localeID, &resLen, &tmpErr); |
| |
| if (U_FAILURE(tmpErr)) { |
| /* |
| * If a resource is missing, it's not really an error, it's |
| * just that we don't have any data for that particular locale ID. |
| */ |
| if (tmpErr != U_MISSING_RESOURCE_ERROR) { |
| *err = tmpErr; |
| } |
| } |
| else if (resLen >= bufferLength) { |
| /* The buffer should never overflow. */ |
| *err = U_INTERNAL_PROGRAM_ERROR; |
| } |
| else { |
| u_UCharsToChars(s, buffer, resLen + 1); |
| if (resLen >= 3 && |
| uprv_strnicmp(buffer, unknownLanguage, 3) == 0 && |
| (resLen == 3 || buffer[3] == '_')) { |
| uprv_memmove(buffer, buffer + 3, resLen - 3 + 1); |
| } |
| result = buffer; |
| } |
| } else { |
| *err = tmpErr; |
| } |
| } |
| |
| return result; |
| } |
| |
| /** |
| * Append a tag to a buffer, adding the separator if necessary. The buffer |
| * must be large enough to contain the resulting tag plus any separator |
| * necessary. The tag must not be a zero-length string. |
| * |
| * @param tag The tag to add. |
| * @param tagLength The length of the tag. |
| * @param buffer The output buffer. |
| * @param bufferLength The length of the output buffer. This is an input/ouput parameter. |
| **/ |
| static void U_CALLCONV |
| appendTag( |
| const char* tag, |
| int32_t tagLength, |
| char* buffer, |
| int32_t* bufferLength, |
| UBool withSeparator) { |
| |
| if (withSeparator) { |
| buffer[*bufferLength] = '_'; |
| ++(*bufferLength); |
| } |
| |
| uprv_memmove( |
| &buffer[*bufferLength], |
| tag, |
| tagLength); |
| |
| *bufferLength += tagLength; |
| } |
| |
| /** |
| * Create a tag string from the supplied parameters. The lang, script and region |
| * parameters may be NULL pointers. If they are, their corresponding length parameters |
| * must be less than or equal to 0. |
| * |
| * If any of the language, script or region parameters are empty, and the alternateTags |
| * parameter is not NULL, it will be parsed for potential language, script and region tags |
| * to be used when constructing the new tag. If the alternateTags parameter is NULL, or |
| * it contains no language tag, the default tag for the unknown language is used. |
| * |
| * If the length of the new string exceeds the capacity of the output buffer, |
| * the function copies as many bytes to the output buffer as it can, and returns |
| * the error U_BUFFER_OVERFLOW_ERROR. |
| * |
| * If an illegal argument is provided, the function returns the error |
| * U_ILLEGAL_ARGUMENT_ERROR. |
| * |
| * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if |
| * the tag string fits in the output buffer, but the null terminator doesn't. |
| * |
| * @param lang The language tag to use. |
| * @param langLength The length of the language tag. |
| * @param script The script tag to use. |
| * @param scriptLength The length of the script tag. |
| * @param region The region tag to use. |
| * @param regionLength The length of the region tag. |
| * @param trailing Any trailing data to append to the new tag. |
| * @param trailingLength The length of the trailing data. |
| * @param alternateTags A string containing any alternate tags. |
| * @param sink The output sink receiving the tag string. |
| * @param err A pointer to a UErrorCode for error reporting. |
| **/ |
| static void U_CALLCONV |
| createTagStringWithAlternates( |
| const char* lang, |
| int32_t langLength, |
| const char* script, |
| int32_t scriptLength, |
| const char* region, |
| int32_t regionLength, |
| const char* trailing, |
| int32_t trailingLength, |
| const char* alternateTags, |
| icu::ByteSink& sink, |
| UErrorCode* err) { |
| |
| if (U_FAILURE(*err)) { |
| goto error; |
| } |
| else if (langLength >= ULOC_LANG_CAPACITY || |
| scriptLength >= ULOC_SCRIPT_CAPACITY || |
| regionLength >= ULOC_COUNTRY_CAPACITY) { |
| goto error; |
| } |
| else { |
| /** |
| * ULOC_FULLNAME_CAPACITY will provide enough capacity |
| * that we can build a string that contains the language, |
| * script and region code without worrying about overrunning |
| * the user-supplied buffer. |
| **/ |
| char tagBuffer[ULOC_FULLNAME_CAPACITY]; |
| int32_t tagLength = 0; |
| UBool regionAppended = FALSE; |
| |
| if (langLength > 0) { |
| appendTag( |
| lang, |
| langLength, |
| tagBuffer, |
| &tagLength, |
| /*withSeparator=*/FALSE); |
| } |
| else if (alternateTags == NULL) { |
| /* |
| * Use the empty string for an unknown language, if |
| * we found no language. |
| */ |
| } |
| else { |
| /* |
| * Parse the alternateTags string for the language. |
| */ |
| char alternateLang[ULOC_LANG_CAPACITY]; |
| int32_t alternateLangLength = sizeof(alternateLang); |
| |
| alternateLangLength = |
| uloc_getLanguage( |
| alternateTags, |
| alternateLang, |
| alternateLangLength, |
| err); |
| if(U_FAILURE(*err) || |
| alternateLangLength >= ULOC_LANG_CAPACITY) { |
| goto error; |
| } |
| else if (alternateLangLength == 0) { |
| /* |
| * Use the empty string for an unknown language, if |
| * we found no language. |
| */ |
| } |
| else { |
| appendTag( |
| alternateLang, |
| alternateLangLength, |
| tagBuffer, |
| &tagLength, |
| /*withSeparator=*/FALSE); |
| } |
| } |
| |
| if (scriptLength > 0) { |
| appendTag( |
| script, |
| scriptLength, |
| tagBuffer, |
| &tagLength, |
| /*withSeparator=*/TRUE); |
| } |
| else if (alternateTags != NULL) { |
| /* |
| * Parse the alternateTags string for the script. |
| */ |
| char alternateScript[ULOC_SCRIPT_CAPACITY]; |
| |
| const int32_t alternateScriptLength = |
| uloc_getScript( |
| alternateTags, |
| alternateScript, |
| sizeof(alternateScript), |
| err); |
| |
| if (U_FAILURE(*err) || |
| alternateScriptLength >= ULOC_SCRIPT_CAPACITY) { |
| goto error; |
| } |
| else if (alternateScriptLength > 0) { |
| appendTag( |
| alternateScript, |
| alternateScriptLength, |
| tagBuffer, |
| &tagLength, |
| /*withSeparator=*/TRUE); |
| } |
| } |
| |
| if (regionLength > 0) { |
| appendTag( |
| region, |
| regionLength, |
| tagBuffer, |
| &tagLength, |
| /*withSeparator=*/TRUE); |
| |
| regionAppended = TRUE; |
| } |
| else if (alternateTags != NULL) { |
| /* |
| * Parse the alternateTags string for the region. |
| */ |
| char alternateRegion[ULOC_COUNTRY_CAPACITY]; |
| |
| const int32_t alternateRegionLength = |
| uloc_getCountry( |
| alternateTags, |
| alternateRegion, |
| sizeof(alternateRegion), |
| err); |
| if (U_FAILURE(*err) || |
| alternateRegionLength >= ULOC_COUNTRY_CAPACITY) { |
| goto error; |
| } |
| else if (alternateRegionLength > 0) { |
| appendTag( |
| alternateRegion, |
| alternateRegionLength, |
| tagBuffer, |
| &tagLength, |
| /*withSeparator=*/TRUE); |
| |
| regionAppended = TRUE; |
| } |
| } |
| |
| /** |
| * Copy the partial tag from our internal buffer to the supplied |
| * target. |
| **/ |
| sink.Append(tagBuffer, tagLength); |
| |
| if (trailingLength > 0) { |
| if (*trailing != '@') { |
| sink.Append("_", 1); |
| if (!regionAppended) { |
| /* extra separator is required */ |
| sink.Append("_", 1); |
| } |
| } |
| |
| /* |
| * Copy the trailing data into the supplied buffer. |
| */ |
| sink.Append(trailing, trailingLength); |
| } |
| |
| return; |
| } |
| |
| error: |
| |
| /** |
| * An overflow indicates the locale ID passed in |
| * is ill-formed. If we got here, and there was |
| * no previous error, it's an implicit overflow. |
| **/ |
| if (*err == U_BUFFER_OVERFLOW_ERROR || |
| U_SUCCESS(*err)) { |
| *err = U_ILLEGAL_ARGUMENT_ERROR; |
| } |
| } |
| |
| /** |
| * Create a tag string from the supplied parameters. The lang, script and region |
| * parameters may be NULL pointers. If they are, their corresponding length parameters |
| * must be less than or equal to 0. If the lang parameter is an empty string, the |
| * default value for an unknown language is written to the output buffer. |
| * |
| * If the length of the new string exceeds the capacity of the output buffer, |
| * the function copies as many bytes to the output buffer as it can, and returns |
| * the error U_BUFFER_OVERFLOW_ERROR. |
| * |
| * If an illegal argument is provided, the function returns the error |
| * U_ILLEGAL_ARGUMENT_ERROR. |
| * |
| * @param lang The language tag to use. |
| * @param langLength The length of the language tag. |
| * @param script The script tag to use. |
| * @param scriptLength The length of the script tag. |
| * @param region The region tag to use. |
| * @param regionLength The length of the region tag. |
| * @param trailing Any trailing data to append to the new tag. |
| * @param trailingLength The length of the trailing data. |
| * @param sink The output sink receiving the tag string. |
| * @param err A pointer to a UErrorCode for error reporting. |
| **/ |
| static void U_CALLCONV |
| createTagString( |
| const char* lang, |
| int32_t langLength, |
| const char* script, |
| int32_t scriptLength, |
| const char* region, |
| int32_t regionLength, |
| const char* trailing, |
| int32_t trailingLength, |
| icu::ByteSink& sink, |
| UErrorCode* err) |
| { |
| createTagStringWithAlternates( |
| lang, |
| langLength, |
| script, |
| scriptLength, |
| region, |
| regionLength, |
| trailing, |
| trailingLength, |
| NULL, |
| sink, |
| err); |
| } |
| |
| /** |
| * Parse the language, script, and region subtags from a tag string, and copy the |
| * results into the corresponding output parameters. The buffers are null-terminated, |
| * unless overflow occurs. |
| * |
| * The langLength, scriptLength, and regionLength parameters are input/output |
| * parameters, and must contain the capacity of their corresponding buffers on |
| * input. On output, they will contain the actual length of the buffers, not |
| * including the null terminator. |
| * |
| * If the length of any of the output subtags exceeds the capacity of the corresponding |
| * buffer, the function copies as many bytes to the output buffer as it can, and returns |
| * the error U_BUFFER_OVERFLOW_ERROR. It will not parse any more subtags once overflow |
| * occurs. |
| * |
| * If an illegal argument is provided, the function returns the error |
| * U_ILLEGAL_ARGUMENT_ERROR. |
| * |
| * @param localeID The locale ID to parse. |
| * @param lang The language tag buffer. |
| * @param langLength The length of the language tag. |
| * @param script The script tag buffer. |
| * @param scriptLength The length of the script tag. |
| * @param region The region tag buffer. |
| * @param regionLength The length of the region tag. |
| * @param err A pointer to a UErrorCode for error reporting. |
| * @return The number of chars of the localeID parameter consumed. |
| **/ |
| static int32_t U_CALLCONV |
| parseTagString( |
| const char* localeID, |
| char* lang, |
| int32_t* langLength, |
| char* script, |
| int32_t* scriptLength, |
| char* region, |
| int32_t* regionLength, |
| UErrorCode* err) |
| { |
| const char* position = localeID; |
| int32_t subtagLength = 0; |
| |
| if(U_FAILURE(*err) || |
| localeID == NULL || |
| lang == NULL || |
| langLength == NULL || |
| script == NULL || |
| scriptLength == NULL || |
| region == NULL || |
| regionLength == NULL) { |
| goto error; |
| } |
| |
| subtagLength = ulocimp_getLanguage(position, &position, *err).extract(lang, *langLength, *err); |
| |
| /* |
| * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING |
| * to be an error, because it indicates the user-supplied tag is |
| * not well-formed. |
| */ |
| if(U_FAILURE(*err)) { |
| goto error; |
| } |
| |
| *langLength = subtagLength; |
| |
| /* |
| * If no language was present, use the empty string instead. |
| * Otherwise, move past any separator. |
| */ |
| if (_isIDSeparator(*position)) { |
| ++position; |
| } |
| |
| subtagLength = ulocimp_getScript(position, &position, *err).extract(script, *scriptLength, *err); |
| |
| if(U_FAILURE(*err)) { |
| goto error; |
| } |
| |
| *scriptLength = subtagLength; |
| |
| if (*scriptLength > 0) { |
| if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) { |
| /** |
| * If the script part is the "unknown" script, then don't return it. |
| **/ |
| *scriptLength = 0; |
| } |
| |
| /* |
| * Move past any separator. |
| */ |
| if (_isIDSeparator(*position)) { |
| ++position; |
| } |
| } |
| |
| subtagLength = ulocimp_getCountry(position, &position, *err).extract(region, *regionLength, *err); |
| |
| if(U_FAILURE(*err)) { |
| goto error; |
| } |
| |
| *regionLength = subtagLength; |
| |
| if (*regionLength > 0) { |
| if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) { |
| /** |
| * If the region part is the "unknown" region, then don't return it. |
| **/ |
| *regionLength = 0; |
| } |
| } else if (*position != 0 && *position != '@') { |
| /* back up over consumed trailing separator */ |
| --position; |
| } |
| |
| exit: |
| |
| return (int32_t)(position - localeID); |
| |
| error: |
| |
| /** |
| * If we get here, we have no explicit error, it's the result of an |
| * illegal argument. |
| **/ |
| if (!U_FAILURE(*err)) { |
| *err = U_ILLEGAL_ARGUMENT_ERROR; |
| } |
| |
| goto exit; |
| } |
| |
| static UBool U_CALLCONV |
| createLikelySubtagsString( |
| const char* lang, |
| int32_t langLength, |
| const char* script, |
| int32_t scriptLength, |
| const char* region, |
| int32_t regionLength, |
| const char* variants, |
| int32_t variantsLength, |
| icu::ByteSink& sink, |
| UErrorCode* err) { |
| /** |
| * ULOC_FULLNAME_CAPACITY will provide enough capacity |
| * that we can build a string that contains the language, |
| * script and region code without worrying about overrunning |
| * the user-supplied buffer. |
| **/ |
| char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY]; |
| |
| if(U_FAILURE(*err)) { |
| goto error; |
| } |
| |
| /** |
| * Try the language with the script and region first. |
| **/ |
| if (scriptLength > 0 && regionLength > 0) { |
| |
| const char* likelySubtags = NULL; |
| |
| icu::CharString tagBuffer; |
| { |
| icu::CharStringByteSink sink(&tagBuffer); |
| createTagString( |
| lang, |
| langLength, |
| script, |
| scriptLength, |
| region, |
| regionLength, |
| NULL, |
| 0, |
| sink, |
| err); |
| } |
| if(U_FAILURE(*err)) { |
| goto error; |
| } |
| |
| likelySubtags = |
| findLikelySubtags( |
| tagBuffer.data(), |
| likelySubtagsBuffer, |
| sizeof(likelySubtagsBuffer), |
| err); |
| if(U_FAILURE(*err)) { |
| goto error; |
| } |
| |
| if (likelySubtags != NULL) { |
| /* Always use the language tag from the |
| maximal string, since it may be more |
| specific than the one provided. */ |
| createTagStringWithAlternates( |
| NULL, |
| 0, |
| NULL, |
| 0, |
| NULL, |
| 0, |
| variants, |
| variantsLength, |
| likelySubtags, |
| sink, |
| err); |
| return TRUE; |
| } |
| } |
| |
| /** |
| * Try the language with just the script. |
| **/ |
| if (scriptLength > 0) { |
| |
| const char* likelySubtags = NULL; |
| |
| icu::CharString tagBuffer; |
| { |
| icu::CharStringByteSink sink(&tagBuffer); |
| createTagString( |
| lang, |
| langLength, |
| script, |
| scriptLength, |
| NULL, |
| 0, |
| NULL, |
| 0, |
| sink, |
| err); |
| } |
| if(U_FAILURE(*err)) { |
| goto error; |
| } |
| |
| likelySubtags = |
| findLikelySubtags( |
| tagBuffer.data(), |
| likelySubtagsBuffer, |
| sizeof(likelySubtagsBuffer), |
| err); |
| if(U_FAILURE(*err)) { |
| goto error; |
| } |
| |
| if (likelySubtags != NULL) { |
| /* Always use the language tag from the |
| maximal string, since it may be more |
| specific than the one provided. */ |
| createTagStringWithAlternates( |
| NULL, |
| 0, |
| NULL, |
| 0, |
| region, |
| regionLength, |
| variants, |
| variantsLength, |
| likelySubtags, |
| sink, |
| err); |
| return TRUE; |
| } |
| } |
| |
| /** |
| * Try the language with just the region. |
| **/ |
| if (regionLength > 0) { |
| |
| const char* likelySubtags = NULL; |
| |
| icu::CharString tagBuffer; |
| { |
| icu::CharStringByteSink sink(&tagBuffer); |
| createTagString( |
| lang, |
| langLength, |
| NULL, |
| 0, |
| region, |
| regionLength, |
| NULL, |
| 0, |
| sink, |
| err); |
| } |
| if(U_FAILURE(*err)) { |
| goto error; |
| } |
| |
| likelySubtags = |
| findLikelySubtags( |
| tagBuffer.data(), |
| likelySubtagsBuffer, |
| sizeof(likelySubtagsBuffer), |
| err); |
| if(U_FAILURE(*err)) { |
| goto error; |
| } |
| |
| if (likelySubtags != NULL) { |
| /* Always use the language tag from the |
| maximal string, since it may be more |
| specific than the one provided. */ |
| createTagStringWithAlternates( |
| NULL, |
| 0, |
| script, |
| scriptLength, |
| NULL, |
| 0, |
| variants, |
| variantsLength, |
| likelySubtags, |
| sink, |
| err); |
| return TRUE; |
| } |
| } |
| |
| /** |
| * Finally, try just the language. |
| **/ |
| { |
| const char* likelySubtags = NULL; |
| |
| icu::CharString tagBuffer; |
| { |
| icu::CharStringByteSink sink(&tagBuffer); |
| createTagString( |
| lang, |
| langLength, |
| NULL, |
| 0, |
| NULL, |
| 0, |
| NULL, |
| 0, |
| sink, |
| err); |
| } |
| if(U_FAILURE(*err)) { |
| goto error; |
| } |
| |
| likelySubtags = |
| findLikelySubtags( |
| tagBuffer.data(), |
| likelySubtagsBuffer, |
| sizeof(likelySubtagsBuffer), |
| err); |
| if(U_FAILURE(*err)) { |
| goto error; |
| } |
| |
| if (likelySubtags != NULL) { |
| /* Always use the language tag from the |
| maximal string, since it may be more |
| specific than the one provided. */ |
| createTagStringWithAlternates( |
| NULL, |
| 0, |
| script, |
| scriptLength, |
| region, |
| regionLength, |
| variants, |
| variantsLength, |
| likelySubtags, |
| sink, |
| err); |
| return TRUE; |
| } |
| } |
| |
| return FALSE; |
| |
| error: |
| |
| if (!U_FAILURE(*err)) { |
| *err = U_ILLEGAL_ARGUMENT_ERROR; |
| } |
| |
| return FALSE; |
| } |
| |
/**
 * Scans the trailing part of a locale ID and jumps to the `error` label of
 * the enclosing function when a subtag in it is too long. Subtags are
 * delimited by '-' or '_', and scanning stops at the first '@'.
 *
 * The limit is tested before a character is counted, so a subtag of up to
 * nine characters passes and the tenth character triggers the jump.
 * NOTE(review): nine rather than eight looks unintended, but it is the
 * existing behavior and is deliberately kept here - confirm before tightening.
 *
 * @param trailing The chars to scan.
 * @param trailingLength The number of chars to scan.
 */
#define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t count = 0; \
    int32_t i; \
    for (i = 0; i < (trailingLength); i++) { \
        if ((trailing)[i] == '-' || (trailing)[i] == '_') { \
            count = 0; \
        } else if ((trailing)[i] == '@') { \
            break; \
        } else if (count > 8) { \
            goto error; \
        } else { \
            count++; \
        } \
    } \
} UPRV_BLOCK_MACRO_END
| |
| static UBool |
| _uloc_addLikelySubtags(const char* localeID, |
| icu::ByteSink& sink, |
| UErrorCode* err) { |
| char lang[ULOC_LANG_CAPACITY]; |
| int32_t langLength = sizeof(lang); |
| char script[ULOC_SCRIPT_CAPACITY]; |
| int32_t scriptLength = sizeof(script); |
| char region[ULOC_COUNTRY_CAPACITY]; |
| int32_t regionLength = sizeof(region); |
| const char* trailing = ""; |
| int32_t trailingLength = 0; |
| int32_t trailingIndex = 0; |
| UBool success = FALSE; |
| |
| if(U_FAILURE(*err)) { |
| goto error; |
| } |
| if (localeID == NULL) { |
| goto error; |
| } |
| |
| trailingIndex = parseTagString( |
| localeID, |
| lang, |
| &langLength, |
| script, |
| &scriptLength, |
| region, |
| ®ionLength, |
| err); |
| if(U_FAILURE(*err)) { |
| /* Overflow indicates an illegal argument error */ |
| if (*err == U_BUFFER_OVERFLOW_ERROR) { |
| *err = U_ILLEGAL_ARGUMENT_ERROR; |
| } |
| |
| goto error; |
| } |
| |
| /* Find the length of the trailing portion. */ |
| while (_isIDSeparator(localeID[trailingIndex])) { |
| trailingIndex++; |
| } |
| trailing = &localeID[trailingIndex]; |
| trailingLength = (int32_t)uprv_strlen(trailing); |
| |
| CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength); |
| |
| success = |
| createLikelySubtagsString( |
| lang, |
| langLength, |
| script, |
| scriptLength, |
| region, |
| regionLength, |
| trailing, |
| trailingLength, |
| sink, |
| err); |
| |
| if (!success) { |
| const int32_t localIDLength = (int32_t)uprv_strlen(localeID); |
| |
| /* |
| * If we get here, we need to return localeID. |
| */ |
| sink.Append(localeID, localIDLength); |
| } |
| |
| return success; |
| |
| error: |
| |
| if (!U_FAILURE(*err)) { |
| *err = U_ILLEGAL_ARGUMENT_ERROR; |
| } |
| return FALSE; |
| } |
| |
// Adds likely subtags to the sink.
// Returns true if the value in the sink was produced by a match during the lookup.
// Returns false if the value in the sink is the same as the input because
// no match was found during the lookup.
| static UBool _ulocimp_addLikelySubtags(const char*, icu::ByteSink&, UErrorCode*); |
| |
| static void |
| _uloc_minimizeSubtags(const char* localeID, |
| icu::ByteSink& sink, |
| UErrorCode* err) { |
| icu::CharString maximizedTagBuffer; |
| |
| char lang[ULOC_LANG_CAPACITY]; |
| int32_t langLength = sizeof(lang); |
| char script[ULOC_SCRIPT_CAPACITY]; |
| int32_t scriptLength = sizeof(script); |
| char region[ULOC_COUNTRY_CAPACITY]; |
| int32_t regionLength = sizeof(region); |
| const char* trailing = ""; |
| int32_t trailingLength = 0; |
| int32_t trailingIndex = 0; |
| UBool successGetMax = FALSE; |
| |
| if(U_FAILURE(*err)) { |
| goto error; |
| } |
| else if (localeID == NULL) { |
| goto error; |
| } |
| |
| trailingIndex = |
| parseTagString( |
| localeID, |
| lang, |
| &langLength, |
| script, |
| &scriptLength, |
| region, |
| ®ionLength, |
| err); |
| if(U_FAILURE(*err)) { |
| |
| /* Overflow indicates an illegal argument error */ |
| if (*err == U_BUFFER_OVERFLOW_ERROR) { |
| *err = U_ILLEGAL_ARGUMENT_ERROR; |
| } |
| |
| goto error; |
| } |
| |
| /* Find the spot where the variants or the keywords begin, if any. */ |
| while (_isIDSeparator(localeID[trailingIndex])) { |
| trailingIndex++; |
| } |
| trailing = &localeID[trailingIndex]; |
| trailingLength = (int32_t)uprv_strlen(trailing); |
| |
| CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength); |
| |
| { |
| icu::CharString base; |
| { |
| icu::CharStringByteSink baseSink(&base); |
| createTagString( |
| lang, |
| langLength, |
| script, |
| scriptLength, |
| region, |
| regionLength, |
| NULL, |
| 0, |
| baseSink, |
| err); |
| } |
| |
| /** |
| * First, we need to first get the maximization |
| * from AddLikelySubtags. |
| **/ |
| { |
| icu::CharStringByteSink maxSink(&maximizedTagBuffer); |
| successGetMax = _ulocimp_addLikelySubtags(base.data(), maxSink, err); |
| } |
| } |
| |
| if(U_FAILURE(*err)) { |
| goto error; |
| } |
| |
| if (!successGetMax) { |
| /** |
| * If we got here, return the locale ID parameter unchanged. |
| **/ |
| const int32_t localeIDLength = (int32_t)uprv_strlen(localeID); |
| sink.Append(localeID, localeIDLength); |
| return; |
| } |
| |
| // In the following, the lang, script, region are referring to those in |
| // the maximizedTagBuffer, not the one in the localeID. |
| langLength = sizeof(lang); |
| scriptLength = sizeof(script); |
| regionLength = sizeof(region); |
| parseTagString( |
| maximizedTagBuffer.data(), |
| lang, |
| &langLength, |
| script, |
| &scriptLength, |
| region, |
| ®ionLength, |
| err); |
| if(U_FAILURE(*err)) { |
| goto error; |
| } |
| |
| /** |
| * Start first with just the language. |
| **/ |
| { |
| icu::CharString tagBuffer; |
| { |
| icu::CharStringByteSink tagSink(&tagBuffer); |
| createLikelySubtagsString( |
| lang, |
| langLength, |
| NULL, |
| 0, |
| NULL, |
| 0, |
| NULL, |
| 0, |
| tagSink, |
| err); |
| } |
| |
| if(U_FAILURE(*err)) { |
| goto error; |
| } |
| else if (!tagBuffer.isEmpty() && |
| uprv_strnicmp( |
| maximizedTagBuffer.data(), |
| tagBuffer.data(), |
| tagBuffer.length()) == 0) { |
| |
| createTagString( |
| lang, |
| langLength, |
| NULL, |
| 0, |
| NULL, |
| 0, |
| trailing, |
| trailingLength, |
| sink, |
| err); |
| return; |
| } |
| } |
| |
| /** |
| * Next, try the language and region. |
| **/ |
| if (regionLength > 0) { |
| |
| icu::CharString tagBuffer; |
| { |
| icu::CharStringByteSink tagSink(&tagBuffer); |
| createLikelySubtagsString( |
| lang, |
| langLength, |
| NULL, |
| 0, |
| region, |
| regionLength, |
| NULL, |
| 0, |
| tagSink, |
| err); |
| } |
| |
| if(U_FAILURE(*err)) { |
| goto error; |
| } |
| else if (!tagBuffer.isEmpty() && |
| uprv_strnicmp( |
| maximizedTagBuffer.data(), |
| tagBuffer.data(), |
| tagBuffer.length()) == 0) { |
| |
| createTagString( |
| lang, |
| langLength, |
| NULL, |
| 0, |
| region, |
| regionLength, |
| trailing, |
| trailingLength, |
| sink, |
| err); |
| return; |
| } |
| } |
| |
| /** |
| * Finally, try the language and script. This is our last chance, |
| * since trying with all three subtags would only yield the |
| * maximal version that we already have. |
| **/ |
| if (scriptLength > 0) { |
| icu::CharString tagBuffer; |
| { |
| icu::CharStringByteSink tagSink(&tagBuffer); |
| createLikelySubtagsString( |
| lang, |
| langLength, |
| script, |
| scriptLength, |
| NULL, |
| 0, |
| NULL, |
| 0, |
| tagSink, |
| err); |
| } |
| |
| if(U_FAILURE(*err)) { |
| goto error; |
| } |
| else if (!tagBuffer.isEmpty() && |
| uprv_strnicmp( |
| maximizedTagBuffer.data(), |
| tagBuffer.data(), |
| tagBuffer.length()) == 0) { |
| |
| createTagString( |
| lang, |
| langLength, |
| script, |
| scriptLength, |
| NULL, |
| 0, |
| trailing, |
| trailingLength, |
| sink, |
| err); |
| return; |
| } |
| } |
| |
| { |
| /** |
| * If we got here, return the max + trail. |
| **/ |
| createTagString( |
| lang, |
| langLength, |
| script, |
| scriptLength, |
| region, |
| regionLength, |
| trailing, |
| trailingLength, |
| sink, |
| err); |
| return; |
| } |
| |
| error: |
| |
| if (!U_FAILURE(*err)) { |
| *err = U_ILLEGAL_ARGUMENT_ERROR; |
| } |
| } |
| |
| static UBool |
| do_canonicalize(const char* localeID, |
| char* buffer, |
| int32_t bufferCapacity, |
| UErrorCode* err) |
| { |
| uloc_canonicalize( |
| localeID, |
| buffer, |
| bufferCapacity, |
| err); |
| |
| if (*err == U_STRING_NOT_TERMINATED_WARNING || |
| *err == U_BUFFER_OVERFLOW_ERROR) { |
| *err = U_ILLEGAL_ARGUMENT_ERROR; |
| |
| return FALSE; |
| } |
| else if (U_FAILURE(*err)) { |
| |
| return FALSE; |
| } |
| else { |
| return TRUE; |
| } |
| } |
| |
| U_CAPI int32_t U_EXPORT2 |
| uloc_addLikelySubtags(const char* localeID, |
| char* maximizedLocaleID, |
| int32_t maximizedLocaleIDCapacity, |
| UErrorCode* status) { |
| if (U_FAILURE(*status)) { |
| return 0; |
| } |
| |
| icu::CheckedArrayByteSink sink( |
| maximizedLocaleID, maximizedLocaleIDCapacity); |
| |
| ulocimp_addLikelySubtags(localeID, sink, status); |
| int32_t reslen = sink.NumberOfBytesAppended(); |
| |
| if (U_FAILURE(*status)) { |
| return sink.Overflowed() ? reslen : -1; |
| } |
| |
| if (sink.Overflowed()) { |
| *status = U_BUFFER_OVERFLOW_ERROR; |
| } else { |
| u_terminateChars( |
| maximizedLocaleID, maximizedLocaleIDCapacity, reslen, status); |
| } |
| |
| return reslen; |
| } |
| |
| static UBool |
| _ulocimp_addLikelySubtags(const char* localeID, |
| icu::ByteSink& sink, |
| UErrorCode* status) { |
| char localeBuffer[ULOC_FULLNAME_CAPACITY]; |
| |
| if (do_canonicalize(localeID, localeBuffer, sizeof localeBuffer, status)) { |
| return _uloc_addLikelySubtags(localeBuffer, sink, status); |
| } |
| return FALSE; |
| } |
| |
| U_CAPI void U_EXPORT2 |
| ulocimp_addLikelySubtags(const char* localeID, |
| icu::ByteSink& sink, |
| UErrorCode* status) { |
| _ulocimp_addLikelySubtags(localeID, sink, status); |
| } |
| |
| U_CAPI int32_t U_EXPORT2 |
| uloc_minimizeSubtags(const char* localeID, |
| char* minimizedLocaleID, |
| int32_t minimizedLocaleIDCapacity, |
| UErrorCode* status) { |
| if (U_FAILURE(*status)) { |
| return 0; |
| } |
| |
| icu::CheckedArrayByteSink sink( |
| minimizedLocaleID, minimizedLocaleIDCapacity); |
| |
| ulocimp_minimizeSubtags(localeID, sink, status); |
| int32_t reslen = sink.NumberOfBytesAppended(); |
| |
| if (U_FAILURE(*status)) { |
| return sink.Overflowed() ? reslen : -1; |
| } |
| |
| if (sink.Overflowed()) { |
| *status = U_BUFFER_OVERFLOW_ERROR; |
| } else { |
| u_terminateChars( |
| minimizedLocaleID, minimizedLocaleIDCapacity, reslen, status); |
| } |
| |
| return reslen; |
| } |
| |
| U_CAPI void U_EXPORT2 |
| ulocimp_minimizeSubtags(const char* localeID, |
| icu::ByteSink& sink, |
| UErrorCode* status) { |
| char localeBuffer[ULOC_FULLNAME_CAPACITY]; |
| |
| if (do_canonicalize(localeID, localeBuffer, sizeof localeBuffer, status)) { |
| _uloc_minimizeSubtags(localeBuffer, sink, status); |
| } |
| } |
| |
| // Pairs of (language subtag, + or -) for finding out fast if common languages |
| // are LTR (minus) or RTL (plus). |
| static const char LANG_DIR_STRING[] = |
| "root-en-es-pt-zh-ja-ko-de-fr-it-ar+he+fa+ru-nl-pl-th-tr-"; |
| |
| // Implemented here because this calls ulocimp_addLikelySubtags(). |
| U_CAPI UBool U_EXPORT2 |
| uloc_isRightToLeft(const char *locale) { |
| UErrorCode errorCode = U_ZERO_ERROR; |
| char script[8]; |
| int32_t scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &errorCode); |
| if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING || |
| scriptLength == 0) { |
| // Fastpath: We know the likely scripts and their writing direction |
| // for some common languages. |
| errorCode = U_ZERO_ERROR; |
| char lang[8]; |
| int32_t langLength = uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &errorCode); |
| if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) { |
| return FALSE; |
| } |
| if (langLength > 0) { |
| const char* langPtr = uprv_strstr(LANG_DIR_STRING, lang); |
| if (langPtr != NULL) { |
| switch (langPtr[langLength]) { |
| case '-': return FALSE; |
| case '+': return TRUE; |
| default: break; // partial match of a longer code |
| } |
| } |
| } |
| // Otherwise, find the likely script. |
| errorCode = U_ZERO_ERROR; |
| icu::CharString likely; |
| { |
| icu::CharStringByteSink sink(&likely); |
| ulocimp_addLikelySubtags(locale, sink, &errorCode); |
| } |
| if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) { |
| return FALSE; |
| } |
| scriptLength = uloc_getScript(likely.data(), script, UPRV_LENGTHOF(script), &errorCode); |
| if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING || |
| scriptLength == 0) { |
| return FALSE; |
| } |
| } |
| UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script); |
| return uscript_isRightToLeft(scriptCode); |
| } |
| |
| U_NAMESPACE_BEGIN |
| |
| UBool |
| Locale::isRightToLeft() const { |
| return uloc_isRightToLeft(getBaseName()); |
| } |
| |
| U_NAMESPACE_END |
| |
| // The following must at least allow for rg key value (6) plus terminator (1). |
| #define ULOC_RG_BUFLEN 8 |
| |
| U_CAPI int32_t U_EXPORT2 |
| ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion, |
| char *region, int32_t regionCapacity, UErrorCode* status) { |
| if (U_FAILURE(*status)) { |
| return 0; |
| } |
| char rgBuf[ULOC_RG_BUFLEN]; |
| UErrorCode rgStatus = U_ZERO_ERROR; |
| |
| // First check for rg keyword value |
| int32_t rgLen = uloc_getKeywordValue(localeID, "rg", rgBuf, ULOC_RG_BUFLEN, &rgStatus); |
| if (U_FAILURE(rgStatus) || rgLen != 6) { |
| rgLen = 0; |
| } else { |
| // rgBuf guaranteed to be zero terminated here, with text len 6 |
| char *rgPtr = rgBuf; |
| for (; *rgPtr!= 0; rgPtr++) { |
| *rgPtr = uprv_toupper(*rgPtr); |
| } |
| rgLen = (uprv_strcmp(rgBuf+2, "ZZZZ") == 0)? 2: 0; |
| } |
| |
| if (rgLen == 0) { |
| // No valid rg keyword value, try for unicode_region_subtag |
| rgLen = uloc_getCountry(localeID, rgBuf, ULOC_RG_BUFLEN, status); |
| if (U_FAILURE(*status)) { |
| rgLen = 0; |
| } else if (rgLen == 0 && inferRegion) { |
| // no unicode_region_subtag but inferRegion TRUE, try likely subtags |
| rgStatus = U_ZERO_ERROR; |
| icu::CharString locBuf; |
| { |
| icu::CharStringByteSink sink(&locBuf); |
| ulocimp_addLikelySubtags(localeID, sink, &rgStatus); |
| } |
| if (U_SUCCESS(rgStatus)) { |
| rgLen = uloc_getCountry(locBuf.data(), rgBuf, ULOC_RG_BUFLEN, status); |
| if (U_FAILURE(*status)) { |
| rgLen = 0; |
| } |
| } |
| } |
| } |
| |
| rgBuf[rgLen] = 0; |
| uprv_strncpy(region, rgBuf, regionCapacity); |
| return u_terminateChars(region, regionCapacity, rgLen, status); |
| } |
| |