ICU-11058 support nested collation rule imports; make the importer stateless
X-SVN-Rev: 36157
diff --git a/icu4c/source/data/coll/es.txt b/icu4c/source/data/coll/es.txt
index 7d47f7a..4e2cf9c 100644
--- a/icu4c/source/data/coll/es.txt
+++ b/icu4c/source/data/coll/es.txt
@@ -14,117 +14,7 @@
collations{
search{
Sequence{
- "[normalization on][suppressContractions [เ-ไ ເ-ໄ ꪵ ꪶ ꪹ ꪻ ꪼ]]"
- "&'='<'≠'"
- "&ا"
- "<<<ﺎ<<<ﺍ"
- "<<آ"
- "<<<ﺂ<<<ﺁ"
- "<<أ"
- "<<<ﺄ<<<ﺃ"
- "<<إ"
- "<<<ﺈ<<<ﺇ"
- "&و"
- "<<<ۥ"
- "<<<ﻮ<<<ﻭ"
- "<<ؤ"
- "<<<ﺆ<<<ﺅ"
- "&ي"
- "<<<ۦ"
- "<<<ﻳ<<<ﻴ<<<ﻲ<<<ﻱ"
- "<<ئ"
- "<<<ﺋ<<<ﺌ<<<ﺊ<<<ﺉ"
- "<<ى"
- "<<<ﯨ<<<ﯩ"
- "<<<ﻰ<<<ﻯ"
- "&ه"
- "<<<ﻫ<<<ﻬ<<<ﻪ<<<ﻩ"
- "<<ة"
- "<<<ﺔ<<<ﺓ"
- "&[last primary ignorable]<<׳"
- "<<״"
- "<<ـ"
- "<<ฺ"
- "&ᄀ"
- "=ᆨ"
- "&ᄀᄀ"
- "=ᄁ=ᆩ"
- "&ᄀᄉ"
- "=ᆪ"
- "&ᄂ"
- "=ᆫ"
- "&ᄂᄌ"
- "=ᆬ"
- "&ᄂᄒ"
- "=ᆭ"
- "&ᄃ"
- "=ᆮ"
- "&ᄃᄃ"
- "=ᄄ"
- "&ᄅ"
- "=ᆯ"
- "&ᄅᄀ"
- "=ᆰ"
- "&ᄅᄆ"
- "=ᆱ"
- "&ᄅᄇ"
- "=ᆲ"
- "&ᄅᄉ"
- "=ᆳ"
- "&ᄅᄐ"
- "=ᆴ"
- "&ᄅᄑ"
- "=ᆵ"
- "&ᄅᄒ"
- "=ᆶ"
- "&ᄆ"
- "=ᆷ"
- "&ᄇ"
- "=ᆸ"
- "&ᄇᄇ"
- "=ᄈ"
- "&ᄇᄉ"
- "=ᆹ"
- "&ᄉ"
- "=ᆺ"
- "&ᄉᄉ"
- "=ᄊ=ᆻ"
- "&ᄋ"
- "=ᆼ"
- "&ᄌ"
- "=ᆽ"
- "&ᄌᄌ"
- "=ᄍ"
- "&ᄎ"
- "=ᆾ"
- "&ᄏ"
- "=ᆿ"
- "&ᄐ"
- "=ᇀ"
- "&ᄑ"
- "=ᇁ"
- "&ᄒ"
- "=ᇂ"
- "&ᅡᅵ"
- "=ᅢ"
- "&ᅣᅵ"
- "=ᅤ"
- "&ᅥᅵ"
- "=ᅦ"
- "&ᅧᅵ"
- "=ᅨ"
- "&ᅩᅡ"
- "=ᅪ"
- "&ᅩᅡᅵ"
- "=ᅫ"
- "&ᅩᅵ"
- "=ᅬ"
- "&ᅮᅴ"
- "=ᅯ"
- "&ᅮᅴᅵ"
- "=ᅰ"
- "&ᅮᅵ"
- "=ᅱ"
+ "[import und-u-co-search]"
"&N<ñ<<<Ñ"
}
Version{"25"}
diff --git a/icu4c/source/i18n/collationbuilder.cpp b/icu4c/source/i18n/collationbuilder.cpp
index acf5738..d0ce153 100644
--- a/icu4c/source/i18n/collationbuilder.cpp
+++ b/icu4c/source/i18n/collationbuilder.cpp
@@ -54,26 +54,22 @@
class BundleImporter : public CollationRuleParser::Importer {
public:
- BundleImporter() : rules(NULL) {}
+ BundleImporter() {}
virtual ~BundleImporter();
- virtual const UnicodeString *getRules(
+ virtual void getRules(
const char *localeID, const char *collationType,
+ UnicodeString &rules,
const char *&errorReason, UErrorCode &errorCode);
-
-private:
- UnicodeString *rules;
};
-BundleImporter::~BundleImporter() {
- delete rules;
-}
+BundleImporter::~BundleImporter() {}
-const UnicodeString *
+void
BundleImporter::getRules(
const char *localeID, const char *collationType,
+ UnicodeString &rules,
const char *& /*errorReason*/, UErrorCode &errorCode) {
- delete rules;
- return rules = CollationLoader::loadRules(localeID, collationType, errorCode);
+ CollationLoader::loadRules(localeID, collationType, rules, errorCode);
}
} // namespace
diff --git a/icu4c/source/i18n/collationruleparser.cpp b/icu4c/source/i18n/collationruleparser.cpp
index 62dcb64..500cf4d 100644
--- a/icu4c/source/i18n/collationruleparser.cpp
+++ b/icu4c/source/i18n/collationruleparser.cpp
@@ -638,10 +638,9 @@
if(importer == NULL) {
setParseError("[import langTag] is not supported", errorCode);
} else {
- const UnicodeString *importedRules =
- importer->getRules(baseID,
- length > 0 ? collationType : "standard",
- errorReason, errorCode);
+ UnicodeString importedRules;
+ importer->getRules(baseID, length > 0 ? collationType : "standard",
+ importedRules, errorReason, errorCode);
if(U_FAILURE(errorCode)) {
if(errorReason == NULL) {
errorReason = "[import langTag] failed";
@@ -651,7 +650,7 @@
}
const UnicodeString *outerRules = rules;
int32_t outerRuleIndex = ruleIndex;
- parse(*importedRules, errorCode);
+ parse(importedRules, errorCode);
if(U_FAILURE(errorCode)) {
if(parseError != NULL) {
parseError->offset = outerRuleIndex;
diff --git a/icu4c/source/i18n/collationruleparser.h b/icu4c/source/i18n/collationruleparser.h
index 8df82bc..3c2b22c 100644
--- a/icu4c/source/i18n/collationruleparser.h
+++ b/icu4c/source/i18n/collationruleparser.h
@@ -93,8 +93,9 @@
class U_I18N_API Importer : public UObject {
public:
virtual ~Importer();
- virtual const UnicodeString *getRules(
+ virtual void getRules(
const char *localeID, const char *collationType,
+ UnicodeString &rules,
const char *&errorReason, UErrorCode &errorCode) = 0;
};
diff --git a/icu4c/source/i18n/ucol_imp.h b/icu4c/source/i18n/ucol_imp.h
index 39d1b28..dd1c85a 100644
--- a/icu4c/source/i18n/ucol_imp.h
+++ b/icu4c/source/i18n/ucol_imp.h
@@ -61,8 +61,8 @@
class CollationLoader {
public:
static void appendRootRules(UnicodeString &s);
- static UnicodeString *loadRules(const char *localeID, const char *collationType,
- UErrorCode &errorCode);
+ static void loadRules(const char *localeID, const char *collationType,
+ UnicodeString &rules, UErrorCode &errorCode);
static const CollationTailoring *loadTailoring(const Locale &locale, Locale &validLocale,
UErrorCode &errorCode);
diff --git a/icu4c/source/i18n/ucol_res.cpp b/icu4c/source/i18n/ucol_res.cpp
index a01c121..d7fd26d 100644
--- a/icu4c/source/i18n/ucol_res.cpp
+++ b/icu4c/source/i18n/ucol_res.cpp
@@ -100,16 +100,17 @@
}
}
-UnicodeString *
-CollationLoader::loadRules(const char *localeID, const char *collationType, UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) { return NULL; }
+void
+CollationLoader::loadRules(const char *localeID, const char *collationType,
+ UnicodeString &rules, UErrorCode &errorCode) {
+ if(U_FAILURE(errorCode)) { return; }
U_ASSERT(collationType != NULL && *collationType != 0);
// Copy the type for lowercasing.
char type[16];
int32_t typeLength = uprv_strlen(collationType);
if(typeLength >= LENGTHOF(type)) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
+ return;
}
uprv_memcpy(type, collationType, typeLength + 1);
T_CString_toLowerCase(type);
@@ -121,15 +122,13 @@
ures_getByKeyWithFallback(collations.getAlias(), type, NULL, &errorCode));
int32_t length;
const UChar *s = ures_getStringByKey(data.getAlias(), "Sequence", &length, &errorCode);
- if(U_FAILURE(errorCode)) { return NULL; }
+ if(U_FAILURE(errorCode)) { return; }
// No string pointer aliasing so that we need not hold onto the resource bundle.
- UnicodeString *rules = new UnicodeString(s, length);
- if(rules == NULL) {
+ rules.setTo(s, length);
+ if(rules.isBogus()) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
- return NULL;
}
- return rules;
}
const CollationTailoring *
diff --git a/icu4c/source/tools/genrb/parse.cpp b/icu4c/source/tools/genrb/parse.cpp
index cb4db51..26573d4 100644
--- a/icu4c/source/tools/genrb/parse.cpp
+++ b/icu4c/source/tools/genrb/parse.cpp
@@ -673,21 +673,22 @@
public:
GenrbImporter(const char *in, const char *out) : inputDir(in), outputDir(out) {}
virtual ~GenrbImporter();
- virtual const UnicodeString *getRules(
+ virtual void getRules(
const char *localeID, const char *collationType,
+ UnicodeString &rules,
const char *&errorReason, UErrorCode &errorCode);
private:
const char *inputDir;
const char *outputDir;
- UnicodeString rules;
};
GenrbImporter::~GenrbImporter() {}
-const UnicodeString *
+void
GenrbImporter::getRules(
const char *localeID, const char *collationType,
+ UnicodeString &rules,
const char *& /*errorReason*/, UErrorCode &errorCode) {
struct SRBRoot *data = NULL;
UCHARBUF *ucbuf = NULL;
@@ -718,11 +719,11 @@
if (U_FAILURE(errorCode)) {
- return NULL;
+ return;
}
if(filename==NULL){
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
+ return;
}else{
filelen = (int32_t)uprv_strlen(filename);
}
@@ -810,6 +811,9 @@
/* Parse the data into an SRBRoot */
data = parse(ucbuf, inputDir, outputDir, filename, FALSE, FALSE, &errorCode);
+ if (U_FAILURE(errorCode)) {
+ goto finish;
+ }
root = data->fRoot;
collations = resLookup(root, "collations");
@@ -818,7 +822,8 @@
if (collation != NULL) {
sequence = resLookup(collation, "Sequence");
if (sequence != NULL) {
- rules.setTo(FALSE, sequence->u.fString.fChars, sequence->u.fString.fLength);
+ // No string pointer aliasing so that we need not hold onto the resource bundle.
+ rules.setTo(sequence->u.fString.fChars, sequence->u.fString.fLength);
}
}
}
@@ -835,8 +840,6 @@
if(ucbuf) {
ucbuf_close(ucbuf);
}
-
- return &rules;
}
// Quick-and-dirty escaping function.