ICU-21236 Refactor UniqueCharStrings / CharStringMap
Move UniqueCharStrings and CharStringMap from
loclikelysubtags.{h,cpp} to separate header files
so we can reuse them to implement
https://github.com/unicode-org/icu/pull/1254
diff --git a/icu4c/source/common/charstrmap.h b/icu4c/source/common/charstrmap.h
new file mode 100644
index 0000000..3320a46
--- /dev/null
+++ b/icu4c/source/common/charstrmap.h
@@ -0,0 +1,55 @@
+// © 2020 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// charstrmap.h
+// created: 2020sep01 Frank Yung-Fong Tang
+
+#ifndef __CHARSTRMAP_H__
+#define __CHARSTRMAP_H__
+
+#include <utility>
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+#include "uhash.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Map of const char * keys & values.
+ * Stores pointers as is: Does not own/copy/adopt/release strings.
+ */
+class CharStringMap final : public UMemory {
+public:
+ /** Constructs an unusable non-map. */
+ CharStringMap() : map(nullptr) {}
+ CharStringMap(int32_t size, UErrorCode &errorCode) {
+ map = uhash_openSize(uhash_hashChars, uhash_compareChars, uhash_compareChars,
+ size, &errorCode);
+ }
+ CharStringMap(CharStringMap &&other) U_NOEXCEPT : map(other.map) {
+ other.map = nullptr;
+ }
+ CharStringMap(const CharStringMap &other) = delete;
+ ~CharStringMap() {
+ uhash_close(map);
+ }
+
+ CharStringMap &operator=(CharStringMap &&other) U_NOEXCEPT {
+ // Swap instead of overwrite: the table held before the assignment is closed by other's destructor (no leak), and self-move is a no-op.
+ std::swap(map, other.map);
+ return *this;
+ }
+ CharStringMap &operator=(const CharStringMap &other) = delete;
+
+ const char *get(const char *key) const { return static_cast<const char *>(uhash_get(map, key)); }
+ void put(const char *key, const char *value, UErrorCode &errorCode) {
+ uhash_put(map, const_cast<char *>(key), const_cast<char *>(value), &errorCode);
+ }
+
+private:
+ UHashtable *map;
+};
+
+U_NAMESPACE_END
+
+#endif // __CHARSTRMAP_H__
diff --git a/icu4c/source/common/common.vcxproj b/icu4c/source/common/common.vcxproj
index c9e4f98..d7f668e 100644
--- a/icu4c/source/common/common.vcxproj
+++ b/icu4c/source/common/common.vcxproj
@@ -341,7 +341,9 @@
<ClInclude Include="localeprioritylist.h" />
<ClInclude Include="locbased.h" />
<ClInclude Include="locdistance.h" />
+ <ClInclude Include="charstrmap.h" />
<ClInclude Include="loclikelysubtags.h" />
+ <ClInclude Include="uniquecharstr.h" />
<ClInclude Include="locutil.h" />
<ClInclude Include="lsr.h" />
<ClInclude Include="sharedobject.h" />
diff --git a/icu4c/source/common/common.vcxproj.filters b/icu4c/source/common/common.vcxproj.filters
index 5fabc6f..f1ba901 100644
--- a/icu4c/source/common/common.vcxproj.filters
+++ b/icu4c/source/common/common.vcxproj.filters
@@ -840,6 +840,12 @@
<ClInclude Include="locdistance.h">
<Filter>locales &amp; resources</Filter>
</ClInclude>
+ <ClInclude Include="charstrmap.h">
+ <Filter>locales &amp; resources</Filter>
+ </ClInclude>
+ <ClInclude Include="uniquecharstr.h">
+ <Filter>locales &amp; resources</Filter>
+ </ClInclude>
<ClInclude Include="loclikelysubtags.h">
<Filter>locales &amp; resources</Filter>
</ClInclude>
diff --git a/icu4c/source/common/common_uwp.vcxproj b/icu4c/source/common/common_uwp.vcxproj
index 65672e1..a579172 100644
--- a/icu4c/source/common/common_uwp.vcxproj
+++ b/icu4c/source/common/common_uwp.vcxproj
@@ -476,6 +476,8 @@
<ClInclude Include="localeprioritylist.h" />
<ClInclude Include="locbased.h" />
<ClInclude Include="locdistance.h" />
+ <ClInclude Include="charstrmap.h" />
+ <ClInclude Include="uniquecharstr.h" />
<ClInclude Include="loclikelysubtags.h" />
<ClInclude Include="locutil.h" />
<ClInclude Include="lsr.h" />
@@ -528,4 +530,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" Condition="'$(SkipUWP)'!='true'" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
-</Project>
\ No newline at end of file
+</Project>
diff --git a/icu4c/source/common/loclikelysubtags.cpp b/icu4c/source/common/loclikelysubtags.cpp
index 1fbf1a1..03ab858 100644
--- a/icu4c/source/common/loclikelysubtags.cpp
+++ b/icu4c/source/common/loclikelysubtags.cpp
@@ -20,6 +20,7 @@
#include "uhash.h"
#include "uinvchar.h"
#include "umutex.h"
+#include "uniquecharstr.h"
#include "uresdata.h"
#include "uresimp.h"
@@ -31,71 +32,6 @@
constexpr char PSEUDO_BIDI_PREFIX = '+'; // -XB, -PSBIDI
constexpr char PSEUDO_CRACKED_PREFIX = ','; // -XC, -PSCRACK
-/**
- * Stores NUL-terminated strings with duplicate elimination.
- * Checks for unique UTF-16 string pointers and converts to invariant characters.
- */
-class UniqueCharStrings {
-public:
- UniqueCharStrings(UErrorCode &errorCode) : strings(nullptr) {
- uhash_init(&map, uhash_hashUChars, uhash_compareUChars, uhash_compareLong, &errorCode);
- if (U_FAILURE(errorCode)) { return; }
- strings = new CharString();
- if (strings == nullptr) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- }
- }
- ~UniqueCharStrings() {
- uhash_close(&map);
- delete strings;
- }
-
- /** Returns/orphans the CharString that contains all strings. */
- CharString *orphanCharStrings() {
- CharString *result = strings;
- strings = nullptr;
- return result;
- }
-
- /** Adds a string and returns a unique number for it. */
- int32_t add(const UnicodeString &s, UErrorCode &errorCode) {
- if (U_FAILURE(errorCode)) { return 0; }
- if (isFrozen) {
- errorCode = U_NO_WRITE_PERMISSION;
- return 0;
- }
- // The string points into the resource bundle.
- const char16_t *p = s.getBuffer();
- int32_t oldIndex = uhash_geti(&map, p);
- if (oldIndex != 0) { // found duplicate
- return oldIndex;
- }
- // Explicit NUL terminator for the previous string.
- // The strings object is also terminated with one implicit NUL.
- strings->append(0, errorCode);
- int32_t newIndex = strings->length();
- strings->appendInvariantChars(s, errorCode);
- uhash_puti(&map, const_cast<char16_t *>(p), newIndex, &errorCode);
- return newIndex;
- }
-
- void freeze() { isFrozen = true; }
-
- /**
- * Returns a string pointer for its unique number, if this object is frozen.
- * Otherwise nullptr.
- */
- const char *get(int32_t i) const {
- U_ASSERT(isFrozen);
- return isFrozen && i > 0 ? strings->data() + i : nullptr;
- }
-
-private:
- UHashtable map;
- CharString *strings;
- bool isFrozen = false;
-};
-
} // namespace
LocaleDistanceData::LocaleDistanceData(LocaleDistanceData &&data) :
diff --git a/icu4c/source/common/loclikelysubtags.h b/icu4c/source/common/loclikelysubtags.h
index 90ddfff..14a01a5 100644
--- a/icu4c/source/common/loclikelysubtags.h
+++ b/icu4c/source/common/loclikelysubtags.h
@@ -1,5 +1,5 @@
// © 2019 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html#License
+// License & terms of use: http://www.unicode.org/copyright.html
// loclikelysubtags.h
// created: 2019may08 Markus W. Scherer
@@ -13,49 +13,13 @@
#include "unicode/locid.h"
#include "unicode/uobject.h"
#include "unicode/ures.h"
+#include "charstrmap.h"
#include "lsr.h"
-#include "uhash.h"
U_NAMESPACE_BEGIN
struct XLikelySubtagsData;
-/**
- * Map of const char * keys & values.
- * Stores pointers as is: Does not own/copy/adopt/release strings.
- */
-class CharStringMap final : public UMemory {
-public:
- /** Constructs an unusable non-map. */
- CharStringMap() : map(nullptr) {}
- CharStringMap(int32_t size, UErrorCode &errorCode) {
- map = uhash_openSize(uhash_hashChars, uhash_compareChars, uhash_compareChars,
- size, &errorCode);
- }
- CharStringMap(CharStringMap &&other) U_NOEXCEPT : map(other.map) {
- other.map = nullptr;
- }
- CharStringMap(const CharStringMap &other) = delete;
- ~CharStringMap() {
- uhash_close(map);
- }
-
- CharStringMap &operator=(CharStringMap &&other) U_NOEXCEPT {
- map = other.map;
- other.map = nullptr;
- return *this;
- }
- CharStringMap &operator=(const CharStringMap &other) = delete;
-
- const char *get(const char *key) const { return static_cast<const char *>(uhash_get(map, key)); }
- void put(const char *key, const char *value, UErrorCode &errorCode) {
- uhash_put(map, const_cast<char *>(key), const_cast<char *>(value), &errorCode);
- }
-
-private:
- UHashtable *map;
-};
-
struct LocaleDistanceData {
LocaleDistanceData() = default;
LocaleDistanceData(LocaleDistanceData &&data);
diff --git a/icu4c/source/common/uniquecharstr.h b/icu4c/source/common/uniquecharstr.h
new file mode 100644
index 0000000..7e871dc
--- /dev/null
+++ b/icu4c/source/common/uniquecharstr.h
@@ -0,0 +1,82 @@
+// © 2020 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// uniquecharstr.h
+// created: 2020sep01 Frank Yung-Fong Tang
+
+#ifndef __UNIQUECHARSTR_H__
+#define __UNIQUECHARSTR_H__
+
+#include "charstr.h"
+#include "uhash.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Stores NUL-terminated strings with duplicate elimination.
+ * Checks for unique UTF-16 string pointers and converts to invariant characters.
+ */
+class UniqueCharStrings {
+public:
+ UniqueCharStrings(UErrorCode &errorCode) : strings(nullptr) {
+ uhash_init(&map, uhash_hashUChars, uhash_compareUChars, uhash_compareLong, &errorCode);
+ if (U_FAILURE(errorCode)) { return; }
+ strings = new CharString();
+ if (strings == nullptr) {
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ }
+ }
+ ~UniqueCharStrings() {
+ uhash_close(&map);
+ delete strings;
+ }
+
+ /** Returns/orphans the CharString that contains all strings. */
+ CharString *orphanCharStrings() {
+ CharString *result = strings;
+ strings = nullptr;
+ return result;
+ }
+
+ /** Adds a string and returns a unique number for it. */
+ int32_t add(const UnicodeString &s, UErrorCode &errorCode) {
+ if (U_FAILURE(errorCode)) { return 0; }
+ if (isFrozen) {
+ errorCode = U_NO_WRITE_PERMISSION;
+ return 0;
+ }
+ // The string points into the resource bundle.
+ const char16_t *p = s.getBuffer();
+ int32_t oldIndex = uhash_geti(&map, p);
+ if (oldIndex != 0) { // found duplicate
+ return oldIndex;
+ }
+ // Explicit NUL terminator for the previous string.
+ // The strings object is also terminated with one implicit NUL.
+ strings->append(0, errorCode);
+ int32_t newIndex = strings->length();
+ strings->appendInvariantChars(s, errorCode);
+ uhash_puti(&map, const_cast<char16_t *>(p), newIndex, &errorCode);
+ return newIndex;
+ }
+
+ void freeze() { isFrozen = true; }
+
+ /**
+ * Returns a string pointer for its unique number, if this object is frozen.
+ * Otherwise nullptr.
+ */
+ const char *get(int32_t i) const {
+ U_ASSERT(isFrozen);
+ return isFrozen && i > 0 ? strings->data() + i : nullptr;
+ }
+
+private:
+ UHashtable map;
+ CharString *strings;
+ bool isFrozen = false;
+};
+
+U_NAMESPACE_END
+
+#endif // __UNIQUECHARSTR_H__