ICU-21349 Adjust C++ `MeasureUnitImpl::serialize` to be as same as the Java one
See #1496
diff --git a/icu4c/source/common/charstr.cpp b/icu4c/source/common/charstr.cpp
index 318a185..c4710d6 100644
--- a/icu4c/source/common/charstr.cpp
+++ b/icu4c/source/common/charstr.cpp
@@ -141,6 +141,38 @@
return *this;
}
+CharString &CharString::appendNumber(int32_t number, UErrorCode &status) {
+ if (number < 0) {
+ this->append('-', status);
+ if (U_FAILURE(status)) {
+ return *this;
+ }
+ }
+
+ if (number == 0) {
+ this->append('0', status);
+ return *this;
+ }
+
+ int32_t numLen = 0;
+ while (number != 0) {
+ int32_t residue = number % 10;
+ number /= 10;
+ this->append(std::abs(residue) + '0', status);
+ numLen++;
+ if (U_FAILURE(status)) {
+ return *this;
+ }
+ }
+
+ int32_t start = this->length() - numLen, end = this->length() - 1;
+ while(start < end) {
+ std::swap(this->data()[start++], this->data()[end--]);
+ }
+
+ return *this;
+}
+
char *CharString::getAppendBuffer(int32_t minCapacity,
int32_t desiredCapacityHint,
int32_t &resultCapacity,
diff --git a/icu4c/source/common/charstr.h b/icu4c/source/common/charstr.h
index 6619faa..175acd1 100644
--- a/icu4c/source/common/charstr.h
+++ b/icu4c/source/common/charstr.h
@@ -127,6 +127,9 @@
return append(s.data(), s.length(), errorCode);
}
CharString &append(const char *s, int32_t sLength, UErrorCode &status);
+
+ CharString &appendNumber(int32_t number, UErrorCode &status);
+
/**
* Returns a writable buffer for appending and writes the buffer's capacity to
* resultCapacity. Guarantees resultCapacity>=minCapacity if U_SUCCESS().
diff --git a/icu4c/source/common/util.h b/icu4c/source/common/util.h
index 9c3b76d..b5fac38 100644
--- a/icu4c/source/common/util.h
+++ b/icu4c/source/common/util.h
@@ -13,10 +13,10 @@
#ifndef ICU_UTIL_H
#define ICU_UTIL_H
-#include "unicode/utypes.h"
-#include "unicode/uobject.h"
+#include "charstr.h"
#include "unicode/unistr.h"
-
+#include "unicode/uobject.h"
+#include "unicode/utypes.h"
//--------------------------------------------------------------------
// class ICU_Utility
// i18n utility functions, scoped into the class ICU_Utility.
diff --git a/icu4c/source/i18n/measunit_extra.cpp b/icu4c/source/i18n/measunit_extra.cpp
index 1edc772..03aefcd 100644
--- a/icu4c/source/i18n/measunit_extra.cpp
+++ b/icu4c/source/i18n/measunit_extra.cpp
@@ -30,6 +30,7 @@
#include "unicode/ures.h"
#include "unicode/ustringtrie.h"
#include "uresimp.h"
+#include "util.h"
#include <cstdlib>
U_NAMESPACE_BEGIN
@@ -628,107 +629,6 @@
return (*realLeft)->compareTo(**realRight);
}
-/**
- * Generate the identifier string for a single unit in place.
- *
- * Does not support the dimensionless SingleUnitImpl: calling serializeSingle
- * with the dimensionless unit results in an U_INTERNAL_PROGRAM_ERROR.
- *
- * @param first If singleUnit is part of a compound unit, and not its first
- * single unit, set this to false. Otherwise: set to true.
- */
-void serializeSingle(const SingleUnitImpl& singleUnit, bool first, CharString& output, UErrorCode& status) {
- if (first && singleUnit.dimensionality < 0) {
- // Essentially the "unary per". For compound units with a numerator, the
- // caller takes care of the "binary per".
- output.append("per-", status);
- }
-
- if (singleUnit.isDimensionless()) {
- status = U_INTERNAL_PROGRAM_ERROR;
- return;
- }
- int8_t posPower = std::abs(singleUnit.dimensionality);
- if (posPower == 0) {
- status = U_INTERNAL_PROGRAM_ERROR;
- } else if (posPower == 1) {
- // no-op
- } else if (posPower == 2) {
- output.append("square-", status);
- } else if (posPower == 3) {
- output.append("cubic-", status);
- } else if (posPower < 10) {
- output.append("pow", status);
- output.append(posPower + '0', status);
- output.append('-', status);
- } else if (posPower <= 15) {
- output.append("pow1", status);
- output.append('0' + (posPower % 10), status);
- output.append('-', status);
- } else {
- status = kUnitIdentifierSyntaxError;
- }
- if (U_FAILURE(status)) {
- return;
- }
-
- if (singleUnit.siPrefix != UMEASURE_SI_PREFIX_ONE) {
- for (const auto& siPrefixInfo : gSIPrefixStrings) {
- if (siPrefixInfo.value == singleUnit.siPrefix) {
- output.append(siPrefixInfo.string, status);
- break;
- }
- }
- }
- if (U_FAILURE(status)) {
- return;
- }
-
- output.append(singleUnit.getSimpleUnitID(), status);
-}
-
-/**
- * Normalize a MeasureUnitImpl and generate the identifier string in place.
- */
-void serialize(MeasureUnitImpl& impl, UErrorCode& status) {
- if (U_FAILURE(status)) {
- return;
- }
- U_ASSERT(impl.identifier.isEmpty());
- if (impl.singleUnits.length() == 0) {
- // Dimensionless, constructed by the default constructor: no appending
- // to impl.identifier, we wish it to contain the zero-length string.
- return;
- }
- if (impl.complexity == UMEASURE_UNIT_COMPOUND) {
- // Note: don't sort a MIXED unit
- uprv_sortArray(impl.singleUnits.getAlias(), impl.singleUnits.length(),
- sizeof(impl.singleUnits[0]), compareSingleUnits, nullptr, false, &status);
- if (U_FAILURE(status)) {
- return;
- }
- }
- serializeSingle(*impl.singleUnits[0], true, impl.identifier, status);
- if (impl.singleUnits.length() == 1) {
- return;
- }
- for (int32_t i = 1; i < impl.singleUnits.length(); i++) {
- const SingleUnitImpl &prev = *impl.singleUnits[i - 1];
- const SingleUnitImpl &curr = *impl.singleUnits[i];
- if (impl.complexity == UMEASURE_UNIT_MIXED) {
- impl.identifier.append("-and-", status);
- serializeSingle(curr, true, impl.identifier, status);
- } else {
- if (prev.dimensionality > 0 && curr.dimensionality < 0) {
- impl.identifier.append("-per-", status);
- } else {
- impl.identifier.append('-', status);
- }
- serializeSingle(curr, false, impl.identifier, status);
- }
- }
-}
-
} // namespace
@@ -758,6 +658,42 @@
return gSimpleUnits[index];
}
+void SingleUnitImpl::appendNeutralIdentifier(CharString &result, UErrorCode &status) const {
+ int32_t absPower = std::abs(this->dimensionality);
+
+ U_ASSERT(absPower > 0); // "this function does not support the dimensionless single units";
+
+ if (absPower == 1) {
+ // no-op
+ } else if (absPower == 2) {
+ result.append(StringPiece("square-"), status);
+ } else if (absPower == 3) {
+ result.append(StringPiece("cubic-"), status);
+ } else if (absPower <= 15) {
+ result.append(StringPiece("pow"), status);
+ result.appendNumber(absPower, status);
+ result.append(StringPiece("-"), status);
+ } else {
+ status = U_ILLEGAL_ARGUMENT_ERROR; // Unit Identifier Syntax Error
+ return;
+ }
+
+ if (U_FAILURE(status)) {
+ return;
+ }
+
+ if (this->siPrefix != UMEASURE_SI_PREFIX_ONE) {
+ for (const auto &siPrefixInfo : gSIPrefixStrings) {
+ if (siPrefixInfo.value == this->siPrefix) {
+ result.append(siPrefixInfo.string, status);
+ break;
+ }
+ }
+ }
+
+ result.append(StringPiece(this->getSimpleUnitID()), status);
+}
+
MeasureUnitImpl::MeasureUnitImpl(const MeasureUnitImpl &other, UErrorCode &status) {
*this = other.copy(status);
}
@@ -853,8 +789,69 @@
return result;
}
+/**
+ * Normalize a MeasureUnitImpl and generate the identifier string in place.
+ */
+void MeasureUnitImpl::serialize(UErrorCode &status) {
+ if (U_FAILURE(status)) {
+ return;
+ }
+
+ if (this->singleUnits.length() == 0) {
+ // Dimensionless, constructed by the default constructor.
+ return;
+ }
+
+ if (this->complexity == UMEASURE_UNIT_COMPOUND) {
+ // Note: don't sort a MIXED unit
+ uprv_sortArray(this->singleUnits.getAlias(), this->singleUnits.length(),
+ sizeof(this->singleUnits[0]), compareSingleUnits, nullptr, false, &status);
+ if (U_FAILURE(status)) {
+ return;
+ }
+ }
+
+ CharString result;
+ bool beforePer = true;
+ bool firstTimeNegativeDimension = false;
+ for (int32_t i = 0; i < this->singleUnits.length(); i++) {
+ if (beforePer && (*this->singleUnits[i]).dimensionality < 0) {
+ beforePer = false;
+ firstTimeNegativeDimension = true;
+ } else if ((*this->singleUnits[i]).dimensionality < 0) {
+ firstTimeNegativeDimension = false;
+ }
+
+ if (U_FAILURE(status)) {
+ return;
+ }
+
+ if (this->complexity == UMeasureUnitComplexity::UMEASURE_UNIT_MIXED) {
+ if (result.length() != 0) {
+ result.append(StringPiece("-and-"), status);
+ }
+ } else {
+ if (firstTimeNegativeDimension) {
+ if (result.length() == 0) {
+ result.append(StringPiece("per-"), status);
+ } else {
+ result.append(StringPiece("-per-"), status);
+ }
+ } else {
+ if (result.length() != 0) {
+ result.append(StringPiece("-"), status);
+ }
+ }
+ }
+
+ this->singleUnits[i]->appendNeutralIdentifier(result, status);
+ }
+
+ this->identifier = CharString(result, status);
+}
+
MeasureUnit MeasureUnitImpl::build(UErrorCode& status) && {
- serialize(*this, status);
+ this->serialize(status);
return MeasureUnit(std::move(*this));
}
diff --git a/icu4c/source/i18n/measunit_impl.h b/icu4c/source/i18n/measunit_impl.h
index a4f97cf..ef027ec 100644
--- a/icu4c/source/i18n/measunit_impl.h
+++ b/icu4c/source/i18n/measunit_impl.h
@@ -43,6 +43,12 @@
const char *getSimpleUnitID() const;
/**
+ * Generates and append a neutral identifier string for a single unit which means we do not include
+ * the dimension signal.
+ */
+ void appendNeutralIdentifier(CharString &result, UErrorCode &status) const;
+
+ /**
* Compare this SingleUnitImpl to another SingleUnitImpl for the sake of
* sorting and coalescing.
*
@@ -133,7 +139,8 @@
* Internal representation of measurement units. Capable of representing all complexities of units,
* including mixed and compound units.
*/
-struct U_I18N_API MeasureUnitImpl : public UMemory {
+class U_I18N_API MeasureUnitImpl : public UMemory {
+ public:
MeasureUnitImpl() = default;
MeasureUnitImpl(MeasureUnitImpl &&other) = default;
MeasureUnitImpl(const MeasureUnitImpl &other, UErrorCode &status);
@@ -233,6 +240,12 @@
* The full unit identifier. Owned by the MeasureUnitImpl. Empty if not computed.
*/
CharString identifier;
+
+ private:
+ /**
+ * Normalizes a MeasureUnitImpl and generate the identifier string in place.
+ */
+ void serialize(UErrorCode &status);
};
U_NAMESPACE_END
diff --git a/icu4c/source/i18n/unicode/measunit.h b/icu4c/source/i18n/unicode/measunit.h
index 8db6527..6761f84 100644
--- a/icu4c/source/i18n/unicode/measunit.h
+++ b/icu4c/source/i18n/unicode/measunit.h
@@ -30,7 +30,7 @@
U_NAMESPACE_BEGIN
class StringEnumeration;
-struct MeasureUnitImpl;
+class MeasureUnitImpl;
#ifndef U_HIDE_DRAFT_API
/**
@@ -3568,7 +3568,7 @@
/** Internal version of public API */
LocalArray<MeasureUnit> splitToSingleUnitsImpl(int32_t& outCount, UErrorCode& status) const;
- friend struct MeasureUnitImpl;
+ friend class MeasureUnitImpl;
};
#ifndef U_HIDE_DRAFT_API // @draft ICU 68
diff --git a/icu4c/source/test/intltest/strtest.cpp b/icu4c/source/test/intltest/strtest.cpp
index 043355f..93c7faf 100644
--- a/icu4c/source/test/intltest/strtest.cpp
+++ b/icu4c/source/test/intltest/strtest.cpp
@@ -255,6 +255,7 @@
TESTCASE_AUTO(TestStringByteSinkAppendU8);
TESTCASE_AUTO(TestCharString);
TESTCASE_AUTO(TestCStr);
+ TESTCASE_AUTO(TestCharStrAppendNumber);
TESTCASE_AUTO(Testctou);
TESTCASE_AUTO_END;
}
@@ -842,6 +843,36 @@
}
}
+void StringTest::TestCharStrAppendNumber() {
+ IcuTestErrorCode errorCode(*this, "TestCharStrAppendNumber()");
+
+ CharString testString;
+ testString.appendNumber(1, errorCode);
+ assertEquals("TestAppendNumber 1", "1", testString.data());
+
+ testString.clear();
+ testString.appendNumber(-1, errorCode);
+ assertEquals("TestAppendNumber -1", "-1", testString.data());
+
+ testString.clear();
+ testString.appendNumber(12345, errorCode);
+ assertEquals("TestAppendNumber 12345", "12345", testString.data());
+ testString.appendNumber(123, errorCode);
+ assertEquals("TestAppendNumber 12345 and then 123", "12345123", testString.data());
+
+ testString.clear();
+ testString.appendNumber(2147483647, errorCode);
+ assertEquals("TestAppendNumber when appending the biggest int32", "2147483647", testString.data());
+
+ testString.clear();
+ testString.appendNumber(-2147483648, errorCode);
+ assertEquals("TestAppendNumber when appending the smallest int32", "-2147483648", testString.data());
+
+ testString.clear();
+ testString.appendNumber(0, errorCode);
+ assertEquals("TestAppendNumber when appending zero", "0", testString.data());
+}
+
void
StringTest::Testctou() {
const char *cs = "Fa\\u0127mu";
diff --git a/icu4c/source/test/intltest/strtest.h b/icu4c/source/test/intltest/strtest.h
index 2a1b988..8890c0b 100644
--- a/icu4c/source/test/intltest/strtest.h
+++ b/icu4c/source/test/intltest/strtest.h
@@ -57,6 +57,7 @@
void TestSTLCompatibility();
void TestCharString();
void TestCStr();
+ void TestCharStrAppendNumber();
void Testctou();
};
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/units/MeasureUnitImpl.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/units/MeasureUnitImpl.java
index 0821aa2..e0aa3c5 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/units/MeasureUnitImpl.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/units/MeasureUnitImpl.java
@@ -223,6 +223,8 @@
// to this.result, we wish it to contain the zero-length string.
return;
}
+
+
if (this.complexity == MeasureUnit.Complexity.COMPOUND) {
// Note: don't sort a MIXED unit
Collections.sort(this.getSingleUnits(), new SingleUnitComparator());
@@ -240,7 +242,6 @@
firstTimeNegativeDimension = false;
}
- String singleUnitIdentifier = singleUnit.getNeutralIdentifier();
if (this.getComplexity() == MeasureUnit.Complexity.MIXED) {
if (result.length() != 0) {
result.append("-and-");
@@ -259,7 +260,7 @@
}
}
- result.append(singleUnitIdentifier);
+ result.append(singleUnit.getNeutralIdentifier());
}
this.identifier = result.toString();
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/units/SingleUnitImpl.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/units/SingleUnitImpl.java
index 0ca51c9..6afb71b 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/units/SingleUnitImpl.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/units/SingleUnitImpl.java
@@ -49,26 +49,25 @@
}
/**
- * Generates an neutral identifier string for a single unit which means we do not include the dimension signal.
+ * Generates a neutral identifier string for a single unit which means we do not include the dimension signal.
*/
public String getNeutralIdentifier() {
StringBuilder result = new StringBuilder();
- int posPower = Math.abs(this.getDimensionality());
+ int absPower = Math.abs(this.getDimensionality());
- assert posPower > 0 : "getIdentifier does not support the dimensionless";
+ assert absPower > 0 : "this function does not support the dimensionless single units";
- if (posPower == 1) {
+ if (absPower == 1) {
// no-op
- } else if (posPower == 2) {
+ } else if (absPower == 2) {
result.append("square-");
- } else if (posPower == 3) {
+ } else if (absPower == 3) {
result.append("cubic-");
- } else if (posPower <= 15) {
+ } else if (absPower <= 15) {
result.append("pow");
- result.append(posPower);
+ result.append(absPower);
result.append('-');
} else {
- // TODO: IllegalArgumentException might not be appropriate here
throw new IllegalArgumentException("Unit Identifier Syntax Error");
}