ICU-21060 Fix behaviour of -per-, -and-, and dimensionless units.
diff --git a/icu4c/source/i18n/measunit.cpp b/icu4c/source/i18n/measunit.cpp
index 344ba45..4edf130 100644
--- a/icu4c/source/i18n/measunit.cpp
+++ b/icu4c/source/i18n/measunit.cpp
@@ -537,9 +537,9 @@
"solar-mass",
"stone",
"ton",
- "one",
- "percent",
- "permille",
+ "", // TODO(ICU-21076): manual edit of what should have been generated by Java.
+ "percent", // TODO(ICU-21076): regenerate, deal with duplication.
+ "permille", // TODO(ICU-21076): regenerate, deal with duplication.
"gigawatt",
"horsepower",
"kilowatt",
diff --git a/icu4c/source/i18n/measunit_extra.cpp b/icu4c/source/i18n/measunit_extra.cpp
index e43ee65..ebc4ac3 100644
--- a/icu4c/source/i18n/measunit_extra.cpp
+++ b/icu4c/source/i18n/measunit_extra.cpp
@@ -34,17 +34,32 @@
// TODO: Propose a new error code for this?
constexpr UErrorCode kUnitIdentifierSyntaxError = U_ILLEGAL_ARGUMENT_ERROR;
-// This is to ensure we only insert positive integers into the trie
+// Trie value offset for SI Prefixes. This is big enough to ensure we only
+// insert positive integers into the trie.
constexpr int32_t kSIPrefixOffset = 64;
+// Trie value offset for compound parts, e.g. "-per-", "-", "-and-".
constexpr int32_t kCompoundPartOffset = 128;
enum CompoundPart {
+ // Represents "-per-"
COMPOUND_PART_PER = kCompoundPartOffset,
+ // Represents "-"
COMPOUND_PART_TIMES,
+ // Represents "-and-"
COMPOUND_PART_AND,
};
+// Trie value offset for "per-".
+constexpr int32_t kInitialCompoundPartOffset = 192;
+
+enum InitialCompoundPart {
+ // Represents "per-", the only compound part that can appear at the start of
+ // an identifier.
+ INITIAL_COMPOUND_PART_PER = kInitialCompoundPartOffset,
+};
+
+// Trie value offset for powers like "square-", "cubic-", "p2-" etc.
constexpr int32_t kPowerPartOffset = 256;
enum PowerPart {
@@ -64,6 +79,8 @@
POWER_PART_P15,
};
+// Trie value offset for simple units, e.g. "gram", "nautical-mile",
+// "fluid-ounce-imperial".
constexpr int32_t kSimpleUnitOffset = 512;
const struct SIPrefixStrings {
@@ -94,7 +111,6 @@
// TODO(ICU-21059): Get this list from data
const char16_t* const gSimpleUnits[] = {
- u"one", // note: expected to be index 0
u"candela",
u"carat",
u"gram",
@@ -227,6 +243,7 @@
b.add(u"-per-", COMPOUND_PART_PER, status);
b.add(u"-", COMPOUND_PART_TIMES, status);
b.add(u"-and-", COMPOUND_PART_AND, status);
+ b.add(u"per-", INITIAL_COMPOUND_PART_PER, status);
b.add(u"square-", POWER_PART_P2, status);
b.add(u"cubic-", POWER_PART_P3, status);
b.add(u"p2-", POWER_PART_P2, status);
@@ -270,28 +287,30 @@
enum Type {
TYPE_UNDEFINED,
TYPE_SI_PREFIX,
+ // Token type for "-per-", "-", and "-and-".
TYPE_COMPOUND_PART,
+ // Token type for "per-".
+ TYPE_INITIAL_COMPOUND_PART,
TYPE_POWER_PART,
- TYPE_ONE,
TYPE_SIMPLE_UNIT,
};
+ // Calling getType() is invalid, resulting in an assertion failure, if Token
+ // value isn't positive.
Type getType() const {
- if (fMatch <= 0) {
- UPRV_UNREACHABLE;
- }
+ U_ASSERT(fMatch > 0);
if (fMatch < kCompoundPartOffset) {
return TYPE_SI_PREFIX;
}
- if (fMatch < kPowerPartOffset) {
+ if (fMatch < kInitialCompoundPartOffset) {
return TYPE_COMPOUND_PART;
}
+ if (fMatch < kPowerPartOffset) {
+ return TYPE_INITIAL_COMPOUND_PART;
+ }
if (fMatch < kSimpleUnitOffset) {
return TYPE_POWER_PART;
}
- if (fMatch == kSimpleUnitOffset) {
- return TYPE_ONE;
- }
return TYPE_SIMPLE_UNIT;
}
@@ -300,11 +319,22 @@
return static_cast<UMeasureSIPrefix>(fMatch - kSIPrefixOffset);
}
+ // Valid only for tokens with type TYPE_COMPOUND_PART.
int32_t getMatch() const {
U_ASSERT(getType() == TYPE_COMPOUND_PART);
return fMatch;
}
+ int32_t getInitialCompoundPart() const {
+ // Even if there is only one InitialCompoundPart value, we have this
+ // function for the simplicity of code consistency.
+ U_ASSERT(getType() == TYPE_INITIAL_COMPOUND_PART);
+ // Defensive: if this assert fails, code using this function also needs
+ // to change.
+ U_ASSERT(fMatch == INITIAL_COMPOUND_PART_PER);
+ return fMatch;
+ }
+
int8_t getPower() const {
U_ASSERT(getType() == TYPE_POWER_PART);
return static_cast<int8_t>(fMatch - kPowerPartOffset);
@@ -347,6 +377,7 @@
}
private:
+ // Tracks parser progress: the offset into fSource.
int32_t fIndex = 0;
// Since we're not owning this memory, whatever is passed to the constructor
@@ -355,6 +386,9 @@
StringPiece fSource;
UCharsTrie fTrie;
+ // Set to true when we've seen a "-per-" or a "per-", after which all units
+ // are in the denominator. Until we find an "-and-", at which point the
+ // identifier is invalid pending TODO(CLDR-13700).
bool fAfterPer = false;
Parser() : fSource(""), fTrie(u"") {}
@@ -366,11 +400,17 @@
return fIndex < fSource.length();
}
+ // Returns the next Token parsed from fSource, advancing fIndex to the end
+ // of that token in fSource. In case of U_FAILURE(status), the token
+ // returned will cause an abort if getType() is called on it.
Token nextToken(UErrorCode& status) {
fTrie.reset();
int32_t match = -1;
+ // Saves the position in the fSource string for the end of the most
+ // recent matching token.
int32_t previ = -1;
- do {
+ // Find the longest token that matches a value in the trie:
+ while (fIndex < fSource.length()) {
auto result = fTrie.next(fSource.data()[fIndex++]);
if (result == USTRINGTRIE_NO_MATCH) {
break;
@@ -385,7 +425,7 @@
}
U_ASSERT(result == USTRINGTRIE_INTERMEDIATE_VALUE);
// continue;
- } while (fIndex < fSource.length());
+ }
if (match < 0) {
status = kUnitIdentifierSyntaxError;
@@ -395,63 +435,88 @@
return Token(match);
}
+ /**
+ * Returns the next "single unit" via result.
+ *
+ * If a "-per-" was parsed, the result will have appropriate negative
+ * dimensionality.
+ *
+ * Returns an error if we parse both compound units and "-and-", since mixed
+ * compound units are not yet supported - TODO(CLDR-13700).
+ *
+ * @param result Will be overwritten by the result, if status shows success.
+ * @param sawAnd If an "-and-" was parsed prior to finding the "single
+ * unit", sawAnd is set to true. If not, it is left as is.
+ * @param status ICU error code.
+ */
void nextSingleUnit(SingleUnitImpl& result, bool& sawAnd, UErrorCode& status) {
- sawAnd = false;
if (U_FAILURE(status)) {
return;
}
- if (!hasNext()) {
- // probably "one"
- return;
- }
-
// state:
// 0 = no tokens seen yet (will accept power, SI prefix, or simple unit)
// 1 = power token seen (will not accept another power token)
// 2 = SI prefix token seen (will not accept a power or SI prefix token)
int32_t state = 0;
- // Maybe read a compound part
- if (fIndex != 0) {
- Token token = nextToken(status);
- if (U_FAILURE(status)) {
- return;
+ bool atStart = fIndex == 0;
+ Token token = nextToken(status);
+ if (U_FAILURE(status)) { return; }
+
+ if (atStart) {
+ // Identifiers optionally start with "per-".
+ if (token.getType() == Token::TYPE_INITIAL_COMPOUND_PART) {
+ U_ASSERT(token.getInitialCompoundPart() == INITIAL_COMPOUND_PART_PER);
+ fAfterPer = true;
+ result.dimensionality = -1;
+
+ token = nextToken(status);
+ if (U_FAILURE(status)) { return; }
}
+ } else {
+ // All other SingleUnit's are separated from previous SingleUnit's
+ // via a compound part:
if (token.getType() != Token::TYPE_COMPOUND_PART) {
status = kUnitIdentifierSyntaxError;
return;
}
+
switch (token.getMatch()) {
- case COMPOUND_PART_PER:
- if (fAfterPer) {
- status = kUnitIdentifierSyntaxError;
- return;
- }
- fAfterPer = true;
+ case COMPOUND_PART_PER:
+ if (sawAnd) {
+ // Mixed compound units not yet supported,
+ // TODO(CLDR-13700).
+ status = kUnitIdentifierSyntaxError;
+ return;
+ }
+ fAfterPer = true;
+ result.dimensionality = -1;
+ break;
+
+ case COMPOUND_PART_TIMES:
+ if (fAfterPer) {
result.dimensionality = -1;
- break;
+ }
+ break;
- case COMPOUND_PART_TIMES:
- if (fAfterPer) {
- result.dimensionality = -1;
- }
- break;
-
- case COMPOUND_PART_AND:
- sawAnd = true;
- fAfterPer = false;
- break;
+ case COMPOUND_PART_AND:
+ if (fAfterPer) {
+ // Can't start with "-and-", and mixed compound units
+ // not yet supported, TODO(CLDR-13700).
+ status = kUnitIdentifierSyntaxError;
+ return;
+ }
+ sawAnd = true;
+ break;
}
+
+ token = nextToken(status);
+ if (U_FAILURE(status)) { return; }
}
- // Read a unit
- while (hasNext()) {
- Token token = nextToken(status);
- if (U_FAILURE(status)) {
- return;
- }
-
+ // Read tokens until we have a complete SingleUnit or we reach the end.
+ while (true) {
switch (token.getType()) {
case Token::TYPE_POWER_PART:
if (state > 0) {
@@ -471,10 +536,6 @@
state = 2;
break;
- case Token::TYPE_ONE:
- // Skip "one" and go to the next unit
- return nextSingleUnit(result, sawAnd, status);
-
case Token::TYPE_SIMPLE_UNIT:
result.index = token.getSimpleUnitIndex();
return;
@@ -483,27 +544,38 @@
status = kUnitIdentifierSyntaxError;
return;
}
- }
- // We ran out of tokens before finding a complete single unit.
- status = kUnitIdentifierSyntaxError;
+ if (!hasNext()) {
+ // We ran out of tokens before finding a complete single unit.
+ status = kUnitIdentifierSyntaxError;
+ return;
+ }
+ token = nextToken(status);
+ if (U_FAILURE(status)) {
+ return;
+ }
+ }
}
+ /// @param result is modified, not overridden. Caller must pass in a
+ /// default-constructed (empty) MeasureUnitImpl instance.
void parseImpl(MeasureUnitImpl& result, UErrorCode& status) {
if (U_FAILURE(status)) {
return;
}
+ if (fSource.empty()) {
+ // The dimenionless unit: nothing to parse. leave result as is.
+ return;
+ }
int32_t unitNum = 0;
while (hasNext()) {
- bool sawAnd;
+ bool sawAnd = false;
SingleUnitImpl singleUnit;
nextSingleUnit(singleUnit, sawAnd, status);
if (U_FAILURE(status)) {
return;
}
- if (singleUnit.index == 0) {
- continue;
- }
+ U_ASSERT(!singleUnit.isDimensionless());
bool added = result.append(singleUnit, status);
if (sawAnd && !added) {
// Two similar units are not allowed in a mixed unit
@@ -511,9 +583,12 @@
return;
}
if ((++unitNum) >= 2) {
- UMeasureUnitComplexity complexity = sawAnd
- ? UMEASURE_UNIT_MIXED
- : UMEASURE_UNIT_COMPOUND;
+ // nextSingleUnit fails appropriately for "per" and "and" in the
+ // same identifier. It doesn't fail for other compound units
+ // (COMPOUND_PART_TIMES). Consequently we take care of that
+ // here.
+ UMeasureUnitComplexity complexity =
+ sawAnd ? UMEASURE_UNIT_MIXED : UMEASURE_UNIT_COMPOUND;
if (unitNum == 2) {
U_ASSERT(result.complexity == UMEASURE_UNIT_SINGLE);
result.complexity = complexity;
@@ -536,15 +611,22 @@
/**
* Generate the identifier string for a single unit in place.
+ *
+ * Does not support the dimensionless SingleUnitImpl: calling serializeSingle
+ * with the dimensionless unit results in an U_INTERNAL_PROGRAM_ERROR.
+ *
+ * @param first If singleUnit is part of a compound unit, and not its first
+ * single unit, set this to false. Otherwise: set to true.
*/
void serializeSingle(const SingleUnitImpl& singleUnit, bool first, CharString& output, UErrorCode& status) {
if (first && singleUnit.dimensionality < 0) {
- output.append("one-per-", status);
+ // Essentially the "unary per". For compound units with a numerator, the
+ // caller takes care of the "binary per".
+ output.append("per-", status);
}
- if (singleUnit.index == 0) {
- // Don't propagate SI prefixes and powers on one
- output.append("one", status);
+ if (singleUnit.isDimensionless()) {
+ status = U_INTERNAL_PROGRAM_ERROR;
return;
}
int8_t posPower = std::abs(singleUnit.dimensionality);
@@ -595,7 +677,8 @@
}
U_ASSERT(impl.identifier.isEmpty());
if (impl.units.length() == 0) {
- impl.identifier.append("one", status);
+ // Dimensionless, constructed by the default constructor: no appending
+ // to impl.identifier, we wish it to contain the zero-length string.
return;
}
if (impl.complexity == UMEASURE_UNIT_COMPOUND) {
@@ -634,8 +717,17 @@
}
-/** @return true if a new item was added */
+/**
+ * Appends a SingleUnitImpl to a MeasureUnitImpl.
+ *
+ * @return true if a new item was added. If unit is the dimensionless unit, it
+ * is never added: the return value will always be false.
+ */
bool appendImpl(MeasureUnitImpl& impl, const SingleUnitImpl& unit, UErrorCode& status) {
+ if (unit.isDimensionless()) {
+ // We don't append dimensionless units.
+ return false;
+ }
// Find a similar unit that already exists, to attempt to coalesce
SingleUnitImpl* oldUnit = nullptr;
for (int32_t i = 0; i < impl.units.length(); i++) {
@@ -645,6 +737,8 @@
}
}
if (oldUnit) {
+ // Both dimensionalities will be positive, or both will be negative, by
+ // virtue of isCompatibleWith().
oldUnit->dimensionality += unit.dimensionality;
} else {
SingleUnitImpl* destination = impl.units.emplaceBack();
@@ -744,7 +838,12 @@
}
int32_t MeasureUnit::getDimensionality(UErrorCode& status) const {
- return SingleUnitImpl::forMeasureUnit(*this, status).dimensionality;
+ SingleUnitImpl singleUnit = SingleUnitImpl::forMeasureUnit(*this, status);
+ if (U_FAILURE(status)) { return 0; }
+ if (singleUnit.isDimensionless()) {
+ return 0;
+ }
+ return singleUnit.dimensionality;
}
MeasureUnit MeasureUnit::withDimensionality(int32_t dimensionality, UErrorCode& status) const {
diff --git a/icu4c/source/i18n/measunit_impl.h b/icu4c/source/i18n/measunit_impl.h
index 9657ff2..c69d243 100644
--- a/icu4c/source/i18n/measunit_impl.h
+++ b/icu4c/source/i18n/measunit_impl.h
@@ -25,14 +25,25 @@
struct SingleUnitImpl : public UMemory {
/**
* Gets a single unit from the MeasureUnit. If there are multiple single units, sets an error
- * code and return the base dimensionless unit. Parses if necessary.
+ * code and returns the base dimensionless unit. Parses if necessary.
*/
static SingleUnitImpl forMeasureUnit(const MeasureUnit& measureUnit, UErrorCode& status);
/** Transform this SingleUnitImpl into a MeasureUnit, simplifying if possible. */
MeasureUnit build(UErrorCode& status) const;
- /** Compare this SingleUnitImpl to another SingleUnitImpl. */
+ /**
+ * Compare this SingleUnitImpl to another SingleUnitImpl for the sake of
+ * sorting and coalescing.
+ *
+ * Takes the sign of dimensionality into account, but not the absolute
+ * value: per-meter is not considered the same as meter, but meter is
+ * considered the same as square-meter.
+ *
+ * The dimensionless unit generally does not get compared, but if it did, it
+ * would sort before other units by virtue of index being < 0 and
+ * dimensionality not being negative.
+ */
int32_t compareTo(const SingleUnitImpl& other) const {
if (dimensionality < 0 && other.dimensionality > 0) {
// Positive dimensions first
@@ -66,13 +77,36 @@
return (compareTo(other) == 0);
}
- /** Simple unit index, unique for every simple unit. */
- int32_t index = 0;
+ /**
+ * Returns true if this unit is the "dimensionless base unit", as produced
+ * by the MeasureUnit() default constructor. (This does not include the
+ * likes of concentrations or angles.)
+ */
+ bool isDimensionless() const {
+ return index == -1;
+ }
- /** SI prefix. **/
+ /**
+ * Simple unit index, unique for every simple unit, -1 for the dimensionless
+ * unit. This is an index into a string list in measunit_extra.cpp.
+ *
+ * The default value is -1, meaning the dimensionless unit:
+ * isDimensionless() will return true, until index is changed.
+ */
+ int32_t index = -1;
+
+ /**
+ * SI prefix.
+ *
+ * This is ignored for the dimensionless unit.
+ */
UMeasureSIPrefix siPrefix = UMEASURE_SI_PREFIX_ONE;
-
- /** Dimensionality. **/
+
+ /**
+ * Dimensionality.
+ *
+ * This is meaningless for the dimensionless unit.
+ */
int32_t dimensionality = 1;
};
@@ -92,7 +126,8 @@
*
* @param identifier The unit identifier string.
* @param status Set if the identifier string is not valid.
- * @return A newly parsed value object.
+ * @return A newly parsed value object. Behaviour of this unit is
+ * unspecified if an error is returned via status.
*/
static MeasureUnitImpl forIdentifier(StringPiece identifier, UErrorCode& status);
@@ -145,15 +180,23 @@
/** Mutates this MeasureUnitImpl to take the reciprocal. */
void takeReciprocal(UErrorCode& status);
- /** Mutates this MeasureUnitImpl to append a single unit. */
+ /**
+ * Mutates this MeasureUnitImpl to append a single unit.
+ *
+ * @return true if a new item was added. If unit is the dimensionless unit,
+ * it is never added: the return value will always be false.
+ */
bool append(const SingleUnitImpl& singleUnit, UErrorCode& status);
/** The complexity, either SINGLE, COMPOUND, or MIXED. */
UMeasureUnitComplexity complexity = UMEASURE_UNIT_SINGLE;
/**
- * The list of simple units. These may be summed or multiplied, based on the value of the
- * complexity field.
+ * The list of simple units. These may be summed or multiplied, based on the
+ * value of the complexity field.
+ *
+ * The "dimensionless" unit (SingleUnitImpl default constructor) must not be
+ * added to this list.
*/
MaybeStackVector<SingleUnitImpl> units;
diff --git a/icu4c/source/i18n/nounit.cpp b/icu4c/source/i18n/nounit.cpp
index b993cb5..1d4aa05 100644
--- a/icu4c/source/i18n/nounit.cpp
+++ b/icu4c/source/i18n/nounit.cpp
@@ -11,7 +11,7 @@
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(NoUnit)
NoUnit U_EXPORT2 NoUnit::base() {
- return NoUnit("one");
+ return NoUnit("");
}
NoUnit U_EXPORT2 NoUnit::percent() {
diff --git a/icu4c/source/i18n/unicode/measunit.h b/icu4c/source/i18n/unicode/measunit.h
index d221fd8..e240092 100644
--- a/icu4c/source/i18n/unicode/measunit.h
+++ b/icu4c/source/i18n/unicode/measunit.h
@@ -37,7 +37,7 @@
* Enumeration for unit complexity. There are three levels:
*
* - SINGLE: A single unit, optionally with a power and/or SI prefix. Examples: hectare,
- * square-kilometer, kilojoule, one-per-second.
+ * square-kilometer, kilojoule, per-second.
* - COMPOUND: A unit composed of the product of multiple single units. Examples:
* meter-per-second, kilowatt-hour, kilogram-meter-per-square-second.
* - MIXED: A unit composed of the sum of multiple single units. Examples: foot+inch,
@@ -387,6 +387,8 @@
* NOTE: Only works on SINGLE units. If this is a COMPOUND or MIXED unit, an error will
* occur. For more information, see UMeasureUnitComplexity.
*
+ * For the base dimensionless unit, withDimensionality does nothing.
+ *
* @param dimensionality The dimensionality (power).
* @param status Set if this is not a SINGLE unit or if another error occurs.
* @return A new SINGLE unit.
@@ -401,6 +403,8 @@
* NOTE: Only works on SINGLE units. If this is a COMPOUND or MIXED unit, an error will
* occur. For more information, see UMeasureUnitComplexity.
*
+ * For the base dimensionless unit, getDimensionality returns 0.
+ *
* @param status Set if this is not a SINGLE unit or if another error occurs.
* @return The dimensionality (power) of this simple unit.
* @draft ICU 67
@@ -447,7 +451,7 @@
*
* Examples:
* - Given "meter-kilogram-per-second", three units will be returned: "meter",
- * "kilogram", and "one-per-second".
+ * "kilogram", and "per-second".
* - Given "hour+minute+second", three units will be returned: "hour", "minute",
* and "second".
*
@@ -3375,11 +3379,15 @@
private:
- // If non-null, fImpl is owned by the MeasureUnit.
+ // Used by new draft APIs in ICU 67. If non-null, fImpl is owned by the
+ // MeasureUnit.
MeasureUnitImpl* fImpl;
- // These two ints are indices into static string lists in measunit.cpp
+ // An index into a static string list in measunit.cpp. If set to -1, fImpl
+ // is in use instead of fTypeId and fSubTypeId.
int16_t fSubTypeId;
+ // An index into a static string list in measunit.cpp. If set to -1, fImpl
+ // is in use instead of fTypeId and fSubTypeId.
int8_t fTypeId;
MeasureUnit(int32_t typeId, int32_t subTypeId);
@@ -3389,7 +3397,11 @@
static MeasureUnit *create(int typeId, int subTypeId, UErrorCode &status);
/**
- * @return Whether subType is known to ICU.
+ * Sets output's typeId and subTypeId according to subType, if subType is a
+ * valid/known identifier.
+ *
+ * @return Whether subType is known to ICU. If false, output was not
+ * modified.
*/
static bool findBySubType(StringPiece subType, MeasureUnit* output);
diff --git a/icu4c/source/test/intltest/measfmttest.cpp b/icu4c/source/test/intltest/measfmttest.cpp
index 79eb9b4..51a85fe 100644
--- a/icu4c/source/test/intltest/measfmttest.cpp
+++ b/icu4c/source/test/intltest/measfmttest.cpp
@@ -79,9 +79,10 @@
void Test20332_PersonUnits();
void TestNumericTime();
void TestNumericTimeSomeSpecialFormats();
+ void TestIdentifiers();
void TestInvalidIdentifiers();
void TestCompoundUnitOperations();
- void TestIdentifiers();
+ void TestDimensionlessBehaviour();
void Test21060_AddressSanitizerProblem();
void verifyFormat(
@@ -202,9 +203,10 @@
TESTCASE_AUTO(Test20332_PersonUnits);
TESTCASE_AUTO(TestNumericTime);
TESTCASE_AUTO(TestNumericTimeSomeSpecialFormats);
+ TESTCASE_AUTO(TestIdentifiers);
TESTCASE_AUTO(TestInvalidIdentifiers);
TESTCASE_AUTO(TestCompoundUnitOperations);
- TESTCASE_AUTO(TestIdentifiers);
+ TESTCASE_AUTO(TestDimensionlessBehaviour);
TESTCASE_AUTO(Test21060_AddressSanitizerProblem);
TESTCASE_AUTO_END;
}
@@ -3239,10 +3241,43 @@
verifyFormat("Danish fhoursFminutes", fmtDa, fhoursFminutes, 2, "2.03,877");
}
+void MeasureFormatTest::TestIdentifiers() {
+ IcuTestErrorCode status(*this, "TestIdentifiers");
+ struct TestCase {
+ const char* id;
+ const char* normalized;
+ } cases[] = {
+ // Correctly normalized identifiers should not change
+ {"", ""},
+ {"square-meter-per-square-meter", "square-meter-per-square-meter"},
+ {"kilogram-meter-per-square-meter-square-second",
+ "kilogram-meter-per-square-meter-square-second"},
+ {"square-mile-and-square-foot", "square-mile-and-square-foot"},
+ {"square-foot-and-square-mile", "square-foot-and-square-mile"},
+ {"per-cubic-centimeter", "per-cubic-centimeter"},
+ {"per-kilometer", "per-kilometer"},
+
+ // Normalization of power and per
+ {"p2-foot-and-p2-mile", "square-foot-and-square-mile"},
+ {"gram-square-gram-per-dekagram", "cubic-gram-per-dekagram"},
+ {"kilogram-per-meter-per-second", "kilogram-per-meter-second"},
+
+ // TODO(ICU-20920): Add more test cases once the proper ranking is available.
+ };
+ for (const auto &cas : cases) {
+ status.setScope(cas.id);
+ MeasureUnit unit = MeasureUnit::forIdentifier(cas.id, status);
+ status.errIfFailureAndReset();
+ const char* actual = unit.getIdentifier();
+ assertEquals(cas.id, cas.normalized, actual);
+ status.errIfFailureAndReset();
+ }
+}
+
void MeasureFormatTest::TestInvalidIdentifiers() {
IcuTestErrorCode status(*this, "TestInvalidIdentifiers");
- const char* const inputs[] = {
+ const char *const inputs[] = {
"kilo",
"kilokilo",
"onekilo",
@@ -3258,7 +3293,23 @@
"-p2-meter",
"+p2-meter",
"+",
- "-"
+ "-",
+ "-mile",
+ "-and-mile",
+ "-per-mile",
+ "one",
+ "one-one",
+ "one-per-mile",
+ "one-per-cubic-centimeter",
+ "square--per-meter",
+ "metersecond", // Must have compound part in between single units
+
+ // Negative powers not supported in mixed units yet. TODO(CLDR-13701).
+ "per-hour-and-hertz",
+ "hertz-and-per-hour",
+
+ // Compound units not supported in mixed units yet. TODO(CLDR-13700).
+ "kilonewton-meter-and-newton-meter",
};
for (const auto& input : inputs) {
@@ -3295,9 +3346,9 @@
MeasureUnit overQuarticKilometer1 = kilometer.withDimensionality(-4, status);
verifySingleUnit(squareMeter, UMEASURE_SI_PREFIX_ONE, 2, "square-meter");
- verifySingleUnit(overCubicCentimeter, UMEASURE_SI_PREFIX_CENTI, -3, "one-per-cubic-centimeter");
+ verifySingleUnit(overCubicCentimeter, UMEASURE_SI_PREFIX_CENTI, -3, "per-cubic-centimeter");
verifySingleUnit(quarticKilometer, UMEASURE_SI_PREFIX_KILO, 4, "p4-kilometer");
- verifySingleUnit(overQuarticKilometer1, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer");
+ verifySingleUnit(overQuarticKilometer1, UMEASURE_SI_PREFIX_KILO, -4, "per-p4-kilometer");
assertTrue("power inequality", quarticKilometer != overQuarticKilometer1);
@@ -3310,9 +3361,9 @@
.reciprocal(status)
.withSIPrefix(UMEASURE_SI_PREFIX_KILO, status);
- verifySingleUnit(overQuarticKilometer2, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer");
- verifySingleUnit(overQuarticKilometer3, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer");
- verifySingleUnit(overQuarticKilometer4, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer");
+ verifySingleUnit(overQuarticKilometer2, UMEASURE_SI_PREFIX_KILO, -4, "per-p4-kilometer");
+ verifySingleUnit(overQuarticKilometer3, UMEASURE_SI_PREFIX_KILO, -4, "per-p4-kilometer");
+ verifySingleUnit(overQuarticKilometer4, UMEASURE_SI_PREFIX_KILO, -4, "per-p4-kilometer");
assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer2);
assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer3);
@@ -3343,7 +3394,7 @@
const char* secondCentimeterSub[] = {"centimeter", "square-kilosecond"};
verifyCompoundUnit(secondCentimeter, "centimeter-square-kilosecond",
secondCentimeterSub, UPRV_LENGTHOF(secondCentimeterSub));
- const char* secondCentimeterPerKilometerSub[] = {"centimeter", "square-kilosecond", "one-per-kilometer"};
+ const char* secondCentimeterPerKilometerSub[] = {"centimeter", "square-kilosecond", "per-kilometer"};
verifyCompoundUnit(secondCentimeterPerKilometer, "centimeter-square-kilosecond-per-kilometer",
secondCentimeterPerKilometerSub, UPRV_LENGTHOF(secondCentimeterPerKilometerSub));
@@ -3378,31 +3429,16 @@
assertTrue("order matters inequality", footInch != inchFoot);
- MeasureUnit one1;
- MeasureUnit one2 = MeasureUnit::forIdentifier("one", status);
- MeasureUnit one3 = MeasureUnit::forIdentifier("", status);
- MeasureUnit squareOne = one2.withDimensionality(2, status);
- MeasureUnit onePerOne = one2.reciprocal(status);
- MeasureUnit squareKiloOne = squareOne.withSIPrefix(UMEASURE_SI_PREFIX_KILO, status);
- MeasureUnit onePerSquareKiloOne = squareKiloOne.reciprocal(status);
- MeasureUnit oneOne = MeasureUnit::forIdentifier("one-one", status);
- MeasureUnit onePlusOne = MeasureUnit::forIdentifier("one-and-one", status);
- MeasureUnit kilometer2 = one2.product(kilometer, status);
+ MeasureUnit dimensionless;
+ MeasureUnit dimensionless2 = MeasureUnit::forIdentifier("", status);
+ status.errIfFailureAndReset("Dimensionless MeasureUnit.");
+ assertTrue("dimensionless equality", dimensionless == dimensionless2);
- verifySingleUnit(one1, UMEASURE_SI_PREFIX_ONE, 1, "one");
- verifySingleUnit(one2, UMEASURE_SI_PREFIX_ONE, 1, "one");
- verifySingleUnit(one3, UMEASURE_SI_PREFIX_ONE, 1, "one");
- verifySingleUnit(squareOne, UMEASURE_SI_PREFIX_ONE, 1, "one");
- verifySingleUnit(onePerOne, UMEASURE_SI_PREFIX_ONE, 1, "one");
- verifySingleUnit(squareKiloOne, UMEASURE_SI_PREFIX_ONE, 1, "one");
- verifySingleUnit(onePerSquareKiloOne, UMEASURE_SI_PREFIX_ONE, 1, "one");
- verifySingleUnit(oneOne, UMEASURE_SI_PREFIX_ONE, 1, "one");
- verifySingleUnit(onePlusOne, UMEASURE_SI_PREFIX_ONE, 1, "one");
+ // We support starting from an "identity" MeasureUnit and then combining it
+ // with others via product:
+ MeasureUnit kilometer2 = dimensionless.product(kilometer, status);
+ status.errIfFailureAndReset("dimensionless.product(kilometer, status)");
verifySingleUnit(kilometer2, UMEASURE_SI_PREFIX_KILO, 1, "kilometer");
-
- assertTrue("one equality", one1 == one2);
- assertTrue("one equality", one2 == one3);
- assertTrue("one-per-one equality", onePerOne == onePerSquareKiloOne);
assertTrue("kilometer equality", kilometer == kilometer2);
// Test out-of-range powers
@@ -3413,49 +3449,81 @@
status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR);
MeasureUnit power16b = power15.product(kilometer, status);
status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR);
- MeasureUnit powerN15 = MeasureUnit::forIdentifier("one-per-p15-kilometer", status);
- verifySingleUnit(powerN15, UMEASURE_SI_PREFIX_KILO, -15, "one-per-p15-kilometer");
+ MeasureUnit powerN15 = MeasureUnit::forIdentifier("per-p15-kilometer", status);
+ verifySingleUnit(powerN15, UMEASURE_SI_PREFIX_KILO, -15, "per-p15-kilometer");
status.errIfFailureAndReset();
- MeasureUnit powerN16a = MeasureUnit::forIdentifier("one-per-p16-kilometer", status);
+ MeasureUnit powerN16a = MeasureUnit::forIdentifier("per-p16-kilometer", status);
status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR);
MeasureUnit powerN16b = powerN15.product(overQuarticKilometer1, status);
status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR);
}
-void MeasureFormatTest::TestIdentifiers() {
- IcuTestErrorCode status(*this, "TestIdentifiers");
- struct TestCase {
- bool valid;
- const char* id;
- const char* normalized;
- } cases[] = {
- {true, "square-meter-per-square-meter", "square-meter-per-square-meter"},
- {true, "kilogram-meter-per-square-meter-square-second",
- "kilogram-meter-per-square-meter-square-second"},
- // TODO(ICU-20920): Add more test cases once the proper ranking is available.
- };
- for (const auto& cas : cases) {
- status.setScope(cas.id);
- MeasureUnit unit = MeasureUnit::forIdentifier(cas.id, status);
- if (!cas.valid) {
- status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR);
- continue;
- }
- const char* actual = unit.getIdentifier();
- assertEquals(cas.id, cas.normalized, actual);
- status.errIfFailureAndReset();
- }
+void MeasureFormatTest::TestDimensionlessBehaviour() {
+ IcuTestErrorCode status(*this, "TestDimensionlessBehaviour");
+ MeasureUnit dimensionless;
+ MeasureUnit modified;
+
+ // At the time of writing, each of the seven groups below caused
+ // Parser::from("") to be called:
+
+ // splitToSingleUnits
+ int32_t count;
+ LocalArray<MeasureUnit> singles = dimensionless.splitToSingleUnits(count, status);
+ status.errIfFailureAndReset("dimensionless.splitToSingleUnits(...)");
+ assertEquals("no singles in dimensionless", 0, count);
+
+ // product(dimensionless)
+ MeasureUnit mile = MeasureUnit::getMile();
+ mile = mile.product(dimensionless, status);
+ status.errIfFailureAndReset("mile.product(dimensionless, ...)");
+ verifySingleUnit(mile, UMEASURE_SI_PREFIX_ONE, 1, "mile");
+
+ // dimensionless.getSIPrefix()
+ UMeasureSIPrefix siPrefix = dimensionless.getSIPrefix(status);
+ status.errIfFailureAndReset("dimensionless.getSIPrefix(...)");
+ assertEquals("dimensionless SIPrefix", UMEASURE_SI_PREFIX_ONE, siPrefix);
+
+ // dimensionless.withSIPrefix()
+ modified = dimensionless.withSIPrefix(UMEASURE_SI_PREFIX_KILO, status);
+ status.errIfFailureAndReset("dimensionless.withSIPrefix(...)");
+ singles = modified.splitToSingleUnits(count, status);
+ assertEquals("no singles in modified", 0, count);
+ siPrefix = modified.getSIPrefix(status);
+ status.errIfFailureAndReset("modified.getSIPrefix(...)");
+ assertEquals("modified SIPrefix", UMEASURE_SI_PREFIX_ONE, siPrefix);
+
+ // dimensionless.getComplexity()
+ UMeasureUnitComplexity complexity = dimensionless.getComplexity(status);
+ status.errIfFailureAndReset("dimensionless.getComplexity(...)");
+ assertEquals("dimensionless complexity", UMEASURE_UNIT_SINGLE, complexity);
+
+ // Dimensionality is mostly meaningless for dimensionless units, but it's
+ // still considered a SINGLE unit, so this code doesn't throw errors:
+
+ // dimensionless.getDimensionality()
+ int32_t dimensionality = dimensionless.getDimensionality(status);
+ status.errIfFailureAndReset("dimensionless.getDimensionality(...)");
+ assertEquals("dimensionless dimensionality", 0, dimensionality);
+
+ // dimensionless.withDimensionality()
+ dimensionless.withDimensionality(-1, status);
+ status.errIfFailureAndReset("dimensionless.withDimensionality(...)");
+ dimensionality = dimensionless.getDimensionality(status);
+ status.errIfFailureAndReset("dimensionless.getDimensionality(...)");
+ assertEquals("dimensionless dimensionality", 0, dimensionality);
}
// ICU-21060
void MeasureFormatTest::Test21060_AddressSanitizerProblem() {
- UErrorCode status = U_ZERO_ERROR;
- MeasureUnit first = MeasureUnit::forIdentifier("one", status);
+ IcuTestErrorCode status(*this, "Test21060_AddressSanitizerProblem");
+
+ MeasureUnit first = MeasureUnit::forIdentifier("", status);
+ status.errIfFailureAndReset();
// Experimentally, a compound unit like "kilogram-meter" failed. A single
// unit like "kilogram" or "meter" did not fail, did not trigger the
// problem.
- MeasureUnit crux = MeasureUnit::forIdentifier("one-per-meter", status);
+ MeasureUnit crux = MeasureUnit::forIdentifier("per-meter", status);
// Heap allocation of a new CharString for first.identifier happens here:
first = first.product(crux, status);
@@ -3469,12 +3537,14 @@
first = first.product(crux, status);
// Proving we've had no failure yet:
- if (U_FAILURE(status)) return;
+ status.errIfFailureAndReset();
// heap-use-after-free failure happened here, since a SingleUnitImpl had
// held onto a StringPiece pointing at a substring of an identifier that was
// freed above:
second = second.product(crux, status);
+
+ status.errIfFailureAndReset();
}