ICU-21340 Don't coalesce adjacent list formatter fields in ICU4C
See #1425
diff --git a/icu4c/source/i18n/formattedval_impl.h b/icu4c/source/i18n/formattedval_impl.h
index 837e89b..4cfb0f0 100644
--- a/icu4c/source/i18n/formattedval_impl.h
+++ b/icu4c/source/i18n/formattedval_impl.h
@@ -117,6 +117,12 @@
};
+// Internal struct that must be exported for MSVC
+struct U_I18N_API SpanInfo {
+ int32_t spanValue;
+ int32_t length;
+};
+
// Export an explicit template instantiation of the MaybeStackArray that
// is used as a data member of CEBuffer.
//
@@ -126,7 +132,7 @@
// See digitlst.h, pluralaffix.h, datefmt.h, and others for similar examples.
//
#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN
-template class U_I18N_API MaybeStackArray<int32_t, 8>;
+template class U_I18N_API MaybeStackArray<SpanInfo, 8>;
#endif
/**
@@ -162,13 +168,19 @@
return fString;
}
- void appendSpanIndex(int32_t index);
- void prependSpanIndex(int32_t index);
+ /**
+ * Adds additional metadata used for span fields.
+ *
+ * spanValue: the index of the list item, for example.
+ * length: the length of the span, used to split adjacent fields.
+ */
+ void appendSpanInfo(int32_t spanValue, int32_t length);
+ void prependSpanInfo(int32_t spanValue, int32_t length);
private:
FormattedStringBuilder fString;
FormattedStringBuilder::Field fNumericField;
- MaybeStackArray<int32_t, 8> spanIndices;
+ MaybeStackArray<SpanInfo, 8> spanIndices;
bool nextPositionImpl(ConstrainedFieldPosition& cfpos, FormattedStringBuilder::Field numericField, UErrorCode& status) const;
static bool isIntOrGroup(FormattedStringBuilder::Field field);
diff --git a/icu4c/source/i18n/formattedval_sbimpl.cpp b/icu4c/source/i18n/formattedval_sbimpl.cpp
index b2ae4c3..aad4438 100644
--- a/icu4c/source/i18n/formattedval_sbimpl.cpp
+++ b/icu4c/source/i18n/formattedval_sbimpl.cpp
@@ -46,19 +46,19 @@
UBool FormattedValueStringBuilderImpl::nextPosition(ConstrainedFieldPosition& cfpos, UErrorCode& status) const {
// NOTE: MSVC sometimes complains when implicitly converting between bool and UBool
- return nextPositionImpl(cfpos, fNumericField, status) ? TRUE : FALSE;
+ return nextPositionImpl(cfpos, fNumericField, status) ? true : false;
}
UBool FormattedValueStringBuilderImpl::nextFieldPosition(FieldPosition& fp, UErrorCode& status) const {
int32_t rawField = fp.getField();
if (rawField == FieldPosition::DONT_CARE) {
- return FALSE;
+ return false;
}
if (rawField < 0 || rawField >= UNUM_FIELD_COUNT) {
status = U_ILLEGAL_ARGUMENT_ERROR;
- return FALSE;
+ return false;
}
ConstrainedFieldPosition cfpos;
@@ -67,7 +67,7 @@
if (nextPositionImpl(cfpos, kUndefinedField, status)) {
fp.setBeginIndex(cfpos.getStart());
fp.setEndIndex(cfpos.getLimit());
- return TRUE;
+ return true;
}
// Special case: fraction should start after integer if fraction is not present
@@ -85,7 +85,7 @@
fp.setEndIndex(i - fString.fZero);
}
- return FALSE;
+ return false;
}
void FormattedValueStringBuilderImpl::getAllFieldPositions(FieldPositionIteratorHandler& fpih,
@@ -103,23 +103,12 @@
bool FormattedValueStringBuilderImpl::nextPositionImpl(ConstrainedFieldPosition& cfpos, Field numericField, UErrorCode& /*status*/) const {
int32_t fieldStart = -1;
Field currField = kUndefinedField;
- UFieldCategory spanCategory = UFIELD_CATEGORY_UNDEFINED;
- int32_t spanValue;
for (int32_t i = fString.fZero + cfpos.getLimit(); i <= fString.fZero + fString.fLength; i++) {
Field _field = (i < fString.fZero + fString.fLength) ? fString.getFieldPtr()[i] : kEndField;
// Case 1: currently scanning a field.
if (currField != kUndefinedField) {
if (currField != _field) {
int32_t end = i - fString.fZero;
- // Handle span fields; don't trim them
- if (spanCategory != UFIELD_CATEGORY_UNDEFINED) {
- cfpos.setState(
- spanCategory,
- spanValue,
- fieldStart,
- end);
- return true;
- }
// Grouping separators can be whitespace; don't throw them out!
if (isTrimmable(currField)) {
end = trimBack(i - fString.fZero);
@@ -182,13 +171,11 @@
if (elementField == Field(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD)
&& cfpos.matchesField(elementField.getCategory(), elementField.getField())
&& (cfpos.getLimit() < i - fString.fZero || cfpos.getCategory() != elementField.getCategory())) {
- // Re-wind to the beginning of the field and then emit it
- int32_t j = i - 1;
- for (; j >= fString.fZero && fString.getFieldPtr()[j] == fString.getFieldPtr()[i-1]; j--) {}
+ int64_t si = cfpos.getInt64IterationContext() - 1;
cfpos.setState(
elementField.getCategory(),
elementField.getField(),
- j - fString.fZero + 1,
+ i - fString.fZero - spanIndices[si].length,
i - fString.fZero);
return true;
}
@@ -203,22 +190,28 @@
}
// Case 3: check for field starting at this position
// Case 3a: Need to add a SpanField
- if (_field == Field(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD)
- // don't return the same field twice in a row:
- && (i == fString.fZero
- || fString.getFieldPtr()[i-1].getCategory() != UFIELD_CATEGORY_LIST
- || fString.getFieldPtr()[i-1].getField() != ULISTFMT_ELEMENT_FIELD)) {
+ if (_field == Field(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD)) {
int64_t si = cfpos.getInt64IterationContext();
- spanValue = spanIndices[si];
+ int32_t spanValue = spanIndices[si].spanValue;
+ int32_t length = spanIndices[si].length;
cfpos.setInt64IterationContext(si + 1);
if (cfpos.matchesField(UFIELD_CATEGORY_LIST_SPAN, spanValue)) {
- spanCategory = UFIELD_CATEGORY_LIST_SPAN;
+ UFieldCategory spanCategory = UFIELD_CATEGORY_LIST_SPAN;
fieldStart = i - fString.fZero;
- currField = _field;
+ int32_t end = fieldStart + length;
+ cfpos.setState(
+ spanCategory,
+ spanValue,
+ fieldStart,
+ end);
+ return true;
+ } else {
+ // Failed to match; jump ahead
+ i += length - 1;
continue;
}
}
- // Case 3b: No SpanField or SpanField did not match
+ // Case 3b: No SpanField
if (cfpos.matchesField(_field.getCategory(), _field.getField())) {
fieldStart = i - fString.fZero;
currField = _field;
@@ -229,21 +222,21 @@
return false;
}
-void FormattedValueStringBuilderImpl::appendSpanIndex(int32_t position) {
- if (spanIndices.getCapacity() <= position) {
- spanIndices.resize(position * 2);
+void FormattedValueStringBuilderImpl::appendSpanInfo(int32_t spanValue, int32_t length) {
+ if (spanIndices.getCapacity() <= spanValue) {
+ spanIndices.resize(spanValue * 2);
}
- spanIndices[position] = position;
+ spanIndices[spanValue] = {spanValue, length};
}
-void FormattedValueStringBuilderImpl::prependSpanIndex(int32_t position) {
- if (spanIndices.getCapacity() <= position) {
- spanIndices.resize(position * 2);
+void FormattedValueStringBuilderImpl::prependSpanInfo(int32_t spanValue, int32_t length) {
+ if (spanIndices.getCapacity() <= spanValue) {
+ spanIndices.resize(spanValue * 2);
}
- for (int32_t i = 0; i < position; i++) {
+ for (int32_t i = spanValue - 1; i >= 0; i--) {
spanIndices[i+1] = spanIndices[i];
}
- spanIndices[0] = position;
+ spanIndices[0] = {spanValue, length};
}
bool FormattedValueStringBuilderImpl::isIntOrGroup(Field field) {
diff --git a/icu4c/source/i18n/listformatter.cpp b/icu4c/source/i18n/listformatter.cpp
index 66d76b5..2ee646f 100644
--- a/icu4c/source/i18n/listformatter.cpp
+++ b/icu4c/source/i18n/listformatter.cpp
@@ -567,7 +567,7 @@
start,
{UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD},
status);
- data->appendSpanIndex(0);
+ data->appendSpanInfo(0, start.length());
}
}
@@ -603,7 +603,7 @@
next,
{UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD},
status);
- data->appendSpanIndex(position);
+ data->appendSpanInfo(position, next.length());
data->getStringRef().append(
temp.tempSubString(offsets[1]),
{UFIELD_CATEGORY_LIST, ULISTFMT_LITERAL_FIELD},
@@ -622,7 +622,7 @@
next,
{UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD},
status);
- data->prependSpanIndex(position);
+ data->prependSpanInfo(position, next.length());
data->getStringRef().insert(
0,
temp.tempSubStringBetween(0, offsets[1]),
diff --git a/icu4c/source/test/intltest/listformattertest.cpp b/icu4c/source/test/intltest/listformattertest.cpp
index c57c8f5..c96e2a0 100644
--- a/icu4c/source/test/intltest/listformattertest.cpp
+++ b/icu4c/source/test/intltest/listformattertest.cpp
@@ -48,6 +48,7 @@
TESTCASE_AUTO(TestBadStylesFail);
TESTCASE_AUTO(TestCreateStyled);
TESTCASE_AUTO(TestContextual);
+ TESTCASE_AUTO(TestNextPosition);
TESTCASE_AUTO_END;
}
@@ -494,10 +495,10 @@
void ListFormatterTest::TestFormattedValue() {
IcuTestErrorCode status(*this, "TestFormattedValue");
- LocalPointer<ListFormatter> fmt(ListFormatter::createInstance("en", status));
- if (status.errIfFailureAndReset()) { return; }
{
+ LocalPointer<ListFormatter> fmt(ListFormatter::createInstance("en", status));
+ if (status.errIfFailureAndReset()) { return; }
const char16_t* message = u"Field position test 1";
const char16_t* expectedString = u"hello, wonderful, and world";
const UnicodeString inputs[] = {
@@ -523,6 +524,33 @@
expectedFieldPositions,
UPRV_LENGTHOF(expectedFieldPositions));
}
+
+ {
+ LocalPointer<ListFormatter> fmt(ListFormatter::createInstance("zh", ULISTFMT_TYPE_UNITS, ULISTFMT_WIDTH_SHORT, status));
+ if (status.errIfFailureAndReset()) { return; }
+ const char16_t* message = u"Field position test 2 (ICU-21340)";
+ const char16_t* expectedString = u"aabbbbbbbccc";
+ const UnicodeString inputs[] = {
+ u"aa",
+ u"bbbbbbb",
+ u"ccc"
+ };
+ FormattedList result = fmt->formatStringsToValue(inputs, UPRV_LENGTHOF(inputs), status);
+ static const UFieldPositionWithCategory expectedFieldPositions[] = {
+ // field, begin index, end index
+ {UFIELD_CATEGORY_LIST_SPAN, 0, 0, 2},
+ {UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD, 0, 2},
+ {UFIELD_CATEGORY_LIST_SPAN, 1, 2, 9},
+ {UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD, 2, 9},
+ {UFIELD_CATEGORY_LIST_SPAN, 2, 9, 12},
+ {UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD, 9, 12}};
+ checkMixedFormattedValue(
+ message,
+ result,
+ expectedString,
+ expectedFieldPositions,
+ UPRV_LENGTHOF(expectedFieldPositions));
+ }
}
void ListFormatterTest::DoTheRealListStyleTesting(Locale locale,
@@ -700,4 +728,50 @@
}
}
+void ListFormatterTest::TestNextPosition() {
+ IcuTestErrorCode status(*this, "TestNextPosition");
+ std::vector<std::string> locales = { "en", "es", "zh", "ja" };
+ UListFormatterWidth widths [] = {
+ ULISTFMT_WIDTH_WIDE, ULISTFMT_WIDTH_SHORT, ULISTFMT_WIDTH_NARROW
+ };
+ const char* widthStr [] = {"wide", "short", "narrow"};
+ UListFormatterType types [] = {
+ ULISTFMT_TYPE_AND, ULISTFMT_TYPE_OR, ULISTFMT_TYPE_UNITS
+ };
+ const char* typeStr [] = {"and", "or", "units"};
+ const UnicodeString inputs[] = { u"A1", u"B2", u"C3", u"D4" };
+ for (auto width : widths) {
+ for (auto type : types) {
+ for (auto locale : locales) {
+ LocalPointer<ListFormatter> fmt(
+ ListFormatter::createInstance(locale.c_str(), type, width, status),
+ status);
+ if (status.errIfFailureAndReset()) {
+ continue;
+ }
+ for (int32_t n = 1; n <= UPRV_LENGTHOF(inputs); n++) {
+ FormattedList result = fmt->formatStringsToValue(
+ inputs, n, status);
+ int32_t elements = 0;
+ icu::ConstrainedFieldPosition cfpos;
+ cfpos.constrainCategory(UFIELD_CATEGORY_LIST);
+ while (result.nextPosition(cfpos, status) && U_SUCCESS(status)) {
+ if (cfpos.getField() == ULISTFMT_ELEMENT_FIELD) {
+ elements++;
+ }
+ }
+ std::string msg = locale;
+ // Test that if there are n elements (n=1..4) in the input, then the
+ // nextPosition() should iterate through exactly n times
+ // with field == ULISTFMT_ELEMENT_FIELD.
+ assertEquals((msg
+ .append(" w=").append(widthStr[width])
+ .append(" t=").append(typeStr[type])).c_str(),
+ n, elements);
+ }
+ }
+ }
+ }
+}
+
#endif /* #if !UCONFIG_NO_FORMATTING */
diff --git a/icu4c/source/test/intltest/listformattertest.h b/icu4c/source/test/intltest/listformattertest.h
index 9c7a5dd..b3d4005 100644
--- a/icu4c/source/test/intltest/listformattertest.h
+++ b/icu4c/source/test/intltest/listformattertest.h
@@ -55,6 +55,7 @@
void TestBadStylesFail();
void TestCreateStyled();
void TestContextual();
+ void TestNextPosition();
private:
void CheckFormatting(