| // © 2016 and later: Unicode, Inc. and others. |
| // License & terms of use: http://www.unicode.org/copyright.html |
| /* |
| ******************************************************************************* |
| * Copyright (C) 2009-2015, International Business Machines Corporation and |
| * others. All Rights Reserved. |
| ******************************************************************************* |
| * |
| * File PLURFMT.CPP |
| ******************************************************************************* |
| */ |
| |
| #include "unicode/decimfmt.h" |
| #include "unicode/messagepattern.h" |
| #include "unicode/plurfmt.h" |
| #include "unicode/plurrule.h" |
| #include "unicode/utypes.h" |
| #include "cmemory.h" |
| #include "messageimpl.h" |
| #include "nfrule.h" |
| #include "plurrule_impl.h" |
| #include "uassert.h" |
| #include "uhash.h" |
| #include "number_decimalquantity.h" |
| #include "number_utils.h" |
| #include "number_utypes.h" |
| |
| #if !UCONFIG_NO_FORMATTING |
| |
| U_NAMESPACE_BEGIN |
| |
| using number::impl::DecimalQuantity; |
| |
| static const UChar OTHER_STRING[] = { |
| 0x6F, 0x74, 0x68, 0x65, 0x72, 0 // "other" |
| }; |
| |
| UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralFormat) |
| |
| PluralFormat::PluralFormat(UErrorCode& status) |
| : locale(Locale::getDefault()), |
| msgPattern(status), |
| numberFormat(NULL), |
| offset(0) { |
| init(NULL, UPLURAL_TYPE_CARDINAL, status); |
| } |
| |
| PluralFormat::PluralFormat(const Locale& loc, UErrorCode& status) |
| : locale(loc), |
| msgPattern(status), |
| numberFormat(NULL), |
| offset(0) { |
| init(NULL, UPLURAL_TYPE_CARDINAL, status); |
| } |
| |
| PluralFormat::PluralFormat(const PluralRules& rules, UErrorCode& status) |
| : locale(Locale::getDefault()), |
| msgPattern(status), |
| numberFormat(NULL), |
| offset(0) { |
| init(&rules, UPLURAL_TYPE_COUNT, status); |
| } |
| |
| PluralFormat::PluralFormat(const Locale& loc, |
| const PluralRules& rules, |
| UErrorCode& status) |
| : locale(loc), |
| msgPattern(status), |
| numberFormat(NULL), |
| offset(0) { |
| init(&rules, UPLURAL_TYPE_COUNT, status); |
| } |
| |
| PluralFormat::PluralFormat(const Locale& loc, |
| UPluralType type, |
| UErrorCode& status) |
| : locale(loc), |
| msgPattern(status), |
| numberFormat(NULL), |
| offset(0) { |
| init(NULL, type, status); |
| } |
| |
| PluralFormat::PluralFormat(const UnicodeString& pat, |
| UErrorCode& status) |
| : locale(Locale::getDefault()), |
| msgPattern(status), |
| numberFormat(NULL), |
| offset(0) { |
| init(NULL, UPLURAL_TYPE_CARDINAL, status); |
| applyPattern(pat, status); |
| } |
| |
| PluralFormat::PluralFormat(const Locale& loc, |
| const UnicodeString& pat, |
| UErrorCode& status) |
| : locale(loc), |
| msgPattern(status), |
| numberFormat(NULL), |
| offset(0) { |
| init(NULL, UPLURAL_TYPE_CARDINAL, status); |
| applyPattern(pat, status); |
| } |
| |
| PluralFormat::PluralFormat(const PluralRules& rules, |
| const UnicodeString& pat, |
| UErrorCode& status) |
| : locale(Locale::getDefault()), |
| msgPattern(status), |
| numberFormat(NULL), |
| offset(0) { |
| init(&rules, UPLURAL_TYPE_COUNT, status); |
| applyPattern(pat, status); |
| } |
| |
| PluralFormat::PluralFormat(const Locale& loc, |
| const PluralRules& rules, |
| const UnicodeString& pat, |
| UErrorCode& status) |
| : locale(loc), |
| msgPattern(status), |
| numberFormat(NULL), |
| offset(0) { |
| init(&rules, UPLURAL_TYPE_COUNT, status); |
| applyPattern(pat, status); |
| } |
| |
| PluralFormat::PluralFormat(const Locale& loc, |
| UPluralType type, |
| const UnicodeString& pat, |
| UErrorCode& status) |
| : locale(loc), |
| msgPattern(status), |
| numberFormat(NULL), |
| offset(0) { |
| init(NULL, type, status); |
| applyPattern(pat, status); |
| } |
| |
| PluralFormat::PluralFormat(const PluralFormat& other) |
| : Format(other), |
| locale(other.locale), |
| msgPattern(other.msgPattern), |
| numberFormat(NULL), |
| offset(other.offset) { |
| copyObjects(other); |
| } |
| |
| void |
| PluralFormat::copyObjects(const PluralFormat& other) { |
| UErrorCode status = U_ZERO_ERROR; |
| if (numberFormat != NULL) { |
| delete numberFormat; |
| } |
| if (pluralRulesWrapper.pluralRules != NULL) { |
| delete pluralRulesWrapper.pluralRules; |
| } |
| |
| if (other.numberFormat == NULL) { |
| numberFormat = NumberFormat::createInstance(locale, status); |
| } else { |
| numberFormat = other.numberFormat->clone(); |
| } |
| if (other.pluralRulesWrapper.pluralRules == NULL) { |
| pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, status); |
| } else { |
| pluralRulesWrapper.pluralRules = other.pluralRulesWrapper.pluralRules->clone(); |
| } |
| } |
| |
| |
| PluralFormat::~PluralFormat() { |
| delete numberFormat; |
| } |
| |
| void |
| PluralFormat::init(const PluralRules* rules, UPluralType type, UErrorCode& status) { |
| if (U_FAILURE(status)) { |
| return; |
| } |
| |
| if (rules==NULL) { |
| pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, type, status); |
| } else { |
| pluralRulesWrapper.pluralRules = rules->clone(); |
| if (pluralRulesWrapper.pluralRules == NULL) { |
| status = U_MEMORY_ALLOCATION_ERROR; |
| return; |
| } |
| } |
| |
| numberFormat= NumberFormat::createInstance(locale, status); |
| } |
| |
| void |
| PluralFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status) { |
| msgPattern.parsePluralStyle(newPattern, NULL, status); |
| if (U_FAILURE(status)) { |
| msgPattern.clear(); |
| offset = 0; |
| return; |
| } |
| offset = msgPattern.getPluralOffset(0); |
| } |
| |
| UnicodeString& |
| PluralFormat::format(const Formattable& obj, |
| UnicodeString& appendTo, |
| FieldPosition& pos, |
| UErrorCode& status) const |
| { |
| if (U_FAILURE(status)) return appendTo; |
| |
| if (obj.isNumeric()) { |
| return format(obj, obj.getDouble(), appendTo, pos, status); |
| } else { |
| status = U_ILLEGAL_ARGUMENT_ERROR; |
| return appendTo; |
| } |
| } |
| |
| UnicodeString |
| PluralFormat::format(int32_t number, UErrorCode& status) const { |
| FieldPosition fpos(FieldPosition::DONT_CARE); |
| UnicodeString result; |
| return format(Formattable(number), number, result, fpos, status); |
| } |
| |
| UnicodeString |
| PluralFormat::format(double number, UErrorCode& status) const { |
| FieldPosition fpos(FieldPosition::DONT_CARE); |
| UnicodeString result; |
| return format(Formattable(number), number, result, fpos, status); |
| } |
| |
| |
| UnicodeString& |
| PluralFormat::format(int32_t number, |
| UnicodeString& appendTo, |
| FieldPosition& pos, |
| UErrorCode& status) const { |
| return format(Formattable(number), (double)number, appendTo, pos, status); |
| } |
| |
| UnicodeString& |
| PluralFormat::format(double number, |
| UnicodeString& appendTo, |
| FieldPosition& pos, |
| UErrorCode& status) const { |
| return format(Formattable(number), (double)number, appendTo, pos, status); |
| } |
| |
| UnicodeString& |
| PluralFormat::format(const Formattable& numberObject, double number, |
| UnicodeString& appendTo, |
| FieldPosition& pos, |
| UErrorCode& status) const { |
| if (U_FAILURE(status)) { |
| return appendTo; |
| } |
| if (msgPattern.countParts() == 0) { |
| return numberFormat->format(numberObject, appendTo, pos, status); |
| } |
| |
| // Get the appropriate sub-message. |
| // Select it based on the formatted number-offset. |
| double numberMinusOffset = number - offset; |
| // Call NumberFormatter to get both the DecimalQuantity and the string. |
| // This call site needs to use more internal APIs than the Java equivalent. |
| number::impl::UFormattedNumberData data; |
| if (offset == 0) { |
| // could be BigDecimal etc. |
| numberObject.populateDecimalQuantity(data.quantity, status); |
| } else { |
| data.quantity.setToDouble(numberMinusOffset); |
| } |
| UnicodeString numberString; |
| auto *decFmt = dynamic_cast<DecimalFormat *>(numberFormat); |
| if(decFmt != nullptr) { |
| const number::LocalizedNumberFormatter* lnf = decFmt->toNumberFormatter(status); |
| if (U_FAILURE(status)) { |
| return appendTo; |
| } |
| lnf->formatImpl(&data, status); // mutates &data |
| if (U_FAILURE(status)) { |
| return appendTo; |
| } |
| numberString = data.getStringRef().toUnicodeString(); |
| } else { |
| if (offset == 0) { |
| numberFormat->format(numberObject, numberString, status); |
| } else { |
| numberFormat->format(numberMinusOffset, numberString, status); |
| } |
| } |
| |
| int32_t partIndex = findSubMessage(msgPattern, 0, pluralRulesWrapper, &data.quantity, number, status); |
| if (U_FAILURE(status)) { return appendTo; } |
| // Replace syntactic # signs in the top level of this sub-message |
| // (not in nested arguments) with the formatted number-offset. |
| const UnicodeString& pattern = msgPattern.getPatternString(); |
| int32_t prevIndex = msgPattern.getPart(partIndex).getLimit(); |
| for (;;) { |
| const MessagePattern::Part& part = msgPattern.getPart(++partIndex); |
| const UMessagePatternPartType type = part.getType(); |
| int32_t index = part.getIndex(); |
| if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) { |
| return appendTo.append(pattern, prevIndex, index - prevIndex); |
| } else if ((type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) || |
| (type == UMSGPAT_PART_TYPE_SKIP_SYNTAX && MessageImpl::jdkAposMode(msgPattern))) { |
| appendTo.append(pattern, prevIndex, index - prevIndex); |
| if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) { |
| appendTo.append(numberString); |
| } |
| prevIndex = part.getLimit(); |
| } else if (type == UMSGPAT_PART_TYPE_ARG_START) { |
| appendTo.append(pattern, prevIndex, index - prevIndex); |
| prevIndex = index; |
| partIndex = msgPattern.getLimitPartIndex(partIndex); |
| index = msgPattern.getPart(partIndex).getLimit(); |
| MessageImpl::appendReducedApostrophes(pattern, prevIndex, index, appendTo); |
| prevIndex = index; |
| } |
| } |
| } |
| |
| UnicodeString& |
| PluralFormat::toPattern(UnicodeString& appendTo) { |
| if (0 == msgPattern.countParts()) { |
| appendTo.setToBogus(); |
| } else { |
| appendTo.append(msgPattern.getPatternString()); |
| } |
| return appendTo; |
| } |
| |
| void |
| PluralFormat::setLocale(const Locale& loc, UErrorCode& status) { |
| if (U_FAILURE(status)) { |
| return; |
| } |
| locale = loc; |
| msgPattern.clear(); |
| delete numberFormat; |
| offset = 0; |
| numberFormat = NULL; |
| pluralRulesWrapper.reset(); |
| init(NULL, UPLURAL_TYPE_CARDINAL, status); |
| } |
| |
| void |
| PluralFormat::setNumberFormat(const NumberFormat* format, UErrorCode& status) { |
| if (U_FAILURE(status)) { |
| return; |
| } |
| NumberFormat* nf = format->clone(); |
| if (nf != NULL) { |
| delete numberFormat; |
| numberFormat = nf; |
| } else { |
| status = U_MEMORY_ALLOCATION_ERROR; |
| } |
| } |
| |
| PluralFormat* |
| PluralFormat::clone() const |
| { |
| return new PluralFormat(*this); |
| } |
| |
| |
| PluralFormat& |
| PluralFormat::operator=(const PluralFormat& other) { |
| if (this != &other) { |
| locale = other.locale; |
| msgPattern = other.msgPattern; |
| offset = other.offset; |
| copyObjects(other); |
| } |
| |
| return *this; |
| } |
| |
| UBool |
| PluralFormat::operator==(const Format& other) const { |
| if (this == &other) { |
| return TRUE; |
| } |
| if (!Format::operator==(other)) { |
| return FALSE; |
| } |
| const PluralFormat& o = (const PluralFormat&)other; |
| return |
| locale == o.locale && |
| msgPattern == o.msgPattern && // implies same offset |
| (numberFormat == NULL) == (o.numberFormat == NULL) && |
| (numberFormat == NULL || *numberFormat == *o.numberFormat) && |
| (pluralRulesWrapper.pluralRules == NULL) == (o.pluralRulesWrapper.pluralRules == NULL) && |
| (pluralRulesWrapper.pluralRules == NULL || |
| *pluralRulesWrapper.pluralRules == *o.pluralRulesWrapper.pluralRules); |
| } |
| |
| UBool |
| PluralFormat::operator!=(const Format& other) const { |
| return !operator==(other); |
| } |
| |
| void |
| PluralFormat::parseObject(const UnicodeString& /*source*/, |
| Formattable& /*result*/, |
| ParsePosition& pos) const |
| { |
| // Parsing not supported. |
| pos.setErrorIndex(pos.getIndex()); |
| } |
| |
| int32_t PluralFormat::findSubMessage(const MessagePattern& pattern, int32_t partIndex, |
| const PluralSelector& selector, void *context, |
| double number, UErrorCode& ec) { |
| if (U_FAILURE(ec)) { |
| return 0; |
| } |
| int32_t count=pattern.countParts(); |
| double offset; |
| const MessagePattern::Part* part=&pattern.getPart(partIndex); |
| if (MessagePattern::Part::hasNumericValue(part->getType())) { |
| offset=pattern.getNumericValue(*part); |
| ++partIndex; |
| } else { |
| offset=0; |
| } |
| // The keyword is empty until we need to match against a non-explicit, not-"other" value. |
| // Then we get the keyword from the selector. |
| // (In other words, we never call the selector if we match against an explicit value, |
| // or if the only non-explicit keyword is "other".) |
| UnicodeString keyword; |
| UnicodeString other(FALSE, OTHER_STRING, 5); |
| // When we find a match, we set msgStart>0 and also set this boolean to true |
| // to avoid matching the keyword again (duplicates are allowed) |
| // while we continue to look for an explicit-value match. |
| UBool haveKeywordMatch=FALSE; |
| // msgStart is 0 until we find any appropriate sub-message. |
| // We remember the first "other" sub-message if we have not seen any |
| // appropriate sub-message before. |
| // We remember the first matching-keyword sub-message if we have not seen |
| // one of those before. |
| // (The parser allows [does not check for] duplicate keywords. |
| // We just have to make sure to take the first one.) |
| // We avoid matching the keyword twice by also setting haveKeywordMatch=true |
| // at the first keyword match. |
| // We keep going until we find an explicit-value match or reach the end of the plural style. |
| int32_t msgStart=0; |
| // Iterate over (ARG_SELECTOR [ARG_INT|ARG_DOUBLE] message) tuples |
| // until ARG_LIMIT or end of plural-only pattern. |
| do { |
| part=&pattern.getPart(partIndex++); |
| const UMessagePatternPartType type = part->getType(); |
| if(type==UMSGPAT_PART_TYPE_ARG_LIMIT) { |
| break; |
| } |
| U_ASSERT (type==UMSGPAT_PART_TYPE_ARG_SELECTOR); |
| // part is an ARG_SELECTOR followed by an optional explicit value, and then a message |
| if(MessagePattern::Part::hasNumericValue(pattern.getPartType(partIndex))) { |
| // explicit value like "=2" |
| part=&pattern.getPart(partIndex++); |
| if(number==pattern.getNumericValue(*part)) { |
| // matches explicit value |
| return partIndex; |
| } |
| } else if(!haveKeywordMatch) { |
| // plural keyword like "few" or "other" |
| // Compare "other" first and call the selector if this is not "other". |
| if(pattern.partSubstringMatches(*part, other)) { |
| if(msgStart==0) { |
| msgStart=partIndex; |
| if(0 == keyword.compare(other)) { |
| // This is the first "other" sub-message, |
| // and the selected keyword is also "other". |
| // Do not match "other" again. |
| haveKeywordMatch=TRUE; |
| } |
| } |
| } else { |
| if(keyword.isEmpty()) { |
| keyword=selector.select(context, number-offset, ec); |
| if(msgStart!=0 && (0 == keyword.compare(other))) { |
| // We have already seen an "other" sub-message. |
| // Do not match "other" again. |
| haveKeywordMatch=TRUE; |
| // Skip keyword matching but do getLimitPartIndex(). |
| } |
| } |
| if(!haveKeywordMatch && pattern.partSubstringMatches(*part, keyword)) { |
| // keyword matches |
| msgStart=partIndex; |
| // Do not match this keyword again. |
| haveKeywordMatch=TRUE; |
| } |
| } |
| } |
| partIndex=pattern.getLimitPartIndex(partIndex); |
| } while(++partIndex<count); |
| return msgStart; |
| } |
| |
| void PluralFormat::parseType(const UnicodeString& source, const NFRule *rbnfLenientScanner, Formattable& result, FieldPosition& pos) const { |
| // If no pattern was applied, return null. |
| if (msgPattern.countParts() == 0) { |
| pos.setBeginIndex(-1); |
| pos.setEndIndex(-1); |
| return; |
| } |
| int partIndex = 0; |
| int currMatchIndex; |
| int count=msgPattern.countParts(); |
| int startingAt = pos.getBeginIndex(); |
| if (startingAt < 0) { |
| startingAt = 0; |
| } |
| |
| // The keyword is null until we need to match against a non-explicit, not-"other" value. |
| // Then we get the keyword from the selector. |
| // (In other words, we never call the selector if we match against an explicit value, |
| // or if the only non-explicit keyword is "other".) |
| UnicodeString keyword; |
| UnicodeString matchedWord; |
| const UnicodeString& pattern = msgPattern.getPatternString(); |
| int matchedIndex = -1; |
| // Iterate over (ARG_SELECTOR ARG_START message ARG_LIMIT) tuples |
| // until the end of the plural-only pattern. |
| while (partIndex < count) { |
| const MessagePattern::Part* partSelector = &msgPattern.getPart(partIndex++); |
| if (partSelector->getType() != UMSGPAT_PART_TYPE_ARG_SELECTOR) { |
| // Bad format |
| continue; |
| } |
| |
| const MessagePattern::Part* partStart = &msgPattern.getPart(partIndex++); |
| if (partStart->getType() != UMSGPAT_PART_TYPE_MSG_START) { |
| // Bad format |
| continue; |
| } |
| |
| const MessagePattern::Part* partLimit = &msgPattern.getPart(partIndex++); |
| if (partLimit->getType() != UMSGPAT_PART_TYPE_MSG_LIMIT) { |
| // Bad format |
| continue; |
| } |
| |
| UnicodeString currArg = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit()); |
| if (rbnfLenientScanner != NULL) { |
| // Check if non-lenient rule finds the text before call lenient parsing |
| int32_t tempIndex = source.indexOf(currArg, startingAt); |
| if (tempIndex >= 0) { |
| currMatchIndex = tempIndex; |
| } else { |
| // If lenient parsing is turned ON, we've got some time consuming parsing ahead of us. |
| int32_t length = -1; |
| currMatchIndex = rbnfLenientScanner->findTextLenient(source, currArg, startingAt, &length); |
| } |
| } |
| else { |
| currMatchIndex = source.indexOf(currArg, startingAt); |
| } |
| if (currMatchIndex >= 0 && currMatchIndex >= matchedIndex && currArg.length() > matchedWord.length()) { |
| matchedIndex = currMatchIndex; |
| matchedWord = currArg; |
| keyword = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit()); |
| } |
| } |
| if (matchedIndex >= 0) { |
| pos.setBeginIndex(matchedIndex); |
| pos.setEndIndex(matchedIndex + matchedWord.length()); |
| result.setString(keyword); |
| return; |
| } |
| |
| // Not found! |
| pos.setBeginIndex(-1); |
| pos.setEndIndex(-1); |
| } |
| |
| PluralFormat::PluralSelector::~PluralSelector() {} |
| |
| PluralFormat::PluralSelectorAdapter::~PluralSelectorAdapter() { |
| delete pluralRules; |
| } |
| |
| UnicodeString PluralFormat::PluralSelectorAdapter::select(void *context, double number, |
| UErrorCode& /*ec*/) const { |
| (void)number; // unused except in the assertion |
| IFixedDecimal *dec=static_cast<IFixedDecimal *>(context); |
| return pluralRules->select(*dec); |
| } |
| |
| void PluralFormat::PluralSelectorAdapter::reset() { |
| delete pluralRules; |
| pluralRules = NULL; |
| } |
| |
| |
| U_NAMESPACE_END |
| |
| |
| #endif /* #if !UCONFIG_NO_FORMATTING */ |
| |
| //eof |