| /* |
| ********************************************************************** |
| * Copyright (C) 2001, International Business Machines |
| * Corporation and others. All Rights Reserved. |
| ********************************************************************** |
| * Date Name Description |
| * 05/24/01 aliu Creation. |
| ********************************************************************** |
| */ |
| |
| #include "unicode/uchar.h" |
| #include "unicode/uniset.h" |
| #include "unicode/ustring.h" |
| #include "titletrn.h" |
| #include "mutex.h" |
| #include "ucln_in.h" |
| #include "ustr_imp.h" |
| #include "cpputils.h" |
| |
| U_NAMESPACE_BEGIN |
| |
| const char TitlecaseTransliterator::fgClassID=0; |
| |
| /** |
| * ID for this transliterator. |
| */ |
| const char TitlecaseTransliterator::_ID[] = "Any-Title"; |
| |
| /** |
| * Mutex for statics IN THIS FILE |
| */ |
| static UMTX MUTEX = 0; |
| |
| /** |
| * The set of characters we skip. These are neither cased nor |
| * non-cased, to us; we copy them verbatim. |
| */ |
| static UnicodeSet* SKIP = NULL; |
| |
| /** |
| * The set of characters that cause the next non-SKIP character |
| * to be lowercased. |
| */ |
| static UnicodeSet* CASED = NULL; |
| |
| TitlecaseTransliterator::TitlecaseTransliterator(const Locale& theLoc) : |
| Transliterator(_ID, 0), |
| loc(theLoc), |
| buffer(0) |
| { |
| buffer = (UChar *)uprv_malloc(u_getMaxCaseExpansion()*sizeof(buffer[0])); |
| // Need to look back 2 characters in the case of "can't" |
| setMaximumContextLength(2); |
| } |
| |
| /** |
| * Destructor. |
| */ |
| TitlecaseTransliterator::~TitlecaseTransliterator() { |
| uprv_free(buffer); |
| } |
| |
| /** |
| * Copy constructor. |
| */ |
| TitlecaseTransliterator::TitlecaseTransliterator(const TitlecaseTransliterator& o) : |
| Transliterator(o), |
| loc(o.loc), |
| buffer(0) |
| { |
| buffer = (UChar *)uprv_malloc(u_getMaxCaseExpansion()*sizeof(buffer[0])); |
| uprv_arrayCopy(o.buffer, 0, this->buffer, 0, u_getMaxCaseExpansion()); |
| } |
| |
| /** |
| * Assignment operator. |
| */ |
| TitlecaseTransliterator& TitlecaseTransliterator::operator=( |
| const TitlecaseTransliterator& o) { |
| Transliterator::operator=(o); |
| loc = o.loc; |
| uprv_arrayCopy(o.buffer, 0, this->buffer, 0, u_getMaxCaseExpansion()); |
| return *this; |
| } |
| |
| /** |
| * Transliterator API. |
| */ |
| Transliterator* TitlecaseTransliterator::clone(void) const { |
| return new TitlecaseTransliterator(*this); |
| } |
| |
| /** |
| * Implements {@link Transliterator#handleTransliterate}. |
| */ |
| void TitlecaseTransliterator::handleTransliterate( |
| Replaceable& text, UTransPosition& offsets, |
| UBool isIncremental) const { |
| if (SKIP == NULL) { |
| Mutex lock(&MUTEX); |
| if (SKIP == NULL) { |
| UErrorCode ec = U_ZERO_ERROR; |
| SKIP = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\u00AD \\u2019 \\' [:Mn:] [:Me:] [:Cf:] [:Lm:] [:Sk:]]"), ec); |
| CASED = new UnicodeSet(UNICODE_STRING_SIMPLE("[[:Lu:] [:Ll:] [:Lt:]]"), ec); |
| ucln_i18n_registerCleanup(); |
| } |
| } |
| |
| // Our mode; we are either converting letter toTitle or |
| // toLower. |
| UBool doTitle = TRUE; |
| |
| // Determine if there is a preceding context of CASED SKIP*, |
| // in which case we want to start in toLower mode. If the |
| // prior context is anything else (including empty) then start |
| // in toTitle mode. |
| UChar32 c; |
| int32_t start; |
| for (start = offsets.start - 1; start >= offsets.contextStart; start -= UTF_CHAR_LENGTH(c)) { |
| c = text.char32At(start); |
| if (SKIP->contains(c)) { |
| continue; |
| } |
| doTitle = !CASED->contains(c); |
| break; |
| } |
| |
| // Convert things after a CASED character toLower; things |
| // after a non-CASED, non-SKIP character toTitle. SKIP |
| // characters are copied directly and do not change the mode. |
| int32_t textPos = offsets.start; |
| if (textPos >= offsets.limit) return; |
| |
| UnicodeString original; |
| text.extractBetween(offsets.contextStart, offsets.contextLimit, original); |
| |
| UCharIterator iter; |
| uiter_setReplaceable(&iter, &text); |
| iter.start = offsets.contextStart; |
| iter.limit = offsets.contextLimit; |
| |
| // Walk through original string |
| // If there is a case change, modify corresponding position in replaceable |
| |
| int32_t i = textPos - offsets.contextStart; |
| int32_t limit = offsets.limit - offsets.contextStart; |
| UChar32 cp; |
| int32_t oldLen; |
| int32_t newLen; |
| |
| for (; i < limit; ) { |
| UTF_GET_CHAR(original.getBuffer(), 0, i, original.length(), cp); |
| oldLen = UTF_CHAR_LENGTH(cp); |
| i += oldLen; |
| iter.index = i; // Point _past_ current char |
| if (!SKIP->contains(cp)) { |
| if (doTitle) { |
| newLen = u_internalToTitle(cp, &iter, buffer, u_getMaxCaseExpansion(), loc.getName()); |
| } else { |
| newLen = u_internalToLower(cp, &iter, buffer, u_getMaxCaseExpansion(), loc.getName()); |
| } |
| doTitle = !CASED->contains(cp); |
| if (newLen >= 0) { |
| UnicodeString temp(buffer, newLen); |
| text.handleReplaceBetween(textPos, textPos + oldLen, temp); |
| if (newLen != oldLen) { |
| textPos += newLen; |
| offsets.limit += newLen - oldLen; |
| offsets.contextLimit += newLen - oldLen; |
| continue; |
| } |
| } |
| } |
| textPos += oldLen; |
| } |
| offsets.start = offsets.limit; |
| } |
| |
| /** |
| * Static memory cleanup function. |
| */ |
| void TitlecaseTransliterator::cleanup() { |
| if (SKIP != NULL) { |
| delete SKIP; SKIP = NULL; |
| delete CASED; CASED = NULL; |
| umtx_destroy(&MUTEX); |
| } |
| } |
| |
| U_NAMESPACE_END |
| |