// source/i18n/titletrn.cpp - external/github.com/unicode-org/icu - Git at Google

 /*
 **********************************************************************
 *   Copyright (C) 2001, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *   Date        Name        Description
 *   05/24/01    aliu        Creation.
 **********************************************************************
 */

 #include "unicode/uchar.h"
 #include "titletrn.h"
 #include "unicode/uniset.h"
 #include "mutex.h"
 #include "ucln_in.h"
 #include "unicode/ustring.h"
 #include "ustr_imp.h"
 #include "cpputils.h"

 U_NAMESPACE_BEGIN

 /**
  * ID for this transliterator.  This is the ID string handed to the
  * Transliterator base-class constructor by
  * TitlecaseTransliterator(const Locale&).
  */
 const char TitlecaseTransliterator::_ID[] = "Any-Title";

 /**
  * The set of characters we skip.  These are neither cased nor
  * non-cased, to us; we copy them verbatim.
  * NULL until handleTransliterate() creates it on first use; deleted
  * and reset to NULL by cleanup().
  */
 static UnicodeSet* SKIP = NULL;

 /**
  * The set of characters that cause the next non-SKIP character
  * to be lowercased.
  * Created alongside SKIP in handleTransliterate() and deleted
  * alongside it in cleanup().
  */
 static UnicodeSet* CASED = NULL;

 /**
  * Constructs a titlecase transliterator whose case mappings are
  * performed for the given locale.
  */
 TitlecaseTransliterator::TitlecaseTransliterator(const Locale& theLoc) :
     Transliterator(_ID, 0),
     loc(theLoc),
     buffer(0)
 {
     // Scratch array used by handleTransliterate(); its length is
     // whatever u_getMaxCaseExpansion() reports.
     const int32_t scratchLength = u_getMaxCaseExpansion();
     buffer = new UChar[scratchLength];

     // Need to look back 2 characters in the case of "can't"
     setMaximumContextLength(2);
 }

 /**
  * Destructor.  Releases the scratch array that the constructors
  * allocated with new[].
  */
 TitlecaseTransliterator::~TitlecaseTransliterator()
 {
     delete[] this->buffer;
 }

 /**
  * Copy constructor.  The new object gets its own scratch array; the
  * contents of the source object's array are copied into it.
  */
 TitlecaseTransliterator::TitlecaseTransliterator(const TitlecaseTransliterator& o) :
     Transliterator(o),
     loc(o.loc),
     buffer(0)
 {
     const int32_t scratchLength = u_getMaxCaseExpansion();
     buffer = new UChar[scratchLength];
     uprv_arrayCopy(o.buffer, 0, buffer, 0, scratchLength);
 }

 /**
  * Assignment operator.  Copies the base-class state, the locale and the
  * contents of the scratch array from <code>o</code>.  The scratch array
  * itself is not reallocated; both objects keep their own array.
  *
  * @param o the object to copy from
  * @return a reference to this object
  */
 TitlecaseTransliterator& TitlecaseTransliterator::operator=(
                              const TitlecaseTransliterator& o) {
     // Self-assignment must be a no-op: otherwise the array copy below
     // would have identical source and destination ranges, which is not
     // permitted if uprv_arrayCopy is memcpy-based (NOTE(review): confirm
     // against cpputils.h).
     if (this != &o) {
         Transliterator::operator=(o);
         loc = o.loc;
         uprv_arrayCopy(o.buffer, 0, this->buffer, 0, u_getMaxCaseExpansion());
     }
     return *this;
 }

 /**
  * Transliterator API.  Returns a newly allocated copy of this object,
  * produced with the copy constructor.
  */
 Transliterator* TitlecaseTransliterator::clone(void) const
 {
     Transliterator* duplicate = new TitlecaseTransliterator(*this);
     return duplicate;
 }

 /**
  * Implements {@link Transliterator#handleTransliterate}.
  *
  * Walks the range [offsets.start, offsets.limit).  A non-SKIP character
  * is titlecased when the previous non-SKIP character was not CASED (or
  * there was none in the context) and lowercased when it was CASED.
  * SKIP characters are left untouched and do not change the mode.
  * When a mapping changes the text length, offsets.limit and
  * offsets.contextLimit are adjusted by the difference.  Unless the range
  * is empty on entry, offsets.start equals offsets.limit on return.
  *
  * @param text          the text to modify in place
  * @param offsets       context and transliteration range; updated
  * @param isIncremental not used by this implementation
  */
 void TitlecaseTransliterator::handleTransliterate(
                                   Replaceable& text, UTransPosition& offsets,
                                   UBool isIncremental) const {
     if (SKIP == NULL) {
         Mutex lock;
         if (SKIP == NULL) {
             UErrorCode ec = U_ZERO_ERROR;
             UnicodeSet* skipSet = new UnicodeSet(UnicodeString("[\\u00AD \\u2019 \\' [:Mn:] [:Me:] [:Cf:] [:Lm:]]", ""), ec);
             UnicodeSet* casedSet = new UnicodeSet(UnicodeString("[[:Lu:] [:Ll:] [:Lt:]]", ""), ec);
             if (skipSet == NULL || casedSet == NULL || U_FAILURE(ec)) {
                 // Without both sets the characters cannot be classified.
                 // Leave the text as it is, consume the range so callers
                 // still see progress, and try again on the next call.
                 delete skipSet;
                 delete casedSet;
                 if (offsets.start < offsets.limit) {
                     offsets.start = offsets.limit;
                 }
                 return;
             }
             // SKIP is what the unlocked test above looks at, so it is
             // stored last: once SKIP is non-NULL, CASED has already been
             // assigned.
             CASED = casedSet;
             SKIP = skipSet;
             ucln_i18n_registerCleanup();
         }
     }

     // Our mode; we are either converting letter toTitle or
     // toLower.
     UBool doTitle = TRUE;

     // Determine if there is a preceding context of CASED SKIP*,
     // in which case we want to start in toLower mode.  If the
     // prior context is anything else (including empty) then start
     // in toTitle mode.
     int32_t start = offsets.start;
     while (start > offsets.contextStart) {
         UChar c = text.charAt(--start);
         if (SKIP->contains(c)) {
             continue;
         }
         doTitle = !CASED->contains(c);
         break;
     }

     // Convert things after a CASED character toLower; things
     // after a non-CASED, non-SKIP character toTitle.  SKIP
     // characters are copied directly and do not change the mode.
     int32_t textPos = offsets.start;
     if (textPos >= offsets.limit) return;

     // get string for context
     // TODO: add convenience method to do this, since we do it all over

     /* Extract the characters from Replaceable (the whole context) */
     UnicodeString original;
     for (int32_t loop = offsets.contextStart; loop < offsets.contextLimit; loop++) {
         original.append(text.charAt(loop));
     }

     // Walk through original string
     // If there is a case change, modify corresponding position in replaceable
     // i and limit index into 'original'; textPos indexes into 'text'.

     int32_t i = textPos - offsets.contextStart;
     int32_t limit = offsets.limit - offsets.contextStart;
     const int32_t capacity = u_getMaxCaseExpansion();
     UChar32 cp;
     int32_t oldLen;
     int32_t newLen;

     while (i < limit) {
         int32_t s = i;

         UTF_GET_CHAR(original.getBuffer(), 0, i, original.length(), cp);
         oldLen = UTF_CHAR_LENGTH(cp);
         i += oldLen;
         if (!SKIP->contains(cp)) {
             if (doTitle) {
                 // A negative result is treated below as "no replacement".
                 newLen = u_internalTitleCase(cp, buffer, capacity, loc.getName());
             } else {
                 UErrorCode status = U_ZERO_ERROR;
                 int32_t lowerLen = u_strToLower(buffer, capacity, original.getBuffer()+s, i-s, loc.getName(), &status);
                 // Default: leave the character alone (also on error).
                 newLen = -1;
                 if (U_SUCCESS(status) && lowerLen >= 0 && lowerLen <= capacity) {
                     // Use the returned length rather than u_strlen(): the
                     // result need not be NUL-terminated.  Compare every
                     // code unit, not just the first, so that a change in
                     // the trail surrogate only is still detected.
                     UBool changed = lowerLen != oldLen;
                     for (int32_t k = 0; !changed && k < lowerLen; ++k) {
                         changed = buffer[k] != original.charAt(s + k);
                     }
                     if (changed) {
                         newLen = lowerLen;
                     }
                 }
             }
             doTitle = !CASED->contains(cp);
             if (newLen >= 0) {
                 UnicodeString temp(buffer, newLen);
                 text.handleReplaceBetween(textPos, textPos + oldLen, temp);
                 if (newLen != oldLen) {
                     // The text grew or shrank; shift the limits to match.
                     textPos += newLen;
                     offsets.limit += newLen - oldLen;
                     offsets.contextLimit += newLen - oldLen;
                     continue;
                 }
             }
         }
         textPos += oldLen;
     }
     offsets.start = offsets.limit;
 }

 /**
  * Static memory cleanup function.  Deletes the lazily created SKIP and
  * CASED sets and resets both pointers to NULL.
  */
 void TitlecaseTransliterator::cleanup()
 {
     if (SKIP == NULL) {
         // The sets were never created (or were already released).
         return;
     }
     delete SKIP;
     SKIP = NULL;
     delete CASED;
     CASED = NULL;
 }

 U_NAMESPACE_END
	/*
	**********************************************************************
	* Copyright (C) 2001, International Business Machines
	* Corporation and others. All Rights Reserved.
	**********************************************************************
	* Date Name Description
	* 05/24/01 aliu Creation.
	**********************************************************************
	*/

	#include "unicode/uchar.h"
	#include "titletrn.h"
	#include "unicode/uniset.h"
	#include "mutex.h"
	#include "ucln_in.h"
	#include "unicode/ustring.h"
	#include "ustr_imp.h"
	#include "cpputils.h"

	U_NAMESPACE_BEGIN

	/**
	 * ID for this transliterator.  This is the ID string handed to the
	 * Transliterator base-class constructor by
	 * TitlecaseTransliterator(const Locale&).
	 */
	const char TitlecaseTransliterator::_ID[] = "Any-Title";

	/**
	 * The set of characters we skip. These are neither cased nor
	 * non-cased, to us; we copy them verbatim.
	 * NULL until handleTransliterate() creates it on first use; deleted
	 * and reset to NULL by cleanup().
	 */
	static UnicodeSet* SKIP = NULL;

	/**
	 * The set of characters that cause the next non-SKIP character
	 * to be lowercased.
	 * Created alongside SKIP in handleTransliterate() and deleted
	 * alongside it in cleanup().
	 */
	static UnicodeSet* CASED = NULL;

	/**
	 * Constructs a titlecase transliterator whose case mappings are
	 * performed for the given locale.
	 */
	TitlecaseTransliterator::TitlecaseTransliterator(const Locale& theLoc) :
		Transliterator(_ID, 0),
		loc(theLoc),
		buffer(0)
	{
		// Scratch array used by handleTransliterate(); its length is
		// whatever u_getMaxCaseExpansion() reports.
		const int32_t scratchLength = u_getMaxCaseExpansion();
		buffer = new UChar[scratchLength];

		// Need to look back 2 characters in the case of "can't"
		setMaximumContextLength(2);
	}

	/**
	 * Destructor.  Releases the scratch array that the constructors
	 * allocated with new[].
	 */
	TitlecaseTransliterator::~TitlecaseTransliterator()
	{
		delete[] this->buffer;
	}

	/**
	 * Copy constructor.  The new object gets its own scratch array; the
	 * contents of the source object's array are copied into it.
	 */
	TitlecaseTransliterator::TitlecaseTransliterator(const TitlecaseTransliterator& o) :
		Transliterator(o),
		loc(o.loc),
		buffer(0)
	{
		const int32_t scratchLength = u_getMaxCaseExpansion();
		buffer = new UChar[scratchLength];
		uprv_arrayCopy(o.buffer, 0, buffer, 0, scratchLength);
	}

	/**
	 * Assignment operator.  Copies the base-class state, the locale and the
	 * contents of the scratch array from <code>o</code>.  The scratch array
	 * itself is not reallocated; both objects keep their own array.
	 *
	 * @param o the object to copy from
	 * @return a reference to this object
	 */
	TitlecaseTransliterator& TitlecaseTransliterator::operator=(
			const TitlecaseTransliterator& o) {
		// Self-assignment must be a no-op: otherwise the array copy below
		// would have identical source and destination ranges, which is not
		// permitted if uprv_arrayCopy is memcpy-based (NOTE(review): confirm
		// against cpputils.h).
		if (this != &o) {
			Transliterator::operator=(o);
			loc = o.loc;
			uprv_arrayCopy(o.buffer, 0, this->buffer, 0, u_getMaxCaseExpansion());
		}
		return *this;
	}

	/**
	 * Transliterator API.  Returns a newly allocated copy of this object,
	 * produced with the copy constructor.
	 */
	Transliterator* TitlecaseTransliterator::clone(void) const
	{
		Transliterator* duplicate = new TitlecaseTransliterator(*this);
		return duplicate;
	}

	/**
	 * Implements {@link Transliterator#handleTransliterate}.
	 *
	 * Walks the range [offsets.start, offsets.limit).  A non-SKIP character
	 * is titlecased when the previous non-SKIP character was not CASED (or
	 * there was none in the context) and lowercased when it was CASED.
	 * SKIP characters are left untouched and do not change the mode.
	 * When a mapping changes the text length, offsets.limit and
	 * offsets.contextLimit are adjusted by the difference.  Unless the range
	 * is empty on entry, offsets.start equals offsets.limit on return.
	 *
	 * @param text          the text to modify in place
	 * @param offsets       context and transliteration range; updated
	 * @param isIncremental not used by this implementation
	 */
	void TitlecaseTransliterator::handleTransliterate(
			Replaceable& text, UTransPosition& offsets,
			UBool isIncremental) const {
		if (SKIP == NULL) {
			Mutex lock;
			if (SKIP == NULL) {
				UErrorCode ec = U_ZERO_ERROR;
				UnicodeSet* skipSet = new UnicodeSet(UnicodeString("[\\u00AD \\u2019 \\' [:Mn:] [:Me:] [:Cf:] [:Lm:]]", ""), ec);
				UnicodeSet* casedSet = new UnicodeSet(UnicodeString("[[:Lu:] [:Ll:] [:Lt:]]", ""), ec);
				if (skipSet == NULL || casedSet == NULL || U_FAILURE(ec)) {
					// Without both sets the characters cannot be classified.
					// Leave the text as it is, consume the range so callers
					// still see progress, and try again on the next call.
					delete skipSet;
					delete casedSet;
					if (offsets.start < offsets.limit) {
						offsets.start = offsets.limit;
					}
					return;
				}
				// SKIP is what the unlocked test above looks at, so it is
				// stored last: once SKIP is non-NULL, CASED has already been
				// assigned.
				CASED = casedSet;
				SKIP = skipSet;
				ucln_i18n_registerCleanup();
			}
		}

		// Our mode; we are either converting letter toTitle or
		// toLower.
		UBool doTitle = TRUE;

		// Determine if there is a preceding context of CASED SKIP*,
		// in which case we want to start in toLower mode. If the
		// prior context is anything else (including empty) then start
		// in toTitle mode.
		int32_t start = offsets.start;
		while (start > offsets.contextStart) {
			UChar c = text.charAt(--start);
			if (SKIP->contains(c)) {
				continue;
			}
			doTitle = !CASED->contains(c);
			break;
		}

		// Convert things after a CASED character toLower; things
		// after a non-CASED, non-SKIP character toTitle. SKIP
		// characters are copied directly and do not change the mode.
		int32_t textPos = offsets.start;
		if (textPos >= offsets.limit) return;

		// get string for context
		// TODO: add convenience method to do this, since we do it all over

		/* Extract the characters from Replaceable (the whole context) */
		UnicodeString original;
		for (int32_t loop = offsets.contextStart; loop < offsets.contextLimit; loop++) {
			original.append(text.charAt(loop));
		}

		// Walk through original string
		// If there is a case change, modify corresponding position in replaceable
		// i and limit index into 'original'; textPos indexes into 'text'.

		int32_t i = textPos - offsets.contextStart;
		int32_t limit = offsets.limit - offsets.contextStart;
		const int32_t capacity = u_getMaxCaseExpansion();
		UChar32 cp;
		int32_t oldLen;
		int32_t newLen;

		while (i < limit) {
			int32_t s = i;

			UTF_GET_CHAR(original.getBuffer(), 0, i, original.length(), cp);
			oldLen = UTF_CHAR_LENGTH(cp);
			i += oldLen;
			if (!SKIP->contains(cp)) {
				if (doTitle) {
					// A negative result is treated below as "no replacement".
					newLen = u_internalTitleCase(cp, buffer, capacity, loc.getName());
				} else {
					UErrorCode status = U_ZERO_ERROR;
					int32_t lowerLen = u_strToLower(buffer, capacity, original.getBuffer()+s, i-s, loc.getName(), &status);
					// Default: leave the character alone (also on error).
					newLen = -1;
					if (U_SUCCESS(status) && lowerLen >= 0 && lowerLen <= capacity) {
						// Use the returned length rather than u_strlen(): the
						// result need not be NUL-terminated.  Compare every
						// code unit, not just the first, so that a change in
						// the trail surrogate only is still detected.
						UBool changed = lowerLen != oldLen;
						for (int32_t k = 0; !changed && k < lowerLen; ++k) {
							changed = buffer[k] != original.charAt(s + k);
						}
						if (changed) {
							newLen = lowerLen;
						}
					}
				}
				doTitle = !CASED->contains(cp);
				if (newLen >= 0) {
					UnicodeString temp(buffer, newLen);
					text.handleReplaceBetween(textPos, textPos + oldLen, temp);
					if (newLen != oldLen) {
						// The text grew or shrank; shift the limits to match.
						textPos += newLen;
						offsets.limit += newLen - oldLen;
						offsets.contextLimit += newLen - oldLen;
						continue;
					}
				}
			}
			textPos += oldLen;
		}
		offsets.start = offsets.limit;
	}

	/**
	 * Static memory cleanup function.  Deletes the lazily created SKIP and
	 * CASED sets and resets both pointers to NULL.
	 */
	void TitlecaseTransliterator::cleanup()
	{
		if (SKIP == NULL) {
			// The sets were never created (or were already released).
			return;
		}
		delete SKIP;
		SKIP = NULL;
		delete CASED;
		CASED = NULL;
	}

	U_NAMESPACE_END