| /* | 
 | ******************************************************************************* | 
 | * | 
 | *   Copyright (C) 2003, International Business Machines | 
 | *   Corporation and others.  All Rights Reserved. | 
 | * | 
 | ******************************************************************************* | 
 | * | 
 | * File line.h | 
 | * | 
 | * Modification History: | 
 | * | 
 | *   Date        Name        Description | 
 | *   07/07/2003  weiv        Creation. | 
 | ******************************************************************************* | 
 | */ | 
 |  | 
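 //
 //   class StrengthProbe
 //
 //      Implementation of the StrengthProbe methods. A StrengthProbe uses a
 //      caller-supplied comparison function together with a separator
 //      character and four probe characters to estimate the collation
 //      strength at which two lines (strings) differ.
 //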
 |  | 
 | #include "strengthprobe.h" | 
 |  | 
 | StrengthProbe::StrengthProbe(CompareFn comparer, GetSortKeyFn getter, UChar SE,  | 
 |                              UChar B0, UChar B1, UChar B2, UChar B3) : | 
 | SE(SE), | 
 | B0(B0), B1(B1), B2(B2), B3(B3), | 
 | utilFirstP(&utilFirst), utilSecondP(&utilSecond), | 
 | frenchSecondary(FALSE), | 
 | comparer(comparer), skgetter(getter) | 
 | { | 
 | } | 
 |  | 
 | int | 
 | StrengthProbe::setProbeChars(UChar B0, UChar B1, UChar B2, UChar B3) | 
 | { | 
 |   this->B0 = B0; | 
 |   this->B1 = B1; | 
 |   this->B2 = B2; | 
 |   this-> | 
 | B3 = B3; | 
 |   return checkSanity(); | 
 | } | 
 |  | 
 | int | 
 | StrengthProbe::checkSanity()  | 
 | { | 
 |   int sanityRes; | 
 |   utilFirst.setTo(B0); | 
 |   utilSecond.setTo(B3); | 
 |   if((sanityRes = comparer(&utilFirstP, &utilSecondP)) >= 0) { | 
 |     return sanityRes*10 + 3; | 
 |   } | 
 |   utilSecond.setTo(B2); | 
 |   if((sanityRes = comparer(&utilFirstP, &utilSecondP)) >= 0) { | 
 |     return sanityRes*10 + 2; | 
 |   } | 
 |   utilSecond.setTo(B1); | 
 |   if((sanityRes = comparer(&utilFirstP, &utilSecondP)) >= 0) { | 
 |     return sanityRes*10 + 1; | 
 |   } | 
 |   utilFirst.setTo(B3); | 
 |   utilSecond.setTo(B2); | 
 |   if((sanityRes = comparer(&utilFirstP, &utilSecondP)) >= 0) { | 
 |     return sanityRes*10 + 5; | 
 |   } | 
 |   utilSecond.setTo(B1); | 
 |   if((sanityRes = comparer(&utilFirstP, &utilSecondP)) >= 0) { | 
 |     return sanityRes*10 + 4; | 
 |   } | 
 |   utilFirst.setTo(B2); | 
 |   if((sanityRes = comparer(&utilFirstP, &utilSecondP)) >= 0) { | 
 |     return sanityRes*10 + 6; | 
 |   } | 
 |   utilFirst.setTo(B0); | 
 |   if(distanceFromEmptyString(utilFirst) > UCOL_PRIMARY) { | 
 |     return 1000; | 
 |   } | 
 |   utilFirst.setTo(B1); | 
 |   if(distanceFromEmptyString(utilFirst) > UCOL_PRIMARY) { | 
 |     return 1001; | 
 |   } | 
 |   utilFirst.setTo(B2); | 
 |   if(distanceFromEmptyString(utilFirst) > UCOL_PRIMARY) { | 
 |     return 1002; | 
 |   } | 
 |   utilFirst.setTo(B3); | 
 |   if(distanceFromEmptyString(utilFirst) > UCOL_PRIMARY) { | 
 |     return 1003; | 
 |   } | 
 |   return 0; | 
 | } | 
 |  | 
 | UBool  | 
 | StrengthProbe::probePrefix(const Line &x, const Line &y, UChar first, UChar second) { | 
 |   utilFirst.name[0] = first; | 
 |   utilFirst.name[1] = SE; | 
 |   u_strcpy(utilFirst.name+2, x.name); | 
 |   utilFirst.name[x.len+2] = 0; | 
 |   utilFirst.len = x.len+2; | 
 |  | 
 |   utilSecond.name[0] = second; | 
 |   utilSecond.name[1] = SE; | 
 |   u_strcpy(utilSecond.name+2, y.name); | 
 |   utilSecond.name[y.len+2] = 0; | 
 |   utilSecond.len = y.len+2; | 
 |  | 
 |   if(comparer(&utilFirstP, &utilSecondP) < 0) { | 
 |     return TRUE; | 
 |   } else { | 
 |     return FALSE; | 
 |   } | 
 | } | 
 |  | 
 | UBool  | 
 | StrengthProbe::probeSuffix(const Line &x, const Line &y, UChar first, UChar second) { | 
 |   u_strcpy(utilFirst.name, x.name); | 
 |   utilFirst.name[x.len] = SE; | 
 |   utilFirst.name[x.len+1] = first; | 
 |   utilFirst.name[x.len+2] = 0; | 
 |   utilFirst.len = x.len + 2; | 
 |   u_strcpy(utilSecond.name, y.name); | 
 |   utilSecond.name[y.len] = SE; | 
 |   utilSecond.name[y.len+1] = second; | 
 |   utilSecond.name[y.len+2] = 0; | 
 |   utilSecond.len = y.len + 2; | 
 |  | 
 |   if(comparer(&utilFirstP, &utilSecondP) < 0) { | 
 |     return TRUE; | 
 |   } else { | 
 |     return FALSE; | 
 |   } | 
 | } | 
 |  | 
 | UBool  | 
 | StrengthProbe::probePrefixNoSep(const Line &x, const Line &y, UChar first, UChar second) { | 
 |   utilFirst.name[0] = first; | 
 |   u_strcpy(utilFirst.name+1, x.name); | 
 |   utilFirst.name[x.len+1] = 0; | 
 |   utilFirst.len = x.len + 1; | 
 |  | 
 |   utilSecond.name[0] = second; | 
 |   u_strcpy(utilSecond.name+1, y.name); | 
 |   utilSecond.name[y.len+1] = 0; | 
 |   utilSecond.len = y.len + 1; | 
 |  | 
 |   if(comparer(&utilFirstP, &utilSecondP) < 0) { | 
 |     return TRUE; | 
 |   } else { | 
 |     return FALSE; | 
 |   } | 
 | } | 
 |  | 
 | UBool  | 
 | StrengthProbe::probeSuffixNoSep(const Line &x, const Line &y, UChar first, UChar second) { | 
 |   u_strcpy(utilFirst.name, x.name); | 
 |   utilFirst.name[x.len] = first; | 
 |   utilFirst.name[x.len+1] = 0; | 
 |   utilFirst.len = x.len + 1; | 
 |   u_strcpy(utilSecond.name, y.name); | 
 |   utilSecond.name[y.len] = second; | 
 |   utilSecond.name[y.len+1] = 0; | 
 |   utilSecond.len = y.len + 1; | 
 |  | 
 |   if(comparer(&utilFirstP, &utilSecondP) < 0) { | 
 |     return TRUE; | 
 |   } else { | 
 |     return FALSE; | 
 |   } | 
 | } | 
 |  | 
 | UColAttributeValue  | 
 | StrengthProbe::getStrength(const Line &x, const Line &y) { | 
 |   const Line *xp = &x; | 
 |   const Line *yp = &y; | 
 |  | 
 |   Line empty; | 
 |   Line *emptyP = ∅ | 
 |   if(comparer(&emptyP, &xp) == 0) { | 
 |     return distanceFromEmptyString(y); | 
 |   } | 
 |  | 
 |   int32_t result = comparer(&xp, &yp); | 
 |  | 
 |   if(result == 0) { | 
 |     return UCOL_IDENTICAL; | 
 |   } else if(result > 0) { | 
 |     return UCOL_OFF; // bad situation | 
 |   } else { // we need to probe strength | 
 |     if(probeSuffix(x, y, B1, B0)) { | 
 |     //if(probePrefix(x, y, B2, B0)) { // swamps secondary difference | 
 |       return UCOL_PRIMARY; | 
 |     } else if(probePrefix(x, y, B3, B0)) { // swamps tertiary difference | 
 |       return UCOL_SECONDARY; | 
 |     } else if(probeSuffix(x, y, B3, B0)) { // swamped by tertiary difference | 
 |       return UCOL_TERTIARY; | 
 |     } else if(!probePrefix(x, y, B3, B0)) { | 
 |       return UCOL_QUATERNARY; | 
 |     } | 
 |     /* | 
 |     //if(probeSuffix(x, y, B1, B0)) { | 
 |     if(probePrefix(x, y, B2, B0)) { // swamps secondary difference | 
 |       return UCOL_PRIMARY; | 
 |     } else if(probePrefix(x, y, B3, B0)) { // swamps tertiary difference | 
 |       return UCOL_SECONDARY; | 
 |     } else if(probeSuffix(x, y, B3, B0)) { // swamped by tertiary difference | 
 |       return UCOL_TERTIARY; | 
 |     } else if(!probePrefix(x, y, B3, B0)) { | 
 |       return UCOL_QUATERNARY; | 
 |     } | 
 |     */ | 
 |   } | 
 |   return UCOL_OFF; // bad | 
 | } | 
 |  | 
 | UColAttributeValue  | 
 | StrengthProbe::getStrength(const UnicodeString &sx, const UnicodeString &sy) { | 
 |   Line x(sx); | 
 |   Line y(sy); | 
 |   return getStrength(x, y); | 
 | } | 
 |  | 
 | int32_t  | 
 | StrengthProbe::compare(const UnicodeString &sx, const UnicodeString &sy) { | 
 |   Line x(sx); | 
 |   Line y(sy); | 
 |   const Line *xp = &x; | 
 |   const Line *yp = &y; | 
 |   return comparer(&xp, &yp); | 
 | } | 
 |  | 
 | int32_t  | 
 | StrengthProbe::compare(const Line &x, const Line &y) { | 
 |   const Line *xp = &x; | 
 |   const Line *yp = &y; | 
 |   return comparer(&xp, &yp); | 
 | } | 
 |  | 
 | UColAttributeValue  | 
 | StrengthProbe::distanceFromEmptyString(const Line &x) { | 
 |   if(x.name[0] == 0x30D) { | 
 |     int32_t putBreakPointHere = 0; | 
 |   } | 
 |   Line empty; | 
 |   Line *emptyP = ∅ | 
 |   uint8_t buff[256]; | 
 |   getSortKey(empty.name, empty.len, buff, 256); | 
 |   Line B0Line(B0); | 
 |   Line *B0LineP = &B0Line; | 
 |   const Line *xp = &x; | 
 |   int32_t result = comparer(&emptyP, &xp); | 
 |   if(result == 0) { | 
 |     return UCOL_IDENTICAL; | 
 |   } else if(result > 0) { | 
 |     return UCOL_OFF; | 
 |   } | 
 |   result = comparer(&B0LineP, &xp); | 
 |   if(result <= 0) { | 
 |     return UCOL_PRIMARY; | 
 |   } | 
 |   Line sexb0(SE); | 
 |   sexb0.append(x.name, x.len); | 
 |   sexb0.append(B0); | 
 |  | 
 |   Line seb0(SE); | 
 |   seb0.append(B0); | 
 |   uint8_t seb0K[256]; | 
 |   uint8_t sexb0K[256]; | 
 |   uint8_t seb2K[256]; | 
 |   uint8_t seb3K[256]; | 
 |   memset(seb0K, 0, 256); | 
 |   memset(sexb0K, 0, 256); | 
 |   memset(seb2K, 0, 256); | 
 |   memset(seb3K, 0, 256); | 
 |  | 
 |   getSortKey(seb0, seb0K, 256); | 
 |   getSortKey(sexb0, sexb0K, 256); | 
 |  | 
 |   if(compare(seb0, sexb0) <= 0) { | 
 |     Line seb2(SE); | 
 |     seb2.append(B2); | 
 |     getSortKey(seb2, seb2K, 256); | 
 |     result = compare(seb2, sexb0); | 
 |     if((result <= 0 && !frenchSecondary) || (result >= 0 && frenchSecondary)) { // swamps tertiary difference | 
 |       return UCOL_SECONDARY; | 
 |     } | 
 |     Line seb3(SE); | 
 |     seb3.append(B3); | 
 |     getSortKey(seb3, seb3K, 256); | 
 |     if(compare(seb3, sexb0) < 0) { | 
 |       return UCOL_TERTIARY; | 
 |     } | 
 |     return UCOL_QUATERNARY; | 
 |   } else { | 
 |     // if this was UCA, we would have a primary difference. | 
 |     // however, this might not be so, since not everybody  | 
 |     // makes well formed CEs. | 
 |     // in cs_CZ on linux, space is tertiary ignorable, but | 
 |     // its quaternary level strength is lower than quad  | 
 |     // strengths for non-ignorables. oh well, more testing | 
 |     // required | 
 |     // I think that we can only have quaternary difference | 
 |     // here (in addition to primary difference). | 
 |     //if(!probePrefix(x, empty, B3, B0)) { | 
 |       //return UCOL_QUATERNARY; | 
 |     //} else { | 
 |       return UCOL_PRIMARY; | 
 |     //} | 
 |   } | 
 | } | 
 |  | 
 | UColAttributeValue  | 
 | StrengthProbe::distanceFromEmptyString(const UnicodeString &x) { | 
 |   const Line xp(x); | 
 |   return distanceFromEmptyString(xp); | 
 | } | 
 |  | 
 |  | 
 | UColAttributeValue  | 
 | StrengthProbe::getPrefixedStrength(const Line &prefix, const Line &x, const Line &y) { | 
 |   contractionUtilFirst.setToConcat(&prefix, &x); | 
 |   contractionUtilSecond.setToConcat(&prefix, &y); | 
 |   return getStrength(contractionUtilFirst, contractionUtilSecond); | 
 | } | 
 |  | 
 |  | 
 | StrengthProbe::StrengthProbe(const StrengthProbe &that) { | 
 |   *this = that; | 
 | } | 
 |  | 
 | StrengthProbe & | 
 | StrengthProbe::operator=(const StrengthProbe &that) { | 
 |   if(this != &that) { | 
 |     B0 = that.B0; | 
 |     B1 = that.B1; | 
 |     B2 = that.B2; | 
 |     B3 = that.B3; | 
 |     SE = that.SE; | 
 |     frenchSecondary = that.frenchSecondary; | 
 |     comparer = that.comparer; | 
 |     skgetter = that.skgetter; | 
 |  | 
 |     utilFirstP = &utilFirst; | 
 |     utilSecondP = &utilSecond; | 
 |   } | 
 |  | 
 |   return *this; | 
 | } | 
 |  | 
 | UBool | 
 | StrengthProbe::isFrenchSecondary(UErrorCode &status) { | 
 |   utilFirst.setTo(B0); | 
 |   utilFirst.append(SE); | 
 |   utilFirst.append(B2); | 
 |   utilSecond.setTo(B2); | 
 |   utilSecond.append(SE); | 
 |   utilSecond.append(B0); | 
 |  | 
 |   int32_t result = compare(utilFirst, utilSecond); | 
 |  | 
 |   if(result < 0) { | 
 |     return FALSE; | 
 |   } else if(result > 0) { | 
 |     frenchSecondary = TRUE; | 
 |     return TRUE; | 
 |   } else { | 
 |     status = U_INTERNAL_PROGRAM_ERROR; | 
 |     return FALSE; | 
 |   } | 
 | } | 
 |  | 
 | UBool | 
 | StrengthProbe::isUpperFirst(UErrorCode &status) { | 
 |   UChar i = 0; | 
 |   int32_t result = 0; | 
 |   int32_t upper = 0, lower = 0, equal = 0; | 
 |   for(i = 0x41; i < 0x5B; i++) { | 
 |     utilFirst.setTo(i); | 
 |     utilSecond.setTo(i+0x20); | 
 |     result = compare(utilFirst, utilSecond); | 
 |     if(result < 0) { | 
 |       upper++; | 
 |     } else if(result > 0) { | 
 |       lower++; | 
 |     } else { | 
 |       equal++; | 
 |     } | 
 |   } | 
 |    | 
 |   if(lower == 0 && equal == 0) { | 
 |     return TRUE; | 
 |   } | 
 |   if(upper == 0 && equal == 0) { | 
 |     return FALSE; | 
 |   } | 
 |   status = U_INTERNAL_PROGRAM_ERROR; | 
 |   return FALSE; | 
 | } | 
 |  |