| // © 2017 and later: Unicode, Inc. and others. |
| // License & terms of use: http://www.unicode.org/copyright.html |
| /* |
| ******************************************************************************* |
| * |
| * Copyright (C) 2003, International Business Machines |
| * Corporation and others. All Rights Reserved. |
| * |
| ******************************************************************************* |
| * |
| * File strengthprobe.cpp |
| * |
| * Modification History: |
| * |
| * Date Name Description |
| * 07/07/2003 weiv Creation. |
| ******************************************************************************* |
| */ |
| |
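| // |
| // class StrengthProbe |
| // |
| // Implementation of the StrengthProbe member functions. A probe uses a |
| // caller-supplied comparison function and a small set of probe characters |
| // to work out at which collation strength two strings differ. |
| // |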
| |
| #include "strengthprobe.h" |
| |
| StrengthProbe::StrengthProbe(CompareFn comparer, GetSortKeyFn getter, UChar SE, |
| UChar B0, UChar B1, UChar B2, UChar B3) : |
| SE(SE), |
| B0(B0), B1(B1), B2(B2), B3(B3), |
| utilFirstP(&utilFirst), utilSecondP(&utilSecond), |
| frenchSecondary(FALSE), |
| comparer(comparer), skgetter(getter) |
| { |
| } |
| |
| int |
| StrengthProbe::setProbeChars(UChar B0, UChar B1, UChar B2, UChar B3) |
| { |
| this->B0 = B0; |
| this->B1 = B1; |
| this->B2 = B2; |
| this-> |
| B3 = B3; |
| return checkSanity(); |
| } |
| |
| int |
| StrengthProbe::checkSanity() |
| { |
| int sanityRes; |
| utilFirst.setTo(B0); |
| utilSecond.setTo(B3); |
| if((sanityRes = comparer(&utilFirstP, &utilSecondP)) >= 0) { |
| return sanityRes*10 + 3; |
| } |
| utilSecond.setTo(B2); |
| if((sanityRes = comparer(&utilFirstP, &utilSecondP)) >= 0) { |
| return sanityRes*10 + 2; |
| } |
| utilSecond.setTo(B1); |
| if((sanityRes = comparer(&utilFirstP, &utilSecondP)) >= 0) { |
| return sanityRes*10 + 1; |
| } |
| utilFirst.setTo(B3); |
| utilSecond.setTo(B2); |
| if((sanityRes = comparer(&utilFirstP, &utilSecondP)) >= 0) { |
| return sanityRes*10 + 5; |
| } |
| utilSecond.setTo(B1); |
| if((sanityRes = comparer(&utilFirstP, &utilSecondP)) >= 0) { |
| return sanityRes*10 + 4; |
| } |
| utilFirst.setTo(B2); |
| if((sanityRes = comparer(&utilFirstP, &utilSecondP)) >= 0) { |
| return sanityRes*10 + 6; |
| } |
| utilFirst.setTo(B0); |
| if(distanceFromEmptyString(utilFirst) > UCOL_PRIMARY) { |
| return 1000; |
| } |
| utilFirst.setTo(B1); |
| if(distanceFromEmptyString(utilFirst) > UCOL_PRIMARY) { |
| return 1001; |
| } |
| utilFirst.setTo(B2); |
| if(distanceFromEmptyString(utilFirst) > UCOL_PRIMARY) { |
| return 1002; |
| } |
| utilFirst.setTo(B3); |
| if(distanceFromEmptyString(utilFirst) > UCOL_PRIMARY) { |
| return 1003; |
| } |
| return 0; |
| } |
| |
| UBool |
| StrengthProbe::probePrefix(const Line &x, const Line &y, UChar first, UChar second) { |
| utilFirst.name[0] = first; |
| utilFirst.name[1] = SE; |
| u_strcpy(utilFirst.name+2, x.name); |
| utilFirst.name[x.len+2] = 0; |
| utilFirst.len = x.len+2; |
| |
| utilSecond.name[0] = second; |
| utilSecond.name[1] = SE; |
| u_strcpy(utilSecond.name+2, y.name); |
| utilSecond.name[y.len+2] = 0; |
| utilSecond.len = y.len+2; |
| |
| if(comparer(&utilFirstP, &utilSecondP) < 0) { |
| return TRUE; |
| } else { |
| return FALSE; |
| } |
| } |
| |
| UBool |
| StrengthProbe::probeSuffix(const Line &x, const Line &y, UChar first, UChar second) { |
| u_strcpy(utilFirst.name, x.name); |
| utilFirst.name[x.len] = SE; |
| utilFirst.name[x.len+1] = first; |
| utilFirst.name[x.len+2] = 0; |
| utilFirst.len = x.len + 2; |
| u_strcpy(utilSecond.name, y.name); |
| utilSecond.name[y.len] = SE; |
| utilSecond.name[y.len+1] = second; |
| utilSecond.name[y.len+2] = 0; |
| utilSecond.len = y.len + 2; |
| |
| if(comparer(&utilFirstP, &utilSecondP) < 0) { |
| return TRUE; |
| } else { |
| return FALSE; |
| } |
| } |
| |
| UBool |
| StrengthProbe::probePrefixNoSep(const Line &x, const Line &y, UChar first, UChar second) { |
| utilFirst.name[0] = first; |
| u_strcpy(utilFirst.name+1, x.name); |
| utilFirst.name[x.len+1] = 0; |
| utilFirst.len = x.len + 1; |
| |
| utilSecond.name[0] = second; |
| u_strcpy(utilSecond.name+1, y.name); |
| utilSecond.name[y.len+1] = 0; |
| utilSecond.len = y.len + 1; |
| |
| if(comparer(&utilFirstP, &utilSecondP) < 0) { |
| return TRUE; |
| } else { |
| return FALSE; |
| } |
| } |
| |
| UBool |
| StrengthProbe::probeSuffixNoSep(const Line &x, const Line &y, UChar first, UChar second) { |
| u_strcpy(utilFirst.name, x.name); |
| utilFirst.name[x.len] = first; |
| utilFirst.name[x.len+1] = 0; |
| utilFirst.len = x.len + 1; |
| u_strcpy(utilSecond.name, y.name); |
| utilSecond.name[y.len] = second; |
| utilSecond.name[y.len+1] = 0; |
| utilSecond.len = y.len + 1; |
| |
| if(comparer(&utilFirstP, &utilSecondP) < 0) { |
| return TRUE; |
| } else { |
| return FALSE; |
| } |
| } |
| |
| UColAttributeValue |
| StrengthProbe::getStrength(const Line &x, const Line &y) { |
| const Line *xp = &x; |
| const Line *yp = &y; |
| |
| Line empty; |
| Line *emptyP = ∅ |
| if(comparer(&emptyP, &xp) == 0) { |
| return distanceFromEmptyString(y); |
| } |
| |
| int32_t result = comparer(&xp, &yp); |
| |
| if(result == 0) { |
| return UCOL_IDENTICAL; |
| } else if(result > 0) { |
| return UCOL_OFF; // bad situation |
| } else { // we need to probe strength |
| if(probeSuffix(x, y, B1, B0)) { |
| //if(probePrefix(x, y, B2, B0)) { // swamps secondary difference |
| return UCOL_PRIMARY; |
| } else if(probePrefix(x, y, B3, B0)) { // swamps tertiary difference |
| return UCOL_SECONDARY; |
| } else if(probeSuffix(x, y, B3, B0)) { // swamped by tertiary difference |
| return UCOL_TERTIARY; |
| } else if(!probePrefix(x, y, B3, B0)) { |
| return UCOL_QUATERNARY; |
| } |
| /* |
| //if(probeSuffix(x, y, B1, B0)) { |
| if(probePrefix(x, y, B2, B0)) { // swamps secondary difference |
| return UCOL_PRIMARY; |
| } else if(probePrefix(x, y, B3, B0)) { // swamps tertiary difference |
| return UCOL_SECONDARY; |
| } else if(probeSuffix(x, y, B3, B0)) { // swamped by tertiary difference |
| return UCOL_TERTIARY; |
| } else if(!probePrefix(x, y, B3, B0)) { |
| return UCOL_QUATERNARY; |
| } |
| */ |
| } |
| return UCOL_OFF; // bad |
| } |
| |
| UColAttributeValue |
| StrengthProbe::getStrength(const UnicodeString &sx, const UnicodeString &sy) { |
| Line x(sx); |
| Line y(sy); |
| return getStrength(x, y); |
| } |
| |
| int32_t |
| StrengthProbe::compare(const UnicodeString &sx, const UnicodeString &sy) { |
| Line x(sx); |
| Line y(sy); |
| const Line *xp = &x; |
| const Line *yp = &y; |
| return comparer(&xp, &yp); |
| } |
| |
| int32_t |
| StrengthProbe::compare(const Line &x, const Line &y) { |
| const Line *xp = &x; |
| const Line *yp = &y; |
| return comparer(&xp, &yp); |
| } |
| |
| UColAttributeValue |
| StrengthProbe::distanceFromEmptyString(const Line &x) { |
| if(x.name[0] == 0x30D) { |
| int32_t putBreakPointHere = 0; |
| } |
| Line empty; |
| Line *emptyP = ∅ |
| uint8_t buff[256]; |
| getSortKey(empty.name, empty.len, buff, 256); |
| Line B0Line(B0); |
| Line *B0LineP = &B0Line; |
| const Line *xp = &x; |
| int32_t result = comparer(&emptyP, &xp); |
| if(result == 0) { |
| return UCOL_IDENTICAL; |
| } else if(result > 0) { |
| return UCOL_OFF; |
| } |
| result = comparer(&B0LineP, &xp); |
| if(result <= 0) { |
| return UCOL_PRIMARY; |
| } |
| Line sexb0(SE); |
| sexb0.append(x.name, x.len); |
| sexb0.append(B0); |
| |
| Line seb0(SE); |
| seb0.append(B0); |
| uint8_t seb0K[256]; |
| uint8_t sexb0K[256]; |
| uint8_t seb2K[256]; |
| uint8_t seb3K[256]; |
| memset(seb0K, 0, 256); |
| memset(sexb0K, 0, 256); |
| memset(seb2K, 0, 256); |
| memset(seb3K, 0, 256); |
| |
| getSortKey(seb0, seb0K, 256); |
| getSortKey(sexb0, sexb0K, 256); |
| |
| if(compare(seb0, sexb0) <= 0) { |
| Line seb2(SE); |
| seb2.append(B2); |
| getSortKey(seb2, seb2K, 256); |
| result = compare(seb2, sexb0); |
| if((result <= 0 && !frenchSecondary) || (result >= 0 && frenchSecondary)) { // swamps tertiary difference |
| return UCOL_SECONDARY; |
| } |
| Line seb3(SE); |
| seb3.append(B3); |
| getSortKey(seb3, seb3K, 256); |
| if(compare(seb3, sexb0) < 0) { |
| return UCOL_TERTIARY; |
| } |
| return UCOL_QUATERNARY; |
| } else { |
| // if this was UCA, we would have a primary difference. |
| // however, this might not be so, since not everybody |
| // makes well formed CEs. |
| // in cs_CZ on linux, space is tertiary ignorable, but |
| // its quaternary level strength is lower than quad |
| // strengths for non-ignorables. oh well, more testing |
| // required |
| // I think that we can only have quaternary difference |
| // here (in addition to primary difference). |
| //if(!probePrefix(x, empty, B3, B0)) { |
| //return UCOL_QUATERNARY; |
| //} else { |
| return UCOL_PRIMARY; |
| //} |
| } |
| } |
| |
| UColAttributeValue |
| StrengthProbe::distanceFromEmptyString(const UnicodeString &x) { |
| const Line xp(x); |
| return distanceFromEmptyString(xp); |
| } |
| |
| |
| UColAttributeValue |
| StrengthProbe::getPrefixedStrength(const Line &prefix, const Line &x, const Line &y) { |
| contractionUtilFirst.setToConcat(&prefix, &x); |
| contractionUtilSecond.setToConcat(&prefix, &y); |
| return getStrength(contractionUtilFirst, contractionUtilSecond); |
| } |
| |
| |
| StrengthProbe::StrengthProbe(const StrengthProbe &that) { |
| *this = that; |
| } |
| |
| StrengthProbe & |
| StrengthProbe::operator=(const StrengthProbe &that) { |
| if(this != &that) { |
| B0 = that.B0; |
| B1 = that.B1; |
| B2 = that.B2; |
| B3 = that.B3; |
| SE = that.SE; |
| frenchSecondary = that.frenchSecondary; |
| comparer = that.comparer; |
| skgetter = that.skgetter; |
| |
| utilFirstP = &utilFirst; |
| utilSecondP = &utilSecond; |
| } |
| |
| return *this; |
| } |
| |
| UBool |
| StrengthProbe::isFrenchSecondary(UErrorCode &status) { |
| utilFirst.setTo(B0); |
| utilFirst.append(SE); |
| utilFirst.append(B2); |
| utilSecond.setTo(B2); |
| utilSecond.append(SE); |
| utilSecond.append(B0); |
| |
| int32_t result = compare(utilFirst, utilSecond); |
| |
| if(result < 0) { |
| return FALSE; |
| } else if(result > 0) { |
| frenchSecondary = TRUE; |
| return TRUE; |
| } else { |
| status = U_INTERNAL_PROGRAM_ERROR; |
| return FALSE; |
| } |
| } |
| |
| UBool |
| StrengthProbe::isUpperFirst(UErrorCode &status) { |
| UChar i = 0; |
| int32_t result = 0; |
| int32_t upper = 0, lower = 0, equal = 0; |
| for(i = 0x41; i < 0x5B; i++) { |
| utilFirst.setTo(i); |
| utilSecond.setTo(i+0x20); |
| result = compare(utilFirst, utilSecond); |
| if(result < 0) { |
| upper++; |
| } else if(result > 0) { |
| lower++; |
| } else { |
| equal++; |
| } |
| } |
| |
| if(lower == 0 && equal == 0) { |
| return TRUE; |
| } |
| if(upper == 0 && equal == 0) { |
| return FALSE; |
| } |
| status = U_INTERNAL_PROGRAM_ERROR; |
| return FALSE; |
| } |
| |