ICU-7264 support supplementary code points in FractionalUCA.txt
X-SVN-Rev: 28773
diff --git a/tools/unicode/c/genuca/genuca.cpp b/tools/unicode/c/genuca/genuca.cpp
index c4d3f43..1bde05f 100644
--- a/tools/unicode/c/genuca/genuca.cpp
+++ b/tools/unicode/c/genuca/genuca.cpp
@@ -30,6 +30,7 @@
#include "ucol_imp.h"
#include "genuca.h"
#include "uoptions.h"
+#include "uparse.h"
#include "toolutil.h"
#include "unewdata.h"
#include "cstring.h"
@@ -37,6 +38,8 @@
#include <stdio.h>
+#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
+
/*
* Global - verbosity
*/
@@ -431,15 +434,12 @@
UCAElements *readAnElement(FILE *data, tempUCATable *t, UCAConstants *consts, UErrorCode *status) {
char buffer[2048], primary[100], secondary[100], tertiary[100];
- UBool detectedContraction;
int32_t i = 0;
unsigned int theValue;
char *pointer = NULL;
char *commentStart = NULL;
char *startCodePoint = NULL;
char *endCodePoint = NULL;
- char *spacePointer = NULL;
- char *dashPointer = NULL;
char *result = fgets(buffer, 2048, data);
int32_t buflen = (int32_t)uprv_strlen(buffer);
if(U_FAILURE(*status)) {
@@ -463,7 +463,8 @@
return NULL; // just a comment, skip whole line
}
- UCAElements *element = ≤ //(UCAElements *)malloc(sizeof(UCAElements));
+ UCAElements *element = ≤
+ memset(element, 0, sizeof(*element));
enum ActionType {
READCE,
@@ -512,7 +513,7 @@
{
fprintf(stderr, " scanf(hex) failed on !\n ");
}
- *(vt[cnt].what) = (UChar)theValue;
+ *(vt[cnt].what) = theValue;
//if(cnt == 1) { // first implicit
// we need to set the value for top next
//uint32_t nextTop = ucol_prv_calculateImplicitPrimary(0x4E00); // CJK base
@@ -567,8 +568,6 @@
fprintf(stdout, "UCA version [%hu.%hu.%hu.%hu]\n", UCAVersion[0], UCAVersion[1], UCAVersion[2], UCAVersion[3]);
}
}
- //element->cPoints[0] = (UChar)theValue;
- //return element;
return NULL;
}
}
@@ -576,7 +575,7 @@
//*status = U_INVALID_FORMAT_ERROR;
return NULL;
}
- element->variableTop = FALSE;
+ // element->variableTop = FALSE; -- see memset() above
startCodePoint = buffer;
endCodePoint = strchr(startCodePoint, ';');
@@ -589,47 +588,37 @@
*(endCodePoint) = 0;
}
- memset(element, 0, sizeof(*element));
+ char *pipePointer = strchr(buffer, '|');
+ if (pipePointer != NULL) {
+ // Read the prefix string which precedes the actual string.
+ *pipePointer = 0;
+ element->prefixSize =
+ u_parseString(startCodePoint,
+ element->prefixChars, LENGTHOF(element->prefixChars),
+ NULL, status);
+ if(U_FAILURE(*status)) {
+ fprintf(stderr, "error - parsing of prefix \"%s\" failed: %s\n",
+ startCodePoint, u_errorName(*status));
+ *status = U_INVALID_FORMAT_ERROR;
+ return NULL;
+ }
+ element->prefix = element->prefixChars;
+ startCodePoint = pipePointer + 1;
+ }
+ // Read the string which gets the CE(s) assigned.
+ element->cSize =
+ u_parseString(startCodePoint,
+ element->uchars, LENGTHOF(element->uchars),
+ NULL, status);
+ if(U_FAILURE(*status)) {
+ fprintf(stderr, "error - parsing of code point(s) \"%s\" failed: %s\n",
+ startCodePoint, u_errorName(*status));
+ *status = U_INVALID_FORMAT_ERROR;
+ return NULL;
+ }
element->cPoints = element->uchars;
- spacePointer = strchr(buffer, ' ');
- if(sscanf(buffer, "%4x", &theValue) != 1) /* read first code point */
- {
- fprintf(stderr, " scanf(hex) failed!\n ");
- }
- element->cPoints[0] = (UChar)theValue;
-
- if(spacePointer == 0) {
- detectedContraction = FALSE;
- element->cSize = 1;
- } else {
- dashPointer = strchr(buffer, '|');
- if (dashPointer != NULL) {
- // prefix characters
- element->prefixChars[0] = (UChar)theValue;
- element->prefixSize = 1;
- element->prefix = element->prefixChars;
- sscanf(dashPointer+1, "%4x", &theValue);
- element->cPoints[0] = (UChar)theValue;
- element->cSize = 1;
- }
- else {
- // Contractions or surrogate characters.
- i = 1;
- detectedContraction = TRUE;
- while(spacePointer != NULL) {
- sscanf(spacePointer+1, "%4x", &theValue);
- element->cPoints[i++] = (UChar)theValue;
- spacePointer = strchr(spacePointer+1, ' ');
- }
- element->cSize = i;
- }
-
-
- //fprintf(stderr, "Number of codepoints in contraction: %i\n", i);
- }
-
startCodePoint = endCodePoint+1;
commentStart = strchr(startCodePoint, '#');
@@ -1057,9 +1046,14 @@
/* first set up constants for implicit calculation */
uprv_uca_initImplicitConstants(status);
/* do the closure */
- int32_t noOfClosures = uprv_uca_canonicalClosure(t, NULL, status);
+ UnicodeSet closed;
+ int32_t noOfClosures = uprv_uca_canonicalClosure(t, NULL, &closed, status);
if(noOfClosures != 0) {
- fprintf(stderr, "Warning: %i canonical closures occured!\n", (int)noOfClosures);
+ fprintf(stderr, "Warning: %i canonical closures occured!\n", (int)noOfClosures);
+ UnicodeString pattern;
+ std::string utf8;
+ closed.toPattern(pattern, TRUE).toUTF8String(utf8);
+ fprintf(stderr, "UTF-8 pattern string: %s\n", utf8.c_str());
}
/* test */