blob: fcac4c11ddc37aec6bdb6488ba0ea9cfe3486fee [file] [log] [blame]
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// layoutpropsbuilder.cpp
// created: 2018aug30 Markus W. Scherer
#include <stdio.h>
#include <string.h>
#include "unicode/utypes.h"
#include "unicode/uchar.h"
#include "unicode/ucptrie.h"
#include "unicode/udata.h"
#include "unicode/umutablecptrie.h"
#include "unicode/uniset.h"
#include "cmemory.h"
#include "genprops.h"
#include "ppucd.h"
#include "uassert.h"
#include "writesrc.h"
U_NAMESPACE_USE
// PropsBuilder implementation for three integer-valued text layout properties:
// Indic_Positional_Category (InPC), Indic_Syllabic_Category (InSC)
// and Vertical_Orientation (vo).
// Values are collected per property in a mutable code point trie,
// which build() turns into an immutable trie for the write...() functions.
// The largest value seen for each integer property is tracked as well.
class LayoutPropsBuilder : public PropsBuilder {
public:
    // Opens the mutable tries; reports failures via errorCode.
    LayoutPropsBuilder(UErrorCode &errorCode);
    // Closes all mutable and immutable tries.
    virtual ~LayoutPropsBuilder() U_OVERRIDE;

    // Records the values of the three layout properties for props.start..props.end.
    virtual void setProps(const UniProps &props, const UnicodeSet &newValues,
                          UErrorCode &errorCode) U_OVERRIDE;
    // Range-checks the maximum values and builds the immutable tries.
    virtual void build(UErrorCode &errorCode) U_OVERRIDE;
    // Writes the maximum values and the tries as C source code.
    virtual void writeCSourceFile(const char *path, UErrorCode &errorCode) U_OVERRIDE;
    // Writes the maximum values and the tries as Java string initializers.
    virtual void writeJavaSourceFile(const char *path, UErrorCode &errorCode) U_OVERRIDE;

private:
    // Stores the value of one integer property for one code point range into its trie.
    void setIntProp(const UniProps &props, const UnicodeSet &newValues,
                    UProperty prop, UMutableCPTrie *trie,
                    UErrorCode &errorCode);

    // Returns the largest value recorded so far for prop.
    // prop must be an integer property: The array index is relative to UCHAR_INT_START.
    int32_t getMaxIntValue(UProperty prop) const {
        const int32_t index = prop - UCHAR_INT_START;
        return maxIntValues[index];
    }

    // Sets errorCode if the largest recorded value for prop exceeds maxMax.
    void checkMaxIntValue(UProperty prop, int32_t maxMax, UErrorCode &errorCode) const;

    // Writes a "max<name>Value" int32_t constant definition for prop to f.
    void writeMaxIntValue(FILE *f, const char *name, UProperty prop) const {
        const long maxValue = static_cast<long>(getMaxIntValue(prop));
        fprintf(f, "static const int32_t max%sValue = %ld;\n\n", name, maxValue);
    }

    // Largest value seen per integer property, indexed by (prop - UCHAR_INT_START).
    int32_t maxIntValues[UCHAR_INT_LIMIT - UCHAR_INT_START];

    // Mutable tries that collect the property values.
    UMutableCPTrie *inpcMutableTrie;
    UMutableCPTrie *inscMutableTrie;
    UMutableCPTrie *voMutableTrie;

    // Immutable tries; nullptr until build() sets them.
    UCPTrie *inpcTrie;
    UCPTrie *inscTrie;
    UCPTrie *voTrie;
};
// Zeroes the per-property maximum values, opens one mutable trie per property
// (both umutablecptrie_open() value arguments are 0),
// and starts with no immutable tries.
// Any umutablecptrie_open() failure is left in errorCode and reported on stderr.
LayoutPropsBuilder::LayoutPropsBuilder(UErrorCode &errorCode) :
        maxIntValues(),
        inpcMutableTrie(umutablecptrie_open(0, 0, &errorCode)),
        inscMutableTrie(umutablecptrie_open(0, 0, &errorCode)),
        voMutableTrie(umutablecptrie_open(0, 0, &errorCode)),
        inpcTrie(nullptr), inscTrie(nullptr), voTrie(nullptr) {
    if (U_SUCCESS(errorCode)) {
        return;
    }
    fprintf(stderr, "genprops error: layoutpropsbuilder umutablecptrie_open() failed - %s\n",
            u_errorName(errorCode));
}
// Releases the mutable and the immutable trie of each property.
LayoutPropsBuilder::~LayoutPropsBuilder() {
    // Indic_Positional_Category
    umutablecptrie_close(inpcMutableTrie);
    ucptrie_close(inpcTrie);

    // Indic_Syllabic_Category
    umutablecptrie_close(inscMutableTrie);
    ucptrie_close(inscTrie);

    // Vertical_Orientation
    umutablecptrie_close(voMutableTrie);
    ucptrie_close(voTrie);
}
void
LayoutPropsBuilder::setProps(const UniProps &props, const UnicodeSet &newValues,
UErrorCode &errorCode) {
setIntProp(props, newValues, UCHAR_INDIC_POSITIONAL_CATEGORY, inpcMutableTrie, errorCode);
setIntProp(props, newValues, UCHAR_INDIC_SYLLABIC_CATEGORY, inscMutableTrie, errorCode);
setIntProp(props, newValues, UCHAR_VERTICAL_ORIENTATION, voMutableTrie, errorCode);
}
void LayoutPropsBuilder::setIntProp(const UniProps &props, const UnicodeSet &newValues,
UProperty prop, UMutableCPTrie *trie,
UErrorCode &errorCode) {
if (U_SUCCESS(errorCode) && newValues.contains(prop)) {
UChar32 start=props.start;
UChar32 end=props.end;
int32_t value = props.getIntProp(prop);
if (value < 0) {
fprintf(stderr, "error: unencodable negative value for property 0x%x %04lX..%04lX=%ld\n",
(int)prop, (long)start, (long)end, (long)value);
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
if (value > maxIntValues[prop - UCHAR_INT_START]) {
maxIntValues[prop - UCHAR_INT_START] = value;
}
if (start == end) {
umutablecptrie_set(trie, start, value, &errorCode);
} else {
umutablecptrie_setRange(trie, start, end, value, &errorCode);
}
if (U_FAILURE(errorCode)) {
fprintf(stderr, "error: umutablecptrie_set(prop 0x%x trie %04lX..%04lX) failed - %s\n",
(int)prop, (long)start, (long)end, u_errorName(errorCode));
}
}
}
namespace {
// Builds an immutable trie of the given type and value width from mutableTrie.
// On failure, reports the error on stderr together with the name.
// On success, and unless beQuiet is set, prints the serialized size of the trie.
// Returns whatever umutablecptrie_buildImmutable() returned.
UCPTrie *buildUCPTrie(const char *name, UMutableCPTrie *mutableTrie,
                      UCPTrieType type, UCPTrieValueWidth valueWidth, UErrorCode &errorCode) {
    UCPTrie *result = umutablecptrie_buildImmutable(mutableTrie, type, valueWidth, &errorCode);
    if (U_FAILURE(errorCode)) {
        fprintf(stderr, "genprops error: %s trie buildImmutable() failed: %s\n",
                name, u_errorName(errorCode));
    } else if (!beQuiet) {
        // Only the returned length is of interest here:
        // There is no output buffer, and the status of this call is deliberately
        // kept separate from errorCode and ignored.
        UErrorCode sizingStatus = U_ZERO_ERROR;
        const int32_t byteSize = ucptrie_toBinary(result, nullptr, 0, &sizingStatus);
        printf("%11s trie size in bytes: %5u\n", name, static_cast<int>(byteSize));
    }
    return result;
}
} // namespace
// Verifies that the maximum value of each layout property fits into 8 bits (0xff),
// then builds the small-type, 8-bit-value immutable tries from the mutable ones.
// Does nothing if errorCode already indicates a failure.
void
LayoutPropsBuilder::build(UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) { return; }
    if (!beQuiet) {
        puts("* text layout properties stats *");
    }

    static const UProperty checkedProps[] = {
        UCHAR_INDIC_POSITIONAL_CATEGORY,
        UCHAR_INDIC_SYLLABIC_CATEGORY,
        UCHAR_VERTICAL_ORIENTATION
    };
    for (UProperty prop : checkedProps) {
        checkMaxIntValue(prop, 0xff, errorCode);
    }

    const UCPTrieType trieType = UCPTRIE_TYPE_SMALL;
    const UCPTrieValueWidth width = UCPTRIE_VALUE_BITS_8;
    inpcTrie = buildUCPTrie("inpc", inpcMutableTrie, trieType, width, errorCode);
    inscTrie = buildUCPTrie("insc", inscMutableTrie, trieType, width, errorCode);
    voTrie = buildUCPTrie("vo", voMutableTrie, trieType, width, errorCode);
}
// Checks that the largest value recorded for prop does not exceed maxMax.
// If it does, prints an error message and sets U_ILLEGAL_ARGUMENT_ERROR.
// An existing errorCode value is otherwise left alone.
void LayoutPropsBuilder::checkMaxIntValue(UProperty prop, int32_t maxMax,
                                          UErrorCode &errorCode) const {
    const int32_t recordedMax = getMaxIntValue(prop);
    if (recordedMax <= maxMax) {
        return;
    }
    fprintf(stderr, "genprops error: 0x%x max value = %d overflow\n",
            static_cast<int>(prop), static_cast<int>(recordedMax));
    errorCode = U_ILLEGAL_ARGUMENT_ERROR;
}
void
LayoutPropsBuilder::writeCSourceFile(const char *path, UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) { return; }
FILE *f = usrc_create(path, "ulayout_props_data.h", 2018,
"icu/tools/unicode/c/genprops/layoutpropsbuilder.cpp");
if (f == nullptr) {
errorCode = U_FILE_ACCESS_ERROR;
return;
}
fputs("#ifdef INCLUDED_FROM_UPROPS_CPP\n\n", f);
writeMaxIntValue(f, "InPC", UCHAR_INDIC_POSITIONAL_CATEGORY);
usrc_writeUCPTrie(f, "inpc", inpcTrie);
writeMaxIntValue(f, "InSC", UCHAR_INDIC_SYLLABIC_CATEGORY);
usrc_writeUCPTrie(f, "insc", inscTrie);
writeMaxIntValue(f, "Vo", UCHAR_VERTICAL_ORIENTATION);
usrc_writeUCPTrie(f, "vo", voTrie);
fputs("#endif // INCLUDED_FROM_UPROPS_CPP\n", f);
fclose(f);
}
namespace {
// Appends the Java String literal source text for one byte value to s,
// starting at s[length], and returns the new length.
// No terminating NUL is written.
//
// A Java class file stores U+0001..U+007F in one byte each,
// but U+0000 and U+0080..U+07FF in two bytes each.
// That is compact when small byte values dominate, which is the usual case.
// Because 0 is very common but would cost two bytes, 0 and U+007A 'z' trade places:
// byte 0 is written as "z", and byte 0x7a is written as "\0".
//
// Every other byte becomes an octal escape with as few digits as possible.
// (Java has no \xhh escapes.)
// Writing ASCII characters directly where possible would shrink the source file
// but not the class file, and would require care to still escape certain characters
// and to escape digits that follow a short octal escape.
int32_t appendByte(char *s, int32_t length, uint8_t b) {
    char *out = s + length;
    switch (b) {
    case 0:
        *out++ = 'z';
        break;
    case 0x7a:
        *out++ = '\\';
        *out++ = '0';
        break;
    default: {
        const int32_t top = b >> 6;       // 0..3
        const int32_t upper = b >> 3;     // all bits above the lowest octal digit
        *out++ = '\\';
        if (top != 0) {
            *out++ = static_cast<char>('0' + top);
        }
        if (upper != 0) {
            *out++ = static_cast<char>('0' + (upper & 7));
        }
        *out++ = static_cast<char>('0' + (b & 7));
        break;
    }
    }
    return static_cast<int32_t>(out - s);
}
// Writes the bytes as a Java String expression to f:
// one double-quoted literal per group of up to 16 bytes, one group per line.
// All lines but the last end with " +" for string concatenation;
// the last line is followed by an empty line.
// Zero bytes yield a single empty literal.
// Assumes length >= 0: The loop only ends once exactly length bytes have been consumed.
void writeBytesAsJavaString(FILE *f, const uint8_t *bytes, int32_t length) {
    // Opening quote, up to 16 bytes with up to 4 chars "\377" each,
    // closing quote, continuation or line feeds, and the NUL terminator.
    char line[80];
    int32_t next = 0;  // index of the next byte to write
    do {
        int32_t groupLimit = next + 16;
        if (groupLimit > length) {
            groupLimit = length;
        }

        line[0] = '"';
        int32_t lineLength = 1;
        while (next < groupLimit) {
            lineLength = appendByte(line, lineLength, bytes[next++]);
        }

        // End of the whole string vs. end of a full line with more bytes to come.
        const char *suffix = (next == length) ? "\"\n\n" : "\" +\n";
        // Copy the suffix including its NUL terminator.
        while ((line[lineLength++] = *suffix++) != 0) {}
        fputs(line, f);
    } while (next != length);
}
static uint8_t trieBlock[100000];
void writeUCPTrieAsJavaString(FILE *f, const UCPTrie *trie, UErrorCode &errorCode) {
int32_t length = ucptrie_toBinary(trie, trieBlock, UPRV_LENGTHOF(trieBlock), &errorCode);
writeBytesAsJavaString(f, trieBlock, length);
}
} // namespace
// So far, this writes initializers to be copied into Java code, but not a complete Java file.
// We should probably write a regular, binary ICU data file and read that into Java.
void
LayoutPropsBuilder::writeJavaSourceFile(const char * /*path*/, UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) { return; }
FILE *f = usrc_create("/tmp", "ulayout_props_data.txt", 2018,
"icu/tools/unicode/c/genprops/layoutpropsbuilder.cpp");
if (f == nullptr) {
errorCode = U_FILE_ACCESS_ERROR;
return;
}
writeMaxIntValue(f, "InPC", UCHAR_INDIC_POSITIONAL_CATEGORY);
writeUCPTrieAsJavaString(f, inpcTrie, errorCode);
writeMaxIntValue(f, "InSC", UCHAR_INDIC_SYLLABIC_CATEGORY);
writeUCPTrieAsJavaString(f, inscTrie, errorCode);
writeMaxIntValue(f, "Vo", UCHAR_VERTICAL_ORIENTATION);
writeUCPTrieAsJavaString(f, voTrie, errorCode);
fclose(f);
puts(" ++ Java initializers written to /tmp/ulayout_props_data.txt");
}
// Factory function: Returns a new LayoutPropsBuilder,
// or nullptr if errorCode already indicates a failure on entry.
// Sets U_MEMORY_ALLOCATION_ERROR if the allocation yields nullptr.
// A failure reported by the constructor is left in errorCode;
// the builder object is still returned in that case.
PropsBuilder *
createLayoutPropsBuilder(UErrorCode &errorCode) {
    if (U_SUCCESS(errorCode)) {
        PropsBuilder *builder = new LayoutPropsBuilder(errorCode);
        if (builder == nullptr) {
            errorCode = U_MEMORY_ALLOCATION_ERROR;
        }
        return builder;
    }
    return nullptr;
}