blob: 88a05d103be0be09f2d45096f5b30bccb15e4d6e [file]
/* See LICENSE file for copyright and license details. */
#include <errno.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "util.h"
#define FILE_BIDI_BRACKETS "data/BidiBrackets.txt"
#define FILE_BIDI_CLASS "data/DerivedBidiClass.txt"
#define FILE_BIDI_MIRRORING "data/BidiMirroring.txt"
static const struct property_spec bidi_property[] = {
{
/* default */
.enumname = "L",
.file = FILE_BIDI_CLASS,
.ucdname = "L",
},
{
.enumname = "AL",
.file = FILE_BIDI_CLASS,
.ucdname = "AL",
},
{
.enumname = "AN",
.file = FILE_BIDI_CLASS,
.ucdname = "AN",
},
{
.enumname = "B",
.file = FILE_BIDI_CLASS,
.ucdname = "B",
},
{
.enumname = "BN",
.file = FILE_BIDI_CLASS,
.ucdname = "BN",
},
{
.enumname = "CS",
.file = FILE_BIDI_CLASS,
.ucdname = "CS",
},
{
.enumname = "EN",
.file = FILE_BIDI_CLASS,
.ucdname = "EN",
},
{
.enumname = "ES",
.file = FILE_BIDI_CLASS,
.ucdname = "ES",
},
{
.enumname = "ET",
.file = FILE_BIDI_CLASS,
.ucdname = "ET",
},
{
.enumname = "FSI",
.file = FILE_BIDI_CLASS,
.ucdname = "FSI",
},
{
.enumname = "LRE",
.file = FILE_BIDI_CLASS,
.ucdname = "LRE",
},
{
.enumname = "LRI",
.file = FILE_BIDI_CLASS,
.ucdname = "LRI",
},
{
.enumname = "LRO",
.file = FILE_BIDI_CLASS,
.ucdname = "LRO",
},
{
.enumname = "NSM",
.file = FILE_BIDI_CLASS,
.ucdname = "NSM",
},
{
.enumname = "ON",
.file = FILE_BIDI_CLASS,
.ucdname = "ON",
},
{
.enumname = "PDF",
.file = FILE_BIDI_CLASS,
.ucdname = "PDF",
},
{
.enumname = "PDI",
.file = FILE_BIDI_CLASS,
.ucdname = "PDI",
},
{
.enumname = "R",
.file = FILE_BIDI_CLASS,
.ucdname = "R",
},
{
.enumname = "RLE",
.file = FILE_BIDI_CLASS,
.ucdname = "RLE",
},
{
.enumname = "RLI",
.file = FILE_BIDI_CLASS,
.ucdname = "RLI",
},
{
.enumname = "RLO",
.file = FILE_BIDI_CLASS,
.ucdname = "RLO",
},
{
.enumname = "S",
.file = FILE_BIDI_CLASS,
.ucdname = "S",
},
{
.enumname = "WS",
.file = FILE_BIDI_CLASS,
.ucdname = "WS",
},
};
/*
 * Bracket records collected by bracket_callback(), one per accepted
 * line of FILE_BIDI_BRACKETS. The index of a record in this array is
 * what post_process() stores in the bits above bit 4 of the property
 * of cp_base, and main() prints every record as one entry of the
 * generated bidi_bracket[] table.
 */
static struct {
/* code point parsed from the first field of the line */
uint_least32_t cp_base;
/* code point parsed from the second field of the line */
uint_least32_t cp_pair;
/* 'o' or 'c' as taken from the third field; 0 in a zeroed record */
char type;
} *b = NULL;
/* element count of b as tracked by bracket_callback() (grows via ++blen) */
static size_t blen;
/*
 * Callback handed to parse_file_with_callback() for FILE_BIDI_BRACKETS;
 * it appends one record to the global bracket array b.
 *
 * Lines with fewer than three fields are skipped. Otherwise b grows by
 * one record, field[0] and field[1] are passed to hextocp() to fill
 * cp_base and cp_pair, and field[2] has to be exactly "o" or "c",
 * which becomes the record's type.
 *
 * Returns 0 if the line was skipped or stored and 1 if field[2] is
 * malformed. Exits with status 1 when realloc() fails.
 */
static int
bracket_callback(const char *file, char **field, size_t nfields, char *comment,
                 void *payload)
{
	const char *kind;
	size_t slot;

	(void)file;
	(void)comment;
	(void)payload;

	/* not enough fields, nothing to record */
	if (nfields < 3) {
		return 0;
	}

	/* grow the array by one record; the new one is the last slot */
	blen++;
	b = realloc(b, blen * sizeof(*b));
	if (b == NULL) {
		fprintf(stderr, "realloc: %s\n", strerror(errno));
		exit(1);
	}
	slot = blen - 1;

	/*
	 * NOTE(review): anything hextocp() reports back is ignored
	 * here -- confirm it cannot fail silently.
	 */
	hextocp(field[0], strlen(field[0]), &(b[slot].cp_base));
	hextocp(field[1], strlen(field[1]), &(b[slot].cp_pair));

	/* the type field must be the single character 'o' or 'c' */
	kind = field[2];
	if (strlen(kind) == 1 && (kind[0] == 'o' || kind[0] == 'c')) {
		b[slot].type = kind[0];
		return 0;
	}

	/* malformed line */
	return 1;
}
static void
post_process(struct properties *prop)
{
size_t i;
for (i = 0; i < blen; i++) {
/*
* given the base property fits in 5 bits, we simply
* store the bracket-offset in the bits above that.
*
* All those properties that are not set here implicitly
* have offset 0, which we prepared to contain a stub
* for a character that is not a bracket.
*/
prop[b[i].cp_base].property |= (i << 5);
}
}
/*
 * Return the default bidi class for a code point, based on the
 * @missing-properties in data/DerivedBidiClass.txt.
 *
 * The result is a position in bidi_property[]: 17 (R), 1 (AL) or
 * 8 (ET) for code points inside one of the listed ranges, and 0 (L)
 * for everything else. The ranges do not overlap, so they are simply
 * listed in ascending code point order.
 */
static uint_least8_t
fill_missing(uint_least32_t cp)
{
	enum {
		CLASS_L = 0,
		CLASS_AL = 1,
		CLASS_ET = 8,
		CLASS_R = 17
	};
	static const struct {
		uint_least32_t first;
		uint_least32_t last;
		uint_least8_t prop;
	} range[] = {
		{ UINT32_C(0x0590),  UINT32_C(0x05FF),  CLASS_R  },
		{ UINT32_C(0x0600),  UINT32_C(0x07BF),  CLASS_AL },
		{ UINT32_C(0x07C0),  UINT32_C(0x085F),  CLASS_R  },
		{ UINT32_C(0x0860),  UINT32_C(0x08FF),  CLASS_AL },
		{ UINT32_C(0x20A0),  UINT32_C(0x20CF),  CLASS_ET },
		{ UINT32_C(0xFB1D),  UINT32_C(0xFB4F),  CLASS_R  },
		{ UINT32_C(0xFB50),  UINT32_C(0xFDCF),  CLASS_AL },
		{ UINT32_C(0xFDF0),  UINT32_C(0xFDFF),  CLASS_AL },
		{ UINT32_C(0xFE70),  UINT32_C(0xFEFF),  CLASS_AL },
		{ UINT32_C(0x10800), UINT32_C(0x10CFF), CLASS_R  },
		{ UINT32_C(0x10D00), UINT32_C(0x10D3F), CLASS_AL },
		{ UINT32_C(0x10D40), UINT32_C(0x10EBF), CLASS_R  },
		{ UINT32_C(0x10EC0), UINT32_C(0x10EFF), CLASS_AL },
		{ UINT32_C(0x10F00), UINT32_C(0x10F2F), CLASS_R  },
		{ UINT32_C(0x10F30), UINT32_C(0x10F6F), CLASS_AL },
		{ UINT32_C(0x10F70), UINT32_C(0x10FFF), CLASS_R  },
		{ UINT32_C(0x1E800), UINT32_C(0x1EC6F), CLASS_R  },
		{ UINT32_C(0x1EC70), UINT32_C(0x1ECBF), CLASS_AL },
		{ UINT32_C(0x1ECC0), UINT32_C(0x1ECFF), CLASS_R  },
		{ UINT32_C(0x1ED00), UINT32_C(0x1ED4F), CLASS_AL },
		{ UINT32_C(0x1ED50), UINT32_C(0x1EDFF), CLASS_R  },
		{ UINT32_C(0x1EE00), UINT32_C(0x1EEFF), CLASS_AL },
		{ UINT32_C(0x1EF00), UINT32_C(0x1EFFF), CLASS_R  },
	};
	size_t i;

	for (i = 0; i < sizeof(range) / sizeof(*range); i++) {
		if (cp >= range[i].first && cp <= range[i].last) {
			return range[i].prop;
		}
	}

	/* everything outside the listed ranges defaults to class L */
	return CLASS_L;
}
/*
 * Entry point of the generator.
 *
 * Sets up the bracket array with a zeroed stub at index 0, lets
 * bracket_callback() collect the records of FILE_BIDI_BRACKETS, passes
 * the class table together with fill_missing() and post_process() to
 * properties_generate_break_property(), and finally prints the
 * bracket_type enum, the bracket struct and the bidi_bracket[] table
 * to stdout.
 *
 * argv[0] is forwarded to properties_generate_break_property(); argc
 * is unused. Returns 0 on completion and exits with status 1 if the
 * initial allocation fails.
 */
int
main(int argc, char *argv[])
{
	size_t i;

	(void)argc;

	/*
	 * the first element in the bracket array is initialized to
	 * all-zeros, as we use the implicit 0-offset for all those
	 * codepoints that are not a bracket
	 */
	b = calloc(1, sizeof(*b));
	if (b == NULL) {
		fprintf(stderr, "calloc: %s\n", strerror(errno));
		exit(1);
	}

	/*
	 * Account for the stub in the element count. bracket_callback()
	 * writes to index blen - 1 after incrementing blen, so leaving
	 * blen at 0 would make the first real bracket overwrite the stub
	 * and end up with offset 0 itself.
	 */
	blen = 1;

	parse_file_with_callback(FILE_BIDI_BRACKETS, bracket_callback, NULL);

	properties_generate_break_property(bidi_property, LEN(bidi_property),
	                                   fill_missing, NULL, post_process,
	                                   "bidi", argv[0]);

	printf("\nenum bracket_type {\n\tBIDI_BRACKET_NONE,\n\t"
	       "BIDI_BRACKET_OPEN,\n\tBIDI_BRACKET_CLOSE,\n};\n\n"
	       "struct bracket {\n\tenum bracket_type type;\n"
	       "\tuint_least32_t pair;\n};\n\n"
	       "static const struct bracket bidi_bracket[] = {\n");
	for (i = 0; i < blen; i++) {
		/*
		 * uint_least32_t need not be unsigned int, so convert
		 * explicitly to unsigned long (at least 32 bits wide) and
		 * print with the matching length modifier.
		 */
		printf("\t{\n\t\t.type = %s,\n\t\t.pair = "
		       "UINT32_C(0x%06lX),\n\t},\n",
		       (b[i].type == 'o') ? "BIDI_BRACKET_OPEN" :
		       (b[i].type == 'c') ? "BIDI_BRACKET_CLOSE" :
		                            "BIDI_BRACKET_NONE",
		       (unsigned long)b[i].cp_pair);
	}
	printf("};\n");

	return 0;
}