/* blob: 4ea77e0c250dff5d650dc8654f1b55d1df9edf67 [file] [log] [blame] */
/* See LICENSE file for copyright and license details. */
#include <errno.h>
#include <inttypes.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "../grapheme.h"
#include "util.h"
/*
 * One bidirectional test case as collected by the parser callbacks
 * below and emitted by bidirectional_test_list_print().
 */
struct bidirectional_test {
/* input codepoints and their number */
uint_least32_t *cp;
size_t cplen;
/* paragraph directions to test with; only the first modelen are set */
enum grapheme_bidirectional_direction mode[3];
size_t modelen;
/* resolved paragraph direction */
enum grapheme_bidirectional_direction resolved;
/* expected levels, printed with cplen entries; -1 marks an ignored one */
int_least8_t *level;
/* expected reorder values and their number; -1 marks an ignored one */
int_least16_t *reorder;
size_t reorderlen;
};
/*
 * Table mapping a class abbreviation string to the codepoint that
 * classtocp() hands out for it (presumably a representative codepoint
 * of that bidirectional class -- verify against the Unicode data when
 * changing an entry).
 */
static const struct {
const char *class;
const uint_least32_t cp;
} classcpmap[] = {
{ .class = "L", .cp = UINT32_C(0x0041) },
{ .class = "AL", .cp = UINT32_C(0x0608) },
{ .class = "AN", .cp = UINT32_C(0x0600) },
{ .class = "B", .cp = UINT32_C(0x000A) },
{ .class = "BN", .cp = UINT32_C(0x0000) },
{ .class = "CS", .cp = UINT32_C(0x002C) },
{ .class = "EN", .cp = UINT32_C(0x0030) },
{ .class = "ES", .cp = UINT32_C(0x002B) },
{ .class = "ET", .cp = UINT32_C(0x0023) },
{ .class = "FSI", .cp = UINT32_C(0x2068) },
{ .class = "LRE", .cp = UINT32_C(0x202A) },
{ .class = "LRI", .cp = UINT32_C(0x2066) },
{ .class = "LRO", .cp = UINT32_C(0x202D) },
{ .class = "NSM", .cp = UINT32_C(0x0300) },
{ .class = "ON", .cp = UINT32_C(0x0021) },
{ .class = "PDF", .cp = UINT32_C(0x202C) },
{ .class = "PDI", .cp = UINT32_C(0x2069) },
{ .class = "R", .cp = UINT32_C(0x05BE) },
{ .class = "RLE", .cp = UINT32_C(0x202B) },
{ .class = "RLI", .cp = UINT32_C(0x2067) },
{ .class = "RLO", .cp = UINT32_C(0x202E) },
{ .class = "S", .cp = UINT32_C(0x0009) },
{ .class = "WS", .cp = UINT32_C(0x000C) },
};
/*
 * Translate the class abbreviation made up of the first len bytes of
 * str into the codepoint registered for it in classcpmap and store it
 * in *cp.
 *
 * The abbreviation has to match a table entry exactly. Returns 0 on
 * success and 1, after printing a diagnostic to stderr, when the
 * abbreviation is unknown.
 */
static int
classtocp(const char *str, size_t len, uint_least32_t *cp)
{
	size_t i;

	for (i = 0; i < LEN(classcpmap); i++) {
		/*
		 * A bare strncmp() over len bytes would accept every
		 * prefix of a class name (e.g. "LR" for "LRE") and let
		 * an empty token match the first entry, so compare the
		 * lengths first.
		 */
		if (strlen(classcpmap[i].class) == len &&
		    !memcmp(str, classcpmap[i].class, len)) {
			*cp = classcpmap[i].cp;
			return 0;
		}
	}

	fprintf(stderr, "classtocp: unknown class string '%.*s'.\n", (int)len,
	        str);

	return 1;
}
/*
 * Turn the space-separated list of class abbreviations in str into a
 * freshly allocated codepoint array, one entry per abbreviation.
 *
 * The array is stored in *cp and its length in *cplen; an empty string
 * yields NULL and 0. Returns 0 on success and 1 as soon as classtocp()
 * rejects an abbreviation. Allocation failure terminates the program.
 */
static int
parse_class_list(const char *str, uint_least32_t **cp, size_t *cplen)
{
	const char *token, *sep;
	size_t n, idx, toklen;

	if (str[0] == '\0') {
		*cp = NULL;
		*cplen = 0;
		return 0;
	}

	/* there is one more list entry than there are separating spaces */
	n = 1;
	for (token = str; *token != '\0'; token++) {
		if (*token == ' ') {
			n++;
		}
	}

	*cplen = n;
	*cp = calloc(n, sizeof(**cp));
	if (*cp == NULL) {
		fprintf(stderr, "calloc: %s\n", strerror(errno));
		exit(1);
	}

	/* second pass: convert one token after the other */
	token = str;
	idx = 0;
	for (;;) {
		sep = strchr(token, ' ');
		toklen = (sep != NULL) ? (size_t)(sep - token) : strlen(token);

		if (classtocp(token, toklen, &(*cp)[idx])) {
			return 1;
		}
		if (sep == NULL) {
			break;
		}

		token = sep + 1;
		idx++;
	}

	return 0;
}
/*
 * Convert the first len bytes of str into a level and store it in
 * *level.
 *
 * Accepted inputs are a single 'x' (stored as -1), the empty string
 * (stored as 0) and decimal numbers of at most three digits in the
 * range 0..127. Returns 0 on success and 1, after printing a
 * diagnostic to stderr, otherwise.
 */
static int
strtolevel(const char *str, size_t len, int_least8_t *level)
{
	size_t i;
	int value;

	if (len == 1 && str[0] == 'x') {
		/*
		 * 'x' indicates those characters that are ignored.
		 * We indicate this with a level of -1
		 */
		*level = -1;
		return 0;
	}

	if (len > 3) {
		/*
		 * given we can only express (positive) numbers from
		 * 0..127, more than 3 digits means an excess
		 */
		goto toolarge;
	}

	/*
	 * accumulate the value while making sure that every byte is a
	 * decimal digit (a byte is invalid if it lies below '0' OR
	 * above '9')
	 */
	for (i = 0, value = 0; i < len; i++) {
		if (str[i] < '0' || str[i] > '9') {
			fprintf(stderr, "strtolevel: '%.*s' is not an integer.\n",
			        (int)len, str);
			return 1;
		}
		value = value * 10 + (str[i] - '0');
	}

	/* at most three digits were read, so value cannot have overflown */
	if (value > 127) {
		goto toolarge;
	}
	*level = (int_least8_t)value;

	return 0;
toolarge:
	fprintf(stderr, "strtolevel: '%.*s' is too large.\n", (int)len, str);
	return 1;
}
/*
 * Convert the first len bytes of str into a reorder value and store it
 * in *reorder.
 *
 * Accepted inputs are a single 'x' (stored as -1), the empty string
 * (stored as 0) and decimal numbers of at most three digits, i.e.
 * 0..999. Returns 0 on success and 1, after printing a diagnostic to
 * stderr, otherwise.
 */
static int
strtoreorder(const char *str, size_t len, int_least16_t *reorder)
{
	size_t i;
	int value;

	if (len == 1 && str[0] == 'x') {
		/*
		 * 'x' indicates those characters that are ignored.
		 * We indicate this with a reorder of -1
		 */
		*reorder = -1;
		return 0;
	}

	if (len > 3) {
		/*
		 * given we want to only express (positive) numbers from
		 * 0..999 (at most!), more than 3 digits means an excess
		 */
		goto toolarge;
	}

	/*
	 * accumulate the value while making sure that every byte is a
	 * decimal digit (a byte is invalid if it lies below '0' OR
	 * above '9')
	 */
	for (i = 0, value = 0; i < len; i++) {
		if (str[i] < '0' || str[i] > '9') {
			fprintf(stderr, "strtoreorder: '%.*s' is not an integer.\n",
			        (int)len, str);
			return 1;
		}
		value = value * 10 + (str[i] - '0');
	}

	/* three digits at most, so value is within 0..999 here */
	*reorder = (int_least16_t)value;

	return 0;
toolarge:
	fprintf(stderr, "strtoreorder: '%.*s' is too large.\n", (int)len, str);
	return 1;
}
/*
 * Turn the space-separated list of levels in str into a freshly
 * allocated array, one entry per list item, using strtolevel() for
 * the conversion of each item.
 *
 * The array is stored in *level and its length in *levellen; an empty
 * string yields NULL and 0. Returns 0 on success and 1 as soon as an
 * item cannot be converted. Allocation failure terminates the program.
 */
static int
parse_level_list(const char *str, int_least8_t **level, size_t *levellen)
{
	const char *item, *space;
	size_t n, pos, itemlen;

	if (str[0] == '\0') {
		*level = NULL;
		*levellen = 0;
		return 0;
	}

	/* every space separates two items, so count spaces plus one */
	n = 1;
	for (item = str; *item != '\0'; item++) {
		if (*item == ' ') {
			n++;
		}
	}

	*levellen = n;
	*level = calloc(n, sizeof(**level));
	if (*level == NULL) {
		fprintf(stderr, "calloc: %s\n", strerror(errno));
		exit(1);
	}

	/* walk the string a second time and convert each item */
	item = str;
	pos = 0;
	for (;;) {
		space = strchr(item, ' ');
		itemlen = (space != NULL) ? (size_t)(space - item) :
		                            strlen(item);

		if (strtolevel(item, itemlen, &(*level)[pos])) {
			return 1;
		}
		if (space == NULL) {
			break;
		}

		item = space + 1;
		pos++;
	}

	return 0;
}
/*
 * Turn the space-separated list of reorder values in str into a
 * freshly allocated array, one entry per list item, using
 * strtoreorder() for the conversion of each item.
 *
 * The array is stored in *reorder and its length in *reorderlen; an
 * empty string yields NULL and 0. Returns 0 on success and 1 as soon
 * as an item cannot be converted. Allocation failure terminates the
 * program.
 */
static int
parse_reorder_list(const char *str, int_least16_t **reorder, size_t *reorderlen)
{
	const char *item, *space;
	size_t n, pos, itemlen;

	if (str[0] == '\0') {
		*reorder = NULL;
		*reorderlen = 0;
		return 0;
	}

	/* every space separates two items, so count spaces plus one */
	n = 1;
	for (item = str; *item != '\0'; item++) {
		if (*item == ' ') {
			n++;
		}
	}

	*reorderlen = n;
	*reorder = calloc(n, sizeof(**reorder));
	if (*reorder == NULL) {
		fprintf(stderr, "calloc: %s\n", strerror(errno));
		exit(1);
	}

	/* walk the string a second time and convert each item */
	item = str;
	pos = 0;
	for (;;) {
		space = strchr(item, ' ');
		itemlen = (space != NULL) ? (size_t)(space - item) :
		                            strlen(item);

		if (strtoreorder(item, itemlen, &(*reorder)[pos])) {
			return 1;
		}
		if (space == NULL) {
			break;
		}

		item = space + 1;
		pos++;
	}

	return 0;
}
static void
bidirectional_test_list_print(const struct bidirectional_test *test,
size_t testlen, const char *identifier,
const char *progname)
{
size_t i, j;
printf("/* Automatically generated by %s */\n"
"#include <stdint.h>\n#include <stddef.h>\n\n"
"#include \"../grapheme.h\"\n\n",
progname);
printf("static const struct {\n"
"\tuint_least32_t *cp;\n"
"\tsize_t cplen;\n"
"\tenum grapheme_bidirectional_direction *mode;\n"
"\tsize_t modelen;\n"
"\tenum grapheme_bidirectional_direction resolved;\n"
"\tint_least8_t *level;\n"
"\tint_least16_t *reorder;\n"
"\tsize_t reorderlen;\n} %s[] = {\n",
identifier);
for (i = 0; i < testlen; i++) {
printf("\t{\n");
printf("\t\t.cp = (uint_least32_t[]){");
for (j = 0; j < test[i].cplen; j++) {
printf(" UINT32_C(0x%06X)", test[i].cp[j]);
if (j + 1 < test[i].cplen) {
putchar(',');
}
}
printf(" },\n");
printf("\t\t.cplen = %zu,\n", test[i].cplen);
printf("\t\t.mode = (enum "
"grapheme_bidirectional_direction[]){");
for (j = 0; j < test[i].modelen; j++) {
if (test[i].mode[j] ==
GRAPHEME_BIDIRECTIONAL_DIRECTION_NEUTRAL) {
printf(" GRAPHEME_BIDIRECTIONAL_DIRECTION_"
"NEUTRAL");
} else if (test[i].mode[j] ==
GRAPHEME_BIDIRECTIONAL_DIRECTION_LTR) {
printf(" GRAPHEME_BIDIRECTIONAL_DIRECTION_LTR");
} else if (test[i].mode[j] ==
GRAPHEME_BIDIRECTIONAL_DIRECTION_RTL) {
printf(" GRAPHEME_BIDIRECTIONAL_DIRECTION_RTL");
}
if (j + 1 < test[i].modelen) {
putchar(',');
}
}
printf(" },\n");
printf("\t\t.modelen = %zu,\n", test[i].modelen);
printf("\t\t.resolved = ");
if (test[i].resolved ==
GRAPHEME_BIDIRECTIONAL_DIRECTION_NEUTRAL) {
printf("GRAPHEME_BIDIRECTIONAL_DIRECTION_"
"NEUTRAL");
} else if (test[i].resolved ==
GRAPHEME_BIDIRECTIONAL_DIRECTION_LTR) {
printf("GRAPHEME_BIDIRECTIONAL_DIRECTION_LTR");
} else if (test[i].resolved ==
GRAPHEME_BIDIRECTIONAL_DIRECTION_RTL) {
printf("GRAPHEME_BIDIRECTIONAL_DIRECTION_RTL");
}
printf(",\n");
printf("\t\t.level = (int_least8_t[]){");
for (j = 0; j < test[i].cplen; j++) {
printf(" %" PRIdLEAST8, test[i].level[j]);
if (j + 1 < test[i].cplen) {
putchar(',');
}
}
printf(" },\n");
printf("\t\t.reorder = ");
if (test[i].reorderlen > 0) {
printf("(int_least16_t[]){");
for (j = 0; j < test[i].reorderlen; j++) {
printf(" %" PRIdLEAST16, test[i].reorder[j]);
if (j + 1 < test[i].reorderlen) {
putchar(',');
}
}
printf(" },\n");
} else {
printf("NULL,\n");
}
printf("\t\t.reorderlen = %zu,\n", test[i].reorderlen);
printf("\t},\n");
}
printf("};\n");
}
/* array of all tests parsed so far; grown by one entry per test line */
static struct bidirectional_test *test;
static size_t testlen;
/* level list of the most recently parsed "@Levels:" line */
static int_least8_t *current_level;
static size_t current_level_len;
/* reorder list of the most recently parsed "@Reorder:" line */
static int_least16_t *current_reorder;
static size_t current_reorder_len;
static int
test_callback(const char *file, char **field, size_t nfields, char *comment,
void *payload)
{
char *tmp;
(void)file;
(void)comment;
(void)payload;
/* we either get a line beginning with an '@', or an input line */
if (nfields > 0 && field[0][0] == '@') {
if (!strncmp(field[0], "@Levels:", sizeof("@Levels:") - 1)) {
tmp = field[0] + sizeof("@Levels:") - 1;
for (; *tmp != '\0' && (*tmp == ' ' || *tmp == '\t');
tmp++) {
;
}
free(current_level);
parse_level_list(tmp, &current_level,
&current_level_len);
} else if (!strncmp(field[0],
"@Reorder:", sizeof("@Reorder:") - 1)) {
tmp = field[0] + sizeof("@Reorder:") - 1;
for (; *tmp != '\0' && (*tmp == ' ' || *tmp == '\t');
tmp++) {
;
}
free(current_reorder);
parse_reorder_list(tmp, &current_reorder,
&current_reorder_len);
} else {
fprintf(stderr, "Unknown @-input-line.\n");
exit(1);
}
} else {
if (nfields < 2) {
/* discard any line that does not have at least 2 fields
*/
return 0;
}
/* extend test array */
if (!(test = realloc(test, (++testlen) * sizeof(*test)))) {
fprintf(stderr, "realloc: %s\n", strerror(errno));
exit(1);
}
/* parse field data */
parse_class_list(field[0], &(test[testlen - 1].cp),
&(test[testlen - 1].cplen));
/* copy current level- and reorder-arrays */
if (!(test[testlen - 1].level =
calloc(current_level_len,
sizeof(*(test[testlen - 1].level))))) {
fprintf(stderr, "calloc: %s\n", strerror(errno));
exit(1);
}
memcpy(test[testlen - 1].level, current_level,
current_level_len * sizeof(*(test[testlen - 1].level)));
if (!(test[testlen - 1].reorder =
calloc(current_reorder_len,
sizeof(*(test[testlen - 1].reorder))))) {
fprintf(stderr, "calloc: %s\n", strerror(errno));
exit(1);
}
if (current_reorder != NULL) {
memcpy(test[testlen - 1].reorder, current_reorder,
current_reorder_len *
sizeof(*(test[testlen - 1].reorder)));
}
test[testlen - 1].reorderlen = current_reorder_len;
if (current_level_len != test[testlen - 1].cplen) {
fprintf(stderr,
"mismatch between string and level lengths.\n");
exit(1);
}
/* parse paragraph-level-bitset */
if (strlen(field[1]) != 1) {
fprintf(stderr, "malformed paragraph-level-bitset.\n");
exit(1);
} else if (field[1][0] == '2') {
test[testlen - 1].mode[0] =
GRAPHEME_BIDIRECTIONAL_DIRECTION_LTR;
test[testlen - 1].modelen = 1;
} else if (field[1][0] == '3') {
/* auto=0 and LTR=1 */
test[testlen - 1].mode[0] =
GRAPHEME_BIDIRECTIONAL_DIRECTION_NEUTRAL;
test[testlen - 1].mode[1] =
GRAPHEME_BIDIRECTIONAL_DIRECTION_LTR;
test[testlen - 1].modelen = 2;
} else if (field[1][0] == '4') {
test[testlen - 1].mode[0] =
GRAPHEME_BIDIRECTIONAL_DIRECTION_RTL;
test[testlen - 1].modelen = 1;
} else if (field[1][0] == '5') {
test[testlen - 1].mode[0] =
GRAPHEME_BIDIRECTIONAL_DIRECTION_NEUTRAL;
test[testlen - 1].mode[1] =
GRAPHEME_BIDIRECTIONAL_DIRECTION_RTL;
test[testlen - 1].modelen = 2;
} else if (field[1][0] == '7') {
test[testlen - 1].mode[0] =
GRAPHEME_BIDIRECTIONAL_DIRECTION_NEUTRAL;
test[testlen - 1].mode[1] =
GRAPHEME_BIDIRECTIONAL_DIRECTION_LTR;
test[testlen - 1].mode[2] =
GRAPHEME_BIDIRECTIONAL_DIRECTION_RTL;
test[testlen - 1].modelen = 3;
} else {
fprintf(stderr,
"unhandled paragraph-level-bitset %s.\n",
field[1]);
exit(1);
}
/* the resolved paragraph level is always neutral as the test
* file does not specify it */
test[testlen - 1].resolved =
GRAPHEME_BIDIRECTIONAL_DIRECTION_NEUTRAL;
}
return 0;
}
static int
character_test_callback(const char *file, char **field, size_t nfields,
char *comment, void *payload)
{
size_t tmp;
(void)file;
(void)comment;
(void)payload;
if (nfields < 5) {
/* discard any line that does not have at least 5 fields */
return 0;
}
/* extend test array */
if (!(test = realloc(test, (++testlen) * sizeof(*test)))) {
fprintf(stderr, "realloc: %s\n", strerror(errno));
exit(1);
}
/* parse field data */
parse_cp_list(field[0], &(test[testlen - 1].cp),
&(test[testlen - 1].cplen));
parse_level_list(field[3], &(test[testlen - 1].level), &tmp);
parse_reorder_list(field[4], &(test[testlen - 1].reorder),
&(test[testlen - 1].reorderlen));
/* parse paragraph-level-mode */
if (strlen(field[1]) != 1) {
fprintf(stderr, "malformed paragraph-level-setting.\n");
exit(1);
} else if (field[1][0] == '0') {
test[testlen - 1].mode[0] =
GRAPHEME_BIDIRECTIONAL_DIRECTION_LTR;
} else if (field[1][0] == '1') {
test[testlen - 1].mode[0] =
GRAPHEME_BIDIRECTIONAL_DIRECTION_RTL;
} else if (field[1][0] == '2') {
test[testlen - 1].mode[0] =
GRAPHEME_BIDIRECTIONAL_DIRECTION_NEUTRAL;
} else {
fprintf(stderr, "unhandled paragraph-level-setting.\n");
exit(1);
}
test[testlen - 1].modelen = 1;
/* parse resolved paragraph level */
if (strlen(field[2]) != 1) {
fprintf(stderr, "malformed resolved paragraph level.\n");
exit(1);
} else if (field[2][0] == '0') {
test[testlen - 1].resolved =
GRAPHEME_BIDIRECTIONAL_DIRECTION_LTR;
} else if (field[2][0] == '1') {
test[testlen - 1].resolved =
GRAPHEME_BIDIRECTIONAL_DIRECTION_RTL;
} else {
fprintf(stderr, "unhandled resolved paragraph level.\n");
exit(1);
}
if (tmp != test[testlen - 1].cplen) {
fprintf(stderr, "mismatch between string and level lengths.\n");
exit(1);
}
return 0;
}
/*
 * Feed both test data files through their callbacks, which fill the
 * global test array, and print that array as C source to stdout.
 */
int
main(int argc, char *argv[])
{
	const char *progname = argv[0];

	(void)argc;

	/* the class-based tests come first, the character-based ones last */
	parse_file_with_callback("data/BidiTest.txt", test_callback, NULL);
	parse_file_with_callback("data/BidiCharacterTest.txt",
	                         character_test_callback, NULL);

	bidirectional_test_list_print(test, testlen, "bidirectional_test",
	                              progname);

	return 0;
}