| // Copyright 2020 The Wuffs Authors. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // https://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| // ---------------- |
| |
| /* |
| This test program is typically run indirectly, by the "wuffs test" or "wuffs |
| bench" commands. These commands take an optional "-mimic" flag to check that |
| Wuffs' output mimics (i.e. exactly matches) other libraries' output, such as |
| giflib for GIF, libpng for PNG, etc. |
| |
| To manually run this test: |
| |
| for CC in clang gcc; do |
| $CC -std=c99 -Wall -Werror json.c && ./a.out |
| rm -f a.out |
| done |
| |
| Each edition should print "PASS", amongst other information, and exit(0). |
| |
| Add the "wuffs mimic cflags" (everything after the colon below) to the C |
| compiler flags (after the .c file) to run the mimic tests. |
| |
| To manually run the benchmarks, replace "-Wall -Werror" with "-O3" and replace |
| the first "./a.out" with "./a.out -bench". Combine these changes with the |
| "wuffs mimic cflags" to run the mimic benchmarks. |
| */ |
| |
| // !! wuffs mimic cflags: -DWUFFS_MIMIC |
| |
| // Wuffs ships as a "single file C library" or "header file library" as per |
| // https://github.com/nothings/stb/blob/master/docs/stb_howto.txt |
| // |
| // To use that single file as a "foo.c"-like implementation, instead of a |
| // "foo.h"-like header, #define WUFFS_IMPLEMENTATION before #include'ing or |
| // compiling it. |
| #define WUFFS_IMPLEMENTATION |
| |
| // Defining the WUFFS_CONFIG__MODULE* macros is optional, but it lets users of |
| // release/c/etc.c whitelist which parts of Wuffs to build. That file contains |
| // the entire Wuffs standard library, implementing a variety of codecs and file |
| // formats. Without this macro definition, an optimizing compiler or linker may |
| // very well discard Wuffs code for unused codecs, but listing the Wuffs |
| // modules we use makes that process explicit. Preprocessing means that such |
| // code simply isn't compiled. |
| #define WUFFS_CONFIG__MODULES |
| #define WUFFS_CONFIG__MODULE__BASE |
| #define WUFFS_CONFIG__MODULE__JSON |
| |
| // If building this program in an environment that doesn't easily accommodate |
| // relative includes, you can use the script/inline-c-relative-includes.go |
| // program to generate a stand-alone C file. |
| #include "../../../release/c/wuffs-unsupported-snapshot.c" |
| #include "../testlib/testlib.c" |
| #ifdef WUFFS_MIMIC |
| // No mimic library. |
| #endif |
| |
| // ---------------- String Conversions Tests |
| |
| // wuffs_base__private_implementation__high_prec_dec__to_debug_string converts |
| // hpd into a human-readable NUL-terminated C string. |
| const char* // |
| wuffs_base__private_implementation__high_prec_dec__to_debug_string( |
| wuffs_base__private_implementation__high_prec_dec* hpd, |
| wuffs_base__slice_u8 dst) { |
| if (!hpd) { |
| return "high_prec_dec__to_debug_string: invalid hpd"; |
| } |
| uint8_t* p = dst.ptr; |
| uint8_t* q = dst.ptr + dst.len; |
| |
| // Sign bit. |
| if ((q - p) < 1) { |
| goto too_short; |
| } |
| *p++ = hpd->negative ? '-' : '+'; |
| |
| // Digits and decimal point. |
| if (hpd->decimal_point > |
| +WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE) { |
| // We have "infinity". |
| if ((q - p) < 3) { |
| goto too_short; |
| } |
| *p++ = 'i'; |
| *p++ = 'n'; |
| *p++ = 'f'; |
| goto nul_terminator; |
| |
| } else if (hpd->decimal_point < |
| -WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE) { |
| // We have "epsilon": a very small number, equivalent to zero. |
| if ((q - p) < 3) { |
| goto too_short; |
| } |
| *p++ = 'e'; |
| *p++ = 'p'; |
| *p++ = 's'; |
| goto nul_terminator; |
| |
| } else if (hpd->num_digits == 0) { |
| // We have "0". |
| if ((q - p) < 1) { |
| goto too_short; |
| } |
| *p++ = '0'; |
| goto nul_terminator; |
| |
| } else if (hpd->decimal_point < 0) { |
| // Referring to the wuffs_base__private_implementation__high_prec_dec |
| // typedef's comment, we have something like ".00789". |
| if ((q - p) < (hpd->num_digits + ((uint32_t)(-hpd->decimal_point)) + 1)) { |
| goto too_short; |
| } |
| uint8_t* src = &hpd->digits[0]; |
| |
| // Step A.1: write the ".". |
| *p++ = '.'; |
| |
| // Step A.2: write the "00". |
| uint32_t n = ((uint32_t)(-hpd->decimal_point)); |
| if (n > 0) { |
| memset(p, '0', n); |
| p += n; |
| } |
| |
| // Step A.3: write the "789". |
| n = hpd->num_digits; |
| while (n--) { |
| *p++ = '0' | *src++; |
| } |
| |
| } else if (((uint32_t)(hpd->decimal_point)) <= hpd->num_digits) { |
| // Referring to the wuffs_base__private_implementation__high_prec_dec |
| // typedef's comment, we have something like "78.9". |
| if ((q - p) < (hpd->num_digits + 1)) { |
| goto too_short; |
| } |
| uint8_t* src = &hpd->digits[0]; |
| |
| // Step B.1: write the "78". |
| uint32_t n = ((uint32_t)(hpd->decimal_point)); |
| while (n--) { |
| *p++ = '0' | *src++; |
| } |
| |
| // Step B.2: write the ".". |
| *p++ = '.'; |
| |
| // Step B.3: write the "9". |
| n = hpd->num_digits - ((uint32_t)(hpd->decimal_point)); |
| while (n--) { |
| *p++ = '0' | *src++; |
| } |
| |
| } else { |
| // Referring to the wuffs_base__private_implementation__high_prec_dec |
| // typedef's comment, we have something like "78900.". |
| if ((q - p) < (((uint32_t)(hpd->decimal_point)) + 1)) { |
| goto too_short; |
| } |
| uint8_t* src = &hpd->digits[0]; |
| |
| // Step C.1: write the "789". |
| uint32_t n = hpd->num_digits; |
| while (n--) { |
| *p++ = '0' | *src++; |
| } |
| |
| // Step C.2: write the "00". |
| n = ((uint32_t)(hpd->decimal_point)) - hpd->num_digits; |
| if (n > 0) { |
| memset(p, '0', n); |
| p += n; |
| } |
| |
| // Step C.3: write the ".". |
| *p++ = '.'; |
| } |
| |
| // Truncated bit. |
| if (hpd->truncated) { |
| if ((q - p) < 1) { |
| goto too_short; |
| } |
| *p++ = '$'; |
| } |
| |
| nul_terminator: |
| if ((q - p) < 1) { |
| goto too_short; |
| } |
| *p++ = '\x00'; |
| return NULL; |
| |
| too_short: |
| return "high_prec_dec__to_debug_string: dst buffer is too short"; |
| } |
| |
| const char* // |
| test_strconv_hpd_rounded_integer() { |
| CHECK_FOCUS(__func__); |
| |
| struct { |
| uint64_t want; |
| const char* str; |
| } test_cases[] = { |
| {.want = 4, .str = "-3.9"}, |
| {.want = 3, .str = "-3.14159"}, |
| {.want = 0, .str = "+0"}, |
| {.want = 0, .str = "0.0000000009"}, |
| {.want = 0, .str = "0.1"}, |
| {.want = 1, .str = "0.9"}, |
| {.want = 12, .str = "1234e-2"}, |
| {.want = 57, .str = "5678e-2"}, |
| {.want = 60, .str = "60.0"}, |
| {.want = 60, .str = "60.4999"}, |
| {.want = 60, .str = "60.5"}, |
| {.want = 60, .str = "60.5000"}, |
| {.want = 61, .str = "60.5001"}, |
| {.want = 61, .str = "60.6"}, |
| {.want = 61, .str = "61.0"}, |
| {.want = 61, .str = "61.4999"}, |
| {.want = 62, .str = "61.5"}, |
| {.want = 62, .str = "61.5000"}, |
| {.want = 62, .str = "61.5001"}, |
| {.want = 62, .str = "61.6"}, |
| {.want = 62, .str = "62.0"}, |
| {.want = 62, .str = "62.4999"}, |
| {.want = 62, .str = "62.5"}, |
| {.want = 62, .str = "62.5000"}, |
| {.want = 63, .str = "62.5001"}, |
| {.want = 63, .str = "62.6"}, |
| {.want = 1000, .str = "999.999"}, |
| {.want = 4560000, .str = "456e+4"}, |
| |
| // With round-to-even, ½ rounds to 0 but "a tiny bit more than ½" rounds |
| // to 1, even if the HPD struct truncates that "1" digit. |
| {.want = 0, .str = "0.5"}, |
| {.want = 1, // 50 '0's per row. |
| .str = "0.500000000000000000000000000000000000000000000000" |
| "00000000000000000000000000000000000000000000000000" |
| "00000000000000000000000000000000000000000000000000" |
| "00000000000000000000000000000000000000000000000000" |
| "00000000000000000000000000000000000000000000000000" |
| "00000000000000000000000000000000000000000000000000" |
| "00000000000000000000000000000000000000000000000000" |
| "00000000000000000000000000000000000000000000000000" |
| "00000000000000000000000000000000000000000000000000" |
| "00000000000000000000000000000000000000000000000000" |
| "00000000000000000000000000000000000000000000000000" |
| "00000000000000000000000000000000000000000000000000" |
| "00000000000000000000000000000000000000000000000001"}, |
| |
| // Inputs with exactly 18 decimal digits before the decimal point. |
| {.want = 123456789012345679, .str = "123456789012345678.9"}, |
| {.want = 1000000000000000000, .str = "999999999999999999.9"}, |
| |
| // Inputs with exactly 19 decimal digits before the decimal point. |
| {.want = UINT64_MAX, .str = "1234567890123456789"}, |
| }; |
| |
| int tc; |
| for (tc = 0; tc < WUFFS_TESTLIB_ARRAY_SIZE(test_cases); tc++) { |
| wuffs_base__private_implementation__high_prec_dec hpd; |
| CHECK_STATUS( |
| "hpd__parse", |
| wuffs_base__private_implementation__high_prec_dec__parse( |
| &hpd, wuffs_base__make_slice_u8((void*)test_cases[tc].str, |
| strlen(test_cases[tc].str)))); |
| uint64_t have = |
| wuffs_base__private_implementation__high_prec_dec__rounded_integer( |
| &hpd); |
| if (have != test_cases[tc].want) { |
| RETURN_FAIL("\"%s\": have %" PRIu64 ", want %" PRIu64, test_cases[tc].str, |
| have, test_cases[tc].want); |
| } |
| } |
| return NULL; |
| } |
| |
| const char* // |
| test_strconv_hpd_shift() { |
| CHECK_FOCUS(__func__); |
| |
| struct { |
| const char* str; |
| int32_t shift; // -ve means left shift, +ve means right shift. |
| const char* want; |
| } test_cases[] = { |
| {.str = "0", .shift = +2, .want = "+0"}, |
| {.str = "1", .shift = +3, .want = "+.125"}, |
| {.str = "12e3", .shift = +5, .want = "+375."}, |
| {.str = "-0.007", .shift = +8, .want = "-.00002734375"}, |
| {.str = "3.14159E+26", |
| .shift = +60, |
| .want = "+272489496.244698869986677891574800014495849609375"}, |
| |
| {.str = "0", .shift = -2, .want = "+0"}, |
| {.str = ".125", .shift = -3, .want = "+1."}, |
| {.str = "3750e-1", .shift = -5, .want = "+12000."}, |
| {.str = "-2.734375e-5", .shift = -8, .want = "-.007"}, |
| {.str = "+272489496.244698869986677891574800014495849609375", |
| .shift = -60, |
| .want = "+314159000000000000000000000."}, |
| }; |
| |
| int tc; |
| for (tc = 0; tc < WUFFS_TESTLIB_ARRAY_SIZE(test_cases); tc++) { |
| wuffs_base__private_implementation__high_prec_dec hpd; |
| CHECK_STATUS( |
| "hpd__parse", |
| wuffs_base__private_implementation__high_prec_dec__parse( |
| &hpd, wuffs_base__make_slice_u8((void*)test_cases[tc].str, |
| strlen(test_cases[tc].str)))); |
| int32_t shift = test_cases[tc].shift; |
| if (shift > 0) { |
| wuffs_base__private_implementation__high_prec_dec__small_rshift( |
| &hpd, (uint32_t)(+shift)); |
| } else if (shift < 0) { |
| wuffs_base__private_implementation__high_prec_dec__small_lshift( |
| &hpd, (uint32_t)(-shift)); |
| } |
| |
| uint8_t have[1024]; |
| CHECK_STRING( |
| wuffs_base__private_implementation__high_prec_dec__to_debug_string( |
| &hpd, |
| wuffs_base__make_slice_u8(have, WUFFS_TESTLIB_ARRAY_SIZE(have)))); |
| if (strcmp(((void*)(have)), test_cases[tc].want)) { |
| RETURN_FAIL("\"%s\" %s %" PRId32 ":\n have: \"%s\"\n want: \"%s\"", |
| test_cases[tc].str, ((shift > 0) ? ">>" : "<<"), |
| ((shift > 0) ? +shift : -shift), have, test_cases[tc].want); |
| } |
| } |
| return NULL; |
| } |
| |
| const char* // |
| test_strconv_hexadecimal() { |
| CHECK_FOCUS(__func__); |
| |
| { |
| const char* str = "6A6b7"; // The "7" should be ignored. |
| wuffs_base__slice_u8 dst = global_have_slice; |
| wuffs_base__slice_u8 src = |
| wuffs_base__make_slice_u8((void*)str, strlen(str)); |
| size_t have = wuffs_base__hexadecimal__decode2(dst, src); |
| if (have != 2) { |
| RETURN_FAIL("decode2: have %zu, want 2", have); |
| } |
| if (global_have_array[0] != 0x6A) { |
| RETURN_FAIL("decode2: dst[0]: have 0x%02X, want 0x6A", |
| (int)(global_have_array[0])); |
| } |
| if (global_have_array[1] != 0x6B) { |
| RETURN_FAIL("decode2: dst[1]: have 0x%02X, want 0x6B", |
| (int)(global_have_array[1])); |
| } |
| } |
| |
| { |
| const char* str = "\\xa9\\x00\\xFe"; |
| wuffs_base__slice_u8 dst = global_have_slice; |
| wuffs_base__slice_u8 src = |
| wuffs_base__make_slice_u8((void*)str, strlen(str)); |
| size_t have = wuffs_base__hexadecimal__decode4(dst, src); |
| if (have != 3) { |
| RETURN_FAIL("decode4: have %zu, want 3", have); |
| } |
| if (global_have_array[0] != 0xA9) { |
| RETURN_FAIL("decode4: dst[0]: have 0x%02X, want 0xA9", |
| (int)(global_have_array[0])); |
| } |
| if (global_have_array[1] != 0x00) { |
| RETURN_FAIL("decode4: dst[1]: have 0x%02X, want 0x00", |
| (int)(global_have_array[1])); |
| } |
| if (global_have_array[2] != 0xFE) { |
| RETURN_FAIL("decode4: dst[2]: have 0x%02X, want 0xFE", |
| (int)(global_have_array[2])); |
| } |
| } |
| |
| return NULL; |
| } |
| |
| const char* // |
| test_strconv_parse_number_f64() { |
| CHECK_FOCUS(__func__); |
| |
| const uint64_t fail = 0xDEADBEEF; |
| |
| struct { |
| uint64_t want; |
| const char* str; |
| } test_cases[] = { |
| {.want = 0x0000000000000000, .str = "+0.0"}, |
| {.want = 0x0000000000000000, .str = "0"}, |
| {.want = 0x0000000000000000, .str = "0e0"}, |
| {.want = 0x0000000000000001, .str = "4.9406564584124654e-324"}, |
| {.want = 0x000FFFFFFFFFFFFF, .str = "2.2250738585072009E-308"}, |
| {.want = 0x0010000000000000, .str = "2.2250738585072014E-308"}, |
| {.want = 0x369C314ABE948EB1, |
| .str = "0.0000000000000000000000000000000000000000000012345678900000"}, |
| {.want = 0x3F88000000000000, .str = "0.01171875"}, |
| {.want = 0x3FD0000000000000, .str = ".25"}, |
| {.want = 0x3FD3333333333333, |
| .str = "0.2999999999999999888977697537484345957636833190917968750000"}, |
| {.want = 0x3FD3333333333333, .str = "0.3"}, |
| {.want = 0x3FD3333333333334, .str = "0.30000000000000004"}, |
| {.want = 0x3FD3333333333334, |
| .str = "0.3000000000000000444089209850062616169452667236328125000000"}, |
| {.want = 0x3FD5555555555555, .str = "0.333333333333333333333333333333"}, |
| {.want = 0x3FEFFFFFFFFFFFFF, .str = "0.99999999999999988898"}, |
| {.want = 0x3FF0000000000000, .str = "0.999999999999999999999999999999"}, |
| {.want = 0x3FF0000000000000, .str = "1"}, |
| {.want = 0x3FF0000000000001, .str = "1.0000000000000002"}, |
| {.want = 0x3FF0000000000002, .str = "1.0000000000000004"}, |
| {.want = 0x3FF4000000000000, .str = "1.25"}, |
| {.want = 0x3FF8000000000000, .str = "+1.5"}, |
| {.want = 0x4000000000000000, .str = "2"}, |
| {.want = 0x400921FB54442D18, .str = "3.141592653589793238462643383279"}, |
| {.want = 0x400C000000000000, .str = "3.5"}, |
| {.want = 0x4014000000000000, .str = "5"}, |
| {.want = 0x4036000000000000, .str = "22"}, |
| {.want = 0x4036000000000000, .str = "_+__2_2__."}, |
| {.want = 0x4037000000000000, .str = "23"}, |
| {.want = 0x4038000000000000, .str = "2.4E+00000000001"}, |
| {.want = 0x4038000000000000, .str = "2.4E001"}, |
| {.want = 0x4038000000000000, .str = "2.4E1"}, |
| {.want = 0x4038000000000000, .str = "24"}, |
| {.want = 0x4038000000000000, .str = "2400_00000_00000.00000_e-_1_2"}, |
| {.want = 0x40FE240C9FCB0C02, .str = "123456.789012"}, |
| {.want = 0x4330000000000000, .str = "4503599627370496"}, // 1 << 52. |
| {.want = 0x4330000000000000, .str = "4503599627370496.5"}, |
| {.want = 0x4330000000000001, .str = "4503599627370497"}, |
| {.want = 0x4330000000000002, .str = "4503599627370497.5"}, |
| {.want = 0x4330000000000002, .str = "4503599627370498"}, |
| {.want = 0x4340000000000000, .str = "9007199254740992"}, // 1 << 53. |
| {.want = 0x4340000000000000, .str = "9007199254740993"}, |
| {.want = 0x4340000000000001, .str = "9007199254740994"}, |
| {.want = 0x4340000000000002, .str = "9007199254740995"}, |
| {.want = 0x4340000000000002, .str = "9007199254740996"}, |
| {.want = 0x4340000000000002, .str = "9_007__199_254__740_996"}, |
| {.want = 0x54B249AD2594C37D, .str = "+1E+100"}, |
| {.want = 0x54B249AD2594C37D, .str = "+_1_E_+_1_0_0_"}, |
| {.want = 0x7FEFFFFFFFFFFFFF, .str = "1.7976931348623157e308"}, |
| {.want = 0x7FF0000000000000, .str = "1.8e308"}, |
| {.want = 0x7FF0000000000000, .str = "1e999"}, |
| {.want = 0x7FF0000000000000, .str = "__InFinity__"}, |
| {.want = 0x7FF0000000000000, .str = "inf"}, |
| {.want = 0x7FFFFFFFFFFFFFFF, .str = "+nan"}, |
| {.want = 0x7FFFFFFFFFFFFFFF, .str = "_+_NaN_"}, |
| {.want = 0x7FFFFFFFFFFFFFFF, .str = "nan"}, |
| {.want = 0x8000000000000000, .str = "-0.000e0"}, |
| {.want = 0xC008000000000000, .str = "-3"}, |
| {.want = 0xFFF0000000000000, .str = "-2e308"}, |
| {.want = 0xFFF0000000000000, .str = "-inf"}, |
| {.want = 0xFFFFFFFFFFFFFFFF, .str = "-NAN"}, |
| |
| // We accept either ',' or '.'. |
| {.want = 0x3FFC000000000000, .str = "1,75"}, |
| {.want = 0x3FFC000000000000, .str = "1.75"}, |
| |
| {.want = fail, .str = " 0"}, |
| {.want = fail, .str = ""}, |
| {.want = fail, .str = "."}, |
| {.want = fail, .str = "00"}, |
| {.want = fail, .str = "001.2"}, |
| {.want = fail, .str = "06.44"}, |
| {.want = fail, .str = "0644"}, |
| {.want = fail, .str = "1234 67.8e9"}, |
| {.want = fail, .str = "2,345,678"}, // Two ','s. |
| {.want = fail, .str = "2.345,678"}, // One '.' and one ','. |
| {.want = fail, .str = "7 "}, |
| {.want = fail, .str = "7 .9"}, |
| {.want = fail, .str = "7e"}, |
| {.want = fail, .str = "7e-"}, |
| {.want = fail, .str = "7e-+1"}, |
| {.want = fail, .str = "7e++1"}, |
| {.want = fail, .str = "NAN "}, |
| {.want = fail, .str = "NANA"}, |
| {.want = fail, .str = "inf_inity"}, |
| {.want = fail, .str = "nun"}, |
| }; |
| |
| int tc; |
| for (tc = 0; tc < WUFFS_TESTLIB_ARRAY_SIZE(test_cases); tc++) { |
| wuffs_base__result_f64 r = |
| wuffs_base__parse_number_f64(wuffs_base__make_slice_u8( |
| (void*)test_cases[tc].str, strlen(test_cases[tc].str))); |
| uint64_t have = |
| (r.status.repr == NULL) |
| ? wuffs_base__ieee_754_bit_representation__from_f64(r.value) |
| : fail; |
| if (have != test_cases[tc].want) { |
| RETURN_FAIL("\"%s\": have 0x%" PRIX64 ", want 0x%" PRIX64, |
| test_cases[tc].str, have, test_cases[tc].want); |
| } |
| } |
| |
| return NULL; |
| } |
| |
| const char* // |
| test_strconv_parse_number_i64() { |
| CHECK_FOCUS(__func__); |
| |
| const int64_t fail = 0xDEADBEEF; |
| |
| struct { |
| int64_t want; |
| const char* str; |
| } test_cases[] = { |
| {.want = +0x0000000000000000, .str = "+0"}, |
| {.want = +0x0000000000000000, .str = "-0"}, |
| {.want = +0x0000000000000000, .str = "0"}, |
| {.want = +0x000000000000012C, .str = "+300"}, |
| {.want = +0x7FFFFFFFFFFFFFFF, .str = "+9223372036854775807"}, |
| {.want = +0x7FFFFFFFFFFFFFFF, .str = "9223372036854775807"}, |
| {.want = -0x0000000000000002, .str = "-2"}, |
| {.want = -0x00000000000000AB, .str = "_-_0x_AB"}, |
| {.want = -0x7FFFFFFFFFFFFFFF, .str = "-9223372036854775807"}, |
| {.want = -0x8000000000000000, .str = "-9223372036854775808"}, |
| |
| {.want = fail, .str = "+ 1"}, |
| {.want = fail, .str = "++1"}, |
| {.want = fail, .str = "+-1"}, |
| {.want = fail, .str = "+9223372036854775808"}, // 1 << 63. |
| {.want = fail, .str = "-"}, |
| {.want = fail, .str = "-+1"}, |
| {.want = fail, .str = "-0x8000000000000001"}, // -((1 << 63) + 1). |
| {.want = fail, .str = "-9223372036854775809"}, // -((1 << 63) + 1). |
| {.want = fail, .str = "0x8000000000000000"}, // 1 << 63. |
| {.want = fail, .str = "1-"}, |
| {.want = fail, .str = "9223372036854775808"}, // 1 << 63. |
| }; |
| |
| int tc; |
| for (tc = 0; tc < WUFFS_TESTLIB_ARRAY_SIZE(test_cases); tc++) { |
| wuffs_base__result_i64 r = |
| wuffs_base__parse_number_i64(wuffs_base__make_slice_u8( |
| (void*)test_cases[tc].str, strlen(test_cases[tc].str))); |
| int64_t have = (r.status.repr == NULL) ? r.value : fail; |
| if (have != test_cases[tc].want) { |
| RETURN_FAIL("\"%s\": have 0x%" PRIX64 ", want 0x%" PRIX64, |
| test_cases[tc].str, have, test_cases[tc].want); |
| } |
| } |
| |
| return NULL; |
| } |
| |
| const char* // |
| test_strconv_parse_number_u64() { |
| CHECK_FOCUS(__func__); |
| |
| const uint64_t fail = 0xDEADBEEF; |
| |
| struct { |
| uint64_t want; |
| const char* str; |
| } test_cases[] = { |
| {.want = 0x0000000000000000, .str = "0"}, |
| {.want = 0x0000000000000000, .str = "0_"}, |
| {.want = 0x0000000000000000, .str = "0d0"}, |
| {.want = 0x0000000000000000, .str = "0x000"}, |
| {.want = 0x0000000000000000, .str = "_0"}, |
| {.want = 0x0000000000000000, .str = "__0__"}, |
| {.want = 0x000000000000004A, .str = "0x4A"}, |
| {.want = 0x000000000000004B, .str = "0x__4_B_"}, |
| {.want = 0x000000000000007B, .str = "123"}, |
| {.want = 0x000000000000007C, .str = "12_4"}, |
| {.want = 0x000000000000007D, .str = "_1__2________5_"}, |
| {.want = 0x00000000000001F4, .str = "0d500"}, |
| {.want = 0x00000000000001F5, .str = "0D___5_01__"}, |
| {.want = 0x00000000FFFFFFFF, .str = "4294967295"}, |
| {.want = 0x0000000100000000, .str = "4294967296"}, |
| {.want = 0x0123456789ABCDEF, .str = "0x0123456789ABCDEF"}, |
| {.want = 0x0123456789ABCDEF, .str = "0x0123456789abcdef"}, |
| {.want = 0xFFFFFFFFFFFFFFF9, .str = "18446744073709551609"}, |
| {.want = 0xFFFFFFFFFFFFFFFA, .str = "18446744073709551610"}, |
| {.want = 0xFFFFFFFFFFFFFFFE, .str = "0xFFFFffffFFFFfffe"}, |
| {.want = 0xFFFFFFFFFFFFFFFE, .str = "18446744073709551614"}, |
| {.want = 0xFFFFFFFFFFFFFFFF, .str = "0xFFFF_FFFF_FFFF_FFFF"}, |
| {.want = 0xFFFFFFFFFFFFFFFF, .str = "18446744073709551615"}, |
| |
| {.want = fail, .str = " "}, |
| {.want = fail, .str = " 0"}, |
| {.want = fail, .str = " 12 "}, |
| {.want = fail, .str = ""}, |
| {.want = fail, .str = "+0"}, |
| {.want = fail, .str = "+1"}, |
| {.want = fail, .str = "-0"}, |
| {.want = fail, .str = "-1"}, |
| {.want = fail, .str = "0 "}, |
| {.want = fail, .str = "00"}, |
| {.want = fail, .str = "000000x"}, |
| {.want = fail, .str = "000000x0"}, |
| {.want = fail, .str = "007"}, |
| {.want = fail, .str = "0644"}, |
| {.want = fail, .str = "0_0"}, |
| {.want = fail, .str = "0_x1"}, |
| {.want = fail, .str = "0d___"}, |
| {.want = fail, .str = "0x"}, |
| {.want = fail, .str = "0x10000000000000000"}, // 1 << 64. |
| {.want = fail, .str = "0x1_0000_0000_0000_0000"}, // 1 << 64. |
| {.want = fail, .str = "1 23"}, |
| {.want = fail, .str = "1,23"}, |
| {.want = fail, .str = "1.23"}, |
| {.want = fail, .str = "123 "}, |
| {.want = fail, .str = "123456789012345678901234"}, |
| {.want = fail, .str = "12a3"}, |
| {.want = fail, .str = "18446744073709551616"}, // UINT64_MAX. |
| {.want = fail, .str = "18446744073709551617"}, |
| {.want = fail, .str = "18446744073709551618"}, |
| {.want = fail, .str = "18446744073709551619"}, |
| {.want = fail, .str = "18446744073709551620"}, |
| {.want = fail, .str = "18446744073709551621"}, |
| {.want = fail, .str = "_"}, |
| {.want = fail, .str = "d"}, |
| {.want = fail, .str = "x"}, |
| }; |
| |
| int tc; |
| for (tc = 0; tc < WUFFS_TESTLIB_ARRAY_SIZE(test_cases); tc++) { |
| wuffs_base__result_u64 r = |
| wuffs_base__parse_number_u64(wuffs_base__make_slice_u8( |
| (void*)test_cases[tc].str, strlen(test_cases[tc].str))); |
| uint64_t have = (r.status.repr == NULL) ? r.value : fail; |
| if (have != test_cases[tc].want) { |
| RETURN_FAIL("\"%s\": have 0x%" PRIX64 ", want 0x%" PRIX64, |
| test_cases[tc].str, have, test_cases[tc].want); |
| } |
| } |
| |
| return NULL; |
| } |
| |
| const char* // |
| test_strconv_utf_8_next() { |
| CHECK_FOCUS(__func__); |
| |
| // Special case the "\x00" string, which is valid UTF-8 but its strlen is |
| // zero, not one. |
| uint8_t the_nul_byte[1]; |
| the_nul_byte[0] = '\x00'; |
| |
| struct { |
| uint32_t want_cp; |
| uint32_t want_bl; |
| const char* str; |
| } test_cases[] = { |
| {.want_cp = 0x00000000, .want_bl = 0, .str = ""}, |
| {.want_cp = 0x00000000, .want_bl = 1, .str = "The <NUL> byte"}, |
| {.want_cp = 0x00000009, .want_bl = 1, .str = "\t"}, |
| {.want_cp = 0x00000041, .want_bl = 1, .str = "A"}, |
| {.want_cp = 0x00000061, .want_bl = 1, .str = "abdefghij"}, |
| {.want_cp = 0x0000007F, .want_bl = 1, .str = "\x7F"}, |
| {.want_cp = 0x00000080, .want_bl = 2, .str = "\xC2\x80"}, |
| {.want_cp = 0x000007FF, .want_bl = 2, .str = "\xDF\xBF"}, |
| {.want_cp = 0x00000800, .want_bl = 3, .str = "\xE0\xA0\x80"}, |
| {.want_cp = 0x0000FFFD, .want_bl = 3, .str = "\xEF\xBF\xBD"}, |
| {.want_cp = 0x0000FFFF, .want_bl = 3, .str = "\xEF\xBF\xBF"}, |
| {.want_cp = 0x00010000, .want_bl = 4, .str = "\xF0\x90\x80\x80"}, |
| {.want_cp = 0x0010FFFF, .want_bl = 4, .str = "\xF4\x8F\xBF\xBF"}, |
| |
| // U+00000394 GREEK CAPITAL LETTER DELTA. |
| {.want_cp = 0x00000394, .want_bl = 2, .str = "\xCE\x94"}, |
| {.want_cp = 0x00000394, .want_bl = 2, .str = "\xCE\x94+"}, |
| {.want_cp = 0x00000394, .want_bl = 2, .str = "\xCE\x94++"}, |
| {.want_cp = 0x00000394, .want_bl = 2, .str = "\xCE\x94+++"}, |
| {.want_cp = 0x00000394, .want_bl = 2, .str = "\xCE\x94++++"}, |
| {.want_cp = 0x00000394, .want_bl = 2, .str = "\xCE\x94\x80"}, |
| {.want_cp = 0x00000394, .want_bl = 2, .str = "\xCE\x94\x80\x80"}, |
| {.want_cp = 0x00000394, .want_bl = 2, .str = "\xCE\x94\x80\x80\x80"}, |
| {.want_cp = 0x00000394, .want_bl = 2, .str = "\xCE\x94\x80\x80\x80\x80"}, |
| |
| // U+00002603 SNOWMAN. |
| {.want_cp = 0x00002603, .want_bl = 3, .str = "\xE2\x98\x83"}, |
| {.want_cp = 0x00002603, .want_bl = 3, .str = "\xE2\x98\x83+"}, |
| {.want_cp = 0x00002603, .want_bl = 3, .str = "\xE2\x98\x83++"}, |
| {.want_cp = 0x00002603, .want_bl = 3, .str = "\xE2\x98\x83+++"}, |
| {.want_cp = 0x00002603, .want_bl = 3, .str = "\xE2\x98\x83++++"}, |
| {.want_cp = 0x00002603, .want_bl = 3, .str = "\xE2\x98\x83\xFF"}, |
| |
| // U+0001F4A9 PILE OF POO. |
| {.want_cp = 0x0001F4A9, .want_bl = 4, .str = "\xF0\x9F\x92\xA9"}, |
| {.want_cp = 0x0001F4A9, .want_bl = 4, .str = "\xF0\x9F\x92\xA9+"}, |
| {.want_cp = 0x0001F4A9, .want_bl = 4, .str = "\xF0\x9F\x92\xA9++"}, |
| {.want_cp = 0x0001F4A9, .want_bl = 4, .str = "\xF0\x9F\x92\xA9+++"}, |
| {.want_cp = 0x0001F4A9, .want_bl = 4, .str = "\xF0\x9F\x92\xA9++++"}, |
| {.want_cp = 0x0001F4A9, .want_bl = 4, .str = "\xF0\x9F\x92\xA9\xFF"}, |
| |
| // Invalid. |
| {.want_cp = 0x0000FFFD, .want_bl = 1, .str = "\x80"}, |
| {.want_cp = 0x0000FFFD, .want_bl = 1, .str = "\xBF"}, |
| {.want_cp = 0x0000FFFD, .want_bl = 1, .str = "\xC0\x80"}, |
| {.want_cp = 0x0000FFFD, .want_bl = 1, .str = "\xC1\xBF"}, |
| {.want_cp = 0x0000FFFD, .want_bl = 1, .str = "\xC2"}, |
| {.want_cp = 0x0000FFFD, .want_bl = 1, .str = "\xC2\x7F"}, |
| {.want_cp = 0x0000FFFD, .want_bl = 1, .str = "\xC2\xC0"}, |
| {.want_cp = 0x0000FFFD, .want_bl = 1, .str = "\xC2\xFF"}, |
| {.want_cp = 0x0000FFFD, .want_bl = 1, .str = "\xCE"}, |
| {.want_cp = 0x0000FFFD, .want_bl = 1, .str = "\xDF\xC0"}, |
| {.want_cp = 0x0000FFFD, .want_bl = 1, .str = "\xDF\xFF"}, |
| {.want_cp = 0x0000FFFD, .want_bl = 1, .str = "\xE0\x80"}, |
| {.want_cp = 0x0000FFFD, .want_bl = 1, .str = "\xE0\x80\x80"}, |
| {.want_cp = 0x0000FFFD, .want_bl = 1, .str = "\xE0\x9F\xBF"}, |
| {.want_cp = 0x0000FFFD, .want_bl = 1, .str = "\xE2"}, |
| {.want_cp = 0x0000FFFD, .want_bl = 1, .str = "\xF0"}, |
| {.want_cp = 0x0000FFFD, .want_bl = 1, .str = "\xF0\x80\x80"}, |
| {.want_cp = 0x0000FFFD, .want_bl = 1, .str = "\xF0\x80\x80\x80"}, |
| {.want_cp = 0x0000FFFD, .want_bl = 1, .str = "\xF0\x8F\xBF\xBF"}, |
| {.want_cp = 0x0000FFFD, .want_bl = 1, .str = "\xF4\x90\x80\x80"}, |
| {.want_cp = 0x0000FFFD, .want_bl = 1, .str = "\xF5"}, |
| {.want_cp = 0x0000FFFD, .want_bl = 1, .str = "\xF6\x80"}, |
| {.want_cp = 0x0000FFFD, .want_bl = 1, .str = "\xF7\x80\x80"}, |
| {.want_cp = 0x0000FFFD, .want_bl = 1, .str = "\xFF\xFF\xFF\xFF"}, |
| |
| // Invalid. UTF-8 cannot contain the surrogates U+D800 ..= U+DFFF. |
| {.want_cp = 0x0000FFFD, .want_bl = 1, .str = "\xED\xA0\x80"}, |
| {.want_cp = 0x0000FFFD, .want_bl = 1, .str = "\xED\xBF\xBF"}, |
| }; |
| |
| int tc; |
| for (tc = 0; tc < WUFFS_TESTLIB_ARRAY_SIZE(test_cases); tc++) { |
| wuffs_base__slice_u8 s = wuffs_base__make_slice_u8( |
| (void*)test_cases[tc].str, strlen(test_cases[tc].str)); |
| |
| // Override "The <NUL> byte" with "\x00". |
| if ((test_cases[tc].want_cp == 0) && (test_cases[tc].want_bl == 1)) { |
| s = wuffs_base__make_slice_u8(&the_nul_byte[0], 1); |
| } |
| |
| wuffs_base__utf_8__next__output have = wuffs_base__utf_8__next(s); |
| if ((have.code_point != test_cases[tc].want_cp) || |
| (have.byte_length != test_cases[tc].want_bl)) { |
| RETURN_FAIL("\"%s\": have cp=0x%" PRIX32 " bl=%" PRIu32 |
| ", want cp=0x%" PRIX32 " bl=%" PRIu32, |
| test_cases[tc].str, have.code_point, have.byte_length, |
| test_cases[tc].want_cp, test_cases[tc].want_bl); |
| } |
| } |
| |
| return NULL; |
| } |
| |
| // ---------------- Golden Tests |
| |
| golden_test json_australian_abc_gt = { |
| .want_filename = "test/data/australian-abc-local-stations.tokens", |
| .src_filename = "test/data/australian-abc-local-stations.json", |
| }; |
| |
| golden_test json_file_sizes_gt = { |
| .src_filename = "test/data/file-sizes.json", |
| }; |
| |
| golden_test json_github_tags_gt = { |
| .src_filename = "test/data/github-tags.json", |
| }; |
| |
| golden_test json_json_things_unformatted_gt = { |
| .want_filename = "test/data/json-things.unformatted.tokens", |
| .src_filename = "test/data/json-things.unformatted.json", |
| }; |
| |
| golden_test json_nobel_prizes_gt = { |
| .src_filename = "test/data/nobel-prizes.json", |
| }; |
| |
| golden_test json_json_quirks_gt = { |
| .want_filename = "test/data/json-quirks.tokens", |
| .src_filename = "test/data/json-quirks.json", |
| }; |
| |
| // ---------------- JSON Tests |
| |
| const char* // |
| test_wuffs_json_decode_interface() { |
| CHECK_FOCUS(__func__); |
| |
| { |
| wuffs_json__decoder dec; |
| CHECK_STATUS("initialize", |
| wuffs_json__decoder__initialize( |
| &dec, sizeof dec, WUFFS_VERSION, |
| WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED)); |
| CHECK_STRING(do_test__wuffs_base__token_decoder( |
| wuffs_json__decoder__upcast_as__wuffs_base__token_decoder(&dec), |
| &json_json_things_unformatted_gt)); |
| } |
| |
| { |
| wuffs_json__decoder dec; |
| CHECK_STATUS("initialize", |
| wuffs_json__decoder__initialize( |
| &dec, sizeof dec, WUFFS_VERSION, |
| WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED)); |
| CHECK_STRING(do_test__wuffs_base__token_decoder( |
| wuffs_json__decoder__upcast_as__wuffs_base__token_decoder(&dec), |
| &json_australian_abc_gt)); |
| } |
| |
| { |
| uint32_t quirks[] = { |
| WUFFS_JSON__QUIRK_ALLOW_BACKSLASH_A, |
| WUFFS_JSON__QUIRK_ALLOW_BACKSLASH_CAPITAL_U, |
| WUFFS_JSON__QUIRK_ALLOW_BACKSLASH_E, |
| WUFFS_JSON__QUIRK_ALLOW_BACKSLASH_QUESTION_MARK, |
| WUFFS_JSON__QUIRK_ALLOW_BACKSLASH_SINGLE_QUOTE, |
| WUFFS_JSON__QUIRK_ALLOW_BACKSLASH_V, |
| WUFFS_JSON__QUIRK_ALLOW_BACKSLASH_X, |
| WUFFS_JSON__QUIRK_ALLOW_BACKSLASH_ZERO, |
| WUFFS_JSON__QUIRK_ALLOW_COMMENT_BLOCK, |
| WUFFS_JSON__QUIRK_ALLOW_COMMENT_LINE, |
| WUFFS_JSON__QUIRK_ALLOW_EXTRA_COMMA, |
| WUFFS_JSON__QUIRK_ALLOW_INF_NAN_NUMBERS, |
| WUFFS_JSON__QUIRK_ALLOW_LEADING_ASCII_RECORD_SEPARATOR, |
| WUFFS_JSON__QUIRK_ALLOW_LEADING_UNICODE_BYTE_ORDER_MARK, |
| WUFFS_JSON__QUIRK_ALLOW_TRAILING_NEW_LINE, |
| WUFFS_JSON__QUIRK_REPLACE_INVALID_UNICODE, |
| 0, |
| }; |
| |
| wuffs_json__decoder dec; |
| CHECK_STATUS("initialize", |
| wuffs_json__decoder__initialize( |
| &dec, sizeof dec, WUFFS_VERSION, |
| WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED)); |
| uint32_t i; |
| for (i = 0; quirks[i]; i++) { |
| wuffs_json__decoder__set_quirk_enabled(&dec, quirks[i], true); |
| } |
| CHECK_STRING(do_test__wuffs_base__token_decoder( |
| wuffs_json__decoder__upcast_as__wuffs_base__token_decoder(&dec), |
| &json_json_quirks_gt)); |
| } |
| |
| return NULL; |
| } |
| |
| const char* // |
| wuffs_json_decode(wuffs_base__token_buffer* tok, |
| wuffs_base__io_buffer* src, |
| uint32_t wuffs_initialize_flags, |
| uint64_t wlimit, |
| uint64_t rlimit) { |
| wuffs_json__decoder dec; |
| CHECK_STATUS("initialize", |
| wuffs_json__decoder__initialize(&dec, sizeof dec, WUFFS_VERSION, |
| wuffs_initialize_flags)); |
| |
| while (true) { |
| wuffs_base__token_buffer limited_tok = |
| make_limited_token_writer(*tok, wlimit); |
| wuffs_base__io_buffer limited_src = make_limited_reader(*src, rlimit); |
| |
| wuffs_base__status status = wuffs_json__decoder__decode_tokens( |
| &dec, &limited_tok, &limited_src, global_work_slice); |
| |
| tok->meta.wi += limited_tok.meta.wi; |
| src->meta.ri += limited_src.meta.ri; |
| |
| if (((wlimit < UINT64_MAX) && |
| (status.repr == wuffs_base__suspension__short_write)) || |
| ((rlimit < UINT64_MAX) && |
| (status.repr == wuffs_base__suspension__short_read))) { |
| continue; |
| } |
| return status.repr; |
| } |
| } |
| |
| const char* // |
| test_wuffs_json_decode_end_of_data() { |
| CHECK_FOCUS(__func__); |
| |
| int i; |
| for (i = 0; i < 2; i++) { |
| uint8_t* src_ptr = (uint8_t*)("123null89"); |
| size_t src_len = i ? 3 : 9; |
| |
| wuffs_json__decoder dec; |
| CHECK_STATUS("initialize", |
| wuffs_json__decoder__initialize( |
| &dec, sizeof dec, WUFFS_VERSION, |
| WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED)); |
| |
| wuffs_base__token_buffer tok = |
| wuffs_base__make_token_buffer_writer(global_have_token_slice); |
| wuffs_base__io_buffer src = wuffs_base__make_io_buffer_reader( |
| wuffs_base__make_slice_u8(src_ptr, src_len), true); |
| CHECK_STATUS("decode_tokens", wuffs_json__decoder__decode_tokens( |
| &dec, &tok, &src, global_work_slice)); |
| if (src.meta.ri != 3) { |
| RETURN_FAIL("src.meta.ri: have %zu, want 3", src.meta.ri); |
| } |
| |
| const char* have = |
| wuffs_json__decoder__decode_tokens(&dec, &tok, &src, global_work_slice) |
| .repr; |
| if (have != wuffs_base__note__end_of_data) { |
| RETURN_FAIL("decode_tokens: have \"%s\", want \"%s\"", have, |
| wuffs_base__note__end_of_data); |
| } |
| if (src.meta.ri != 3) { |
| RETURN_FAIL("src.meta.ri: have %zu, want 3", src.meta.ri); |
| } |
| } |
| return NULL; |
| } |
| |
| const char* // |
| test_wuffs_json_decode_long_numbers() { |
| CHECK_FOCUS(__func__); |
| |
| // Spelling "false" with four letters helps clang-format align the test |
| // cases, when viewed in a fixed width font. |
| const bool fals = false; |
| |
| // Each test case produces multiple test strings: the test_cases[tc].suffix |
| // field is prefixed with N '9's, for multiple values of N, so that the test |
| // string's total length is near WUFFS_JSON__DECODER_NUMBER_LENGTH_MAX_INCL. |
| // For example, a ".2e4" suffix means an overall string of "999etc999.2e4". |
| // |
| // The test_cases[tc].valid field holds the whether overall string is a valid |
| // JSON number. |
| struct { |
| bool valid; |
| const char* suffix; |
| } test_cases[] = { |
| {.valid = true, .suffix = ""}, |
| {.valid = true, .suffix = " "}, |
| {.valid = fals, .suffix = "."}, |
| {.valid = fals, .suffix = ". "}, |
| {.valid = fals, .suffix = "E"}, |
| {.valid = fals, .suffix = "E "}, |
| {.valid = fals, .suffix = "E-"}, |
| {.valid = fals, .suffix = "E- "}, |
| {.valid = true, .suffix = "e2"}, |
| {.valid = true, .suffix = "e2 "}, |
| {.valid = true, .suffix = "e+34"}, |
| {.valid = true, .suffix = "e+34 "}, |
| {.valid = true, .suffix = ".2"}, |
| {.valid = true, .suffix = ".2 "}, |
| {.valid = fals, .suffix = ".2e"}, |
| {.valid = fals, .suffix = ".2e "}, |
| {.valid = fals, .suffix = ".2e+"}, |
| {.valid = fals, .suffix = ".2e+ "}, |
| {.valid = true, .suffix = ".2e4"}, |
| {.valid = true, .suffix = ".2e4 "}, |
| {.valid = true, .suffix = ".2E+5"}, |
| {.valid = true, .suffix = ".2E+5 "}, |
| {.valid = true, .suffix = ".2e-5678"}, |
| {.valid = true, .suffix = ".2e-5678 "}, |
| }; |
| |
| // src_array holds the overall test string. 119 is arbitrary but long enough. |
| // See the "if (suffix_length > etc)" check below. 102 is also arbitrary but |
| // larger than WUFFS_JSON__DECODER_NUMBER_LENGTH_MAX_INCL. |
| // |
| // See also test_wuffs_json_decode_src_io_buffer_length. |
| uint8_t src_array[119]; |
| memset(&src_array[0], '9', 102); |
| if (102 <= WUFFS_JSON__DECODER_NUMBER_LENGTH_MAX_INCL) { |
| RETURN_FAIL("insufficient number_length test case coverage"); |
| } |
| |
| wuffs_json__decoder dec; |
| |
| int tc; |
| for (tc = 0; tc < WUFFS_TESTLIB_ARRAY_SIZE(test_cases); tc++) { |
| size_t suffix_length = strlen(test_cases[tc].suffix); |
| if ((suffix_length + 1) > (119 - 102)) { // +1 for the terminal NUL. |
| RETURN_FAIL("tc=%d: src_array is too short", tc); |
| } |
| bool ends_with_space = (suffix_length > 0) && |
| (test_cases[tc].suffix[suffix_length - 1] == ' '); |
| |
| // Copying the terminal NUL isn't necessary for Wuffs' slices (which are a |
| // pointer-length pair), but this backstop can help debugging with printf |
| // where "%s" takes a C string (a bare pointer). |
| memcpy(&src_array[102], test_cases[tc].suffix, suffix_length + 1); |
| |
| size_t nines_length; |
| for (nines_length = 90; nines_length < 102; nines_length++) { |
| wuffs_base__slice_u8 src_data = ((wuffs_base__slice_u8){ |
| .ptr = &src_array[102 - nines_length], |
| .len = nines_length + suffix_length, |
| }); |
| size_t number_length = src_data.len - (ends_with_space ? 1 : 0); |
| |
| int closed; |
| for (closed = 0; closed < 2; closed++) { |
| CHECK_STATUS( |
| "initialize", |
| wuffs_json__decoder__initialize( |
| &dec, sizeof dec, WUFFS_VERSION, |
| WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED)); |
| |
| wuffs_base__token_buffer tok = |
| wuffs_base__make_token_buffer_writer(global_have_token_slice); |
| wuffs_base__io_buffer src = |
| wuffs_base__make_io_buffer_reader(src_data, closed != 0); |
| const char* have = wuffs_json__decoder__decode_tokens(&dec, &tok, &src, |
| global_work_slice) |
| .repr; |
| |
| size_t total_length = 0; |
| while (tok.meta.ri < tok.meta.wi) { |
| wuffs_base__token* t = &tok.data.ptr[tok.meta.ri++]; |
| total_length += wuffs_base__token__length(t); |
| } |
| if (total_length != src.meta.ri) { |
| RETURN_FAIL( |
| "tc=%d, nines_length=%zu, closed=%d: total_length: have %zu, " |
| "want %zu", |
| tc, nines_length, closed, total_length, src.meta.ri); |
| } |
| |
| const char* want; |
| if (number_length > WUFFS_JSON__DECODER_NUMBER_LENGTH_MAX_INCL) { |
| want = wuffs_json__error__unsupported_number_length; |
| } else if (closed || ends_with_space) { |
| want = test_cases[tc].valid ? NULL : wuffs_json__error__bad_input; |
| } else { |
| want = wuffs_base__suspension__short_read; |
| } |
| |
| if (have != want) { |
| RETURN_FAIL( |
| "tc=%d, nines_length=%zu, closed=%d: have \"%s\", want \"%s\"", |
| tc, nines_length, closed, have, want); |
| } |
| } |
| } |
| } |
| |
| return NULL; |
| } |
| |
| // test_wuffs_json_decode_prior_valid_utf_8 tests that when encountering |
| // invalid or incomplete UTF-8, or a backslash-escape, any prior valid UTF-8 is |
| // still output. The decoder batches output so that, ignoring the quotation |
| // marks, "abc\xCE\x94efg" can be a single 8-length token instead of multiple |
| // (e.g. 3+2+3) tokens. On the other hand, while "abc\xFF" ends with one byte |
| // of invalid UTF-8, the 3 good bytes before that should still be output. |
| const char* // |
| test_wuffs_json_decode_prior_valid_utf_8() { |
| CHECK_FOCUS(__func__); |
| |
| // The test cases contain combinations of valid, partial and invalid UTF-8: |
| // - "\xCE\x94" is U+00000394 GREEK CAPITAL LETTER DELTA. |
| // - "\xE2\x98\x83" is U+00002603 SNOWMAN. |
| // - "\xF0\x9F\x92\xA9" is U+0001F4A9 PILE OF POO. |
| // |
| // The code below can also add trailing 's' bytes, which change e.g. the |
| // partial multi-byte UTF-8 "\xE2" to be the invalid UTF-8 "\xE2s". |
| const char* test_cases[] = { |
| "", |
| "\\t", |
| "\\u", |
| "\\u1234", |
| "\x1F", // Valid UTF-8 but invalid in a JSON string. |
| "\x20", |
| "\xCE", |
| "\xCE\x94", |
| "\xE2", |
| "\xE2\x98", |
| "\xE2\x98\x83", |
| "\xE2\x98\x83\xCE", |
| "\xE2\x98\x83\xCE\x94", |
| "\xF0", |
| "\xF0\x9F", |
| "\xF0\x9F\x92", |
| "\xF0\x9F\x92\xA9", |
| "\xF0\x9F\x92\xA9\xCE", |
| "\xF0\x9F\x92\xA9\xCE\x94", |
| }; |
| |
| size_t prefixes[] = { |
| 0, |
| 1, |
| 15, |
| WUFFS_BASE__TOKEN__LENGTH__MAX_INCL - 9, |
| WUFFS_BASE__TOKEN__LENGTH__MAX_INCL - 8, |
| WUFFS_BASE__TOKEN__LENGTH__MAX_INCL - 7, |
| WUFFS_BASE__TOKEN__LENGTH__MAX_INCL - 6, |
| WUFFS_BASE__TOKEN__LENGTH__MAX_INCL - 5, |
| WUFFS_BASE__TOKEN__LENGTH__MAX_INCL - 4, |
| WUFFS_BASE__TOKEN__LENGTH__MAX_INCL - 3, |
| WUFFS_BASE__TOKEN__LENGTH__MAX_INCL - 2, |
| WUFFS_BASE__TOKEN__LENGTH__MAX_INCL - 1, |
| WUFFS_BASE__TOKEN__LENGTH__MAX_INCL + 0, |
| }; |
| |
| size_t suffixes[] = {0, 1, 17}; |
| |
| wuffs_json__decoder dec; |
| |
| int tc; |
| for (tc = 0; tc < WUFFS_TESTLIB_ARRAY_SIZE(test_cases); tc++) { |
| size_t n = strlen(test_cases[tc]); |
| size_t num_preceding = 0; |
| while (num_preceding < n) { |
| wuffs_base__utf_8__next__output x = |
| wuffs_base__utf_8__next(wuffs_base__make_slice_u8( |
| (void*)(test_cases[tc]) + num_preceding, n - num_preceding)); |
| if (!wuffs_base__utf_8__next__output__is_valid(&x) || |
| (x.code_point < 0x20) || (x.code_point == '\\')) { |
| break; |
| } |
| num_preceding += x.byte_length; |
| if (num_preceding > n) { |
| RETURN_FAIL("tc=%d: utf_8__next overflow", tc); |
| } |
| } |
| |
| int pre; |
| for (pre = 0; pre < WUFFS_TESTLIB_ARRAY_SIZE(prefixes); pre++) { |
| size_t prefix = prefixes[pre]; |
| |
| int suf; |
| for (suf = 0; suf < WUFFS_TESTLIB_ARRAY_SIZE(suffixes); suf++) { |
| size_t suffix = suffixes[suf]; |
| |
| // Set src to "\"ppp...pppMIDDLEsss...sss", with a leading quotation |
| // mark, where prefix and suffix are the number of 'p's and 's's and |
| // test_cases[tc] is the "MIDDLE". |
| wuffs_base__slice_u8 src_data = ((wuffs_base__slice_u8){ |
| .ptr = global_src_array, |
| .len = 1 + prefix + n + suffix, |
| }); |
| if (src_data.len > IO_BUFFER_ARRAY_SIZE) { |
| RETURN_FAIL("total src length is too long"); |
| } |
| src_data.ptr[0] = '\"'; |
| memset(&src_data.ptr[1], 'p', prefix); |
| memcpy(&src_data.ptr[1 + prefix], test_cases[tc], n); |
| memset(&src_data.ptr[1 + prefix + n], 's', suffix); |
| |
| int closed; |
| for (closed = 0; closed < 2; closed++) { |
| CHECK_STATUS("initialize", wuffs_json__decoder__initialize( |
| &dec, sizeof dec, WUFFS_VERSION, 0)); |
| |
| wuffs_base__token_buffer tok = |
| wuffs_base__make_token_buffer_writer(global_have_token_slice); |
| wuffs_base__io_buffer src = |
| wuffs_base__make_io_buffer_reader(src_data, closed != 0); |
| wuffs_json__decoder__decode_tokens(&dec, &tok, &src, |
| global_work_slice); |
| |
| size_t have = 0; |
| while (tok.meta.ri < tok.meta.wi) { |
| wuffs_base__token* t = &tok.data.ptr[tok.meta.ri++]; |
| uint64_t vbc = wuffs_base__token__value_base_category(t); |
| if (vbc == WUFFS_BASE__TOKEN__VBC__UNICODE_CODE_POINT) { |
| break; |
| } else if (vbc == WUFFS_BASE__TOKEN__VBC__STRING) { |
| have += wuffs_base__token__length(t); |
| } else { |
| RETURN_FAIL( |
| "tc=%d, prefix=%zu, suffix=%zu, closed=%d: unexpected token", |
| tc, prefix, suffix, closed); |
| } |
| } |
| size_t want = 1 + prefix + num_preceding; // 1 for the leading '\"'. |
| if (num_preceding == n) { |
| want += suffix; |
| } |
| if (have != want) { |
| RETURN_FAIL( |
| "tc=%d, prefix=%zu, suffix=%zu, closed=%d: have %zu, want %zu", |
| tc, prefix, suffix, closed, have, want); |
| } |
| } |
| } |
| } |
| } |
| |
| return NULL; |
| } |
| |
| const char* // |
| test_wuffs_json_decode_quirk_allow_backslash_etc() { |
| CHECK_FOCUS(__func__); |
| |
| struct { |
| uint32_t want; |
| const char* str; |
| uint32_t quirk; |
| } test_cases[] = { |
| { |
| .want = 0x07, |
| .str = "\"\\a\"", |
| .quirk = WUFFS_JSON__QUIRK_ALLOW_BACKSLASH_A, |
| }, |
| { |
| .want = 0x0001F4A9, |
| .str = "\"\\U0001F4A9\"", |
| .quirk = WUFFS_JSON__QUIRK_ALLOW_BACKSLASH_CAPITAL_U, |
| }, |
| { |
| .want = 0x1B, |
| .str = "\"\\e\"", |
| .quirk = WUFFS_JSON__QUIRK_ALLOW_BACKSLASH_E, |
| }, |
| { |
| .want = 0x3F, |
| .str = "\"\\?\"", |
| .quirk = WUFFS_JSON__QUIRK_ALLOW_BACKSLASH_QUESTION_MARK, |
| }, |
| { |
| .want = 0x27, |
| .str = "\"\\'\"", |
| .quirk = WUFFS_JSON__QUIRK_ALLOW_BACKSLASH_SINGLE_QUOTE, |
| }, |
| { |
| .want = 0x0B, |
| .str = "\"\\v\"", |
| .quirk = WUFFS_JSON__QUIRK_ALLOW_BACKSLASH_V, |
| }, |
| { |
| .want = 0x00, |
| .str = "\"\\0\"", |
| .quirk = WUFFS_JSON__QUIRK_ALLOW_BACKSLASH_ZERO, |
| }, |
| }; |
| |
| int tc; |
| for (tc = 0; tc < WUFFS_TESTLIB_ARRAY_SIZE(test_cases); tc++) { |
| int q; |
| for (q = 0; q < 2; q++) { |
| wuffs_json__decoder dec; |
| CHECK_STATUS("initialize", wuffs_json__decoder__initialize( |
| &dec, sizeof dec, WUFFS_VERSION, |
| WUFFS_INITIALIZE__DEFAULT_OPTIONS)); |
| wuffs_json__decoder__set_quirk_enabled(&dec, test_cases[tc].quirk, q); |
| |
| wuffs_base__token_buffer tok = |
| wuffs_base__make_token_buffer_writer(global_have_token_slice); |
| wuffs_base__io_buffer src = wuffs_base__make_io_buffer_reader( |
| wuffs_base__make_slice_u8((void*)(test_cases[tc].str), |
| strlen(test_cases[tc].str)), |
| true); |
| |
| const char* have_status_repr = wuffs_json__decoder__decode_tokens( |
| &dec, &tok, &src, global_work_slice) |
| .repr; |
| const char* want_status_repr = |
| q ? NULL : wuffs_json__error__bad_backslash_escape; |
| if (have_status_repr != want_status_repr) { |
| RETURN_FAIL("tc=%d, q=%d: decode_tokens: have \"%s\", want \"%s\"", tc, |
| q, have_status_repr, want_status_repr); |
| } |
| if (want_status_repr != NULL) { |
| continue; |
| } |
| |
| uint32_t have = 0; |
| while (tok.meta.ri < tok.meta.wi) { |
| wuffs_base__token* t = &tok.data.ptr[tok.meta.ri++]; |
| uint64_t vbc = wuffs_base__token__value_base_category(t); |
| uint64_t vbd = wuffs_base__token__value_base_detail(t); |
| if (vbc == WUFFS_BASE__TOKEN__VBC__UNICODE_CODE_POINT) { |
| have = vbd; |
| break; |
| } |
| } |
| if (have != test_cases[tc].want) { |
| RETURN_FAIL("tc=%d, q=%d: Unicode code point: have U+%04" PRIX32 |
| ", want U+%04" PRIX32, |
| tc, q, have, test_cases[tc].want); |
| } |
| } |
| } |
| return NULL; |
| } |
| |
| const char* // |
| test_wuffs_json_decode_quirk_allow_backslash_x() { |
| CHECK_FOCUS(__func__); |
| |
| struct { |
| uint64_t want_bytes; |
| const char* want_status_repr; |
| const char* str; |
| } test_cases[] = { |
| {.want_bytes = 0x12789A, |
| .want_status_repr = NULL, |
| .str = "\"\\x12\\u3456\\x78\\x9A\""}, |
| {.want_bytes = 0x00, |
| .want_status_repr = wuffs_json__error__bad_backslash_escape, |
| .str = "\"a\\X6A\""}, |
| {.want_bytes = 0x6A6B, |
| .want_status_repr = NULL, |
| .str = "\"a\\x6A\\x6bz\""}, |
| {.want_bytes = 0x6A, |
| .want_status_repr = wuffs_json__error__bad_backslash_escape, |
| .str = "\"a\\x6A\\x6yz\""}, |
| {.want_bytes = 0x00, |
| .want_status_repr = wuffs_json__error__bad_backslash_escape, |
| .str = "\"a\\x\""}, |
| }; |
| |
| int tc; |
| for (tc = 0; tc < WUFFS_TESTLIB_ARRAY_SIZE(test_cases); tc++) { |
| wuffs_json__decoder dec; |
| CHECK_STATUS("initialize", wuffs_json__decoder__initialize( |
| &dec, sizeof dec, WUFFS_VERSION, |
| WUFFS_INITIALIZE__DEFAULT_OPTIONS)); |
| wuffs_json__decoder__set_quirk_enabled( |
| &dec, WUFFS_JSON__QUIRK_ALLOW_BACKSLASH_X, true); |
| |
| wuffs_base__token_buffer tok = |
| wuffs_base__make_token_buffer_writer(global_have_token_slice); |
| wuffs_base__slice_u8 src_slice = wuffs_base__make_slice_u8( |
| (void*)(test_cases[tc].str), strlen(test_cases[tc].str)); |
| wuffs_base__io_buffer src = |
| wuffs_base__make_io_buffer_reader(src_slice, true); |
| const char* have_status_repr = |
| wuffs_json__decoder__decode_tokens(&dec, &tok, &src, global_work_slice) |
| .repr; |
| if (have_status_repr != test_cases[tc].want_status_repr) { |
| RETURN_FAIL("tc=%d: decode_tokens: have \"%s\", want \"%s\"", tc, |
| have_status_repr, test_cases[tc].want_status_repr); |
| } |
| |
| uint64_t src_index = 0; |
| uint64_t have_bytes = 0; |
| while (tok.meta.ri < tok.meta.wi) { |
| wuffs_base__token* t = &tok.data.ptr[tok.meta.ri++]; |
| uint64_t vbc = wuffs_base__token__value_base_category(t); |
| uint64_t vbd = wuffs_base__token__value_base_detail(t); |
| uint64_t token_length = wuffs_base__token__length(t); |
| if ((vbc == WUFFS_BASE__TOKEN__VBC__STRING) && |
| (vbd == |
| WUFFS_BASE__TOKEN__VBD__STRING__CONVERT_1_DST_4_SRC_BACKSLASH_X)) { |
| uint8_t b[8] = {0}; |
| size_t n = wuffs_base__hexadecimal__decode4( |
| wuffs_base__make_slice_u8(&b[0], 8), |
| wuffs_base__make_slice_u8(src_slice.ptr + src_index, token_length)); |
| size_t i = 0; |
| for (; i < n; i++) { |
| have_bytes <<= 8; |
| have_bytes |= b[i]; |
| } |
| } |
| |
| src_index += token_length; |
| } |
| if (have_bytes != test_cases[tc].want_bytes) { |
| RETURN_FAIL("tc=%d: have U+%08" PRIX64 ", want U+%08" PRIX64, tc, |
| have_bytes, test_cases[tc].want_bytes); |
| } |
| } |
| |
| return NULL; |
| } |
| |
| const char* // |
| test_wuffs_json_decode_quirk_allow_extra_comma() { |
| CHECK_FOCUS(__func__); |
| |
| struct { |
| // want has 2 bytes, one for each possible q: |
| // - q&1 sets WUFFS_JSON__QUIRK_ALLOW_EXTRA_COMMA. |
| // An 'X', '+' or '-' means that decoding should succeed (and consume the |
| // entire input), succeed (without consuming the entire input) or fail. |
| const char* want; |
| const char* str; |
| } test_cases[] = { |
| {.want = "-X", .str = "[0,]"}, |
| {.want = "-X", .str = "[[], {},{\"k\":\"v\",\n}\n,\n]"}, |
| {.want = "--", .str = "[,]"}, |
| {.want = "--", .str = "{,}"}, |
| }; |
| |
| int tc; |
| for (tc = 0; tc < WUFFS_TESTLIB_ARRAY_SIZE(test_cases); tc++) { |
| int q; |
| for (q = 0; q < 2; q++) { |
| wuffs_json__decoder dec; |
| CHECK_STATUS("initialize", wuffs_json__decoder__initialize( |
| &dec, sizeof dec, WUFFS_VERSION, |
| WUFFS_INITIALIZE__DEFAULT_OPTIONS)); |
| wuffs_json__decoder__set_quirk_enabled( |
| &dec, WUFFS_JSON__QUIRK_ALLOW_EXTRA_COMMA, q & 1); |
| |
| wuffs_base__token_buffer tok = |
| wuffs_base__make_token_buffer_writer(global_have_token_slice); |
| wuffs_base__io_buffer src = wuffs_base__make_io_buffer_reader( |
| wuffs_base__make_slice_u8((void*)(test_cases[tc].str), |
| strlen(test_cases[tc].str)), |
| true); |
| const char* have = wuffs_json__decoder__decode_tokens(&dec, &tok, &src, |
| global_work_slice) |
| .repr; |
| const char* want = |
| (test_cases[tc].want[q] != '-') ? NULL : wuffs_json__error__bad_input; |
| if (have != want) { |
| RETURN_FAIL("tc=%d, q=%d: decode_tokens: have \"%s\", want \"%s\"", tc, |
| q, have, want); |
| } |
| |
| size_t total_length = 0; |
| while (tok.meta.ri < tok.meta.wi) { |
| total_length += wuffs_base__token__length(&tok.data.ptr[tok.meta.ri++]); |
| } |
| if (total_length != src.meta.ri) { |
| RETURN_FAIL("tc=%d, q=%d: total_length: have %zu, want %zu", tc, q, |
| total_length, src.meta.ri); |
| } |
| if (test_cases[tc].want[q] == 'X') { |
| if (total_length != src.data.len) { |
| RETURN_FAIL("tc=%d, q=%d: total_length: have %zu, want %zu", tc, q, |
| total_length, src.data.len); |
| } |
| } else if (test_cases[tc].want[q] == '+') { |
| if (total_length >= src.data.len) { |
| RETURN_FAIL("tc=%d, q=%d: total_length: have %zu, want < %zu", tc, q, |
| total_length, src.data.len); |
| } |
| } |
| } |
| } |
| return NULL; |
| } |
| |
| const char* // |
| test_wuffs_json_decode_quirk_allow_inf_nan_numbers() { |
| CHECK_FOCUS(__func__); |
| |
| struct { |
| // want has 2 bytes, one for each possible q: |
| // - q&1 sets WUFFS_JSON__QUIRK_ALLOW_INF_NAN_NUMBERS. |
| // An 'X', '+' or '-' means that decoding should succeed (and consume the |
| // entire input), succeed (without consuming the entire input) or fail. |
| const char* want; |
| const char* str; |
| } test_cases[] = { |
| {.want = "-X", .str = "InFiniTy"}, |
| {.want = "-X", .str = "[+inf, -infinity, +nan,-NaN,NAN]"}, |
| {.want = "-X", .str = "inf"}, |
| {.want = "-+", .str = "infinit"}, |
| {.want = "-+", .str = "infiQity"}, |
| {.want = "-+", .str = "nana"}, |
| {.want = "--", .str = "+-inf"}, |
| {.want = "--", .str = "-+inf"}, |
| {.want = "--", .str = "[infinit,"}, |
| {.want = "--", .str = "[infiQity,"}, |
| {.want = "--", .str = "[nana,"}, |
| {.want = "--", .str = "∞"}, |
| }; |
| |
| int tc; |
| for (tc = 0; tc < WUFFS_TESTLIB_ARRAY_SIZE(test_cases); tc++) { |
| int q; |
| for (q = 0; q < 2; q++) { |
| wuffs_json__decoder dec; |
| CHECK_STATUS("initialize", wuffs_json__decoder__initialize( |
| &dec, sizeof dec, WUFFS_VERSION, |
| WUFFS_INITIALIZE__DEFAULT_OPTIONS)); |
| wuffs_json__decoder__set_quirk_enabled( |
| &dec, WUFFS_JSON__QUIRK_ALLOW_INF_NAN_NUMBERS, q & 1); |
| |
| wuffs_base__token_buffer tok = |
| wuffs_base__make_token_buffer_writer(global_have_token_slice); |
| wuffs_base__io_buffer src = wuffs_base__make_io_buffer_reader( |
| wuffs_base__make_slice_u8((void*)(test_cases[tc].str), |
| strlen(test_cases[tc].str)), |
| true); |
| const char* have = wuffs_json__decoder__decode_tokens(&dec, &tok, &src, |
| global_work_slice) |
| .repr; |
| const char* want = |
| (test_cases[tc].want[q] != '-') ? NULL : wuffs_json__error__bad_input; |
| if (have != want) { |
| RETURN_FAIL("tc=%d, q=%d: decode_tokens: have \"%s\", want \"%s\"", tc, |
| q, have, want); |
| } |
| |
| size_t total_length = 0; |
| while (tok.meta.ri < tok.meta.wi) { |
| total_length += wuffs_base__token__length(&tok.data.ptr[tok.meta.ri++]); |
| } |
| if (total_length != src.meta.ri) { |
| RETURN_FAIL("tc=%d, q=%d: total_length: have %zu, want %zu", tc, q, |
| total_length, src.meta.ri); |
| } |
| if (test_cases[tc].want[q] == 'X') { |
| if (total_length != src.data.len) { |
| RETURN_FAIL("tc=%d, q=%d: total_length: have %zu, want %zu", tc, q, |
| total_length, src.data.len); |
| } |
| } else if (test_cases[tc].want[q] == '+') { |
| if (total_length >= src.data.len) { |
| RETURN_FAIL("tc=%d, q=%d: total_length: have %zu, want < %zu", tc, q, |
| total_length, src.data.len); |
| } |
| } |
| } |
| } |
| return NULL; |
| } |
| |
| const char* // |
| test_wuffs_json_decode_quirk_allow_comment_etc() { |
| CHECK_FOCUS(__func__); |
| |
| struct { |
| // want has 4 bytes, one for each possible q: |
| // - q&1 sets WUFFS_JSON__QUIRK_ALLOW_COMMENT_BLOCK. |
| // - q&2 sets WUFFS_JSON__QUIRK_ALLOW_COMMENT_LINE. |
| // An 'X', '+' or '-' means that decoding should succeed (and consume the |
| // entire input), succeed (without consuming the entire input) or fail. |
| const char* want; |
| const char* str; |
| } test_cases[] = { |
| {.want = "-X-X", .str = "[ /*com*/ 0]"}, |
| {.want = "--XX", .str = "//l\n //m\n0"}, |
| {.want = "---X", .str = "[ 0, /*com*/ 1 //l\n\n]"}, |
| {.want = "----", .str = "/*/0"}, // Not a valid slash-star comment. |
| {.want = "----", .str = "[4/5]"}, // Lone slash. |
| }; |
| |
| int tc; |
| for (tc = 0; tc < WUFFS_TESTLIB_ARRAY_SIZE(test_cases); tc++) { |
| int q; |
| for (q = 0; q < 4; q++) { |
| wuffs_json__decoder dec; |
| CHECK_STATUS("initialize", wuffs_json__decoder__initialize( |
| &dec, sizeof dec, WUFFS_VERSION, |
| WUFFS_INITIALIZE__DEFAULT_OPTIONS)); |
| wuffs_json__decoder__set_quirk_enabled( |
| &dec, WUFFS_JSON__QUIRK_ALLOW_COMMENT_BLOCK, q & 1); |
| wuffs_json__decoder__set_quirk_enabled( |
| &dec, WUFFS_JSON__QUIRK_ALLOW_COMMENT_LINE, q & 2); |
| |
| wuffs_base__token_buffer tok = |
| wuffs_base__make_token_buffer_writer(global_have_token_slice); |
| wuffs_base__io_buffer src = wuffs_base__make_io_buffer_reader( |
| wuffs_base__make_slice_u8((void*)(test_cases[tc].str), |
| strlen(test_cases[tc].str)), |
| true); |
| const char* have = wuffs_json__decoder__decode_tokens(&dec, &tok, &src, |
| global_work_slice) |
| .repr; |
| const char* want = |
| (test_cases[tc].want[q] != '-') ? NULL : wuffs_json__error__bad_input; |
| if (have != want) { |
| RETURN_FAIL("tc=%d, q=%d: decode_tokens: have \"%s\", want \"%s\"", tc, |
| q, have, want); |
| } |
| |
| size_t total_length = 0; |
| while (tok.meta.ri < tok.meta.wi) { |
| total_length += wuffs_base__token__length(&tok.data.ptr[tok.meta.ri++]); |
| } |
| if (total_length != src.meta.ri) { |
| RETURN_FAIL("tc=%d, q=%d: total_length: have %zu, want %zu", tc, q, |
| total_length, src.meta.ri); |
| } |
| if (test_cases[tc].want[q] == 'X') { |
| if (total_length != src.data.len) { |
| RETURN_FAIL("tc=%d, q=%d: total_length: have %zu, want %zu", tc, q, |
| total_length, src.data.len); |
| } |
| } else if (test_cases[tc].want[q] == '+') { |
| if (total_length >= src.data.len) { |
| RETURN_FAIL("tc=%d, q=%d: total_length: have %zu, want < %zu", tc, q, |
| total_length, src.data.len); |
| } |
| } |
| } |
| } |
| return NULL; |
| } |
| |
| const char* // |
| test_wuffs_json_decode_quirk_allow_leading_etc() { |
| CHECK_FOCUS(__func__); |
| |
| struct { |
| // want has 4 bytes, one for each possible q: |
| // - q&1 sets WUFFS_JSON__QUIRK_ALLOW_LEADING_ASCII_RECORD_SEPARATOR. |
| // - q&2 sets WUFFS_JSON__QUIRK_ALLOW_LEADING_UNICODE_BYTE_ORDER_MARK. |
| // An 'X', '+' or '-' means that decoding should succeed (and consume the |
| // entire input), succeed (without consuming the entire input) or fail. |
| const char* want; |
| const char* str; |
| } test_cases[] = { |
| {.want = "-X-X", .str = "\x1Etrue"}, |
| {.want = "--XX", .str = "\xEF\xBB\xBFtrue"}, |
| {.want = "---X", .str = "\x1E\xEF\xBB\xBFtrue"}, |
| {.want = "---X", .str = "\xEF\xBB\xBF\x1Etrue"}, |
| {.want = "----", .str = " \x1Etrue"}, |
| {.want = "----", .str = "\x1E \xEF\xBB\xBFtrue"}, |
| {.want = "----", .str = "\x1E\x1Etrue"}, |
| {.want = "----", .str = "\xEF\xBB"}, |
| {.want = "----", .str = "\xEF\xBB\xBF"}, |
| {.want = "----", .str = "\xEF\xBB\xBF$"}, |
| {.want = "----", .str = "\xEFtrue"}, |
| }; |
| |
| int tc; |
| for (tc = 0; tc < WUFFS_TESTLIB_ARRAY_SIZE(test_cases); tc++) { |
| int q; |
| for (q = 0; q < 4; q++) { |
| wuffs_json__decoder dec; |
| CHECK_STATUS("initialize", wuffs_json__decoder__initialize( |
| &dec, sizeof dec, WUFFS_VERSION, |
| WUFFS_INITIALIZE__DEFAULT_OPTIONS)); |
| wuffs_json__decoder__set_quirk_enabled( |
| &dec, WUFFS_JSON__QUIRK_ALLOW_LEADING_ASCII_RECORD_SEPARATOR, q & 1); |
| wuffs_json__decoder__set_quirk_enabled( |
| &dec, WUFFS_JSON__QUIRK_ALLOW_LEADING_UNICODE_BYTE_ORDER_MARK, q & 2); |
| |
| wuffs_base__token_buffer tok = |
| wuffs_base__make_token_buffer_writer(global_have_token_slice); |
| wuffs_base__io_buffer src = wuffs_base__make_io_buffer_reader( |
| wuffs_base__make_slice_u8((void*)(test_cases[tc].str), |
| strlen(test_cases[tc].str)), |
| true); |
| const char* have = wuffs_json__decoder__decode_tokens(&dec, &tok, &src, |
| global_work_slice) |
| .repr; |
| const char* want = |
| (test_cases[tc].want[q] != '-') ? NULL : wuffs_json__error__bad_input; |
| if (have != want) { |
| RETURN_FAIL("tc=%d, q=%d: decode_tokens: have \"%s\", want \"%s\"", tc, |
| q, have, want); |
| } |
| |
| size_t total_length = 0; |
| while (tok.meta.ri < tok.meta.wi) { |
| total_length += wuffs_base__token__length(&tok.data.ptr[tok.meta.ri++]); |
| } |
| if (total_length != src.meta.ri) { |
| RETURN_FAIL("tc=%d, q=%d: total_length: have %zu, want %zu", tc, q, |
| total_length, src.meta.ri); |
| } |
| if (test_cases[tc].want[q] == 'X') { |
| if (total_length != src.data.len) { |
| RETURN_FAIL("tc=%d, q=%d: total_length: have %zu, want %zu", tc, q, |
| total_length, src.data.len); |
| } |
| } else if (test_cases[tc].want[q] == '+') { |
| if (total_length >= src.data.len) { |
| RETURN_FAIL("tc=%d, q=%d: total_length: have %zu, want < %zu", tc, q, |
| total_length, src.data.len); |
| } |
| } |
| } |
| } |
| return NULL; |
| } |
| |
| const char* // |
| test_wuffs_json_decode_quirk_allow_trailing_etc() { |
| CHECK_FOCUS(__func__); |
| |
| struct { |
| // want has 2 bytes, one for each possible q: |
| // - q&1 sets WUFFS_JSON__QUIRK_ALLOW_TRAILING_NEW_LINE. |
| // An 'X', '+' or '-' means that decoding should succeed (and consume the |
| // entire input), succeed (without consuming the entire input) or fail. |
| const char* want; |
| const char* str; |
| } test_cases[] = { |
| {.want = "++", .str = "0 \n "}, // |
| {.want = "++", .str = "0 \n\n"}, // |
| {.want = "++", .str = "0\n\n"}, // |
| {.want = "+-", .str = "0 true \n"}, // |
| {.want = "+-", .str = "007"}, // |
| {.want = "+-", .str = "007\n"}, // |
| {.want = "+-", .str = "0true "}, // |
| {.want = "+-", .str = "0true"}, // |
| {.want = "+X", .str = "0 "}, // |
| {.want = "+X", .str = "0 \n"}, // |
| {.want = "+X", .str = "0\n"}, // |
| {.want = "+X", .str = "0\t\r\n"}, // |
| {.want = "--", .str = "\n"}, // |
| {.want = "XX", .str = "0"}, // |
| }; |
| |
| int tc; |
| for (tc = 0; tc < WUFFS_TESTLIB_ARRAY_SIZE(test_cases); tc++) { |
| int q; |
| for (q = 0; q < 2; q++) { |
| wuffs_json__decoder dec; |
| CHECK_STATUS("initialize", wuffs_json__decoder__initialize( |
| &dec, sizeof dec, WUFFS_VERSION, |
| WUFFS_INITIALIZE__DEFAULT_OPTIONS)); |
| wuffs_json__decoder__set_quirk_enabled( |
| &dec, WUFFS_JSON__QUIRK_ALLOW_TRAILING_NEW_LINE, q & 1); |
| |
| wuffs_base__token_buffer tok = |
| wuffs_base__make_token_buffer_writer(global_have_token_slice); |
| wuffs_base__io_buffer src = wuffs_base__make_io_buffer_reader( |
| wuffs_base__make_slice_u8((void*)(test_cases[tc].str), |
| strlen(test_cases[tc].str)), |
| true); |
| const char* have = wuffs_json__decoder__decode_tokens(&dec, &tok, &src, |
| global_work_slice) |
| .repr; |
| const char* want = |
| (test_cases[tc].want[q] != '-') ? NULL : wuffs_json__error__bad_input; |
| if (have != want) { |
| RETURN_FAIL("tc=%d, q=%d: decode_tokens: have \"%s\", want \"%s\"", tc, |
| q, have, want); |
| } |
| |
| size_t total_length = 0; |
| while (tok.meta.ri < tok.meta.wi) { |
| total_length += wuffs_base__token__length(&tok.data.ptr[tok.meta.ri++]); |
| } |
| if (total_length != src.meta.ri) { |
| RETURN_FAIL("tc=%d, q=%d: total_length: have %zu, want %zu", tc, q, |
| total_length, src.meta.ri); |
| } |
| if (test_cases[tc].want[q] == 'X') { |
| if (total_length != src.data.len) { |
| RETURN_FAIL("tc=%d, q=%d: total_length: have %zu, want %zu", tc, q, |
| total_length, src.data.len); |
| } |
| } else if (test_cases[tc].want[q] == '+') { |
| if (total_length >= src.data.len) { |
| RETURN_FAIL("tc=%d, q=%d: total_length: have %zu, want < %zu", tc, q, |
| total_length, src.data.len); |
| } |
| } |
| } |
| } |
| return NULL; |
| } |
| |
| const char* // |
| test_wuffs_json_decode_quirk_replace_invalid_unicode() { |
| CHECK_FOCUS(__func__); |
| |
| // Decoding str should produce want, with invalid UTF-8 replaced by "?". A |
| // proper JSON decoder (with the quirk enabled) would replace with |
| // "\xEF\xBF\xBD", the UTF-8 encoding of U+FFFD, but using "?" leads to |
| // clearer, shorter test cases. |
| struct { |
| const char* want; |
| const char* str; |
| } test_cases[] = { |
| // Valid UTF-8. |
| {.want = "abc", .str = "\"abc\""}, |
| {.want = "del\xCE\x94ta", .str = "\"del\\u0394ta\""}, |
| {.want = "del\xCE\x94ta", .str = "\"del\xCE\x94ta\""}, |
| |
| // Invalid UTF-8: right byte lengths, wrong bytes. |
| {.want = "1byte?yz", .str = "\"1byte\xFFyz\""}, |
| {.want = "2byte??yz", .str = "\"2byte\xCE\xFFyz\""}, |
| {.want = "3byte???yz", .str = "\"3byte\xE2\x98\xFFyz\""}, |
| {.want = "4byte????yz", .str = "\"4byte\xF0\x9F\x92\xFFyz\""}, |
| |
| // Invalid UTF-8: wrong byte lengths. |
| {.want = "?", .str = "\"\xCE\""}, |
| {.want = "?g", .str = "\"\xCEg\""}, |
| {.want = "?gh", .str = "\"\xCEgh\""}, |
| {.want = "j?", .str = "\"j\xE2\""}, |
| {.want = "j?l", .str = "\"j\xE2l\""}, |
| {.want = "j?lm", .str = "\"j\xE2lm\""}, |
| {.want = "?", .str = "\"\xF0\""}, |
| {.want = "?r", .str = "\"\xF0r\""}, |
| {.want = "?rs", .str = "\"\xF0rs\""}, |
| |
| // U+DC00 (as an unpaired surrogate) is either 1 or 3 '?'s depending on |
| // whether it's backslash-u or backslash-x. |
| {.want = "a?z", .str = "\"a\\uDC00z\""}, |
| {.want = "a?zzzzzz", .str = "\"a\\uDC00zzzzzz\""}, |
| {.want = "a???z", .str = "\"a\xED\xB0\x80z\""}, |
| {.want = "a???zzzzzz", .str = "\"a\xED\xB0\x80zzzzzz\""}, |
| |
| // 1 or 2 unpaired surrogates each become '?'s, but for 3 surrogates |
| // where consecutive surrogates make a valid pair, there's only 1 '?'. |
| {.want = "a?z", .str = "\"a\\uD800z\""}, |
| {.want = "a??z", .str = "\"a\\uD800\\uDBFFz\""}, |
| {.want = "a?\xF4\x8F\xBF\xBFz", .str = "\"a\\uD800\\uDBFF\\uDFFFz\""}, |
| {.want = "a\xF0\x90\x80\x80?z", .str = "\"a\\uD800\\uDC00\\uDFFFz\""}, |
| }; |
| |
| int tc; |
| for (tc = 0; tc < WUFFS_TESTLIB_ARRAY_SIZE(test_cases); tc++) { |
| wuffs_json__decoder dec; |
| CHECK_STATUS("initialize", wuffs_json__decoder__initialize( |
| &dec, sizeof dec, WUFFS_VERSION, |
| WUFFS_INITIALIZE__DEFAULT_OPTIONS)); |
| wuffs_json__decoder__set_quirk_enabled( |
| &dec, WUFFS_JSON__QUIRK_REPLACE_INVALID_UNICODE, true); |
| |
| wuffs_base__io_buffer have = |
| wuffs_base__make_io_buffer_writer(global_have_slice); |
| wuffs_base__token_buffer tok = |
| wuffs_base__make_token_buffer_writer(global_have_token_slice); |
| wuffs_base__io_buffer src = wuffs_base__make_io_buffer_reader( |
| wuffs_base__make_slice_u8((void*)(test_cases[tc].str), |
| strlen(test_cases[tc].str)), |
| true); |
| CHECK_STATUS("decode_tokens", wuffs_json__decoder__decode_tokens( |
| &dec, &tok, &src, global_work_slice)); |
| |
| uint64_t src_index = 0; |
| while (tok.meta.ri < tok.meta.wi) { |
| wuffs_base__token* t = &tok.data.ptr[tok.meta.ri++]; |
| uint64_t vbc = wuffs_base__token__value_base_category(t); |
| uint64_t vbd = wuffs_base__token__value_base_detail(t); |
| uint64_t token_length = wuffs_base__token__length(t); |
| |
| if (vbc == WUFFS_BASE__TOKEN__VBC__UNICODE_CODE_POINT) { |
| uint8_t u[WUFFS_BASE__UTF_8__BYTE_LENGTH__MAX_INCL]; |
| size_t n = wuffs_base__utf_8__encode( |
| wuffs_base__make_slice_u8(&u[0], |
| WUFFS_BASE__UTF_8__BYTE_LENGTH__MAX_INCL), |
| vbd); |
| if (vbd == 0xFFFD) { |
| u[0] = '?'; |
| n = 1; |
| } |
| if ((have.data.len - have.meta.wi) < n) { |
| RETURN_FAIL("tc=%d: token too long", tc); |
| } |
| memcpy(&have.data.ptr[have.meta.wi], &u[0], n); |
| have.meta.wi += n; |
| |
| } else if (vbc == WUFFS_BASE__TOKEN__VBC__STRING) { |
| if (vbd & WUFFS_BASE__TOKEN__VBD__STRING__CONVERT_0_DST_1_SRC_DROP) { |
| // No-op. |
| } else if (vbd & |
| WUFFS_BASE__TOKEN__VBD__STRING__CONVERT_1_DST_1_SRC_COPY) { |
| if ((have.data.len - have.meta.wi) < token_length) { |
| RETURN_FAIL("tc=%d: token too long", tc); |
| } |
| memcpy(&have.data.ptr[have.meta.wi], &test_cases[tc].str[src_index], |
| token_length); |
| have.meta.wi += token_length; |
| } else { |
| RETURN_FAIL("tc=%d: unexpected string-token conversion", tc); |
| } |
| |
| } else { |
| RETURN_FAIL("tc=%d: unexpected token", tc); |
| } |
| |
| src_index += token_length; |
| } |
| |
| if (src_index != src.meta.ri) { |
| RETURN_FAIL("tc=%d: src_index: have %zu, want %zu", tc, src_index, |
| src.meta.ri); |
| } |
| |
| if (have.meta.wi >= have.data.len) { |
| RETURN_FAIL("tc=%d: too many have bytes", tc); |
| } |
| have.data.ptr[have.meta.wi] = '\x00'; |
| size_t len = strlen(test_cases[tc].want); |
| if ((len != have.meta.wi) || |
| (memcmp(have.data.ptr, test_cases[tc].want, len) != 0)) { |
| RETURN_FAIL("tc=%d: have \"%s\", want \"%s\"", tc, have.data.ptr, |
| test_cases[tc].want); |
| } |
| } |
| |
| return NULL; |
| } |
| |
| const char* // |
| test_wuffs_json_decode_unicode4_escapes() { |
| CHECK_FOCUS(__func__); |
| |
| const uint32_t fail = 0xDEADBEEF; |
| |
| struct { |
| uint32_t want; |
| const char* str; |
| } test_cases[] = { |
| // Simple (non-surrogate) successes. |
| {.want = 0x0000000A, .str = "\"\\u000a\""}, |
| {.want = 0x0000005C, .str = "\"\\\\u1234\""}, // U+005C is '\\'. |
| {.want = 0x00001000, .str = "\"\\u10002345\""}, |
| {.want = 0x00001000, .str = "\"\\u1000234\""}, |
| {.want = 0x00001000, .str = "\"\\u100023\""}, |
| {.want = 0x00001000, .str = "\"\\u10002\""}, |
| {.want = 0x00001234, .str = "\"\\u1234\""}, |
| {.want = 0x0000D7FF, .str = "\"\\ud7ff\""}, |
| {.want = 0x0000E000, .str = "\"\\uE000\""}, |
| {.want = 0x0000FFFF, .str = "\"\\uFffF\""}, |
| |
| // Unicode surrogate pair. U+0001F4A9 PILE OF POO is (U+D83D, U+DCA9), |
| // because ((0x03D << 10) | 0x0A9) is 0xF4A9: |
| // - High surrogates are in the range U+D800 ..= U+DBFF. |
| // - Low surrogates are in the range U+DC00 ..= U+DFFF. |
| {.want = 0x0001F4A9, .str = "\"\\uD83D\\udca9\""}, |
| |
| // More surrogate pairs. |
| {.want = 0x00010000, .str = "\"\\uD800\\uDC00\""}, |
| {.want = 0x0010FFFF, .str = "\"\\uDBFF\\uDFFF\""}, |
| |
| // Simple (non-surrogate) failures. |
| {.want = fail, .str = "\"\\U1234\""}, |
| {.want = fail, .str = "\"\\u123"}, |
| {.want = fail, .str = "\"\\u123\""}, |
| {.want = fail, .str = "\"\\u123x\""}, |
| {.want = fail, .str = "\"u1234\""}, |
| |
| // Invalid surrogate pairs. |
| {.want = fail, .str = "\"\\uD800\""}, // High alone. |
| {.want = fail, .str = "\"\\uD83D?udca9\""}, // High then not "\\u". |
| {.want = fail, .str = "\"\\uD83D\\ud7ff\""}, // High then non-surrogate. |
| {.want = fail, .str = "\"\\uD83D\\udbff\""}, // High then high. |
| {.want = fail, .str = "\"\\uD83D\\ue000\""}, // High then non-surrogate. |
| {.want = fail, .str = "\"\\uDC00\""}, // Low alone. |
| {.want = fail, .str = "\"\\uDC00\\u0000\""}, // Low then non-surrogate. |
| {.want = fail, .str = "\"\\uDC00\\ud800\""}, // Low then high. |
| {.want = fail, .str = "\"\\uDC00\\udfff\""}, // Low then low. |
| {.want = fail, .str = "\"\\uDFFF1234\""}, // Low alone. |
| }; |
| |
| wuffs_json__decoder dec; |
| int tc; |
| for (tc = 0; tc < WUFFS_TESTLIB_ARRAY_SIZE(test_cases); tc++) { |
| CHECK_STATUS("initialize", |
| wuffs_json__decoder__initialize( |
| &dec, sizeof dec, WUFFS_VERSION, |
| WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED)); |
| |
| wuffs_base__token_buffer tok = |
| wuffs_base__make_token_buffer_writer(global_have_token_slice); |
| wuffs_base__io_buffer src = wuffs_base__make_io_buffer_reader( |
| wuffs_base__make_slice_u8((void*)(test_cases[tc].str), |
| strlen(test_cases[tc].str)), |
| true); |
| wuffs_json__decoder__decode_tokens(&dec, &tok, &src, global_work_slice); |
| |
| uint32_t have = fail; |
| uint64_t total_length = 0; |
| size_t i; |
| for (i = tok.meta.ri; i < tok.meta.wi; i++) { |
| wuffs_base__token* t = &tok.data.ptr[i]; |
| total_length = |
| wuffs_base__u64__sat_add(total_length, wuffs_base__token__length(t)); |
| |
| // Set have to the first Unicode code point token. |
| if ((have == fail) && ((wuffs_base__token__value_base_category(t) == |
| WUFFS_BASE__TOKEN__VBC__UNICODE_CODE_POINT))) { |
| have = wuffs_base__token__value_base_detail(t); |
| if (have > 0x10FFFF) { // This also catches "have == fail". |
| RETURN_FAIL("%s: invalid Unicode code point", test_cases[tc].str); |
| } |
| |
| uint64_t have_length = wuffs_base__token__length(t); |
| uint64_t want_length = (have == 0x5C) ? 2 : ((have <= 0xFFFF) ? 6 : 12); |
| if (have_length != want_length) { |
| RETURN_FAIL("%s: token length: have %" PRIu64 ", want %" PRIu64, |
| test_cases[tc].str, have_length, want_length); |
| } |
| } |
| } |
| |
| if (have != test_cases[tc].want) { |
| RETURN_FAIL("%s: have 0x%" PRIX32 ", want 0x%" PRIX32, test_cases[tc].str, |
| have, test_cases[tc].want); |
| } |
| |
| if (total_length != src.meta.ri) { |
| RETURN_FAIL("%s: total length: have %" PRIu64 ", want %" PRIu64, |
| test_cases[tc].str, total_length, src.meta.ri); |
| } |
| } |
| |
| return NULL; |
| } |
| |
| // test_wuffs_json_decode_src_io_buffer_length tests that given a sufficient |
| // amount of source data (WUFFS_JSON__DECODER_SRC_IO_BUFFER_LENGTH_MIN_INCL or |
| // more), decoding will always return a conclusive result, not a suspension |
| // such as "$short read". |
| // |
| // The JSON specification doesn't give a maximum byte length for a number, but |
| // implementations are permitted to impose one. Wuffs' implementation imposes |
| // WUFFS_JSON__DECODER_NUMBER_LENGTH_MAX_INCL. |
| const char* // |
| test_wuffs_json_decode_src_io_buffer_length() { |
| CHECK_FOCUS(__func__); |
| |
| if (WUFFS_JSON__DECODER_NUMBER_LENGTH_MAX_INCL >= |
| WUFFS_JSON__DECODER_SRC_IO_BUFFER_LENGTH_MIN_INCL) { |
| RETURN_FAIL( |
| "inconsistent WUFFS_JSON__DECODER_NUMBER_LENGTH_MAX_INCL vs " |
| "WUFFS_JSON__DECODER_SRC_IO_BUFFER_LENGTH_MIN_INCL"); |
| } |
| |
| // src_array holds the test string of repeated '7's. 107 is arbitrary but |
| // long enough for the loop below. |
| uint8_t src_array[107]; |
| memset(&src_array[0], '7', 107); |
| |
| wuffs_json__decoder dec; |
| |
| int i; |
| for (i = WUFFS_JSON__DECODER_NUMBER_LENGTH_MAX_INCL - 2; |
| i <= WUFFS_JSON__DECODER_NUMBER_LENGTH_MAX_INCL + 2; i++) { |
| if (i < 0) { |
| RETURN_FAIL("invalid test case: i=%d", i); |
| } else if (i > 107) { |
| RETURN_FAIL("invalid test case: i=%d", i); |
| } |
| |
| wuffs_base__slice_u8 src_data = ((wuffs_base__slice_u8){ |
| .ptr = &src_array[0], |
| .len = i, |
| }); |
| |
| int closed; |
| for (closed = 0; closed < 2; closed++) { |
| wuffs_base__token_buffer tok = |
| wuffs_base__make_token_buffer_writer(global_have_token_slice); |
| wuffs_base__io_buffer src = |
| wuffs_base__make_io_buffer_reader(src_data, closed != 0); |
| CHECK_STATUS("initialize", |
| wuffs_json__decoder__initialize( |
| &dec, sizeof dec, WUFFS_VERSION, |
| WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED)); |
| |
| wuffs_base__status have = wuffs_json__decoder__decode_tokens( |
| &dec, &tok, &src, global_work_slice); |
| const char* want = |
| (i > WUFFS_JSON__DECODER_NUMBER_LENGTH_MAX_INCL) |
| ? wuffs_json__error__unsupported_number_length |
| : (closed ? NULL : wuffs_base__suspension__short_read); |
| if (have.repr != want) { |
| RETURN_FAIL("i=%d, closed=%d: have \"%s\", want \"%s\"", i, closed, |
| have.repr, want); |
| } |
| |
| if ((i >= WUFFS_JSON__DECODER_SRC_IO_BUFFER_LENGTH_MIN_INCL) && |
| wuffs_base__status__is_suspension(&have)) { |
| RETURN_FAIL("i=%d, closed=%d: have a suspension", i, closed); |
| } |
| } |
| } |
| |
| return NULL; |
| } |
| |
| const char* // |
| test_wuffs_json_decode_string() { |
| CHECK_FOCUS(__func__); |
| |
| const char* bad_bac = wuffs_json__error__bad_backslash_escape; |
| const char* bad_ccc = wuffs_json__error__bad_c0_control_code; |
| const char* bad_utf = wuffs_json__error__bad_utf_8; |
| |
| struct { |
| const char* want_status_repr; |
| const char* str; |
| } test_cases[] = { |
| {.want_status_repr = NULL, .str = "\"+++\\\"+\\/+\\\\+++\""}, |
| {.want_status_repr = NULL, .str = "\"+++\\b+\\f+\\n+\\r+\\t+++\""}, |
| {.want_status_repr = NULL, .str = "\"\x20\""}, // U+00000020. |
| {.want_status_repr = NULL, .str = "\"\xC2\x80\""}, // U+00000080. |
| {.want_status_repr = NULL, .str = "\"\xCE\x94\""}, // U+00000394. |
| {.want_status_repr = NULL, .str = "\"\xDF\xBF\""}, // U+000007FF. |
| {.want_status_repr = NULL, .str = "\"\xE0\xA0\x80\""}, // U+00000800. |
| {.want_status_repr = NULL, .str = "\"\xE2\x98\x83\""}, // U+00002603. |
| {.want_status_repr = NULL, .str = "\"\xED\x80\x80\""}, // U+0000D000. |
| {.want_status_repr = NULL, .str = "\"\xED\x9F\xBF\""}, // U+0000D7FF. |
| {.want_status_repr = NULL, .str = "\"\xEE\x80\x80\""}, // U+0000E000. |
| {.want_status_repr = NULL, .str = "\"\xEF\xBF\xBD\""}, // U+0000FFFD. |
| {.want_status_repr = NULL, .str = "\"\xEF\xBF\xBF\""}, // U+0000FFFF. |
| {.want_status_repr = NULL, .str = "\"\xF0\x90\x80\x80\""}, // U+00010000. |
| {.want_status_repr = NULL, .str = "\"\xF0\x9F\x92\xA9\""}, // U+0001F4A9. |
| {.want_status_repr = NULL, .str = "\"\xF0\xB0\x80\x81\""}, // U+00030001. |
| {.want_status_repr = NULL, .str = "\"\xF1\xB0\x80\x82\""}, // U+00070002. |
| {.want_status_repr = NULL, .str = "\"\xF3\xB0\x80\x83\""}, // U+000F0003. |
| {.want_status_repr = NULL, .str = "\"\xF4\x80\x80\x84\""}, // U+00100004. |
| {.want_status_repr = NULL, .str = "\"\xF4\x8F\xBF\xBF\""}, // U+0010FFFF. |
| {.want_status_repr = NULL, .str = "\"abc\""}, |
| {.want_status_repr = NULL, .str = "\"i\x6Ak\""}, |
| {.want_status_repr = NULL, .str = "\"space+\x20+space\""}, |
| {.want_status_repr = NULL, .str = "\"tab+\\t+tab\""}, |
| {.want_status_repr = NULL, .str = "\"tab+\\u0009+tab\""}, |
| |
| {.want_status_repr = bad_bac, .str = "\"\\uIJKL\""}, |
| {.want_status_repr = bad_bac, .str = "\"space+\\x20+space\""}, |
| |
| {.want_status_repr = bad_ccc, .str = "\"\x1F\""}, |
| {.want_status_repr = bad_ccc, .str = "\"tab+\t+tab\""}, |
| |
| {.want_status_repr = bad_utf, .str = "\"\x80\""}, |
| {.want_status_repr = bad_utf, .str = "\"\xBF\""}, |
| {.want_status_repr = bad_utf, .str = "\"\xC1\x80\""}, |
| {.want_status_repr = bad_utf, .str = "\"\xC2\x7F\""}, |
| {.want_status_repr = bad_utf, .str = "\"\xDF\xC0\""}, |
| {.want_status_repr = bad_utf, .str = "\"\xDF\xFF\""}, |
| {.want_status_repr = bad_utf, .str = "\"\xE0\x9F\xBF\""}, |
| {.want_status_repr = bad_utf, .str = "\"\xED\xA0\x80\""}, // U+0000D800. |
| {.want_status_repr = bad_utf, .str = "\"\xED\xAF\xBF\""}, // U+0000DBFF. |
| {.want_status_repr = bad_utf, .str = "\"\xED\xB0\x80\""}, // U+0000DC00. |
| {.want_status_repr = bad_utf, .str = "\"\xED\xBF\xBF\""}, // U+0000DFFF. |
| {.want_status_repr = bad_utf, .str = "\"\xF0\x80\x80\""}, |
| {.want_status_repr = bad_utf, .str = "\"\xF0\x8F\xBF\xBF\""}, |
| {.want_status_repr = bad_utf, .str = "\"\xF2\x7F\x80\x80\""}, |
| {.want_status_repr = bad_utf, .str = "\"\xF2\x80\x7F\x80\""}, |
| {.want_status_repr = bad_utf, .str = "\"\xF2\x80\x80\x7F\""}, |
| {.want_status_repr = bad_utf, .str = "\"\xF4\x90\x80\x80\""}, |
| {.want_status_repr = bad_utf, .str = "\"\xF5\""}, |
| {.want_status_repr = bad_utf, .str = "\"\xFF\xFF\xFF\xFF\""}, |
| }; |
| |
| wuffs_json__decoder dec; |
| int tc; |
| for (tc = 0; tc < WUFFS_TESTLIB_ARRAY_SIZE(test_cases); tc++) { |
| CHECK_STATUS("initialize", |
| wuffs_json__decoder__initialize( |
| &dec, sizeof dec, WUFFS_VERSION, |
| WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED)); |
| |
| wuffs_base__token_buffer tok = |
| wuffs_base__make_token_buffer_writer(global_have_token_slice); |
| wuffs_base__io_buffer src = wuffs_base__make_io_buffer_reader( |
| wuffs_base__make_slice_u8((void*)(test_cases[tc].str), |
| strlen(test_cases[tc].str)), |
| true); |
| wuffs_base__status have_status = |
| wuffs_json__decoder__decode_tokens(&dec, &tok, &src, global_work_slice); |
| |
| uint64_t total_length = 0; |
| size_t i; |
| for (i = tok.meta.ri; i < tok.meta.wi; i++) { |
| wuffs_base__token* t = &tok.data.ptr[i]; |
| total_length = |
| wuffs_base__u64__sat_add(total_length, wuffs_base__token__length(t)); |
| } |
| |
| if (have_status.repr != test_cases[tc].want_status_repr) { |
| RETURN_FAIL("%s: have \"%s\", want \"%s\"", test_cases[tc].str, |
| have_status.repr, test_cases[tc].want_status_repr); |
| } |
| |
| if (total_length != src.meta.ri) { |
| RETURN_FAIL("%s: total length: have %" PRIu64 ", want %" PRIu64, |
| test_cases[tc].str, total_length, src.meta.ri); |
| } |
| } |
| |
| return NULL; |
| } |
| |
| // ---------------- Mimic Tests |
| |
| #ifdef WUFFS_MIMIC |
| |
| // No mimic tests. |
| |
| #endif // WUFFS_MIMIC |
| |
| // ---------------- JSON Benches |
| |
| const char* // |
| bench_wuffs_json_decode_1k() { |
| CHECK_FOCUS(__func__); |
| return do_bench_token_decoder( |
| wuffs_json_decode, WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED, |
| tcounter_src, &json_github_tags_gt, UINT64_MAX, UINT64_MAX, 10000); |
| } |
| |
| const char* // |
| bench_wuffs_json_decode_21k_formatted() { |
| CHECK_FOCUS(__func__); |
| return do_bench_token_decoder( |
| wuffs_json_decode, WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED, |
| tcounter_src, &json_file_sizes_gt, UINT64_MAX, UINT64_MAX, 300); |
| } |
| |
| const char* // |
| bench_wuffs_json_decode_26k_compact() { |
| CHECK_FOCUS(__func__); |
| return do_bench_token_decoder( |
| wuffs_json_decode, WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED, |
| tcounter_src, &json_australian_abc_gt, UINT64_MAX, UINT64_MAX, 250); |
| } |
| |
| const char* // |
| bench_wuffs_json_decode_217k_stringy() { |
| CHECK_FOCUS(__func__); |
| return do_bench_token_decoder( |
| wuffs_json_decode, WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED, |
| tcounter_src, &json_nobel_prizes_gt, UINT64_MAX, UINT64_MAX, 25); |
| } |
| |
| // ---------------- Mimic Benches |
| |
| #ifdef WUFFS_MIMIC |
| |
| // No mimic benches. |
| |
| #endif // WUFFS_MIMIC |
| |
| // ---------------- Manifest |
| |
| proc tests[] = { |
| |
| // These strconv tests are really testing the Wuffs base library. They |
| // aren't specific to the std/json code, but putting them here is as good |
| // as any other place. |
| test_strconv_hexadecimal, |
| test_strconv_hpd_rounded_integer, |
| test_strconv_hpd_shift, |
| test_strconv_parse_number_f64, |
| test_strconv_parse_number_i64, |
| test_strconv_parse_number_u64, |
| test_strconv_utf_8_next, |
| |
| test_wuffs_json_decode_end_of_data, |
| test_wuffs_json_decode_interface, |
| test_wuffs_json_decode_long_numbers, |
| test_wuffs_json_decode_prior_valid_utf_8, |
| test_wuffs_json_decode_quirk_allow_backslash_etc, |
| test_wuffs_json_decode_quirk_allow_backslash_x, |
| test_wuffs_json_decode_quirk_allow_comment_etc, |
| test_wuffs_json_decode_quirk_allow_extra_comma, |
| test_wuffs_json_decode_quirk_allow_inf_nan_numbers, |
| test_wuffs_json_decode_quirk_allow_leading_etc, |
| test_wuffs_json_decode_quirk_allow_trailing_etc, |
| test_wuffs_json_decode_quirk_replace_invalid_unicode, |
| test_wuffs_json_decode_src_io_buffer_length, |
| test_wuffs_json_decode_string, |
| test_wuffs_json_decode_unicode4_escapes, |
| |
| #ifdef WUFFS_MIMIC |
| |
| // No mimic tests. |
| |
| #endif // WUFFS_MIMIC |
| |
| NULL, |
| }; |
| |
| proc benches[] = { |
| |
| bench_wuffs_json_decode_1k, |
| bench_wuffs_json_decode_21k_formatted, |
| bench_wuffs_json_decode_26k_compact, |
| bench_wuffs_json_decode_217k_stringy, |
| |
| #ifdef WUFFS_MIMIC |
| |
| // No mimic benches. |
| |
| #endif // WUFFS_MIMIC |
| |
| NULL, |
| }; |
| |
| int // |
| main(int argc, char** argv) { |
| proc_package_name = "std/json"; |
| return test_main(argc, argv, tests, benches); |
| } |