blob: 391190a34fafb1915585e479ce73f3b170187bb7 [file] [log] [blame]
// Copyright 2020 The Wuffs Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ----------------
/*
json-to-cbor reads UTF-8 JSON (a text format) from stdin and writes the
equivalent CBOR (a binary format) to stdout.
See the "const char* g_usage" string below for details.
----
To run:
$CXX json-to-cbor.cc && ./a.out < ../../test/data/github-tags.json; rm -f a.out
for a C++ compiler $CXX, such as clang++ or g++.
*/
#if defined(__cplusplus) && (__cplusplus < 201103L)
#error "This C++ program requires -std=c++11 or later"
#endif
#include <stdio.h>
#include <string>
#include <vector>
// Wuffs ships as a "single file C library" or "header file library" as per
// https://github.com/nothings/stb/blob/master/docs/stb_howto.txt
//
// To use that single file as a "foo.c"-like implementation, instead of a
// "foo.h"-like header, #define WUFFS_IMPLEMENTATION before #include'ing or
// compiling it.
#define WUFFS_IMPLEMENTATION
// Defining the WUFFS_CONFIG__MODULE* macros is optional, but it lets users of
// release/c/etc.c choose which parts of Wuffs to build. That file contains the
// entire Wuffs standard library, implementing a variety of codecs and file
// formats. Without this macro definition, an optimizing compiler or linker may
// very well discard Wuffs code for unused codecs, but listing the Wuffs
// modules we use makes that process explicit. Preprocessing means that such
// code simply isn't compiled.
#define WUFFS_CONFIG__MODULES
#define WUFFS_CONFIG__MODULE__AUX__BASE
#define WUFFS_CONFIG__MODULE__AUX__JSON
#define WUFFS_CONFIG__MODULE__BASE
#define WUFFS_CONFIG__MODULE__JSON
// If building this program in an environment that doesn't easily accommodate
// relative includes, you can use the script/inline-c-relative-includes.go
// program to generate a stand-alone C++ file.
#include "../../release/c/wuffs-unsupported-snapshot.c"
// TRY evaluates error_msg (an expression convertible to std::string) exactly
// once. If the resulting string is non-empty, it is returned from the
// enclosing function. If it is empty, execution continues after the macro.
//
// The do-while(false) wrapper makes the expansion a single statement, so that
// "TRY(foo());" can be used anywhere an ordinary statement can.
#define TRY(error_msg) \
do { \
std::string z = error_msg; \
if (!z.empty()) { \
return z; \
} \
} while (false)
// g_usage is the program's help text. parse_flags returns it as an error
// message when it sees an unrecognized flag, and main1 returns it when more
// than one non-flag argument remains.
//
// The paragraph about -jwcc always being enabled is only compiled in when
// WUFFS_EXAMPLE_SPEAK_JWCC_NOT_JSON is defined.
static const char* g_usage =
"Usage: json-to-cbor -flags input.json\n"
"\n"
"Flags:\n"
" -input-allow-comments\n"
" -input-allow-extra-comma\n"
" -input-allow-inf-nan-numbers\n"
" -input-jwcc\n"
" -jwcc\n"
"\n"
"The input.json filename is optional. If absent, it reads from stdin.\n"
"\n"
"----\n"
"\n"
"json-to-cbor reads UTF-8 JSON (a text format) from stdin and writes the\n"
"equivalent CBOR (a binary format) to stdout.\n"
"\n"
"The conversion may be lossy. For example, \"0.99999999999999999\" and\n"
"\"1.0\" are (technically) different JSON values, but they are converted\n"
"to the same CBOR bytes: F9 3C 00. Similarly, integer values outside ±M\n"
"may lose precision, where M is ((1<<53)-1), also known as JavaScript's\n"
"Number.MAX_SAFE_INTEGER.\n"
"\n"
"The CBOR output is not canonicalized in the RFC 7049 Section 3.9 sense.\n"
"Map keys are not guaranteed to be sorted or de-duplicated.\n"
"\n"
"----\n"
"\n"
"The -input-allow-comments flag allows \"/*slash-star*/\" and\n"
"\"//slash-slash\" C-style comments within JSON input.\n"
"\n"
"The -input-allow-extra-comma flag allows input like \"[1,2,]\", with a\n"
"comma after the final element of a JSON list or dictionary.\n"
"\n"
"The -input-allow-inf-nan-numbers flag allows non-finite floating point\n"
"numbers (infinities and not-a-numbers) within JSON input.\n"
"\n"
"Combining some of those flags results in speaking JWCC (JSON With Commas\n"
"and Comments), not plain JSON. For convenience, the -input-jwcc or -jwcc\n"
"flags enables all of:\n"
" -input-allow-comments\n"
" -input-allow-extra-comma\n"
"\n"
#if defined(WUFFS_EXAMPLE_SPEAK_JWCC_NOT_JSON)
"This program was configured at compile time to always use -jwcc.\n"
"\n"
#endif
"----\n"
"\n"
"The JSON specification permits implementations to set their own maximum\n"
"input depth. This JSON implementation sets it to 1024.";
// ----
// DST_BUFFER_ARRAY_SIZE is the number of bytes in the output buffer. It
// defaults to 32 KiB but can be overridden by defining it before this point
// (e.g. on the compiler command line).
#ifndef DST_BUFFER_ARRAY_SIZE
#define DST_BUFFER_ARRAY_SIZE (32 * 1024)
#endif
// g_dst_array is the backing storage that main1 wraps g_dst around.
uint8_t g_dst_array[DST_BUFFER_ARRAY_SIZE];
// g_dst buffers output bytes. write_dst and write_dst_slow append to it and
// flush_dst drains it to stdout.
wuffs_base__io_buffer g_dst;
// g_quirks collects the JSON decoder quirks selected by parse_flags. main1
// passes them to wuffs_aux::DecodeJson.
std::vector<uint32_t> g_quirks;
// g_flags holds the command line arguments that remain after parse_flags has
// consumed the leading flag arguments.
struct {
int remaining_argc;
char** remaining_argv;
} g_flags = {0};
// parse_flags consumes the leading flag arguments of argv (after the program
// name), appending the corresponding decoder quirks to g_quirks. Whatever
// arguments are left over are recorded in g_flags.remaining_argc and
// g_flags.remaining_argv.
//
// Flag parsing stops at the first argument that does not start with '-', at a
// bare "-" (which is left as a remaining argument) or at a bare "--" (which is
// consumed). "--foo" is treated the same as "-foo".
//
// It returns the empty string on success or g_usage for an unrecognized flag.
std::string  //
parse_flags(int argc, char** argv) {
#if defined(WUFFS_EXAMPLE_SPEAK_JWCC_NOT_JSON)
  g_quirks.push_back(WUFFS_JSON__QUIRK_ALLOW_COMMENT_BLOCK);
  g_quirks.push_back(WUFFS_JSON__QUIRK_ALLOW_COMMENT_LINE);
  g_quirks.push_back(WUFFS_JSON__QUIRK_ALLOW_EXTRA_COMMA);
#endif

  // Start at index 1 (when it exists) to skip argv[0], the program name.
  int index = (argc > 0) ? 1 : 0;
  while (index < argc) {
    const char* name = argv[index];

    // Neither a non-dash argument nor a bare "-" is a flag. Some programs
    // interpret the latter as stdin.
    if ((name[0] != '-') || (name[1] == '\x00')) {
      break;
    }

    // Strip the leading "-" or "--". A bare "--" ends flag parsing and is
    // itself consumed.
    if (name[1] != '-') {
      name += 1;
    } else if (name[2] != '\x00') {
      name += 2;
    } else {
      index++;
      break;
    }

    if (strcmp(name, "input-allow-comments") == 0) {
      g_quirks.push_back(WUFFS_JSON__QUIRK_ALLOW_COMMENT_BLOCK);
      g_quirks.push_back(WUFFS_JSON__QUIRK_ALLOW_COMMENT_LINE);
    } else if (strcmp(name, "input-allow-extra-comma") == 0) {
      g_quirks.push_back(WUFFS_JSON__QUIRK_ALLOW_EXTRA_COMMA);
    } else if (strcmp(name, "input-allow-inf-nan-numbers") == 0) {
      g_quirks.push_back(WUFFS_JSON__QUIRK_ALLOW_INF_NAN_NUMBERS);
    } else if ((strcmp(name, "input-jwcc") == 0) ||
               (strcmp(name, "jwcc") == 0)) {
      g_quirks.push_back(WUFFS_JSON__QUIRK_ALLOW_COMMENT_BLOCK);
      g_quirks.push_back(WUFFS_JSON__QUIRK_ALLOW_COMMENT_LINE);
      g_quirks.push_back(WUFFS_JSON__QUIRK_ALLOW_EXTRA_COMMA);
    } else {
      return g_usage;
    }
    index++;
  }

  g_flags.remaining_argc = argc - index;
  g_flags.remaining_argv = argv + index;
  return "";
}
// ----
// flush_dst writes all of g_dst's readable bytes to stdout, advancing g_dst's
// read index as it goes, and then calls g_dst.compact().
//
// It returns the empty string on success or an error message if stdout
// accepted fewer bytes than were offered.
std::string  //
flush_dst() {
  while (true) {
    const size_t n = g_dst.reader_length();
    if (n == 0) {
      break;
    }
    // fwrite returns a size_t (the number of elements written), never a
    // negative value. A count smaller than n is how it signals an error.
    // Holding it in a size_t avoids both the non-standard ssize_t type and a
    // signed-versus-unsigned comparison.
    const size_t num_written = fwrite(g_dst.reader_pointer(), 1, n, stdout);
    g_dst.meta.ri += num_written;
    if (num_written < n) {
      return "main: error writing to stdout";
    }
  }
  g_dst.compact();
  return "";
}
std::string //
write_dst_slow(const void* s, size_t n) {
const uint8_t* p = static_cast<const uint8_t*>(s);
while (n > 0) {
size_t i = g_dst.writer_length();
if (i == 0) {
TRY(flush_dst());
i = g_dst.writer_length();
if (i == 0) {
return "main: g_dst buffer is full";
}
}
if (i > n) {
i = n;
}
memcpy(g_dst.data.ptr + g_dst.meta.wi, p, i);
g_dst.meta.wi += i;
p += i;
n -= i;
}
return "";
}
// write_dst appends the n bytes starting at s to g_dst. When they do not fit
// in the space after g_dst's write index, it defers to write_dst_slow, which
// flushes as needed. Otherwise it is a single memcpy.
//
// It returns the empty string on success or an error message on failure.
inline std::string  //
write_dst(const void* s, size_t n) {
  // Uncommon case: not enough room without flushing first.
  if (n > (DST_BUFFER_ARRAY_SIZE - g_dst.meta.wi)) {
    return write_dst_slow(s, n);
  }
  memcpy(g_dst.data.ptr + g_dst.meta.wi, s, n);
  g_dst.meta.wi += n;
  return "";
}
// ----
class Callbacks : public wuffs_aux::DecodeJsonCallbacks {
public:
Callbacks() = default;
std::string Append(uint64_t n, uint8_t base) {
uint8_t c[9];
if (n < 0x18) {
c[0] = base | static_cast<uint8_t>(n);
return write_dst(&c[0], 1);
} else if (n <= 0xFF) {
c[0] = base | 0x18;
c[1] = static_cast<uint8_t>(n);
return write_dst(&c[0], 2);
} else if (n <= 0xFFFF) {
c[0] = base | 0x19;
wuffs_base__poke_u16be__no_bounds_check(&c[1], static_cast<uint16_t>(n));
return write_dst(&c[0], 3);
} else if (n <= 0xFFFFFFFF) {
c[0] = base | 0x1A;
wuffs_base__poke_u32be__no_bounds_check(&c[1], static_cast<uint32_t>(n));
return write_dst(&c[0], 5);
}
c[0] = base | 0x1B;
wuffs_base__poke_u64be__no_bounds_check(&c[1], n);
return write_dst(&c[0], 9);
}
std::string AppendNull() override { return write_dst("\xF6", 1); }
std::string AppendBool(bool val) override {
return write_dst(val ? "\xF5" : "\xF4", 1);
}
std::string AppendF64(double val) override {
uint8_t c[9];
wuffs_base__lossy_value_u16 lv16 =
wuffs_base__ieee_754_bit_representation__from_f64_to_u16_truncate(val);
if (!lv16.lossy) {
c[0] = 0xF9;
wuffs_base__poke_u16be__no_bounds_check(&c[1], lv16.value);
return write_dst(&c[0], 3);
}
wuffs_base__lossy_value_u32 lv32 =
wuffs_base__ieee_754_bit_representation__from_f64_to_u32_truncate(val);
if (!lv32.lossy) {
c[0] = 0xFA;
wuffs_base__poke_u32be__no_bounds_check(&c[1], lv32.value);
return write_dst(&c[0], 5);
}
c[0] = 0xFB;
wuffs_base__poke_u64be__no_bounds_check(
&c[1], wuffs_base__ieee_754_bit_representation__from_f64_to_u64(val));
return write_dst(&c[0], 9);
}
std::string AppendI64(int64_t val) override {
return (val >= 0) ? Append(static_cast<uint64_t>(val), 0x00)
: Append(static_cast<uint64_t>(-(val + 1)), 0x20);
}
std::string AppendTextString(std::string&& val) override {
TRY(Append(val.size(), 0x60));
return write_dst(val.data(), val.size());
}
std::string Push(uint32_t flags) override {
return write_dst(
(flags & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_LIST) ? "\x9F" : "\xBF",
1);
}
std::string Pop(uint32_t flags) override { return write_dst("\xFF", 1); }
};
// ----
// main1 does the program's real work: it sets up g_dst over g_dst_array,
// parses the command line flags, picks the input (stdin, or the single file
// named by the remaining argument) and decodes its JSON, with the Callbacks
// object writing the equivalent CBOR to g_dst.
//
// It returns the empty string on success or an error message on failure: the
// usage text for bad arguments, a "cannot read input file" message if the file
// cannot be opened, or the decoder's error message.
//
// Output may still be buffered in g_dst when this returns. The caller is
// responsible for calling flush_dst.
std::string  //
main1(int argc, char** argv) {
  g_dst = wuffs_base__ptr_u8__writer(&g_dst_array[0], DST_BUFFER_ARRAY_SIZE);
  TRY(parse_flags(argc, argv));

  FILE* in = stdin;
  if (g_flags.remaining_argc > 1) {
    return g_usage;
  } else if (g_flags.remaining_argc == 1) {
    in = fopen(g_flags.remaining_argv[0], "r");
    if (!in) {
      return std::string("main: cannot read input file");
    }
  }

  std::string error_message;
  {
    // The inner scope ensures that the FileInput wrapper is destroyed before
    // the underlying FILE is closed below.
    Callbacks callbacks;
    wuffs_aux::sync_io::FileInput input(in);
    error_message =
        wuffs_aux::DecodeJson(
            callbacks, input,
            wuffs_base__make_slice_u32(g_quirks.data(), g_quirks.size()))
            .error_message;
  }

  // Release the file opened above. stdin is not ours to close.
  if (in != stdin) {
    fclose(in);
  }
  return error_message;
}
// ----
// compute_exit_code maps a status message to a process exit code, printing any
// non-empty message (followed by a newline) to stderr.
//
//  - 0 means success: the message is empty.
//  - 1 means a regular (foreseen) error, e.g. badly formatted or unsupported
//    input.
//  - 2 means an internal (exceptional) error, e.g. defensive run-time checks
//    found that an internal invariant did not hold. Such messages are
//    recognized by containing "internal error:".
//
// Automated testing, including badly formatted inputs, can therefore
// discriminate between expected failure (exit code 1) and unexpected failure
// (other non-zero exit codes). Specifically, exit code 2 for internal
// invariant violation, exit code 139 (which is 128 + SIGSEGV on x86_64 linux)
// for a segmentation fault (e.g. null pointer dereference).
int  //
compute_exit_code(std::string status_msg) {
  int exit_code = 0;
  if (!status_msg.empty()) {
    fputs(status_msg.c_str(), stderr);
    fputc('\n', stderr);

    exit_code = 1;
    if (status_msg.find("internal error:") != std::string::npos) {
      exit_code = 2;
    }
  }
  return exit_code;
}
int //
main(int argc, char** argv) {
std::string z1 = main1(argc, argv);
std::string z2 = flush_dst();
int exit_code = compute_exit_code(z1.empty() ? z2 : z1);
return exit_code;
}