internal/cgen/base/strconv-public.h - external/github.com/google/wuffs - Git at Google

 // After editing this file, run "go generate" in the parent directory.

 // Copyright 2020 The Wuffs Authors.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //    https://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 // ---------------- String Conversions

 // wuffs_base__parse_number_i64 parses the ASCII integer in s. For example, if
 // s contains the bytes "-123" then it will return the int64_t -123.
 //
 // It returns an error if s does not contain an integer or if the integer
 // within would overflow an int64_t.
 //
 // It is similar to wuffs_base__parse_number_u64 but it returns a signed
 // integer, not an unsigned integer. It also allows a leading '+' or '-'.
 wuffs_base__result_i64  //
 wuffs_base__parse_number_i64(wuffs_base__slice_u8 s);

 // wuffs_base__parse_number_u64 parses the ASCII integer in s. For example, if
 // s contains the bytes "123" then it will return the uint64_t 123.
 //
 // It returns an error if s does not contain an integer or if the integer
 // within would overflow a uint64_t.
 //
 // It is similar to the C standard library's strtoull function, but:
 //  - Errors are returned in-band (in a result type), not out-of-band (errno).
 //  - It takes a slice (a pointer and length), not a NUL-terminated C string.
 //  - It does not take an optional endptr argument. It does not allow a partial
 //    parse: it returns an error unless all of s is consumed.
 //  - It does not allow whitespace, leading or otherwise.
 //  - It does not allow a leading '+' or '-'.
 //  - It does not allow unnecessary leading zeroes ("0" is valid and its sole
 //    zero is necessary). All of "00", "0644" and "007" are invalid.
 //  - It does not take a base argument (e.g. base 10 vs base 16). Instead, it
 //    always accepts both decimal (e.g "1234", "0d5678") and hexadecimal (e.g.
 //    "0x9aBC"). The caller is responsible for prior filtering of e.g. hex
 //    numbers if they are unwanted. For example, Wuffs' JSON decoder will only
 //    produce a wuffs_base__token for decimal numbers, not hexadecimal.
 //  - It is not affected by i18n / l10n settings such as environment variables.
 //  - It does allow arbitrary underscores, except inside the optional 2-byte
 //    opening "0d" or "0X" that denotes base-10 or base-16. For example,
 //    "__0D_1_002" would successfully parse as "one thousand and two".
 wuffs_base__result_u64  //
 wuffs_base__parse_number_u64(wuffs_base__slice_u8 s);

 // ---------------- IEEE 754 Floating Point

 // wuffs_base__parse_number_f64 parses the floating point number in s. For
 // example, if s contains the bytes "1.5" then it will return the double 1.5.
 //
 // It returns an error if s does not contain a floating point number.
 //
 // It does not necessarily return an error if the conversion is lossy, e.g. if
 // s is "0.3", which double-precision floating point cannot represent exactly.
 //
 // Similarly, the returned value may be infinite (and no error returned) even
 // if s was not "inf", when the input is nominally finite but sufficiently
 // larger than DBL_MAX, about 1.8e+308.
 //
 // It is similar to the C standard library's strtod function, but:
 //  - Errors are returned in-band (in a result type), not out-of-band (errno).
 //  - It takes a slice (a pointer and length), not a NUL-terminated C string.
 //  - It does not take an optional endptr argument. It does not allow a partial
 //    parse: it returns an error unless all of s is consumed.
 //  - It does not allow whitespace, leading or otherwise.
 //  - It does not allow unnecessary leading zeroes ("0" is valid and its sole
 //    zero is necessary). All of "00", "0644" and "00.7" are invalid.
 //  - It is not affected by i18n / l10n settings such as environment variables.
 //  - Conversely, it always accepts either ',' or '.' as a decimal separator.
 //    In particular, "3,141,592" is always invalid but "3,141" is always valid
 //    (and approximately π). The caller is responsible for e.g. previously
 //    rejecting or filtering s if it contains a comma, if that is unacceptable
 //    to the caller. For example, JSON numbers always use a dot '.' and never a
 //    comma ',', regardless of the LOCALE environment variable.
 //  - It does allow arbitrary underscores. For example, "_3.141_592" would
 //    successfully parse, again approximately π.
 //  - It does allow "inf", "+Infinity" and "-NAN", case insensitive, but it
 //    does not permit "nan" to be followed by an integer mantissa.
 //  - It does not allow hexadecimal floating point numbers.
 wuffs_base__result_f64  //
 wuffs_base__parse_number_f64(wuffs_base__slice_u8 s);

 // wuffs_base__ieee_754_bit_representation__etc converts between a double
 // precision numerical value and its IEEE 754 64-bit representation (1 sign
 // bit, 11 exponent bits, 52 explicit significand bits).
 //
 // For example, it converts between:
 //  - +1.0 and 0x3FF0_0000_0000_0000.
 //  - +5.5 and 0x4016_0000_0000_0000.
 //  - -inf and 0xFFF0_0000_0000_0000.
 //
 // See https://en.wikipedia.org/wiki/Double-precision_floating-point_format

 static inline uint64_t  //
 wuffs_base__ieee_754_bit_representation__from_f64(double f) {
   uint64_t u = 0;
   if (sizeof(uint64_t) == sizeof(double)) {
     memcpy(&u, &f, sizeof(uint64_t));
   }
   return u;
 }

 static inline double  //
 wuffs_base__ieee_754_bit_representation__to_f64(uint64_t u) {
   double f = 0;
   if (sizeof(uint64_t) == sizeof(double)) {
     memcpy(&f, &u, sizeof(uint64_t));
   }
   return f;
 }

   // ---------------- Unicode and UTF-8

 #define WUFFS_BASE__UNICODE_CODE_POINT__MIN_INCL 0x00000000
 #define WUFFS_BASE__UNICODE_CODE_POINT__MAX_INCL 0x0010FFFF

 #define WUFFS_BASE__UNICODE_REPLACEMENT_CHARACTER 0x0000FFFD

 #define WUFFS_BASE__UNICODE_SURROGATE__MIN_INCL 0x0000D800
 #define WUFFS_BASE__UNICODE_SURROGATE__MAX_INCL 0x0000DFFF

 #define WUFFS_BASE__ASCII__MIN_INCL 0x00
 #define WUFFS_BASE__ASCII__MAX_INCL 0x7F

 #define WUFFS_BASE__UTF_8__BYTE_LENGTH__MIN_INCL 1
 #define WUFFS_BASE__UTF_8__BYTE_LENGTH__MAX_INCL 4

 #define WUFFS_BASE__UTF_8__BYTE_LENGTH_1__CODE_POINT__MIN_INCL 0x00000000
 #define WUFFS_BASE__UTF_8__BYTE_LENGTH_1__CODE_POINT__MAX_INCL 0x0000007F
 #define WUFFS_BASE__UTF_8__BYTE_LENGTH_2__CODE_POINT__MIN_INCL 0x00000080
 #define WUFFS_BASE__UTF_8__BYTE_LENGTH_2__CODE_POINT__MAX_INCL 0x000007FF
 #define WUFFS_BASE__UTF_8__BYTE_LENGTH_3__CODE_POINT__MIN_INCL 0x00000800
 #define WUFFS_BASE__UTF_8__BYTE_LENGTH_3__CODE_POINT__MAX_INCL 0x0000FFFF
 #define WUFFS_BASE__UTF_8__BYTE_LENGTH_4__CODE_POINT__MIN_INCL 0x00010000
 #define WUFFS_BASE__UTF_8__BYTE_LENGTH_4__CODE_POINT__MAX_INCL 0x0010FFFF

 // --------

 // wuffs_base__utf_8__next__output is the type returned by
 // wuffs_base__utf_8__next.
 typedef struct {
   uint32_t code_point;
   uint32_t byte_length;

 #ifdef __cplusplus
   inline bool is_valid() const;
 #endif  // __cplusplus

 } wuffs_base__utf_8__next__output;

 static inline wuffs_base__utf_8__next__output  //
 wuffs_base__make_utf_8__next__output(uint32_t code_point,
                                      uint32_t byte_length) {
   wuffs_base__utf_8__next__output ret;
   ret.code_point = code_point;
   ret.byte_length = byte_length;
   return ret;
 }

 static inline bool  //
 wuffs_base__utf_8__next__output__is_valid(
     const wuffs_base__utf_8__next__output* o) {
   if (o) {
     uint32_t cp = o->code_point;
     switch (o->byte_length) {
       case 1:
         return (cp <= 0x7F);
       case 2:
         return (0x080 <= cp) && (cp <= 0x7FF);
       case 3:
         // Avoid the 0xD800 ..= 0xDFFF surrogate range.
         return ((0x0800 <= cp) && (cp <= 0xD7FF)) ||
                ((0xE000 <= cp) && (cp <= 0xFFFF));
       case 4:
         return (0x00010000 <= cp) && (cp <= 0x0010FFFF);
     }
   }
   return false;
 }

 #ifdef __cplusplus

 inline bool  //
 wuffs_base__utf_8__next__output::is_valid() const {
   return wuffs_base__utf_8__next__output__is_valid(this);
 }

 #endif  // __cplusplus

 // --------

 // wuffs_base__utf_8__encode writes the UTF-8 encoding of code_point to s and
 // returns the number of bytes written. If code_point is invalid, or if s is
 // shorter than the entire encoding, it returns 0 (and no bytes are written).
 //
 // s will never be too short if its length is at least 4, also known as
 // WUFFS_BASE__UTF_8__BYTE_LENGTH__MAX_INCL.
 size_t  //
 wuffs_base__utf_8__encode(wuffs_base__slice_u8 dst, uint32_t code_point);

 // wuffs_base__utf_8__next returns the next UTF-8 code point (and that code
 // point's byte length) at the start of s.
 //
 // There are exactly two cases in which this function returns something where
 // wuffs_base__utf_8__next__output__is_valid is false:
 //  - If s is empty then it returns {.code_point=0, .byte_length=0}.
 //  - If s is non-empty and starts with invalid UTF-8 then it returns
 //    {.code_point=WUFFS_BASE__UNICODE_REPLACEMENT_CHARACTER, .byte_length=1}.
 //
 // Otherwise, it returns something where
 // wuffs_base__utf_8__next__output__is_valid is true.
 //
 // In any case, it always returns an output that satisfies both of:
 //  - (output.code_point  <= WUFFS_BASE__UNICODE_CODE_POINT__MAX_INCL).
 //  - (output.byte_length <= s.len).
 //
 // If s is a sub-slice of a larger slice of valid UTF-8, but that sub-slice
 // boundary occurs in the middle of a multi-byte UTF-8 encoding of a single
 // code point, then this function may return something invalid. It is the
 // caller's responsibility to split on or otherwise manage UTF-8 boundaries.
 wuffs_base__utf_8__next__output  //
 wuffs_base__utf_8__next(wuffs_base__slice_u8 s);

 // wuffs_base__utf_8__longest_valid_prefix returns the largest n such that the
 // sub-slice s[..n] is valid UTF-8.
 //
 // In particular, it returns s.len if and only if all of s is valid UTF-8.
 //
 // If s is a sub-slice of a larger slice of valid UTF-8, but that sub-slice
 // boundary occurs in the middle of a multi-byte UTF-8 encoding of a single
 // code point, then this function will return less than s.len. It is the
 // caller's responsibility to split on or otherwise manage UTF-8 boundaries.
 size_t  //
 wuffs_base__utf_8__longest_valid_prefix(wuffs_base__slice_u8 s);

 // wuffs_base__ascii__longest_valid_prefix returns the largest n such that the
 // sub-slice s[..n] is valid ASCII.
 //
 // In particular, it returns s.len if and only if all of s is valid ASCII.
 // Equivalently, when none of the bytes in s have the 0x80 high bit set.
 size_t  //
 wuffs_base__ascii__longest_valid_prefix(wuffs_base__slice_u8 s);
	// After editing this file, run "go generate" in the parent directory.

	// Copyright 2020 The Wuffs Authors.
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// https://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.

	// ---------------- String Conversions

	// wuffs_base__parse_number_i64 parses the ASCII integer in s. For example, if
	// s contains the bytes "-123" then it will return the int64_t -123.
	//
	// It returns an error if s does not contain an integer or if the integer
	// within would overflow an int64_t.
	//
	// It is similar to wuffs_base__parse_number_u64 but it returns a signed
	// integer, not an unsigned integer. It also allows a leading '+' or '-'.
	wuffs_base__result_i64 //
	wuffs_base__parse_number_i64(wuffs_base__slice_u8 s);

	// wuffs_base__parse_number_u64 parses the ASCII integer in s. For example, if
	// s contains the bytes "123" then it will return the uint64_t 123.
	//
	// It returns an error if s does not contain an integer or if the integer
	// within would overflow a uint64_t.
	//
	// It is similar to the C standard library's strtoull function, but:
	// - Errors are returned in-band (in a result type), not out-of-band (errno).
	// - It takes a slice (a pointer and length), not a NUL-terminated C string.
	// - It does not take an optional endptr argument. It does not allow a partial
	// parse: it returns an error unless all of s is consumed.
	// - It does not allow whitespace, leading or otherwise.
	// - It does not allow a leading '+' or '-'.
	// - It does not allow unnecessary leading zeroes ("0" is valid and its sole
	// zero is necessary). All of "00", "0644" and "007" are invalid.
	// - It does not take a base argument (e.g. base 10 vs base 16). Instead, it
	// always accepts both decimal (e.g "1234", "0d5678") and hexadecimal (e.g.
	// "0x9aBC"). The caller is responsible for prior filtering of e.g. hex
	// numbers if they are unwanted. For example, Wuffs' JSON decoder will only
	// produce a wuffs_base__token for decimal numbers, not hexadecimal.
	// - It is not affected by i18n / l10n settings such as environment variables.
	// - It does allow arbitrary underscores, except inside the optional 2-byte
	// opening "0d" or "0X" that denotes base-10 or base-16. For example,
	// "__0D_1_002" would successfully parse as "one thousand and two".
	wuffs_base__result_u64 //
	wuffs_base__parse_number_u64(wuffs_base__slice_u8 s);

	// ---------------- IEEE 754 Floating Point

	// wuffs_base__parse_number_f64 parses the floating point number in s. For
	// example, if s contains the bytes "1.5" then it will return the double 1.5.
	//
	// It returns an error if s does not contain a floating point number.
	//
	// It does not necessarily return an error if the conversion is lossy, e.g. if
	// s is "0.3", which double-precision floating point cannot represent exactly.
	//
	// Similarly, the returned value may be infinite (and no error returned) even
	// if s was not "inf", when the input is nominally finite but sufficiently
	// larger than DBL_MAX, about 1.8e+308.
	//
	// It is similar to the C standard library's strtod function, but:
	// - Errors are returned in-band (in a result type), not out-of-band (errno).
	// - It takes a slice (a pointer and length), not a NUL-terminated C string.
	// - It does not take an optional endptr argument. It does not allow a partial
	// parse: it returns an error unless all of s is consumed.
	// - It does not allow whitespace, leading or otherwise.
	// - It does not allow unnecessary leading zeroes ("0" is valid and its sole
	// zero is necessary). All of "00", "0644" and "00.7" are invalid.
	// - It is not affected by i18n / l10n settings such as environment variables.
	// - Conversely, it always accepts either ',' or '.' as a decimal separator.
	// In particular, "3,141,592" is always invalid but "3,141" is always valid
	// (and approximately π). The caller is responsible for e.g. previously
	// rejecting or filtering s if it contains a comma, if that is unacceptable
	// to the caller. For example, JSON numbers always use a dot '.' and never a
	// comma ',', regardless of the LOCALE environment variable.
	// - It does allow arbitrary underscores. For example, "_3.141_592" would
	// successfully parse, again approximately π.
	// - It does allow "inf", "+Infinity" and "-NAN", case insensitive, but it
	// does not permit "nan" to be followed by an integer mantissa.
	// - It does not allow hexadecimal floating point numbers.
	wuffs_base__result_f64 //
	wuffs_base__parse_number_f64(wuffs_base__slice_u8 s);

	// wuffs_base__ieee_754_bit_representation__etc converts between a double
	// precision numerical value and its IEEE 754 64-bit representation (1 sign
	// bit, 11 exponent bits, 52 explicit significand bits).
	//
	// For example, it converts between:
	// - +1.0 and 0x3FF0_0000_0000_0000.
	// - +5.5 and 0x4016_0000_0000_0000.
	// - -inf and 0xFFF0_0000_0000_0000.
	//
	// See https://en.wikipedia.org/wiki/Double-precision_floating-point_format

	static inline uint64_t //
	wuffs_base__ieee_754_bit_representation__from_f64(double f) {
	uint64_t u = 0;
	if (sizeof(uint64_t) == sizeof(double)) {
	memcpy(&u, &f, sizeof(uint64_t));
	}
	return u;
	}

	static inline double //
	wuffs_base__ieee_754_bit_representation__to_f64(uint64_t u) {
	double f = 0;
	if (sizeof(uint64_t) == sizeof(double)) {
	memcpy(&f, &u, sizeof(uint64_t));
	}
	return f;
	}

	// ---------------- Unicode and UTF-8

	#define WUFFS_BASE__UNICODE_CODE_POINT__MIN_INCL 0x00000000
	#define WUFFS_BASE__UNICODE_CODE_POINT__MAX_INCL 0x0010FFFF

	#define WUFFS_BASE__UNICODE_REPLACEMENT_CHARACTER 0x0000FFFD

	#define WUFFS_BASE__UNICODE_SURROGATE__MIN_INCL 0x0000D800
	#define WUFFS_BASE__UNICODE_SURROGATE__MAX_INCL 0x0000DFFF

	#define WUFFS_BASE__ASCII__MIN_INCL 0x00
	#define WUFFS_BASE__ASCII__MAX_INCL 0x7F

	#define WUFFS_BASE__UTF_8__BYTE_LENGTH__MIN_INCL 1
	#define WUFFS_BASE__UTF_8__BYTE_LENGTH__MAX_INCL 4

	#define WUFFS_BASE__UTF_8__BYTE_LENGTH_1__CODE_POINT__MIN_INCL 0x00000000
	#define WUFFS_BASE__UTF_8__BYTE_LENGTH_1__CODE_POINT__MAX_INCL 0x0000007F
	#define WUFFS_BASE__UTF_8__BYTE_LENGTH_2__CODE_POINT__MIN_INCL 0x00000080
	#define WUFFS_BASE__UTF_8__BYTE_LENGTH_2__CODE_POINT__MAX_INCL 0x000007FF
	#define WUFFS_BASE__UTF_8__BYTE_LENGTH_3__CODE_POINT__MIN_INCL 0x00000800
	#define WUFFS_BASE__UTF_8__BYTE_LENGTH_3__CODE_POINT__MAX_INCL 0x0000FFFF
	#define WUFFS_BASE__UTF_8__BYTE_LENGTH_4__CODE_POINT__MIN_INCL 0x00010000
	#define WUFFS_BASE__UTF_8__BYTE_LENGTH_4__CODE_POINT__MAX_INCL 0x0010FFFF

	// --------

	// wuffs_base__utf_8__next__output is the type returned by
	// wuffs_base__utf_8__next.
	typedef struct {
	uint32_t code_point;
	uint32_t byte_length;

	#ifdef __cplusplus
	inline bool is_valid() const;
	#endif // __cplusplus

	} wuffs_base__utf_8__next__output;

	static inline wuffs_base__utf_8__next__output //
	wuffs_base__make_utf_8__next__output(uint32_t code_point,
	uint32_t byte_length) {
	wuffs_base__utf_8__next__output ret;
	ret.code_point = code_point;
	ret.byte_length = byte_length;
	return ret;
	}

	static inline bool //
	wuffs_base__utf_8__next__output__is_valid(
	const wuffs_base__utf_8__next__output* o) {
	if (o) {
	uint32_t cp = o->code_point;
	switch (o->byte_length) {
	case 1:
	return (cp <= 0x7F);
	case 2:
	return (0x080 <= cp) && (cp <= 0x7FF);
	case 3:
	// Avoid the 0xD800 ..= 0xDFFF surrogate range.
	return ((0x0800 <= cp) && (cp <= 0xD7FF)) \|\|
	((0xE000 <= cp) && (cp <= 0xFFFF));
	case 4:
	return (0x00010000 <= cp) && (cp <= 0x0010FFFF);
	}
	}
	return false;
	}

	#ifdef __cplusplus

	inline bool //
	wuffs_base__utf_8__next__output::is_valid() const {
	return wuffs_base__utf_8__next__output__is_valid(this);
	}

	#endif // __cplusplus

	// --------

	// wuffs_base__utf_8__encode writes the UTF-8 encoding of code_point to s and
	// returns the number of bytes written. If code_point is invalid, or if s is
	// shorter than the entire encoding, it returns 0 (and no bytes are written).
	//
	// s will never be too short if its length is at least 4, also known as
	// WUFFS_BASE__UTF_8__BYTE_LENGTH__MAX_INCL.
	size_t //
	wuffs_base__utf_8__encode(wuffs_base__slice_u8 dst, uint32_t code_point);

	// wuffs_base__utf_8__next returns the next UTF-8 code point (and that code
	// point's byte length) at the start of s.
	//
	// There are exactly two cases in which this function returns something where
	// wuffs_base__utf_8__next__output__is_valid is false:
	// - If s is empty then it returns {.code_point=0, .byte_length=0}.
	// - If s is non-empty and starts with invalid UTF-8 then it returns
	// {.code_point=WUFFS_BASE__UNICODE_REPLACEMENT_CHARACTER, .byte_length=1}.
	//
	// Otherwise, it returns something where
	// wuffs_base__utf_8__next__output__is_valid is true.
	//
	// In any case, it always returns an output that satisfies both of:
	// - (output.code_point <= WUFFS_BASE__UNICODE_CODE_POINT__MAX_INCL).
	// - (output.byte_length <= s.len).
	//
	// If s is a sub-slice of a larger slice of valid UTF-8, but that sub-slice
	// boundary occurs in the middle of a multi-byte UTF-8 encoding of a single
	// code point, then this function may return something invalid. It is the
	// caller's responsibility to split on or otherwise manage UTF-8 boundaries.
	wuffs_base__utf_8__next__output //
	wuffs_base__utf_8__next(wuffs_base__slice_u8 s);

	// wuffs_base__utf_8__longest_valid_prefix returns the largest n such that the
	// sub-slice s[..n] is valid UTF-8.
	//
	// In particular, it returns s.len if and only if all of s is valid UTF-8.
	//
	// If s is a sub-slice of a larger slice of valid UTF-8, but that sub-slice
	// boundary occurs in the middle of a multi-byte UTF-8 encoding of a single
	// code point, then this function will return less than s.len. It is the
	// caller's responsibility to split on or otherwise manage UTF-8 boundaries.
	size_t //
	wuffs_base__utf_8__longest_valid_prefix(wuffs_base__slice_u8 s);

	// wuffs_base__ascii__longest_valid_prefix returns the largest n such that the
	// sub-slice s[..n] is valid ASCII.
	//
	// In particular, it returns s.len if and only if all of s is valid ASCII.
	// Equivalently, when none of the bytes in s have the 0x80 high bit set.
	size_t //
	wuffs_base__ascii__longest_valid_prefix(wuffs_base__slice_u8 s);