Simplify parse_number_f64's use of __uint128_t
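
The Eisel-Lemire parser (parse_number_f64_eisel_lemire) had two call
sites that compute the full 128-bit product of a 64-bit by 64-bit
multiplication, each with a __uint128_t fast path guarded by
"#if defined(__SIZEOF_INT128__)" and a portable fallback to
wuffs_base__multiply_u64. This commit drops the per-call-site #if so
that both call sites go through wuffs_base__multiply_u64
unconditionally, leaving a single code path.

For illustration, here is a minimal sketch of a 64x64 -> 128 bit
multiply helper in the shape of wuffs_base__multiply_u64. The struct
and function names match the call sites in this diff, but the body is
an assumption, not necessarily Wuffs' exact definition. The point is
that a __SIZEOF_INT128__ special case can live inside the helper, in
one place, instead of at every call site:

    #include <stdint.h>

    typedef struct {
      uint64_t hi;
      uint64_t lo;
    } wuffs_base__multiply_u64__output;

    // Illustrative sketch only, not Wuffs' exact code.
    static inline wuffs_base__multiply_u64__output  //
    wuffs_base__multiply_u64(uint64_t x, uint64_t y) {
    #if defined(__SIZEOF_INT128__)
      // With a 128-bit integer type, the full product is one multiply.
      __uint128_t z = ((__uint128_t)x) * ((__uint128_t)y);
      wuffs_base__multiply_u64__output o;
      o.hi = ((uint64_t)(z >> 64));
      o.lo = ((uint64_t)(z));
      return o;
    #else
      // Portable fallback: split each operand into 32-bit halves and
      // combine the four partial products, carrying into the high limb.
      uint64_t x0 = x & 0xFFFFFFFF;
      uint64_t x1 = x >> 32;
      uint64_t y0 = y & 0xFFFFFFFF;
      uint64_t y1 = y >> 32;
      uint64_t w0 = x0 * y0;
      uint64_t t = (x1 * y0) + (w0 >> 32);
      uint64_t w1 = (t & 0xFFFFFFFF) + (x0 * y1);
      wuffs_base__multiply_u64__output o;
      o.hi = (x1 * y1) + (t >> 32) + (w1 >> 32);
      o.lo = x * y;  // Unsigned multiply wraps to the low 64 bits.
      return o;
    #endif
    }

Either way, callers read the high and low 64-bit halves as x.hi and
x.lo, which is exactly how the two call sites below consume the
product.
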
diff --git a/internal/cgen/base/floatconv-submodule-code.c b/internal/cgen/base/floatconv-submodule-code.c
index 68d390d..eaa2982 100644
--- a/internal/cgen/base/floatconv-submodule-code.c
+++ b/internal/cgen/base/floatconv-submodule-code.c
@@ -1006,19 +1006,10 @@
// As a consequence, x_hi has either 0 or 1 leading zeroes. Shifting x_hi
// right by either 9 or 10 bits (depending on x_hi's MSB) will therefore
// leave the top 10 MSBs (bits 54 ..= 63) off and the 11th MSB (bit 53) on.
-#if defined(__SIZEOF_INT128__)
- // See commit 18449ad75d582dd015c236abc85a16f333b796f3 "Optimize 128-bit muls
- // in parse_number_f64_eisel" for benchmark numbers.
- __uint128_t x =
- ((__uint128_t)man) * (((uint64_t)po10[2]) | (((uint64_t)po10[3]) << 32));
- uint64_t x_hi = ((uint64_t)(x >> 64));
- uint64_t x_lo = ((uint64_t)(x));
-#else
wuffs_base__multiply_u64__output x = wuffs_base__multiply_u64(
man, ((uint64_t)po10[2]) | (((uint64_t)po10[3]) << 32));
uint64_t x_hi = x.hi;
uint64_t x_lo = x.lo;
-#endif

// Before we shift right by at least 9 bits, recall that the look-up table
// entry was possibly truncated. We have so far only calculated a lower bound
@@ -1034,19 +1025,10 @@
// a "low resolution" 64-bit mantissa. Now use a "high resolution" 128-bit
// mantissa. We've already calculated x = (man * bits_0_to_63_incl_of_e).
// Now calculate y = (man * bits_64_to_127_incl_of_e).
-#if defined(__SIZEOF_INT128__)
- // See commit 18449ad75d582dd015c236abc85a16f333b796f3 "Optimize 128-bit
- // muls in parse_number_f64_eisel" for benchmark numbers.
- __uint128_t y = ((__uint128_t)man) *
- (((uint64_t)po10[0]) | (((uint64_t)po10[1]) << 32));
- uint64_t y_hi = ((uint64_t)(y >> 64));
- uint64_t y_lo = ((uint64_t)(y));
-#else
wuffs_base__multiply_u64__output y = wuffs_base__multiply_u64(
man, ((uint64_t)po10[0]) | (((uint64_t)po10[1]) << 32));
uint64_t y_hi = y.hi;
uint64_t y_lo = y.lo;
-#endif

// Merge the 128-bit x and 128-bit y, which overlap by 64 bits, to
// calculate the 192-bit product of the 64-bit man by the 128-bit e.
diff --git a/internal/cgen/data/data.go b/internal/cgen/data/data.go
index 1c13c3e..e3904b4 100644
--- a/internal/cgen/data/data.go
+++ b/internal/cgen/data/data.go
@@ -387,12 +387,11 @@
"" +
"// --------\n\n// wuffs_base__private_implementation__parse_number_f64_eisel_lemire produces\n// the IEEE 754 double-precision value for an exact mantissa and base-10\n// exponent. For example:\n// - when parsing \"12345.678e+02\", man is 12345678 and exp10 is -1.\n// - when parsing \"-12\", man is 12 and exp10 is 0. Processing the leading\n// minus sign is the responsibility of the caller, not this function.\n//\n// On success, it returns a non-negative int64_t such that the low 63 bits hold\n// the 11-bit exponent and 52-bit mantissa.\n//\n// On failure, it returns a negative value.\n//\n// The algorithm is based on an original idea by Michael Eisel that was refined\n// by Daniel Lemire. See\n// https://lemire.me/blog/2020/03/10/fast-float-parsing-in-practice/\n//\n// Preconditions:\n// - man is non-zero.\n// - exp10 is in the range -326 ..= 310, the same range of the\n// wuffs_base__private_implementation__powers_of_10 array.\nstatic int64_t //\nwuffs_base__private_implementation__parse_number_f64_eisel_lemire(\n uint64" +
"_t man,\n int32_t exp10) {\n // Look up the (possibly truncated) base-2 representation of (10 ** exp10).\n // The look-up table was constructed so that it is already normalized: the\n // table entry's mantissa's MSB (most significant bit) is on.\n const uint32_t* po10 =\n &wuffs_base__private_implementation__powers_of_10[5 * (exp10 + 326)];\n\n // Normalize the man argument. The (man != 0) precondition means that a\n // non-zero bit exists.\n uint32_t clz = wuffs_base__count_leading_zeroes_u64(man);\n man <<= clz;\n\n // Calculate the return value's base-2 exponent. We might tweak it by ±1\n // later, but its initial value comes from the look-up table and clz.\n uint64_t ret_exp2 = ((uint64_t)po10[4]) - ((uint64_t)clz);\n\n // Multiply the two mantissas. Normalization means that both mantissas are at\n // least (1<<63), so the 128-bit product must be at least (1<<126). The high\n // 64 bits of the product, x_hi, must therefore be at least (1<<62).\n //\n // As a consequence, x_hi has either 0 or 1 leading" +
- " zeroes. Shifting x_hi\n // right by either 9 or 10 bits (depending on x_hi's MSB) will therefore\n // leave the top 10 MSBs (bits 54 ..= 63) off and the 11th MSB (bit 53) on.\n#if defined(__SIZEOF_INT128__)\n // See commit 18449ad75d582dd015c236abc85a16f333b796f3 \"Optimize 128-bit muls\n // in parse_number_f64_eisel\" for benchmark numbers.\n __uint128_t x =\n ((__uint128_t)man) * (((uint64_t)po10[2]) | (((uint64_t)po10[3]) << 32));\n uint64_t x_hi = ((uint64_t)(x >> 64));\n uint64_t x_lo = ((uint64_t)(x));\n#else\n wuffs_base__multiply_u64__output x = wuffs_base__multiply_u64(\n man, ((uint64_t)po10[2]) | (((uint64_t)po10[3]) << 32));\n uint64_t x_hi = x.hi;\n uint64_t x_lo = x.lo;\n#endif\n\n // Before we shift right by at least 9 bits, recall that the look-up table\n // entry was possibly truncated. We have so far only calculated a lower bound\n // for the product (man * e), where e is (10 ** exp10). The upper bound would\n // add a further (man * 1) to the 128-bit product, which overflows the lower\n " +
- "// 64-bit limb if ((x_lo + man) < man).\n //\n // If overflow occurs, that adds 1 to x_hi. Since we're about to shift right\n // by at least 9 bits, that carried 1 can be ignored unless the higher 64-bit\n // limb's low 9 bits are all on.\n if (((x_hi & 0x1FF) == 0x1FF) && ((x_lo + man) < man)) {\n // Refine our calculation of (man * e). Before, our approximation of e used\n // a \"low resolution\" 64-bit mantissa. Now use a \"high resolution\" 128-bit\n // mantissa. We've already calculated x = (man * bits_0_to_63_incl_of_e).\n // Now calculate y = (man * bits_64_to_127_incl_of_e).\n#if defined(__SIZEOF_INT128__)\n // See commit 18449ad75d582dd015c236abc85a16f333b796f3 \"Optimize 128-bit\n // muls in parse_number_f64_eisel\" for benchmark numbers.\n __uint128_t y = ((__uint128_t)man) *\n (((uint64_t)po10[0]) | (((uint64_t)po10[1]) << 32));\n uint64_t y_hi = ((uint64_t)(y >> 64));\n uint64_t y_lo = ((uint64_t)(y));\n#else\n wuffs_base__multiply_u64__output y = wuffs_base__multipl" +
- "y_u64(\n man, ((uint64_t)po10[0]) | (((uint64_t)po10[1]) << 32));\n uint64_t y_hi = y.hi;\n uint64_t y_lo = y.lo;\n#endif\n\n // Merge the 128-bit x and 128-bit y, which overlap by 64 bits, to\n // calculate the 192-bit product of the 64-bit man by the 128-bit e.\n // As we exit this if-block, we only care about the high 128 bits\n // (merged_hi and merged_lo) of that 192-bit product.\n uint64_t merged_hi = x_hi;\n uint64_t merged_lo = x_lo + y_hi;\n if (merged_lo < x_lo) {\n merged_hi++; // Carry the overflow bit.\n }\n\n // The \"high resolution\" approximation of e is still a lower bound. Once\n // again, see if the upper bound is large enough to produce a different\n // result. This time, if it does, give up instead of reaching for an even\n // more precise approximation to e.\n //\n // This three-part check is similar to the two-part check that guarded the\n // if block that we're now in, but it has an extra term for the middle 64\n // bits (checking that adding " +
- "1 to merged_lo would overflow).\n if (((merged_hi & 0x1FF) == 0x1FF) && ((merged_lo + 1) == 0) &&\n (y_lo + man < man)) {\n return -1;\n }\n\n // Replace the 128-bit x with merged.\n x_hi = merged_hi;\n x_lo = merged_lo;\n }\n\n // As mentioned above, shifting x_hi right by either 9 or 10 bits will leave\n // the top 10 MSBs (bits 54 ..= 63) off and the 11th MSB (bit 53) on. If the\n // MSB (before shifting) was on, adjust ret_exp2 for the larger shift.\n //\n // Having bit 53 on (and higher bits off) means that ret_mantissa is a 54-bit\n // number.\n uint64_t msb = x_hi >> 63;\n uint64_t ret_mantissa = x_hi >> (msb + 9);\n ret_exp2 -= 1 ^ msb;\n\n // IEEE 754 rounds to-nearest with ties rounded to-even. Rounding to-even can\n // be tricky. If we're half-way between two exactly representable numbers\n // (x's low 73 bits are zero and the next 2 bits that matter are \"01\"), give\n // up instead of trying to pick the winner.\n //\n // Technically, we could tighten the condition by changing \"73\" " +
- "to \"73 or 74,\n // depending on msb\", but a flat \"73\" is simpler.\n if ((x_lo == 0) && ((x_hi & 0x1FF) == 0) && ((ret_mantissa & 3) == 1)) {\n return -1;\n }\n\n // If we're not halfway then it's rounding to-nearest. Starting with a 54-bit\n // number, carry the lowest bit (bit 0) up if it's on. Regardless of whether\n // it was on or off, shifting right by one then produces a 53-bit number. If\n // carrying up overflowed, shift again.\n ret_mantissa += ret_mantissa & 1;\n ret_mantissa >>= 1;\n if ((ret_mantissa >> 53) > 0) {\n ret_mantissa >>= 1;\n ret_exp2++;\n }\n\n // Starting with a 53-bit number, IEEE 754 double-precision normal numbers\n // have an implicit mantissa bit. Mask that away and keep the low 52 bits.\n ret_mantissa &= 0x000FFFFFFFFFFFFF;\n\n // IEEE 754 double-precision floating point has 11 exponent bits. All off (0)\n // means subnormal numbers. All on (2047) means infinity or NaN.\n if ((ret_exp2 <= 0) || (2047 <= ret_exp2)) {\n return -1;\n }\n\n // Pack the bits and return.\n return" +
- " ((int64_t)(ret_mantissa | (ret_exp2 << 52)));\n}\n\n" +
+ " zeroes. Shifting x_hi\n // right by either 9 or 10 bits (depending on x_hi's MSB) will therefore\n // leave the top 10 MSBs (bits 54 ..= 63) off and the 11th MSB (bit 53) on.\n wuffs_base__multiply_u64__output x = wuffs_base__multiply_u64(\n man, ((uint64_t)po10[2]) | (((uint64_t)po10[3]) << 32));\n uint64_t x_hi = x.hi;\n uint64_t x_lo = x.lo;\n\n // Before we shift right by at least 9 bits, recall that the look-up table\n // entry was possibly truncated. We have so far only calculated a lower bound\n // for the product (man * e), where e is (10 ** exp10). The upper bound would\n // add a further (man * 1) to the 128-bit product, which overflows the lower\n // 64-bit limb if ((x_lo + man) < man).\n //\n // If overflow occurs, that adds 1 to x_hi. Since we're about to shift right\n // by at least 9 bits, that carried 1 can be ignored unless the higher 64-bit\n // limb's low 9 bits are all on.\n if (((x_hi & 0x1FF) == 0x1FF) && ((x_lo + man) < man)) {\n // Refine our calculation of (man * e). Before, our" +
+ " approximation of e used\n // a \"low resolution\" 64-bit mantissa. Now use a \"high resolution\" 128-bit\n // mantissa. We've already calculated x = (man * bits_0_to_63_incl_of_e).\n // Now calculate y = (man * bits_64_to_127_incl_of_e).\n wuffs_base__multiply_u64__output y = wuffs_base__multiply_u64(\n man, ((uint64_t)po10[0]) | (((uint64_t)po10[1]) << 32));\n uint64_t y_hi = y.hi;\n uint64_t y_lo = y.lo;\n\n // Merge the 128-bit x and 128-bit y, which overlap by 64 bits, to\n // calculate the 192-bit product of the 64-bit man by the 128-bit e.\n // As we exit this if-block, we only care about the high 128 bits\n // (merged_hi and merged_lo) of that 192-bit product.\n uint64_t merged_hi = x_hi;\n uint64_t merged_lo = x_lo + y_hi;\n if (merged_lo < x_lo) {\n merged_hi++; // Carry the overflow bit.\n }\n\n // The \"high resolution\" approximation of e is still a lower bound. Once\n // again, see if the upper bound is large enough to produce a different\n // result. This ti" +
+ "me, if it does, give up instead of reaching for an even\n // more precise approximation to e.\n //\n // This three-part check is similar to the two-part check that guarded the\n // if block that we're now in, but it has an extra term for the middle 64\n // bits (checking that adding 1 to merged_lo would overflow).\n if (((merged_hi & 0x1FF) == 0x1FF) && ((merged_lo + 1) == 0) &&\n (y_lo + man < man)) {\n return -1;\n }\n\n // Replace the 128-bit x with merged.\n x_hi = merged_hi;\n x_lo = merged_lo;\n }\n\n // As mentioned above, shifting x_hi right by either 9 or 10 bits will leave\n // the top 10 MSBs (bits 54 ..= 63) off and the 11th MSB (bit 53) on. If the\n // MSB (before shifting) was on, adjust ret_exp2 for the larger shift.\n //\n // Having bit 53 on (and higher bits off) means that ret_mantissa is a 54-bit\n // number.\n uint64_t msb = x_hi >> 63;\n uint64_t ret_mantissa = x_hi >> (msb + 9);\n ret_exp2 -= 1 ^ msb;\n\n // IEEE 754 rounds to-nearest with ties rounded to-even." +
+ " Rounding to-even can\n // be tricky. If we're half-way between two exactly representable numbers\n // (x's low 73 bits are zero and the next 2 bits that matter are \"01\"), give\n // up instead of trying to pick the winner.\n //\n // Technically, we could tighten the condition by changing \"73\" to \"73 or 74,\n // depending on msb\", but a flat \"73\" is simpler.\n if ((x_lo == 0) && ((x_hi & 0x1FF) == 0) && ((ret_mantissa & 3) == 1)) {\n return -1;\n }\n\n // If we're not halfway then it's rounding to-nearest. Starting with a 54-bit\n // number, carry the lowest bit (bit 0) up if it's on. Regardless of whether\n // it was on or off, shifting right by one then produces a 53-bit number. If\n // carrying up overflowed, shift again.\n ret_mantissa += ret_mantissa & 1;\n ret_mantissa >>= 1;\n if ((ret_mantissa >> 53) > 0) {\n ret_mantissa >>= 1;\n ret_exp2++;\n }\n\n // Starting with a 53-bit number, IEEE 754 double-precision normal numbers\n // have an implicit mantissa bit. Mask that away and keep the low 52 bits" +
+ ".\n ret_mantissa &= 0x000FFFFFFFFFFFFF;\n\n // IEEE 754 double-precision floating point has 11 exponent bits. All off (0)\n // means subnormal numbers. All on (2047) means infinity or NaN.\n if ((ret_exp2 <= 0) || (2047 <= ret_exp2)) {\n return -1;\n }\n\n // Pack the bits and return.\n return ((int64_t)(ret_mantissa | (ret_exp2 << 52)));\n}\n\n" +
"" +
"// --------\n\nstatic wuffs_base__result_f64 //\nwuffs_base__private_implementation__parse_number_f64_special(\n wuffs_base__slice_u8 s,\n uint32_t options) {\n do {\n if (options & WUFFS_BASE__PARSE_NUMBER_FXX__REJECT_INF_AND_NAN) {\n goto fail;\n }\n\n uint8_t* p = s.ptr;\n uint8_t* q = s.ptr + s.len;\n\n for (; (p < q) && (*p == '_'); p++) {\n }\n if (p >= q) {\n goto fail;\n }\n\n // Parse sign.\n bool negative = false;\n do {\n if (*p == '+') {\n p++;\n } else if (*p == '-') {\n negative = true;\n p++;\n } else {\n break;\n }\n for (; (p < q) && (*p == '_'); p++) {\n }\n } while (0);\n if (p >= q) {\n goto fail;\n }\n\n bool nan = false;\n switch (p[0]) {\n case 'I':\n case 'i':\n if (((q - p) < 3) || //\n ((p[1] != 'N') && (p[1] != 'n')) || //\n ((p[2] != 'F') && (p[2] != 'f'))) {\n goto fail;\n }\n p += 3;\n\n if ((p >= q) || (*p == '_" +
"')) {\n break;\n } else if (((q - p) < 5) || //\n ((p[0] != 'I') && (p[0] != 'i')) || //\n ((p[1] != 'N') && (p[1] != 'n')) || //\n ((p[2] != 'I') && (p[2] != 'i')) || //\n ((p[3] != 'T') && (p[3] != 't')) || //\n ((p[4] != 'Y') && (p[4] != 'y'))) {\n goto fail;\n }\n p += 5;\n\n if ((p >= q) || (*p == '_')) {\n break;\n }\n goto fail;\n\n case 'N':\n case 'n':\n if (((q - p) < 3) || //\n ((p[1] != 'A') && (p[1] != 'a')) || //\n ((p[2] != 'N') && (p[2] != 'n'))) {\n goto fail;\n }\n p += 3;\n\n if ((p >= q) || (*p == '_')) {\n nan = true;\n break;\n }\n goto fail;\n\n default:\n goto fail;\n }\n\n // Finish.\n for (; (p < q) && (*p == '_'); p++) {\n }\n if (p != q) {\n goto fail;\n }\n wuffs_base__result_f64 ret;\n" +
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index bd069d6..66007fa 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -11649,19 +11649,10 @@
// As a consequence, x_hi has either 0 or 1 leading zeroes. Shifting x_hi
// right by either 9 or 10 bits (depending on x_hi's MSB) will therefore
// leave the top 10 MSBs (bits 54 ..= 63) off and the 11th MSB (bit 53) on.
-#if defined(__SIZEOF_INT128__)
- // See commit 18449ad75d582dd015c236abc85a16f333b796f3 "Optimize 128-bit muls
- // in parse_number_f64_eisel" for benchmark numbers.
- __uint128_t x =
- ((__uint128_t)man) * (((uint64_t)po10[2]) | (((uint64_t)po10[3]) << 32));
- uint64_t x_hi = ((uint64_t)(x >> 64));
- uint64_t x_lo = ((uint64_t)(x));
-#else
wuffs_base__multiply_u64__output x = wuffs_base__multiply_u64(
man, ((uint64_t)po10[2]) | (((uint64_t)po10[3]) << 32));
uint64_t x_hi = x.hi;
uint64_t x_lo = x.lo;
-#endif

// Before we shift right by at least 9 bits, recall that the look-up table
// entry was possibly truncated. We have so far only calculated a lower bound
@@ -11677,19 +11668,10 @@
// a "low resolution" 64-bit mantissa. Now use a "high resolution" 128-bit
// mantissa. We've already calculated x = (man * bits_0_to_63_incl_of_e).
// Now calculate y = (man * bits_64_to_127_incl_of_e).
-#if defined(__SIZEOF_INT128__)
- // See commit 18449ad75d582dd015c236abc85a16f333b796f3 "Optimize 128-bit
- // muls in parse_number_f64_eisel" for benchmark numbers.
- __uint128_t y = ((__uint128_t)man) *
- (((uint64_t)po10[0]) | (((uint64_t)po10[1]) << 32));
- uint64_t y_hi = ((uint64_t)(y >> 64));
- uint64_t y_lo = ((uint64_t)(y));
-#else
wuffs_base__multiply_u64__output y = wuffs_base__multiply_u64(
man, ((uint64_t)po10[0]) | (((uint64_t)po10[1]) << 32));
uint64_t y_hi = y.hi;
uint64_t y_lo = y.lo;
-#endif

// Merge the 128-bit x and 128-bit y, which overlap by 64 bits, to
// calculate the 192-bit product of the 64-bit man by the 128-bit e.