Narrow parse_number_f64_eisel_lemire's exp10 arg On a mid-range x86_64 laptop, processing the 181 MiB citylots.json file from github.com/zemirco/sf-city-lots-json: $ g++ -O3 script/process-json-numbers.c $ time ./a.out -parse-number-f64 < citylots.json Before/After: real 0m0.995s real 0m0.968s

commit: 8b45db0120b5038789b62296b755496faafbc6de [log] [tgz]
author: Nigel Tao <nigeltao@golang.org> Tue Sep 15 21:50:32 2020 +1000
committer: Nigel Tao <nigeltao@golang.org> Tue Sep 15 21:58:57 2020 +1000
tree: da1f2ecf9a8a951f908849cb06ae5a1369f4733f
parent: ddac2c50eeb94a0ef53d7a39d5c13fc4e03df771 [diff]
diff --git a/internal/cgen/base/floatconv-submodule-code.c b/internal/cgen/base/floatconv-submodule-code.c
index eaa2982..5c5cc4d 100644
--- a/internal/cgen/base/floatconv-submodule-code.c
+++ b/internal/cgen/base/floatconv-submodule-code.c

@@ -978,8 +978,14 @@
 //
 // Preconditions:
 //  - man is non-zero.
-//  - exp10 is in the range -326 ..= 310, the same range of the
+//  - exp10 is in the range -307 ..= 288, the same range of the
 //    wuffs_base__private_implementation__powers_of_10 array.
+//
+// The exp10 range (and the fact that man is in the range [1 ..= UINT64_MAX],
+// approximately [1 ..= 1.85e+19]) means that (man * (10 ** exp10)) is in the
+// range [1e-307 ..= 1.85e+307]. This is entirely within the range of normal
+// (neither subnormal nor non-finite) f64 values: DBL_MIN and DBL_MAX are
+// approximately 2.23e–308 and 1.80e+308.
 static int64_t  //
 wuffs_base__private_implementation__parse_number_f64_eisel_lemire(
     uint64_t man,
@@ -1085,6 +1091,11 @@
   // carrying up overflowed, shift again.
   ret_mantissa += ret_mantissa & 1;
   ret_mantissa >>= 1;
+  // This if block is equivalent to (but benchmarks slightly faster than) the
+  // following branchless form:
+  //    uint64_t overflow_adjustment = ret_mantissa >> 53;
+  //    ret_mantissa >>= overflow_adjustment;
+  //    ret_exp2 += overflow_adjustment;
   if ((ret_mantissa >> 53) > 0) {
     ret_mantissa >>= 1;
     ret_exp2++;
@@ -1094,12 +1105,6 @@
   // have an implicit mantissa bit. Mask that away and keep the low 52 bits.
   ret_mantissa &= 0x000FFFFFFFFFFFFF;
 
-  // IEEE 754 double-precision floating point has 11 exponent bits. All off (0)
-  // means subnormal numbers. All on (2047) means infinity or NaN.
-  if ((ret_exp2 <= 0) || (2047 <= ret_exp2)) {
-    return -1;
-  }
-
   // Pack the bits and return.
   return ((int64_t)(ret_mantissa | (ret_exp2 << 52)));
 }
@@ -1246,7 +1251,7 @@
         man = (10 * man) + h->digits[i];
       }
       int32_t exp10 = h->decimal_point - ((int32_t)(h->num_digits));
-      if ((man != 0) && (-326 <= exp10) && (exp10 <= 310)) {
+      if ((man != 0) && (-307 <= exp10) && (exp10 <= 288)) {
         int64_t r =
             wuffs_base__private_implementation__parse_number_f64_eisel_lemire(
                 man, exp10);
@@ -1555,8 +1560,8 @@
     }
 
     // The wuffs_base__private_implementation__parse_number_f64_eisel_lemire
-    // preconditions include that exp10 is in the range -326 ..= 310.
-    if ((exp10 < -326) || (310 < exp10)) {
+    // preconditions include that exp10 is in the range [-307 ..= 288].
+    if ((exp10 < -307) || (288 < exp10)) {
       goto fallback;
     }
 

diff --git a/internal/cgen/data/data.go b/internal/cgen/data/data.go
index e3904b4..8e1c5d8 100644
--- a/internal/cgen/data/data.go
+++ b/internal/cgen/data/data.go

@@ -385,18 +385,18 @@
 	"   upper_delta = +1;\n      } else if (hd != ud) {\n        // For example:\n        // h     = 12345???\n        // upper = 12346???\n        upper_delta = +0;\n      }\n    } else if (upper_delta == 0) {\n      if ((hd != 9) || (ud != 0)) {\n        // For example:\n        // h     = 1234598?\n        // upper = 1234600?\n        upper_delta = +1;\n      }\n    }\n\n    // We can round up if upper has a different digit than h and either upper\n    // is inclusive or upper is bigger than the result of rounding up.\n    bool can_round_up =\n        (upper_delta > 0) ||    //\n        ((upper_delta == 0) &&  //\n         (inclusive || ((ui + 1) < ((int32_t)(upper.num_digits)))));\n\n    // If we can round either way, round to nearest. If we can round only one\n    // way, do it. If we can't round, continue the loop.\n    if (can_round_down) {\n      if (can_round_up) {\n        wuffs_base__private_implementation__high_prec_dec__round_nearest(\n            h, hi + 1);\n        return;\n      } else {\n        wuffs_base__private_implementat" +
 	"ion__high_prec_dec__round_down(h,\n                                                                      hi + 1);\n        return;\n      }\n    } else {\n      if (can_round_up) {\n        wuffs_base__private_implementation__high_prec_dec__round_up(h, hi + 1);\n        return;\n      }\n    }\n  }\n}\n\n" +
 	"" +
-	"// --------\n\n// wuffs_base__private_implementation__parse_number_f64_eisel_lemire produces\n// the IEEE 754 double-precision value for an exact mantissa and base-10\n// exponent. For example:\n//  - when parsing \"12345.678e+02\", man is 12345678 and exp10 is -1.\n//  - when parsing \"-12\", man is 12 and exp10 is 0. Processing the leading\n//    minus sign is the responsibility of the caller, not this function.\n//\n// On success, it returns a non-negative int64_t such that the low 63 bits hold\n// the 11-bit exponent and 52-bit mantissa.\n//\n// On failure, it returns a negative value.\n//\n// The algorithm is based on an original idea by Michael Eisel that was refined\n// by Daniel Lemire. See\n// https://lemire.me/blog/2020/03/10/fast-float-parsing-in-practice/\n//\n// Preconditions:\n//  - man is non-zero.\n//  - exp10 is in the range -326 ..= 310, the same range of the\n//    wuffs_base__private_implementation__powers_of_10 array.\nstatic int64_t  //\nwuffs_base__private_implementation__parse_number_f64_eisel_lemire(\n    uint64" +
-	"_t man,\n    int32_t exp10) {\n  // Look up the (possibly truncated) base-2 representation of (10 ** exp10).\n  // The look-up table was constructed so that it is already normalized: the\n  // table entry's mantissa's MSB (most significant bit) is on.\n  const uint32_t* po10 =\n      &wuffs_base__private_implementation__powers_of_10[5 * (exp10 + 326)];\n\n  // Normalize the man argument. The (man != 0) precondition means that a\n  // non-zero bit exists.\n  uint32_t clz = wuffs_base__count_leading_zeroes_u64(man);\n  man <<= clz;\n\n  // Calculate the return value's base-2 exponent. We might tweak it by ±1\n  // later, but its initial value comes from the look-up table and clz.\n  uint64_t ret_exp2 = ((uint64_t)po10[4]) - ((uint64_t)clz);\n\n  // Multiply the two mantissas. Normalization means that both mantissas are at\n  // least (1<<63), so the 128-bit product must be at least (1<<126). The high\n  // 64 bits of the product, x_hi, must therefore be at least (1<<62).\n  //\n  // As a consequence, x_hi has either 0 or 1 leading" +
-	" zeroes. Shifting x_hi\n  // right by either 9 or 10 bits (depending on x_hi's MSB) will therefore\n  // leave the top 10 MSBs (bits 54 ..= 63) off and the 11th MSB (bit 53) on.\n  wuffs_base__multiply_u64__output x = wuffs_base__multiply_u64(\n      man, ((uint64_t)po10[2]) | (((uint64_t)po10[3]) << 32));\n  uint64_t x_hi = x.hi;\n  uint64_t x_lo = x.lo;\n\n  // Before we shift right by at least 9 bits, recall that the look-up table\n  // entry was possibly truncated. We have so far only calculated a lower bound\n  // for the product (man * e), where e is (10 ** exp10). The upper bound would\n  // add a further (man * 1) to the 128-bit product, which overflows the lower\n  // 64-bit limb if ((x_lo + man) < man).\n  //\n  // If overflow occurs, that adds 1 to x_hi. Since we're about to shift right\n  // by at least 9 bits, that carried 1 can be ignored unless the higher 64-bit\n  // limb's low 9 bits are all on.\n  if (((x_hi & 0x1FF) == 0x1FF) && ((x_lo + man) < man)) {\n    // Refine our calculation of (man * e). Before, our" +
-	" approximation of e used\n    // a \"low resolution\" 64-bit mantissa. Now use a \"high resolution\" 128-bit\n    // mantissa. We've already calculated x = (man * bits_0_to_63_incl_of_e).\n    // Now calculate y = (man * bits_64_to_127_incl_of_e).\n    wuffs_base__multiply_u64__output y = wuffs_base__multiply_u64(\n        man, ((uint64_t)po10[0]) | (((uint64_t)po10[1]) << 32));\n    uint64_t y_hi = y.hi;\n    uint64_t y_lo = y.lo;\n\n    // Merge the 128-bit x and 128-bit y, which overlap by 64 bits, to\n    // calculate the 192-bit product of the 64-bit man by the 128-bit e.\n    // As we exit this if-block, we only care about the high 128 bits\n    // (merged_hi and merged_lo) of that 192-bit product.\n    uint64_t merged_hi = x_hi;\n    uint64_t merged_lo = x_lo + y_hi;\n    if (merged_lo < x_lo) {\n      merged_hi++;  // Carry the overflow bit.\n    }\n\n    // The \"high resolution\" approximation of e is still a lower bound. Once\n    // again, see if the upper bound is large enough to produce a different\n    // result. This ti" +
-	"me, if it does, give up instead of reaching for an even\n    // more precise approximation to e.\n    //\n    // This three-part check is similar to the two-part check that guarded the\n    // if block that we're now in, but it has an extra term for the middle 64\n    // bits (checking that adding 1 to merged_lo would overflow).\n    if (((merged_hi & 0x1FF) == 0x1FF) && ((merged_lo + 1) == 0) &&\n        (y_lo + man < man)) {\n      return -1;\n    }\n\n    // Replace the 128-bit x with merged.\n    x_hi = merged_hi;\n    x_lo = merged_lo;\n  }\n\n  // As mentioned above, shifting x_hi right by either 9 or 10 bits will leave\n  // the top 10 MSBs (bits 54 ..= 63) off and the 11th MSB (bit 53) on. If the\n  // MSB (before shifting) was on, adjust ret_exp2 for the larger shift.\n  //\n  // Having bit 53 on (and higher bits off) means that ret_mantissa is a 54-bit\n  // number.\n  uint64_t msb = x_hi >> 63;\n  uint64_t ret_mantissa = x_hi >> (msb + 9);\n  ret_exp2 -= 1 ^ msb;\n\n  // IEEE 754 rounds to-nearest with ties rounded to-even." +
-	" Rounding to-even can\n  // be tricky. If we're half-way between two exactly representable numbers\n  // (x's low 73 bits are zero and the next 2 bits that matter are \"01\"), give\n  // up instead of trying to pick the winner.\n  //\n  // Technically, we could tighten the condition by changing \"73\" to \"73 or 74,\n  // depending on msb\", but a flat \"73\" is simpler.\n  if ((x_lo == 0) && ((x_hi & 0x1FF) == 0) && ((ret_mantissa & 3) == 1)) {\n    return -1;\n  }\n\n  // If we're not halfway then it's rounding to-nearest. Starting with a 54-bit\n  // number, carry the lowest bit (bit 0) up if it's on. Regardless of whether\n  // it was on or off, shifting right by one then produces a 53-bit number. If\n  // carrying up overflowed, shift again.\n  ret_mantissa += ret_mantissa & 1;\n  ret_mantissa >>= 1;\n  if ((ret_mantissa >> 53) > 0) {\n    ret_mantissa >>= 1;\n    ret_exp2++;\n  }\n\n  // Starting with a 53-bit number, IEEE 754 double-precision normal numbers\n  // have an implicit mantissa bit. Mask that away and keep the low 52 bits" +
-	".\n  ret_mantissa &= 0x000FFFFFFFFFFFFF;\n\n  // IEEE 754 double-precision floating point has 11 exponent bits. All off (0)\n  // means subnormal numbers. All on (2047) means infinity or NaN.\n  if ((ret_exp2 <= 0) || (2047 <= ret_exp2)) {\n    return -1;\n  }\n\n  // Pack the bits and return.\n  return ((int64_t)(ret_mantissa | (ret_exp2 << 52)));\n}\n\n" +
+	"// --------\n\n// wuffs_base__private_implementation__parse_number_f64_eisel_lemire produces\n// the IEEE 754 double-precision value for an exact mantissa and base-10\n// exponent. For example:\n//  - when parsing \"12345.678e+02\", man is 12345678 and exp10 is -1.\n//  - when parsing \"-12\", man is 12 and exp10 is 0. Processing the leading\n//    minus sign is the responsibility of the caller, not this function.\n//\n// On success, it returns a non-negative int64_t such that the low 63 bits hold\n// the 11-bit exponent and 52-bit mantissa.\n//\n// On failure, it returns a negative value.\n//\n// The algorithm is based on an original idea by Michael Eisel that was refined\n// by Daniel Lemire. See\n// https://lemire.me/blog/2020/03/10/fast-float-parsing-in-practice/\n//\n// Preconditions:\n//  - man is non-zero.\n//  - exp10 is in the range -307 ..= 288, the same range of the\n//    wuffs_base__private_implementation__powers_of_10 array.\n//\n// The exp10 range (and the fact that man is in the range [1 ..= UINT64_MAX],\n// approximatel" +
+	"y [1 ..= 1.85e+19]) means that (man * (10 ** exp10)) is in the\n// range [1e-307 ..= 1.85e+307]. This is entirely within the range of normal\n// (neither subnormal nor non-finite) f64 values: DBL_MIN and DBL_MAX are\n// approximately 2.23e–308 and 1.80e+308.\nstatic int64_t  //\nwuffs_base__private_implementation__parse_number_f64_eisel_lemire(\n    uint64_t man,\n    int32_t exp10) {\n  // Look up the (possibly truncated) base-2 representation of (10 ** exp10).\n  // The look-up table was constructed so that it is already normalized: the\n  // table entry's mantissa's MSB (most significant bit) is on.\n  const uint32_t* po10 =\n      &wuffs_base__private_implementation__powers_of_10[5 * (exp10 + 326)];\n\n  // Normalize the man argument. The (man != 0) precondition means that a\n  // non-zero bit exists.\n  uint32_t clz = wuffs_base__count_leading_zeroes_u64(man);\n  man <<= clz;\n\n  // Calculate the return value's base-2 exponent. We might tweak it by ±1\n  // later, but its initial value comes from the look-up table and c" +
+	"lz.\n  uint64_t ret_exp2 = ((uint64_t)po10[4]) - ((uint64_t)clz);\n\n  // Multiply the two mantissas. Normalization means that both mantissas are at\n  // least (1<<63), so the 128-bit product must be at least (1<<126). The high\n  // 64 bits of the product, x_hi, must therefore be at least (1<<62).\n  //\n  // As a consequence, x_hi has either 0 or 1 leading zeroes. Shifting x_hi\n  // right by either 9 or 10 bits (depending on x_hi's MSB) will therefore\n  // leave the top 10 MSBs (bits 54 ..= 63) off and the 11th MSB (bit 53) on.\n  wuffs_base__multiply_u64__output x = wuffs_base__multiply_u64(\n      man, ((uint64_t)po10[2]) | (((uint64_t)po10[3]) << 32));\n  uint64_t x_hi = x.hi;\n  uint64_t x_lo = x.lo;\n\n  // Before we shift right by at least 9 bits, recall that the look-up table\n  // entry was possibly truncated. We have so far only calculated a lower bound\n  // for the product (man * e), where e is (10 ** exp10). The upper bound would\n  // add a further (man * 1) to the 128-bit product, which overflows the lower\n " +
+	" // 64-bit limb if ((x_lo + man) < man).\n  //\n  // If overflow occurs, that adds 1 to x_hi. Since we're about to shift right\n  // by at least 9 bits, that carried 1 can be ignored unless the higher 64-bit\n  // limb's low 9 bits are all on.\n  if (((x_hi & 0x1FF) == 0x1FF) && ((x_lo + man) < man)) {\n    // Refine our calculation of (man * e). Before, our approximation of e used\n    // a \"low resolution\" 64-bit mantissa. Now use a \"high resolution\" 128-bit\n    // mantissa. We've already calculated x = (man * bits_0_to_63_incl_of_e).\n    // Now calculate y = (man * bits_64_to_127_incl_of_e).\n    wuffs_base__multiply_u64__output y = wuffs_base__multiply_u64(\n        man, ((uint64_t)po10[0]) | (((uint64_t)po10[1]) << 32));\n    uint64_t y_hi = y.hi;\n    uint64_t y_lo = y.lo;\n\n    // Merge the 128-bit x and 128-bit y, which overlap by 64 bits, to\n    // calculate the 192-bit product of the 64-bit man by the 128-bit e.\n    // As we exit this if-block, we only care about the high 128 bits\n    // (merged_hi and merged_l" +
+	"o) of that 192-bit product.\n    uint64_t merged_hi = x_hi;\n    uint64_t merged_lo = x_lo + y_hi;\n    if (merged_lo < x_lo) {\n      merged_hi++;  // Carry the overflow bit.\n    }\n\n    // The \"high resolution\" approximation of e is still a lower bound. Once\n    // again, see if the upper bound is large enough to produce a different\n    // result. This time, if it does, give up instead of reaching for an even\n    // more precise approximation to e.\n    //\n    // This three-part check is similar to the two-part check that guarded the\n    // if block that we're now in, but it has an extra term for the middle 64\n    // bits (checking that adding 1 to merged_lo would overflow).\n    if (((merged_hi & 0x1FF) == 0x1FF) && ((merged_lo + 1) == 0) &&\n        (y_lo + man < man)) {\n      return -1;\n    }\n\n    // Replace the 128-bit x with merged.\n    x_hi = merged_hi;\n    x_lo = merged_lo;\n  }\n\n  // As mentioned above, shifting x_hi right by either 9 or 10 bits will leave\n  // the top 10 MSBs (bits 54 ..= 63) off and the 11" +
+	"th MSB (bit 53) on. If the\n  // MSB (before shifting) was on, adjust ret_exp2 for the larger shift.\n  //\n  // Having bit 53 on (and higher bits off) means that ret_mantissa is a 54-bit\n  // number.\n  uint64_t msb = x_hi >> 63;\n  uint64_t ret_mantissa = x_hi >> (msb + 9);\n  ret_exp2 -= 1 ^ msb;\n\n  // IEEE 754 rounds to-nearest with ties rounded to-even. Rounding to-even can\n  // be tricky. If we're half-way between two exactly representable numbers\n  // (x's low 73 bits are zero and the next 2 bits that matter are \"01\"), give\n  // up instead of trying to pick the winner.\n  //\n  // Technically, we could tighten the condition by changing \"73\" to \"73 or 74,\n  // depending on msb\", but a flat \"73\" is simpler.\n  if ((x_lo == 0) && ((x_hi & 0x1FF) == 0) && ((ret_mantissa & 3) == 1)) {\n    return -1;\n  }\n\n  // If we're not halfway then it's rounding to-nearest. Starting with a 54-bit\n  // number, carry the lowest bit (bit 0) up if it's on. Regardless of whether\n  // it was on or off, shifting right by one then produc" +
+	"es a 53-bit number. If\n  // carrying up overflowed, shift again.\n  ret_mantissa += ret_mantissa & 1;\n  ret_mantissa >>= 1;\n  // This if block is equivalent to (but benchmarks slightly faster than) the\n  // following branchless form:\n  //    uint64_t overflow_adjustment = ret_mantissa >> 53;\n  //    ret_mantissa >>= overflow_adjustment;\n  //    ret_exp2 += overflow_adjustment;\n  if ((ret_mantissa >> 53) > 0) {\n    ret_mantissa >>= 1;\n    ret_exp2++;\n  }\n\n  // Starting with a 53-bit number, IEEE 754 double-precision normal numbers\n  // have an implicit mantissa bit. Mask that away and keep the low 52 bits.\n  ret_mantissa &= 0x000FFFFFFFFFFFFF;\n\n  // Pack the bits and return.\n  return ((int64_t)(ret_mantissa | (ret_exp2 << 52)));\n}\n\n" +
 	"" +
 	"// --------\n\nstatic wuffs_base__result_f64  //\nwuffs_base__private_implementation__parse_number_f64_special(\n    wuffs_base__slice_u8 s,\n    uint32_t options) {\n  do {\n    if (options & WUFFS_BASE__PARSE_NUMBER_FXX__REJECT_INF_AND_NAN) {\n      goto fail;\n    }\n\n    uint8_t* p = s.ptr;\n    uint8_t* q = s.ptr + s.len;\n\n    for (; (p < q) && (*p == '_'); p++) {\n    }\n    if (p >= q) {\n      goto fail;\n    }\n\n    // Parse sign.\n    bool negative = false;\n    do {\n      if (*p == '+') {\n        p++;\n      } else if (*p == '-') {\n        negative = true;\n        p++;\n      } else {\n        break;\n      }\n      for (; (p < q) && (*p == '_'); p++) {\n      }\n    } while (0);\n    if (p >= q) {\n      goto fail;\n    }\n\n    bool nan = false;\n    switch (p[0]) {\n      case 'I':\n      case 'i':\n        if (((q - p) < 3) ||                     //\n            ((p[1] != 'N') && (p[1] != 'n')) ||  //\n            ((p[2] != 'F') && (p[2] != 'f'))) {\n          goto fail;\n        }\n        p += 3;\n\n        if ((p >= q) || (*p == '_" +
 	"')) {\n          break;\n        } else if (((q - p) < 5) ||                     //\n                   ((p[0] != 'I') && (p[0] != 'i')) ||  //\n                   ((p[1] != 'N') && (p[1] != 'n')) ||  //\n                   ((p[2] != 'I') && (p[2] != 'i')) ||  //\n                   ((p[3] != 'T') && (p[3] != 't')) ||  //\n                   ((p[4] != 'Y') && (p[4] != 'y'))) {\n          goto fail;\n        }\n        p += 5;\n\n        if ((p >= q) || (*p == '_')) {\n          break;\n        }\n        goto fail;\n\n      case 'N':\n      case 'n':\n        if (((q - p) < 3) ||                     //\n            ((p[1] != 'A') && (p[1] != 'a')) ||  //\n            ((p[2] != 'N') && (p[2] != 'n'))) {\n          goto fail;\n        }\n        p += 3;\n\n        if ((p >= q) || (*p == '_')) {\n          nan = true;\n          break;\n        }\n        goto fail;\n\n      default:\n        goto fail;\n    }\n\n    // Finish.\n    for (; (p < q) && (*p == '_'); p++) {\n    }\n    if (p != q) {\n      goto fail;\n    }\n    wuffs_base__result_f64 ret;\n" +
 	"    ret.status.repr = NULL;\n    ret.value = wuffs_base__ieee_754_bit_representation__from_u64_to_f64(\n        (nan ? 0x7FFFFFFFFFFFFFFF : 0x7FF0000000000000) |\n        (negative ? 0x8000000000000000 : 0));\n    return ret;\n  } while (0);\n\nfail:\n  do {\n    wuffs_base__result_f64 ret;\n    ret.status.repr = wuffs_base__error__bad_argument;\n    ret.value = 0;\n    return ret;\n  } while (0);\n}\n\nWUFFS_BASE__MAYBE_STATIC wuffs_base__result_f64  //\nwuffs_base__private_implementation__high_prec_dec__to_f64(\n    wuffs_base__private_implementation__high_prec_dec* h,\n    uint32_t options) {\n  do {\n    // powers converts decimal powers of 10 to binary powers of 2. For example,\n    // (10000 >> 13) is 1. It stops before the elements exceed 60, also known\n    // as WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL.\n    static const uint32_t num_powers = 19;\n    static const uint8_t powers[19] = {\n        0,  3,  6,  9,  13, 16, 19, 23, 26, 29,  //\n        33, 36, 39, 43, 46, 49, 53, 56, 59,      //\n    };\n\n    // Handl" +
-	"e zero and obvious extremes. The largest and smallest positive\n    // finite f64 values are approximately 1.8e+308 and 4.9e-324.\n    if ((h->num_digits == 0) || (h->decimal_point < -326)) {\n      goto zero;\n    } else if (h->decimal_point > 310) {\n      goto infinity;\n    }\n\n    // Try the fast Eisel-Lemire algorithm again. Calculating the (man, exp10)\n    // pair from the high_prec_dec h is more correct but slower than the\n    // approach taken in wuffs_base__parse_number_f64. The latter is optimized\n    // for the common cases (e.g. assuming no underscores or a leading '+'\n    // sign) rather than the full set of cases allowed by the Wuffs API.\n    if (h->num_digits <= 19) {\n      uint64_t man = 0;\n      uint32_t i;\n      for (i = 0; i < h->num_digits; i++) {\n        man = (10 * man) + h->digits[i];\n      }\n      int32_t exp10 = h->decimal_point - ((int32_t)(h->num_digits));\n      if ((man != 0) && (-326 <= exp10) && (exp10 <= 310)) {\n        int64_t r =\n            wuffs_base__private_implementation__parse" +
+	"e zero and obvious extremes. The largest and smallest positive\n    // finite f64 values are approximately 1.8e+308 and 4.9e-324.\n    if ((h->num_digits == 0) || (h->decimal_point < -326)) {\n      goto zero;\n    } else if (h->decimal_point > 310) {\n      goto infinity;\n    }\n\n    // Try the fast Eisel-Lemire algorithm again. Calculating the (man, exp10)\n    // pair from the high_prec_dec h is more correct but slower than the\n    // approach taken in wuffs_base__parse_number_f64. The latter is optimized\n    // for the common cases (e.g. assuming no underscores or a leading '+'\n    // sign) rather than the full set of cases allowed by the Wuffs API.\n    if (h->num_digits <= 19) {\n      uint64_t man = 0;\n      uint32_t i;\n      for (i = 0; i < h->num_digits; i++) {\n        man = (10 * man) + h->digits[i];\n      }\n      int32_t exp10 = h->decimal_point - ((int32_t)(h->num_digits));\n      if ((man != 0) && (-307 <= exp10) && (exp10 <= 288)) {\n        int64_t r =\n            wuffs_base__private_implementation__parse" +
 	"_number_f64_eisel_lemire(\n                man, exp10);\n        if (r >= 0) {\n          wuffs_base__result_f64 ret;\n          ret.status.repr = NULL;\n          ret.value = wuffs_base__ieee_754_bit_representation__from_u64_to_f64(\n              ((uint64_t)r) | (((uint64_t)(h->negative)) << 63));\n          return ret;\n        }\n      }\n    }\n\n    // Scale by powers of 2 until we're in the range [½ .. 1], which gives us\n    // our exponent (in base-2). First we shift right, possibly a little too\n    // far, ending with a value certainly below 1 and possibly below ½...\n    const int32_t f64_bias = -1023;\n    int32_t exp2 = 0;\n    while (h->decimal_point > 0) {\n      uint32_t n = (uint32_t)(+h->decimal_point);\n      uint32_t shift =\n          (n < num_powers)\n              ? powers[n]\n              : WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL;\n\n      wuffs_base__private_implementation__high_prec_dec__small_rshift(h, shift);\n      if (h->decimal_point <\n          -WUFFS_BASE__PRIVATE_IMPLEMENTATION__" +
 	"HPD__DECIMAL_POINT__RANGE) {\n        goto zero;\n      }\n      exp2 += (int32_t)shift;\n    }\n    // ...then we shift left, putting us in [½ .. 1].\n    while (h->decimal_point <= 0) {\n      uint32_t shift;\n      if (h->decimal_point == 0) {\n        if (h->digits[0] >= 5) {\n          break;\n        }\n        shift = (h->digits[0] < 2) ? 2 : 1;\n      } else {\n        uint32_t n = (uint32_t)(-h->decimal_point);\n        shift = (n < num_powers)\n                    ? powers[n]\n                    : WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL;\n      }\n\n      wuffs_base__private_implementation__high_prec_dec__small_lshift(h, shift);\n      if (h->decimal_point >\n          +WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE) {\n        goto infinity;\n      }\n      exp2 -= (int32_t)shift;\n    }\n\n    // We're in the range [½ .. 1] but f64 uses [1 .. 2].\n    exp2--;\n\n    // The minimum normal exponent is (f64_bias + 1).\n    while ((f64_bias + 1) > exp2) {\n      uint32_t n = (uint32_t)((f64_bias + 1" +
 	") - exp2);\n      if (n > WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL) {\n        n = WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL;\n      }\n      wuffs_base__private_implementation__high_prec_dec__small_rshift(h, n);\n      exp2 += (int32_t)n;\n    }\n\n    // Check for overflow.\n    if ((exp2 - f64_bias) >= 0x07FF) {  // (1 << 11) - 1.\n      goto infinity;\n    }\n\n    // Extract 53 bits for the mantissa (in base-2).\n    wuffs_base__private_implementation__high_prec_dec__small_lshift(h, 53);\n    uint64_t man2 =\n        wuffs_base__private_implementation__high_prec_dec__rounded_integer(h);\n\n    // Rounding might have added one bit. If so, shift and re-check overflow.\n    if ((man2 >> 53) != 0) {\n      man2 >>= 1;\n      exp2++;\n      if ((exp2 - f64_bias) >= 0x07FF) {  // (1 << 11) - 1.\n        goto infinity;\n      }\n    }\n\n    // Handle subnormal numbers.\n    if ((man2 >> 52) == 0) {\n      exp2 = f64_bias;\n    }\n\n    // Pack the bits and return.\n    uint64_t exp2_bits =\n        (uint64_t)((ex" +
@@ -407,9 +407,9 @@
 	"0 * man) + ((uint8_t)(*p - '0'));\n      }\n    } else {\n      goto fallback;\n    }\n\n    // Walk the \"d\"s after the optional decimal separator ('.' or ','),\n    // updating the man and exp10 variables.\n    int32_t exp10 = 0;\n    if (*p ==\n        ((options & WUFFS_BASE__PARSE_NUMBER_FXX__DECIMAL_SEPARATOR_IS_A_COMMA)\n             ? ','\n             : '.')) {\n      p++;\n      const uint8_t* first_after_separator_ptr = p;\n      if (!wuffs_base__private_implementation__is_decimal_digit(*p)) {\n        goto fallback;\n      }\n      man = (10 * man) + ((uint8_t)(*p - '0'));\n      p++;\n      for (; wuffs_base__private_implementation__is_decimal_digit(*p); p++) {\n        man = (10 * man) + ((uint8_t)(*p - '0'));\n      }\n      exp10 = ((int32_t)(first_after_separator_ptr - p));\n    }\n\n    // Count the number of digits:\n    //  - for an input of \"314159\",  digit_count is 6.\n    //  - for an input of \"3.14159\", digit_count is 7.\n    //\n    // This is off-by-one if there is a decimal separator. That's OK for now.\n    // We'" +
 	"ll correct for that later. The \"script/process-json-numbers.c with\n    // -p\" benchmark is noticably slower if we try to correct for that now.\n    uint32_t digit_count = (uint32_t)(p - start_of_digits_ptr);\n\n    // Update exp10 for the optional exponent, starting with 'E' or 'e'.\n    if ((*p | 0x20) == 'e') {\n      p++;\n      int32_t exp_sign = +1;\n      if (*p == '-') {\n        p++;\n        exp_sign = -1;\n      } else if (*p == '+') {\n        p++;\n      }\n      if (!wuffs_base__private_implementation__is_decimal_digit(*p)) {\n        goto fallback;\n      }\n      int32_t exp_num = ((uint8_t)(*p - '0'));\n      p++;\n      // The rest of the exp_num walking has a peculiar control flow but, once\n      // again, the \"script/process-json-numbers.c with -p\" benchmark is\n      // sensitive to alternative formulations.\n      if (wuffs_base__private_implementation__is_decimal_digit(*p)) {\n        exp_num = (10 * exp_num) + ((uint8_t)(*p - '0'));\n        p++;\n      }\n      if (wuffs_base__private_implementation__is_decim" +
 	"al_digit(*p)) {\n        exp_num = (10 * exp_num) + ((uint8_t)(*p - '0'));\n        p++;\n      }\n      while (wuffs_base__private_implementation__is_decimal_digit(*p)) {\n        if (exp_num > 0x1000000) {\n          goto fallback;\n        }\n        exp_num = (10 * exp_num) + ((uint8_t)(*p - '0'));\n        p++;\n      }\n      exp10 += exp_sign * exp_num;\n    }\n\n    // The Wuffs API is that the original slice has no trailing data. It also\n    // allows underscores, which we don't catch here but the fallback should.\n    if (p != &z[s.len]) {\n      goto fallback;\n    }\n\n    // Check that the uint64_t typed man variable has not overflowed, based on\n    // digit_count.\n    //\n    // For reference:\n    //   - (1 << 63) is  9223372036854775808, which has 19 decimal digits.\n    //   - (1 << 64) is 18446744073709551616, which has 20 decimal digits.\n    //   - 19 nines,  9999999999999999999, is  0x8AC7230489E7FFFF, which has 64\n    //     bits and 16 hexadecimal digits.\n    //   - 20 nines, 99999999999999999999, is 0x56BC75" +
-	"E2D630FFFFF, which has 67\n    //     bits and 17 hexadecimal digits.\n    if (digit_count > 19) {\n      // Even if we have more than 19 pseudo-digits, it's not yet definitely an\n      // overflow. Recall that digit_count might be off-by-one (too large) if\n      // there's a decimal separator. It will also over-report the number of\n      // meaningful digits if the input looks something like \"0.000dddExxx\".\n      //\n      // We adjust by the number of leading '0's and '.'s and re-compare to 19.\n      // Once again, technically, we could skip ','s too, but that perturbs the\n      // \"script/process-json-numbers.c with -p\" benchmark.\n      const uint8_t* q = start_of_digits_ptr;\n      for (; (*q == '0') || (*q == '.'); q++) {\n      }\n      digit_count -= (uint32_t)(q - start_of_digits_ptr);\n      if (digit_count > 19) {\n        goto fallback;\n      }\n    }\n\n    // The wuffs_base__private_implementation__parse_number_f64_eisel_lemire\n    // preconditions include that exp10 is in the range -326 ..= 310.\n    if ((ex" +
-	"p10 < -326) || (310 < exp10)) {\n      goto fallback;\n    }\n\n    // If both man and (10 ** exp10) are exactly representable by a double, we\n    // don't need to run the Eisel-Lemire algorithm.\n    if ((-22 <= exp10) && (exp10 <= 22) && ((man >> 53) == 0)) {\n      double d = (double)man;\n      if (exp10 >= 0) {\n        d *= wuffs_base__private_implementation__f64_powers_of_10[+exp10];\n      } else {\n        d /= wuffs_base__private_implementation__f64_powers_of_10[-exp10];\n      }\n      wuffs_base__result_f64 ret;\n      ret.status.repr = NULL;\n      ret.value = negative ? -d : +d;\n      return ret;\n    }\n\n    // The wuffs_base__private_implementation__parse_number_f64_eisel_lemire\n    // preconditions include that man is non-zero. Parsing \"0\" should be caught\n    // by the \"If both man and (10 ** exp10)\" above, but \"0e99\" might not.\n    if (man == 0) {\n      goto fallback;\n    }\n\n    // Our man and exp10 are in range. Run the Eisel-Lemire algorithm.\n    int64_t r =\n        wuffs_base__private_implementation__pa" +
-	"rse_number_f64_eisel_lemire(\n            man, exp10);\n    if (r < 0) {\n      goto fallback;\n    }\n    wuffs_base__result_f64 ret;\n    ret.status.repr = NULL;\n    ret.value = wuffs_base__ieee_754_bit_representation__from_u64_to_f64(\n        ((uint64_t)r) | (((uint64_t)negative) << 63));\n    return ret;\n  } while (0);\n\nfallback:\n  do {\n    wuffs_base__private_implementation__high_prec_dec h;\n    wuffs_base__status status =\n        wuffs_base__private_implementation__high_prec_dec__parse(&h, s,\n                                                                 options);\n    if (status.repr) {\n      return wuffs_base__private_implementation__parse_number_f64_special(\n          s, options);\n    }\n    return wuffs_base__private_implementation__high_prec_dec__to_f64(&h,\n                                                                     options);\n  } while (0);\n}\n\n" +
+	"E2D630FFFFF, which has 67\n    //     bits and 17 hexadecimal digits.\n    if (digit_count > 19) {\n      // Even if we have more than 19 pseudo-digits, it's not yet definitely an\n      // overflow. Recall that digit_count might be off-by-one (too large) if\n      // there's a decimal separator. It will also over-report the number of\n      // meaningful digits if the input looks something like \"0.000dddExxx\".\n      //\n      // We adjust by the number of leading '0's and '.'s and re-compare to 19.\n      // Once again, technically, we could skip ','s too, but that perturbs the\n      // \"script/process-json-numbers.c with -p\" benchmark.\n      const uint8_t* q = start_of_digits_ptr;\n      for (; (*q == '0') || (*q == '.'); q++) {\n      }\n      digit_count -= (uint32_t)(q - start_of_digits_ptr);\n      if (digit_count > 19) {\n        goto fallback;\n      }\n    }\n\n    // The wuffs_base__private_implementation__parse_number_f64_eisel_lemire\n    // preconditions include that exp10 is in the range [-307 ..= 288].\n    if ((" +
+	"exp10 < -307) || (288 < exp10)) {\n      goto fallback;\n    }\n\n    // If both man and (10 ** exp10) are exactly representable by a double, we\n    // don't need to run the Eisel-Lemire algorithm.\n    if ((-22 <= exp10) && (exp10 <= 22) && ((man >> 53) == 0)) {\n      double d = (double)man;\n      if (exp10 >= 0) {\n        d *= wuffs_base__private_implementation__f64_powers_of_10[+exp10];\n      } else {\n        d /= wuffs_base__private_implementation__f64_powers_of_10[-exp10];\n      }\n      wuffs_base__result_f64 ret;\n      ret.status.repr = NULL;\n      ret.value = negative ? -d : +d;\n      return ret;\n    }\n\n    // The wuffs_base__private_implementation__parse_number_f64_eisel_lemire\n    // preconditions include that man is non-zero. Parsing \"0\" should be caught\n    // by the \"If both man and (10 ** exp10)\" above, but \"0e99\" might not.\n    if (man == 0) {\n      goto fallback;\n    }\n\n    // Our man and exp10 are in range. Run the Eisel-Lemire algorithm.\n    int64_t r =\n        wuffs_base__private_implementation__" +
+	"parse_number_f64_eisel_lemire(\n            man, exp10);\n    if (r < 0) {\n      goto fallback;\n    }\n    wuffs_base__result_f64 ret;\n    ret.status.repr = NULL;\n    ret.value = wuffs_base__ieee_754_bit_representation__from_u64_to_f64(\n        ((uint64_t)r) | (((uint64_t)negative) << 63));\n    return ret;\n  } while (0);\n\nfallback:\n  do {\n    wuffs_base__private_implementation__high_prec_dec h;\n    wuffs_base__status status =\n        wuffs_base__private_implementation__high_prec_dec__parse(&h, s,\n                                                                 options);\n    if (status.repr) {\n      return wuffs_base__private_implementation__parse_number_f64_special(\n          s, options);\n    }\n    return wuffs_base__private_implementation__high_prec_dec__to_f64(&h,\n                                                                     options);\n  } while (0);\n}\n\n" +
 	"" +
 	"// --------\n\nstatic inline size_t  //\nwuffs_base__private_implementation__render_inf(wuffs_base__slice_u8 dst,\n                                               bool neg,\n                                               uint32_t options) {\n  if (neg) {\n    if (dst.len < 4) {\n      return 0;\n    }\n    wuffs_base__store_u32le__no_bounds_check(dst.ptr, 0x666E492D);  // '-Inf'le.\n    return 4;\n  }\n\n  if (options & WUFFS_BASE__RENDER_NUMBER_XXX__LEADING_PLUS_SIGN) {\n    if (dst.len < 4) {\n      return 0;\n    }\n    wuffs_base__store_u32le__no_bounds_check(dst.ptr, 0x666E492B);  // '+Inf'le.\n    return 4;\n  }\n\n  if (dst.len < 3) {\n    return 0;\n  }\n  wuffs_base__store_u24le__no_bounds_check(dst.ptr, 0x666E49);  // 'Inf'le.\n  return 3;\n}\n\nstatic inline size_t  //\nwuffs_base__private_implementation__render_nan(wuffs_base__slice_u8 dst) {\n  if (dst.len < 3) {\n    return 0;\n  }\n  wuffs_base__store_u24le__no_bounds_check(dst.ptr, 0x4E614E);  // 'NaN'le.\n  return 3;\n}\n\nstatic size_t  //\nwuffs_base__private_implementation__high" +
 	"_prec_dec__render_exponent_absent(\n    wuffs_base__slice_u8 dst,\n    wuffs_base__private_implementation__high_prec_dec* h,\n    uint32_t precision,\n    uint32_t options) {\n  size_t n = (h->negative ||\n              (options & WUFFS_BASE__RENDER_NUMBER_XXX__LEADING_PLUS_SIGN))\n                 ? 1\n                 : 0;\n  if (h->decimal_point <= 0) {\n    n += 1;\n  } else {\n    n += (size_t)(h->decimal_point);\n  }\n  if (precision > 0) {\n    n += precision + 1;  // +1 for the '.'.\n  }\n\n  // Don't modify dst if the formatted number won't fit.\n  if (n > dst.len) {\n    return 0;\n  }\n\n  // Align-left or align-right.\n  uint8_t* ptr = (options & WUFFS_BASE__RENDER_NUMBER_XXX__ALIGN_RIGHT)\n                     ? &dst.ptr[dst.len - n]\n                     : &dst.ptr[0];\n\n  // Leading \"±\".\n  if (h->negative) {\n    *ptr++ = '-';\n  } else if (options & WUFFS_BASE__RENDER_NUMBER_XXX__LEADING_PLUS_SIGN) {\n    *ptr++ = '+';\n  }\n\n  // Integral digits.\n  if (h->decimal_point <= 0) {\n    *ptr++ = '0';\n  } else {\n    uint32_t m =\n" +

diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index 66007fa..21218a4 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c

@@ -11621,8 +11621,14 @@
 //
 // Preconditions:
 //  - man is non-zero.
-//  - exp10 is in the range -326 ..= 310, the same range of the
+//  - exp10 is in the range -307 ..= 288, the same range of the
 //    wuffs_base__private_implementation__powers_of_10 array.
+//
+// The exp10 range (and the fact that man is in the range [1 ..= UINT64_MAX],
+// approximately [1 ..= 1.85e+19]) means that (man * (10 ** exp10)) is in the
+// range [1e-307 ..= 1.85e+307]. This is entirely within the range of normal
+// (neither subnormal nor non-finite) f64 values: DBL_MIN and DBL_MAX are
+// approximately 2.23e–308 and 1.80e+308.
 static int64_t  //
 wuffs_base__private_implementation__parse_number_f64_eisel_lemire(
     uint64_t man,
@@ -11728,6 +11734,11 @@
   // carrying up overflowed, shift again.
   ret_mantissa += ret_mantissa & 1;
   ret_mantissa >>= 1;
+  // This if block is equivalent to (but benchmarks slightly faster than) the
+  // following branchless form:
+  //    uint64_t overflow_adjustment = ret_mantissa >> 53;
+  //    ret_mantissa >>= overflow_adjustment;
+  //    ret_exp2 += overflow_adjustment;
   if ((ret_mantissa >> 53) > 0) {
     ret_mantissa >>= 1;
     ret_exp2++;
@@ -11737,12 +11748,6 @@
   // have an implicit mantissa bit. Mask that away and keep the low 52 bits.
   ret_mantissa &= 0x000FFFFFFFFFFFFF;
 
-  // IEEE 754 double-precision floating point has 11 exponent bits. All off (0)
-  // means subnormal numbers. All on (2047) means infinity or NaN.
-  if ((ret_exp2 <= 0) || (2047 <= ret_exp2)) {
-    return -1;
-  }
-
   // Pack the bits and return.
   return ((int64_t)(ret_mantissa | (ret_exp2 << 52)));
 }
@@ -11889,7 +11894,7 @@
         man = (10 * man) + h->digits[i];
       }
       int32_t exp10 = h->decimal_point - ((int32_t)(h->num_digits));
-      if ((man != 0) && (-326 <= exp10) && (exp10 <= 310)) {
+      if ((man != 0) && (-307 <= exp10) && (exp10 <= 288)) {
         int64_t r =
             wuffs_base__private_implementation__parse_number_f64_eisel_lemire(
                 man, exp10);
@@ -12198,8 +12203,8 @@
     }
 
     // The wuffs_base__private_implementation__parse_number_f64_eisel_lemire
-    // preconditions include that exp10 is in the range -326 ..= 310.
-    if ((exp10 < -326) || (310 < exp10)) {
+    // preconditions include that exp10 is in the range [-307 ..= 288].
+    if ((exp10 < -307) || (288 < exp10)) {
       goto fallback;
     }
commit	8b45db0120b5038789b62296b755496faafbc6de	[log] [tgz]
author	Nigel Tao <nigeltao@golang.org>	Tue Sep 15 21:50:32 2020 +1000
committer	Nigel Tao <nigeltao@golang.org>	Tue Sep 15 21:58:57 2020 +1000
tree	da1f2ecf9a8a951f908849cb06ae5a1369f4733f
parent	ddac2c50eeb94a0ef53d7a39d5c13fc4e03df771 [diff]