Use a formula to calculate Eisel-Lemire power-of-2
On a mid-range x86_64 laptop, processing the 181 MiB citylots.json file
from github.com/zemirco/sf-city-lots-json:
$ g++ -O3 script/process-json-numbers.c
$ time ./a.out -parse-number-f64 < citylots.json
Before: real 0m0.992s
After:  real 0m1.035s
diff --git a/internal/cgen/base/floatconv-submodule-code.c b/internal/cgen/base/floatconv-submodule-code.c
index 16ed86b..af842c4 100644
--- a/internal/cgen/base/floatconv-submodule-code.c
+++ b/internal/cgen/base/floatconv-submodule-code.c
@@ -1002,8 +1002,21 @@
man <<= clz;
// Calculate the return value's base-2 exponent. We might tweak it by ±1
- // later, but its initial value comes from the look-up table and clz.
- uint64_t ret_exp2 = ((uint64_t)po10[4]) - ((uint64_t)clz);
+ // later, but its initial value comes from a linear scaling of exp10,
+ // converting from power-of-10 to power-of-2, and adjusting by clz.
+ //
+ // The magic constants are:
+ // - 1087 = 1023 + 64. The 1023 is the f64 exponent bias. The 64 is because
+ // the look-up table uses 64-bit mantissas.
+ // - 217706 is such that the ratio 217706 / 65536 ≈ 3.321930 is close enough
+ // (over the practical range of exp10) to log(10) / log(2) ≈ 3.321928.
+ // - 65536 = 1<<16 is arbitrary but a power of 2, so division is a shift.
+ //
+ // Equality of the linearly-scaled value and the actual power-of-2, over the
+ // range of exp10 arguments that this function accepts, is confirmed by
+ // script/print-mpb-powers-of-10.go
+ uint64_t ret_exp2 =
+ ((uint64_t)(((217706 * exp10) >> 16) + 1087)) - ((uint64_t)clz);
// Multiply the two mantissas. Normalization means that both mantissas are at
// least (1<<63), so the 128-bit product must be at least (1<<126). The high
diff --git a/internal/cgen/data/data.go b/internal/cgen/data/data.go
index 5bc3861..ce8958b 100644
--- a/internal/cgen/data/data.go
+++ b/internal/cgen/data/data.go
@@ -386,12 +386,13 @@
"ion__high_prec_dec__round_down(h,\n hi + 1);\n return;\n }\n } else {\n if (can_round_up) {\n wuffs_base__private_implementation__high_prec_dec__round_up(h, hi + 1);\n return;\n }\n }\n }\n}\n\n" +
"" +
"// --------\n\n// wuffs_base__private_implementation__parse_number_f64_eisel_lemire produces\n// the IEEE 754 double-precision value for an exact mantissa and base-10\n// exponent. For example:\n// - when parsing \"12345.678e+02\", man is 12345678 and exp10 is -1.\n// - when parsing \"-12\", man is 12 and exp10 is 0. Processing the leading\n// minus sign is the responsibility of the caller, not this function.\n//\n// On success, it returns a non-negative int64_t such that the low 63 bits hold\n// the 11-bit exponent and 52-bit mantissa.\n//\n// On failure, it returns a negative value.\n//\n// The algorithm is based on an original idea by Michael Eisel that was refined\n// by Daniel Lemire. See\n// https://lemire.me/blog/2020/03/10/fast-float-parsing-in-practice/\n//\n// Preconditions:\n// - man is non-zero.\n// - exp10 is in the range [-307 ..= 288], the same range of the\n// wuffs_base__private_implementation__powers_of_10 array.\n//\n// The exp10 range (and the fact that man is in the range [1 ..= UINT64_MAX],\n// approximat" +
- "ely [1 ..= 1.85e+19]) means that (man * (10 ** exp10)) is in the\n// range [1e-307 ..= 1.85e+307]. This is entirely within the range of normal\n// (neither subnormal nor non-finite) f64 values: DBL_MIN and DBL_MAX are\n// approximately 2.23e–308 and 1.80e+308.\nstatic int64_t //\nwuffs_base__private_implementation__parse_number_f64_eisel_lemire(\n uint64_t man,\n int32_t exp10) {\n // Look up the (possibly truncated) base-2 representation of (10 ** exp10).\n // The look-up table was constructed so that it is already normalized: the\n // table entry's mantissa's MSB (most significant bit) is on.\n const uint32_t* po10 =\n &wuffs_base__private_implementation__powers_of_10[5 * (exp10 + 307)];\n\n // Normalize the man argument. The (man != 0) precondition means that a\n // non-zero bit exists.\n uint32_t clz = wuffs_base__count_leading_zeroes_u64(man);\n man <<= clz;\n\n // Calculate the return value's base-2 exponent. We might tweak it by ±1\n // later, but its initial value comes from the look-up table and" +
- " clz.\n uint64_t ret_exp2 = ((uint64_t)po10[4]) - ((uint64_t)clz);\n\n // Multiply the two mantissas. Normalization means that both mantissas are at\n // least (1<<63), so the 128-bit product must be at least (1<<126). The high\n // 64 bits of the product, x_hi, must therefore be at least (1<<62).\n //\n // As a consequence, x_hi has either 0 or 1 leading zeroes. Shifting x_hi\n // right by either 9 or 10 bits (depending on x_hi's MSB) will therefore\n // leave the top 10 MSBs (bits 54 ..= 63) off and the 11th MSB (bit 53) on.\n wuffs_base__multiply_u64__output x = wuffs_base__multiply_u64(\n man, ((uint64_t)po10[2]) | (((uint64_t)po10[3]) << 32));\n uint64_t x_hi = x.hi;\n uint64_t x_lo = x.lo;\n\n // Before we shift right by at least 9 bits, recall that the look-up table\n // entry was possibly truncated. We have so far only calculated a lower bound\n // for the product (man * e), where e is (10 ** exp10). The upper bound would\n // add a further (man * 1) to the 128-bit product, which overflows the lower" +
- "\n // 64-bit limb if ((x_lo + man) < man).\n //\n // If overflow occurs, that adds 1 to x_hi. Since we're about to shift right\n // by at least 9 bits, that carried 1 can be ignored unless the higher 64-bit\n // limb's low 9 bits are all on.\n if (((x_hi & 0x1FF) == 0x1FF) && ((x_lo + man) < man)) {\n // Refine our calculation of (man * e). Before, our approximation of e used\n // a \"low resolution\" 64-bit mantissa. Now use a \"high resolution\" 128-bit\n // mantissa. We've already calculated x = (man * bits_0_to_63_incl_of_e).\n // Now calculate y = (man * bits_64_to_127_incl_of_e).\n wuffs_base__multiply_u64__output y = wuffs_base__multiply_u64(\n man, ((uint64_t)po10[0]) | (((uint64_t)po10[1]) << 32));\n uint64_t y_hi = y.hi;\n uint64_t y_lo = y.lo;\n\n // Merge the 128-bit x and 128-bit y, which overlap by 64 bits, to\n // calculate the 192-bit product of the 64-bit man by the 128-bit e.\n // As we exit this if-block, we only care about the high 128 bits\n // (merged_hi and merged" +
- "_lo) of that 192-bit product.\n uint64_t merged_hi = x_hi;\n uint64_t merged_lo = x_lo + y_hi;\n if (merged_lo < x_lo) {\n merged_hi++; // Carry the overflow bit.\n }\n\n // The \"high resolution\" approximation of e is still a lower bound. Once\n // again, see if the upper bound is large enough to produce a different\n // result. This time, if it does, give up instead of reaching for an even\n // more precise approximation to e.\n //\n // This three-part check is similar to the two-part check that guarded the\n // if block that we're now in, but it has an extra term for the middle 64\n // bits (checking that adding 1 to merged_lo would overflow).\n if (((merged_hi & 0x1FF) == 0x1FF) && ((merged_lo + 1) == 0) &&\n (y_lo + man < man)) {\n return -1;\n }\n\n // Replace the 128-bit x with merged.\n x_hi = merged_hi;\n x_lo = merged_lo;\n }\n\n // As mentioned above, shifting x_hi right by either 9 or 10 bits will leave\n // the top 10 MSBs (bits 54 ..= 63) off and the " +
- "11th MSB (bit 53) on. If the\n // MSB (before shifting) was on, adjust ret_exp2 for the larger shift.\n //\n // Having bit 53 on (and higher bits off) means that ret_mantissa is a 54-bit\n // number.\n uint64_t msb = x_hi >> 63;\n uint64_t ret_mantissa = x_hi >> (msb + 9);\n ret_exp2 -= 1 ^ msb;\n\n // IEEE 754 rounds to-nearest with ties rounded to-even. Rounding to-even can\n // be tricky. If we're half-way between two exactly representable numbers\n // (x's low 73 bits are zero and the next 2 bits that matter are \"01\"), give\n // up instead of trying to pick the winner.\n //\n // Technically, we could tighten the condition by changing \"73\" to \"73 or 74,\n // depending on msb\", but a flat \"73\" is simpler.\n if ((x_lo == 0) && ((x_hi & 0x1FF) == 0) && ((ret_mantissa & 3) == 1)) {\n return -1;\n }\n\n // If we're not halfway then it's rounding to-nearest. Starting with a 54-bit\n // number, carry the lowest bit (bit 0) up if it's on. Regardless of whether\n // it was on or off, shifting right by one then prod" +
- "uces a 53-bit number. If\n // carrying up overflowed, shift again.\n ret_mantissa += ret_mantissa & 1;\n ret_mantissa >>= 1;\n // This if block is equivalent to (but benchmarks slightly faster than) the\n // following branchless form:\n // uint64_t overflow_adjustment = ret_mantissa >> 53;\n // ret_mantissa >>= overflow_adjustment;\n // ret_exp2 += overflow_adjustment;\n if ((ret_mantissa >> 53) > 0) {\n ret_mantissa >>= 1;\n ret_exp2++;\n }\n\n // Starting with a 53-bit number, IEEE 754 double-precision normal numbers\n // have an implicit mantissa bit. Mask that away and keep the low 52 bits.\n ret_mantissa &= 0x000FFFFFFFFFFFFF;\n\n // Pack the bits and return.\n return ((int64_t)(ret_mantissa | (ret_exp2 << 52)));\n}\n\n" +
+ "ely [1 ..= 1.85e+19]) means that (man * (10 ** exp10)) is in the\n// range [1e-307 ..= 1.85e+307]. This is entirely within the range of normal\n// (neither subnormal nor non-finite) f64 values: DBL_MIN and DBL_MAX are\n// approximately 2.23e–308 and 1.80e+308.\nstatic int64_t //\nwuffs_base__private_implementation__parse_number_f64_eisel_lemire(\n uint64_t man,\n int32_t exp10) {\n // Look up the (possibly truncated) base-2 representation of (10 ** exp10).\n // The look-up table was constructed so that it is already normalized: the\n // table entry's mantissa's MSB (most significant bit) is on.\n const uint32_t* po10 =\n &wuffs_base__private_implementation__powers_of_10[5 * (exp10 + 307)];\n\n // Normalize the man argument. The (man != 0) precondition means that a\n // non-zero bit exists.\n uint32_t clz = wuffs_base__count_leading_zeroes_u64(man);\n man <<= clz;\n\n // Calculate the return value's base-2 exponent. We might tweak it by ±1\n // later, but its initial value comes from a linear scaling of e" +
+ "xp10,\n // converting from power-of-10 to power-of-2, and adjusting by clz.\n //\n // The magic constants are:\n // - 1087 = 1023 + 64. The 1023 is the f64 exponent bias. The 64 is because\n // the look-up table uses 64-bit mantissas.\n // - 217706 is such that the ratio 217706 / 65536 ≈ 3.321930 is close enough\n // (over the practical range of exp10) to log(10) / log(2) ≈ 3.321928.\n // - 65536 = 1<<16 is arbitrary but a power of 2, so division is a shift.\n //\n // Equality of the linearly-scaled value and the actual power-of-2, over the\n // range of exp10 arguments that this function accepts, is confirmed by\n // script/print-mpb-powers-of-10.go\n uint64_t ret_exp2 =\n ((uint64_t)(((217706 * exp10) >> 16) + 1087)) - ((uint64_t)clz);\n\n // Multiply the two mantissas. Normalization means that both mantissas are at\n // least (1<<63), so the 128-bit product must be at least (1<<126). The high\n // 64 bits of the product, x_hi, must therefore be at least (1<<62).\n //\n // As a consequence, " +
+ "x_hi has either 0 or 1 leading zeroes. Shifting x_hi\n // right by either 9 or 10 bits (depending on x_hi's MSB) will therefore\n // leave the top 10 MSBs (bits 54 ..= 63) off and the 11th MSB (bit 53) on.\n wuffs_base__multiply_u64__output x = wuffs_base__multiply_u64(\n man, ((uint64_t)po10[2]) | (((uint64_t)po10[3]) << 32));\n uint64_t x_hi = x.hi;\n uint64_t x_lo = x.lo;\n\n // Before we shift right by at least 9 bits, recall that the look-up table\n // entry was possibly truncated. We have so far only calculated a lower bound\n // for the product (man * e), where e is (10 ** exp10). The upper bound would\n // add a further (man * 1) to the 128-bit product, which overflows the lower\n // 64-bit limb if ((x_lo + man) < man).\n //\n // If overflow occurs, that adds 1 to x_hi. Since we're about to shift right\n // by at least 9 bits, that carried 1 can be ignored unless the higher 64-bit\n // limb's low 9 bits are all on.\n if (((x_hi & 0x1FF) == 0x1FF) && ((x_lo + man) < man)) {\n // Refine our calcula" +
+ "tion of (man * e). Before, our approximation of e used\n // a \"low resolution\" 64-bit mantissa. Now use a \"high resolution\" 128-bit\n // mantissa. We've already calculated x = (man * bits_0_to_63_incl_of_e).\n // Now calculate y = (man * bits_64_to_127_incl_of_e).\n wuffs_base__multiply_u64__output y = wuffs_base__multiply_u64(\n man, ((uint64_t)po10[0]) | (((uint64_t)po10[1]) << 32));\n uint64_t y_hi = y.hi;\n uint64_t y_lo = y.lo;\n\n // Merge the 128-bit x and 128-bit y, which overlap by 64 bits, to\n // calculate the 192-bit product of the 64-bit man by the 128-bit e.\n // As we exit this if-block, we only care about the high 128 bits\n // (merged_hi and merged_lo) of that 192-bit product.\n uint64_t merged_hi = x_hi;\n uint64_t merged_lo = x_lo + y_hi;\n if (merged_lo < x_lo) {\n merged_hi++; // Carry the overflow bit.\n }\n\n // The \"high resolution\" approximation of e is still a lower bound. Once\n // again, see if the upper bound is large enough to produce a di" +
+ "fferent\n // result. This time, if it does, give up instead of reaching for an even\n // more precise approximation to e.\n //\n // This three-part check is similar to the two-part check that guarded the\n // if block that we're now in, but it has an extra term for the middle 64\n // bits (checking that adding 1 to merged_lo would overflow).\n if (((merged_hi & 0x1FF) == 0x1FF) && ((merged_lo + 1) == 0) &&\n (y_lo + man < man)) {\n return -1;\n }\n\n // Replace the 128-bit x with merged.\n x_hi = merged_hi;\n x_lo = merged_lo;\n }\n\n // As mentioned above, shifting x_hi right by either 9 or 10 bits will leave\n // the top 10 MSBs (bits 54 ..= 63) off and the 11th MSB (bit 53) on. If the\n // MSB (before shifting) was on, adjust ret_exp2 for the larger shift.\n //\n // Having bit 53 on (and higher bits off) means that ret_mantissa is a 54-bit\n // number.\n uint64_t msb = x_hi >> 63;\n uint64_t ret_mantissa = x_hi >> (msb + 9);\n ret_exp2 -= 1 ^ msb;\n\n // IEEE 754 rounds to-near" +
+ "est with ties rounded to-even. Rounding to-even can\n // be tricky. If we're half-way between two exactly representable numbers\n // (x's low 73 bits are zero and the next 2 bits that matter are \"01\"), give\n // up instead of trying to pick the winner.\n //\n // Technically, we could tighten the condition by changing \"73\" to \"73 or 74,\n // depending on msb\", but a flat \"73\" is simpler.\n if ((x_lo == 0) && ((x_hi & 0x1FF) == 0) && ((ret_mantissa & 3) == 1)) {\n return -1;\n }\n\n // If we're not halfway then it's rounding to-nearest. Starting with a 54-bit\n // number, carry the lowest bit (bit 0) up if it's on. Regardless of whether\n // it was on or off, shifting right by one then produces a 53-bit number. If\n // carrying up overflowed, shift again.\n ret_mantissa += ret_mantissa & 1;\n ret_mantissa >>= 1;\n // This if block is equivalent to (but benchmarks slightly faster than) the\n // following branchless form:\n // uint64_t overflow_adjustment = ret_mantissa >> 53;\n // ret_mantissa >>= overflo" +
+ "w_adjustment;\n // ret_exp2 += overflow_adjustment;\n if ((ret_mantissa >> 53) > 0) {\n ret_mantissa >>= 1;\n ret_exp2++;\n }\n\n // Starting with a 53-bit number, IEEE 754 double-precision normal numbers\n // have an implicit mantissa bit. Mask that away and keep the low 52 bits.\n ret_mantissa &= 0x000FFFFFFFFFFFFF;\n\n // Pack the bits and return.\n return ((int64_t)(ret_mantissa | (ret_exp2 << 52)));\n}\n\n" +
"" +
"// --------\n\nstatic wuffs_base__result_f64 //\nwuffs_base__private_implementation__parse_number_f64_special(\n wuffs_base__slice_u8 s,\n uint32_t options) {\n do {\n if (options & WUFFS_BASE__PARSE_NUMBER_FXX__REJECT_INF_AND_NAN) {\n goto fail;\n }\n\n uint8_t* p = s.ptr;\n uint8_t* q = s.ptr + s.len;\n\n for (; (p < q) && (*p == '_'); p++) {\n }\n if (p >= q) {\n goto fail;\n }\n\n // Parse sign.\n bool negative = false;\n do {\n if (*p == '+') {\n p++;\n } else if (*p == '-') {\n negative = true;\n p++;\n } else {\n break;\n }\n for (; (p < q) && (*p == '_'); p++) {\n }\n } while (0);\n if (p >= q) {\n goto fail;\n }\n\n bool nan = false;\n switch (p[0]) {\n case 'I':\n case 'i':\n if (((q - p) < 3) || //\n ((p[1] != 'N') && (p[1] != 'n')) || //\n ((p[2] != 'F') && (p[2] != 'f'))) {\n goto fail;\n }\n p += 3;\n\n if ((p >= q) || (*p == '_" +
"')) {\n break;\n } else if (((q - p) < 5) || //\n ((p[0] != 'I') && (p[0] != 'i')) || //\n ((p[1] != 'N') && (p[1] != 'n')) || //\n ((p[2] != 'I') && (p[2] != 'i')) || //\n ((p[3] != 'T') && (p[3] != 't')) || //\n ((p[4] != 'Y') && (p[4] != 'y'))) {\n goto fail;\n }\n p += 5;\n\n if ((p >= q) || (*p == '_')) {\n break;\n }\n goto fail;\n\n case 'N':\n case 'n':\n if (((q - p) < 3) || //\n ((p[1] != 'A') && (p[1] != 'a')) || //\n ((p[2] != 'N') && (p[2] != 'n'))) {\n goto fail;\n }\n p += 3;\n\n if ((p >= q) || (*p == '_')) {\n nan = true;\n break;\n }\n goto fail;\n\n default:\n goto fail;\n }\n\n // Finish.\n for (; (p < q) && (*p == '_'); p++) {\n }\n if (p != q) {\n goto fail;\n }\n wuffs_base__result_f64 ret;\n" +
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index 855abda..e381a28 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -11612,8 +11612,21 @@
man <<= clz;
// Calculate the return value's base-2 exponent. We might tweak it by ±1
- // later, but its initial value comes from the look-up table and clz.
- uint64_t ret_exp2 = ((uint64_t)po10[4]) - ((uint64_t)clz);
+ // later, but its initial value comes from a linear scaling of exp10,
+ // converting from power-of-10 to power-of-2, and adjusting by clz.
+ //
+ // The magic constants are:
+ // - 1087 = 1023 + 64. The 1023 is the f64 exponent bias. The 64 is because
+ // the look-up table uses 64-bit mantissas.
+ // - 217706 is such that the ratio 217706 / 65536 ≈ 3.321930 is close enough
+ // (over the practical range of exp10) to log(10) / log(2) ≈ 3.321928.
+ // - 65536 = 1<<16 is arbitrary but a power of 2, so division is a shift.
+ //
+ // Equality of the linearly-scaled value and the actual power-of-2, over the
+ // range of exp10 arguments that this function accepts, is confirmed by
+ // script/print-mpb-powers-of-10.go
+ uint64_t ret_exp2 =
+ ((uint64_t)(((217706 * exp10) >> 16) + 1087)) - ((uint64_t)clz);
// Multiply the two mantissas. Normalization means that both mantissas are at
// least (1<<63), so the 128-bit product must be at least (1<<126). The high
diff --git a/script/print-mpb-powers-of-10.go b/script/print-mpb-powers-of-10.go
index 82ec20e..160567a 100644
--- a/script/print-mpb-powers-of-10.go
+++ b/script/print-mpb-powers-of-10.go
@@ -128,6 +128,14 @@
return fmt.Errorf("invalid hexadecimal representation %q", hex)
}
+ // Confirm that the linear approximation to the biased-value-of-n is
+ // correct for this particular value of e.
+ approxN := uint32(((217706 * e) >> 16) + 1087)
+ biasedN := bias + uint32(n)
+ if approxN != biasedN {
+ return fmt.Errorf("biased-n approximation: have %d, want %d", approxN, biasedN)
+ }
+
fmt.Printf(" 0x%s, 0x%s, 0x%s, 0x%s, 0x%04X, // 1e%-04d",
hex[24:], hex[16:24], hex[8:16], hex[:8], uint32(n)+bias, e)
if *detail {