Optimize high_prec_dec__round_just_enough

name                                                               old time/op  new time/op  delta
wuffs_strconv_render_number_f64_just_enough_fractions/clang5       56.4µs ± 1%  56.3µs ± 1%     ~     (p=1.000 n=5+5)
wuffs_strconv_render_number_f64_just_enough_small_integers/clang5  4.12µs ± 2%  1.00µs ± 1%  -75.65%  (p=0.008 n=5+5)
wuffs_strconv_render_number_f64_just_enough_fractions/gcc7         54.3µs ± 0%  54.2µs ± 0%     ~     (p=0.151 n=5+5)
wuffs_strconv_render_number_f64_just_enough_small_integers/gcc7    4.16µs ± 0%  1.01µs ± 0%  -75.77%  (p=0.000 n=4+5)
diff --git a/internal/cgen/base/f64conv-submodule.c b/internal/cgen/base/f64conv-submodule.c
index 5e48340..e82ccba 100644
--- a/internal/cgen/base/f64conv-submodule.c
+++ b/internal/cgen/base/f64conv-submodule.c
@@ -727,8 +727,9 @@
   // Let f be the floating point number represented by exp2 and mantissa (and
   // also the number in h): the number (mantissa * (2 ** (exp2 - 52))).
   //
-  // If f is zero, we can return early.
-  if (mantissa == 0) {
+  // If f is zero or a small integer, we can return early.
+  if ((mantissa == 0) ||
+      ((exp2 < 53) && (h->decimal_point >= ((int32_t)(h->num_digits))))) {
     return;
   }
 
diff --git a/internal/cgen/data/data.go b/internal/cgen/data/data.go
index 14335a7..b70c85d 100644
--- a/internal/cgen/data/data.go
+++ b/internal/cgen/data/data.go
@@ -59,13 +59,13 @@
 	"" +
 	"// --------\n\n// wuffs_base__private_implementation__high_prec_dec__round_etc rounds h's\n// number. For those functions that take an n argument, rounding produces at\n// most n digits (which is not necessarily at most n decimal places). Negative\n// n values are ignored, as well as any n greater than or equal to h's number\n// of digits. The etc__round_just_enough function implicitly chooses an n to\n// implement WUFFS_BASE__RENDER_NUMBER_FXX__JUST_ENOUGH_PRECISION.\n//\n// Preconditions:\n//  - h is non-NULL.\n//  - h->decimal_point is \"not extreme\".\n//\n// \"Not extreme\" means within\n// ±WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE.\n\nstatic void  //\nwuffs_base__private_implementation__high_prec_dec__round_down(\n    wuffs_base__private_implementation__high_prec_dec* h,\n    int32_t n) {\n  if ((n < 0) || (h->num_digits <= (uint32_t)n)) {\n    return;\n  }\n  h->num_digits = (uint32_t)(n);\n  wuffs_base__private_implementation__high_prec_dec__trim(h);\n}\n\nstatic void  //\nwuffs_base__private_implementation__hi" +
 	"gh_prec_dec__round_up(\n    wuffs_base__private_implementation__high_prec_dec* h,\n    int32_t n) {\n  if ((n < 0) || (h->num_digits <= (uint32_t)n)) {\n    return;\n  }\n\n  for (n--; n >= 0; n--) {\n    if (h->digits[n] < 9) {\n      h->digits[n]++;\n      h->num_digits = (uint32_t)(n + 1);\n      return;\n    }\n  }\n\n  // The number is all 9s. Change to a single 1 and adjust the decimal point.\n  h->digits[0] = 1;\n  h->num_digits = 1;\n  h->decimal_point++;\n}\n\nstatic void  //\nwuffs_base__private_implementation__high_prec_dec__round_nearest(\n    wuffs_base__private_implementation__high_prec_dec* h,\n    int32_t n) {\n  if ((n < 0) || (h->num_digits <= (uint32_t)n)) {\n    return;\n  }\n  bool up = h->digits[n] >= 5;\n  if ((h->digits[n] == 5) && ((n + 1) == ((int32_t)(h->num_digits)))) {\n    up = h->truncated ||  //\n         ((n > 0) && ((h->digits[n - 1] & 1) != 0));\n  }\n\n  if (up) {\n    wuffs_base__private_implementation__high_prec_dec__round_up(h, n);\n  } else {\n    wuffs_base__private_implementation__high_prec_dec__round_do" +
-	"wn(h, n);\n  }\n}\n\nstatic void  //\nwuffs_base__private_implementation__high_prec_dec__round_just_enough(\n    wuffs_base__private_implementation__high_prec_dec* h,\n    int32_t exp2,\n    uint64_t mantissa) {\n  // The magic numbers 52 and 53 in this function are because IEEE 754 double\n  // precision has 52 mantissa bits.\n  //\n  // Let f be the floating point number represented by exp2 and mantissa (and\n  // also the number in h): the number (mantissa * (2 ** (exp2 - 52))).\n  //\n  // If f is zero, we can return early.\n  if (mantissa == 0) {\n    return;\n  }\n\n  // The smallest normal f has an exp2 of -1022 and a mantissa of (1 << 52).\n  // Subnormal numbers have the same exp2 but a smaller mantissa.\n  static const int32_t min_incl_normal_exp2 = -1022;\n  static const uint64_t min_incl_normal_mantissa = 0x0010000000000000ul;\n\n  // Compute lower and upper bounds such that any number between them (possibly\n  // inclusive) will round to f. First, the lower bound. Our number f is:\n  //   ((mantissa + 0)         * (2 ** ( " +
-	" exp2 - 52)))\n  //\n  // The next lowest floating point number is:\n  //   ((mantissa - 1)         * (2 ** (  exp2 - 52)))\n  // unless (mantissa - 1) drops the (1 << 52) bit and exp2 is not the\n  // min_incl_normal_exp2. Either way, call it:\n  //   ((l_mantissa)           * (2 ** (l_exp2 - 52)))\n  //\n  // The lower bound is halfway between them (noting that 52 became 53):\n  //   (((2 * l_mantissa) + 1) * (2 ** (l_exp2 - 53)))\n  int32_t l_exp2 = exp2;\n  uint64_t l_mantissa = mantissa - 1;\n  if ((exp2 > min_incl_normal_exp2) && (mantissa <= min_incl_normal_mantissa)) {\n    l_exp2 = exp2 - 1;\n    l_mantissa = (2 * mantissa) - 1;\n  }\n  wuffs_base__private_implementation__high_prec_dec lower;\n  wuffs_base__private_implementation__high_prec_dec__assign(\n      &lower, (2 * l_mantissa) + 1, false);\n  wuffs_base__private_implementation__high_prec_dec__lshift(&lower,\n                                                            l_exp2 - 53);\n\n  // Next, the upper bound. Our number f is:\n  //   ((mantissa + 0)       * (2 **" +
-	" (exp2 - 52)))\n  //\n  // The next highest floating point number is:\n  //   ((mantissa + 1)       * (2 ** (exp2 - 52)))\n  //\n  // The upper bound is halfway between them (noting that 52 became 53):\n  //   (((2 * mantissa) + 1) * (2 ** (exp2 - 53)))\n  wuffs_base__private_implementation__high_prec_dec upper;\n  wuffs_base__private_implementation__high_prec_dec__assign(\n      &upper, (2 * mantissa) + 1, false);\n  wuffs_base__private_implementation__high_prec_dec__lshift(&upper, exp2 - 53);\n\n  // The lower and upper bounds are possible outputs only if the original\n  // mantissa is even, so that IEEE round-to-even would round to the original\n  // mantissa and not its neighbors.\n  bool inclusive = (mantissa & 1) == 0;\n\n  // As we walk the digits, we want to know whether rounding up would fall\n  // within the upper bound. This is tracked by upper_delta:\n  //  - When -1, the digits of h and upper are the same so far.\n  //  - When +0, we saw a difference of 1 between h and upper on a previous\n  //    digit and subsequen" +
-	"tly only 9s for h and 0s for upper. Thus, rounding\n  //    up may fall outside of the bound if !inclusive.\n  //  - When +1, the difference is greater than 1 and we know that rounding up\n  //    falls within the bound.\n  //\n  // This is a state machine with three states. The numerical value for each\n  // state (-1, +0 or +1) isn't important, other than their order.\n  int upper_delta = -1;\n\n  // We can now figure out the shortest number of digits required. Walk the\n  // digits until h has distinguished itself from lower or upper.\n  //\n  // The zi and zd variables are indexes and digits, for z in l (lower), h (the\n  // number) and u (upper).\n  //\n  // The lower, h and upper numbers may have their decimal points at different\n  // places. In this case, upper is the longest, so we iterate ui starting from\n  // 0 and iterate li and hi starting from either 0 or -1.\n  int32_t ui = 0;\n  for (;; ui++) {\n    // Calculate hd, the middle number's digit.\n    int32_t hi = ui - upper.decimal_point + h->decimal_point;\n    if (" +
-	"hi >= ((int32_t)(h->num_digits))) {\n      break;\n    }\n    uint8_t hd = (((uint32_t)hi) < h->num_digits) ? h->digits[hi] : 0;\n\n    // Calculate ld, the lower bound's digit.\n    int32_t li = ui - upper.decimal_point + lower.decimal_point;\n    uint8_t ld = (((uint32_t)li) < lower.num_digits) ? lower.digits[li] : 0;\n\n    // We can round down (truncate) if lower has a different digit than h or if\n    // lower is inclusive and is exactly the result of rounding down (i.e. we\n    // have reached the final digit of lower).\n    bool can_round_down =\n        (ld != hd) ||  //\n        (inclusive && ((li + 1) == ((int32_t)(lower.num_digits))));\n\n    // Calculate ud, the upper bound's digit, and update upper_delta.\n    uint8_t ud = (((uint32_t)ui) < upper.num_digits) ? upper.digits[ui] : 0;\n    if (upper_delta < 0) {\n      if ((hd + 1) < ud) {\n        // For example:\n        // h     = 12345???\n        // upper = 12347???\n        upper_delta = +1;\n      } else if (hd != ud) {\n        // For example:\n        // h     = 123" +
-	"45???\n        // upper = 12346???\n        upper_delta = +0;\n      }\n    } else if (upper_delta == 0) {\n      if ((hd != 9) || (ud != 0)) {\n        // For example:\n        // h     = 1234598?\n        // upper = 1234600?\n        upper_delta = +1;\n      }\n    }\n\n    // We can round up if upper has a different digit than h and either upper\n    // is inclusive or upper is bigger than the result of rounding up.\n    bool can_round_up =\n        (upper_delta > 0) ||    //\n        ((upper_delta == 0) &&  //\n         (inclusive || ((ui + 1) < ((int32_t)(upper.num_digits)))));\n\n    // If we can round either way, round to nearest. If we can round only one\n    // way, do it. If we can't round, continue the loop.\n    if (can_round_down) {\n      if (can_round_up) {\n        wuffs_base__private_implementation__high_prec_dec__round_nearest(\n            h, hi + 1);\n        return;\n      } else {\n        wuffs_base__private_implementation__high_prec_dec__round_down(h,\n                                                              " +
-	"        hi + 1);\n        return;\n      }\n    } else {\n      if (can_round_up) {\n        wuffs_base__private_implementation__high_prec_dec__round_up(h, hi + 1);\n        return;\n      }\n    }\n  }\n}\n\n" +
+	"wn(h, n);\n  }\n}\n\nstatic void  //\nwuffs_base__private_implementation__high_prec_dec__round_just_enough(\n    wuffs_base__private_implementation__high_prec_dec* h,\n    int32_t exp2,\n    uint64_t mantissa) {\n  // The magic numbers 52 and 53 in this function are because IEEE 754 double\n  // precision has 52 mantissa bits.\n  //\n  // Let f be the floating point number represented by exp2 and mantissa (and\n  // also the number in h): the number (mantissa * (2 ** (exp2 - 52))).\n  //\n  // If f is zero or a small integer, we can return early.\n  if ((mantissa == 0) ||\n      ((exp2 < 53) && (h->decimal_point >= ((int32_t)(h->num_digits))))) {\n    return;\n  }\n\n  // The smallest normal f has an exp2 of -1022 and a mantissa of (1 << 52).\n  // Subnormal numbers have the same exp2 but a smaller mantissa.\n  static const int32_t min_incl_normal_exp2 = -1022;\n  static const uint64_t min_incl_normal_mantissa = 0x0010000000000000ul;\n\n  // Compute lower and upper bounds such that any number between them (possibly\n  // inclusive) wil" +
+	"l round to f. First, the lower bound. Our number f is:\n  //   ((mantissa + 0)         * (2 ** (  exp2 - 52)))\n  //\n  // The next lowest floating point number is:\n  //   ((mantissa - 1)         * (2 ** (  exp2 - 52)))\n  // unless (mantissa - 1) drops the (1 << 52) bit and exp2 is not the\n  // min_incl_normal_exp2. Either way, call it:\n  //   ((l_mantissa)           * (2 ** (l_exp2 - 52)))\n  //\n  // The lower bound is halfway between them (noting that 52 became 53):\n  //   (((2 * l_mantissa) + 1) * (2 ** (l_exp2 - 53)))\n  int32_t l_exp2 = exp2;\n  uint64_t l_mantissa = mantissa - 1;\n  if ((exp2 > min_incl_normal_exp2) && (mantissa <= min_incl_normal_mantissa)) {\n    l_exp2 = exp2 - 1;\n    l_mantissa = (2 * mantissa) - 1;\n  }\n  wuffs_base__private_implementation__high_prec_dec lower;\n  wuffs_base__private_implementation__high_prec_dec__assign(\n      &lower, (2 * l_mantissa) + 1, false);\n  wuffs_base__private_implementation__high_prec_dec__lshift(&lower,\n                                                            " +
+	"l_exp2 - 53);\n\n  // Next, the upper bound. Our number f is:\n  //   ((mantissa + 0)       * (2 ** (exp2 - 52)))\n  //\n  // The next highest floating point number is:\n  //   ((mantissa + 1)       * (2 ** (exp2 - 52)))\n  //\n  // The upper bound is halfway between them (noting that 52 became 53):\n  //   (((2 * mantissa) + 1) * (2 ** (exp2 - 53)))\n  wuffs_base__private_implementation__high_prec_dec upper;\n  wuffs_base__private_implementation__high_prec_dec__assign(\n      &upper, (2 * mantissa) + 1, false);\n  wuffs_base__private_implementation__high_prec_dec__lshift(&upper, exp2 - 53);\n\n  // The lower and upper bounds are possible outputs only if the original\n  // mantissa is even, so that IEEE round-to-even would round to the original\n  // mantissa and not its neighbors.\n  bool inclusive = (mantissa & 1) == 0;\n\n  // As we walk the digits, we want to know whether rounding up would fall\n  // within the upper bound. This is tracked by upper_delta:\n  //  - When -1, the digits of h and upper are the same so far.\n  //  -" +
+	" When +0, we saw a difference of 1 between h and upper on a previous\n  //    digit and subsequently only 9s for h and 0s for upper. Thus, rounding\n  //    up may fall outside of the bound if !inclusive.\n  //  - When +1, the difference is greater than 1 and we know that rounding up\n  //    falls within the bound.\n  //\n  // This is a state machine with three states. The numerical value for each\n  // state (-1, +0 or +1) isn't important, other than their order.\n  int upper_delta = -1;\n\n  // We can now figure out the shortest number of digits required. Walk the\n  // digits until h has distinguished itself from lower or upper.\n  //\n  // The zi and zd variables are indexes and digits, for z in l (lower), h (the\n  // number) and u (upper).\n  //\n  // The lower, h and upper numbers may have their decimal points at different\n  // places. In this case, upper is the longest, so we iterate ui starting from\n  // 0 and iterate li and hi starting from either 0 or -1.\n  int32_t ui = 0;\n  for (;; ui++) {\n    // Calculate hd, t" +
+	"he middle number's digit.\n    int32_t hi = ui - upper.decimal_point + h->decimal_point;\n    if (hi >= ((int32_t)(h->num_digits))) {\n      break;\n    }\n    uint8_t hd = (((uint32_t)hi) < h->num_digits) ? h->digits[hi] : 0;\n\n    // Calculate ld, the lower bound's digit.\n    int32_t li = ui - upper.decimal_point + lower.decimal_point;\n    uint8_t ld = (((uint32_t)li) < lower.num_digits) ? lower.digits[li] : 0;\n\n    // We can round down (truncate) if lower has a different digit than h or if\n    // lower is inclusive and is exactly the result of rounding down (i.e. we\n    // have reached the final digit of lower).\n    bool can_round_down =\n        (ld != hd) ||  //\n        (inclusive && ((li + 1) == ((int32_t)(lower.num_digits))));\n\n    // Calculate ud, the upper bound's digit, and update upper_delta.\n    uint8_t ud = (((uint32_t)ui) < upper.num_digits) ? upper.digits[ui] : 0;\n    if (upper_delta < 0) {\n      if ((hd + 1) < ud) {\n        // For example:\n        // h     = 12345???\n        // upper = 12347???\n     " +
+	"   upper_delta = +1;\n      } else if (hd != ud) {\n        // For example:\n        // h     = 12345???\n        // upper = 12346???\n        upper_delta = +0;\n      }\n    } else if (upper_delta == 0) {\n      if ((hd != 9) || (ud != 0)) {\n        // For example:\n        // h     = 1234598?\n        // upper = 1234600?\n        upper_delta = +1;\n      }\n    }\n\n    // We can round up if upper has a different digit than h and either upper\n    // is inclusive or upper is bigger than the result of rounding up.\n    bool can_round_up =\n        (upper_delta > 0) ||    //\n        ((upper_delta == 0) &&  //\n         (inclusive || ((ui + 1) < ((int32_t)(upper.num_digits)))));\n\n    // If we can round either way, round to nearest. If we can round only one\n    // way, do it. If we can't round, continue the loop.\n    if (can_round_down) {\n      if (can_round_up) {\n        wuffs_base__private_implementation__high_prec_dec__round_nearest(\n            h, hi + 1);\n        return;\n      } else {\n        wuffs_base__private_implementat" +
+	"ion__high_prec_dec__round_down(h,\n                                                                      hi + 1);\n        return;\n      }\n    } else {\n      if (can_round_up) {\n        wuffs_base__private_implementation__high_prec_dec__round_up(h, hi + 1);\n        return;\n      }\n    }\n  }\n}\n\n" +
 	"" +
 	"// --------\n\n// The wuffs_base__private_implementation__etc_powers_of_10 tables were printed\n// by script/print-mpb-powers-of-10.go. That script has an optional -comments\n// flag, whose output is not copied here, which prints further detail.\n//\n// These tables are used in\n// wuffs_base__private_implementation__medium_prec_bin__assign_from_hpd.\n\n// wuffs_base__private_implementation__big_powers_of_10 contains approximations\n// to the powers of 10, ranging from 1e-348 to 1e+340, with the exponent\n// stepping by 8: -348, -340, -332, ..., -12, -4, +4, +12, ..., +340. Each step\n// consists of three uint32_t elements. There are 87 triples, 87 * 3 = 261.\n//\n// For example, the third approximation, for 1e-332, consists of the uint32_t\n// triple (0x3055AC76, 0x8B16FB20, 0xFFFFFB72). The first two of that triple\n// are a little-endian uint64_t value: 0x8B16FB203055AC76. The last one is an\n// int32_t value: -1166. Together, they represent the approximation:\n//   1e-332 ≈ 0x8B16FB203055AC76 * (2 ** -1166)\n// Similarly," +
 	" the (0x00000000, 0x9C400000, 0xFFFFFFCE) uint32_t triple means:\n//   1e+4   ≈ 0x9C40000000000000 * (2 **   -50)  // This approx'n is exact.\n// Similarly, the (0xD4C4FB27, 0xED63A231, 0x000000A2) uint32_t triple means:\n//   1e+68  ≈ 0xED63A231D4C4FB27 * (2 **   162)\nstatic const uint32_t\n    wuffs_base__private_implementation__big_powers_of_10[261] = {\n        0x081C0288, 0xFA8FD5A0, 0xFFFFFB3C, 0xA23EBF76, 0xBAAEE17F, 0xFFFFFB57,\n        0x3055AC76, 0x8B16FB20, 0xFFFFFB72, 0x5DCE35EA, 0xCF42894A, 0xFFFFFB8C,\n        0x55653B2D, 0x9A6BB0AA, 0xFFFFFBA7, 0x3D1A45DF, 0xE61ACF03, 0xFFFFFBC1,\n        0xC79AC6CA, 0xAB70FE17, 0xFFFFFBDC, 0xBEBCDC4F, 0xFF77B1FC, 0xFFFFFBF6,\n        0x416BD60C, 0xBE5691EF, 0xFFFFFC11, 0x907FFC3C, 0x8DD01FAD, 0xFFFFFC2C,\n        0x31559A83, 0xD3515C28, 0xFFFFFC46, 0xADA6C9B5, 0x9D71AC8F, 0xFFFFFC61,\n        0x23EE8BCB, 0xEA9C2277, 0xFFFFFC7B, 0x4078536D, 0xAECC4991, 0xFFFFFC96,\n        0x5DB6CE57, 0x823C1279, 0xFFFFFCB1, 0x4DFB5637, 0xC2109436, 0xFFFFFCCB,\n        0x3848984F, 0x909" +
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index 9032742..24a88d0 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -9673,8 +9673,9 @@
   // Let f be the floating point number represented by exp2 and mantissa (and
   // also the number in h): the number (mantissa * (2 ** (exp2 - 52))).
   //
-  // If f is zero, we can return early.
-  if (mantissa == 0) {
+  // If f is zero or a small integer, we can return early.
+  if ((mantissa == 0) ||
+      ((exp2 < 53) && (h->decimal_point >= ((int32_t)(h->num_digits))))) {
     return;
   }
 
diff --git a/script/print-render-number-f64-tests.go b/script/print-render-number-f64-tests.go
index b4aec2b..df0a548 100644
--- a/script/print-render-number-f64-tests.go
+++ b/script/print-render-number-f64-tests.go
@@ -160,6 +160,8 @@
 	0x4330000000000000,
 	0x4330000000000001,
 	0x4330000000000002,
+	0x433FFFFFFFFFFFFE,
+	0x433FFFFFFFFFFFFF,
 	0x4340000000000000,
 	0x4340000000000001,
 	0x4340000000000002,
diff --git a/test/c/std/json.c b/test/c/std/json.c
index 5924dd4..56740ae 100644
--- a/test/c/std/json.c
+++ b/test/c/std/json.c
@@ -659,6 +659,8 @@
       {.want = 0x4330000000000001, .str = "4503599627370497"},
       {.want = 0x4330000000000002, .str = "4503599627370497.5"},
       {.want = 0x4330000000000002, .str = "4503599627370498"},
+      {.want = 0x433FFFFFFFFFFFFE, .str = "9007199254740990"},
+      {.want = 0x433FFFFFFFFFFFFF, .str = "9.007199254740991e+15"},
       {.want = 0x4340000000000000, .str = "9007199254740992"},  // 1 << 53.
       {.want = 0x4340000000000000, .str = "9007199254740993"},
       {.want = 0x4340000000000001, .str = "9007199254740994"},
@@ -1275,6 +1277,24 @@
           .want_4g = "4.504e+15",
       },
       {
+          .x = 0x433FFFFFFFFFFFFE,
+          .want__e = "9.00719925474099e+15",
+          .want__f = "9007199254740990",
+          .want_0g = "9e+15",
+          .want_2e = "9.01e+15",
+          .want_3f = "9007199254740990.000",
+          .want_4g = "9.007e+15",
+      },
+      {
+          .x = 0x433FFFFFFFFFFFFF,
+          .want__e = "9.007199254740991e+15",
+          .want__f = "9007199254740991",
+          .want_0g = "9e+15",
+          .want_2e = "9.01e+15",
+          .want_3f = "9007199254740991.000",
+          .want_4g = "9.007e+15",
+      },
+      {
           .x = 0x4340000000000000,
           .want__e = "9.007199254740992e+15",
           .want__f = "9007199254740992",