Let wuffs_base__multiply_u64 use __uint128_t
diff --git a/internal/cgen/base/fundamental-public.h b/internal/cgen/base/fundamental-public.h
index c7f9c3b..0fc4680 100644
--- a/internal/cgen/base/fundamental-public.h
+++ b/internal/cgen/base/fundamental-public.h
@@ -447,6 +447,13 @@
 // The maximum inclusive output hi_lo is 0xFFFFFFFFFFFFFFFE_0000000000000001.
 static inline wuffs_base__multiply_u64__output  //
 wuffs_base__multiply_u64(uint64_t x, uint64_t y) {
+#if defined(__SIZEOF_INT128__)
+  __uint128_t z = ((__uint128_t)x) * ((__uint128_t)y);
+  wuffs_base__multiply_u64__output o;
+  o.lo = ((uint64_t)(z));
+  o.hi = ((uint64_t)(z >> 64));
+  return o;
+#else
   uint64_t x0 = x & 0xFFFFFFFF;
   uint64_t x1 = x >> 32;
   uint64_t y0 = y & 0xFFFFFFFF;
@@ -460,6 +467,7 @@
   o.lo = x * y;
   o.hi = (x1 * y1) + w2 + (w1 >> 32);
   return o;
+#endif
 }
 
 // --------
diff --git a/internal/cgen/data/data.go b/internal/cgen/data/data.go
index 6884997..55e342a 100644
--- a/internal/cgen/data/data.go
+++ b/internal/cgen/data/data.go
@@ -77,7 +77,7 @@
 	"// --------\n\n// Saturating arithmetic (sat_add, sat_sub) branchless bit-twiddling algorithms\n// are per https://locklessinc.com/articles/sat_arithmetic/\n//\n// It is important that the underlying types are unsigned integers, as signed\n// integer arithmetic overflow is undefined behavior in C.\n\nstatic inline uint8_t  //\nwuffs_base__u8__sat_add(uint8_t x, uint8_t y) {\n  uint8_t res = (uint8_t)(x + y);\n  res |= (uint8_t)(-(res < x));\n  return res;\n}\n\nstatic inline uint8_t  //\nwuffs_base__u8__sat_sub(uint8_t x, uint8_t y) {\n  uint8_t res = (uint8_t)(x - y);\n  res &= (uint8_t)(-(res <= x));\n  return res;\n}\n\nstatic inline uint16_t  //\nwuffs_base__u16__sat_add(uint16_t x, uint16_t y) {\n  uint16_t res = (uint16_t)(x + y);\n  res |= (uint16_t)(-(res < x));\n  return res;\n}\n\nstatic inline uint16_t  //\nwuffs_base__u16__sat_sub(uint16_t x, uint16_t y) {\n  uint16_t res = (uint16_t)(x - y);\n  res &= (uint16_t)(-(res <= x));\n  return res;\n}\n\nstatic inline uint32_t  //\nwuffs_base__u32__sat_add(uint32_t x, uint32_t y) {\n  uint32" +
 	"_t res = (uint32_t)(x + y);\n  res |= (uint32_t)(-(res < x));\n  return res;\n}\n\nstatic inline uint32_t  //\nwuffs_base__u32__sat_sub(uint32_t x, uint32_t y) {\n  uint32_t res = (uint32_t)(x - y);\n  res &= (uint32_t)(-(res <= x));\n  return res;\n}\n\nstatic inline uint64_t  //\nwuffs_base__u64__sat_add(uint64_t x, uint64_t y) {\n  uint64_t res = (uint64_t)(x + y);\n  res |= (uint64_t)(-(res < x));\n  return res;\n}\n\nstatic inline uint64_t  //\nwuffs_base__u64__sat_sub(uint64_t x, uint64_t y) {\n  uint64_t res = (uint64_t)(x - y);\n  res &= (uint64_t)(-(res <= x));\n  return res;\n}\n\n" +
 	"" +
-	"// --------\n\ntypedef struct {\n  uint64_t lo;\n  uint64_t hi;\n} wuffs_base__multiply_u64__output;\n\n// wuffs_base__multiply_u64 returns x*y as a 128-bit value.\n//\n// The maximum inclusive output hi_lo is 0xFFFFFFFFFFFFFFFE_0000000000000001.\nstatic inline wuffs_base__multiply_u64__output  //\nwuffs_base__multiply_u64(uint64_t x, uint64_t y) {\n  uint64_t x0 = x & 0xFFFFFFFF;\n  uint64_t x1 = x >> 32;\n  uint64_t y0 = y & 0xFFFFFFFF;\n  uint64_t y1 = y >> 32;\n  uint64_t w0 = x0 * y0;\n  uint64_t t = (x1 * y0) + (w0 >> 32);\n  uint64_t w1 = t & 0xFFFFFFFF;\n  uint64_t w2 = t >> 32;\n  w1 += x0 * y1;\n  wuffs_base__multiply_u64__output o;\n  o.lo = x * y;\n  o.hi = (x1 * y1) + w2 + (w1 >> 32);\n  return o;\n}\n\n" +
+	"// --------\n\ntypedef struct {\n  uint64_t lo;\n  uint64_t hi;\n} wuffs_base__multiply_u64__output;\n\n// wuffs_base__multiply_u64 returns x*y as a 128-bit value.\n//\n// The maximum inclusive output hi_lo is 0xFFFFFFFFFFFFFFFE_0000000000000001.\nstatic inline wuffs_base__multiply_u64__output  //\nwuffs_base__multiply_u64(uint64_t x, uint64_t y) {\n#if defined(__SIZEOF_INT128__)\n  __uint128_t z = ((__uint128_t)x) * ((__uint128_t)y);\n  wuffs_base__multiply_u64__output o;\n  o.lo = ((uint64_t)(z));\n  o.hi = ((uint64_t)(z >> 64));\n  return o;\n#else\n  uint64_t x0 = x & 0xFFFFFFFF;\n  uint64_t x1 = x >> 32;\n  uint64_t y0 = y & 0xFFFFFFFF;\n  uint64_t y1 = y >> 32;\n  uint64_t w0 = x0 * y0;\n  uint64_t t = (x1 * y0) + (w0 >> 32);\n  uint64_t w1 = t & 0xFFFFFFFF;\n  uint64_t w2 = t >> 32;\n  w1 += x0 * y1;\n  wuffs_base__multiply_u64__output o;\n  o.lo = x * y;\n  o.hi = (x1 * y1) + w2 + (w1 >> 32);\n  return o;\n#endif\n}\n\n" +
 	"" +
 	"// --------\n\n#if defined(__GNUC__) && (__SIZEOF_LONG__ == 8)\n\nstatic inline uint32_t  //\nwuffs_base__count_leading_zeroes_u64(uint64_t u) {\n  return u ? ((uint32_t)(__builtin_clzl(u))) : 64u;\n}\n\n#else\n\nstatic inline uint32_t  //\nwuffs_base__count_leading_zeroes_u64(uint64_t u) {\n  if (u == 0) {\n    return 64;\n  }\n\n  uint32_t n = 0;\n  if ((u >> 32) == 0) {\n    n |= 32;\n    u <<= 32;\n  }\n  if ((u >> 48) == 0) {\n    n |= 16;\n    u <<= 16;\n  }\n  if ((u >> 56) == 0) {\n    n |= 8;\n    u <<= 8;\n  }\n  if ((u >> 60) == 0) {\n    n |= 4;\n    u <<= 4;\n  }\n  if ((u >> 62) == 0) {\n    n |= 2;\n    u <<= 2;\n  }\n  if ((u >> 63) == 0) {\n    n |= 1;\n    u <<= 1;\n  }\n  return n;\n}\n\n#endif  // defined(__GNUC__) && (__SIZEOF_LONG__ == 8)\n\n" +
 	"" +
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index a634200..94ac014 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -521,6 +521,13 @@
 // The maximum inclusive output hi_lo is 0xFFFFFFFFFFFFFFFE_0000000000000001.
 static inline wuffs_base__multiply_u64__output  //
 wuffs_base__multiply_u64(uint64_t x, uint64_t y) {
+#if defined(__SIZEOF_INT128__)
+  __uint128_t z = ((__uint128_t)x) * ((__uint128_t)y);
+  wuffs_base__multiply_u64__output o;
+  o.lo = ((uint64_t)(z));
+  o.hi = ((uint64_t)(z >> 64));
+  return o;
+#else
   uint64_t x0 = x & 0xFFFFFFFF;
   uint64_t x1 = x >> 32;
   uint64_t y0 = y & 0xFFFFFFFF;
@@ -534,6 +541,7 @@
   o.lo = x * y;
   o.hi = (x1 * y1) + w2 + (w1 >> 32);
   return o;
+#endif
 }
 
 // --------