Put wuffs_base__multiply_u64__output fields in little-endian order (lo before hi)
diff --git a/internal/cgen/base/fundamental-public.h b/internal/cgen/base/fundamental-public.h
index 744f1cd..c7f9c3b 100644
--- a/internal/cgen/base/fundamental-public.h
+++ b/internal/cgen/base/fundamental-public.h
@@ -438,8 +438,8 @@
// --------
typedef struct {
- uint64_t hi;
uint64_t lo;
+ uint64_t hi;
} wuffs_base__multiply_u64__output;
// wuffs_base__multiply_u64 returns x*y as a 128-bit value.
@@ -457,8 +457,8 @@
uint64_t w2 = t >> 32;
w1 += x0 * y1;
wuffs_base__multiply_u64__output o;
- o.hi = (x1 * y1) + w2 + (w1 >> 32);
o.lo = x * y;
+ o.hi = (x1 * y1) + w2 + (w1 >> 32);
return o;
}
diff --git a/internal/cgen/data/data.go b/internal/cgen/data/data.go
index b64c4a3..6884997 100644
--- a/internal/cgen/data/data.go
+++ b/internal/cgen/data/data.go
@@ -77,7 +77,7 @@
"// --------\n\n// Saturating arithmetic (sat_add, sat_sub) branchless bit-twiddling algorithms\n// are per https://locklessinc.com/articles/sat_arithmetic/\n//\n// It is important that the underlying types are unsigned integers, as signed\n// integer arithmetic overflow is undefined behavior in C.\n\nstatic inline uint8_t //\nwuffs_base__u8__sat_add(uint8_t x, uint8_t y) {\n uint8_t res = (uint8_t)(x + y);\n res |= (uint8_t)(-(res < x));\n return res;\n}\n\nstatic inline uint8_t //\nwuffs_base__u8__sat_sub(uint8_t x, uint8_t y) {\n uint8_t res = (uint8_t)(x - y);\n res &= (uint8_t)(-(res <= x));\n return res;\n}\n\nstatic inline uint16_t //\nwuffs_base__u16__sat_add(uint16_t x, uint16_t y) {\n uint16_t res = (uint16_t)(x + y);\n res |= (uint16_t)(-(res < x));\n return res;\n}\n\nstatic inline uint16_t //\nwuffs_base__u16__sat_sub(uint16_t x, uint16_t y) {\n uint16_t res = (uint16_t)(x - y);\n res &= (uint16_t)(-(res <= x));\n return res;\n}\n\nstatic inline uint32_t //\nwuffs_base__u32__sat_add(uint32_t x, uint32_t y) {\n uint32" +
"_t res = (uint32_t)(x + y);\n res |= (uint32_t)(-(res < x));\n return res;\n}\n\nstatic inline uint32_t //\nwuffs_base__u32__sat_sub(uint32_t x, uint32_t y) {\n uint32_t res = (uint32_t)(x - y);\n res &= (uint32_t)(-(res <= x));\n return res;\n}\n\nstatic inline uint64_t //\nwuffs_base__u64__sat_add(uint64_t x, uint64_t y) {\n uint64_t res = (uint64_t)(x + y);\n res |= (uint64_t)(-(res < x));\n return res;\n}\n\nstatic inline uint64_t //\nwuffs_base__u64__sat_sub(uint64_t x, uint64_t y) {\n uint64_t res = (uint64_t)(x - y);\n res &= (uint64_t)(-(res <= x));\n return res;\n}\n\n" +
"" +
- "// --------\n\ntypedef struct {\n uint64_t hi;\n uint64_t lo;\n} wuffs_base__multiply_u64__output;\n\n// wuffs_base__multiply_u64 returns x*y as a 128-bit value.\n//\n// The maximum inclusive output hi_lo is 0xFFFFFFFFFFFFFFFE_0000000000000001.\nstatic inline wuffs_base__multiply_u64__output //\nwuffs_base__multiply_u64(uint64_t x, uint64_t y) {\n uint64_t x0 = x & 0xFFFFFFFF;\n uint64_t x1 = x >> 32;\n uint64_t y0 = y & 0xFFFFFFFF;\n uint64_t y1 = y >> 32;\n uint64_t w0 = x0 * y0;\n uint64_t t = (x1 * y0) + (w0 >> 32);\n uint64_t w1 = t & 0xFFFFFFFF;\n uint64_t w2 = t >> 32;\n w1 += x0 * y1;\n wuffs_base__multiply_u64__output o;\n o.hi = (x1 * y1) + w2 + (w1 >> 32);\n o.lo = x * y;\n return o;\n}\n\n" +
+ "// --------\n\ntypedef struct {\n uint64_t lo;\n uint64_t hi;\n} wuffs_base__multiply_u64__output;\n\n// wuffs_base__multiply_u64 returns x*y as a 128-bit value.\n//\n// The maximum inclusive output hi_lo is 0xFFFFFFFFFFFFFFFE_0000000000000001.\nstatic inline wuffs_base__multiply_u64__output //\nwuffs_base__multiply_u64(uint64_t x, uint64_t y) {\n uint64_t x0 = x & 0xFFFFFFFF;\n uint64_t x1 = x >> 32;\n uint64_t y0 = y & 0xFFFFFFFF;\n uint64_t y1 = y >> 32;\n uint64_t w0 = x0 * y0;\n uint64_t t = (x1 * y0) + (w0 >> 32);\n uint64_t w1 = t & 0xFFFFFFFF;\n uint64_t w2 = t >> 32;\n w1 += x0 * y1;\n wuffs_base__multiply_u64__output o;\n o.lo = x * y;\n o.hi = (x1 * y1) + w2 + (w1 >> 32);\n return o;\n}\n\n" +
"" +
"// --------\n\n#if defined(__GNUC__) && (__SIZEOF_LONG__ == 8)\n\nstatic inline uint32_t //\nwuffs_base__count_leading_zeroes_u64(uint64_t u) {\n return u ? ((uint32_t)(__builtin_clzl(u))) : 64u;\n}\n\n#else\n\nstatic inline uint32_t //\nwuffs_base__count_leading_zeroes_u64(uint64_t u) {\n if (u == 0) {\n return 64;\n }\n\n uint32_t n = 0;\n if ((u >> 32) == 0) {\n n |= 32;\n u <<= 32;\n }\n if ((u >> 48) == 0) {\n n |= 16;\n u <<= 16;\n }\n if ((u >> 56) == 0) {\n n |= 8;\n u <<= 8;\n }\n if ((u >> 60) == 0) {\n n |= 4;\n u <<= 4;\n }\n if ((u >> 62) == 0) {\n n |= 2;\n u <<= 2;\n }\n if ((u >> 63) == 0) {\n n |= 1;\n u <<= 1;\n }\n return n;\n}\n\n#endif // defined(__GNUC__) && (__SIZEOF_LONG__ == 8)\n\n" +
"" +
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index 02cf276..a634200 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -512,8 +512,8 @@
// --------
typedef struct {
- uint64_t hi;
uint64_t lo;
+ uint64_t hi;
} wuffs_base__multiply_u64__output;
// wuffs_base__multiply_u64 returns x*y as a 128-bit value.
@@ -531,8 +531,8 @@
uint64_t w2 = t >> 32;
w1 += x0 * y1;
wuffs_base__multiply_u64__output o;
- o.hi = (x1 * y1) + w2 + (w1 >> 32);
o.lo = x * y;
+ o.hi = (x1 * y1) + w2 + (w1 >> 32);
return o;
}