Cast the argument of _mm_cvtsi32_si128 to int32_t instead of int in generated C code
diff --git a/internal/cgen/builtin.go b/internal/cgen/builtin.go
index f3e22d2..90101d9 100644
--- a/internal/cgen/builtin.go
+++ b/internal/cgen/builtin.go
@@ -460,7 +460,7 @@
switch method {
case t.IDLoadU32:
- b.writes(" = _mm_cvtsi32_si128((int)(")
+ b.writes(" = _mm_cvtsi32_si128((int32_t)(")
case t.IDLoadU64:
b.writes(" = _mm_cvtsi64_si128((int64_t)(")
case t.IDLoadSlice128:
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index 708040f..482178c 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -31161,12 +31161,12 @@
v_c.len = 4;
uint8_t* i_end0_c = v_c.ptr + (((i_slice_c.len - (size_t)(v_c.ptr - i_slice_c.ptr)) / 8) * 8);
while (v_c.ptr < i_end0_c) {
- v_x128 = _mm_cvtsi32_si128((int)(wuffs_base__peek_u32le__no_bounds_check(v_c.ptr)));
+ v_x128 = _mm_cvtsi32_si128((int32_t)(wuffs_base__peek_u32le__no_bounds_check(v_c.ptr)));
v_x128 = _mm_add_epi8(v_x128, v_a128);
v_a128 = v_x128;
wuffs_base__poke_u32le__no_bounds_check(v_c.ptr, ((uint32_t)(_mm_cvtsi128_si32(v_x128))));
v_c.ptr += 4;
- v_x128 = _mm_cvtsi32_si128((int)(wuffs_base__peek_u32le__no_bounds_check(v_c.ptr)));
+ v_x128 = _mm_cvtsi32_si128((int32_t)(wuffs_base__peek_u32le__no_bounds_check(v_c.ptr)));
v_x128 = _mm_add_epi8(v_x128, v_a128);
v_a128 = v_x128;
wuffs_base__poke_u32le__no_bounds_check(v_c.ptr, ((uint32_t)(_mm_cvtsi128_si32(v_x128))));
@@ -31175,7 +31175,7 @@
v_c.len = 4;
uint8_t* i_end1_c = v_c.ptr + (((i_slice_c.len - (size_t)(v_c.ptr - i_slice_c.ptr)) / 4) * 4);
while (v_c.ptr < i_end1_c) {
- v_x128 = _mm_cvtsi32_si128((int)(wuffs_base__peek_u32le__no_bounds_check(v_c.ptr)));
+ v_x128 = _mm_cvtsi32_si128((int32_t)(wuffs_base__peek_u32le__no_bounds_check(v_c.ptr)));
v_x128 = _mm_add_epi8(v_x128, v_a128);
v_a128 = v_x128;
wuffs_base__poke_u32le__no_bounds_check(v_c.ptr, ((uint32_t)(_mm_cvtsi128_si32(v_x128))));
@@ -31215,13 +31215,13 @@
uint8_t* i_end0_c = v_c.ptr + (((i_slice_c.len - (size_t)(v_c.ptr - i_slice_c.ptr)) / 8) * 8);
while (v_c.ptr < i_end0_c) {
v_p128 = _mm_avg_epu8(_mm_and_si128(v_a128, v_k128), v_b128);
- v_x128 = _mm_cvtsi32_si128((int)(wuffs_base__peek_u32le__no_bounds_check(v_c.ptr)));
+ v_x128 = _mm_cvtsi32_si128((int32_t)(wuffs_base__peek_u32le__no_bounds_check(v_c.ptr)));
v_x128 = _mm_add_epi8(v_x128, v_p128);
v_a128 = v_x128;
wuffs_base__poke_u32le__no_bounds_check(v_c.ptr, ((uint32_t)(_mm_cvtsi128_si32(v_x128))));
v_c.ptr += 4;
v_p128 = _mm_avg_epu8(_mm_and_si128(v_a128, v_k128), v_b128);
- v_x128 = _mm_cvtsi32_si128((int)(wuffs_base__peek_u32le__no_bounds_check(v_c.ptr)));
+ v_x128 = _mm_cvtsi32_si128((int32_t)(wuffs_base__peek_u32le__no_bounds_check(v_c.ptr)));
v_x128 = _mm_add_epi8(v_x128, v_p128);
v_a128 = v_x128;
wuffs_base__poke_u32le__no_bounds_check(v_c.ptr, ((uint32_t)(_mm_cvtsi128_si32(v_x128))));
@@ -31231,7 +31231,7 @@
uint8_t* i_end1_c = v_c.ptr + (((i_slice_c.len - (size_t)(v_c.ptr - i_slice_c.ptr)) / 4) * 4);
while (v_c.ptr < i_end1_c) {
v_p128 = _mm_avg_epu8(_mm_and_si128(v_a128, v_k128), v_b128);
- v_x128 = _mm_cvtsi32_si128((int)(wuffs_base__peek_u32le__no_bounds_check(v_c.ptr)));
+ v_x128 = _mm_cvtsi32_si128((int32_t)(wuffs_base__peek_u32le__no_bounds_check(v_c.ptr)));
v_x128 = _mm_add_epi8(v_x128, v_p128);
v_a128 = v_x128;
wuffs_base__poke_u32le__no_bounds_check(v_c.ptr, ((uint32_t)(_mm_cvtsi128_si32(v_x128))));
@@ -31251,19 +31251,19 @@
v_p.len = 4;
uint8_t* i_end0_c = v_c.ptr + (((i_slice_c.len - (size_t)(v_c.ptr - i_slice_c.ptr)) / 8) * 8);
while (v_c.ptr < i_end0_c) {
- v_b128 = _mm_cvtsi32_si128((int)(wuffs_base__peek_u32le__no_bounds_check(v_p.ptr)));
+ v_b128 = _mm_cvtsi32_si128((int32_t)(wuffs_base__peek_u32le__no_bounds_check(v_p.ptr)));
v_p128 = _mm_avg_epu8(v_a128, v_b128);
v_p128 = _mm_sub_epi8(v_p128, _mm_and_si128(v_k128, _mm_xor_si128(v_a128, v_b128)));
- v_x128 = _mm_cvtsi32_si128((int)(wuffs_base__peek_u32le__no_bounds_check(v_c.ptr)));
+ v_x128 = _mm_cvtsi32_si128((int32_t)(wuffs_base__peek_u32le__no_bounds_check(v_c.ptr)));
v_x128 = _mm_add_epi8(v_x128, v_p128);
v_a128 = v_x128;
wuffs_base__poke_u32le__no_bounds_check(v_c.ptr, ((uint32_t)(_mm_cvtsi128_si32(v_x128))));
v_c.ptr += 4;
v_p.ptr += 4;
- v_b128 = _mm_cvtsi32_si128((int)(wuffs_base__peek_u32le__no_bounds_check(v_p.ptr)));
+ v_b128 = _mm_cvtsi32_si128((int32_t)(wuffs_base__peek_u32le__no_bounds_check(v_p.ptr)));
v_p128 = _mm_avg_epu8(v_a128, v_b128);
v_p128 = _mm_sub_epi8(v_p128, _mm_and_si128(v_k128, _mm_xor_si128(v_a128, v_b128)));
- v_x128 = _mm_cvtsi32_si128((int)(wuffs_base__peek_u32le__no_bounds_check(v_c.ptr)));
+ v_x128 = _mm_cvtsi32_si128((int32_t)(wuffs_base__peek_u32le__no_bounds_check(v_c.ptr)));
v_x128 = _mm_add_epi8(v_x128, v_p128);
v_a128 = v_x128;
wuffs_base__poke_u32le__no_bounds_check(v_c.ptr, ((uint32_t)(_mm_cvtsi128_si32(v_x128))));
@@ -31274,10 +31274,10 @@
v_p.len = 4;
uint8_t* i_end1_c = v_c.ptr + (((i_slice_c.len - (size_t)(v_c.ptr - i_slice_c.ptr)) / 4) * 4);
while (v_c.ptr < i_end1_c) {
- v_b128 = _mm_cvtsi32_si128((int)(wuffs_base__peek_u32le__no_bounds_check(v_p.ptr)));
+ v_b128 = _mm_cvtsi32_si128((int32_t)(wuffs_base__peek_u32le__no_bounds_check(v_p.ptr)));
v_p128 = _mm_avg_epu8(v_a128, v_b128);
v_p128 = _mm_sub_epi8(v_p128, _mm_and_si128(v_k128, _mm_xor_si128(v_a128, v_b128)));
- v_x128 = _mm_cvtsi32_si128((int)(wuffs_base__peek_u32le__no_bounds_check(v_c.ptr)));
+ v_x128 = _mm_cvtsi32_si128((int32_t)(wuffs_base__peek_u32le__no_bounds_check(v_c.ptr)));
v_x128 = _mm_add_epi8(v_x128, v_p128);
v_a128 = v_x128;
wuffs_base__poke_u32le__no_bounds_check(v_c.ptr, ((uint32_t)(_mm_cvtsi128_si32(v_x128))));
@@ -31326,7 +31326,7 @@
v_p.len = 4;
uint8_t* i_end0_c = v_c.ptr + wuffs_base__iterate_total_advance((i_slice_c.len - (size_t)(v_c.ptr - i_slice_c.ptr)), 7, 6);
while (v_c.ptr < i_end0_c) {
- v_b128 = _mm_cvtsi32_si128((int)(wuffs_base__peek_u32le__no_bounds_check(v_p.ptr)));
+ v_b128 = _mm_cvtsi32_si128((int32_t)(wuffs_base__peek_u32le__no_bounds_check(v_p.ptr)));
v_b128 = _mm_unpacklo_epi8(v_b128, v_z128);
v_pa128 = _mm_sub_epi16(v_b128, v_c128);
v_pb128 = _mm_sub_epi16(v_a128, v_c128);
@@ -31336,7 +31336,7 @@
v_pc128 = _mm_abs_epi16(v_pc128);
v_smallest128 = _mm_min_epi16(v_pc128, _mm_min_epi16(v_pb128, v_pa128));
v_p128 = _mm_blendv_epi8(_mm_blendv_epi8(v_c128, v_b128, _mm_cmpeq_epi16(v_smallest128, v_pb128)), v_a128, _mm_cmpeq_epi16(v_smallest128, v_pa128));
- v_x128 = _mm_cvtsi32_si128((int)(wuffs_base__peek_u32le__no_bounds_check(v_c.ptr)));
+ v_x128 = _mm_cvtsi32_si128((int32_t)(wuffs_base__peek_u32le__no_bounds_check(v_c.ptr)));
v_x128 = _mm_unpacklo_epi8(v_x128, v_z128);
v_x128 = _mm_add_epi8(v_x128, v_p128);
v_a128 = v_x128;
@@ -31345,7 +31345,7 @@
wuffs_base__poke_u24le__no_bounds_check(v_c.ptr, ((uint32_t)(_mm_cvtsi128_si32(v_x128))));
v_c.ptr += 3;
v_p.ptr += 3;
- v_b128 = _mm_cvtsi32_si128((int)(wuffs_base__peek_u32le__no_bounds_check(v_p.ptr)));
+ v_b128 = _mm_cvtsi32_si128((int32_t)(wuffs_base__peek_u32le__no_bounds_check(v_p.ptr)));
v_b128 = _mm_unpacklo_epi8(v_b128, v_z128);
v_pa128 = _mm_sub_epi16(v_b128, v_c128);
v_pb128 = _mm_sub_epi16(v_a128, v_c128);
@@ -31355,7 +31355,7 @@
v_pc128 = _mm_abs_epi16(v_pc128);
v_smallest128 = _mm_min_epi16(v_pc128, _mm_min_epi16(v_pb128, v_pa128));
v_p128 = _mm_blendv_epi8(_mm_blendv_epi8(v_c128, v_b128, _mm_cmpeq_epi16(v_smallest128, v_pb128)), v_a128, _mm_cmpeq_epi16(v_smallest128, v_pa128));
- v_x128 = _mm_cvtsi32_si128((int)(wuffs_base__peek_u32le__no_bounds_check(v_c.ptr)));
+ v_x128 = _mm_cvtsi32_si128((int32_t)(wuffs_base__peek_u32le__no_bounds_check(v_c.ptr)));
v_x128 = _mm_unpacklo_epi8(v_x128, v_z128);
v_x128 = _mm_add_epi8(v_x128, v_p128);
v_a128 = v_x128;
@@ -31369,7 +31369,7 @@
v_p.len = 4;
uint8_t* i_end1_c = v_c.ptr + wuffs_base__iterate_total_advance((i_slice_c.len - (size_t)(v_c.ptr - i_slice_c.ptr)), 4, 3);
while (v_c.ptr < i_end1_c) {
- v_b128 = _mm_cvtsi32_si128((int)(wuffs_base__peek_u32le__no_bounds_check(v_p.ptr)));
+ v_b128 = _mm_cvtsi32_si128((int32_t)(wuffs_base__peek_u32le__no_bounds_check(v_p.ptr)));
v_b128 = _mm_unpacklo_epi8(v_b128, v_z128);
v_pa128 = _mm_sub_epi16(v_b128, v_c128);
v_pb128 = _mm_sub_epi16(v_a128, v_c128);
@@ -31379,7 +31379,7 @@
v_pc128 = _mm_abs_epi16(v_pc128);
v_smallest128 = _mm_min_epi16(v_pc128, _mm_min_epi16(v_pb128, v_pa128));
v_p128 = _mm_blendv_epi8(_mm_blendv_epi8(v_c128, v_b128, _mm_cmpeq_epi16(v_smallest128, v_pb128)), v_a128, _mm_cmpeq_epi16(v_smallest128, v_pa128));
- v_x128 = _mm_cvtsi32_si128((int)(wuffs_base__peek_u32le__no_bounds_check(v_c.ptr)));
+ v_x128 = _mm_cvtsi32_si128((int32_t)(wuffs_base__peek_u32le__no_bounds_check(v_c.ptr)));
v_x128 = _mm_unpacklo_epi8(v_x128, v_z128);
v_x128 = _mm_add_epi8(v_x128, v_p128);
v_a128 = v_x128;
@@ -31393,7 +31393,7 @@
v_p.len = 3;
uint8_t* i_end2_c = v_c.ptr + (((i_slice_c.len - (size_t)(v_c.ptr - i_slice_c.ptr)) / 3) * 3);
while (v_c.ptr < i_end2_c) {
- v_b128 = _mm_cvtsi32_si128((int)(wuffs_base__peek_u24le__no_bounds_check(v_p.ptr)));
+ v_b128 = _mm_cvtsi32_si128((int32_t)(wuffs_base__peek_u24le__no_bounds_check(v_p.ptr)));
v_b128 = _mm_unpacklo_epi8(v_b128, v_z128);
v_pa128 = _mm_sub_epi16(v_b128, v_c128);
v_pb128 = _mm_sub_epi16(v_a128, v_c128);
@@ -31403,7 +31403,7 @@
v_pc128 = _mm_abs_epi16(v_pc128);
v_smallest128 = _mm_min_epi16(v_pc128, _mm_min_epi16(v_pb128, v_pa128));
v_p128 = _mm_blendv_epi8(_mm_blendv_epi8(v_c128, v_b128, _mm_cmpeq_epi16(v_smallest128, v_pb128)), v_a128, _mm_cmpeq_epi16(v_smallest128, v_pa128));
- v_x128 = _mm_cvtsi32_si128((int)(wuffs_base__peek_u24le__no_bounds_check(v_c.ptr)));
+ v_x128 = _mm_cvtsi32_si128((int32_t)(wuffs_base__peek_u24le__no_bounds_check(v_c.ptr)));
v_x128 = _mm_unpacklo_epi8(v_x128, v_z128);
v_x128 = _mm_add_epi8(v_x128, v_p128);
v_x128 = _mm_packus_epi16(v_x128, v_x128);
@@ -31452,7 +31452,7 @@
v_p.len = 4;
uint8_t* i_end0_c = v_c.ptr + (((i_slice_c.len - (size_t)(v_c.ptr - i_slice_c.ptr)) / 8) * 8);
while (v_c.ptr < i_end0_c) {
- v_b128 = _mm_cvtsi32_si128((int)(wuffs_base__peek_u32le__no_bounds_check(v_p.ptr)));
+ v_b128 = _mm_cvtsi32_si128((int32_t)(wuffs_base__peek_u32le__no_bounds_check(v_p.ptr)));
v_b128 = _mm_unpacklo_epi8(v_b128, v_z128);
v_pa128 = _mm_sub_epi16(v_b128, v_c128);
v_pb128 = _mm_sub_epi16(v_a128, v_c128);
@@ -31462,7 +31462,7 @@
v_pc128 = _mm_abs_epi16(v_pc128);
v_smallest128 = _mm_min_epi16(v_pc128, _mm_min_epi16(v_pb128, v_pa128));
v_p128 = _mm_blendv_epi8(_mm_blendv_epi8(v_c128, v_b128, _mm_cmpeq_epi16(v_smallest128, v_pb128)), v_a128, _mm_cmpeq_epi16(v_smallest128, v_pa128));
- v_x128 = _mm_cvtsi32_si128((int)(wuffs_base__peek_u32le__no_bounds_check(v_c.ptr)));
+ v_x128 = _mm_cvtsi32_si128((int32_t)(wuffs_base__peek_u32le__no_bounds_check(v_c.ptr)));
v_x128 = _mm_unpacklo_epi8(v_x128, v_z128);
v_x128 = _mm_add_epi8(v_x128, v_p128);
v_a128 = v_x128;
@@ -31471,7 +31471,7 @@
wuffs_base__poke_u32le__no_bounds_check(v_c.ptr, ((uint32_t)(_mm_cvtsi128_si32(v_x128))));
v_c.ptr += 4;
v_p.ptr += 4;
- v_b128 = _mm_cvtsi32_si128((int)(wuffs_base__peek_u32le__no_bounds_check(v_p.ptr)));
+ v_b128 = _mm_cvtsi32_si128((int32_t)(wuffs_base__peek_u32le__no_bounds_check(v_p.ptr)));
v_b128 = _mm_unpacklo_epi8(v_b128, v_z128);
v_pa128 = _mm_sub_epi16(v_b128, v_c128);
v_pb128 = _mm_sub_epi16(v_a128, v_c128);
@@ -31481,7 +31481,7 @@
v_pc128 = _mm_abs_epi16(v_pc128);
v_smallest128 = _mm_min_epi16(v_pc128, _mm_min_epi16(v_pb128, v_pa128));
v_p128 = _mm_blendv_epi8(_mm_blendv_epi8(v_c128, v_b128, _mm_cmpeq_epi16(v_smallest128, v_pb128)), v_a128, _mm_cmpeq_epi16(v_smallest128, v_pa128));
- v_x128 = _mm_cvtsi32_si128((int)(wuffs_base__peek_u32le__no_bounds_check(v_c.ptr)));
+ v_x128 = _mm_cvtsi32_si128((int32_t)(wuffs_base__peek_u32le__no_bounds_check(v_c.ptr)));
v_x128 = _mm_unpacklo_epi8(v_x128, v_z128);
v_x128 = _mm_add_epi8(v_x128, v_p128);
v_a128 = v_x128;
@@ -31495,7 +31495,7 @@
v_p.len = 4;
uint8_t* i_end1_c = v_c.ptr + (((i_slice_c.len - (size_t)(v_c.ptr - i_slice_c.ptr)) / 4) * 4);
while (v_c.ptr < i_end1_c) {
- v_b128 = _mm_cvtsi32_si128((int)(wuffs_base__peek_u32le__no_bounds_check(v_p.ptr)));
+ v_b128 = _mm_cvtsi32_si128((int32_t)(wuffs_base__peek_u32le__no_bounds_check(v_p.ptr)));
v_b128 = _mm_unpacklo_epi8(v_b128, v_z128);
v_pa128 = _mm_sub_epi16(v_b128, v_c128);
v_pb128 = _mm_sub_epi16(v_a128, v_c128);
@@ -31505,7 +31505,7 @@
v_pc128 = _mm_abs_epi16(v_pc128);
v_smallest128 = _mm_min_epi16(v_pc128, _mm_min_epi16(v_pb128, v_pa128));
v_p128 = _mm_blendv_epi8(_mm_blendv_epi8(v_c128, v_b128, _mm_cmpeq_epi16(v_smallest128, v_pb128)), v_a128, _mm_cmpeq_epi16(v_smallest128, v_pa128));
- v_x128 = _mm_cvtsi32_si128((int)(wuffs_base__peek_u32le__no_bounds_check(v_c.ptr)));
+ v_x128 = _mm_cvtsi32_si128((int32_t)(wuffs_base__peek_u32le__no_bounds_check(v_c.ptr)));
v_x128 = _mm_unpacklo_epi8(v_x128, v_z128);
v_x128 = _mm_add_epi8(v_x128, v_p128);
v_a128 = v_x128;