Add x86_sse42_utility.make_m128i_slice128
diff --git a/internal/cgen/builtin.go b/internal/cgen/builtin.go
index cbbbb1a..dd8a322 100644
--- a/internal/cgen/builtin.go
+++ b/internal/cgen/builtin.go
@@ -456,29 +456,6 @@
armNeon := recv.MType().Eq(typeExprARMNeon64) || recv.MType().Eq(typeExprARMNeon128)
switch method {
- case t.IDLoadSlice128:
- if !sideEffectsOnly {
- // Generate a two part expression using the comma operator: "(etc,
- // return_empty_struct call)". The final part is a function call
- // (to a static inline function) instead of a struct literal, to
- // avoid a "expression result unused" compiler error.
- b.writes("(")
- }
- if err := g.writeExpr(b, recv, false, depth); err != nil {
- return err
- }
-
- b.writes(" = _mm_lddqu_si128((const __m128i*)(const void*)(")
- if err := g.writeExpr(b, args[0].AsArg().Value(), false, depth); err != nil {
- return err
- }
- b.writes(".ptr))")
-
- if !sideEffectsOnly {
- b.writes(", wuffs_base__make_empty_struct())")
- }
- return nil
-
case t.IDTruncateU32, t.IDTruncateU64, t.IDStoreSlice128:
switch method {
case t.IDTruncateU32:
@@ -830,7 +807,7 @@
func (g *gen) writeBuiltinCPUArchX86(b *buffer, recv *a.Expr, method t.ID, args []*a.Node, sideEffectsOnly bool, depth uint32) error {
methodStr := method.Str(g.tm)
if strings.HasPrefix(methodStr, "make_") {
- fName, tName := "", ""
+ fName, tName, ptr := "", "", false
switch methodStr {
case "make_m128i_multiple_u8":
fName, tName = "_mm_set_epi8", "int8_t"
@@ -852,6 +829,8 @@
fName, tName = "_mm_cvtsi32_si128", "int32_t"
case "make_m128i_single_u64":
fName, tName = "_mm_cvtsi64x_si128", "int64_t"
+ case "make_m128i_slice128":
+ fName, tName, ptr = "_mm_lddqu_si128", "const __m128i*)(const void*", true
case "make_m128i_zeroes":
fName, tName = "_mm_setzero_si128", ""
default:
@@ -869,6 +848,9 @@
if err := g.writeExpr(b, o.Value(), false, depth); err != nil {
return err
}
+ if ptr {
+ b.writes(".ptr")
+ }
b.writes(")")
}
b.writes(")")
diff --git a/lang/builtin/builtin.go b/lang/builtin/builtin.go
index e9eb743..d73ca44 100644
--- a/lang/builtin/builtin.go
+++ b/lang/builtin/builtin.go
@@ -680,13 +680,12 @@
"x86_sse42_utility.make_m128i_single_u32(a: u32) x86_m128i",
"x86_sse42_utility.make_m128i_single_u64(a: u64) x86_m128i",
+ "x86_sse42_utility.make_m128i_slice128(a: slice base.u8) x86_m128i",
+
"x86_sse42_utility.make_m128i_zeroes() x86_m128i",
// ---- x86_m128i
- "x86_m128i.load_u32!(a: u32)",
- "x86_m128i.load_u64!(a: u64)",
- "x86_m128i.load_slice128!(a: slice base.u8)",
"x86_m128i.truncate_u32() u32",
"x86_m128i.truncate_u64() u64",
"x86_m128i.store_slice128!(a: slice base.u8)",
diff --git a/lang/check/bounds.go b/lang/check/bounds.go
index 553facd..24df237 100644
--- a/lang/check/bounds.go
+++ b/lang/check/bounds.go
@@ -19,6 +19,7 @@
"fmt"
"math/big"
"strconv"
+ "strings"
"github.com/google/wuffs/lib/interval"
@@ -1220,9 +1221,16 @@
}
} else if recvTyp.IsCPUArchType() {
- if method >= t.IDLoadSlice128 {
- if m := method - t.IDLoadSlice128; m < t.ID(len(lsMethodAdvances)) {
- advance, update = lsMethodAdvances[m], false
+ if s := method.Str(q.tm); strings.HasPrefix(s, "make_") || strings.HasPrefix(s, "store_") {
+ switch { // Pick advance from the slice bit-width suffix; update is explicitly false.
+ case strings.HasSuffix(s, "_slice64"): // 64 bits is 8 bytes.
+ advance, update = eight, false
+ case strings.HasSuffix(s, "_slice128"): // 128 bits is 16 bytes.
+ advance, update = sixteen, false
+ case strings.HasSuffix(s, "_slice256"): // 256 bits is 32 bytes.
+ advance, update = thirtyTwo, false
+ case strings.HasSuffix(s, "_slice512"): // 512 bits is 64 bytes.
+ advance, update = sixtyFour, false
}
}
}
@@ -1439,24 +1447,6 @@
t.IDWriteExtendedTokenFast - t.IDPeekU8: {one, true},
}
-var lsMethodAdvances = [...]*big.Int{
- // 64 bits is 8 bytes.
- // 128 bits is 16 bytes.
- // 256 bits is 32 bytes.
- // 512 bits is 64 bytes.
-
- t.IDLoadSlice128 - t.IDLoadSlice128: sixteen,
- t.IDLoadSlice256 - t.IDLoadSlice128: thirtyTwo,
- t.IDLoadSlice512 - t.IDLoadSlice128: sixtyFour,
-
- t.IDStoreSlice128 - t.IDLoadSlice128: sixteen,
- t.IDStoreSlice256 - t.IDLoadSlice128: thirtyTwo,
- t.IDStoreSlice512 - t.IDLoadSlice128: sixtyFour,
-
- t.IDCreateSlice64 - t.IDLoadSlice128: eight,
- t.IDCreateSlice128 - t.IDLoadSlice128: sixteen,
-}
-
func makeConstValueExpr(tm *t.Map, cv *big.Int) (*a.Expr, error) {
id, err := tm.Insert(cv.String())
if err != nil {
diff --git a/lang/check/type.go b/lang/check/type.go
index b9fcacf..8b53343 100644
--- a/lang/check/type.go
+++ b/lang/check/type.go
@@ -607,11 +607,6 @@
}
genericType1 = a.NewTypeExpr(t.IDSlice, 0, 0, nil, nil, genericType2.Inner())
}
-
- if f.FuncName().IsBuiltInLoad() && (lhs.LHS().AsExpr().Operator() != 0) {
- return fmt.Errorf(`check: %q receiver %q must be a local variable`,
- f.QQID().Str(q.tm), lhs.LHS().AsExpr().Str(q.tm))
- }
}
// Check that the func's in type matches the arguments.
diff --git a/lang/token/list.go b/lang/token/list.go
index 4ffd216..6773914 100644
--- a/lang/token/list.go
+++ b/lang/token/list.go
@@ -115,7 +115,6 @@
func (x ID) IsAssign() bool { return minAssign <= x && x <= maxAssign }
func (x ID) IsBuiltInCPUArch() bool { return minBuiltInCPUArch <= x && x <= maxBuiltInCPUArch }
-func (x ID) IsBuiltInLoad() bool { return minBuiltInLoad <= x && x <= maxBuiltInLoad }
func (x ID) IsCannotAssignTo() bool { return minCannotAssignTo <= x && x <= maxCannotAssignTo }
func (x ID) IsClose() bool { return minClose <= x && x <= maxClose }
func (x ID) IsKeyword() bool { return minKeyword <= x && x <= maxKeyword }
@@ -699,18 +698,13 @@
// --------
- minBuiltInLoad = 0x380
- maxBuiltInLoad = 0x387
+ IDStoreSlice64 = ID(0x380)
+ IDStoreSlice128 = ID(0x381)
+ IDStoreSlice256 = ID(0x382)
+ IDStoreSlice512 = ID(0x383)
- IDLoadSlice128 = ID(0x382)
- IDLoadSlice256 = ID(0x383)
- IDLoadSlice512 = ID(0x384)
-
- IDTruncateU32 = ID(0x388)
- IDTruncateU64 = ID(0x389)
- IDStoreSlice128 = ID(0x38A)
- IDStoreSlice256 = ID(0x38B)
- IDStoreSlice512 = ID(0x38C)
+ IDTruncateU32 = ID(0x388)
+ IDTruncateU64 = ID(0x389)
IDCreateSlice64 = ID(0x390)
IDCreateSlice128 = ID(0x391)
@@ -1117,16 +1111,14 @@
IDX86M128I: "x86_m128i",
- IDLoadSlice128: "load_slice128",
- IDLoadSlice256: "load_slice256",
- IDLoadSlice512: "load_slice512",
-
- IDTruncateU32: "truncate_u32",
- IDTruncateU64: "truncate_u64",
+ IDStoreSlice64: "store_slice64",
IDStoreSlice128: "store_slice128",
IDStoreSlice256: "store_slice256",
IDStoreSlice512: "store_slice512",
+ IDTruncateU32: "truncate_u32",
+ IDTruncateU64: "truncate_u64",
+
IDCreateSlice64: "create_slice64",
IDCreateSlice128: "create_slice128",
}
diff --git a/std/adler32/common_up_x86_sse42.wuffs b/std/adler32/common_up_x86_sse42.wuffs
index 239790e..93c5c1b 100644
--- a/std/adler32/common_up_x86_sse42.wuffs
+++ b/std/adler32/common_up_x86_sse42.wuffs
@@ -97,8 +97,8 @@
//
// Let p__left = [u8×16: p00, p01, p02, ..., p15]
// Let p_right = [u8×16: p16, p17, p18, ..., p31]
- p__left.load_slice128!(a: p[.. 16])
- p_right.load_slice128!(a: p[16 .. 32])
+ p__left = util.make_m128i_slice128(a: p[.. 16])
+ p_right = util.make_m128i_slice128(a: p[16 .. 32])
// For v2j, we need to calculate the sums of the s1j terms for each
// of p's 32 elements. This is simply 32 times the same number,