Add sse128_i.load_slice128
diff --git a/internal/cgen/builtin.go b/internal/cgen/builtin.go
index b525474..f3e22d2 100644
--- a/internal/cgen/builtin.go
+++ b/internal/cgen/builtin.go
@@ -446,8 +446,7 @@
func (g *gen) writeBuiltinCPUArch(b *buffer, recv *a.Expr, method t.ID, args []*a.Node, sideEffectsOnly bool, depth uint32) error {
switch method {
- case t.IDLoadU32:
- // TODO: ensure that the receiver is a variable, not an arbitrary expression.
+ case t.IDLoadU32, t.IDLoadU64, t.IDLoadSlice128:
if !sideEffectsOnly {
// Generate a two part expression using the comma operator: "(etc,
// return_empty_struct call)". The final part is a function call
@@ -458,22 +457,66 @@
if err := g.writeExpr(b, recv, false, depth); err != nil {
return err
}
- b.writes(" = _mm_cvtsi32_si128((int)(")
+
+ switch method {
+ case t.IDLoadU32:
+ b.writes(" = _mm_cvtsi32_si128((int)(")
+ case t.IDLoadU64:
+ b.writes(" = _mm_cvtsi64_si128((int64_t)(")
+ case t.IDLoadSlice128:
+ b.writes(" = _mm_lddqu_si128((const __m128i*)(const void*)(")
+ }
+
if err := g.writeExpr(b, args[0].AsArg().Value(), false, depth); err != nil {
return err
}
- b.writes("))")
+
+ switch method {
+ case t.IDLoadSlice128:
+ b.writes(".ptr))")
+ default:
+ b.writes("))")
+ }
+
if !sideEffectsOnly {
b.writes(", wuffs_base__make_empty_struct())")
}
return nil
- case t.IDTruncateU32:
- b.writes("((uint32_t)(_mm_cvtsi128_si32(")
+ case t.IDTruncateU32, t.IDTruncateU64, t.IDStoreSlice128:
+ switch method {
+ case t.IDTruncateU32:
+ b.writes("((uint32_t)(_mm_cvtsi128_si32(")
+ case t.IDTruncateU64:
+ b.writes("((uint64_t)(_mm_cvtsi128_si64(")
+ case t.IDStoreSlice128:
+ if !sideEffectsOnly {
+ // Generate a two part expression using the comma operator: "(etc,
+ // return_empty_struct call)". The final part is a function call
+ // (to a static inline function) instead of a struct literal, to
+ // avoid an "expression result unused" compiler error.
+ b.writes("(")
+ }
+ b.writes("_mm_storeu_si128((__m128i*)(void*)(")
+ if err := g.writeExpr(b, args[0].AsArg().Value(), false, depth); err != nil {
+ return err
+ }
+ b.writes(".ptr), ")
+ }
+
if err := g.writeExpr(b, recv, false, depth); err != nil {
return err
}
- b.writes(")))")
+
+ switch method {
+ case t.IDStoreSlice128:
+ b.writes(")")
+ if !sideEffectsOnly {
+ b.writes(", wuffs_base__make_empty_struct())")
+ }
+ default:
+ b.writes(")))")
+ }
return nil
}
@@ -503,9 +546,16 @@
}
for _, o := range args {
b.writes(", ")
- if err := g.writeExpr(b, o.AsArg().Value(), false, depth); err != nil {
+ after := ""
+ v := o.AsArg().Value()
+ if !v.MType().IsCPUArchType() {
+ b.writes("(int32_t)(")
+ after = ")"
+ }
+ if err := g.writeExpr(b, v, false, depth); err != nil {
return err
}
+ b.writes(after)
}
}
b.writes(")")
diff --git a/lang/ast/ast.go b/lang/ast/ast.go
index fd462f5..a6a5b24 100644
--- a/lang/ast/ast.go
+++ b/lang/ast/ast.go
@@ -797,6 +797,10 @@
return n.id0 == 0 && n.id1 == t.IDBase && n.id2 == t.IDBool
}
+func (n *TypeExpr) IsCPUArchType() bool {
+ return n.id0 == 0 && n.id1 == t.IDBase && n.id2.IsBuiltInCPUArch()
+}
+
func (n *TypeExpr) IsIdeal() bool {
return n.id0 == 0 && n.id1 == t.IDBase && n.id2 == t.IDQIdeal
}
diff --git a/lang/builtin/builtin.go b/lang/builtin/builtin.go
index a21803f..1e13710 100644
--- a/lang/builtin/builtin.go
+++ b/lang/builtin/builtin.go
@@ -534,10 +534,12 @@
// ---- sse128_i
- "sse128_i.load_slice!(a: slice base.u8)",
"sse128_i.load_u32!(a: u32)",
- "sse128_i.store_slice!(a: slice base.u8)",
+ "sse128_i.load_u64!(a: u64)",
+ "sse128_i.load_slice128!(a: slice base.u8)",
"sse128_i.truncate_u32() u32",
+ "sse128_i.truncate_u64() u64",
+ "sse128_i.store_slice128!(a: slice base.u8)",
// TODO: generate these methods automatically?
@@ -552,6 +554,8 @@
"sse128_i._mm_cmpeq_epi16(b: sse128_i) sse128_i",
"sse128_i._mm_min_epi16(b: sse128_i) sse128_i",
"sse128_i._mm_packus_epi16(b: sse128_i) sse128_i",
+ "sse128_i._mm_slli_si128(imm8: u32) sse128_i",
+ "sse128_i._mm_srli_si128(imm8: u32) sse128_i",
"sse128_i._mm_sub_epi16(b: sse128_i) sse128_i",
"sse128_i._mm_sub_epi8(b: sse128_i) sse128_i",
"sse128_i._mm_unpacklo_epi8(b: sse128_i) sse128_i",
diff --git a/lang/check/bounds.go b/lang/check/bounds.go
index ffcd7a8..423860d 100644
--- a/lang/check/bounds.go
+++ b/lang/check/bounds.go
@@ -1153,10 +1153,21 @@
advance, update = au.advance, au.update
}
}
+
+ } else if recvTyp.IsCPUArchType() {
+ if method >= t.IDLoadSlice128 {
+ if m := method - t.IDLoadSlice128; m < t.ID(len(lsMethodAdvances)) {
+ advance, update = lsMethodAdvances[m], false
+ }
+ }
}
if (advance != nil) || (advanceExpr != nil) {
- if ok, err := q.optimizeIOMethodAdvance(recv, advance, advanceExpr, update); err != nil {
+ subject := recv
+ if recv.MType().IsCPUArchType() {
+ subject = n.Args()[0].AsArg().Value()
+ }
+ if ok, err := q.optimizeIOMethodAdvance(subject, advance, advanceExpr, update); err != nil {
return bounds{}, err
} else if !ok {
adv := ""
@@ -1166,9 +1177,9 @@
adv = advanceExpr.Str(q.tm)
}
return bounds{}, fmt.Errorf("check: could not prove %s pre-condition: %s.length() >= %s",
- method.Str(q.tm), recv.Str(q.tm), adv)
+ method.Str(q.tm), subject.Str(q.tm), adv)
}
- // TODO: drop other recv-related facts?
+ // TODO: drop other subject-related facts?
}
return bounds{}, errNotASpecialCase
@@ -1363,6 +1374,18 @@
t.IDWriteExtendedTokenFast - t.IDPeekU8: {one, true},
}
+var lsMethodAdvances = [...]*big.Int{
+ // 128 bits is 16 bytes. 256 bits is 32 bytes. 512 bits is 64 bytes.
+
+ t.IDLoadSlice128 - t.IDLoadSlice128: sixteen,
+ t.IDLoadSlice256 - t.IDLoadSlice128: thirtyTwo,
+ t.IDLoadSlice512 - t.IDLoadSlice128: sixtyFour,
+
+ t.IDStoreSlice128 - t.IDLoadSlice128: sixteen,
+ t.IDStoreSlice256 - t.IDLoadSlice128: thirtyTwo,
+ t.IDStoreSlice512 - t.IDLoadSlice128: sixtyFour,
+}
+
func makeConstValueExpr(tm *t.Map, cv *big.Int) (*a.Expr, error) {
id, err := tm.Insert(cv.String())
if err != nil {
diff --git a/lang/token/list.go b/lang/token/list.go
index ae05d9c..0ab1da5 100644
--- a/lang/token/list.go
+++ b/lang/token/list.go
@@ -114,6 +114,7 @@
func (x ID) IsTightRight() bool { return x < ID(len(isTightRight)) && isTightRight[x] }
func (x ID) IsAssign() bool { return minAssign <= x && x <= maxAssign }
+func (x ID) IsBuiltInCPUArch() bool { return minBuiltInCPUArch <= x && x <= maxBuiltInCPUArch }
func (x ID) IsBuiltInLoad() bool { return minBuiltInLoad <= x && x <= maxBuiltInLoad }
func (x ID) IsCannotAssignTo() bool { return minCannotAssignTo <= x && x <= maxCannotAssignTo }
func (x ID) IsClose() bool { return minClose <= x && x <= maxClose }
@@ -659,6 +660,9 @@
// -------- 0x300 block.
+ minBuiltInCPUArch = 0x300
+ maxBuiltInCPUArch = 0x32F
+
// 0x30? are reserved for NEON.
IDSSE128 = ID(0x310)
@@ -671,13 +675,17 @@
minBuiltInLoad = 0x380
maxBuiltInLoad = 0x387
- IDLoadSlice = ID(0x380)
- IDLoadU32 = ID(0x381)
- IDLoadU64 = ID(0x382)
+ IDLoadU32 = ID(0x380)
+ IDLoadU64 = ID(0x381)
+ IDLoadSlice128 = ID(0x382)
+ IDLoadSlice256 = ID(0x383)
+ IDLoadSlice512 = ID(0x384)
- IDStoreSlice = ID(0x388)
- IDTruncateU32 = ID(0x389)
- IDTruncateU64 = ID(0x38A)
+ IDTruncateU32 = ID(0x388)
+ IDTruncateU64 = ID(0x389)
+ IDStoreSlice128 = ID(0x38A)
+ IDStoreSlice256 = ID(0x38B)
+ IDStoreSlice512 = ID(0x38C)
IDCreateMMSet1EPI8 = ID(0x390)
)
@@ -1067,13 +1075,17 @@
IDSSE128: "sse128",
IDSSE128I: "sse128_i",
- IDLoadSlice: "load_slice",
- IDLoadU32: "load_u32",
- IDLoadU64: "load_u64",
+ IDLoadU32: "load_u32",
+ IDLoadU64: "load_u64",
+ IDLoadSlice128: "load_slice128",
+ IDLoadSlice256: "load_slice256",
+ IDLoadSlice512: "load_slice512",
- IDStoreSlice: "store_slice",
- IDTruncateU32: "truncate_u32",
- IDTruncateU64: "truncate_u64",
+ IDTruncateU32: "truncate_u32",
+ IDTruncateU64: "truncate_u64",
+ IDStoreSlice128: "store_slice128",
+ IDStoreSlice256: "store_slice256",
+ IDStoreSlice512: "store_slice512",
IDCreateMMSet1EPI8: "create_mm_set1_epi8",
}