Add x86_sse42_utility.make_m128i_slice128
diff --git a/internal/cgen/builtin.go b/internal/cgen/builtin.go
index cbbbb1a..dd8a322 100644
--- a/internal/cgen/builtin.go
+++ b/internal/cgen/builtin.go
@@ -456,29 +456,6 @@
 	armNeon := recv.MType().Eq(typeExprARMNeon64) || recv.MType().Eq(typeExprARMNeon128)
 
 	switch method {
-	case t.IDLoadSlice128:
-		if !sideEffectsOnly {
-			// Generate a two part expression using the comma operator: "(etc,
-			// return_empty_struct call)". The final part is a function call
-			// (to a static inline function) instead of a struct literal, to
-			// avoid a "expression result unused" compiler error.
-			b.writes("(")
-		}
-		if err := g.writeExpr(b, recv, false, depth); err != nil {
-			return err
-		}
-
-		b.writes(" = _mm_lddqu_si128((const __m128i*)(const void*)(")
-		if err := g.writeExpr(b, args[0].AsArg().Value(), false, depth); err != nil {
-			return err
-		}
-		b.writes(".ptr))")
-
-		if !sideEffectsOnly {
-			b.writes(", wuffs_base__make_empty_struct())")
-		}
-		return nil
-
 	case t.IDTruncateU32, t.IDTruncateU64, t.IDStoreSlice128:
 		switch method {
 		case t.IDTruncateU32:
@@ -830,7 +807,7 @@
 func (g *gen) writeBuiltinCPUArchX86(b *buffer, recv *a.Expr, method t.ID, args []*a.Node, sideEffectsOnly bool, depth uint32) error {
 	methodStr := method.Str(g.tm)
 	if strings.HasPrefix(methodStr, "make_") {
-		fName, tName := "", ""
+		fName, tName, ptr := "", "", false
 		switch methodStr {
 		case "make_m128i_multiple_u8":
 			fName, tName = "_mm_set_epi8", "int8_t"
@@ -852,6 +829,8 @@
 			fName, tName = "_mm_cvtsi32_si128", "int32_t"
 		case "make_m128i_single_u64":
 			fName, tName = "_mm_cvtsi64x_si128", "int64_t"
+		case "make_m128i_slice128":
+			fName, tName, ptr = "_mm_lddqu_si128", "const __m128i*)(const void*", true
 		case "make_m128i_zeroes":
 			fName, tName = "_mm_setzero_si128", ""
 		default:
@@ -869,6 +848,9 @@
 			if err := g.writeExpr(b, o.Value(), false, depth); err != nil {
 				return err
 			}
+			if ptr {
+				b.writes(".ptr")
+			}
 			b.writes(")")
 		}
 		b.writes(")")
diff --git a/lang/builtin/builtin.go b/lang/builtin/builtin.go
index e9eb743..d73ca44 100644
--- a/lang/builtin/builtin.go
+++ b/lang/builtin/builtin.go
@@ -680,13 +680,12 @@
 	"x86_sse42_utility.make_m128i_single_u32(a: u32) x86_m128i",
 	"x86_sse42_utility.make_m128i_single_u64(a: u64) x86_m128i",
 
+	"x86_sse42_utility.make_m128i_slice128(a: slice base.u8) x86_m128i",
+
 	"x86_sse42_utility.make_m128i_zeroes() x86_m128i",
 
 	// ---- x86_m128i
 
-	"x86_m128i.load_u32!(a: u32)",
-	"x86_m128i.load_u64!(a: u64)",
-	"x86_m128i.load_slice128!(a: slice base.u8)",
 	"x86_m128i.truncate_u32() u32",
 	"x86_m128i.truncate_u64() u64",
 	"x86_m128i.store_slice128!(a: slice base.u8)",
diff --git a/lang/check/bounds.go b/lang/check/bounds.go
index 553facd..24df237 100644
--- a/lang/check/bounds.go
+++ b/lang/check/bounds.go
@@ -19,6 +19,7 @@
 	"fmt"
 	"math/big"
 	"strconv"
+	"strings"
 
 	"github.com/google/wuffs/lib/interval"
 
@@ -1220,9 +1221,16 @@
 		}
 
 	} else if recvTyp.IsCPUArchType() {
-		if method >= t.IDLoadSlice128 {
-			if m := method - t.IDLoadSlice128; m < t.ID(len(lsMethodAdvances)) {
-				advance, update = lsMethodAdvances[m], false
+		if s := method.Str(q.tm); strings.HasPrefix(s, "create_") || strings.HasPrefix(s, "make_") || strings.HasPrefix(s, "store_") { // Prefixes of the methods whose _sliceN suffix is sized below.
+			switch {
+			case strings.HasSuffix(s, "_slice64"): //   64 bits is  8 bytes.
+				advance = eight
+			case strings.HasSuffix(s, "_slice128"): // 128 bits is 16 bytes.
+				advance = sixteen
+			case strings.HasSuffix(s, "_slice256"): // 256 bits is 32 bytes.
+				advance = thirtyTwo
+			case strings.HasSuffix(s, "_slice512"): // 512 bits is 64 bytes.
+				advance = sixtyFour
 			}
 		}
 	}
@@ -1439,24 +1447,6 @@
 	t.IDWriteExtendedTokenFast - t.IDPeekU8: {one, true},
 }
 
-var lsMethodAdvances = [...]*big.Int{
-	//  64 bits is  8 bytes.
-	// 128 bits is 16 bytes.
-	// 256 bits is 32 bytes.
-	// 512 bits is 64 bytes.
-
-	t.IDLoadSlice128 - t.IDLoadSlice128: sixteen,
-	t.IDLoadSlice256 - t.IDLoadSlice128: thirtyTwo,
-	t.IDLoadSlice512 - t.IDLoadSlice128: sixtyFour,
-
-	t.IDStoreSlice128 - t.IDLoadSlice128: sixteen,
-	t.IDStoreSlice256 - t.IDLoadSlice128: thirtyTwo,
-	t.IDStoreSlice512 - t.IDLoadSlice128: sixtyFour,
-
-	t.IDCreateSlice64 - t.IDLoadSlice128:  eight,
-	t.IDCreateSlice128 - t.IDLoadSlice128: sixteen,
-}
-
 func makeConstValueExpr(tm *t.Map, cv *big.Int) (*a.Expr, error) {
 	id, err := tm.Insert(cv.String())
 	if err != nil {
diff --git a/lang/check/type.go b/lang/check/type.go
index b9fcacf..8b53343 100644
--- a/lang/check/type.go
+++ b/lang/check/type.go
@@ -607,11 +607,6 @@
 			}
 			genericType1 = a.NewTypeExpr(t.IDSlice, 0, 0, nil, nil, genericType2.Inner())
 		}
-
-		if f.FuncName().IsBuiltInLoad() && (lhs.LHS().AsExpr().Operator() != 0) {
-			return fmt.Errorf(`check: %q receiver %q must be a local variable`,
-				f.QQID().Str(q.tm), lhs.LHS().AsExpr().Str(q.tm))
-		}
 	}
 
 	// Check that the func's in type matches the arguments.
diff --git a/lang/token/list.go b/lang/token/list.go
index 4ffd216..6773914 100644
--- a/lang/token/list.go
+++ b/lang/token/list.go
@@ -115,7 +115,6 @@
 
 func (x ID) IsAssign() bool         { return minAssign <= x && x <= maxAssign }
 func (x ID) IsBuiltInCPUArch() bool { return minBuiltInCPUArch <= x && x <= maxBuiltInCPUArch }
-func (x ID) IsBuiltInLoad() bool    { return minBuiltInLoad <= x && x <= maxBuiltInLoad }
 func (x ID) IsCannotAssignTo() bool { return minCannotAssignTo <= x && x <= maxCannotAssignTo }
 func (x ID) IsClose() bool          { return minClose <= x && x <= maxClose }
 func (x ID) IsKeyword() bool        { return minKeyword <= x && x <= maxKeyword }
@@ -699,18 +698,13 @@
 
 	// --------
 
-	minBuiltInLoad = 0x380
-	maxBuiltInLoad = 0x387
+	IDStoreSlice64  = ID(0x380)
+	IDStoreSlice128 = ID(0x381)
+	IDStoreSlice256 = ID(0x382)
+	IDStoreSlice512 = ID(0x383)
 
-	IDLoadSlice128 = ID(0x382)
-	IDLoadSlice256 = ID(0x383)
-	IDLoadSlice512 = ID(0x384)
-
-	IDTruncateU32   = ID(0x388)
-	IDTruncateU64   = ID(0x389)
-	IDStoreSlice128 = ID(0x38A)
-	IDStoreSlice256 = ID(0x38B)
-	IDStoreSlice512 = ID(0x38C)
+	IDTruncateU32 = ID(0x388)
+	IDTruncateU64 = ID(0x389)
 
 	IDCreateSlice64  = ID(0x390)
 	IDCreateSlice128 = ID(0x391)
@@ -1117,16 +1111,14 @@
 
 	IDX86M128I: "x86_m128i",
 
-	IDLoadSlice128: "load_slice128",
-	IDLoadSlice256: "load_slice256",
-	IDLoadSlice512: "load_slice512",
-
-	IDTruncateU32:   "truncate_u32",
-	IDTruncateU64:   "truncate_u64",
+	IDStoreSlice64:  "store_slice64",
 	IDStoreSlice128: "store_slice128",
 	IDStoreSlice256: "store_slice256",
 	IDStoreSlice512: "store_slice512",
 
+	IDTruncateU32: "truncate_u32",
+	IDTruncateU64: "truncate_u64",
+
 	IDCreateSlice64:  "create_slice64",
 	IDCreateSlice128: "create_slice128",
 }
diff --git a/std/adler32/common_up_x86_sse42.wuffs b/std/adler32/common_up_x86_sse42.wuffs
index 239790e..93c5c1b 100644
--- a/std/adler32/common_up_x86_sse42.wuffs
+++ b/std/adler32/common_up_x86_sse42.wuffs
@@ -97,8 +97,8 @@
 			//
 			// Let p__left = [u8×16: p00, p01, p02, ..., p15]
 			// Let p_right = [u8×16: p16, p17, p18, ..., p31]
-			p__left.load_slice128!(a: p[.. 16])
-			p_right.load_slice128!(a: p[16 .. 32])
+			p__left = util.make_m128i_slice128(a: p[.. 16])
+			p_right = util.make_m128i_slice128(a: p[16 .. 32])
 
 			// For v2j, we need to calculate the sums of the s1j terms for each
 			// of p's 32 elements. This is simply 32 times the same number,