Add arm_crc32,arm_neon cpu_arch conditions
diff --git a/internal/cgen/base/fundamental-public.h b/internal/cgen/base/fundamental-public.h
index f58f4ed..d39cac1 100644
--- a/internal/cgen/base/fundamental-public.h
+++ b/internal/cgen/base/fundamental-public.h
@@ -47,11 +47,33 @@
 #if defined(WUFFS_CONFIG__AVOID_CPU_ARCH)
 // No-op.
 #else
-#if defined(__GNUC__) && defined(__x86_64__)
+#if defined(__GNUC__)
+
+// To simplify Wuffs code, "cpu_arch >= arm_xxx" requires xxx but also
+// unaligned little-endian load/stores.
+#if defined(__ARM_FEATURE_UNALIGNED) && defined(__BYTE_ORDER__) && \
+    (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+// Not all gcc versions define __ARM_ACLE, even if they support crc32
+// intrinsics. Look for __ARM_FEATURE_CRC32 instead.
+#if defined(__ARM_FEATURE_CRC32)
+#include <arm_acle.h>
+#define WUFFS_BASE__CPU_ARCH__ARM_CRC32
+#endif  // defined(__ARM_FEATURE_CRC32)
+#if defined(__ARM_NEON)
+#include <arm_neon.h>
+#define WUFFS_BASE__CPU_ARCH__ARM_NEON
+#endif  // defined(__ARM_NEON)
+#endif  // defined(__ARM_FEATURE_UNALIGNED) etc
+
+// Similarly, "cpu_arch >= x86_sse42" requires SSE4.2 but also PCLMUL and
+// POPCNT. This is checked at runtime via cpuid, not at compile time.
+#if defined(__x86_64__)
 #include <cpuid.h>
 #include <x86intrin.h>
 #define WUFFS_BASE__CPU_ARCH__X86_64
-#endif  // defined(__GNUC__) && defined(__x86_64__)
+#endif  // defined(__x86_64__)
+
+#endif  // defined(__GNUC__)
 #endif  // defined(WUFFS_CONFIG__AVOID_CPU_ARCH)
 
 // --------
@@ -68,6 +90,24 @@
 
 // ---------------- CPU Architecture
 
+static inline bool  //
+wuffs_base__cpu_arch__have_arm_crc32() {
+#if defined(WUFFS_BASE__CPU_ARCH__ARM_CRC32)
+  return true;
+#else
+  return false;
+#endif  // defined(WUFFS_BASE__CPU_ARCH__ARM_CRC32)
+}
+
+static inline bool  //
+wuffs_base__cpu_arch__have_arm_neon() {
+#if defined(WUFFS_BASE__CPU_ARCH__ARM_NEON)
+  return true;
+#else
+  return false;
+#endif  // defined(WUFFS_BASE__CPU_ARCH__ARM_NEON)
+}
+
 // WUFFS_BASE__CPU_ARCH__X86_64__ETC are bits returned by
 // wuffs_base__cpu_arch__x86_64__capabilities.
 // - "SSE42" means all of SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, PCLMUL and
diff --git a/internal/cgen/data/data.go b/internal/cgen/data/data.go
index 4bad0d8..2947016 100644
--- a/internal/cgen/data/data.go
+++ b/internal/cgen/data/data.go
@@ -54,12 +54,14 @@
 	"// ---------------- Version\n\n// WUFFS_VERSION is the major.minor.patch version, as per https://semver.org/,\n// as a uint64_t. The major number is the high 32 bits. The minor number is the\n// middle 16 bits. The patch number is the low 16 bits. The pre-release label\n// and build metadata are part of the string representation (such as\n// \"1.2.3-beta+456.20181231\") but not the uint64_t representation.\n//\n// WUFFS_VERSION_PRE_RELEASE_LABEL (such as \"\", \"beta\" or \"rc.1\") being\n// non-empty denotes a developer preview, not a release version, and has no\n// backwards or forwards compatibility guarantees.\n//\n// WUFFS_VERSION_BUILD_METADATA_XXX, if non-zero, are the number of commits and\n// the last commit date in the repository used to build this library. Within\n// each major.minor branch, the commit count should increase monotonically.\n//\n// !! Some code generation programs can override WUFFS_VERSION.\n#define WUFFS_VERSION 0\n#define WUFFS_VERSION_MAJOR 0\n#define WUFFS_VERSION_MINOR 0\n#define WUFFS_VERSION_PATCH 0\n#de" +
 	"fine WUFFS_VERSION_PRE_RELEASE_LABEL \"work.in.progress\"\n#define WUFFS_VERSION_BUILD_METADATA_COMMIT_COUNT 0\n#define WUFFS_VERSION_BUILD_METADATA_COMMIT_DATE 0\n#define WUFFS_VERSION_STRING \"0.0.0+0.00000000\"\n\n" +
 	"" +
-	"// ---------------- Configuration\n\n// Define WUFFS_CONFIG__AVOID_CPU_ARCH to avoid any code tied to a specific CPU\n// architecture, such as SSE SIMD for the x86 CPU family.\n#if defined(WUFFS_CONFIG__AVOID_CPU_ARCH)\n// No-op.\n#else\n#if defined(__GNUC__) && defined(__x86_64__)\n#include <cpuid.h>\n#include <x86intrin.h>\n#define WUFFS_BASE__CPU_ARCH__X86_64\n#endif  // defined(__GNUC__) && defined(__x86_64__)\n#endif  // defined(WUFFS_CONFIG__AVOID_CPU_ARCH)\n\n" +
+	"// ---------------- Configuration\n\n// Define WUFFS_CONFIG__AVOID_CPU_ARCH to avoid any code tied to a specific CPU\n// architecture, such as SSE SIMD for the x86 CPU family.\n#if defined(WUFFS_CONFIG__AVOID_CPU_ARCH)\n// No-op.\n#else\n#if defined(__GNUC__)\n\n// To simplify Wuffs code, \"cpu_arch >= arm_xxx\" requires xxx but also\n// unaligned little-endian load/stores.\n#if defined(__ARM_FEATURE_UNALIGNED) && defined(__BYTE_ORDER__) && \\\n    (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)\n// Not all gcc versions define __ARM_ACLE, even if they support crc32\n// intrinsics. Look for __ARM_FEATURE_CRC32 instead.\n#if defined(__ARM_FEATURE_CRC32)\n#include <arm_acle.h>\n#define WUFFS_BASE__CPU_ARCH__ARM_CRC32\n#endif  // defined(__ARM_FEATURE_CRC32)\n#if defined(__ARM_NEON)\n#include <arm_neon.h>\n#define WUFFS_BASE__CPU_ARCH__ARM_NEON\n#endif  // defined(__ARM_NEON)\n#endif  // defined(__ARM_FEATURE_UNALIGNED) etc\n\n// Similarly, \"cpu_arch >= x86_sse42\" requires SSE4.2 but also PCLMUL and\n// POPCNT. This is checked at runtime via cpu" +
+	"id, not at compile time.\n#if defined(__x86_64__)\n#include <cpuid.h>\n#include <x86intrin.h>\n#define WUFFS_BASE__CPU_ARCH__X86_64\n#endif  // defined(__x86_64__)\n\n#endif  // defined(__GNUC__)\n#endif  // defined(WUFFS_CONFIG__AVOID_CPU_ARCH)\n\n" +
 	"" +
 	"// --------\n\n// Define WUFFS_CONFIG__STATIC_FUNCTIONS to make all of Wuffs' functions have\n// static storage. The motivation is discussed in the \"ALLOW STATIC\n// IMPLEMENTATION\" section of\n// https://raw.githubusercontent.com/nothings/stb/master/docs/stb_howto.txt\n#if defined(WUFFS_CONFIG__STATIC_FUNCTIONS)\n#define WUFFS_BASE__MAYBE_STATIC static\n#else\n#define WUFFS_BASE__MAYBE_STATIC\n#endif  // defined(WUFFS_CONFIG__STATIC_FUNCTIONS)\n\n" +
 	"" +
-	"// ---------------- CPU Architecture\n\n// WUFFS_BASE__CPU_ARCH__X86_64__ETC are bits returned by\n// wuffs_base__cpu_arch__x86_64__capabilities.\n// - \"SSE42\" means all of SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, PCLMUL and\n//   POPCNT.\n// - \"AVX2\"  means all of AVX and AVX2.\n// - \"AVX512ETC\" is reserved, pending need. Note that AVX-512 consists of\n//   multiple extensions that may be implemented independently.\n#define WUFFS_BASE__CPU_ARCH__X86_64__SSE42 0x01\n#define WUFFS_BASE__CPU_ARCH__X86_64__AVX2 0x02\n\nstatic inline uint32_t  //\nwuffs_base__cpu_arch__x86_64__capabilities() {\n#if defined(WUFFS_BASE__CPU_ARCH__X86_64)\n  uint32_t ret = 0;\n\n  unsigned int eax1 = 0;\n  unsigned int ebx1 = 0;\n  unsigned int ecx1 = 0;\n  unsigned int edx1 = 0;\n  if (__get_cpuid(1, &eax1, &ebx1, &ecx1, &edx1)) {\n    const unsigned int sse42_ecx1 = bit_PCLMUL | bit_POPCNT | bit_SSE4_2;\n    if ((ecx1 & sse42_ecx1) == sse42_ecx1) {\n      ret |= WUFFS_BASE__CPU_ARCH__X86_64__SSE42;\n    }\n  }\n\n  unsigned int eax7 = 0;\n  unsigned int ebx7 =" +
-	" 0;\n  unsigned int ecx7 = 0;\n  unsigned int edx7 = 0;\n  if (__get_cpuid_count(7, 0, &eax7, &ebx7, &ecx7, &edx7)) {\n    const unsigned int avx256_ebx7 = bit_AVX2;\n    if ((ebx7 & avx256_ebx7) == avx256_ebx7) {\n      ret |= WUFFS_BASE__CPU_ARCH__X86_64__AVX2;\n    }\n  }\n\n  return ret;\n#else\n  return 0;\n#endif  // defined(WUFFS_BASE__CPU_ARCH__X86_64)\n}\n\nstatic inline bool  //\nwuffs_base__cpu_arch__have_sse42() {\n#if defined(WUFFS_BASE__CPU_ARCH__X86_64)\n  unsigned int eax1 = 0;\n  unsigned int ebx1 = 0;\n  unsigned int ecx1 = 0;\n  unsigned int edx1 = 0;\n  if (__get_cpuid(1, &eax1, &ebx1, &ecx1, &edx1)) {\n    const unsigned int sse42_ecx1 = bit_PCLMUL | bit_POPCNT | bit_SSE4_2;\n    return (ecx1 & sse42_ecx1) == sse42_ecx1;\n  }\n#endif  // defined(WUFFS_BASE__CPU_ARCH__X86_64)\n  return false;\n}\n\n" +
+	"// ---------------- CPU Architecture\n\nstatic inline bool  //\nwuffs_base__cpu_arch__have_arm_crc32() {\n#if defined(WUFFS_BASE__CPU_ARCH__ARM_CRC32)\n  return true;\n#else\n  return false;\n#endif  // defined(WUFFS_BASE__CPU_ARCH__ARM_CRC32)\n}\n\nstatic inline bool  //\nwuffs_base__cpu_arch__have_arm_neon() {\n#if defined(WUFFS_BASE__CPU_ARCH__ARM_NEON)\n  return true;\n#else\n  return false;\n#endif  // defined(WUFFS_BASE__CPU_ARCH__ARM_NEON)\n}\n\n// WUFFS_BASE__CPU_ARCH__X86_64__ETC are bits returned by\n// wuffs_base__cpu_arch__x86_64__capabilities.\n// - \"SSE42\" means all of SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, PCLMUL and\n//   POPCNT.\n// - \"AVX2\"  means all of AVX and AVX2.\n// - \"AVX512ETC\" is reserved, pending need. Note that AVX-512 consists of\n//   multiple extensions that may be implemented independently.\n#define WUFFS_BASE__CPU_ARCH__X86_64__SSE42 0x01\n#define WUFFS_BASE__CPU_ARCH__X86_64__AVX2 0x02\n\nstatic inline uint32_t  //\nwuffs_base__cpu_arch__x86_64__capabilities() {\n#if defined(WUFFS_BASE__CPU_ARCH__X86_64)\n" +
+	"  uint32_t ret = 0;\n\n  unsigned int eax1 = 0;\n  unsigned int ebx1 = 0;\n  unsigned int ecx1 = 0;\n  unsigned int edx1 = 0;\n  if (__get_cpuid(1, &eax1, &ebx1, &ecx1, &edx1)) {\n    const unsigned int sse42_ecx1 = bit_PCLMUL | bit_POPCNT | bit_SSE4_2;\n    if ((ecx1 & sse42_ecx1) == sse42_ecx1) {\n      ret |= WUFFS_BASE__CPU_ARCH__X86_64__SSE42;\n    }\n  }\n\n  unsigned int eax7 = 0;\n  unsigned int ebx7 = 0;\n  unsigned int ecx7 = 0;\n  unsigned int edx7 = 0;\n  if (__get_cpuid_count(7, 0, &eax7, &ebx7, &ecx7, &edx7)) {\n    const unsigned int avx256_ebx7 = bit_AVX2;\n    if ((ebx7 & avx256_ebx7) == avx256_ebx7) {\n      ret |= WUFFS_BASE__CPU_ARCH__X86_64__AVX2;\n    }\n  }\n\n  return ret;\n#else\n  return 0;\n#endif  // defined(WUFFS_BASE__CPU_ARCH__X86_64)\n}\n\nstatic inline bool  //\nwuffs_base__cpu_arch__have_sse42() {\n#if defined(WUFFS_BASE__CPU_ARCH__X86_64)\n  unsigned int eax1 = 0;\n  unsigned int ebx1 = 0;\n  unsigned int ecx1 = 0;\n  unsigned int edx1 = 0;\n  if (__get_cpuid(1, &eax1, &ebx1, &ecx1, &edx1)) {\n    const unsigned" +
+	" int sse42_ecx1 = bit_PCLMUL | bit_POPCNT | bit_SSE4_2;\n    return (ecx1 & sse42_ecx1) == sse42_ecx1;\n  }\n#endif  // defined(WUFFS_BASE__CPU_ARCH__X86_64)\n  return false;\n}\n\n" +
 	"" +
 	"// ---------------- Fundamentals\n\n// Wuffs assumes that:\n//  - converting a uint32_t to a size_t will never overflow.\n//  - converting a size_t to a uint64_t will never overflow.\n#ifdef __WORDSIZE\n#if (__WORDSIZE != 32) && (__WORDSIZE != 64)\n#error \"Wuffs requires a word size of either 32 or 64 bits\"\n#endif\n#endif\n\n#if defined(__clang__)\n#define WUFFS_BASE__POTENTIALLY_UNUSED_FIELD __attribute__((unused))\n#else\n#define WUFFS_BASE__POTENTIALLY_UNUSED_FIELD\n#endif\n\n// Clang also defines \"__GNUC__\".\n#if defined(__GNUC__)\n#define WUFFS_BASE__POTENTIALLY_UNUSED __attribute__((unused))\n#define WUFFS_BASE__WARN_UNUSED_RESULT __attribute__((warn_unused_result))\n#else\n#define WUFFS_BASE__POTENTIALLY_UNUSED\n#define WUFFS_BASE__WARN_UNUSED_RESULT\n#endif\n\n" +
 	"" +
diff --git a/internal/cgen/func.go b/internal/cgen/func.go
index caa35bf..4526328 100644
--- a/internal/cgen/func.go
+++ b/internal/cgen/func.go
@@ -202,7 +202,10 @@
 }
 
 func (g *gen) writeFuncPrototype(b *buffer, n *a.Func) error {
-	caMacro, _, _ := cpuArchCNames(n.Asserts())
+	caMacro, _, _, err := cpuArchCNames(n.Asserts())
+	if err != nil {
+		return err
+	}
 	if caMacro != "" {
 		b.printf("#if defined(WUFFS_BASE__CPU_ARCH__%s)\n", caMacro)
 	}
@@ -228,7 +231,10 @@
 
 	b.printf("// -------- func %s.%s\n\n", g.pkgName, n.QQID().Str(g.tm))
 
-	caMacro, _, caAttribute := cpuArchCNames(n.Asserts())
+	caMacro, _, caAttribute, err := cpuArchCNames(n.Asserts())
+	if err != nil {
+		return err
+	}
 	if caMacro != "" {
 		b.printf("#if defined(WUFFS_BASE__CPU_ARCH__%s)\n", caMacro)
 	}
diff --git a/internal/cgen/statement.go b/internal/cgen/statement.go
index e83d1d4..773a0c4 100644
--- a/internal/cgen/statement.go
+++ b/internal/cgen/statement.go
@@ -238,7 +238,10 @@
 		if n.Name() == id {
 			suffix = "__choosy_default"
 		}
-		caMacro, caName, _ := cpuArchCNames(g.findAstFunc(t.QQID{recv[0], recv[1], id}).Asserts())
+		caMacro, caName, _, err := cpuArchCNames(g.findAstFunc(t.QQID{recv[0], recv[1], id}).Asserts())
+		if err != nil {
+			return err
+		}
 		if caMacro == "" {
 			b.printf("&%s%s__%s%s", g.pkgPrefix, recv.Str(g.tm), id.Str(g.tm), suffix)
 			conclusive = true
@@ -257,22 +260,27 @@
 	return nil
 }
 
-func cpuArchCNames(asserts []*a.Node) (caMacro string, caName string, caAttribute string) {
-	sse42 := false
+func cpuArchCNames(asserts []*a.Node) (caMacro string, caName string, caAttribute string, retErr error) {
+	match := false
 	for _, o := range asserts {
-		o := o.AsAssert()
-		if !o.IsChooseCPUArch() {
-			continue
-		}
-		switch o.Condition().RHS().AsExpr().Ident() {
-		case t.IDX86SSE42:
-			sse42 = true
+		if o := o.AsAssert(); o.IsChooseCPUArch() {
+			if match {
+				// TODO: support multiple choose-cpu_arch preconditions?
+				return "", "", "", fmt.Errorf("too many choose-cpu_arch preconditions")
+			}
+			match = true
+
+			switch o.Condition().RHS().AsExpr().Ident() {
+			case t.IDARMCRC32:
+				caMacro, caName, caAttribute = "ARM_CRC32", "arm_crc32", ""
+			case t.IDARMNeon:
+				caMacro, caName, caAttribute = "ARM_NEON", "arm_neon", ""
+			case t.IDX86SSE42:
+				caMacro, caName, caAttribute = "X86_64", "sse42", "__attribute__((target(\"sse4.2\")))"
+			}
 		}
 	}
-	if sse42 {
-		return "X86_64", "sse42", "__attribute__((target(\"sse4.2\")))"
-	}
-	return "", "", ""
+	return caMacro, caName, caAttribute, nil
 }
 
 func (g *gen) writeStatementIOBind(b *buffer, n *a.IOBind, depth uint32) error {
diff --git a/lang/ast/ast.go b/lang/ast/ast.go
index bb4c562..c39830f 100644
--- a/lang/ast/ast.go
+++ b/lang/ast/ast.go
@@ -455,7 +455,7 @@
 		return false
 	}
 	switch rhs.Ident() {
-	case t.IDX86SSE42, t.IDX86AVX2:
+	case t.IDARMCRC32, t.IDARMNeon, t.IDX86SSE42, t.IDX86AVX2:
 		return true
 	}
 	return false
diff --git a/lang/token/list.go b/lang/token/list.go
index 1422fcc..cffb403 100644
--- a/lang/token/list.go
+++ b/lang/token/list.go
@@ -664,7 +664,8 @@
 	minBuiltInCPUArch = 0x300
 	maxBuiltInCPUArch = 0x33F
 
-	// [0x300 ..= 0x31F] are reserved for ARM/NEON.
+	IDARMCRC32 = ID(0x300)
+	IDARMNeon  = ID(0x301)
 
 	IDX86SSE42 = ID(0x320)
 	IDX86AVX2  = ID(0x321)
@@ -1081,6 +1082,9 @@
 
 	// -------- 0x300 block.
 
+	IDARMCRC32: "arm_crc32",
+	IDARMNeon:  "arm_neon",
+
 	IDX86SSE42: "x86_sse42",
 	IDX86AVX2:  "x86_avx2",
 
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index 1e1941a..5adf821 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -81,11 +81,33 @@
 #if defined(WUFFS_CONFIG__AVOID_CPU_ARCH)
 // No-op.
 #else
-#if defined(__GNUC__) && defined(__x86_64__)
+#if defined(__GNUC__)
+
+// To simplify Wuffs code, "cpu_arch >= arm_xxx" requires xxx but also
+// unaligned little-endian load/stores.
+#if defined(__ARM_FEATURE_UNALIGNED) && defined(__BYTE_ORDER__) && \
+    (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+// Not all gcc versions define __ARM_ACLE, even if they support crc32
+// intrinsics. Look for __ARM_FEATURE_CRC32 instead.
+#if defined(__ARM_FEATURE_CRC32)
+#include <arm_acle.h>
+#define WUFFS_BASE__CPU_ARCH__ARM_CRC32
+#endif  // defined(__ARM_FEATURE_CRC32)
+#if defined(__ARM_NEON)
+#include <arm_neon.h>
+#define WUFFS_BASE__CPU_ARCH__ARM_NEON
+#endif  // defined(__ARM_NEON)
+#endif  // defined(__ARM_FEATURE_UNALIGNED) etc
+
+// Similarly, "cpu_arch >= x86_sse42" requires SSE4.2 but also PCLMUL and
+// POPCNT. This is checked at runtime via cpuid, not at compile time.
+#if defined(__x86_64__)
 #include <cpuid.h>
 #include <x86intrin.h>
 #define WUFFS_BASE__CPU_ARCH__X86_64
-#endif  // defined(__GNUC__) && defined(__x86_64__)
+#endif  // defined(__x86_64__)
+
+#endif  // defined(__GNUC__)
 #endif  // defined(WUFFS_CONFIG__AVOID_CPU_ARCH)
 
 // --------
@@ -102,6 +124,24 @@
 
 // ---------------- CPU Architecture
 
+static inline bool  //
+wuffs_base__cpu_arch__have_arm_crc32() {
+#if defined(WUFFS_BASE__CPU_ARCH__ARM_CRC32)
+  return true;
+#else
+  return false;
+#endif  // defined(WUFFS_BASE__CPU_ARCH__ARM_CRC32)
+}
+
+static inline bool  //
+wuffs_base__cpu_arch__have_arm_neon() {
+#if defined(WUFFS_BASE__CPU_ARCH__ARM_NEON)
+  return true;
+#else
+  return false;
+#endif  // defined(WUFFS_BASE__CPU_ARCH__ARM_NEON)
+}
+
 // WUFFS_BASE__CPU_ARCH__X86_64__ETC are bits returned by
 // wuffs_base__cpu_arch__x86_64__capabilities.
 // - "SSE42" means all of SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, PCLMUL and