Disable cpu_arch-specific code (the ARM CRC32/NEON and x86_64 intrinsics paths) when building for Native Client
diff --git a/internal/cgen/base/fundamental-public.h b/internal/cgen/base/fundamental-public.h
index 11cfa4a..7d4a0e7 100644
--- a/internal/cgen/base/fundamental-public.h
+++ b/internal/cgen/base/fundamental-public.h
@@ -60,8 +60,8 @@
 
 // To simplify Wuffs code, "cpu_arch >= arm_xxx" requires xxx but also
 // unaligned little-endian load/stores.
-#if defined(__ARM_FEATURE_UNALIGNED) && defined(__BYTE_ORDER__) && \
-    (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+#if defined(__ARM_FEATURE_UNALIGNED) && !defined(__native_client__) && \
+    defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
 // Not all gcc versions define __ARM_ACLE, even if they support crc32
 // intrinsics. Look for __ARM_FEATURE_CRC32 instead.
 #if defined(__ARM_FEATURE_CRC32)
@@ -78,11 +78,11 @@
 // POPCNT. This is checked at runtime via cpuid, not at compile time.
 //
 // Likewise, "cpu_arch >= x86_avx2" also requires PCLMUL, POPCNT and SSE4.2.
-#if defined(__x86_64__)
+#if defined(__x86_64__) && !defined(__native_client__)
 #include <cpuid.h>
 #include <x86intrin.h>
 #define WUFFS_BASE__CPU_ARCH__X86_64
-#endif  // defined(__x86_64__)
+#endif  // defined(__x86_64__) && !defined(__native_client__)
 
 #elif defined(_MSC_VER)  // (#if-chain ref AVOID_CPU_ARCH_1)
 
diff --git a/internal/cgen/data/data.go b/internal/cgen/data/data.go
index 413ee06..8a83223 100644
--- a/internal/cgen/data/data.go
+++ b/internal/cgen/data/data.go
@@ -54,10 +54,10 @@
 	"// ---------------- Version\n\n// WUFFS_VERSION is the major.minor.patch version, as per https://semver.org/,\n// as a uint64_t. The major number is the high 32 bits. The minor number is the\n// middle 16 bits. The patch number is the low 16 bits. The pre-release label\n// and build metadata are part of the string representation (such as\n// \"1.2.3-beta+456.20181231\") but not the uint64_t representation.\n//\n// WUFFS_VERSION_PRE_RELEASE_LABEL (such as \"\", \"beta\" or \"rc.1\") being\n// non-empty denotes a developer preview, not a release version, and has no\n// backwards or forwards compatibility guarantees.\n//\n// WUFFS_VERSION_BUILD_METADATA_XXX, if non-zero, are the number of commits and\n// the last commit date in the repository used to build this library. Within\n// each major.minor branch, the commit count should increase monotonically.\n//\n// ¡ Some code generation programs can override WUFFS_VERSION.\n#define WUFFS_VERSION 0\n#define WUFFS_VERSION_MAJOR 0\n#define WUFFS_VERSION_MINOR 0\n#define WUFFS_VERSION_PATCH 0\n#de" +
 	"fine WUFFS_VERSION_PRE_RELEASE_LABEL \"work.in.progress\"\n#define WUFFS_VERSION_BUILD_METADATA_COMMIT_COUNT 0\n#define WUFFS_VERSION_BUILD_METADATA_COMMIT_DATE 0\n#define WUFFS_VERSION_STRING \"0.0.0+0.00000000\"\n\n" +
 	"" +
-	"// ---------------- Configuration\n\n// Define WUFFS_CONFIG__AVOID_CPU_ARCH to avoid any code tied to a specific CPU\n// architecture, such as SSE SIMD for the x86 CPU family.\n#if defined(WUFFS_CONFIG__AVOID_CPU_ARCH)  // (#if-chain ref AVOID_CPU_ARCH_0)\n// No-op.\n#else  // (#if-chain ref AVOID_CPU_ARCH_0)\n\n// The \"defined(__clang__)\" isn't redundant. While vanilla clang defines\n// __GNUC__, clang-cl (which mimics MSVC's cl.exe) does not.\n#if defined(__GNUC__) || defined(__clang__)\n#define WUFFS_BASE__MAYBE_ATTRIBUTE_TARGET(arg) __attribute__((target(arg)))\n#else\n#define WUFFS_BASE__MAYBE_ATTRIBUTE_TARGET(arg)\n#endif  // defined(__GNUC__) || defined(__clang__)\n\n#if defined(__GNUC__)  // (#if-chain ref AVOID_CPU_ARCH_1)\n\n// To simplify Wuffs code, \"cpu_arch >= arm_xxx\" requires xxx but also\n// unaligned little-endian load/stores.\n#if defined(__ARM_FEATURE_UNALIGNED) && defined(__BYTE_ORDER__) && \\\n    (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)\n// Not all gcc versions define __ARM_ACLE, even if they support crc32" +
-	"\n// intrinsics. Look for __ARM_FEATURE_CRC32 instead.\n#if defined(__ARM_FEATURE_CRC32)\n#include <arm_acle.h>\n#define WUFFS_BASE__CPU_ARCH__ARM_CRC32\n#endif  // defined(__ARM_FEATURE_CRC32)\n#if defined(__ARM_NEON)\n#include <arm_neon.h>\n#define WUFFS_BASE__CPU_ARCH__ARM_NEON\n#endif  // defined(__ARM_NEON)\n#endif  // defined(__ARM_FEATURE_UNALIGNED) etc\n\n// Similarly, \"cpu_arch >= x86_sse42\" requires SSE4.2 but also PCLMUL and\n// POPCNT. This is checked at runtime via cpuid, not at compile time.\n//\n// Likewise, \"cpu_arch >= x86_avx2\" also requires PCLMUL, POPCNT and SSE4.2.\n#if defined(__x86_64__)\n#include <cpuid.h>\n#include <x86intrin.h>\n#define WUFFS_BASE__CPU_ARCH__X86_64\n#endif  // defined(__x86_64__)\n\n#elif defined(_MSC_VER)  // (#if-chain ref AVOID_CPU_ARCH_1)\n\n#if defined(_M_X64)\n#if defined(__AVX__) || defined(__clang__)\n\n// We need <intrin.h> for the __cpuid function.\n#include <intrin.h>\n// That's not enough for X64 SIMD, with clang-cl, if we want to use\n// \"__attribute__((target(arg)))\" without e.g. \"/" +
-	"arch:AVX\".\n//\n// Some web pages suggest that <immintrin.h> is all you need, as it pulls in\n// the earlier SIMD families like SSE4.2, but that doesn't seem to work in\n// practice, possibly for the same reason that just <intrin.h> doesn't work.\n#include <immintrin.h>  // AVX, AVX2, FMA, POPCNT\n#include <nmmintrin.h>  // SSE4.2\n#include <wmmintrin.h>  // AES, PCLMUL\n#define WUFFS_BASE__CPU_ARCH__X86_64\n\n#else  // defined(__AVX__) || defined(__clang__)\n\n// clang-cl (which defines both __clang__ and _MSC_VER) supports\n// \"__attribute__((target(arg)))\".\n//\n// For MSVC's cl.exe (unlike clang or gcc), SIMD capability is a compile-time\n// property of the source file (e.g. a /arch:AVX or -mavx compiler flag), not\n// of individual functions (that can be conditionally selected at runtime).\n#pragma message(\"Wuffs with MSVC+X64 needs /arch:AVX for best performance\")\n\n#endif  // defined(__AVX__) || defined(__clang__)\n#endif  // defined(_M_X64)\n\n#endif  // (#if-chain ref AVOID_CPU_ARCH_1)\n#endif  // (#if-chain ref AVOID_CPU_" +
-	"ARCH_0)\n\n" +
+	"// ---------------- Configuration\n\n// Define WUFFS_CONFIG__AVOID_CPU_ARCH to avoid any code tied to a specific CPU\n// architecture, such as SSE SIMD for the x86 CPU family.\n#if defined(WUFFS_CONFIG__AVOID_CPU_ARCH)  // (#if-chain ref AVOID_CPU_ARCH_0)\n// No-op.\n#else  // (#if-chain ref AVOID_CPU_ARCH_0)\n\n// The \"defined(__clang__)\" isn't redundant. While vanilla clang defines\n// __GNUC__, clang-cl (which mimics MSVC's cl.exe) does not.\n#if defined(__GNUC__) || defined(__clang__)\n#define WUFFS_BASE__MAYBE_ATTRIBUTE_TARGET(arg) __attribute__((target(arg)))\n#else\n#define WUFFS_BASE__MAYBE_ATTRIBUTE_TARGET(arg)\n#endif  // defined(__GNUC__) || defined(__clang__)\n\n#if defined(__GNUC__)  // (#if-chain ref AVOID_CPU_ARCH_1)\n\n// To simplify Wuffs code, \"cpu_arch >= arm_xxx\" requires xxx but also\n// unaligned little-endian load/stores.\n#if defined(__ARM_FEATURE_UNALIGNED) && !defined(__native_client__) && \\\n    defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)\n// Not all gcc versions define __ARM_A" +
+	"CLE, even if they support crc32\n// intrinsics. Look for __ARM_FEATURE_CRC32 instead.\n#if defined(__ARM_FEATURE_CRC32)\n#include <arm_acle.h>\n#define WUFFS_BASE__CPU_ARCH__ARM_CRC32\n#endif  // defined(__ARM_FEATURE_CRC32)\n#if defined(__ARM_NEON)\n#include <arm_neon.h>\n#define WUFFS_BASE__CPU_ARCH__ARM_NEON\n#endif  // defined(__ARM_NEON)\n#endif  // defined(__ARM_FEATURE_UNALIGNED) etc\n\n// Similarly, \"cpu_arch >= x86_sse42\" requires SSE4.2 but also PCLMUL and\n// POPCNT. This is checked at runtime via cpuid, not at compile time.\n//\n// Likewise, \"cpu_arch >= x86_avx2\" also requires PCLMUL, POPCNT and SSE4.2.\n#if defined(__x86_64__) && !defined(__native_client__)\n#include <cpuid.h>\n#include <x86intrin.h>\n#define WUFFS_BASE__CPU_ARCH__X86_64\n#endif  // defined(__x86_64__) && !defined(__native_client__)\n\n#elif defined(_MSC_VER)  // (#if-chain ref AVOID_CPU_ARCH_1)\n\n#if defined(_M_X64)\n#if defined(__AVX__) || defined(__clang__)\n\n// We need <intrin.h> for the __cpuid function.\n#include <intrin.h>\n// That's not enough for" +
+	" X64 SIMD, with clang-cl, if we want to use\n// \"__attribute__((target(arg)))\" without e.g. \"/arch:AVX\".\n//\n// Some web pages suggest that <immintrin.h> is all you need, as it pulls in\n// the earlier SIMD families like SSE4.2, but that doesn't seem to work in\n// practice, possibly for the same reason that just <intrin.h> doesn't work.\n#include <immintrin.h>  // AVX, AVX2, FMA, POPCNT\n#include <nmmintrin.h>  // SSE4.2\n#include <wmmintrin.h>  // AES, PCLMUL\n#define WUFFS_BASE__CPU_ARCH__X86_64\n\n#else  // defined(__AVX__) || defined(__clang__)\n\n// clang-cl (which defines both __clang__ and _MSC_VER) supports\n// \"__attribute__((target(arg)))\".\n//\n// For MSVC's cl.exe (unlike clang or gcc), SIMD capability is a compile-time\n// property of the source file (e.g. a /arch:AVX or -mavx compiler flag), not\n// of individual functions (that can be conditionally selected at runtime).\n#pragma message(\"Wuffs with MSVC+X64 needs /arch:AVX for best performance\")\n\n#endif  // defined(__AVX__) || defined(__clang__)\n#endif  // defi" +
+	"ned(_M_X64)\n\n#endif  // (#if-chain ref AVOID_CPU_ARCH_1)\n#endif  // (#if-chain ref AVOID_CPU_ARCH_0)\n\n" +
 	"" +
 	"// --------\n\n// Define WUFFS_CONFIG__STATIC_FUNCTIONS to make all of Wuffs' functions have\n// static storage. The motivation is discussed in the \"ALLOW STATIC\n// IMPLEMENTATION\" section of\n// https://raw.githubusercontent.com/nothings/stb/master/docs/stb_howto.txt\n#if defined(WUFFS_CONFIG__STATIC_FUNCTIONS)\n#define WUFFS_BASE__MAYBE_STATIC static\n#else\n#define WUFFS_BASE__MAYBE_STATIC\n#endif  // defined(WUFFS_CONFIG__STATIC_FUNCTIONS)\n\n" +
 	"" +
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index 02ce6b7..6bbfa58 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -99,8 +99,8 @@
 
 // To simplify Wuffs code, "cpu_arch >= arm_xxx" requires xxx but also
 // unaligned little-endian load/stores.
-#if defined(__ARM_FEATURE_UNALIGNED) && defined(__BYTE_ORDER__) && \
-    (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+#if defined(__ARM_FEATURE_UNALIGNED) && !defined(__native_client__) && \
+    defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
 // Not all gcc versions define __ARM_ACLE, even if they support crc32
 // intrinsics. Look for __ARM_FEATURE_CRC32 instead.
 #if defined(__ARM_FEATURE_CRC32)
@@ -117,11 +117,11 @@
 // POPCNT. This is checked at runtime via cpuid, not at compile time.
 //
 // Likewise, "cpu_arch >= x86_avx2" also requires PCLMUL, POPCNT and SSE4.2.
-#if defined(__x86_64__)
+#if defined(__x86_64__) && !defined(__native_client__)
 #include <cpuid.h>
 #include <x86intrin.h>
 #define WUFFS_BASE__CPU_ARCH__X86_64
-#endif  // defined(__x86_64__)
+#endif  // defined(__x86_64__) && !defined(__native_client__)
 
 #elif defined(_MSC_VER)  // (#if-chain ref AVOID_CPU_ARCH_1)