Split out memset opts into separate cpp files

The memset routines used to live in SkUtils_opts.h and were wired up through
SkOpts. They are renamed so that the function pointers are declared in
SkMemset.h and the implementations are defined in SkMemset_opts.h.

Bug: b/40045064
Bug: b/40045066
Change-Id: Idefa5b8d5f59595ddce907742f50e02174e42964
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/740638
Commit-Queue: Brian Osman <brianosman@google.com>
Reviewed-by: Kevin Lubick <kjlubick@google.com>
diff --git a/bench/MemsetBench.cpp b/bench/MemsetBench.cpp
index ad87e18..aae390a 100644
--- a/bench/MemsetBench.cpp
+++ b/bench/MemsetBench.cpp
@@ -7,7 +7,7 @@
 
 #include "bench/Benchmark.h"
 #include "include/private/base/SkTemplates.h"
-#include "src/core/SkOpts.h"
+#include "src/core/SkMemset.h"
 
 using namespace skia_private;
 
diff --git a/gn/core.gni b/gn/core.gni
index 87391cd..91f4f57 100644
--- a/gn/core.gni
+++ b/gn/core.gni
@@ -421,6 +421,10 @@
   "$_src/core/SkMatrixInvert.h",
   "$_src/core/SkMatrixPriv.h",
   "$_src/core/SkMatrixUtils.h",
+  "$_src/core/SkMemset.h",
+  "$_src/core/SkMemset_opts.cpp",
+  "$_src/core/SkMemset_opts_avx.cpp",
+  "$_src/core/SkMemset_opts_erms.cpp",
   "$_src/core/SkMessageBus.h",
   "$_src/core/SkMipmap.cpp",
   "$_src/core/SkMipmap.h",
@@ -432,7 +436,6 @@
   "$_src/core/SkOSFile.h",
   "$_src/core/SkOpts.cpp",
   "$_src/core/SkOpts.h",
-  "$_src/core/SkOpts_erms.cpp",
   "$_src/core/SkOverdrawCanvas.cpp",
   "$_src/core/SkPaint.cpp",
   "$_src/core/SkPaintDefaults.h",
@@ -604,11 +607,11 @@
   "$_src/opts/SkBitmapProcState_opts.h",
   "$_src/opts/SkBlitMask_opts.h",
   "$_src/opts/SkBlitRow_opts.h",
+  "$_src/opts/SkMemset_opts.h",
   "$_src/opts/SkOpts_RestoreTarget.h",
   "$_src/opts/SkOpts_SetTarget.h",
   "$_src/opts/SkRasterPipeline_opts.h",
   "$_src/opts/SkSwizzler_opts.h",
-  "$_src/opts/SkUtils_opts.h",
   "$_src/shaders/SkBitmapProcShader.cpp",
   "$_src/shaders/SkBitmapProcShader.h",
   "$_src/shaders/SkBlendShader.cpp",
diff --git a/public.bzl b/public.bzl
index bc808e4..cc96869 100644
--- a/public.bzl
+++ b/public.bzl
@@ -535,6 +535,10 @@
     "src/core/SkMatrixInvert.h",
     "src/core/SkMatrixPriv.h",
     "src/core/SkMatrixUtils.h",
+    "src/core/SkMemset.h",
+    "src/core/SkMemset_opts.cpp",
+    "src/core/SkMemset_opts_avx.cpp",
+    "src/core/SkMemset_opts_erms.cpp",
     "src/core/SkMesh.cpp",
     "src/core/SkMeshPriv.h",
     "src/core/SkMessageBus.h",
@@ -548,7 +552,6 @@
     "src/core/SkOSFile.h",
     "src/core/SkOpts.cpp",
     "src/core/SkOpts.h",
-    "src/core/SkOpts_erms.cpp",
     "src/core/SkOrderedReadBuffer.h",
     "src/core/SkOverdrawCanvas.cpp",
     "src/core/SkPaint.cpp",
@@ -1297,11 +1300,11 @@
     "src/opts/SkBitmapProcState_opts.h",
     "src/opts/SkBlitMask_opts.h",
     "src/opts/SkBlitRow_opts.h",
+    "src/opts/SkMemset_opts.h",
     "src/opts/SkOpts_RestoreTarget.h",
     "src/opts/SkOpts_SetTarget.h",
     "src/opts/SkRasterPipeline_opts.h",
     "src/opts/SkSwizzler_opts.h",
-    "src/opts/SkUtils_opts.h",
     "src/pathops/SkAddIntersections.cpp",
     "src/pathops/SkAddIntersections.h",
     "src/pathops/SkDConicLineIntersection.cpp",
diff --git a/src/codec/SkPngCodec.cpp b/src/codec/SkPngCodec.cpp
index 1953be4..307f276 100644
--- a/src/codec/SkPngCodec.cpp
+++ b/src/codec/SkPngCodec.cpp
@@ -26,7 +26,7 @@
 #include "src/codec/SkColorPalette.h"
 #include "src/codec/SkPngPriv.h"
 #include "src/codec/SkSwizzler.h"
-#include "src/core/SkOpts.h"
+#include "src/core/SkMemset.h"
 
 #include <csetjmp>
 #include <algorithm>
diff --git a/src/codec/SkSampler.cpp b/src/codec/SkSampler.cpp
index 3a6832c1..c8e8796 100644
--- a/src/codec/SkSampler.cpp
+++ b/src/codec/SkSampler.cpp
@@ -12,7 +12,7 @@
 #include "include/core/SkImageInfo.h"
 #include "include/private/base/SkTemplates.h"
 #include "src/codec/SkCodecPriv.h"
-#include "src/core/SkOpts.h"
+#include "src/core/SkMemset.h"
 
 #include <cstdint>
 #include <cstring>
diff --git a/src/core/BUILD.bazel b/src/core/BUILD.bazel
index d130463..ed73da8 100644
--- a/src/core/BUILD.bazel
+++ b/src/core/BUILD.bazel
@@ -194,6 +194,10 @@
     "SkMatrix.cpp",
     "SkMatrixPriv.h",
     "SkMatrixUtils.h",
+    "SkMemset.h",
+    "SkMemset_opts.cpp",
+    "SkMemset_opts_avx.cpp",
+    "SkMemset_opts_erms.cpp",
     "SkMessageBus.h",
     "SkMipmap.cpp",
     "SkMipmap.h",
@@ -205,7 +209,6 @@
     "SkOSFile.h",
     "SkOpts.cpp",
     "SkOpts.h",
-    "SkOpts_erms.cpp",
     "SkOverdrawCanvas.cpp",
     "SkPaint.cpp",
     "SkPaintDefaults.h",
diff --git a/src/core/SkAlphaRuns.cpp b/src/core/SkAlphaRuns.cpp
index aab0275..083c7f3 100644
--- a/src/core/SkAlphaRuns.cpp
+++ b/src/core/SkAlphaRuns.cpp
@@ -6,7 +6,7 @@
  */
 #include "src/core/SkAlphaRuns.h"
 
-#include "src/core/SkOpts.h"
+#include "src/core/SkMemset.h"
 
 void SkAlphaRuns::reset(int width) {
     SkASSERT(width > 0);
diff --git a/src/core/SkBitmapProcState.cpp b/src/core/SkBitmapProcState.cpp
index 00a31ff..37cf4c2 100644
--- a/src/core/SkBitmapProcState.cpp
+++ b/src/core/SkBitmapProcState.cpp
@@ -14,8 +14,8 @@
 #include "include/core/SkTileMode.h"
 #include "include/private/base/SkMacros.h"
 #include "include/private/base/SkTPin.h"
+#include "src/core/SkMemset.h"
 #include "src/core/SkMipmapAccessor.h"
-#include "src/core/SkOpts.h"
 
 #include <algorithm>
 #include <cstring>
diff --git a/src/core/SkBitmapProcState_matrixProcs.cpp b/src/core/SkBitmapProcState_matrixProcs.cpp
index 92932f3..b6f0a77 100644
--- a/src/core/SkBitmapProcState_matrixProcs.cpp
+++ b/src/core/SkBitmapProcState_matrixProcs.cpp
@@ -16,7 +16,7 @@
 #include "include/private/base/SkTPin.h"
 #include "include/private/base/SkTo.h"
 #include "src/core/SkBitmapProcState.h"
-#include "src/core/SkOpts.h"
+#include "src/core/SkMemset.h"
 
 #include <cstdint>
 #include <cstring>
diff --git a/src/core/SkBlitRow_D32.cpp b/src/core/SkBlitRow_D32.cpp
index 131018d..4e9dfec 100644
--- a/src/core/SkBlitRow_D32.cpp
+++ b/src/core/SkBlitRow_D32.cpp
@@ -11,7 +11,7 @@
 #include "include/private/SkColorData.h"
 #include "include/private/base/SkCPUTypes.h"
 #include "src/core/SkBlitRow.h"
-#include "src/core/SkOpts.h"
+#include "src/core/SkMemset.h"
 
 #include <cstring>
 #include <iterator>
diff --git a/src/core/SkBlitter.cpp b/src/core/SkBlitter.cpp
index 4a84745..5dd978e 100644
--- a/src/core/SkBlitter.cpp
+++ b/src/core/SkBlitter.cpp
@@ -29,7 +29,7 @@
 #include "src/core/SkCoreBlitters.h"
 #include "src/core/SkMask.h"
 #include "src/core/SkMaskFilterBase.h"
-#include "src/core/SkOpts.h"
+#include "src/core/SkMemset.h"
 #include "src/core/SkPaintPriv.h"
 #include "src/core/SkRegionPriv.h"
 #include "src/shaders/SkShaderBase.h"
diff --git a/src/core/SkBlitter_ARGB32.cpp b/src/core/SkBlitter_ARGB32.cpp
index 7cdb27f..be411cf 100644
--- a/src/core/SkBlitter_ARGB32.cpp
+++ b/src/core/SkBlitter_ARGB32.cpp
@@ -23,7 +23,7 @@
 #include "src/core/SkBlitRow.h"
 #include "src/core/SkCoreBlitters.h"
 #include "src/core/SkMask.h"
-#include "src/core/SkOpts.h"
+#include "src/core/SkMemset.h"
 #include "src/shaders/SkShaderBase.h"
 
 #include <cstddef>
diff --git a/src/core/SkDevice.cpp b/src/core/SkDevice.cpp
index 5ebdffe..a052ac1 100644
--- a/src/core/SkDevice.cpp
+++ b/src/core/SkDevice.cpp
@@ -24,7 +24,7 @@
 #include "src/core/SkImagePriv.h"
 #include "src/core/SkLatticeIter.h"
 #include "src/core/SkMatrixPriv.h"
-#include "src/core/SkOpts.h"
+#include "src/core/SkMemset.h"
 #include "src/core/SkPathPriv.h"
 #include "src/core/SkRasterClip.h"
 #include "src/core/SkRectPriv.h"
diff --git a/src/core/SkGraphics.cpp b/src/core/SkGraphics.cpp
index 0211fd3..35c97b1 100644
--- a/src/core/SkGraphics.cpp
+++ b/src/core/SkGraphics.cpp
@@ -26,6 +26,7 @@
 #include "src/core/SkCpu.h"
 #include "src/core/SkGeometry.h"
 #include "src/core/SkImageFilter_Base.h"
+#include "src/core/SkMemset.h"
 #include "src/core/SkOpts.h"
 #include "src/core/SkResourceCache.h"
 #include "src/core/SkScalerContext.h"
@@ -41,6 +42,7 @@
     SkOpts::Init_BitmapProcState();
     SkOpts::Init_BlitMask();
     SkOpts::Init_BlitRow();
+    SkOpts::Init_Memset();
     SkOpts::Init_Swizzler();
 }
 
diff --git a/src/core/SkMemset.h b/src/core/SkMemset.h
new file mode 100644
index 0000000..229fd06
--- /dev/null
+++ b/src/core/SkMemset.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2023 Google LLC
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef SkMemset_DEFINED
+#define SkMemset_DEFINED
+
+#include <cstddef>
+#include <cstdint>
+
+namespace SkOpts {  // Function pointers to the currently selected memset routines.
+    extern void (*memset16)(uint16_t[], uint16_t, int);  // (dst, value, count)
+    extern void (*memset32)(uint32_t[], uint32_t, int);  // (dst, value, count)
+    extern void (*memset64)(uint64_t[], uint64_t, int);  // (dst, value, count)
+    // Rectangle variants: (dst, value, count per row, rowBytes, height).
+    extern void (*rect_memset16)(uint16_t[], uint16_t, int, size_t, int);
+    extern void (*rect_memset32)(uint32_t[], uint32_t, int, size_t, int);
+    extern void (*rect_memset64)(uint64_t[], uint64_t, int, size_t, int);
+    // May replace the pointers above with CPU-specific routines; SkGraphics::Init() calls it.
+    void Init_Memset();
+}  // namespace SkOpts
+
+#endif // SkMemset_DEFINED
diff --git a/src/core/SkMemset_opts.cpp b/src/core/SkMemset_opts.cpp
new file mode 100644
index 0000000..8509bcd
--- /dev/null
+++ b/src/core/SkMemset_opts.cpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2023 Google LLC
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "include/private/base/SkFeatures.h"
+#include "src/core/SkCpu.h"
+#include "src/core/SkMemset.h"
+#include "src/core/SkOpts.h"
+
+#define SK_OPTS_TARGET SK_OPTS_TARGET_DEFAULT
+#include "src/opts/SkOpts_SetTarget.h"
+
+#include "src/opts/SkMemset_opts.h"  // IWYU pragma: keep
+
+#include "src/opts/SkOpts_RestoreTarget.h"
+
+namespace SkOpts {  // NOTE(review): DEFINE_DEFAULT is defined elsewhere (not shown here).
+    DEFINE_DEFAULT(memset16);  // presumably defines the pointer with its default routine
+    DEFINE_DEFAULT(memset32);
+    DEFINE_DEFAULT(memset64);
+    // Rectangle (multi-row) variants.
+    DEFINE_DEFAULT(rect_memset16);
+    DEFINE_DEFAULT(rect_memset32);
+    DEFINE_DEFAULT(rect_memset64);
+    // Defined in SkMemset_opts_avx.cpp and SkMemset_opts_erms.cpp respectively.
+    void Init_Memset_avx();
+    void Init_Memset_erms();
+    // Installs CPU-specific routines over the defaults when the running CPU supports them.
+    static bool init() {
+    #if defined(SK_ENABLE_OPTIMIZE_SIZE)
+        // No CPU-specific routines are installed when optimizing for size; defaults stay active.
+    #elif defined(SK_CPU_X86)
+        #if SK_CPU_SSE_LEVEL < SK_CPU_SSE_LEVEL_AVX
+            if (SkCpu::Supports(SkCpu::AVX)) { Init_Memset_avx(); }
+        #endif
+        // ERMS goes last: Init_Memset_erms() saves whatever is installed now as its fallback.
+        if (SkCpu::Supports(SkCpu::ERMS)) { Init_Memset_erms(); }
+    #endif
+      return true;  // always true; only used to initialize the static in Init_Memset()
+    }
+    // Public entry point: init() runs as the initializer of a function-local static.
+    void Init_Memset() {
+        [[maybe_unused]] static bool gInitialized = init();
+    }
+}  // namespace SkOpts
diff --git a/src/core/SkMemset_opts_avx.cpp b/src/core/SkMemset_opts_avx.cpp
new file mode 100644
index 0000000..87e9bd5
--- /dev/null
+++ b/src/core/SkMemset_opts_avx.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2023 Google LLC
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "include/private/base/SkFeatures.h"
+#include "src/core/SkMemset.h"
+#include "src/core/SkOpts.h"
+
+#if defined(SK_CPU_X86) && !defined(SK_ENABLE_OPTIMIZE_SIZE)
+
+// The order of these includes is important:
+// 1) Select the target CPU architecture by defining SK_OPTS_TARGET and including SkOpts_SetTarget
+// 2) Include the code to compile, typically in a _opts.h file.
+// 3) Include SkOpts_RestoreTarget to switch back to the default CPU architecture
+
+#define SK_OPTS_TARGET SK_OPTS_TARGET_AVX
+#include "src/opts/SkOpts_SetTarget.h"
+
+#include "src/opts/SkMemset_opts.h"
+
+#include "src/opts/SkOpts_RestoreTarget.h"
+
+namespace SkOpts {
+    void Init_Memset_avx() {  // Repoints all six SkOpts memset pointers at the avx:: routines.
+        memset16 = avx::memset16;  // avx:: presumably comes from SkMemset_opts.h built for AVX
+        memset32 = avx::memset32;
+        memset64 = avx::memset64;
+        // Rectangle (multi-row) variants.
+        rect_memset16 = avx::rect_memset16;
+        rect_memset32 = avx::rect_memset32;
+        rect_memset64 = avx::rect_memset64;
+    }
+}  // namespace SkOpts
+
+#endif // SK_CPU_X86 && !SK_ENABLE_OPTIMIZE_SIZE
diff --git a/src/core/SkMemset_opts_erms.cpp b/src/core/SkMemset_opts_erms.cpp
new file mode 100644
index 0000000..763b312
--- /dev/null
+++ b/src/core/SkMemset_opts_erms.cpp
@@ -0,0 +1,118 @@
+/*
+ * Copyright 2020 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "include/private/base/SkFeatures.h"
+#include "src/base/SkMSAN.h"
+#include "src/core/SkMemset.h"
+
+// memset16 and memset32 could work on 32-bit x86 too, but for simplicity just use this on x64
+#if (defined(__x86_64__) || defined(_M_X64)) && !defined(SK_ENABLE_OPTIMIZE_SIZE)
+
+static const char* note = "MSAN can't see that rep sto initializes memory.";
+// repsto(dst, v, n): store n copies of v starting at dst with a "rep stos" of matching width.
+#if defined(_MSC_VER)  // MSVC: use the __stosw/__stosd/__stosq intrinsics
+#include <intrin.h>
+static inline void repsto(uint16_t* dst, uint16_t v, size_t n) {
+    sk_msan_mark_initialized(dst, dst + n, note);  // covers [dst, dst + n); reason is in `note`
+    __stosw(dst, v, n);
+}
+static inline void repsto(uint32_t* dst, uint32_t v, size_t n) {
+    sk_msan_mark_initialized(dst, dst + n, note);
+    static_assert(sizeof(uint32_t) == sizeof(unsigned long));  // makes the cast below size-safe
+    __stosd(reinterpret_cast<unsigned long*>(dst), v, n);
+}
+static inline void repsto(uint64_t* dst, uint64_t v, size_t n) {
+    sk_msan_mark_initialized(dst, dst + n, note);
+    __stosq(dst, v, n);
+}
+#else  // other compilers: inline asm
+static inline void repsto(uint16_t* dst, uint16_t v, size_t n) {  // "D"=dst, "c"=count, "a"=value
+    sk_msan_mark_initialized(dst, dst + n, note);  // covers [dst, dst + n); reason is in `note`
+    asm volatile("rep stosw" : "+D"(dst), "+c"(n) : "a"(v) : "memory");
+}
+static inline void repsto(uint32_t* dst, uint32_t v, size_t n) {
+    sk_msan_mark_initialized(dst, dst + n, note);
+    asm volatile("rep stosl" : "+D"(dst), "+c"(n) : "a"(v) : "memory");
+}
+static inline void repsto(uint64_t* dst, uint64_t v, size_t n) {
+    sk_msan_mark_initialized(dst, dst + n, note);
+    asm volatile("rep stosq" : "+D"(dst), "+c"(n) : "a"(v) : "memory");
+}
+#endif
+
+// ERMS (Enhanced REP MOVSB/STOSB) is ideal for large fills but has a relatively high setup cost,
+// so small inputs use the previous best routine.  FSRM (Fast Short REP MOV) would make this moot.
+static void (*g_memset16_prev)(uint16_t*, uint16_t, int);  // all six are set in Init_Memset_erms()
+static void (*g_memset32_prev)(uint32_t*, uint32_t, int);
+static void (*g_memset64_prev)(uint64_t*, uint64_t, int);
+static void (*g_rect_memset16_prev)(uint16_t*, uint16_t, int, size_t, int);
+static void (*g_rect_memset32_prev)(uint32_t*, uint32_t, int, size_t, int);
+static void (*g_rect_memset64_prev)(uint64_t*, uint64_t, int, size_t, int);
+// A fill is "small" when it covers fewer than 1024 bytes.
+// Threshold empirically determined with `nanobench -m memset`.
+static bool small(size_t bytes) { return bytes < 1024; }
+
+namespace erms {  // Dispatchers: saved *_prev routine for small fills, repsto() otherwise.
+// NOTE(review): sizeof(v) * n is evaluated as size_t, so these assume n >= 0 — confirm callers.
+static inline void memset16(uint16_t* dst, uint16_t v, int n) {
+    return small(sizeof(v) * n) ? g_memset16_prev(dst, v, n) : repsto(dst, v, n);
+}
+static inline void memset32(uint32_t* dst, uint32_t v, int n) {
+    return small(sizeof(v) * n) ? g_memset32_prev(dst, v, n) : repsto(dst, v, n);
+}
+static inline void memset64(uint64_t* dst, uint64_t v, int n) {
+    return small(sizeof(v) * n) ? g_memset64_prev(dst, v, n) : repsto(dst, v, n);
+}
+// Rect variants: the small/large choice uses one row's width (n elements), not the whole rect.
+static inline void rect_memset16(uint16_t* dst, uint16_t v, int n, size_t rowBytes, int height) {
+    if (small(sizeof(v) * n)) {
+        return g_rect_memset16_prev(dst, v, n, rowBytes, height);
+    }
+    for (int stride = rowBytes / sizeof(v); height-- > 0; dst += stride) {  // stride in elements
+        repsto(dst, v, n);
+    }
+}
+static inline void rect_memset32(uint32_t* dst, uint32_t v, int n, size_t rowBytes, int height) {
+    if (small(sizeof(v) * n)) {
+        return g_rect_memset32_prev(dst, v, n, rowBytes, height);
+    }
+    for (int stride = rowBytes / sizeof(v); height-- > 0; dst += stride) {  // stride in elements
+        repsto(dst, v, n);
+    }
+}
+static inline void rect_memset64(uint64_t* dst, uint64_t v, int n, size_t rowBytes, int height) {
+    if (small(sizeof(v) * n)) {
+        return g_rect_memset64_prev(dst, v, n, rowBytes, height);
+    }
+    for (int stride = rowBytes / sizeof(v); height-- > 0; dst += stride) {  // stride in elements
+        repsto(dst, v, n);
+    }
+}
+
+}  // namespace erms
+
+#endif // X86_64 && !SK_ENABLE_OPTIMIZE_SIZE
+
+namespace SkOpts {
+    void Init_Memset_erms() {  // Empty unless x86-64 and !SK_ENABLE_OPTIMIZE_SIZE.
+        #if (defined(__x86_64__) || defined(_M_X64)) && !defined(SK_ENABLE_OPTIMIZE_SIZE)
+            g_memset16_prev      = memset16;  // remember the currently installed routines...
+            g_memset32_prev      = memset32;
+            g_memset64_prev      = memset64;
+            g_rect_memset16_prev = rect_memset16;
+            g_rect_memset32_prev = rect_memset32;
+            g_rect_memset64_prev = rect_memset64;
+            // ...then install the erms:: dispatchers, which call the saved ones for small fills.
+            memset16      = erms::memset16;
+            memset32      = erms::memset32;
+            memset64      = erms::memset64;
+            rect_memset16 = erms::rect_memset16;
+            rect_memset32 = erms::rect_memset32;
+            rect_memset64 = erms::rect_memset64;
+        #endif  // X86_64 && !SK_ENABLE_OPTIMIZE_SIZE
+    }
+}  // namespace SkOpts
diff --git a/src/core/SkOpts.cpp b/src/core/SkOpts.cpp
index d7bb2f9..5efa581 100644
--- a/src/core/SkOpts.cpp
+++ b/src/core/SkOpts.cpp
@@ -5,8 +5,7 @@
  * found in the LICENSE file.
  */
 
-#include "include/private/base/SkOnce.h"
-#include "src/base/SkHalf.h"
+#include "include/private/base/SkFeatures.h"
 #include "src/core/SkCpu.h"
 #include "src/core/SkOpts.h"
 
@@ -14,7 +13,6 @@
 #include "src/opts/SkOpts_SetTarget.h"
 
 #include "src/opts/SkRasterPipeline_opts.h"
-#include "src/opts/SkUtils_opts.h"
 
 #include "src/opts/SkOpts_RestoreTarget.h"
 
@@ -23,16 +21,6 @@
     // If our global compile options are set high enough, these defaults might even be
     // CPU-specialized, e.g. a typical x86-64 machine might start with SSE2 defaults.
     // They'll still get a chance to be replaced with even better ones, e.g. using SSE4.1.
-    DEFINE_DEFAULT(memset16);
-    DEFINE_DEFAULT(memset32);
-    DEFINE_DEFAULT(memset64);
-
-    DEFINE_DEFAULT(rect_memset16);
-    DEFINE_DEFAULT(rect_memset32);
-    DEFINE_DEFAULT(rect_memset64);
-
-#undef DEFINE_DEFAULT
-
     size_t raster_pipeline_lowp_stride  = SK_OPTS_NS::raster_pipeline_lowp_stride();
     size_t raster_pipeline_highp_stride = SK_OPTS_NS::raster_pipeline_highp_stride();
 
@@ -51,28 +39,20 @@
 #undef M
 
     // Each Init_foo() is defined in src/opts/SkOpts_foo.cpp.
-    void Init_avx();
     void Init_hsw();
-    void Init_erms();
 
-    static void init() {
+    static bool init() {
     #if defined(SK_ENABLE_OPTIMIZE_SIZE)
         // All Init_foo functions are omitted when optimizing for size
     #elif defined(SK_CPU_X86)
-        #if SK_CPU_SSE_LEVEL < SK_CPU_SSE_LEVEL_AVX
-            if (SkCpu::Supports(SkCpu::AVX)) { Init_avx(); }
-        #endif
-
         #if SK_CPU_SSE_LEVEL < SK_CPU_SSE_LEVEL_AVX2
             if (SkCpu::Supports(SkCpu::HSW)) { Init_hsw(); }
         #endif
-
-        if (SkCpu::Supports(SkCpu::ERMS)) { Init_erms(); }
     #endif
+        return true;
     }
 
     void Init() {
-        static SkOnce once;
-        once(init);
+        [[maybe_unused]] static bool gInitialized = init();
     }
 }  // namespace SkOpts
diff --git a/src/core/SkOpts.h b/src/core/SkOpts.h
index 5b1a7db..8a1967c 100644
--- a/src/core/SkOpts.h
+++ b/src/core/SkOpts.h
@@ -8,7 +8,6 @@
 #ifndef SkOpts_DEFINED
 #define SkOpts_DEFINED
 
-#include "include/core/SkColor.h"
 #include "include/core/SkTypes.h"
 #include "src/core/SkRasterPipelineOpList.h"
 
@@ -57,9 +56,6 @@
  */
 
 struct SkRasterPipelineStage;
-namespace SkSL {
-class TraceHook;
-}
 
 #define SK_OPTS_TARGET_DEFAULT 0x00
 #define SK_OPTS_TARGET_SSSE3   0x01
@@ -72,14 +68,6 @@
     // Called by SkGraphics::Init().
     void Init();
 
-    extern void (*memset16)(uint16_t[], uint16_t, int);
-    extern void (*memset32)(uint32_t[], uint32_t, int);
-    extern void (*memset64)(uint64_t[], uint64_t, int);
-
-    extern void (*rect_memset16)(uint16_t[], uint16_t, int, size_t, int);
-    extern void (*rect_memset32)(uint32_t[], uint32_t, int, size_t, int);
-    extern void (*rect_memset64)(uint64_t[], uint64_t, int, size_t, int);
-
     // We can't necessarily express the type of SkRasterPipeline stage functions here,
     // so we just use this void(*)(void) as a stand-in.
     using StageFn = void(*)(void);
diff --git a/src/core/SkOpts_erms.cpp b/src/core/SkOpts_erms.cpp
deleted file mode 100644
index 4e1e096..0000000
--- a/src/core/SkOpts_erms.cpp
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Copyright 2020 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-#include "src/base/SkMSAN.h"
-#include "src/core/SkOpts.h"
-
-#if defined(__x86_64__) || defined(_M_X64)  // memset16 and memset32 could work on 32-bit x86 too.
-
-    static const char* note = "MSAN can't see that rep sto initializes memory.";
-
-    #if defined(_MSC_VER)
-        #include <intrin.h>
-        static inline void repsto(uint16_t* dst, uint16_t v, size_t n) {
-            sk_msan_mark_initialized(dst,dst+n,note);
-            __stosw(dst, v, n);
-        }
-        static inline void repsto(uint32_t* dst, uint32_t v, size_t n) {
-            sk_msan_mark_initialized(dst,dst+n,note);
-            static_assert(sizeof(uint32_t) == sizeof(unsigned long));
-            __stosd(reinterpret_cast<unsigned long*>(dst), v, n);
-        }
-        static inline void repsto(uint64_t* dst, uint64_t v, size_t n) {
-            sk_msan_mark_initialized(dst,dst+n,note);
-            __stosq(dst, v, n);
-        }
-    #else
-        static inline void repsto(uint16_t* dst, uint16_t v, size_t n) {
-            sk_msan_mark_initialized(dst,dst+n,note);
-            asm volatile("rep stosw" : "+D"(dst), "+c"(n) : "a"(v) : "memory");
-        }
-        static inline void repsto(uint32_t* dst, uint32_t v, size_t n) {
-            sk_msan_mark_initialized(dst,dst+n,note);
-            asm volatile("rep stosl" : "+D"(dst), "+c"(n) : "a"(v) : "memory");
-        }
-        static inline void repsto(uint64_t* dst, uint64_t v, size_t n) {
-            sk_msan_mark_initialized(dst,dst+n,note);
-            asm volatile("rep stosq" : "+D"(dst), "+c"(n) : "a"(v) : "memory");
-        }
-    #endif
-
-    // ERMS is ideal for large copies but has a relatively high setup cost,
-    // so we use the previous best routine for small inputs.  FSRM would make this moot.
-    static void (*g_memset16_prev)(uint16_t*, uint16_t, int);
-    static void (*g_memset32_prev)(uint32_t*, uint32_t, int);
-    static void (*g_memset64_prev)(uint64_t*, uint64_t, int);
-    static void (*g_rect_memset16_prev)(uint16_t*, uint16_t, int, size_t, int);
-    static void (*g_rect_memset32_prev)(uint32_t*, uint32_t, int, size_t, int);
-    static void (*g_rect_memset64_prev)(uint64_t*, uint64_t, int, size_t, int);
-
-    // Empirically determined with `nanobench -m memset`.
-    static bool small(size_t bytes) { return bytes < 1024; }
-
-    #define SK_OPTS_NS erms
-    namespace SK_OPTS_NS {
-        static inline void memset16(uint16_t* dst, uint16_t v, int n) {
-            return small(sizeof(v)*n) ? g_memset16_prev(dst, v, n)
-                                      : repsto(dst, v, n);
-        }
-        static inline void memset32(uint32_t* dst, uint32_t v, int n) {
-            return small(sizeof(v)*n) ? g_memset32_prev(dst, v, n)
-                                      : repsto(dst, v, n);
-        }
-        static inline void memset64(uint64_t* dst, uint64_t v, int n) {
-            return small(sizeof(v)*n) ? g_memset64_prev(dst, v, n)
-                                      : repsto(dst, v, n);
-        }
-
-        static inline void rect_memset16(uint16_t* dst, uint16_t v, int n,
-                                         size_t rowBytes, int height) {
-            if (small(sizeof(v)*n)) {
-                return g_rect_memset16_prev(dst,v,n, rowBytes,height);
-            }
-            for (int stride = rowBytes/sizeof(v); height --> 0; dst += stride) {
-                repsto(dst, v, n);
-            }
-        }
-        static inline void rect_memset32(uint32_t* dst, uint32_t v, int n,
-                                         size_t rowBytes, int height) {
-            if (small(sizeof(v)*n)) {
-                return g_rect_memset32_prev(dst,v,n, rowBytes,height);
-            }
-            for (int stride = rowBytes/sizeof(v); height --> 0; dst += stride) {
-                repsto(dst, v, n);
-            }
-        }
-        static inline void rect_memset64(uint64_t* dst, uint64_t v, int n,
-                                         size_t rowBytes, int height) {
-            if (small(sizeof(v)*n)) {
-                return g_rect_memset64_prev(dst,v,n, rowBytes,height);
-            }
-            for (int stride = rowBytes/sizeof(v); height --> 0; dst += stride) {
-                repsto(dst, v, n);
-            }
-        }
-    }  // namespace SK_OPTS_NS
-
-    namespace SkOpts {
-        void Init_erms() {
-            g_memset16_prev      = memset16;
-            g_memset32_prev      = memset32;
-            g_memset64_prev      = memset64;
-            g_rect_memset16_prev = rect_memset16;
-            g_rect_memset32_prev = rect_memset32;
-            g_rect_memset64_prev = rect_memset64;
-
-            memset16      = SK_OPTS_NS::memset16;
-            memset32      = SK_OPTS_NS::memset32;
-            memset64      = SK_OPTS_NS::memset64;
-            rect_memset16 = SK_OPTS_NS::rect_memset16;
-            rect_memset32 = SK_OPTS_NS::rect_memset32;
-            rect_memset64 = SK_OPTS_NS::rect_memset64;
-        }
-    }
-#else
-    namespace SkOpts {
-        void Init_erms() {}
-    }
-#endif
diff --git a/src/core/SkPixmap.cpp b/src/core/SkPixmap.cpp
index 658bdfc..6fa6837 100644
--- a/src/core/SkPixmap.cpp
+++ b/src/core/SkPixmap.cpp
@@ -21,7 +21,7 @@
 #include "src/core/SkMask.h"
 #include "src/core/SkReadPixelsRec.h"
 #include "src/core/SkSwizzlePriv.h"
-#include "src/opts/SkUtils_opts.h"
+#include "src/opts/SkMemset_opts.h"
 
 #include <cstring>
 #include <iterator>
diff --git a/src/core/SkRasterPipelineBlitter.cpp b/src/core/SkRasterPipelineBlitter.cpp
index 13d2f7a..f244e55 100644
--- a/src/core/SkRasterPipelineBlitter.cpp
+++ b/src/core/SkRasterPipelineBlitter.cpp
@@ -19,7 +19,7 @@
 #include "src/core/SkColorSpaceXformSteps.h"
 #include "src/core/SkEffectPriv.h"
 #include "src/core/SkMask.h"
-#include "src/core/SkOpts.h"
+#include "src/core/SkMemset.h"
 #include "src/core/SkRasterPipeline.h"
 #include "src/effects/colorfilters/SkColorFilterBase.h"
 #include "src/shaders/SkShaderBase.h"
diff --git a/src/core/SkVertices.cpp b/src/core/SkVertices.cpp
index 47ceaf9..b9ba0bf 100644
--- a/src/core/SkVertices.cpp
+++ b/src/core/SkVertices.cpp
@@ -11,7 +11,6 @@
 #include "include/private/base/SkTo.h"
 #include "src/base/SkSafeMath.h"
 #include "src/core/SkCanvasPriv.h"
-#include "src/core/SkOpts.h"
 #include "src/core/SkReadBuffer.h"
 #include "src/core/SkSafeRange.h"
 #include "src/core/SkVerticesPriv.h"
diff --git a/src/gpu/ganesh/GrDrawOpAtlas.cpp b/src/gpu/ganesh/GrDrawOpAtlas.cpp
index c6aeff2..6943174 100644
--- a/src/gpu/ganesh/GrDrawOpAtlas.cpp
+++ b/src/gpu/ganesh/GrDrawOpAtlas.cpp
@@ -10,7 +10,6 @@
 #include <memory>
 
 #include "include/private/base/SkTPin.h"
-#include "src/core/SkOpts.h"
 #include "src/gpu/ganesh/GrBackendUtils.h"
 #include "src/gpu/ganesh/GrCaps.h"
 #include "src/gpu/ganesh/GrOnFlushResourceProvider.h"
diff --git a/src/gpu/ganesh/GrResourceCache.cpp b/src/gpu/ganesh/GrResourceCache.cpp
index 184b963..f67058c 100644
--- a/src/gpu/ganesh/GrResourceCache.cpp
+++ b/src/gpu/ganesh/GrResourceCache.cpp
@@ -15,7 +15,6 @@
 #include "src/base/SkScopeExit.h"
 #include "src/base/SkTSort.h"
 #include "src/core/SkMessageBus.h"
-#include "src/core/SkOpts.h"
 #include "src/gpu/ganesh/GrCaps.h"
 #include "src/gpu/ganesh/GrDirectContextPriv.h"
 #include "src/gpu/ganesh/GrGpuResourceCacheAccess.h"
diff --git a/src/gpu/ganesh/vk/GrVkPipelineStateCache.cpp b/src/gpu/ganesh/vk/GrVkPipelineStateCache.cpp
index 1205f7e..2b31b62 100644
--- a/src/gpu/ganesh/vk/GrVkPipelineStateCache.cpp
+++ b/src/gpu/ganesh/vk/GrVkPipelineStateCache.cpp
@@ -7,7 +7,6 @@
 
 #include "include/gpu/GrContextOptions.h"
 #include "include/gpu/GrDirectContext.h"
-#include "src/core/SkOpts.h"
 #include "src/gpu/ganesh/GrDirectContextPriv.h"
 #include "src/gpu/ganesh/GrFragmentProcessor.h"
 #include "src/gpu/ganesh/GrProcessor.h"
diff --git a/src/gpu/graphite/DrawAtlas.cpp b/src/gpu/graphite/DrawAtlas.cpp
index 6750967..93bcd4a 100644
--- a/src/gpu/graphite/DrawAtlas.cpp
+++ b/src/gpu/graphite/DrawAtlas.cpp
@@ -15,7 +15,6 @@
 #include "include/private/base/SkTPin.h"
 
 #include "src/base/SkMathPriv.h"
-#include "src/core/SkOpts.h"
 #include "src/core/SkTraceEvent.h"
 #include "src/gpu/AtlasTypes.h"
 #include "src/gpu/graphite/Caps.h"
diff --git a/src/opts/BUILD.bazel b/src/opts/BUILD.bazel
index 4e4eb71..7add537 100644
--- a/src/opts/BUILD.bazel
+++ b/src/opts/BUILD.bazel
@@ -34,11 +34,11 @@
         "SkBitmapProcState_opts.h",
         "SkBlitMask_opts.h",
         "SkBlitRow_opts.h",
+        "SkMemset_opts.h",
         "SkOpts_RestoreTarget.h",
         "SkOpts_SetTarget.h",
         "SkRasterPipeline_opts.h",
         "SkSwizzler_opts.h",
-        "SkUtils_opts.h",
     ],
     visibility = ["//src:__pkg__"],
 )
diff --git a/src/opts/SkUtils_opts.h b/src/opts/SkMemset_opts.h
similarity index 100%
rename from src/opts/SkUtils_opts.h
rename to src/opts/SkMemset_opts.h
diff --git a/src/opts/SkOpts_avx.cpp b/src/opts/SkOpts_avx.cpp
index bceb3e1..bdc172e 100644
--- a/src/opts/SkOpts_avx.cpp
+++ b/src/opts/SkOpts_avx.cpp
@@ -5,23 +5,4 @@
  * found in the LICENSE file.
  */
 
-#include "src/core/SkOpts.h"
-
-#if !defined(SK_ENABLE_OPTIMIZE_SIZE)
-
-#define SK_OPTS_NS avx
-#include "src/opts/SkUtils_opts.h"
-
-namespace SkOpts {
-    void Init_avx() {
-        memset16 = SK_OPTS_NS::memset16;
-        memset32 = SK_OPTS_NS::memset32;
-        memset64 = SK_OPTS_NS::memset64;
-
-        rect_memset16 = SK_OPTS_NS::rect_memset16;
-        rect_memset32 = SK_OPTS_NS::rect_memset32;
-        rect_memset64 = SK_OPTS_NS::rect_memset64;
-    }
-}  // namespace SkOpts
-
-#endif // SK_ENABLE_OPTIMIZE_SIZE
+// This file intentionally blank until references are removed from client projects
diff --git a/src/pdf/SkPDFGraphicState.h b/src/pdf/SkPDFGraphicState.h
index 4eda9ba..bad471e 100644
--- a/src/pdf/SkPDFGraphicState.h
+++ b/src/pdf/SkPDFGraphicState.h
@@ -10,7 +10,6 @@
 #define SkPDFGraphicState_DEFINED
 
 #include "include/private/base/SkMacros.h"
-#include "src/core/SkOpts.h"
 #include "src/pdf/SkPDFTypes.h"
 
 class SkPaint;
diff --git a/src/ports/SkScalerContext_mac_ct.cpp b/src/ports/SkScalerContext_mac_ct.cpp
index 9593ba2..ea86968 100644
--- a/src/ports/SkScalerContext_mac_ct.cpp
+++ b/src/ports/SkScalerContext_mac_ct.cpp
@@ -39,7 +39,7 @@
 #include "src/core/SkGlyph.h"
 #include "src/core/SkMask.h"
 #include "src/core/SkMaskGamma.h"
-#include "src/core/SkOpts.h"
+#include "src/core/SkMemset.h"
 #include "src/ports/SkScalerContext_mac_ct.h"
 #include "src/ports/SkTypeface_mac_ct.h"
 #include "src/sfnt/SkOTTableTypes.h"
diff --git a/tests/ApplyGammaTest.cpp b/tests/ApplyGammaTest.cpp
index 0ab8d76..ca1224b 100644
--- a/tests/ApplyGammaTest.cpp
+++ b/tests/ApplyGammaTest.cpp
@@ -23,7 +23,7 @@
 #include "include/gpu/GrDirectContext.h"
 #include "include/gpu/ganesh/SkSurfaceGanesh.h"
 #include "include/private/base/SkTemplates.h"
-#include "src/core/SkOpts.h"
+#include "src/core/SkMemset.h"
 #include "src/gpu/ganesh/GrCaps.h"
 #include "src/gpu/ganesh/GrDirectContextPriv.h"
 #include "src/gpu/ganesh/GrShaderCaps.h"
diff --git a/tests/BitmapCopyTest.cpp b/tests/BitmapCopyTest.cpp
index bc6ca63..dfa013b 100644
--- a/tests/BitmapCopyTest.cpp
+++ b/tests/BitmapCopyTest.cpp
@@ -17,7 +17,7 @@
 #include "include/core/SkRefCnt.h"
 #include "include/core/SkSize.h"
 #include "include/core/SkTypes.h"
-#include "src/core/SkOpts.h"
+#include "src/core/SkMemset.h"
 #include "tests/Test.h"
 #include "tools/ToolUtils.h"
 
diff --git a/tests/CachedDecodingPixelRefTest.cpp b/tests/CachedDecodingPixelRefTest.cpp
index 92d4ed2..5090aa2 100644
--- a/tests/CachedDecodingPixelRefTest.cpp
+++ b/tests/CachedDecodingPixelRefTest.cpp
@@ -16,7 +16,7 @@
 #include "include/core/SkRefCnt.h"
 #include "include/core/SkTypes.h"
 #include "include/private/SkColorData.h"
-#include "src/core/SkOpts.h"
+#include "src/core/SkMemset.h"
 #include "tests/Test.h"
 #include "tools/ToolUtils.h"
 
diff --git a/tests/CopySurfaceTest.cpp b/tests/CopySurfaceTest.cpp
index d0f1610..4953941 100644
--- a/tests/CopySurfaceTest.cpp
+++ b/tests/CopySurfaceTest.cpp
@@ -16,7 +16,7 @@
 #include "include/gpu/GrDirectContext.h"
 #include "include/gpu/GrTypes.h"
 #include "include/private/base/SkTemplates.h"
-#include "src/core/SkOpts.h"
+#include "src/core/SkMemset.h"
 #include "src/gpu/ganesh/GrDirectContextPriv.h"
 #include "src/gpu/ganesh/GrImageInfo.h"
 #include "src/gpu/ganesh/GrPixmap.h"
diff --git a/tests/ImageTest.cpp b/tests/ImageTest.cpp
index 4edc46c..a855d726 100644
--- a/tests/ImageTest.cpp
+++ b/tests/ImageTest.cpp
@@ -54,7 +54,7 @@
 #include "src/core/SkBitmapCache.h"
 #include "src/core/SkColorSpacePriv.h"
 #include "src/core/SkImagePriv.h"
-#include "src/core/SkOpts.h"
+#include "src/core/SkMemset.h"
 #include "src/gpu/ResourceKey.h"
 #include "src/gpu/ganesh/GrCaps.h"
 #include "src/gpu/ganesh/GrDirectContextPriv.h"
diff --git a/tests/MemsetTest.cpp b/tests/MemsetTest.cpp
index 0802153..180d3bb 100644
--- a/tests/MemsetTest.cpp
+++ b/tests/MemsetTest.cpp
@@ -5,7 +5,7 @@
  * found in the LICENSE file.
  */
 
-#include "src/core/SkOpts.h"
+#include "src/core/SkMemset.h"
 #include "tests/Test.h"
 
 #include <cstddef>
diff --git a/tests/RasterPipelineBuilderTest.cpp b/tests/RasterPipelineBuilderTest.cpp
index aeda12e..bf5ab35 100644
--- a/tests/RasterPipelineBuilderTest.cpp
+++ b/tests/RasterPipelineBuilderTest.cpp
@@ -8,7 +8,6 @@
 #include "include/core/SkStream.h"
 #include "src/base/SkArenaAlloc.h"
 #include "src/base/SkStringView.h"
-#include "src/core/SkOpts.h"
 #include "src/core/SkRasterPipeline.h"
 #include "src/sksl/codegen/SkSLRasterPipelineBuilder.h"
 #include "src/sksl/tracing/SkSLDebugTracePriv.h"
diff --git a/tools/fiddle/fiddle_main.cpp b/tools/fiddle/fiddle_main.cpp
index 1a3ac9f..3bb8ce7 100644
--- a/tools/fiddle/fiddle_main.cpp
+++ b/tools/fiddle/fiddle_main.cpp
@@ -11,8 +11,8 @@
 #include <string>
 
 #include "src/core/SkAutoPixmapStorage.h"
+#include "src/core/SkMemset.h"
 #include "src/core/SkMipmap.h"
-#include "src/core/SkOpts.h"
 #include "tools/flags/CommandLineFlags.h"
 
 #include "tools/fiddle/fiddle_main.h"