Migrate baseline transform code into a separate translation unit.

Change-Id: Ia85610ec12db4a698993824578bb2e668c10c31e
Bug: b/310927123
Reviewed-on: https://skia-review.googlesource.com/c/skcms/+/774644
Reviewed-by: Brian Osman <brianosman@google.com>
Commit-Queue: John Stiles <johnstiles@google.com>
diff --git a/skcms.cc b/skcms.cc
index 763589c..7bab43d 100644
--- a/skcms.cc
+++ b/skcms.cc
@@ -2295,24 +2295,8 @@
 
 // ~~~~ Impl. of skcms_Transform() ~~~~
 
-namespace baseline {
-#if defined(SKCMS_PORTABLE)
-    // Build skcms in a portable scalar configuration.
-    #define N 1
-    template <typename T> using V = T;
-#else
-    // Build skcms with basic four-line SIMD support. (SSE on Intel, or Neon on ARM.) The precise
-    // level of support--e.g. SSE2 vs SSE4.1, or Neon F16C support--is decided by the `-march`
-    // compiler setting.
-    #define N 4
-    template <typename T> using V = skcms_private::Vec<N, T>;
-#endif
-
-    #include "src/Transform_inl.h"
-    #undef N
-}
-
-// Now, instantiate any other versions of run_program() we may want for runtime detection.
+// Instantiate the versions of run_program() selected by runtime detection (hsw and skx).
+// TODO(b/310927123): move these into separate translation units
 #if !defined(SKCMS_DISABLE_HSW)
     #if defined(__clang__)
         #pragma clang attribute push(__attribute__((target("avx2,f16c"))), apply_to=function)
@@ -2321,6 +2305,7 @@
         #pragma GCC target("avx2,f16c")
     #endif
 
+    namespace skcms_private {
     namespace hsw {
         #define USING_AVX
         #define USING_AVX_F16C
@@ -2333,6 +2318,7 @@
         // src/Transform_inl.h will undefine USING_* for us.
         #undef N
     }
+    }
 
     #if defined(__clang__)
         #pragma clang attribute pop
@@ -2349,6 +2335,7 @@
         #pragma GCC target("avx512f,avx512dq,avx512cd,avx512bw,avx512vl")
     #endif
 
+    namespace skcms_private {
     namespace skx {
         #define USING_AVX512F
         #define N 16
@@ -2359,6 +2346,7 @@
         // src/Transform_inl.h will undefine USING_* for us.
         #undef N
     }
+    }
 
     #if defined(__clang__)
         #pragma clang attribute pop
diff --git a/src/Transform_inl.h b/src/Transform_inl.h
index 8098a7b..1158a63 100644
--- a/src/Transform_inl.h
+++ b/src/Transform_inl.h
@@ -1449,9 +1449,9 @@
     }
 }
 
-static void run_program(const Op* program, const void** contexts, ptrdiff_t /*programSize*/,
-                        const char* src, char* dst, int n,
-                        const size_t src_bpp, const size_t dst_bpp) {
+void run_program(const Op* program, const void** contexts, ptrdiff_t /*programSize*/,
+                 const char* src, char* dst, int n,
+                 const size_t src_bpp, const size_t dst_bpp) {
     int i = 0;
     while (n >= N) {
         exec_ops(program, contexts, src, dst, i);
diff --git a/src/skcms_Transform.h b/src/skcms_Transform.h
index f618c8c..9c13e50 100644
--- a/src/skcms_Transform.h
+++ b/src/skcms_Transform.h
@@ -134,4 +134,27 @@
     template <int N, typename T> using Vec = typename VecHelper<N, T>::V;
 #endif
 
+/** Interface: one run_program() entry point per build variant (baseline, hsw, skx). */
+
+namespace baseline {
+
+void run_program(const Op* program, const void** contexts, ptrdiff_t /*programSize*/,
+                 const char* src, char* dst, int n,
+                 const size_t src_bpp, const size_t dst_bpp);
+
+}
+namespace hsw {
+
+void run_program(const Op* program, const void** contexts, ptrdiff_t /*programSize*/,
+                 const char* src, char* dst, int n,
+                 const size_t src_bpp, const size_t dst_bpp);
+
+}
+namespace skx {
+
+void run_program(const Op* program, const void** contexts, ptrdiff_t /*programSize*/,
+                 const char* src, char* dst, int n,
+                 const size_t src_bpp, const size_t dst_bpp);
+
+}
 }  // namespace skcms_private
diff --git a/src/skcms_TransformBaseline.cc b/src/skcms_TransformBaseline.cc
index cab2034..bfe1df6 100644
--- a/src/skcms_TransformBaseline.cc
+++ b/src/skcms_TransformBaseline.cc
@@ -1,8 +1,48 @@
 /*
- * Copyright 2023 Google LLC
+ * Copyright 2018 Google LLC
  *
  * Use of this source code is governed by a BSD-style license that can be
  * found in the LICENSE file.
  */
 
-// This file intentionally left blank.
+#include "skcms_public.h"     // NO_G3_REWRITE
+#include "skcms_internals.h"  // NO_G3_REWRITE
+#include "skcms_Transform.h"  // NO_G3_REWRITE
+#include <assert.h>
+#include <float.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if defined(__ARM_NEON)
+    #include <arm_neon.h>
+#elif defined(__SSE__)
+    #include <immintrin.h>
+
+    #if defined(__clang__)
+        // That #include <immintrin.h> is usually enough, but Clang's headers
+        // avoid #including the whole kitchen sink when _MSC_VER is defined,
+        // because lots of programs on Windows would include that and it'd be
+        // a lot slower. But we want all those headers included, so we can use
+        // their features (after making runtime checks).
+        #include <smmintrin.h>
+    #endif
+#endif
+
+namespace skcms_private {
+namespace baseline {
+
+#if defined(SKCMS_PORTABLE)
+    // Build skcms in a portable scalar configuration.
+    #define N 1
+    template <typename T> using V = T;
+#else
+    // Build skcms with basic four-lane SIMD support (SSE on Intel, or Neon on ARM).
+    #define N 4
+    template <typename T> using V = skcms_private::Vec<N,T>;
+#endif
+
+#include "Transform_inl.h"
+
+}  // namespace baseline
+}  // namespace skcms_private