Migrate HSW/SKX transform code into separate translation units.

This should be safe to land after http://review.skia.org/780000
is submitted.

Bug: b/310927123
Change-Id: I16f026a45b10cd79388dc2ca1ea7ffa985773d58
Reviewed-on: https://skia-review.googlesource.com/c/skcms/+/780102
Reviewed-by: Brian Osman <brianosman@google.com>
Auto-Submit: John Stiles <johnstiles@google.com>
Commit-Queue: John Stiles <johnstiles@google.com>
diff --git a/skcms.cc b/skcms.cc
index 7bab43d..2d85bd7 100644
--- a/skcms.cc
+++ b/skcms.cc
@@ -2293,68 +2293,6 @@
     return isfinitef_(*max_error);
 }
 
-// ~~~~ Impl. of skcms_Transform() ~~~~
-
-// Instantiate specialized versions of run_program().
-// TODO(b/310927123): move these into separate translation units
-#if !defined(SKCMS_DISABLE_HSW)
-    #if defined(__clang__)
-        #pragma clang attribute push(__attribute__((target("avx2,f16c"))), apply_to=function)
-    #elif defined(__GNUC__)
-        #pragma GCC push_options
-        #pragma GCC target("avx2,f16c")
-    #endif
-
-    namespace skcms_private {
-    namespace hsw {
-        #define USING_AVX
-        #define USING_AVX_F16C
-        #define USING_AVX2
-        #define N 8
-        template <typename T> using V = skcms_private::Vec<N,T>;
-
-        #include "src/Transform_inl.h"
-
-        // src/Transform_inl.h will undefine USING_* for us.
-        #undef N
-    }
-    }
-
-    #if defined(__clang__)
-        #pragma clang attribute pop
-    #elif defined(__GNUC__)
-        #pragma GCC pop_options
-    #endif
-#endif
-
-#if !defined(SKCMS_DISABLE_SKX)
-    #if defined(__clang__)
-        #pragma clang attribute push(__attribute__((target("avx512f,avx512dq,avx512cd,avx512bw,avx512vl"))), apply_to=function)
-    #elif defined(__GNUC__)
-        #pragma GCC push_options
-        #pragma GCC target("avx512f,avx512dq,avx512cd,avx512bw,avx512vl")
-    #endif
-
-    namespace skcms_private {
-    namespace skx {
-        #define USING_AVX512F
-        #define N 16
-        template <typename T> using V = skcms_private::Vec<N,T>;
-
-        #include "src/Transform_inl.h"
-
-        // src/Transform_inl.h will undefine USING_* for us.
-        #undef N
-    }
-    }
-
-    #if defined(__clang__)
-        #pragma clang attribute pop
-    #elif defined(__GNUC__)
-        #pragma GCC pop_options
-    #endif
-#endif
-
 enum class CpuType { Baseline, HSW, SKX };
 
 static CpuType cpu_type() {
diff --git a/src/Transform_inl.h b/src/Transform_inl.h
index 3585f59..5b3fe49 100644
--- a/src/Transform_inl.h
+++ b/src/Transform_inl.h
@@ -1467,24 +1467,3 @@
         memcpy((char*)dst + (size_t)i*dst_bpp, tmp, (size_t)n*dst_bpp);
     }
 }
-
-// Clean up any #defines we may have set so that we can be #included again.
-#if defined(USING_AVX)
-    #undef  USING_AVX
-#endif
-#if defined(USING_AVX_F16C)
-    #undef  USING_AVX_F16C
-#endif
-#if defined(USING_AVX2)
-    #undef  USING_AVX2
-#endif
-#if defined(USING_AVX512F)
-    #undef  USING_AVX512F
-#endif
-
-#if defined(USING_NEON)
-    #undef  USING_NEON
-#endif
-#if defined(USING_NEON_F16C)
-    #undef  USING_NEON_F16C
-#endif
diff --git a/src/skcms_Transform.h b/src/skcms_Transform.h
index 3401885..97413f4 100644
--- a/src/skcms_Transform.h
+++ b/src/skcms_Transform.h
@@ -141,21 +141,21 @@
 
 namespace baseline {
 
-void run_program(const Op* program, const void** contexts, ptrdiff_t /*programSize*/,
+void run_program(const Op* program, const void** contexts, ptrdiff_t programSize,
                  const char* src, char* dst, int n,
                  const size_t src_bpp, const size_t dst_bpp);
 
 }
 namespace hsw {
 
-void run_program(const Op* program, const void** contexts, ptrdiff_t /*programSize*/,
+void run_program(const Op* program, const void** contexts, ptrdiff_t programSize,
                  const char* src, char* dst, int n,
                  const size_t src_bpp, const size_t dst_bpp);
 
 }
 namespace skx {
 
-void run_program(const Op* program, const void** contexts, ptrdiff_t /*programSize*/,
+void run_program(const Op* program, const void** contexts, ptrdiff_t programSize,
                  const char* src, char* dst, int n,
                  const size_t src_bpp, const size_t dst_bpp);
 
diff --git a/src/skcms_TransformHsw.cc b/src/skcms_TransformHsw.cc
index cab2034..cd3673b 100644
--- a/src/skcms_TransformHsw.cc
+++ b/src/skcms_TransformHsw.cc
@@ -1,8 +1,61 @@
 /*
- * Copyright 2023 Google LLC
+ * Copyright 2018 Google LLC
  *
  * Use of this source code is governed by a BSD-style license that can be
  * found in the LICENSE file.
  */
 
-// This file intentionally left blank.
+#include "skcms_public.h"     // NO_G3_REWRITE
+#include "skcms_internals.h"  // NO_G3_REWRITE
+#include "skcms_Transform.h"  // NO_G3_REWRITE
+#include <assert.h>
+#include <float.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if defined(__ARM_NEON)
+    #include <arm_neon.h>
+#elif defined(__SSE__)
+    #include <immintrin.h>
+
+    #if defined(__clang__)
+        // That #include <immintrin.h> is usually enough, but Clang's headers
+        // avoid #including the whole kitchen sink when _MSC_VER is defined,
+        // because lots of programs on Windows would include that and it'd be
+        // a lot slower. But we want all those headers included, so we can use
+        // their features (after making runtime checks).
+        #include <smmintrin.h>
+        #include <avxintrin.h>
+        #include <avx2intrin.h>
+        #include <avx512fintrin.h>
+        #include <avx512dqintrin.h>
+    #endif
+#endif
+
+namespace skcms_private {
+namespace hsw {
+
+#if defined(SKCMS_DISABLE_HSW)
+
+void run_program(const Op* program, const void** contexts, ptrdiff_t programSize,
+                 const char* src, char* dst, int n,
+                 const size_t src_bpp, const size_t dst_bpp) {
+    skcms_private::baseline::run_program(program, contexts, programSize,
+                                         src, dst, n, src_bpp, dst_bpp);
+}
+
+#else
+
+#define USING_AVX
+#define USING_AVX_F16C
+#define USING_AVX2
+#define N 8
+template <typename T> using V = skcms_private::Vec<N,T>;
+
+#include "Transform_inl.h"
+
+#endif
+
+}  // namespace hsw
+}  // namespace skcms_private
diff --git a/src/skcms_TransformSkx.cc b/src/skcms_TransformSkx.cc
index cab2034..3e849dd 100644
--- a/src/skcms_TransformSkx.cc
+++ b/src/skcms_TransformSkx.cc
@@ -1,8 +1,58 @@
 /*
- * Copyright 2023 Google LLC
+ * Copyright 2018 Google LLC
  *
  * Use of this source code is governed by a BSD-style license that can be
  * found in the LICENSE file.
  */
 
-// This file intentionally left blank.
+#include "skcms_public.h"     // NO_G3_REWRITE
+#include "skcms_internals.h"  // NO_G3_REWRITE
+#include "skcms_Transform.h"  // NO_G3_REWRITE
+#include <assert.h>
+#include <float.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if defined(__ARM_NEON)
+    #include <arm_neon.h>
+#elif defined(__SSE__)
+    #include <immintrin.h>
+
+    #if defined(__clang__)
+        // That #include <immintrin.h> is usually enough, but Clang's headers
+        // avoid #including the whole kitchen sink when _MSC_VER is defined,
+        // because lots of programs on Windows would include that and it'd be
+        // a lot slower. But we want all those headers included, so we can use
+        // their features (after making runtime checks).
+        #include <smmintrin.h>
+        #include <avxintrin.h>
+        #include <avx2intrin.h>
+        #include <avx512fintrin.h>
+        #include <avx512dqintrin.h>
+    #endif
+#endif
+
+namespace skcms_private {
+namespace skx {
+
+#if defined(SKCMS_DISABLE_SKX)
+
+void run_program(const Op* program, const void** contexts, ptrdiff_t programSize,
+                 const char* src, char* dst, int n,
+                 const size_t src_bpp, const size_t dst_bpp) {
+    skcms_private::baseline::run_program(program, contexts, programSize,
+                                         src, dst, n, src_bpp, dst_bpp);
+}
+
+#else
+
+#define USING_AVX512F
+#define N 16
+template <typename T> using V = skcms_private::Vec<N,T>;
+#include "Transform_inl.h"
+
+#endif
+
+}  // namespace skx
+}  // namespace skcms_private