Migrate baseline transform code into a separate translation unit.
Change-Id: Ia85610ec12db4a698993824578bb2e668c10c31e
Bug: b/310927123
Reviewed-on: https://skia-review.googlesource.com/c/skcms/+/774644
Reviewed-by: Brian Osman <brianosman@google.com>
Commit-Queue: John Stiles <johnstiles@google.com>
diff --git a/skcms.cc b/skcms.cc
index 763589c..7bab43d 100644
--- a/skcms.cc
+++ b/skcms.cc
@@ -2295,24 +2295,8 @@
// ~~~~ Impl. of skcms_Transform() ~~~~
-namespace baseline {
-#if defined(SKCMS_PORTABLE)
- // Build skcms in a portable scalar configuration.
- #define N 1
- template <typename T> using V = T;
-#else
- // Build skcms with basic four-line SIMD support. (SSE on Intel, or Neon on ARM.) The precise
- // level of support--e.g. SSE2 vs SSE4.1, or Neon F16C support--is decided by the `-march`
- // compiler setting.
- #define N 4
- template <typename T> using V = skcms_private::Vec<N, T>;
-#endif
-
- #include "src/Transform_inl.h"
- #undef N
-}
-
-// Now, instantiate any other versions of run_program() we may want for runtime detection.
+// Instantiate specialized versions of run_program().
+// TODO(b/310927123): move these into separate translation units
#if !defined(SKCMS_DISABLE_HSW)
#if defined(__clang__)
#pragma clang attribute push(__attribute__((target("avx2,f16c"))), apply_to=function)
@@ -2321,6 +2305,7 @@
#pragma GCC target("avx2,f16c")
#endif
+ namespace skcms_private {
namespace hsw {
#define USING_AVX
#define USING_AVX_F16C
@@ -2333,6 +2318,7 @@
// src/Transform_inl.h will undefine USING_* for us.
#undef N
}
+ }
#if defined(__clang__)
#pragma clang attribute pop
@@ -2349,6 +2335,7 @@
#pragma GCC target("avx512f,avx512dq,avx512cd,avx512bw,avx512vl")
#endif
+ namespace skcms_private {
namespace skx {
#define USING_AVX512F
#define N 16
@@ -2359,6 +2346,7 @@
// src/Transform_inl.h will undefine USING_* for us.
#undef N
}
+ }
#if defined(__clang__)
#pragma clang attribute pop
diff --git a/src/Transform_inl.h b/src/Transform_inl.h
index 8098a7b..1158a63 100644
--- a/src/Transform_inl.h
+++ b/src/Transform_inl.h
@@ -1449,9 +1449,9 @@
}
}
-static void run_program(const Op* program, const void** contexts, ptrdiff_t /*programSize*/,
- const char* src, char* dst, int n,
- const size_t src_bpp, const size_t dst_bpp) {
+void run_program(const Op* program, const void** contexts, ptrdiff_t /*programSize*/,
+ const char* src, char* dst, int n,
+ const size_t src_bpp, const size_t dst_bpp) {
int i = 0;
while (n >= N) {
exec_ops(program, contexts, src, dst, i);
diff --git a/src/skcms_Transform.h b/src/skcms_Transform.h
index f618c8c..9c13e50 100644
--- a/src/skcms_Transform.h
+++ b/src/skcms_Transform.h
@@ -134,4 +134,27 @@
template <int N, typename T> using Vec = typename VecHelper<N, T>::V;
#endif
+/** Interface: run_program() entry points, one per build variant (baseline, hsw, skx). */
+
+namespace baseline {
+
+void run_program(const Op* program, const void** contexts, ptrdiff_t /*programSize*/,
+ const char* src, char* dst, int n,
+ const size_t src_bpp, const size_t dst_bpp);
+
+}
+namespace hsw {
+
+void run_program(const Op* program, const void** contexts, ptrdiff_t /*programSize*/,
+ const char* src, char* dst, int n,
+ const size_t src_bpp, const size_t dst_bpp);
+
+}
+namespace skx {
+
+void run_program(const Op* program, const void** contexts, ptrdiff_t /*programSize*/,
+ const char* src, char* dst, int n,
+ const size_t src_bpp, const size_t dst_bpp);
+
+}
} // namespace skcms_private
diff --git a/src/skcms_TransformBaseline.cc b/src/skcms_TransformBaseline.cc
index cab2034..bfe1df6 100644
--- a/src/skcms_TransformBaseline.cc
+++ b/src/skcms_TransformBaseline.cc
@@ -1,8 +1,48 @@
/*
- * Copyright 2023 Google LLC
+ * Copyright 2018 Google LLC
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
-// This file intentionally left blank.
+#include "skcms_public.h" // NO_G3_REWRITE
+#include "skcms_internals.h" // NO_G3_REWRITE
+#include "skcms_Transform.h" // NO_G3_REWRITE
+#include <assert.h>
+#include <float.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if defined(__ARM_NEON)
+ #include <arm_neon.h>
+#elif defined(__SSE__)
+ #include <immintrin.h>
+
+ #if defined(__clang__)
+ // That #include <immintrin.h> is usually enough, but Clang's headers
+ // avoid #including the whole kitchen sink when _MSC_VER is defined,
+ // because lots of programs on Windows would include that and it'd be
+ // a lot slower. But we want all those headers included, so we can use
+ // their features (after making runtime checks).
+ #include <smmintrin.h>
+ #endif
+#endif
+
+namespace skcms_private {
+namespace baseline {
+
+#if defined(SKCMS_PORTABLE)
+ // Build skcms in a portable scalar configuration.
+ #define N 1
+ template <typename T> using V = T;
+#else
+ // Build skcms with basic four-lane SIMD support. (SSE on Intel, or Neon on ARM)
+ #define N 4
+ template <typename T> using V = skcms_private::Vec<N,T>;
+#endif
+
+#include "Transform_inl.h"
+
+} // namespace baseline
+} // namespace skcms_private