Fix feature detection for AVX512

Our AVX512 code was all gated behind checks for AVX512F, but also used a
couple of intrinsics that are part of AVX512DQ. If someone built with
-mavx512f alone (i.e. for certain Xeon chips before Skylake X), they
would get compile failures.

Now the checks also verify that __AVX512DQ__ is available. Note that the
runtime detection code was already testing for all of (F,DQ,CD,BW,VL),
so the skx:: version of the code will still only be used when it's safe
to do so.

Change-Id: Id3ebbf506c10eab469ee587f62a963a7272af04a
Reviewed-on: https://skia-review.googlesource.com/c/skcms/+/570101
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Brian Osman <brianosman@google.com>
diff --git a/skcms.cc b/skcms.cc
index 6eaa9c7..7d3efa5 100644
--- a/skcms.cc
+++ b/skcms.cc
@@ -2339,7 +2339,7 @@
     #define N 1
     template <typename T> using V = T;
     using Color = float;
-#elif defined(__AVX512F__)
+#elif defined(__AVX512F__) && defined(__AVX512DQ__)
     #define N 16
     template <typename T> using V = Vec<N,T>;
     using Color = float;
@@ -2399,7 +2399,7 @@
         #define TEST_FOR_HSW
     #endif
 
-    #if !defined(__AVX512F__)
+    #if !defined(__AVX512F__) || !defined(__AVX512DQ__)
         #if defined(__clang__)
             #pragma clang attribute push(__attribute__((target("avx512f,avx512dq,avx512cd,avx512bw,avx512vl"))), apply_to=function)
         #elif defined(__GNUC__)
diff --git a/src/Transform_inl.h b/src/Transform_inl.h
index 348a765..7b8aa8a 100644
--- a/src/Transform_inl.h
+++ b/src/Transform_inl.h
@@ -43,7 +43,7 @@
 #if !defined(USING_AVX2)     && defined(USING_AVX) && defined(__AVX2__)
     #define  USING_AVX2
 #endif
-#if !defined(USING_AVX512F)  && N == 16 && defined(__AVX512F__)
+#if !defined(USING_AVX512F)  && N == 16 && defined(__AVX512F__) && defined(__AVX512DQ__)
     #define  USING_AVX512F
 #endif