Build: Set FLOATTEST more intelligently
The "32bit" vs. "64bit" floating point test results actually have
nothing to do with the FPU. That was a fallacious assumption based on
the observation that, with multiple CPU types, 32-bit and 64-bit builds
produce different floating point test results. It seems that this is,
in fact, due to differing compiler behavior-- more specifically, whether
fused multiply-add (FMA) instructions are used to combine multiple
floating point operations into a single instruction ("floating point
expression contraction"). GCC does this by default if the target
supports FMA instructions, which PowerPC and AArch64 targets both do.
Fixes #468
diff --git a/.travis.yml b/.travis.yml
index dfddbd6..b5aeebd 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -51,6 +51,11 @@
CFLAGS_RELWITHDEBINFO="-O3 -g -fsanitize=memory -fPIE"
CMAKE_FLAGS="-DWITH_SIMD=0"
CTEST_OUTPUT_ON_FAILURE=1
+ - os: linux
+ compiler: clang
+ arch: arm64
+ env:
+ CTEST_OUTPUT_ON_FAILURE=1
before_install:
- if [ "$TRAVIS_OS_NAME" = "osx" ]; then
@@ -106,7 +111,7 @@
! "${CMAKE_FLAGS[0]}" =~ "WITH_SIMD" &&
"$TRAVIS_CPU_ARCH" = "amd64" ]]; then
JSIMD_FORCESSE2=1 make test &&
- cmake -DFLOATTEST=32bit .. &&
+ cmake -DFLOATTEST=no-fp-contract .. &&
JSIMD_FORCENONE=1 make test;
fi &&
popd;
diff --git a/CMakeLists.txt b/CMakeLists.txt
index b6ed7b5..a832909 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -709,10 +709,11 @@
set(MD5_JPEG_3x2_FLOAT_PROG_SSE a8c17daf77b457725ec929e215b603f8)
set(MD5_PPM_3x2_FLOAT_SSE 42876ab9e5c2f76a87d08db5fbd57956)
- set(MD5_JPEG_3x2_FLOAT_PROG_32BIT a8c17daf77b457725ec929e215b603f8)
- set(MD5_PPM_3x2_FLOAT_32BIT ${MD5_PPM_3x2_FLOAT_SSE})
- set(MD5_JPEG_3x2_FLOAT_PROG_64BIT ${MD5_JPEG_3x2_FLOAT_PROG_32BIT})
- set(MD5_PPM_3x2_FLOAT_64BIT ${MD5_PPM_3x2_FLOAT_SSE})
+ set(MD5_JPEG_3x2_FLOAT_PROG_NO_FP_CONTRACT a8c17daf77b457725ec929e215b603f8)
+ set(MD5_PPM_3x2_FLOAT_NO_FP_CONTRACT ${MD5_PPM_3x2_FLOAT_SSE})
+ set(MD5_JPEG_3x2_FLOAT_PROG_FP_CONTRACT
+ ${MD5_JPEG_3x2_FLOAT_PROG_NO_FP_CONTRACT})
+ set(MD5_PPM_3x2_FLOAT_FP_CONTRACT ${MD5_PPM_3x2_FLOAT_SSE})
set(MD5_JPEG_3x2_FLOAT_PROG_387 bc6dbbefac2872f6b9d6c4a0ae60c3c0)
set(MD5_PPM_3x2_FLOAT_387 bcc5723c61560463ac60f772e742d092)
set(MD5_JPEG_3x2_FLOAT_PROG_MSVC e27840755870fa849872e58aa0cd1400)
@@ -761,10 +762,11 @@
set(MD5_JPEG_3x2_FLOAT_PROG_SSE 343e3f8caf8af5986ebaf0bdc13b5c71)
set(MD5_PPM_3x2_FLOAT_SSE 1a75f36e5904d6fc3a85a43da9ad89bb)
- set(MD5_JPEG_3x2_FLOAT_PROG_32BIT 9bca803d2042bd1eb03819e2bf92b3e5)
- set(MD5_PPM_3x2_FLOAT_32BIT f6bfab038438ed8f5522fbd33595dcdc)
- set(MD5_JPEG_3x2_FLOAT_PROG_64BIT ${MD5_JPEG_3x2_FLOAT_PROG_32BIT})
- set(MD5_PPM_3x2_FLOAT_64BIT 0e917a34193ef976b679a6b069b1be26)
+ set(MD5_JPEG_3x2_FLOAT_PROG_NO_FP_CONTRACT 9bca803d2042bd1eb03819e2bf92b3e5)
+ set(MD5_PPM_3x2_FLOAT_NO_FP_CONTRACT f6bfab038438ed8f5522fbd33595dcdc)
+ set(MD5_JPEG_3x2_FLOAT_PROG_FP_CONTRACT
+ ${MD5_JPEG_3x2_FLOAT_PROG_NO_FP_CONTRACT})
+ set(MD5_PPM_3x2_FLOAT_FP_CONTRACT 0e917a34193ef976b679a6b069b1be26)
set(MD5_JPEG_3x2_FLOAT_PROG_387 1657664a410e0822c924b54f6f65e6e9)
set(MD5_PPM_3x2_FLOAT_387 cb0a1f027f3d2917c902b5640214e025)
set(MD5_JPEG_3x2_FLOAT_PROG_MSVC 7999ce9cd0ee9b6c7043b7351ab7639d)
@@ -874,11 +876,16 @@
#
# sse = validate against the expected results from the libjpeg-turbo SSE SIMD
# extensions
-# 32bit = validate against the expected results from the C code when running on
-# a 32-bit FPU (or when SSE is being used for floating point math,
-# which is generally the default with x86-64 compilers)
-# 64bit = validate against the expected results from the C code when running
-# on a 64-bit FPU
+# no-fp-contract = validate against the expected results from the C code when
+# floating point expression contraction is disabled (the
+# default with Clang, with GCC when building for platforms
+# that lack fused multiply-add [FMA] instructions, or when
+# passing -ffp-contract=off to the compiler)
+# fp-contract = validate against the expected results from the C code when
+# floating point expression contraction is enabled (the default
+# with GCC when building for platforms that have fused multiply-
+# add [FMA] instructions or when passing -ffp-contract=fast to
+# the compiler)
# 387 = validate against the expected results from the C code when the 387 FPU
# is being used for floating point math (which is generally the default
# with x86 compilers)
@@ -889,15 +896,20 @@
if(WITH_SIMD)
set(DEFAULT_FLOATTEST sse)
elseif(CPU_TYPE STREQUAL "x86_64")
- set(DEFAULT_FLOATTEST 32bit)
+ set(DEFAULT_FLOATTEST no-fp-contract)
elseif(CPU_TYPE STREQUAL "i386" AND MSVC)
set(DEFAULT_FLOATTEST msvc)
+ # else we can't really set an intelligent default for i386. The appropriate
+ # value could be 387, no-fp-contract, or fp-contract, depending on the
+ # compiler and compiler options. We leave it to the user to set FLOATTEST
+ # manually.
endif()
else()
- if(BITS EQUAL 64)
- set(DEFAULT_FLOATTEST 64bit)
- elseif(BITS EQUAL 32)
- set(DEFAULT_FLOATTEST 32bit)
+ if((CPU_TYPE STREQUAL "powerpc" OR CPU_TYPE STREQUAL "arm64") AND
+ NOT CMAKE_C_COMPILER_ID STREQUAL "Clang")
+ set(DEFAULT_FLOATTEST fp-contract)
+ else()
+ set(DEFAULT_FLOATTEST no-fp-contract)
endif()
endif()
@@ -908,15 +920,17 @@
endif()
set(WITH_SIMD_INT ${WITH_SIMD} CACHE INTERNAL "")
set(FLOATTEST ${DEFAULT_FLOATTEST} CACHE STRING
- "The type of floating point math used by the floating point DCT/IDCT algorithms. This tells the testing system which numerical results it should expect from those tests. [sse = libjpeg-turbo x86/x86-64 SIMD extensions, 32bit = generic 32-bit FPU or SSE, 64bit = generic 64-bit FPU, 387 = 387 FPU, msvc = 32-bit Visual Studio] (default = ${DEFAULT_FLOATTEST})"
+ "The type of floating point math used by the floating point DCT/IDCT algorithms. This tells the testing system which numerical results it should expect from those tests. [sse = libjpeg-turbo x86/x86-64 SIMD extensions, no-fp-contract = generic FPU with floating point expression contraction disabled, fp-contract = generic FPU with floating point expression contraction enabled, 387 = 387 FPU, msvc = 32-bit Visual Studio] (default = ${DEFAULT_FLOATTEST})"
${FORCE_FLOATTEST})
message(STATUS "FLOATTEST = ${FLOATTEST}")
if(FLOATTEST)
string(TOUPPER ${FLOATTEST} FLOATTEST_UC)
+ string(REGEX REPLACE "-" "_" FLOATTEST_UC ${FLOATTEST_UC})
string(TOLOWER ${FLOATTEST} FLOATTEST)
- if(NOT FLOATTEST STREQUAL "sse" AND NOT FLOATTEST STREQUAL "32bit" AND
- NOT FLOATTEST STREQUAL "64bit" AND NOT FLOATTEST STREQUAL "387" AND
+ if(NOT FLOATTEST STREQUAL "sse" AND
+ NOT FLOATTEST STREQUAL "no-fp-contract" AND
+ NOT FLOATTEST STREQUAL "fp-contract" AND NOT FLOATTEST STREQUAL "387" AND
NOT FLOATTEST STREQUAL "msvc")
message(FATAL_ERROR "\"${FLOATTEST}\" is not a valid value for FLOATTEST.")
endif()