Neon: Finalize intrinsics implementation

- Remove gas-preprocessor.pl.  None of the compilers that can build the
  new intrinsics implementation require gas-preprocessor.pl (tested
  with Xcode and with Clang 3.9+ for Linux).
- Document that Xcode 6.3.x or later is now required for iOS builds
  (older versions of Xcode do not have a full set of Neon intrinsics).
- Add a change log entry.
- Do not enable the ASM CMake language unless NEON_INTRINSICS is false.
- Add a Clang/Arm64 test to .travis.yml in order to test the new
  intrinsics implementation.

Closes #455
diff --git a/.travis.yml b/.travis.yml
index 0070732..6038e7d 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -61,6 +61,11 @@
       arch: arm64
       env:
         CTEST_OUTPUT_ON_FAILURE=1
+    - os: linux
+      compiler: clang
+      arch: arm64
+      env:
+        CTEST_OUTPUT_ON_FAILURE=1
 
 before_install:
   - if [ "$BUILD_X32" = "1" ]; then
@@ -72,7 +77,6 @@
       . ./macports-ci install &&
       sudo /opt/local/bin/port -N install yasm md5sha1sum &&
       popd &&
-      git clone --depth=1 https://github.com/libjpeg-turbo/gas-preprocessor.git ~/src/gas-preprocessor &&
       ln -fs /Applications/Xcode.app /Applications/Xcode83.app;
     fi
   - if [ "${BUILD_OFFICIAL:-}" != "" ]; then
@@ -100,7 +104,7 @@
         sudo chown -R travis:travis ~/src/ljt.nightly &&
         mv ~/src/ljt.nightly/latest/log-$TRAVIS_OS_NAME.txt ~/src/ljt.nightly/latest/files/;
       else
-        PATH=$PATH:~/src/gas-preprocessor ~/src/buildscripts/buildljt -d $TRAVIS_BUILD_DIR -v &&
+        ~/src/buildscripts/buildljt -d $TRAVIS_BUILD_DIR -v &&
         mv ~/src/ljt.nightly/latest/log-$TRAVIS_OS_NAME.txt ~/src/ljt.nightly/latest/files/;
       fi
     fi
diff --git a/BUILDING.md b/BUILDING.md
index ec579e4..8a19f01 100644
--- a/BUILDING.md
+++ b/BUILDING.md
@@ -400,16 +400,9 @@
 for these platforms.
 
 
-### Additional build requirements
-
-- For configurations that require [gas-preprocessor.pl]
-  (https://raw.githubusercontent.com/libjpeg-turbo/gas-preprocessor/master/gas-preprocessor.pl),
-  it should be installed in your `PATH`.
-
-
 ### Armv8 (64-bit)
 
-**gas-preprocessor.pl required if using Xcode < 6**
+**Xcode 6.3.x or later required**
 
 The following script demonstrates how to build libjpeg-turbo to run on the
 iPhone 5S/iPad Mini 2/iPad Air and newer.
diff --git a/ChangeLog.md b/ChangeLog.md
index 2b89d83..8fb57a9 100644
--- a/ChangeLog.md
+++ b/ChangeLog.md
@@ -108,6 +108,28 @@
 jpeg-9d, as well as the ability to expand the image size using the `-crop`
 option.  Refer to jpegtran.1 or usage.txt for more details.
 
+13. Added a complete intrinsics implementation of the Arm Neon SIMD extensions,
+thus providing SIMD acceleration on Arm platforms for all of the algorithms
+that are SIMD-accelerated on x86 platforms.  This new implementation is
+significantly faster in some cases than the old GAS implementation--
+depending on the algorithms used, the type of CPU core, and the compiler.  GCC,
+as of this writing, does not provide a full or optimal set of Neon intrinsics,
+so for performance reasons, the default when building libjpeg-turbo with GCC is
+to continue using the GAS implementation of the following algorithms:
+
+     - 32-bit RGB-to-YCbCr color conversion
+     - 32-bit fast and accurate inverse DCT
+     - 64-bit RGB-to-YCbCr and YCbCr-to-RGB color conversion
+     - 64-bit accurate forward and inverse DCT
+     - 64-bit Huffman encoding
+
+    A new CMake variable (`NEON_INTRINSICS`) can be used to override this
+default.
+
+    Since the new intrinsics implementation includes SIMD acceleration
+for merged upsampling/color conversion, 1.5.1[5] is no longer necessary and has
+been reverted.
+
 
 2.0.6
 =====
diff --git a/simd/CMakeLists.txt b/simd/CMakeLists.txt
index 234e578..3636e6f 100644
--- a/simd/CMakeLists.txt
+++ b/simd/CMakeLists.txt
@@ -213,58 +213,6 @@
 
 elseif(CPU_TYPE STREQUAL "arm64" OR CPU_TYPE STREQUAL "arm")
 
-enable_language(ASM)
-
-set(CMAKE_ASM_FLAGS "${CMAKE_C_FLAGS} ${CMAKE_ASM_FLAGS}")
-
-string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UC)
-set(EFFECTIVE_ASM_FLAGS "${CMAKE_ASM_FLAGS} ${CMAKE_ASM_FLAGS_${CMAKE_BUILD_TYPE_UC}}")
-message(STATUS "CMAKE_ASM_FLAGS = ${EFFECTIVE_ASM_FLAGS}")
-
-# Test whether we need gas-preprocessor.pl
-if(CPU_TYPE STREQUAL "arm")
-  file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/gastest.S "
-    .text
-    .fpu neon
-    .arch armv7a
-    .object_arch armv4
-    .arm
-    pld [r0]
-    vmovn.u16 d0, q0")
-else()
-  file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/gastest.S "
-    .text
-    MYVAR .req x0
-    movi v0.16b, #100
-    mov MYVAR, #100
-    .unreq MYVAR")
-endif()
-
-separate_arguments(CMAKE_ASM_FLAGS_SEP UNIX_COMMAND "${CMAKE_ASM_FLAGS}")
-
-execute_process(COMMAND ${CMAKE_ASM_COMPILER} ${CMAKE_ASM_FLAGS_SEP}
-    -x assembler-with-cpp -c ${CMAKE_CURRENT_BINARY_DIR}/gastest.S
-  RESULT_VARIABLE RESULT OUTPUT_VARIABLE OUTPUT ERROR_VARIABLE ERROR)
-if(NOT RESULT EQUAL 0)
-  message(STATUS "GAS appears to be broken.  Trying gas-preprocessor.pl ...")
-  execute_process(COMMAND gas-preprocessor.pl ${CMAKE_ASM_COMPILER}
-      ${CMAKE_ASM_FLAGS_SEP} -x assembler-with-cpp -c
-      ${CMAKE_CURRENT_BINARY_DIR}/gastest.S
-    RESULT_VARIABLE RESULT OUTPUT_VARIABLE OUTPUT ERROR_VARIABLE ERROR)
-  if(NOT RESULT EQUAL 0)
-    simd_fail("SIMD extensions disabled: GAS is not working properly")
-    return()
-  else()
-    message(STATUS "Using gas-preprocessor.pl")
-    configure_file(gas-preprocessor.in gas-preprocessor @ONLY)
-    set(CMAKE_ASM_COMPILER ${CMAKE_CURRENT_BINARY_DIR}/gas-preprocessor)
-  endif()
-else()
-  message(STATUS "GAS is working properly")
-endif()
-
-file(REMOVE ${CMAKE_CURRENT_BINARY_DIR}/gastest.S)
-
 set(SIMD_SOURCES arm/jcgray-neon.c arm/jcphuff-neon.c arm/jcsample-neon.c
   arm/jdmerge-neon.c arm/jdsample-neon.c arm/jfdctfst-neon.c
   arm/jidctred-neon.c arm/jquanti-neon.c)
@@ -282,6 +230,18 @@
  set_source_files_properties(${SIMD_SOURCES} COMPILE_FLAGS -mfpu=neon)
 endif()
 if(NOT NEON_INTRINSICS)
+  # The GAS source below is added only when NEON_INTRINSICS is false, so the
+  # ASM language is enabled only in this branch.
+  enable_language(ASM)
+
+  set(CMAKE_ASM_FLAGS "${CMAKE_C_FLAGS} ${CMAKE_ASM_FLAGS}")
+
+  # Quote the expansion so that an empty CMAKE_BUILD_TYPE does not leave
+  # string(TOUPPER) with too few arguments.
+  string(TOUPPER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_UC)
+  set(EFFECTIVE_ASM_FLAGS "${CMAKE_ASM_FLAGS} ${CMAKE_ASM_FLAGS_${CMAKE_BUILD_TYPE_UC}}")
+  message(STATUS "CMAKE_ASM_FLAGS = ${EFFECTIVE_ASM_FLAGS}")
+
    set(SIMD_SOURCES ${SIMD_SOURCES} arm/aarch${BITS}/jsimd_neon.S)
 endif()
 
diff --git a/simd/gas-preprocessor.in b/simd/gas-preprocessor.in
deleted file mode 100755
index 560f788..0000000
--- a/simd/gas-preprocessor.in
+++ /dev/null
@@ -1 +0,0 @@
-gas-preprocessor.pl @CMAKE_ASM_COMPILER@ ${1+"$@"}