// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/hash/internal/hash.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <type_traits>
#include "absl/base/attributes.h"
#include "absl/base/config.h"
#include "absl/base/internal/unaligned_access.h"
#include "absl/base/optimization.h"
#include "absl/base/prefetch.h"
#include "absl/hash/internal/city.h"
#ifdef ABSL_AES_INTERNAL_HAVE_X86_SIMD
#error ABSL_AES_INTERNAL_HAVE_X86_SIMD cannot be directly set
#elif defined(__SSE4_2__) && defined(__AES__)
#define ABSL_AES_INTERNAL_HAVE_X86_SIMD
#endif
#ifdef ABSL_AES_INTERNAL_HAVE_X86_SIMD
#include <smmintrin.h>
#include <wmmintrin.h>
#include <xmmintrin.h>
#endif // ABSL_AES_INTERNAL_HAVE_X86_SIMD
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace hash_internal {
namespace {
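// Prefetches the cache line 5 * ABSL_CACHELINE_SIZE bytes ahead of `ptr`.
// The hashing loops below consume 64 bytes per iteration, so (assuming
// 64-byte cache lines) this requests data roughly five iterations before it
// is loaded, hiding memory latency on long inputs.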
void PrefetchFutureDataToLocalCache(const uint8_t* ptr) {
PrefetchToLocalCache(ptr + 5 * ABSL_CACHELINE_SIZE);
}
#ifdef ABSL_AES_INTERNAL_HAVE_X86_SIMD
uint64_t Mix4x16Vectors(__m128i a, __m128i b, __m128i c, __m128i d) {
// res128 = encrypt(a + c, d) + decrypt(b - d, a)
auto res128 = _mm_add_epi64(_mm_aesenc_si128(_mm_add_epi64(a, c), d),
_mm_aesdec_si128(_mm_sub_epi64(b, d), a));
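  // Fold the 128-bit result down to 64 bits by XORing its two halves.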
auto x64 = static_cast<uint64_t>(_mm_cvtsi128_si64(res128));
auto y64 = static_cast<uint64_t>(_mm_extract_epi64(res128, 1));
return x64 ^ y64;
}
uint64_t LowLevelHash33To64(uint64_t seed, const uint8_t* ptr, size_t len) {
assert(len > 32);
assert(len <= 64);
__m128i state =
_mm_set_epi64x(static_cast<int64_t>(seed), static_cast<int64_t>(len));
auto a = _mm_loadu_si128(reinterpret_cast<const __m128i*>(ptr));
auto b = _mm_loadu_si128(reinterpret_cast<const __m128i*>(ptr + 16));
auto* last32_ptr = ptr + len - 32;
auto c = _mm_loadu_si128(reinterpret_cast<const __m128i*>(last32_ptr));
auto d = _mm_loadu_si128(reinterpret_cast<const __m128i*>(last32_ptr + 16));
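  // Note: when len < 64 the [ptr, ptr + 32) and [last32_ptr, last32_ptr + 32)
  // windows overlap. That is fine: len > 32 keeps both reads inside the input
  // and together they still cover every byte.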
  // The second argument to _mm_aesdec_si128/_mm_aesenc_si128 is only XORed
  // into the result after the decryption/encryption round has been applied to
  // the first argument, so we use each value as the first argument to get all
  // of its bits shuffled around.
  // We do not add any salt to the state or the loaded data; instead we vary
  // the instructions used to mix the bits (_mm_aesdec_si128/_mm_aesenc_si128
  // and _mm_add_epi64/_mm_sub_epi64).
  // The compiler can fold the data load into _mm_add_epi64/_mm_sub_epi64,
  // yielding a single instruction such as
  // `vpaddq xmm1, xmm0, xmmword ptr [rdi]`.
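  // For reference, the single-round AES intrinsics compute
  //   _mm_aesenc_si128(x, k) == MixColumns(SubBytes(ShiftRows(x))) ^ k
  //   _mm_aesdec_si128(x, k) ==
  //       InvMixColumns(InvSubBytes(InvShiftRows(x))) ^ k
  // i.e. one full AES round with `k` acting as the round key, XORed in last.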
auto na = _mm_aesdec_si128(_mm_add_epi64(state, a), state);
auto nb = _mm_aesdec_si128(_mm_sub_epi64(state, b), state);
auto nc = _mm_aesenc_si128(_mm_add_epi64(state, c), state);
auto nd = _mm_aesenc_si128(_mm_sub_epi64(state, d), state);
// We perform another round of encryption to mix bits between two halves of
// the input.
return Mix4x16Vectors(na, nb, nc, nd);
}
[[maybe_unused]] ABSL_ATTRIBUTE_NOINLINE uint64_t
LowLevelHashLenGt64(uint64_t seed, const void* data, size_t len) {
assert(len > 64);
const uint8_t* ptr = static_cast<const uint8_t*>(data);
const uint8_t* last_32_ptr = ptr + len - 32;
  // With more than 64 bytes we handle chunks of 64 bytes at a time, building
  // up four separate hash states which we then hash together at the end.
  // Keeping the states independent avoids one long serial dependency chain
  // and lets the CPU overlap the AES rounds.
__m128i state0 =
_mm_set_epi64x(static_cast<int64_t>(seed), static_cast<int64_t>(len));
__m128i state1 = state0;
__m128i state2 = state1;
__m128i state3 = state2;
  // Each helper below mixes two 128-bit vectors at a time into their
  // corresponding states. All of the states are mixed slightly differently to
  // avoid hash collisions caused by trivial byte rotations of the input.
  // We combine the state and the data with _mm_add_epi64/_mm_sub_epi64 before
  // applying AES encryption to make the hash depend on the order of the
  // blocks. See the comments in LowLevelHash33To64 for more considerations.
auto mix_ab = [&state0,
&state1](const uint8_t* p) ABSL_ATTRIBUTE_ALWAYS_INLINE {
// i128 a = *p;
// i128 b = *(p + 16);
// state0 = decrypt(state0 + a, state0);
// state1 = decrypt(state1 - b, state1);
auto a = _mm_loadu_si128(reinterpret_cast<const __m128i*>(p));
auto b = _mm_loadu_si128(reinterpret_cast<const __m128i*>(p + 16));
state0 = _mm_aesdec_si128(_mm_add_epi64(state0, a), state0);
state1 = _mm_aesdec_si128(_mm_sub_epi64(state1, b), state1);
};
auto mix_cd = [&state2,
&state3](const uint8_t* p) ABSL_ATTRIBUTE_ALWAYS_INLINE {
// i128 c = *p;
// i128 d = *(p + 16);
// state2 = encrypt(state2 + c, state2);
// state3 = encrypt(state3 - d, state3);
auto c = _mm_loadu_si128(reinterpret_cast<const __m128i*>(p));
auto d = _mm_loadu_si128(reinterpret_cast<const __m128i*>(p + 16));
state2 = _mm_aesenc_si128(_mm_add_epi64(state2, c), state2);
state3 = _mm_aesenc_si128(_mm_sub_epi64(state3, d), state3);
};
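  // Main loop: each iteration consumes 64 bytes (32 via mix_ab, 32 via
  // mix_cd) and leaves between 1 and 64 trailing bytes for the tail handling
  // below.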
do {
PrefetchFutureDataToLocalCache(ptr);
mix_ab(ptr);
mix_cd(ptr + 32);
ptr += 64;
len -= 64;
} while (len > 64);
  // At this point between 1 and 64 bytes remain at `ptr`. If more than 32
  // remain, mix the leading 32 bytes into states 0/1; the trailing 32 bytes
  // of the input (which may overlap the leading ones) are always mixed into
  // states 2/3 via `last_32_ptr`.
if (len > 32) {
mix_ab(ptr);
}
mix_cd(last_32_ptr);
return Mix4x16Vectors(state0, state1, state2, state3);
}
#else
uint64_t Mix32Bytes(const uint8_t* ptr, uint64_t current_state) {
uint64_t a = absl::base_internal::UnalignedLoad64(ptr);
uint64_t b = absl::base_internal::UnalignedLoad64(ptr + 8);
uint64_t c = absl::base_internal::UnalignedLoad64(ptr + 16);
uint64_t d = absl::base_internal::UnalignedLoad64(ptr + 24);
uint64_t cs0 = Mix(a ^ kStaticRandomData[1], b ^ current_state);
uint64_t cs1 = Mix(c ^ kStaticRandomData[2], d ^ current_state);
return cs0 ^ cs1;
}
uint64_t LowLevelHash33To64(uint64_t seed, const uint8_t* ptr, size_t len) {
assert(len > 32);
assert(len <= 64);
uint64_t current_state = seed ^ kStaticRandomData[0] ^ len;
const uint8_t* last_32_ptr = ptr + len - 32;
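  // The two 32-byte windows overlap whenever len < 64; chaining the mixes
  // keeps the result dependent on every input byte and on the byte order.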
return Mix32Bytes(last_32_ptr, Mix32Bytes(ptr, current_state));
}
[[maybe_unused]] ABSL_ATTRIBUTE_NOINLINE uint64_t
LowLevelHashLenGt64(uint64_t seed, const void* data, size_t len) {
assert(len > 64);
const uint8_t* ptr = static_cast<const uint8_t*>(data);
uint64_t current_state = seed ^ kStaticRandomData[0] ^ len;
const uint8_t* last_32_ptr = ptr + len - 32;
  // With more than 64 bytes we handle chunks of 64 bytes at a time, building
  // up four separate hash states which we then hash together at the end.
  // Keeping the states independent avoids one long serial dependency chain
  // and lets the multiplications inside Mix() overlap.
uint64_t duplicated_state0 = current_state;
uint64_t duplicated_state1 = current_state;
uint64_t duplicated_state2 = current_state;
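  // Main loop: each iteration consumes 64 bytes, split across the four
  // independent states, and leaves between 1 and 64 trailing bytes for the
  // code below.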
do {
PrefetchFutureDataToLocalCache(ptr);
uint64_t a = absl::base_internal::UnalignedLoad64(ptr);
uint64_t b = absl::base_internal::UnalignedLoad64(ptr + 8);
uint64_t c = absl::base_internal::UnalignedLoad64(ptr + 16);
uint64_t d = absl::base_internal::UnalignedLoad64(ptr + 24);
uint64_t e = absl::base_internal::UnalignedLoad64(ptr + 32);
uint64_t f = absl::base_internal::UnalignedLoad64(ptr + 40);
uint64_t g = absl::base_internal::UnalignedLoad64(ptr + 48);
uint64_t h = absl::base_internal::UnalignedLoad64(ptr + 56);
current_state = Mix(a ^ kStaticRandomData[1], b ^ current_state);
duplicated_state0 = Mix(c ^ kStaticRandomData[2], d ^ duplicated_state0);
duplicated_state1 = Mix(e ^ kStaticRandomData[3], f ^ duplicated_state1);
duplicated_state2 = Mix(g ^ kStaticRandomData[4], h ^ duplicated_state2);
ptr += 64;
len -= 64;
} while (len > 64);
current_state = (current_state ^ duplicated_state0) ^
(duplicated_state1 + duplicated_state2);
  // At this point between 1 and 64 bytes remain at `ptr`, and the current
  // state of the hashing state machine is stored in `current_state`.
if (len > 32) {
current_state = Mix32Bytes(ptr, current_state);
}
  // At most 32 unmixed bytes remain, with the state in `current_state`.
  // Because the original input was longer than 32 bytes, we can safely read
  // the last 32 bytes of the input at `last_32_ptr` (== `ptr + len - 32`),
  // even though they may overlap bytes that were already mixed.
return Mix32Bytes(last_32_ptr, current_state);
}
#endif // ABSL_AES_INTERNAL_HAVE_X86_SIMD
[[maybe_unused]] uint64_t LowLevelHashLenGt32(uint64_t seed, const void* data,
size_t len) {
assert(len > 32);
if (ABSL_PREDICT_FALSE(len > 64)) {
return LowLevelHashLenGt64(seed, data, len);
}
return LowLevelHash33To64(seed, static_cast<const uint8_t*>(data), len);
}
ABSL_ATTRIBUTE_ALWAYS_INLINE inline uint64_t HashBlockOn32Bit(
uint64_t state, const unsigned char* data, size_t len) {
// TODO(b/417141985): expose and use CityHash32WithSeed.
// Note: we can't use PrecombineLengthMix here because len can be up to 1024.
return CombineRawImpl(
state + len,
hash_internal::CityHash32(reinterpret_cast<const char*>(data), len));
}
ABSL_ATTRIBUTE_NOINLINE uint64_t
SplitAndCombineOn32Bit(uint64_t state, const unsigned char* first, size_t len) {
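  // Hash the input in full PiecewiseChunkSize() blocks first; any remainder
  // is folded in below via the generic CombineContiguousImpl path.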
while (len >= PiecewiseChunkSize()) {
state = HashBlockOn32Bit(state, first, PiecewiseChunkSize());
len -= PiecewiseChunkSize();
first += PiecewiseChunkSize();
}
  // Do not call CombineContiguousImpl for an empty range, since it would
  // still modify the state.
if (len == 0) {
return state;
}
// Handle the remainder.
return CombineContiguousImpl(state, first, len,
std::integral_constant<int, 4>{});
}
ABSL_ATTRIBUTE_ALWAYS_INLINE inline uint64_t HashBlockOn64Bit(
uint64_t state, const unsigned char* data, size_t len) {
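  // Use the LowLevelHash path above when an intrinsic 128-bit integer type is
  // available; otherwise fall back to CityHash64WithSeed.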
#ifdef ABSL_HAVE_INTRINSIC_INT128
return LowLevelHashLenGt32(state, data, len);
#else
return hash_internal::CityHash64WithSeed(reinterpret_cast<const char*>(data),
len, state);
#endif
}
ABSL_ATTRIBUTE_NOINLINE uint64_t
SplitAndCombineOn64Bit(uint64_t state, const unsigned char* first, size_t len) {
while (len >= PiecewiseChunkSize()) {
state = HashBlockOn64Bit(state, first, PiecewiseChunkSize());
len -= PiecewiseChunkSize();
first += PiecewiseChunkSize();
}
  // Do not call CombineContiguousImpl for an empty range, since it would
  // still modify the state.
if (len == 0) {
return state;
}
// Handle the remainder.
return CombineContiguousImpl(state, first, len,
std::integral_constant<int, 8>{});
}
} // namespace
uint64_t CombineLargeContiguousImplOn32BitLengthGt8(uint64_t state,
const unsigned char* first,
size_t len) {
assert(len > 8);
assert(sizeof(size_t) == 4); // NOLINT(misc-static-assert)
if (ABSL_PREDICT_TRUE(len <= PiecewiseChunkSize())) {
return HashBlockOn32Bit(state, first, len);
}
return SplitAndCombineOn32Bit(state, first, len);
}
uint64_t CombineLargeContiguousImplOn64BitLengthGt32(uint64_t state,
const unsigned char* first,
size_t len) {
assert(len > 32);
assert(sizeof(size_t) == 8); // NOLINT(misc-static-assert)
if (ABSL_PREDICT_TRUE(len <= PiecewiseChunkSize())) {
return HashBlockOn64Bit(state, first, len);
}
return SplitAndCombineOn64Bit(state, first, len);
}
ABSL_CONST_INIT const void* const MixingHashState::kSeed = &kSeed;
} // namespace hash_internal
ABSL_NAMESPACE_END
} // namespace absl