AK: Implement SipHash as the default hash algorithm for most use cases

SipHash is highly HashDoS-resistant, initialized with a random seed at
startup (i.e. non-deterministic) and usable for security-critical use
cases with large enough parameters. We just use it because it's
reasonably secure with parameters 1-3 while having excellent properties
and not being significantly slower than before.
This commit is contained in:
kleines Filmröllchen 2023-09-21 00:14:35 +02:00 committed by Ali Mohammad Pur
parent 5e15c29e22
commit 9a026fc8d5
9 changed files with 282 additions and 11 deletions

View file

@ -23,6 +23,7 @@ set(AK_SOURCES
NumberFormat.cpp
OptionParser.cpp
Random.cpp
SipHash.cpp
StackInfo.cpp
Stream.cpp
String.cpp

View file

@ -14,6 +14,9 @@
namespace AK {
// A map datastructure, mapping keys K to values V, based on a hash table with closed hashing.
// HashMap can optionally provide ordered iteration based on the order of keys when IsOrdered = true.
// HashMap is based on HashTable, which should be used instead if just a set datastructure is required.
template<typename K, typename V, typename KeyTraits, typename ValueTraits, bool IsOrdered>
class HashMap {
private:

View file

@ -114,6 +114,9 @@ private:
BucketType* m_bucket { nullptr };
};
// A set datastructure based on a hash table with closed hashing.
// HashTable can optionally provide ordered iteration when IsOrdered = true.
// For a (more commonly required) map datastructure with key-value entries, see HashMap.
template<typename T, typename TraitsForT, bool IsOrdered>
class HashTable {
static constexpr size_t grow_capacity_at_least = 8;

184
AK/SipHash.cpp Normal file
View file

@ -0,0 +1,184 @@
/*
* Copyright (c) 2023, kleines Filmröllchen <filmroellchen@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/ByteReader.h>
#include <AK/Singleton.h>
#include <AK/SipHash.h>
#include <AK/Span.h>
#include <AK/UFixedBigInt.h>
#ifdef KERNEL
# include <Kernel/Security/Random.h>
#else
# include <AK/Random.h>
#endif
namespace AK {
// Rotates the 64-bit value `x` left by `bits` positions and returns the result.
// The rotation amount is reduced modulo 64, so a count of 0 (or any multiple of 64)
// returns `x` unchanged instead of shifting by the full width of the type,
// which would be undefined behavior (and a hard error during constant evaluation).
ALWAYS_INLINE constexpr u64 rotate_left(u64 x, u64 bits)
{
    u64 const left_shift = bits & 63;
    // For left_shift == 0 this evaluates to 0 rather than 64, keeping the right shift well-defined.
    u64 const right_shift = (64 - left_shift) & 63;
    return (x << left_shift) | (x >> right_shift);
}
// Performs one SipRound: an add-rotate-xor mixing step over the four 64-bit state words.
// All four words are updated in place; callers must pass four distinct objects.
ALWAYS_INLINE constexpr void sipround(u64& v0, u64& v1, u64& v2, u64& v3)
{
    // First half: mix the pairs (v0, v1) and (v2, v3).
    v0 += v1;
    v1 = rotate_left(v1, 13) ^ v0;
    v0 = rotate_left(v0, 32);

    v2 += v3;
    v3 = rotate_left(v3, 16) ^ v2;

    // Second half: mix the crossed pairs (v0, v3) and (v2, v1).
    v0 += v3;
    v3 = rotate_left(v3, 21) ^ v0;

    v2 += v1;
    v1 = rotate_left(v1, 17) ^ v2;
    v2 = rotate_left(v2, 32);
}
// Can handle u64 or u128 output as per reference implementation.
// We currently only use u64 and further fold it to u32 (unsigned) for use in Traits.
//
// Computes the SipHash of `input` under the 128-bit `key` and writes the digest into `output`
// as little-endian 64-bit words.
// - message_block_rounds: number of SipRounds applied per 64-bit message block.
// - finalization_rounds: number of SipRounds applied for each 64-bit output word.
// - output: must be exactly 8 or 16 bytes long (enforced with VERIFY); its size selects the digest width.
template<size_t message_block_rounds, size_t finalization_rounds>
static void do_siphash(ReadonlyBytes input, u128 key, Bytes output)
{
    VERIFY((output.size() == 8) || (output.size() == 16));
    bool const wide_output = output.size() == 16;

    // Initial state; the constants are the ASCII string "somepseudorandomlygeneratedbytes".
    u64 v0 = 0x736f6d6570736575ull;
    u64 v1 = 0x646f72616e646f6dull;
    u64 v2 = 0x6c7967656e657261ull;
    u64 v3 = 0x7465646279746573ull;

    // Number of trailing bytes that do not fill a whole 64-bit block.
    auto const left = input.size() % sizeof(u64);
    // The end of 64-bit blocks.
    auto const block_end = input.size() - left;

    // The last block carries the low byte of the input length in its most significant byte.
    // Widen to u64 before shifting: size_t may be only 32 bits wide, and shifting a 32-bit value by 56 is undefined.
    u64 b = static_cast<u64>(input.size()) << 56;

    v3 ^= key.high();
    v2 ^= key.low();
    v1 ^= key.high();
    v0 ^= key.low();

    // The 128-bit variant is domain-separated from the 64-bit one.
    if (wide_output)
        v1 ^= 0xee;

    // Absorb all complete 64-bit blocks, interpreted as little-endian words.
    for (size_t input_index = 0; input_index < block_end; input_index += 8) {
        u64 const m = bit_cast<LittleEndian<u64>>(ByteReader::load64(input.slice(input_index, sizeof(u64)).data()));
        v3 ^= m;
        for (size_t i = 0; i < message_block_rounds; ++i)
            sipround(v0, v1, v2, v3);
        v0 ^= m;
    }

    // Pack the remaining 0-7 bytes into the low bytes of the final block (little-endian order).
    switch (left) {
    case 7:
        b |= (static_cast<u64>(input[block_end + 6])) << 48;
        [[fallthrough]];
    case 6:
        b |= (static_cast<u64>(input[block_end + 5])) << 40;
        [[fallthrough]];
    case 5:
        b |= (static_cast<u64>(input[block_end + 4])) << 32;
        [[fallthrough]];
    case 4:
        b |= (static_cast<u64>(input[block_end + 3])) << 24;
        [[fallthrough]];
    case 3:
        b |= (static_cast<u64>(input[block_end + 2])) << 16;
        [[fallthrough]];
    case 2:
        b |= (static_cast<u64>(input[block_end + 1])) << 8;
        [[fallthrough]];
    case 1:
        b |= (static_cast<u64>(input[block_end + 0]));
        break;
    case 0:
        break;
    }

    // Absorb the final (length-tagged) block.
    v3 ^= b;
    for (size_t i = 0; i < message_block_rounds; ++i)
        sipround(v0, v1, v2, v3);
    v0 ^= b;

    // Finalization for the first output word.
    if (wide_output)
        v2 ^= 0xee;
    else
        v2 ^= 0xff;

    for (size_t i = 0; i < finalization_rounds; ++i)
        sipround(v0, v1, v2, v3);

    b = v0 ^ v1 ^ v2 ^ v3;
    LittleEndian<u64> b_le { b };
    output.overwrite(0, &b_le, sizeof(b_le));

    if (!wide_output)
        return;

    // Second output word, only produced for the 128-bit variant.
    v1 ^= 0xdd;

    for (size_t i = 0; i < finalization_rounds; ++i)
        sipround(v0, v1, v2, v3);

    b = v0 ^ v1 ^ v2 ^ v3;
    b_le = b;
    output.overwrite(sizeof(b_le), &b_le, sizeof(b_le));
}
struct SipHashKey {
SipHashKey()
{
#ifdef KERNEL
key = Kernel::get_good_random<u128>();
#else
// get_random is assumed to be secure, otherwise SipHash doesn't deliver on its promises!
key = get_random<u128>();
#endif
}
constexpr u128 operator*() const { return key; }
u128 key;
};
// Using a singleton is a little heavier than a plain static, but avoids an initialization order fiasco.
static Singleton<SipHashKey> static_sip_hash_key;
// Hashes the in-memory bytes of a single u64 with sip_hash_bytes and folds the
// 64-bit result down to `unsigned` by xoring its upper half into its lower half.
template<size_t message_block_rounds, size_t finalization_rounds>
unsigned sip_hash_u64(u64 input)
{
    ReadonlyBytes const raw_input { &input, sizeof(input) };
    auto const full_hash = sip_hash_bytes<message_block_rounds, finalization_rounds>(raw_input);
    auto const upper_half = full_hash >> 32;
    return static_cast<unsigned>(full_hash ^ upper_half);
}
// Default integer hash: SipHash with 1 round per message block and 3 finalization rounds.
unsigned standard_sip_hash(u64 input)
{
    constexpr size_t rounds_per_block = 1;
    constexpr size_t rounds_at_end = 3;
    return sip_hash_u64<rounds_per_block, rounds_at_end>(input);
}
// Conservative integer hash: SipHash with 4 rounds per message block and 8 finalization rounds.
unsigned secure_sip_hash(u64 input)
{
    constexpr size_t rounds_per_block = 4;
    constexpr size_t rounds_at_end = 8;
    return sip_hash_u64<rounds_per_block, rounds_at_end>(input);
}
// Computes the 64-bit SipHash of `input` using the key held by static_sip_hash_key.
// The digest bytes written by do_siphash are returned reinterpreted as a native u64.
template<size_t message_block_rounds, size_t finalization_rounds>
u64 sip_hash_bytes(ReadonlyBytes input)
{
    u64 digest = 0;
    Bytes digest_bytes { &digest, sizeof(digest) };

    SipHashKey const& key_holder = *static_sip_hash_key;
    do_siphash<message_block_rounds, finalization_rounds>(input, *key_holder, digest_bytes);

    return digest;
}

// Instantiate all used SipHash variants here:
template u64 sip_hash_bytes<1, 3>(ReadonlyBytes);
template u64 sip_hash_bytes<4, 8>(ReadonlyBytes);
}

29
AK/SipHash.h Normal file
View file

@ -0,0 +1,29 @@
/*
* Copyright (c) 2023, kleines Filmröllchen <filmroellchen@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/Forward.h>
namespace AK {
// Ported from the SipHash reference implementation, released to the public domain:
// https://github.com/veorq/SipHash/blob/eee7d0d84dc7731df2359b243aa5e75d85f6eaef/siphash.c
// The standard is SipHash-2-4, but we use 1-3 for a little more speed.
// Cryptography should use 4-8 for (relative) conservative security,
// though SipHash itself is NOT a cryptographically secure hash algorithm.
//
// Hashes `input` with SipHash, running `message_block_rounds` rounds per message block and
// `finalization_rounds` rounds during finalization, and returns the 64-bit result.
// The key is generated randomly at runtime, so results are not stable across program runs.
// Only the <1, 3> and <4, 8> variants are explicitly instantiated in SipHash.cpp.
template<size_t message_block_rounds, size_t finalization_rounds>
u64 sip_hash_bytes(ReadonlyBytes input);
// SipHash-1-3 of a single u64, folded down to `unsigned`.
unsigned standard_sip_hash(u64 input);
// SipHash-4-8 of a single u64, folded down to `unsigned`.
unsigned secure_sip_hash(u64 input);
}
#ifdef USING_AK_GLOBALLY
using AK::secure_sip_hash;
using AK::sip_hash_bytes;
using AK::standard_sip_hash;
#endif

View file

@ -10,6 +10,10 @@
namespace AK {
// FIXME: This hashing algorithm isn't well-known and may not be good at all.
// We can't use SipHash since that depends on runtime parameters,
// but some string hashes like IPC endpoint magic numbers need to be deterministic.
// Maybe use a SipHash with a statically-known key?
constexpr u32 string_hash(char const* characters, size_t length, u32 seed = 0)
{
u32 hash = seed;

View file

@ -10,6 +10,7 @@
#include <AK/Concepts.h>
#include <AK/Forward.h>
#include <AK/HashFunctions.h>
#include <AK/SipHash.h>
#include <AK/StringHash.h>
namespace AK {
@ -33,12 +34,9 @@ template<Integral T>
struct Traits<T> : public GenericTraits<T> {
static constexpr bool is_trivial() { return true; }
static constexpr bool is_trivially_serializable() { return true; }
static constexpr unsigned hash(T value)
static unsigned hash(T value)
{
if constexpr (sizeof(T) < 8)
return int_hash(value);
else
return u64_hash(value);
return standard_sip_hash(static_cast<u64>(value));
}
};
@ -47,19 +45,16 @@ template<FloatingPoint T>
struct Traits<T> : public GenericTraits<T> {
static constexpr bool is_trivial() { return true; }
static constexpr bool is_trivially_serializable() { return true; }
static constexpr unsigned hash(T value)
static unsigned hash(T value)
{
if constexpr (sizeof(T) < 8)
return int_hash(bit_cast<u32>(value));
else
return u64_hash(bit_cast<u64>(value));
return standard_sip_hash(bit_cast<u64>(static_cast<double>(value)));
}
};
#endif
template<typename T>
requires(IsPointer<T> && !Detail::IsPointerOfType<char, T>) struct Traits<T> : public GenericTraits<T> {
static unsigned hash(T p) { return ptr_hash(p); }
static unsigned hash(T p) { return standard_sip_hash(bit_cast<FlatPtr>(p)); }
static constexpr bool is_trivial() { return true; }
};

View file

@ -522,6 +522,7 @@ set(AK_SOURCES
../AK/GenericLexer.cpp
../AK/Hex.cpp
../AK/MemoryStream.cpp
../AK/SipHash.cpp
../AK/Stream.cpp
../AK/StringBuilder.cpp
../AK/StringUtils.cpp

View file

@ -7,6 +7,7 @@
#include <LibTest/TestCase.h>
#include <AK/HashFunctions.h>
#include <AK/SipHash.h>
#include <AK/Types.h>
TEST_CASE(int_hash)
@ -53,3 +54,53 @@ TEST_CASE(constexpr_ptr_hash)
// "ptr_hash" test binds the result.
static_assert(ptr_hash(FlatPtr(42)));
}
// Testing concrete hash results is not possible due to SipHash's non-determinism.
// We instead perform some sanity checks and try to hit any asserts caused by programming errors.
TEST_CASE(sip_hash)
{
// Hashing the same input twice must give the same result within one process.
EXPECT_EQ(standard_sip_hash(42), standard_sip_hash(42));
EXPECT_EQ(secure_sip_hash(42), secure_sip_hash(42));
// The two round configurations are expected to disagree on the same input.
// NOTE(review): both results are 32-bit values derived from a random key, so a chance collision
// (roughly 1 in 2^32 per run) would make this check fail spuriously.
EXPECT_NE(standard_sip_hash(42), secure_sip_hash(42));
}
TEST_CASE(sip_hash_bytes)
{
// An 8-byte input, and a 16-byte input that starts with the same 8 bytes followed by zeros.
constexpr Array<u8, 8> short_test_array { 1, 2, 3, 4, 5, 6, 7, 8 };
constexpr Array<u8, 16> common_prefix_array { 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0 };
// Same input hashes equal; zero-padding the input to a longer length must change the hash.
EXPECT_EQ((sip_hash_bytes<1, 3>(short_test_array.span())), (sip_hash_bytes<1, 3>(short_test_array.span())));
EXPECT_NE((sip_hash_bytes<1, 3>(short_test_array.span())), (sip_hash_bytes<1, 3>(common_prefix_array.span())));
// Exercise every partial-block length (1 to 7 bytes); trim(prefix_length) is expected to yield the first prefix_length bytes.
for (size_t prefix_length = 1; prefix_length < 8; ++prefix_length) {
// A strict prefix must hash differently from the full 8-byte input...
EXPECT_NE((sip_hash_bytes<1, 3>(short_test_array.span().trim(prefix_length))), (sip_hash_bytes<1, 3>(short_test_array.span())));
// ...while identical prefixes taken from the two different arrays must hash equal.
EXPECT_EQ((sip_hash_bytes<1, 3>(short_test_array.span().trim(prefix_length))), (sip_hash_bytes<1, 3>(common_prefix_array.span().trim(prefix_length))));
}
}
// Shared benchmark body: calls `hash_function` twice for each of one million consecutive integers
// and checks that both calls agree. `hash_function` must be callable with a u64 and return unsigned.
template<typename HashFunction>
requires(IsCallableWithArguments<HashFunction, unsigned, u64>)
static void run_benchmark(HashFunction hash_function)
{
for (size_t i = 0; i < 1'000'000; ++i) {
auto a = hash_function(i);
// Presumably keeps the optimizer from eliding or merging the hash calls -- confirm against AK::taint_for_optimizer.
AK::taint_for_optimizer(a);
auto b = hash_function(i);
AK::taint_for_optimizer(b);
EXPECT_EQ(a, b);
}
}
// Baseline: runs the shared benchmark loop over u64_hash.
BENCHMARK_CASE(deterministic_hash)
{
run_benchmark(u64_hash);
}
// Runs the shared benchmark loop over standard_sip_hash.
BENCHMARK_CASE(fast_sip_hash)
{
run_benchmark(standard_sip_hash);
}
// Runs the shared benchmark loop over secure_sip_hash.
BENCHMARK_CASE(secure_sip_hash)
{
run_benchmark(secure_sip_hash);
}