mirror of
https://github.com/SerenityOS/serenity.git
synced 2025-01-22 09:21:57 -05:00
AK: Implement SipHash as the default hash algorithm for most use cases
SipHash is highly HashDoS-resistant, initialized with a random seed at startup (i.e. non-deterministic) and usable for security-critical use cases with large enough parameters. We just use it because it's reasonably secure with parameters 1-3 while having excellent properties and not being significantly slower than before.
This commit is contained in:
parent
5e15c29e22
commit
9a026fc8d5
9 changed files with 282 additions and 11 deletions
|
@ -23,6 +23,7 @@ set(AK_SOURCES
|
|||
NumberFormat.cpp
|
||||
OptionParser.cpp
|
||||
Random.cpp
|
||||
SipHash.cpp
|
||||
StackInfo.cpp
|
||||
Stream.cpp
|
||||
String.cpp
|
||||
|
|
|
@ -14,6 +14,9 @@
|
|||
|
||||
namespace AK {
|
||||
|
||||
// A map datastructure, mapping keys K to values V, based on a hash table with closed hashing.
|
||||
// HashMap can optionally provide ordered iteration based on the order of keys when IsOrdered = true.
|
||||
// HashMap is based on HashTable, which should be used instead if just a set datastructure is required.
|
||||
template<typename K, typename V, typename KeyTraits, typename ValueTraits, bool IsOrdered>
|
||||
class HashMap {
|
||||
private:
|
||||
|
|
|
@ -114,6 +114,9 @@ private:
|
|||
BucketType* m_bucket { nullptr };
|
||||
};
|
||||
|
||||
// A set datastructure based on a hash table with closed hashing.
|
||||
// HashTable can optionally provide ordered iteration when IsOrdered = true.
|
||||
// For a (more commonly required) map datastructure with key-value entries, see HashMap.
|
||||
template<typename T, typename TraitsForT, bool IsOrdered>
|
||||
class HashTable {
|
||||
static constexpr size_t grow_capacity_at_least = 8;
|
||||
|
|
184
AK/SipHash.cpp
Normal file
184
AK/SipHash.cpp
Normal file
|
@ -0,0 +1,184 @@
|
|||
/*
|
||||
* Copyright (c) 2023, kleines Filmröllchen <filmroellchen@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <AK/ByteReader.h>
|
||||
#include <AK/Singleton.h>
|
||||
#include <AK/SipHash.h>
|
||||
#include <AK/Span.h>
|
||||
#include <AK/UFixedBigInt.h>
|
||||
|
||||
#ifdef KERNEL
|
||||
# include <Kernel/Security/Random.h>
|
||||
#else
|
||||
# include <AK/Random.h>
|
||||
#endif
|
||||
|
||||
namespace AK {
|
||||
|
||||
// Rotates the 64-bit value `x` left by `bits` positions; bits shifted out on the left re-enter on the right.
// The rotation count is reduced modulo 64, so a count of 0 (or any multiple of 64) returns `x` unchanged
// instead of performing a shift by 64, which is undefined behavior for a 64-bit operand.
ALWAYS_INLINE constexpr u64 rotate_left(u64 x, u64 bits)
{
    u64 const left_shift = bits & 63;
    // Masking the complementary shift keeps it within [0, 63] even when left_shift is 0.
    u64 const right_shift = (64 - left_shift) & 63;
    return (x << left_shift) | (x >> right_shift);
}
|
||||
|
||||
// Applies one SipRound to the four 64-bit state words in place.
// The round consists only of 64-bit additions, left rotations and XORs; the order of the steps matters,
// since each step consumes the result of the previous ones.
ALWAYS_INLINE constexpr void sipround(u64& v0, u64& v1, u64& v2, u64& v3)
{
    // Mix v1 into v0.
    v0 += v1;
    v1 = rotate_left(v1, 13) ^ v0;
    v0 = rotate_left(v0, 32);

    // Mix v3 into v2.
    v2 += v3;
    v3 = rotate_left(v3, 16) ^ v2;

    // Cross-mix: v3 into v0.
    v0 += v3;
    v3 = rotate_left(v3, 21) ^ v0;

    // Cross-mix: v1 into v2.
    v2 += v1;
    v1 = rotate_left(v1, 17) ^ v2;
    v2 = rotate_left(v2, 32);
}
|
||||
|
||||
// Can handle u64 or u128 output as per reference implementation.
|
||||
// We currently only use u64 and further fold it to u32 (unsigned) for use in Traits.
|
||||
template<size_t message_block_rounds, size_t finalization_rounds>
|
||||
static void do_siphash(ReadonlyBytes input, u128 key, Bytes output)
|
||||
{
|
||||
VERIFY((output.size() == 8) || (output.size() == 16));
|
||||
|
||||
u64 v0 = 0x736f6d6570736575ull;
|
||||
u64 v1 = 0x646f72616e646f6dull;
|
||||
u64 v2 = 0x6c7967656e657261ull;
|
||||
u64 v3 = 0x7465646279746573ull;
|
||||
auto const left = input.size() & 7;
|
||||
// The end of 64-bit blocks.
|
||||
auto const block_end = input.size() - (input.size() % sizeof(u64));
|
||||
u64 b = input.size() << 56;
|
||||
v3 ^= key.high();
|
||||
v2 ^= key.low();
|
||||
v1 ^= key.high();
|
||||
v0 ^= key.low();
|
||||
|
||||
if (output.size() == 16)
|
||||
v1 ^= 0xee;
|
||||
|
||||
for (size_t input_index = 0; input_index < block_end; input_index += 8) {
|
||||
u64 const m = bit_cast<LittleEndian<u64>>(ByteReader::load64(input.slice(input_index, sizeof(u64)).data()));
|
||||
v3 ^= m;
|
||||
|
||||
for (size_t i = 0; i < message_block_rounds; ++i)
|
||||
sipround(v0, v1, v2, v3);
|
||||
|
||||
v0 ^= m;
|
||||
}
|
||||
|
||||
switch (left) {
|
||||
case 7:
|
||||
b |= (static_cast<u64>(input[block_end + 6])) << 48;
|
||||
[[fallthrough]];
|
||||
case 6:
|
||||
b |= (static_cast<u64>(input[block_end + 5])) << 40;
|
||||
[[fallthrough]];
|
||||
case 5:
|
||||
b |= (static_cast<u64>(input[block_end + 4])) << 32;
|
||||
[[fallthrough]];
|
||||
case 4:
|
||||
b |= (static_cast<u64>(input[block_end + 3])) << 24;
|
||||
[[fallthrough]];
|
||||
case 3:
|
||||
b |= (static_cast<u64>(input[block_end + 2])) << 16;
|
||||
[[fallthrough]];
|
||||
case 2:
|
||||
b |= (static_cast<u64>(input[block_end + 1])) << 8;
|
||||
[[fallthrough]];
|
||||
case 1:
|
||||
b |= (static_cast<u64>(input[block_end + 0]));
|
||||
break;
|
||||
case 0:
|
||||
break;
|
||||
}
|
||||
|
||||
v3 ^= b;
|
||||
|
||||
for (size_t i = 0; i < message_block_rounds; ++i)
|
||||
sipround(v0, v1, v2, v3);
|
||||
|
||||
v0 ^= b;
|
||||
|
||||
if (output.size() == 16)
|
||||
v2 ^= 0xee;
|
||||
else
|
||||
v2 ^= 0xff;
|
||||
|
||||
for (size_t i = 0; i < finalization_rounds; ++i)
|
||||
sipround(v0, v1, v2, v3);
|
||||
|
||||
b = v0 ^ v1 ^ v2 ^ v3;
|
||||
|
||||
LittleEndian<u64> b_le { b };
|
||||
output.overwrite(0, &b_le, sizeof(b_le));
|
||||
|
||||
if (output.size() == 8)
|
||||
return;
|
||||
|
||||
v1 ^= 0xdd;
|
||||
|
||||
for (size_t i = 0; i < finalization_rounds; ++i)
|
||||
sipround(v0, v1, v2, v3);
|
||||
|
||||
b = v0 ^ v1 ^ v2 ^ v3;
|
||||
b_le = b;
|
||||
output.overwrite(sizeof(b_le), &b_le, sizeof(b_le));
|
||||
}
|
||||
|
||||
struct SipHashKey {
|
||||
SipHashKey()
|
||||
{
|
||||
#ifdef KERNEL
|
||||
key = Kernel::get_good_random<u128>();
|
||||
#else
|
||||
// get_random is assumed to be secure, otherwise SipHash doesn't deliver on its promises!
|
||||
key = get_random<u128>();
|
||||
#endif
|
||||
}
|
||||
constexpr u128 operator*() const { return key; }
|
||||
u128 key;
|
||||
};
|
||||
// The key used by every sip_hash_bytes() call in this file; the random key itself is drawn in SipHashKey's constructor.
// Using a singleton is a little heavier than a plain static, but avoids an initialization order fiasco.
|
||||
static Singleton<SipHashKey> static_sip_hash_key;
|
||||
|
||||
template<size_t message_block_rounds, size_t finalization_rounds>
|
||||
unsigned sip_hash_u64(u64 input)
|
||||
{
|
||||
ReadonlyBytes input_bytes { &input, sizeof(input) };
|
||||
u64 const output_u64 = sip_hash_bytes<message_block_rounds, finalization_rounds>(input_bytes);
|
||||
return static_cast<unsigned>(output_u64 ^ (output_u64 >> 32));
|
||||
}
|
||||
|
||||
// Hashes `input` with SipHash-1-3 (1 round per message block, 3 finalization rounds), folded to `unsigned`.
unsigned standard_sip_hash(u64 input)
{
    constexpr size_t rounds_per_block = 1;
    constexpr size_t rounds_at_end = 3;
    return sip_hash_u64<rounds_per_block, rounds_at_end>(input);
}
|
||||
|
||||
// Hashes `input` with SipHash-4-8 (4 rounds per message block, 8 finalization rounds), folded to `unsigned`.
unsigned secure_sip_hash(u64 input)
{
    constexpr size_t rounds_per_block = 4;
    constexpr size_t rounds_at_end = 8;
    return sip_hash_u64<rounds_per_block, rounds_at_end>(input);
}
|
||||
|
||||
template<size_t message_block_rounds, size_t finalization_rounds>
|
||||
u64 sip_hash_bytes(ReadonlyBytes input)
|
||||
{
|
||||
auto sip_hash_key = **static_sip_hash_key;
|
||||
u64 output = 0;
|
||||
Bytes output_bytes { &output, sizeof(output) };
|
||||
do_siphash<message_block_rounds, finalization_rounds>(input, sip_hash_key, output_bytes);
|
||||
return output;
|
||||
}
|
||||
|
||||
// Instantiate all used SipHash variants here:
// sip_hash_bytes is only declared in SipHash.h and defined in this file, so each
// <message_block_rounds, finalization_rounds> combination used from other files needs an explicit instantiation.
|
||||
// SipHash-1-3, as used by standard_sip_hash.
template u64 sip_hash_bytes<1, 3>(ReadonlyBytes);
|
||||
// SipHash-4-8, as used by secure_sip_hash.
template u64 sip_hash_bytes<4, 8>(ReadonlyBytes);
|
||||
|
||||
}
|
29
AK/SipHash.h
Normal file
29
AK/SipHash.h
Normal file
|
@ -0,0 +1,29 @@
|
|||
/*
|
||||
* Copyright (c) 2023, kleines Filmröllchen <filmroellchen@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <AK/Forward.h>
|
||||
|
||||
namespace AK {
|
||||
|
||||
// Ported from the SipHash reference implementation, released to the public domain:
|
||||
// https://github.com/veorq/SipHash/blob/eee7d0d84dc7731df2359b243aa5e75d85f6eaef/siphash.c
|
||||
// The standard is SipHash-2-4, but we use 1-3 for a little more speed.
|
||||
// Cryptography should use 4-8 for (relatively) conservative security,
|
||||
// though SipHash itself is NOT a cryptographically secure hash algorithm.
|
||||
// Hashes `input` with SipHash using `message_block_rounds` rounds per message block and
// `finalization_rounds` finalization rounds, and returns the 64-bit result.
// NOTE(review): only declared here; SipHash.cpp explicitly instantiates the <1, 3> and <4, 8> variants,
// so other round counts presumably fail to link until an instantiation is added there.
template<size_t message_block_rounds, size_t finalization_rounds>
|
||||
u64 sip_hash_bytes(ReadonlyBytes input);
|
||||
// SipHash-1-3 of a single u64, folded to `unsigned`.
unsigned standard_sip_hash(u64 input);
|
||||
// SipHash-4-8 of a single u64, folded to `unsigned`.
unsigned secure_sip_hash(u64 input);
|
||||
|
||||
}
|
||||
|
||||
#ifdef USING_AK_GLOBALLY
|
||||
using AK::secure_sip_hash;
|
||||
using AK::sip_hash_bytes;
|
||||
using AK::standard_sip_hash;
|
||||
#endif
|
|
@ -10,6 +10,10 @@
|
|||
|
||||
namespace AK {
|
||||
|
||||
// FIXME: This hashing algorithm isn't well-known and may not be good at all.
|
||||
// We can't use SipHash since that depends on runtime parameters,
|
||||
// but some string hashes like IPC endpoint magic numbers need to be deterministic.
|
||||
// Maybe use a SipHash with a statically-known key?
|
||||
constexpr u32 string_hash(char const* characters, size_t length, u32 seed = 0)
|
||||
{
|
||||
u32 hash = seed;
|
||||
|
|
17
AK/Traits.h
17
AK/Traits.h
|
@ -10,6 +10,7 @@
|
|||
#include <AK/Concepts.h>
|
||||
#include <AK/Forward.h>
|
||||
#include <AK/HashFunctions.h>
|
||||
#include <AK/SipHash.h>
|
||||
#include <AK/StringHash.h>
|
||||
|
||||
namespace AK {
|
||||
|
@ -33,12 +34,9 @@ template<Integral T>
|
|||
struct Traits<T> : public GenericTraits<T> {
|
||||
static constexpr bool is_trivial() { return true; }
|
||||
static constexpr bool is_trivially_serializable() { return true; }
|
||||
static constexpr unsigned hash(T value)
|
||||
static unsigned hash(T value)
|
||||
{
|
||||
if constexpr (sizeof(T) < 8)
|
||||
return int_hash(value);
|
||||
else
|
||||
return u64_hash(value);
|
||||
return standard_sip_hash(static_cast<u64>(value));
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -47,19 +45,16 @@ template<FloatingPoint T>
|
|||
struct Traits<T> : public GenericTraits<T> {
|
||||
static constexpr bool is_trivial() { return true; }
|
||||
static constexpr bool is_trivially_serializable() { return true; }
|
||||
static constexpr unsigned hash(T value)
|
||||
static unsigned hash(T value)
|
||||
{
|
||||
if constexpr (sizeof(T) < 8)
|
||||
return int_hash(bit_cast<u32>(value));
|
||||
else
|
||||
return u64_hash(bit_cast<u64>(value));
|
||||
return standard_sip_hash(bit_cast<u64>(static_cast<double>(value)));
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
template<typename T>
|
||||
requires(IsPointer<T> && !Detail::IsPointerOfType<char, T>) struct Traits<T> : public GenericTraits<T> {
|
||||
static unsigned hash(T p) { return ptr_hash(p); }
|
||||
static unsigned hash(T p) { return standard_sip_hash(bit_cast<FlatPtr>(p)); }
|
||||
static constexpr bool is_trivial() { return true; }
|
||||
};
|
||||
|
||||
|
|
|
@ -522,6 +522,7 @@ set(AK_SOURCES
|
|||
../AK/GenericLexer.cpp
|
||||
../AK/Hex.cpp
|
||||
../AK/MemoryStream.cpp
|
||||
../AK/SipHash.cpp
|
||||
../AK/Stream.cpp
|
||||
../AK/StringBuilder.cpp
|
||||
../AK/StringUtils.cpp
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
#include <LibTest/TestCase.h>
|
||||
|
||||
#include <AK/HashFunctions.h>
|
||||
#include <AK/SipHash.h>
|
||||
#include <AK/Types.h>
|
||||
|
||||
TEST_CASE(int_hash)
|
||||
|
@ -53,3 +54,53 @@ TEST_CASE(constexpr_ptr_hash)
|
|||
// "ptr_hash" test binds the result.
|
||||
static_assert(ptr_hash(FlatPtr(42)));
|
||||
}
|
||||
|
||||
// Testing concrete hash results is not possible due to SipHash's non-determinism.
|
||||
// We instead perform some sanity checks and try to hit any asserts caused by programming errors.
|
||||
TEST_CASE(sip_hash)
{
    constexpr u64 sample_input = 42;

    // Hashing the same value twice must give the same result within one run.
    EXPECT_EQ(standard_sip_hash(sample_input), standard_sip_hash(sample_input));
    EXPECT_EQ(secure_sip_hash(sample_input), secure_sip_hash(sample_input));

    // The two variants use different round counts and are expected to disagree.
    EXPECT_NE(standard_sip_hash(sample_input), secure_sip_hash(sample_input));
}
|
||||
|
||||
TEST_CASE(sip_hash_bytes)
{
    constexpr Array<u8, 8> short_test_array { 1, 2, 3, 4, 5, 6, 7, 8 };
    constexpr Array<u8, 16> common_prefix_array { 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0 };
    auto const short_bytes = short_test_array.span();
    auto const padded_bytes = common_prefix_array.span();

    // Identical input hashes identically; a longer input that merely shares the prefix does not.
    EXPECT_EQ((sip_hash_bytes<1, 3>(short_bytes)), (sip_hash_bytes<1, 3>(short_bytes)));
    EXPECT_NE((sip_hash_bytes<1, 3>(short_bytes)), (sip_hash_bytes<1, 3>(padded_bytes)));

    // Every proper prefix (1 to 7 bytes) must differ from the full 8-byte input,
    // but must hash the same regardless of which array it was trimmed from.
    for (size_t prefix_length = 1; prefix_length <= 7; ++prefix_length) {
        EXPECT_NE((sip_hash_bytes<1, 3>(short_bytes.trim(prefix_length))), (sip_hash_bytes<1, 3>(short_bytes)));
        EXPECT_EQ((sip_hash_bytes<1, 3>(short_bytes.trim(prefix_length))), (sip_hash_bytes<1, 3>(padded_bytes.trim(prefix_length))));
    }
}
|
||||
|
||||
template<typename HashFunction>
|
||||
requires(IsCallableWithArguments<HashFunction, unsigned, u64>)
|
||||
static void run_benchmark(HashFunction hash_function)
|
||||
{
|
||||
for (size_t i = 0; i < 1'000'000; ++i) {
|
||||
auto a = hash_function(i);
|
||||
AK::taint_for_optimizer(a);
|
||||
auto b = hash_function(i);
|
||||
AK::taint_for_optimizer(b);
|
||||
EXPECT_EQ(a, b);
|
||||
}
|
||||
}
|
||||
|
||||
// Runs the shared benchmark loop over u64_hash (from AK/HashFunctions.h), for comparison with the SipHash benchmarks in this file.
BENCHMARK_CASE(deterministic_hash)
|
||||
{
|
||||
run_benchmark(u64_hash);
|
||||
}
|
||||
|
||||
// Runs the shared benchmark loop over standard_sip_hash.
BENCHMARK_CASE(fast_sip_hash)
|
||||
{
|
||||
run_benchmark(standard_sip_hash);
|
||||
}
|
||||
|
||||
// Runs the shared benchmark loop over secure_sip_hash.
BENCHMARK_CASE(secure_sip_hash)
|
||||
{
|
||||
run_benchmark(secure_sip_hash);
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue