2020-01-18 03:38:21 -05:00
|
|
|
/*
 * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */
|
|
|
|
|
2018-10-10 05:53:07 -04:00
|
|
|
#pragma once
|
|
|
|
|
2020-10-15 17:34:07 -04:00
|
|
|
#include <AK/HashFunctions.h>
|
2019-06-27 10:36:31 -04:00
|
|
|
#include <AK/StdLibExtras.h>
|
2020-10-15 17:34:07 -04:00
|
|
|
#include <AK/Types.h>
|
|
|
|
#include <AK/kmalloc.h>
|
2018-10-10 05:53:07 -04:00
|
|
|
|
|
|
|
namespace AK {
|
|
|
|
|
2020-07-06 17:44:33 -04:00
|
|
|
// Outcome of inserting a value into a hash table: either a fresh entry was
// created, or an entry that compared equal was overwritten.
enum class HashSetResult {
    InsertedNewEntry,
    ReplacedExistingEntry
};
|
|
|
|
|
2020-10-15 17:34:07 -04:00
|
|
|
// Forward iterator over the used buckets of a hash table.
//
// The iterator wraps a raw pointer into the table's bucket array. A null
// bucket pointer represents the end iterator. Only HashTableType (declared a
// friend) can construct an iterator from a bucket pointer.
//
// BucketType must expose `used` and `end` flags and a `slot()` accessor
// returning a pointer to the stored value.
template<typename HashTableType, typename T, typename BucketType>
class HashTableIterator {
    friend HashTableType;

public:
    // Two iterators are equal when they point at the same bucket.
    bool operator==(const HashTableIterator& other) const { return m_bucket == other.m_bucket; }
    bool operator!=(const HashTableIterator& other) const { return m_bucket != other.m_bucket; }

    // Dereferencing the end iterator (null bucket) is not checked here.
    T& operator*() { return *m_bucket->slot(); }
    T* operator->() { return m_bucket->slot(); }

    void operator++() { skip_to_next(); }

private:
    // Advances to the next used bucket, or becomes the end iterator once the
    // bucket flagged `end` is reached. Does nothing on an end iterator.
    void skip_to_next()
    {
        if (!m_bucket)
            return;
        do {
            ++m_bucket;
            if (m_bucket->used)
                return;
        } while (!m_bucket->end);
        // The loop above only falls through when the end bucket was reached.
        m_bucket = nullptr;
    }

    explicit HashTableIterator(BucketType* bucket)
        : m_bucket(bucket)
    {
    }

    BucketType* m_bucket { nullptr };
};
|
|
|
|
|
2018-10-10 05:53:07 -04:00
|
|
|
template<typename T, typename TraitsForT>
|
|
|
|
class HashTable {
|
2020-10-16 02:32:35 -04:00
|
|
|
static constexpr size_t load_factor_in_percent = 60;
|
|
|
|
|
2020-10-15 17:34:07 -04:00
|
|
|
struct Bucket {
|
|
|
|
bool used;
|
|
|
|
bool deleted;
|
|
|
|
bool end;
|
|
|
|
alignas(T) u8 storage[sizeof(T)];
|
|
|
|
|
|
|
|
T* slot() { return reinterpret_cast<T*>(storage); }
|
|
|
|
const T* slot() const { return reinterpret_cast<const T*>(storage); }
|
|
|
|
};
|
2018-10-10 05:53:07 -04:00
|
|
|
|
|
|
|
public:
|
2021-01-10 18:29:28 -05:00
|
|
|
HashTable() = default;
|
2021-04-11 04:24:35 -04:00
|
|
|
explicit HashTable(size_t capacity) { rehash(capacity); }
|
2020-10-17 09:44:43 -04:00
|
|
|
|
|
|
|
~HashTable()
|
|
|
|
{
|
|
|
|
if (!m_buckets)
|
|
|
|
return;
|
|
|
|
|
|
|
|
for (size_t i = 0; i < m_capacity; ++i) {
|
|
|
|
if (m_buckets[i].used)
|
|
|
|
m_buckets[i].slot()->~T();
|
|
|
|
}
|
|
|
|
|
|
|
|
kfree(m_buckets);
|
|
|
|
}
|
2020-10-15 17:34:07 -04:00
|
|
|
|
2019-06-24 05:57:54 -04:00
|
|
|
HashTable(const HashTable& other)
|
|
|
|
{
|
2020-10-15 17:34:07 -04:00
|
|
|
rehash(other.capacity());
|
2019-06-24 05:57:54 -04:00
|
|
|
for (auto& it : other)
|
|
|
|
set(it);
|
|
|
|
}
|
2020-10-15 17:34:07 -04:00
|
|
|
|
2019-06-24 05:57:54 -04:00
|
|
|
HashTable& operator=(const HashTable& other)
|
|
|
|
{
|
2020-10-17 09:08:09 -04:00
|
|
|
HashTable temporary(other);
|
|
|
|
swap(*this, temporary);
|
2019-06-24 05:57:54 -04:00
|
|
|
return *this;
|
|
|
|
}
|
2020-10-15 17:34:07 -04:00
|
|
|
|
2020-10-17 09:08:09 -04:00
|
|
|
HashTable(HashTable&& other) noexcept
|
2018-10-10 05:53:07 -04:00
|
|
|
: m_buckets(other.m_buckets)
|
|
|
|
, m_size(other.m_size)
|
|
|
|
, m_capacity(other.m_capacity)
|
2020-10-15 17:34:07 -04:00
|
|
|
, m_deleted_count(other.m_deleted_count)
|
2018-10-10 05:53:07 -04:00
|
|
|
{
|
|
|
|
other.m_size = 0;
|
|
|
|
other.m_capacity = 0;
|
2020-10-15 17:34:07 -04:00
|
|
|
other.m_deleted_count = 0;
|
2018-10-10 05:53:07 -04:00
|
|
|
other.m_buckets = nullptr;
|
|
|
|
}
|
2020-10-15 17:34:07 -04:00
|
|
|
|
2020-10-17 09:08:09 -04:00
|
|
|
HashTable& operator=(HashTable&& other) noexcept
|
2018-10-10 05:53:07 -04:00
|
|
|
{
|
2020-10-17 09:08:09 -04:00
|
|
|
swap(*this, other);
|
2018-10-10 05:53:07 -04:00
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
2020-10-17 08:44:59 -04:00
|
|
|
friend void swap(HashTable& a, HashTable& b) noexcept
|
|
|
|
{
|
|
|
|
swap(a.m_buckets, b.m_buckets);
|
|
|
|
swap(a.m_size, b.m_size);
|
|
|
|
swap(a.m_capacity, b.m_capacity);
|
|
|
|
swap(a.m_deleted_count, b.m_deleted_count);
|
|
|
|
}
|
|
|
|
|
2021-04-11 04:25:22 -04:00
|
|
|
[[nodiscard]] bool is_empty() const { return !m_size; }
|
|
|
|
[[nodiscard]] size_t size() const { return m_size; }
|
|
|
|
[[nodiscard]] size_t capacity() const { return m_capacity; }
|
2018-10-10 05:53:07 -04:00
|
|
|
|
2020-10-15 17:34:07 -04:00
|
|
|
template<typename U, size_t N>
|
|
|
|
void set_from(U (&from_array)[N])
|
|
|
|
{
|
|
|
|
for (size_t i = 0; i < N; ++i) {
|
|
|
|
set(from_array[i]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-02-24 03:42:52 -05:00
|
|
|
void ensure_capacity(size_t capacity)
|
2019-05-27 07:07:20 -04:00
|
|
|
{
|
2021-02-23 14:42:32 -05:00
|
|
|
VERIFY(capacity >= size());
|
2020-10-15 17:34:07 -04:00
|
|
|
rehash(capacity * 2);
|
2019-05-27 07:07:20 -04:00
|
|
|
}
|
|
|
|
|
2020-10-15 17:34:07 -04:00
|
|
|
bool contains(const T& value) const
|
|
|
|
{
|
|
|
|
return find(value) != end();
|
|
|
|
}
|
2020-08-16 05:04:00 -04:00
|
|
|
|
2020-10-15 17:34:07 -04:00
|
|
|
using Iterator = HashTableIterator<HashTable, T, Bucket>;
|
|
|
|
|
|
|
|
Iterator begin()
|
2020-08-16 05:04:00 -04:00
|
|
|
{
|
2020-10-15 17:34:07 -04:00
|
|
|
for (size_t i = 0; i < m_capacity; ++i) {
|
|
|
|
if (m_buckets[i].used)
|
|
|
|
return Iterator(&m_buckets[i]);
|
2020-08-16 05:04:00 -04:00
|
|
|
}
|
2020-10-15 17:34:07 -04:00
|
|
|
return end();
|
2020-08-16 05:04:00 -04:00
|
|
|
}
|
|
|
|
|
2020-10-15 17:34:07 -04:00
|
|
|
Iterator end()
|
|
|
|
{
|
|
|
|
return Iterator(nullptr);
|
|
|
|
}
|
2018-10-10 05:53:07 -04:00
|
|
|
|
2020-10-15 17:34:07 -04:00
|
|
|
using ConstIterator = HashTableIterator<const HashTable, const T, const Bucket>;
|
2018-10-10 05:53:07 -04:00
|
|
|
|
2020-10-15 17:34:07 -04:00
|
|
|
ConstIterator begin() const
|
2019-06-29 15:09:40 -04:00
|
|
|
{
|
2020-10-15 17:34:07 -04:00
|
|
|
for (size_t i = 0; i < m_capacity; ++i) {
|
|
|
|
if (m_buckets[i].used)
|
|
|
|
return ConstIterator(&m_buckets[i]);
|
|
|
|
}
|
2019-06-29 15:09:40 -04:00
|
|
|
return end();
|
|
|
|
}
|
|
|
|
|
2020-10-15 17:34:07 -04:00
|
|
|
ConstIterator end() const
|
|
|
|
{
|
|
|
|
return ConstIterator(nullptr);
|
|
|
|
}
|
|
|
|
|
|
|
|
void clear()
|
|
|
|
{
|
2020-10-17 09:44:43 -04:00
|
|
|
*this = HashTable();
|
2020-10-15 17:34:07 -04:00
|
|
|
}
|
|
|
|
|
2021-01-15 17:59:55 -05:00
|
|
|
template<typename U = T>
|
|
|
|
HashSetResult set(U&& value)
|
2020-10-15 17:34:07 -04:00
|
|
|
{
|
|
|
|
auto& bucket = lookup_for_writing(value);
|
|
|
|
if (bucket.used) {
|
2021-01-15 17:59:55 -05:00
|
|
|
(*bucket.slot()) = forward<U>(value);
|
2020-10-15 17:34:07 -04:00
|
|
|
return HashSetResult::ReplacedExistingEntry;
|
|
|
|
}
|
|
|
|
|
2021-01-15 17:59:55 -05:00
|
|
|
new (bucket.slot()) T(forward<U>(value));
|
2020-10-15 17:34:07 -04:00
|
|
|
bucket.used = true;
|
|
|
|
if (bucket.deleted) {
|
|
|
|
bucket.deleted = false;
|
|
|
|
--m_deleted_count;
|
|
|
|
}
|
|
|
|
++m_size;
|
|
|
|
return HashSetResult::InsertedNewEntry;
|
|
|
|
}
|
|
|
|
|
2019-06-29 15:09:40 -04:00
|
|
|
template<typename Finder>
|
2020-10-15 17:34:07 -04:00
|
|
|
Iterator find(unsigned hash, Finder finder)
|
2019-06-29 15:09:40 -04:00
|
|
|
{
|
2020-10-15 17:34:07 -04:00
|
|
|
return Iterator(lookup_with_hash(hash, move(finder)));
|
2019-06-29 15:09:40 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
Iterator find(const T& value)
|
|
|
|
{
|
|
|
|
return find(TraitsForT::hash(value), [&](auto& other) { return TraitsForT::equals(value, other); });
|
|
|
|
}
|
|
|
|
|
2020-10-15 17:34:07 -04:00
|
|
|
template<typename Finder>
|
|
|
|
ConstIterator find(unsigned hash, Finder finder) const
|
|
|
|
{
|
|
|
|
return ConstIterator(lookup_with_hash(hash, move(finder)));
|
|
|
|
}
|
|
|
|
|
2019-06-29 15:09:40 -04:00
|
|
|
ConstIterator find(const T& value) const
|
|
|
|
{
|
|
|
|
return find(TraitsForT::hash(value), [&](auto& other) { return TraitsForT::equals(value, other); });
|
|
|
|
}
|
2018-10-10 05:53:07 -04:00
|
|
|
|
2020-07-06 17:44:33 -04:00
|
|
|
bool remove(const T& value)
|
2018-10-13 08:22:09 -04:00
|
|
|
{
|
|
|
|
auto it = find(value);
|
2020-07-06 17:44:33 -04:00
|
|
|
if (it != end()) {
|
2018-10-13 08:22:09 -04:00
|
|
|
remove(it);
|
2020-07-06 17:44:33 -04:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
2018-10-13 08:22:09 -04:00
|
|
|
}
|
|
|
|
|
2020-10-15 17:34:07 -04:00
|
|
|
void remove(Iterator iterator)
|
|
|
|
{
|
2021-02-23 14:42:32 -05:00
|
|
|
VERIFY(iterator.m_bucket);
|
2020-10-15 17:34:07 -04:00
|
|
|
auto& bucket = *iterator.m_bucket;
|
2021-02-23 14:42:32 -05:00
|
|
|
VERIFY(bucket.used);
|
|
|
|
VERIFY(!bucket.end);
|
|
|
|
VERIFY(!bucket.deleted);
|
2020-10-15 17:34:07 -04:00
|
|
|
bucket.slot()->~T();
|
|
|
|
bucket.used = false;
|
|
|
|
bucket.deleted = true;
|
|
|
|
--m_size;
|
|
|
|
++m_deleted_count;
|
|
|
|
}
|
2018-10-13 08:22:09 -04:00
|
|
|
|
2018-10-10 05:53:07 -04:00
|
|
|
private:
|
2020-10-15 17:34:07 -04:00
|
|
|
void insert_during_rehash(T&& value)
|
2019-06-29 15:09:40 -04:00
|
|
|
{
|
2020-10-15 17:34:07 -04:00
|
|
|
auto& bucket = lookup_for_writing(value);
|
|
|
|
new (bucket.slot()) T(move(value));
|
|
|
|
bucket.used = true;
|
2019-06-29 15:09:40 -04:00
|
|
|
}
|
|
|
|
|
2020-10-15 17:34:07 -04:00
|
|
|
void rehash(size_t new_capacity)
|
2019-06-29 15:09:40 -04:00
|
|
|
{
|
2020-10-15 17:34:07 -04:00
|
|
|
new_capacity = max(new_capacity, static_cast<size_t>(4));
|
2021-05-15 04:06:41 -04:00
|
|
|
new_capacity = kmalloc_good_size(new_capacity * sizeof(Bucket)) / sizeof(Bucket);
|
2019-06-29 15:09:40 -04:00
|
|
|
|
2020-10-15 17:34:07 -04:00
|
|
|
auto* old_buckets = m_buckets;
|
|
|
|
auto old_capacity = m_capacity;
|
2018-10-10 05:53:07 -04:00
|
|
|
|
2020-10-15 17:34:07 -04:00
|
|
|
m_buckets = (Bucket*)kmalloc(sizeof(Bucket) * (new_capacity + 1));
|
|
|
|
__builtin_memset(m_buckets, 0, sizeof(Bucket) * (new_capacity + 1));
|
|
|
|
m_capacity = new_capacity;
|
|
|
|
m_deleted_count = 0;
|
2019-06-27 09:57:49 -04:00
|
|
|
|
2020-10-15 17:34:07 -04:00
|
|
|
m_buckets[m_capacity].end = true;
|
2018-10-10 05:53:07 -04:00
|
|
|
|
2020-10-15 17:34:07 -04:00
|
|
|
if (!old_buckets)
|
|
|
|
return;
|
2018-10-10 05:53:07 -04:00
|
|
|
|
2020-10-15 17:34:07 -04:00
|
|
|
for (size_t i = 0; i < old_capacity; ++i) {
|
|
|
|
auto& old_bucket = old_buckets[i];
|
|
|
|
if (old_bucket.used) {
|
|
|
|
insert_during_rehash(move(*old_bucket.slot()));
|
|
|
|
old_bucket.slot()->~T();
|
|
|
|
}
|
2019-03-24 23:23:17 -04:00
|
|
|
}
|
2018-10-10 05:53:07 -04:00
|
|
|
|
2020-10-15 17:34:07 -04:00
|
|
|
kfree(old_buckets);
|
2018-11-06 19:38:51 -05:00
|
|
|
}
|
|
|
|
|
2020-10-15 17:34:07 -04:00
|
|
|
template<typename Finder>
|
2021-04-01 22:02:33 -04:00
|
|
|
Bucket* lookup_with_hash(unsigned hash, Finder finder) const
|
2020-10-15 17:34:07 -04:00
|
|
|
{
|
|
|
|
if (is_empty())
|
|
|
|
return nullptr;
|
2021-04-01 21:52:32 -04:00
|
|
|
|
2020-10-15 17:34:07 -04:00
|
|
|
for (;;) {
|
2021-04-01 21:52:32 -04:00
|
|
|
auto& bucket = m_buckets[hash % m_capacity];
|
2020-10-15 17:34:07 -04:00
|
|
|
|
|
|
|
if (bucket.used && finder(*bucket.slot()))
|
|
|
|
return &bucket;
|
|
|
|
|
|
|
|
if (!bucket.used && !bucket.deleted)
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
hash = double_hash(hash);
|
2018-10-10 05:53:07 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-10-15 17:34:07 -04:00
|
|
|
const Bucket* lookup_for_reading(const T& value) const
|
|
|
|
{
|
|
|
|
return lookup_with_hash(TraitsForT::hash(value), [&value](auto& entry) { return TraitsForT::equals(entry, value); });
|
2019-01-30 13:32:54 -05:00
|
|
|
}
|
2018-10-10 05:53:07 -04:00
|
|
|
|
2020-10-15 17:34:07 -04:00
|
|
|
Bucket& lookup_for_writing(const T& value)
|
|
|
|
{
|
2020-10-16 02:32:35 -04:00
|
|
|
if (should_grow())
|
|
|
|
rehash(capacity() * 2);
|
2018-10-10 05:53:07 -04:00
|
|
|
|
2021-04-01 22:02:33 -04:00
|
|
|
auto hash = TraitsForT::hash(value);
|
|
|
|
Bucket* first_empty_bucket = nullptr;
|
2020-10-15 17:34:07 -04:00
|
|
|
for (;;) {
|
2021-04-01 21:52:32 -04:00
|
|
|
auto& bucket = m_buckets[hash % m_capacity];
|
2021-04-01 22:02:33 -04:00
|
|
|
|
|
|
|
if (bucket.used && TraitsForT::equals(*bucket.slot(), value))
|
2020-10-15 17:34:07 -04:00
|
|
|
return bucket;
|
2021-04-01 22:02:33 -04:00
|
|
|
|
|
|
|
if (!bucket.used) {
|
|
|
|
if (!first_empty_bucket)
|
|
|
|
first_empty_bucket = &bucket;
|
|
|
|
|
|
|
|
if (!bucket.deleted)
|
|
|
|
return *const_cast<Bucket*>(first_empty_bucket);
|
|
|
|
}
|
|
|
|
|
2020-10-15 17:34:07 -04:00
|
|
|
hash = double_hash(hash);
|
|
|
|
}
|
|
|
|
}
|
2018-10-13 08:22:09 -04:00
|
|
|
|
2021-04-11 04:25:22 -04:00
|
|
|
[[nodiscard]] size_t used_bucket_count() const { return m_size + m_deleted_count; }
|
|
|
|
[[nodiscard]] bool should_grow() const { return ((used_bucket_count() + 1) * 100) >= (m_capacity * load_factor_in_percent); }
|
2018-10-10 05:53:07 -04:00
|
|
|
|
2020-10-15 17:34:07 -04:00
|
|
|
Bucket* m_buckets { nullptr };
|
|
|
|
size_t m_size { 0 };
|
|
|
|
size_t m_capacity { 0 };
|
|
|
|
size_t m_deleted_count { 0 };
|
|
|
|
};
|
2018-10-10 05:53:07 -04:00
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
using AK::HashTable;
|