Kernel: Start implementing kmalloc_aligned more efficiently

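Finding a free memory location now only requires room for `size + alignment`
bytes (rounded up to whole chunks); the offset and size bookkeeping that
kmalloc_aligned used to store in front of the aligned pointer is gone. For the
actual allocation, the start of the found area is moved forward to the required
alignment, and only the chunks that are actually needed are marked as used.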
This commit is contained in:
Tim Schumacher 2022-12-07 05:02:59 +01:00 committed by Linus Groh
parent 30a553ef80
commit 2577bb8416
3 changed files with 37 additions and 35 deletions

View file

@ -68,11 +68,16 @@ public:
return needed_chunks * CHUNK_SIZE + (needed_chunks + 7) / 8;
}
void* allocate(size_t size, CallerWillInitializeMemory caller_will_initialize_memory)
void* allocate(size_t size, size_t alignment, CallerWillInitializeMemory caller_will_initialize_memory)
{
// The minimum possible alignment is CHUNK_SIZE, since we only track chunks here, nothing smaller.
if (alignment < CHUNK_SIZE)
alignment = CHUNK_SIZE;
// We need space for the AllocationHeader at the head of the block.
size_t real_size = size + sizeof(AllocationHeader);
size_t chunks_needed = (real_size + CHUNK_SIZE - 1) / CHUNK_SIZE;
size_t chunk_alignment = (alignment + CHUNK_SIZE - 1) / CHUNK_SIZE;
if (chunks_needed > free_chunks())
return nullptr;
@ -80,21 +85,29 @@ public:
Optional<size_t> first_chunk;
// Choose the right policy for allocation.
// FIXME: These should utilize the alignment directly instead of trying to allocate `size + alignment`.
constexpr u32 best_fit_threshold = 128;
if (chunks_needed < best_fit_threshold) {
first_chunk = m_bitmap.find_first_fit(chunks_needed);
first_chunk = m_bitmap.find_first_fit(chunks_needed + chunk_alignment);
} else {
first_chunk = m_bitmap.find_best_fit(chunks_needed);
first_chunk = m_bitmap.find_best_fit(chunks_needed + chunk_alignment);
}
if (!first_chunk.has_value())
return nullptr;
auto* a = (AllocationHeader*)(m_chunks + (first_chunk.value() * CHUNK_SIZE));
// Align the starting address and verify that we haven't gone outside the calculated free area.
a = (AllocationHeader*)((FlatPtr)a + alignment - (FlatPtr)a->data % alignment);
auto aligned_first_chunk = ((FlatPtr)a - (FlatPtr)m_chunks) / CHUNK_SIZE;
VERIFY(first_chunk.value() <= aligned_first_chunk);
VERIFY(aligned_first_chunk + chunks_needed <= first_chunk.value() + chunks_needed + chunk_alignment);
u8* ptr = a->data;
a->allocation_size_in_chunks = chunks_needed;
m_bitmap.set_range_and_verify_that_all_bits_flip(first_chunk.value(), chunks_needed, true);
m_bitmap.set_range_and_verify_that_all_bits_flip(aligned_first_chunk, chunks_needed, true);
m_allocated_chunks += chunks_needed;
if (caller_will_initialize_memory == CallerWillInitializeMemory::No) {
@ -102,6 +115,8 @@ public:
__builtin_memset(ptr, HEAP_SCRUB_BYTE_ALLOC, (chunks_needed * CHUNK_SIZE) - sizeof(AllocationHeader));
}
}
VERIFY((FlatPtr)ptr % alignment == 0);
return ptr;
}

View file

@ -28,6 +28,7 @@ static constexpr size_t CHUNK_SIZE = 64;
static_assert(is_power_of_two(CHUNK_SIZE));
static constexpr size_t INITIAL_KMALLOC_MEMORY_SIZE = 2 * MiB;
static constexpr size_t KMALLOC_DEFAULT_ALIGNMENT = 16;
// Treat the heap as logically separate from .bss
__attribute__((section(".heap"))) static u8 initial_kmalloc_memory[INITIAL_KMALLOC_MEMORY_SIZE];
@ -192,7 +193,7 @@ public:
++block;
block_to_remove.list_node.remove();
block_to_remove.~KmallocSlabBlock();
kfree_aligned(&block_to_remove);
kfree_sized(&block_to_remove, KmallocSlabBlock::block_size);
did_purge = true;
}
@ -222,17 +223,17 @@ struct KmallocGlobalData {
subheaps.append(*subheap);
}
void* allocate(size_t size, CallerWillInitializeMemory caller_will_initialize_memory)
void* allocate(size_t size, size_t alignment, CallerWillInitializeMemory caller_will_initialize_memory)
{
VERIFY(!expansion_in_progress);
for (auto& slabheap : slabheaps) {
if (size <= slabheap.slab_size())
if (size <= slabheap.slab_size() && alignment <= slabheap.slab_size())
return slabheap.allocate(caller_will_initialize_memory);
}
for (auto& subheap : subheaps) {
if (auto* ptr = subheap.allocator.allocate(size, caller_will_initialize_memory))
if (auto* ptr = subheap.allocator.allocate(size, alignment, caller_will_initialize_memory))
return ptr;
}
@ -249,7 +250,7 @@ struct KmallocGlobalData {
}
}
if (did_purge)
return allocate(size, caller_will_initialize_memory);
return allocate(size, alignment, caller_will_initialize_memory);
}
if (!try_expand(size)) {
@ -257,7 +258,7 @@ struct KmallocGlobalData {
return nullptr;
}
return allocate(size, caller_will_initialize_memory);
return allocate(size, alignment, caller_will_initialize_memory);
}
void deallocate(void* ptr, size_t size)
@ -422,13 +423,16 @@ UNMAP_AFTER_INIT void kmalloc_init()
s_lock.initialize();
}
static void* kmalloc_impl(size_t size, CallerWillInitializeMemory caller_will_initialize_memory)
static void* kmalloc_impl(size_t size, size_t alignment, CallerWillInitializeMemory caller_will_initialize_memory)
{
// Catch bad callers allocating under spinlock.
if constexpr (KMALLOC_VERIFY_NO_SPINLOCK_HELD) {
Processor::verify_no_spinlocks_held();
}
// Alignment must be a power of two.
VERIFY(popcount(alignment) == 1);
SpinlockLocker lock(s_lock);
++g_kmalloc_call_count;
@ -437,7 +441,7 @@ static void* kmalloc_impl(size_t size, CallerWillInitializeMemory caller_will_in
Kernel::dump_backtrace();
}
void* ptr = g_kmalloc_global->allocate(size, caller_will_initialize_memory);
void* ptr = g_kmalloc_global->allocate(size, alignment, caller_will_initialize_memory);
Thread* current_thread = Thread::current();
if (!current_thread)
@ -454,7 +458,7 @@ static void* kmalloc_impl(size_t size, CallerWillInitializeMemory caller_will_in
// Allocates `size` bytes by forwarding to kmalloc_impl with CallerWillInitializeMemory::No.
// This is a diff hunk: the two return statements are the old line followed by its replacement.
void* kmalloc(size_t size)
{
// Before the change: kmalloc_impl took no alignment argument.
return kmalloc_impl(size, CallerWillInitializeMemory::No);
// After the change: the default alignment (KMALLOC_DEFAULT_ALIGNMENT) is passed explicitly.
return kmalloc_impl(size, KMALLOC_DEFAULT_ALIGNMENT, CallerWillInitializeMemory::No);
}
void* kcalloc(size_t count, size_t size)
@ -462,7 +466,7 @@ void* kcalloc(size_t count, size_t size)
if (Checked<size_t>::multiplication_would_overflow(count, size))
return nullptr;
size_t new_size = count * size;
auto* ptr = kmalloc_impl(new_size, CallerWillInitializeMemory::Yes);
auto* ptr = kmalloc_impl(new_size, KMALLOC_DEFAULT_ALIGNMENT, CallerWillInitializeMemory::Yes);
if (ptr)
memset(ptr, 0, new_size);
return ptr;
@ -511,17 +515,7 @@ size_t kmalloc_good_size(size_t size)
// Allocates `size` bytes whose address is a multiple of `alignment`.
// This is a diff hunk: the lines from `Checked<size_t> ...` through `return aligned_ptr;` are the
// body that was removed, and the final return is the single line that replaces it.
void* kmalloc_aligned(size_t size, size_t alignment)
{
// Removed implementation: over-allocate by `alignment` plus room for two bookkeeping words.
Checked<size_t> real_allocation_size = size;
real_allocation_size += alignment;
real_allocation_size += sizeof(ptrdiff_t) + sizeof(size_t);
void* ptr = kmalloc(real_allocation_size.value());
if (ptr == nullptr)
return nullptr;
// Round up to the next multiple of `alignment` strictly above `ptr`
// (an already aligned `ptr` still advances by a full `alignment`).
size_t max_addr = (size_t)ptr + alignment;
void* aligned_ptr = (void*)(max_addr - (max_addr % alignment));
// Stash the distance back to the real allocation and its total size just below the
// aligned pointer, where kfree_aligned reads them back.
((ptrdiff_t*)aligned_ptr)[-1] = (ptrdiff_t)((u8*)aligned_ptr - (u8*)ptr);
((size_t*)aligned_ptr)[-2] = real_allocation_size.value();
return aligned_ptr;
// Replacement: hand the alignment straight to kmalloc_impl, which VERIFYs it is a power of two.
return kmalloc_impl(size, alignment, CallerWillInitializeMemory::No);
}
void* operator new(size_t size)
@ -571,9 +565,9 @@ void operator delete(void* ptr, size_t size) noexcept
return kfree_sized(ptr, size);
}
// Sized, aligned operator delete. The align_val_t argument is unnamed and unused.
// This is a diff hunk: each pair of adjacent lines is the old line followed by its replacement.
// Old signature: the size parameter was unnamed.
void operator delete(void* ptr, size_t, std::align_val_t) noexcept
// New signature: the size parameter is named so it can be forwarded.
void operator delete(void* ptr, size_t size, std::align_val_t) noexcept
{
// Old body: release through kfree_aligned.
return kfree_aligned(ptr);
// New body: release through kfree_sized with the caller-supplied size.
return kfree_sized(ptr, size);
}
void operator delete[](void*) noexcept

View file

@ -27,7 +27,7 @@ public: \
} \
void operator delete(void* ptr) noexcept \
{ \
kfree_aligned(ptr); \
kfree_sized(ptr, sizeof(type)); \
} \
\
private:
@ -82,13 +82,6 @@ void operator delete[](void* ptr, size_t) noexcept;
[[gnu::malloc, gnu::alloc_size(1), gnu::alloc_align(2)]] void* kmalloc_aligned(size_t size, size_t alignment);
// Deleted by this change (this part of the diff hunk shows removed lines only).
// Released a pointer that came from the old kmalloc_aligned: a null pointer is ignored; otherwise
// the ptrdiff_t at index -1 before `ptr` gives the distance back to the underlying allocation
// and the size_t at index -2 before `ptr` gives the size that is passed to kfree_sized.
inline void kfree_aligned(void* ptr)
{
if (ptr == nullptr)
return;
kfree_sized((u8*)ptr - ((ptrdiff_t const*)ptr)[-1], ((size_t const*)ptr)[-2]);
}
size_t kmalloc_good_size(size_t);
void kmalloc_enable_expand();