From ec6bceaa0876f00c0ea6eb585f5527a842210747 Mon Sep 17 00:00:00 2001 From: Andreas Kling Date: Sat, 7 Sep 2019 15:50:44 +0200 Subject: [PATCH] Kernel: Support thread-local storage This patch adds support for TLS according to the x86 System V ABI. Each thread gets a thread-specific memory region, and the GS segment register always points _to a pointer_ to the thread-specific memory. In other words, to access thread-local variables, userspace programs start by dereferencing the pointer at [gs:0]. The Process keeps a master copy of the TLS segment that new threads should use, and when a new thread is created, they get a copy of it. It's basically whatever the PT_TLS program header in the ELF says. --- AK/ELF/ELFLoader.cpp | 5 +++++ AK/ELF/ELFLoader.h | 1 + Kernel/Process.cpp | 26 ++++++++++++++++++++++++-- Kernel/Process.h | 4 ++++ Kernel/Scheduler.cpp | 6 ++++++ Kernel/Thread.cpp | 43 ++++++++++++++++++++++++++++++++++++++++--- Kernel/Thread.h | 12 ++++++++++++ 7 files changed, 92 insertions(+), 5 deletions(-) diff --git a/AK/ELF/ELFLoader.cpp b/AK/ELF/ELFLoader.cpp index 163a4bcbb04..e9ab4a77b42 100644 --- a/AK/ELF/ELFLoader.cpp +++ b/AK/ELF/ELFLoader.cpp @@ -35,6 +35,11 @@ bool ELFLoader::layout() { bool failed = false; m_image.for_each_program_header([&](const ELFImage::ProgramHeader& program_header) { + if (program_header.type() == PT_TLS) { + auto* tls_image = tls_section_hook(program_header.size_in_memory(), program_header.alignment()); + memcpy(tls_image, program_header.raw_data(), program_header.size_in_image()); + return; + } if (program_header.type() != PT_LOAD) return; #ifdef ELFLOADER_DEBUG diff --git a/AK/ELF/ELFLoader.h b/AK/ELF/ELFLoader.h index ef02e8eab89..692bfdd73bc 100644 --- a/AK/ELF/ELFLoader.h +++ b/AK/ELF/ELFLoader.h @@ -19,6 +19,7 @@ public: bool load(); #if defined(KERNEL) Function alloc_section_hook; + Function tls_section_hook; Function map_section_hook; VirtualAddress entry() const { return m_image.entry(); } #endif diff --git 
a/Kernel/Process.cpp b/Kernel/Process.cpp index 05b14793e5a..cccea564dc4 100644 --- a/Kernel/Process.cpp +++ b/Kernel/Process.cpp @@ -310,6 +310,9 @@ Process* Process::fork(RegisterDump& regs) auto cloned_region = region.clone(); child->m_regions.append(move(cloned_region)); MM.map_region(*child, child->m_regions.last()); + + if (&region == m_master_tls_region) + child->m_master_tls_region = child->m_regions.last(); } for (auto gid : m_gids) @@ -403,6 +406,10 @@ int Process::do_exec(String path, Vector arguments, Vector envir RefPtr region = allocate_region_with_vmo(VirtualAddress(), metadata.size, vmo, 0, description->absolute_path(), PROT_READ); ASSERT(region); + RefPtr master_tls_region; + size_t master_tls_size = 0; + size_t master_tls_alignment = 0; + OwnPtr loader; { // Okay, here comes the sleight of hand, pay close attention.. @@ -433,6 +440,13 @@ int Process::do_exec(String path, Vector arguments, Vector envir (void)allocate_region(vaddr, size, String(name), prot); return vaddr.as_ptr(); }; + loader->tls_section_hook = [&](size_t size, size_t alignment) { + ASSERT(size); + master_tls_region = allocate_region({}, size, String(), PROT_READ | PROT_WRITE); + master_tls_size = size; + master_tls_alignment = alignment; + return master_tls_region->vaddr().as_ptr(); + }; bool success = loader->load(); if (!success || !loader->entry().get()) { m_page_directory = move(old_page_directory); @@ -451,6 +465,9 @@ int Process::do_exec(String path, Vector arguments, Vector envir m_elf_loader = move(loader); m_executable = description->custody(); + // Copy of the master TLS region that we will clone for new threads + m_master_tls_region = master_tls_region.ptr(); + if (metadata.is_setuid()) m_euid = metadata.uid; if (metadata.is_setgid()) @@ -483,6 +500,11 @@ int Process::do_exec(String path, Vector arguments, Vector envir // ss0 sp!!!!!!!!!
u32 old_esp0 = main_thread().m_tss.esp0; + m_master_tls_size = master_tls_size; + m_master_tls_alignment = master_tls_alignment; + + main_thread().make_thread_specific_region({}); + memset(&main_thread().m_tss, 0, sizeof(main_thread().m_tss)); main_thread().m_tss.eflags = 0x0202; main_thread().m_tss.eip = entry_eip; @@ -490,7 +512,7 @@ int Process::do_exec(String path, Vector arguments, Vector envir main_thread().m_tss.ds = 0x23; main_thread().m_tss.es = 0x23; main_thread().m_tss.fs = 0x23; - main_thread().m_tss.gs = 0x23; + main_thread().m_tss.gs = thread_specific_selector() | 3; main_thread().m_tss.ss = 0x23; main_thread().m_tss.cr3 = page_directory().cr3(); main_thread().make_userspace_stack_for_main_thread(move(arguments), move(environment)); @@ -2661,7 +2683,7 @@ int Process::sys$create_thread(int (*entry)(void*), void* argument) tss.eflags = 0x0202; tss.cr3 = page_directory().cr3(); thread->make_userspace_stack_for_secondary_thread(argument); - + thread->make_thread_specific_region({}); thread->set_state(Thread::State::Runnable); return thread->tid(); } diff --git a/Kernel/Process.h b/Kernel/Process.h index e3beb925ab0..36cbbfec062 100644 --- a/Kernel/Process.h +++ b/Kernel/Process.h @@ -369,6 +369,10 @@ private: RefPtr m_tracer; OwnPtr m_elf_loader; + RefPtr m_master_tls_region; + size_t m_master_tls_size { 0 }; + size_t m_master_tls_alignment { 0 }; + Lock m_big_lock { "Process" }; u64 m_alarm_deadline { 0 }; diff --git a/Kernel/Scheduler.cpp b/Kernel/Scheduler.cpp index 00e42d7df71..6e56b97b007 100644 --- a/Kernel/Scheduler.cpp +++ b/Kernel/Scheduler.cpp @@ -460,6 +460,12 @@ bool Scheduler::context_switch(Thread& thread) descriptor.descriptor_type = 0; } + if (!thread.thread_specific_data().is_null()) { + auto& descriptor = thread_specific_descriptor(); + descriptor.set_base(thread.thread_specific_data().as_ptr()); + descriptor.set_limit(sizeof(ThreadSpecificData*)); + } + auto& descriptor = get_gdt_entry(thread.selector()); descriptor.type = 11; // Busy 
TSS flush_gdt(); diff --git a/Kernel/Thread.cpp b/Kernel/Thread.cpp index e25fc7333fc..c360b937fe8 100644 --- a/Kernel/Thread.cpp +++ b/Kernel/Thread.cpp @@ -9,6 +9,28 @@ //#define SIGNAL_DEBUG +u16 thread_specific_selector() +{ + static u16 selector; + if (!selector) { + selector = gdt_alloc_entry(); + auto& descriptor = get_gdt_entry(selector); + descriptor.dpl = 3; + descriptor.segment_present = 1; + descriptor.granularity = 0; + descriptor.zero = 0; + descriptor.operation_size = 1; + descriptor.descriptor_type = 1; + descriptor.type = 2; + } + return selector; +} + +Descriptor& thread_specific_descriptor() +{ + return get_gdt_entry(thread_specific_selector()); +} + HashTable& thread_table() { ASSERT_INTERRUPTS_DISABLED(); @@ -32,22 +54,24 @@ Thread::Thread(Process& process) // Only IF is set when a process boots. m_tss.eflags = 0x0202; - u16 cs, ds, ss; + u16 cs, ds, ss, gs; if (m_process.is_ring0()) { cs = 0x08; ds = 0x10; ss = 0x10; + gs = 0; } else { cs = 0x1b; ds = 0x23; ss = 0x23; + gs = thread_specific_selector() | 3; } m_tss.ds = ds; m_tss.es = ds; m_tss.fs = ds; - m_tss.gs = ds; + m_tss.gs = gs; m_tss.ss = ss; m_tss.cs = cs; @@ -425,7 +449,7 @@ ShouldUnblockThread Thread::dispatch_signal(u8 signal) m_tss.ds = 0x23; m_tss.es = 0x23; m_tss.fs = 0x23; - m_tss.gs = 0x23; + m_tss.gs = thread_specific_selector() | 3; m_tss.eip = regs.eip; m_tss.esp = regs.esp_if_crossRing; // FIXME: This state is such a hack. It avoids trouble if 'current' is the process receiving a signal. 
@@ -525,6 +549,7 @@ Thread* Thread::clone(Process& process) clone->m_fpu_state = (FPUState*)kmalloc_aligned(sizeof(FPUState), 16); memcpy(clone->m_fpu_state, m_fpu_state, sizeof(FPUState)); clone->m_has_used_fpu = m_has_used_fpu; + clone->m_thread_specific_data = m_thread_specific_data; return clone; } @@ -602,3 +627,15 @@ String Thread::backtrace_impl() const } return builder.to_string(); } + +void Thread::make_thread_specific_region(Badge) +{ + size_t thread_specific_region_alignment = max(process().m_master_tls_alignment, alignof(ThreadSpecificData)); + size_t thread_specific_region_size = align_up_to(process().m_master_tls_size, thread_specific_region_alignment) + sizeof(ThreadSpecificData); + auto* region = process().allocate_region({}, thread_specific_region_size, "Thread-specific", PROT_READ | PROT_WRITE, true); + auto* thread_specific_data = (ThreadSpecificData*)region->vaddr().offset(align_up_to(process().m_master_tls_size, thread_specific_region_alignment)).as_ptr(); + auto* thread_local_storage = (u8*)((u8*)thread_specific_data) - align_up_to(process().m_master_tls_size, process().m_master_tls_alignment); + m_thread_specific_data = VirtualAddress((u32)thread_specific_data); + thread_specific_data->self = thread_specific_data; + memcpy(thread_local_storage, process().m_master_tls_region->vaddr().as_ptr(), process().m_master_tls_size); +} diff --git a/Kernel/Thread.h b/Kernel/Thread.h index e367b9f28d8..7bf6e173d2e 100644 --- a/Kernel/Thread.h +++ b/Kernel/Thread.h @@ -30,6 +30,10 @@ struct SignalActionData { int flags { 0 }; }; +struct ThreadSpecificData { + ThreadSpecificData* self; +}; + class Thread { friend class Process; friend class Scheduler; @@ -214,6 +218,8 @@ public: const char* state_string() const; u32 ticks() const { return m_ticks; } + VirtualAddress thread_specific_data() const { return m_thread_specific_data; } + u64 sleep(u32 ticks); enum class BlockResult { @@ -301,6 +307,8 @@ public: void make_userspace_stack_for_main_thread(Vector 
arguments, Vector environment); void make_userspace_stack_for_secondary_thread(void* argument); + void make_thread_specific_region(Badge); + Thread* clone(Process&); template @@ -336,6 +344,7 @@ private: RefPtr m_userspace_stack_region; RefPtr m_kernel_stack_region; RefPtr m_kernel_stack_for_signal_handler_region; + VirtualAddress m_thread_specific_data; SignalActionData m_signal_action_data[32]; Region* m_signal_stack_user_region { nullptr }; IntrusiveList m_blockers; @@ -432,3 +441,6 @@ inline IterationDecision Scheduler::for_each_nonrunnable(Callback callback) return IterationDecision::Continue; } + +u16 thread_specific_selector(); +Descriptor& thread_specific_descriptor();