serenity/Kernel/Thread.cpp
Sergey Bugaev 1e1ddce9d8 Kernel: Unwind kernel stacks before dying
While executing in the kernel, a thread can acquire various resources
that need cleanup, such as locks and references to RefCounted objects.
This cleanup normally happens on the exit path, such as in destructors
for various RAII guards. But we weren't calling those exit paths when
killing threads that have been executing in the kernel, such as threads
blocked on reading or sleeping, thus causing leaks.

This commit changes how killing threads works. Now, instead of killing
a thread directly, one is supposed to call thread->set_should_die(),
which will unblock it and make it unwind the stack if it is blocked
in the kernel. Then, just before returning to the userspace, the thread
will automatically die.
2019-11-14 20:05:58 +01:00

729 lines
22 KiB
C++

#include <AK/StringBuilder.h>
#include <Kernel/FileSystem/FileDescription.h>
#include <Kernel/Process.h>
#include <Kernel/Scheduler.h>
#include <Kernel/Thread.h>
#include <Kernel/VM/MemoryManager.h>
#include <LibC/signal_numbers.h>
#include <LibELF/ELFLoader.h>
//#define SIGNAL_DEBUG
// Returns the GDT selector used for the thread-specific segment.
// The GDT entry is allocated and configured the first time this is called;
// every later call returns the cached selector.
u16 thread_specific_selector()
{
    static u16 s_selector;
    if (s_selector)
        return s_selector;

    s_selector = gdt_alloc_entry();
    auto& entry = get_gdt_entry(s_selector);
    entry.dpl = 3;
    entry.segment_present = 1;
    entry.granularity = 0;
    entry.zero = 0;
    entry.operation_size = 1;
    entry.descriptor_type = 1;
    entry.type = 2;
    return s_selector;
}
// Returns the GDT descriptor that backs thread_specific_selector().
Descriptor& thread_specific_descriptor()
{
    auto selector = thread_specific_selector();
    return get_gdt_entry(selector);
}
// Returns the global table of registered threads, creating it on first use.
// Must be called with interrupts disabled.
HashTable<Thread*>& thread_table()
{
    ASSERT_INTERRUPTS_DISABLED();
    static HashTable<Thread*>* s_table;
    if (s_table == nullptr)
        s_table = new HashTable<Thread*>;
    return *s_table;
}
// Constructs a new thread owned by `process`.
//
// The thread takes the next TID from the process, gets default signal
// dispositions, a zeroed 16-byte-aligned FPU state buffer and a zeroed TSS
// whose segment selectors depend on whether the process runs in ring 0.
// A kernel stack is set up, and unless the owning process has PID 0 the
// thread is added to thread_table() and handed to the scheduler.
Thread::Thread(Process& process)
: m_process(process)
, m_tid(process.m_next_tid++)
{
dbgprintf("Thread{%p}: New thread TID=%u in %s(%u)\n", this, m_tid, process.name().characters(), process.pid());
set_default_signal_dispositions();
m_fpu_state = (FPUState*)kmalloc_aligned(sizeof(FPUState), 16);
memset(m_fpu_state, 0, sizeof(FPUState));
memset(&m_tss, 0, sizeof(m_tss));
// Only IF is set when a process boots.
m_tss.eflags = 0x0202;
// Pick the segment selectors: one set for ring 0 processes, another
// (with the thread-specific selector in GS) for everything else.
u16 cs, ds, ss, gs;
if (m_process.is_ring0()) {
cs = 0x08;
ds = 0x10;
ss = 0x10;
gs = 0;
} else {
cs = 0x1b;
ds = 0x23;
ss = 0x23;
gs = thread_specific_selector() | 3;
}
m_tss.ds = ds;
m_tss.es = ds;
m_tss.fs = ds;
m_tss.gs = gs;
m_tss.ss = ss;
m_tss.cs = cs;
m_tss.cr3 = m_process.page_directory().cr3();
if (m_process.is_ring0()) {
// FIXME: This memory is leaked.
// But uh, there's also no kernel process termination, so I guess it's not technically leaked...
m_kernel_stack_base = (u32)kmalloc_eternal(default_kernel_stack_size);
// The stack top is rounded down to an 8-byte boundary.
m_kernel_stack_top = (m_kernel_stack_base + default_kernel_stack_size) & 0xfffffff8u;
m_tss.esp = m_kernel_stack_top;
} else {
// Ring3 processes need a separate stack for Ring0.
m_kernel_stack_region = MM.allocate_kernel_region(default_kernel_stack_size, String::format("Kernel Stack (Thread %d)", m_tid));
m_kernel_stack_base = m_kernel_stack_region->vaddr().get();
// The stack top is rounded down to an 8-byte boundary.
m_kernel_stack_top = m_kernel_stack_region->vaddr().offset(default_kernel_stack_size).get() & 0xfffffff8u;
m_tss.ss0 = 0x10;
m_tss.esp0 = m_kernel_stack_top;
}
// HACK: Ring2 SS in the TSS is the current PID.
m_tss.ss2 = m_process.pid();
m_far_ptr.offset = 0x98765432;
// Threads of the PID 0 process are neither tracked in the thread table
// nor registered with the scheduler here.
if (m_process.pid() != 0) {
InterruptDisabler disabler;
thread_table().set(this);
Scheduler::init_thread(*this);
}
}
// Releases everything the thread owns directly: its FPU state buffer, its
// slot in the thread table, its GDT entry (if it has a selector) and its
// userspace stack region (if one was allocated for it).
Thread::~Thread()
{
    dbgprintf("~Thread{%p}\n", this);
    kfree_aligned(m_fpu_state);

    {
        // The thread table may only be touched with interrupts disabled.
        InterruptDisabler disabler;
        thread_table().remove(this);
    }

    // Don't leave a dangling "last FPU user" pointer behind.
    if (this == g_last_fpu_thread)
        g_last_fpu_thread = nullptr;

    if (auto thread_selector = selector())
        gdt_free_entry(thread_selector);

    if (m_userspace_stack_region)
        m_process.deallocate_region(*m_userspace_stack_region);
}
// Makes the thread schedulable again. The current thread goes straight back
// to Running; any other thread becomes Runnable and must not already be
// Runnable or Running.
void Thread::unblock()
{
    if (current != this) {
        ASSERT(m_state != Thread::Runnable && m_state != Thread::Running);
        set_state(Thread::Runnable);
        return;
    }
    set_state(Thread::Running);
}
// Asks the thread to die. Calling this again after the flag is set is a no-op.
//
// - Blocked thread: the blocker is marked as interrupted by a signal and the
//   thread is unblocked.
// - Thread that is neither blocked nor in the kernel: it is marked Dying
//   right away.
// - Otherwise only the flag is set; die_if_needed() acts on it later.
void Thread::set_should_die()
{
    if (m_should_die)
        return;

    InterruptDisabler disabler;

    // Remember that we should die instead of returning to
    // the userspace.
    m_should_die = true;

    if (!is_blocked()) {
        // We're executing in userspace (and we're clearly
        // not the current thread). No need to unwind, so
        // set the state to dying right away. This also
        // makes sure we won't be scheduled anymore.
        if (!in_kernel())
            set_state(Thread::State::Dying);
        return;
    }

    ASSERT(in_kernel());
    ASSERT(m_blocker != nullptr);
    // We're blocked in the kernel. Pretend to have
    // been interrupted by a signal (perhaps that is
    // what has actually killed us).
    m_blocker->set_interrupted_by_signal();
    unblock();
}
// If set_should_die() was called on the current thread, marks it Dying and,
// when the scheduler is not already active, switches to another thread.
// Must be called on the current thread.
void Thread::die_if_needed()
{
    ASSERT(current == this);
    if (m_should_die) {
        InterruptDisabler disabler;
        set_state(Thread::State::Dying);
        if (!Scheduler::is_active())
            Scheduler::pick_next_and_switch_now();
    }
}
// Yields to the scheduler, dropping the process big lock for the duration
// of the yield if it was held and re-taking it afterwards.
void Thread::block_helper()
{
    // This function mostly exists to avoid circular header dependencies. If
    // anything needs adding, think carefully about whether it belongs in
    // block() instead. Remember that we're unlocking here, so be very careful
    // about altering any state once we're unlocked!
    auto& big_lock = process().big_lock();
    bool must_relock = big_lock.unlock_if_locked();
    Scheduler::yield();
    if (must_relock)
        big_lock.lock();
}
// Blocks the current thread until `ticks` ticks from now and returns the
// absolute wakeup time that was targeted.
u64 Thread::sleep(u32 ticks)
{
    ASSERT(state() == Thread::Running);
    u64 wakeup_time = g_uptime + ticks;
    auto ret = current->block<Thread::SleepBlocker>(wakeup_time);
    // Waking up before the deadline is only expected when a signal
    // interrupted the block.
    bool woke_early = wakeup_time > g_uptime;
    if (woke_early)
        ASSERT(ret == Thread::BlockResult::InterruptedBySignal);
    return wakeup_time;
}
// Blocks the current thread until the absolute time `wakeup_time` and
// returns that same value.
u64 Thread::sleep_until(u64 wakeup_time)
{
    ASSERT(state() == Thread::Running);
    auto ret = current->block<Thread::SleepBlocker>(wakeup_time);
    // Waking up before the deadline is only expected when a signal
    // interrupted the block.
    bool woke_early = wakeup_time > g_uptime;
    if (woke_early) {
        ASSERT(ret == Thread::BlockResult::InterruptedBySignal);
    }
    return wakeup_time;
}
// Returns a human-readable name for the thread's current state.
// For a blocked thread the name comes from its blocker.
const char* Thread::state_string() const
{
    switch (state()) {
    case Thread::Blocked:
        ASSERT(m_blocker != nullptr);
        return m_blocker->state_string();
    case Thread::Running:
        return "Running";
    case Thread::Runnable:
        return "Runnable";
    case Thread::Stopped:
        return "Stopped";
    case Thread::Dying:
        return "Dying";
    case Thread::Dead:
        return "Dead";
    case Thread::Skip1SchedulerPass:
        return "Skip1";
    case Thread::Skip0SchedulerPasses:
        return "Skip0";
    case Thread::Invalid:
        return "Invalid";
    }
    // Only reachable if m_state holds a value outside the enum.
    kprintf("Thread::state_string(): Invalid state: %u\n", state());
    ASSERT_NOT_REACHED();
    return nullptr;
}
// Final teardown of a thread; must run on the finalizer thread.
// Marks the thread Dead, optionally dumps its backtrace, then either
// finalizes the whole process (main thread) or deletes this thread object.
void Thread::finalize()
{
    ASSERT(current == g_finalizer);

    dbgprintf("Finalizing Thread %u in %s(%u)\n", tid(), m_process.name().characters(), pid());
    set_state(Thread::State::Dead);

    if (m_dump_backtrace_on_finalization)
        dbg() << backtrace_impl();

    bool is_main_thread = (this == &m_process.main_thread());
    if (!is_main_thread) {
        delete this;
        return;
    }
    m_process.finalize();
}
void Thread::finalize_dying_threads()
{
ASSERT(current == g_finalizer);
Vector<Thread*, 32> dying_threads;
{
InterruptDisabler disabler;
for_each_in_state(Thread::State::Dying, [&](Thread& thread) {
dying_threads.append(&thread);
return IterationDecision::Continue;
});
}
for (auto* thread : dying_threads)
thread->finalize();
}
// Accounts one timer tick to this thread and to its process (as user or
// kernel time, depending on the low two bits of the saved CS), then consumes
// one tick of the remaining time slice.
// Returns true while the thread still has ticks left.
bool Thread::tick()
{
    ++m_ticks;

    bool was_in_user_mode = (tss().cs & 3) != 0;
    if (was_in_user_mode)
        ++m_process.m_ticks_in_user;
    else
        ++m_process.m_ticks_in_kernel;

    --m_ticks_left;
    return m_ticks_left != 0;
}
// Marks `signal` as pending for this thread unless the thread ignores it.
// `sender` is only used for logging; pass nullptr when the kernel itself is
// the sender.
void Thread::send_signal(u8 signal, Process* sender)
{
    ASSERT(signal < 32);
    InterruptDisabler disabler;

    // FIXME: Figure out what to do for masked signals. Should we also ignore them here?
    if (should_ignore_signal(signal)) {
        dbg() << "signal " << signal << " was ignored by " << process();
        return;
    }

    if (!sender)
        dbgprintf("signal: kernel sent %d to %s(%u)\n", signal, process().name().characters(), pid());
    else
        dbgprintf("signal: %s(%u) sent %d to %s(%u)\n", sender->name().characters(), sender->pid(), signal, process().name().characters(), pid());

    // Signal N is tracked in bit N - 1 of the pending set.
    m_pending_signals |= 1 << (signal - 1);
}
// Some exceptions (SIGSEGV, SIGILL, ...) leave the thread in a state where
// it would simply fault again unless the signal handler runs immediately.
// Exception handlers call this so that, when the thread resumes, it is
// executing in the appropriate signal handler.
void Thread::send_urgent_signal_to_self(u8 signal)
{
    // FIXME: because of a bug in dispatch_signal we can't
    // setup a signal while we are the current thread. Because of
    // this we use a work-around where we send the signal and then
    // block, allowing the scheduler to properly dispatch the signal
    // before the thread is next run.
    auto& own_process = process();
    send_signal(signal, &own_process);
    (void)block<SemiPermanentBlocker>(SemiPermanentBlocker::Reason::Signal);
}
// Returns true if at least one pending signal is not blocked by the
// thread's signal mask.
bool Thread::has_unmasked_pending_signals() const
{
    auto deliverable = m_pending_signals & ~m_signal_mask;
    return deliverable != 0;
}
// Dispatches the lowest-numbered pending signal that is not masked.
// Requires interrupts to be disabled and at least one such signal to exist.
ShouldUnblockThread Thread::dispatch_one_pending_signal()
{
    ASSERT_INTERRUPTS_DISABLED();
    u32 signal_candidates = m_pending_signals & ~m_signal_mask;
    ASSERT(signal_candidates);

    // Signal N lives in bit N - 1; scan upwards from signal 1.
    u8 signal = 1;
    while (signal < 32 && !(signal_candidates & (1 << (signal - 1))))
        ++signal;

    return dispatch_signal(signal);
}
// What happens to a thread when a signal arrives and no handler has been
// installed for it. default_signal_action() maps signal numbers to these
// values and Thread::dispatch_signal() acts on them.
enum class DefaultSignalAction {
// The process is terminated due to the signal.
Terminate,
// The signal is dropped.
Ignore,
// Like Terminate, but every thread of the process is first flagged to
// dump its backtrace on finalization.
DumpCore,
// The thread is put into the Stopped state.
Stop,
// No further action is taken when the signal is dispatched.
Continue,
};
// Maps a signal number to the action taken when no handler is installed.
// `signal` must be non-zero and below NSIG.
DefaultSignalAction default_signal_action(u8 signal)
{
    ASSERT(signal && signal < NSIG);

    switch (signal) {
    // Signals that are dropped by default.
    case SIGCHLD:
    case SIGURG:
    case SIGWINCH:
        return DefaultSignalAction::Ignore;

    // Resumes a stopped thread.
    case SIGCONT:
        return DefaultSignalAction::Continue;

    // Signals that stop the thread.
    case SIGSTOP:
    case SIGTSTP:
    case SIGTTIN:
    case SIGTTOU:
        return DefaultSignalAction::Stop;

    // Signals that terminate the process with a dump.
    case SIGQUIT:
    case SIGILL:
    case SIGTRAP:
    case SIGABRT:
    case SIGBUS:
    case SIGFPE:
    case SIGSEGV:
    case SIGXCPU:
    case SIGXFSZ:
    case SIGSYS:
        return DefaultSignalAction::DumpCore;

    // Signals that terminate the process.
    case SIGHUP:
    case SIGINT:
    case SIGKILL:
    case SIGPIPE:
    case SIGALRM:
    case SIGUSR1:
    case SIGUSR2:
    case SIGVTALRM:
    case SIGSTKFLT:
    case SIGIO:
    case SIGPROF:
    case SIGTERM:
    case SIGPWR:
        return DefaultSignalAction::Terminate;
    }
    ASSERT_NOT_REACHED();
}
// Returns true if `signal` would be discarded: either no handler is
// installed and the default action is Ignore, or the handler is SIG_IGN.
bool Thread::should_ignore_signal(u8 signal) const
{
    ASSERT(signal < 32);
    auto& handler = m_signal_action_data[signal].handler_or_sigaction;
    if (handler.is_null())
        return default_signal_action(signal) == DefaultSignalAction::Ignore;
    return handler.as_ptr() == SIG_IGN;
}
// Returns true if a non-null handler address is stored for `signal`.
bool Thread::has_signal_handler(u8 signal) const
{
    ASSERT(signal < 32);
    auto& handler = m_signal_action_data[signal].handler_or_sigaction;
    return !handler.is_null();
}
// Pushes one 32-bit value onto a downward-growing stack.
// `stack` points at the variable holding the stack pointer; it is lowered by
// four bytes and `data` is stored at the new top.
static void push_value_on_user_stack(u32* stack, u32 data)
{
    u32 new_top = *stack - 4;
    *stack = new_top;
    *(u32*)new_top = data;
}
// Delivers `signal` to this thread. Must be called with interrupts disabled
// and never for a ring 0 process.
//
// The signal is first removed from the pending set. Then:
// - SIGSTOP always stops the thread; SIGCONT resumes a stopped thread.
// - With no handler installed, the default action for the signal is applied
//   (stop, terminate the process with or without a backtrace dump, or
//   continue).
// - With SIG_IGN installed, nothing else happens.
// - Otherwise the signal mask is updated and a frame holding the interrupted
//   register state, the old signal mask, the signal number and the handler
//   address is pushed onto the userspace stack; execution is redirected to
//   the signal trampoline.
//
// Returns ShouldUnblockThread::No when the thread was stopped or the process
// is being terminated, and ShouldUnblockThread::Yes otherwise.
ShouldUnblockThread Thread::dispatch_signal(u8 signal)
{
    ASSERT_INTERRUPTS_DISABLED();
    // Valid signal numbers are 1..31: the signal indexes m_signal_action_data
    // (which the rest of this file guards with `signal < 32`) and selects bit
    // (signal - 1) of the pending/mask words via a shift on an int.
    ASSERT(signal > 0 && signal < 32);
    ASSERT(!process().is_ring0());

#ifdef SIGNAL_DEBUG
    kprintf("dispatch_signal %s(%u) <- %u\n", process().name().characters(), pid(), signal);
#endif

    auto& action = m_signal_action_data[signal];
    // FIXME: Implement SA_SIGINFO signal handlers.
    ASSERT(!(action.flags & SA_SIGINFO));

    // Mark this signal as handled.
    m_pending_signals &= ~(1 << (signal - 1));

    if (signal == SIGSTOP) {
        set_state(Stopped);
        return ShouldUnblockThread::No;
    }

    if (signal == SIGCONT && state() == Stopped)
        set_state(Runnable);

    auto handler_vaddr = action.handler_or_sigaction;
    if (handler_vaddr.is_null()) {
        // No handler installed: apply the default action.
        switch (default_signal_action(signal)) {
        case DefaultSignalAction::Stop:
            set_state(Stopped);
            return ShouldUnblockThread::No;
        case DefaultSignalAction::DumpCore:
            process().for_each_thread([](auto& thread) {
                thread.set_dump_backtrace_on_finalization();
                return IterationDecision::Continue;
            });
            [[fallthrough]];
        case DefaultSignalAction::Terminate:
            m_process.terminate_due_to_signal(signal);
            return ShouldUnblockThread::No;
        case DefaultSignalAction::Ignore:
            ASSERT_NOT_REACHED();
        case DefaultSignalAction::Continue:
            return ShouldUnblockThread::Yes;
        }
        ASSERT_NOT_REACHED();
    }

    if (handler_vaddr.as_ptr() == SIG_IGN) {
#ifdef SIGNAL_DEBUG
        kprintf("%s(%u) ignored signal %u\n", process().name().characters(), pid(), signal);
#endif
        return ShouldUnblockThread::Yes;
    }

    // The userspace stack is written below, so the process's address space
    // has to be the active one.
    ProcessPagingScope paging_scope(m_process);

    // Block the handler's mask for the duration of the handler, plus the
    // signal itself unless SA_NODEFER was requested.
    u32 old_signal_mask = m_signal_mask;
    u32 new_signal_mask = action.mask;
    if (action.flags & SA_NODEFER)
        new_signal_mask &= ~(1 << (signal - 1));
    else
        new_signal_mask |= 1 << (signal - 1);

    m_signal_mask |= new_signal_mask;

    // Pushes the signal frame onto the stack whose pointer lives at `stack`.
    // `state` is a copy of the interrupted register state (TSS or
    // RegisterDump), so the values pushed are those from before any change
    // made through `stack`.
    auto setup_stack = [&]<typename ThreadState>(ThreadState state, u32* stack)
    {
        u32 old_esp = *stack;
        u32 ret_eip = state.eip;
        u32 ret_eflags = state.eflags;

        // Align the stack to 16 bytes.
        // Note that we push 56 bytes (4 * 14) on to the stack,
        // so we need to account for this here.
        u32 stack_alignment = (*stack - 56) % 16;
        *stack -= stack_alignment;

        push_value_on_user_stack(stack, ret_eflags);

        push_value_on_user_stack(stack, ret_eip);
        push_value_on_user_stack(stack, state.eax);
        push_value_on_user_stack(stack, state.ecx);
        push_value_on_user_stack(stack, state.edx);
        push_value_on_user_stack(stack, state.ebx);
        push_value_on_user_stack(stack, old_esp);
        push_value_on_user_stack(stack, state.ebp);
        push_value_on_user_stack(stack, state.esi);
        push_value_on_user_stack(stack, state.edi);

        // PUSH old_signal_mask
        push_value_on_user_stack(stack, old_signal_mask);

        push_value_on_user_stack(stack, signal);
        push_value_on_user_stack(stack, handler_vaddr.get());
        push_value_on_user_stack(stack, 0); //push fake return address

        ASSERT((*stack % 16) == 0);
    };

    // We now place the thread state on the userspace stack.
    // Note that when we are in the kernel (ie. blocking) we cannot use the
    // tss, as that will contain kernel state; instead, we use a RegisterDump.
    // Conversely, when the thread isn't blocking the RegisterDump may not be
    // valid (fork, exec etc) but the tss will, so we use that instead.
    if (!in_kernel()) {
        u32* stack = &m_tss.esp;
        setup_stack(m_tss, stack);

        Scheduler::prepare_to_modify_tss(*this);
        m_tss.cs = 0x1b;
        m_tss.ds = 0x23;
        m_tss.es = 0x23;
        m_tss.fs = 0x23;
        m_tss.gs = thread_specific_selector() | 3;
        m_tss.eip = g_return_to_ring3_from_signal_trampoline.get();
        // FIXME: This state is such a hack. It avoids trouble if 'current' is the process receiving a signal.
        set_state(Skip1SchedulerPass);
    } else {
        auto& regs = get_RegisterDump_from_stack();
        u32* stack = &regs.esp_if_crossRing;
        setup_stack(regs, stack);
        regs.eip = g_return_to_ring3_from_signal_trampoline.get();
    }

#ifdef SIGNAL_DEBUG
    kprintf("signal: Okay, %s(%u) {%s} has been primed with signal handler %w:%x\n", process().name().characters(), pid(), state_string(), m_tss.cs, m_tss.eip);
#endif
    return ShouldUnblockThread::Yes;
}
// Resets every signal action to "no handler", then explicitly installs
// SIG_IGN for SIGCHLD and SIGWINCH.
void Thread::set_default_signal_dispositions()
{
    // FIXME: Set up all the right default actions. See signal(7).
    memset(&m_signal_action_data, 0, sizeof(m_signal_action_data));

    auto ignore_handler = VirtualAddress((u32)SIG_IGN);
    m_signal_action_data[SIGCHLD].handler_or_sigaction = ignore_handler;
    m_signal_action_data[SIGWINCH].handler_or_sigaction = ignore_handler;
}
// Pushes one 32-bit value onto the stack addressed by the TSS's ESP:
// ESP is lowered by four bytes and `value` is stored at the new top.
void Thread::push_value_on_stack(u32 value)
{
    m_tss.esp -= 4;
    *(u32*)m_tss.esp = value;
}
// Returns the RegisterDump located just below the top of this thread's
// kernel stack.
RegisterDump& Thread::get_RegisterDump_from_stack()
{
    // The userspace registers should be stored at the top of the stack
    // We have to subtract 2 because the processor decrements the kernel
    // stack before pushing the args.
    auto dump_address = kernel_stack_top() - sizeof(RegisterDump) - 2;
    return *(RegisterDump*)dump_address;
}
// Allocates the main thread's userspace stack and prepares it for entry.
//
// Layout, starting at the base (lowest address) of the region:
//   argv[0..argc-1], nullptr, env[0..n-1], nullptr, then the NUL-terminated
//   argument strings followed by the NUL-terminated environment strings.
// ESP starts at the end of the region; env, argv, argc and a zero word are
// then pushed, in that order.
void Thread::make_userspace_stack_for_main_thread(Vector<String> arguments, Vector<String> environment)
{
    auto* region = m_process.allocate_region(VirtualAddress(), default_userspace_stack_size, "Stack (Main thread)", PROT_READ | PROT_WRITE, false);
    ASSERT(region);
    m_tss.esp = region->vaddr().offset(default_userspace_stack_size).get();

    char* stack_base = (char*)region->vaddr().get();
    int argc = arguments.size();
    char** argv = (char**)stack_base;
    char** env = argv + arguments.size() + 1;
    // String storage begins right after both pointer tables (each has one
    // extra slot for its terminating nullptr).
    char* bufptr = stack_base + (sizeof(char*) * (arguments.size() + 1)) + (sizeof(char*) * (environment.size() + 1));

    // Copies each string to `bufptr` (NUL-terminated), records its address
    // in `table`, and terminates the table with nullptr.
    auto copy_strings = [&bufptr](const Vector<String>& strings, char** table) {
        for (int i = 0; i < strings.size(); ++i) {
            table[i] = bufptr;
            memcpy(bufptr, strings[i].characters(), strings[i].length());
            bufptr += strings[i].length();
            *(bufptr++) = '\0';
        }
        table[strings.size()] = nullptr;
    };

    copy_strings(arguments, argv);
    copy_strings(environment, env);

    // NOTE: The stack needs to be 16-byte aligned.
    push_value_on_stack((u32)env);
    push_value_on_stack((u32)argv);
    push_value_on_stack((u32)argc);
    push_value_on_stack(0);
}
// Allocates a userspace stack for a non-main thread, points ESP at its end,
// then pushes `argument` followed by a zero word.
void Thread::make_userspace_stack_for_secondary_thread(void* argument)
{
    auto region_name = String::format("Stack (Thread %d)", tid());
    m_userspace_stack_region = m_process.allocate_region(VirtualAddress(), default_userspace_stack_size, region_name, PROT_READ | PROT_WRITE, false);
    ASSERT(m_userspace_stack_region);

    auto stack_end = m_userspace_stack_region->vaddr().offset(default_userspace_stack_size);
    m_tss.esp = stack_end.get();

    // NOTE: The stack needs to be 16-byte aligned.
    push_value_on_stack((u32)argument);
    push_value_on_stack(0);
}
// Creates a new thread in `process` that inherits this thread's signal
// actions, signal mask, FPU state (and whether the FPU was used) and
// thread-specific data address. The caller receives the new thread.
Thread* Thread::clone(Process& process)
{
    auto* new_thread = new Thread(process);

    // Signal state.
    memcpy(new_thread->m_signal_action_data, m_signal_action_data, sizeof(m_signal_action_data));
    new_thread->m_signal_mask = m_signal_mask;

    // FPU state.
    memcpy(new_thread->m_fpu_state, m_fpu_state, sizeof(FPUState));
    new_thread->m_has_used_fpu = m_has_used_fpu;

    new_thread->m_thread_specific_data = m_thread_specific_data;
    return new_thread;
}
// One-time setup for the threading subsystem; currently this only
// initializes the scheduler.
void Thread::initialize()
{
Scheduler::initialize();
}
// Returns a snapshot of every thread currently in the thread table.
// The table is read with interrupts disabled.
Vector<Thread*> Thread::all_threads()
{
    Vector<Thread*> snapshot;
    InterruptDisabler disabler;
    auto& table = thread_table();
    // Capacity is reserved up front, so the unchecked appends cannot overflow.
    snapshot.ensure_capacity(table.size());
    for (auto* entry : table)
        snapshot.unchecked_append(entry);
    return snapshot;
}
// Returns true if `ptr` is the address of a thread in the thread table.
// Must be called with interrupts disabled.
bool Thread::is_thread(void* ptr)
{
    ASSERT_INTERRUPTS_DISABLED();
    auto* candidate = (Thread*)ptr;
    return thread_table().contains(candidate);
}
// Changes the thread's state with interrupts disabled and, for threads of
// any process other than PID 0, tells the scheduler about the change.
void Thread::set_state(State new_state)
{
    InterruptDisabler disabler;

    // we should always have a Blocker while blocked
    if (new_state == Blocked)
        ASSERT(m_blocker != nullptr);

    m_state = new_state;

    if (m_process.pid() == 0)
        return;
    Scheduler::update_state_for_thread(*this);
}
// Returns a textual backtrace of this thread. The ProcessInspectionHandle
// argument is not used in the body; the work is done by backtrace_impl().
String Thread::backtrace(ProcessInspectionHandle&) const
{
return backtrace_impl();
}
// Builds a textual backtrace for this thread, one line per frame.
//
// The first entry is the saved EIP from the TSS; the rest are return
// addresses collected by walking the frame-pointer chain starting at
// frame_ptr(). Each address is symbolicated against kernel symbols first and,
// failing that, against the process's ELF symbols when those are available.
String Thread::backtrace_impl() const
{
auto& process = const_cast<Process&>(this->process());
// The stack being walked belongs to this thread's process, so switch to
// its address space for the duration of the walk.
ProcessPagingScope paging_scope(process);
struct RecognizedSymbol {
u32 address;
const KSym* ksym;
};
StringBuilder builder;
Vector<RecognizedSymbol, 64> recognized_symbols;
recognized_symbols.append({ tss().eip, ksymbolicate(tss().eip) });
// Follow the chain of saved frame pointers: stack_ptr[0] is the next frame
// pointer and stack_ptr[1] is the return address. The walk ends at the
// first frame pointer that fails validation.
// NOTE(review): only stack_ptr itself is validated before stack_ptr[1] is
// read — confirm the validation covers the adjacent word.
for (u32* stack_ptr = (u32*)frame_ptr(); process.validate_read_from_kernel(VirtualAddress((u32)stack_ptr)); stack_ptr = (u32*)*stack_ptr) {
u32 retaddr = stack_ptr[1];
recognized_symbols.append({ retaddr, ksymbolicate(retaddr) });
}
for (auto& symbol : recognized_symbols) {
// A zero address ends the backtrace.
if (!symbol.address)
break;
if (!symbol.ksym) {
// No kernel symbol: fall back to the process's ELF symbols, but only
// when the scheduler is not active.
if (!Scheduler::is_active() && process.elf_loader() && process.elf_loader()->has_symbols())
builder.appendf("%p %s\n", symbol.address, process.elf_loader()->symbolicate(symbol.address).characters());
else
builder.appendf("%p\n", symbol.address);
continue;
}
unsigned offset = symbol.address - symbol.ksym->address;
// An address more than 4096 bytes past the highest kernel symbol is
// printed without a symbol name.
if (symbol.ksym->address == ksym_highest_address && offset > 4096)
builder.appendf("%p\n", symbol.address);
else
builder.appendf("%p %s +%u\n", symbol.address, symbol.ksym->name, offset);
}
return builder.to_string();
}
// Allocates this thread's "Thread-specific" region and initializes it.
//
// The region holds a copy of the process's master TLS image followed by a
// ThreadSpecificData structure. The address of that structure is stored in
// m_thread_specific_data, and its `self` field points at itself.
// Only Process can call this (enforced by the Badge parameter).
void Thread::make_thread_specific_region(Badge<Process>)
{
    size_t thread_specific_region_alignment = max(process().m_master_tls_alignment, alignof(ThreadSpecificData));
    // Offset of the ThreadSpecificData within the region: the TLS size
    // rounded up so that the structure is suitably aligned.
    auto aligned_tls_size = align_up_to(process().m_master_tls_size, thread_specific_region_alignment);
    size_t thread_specific_region_size = aligned_tls_size + sizeof(ThreadSpecificData);

    auto* region = process().allocate_region({}, thread_specific_region_size, "Thread-specific", PROT_READ | PROT_WRITE, true);
    // Fail loudly on allocation failure, like the stack allocation helpers
    // do, instead of dereferencing a null region below.
    ASSERT(region);

    auto* thread_specific_data = (ThreadSpecificData*)region->vaddr().offset(aligned_tls_size).as_ptr();
    // The TLS copy ends at thread_specific_data; its start is found by
    // stepping back by the TLS size rounded up to the TLS's own alignment.
    auto* thread_local_storage = (u8*)((u8*)thread_specific_data) - align_up_to(process().m_master_tls_size, process().m_master_tls_alignment);

    m_thread_specific_data = VirtualAddress((u32)thread_specific_data);
    thread_specific_data->self = thread_specific_data;

    if (process().m_master_tls_size)
        memcpy(thread_local_storage, process().m_master_tls_region->vaddr().as_ptr(), process().m_master_tls_size);
}
// Writes a thread to a log stream as "name(pid:tid)".
const LogStream& operator<<(const LogStream& stream, const Thread& value)
{
    auto& owner = value.process();
    return stream << owner.name() << "(" << value.pid() << ":" << value.tid() << ")";
}
// Returns a human-readable name for a thread priority.
// Asserts if `priority` is not one of the known enumerators.
const char* to_string(ThreadPriority priority)
{
    switch (priority) {
    case ThreadPriority::High:
        return "High";
    case ThreadPriority::Normal:
        return "Normal";
    case ThreadPriority::Low:
        return "Low";
    case ThreadPriority::Idle:
        return "Idle";
    }
    dbg() << "to_string(ThreadPriority): Invalid priority: " << (u32)priority;
    ASSERT_NOT_REACHED();
    return nullptr;
}