/* * Copyright (c) 2018-2021, Andreas Kling * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace Kernel { static void create_signal_trampoline(); RecursiveSpinLock g_processes_lock; static Atomic next_pid; READONLY_AFTER_INIT InlineLinkedList* g_processes; READONLY_AFTER_INIT String* g_hostname; READONLY_AFTER_INIT Lock* g_hostname_lock; READONLY_AFTER_INIT HashMap>* g_modules; READONLY_AFTER_INIT Region* g_signal_trampoline_region; ProcessID Process::allocate_pid() { // Overflow is UB, and negative PIDs wreck havoc. // TODO: Handle PID overflow // For example: Use an Atomic, mask the most significant bit, // retry if PID is already taken as a PID, taken as a TID, // takes as a PGID, taken as a SID, or zero. return next_pid.fetch_add(1, AK::MemoryOrder::memory_order_acq_rel); } UNMAP_AFTER_INIT void Process::initialize() { g_modules = new HashMap>; next_pid.store(0, AK::MemoryOrder::memory_order_release); g_processes = new InlineLinkedList; g_process_groups = new InlineLinkedList; g_hostname = new String("courage"); g_hostname_lock = new Lock; create_signal_trampoline(); } Vector Process::all_pids() { Vector pids; ScopedSpinLock lock(g_processes_lock); pids.ensure_capacity((int)g_processes->size_slow()); for (auto& process : *g_processes) pids.append(process.pid()); return pids; } NonnullRefPtrVector Process::all_processes() { NonnullRefPtrVector processes; ScopedSpinLock lock(g_processes_lock); processes.ensure_capacity((int)g_processes->size_slow()); for (auto& process : *g_processes) processes.append(NonnullRefPtr(process)); return processes; } bool Process::in_group(gid_t gid) const { return this->gid() == gid || extra_gids().contains_slow(gid); } void Process::kill_threads_except_self() { InterruptDisabler disabler; if (thread_count() <= 1) return; auto current_thread = Thread::current(); for_each_thread([&](Thread& thread) { if (&thread == current_thread || thread.state() == Thread::State::Dead || thread.state() == Thread::State::Dying) return IterationDecision::Continue; // We need to detach this thread in case it hasn't been joined thread.detach(); thread.set_should_die(); return IterationDecision::Continue; }); big_lock().clear_waiters(); } void Process::kill_all_threads() { for_each_thread([&](Thread& thread) { // We need to detach this thread in case it hasn't been joined thread.detach(); thread.set_should_die(); return IterationDecision::Continue; }); } RefPtr Process::create_user_process(RefPtr& first_thread, const String& path, uid_t uid, gid_t gid, ProcessID parent_pid, int& error, Vector&& arguments, Vector&& environment, TTY* tty) { auto parts = path.split('/'); if (arguments.is_empty()) { arguments.append(parts.last()); } RefPtr cwd; { ScopedSpinLock lock(g_processes_lock); if (auto parent = Process::from_pid(parent_pid)) { cwd = parent->m_cwd; } } if (!cwd) cwd = VFS::the().root_custody(); auto process = adopt(*new Process(first_thread, parts.take_last(), uid, gid, parent_pid, false, move(cwd), nullptr, tty)); if (!first_thread) return {}; process->m_fds.resize(m_max_open_file_descriptors); auto& device_to_use_as_tty = tty ? (CharacterDevice&)*tty : NullDevice::the(); auto description = device_to_use_as_tty.open(O_RDWR).value(); process->m_fds[0].set(*description); process->m_fds[1].set(*description); process->m_fds[2].set(*description); error = process->exec(path, move(arguments), move(environment)); if (error != 0) { dbgln("Failed to exec {}: {}", path, error); first_thread = nullptr; return {}; } { ScopedSpinLock lock(g_processes_lock); g_processes->prepend(process); process->ref(); } error = 0; return process; } RefPtr Process::create_kernel_process(RefPtr& first_thread, String&& name, void (*entry)(void*), void* entry_data, u32 affinity) { auto process = adopt(*new Process(first_thread, move(name), (uid_t)0, (gid_t)0, ProcessID(0), true)); if (!first_thread) return {}; first_thread->tss().eip = (FlatPtr)entry; first_thread->tss().esp = FlatPtr(entry_data); // entry function argument is expected to be in tss.esp if (process->pid() != 0) { ScopedSpinLock lock(g_processes_lock); g_processes->prepend(process); process->ref(); } ScopedSpinLock lock(g_scheduler_lock); first_thread->set_affinity(affinity); first_thread->set_state(Thread::State::Runnable); return process; } void Process::protect_data() { MM.set_page_writable_direct(VirtualAddress { this }, false); } void Process::unprotect_data() { MM.set_page_writable_direct(VirtualAddress { this }, true); } Process::Process(RefPtr& first_thread, const String& name, uid_t uid, gid_t gid, ProcessID ppid, bool is_kernel_process, RefPtr cwd, RefPtr executable, TTY* tty, Process* fork_parent) : m_name(move(name)) , m_is_kernel_process(is_kernel_process) , m_executable(move(executable)) , m_cwd(move(cwd)) , m_tty(tty) , m_wait_block_condition(*this) { // Ensure that we protect the process data when exiting the constructor. ProtectedDataMutationScope scope { *this }; m_pid = allocate_pid(); m_ppid = ppid; m_uid = uid; m_gid = gid; m_euid = uid; m_egid = gid; m_suid = uid; m_sgid = gid; dbgln_if(PROCESS_DEBUG, "Created new process {}({})", m_name, this->pid().value()); m_space = Space::create(*this, fork_parent ? &fork_parent->space() : nullptr); if (fork_parent) { // NOTE: fork() doesn't clone all threads; the thread that called fork() becomes the only thread in the new process. first_thread = Thread::current()->clone(*this); } else { // NOTE: This non-forked code path is only taken when the kernel creates a process "manually" (at boot.) auto thread_or_error = Thread::try_create(*this); VERIFY(!thread_or_error.is_error()); first_thread = thread_or_error.release_value(); first_thread->detach(); } } Process::~Process() { unprotect_data(); VERIFY(thread_count() == 0); // all threads should have been finalized VERIFY(!m_alarm_timer); { ScopedSpinLock processses_lock(g_processes_lock); if (prev() || next()) g_processes->remove(this); } } // Make sure the compiler doesn't "optimize away" this function: extern void signal_trampoline_dummy(); void signal_trampoline_dummy() { #if ARCH(I386) // The trampoline preserves the current eax, pushes the signal code and // then calls the signal handler. We do this because, when interrupting a // blocking syscall, that syscall may return some special error code in eax; // This error code would likely be overwritten by the signal handler, so it's // necessary to preserve it here. asm( ".intel_syntax noprefix\n" "asm_signal_trampoline:\n" "push ebp\n" "mov ebp, esp\n" "push eax\n" // we have to store eax 'cause it might be the return value from a syscall "sub esp, 4\n" // align the stack to 16 bytes "mov eax, [ebp+12]\n" // push the signal code "push eax\n" "call [ebp+8]\n" // call the signal handler "add esp, 8\n" "mov eax, %P0\n" "int 0x82\n" // sigreturn syscall "asm_signal_trampoline_end:\n" ".att_syntax" ::"i"(Syscall::SC_sigreturn)); #elif ARCH(X86_64) asm("asm_signal_trampoline:\n" "cli;hlt\n" "asm_signal_trampoline_end:\n"); #endif } extern "C" void asm_signal_trampoline(void); extern "C" void asm_signal_trampoline_end(void); void create_signal_trampoline() { // NOTE: We leak this region. g_signal_trampoline_region = MM.allocate_kernel_region(PAGE_SIZE, "Signal trampolines", Region::Access::Read | Region::Access::Write).leak_ptr(); g_signal_trampoline_region->set_syscall_region(true); u8* trampoline = (u8*)asm_signal_trampoline; u8* trampoline_end = (u8*)asm_signal_trampoline_end; size_t trampoline_size = trampoline_end - trampoline; u8* code_ptr = (u8*)g_signal_trampoline_region->vaddr().as_ptr(); memcpy(code_ptr, trampoline, trampoline_size); g_signal_trampoline_region->set_writable(false); g_signal_trampoline_region->remap(); } void Process::crash(int signal, u32 eip, bool out_of_memory) { VERIFY(!is_dead()); VERIFY(Process::current() == this); if (out_of_memory) { dbgln("\033[31;1mOut of memory\033[m, killing: {}", *this); } else { if (eip >= 0xc0000000 && g_kernel_symbols_available) { auto* symbol = symbolicate_kernel_address(eip); dbgln("\033[31;1m{:p} {} +{}\033[0m\n", eip, (symbol ? demangle(symbol->name) : "(k?)"), (symbol ? eip - symbol->address : 0)); } else { dbgln("\033[31;1m{:p} (?)\033[0m\n", eip); } dump_backtrace(); } { ProtectedDataMutationScope scope { *this }; m_termination_signal = signal; } set_dump_core(!out_of_memory); space().dump_regions(); VERIFY(is_user_process()); die(); // We can not return from here, as there is nowhere // to unwind to, so die right away. Thread::current()->die_if_needed(); VERIFY_NOT_REACHED(); } RefPtr Process::from_pid(ProcessID pid) { ScopedSpinLock lock(g_processes_lock); for (auto& process : *g_processes) { process.pid(); if (process.pid() == pid) return &process; } return {}; } RefPtr Process::file_description(int fd) const { if (fd < 0) return nullptr; if (static_cast(fd) < m_fds.size()) return m_fds[fd].description(); return nullptr; } int Process::fd_flags(int fd) const { if (fd < 0) return -1; if (static_cast(fd) < m_fds.size()) return m_fds[fd].flags(); return -1; } int Process::number_of_open_file_descriptors() const { int count = 0; for (auto& description : m_fds) { if (description) ++count; } return count; } int Process::alloc_fd(int first_candidate_fd) { for (int i = first_candidate_fd; i < (int)m_max_open_file_descriptors; ++i) { if (!m_fds[i]) return i; } return -EMFILE; } Time kgettimeofday() { return TimeManagement::now(); } siginfo_t Process::wait_info() { siginfo_t siginfo {}; siginfo.si_signo = SIGCHLD; siginfo.si_pid = pid().value(); siginfo.si_uid = uid(); if (m_termination_signal) { siginfo.si_status = m_termination_signal; siginfo.si_code = CLD_KILLED; } else { siginfo.si_status = m_termination_status; siginfo.si_code = CLD_EXITED; } return siginfo; } Custody& Process::current_directory() { if (!m_cwd) m_cwd = VFS::the().root_custody(); return *m_cwd; } KResultOr Process::get_syscall_path_argument(const char* user_path, size_t path_length) const { if (path_length == 0) return EINVAL; if (path_length > PATH_MAX) return ENAMETOOLONG; auto copied_string = copy_string_from_user(user_path, path_length); if (copied_string.is_null()) return EFAULT; return copied_string; } KResultOr Process::get_syscall_path_argument(const Syscall::StringArgument& path) const { return get_syscall_path_argument(path.characters, path.length); } bool Process::dump_core() { VERIFY(is_dumpable()); VERIFY(should_core_dump()); dbgln("Generating coredump for pid: {}", pid().value()); auto coredump_path = String::formatted("/tmp/coredump/{}_{}_{}", name(), pid().value(), RTC::now()); auto coredump = CoreDump::create(*this, coredump_path); if (!coredump) return false; return !coredump->write().is_error(); } bool Process::dump_perfcore() { VERIFY(is_dumpable()); VERIFY(m_perf_event_buffer); dbgln("Generating perfcore for pid: {}", pid().value()); auto description_or_error = VFS::the().open(String::formatted("perfcore.{}", pid().value()), O_CREAT | O_EXCL, 0400, current_directory(), UidAndGid { uid(), gid() }); if (description_or_error.is_error()) return false; auto& description = description_or_error.value(); KBufferBuilder builder; if (!m_perf_event_buffer->to_json(builder)) return false; auto json = builder.build(); if (!json) return false; auto json_buffer = UserOrKernelBuffer::for_kernel_buffer(json->data()); return !description->write(json_buffer, json->size()).is_error(); } void Process::finalize() { VERIFY(Thread::current() == g_finalizer); dbgln_if(PROCESS_DEBUG, "Finalizing process {}", *this); if (is_dumpable()) { if (m_should_dump_core) dump_core(); if (m_perf_event_buffer) dump_perfcore(); } m_threads_for_coredump.clear(); if (m_alarm_timer) TimerQueue::the().cancel_timer(m_alarm_timer.release_nonnull()); m_fds.clear(); m_tty = nullptr; m_executable = nullptr; m_cwd = nullptr; m_root_directory = nullptr; m_root_directory_relative_to_global_root = nullptr; m_arguments.clear(); m_environment.clear(); m_dead = true; { // FIXME: PID/TID BUG if (auto parent_thread = Thread::from_tid(ppid().value())) { if (!(parent_thread->m_signal_action_data[SIGCHLD].flags & SA_NOCLDWAIT)) parent_thread->send_signal(SIGCHLD, this); } } { ScopedSpinLock processses_lock(g_processes_lock); if (!!ppid()) { if (auto parent = Process::from_pid(ppid())) { parent->m_ticks_in_user_for_dead_children += m_ticks_in_user + m_ticks_in_user_for_dead_children; parent->m_ticks_in_kernel_for_dead_children += m_ticks_in_kernel + m_ticks_in_kernel_for_dead_children; } } } unblock_waiters(Thread::WaitBlocker::UnblockFlags::Terminated); m_space->remove_all_regions({}); VERIFY(ref_count() > 0); // WaitBlockCondition::finalize will be in charge of dropping the last // reference if there are still waiters around, or whenever the last // waitable states are consumed. Unless there is no parent around // anymore, in which case we'll just drop it right away. m_wait_block_condition.finalize(); } void Process::disowned_by_waiter(Process& process) { m_wait_block_condition.disowned_by_waiter(process); } void Process::unblock_waiters(Thread::WaitBlocker::UnblockFlags flags, u8 signal) { if (auto parent = Process::from_pid(ppid())) parent->m_wait_block_condition.unblock(*this, flags, signal); } void Process::die() { // Let go of the TTY, otherwise a slave PTY may keep the master PTY from // getting an EOF when the last process using the slave PTY dies. // If the master PTY owner relies on an EOF to know when to wait() on a // slave owner, we have to allow the PTY pair to be torn down. m_tty = nullptr; for_each_thread([&](auto& thread) { m_threads_for_coredump.append(thread); return IterationDecision::Continue; }); { ScopedSpinLock lock(g_processes_lock); for (auto* process = g_processes->head(); process;) { auto* next_process = process->next(); if (process->has_tracee_thread(pid())) { dbgln_if(PROCESS_DEBUG, "Process {} ({}) is attached by {} ({}) which will exit", process->name(), process->pid(), name(), pid()); process->stop_tracing(); auto err = process->send_signal(SIGSTOP, this); if (err.is_error()) dbgln("Failed to send the SIGSTOP signal to {} ({})", process->name(), process->pid()); } process = next_process; } } kill_all_threads(); } void Process::terminate_due_to_signal(u8 signal) { VERIFY_INTERRUPTS_DISABLED(); VERIFY(signal < 32); VERIFY(Process::current() == this); dbgln("Terminating {} due to signal {}", *this, signal); { ProtectedDataMutationScope scope { *this }; m_termination_status = 0; m_termination_signal = signal; } die(); } KResult Process::send_signal(u8 signal, Process* sender) { // Try to send it to the "obvious" main thread: auto receiver_thread = Thread::from_tid(pid().value()); // If the main thread has died, there may still be other threads: if (!receiver_thread) { // The first one should be good enough. // Neither kill(2) nor kill(3) specify any selection precedure. for_each_thread([&receiver_thread](Thread& thread) -> IterationDecision { receiver_thread = &thread; return IterationDecision::Break; }); } if (receiver_thread) { receiver_thread->send_signal(signal, sender); return KSuccess; } return ESRCH; } RefPtr Process::create_kernel_thread(void (*entry)(void*), void* entry_data, u32 priority, const String& name, u32 affinity, bool joinable) { VERIFY((priority >= THREAD_PRIORITY_MIN) && (priority <= THREAD_PRIORITY_MAX)); // FIXME: Do something with guard pages? auto thread_or_error = Thread::try_create(*this); if (thread_or_error.is_error()) return {}; auto thread = thread_or_error.release_value(); thread->set_name(name); thread->set_affinity(affinity); thread->set_priority(priority); if (!joinable) thread->detach(); auto& tss = thread->tss(); tss.eip = (FlatPtr)entry; tss.esp = FlatPtr(entry_data); // entry function argument is expected to be in tss.esp ScopedSpinLock lock(g_scheduler_lock); thread->set_state(Thread::State::Runnable); return thread; } void Process::FileDescriptionAndFlags::clear() { m_description = nullptr; m_flags = 0; } void Process::FileDescriptionAndFlags::set(NonnullRefPtr&& description, u32 flags) { m_description = move(description); m_flags = flags; } Custody& Process::root_directory() { if (!m_root_directory) m_root_directory = VFS::the().root_custody(); return *m_root_directory; } Custody& Process::root_directory_relative_to_global_root() { if (!m_root_directory_relative_to_global_root) m_root_directory_relative_to_global_root = root_directory(); return *m_root_directory_relative_to_global_root; } void Process::set_root_directory(const Custody& root) { m_root_directory = root; } void Process::set_tty(TTY* tty) { m_tty = tty; } void Process::start_tracing_from(ProcessID tracer) { m_tracer = ThreadTracer::create(tracer); } void Process::stop_tracing() { m_tracer = nullptr; } void Process::tracer_trap(Thread& thread, const RegisterState& regs) { VERIFY(m_tracer.ptr()); m_tracer->set_regs(regs); thread.send_urgent_signal_to_self(SIGTRAP); } bool Process::create_perf_events_buffer_if_needed() { if (!m_perf_event_buffer) { m_perf_event_buffer = PerformanceEventBuffer::try_create_with_size(4 * MiB); m_perf_event_buffer->add_process(*this); } return !!m_perf_event_buffer; } bool Process::remove_thread(Thread& thread) { ProtectedDataMutationScope scope { *this }; auto thread_cnt_before = m_thread_count.fetch_sub(1, AK::MemoryOrder::memory_order_acq_rel); VERIFY(thread_cnt_before != 0); ScopedSpinLock thread_list_lock(m_thread_list_lock); m_thread_list.remove(thread); return thread_cnt_before == 1; } bool Process::add_thread(Thread& thread) { ProtectedDataMutationScope scope { *this }; bool is_first = m_thread_count.fetch_add(1, AK::MemoryOrder::memory_order_relaxed) == 0; ScopedSpinLock thread_list_lock(m_thread_list_lock); m_thread_list.append(thread); return is_first; } void Process::set_dumpable(bool dumpable) { if (dumpable == m_dumpable) return; ProtectedDataMutationScope scope { *this }; m_dumpable = dumpable; } }