Kernel: Add a way to specify which memory regions can make syscalls

This patch adds sys$msyscall() which is loosely based on an OpenBSD
mechanism for preventing syscalls from non-blessed memory regions.

It works similarly to pledge and unveil: you can call it as many
times as you like, and when you're finished, you call it with a null
pointer, after which it will stop accepting new regions from then on.

If a syscall later happens and doesn't originate from one of the
previously blessed regions, the kernel will simply crash the process.
This commit is contained in:
Andreas Kling 2021-02-02 19:56:11 +01:00
parent d57b4128a1
commit 823186031d
10 changed files with 43 additions and 1 deletion

View file

@@ -190,7 +190,8 @@ namespace Kernel {
S(mremap) \
S(set_coredump_metadata) \
S(abort) \
S(anon_create)
S(anon_create) \
S(msyscall)
namespace Syscall {

View file

@@ -327,6 +327,7 @@ static bool procfs$pid_vm(InodeIdentifier identifier, KBufferBuilder& builder)
region_object.add("executable", region.is_executable());
region_object.add("stack", region.is_stack());
region_object.add("shared", region.is_shared());
region_object.add("syscall", region.is_syscall_region());
region_object.add("user_accessible", region.is_user_accessible());
region_object.add("purgeable", region.vmobject().is_anonymous());
if (region.vmobject().is_anonymous()) {

View file

@@ -129,6 +129,7 @@ Region& Process::allocate_split_region(const Region& source_region, const Range&
{
auto& region = add_region(
Region::create_user_accessible(this, range, source_region.vmobject(), offset_in_vmobject, source_region.name(), source_region.access(), source_region.is_cacheable(), source_region.is_shared()));
region.set_syscall_region(source_region.is_syscall_region());
region.set_mmap(source_region.is_mmap());
region.set_stack(source_region.is_stack());
size_t page_offset_in_source_region = (offset_in_vmobject - source_region.offset_in_vmobject()) / PAGE_SIZE;
@@ -423,6 +424,7 @@ void create_signal_trampolines()
InterruptDisabler disabler;
// NOTE: We leak this region.
auto* trampoline_region = MM.allocate_user_accessible_kernel_region(PAGE_SIZE, "Signal trampolines", Region::Access::Read | Region::Access::Write | Region::Access::Execute, false).leak_ptr();
trampoline_region->set_syscall_region(true);
g_return_to_ring3_from_signal_trampoline = trampoline_region->vaddr();
u8* trampoline = (u8*)asm_signal_trampoline;

View file

@@ -266,6 +266,7 @@ public:
int sys$set_mmap_name(Userspace<const Syscall::SC_set_mmap_name_params*>);
int sys$mprotect(void*, size_t, int prot);
int sys$madvise(void*, size_t, int advice);
int sys$msyscall(void*);
int sys$purge(int mode);
int sys$select(const Syscall::SC_select_params*);
int sys$poll(Userspace<const Syscall::SC_poll_params*>);
@@ -510,6 +511,8 @@ public:
PerformanceEventBuffer* perf_events() { return m_perf_event_buffer; }
bool enforces_syscall_regions() const { return m_enforces_syscall_regions; }
private:
friend class MemoryManager;
friend class Scheduler;
@@ -648,6 +651,8 @@ private:
RefPtr<Timer> m_alarm_timer;
bool m_enforces_syscall_regions { false };
bool m_has_promises { false };
u32 m_promises { 0 };
bool m_has_execpromises { false };

View file

@@ -189,6 +189,12 @@ void syscall_handler(TrapFrame* trap)
ASSERT_NOT_REACHED();
}
if (process.enforces_syscall_regions() && !calling_region->is_syscall_region()) {
dbgln("Syscall from non-syscall region");
handle_crash(regs, "Syscall from non-syscall region", SIGSEGV);
ASSERT_NOT_REACHED();
}
process.big_lock().lock();
u32 function = regs.eax;
u32 arg1 = regs.edx;

View file

@@ -549,6 +549,8 @@ int Process::do_exec(NonnullRefPtr<FileDescription> main_program_description, Ve
m_execpromises = 0;
m_has_execpromises = false;
m_enforces_syscall_regions = false;
m_veil_state = VeilState::None;
m_unveiled_paths.clear();

View file

@@ -47,6 +47,7 @@ pid_t Process::sys$fork(RegisterState& regs)
child->m_has_execpromises = m_has_execpromises;
child->m_veil_state = m_veil_state;
child->m_unveiled_paths = m_unveiled_paths.deep_copy();
child->m_enforces_syscall_regions = m_enforces_syscall_regions;
child->m_fds = m_fds;
child->m_sid = m_sid;
child->m_pg = m_pg;

View file

@@ -550,4 +550,22 @@ void* Process::sys$allocate_tls(size_t size)
return m_master_tls_region.unsafe_ptr()->vaddr().as_ptr();
}
// sys$msyscall: bless the memory region containing `address` as a valid
// origin for future syscalls, or — when `address` is null — lock the set
// of blessed regions down permanently for this process.
//
// Returns 0 on success, -EPERM once the region set has been locked down,
// and -EINVAL when no region contains the given address.
int Process::sys$msyscall(void* address)
{
    // Once lockdown has happened, the blessed-region set is immutable.
    if (m_enforces_syscall_regions)
        return -EPERM;

    // A null address is the "I'm done" signal: start enforcing from now on.
    if (address == nullptr) {
        m_enforces_syscall_regions = true;
        return 0;
    }

    // Probe a single byte at the given address to locate its region.
    auto probe_range = Range { VirtualAddress { address }, 1 };
    auto* target_region = find_region_containing(probe_range);
    if (target_region == nullptr)
        return -EINVAL;

    target_region->set_syscall_region(true);
    return 0;
}
}

View file

@@ -105,6 +105,7 @@ OwnPtr<Region> Region::clone(Process& new_owner)
region->copy_purgeable_page_ranges(*this);
region->set_mmap(m_mmap);
region->set_shared(m_shared);
region->set_syscall_region(is_syscall_region());
return region;
}
@@ -127,6 +128,7 @@ OwnPtr<Region> Region::clone(Process& new_owner)
ASSERT(vmobject().is_anonymous());
clone_region->set_stack(true);
}
clone_region->set_syscall_region(is_syscall_region());
clone_region->set_mmap(m_mmap);
return clone_region;
}

View file

@@ -247,6 +247,9 @@ public:
RefPtr<Process> get_owner();
bool is_syscall_region() const { return m_syscall_region; }
void set_syscall_region(bool b) { m_syscall_region = b; }
private:
bool do_remap_vmobject_page_range(size_t page_index, size_t page_count);
@@ -282,6 +285,7 @@ private:
bool m_stack : 1 { false };
bool m_mmap : 1 { false };
bool m_kernel : 1 { false };
bool m_syscall_region : 1 { false };
WeakPtr<Process> m_owner;
};