Kernel+Userland: Introduce the copy_mount syscall

This new syscall will be used by the upcoming runc (run-container)
utility.

In addition to that, this syscall allows userspace to neatly copy RAMFS
instances to other places, which was not possible in the past.
This commit is contained in:
Liav A. 2024-01-27 19:36:26 +02:00 committed by Tim Schumacher
parent 816f2efb4e
commit 4370bbb3ad
8 changed files with 105 additions and 0 deletions

View file

@ -63,6 +63,7 @@ enum class NeedsBigProcessLock {
S(clock_settime, NeedsBigProcessLock::No) \
S(close, NeedsBigProcessLock::No) \
S(connect, NeedsBigProcessLock::No) \
S(copy_mount, NeedsBigProcessLock::No) \
S(create_inode_watcher, NeedsBigProcessLock::No) \
S(create_thread, NeedsBigProcessLock::No) \
S(dbgputstr, NeedsBigProcessLock::No) \
@ -451,6 +452,14 @@ struct SC_umount_params {
StringArgument target;
};
// Parameter block passed from userspace to the copy_mount syscall.
// NOTE: This struct is shared between kernel and userland, so field order and types
// must stay in sync on both sides.
struct SC_copy_mount_params {
// ID of the VFS root context in which `original_path` is looked up.
// The userland wrapper passes -1 when the caller did not specify an ID.
int original_vfs_root_context_id;
// ID of the VFS root context in which `target_path` is looked up and that
// receives the new mount. The userland wrapper passes -1 when unspecified.
int target_vfs_root_context_id;
// Path of the existing mount to copy.
StringArgument original_path;
// Path of the mount point to create in the target context.
StringArgument target_path;
// Mount flags for the new mount. The syscall rejects MS_REMOUNT and MS_BIND with EINVAL.
int flags;
};
struct SC_pledge_params {
StringArgument promises;
StringArgument execpromises;

View file

@ -202,6 +202,25 @@ ErrorOr<void> VirtualFileSystem::mount(VFSRootContext& context, MountFile& mount
});
}
// Copies an existing mount into `destination_context`.
//
// `original_custody` must refer to the root inode of its filesystem, otherwise EINVAL is returned.
// If `new_mount_point` is the root custody of `destination_context`, the root mount of that
// context is replaced via pivot_root_by_copying_mounted_fs_instance(); otherwise the filesystem
// is added as a regular (non-bind) mount on `new_mount_point` with the given `flags`.
ErrorOr<void> VirtualFileSystem::copy_mount(Custody& original_custody, VFSRootContext& destination_context, Custody& new_mount_point, int flags)
{
    auto& source_inode = original_custody.inode();
    auto& source_fs = source_inode.fs();

    // NOTE: Only the root inode of a filesystem may be used as the source of a copy.
    // This prevents copying bindmounts, but supporting that was never the intention
    // of this functionality.
    if (&source_inode != &source_fs.root_inode())
        return EINVAL;

    // NOTE: When the destination is the root custody ("/") of the destination context,
    // `pivot_root` that context onto the source filesystem instead of stacking a new
    // mount on top of it.
    auto destination_root = destination_context.root_custody().with([](auto& custody) -> NonnullRefPtr<Custody> { return custody; });
    bool const targets_context_root = destination_root.ptr() == &new_mount_point;
    if (targets_context_root)
        return pivot_root_by_copying_mounted_fs_instance(destination_context, source_fs, flags);

    return destination_context.add_new_mount(VFSRootContext::DoBindMount::No, source_inode, new_mount_point, flags);
}
ErrorOr<void> VirtualFileSystem::bind_mount(VFSRootContext& context, Custody& source, Custody& mount_point, int flags)
{
return context.add_new_mount(VFSRootContext::DoBindMount::Yes, source.inode(), mount_point, flags);

View file

@ -73,6 +73,7 @@ public:
ErrorOr<void> pivot_root_by_copying_mounted_fs_instance(VFSRootContext&, FileSystem& fs, int root_mount_flags);
ErrorOr<void> bind_mount(VFSRootContext&, Custody& source, Custody& mount_point, int flags);
ErrorOr<void> copy_mount(Custody& source, VFSRootContext& destination, Custody& mount_point, int flags);
ErrorOr<void> remount(VFSRootContext&, Custody& mount_point, int new_flags);
ErrorOr<void> unmount(VFSRootContext&, Custody& mount_point);
ErrorOr<void> unmount(VFSRootContext&, Inode& guest_inode, StringView custody_path);

View file

@ -13,6 +13,35 @@
namespace Kernel {
// copy_mount syscall entry point.
//
// Requires the `mount` pledge promise and superuser credentials (EPERM otherwise).
// Rejects MS_REMOUNT and MS_BIND in the flags with EINVAL, resolves the original and
// target paths in their respective VFS root contexts, and then asks the VFS to copy
// the mount. Returns 0 on success.
ErrorOr<FlatPtr> Process::sys$copy_mount(Userspace<Syscall::SC_copy_mount_params const*> user_params)
{
    VERIFY_NO_PROCESS_BIG_LOCK(this);
    TRY(require_promise(Pledge::mount));

    if (!this->credentials()->is_superuser())
        return EPERM;

    auto mount_params = TRY(copy_typed_from_user(user_params));

    // NOTE: Neither MS_REMOUNT nor MS_BIND is meaningful for a copied mount, and we never
    // want these flags to appear in the mount table, so reject them with EINVAL.
    int const rejected_flags = MS_REMOUNT | MS_BIND;
    if (mount_params.flags & rejected_flags)
        return Error::from_errno(EINVAL);

    auto source_path = TRY(try_copy_kstring_from_user(mount_params.original_path));
    auto destination_path = TRY(try_copy_kstring_from_user(mount_params.target_path));

    auto source = TRY(context_for_mount_operation(mount_params.original_vfs_root_context_id, source_path->view()));
    auto destination = TRY(context_for_mount_operation(mount_params.target_vfs_root_context_id, destination_path->view()));

    TRY(VirtualFileSystem::the().copy_mount(
        source.custody,
        destination.vfs_root_context,
        destination.custody,
        mount_params.flags));
    return 0;
}
ErrorOr<FlatPtr> Process::sys$fsopen(Userspace<Syscall::SC_fsopen_params const*> user_params)
{
VERIFY_NO_PROCESS_BIG_LOCK(this);

View file

@ -459,6 +459,7 @@ public:
ErrorOr<FlatPtr> sys$kill_thread(pid_t tid, int signal);
ErrorOr<FlatPtr> sys$rename(Userspace<Syscall::SC_rename_params const*>);
ErrorOr<FlatPtr> sys$mknod(Userspace<Syscall::SC_mknod_params const*>);
ErrorOr<FlatPtr> sys$copy_mount(Userspace<Syscall::SC_copy_mount_params const*> user_params);
ErrorOr<FlatPtr> sys$realpath(Userspace<Syscall::SC_realpath_params const*>);
ErrorOr<FlatPtr> sys$getrandom(Userspace<void*>, size_t, unsigned int);
ErrorOr<FlatPtr> sys$getkeymap(Userspace<Syscall::SC_getkeymap_params const*>);

View file

@ -270,6 +270,22 @@ ErrorOr<void> ptrace_peekbuf(pid_t tid, void const* tracee_addr, Bytes destinati
HANDLE_SYSCALL_RETURN_VALUE("ptrace_peekbuf", rc, {});
}
// Userland wrapper around the copy_mount syscall.
//
// Returns EFAULT if either mountpoint view is null. An unset context ID is passed to
// the kernel as -1. Any negative syscall result is turned into an Error.
ErrorOr<void> copy_mount(Optional<i32> original_vfs_context_id, Optional<i32> target_vfs_context_id, StringView original_mountpoint, StringView target_mountpoint, int flags)
{
    if (target_mountpoint.is_null())
        return Error::from_errno(EFAULT);
    if (original_mountpoint.is_null())
        return Error::from_errno(EFAULT);

    auto const original_context_id = original_vfs_context_id.value_or(-1);
    auto const target_context_id = target_vfs_context_id.value_or(-1);

    // The views are not necessarily null-terminated, so pass pointer and length explicitly.
    Syscall::SC_copy_mount_params params {
        original_context_id,
        target_context_id,
        { original_mountpoint.characters_without_null_termination(), original_mountpoint.length() },
        { target_mountpoint.characters_without_null_termination(), target_mountpoint.length() },
        flags,
    };

    int rc = syscall(SC_copy_mount, &params);
    HANDLE_SYSCALL_RETURN_VALUE("copy_mount", rc, {});
}
ErrorOr<void> bindmount(Optional<i32> vfs_context_id, int source_fd, StringView target, int flags)
{
if (target.is_null())

View file

@ -57,6 +57,7 @@ ErrorOr<int> recvfd(int sockfd, int options);
ErrorOr<void> ptrace_peekbuf(pid_t tid, void const* tracee_addr, Bytes destination_buf);
ErrorOr<void> mount(Optional<i32> vfs_context_id, int source_fd, StringView target, StringView fs_type, int flags);
ErrorOr<void> bindmount(Optional<i32> vfs_context_id, int source_fd, StringView target, int flags);
ErrorOr<void> copy_mount(Optional<i32> original_vfs_context_id, Optional<i32> target_vfs_context_id, StringView original_mountpoint, StringView target_mountpoint, int flags);
ErrorOr<int> fsopen(StringView fs_type, int flags);
ErrorOr<void> fsmount(Optional<i32> vfs_context_id, int mount_fd, int source_fd, StringView target_path);
ErrorOr<void> remount(Optional<i32> vfs_context_id, StringView target, int flags);

View file

@ -0,0 +1,29 @@
/*
* Copyright (c) 2024, Liav A. <liavalb@hotmail.co.il>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <LibCore/ArgsParser.h>
#include <LibCore/System.h>
#include <LibMain/Main.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
// Entry point of the copy_mount utility: takes a source path and a mount point from the
// command line and copies the mount using the default VFS root contexts and no flags.
ErrorOr<int> serenity_main(Main::Arguments arguments)
{
    StringView original_mountpoint;
    StringView target_mountpoint;

    // FIXME: Possibly allow to pass VFS root context IDs and flags?
    Core::ArgsParser parser;
    parser.add_positional_argument(original_mountpoint, "Source path", "source", Core::ArgsParser::Required::Yes);
    parser.add_positional_argument(target_mountpoint, "Mount point", "mountpoint", Core::ArgsParser::Required::Yes);
    parser.parse(arguments);

    // Both positional arguments are required, so they are expected to be populated here.
    VERIFY(!(original_mountpoint.is_null() || original_mountpoint.is_empty()));
    VERIFY(!(target_mountpoint.is_null() || target_mountpoint.is_empty()));

    // Empty Optionals select the default source and target VFS root contexts.
    TRY(Core::System::copy_mount({}, {}, original_mountpoint, target_mountpoint, 0));
    return 0;
}