mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-23 08:35:19 -05:00
vfs-6.11.pidfs
-----BEGIN PGP SIGNATURE----- iHUEABYKAB0WIQRAhzRXHqcMeLMyaSiRxhvAZXjcogUCZpEHIgAKCRCRxhvAZXjc ovTvAQDvxpq1CIJz4arkf6lkI1VX1PcSfyV1+aIsXkrGF01tfwD+PekJH0xJ7RqU ysuMo1uG3i1OO2xIdrdwCXJDng4QggE= =LtRf -----END PGP SIGNATURE----- Merge tag 'vfs-6.11.pidfs' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs Pull pidfs updates from Christian Brauner: "This contains work to make it possible to derive namespace file descriptors from pidfd file descriptors. Right now it is already possible to use a pidfd with setns() to atomically change multiple namespaces at the same time. In other words, it is possible to switch to the namespace context of a process using a pidfd. There is no need to first open namespace file descriptors via procfs. The work included here is an extension of these abilities by allowing to open namespace file descriptors using a pidfd. This means it is now possible to interact with namespaces without ever touching procfs. To this end a new set of ioctls() on pidfds is introduced covering all supported namespace types" * tag 'vfs-6.11.pidfs' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: pidfs: allow retrieval of namespace file descriptors nsfs: add open_namespace() nsproxy: add helper to go from arbitrary namespace to ns_common nsproxy: add a cleanup helper for nsproxy file: add take_fd() cleanup helper
This commit is contained in:
commit
98f3a9a4fd
7 changed files with 179 additions and 30 deletions
|
@ -17,6 +17,7 @@ struct fs_context;
|
||||||
struct pipe_inode_info;
|
struct pipe_inode_info;
|
||||||
struct iov_iter;
|
struct iov_iter;
|
||||||
struct mnt_idmap;
|
struct mnt_idmap;
|
||||||
|
struct ns_common;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* block/bdev.c
|
* block/bdev.c
|
||||||
|
@ -239,6 +240,7 @@ extern void mnt_pin_kill(struct mount *m);
|
||||||
* fs/nsfs.c
|
* fs/nsfs.c
|
||||||
*/
|
*/
|
||||||
extern const struct dentry_operations ns_dentry_operations;
|
extern const struct dentry_operations ns_dentry_operations;
|
||||||
|
int open_namespace(struct ns_common *ns);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* fs/stat.c:
|
* fs/stat.c:
|
||||||
|
|
57
fs/nsfs.c
57
fs/nsfs.c
|
@ -84,40 +84,47 @@ int ns_get_path(struct path *path, struct task_struct *task,
|
||||||
return ns_get_path_cb(path, ns_get_path_task, &args);
|
return ns_get_path_cb(path, ns_get_path_task, &args);
|
||||||
}
|
}
|
||||||
|
|
||||||
int open_related_ns(struct ns_common *ns,
|
/**
|
||||||
struct ns_common *(*get_ns)(struct ns_common *ns))
|
* open_namespace - open a namespace
|
||||||
|
* @ns: the namespace to open
|
||||||
|
*
|
||||||
|
* This will consume a reference to @ns independent of success or failure.
|
||||||
|
*
|
||||||
|
* Return: A file descriptor on success or a negative error code on failure.
|
||||||
|
*/
|
||||||
|
int open_namespace(struct ns_common *ns)
|
||||||
{
|
{
|
||||||
struct path path = {};
|
struct path path __free(path_put) = {};
|
||||||
struct ns_common *relative;
|
|
||||||
struct file *f;
|
struct file *f;
|
||||||
int err;
|
int err;
|
||||||
int fd;
|
|
||||||
|
|
||||||
fd = get_unused_fd_flags(O_CLOEXEC);
|
/* call first to consume reference */
|
||||||
|
err = path_from_stashed(&ns->stashed, nsfs_mnt, ns, &path);
|
||||||
|
if (err < 0)
|
||||||
|
return err;
|
||||||
|
|
||||||
|
CLASS(get_unused_fd, fd)(O_CLOEXEC);
|
||||||
if (fd < 0)
|
if (fd < 0)
|
||||||
return fd;
|
return fd;
|
||||||
|
|
||||||
relative = get_ns(ns);
|
|
||||||
if (IS_ERR(relative)) {
|
|
||||||
put_unused_fd(fd);
|
|
||||||
return PTR_ERR(relative);
|
|
||||||
}
|
|
||||||
|
|
||||||
err = path_from_stashed(&relative->stashed, nsfs_mnt, relative, &path);
|
|
||||||
if (err < 0) {
|
|
||||||
put_unused_fd(fd);
|
|
||||||
return err;
|
|
||||||
}
|
|
||||||
|
|
||||||
f = dentry_open(&path, O_RDONLY, current_cred());
|
f = dentry_open(&path, O_RDONLY, current_cred());
|
||||||
path_put(&path);
|
if (IS_ERR(f))
|
||||||
if (IS_ERR(f)) {
|
return PTR_ERR(f);
|
||||||
put_unused_fd(fd);
|
|
||||||
fd = PTR_ERR(f);
|
|
||||||
} else
|
|
||||||
fd_install(fd, f);
|
|
||||||
|
|
||||||
return fd;
|
fd_install(fd, f);
|
||||||
|
return take_fd(fd);
|
||||||
|
}
|
||||||
|
|
||||||
|
int open_related_ns(struct ns_common *ns,
|
||||||
|
struct ns_common *(*get_ns)(struct ns_common *ns))
|
||||||
|
{
|
||||||
|
struct ns_common *relative;
|
||||||
|
|
||||||
|
relative = get_ns(ns);
|
||||||
|
if (IS_ERR(relative))
|
||||||
|
return PTR_ERR(relative);
|
||||||
|
|
||||||
|
return open_namespace(relative);
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(open_related_ns);
|
EXPORT_SYMBOL_GPL(open_related_ns);
|
||||||
|
|
||||||
|
|
90
fs/pidfs.c
90
fs/pidfs.c
|
@ -11,10 +11,16 @@
|
||||||
#include <linux/proc_fs.h>
|
#include <linux/proc_fs.h>
|
||||||
#include <linux/proc_ns.h>
|
#include <linux/proc_ns.h>
|
||||||
#include <linux/pseudo_fs.h>
|
#include <linux/pseudo_fs.h>
|
||||||
|
#include <linux/ptrace.h>
|
||||||
#include <linux/seq_file.h>
|
#include <linux/seq_file.h>
|
||||||
#include <uapi/linux/pidfd.h>
|
#include <uapi/linux/pidfd.h>
|
||||||
|
#include <linux/ipc_namespace.h>
|
||||||
|
#include <linux/time_namespace.h>
|
||||||
|
#include <linux/utsname.h>
|
||||||
|
#include <net/net_namespace.h>
|
||||||
|
|
||||||
#include "internal.h"
|
#include "internal.h"
|
||||||
|
#include "mount.h"
|
||||||
|
|
||||||
#ifdef CONFIG_PROC_FS
|
#ifdef CONFIG_PROC_FS
|
||||||
/**
|
/**
|
||||||
|
@ -108,11 +114,95 @@ static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts)
|
||||||
return poll_flags;
|
return poll_flags;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
|
||||||
|
{
|
||||||
|
struct task_struct *task __free(put_task) = NULL;
|
||||||
|
struct nsproxy *nsp __free(put_nsproxy) = NULL;
|
||||||
|
struct pid *pid = pidfd_pid(file);
|
||||||
|
struct ns_common *ns_common;
|
||||||
|
|
||||||
|
if (arg)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
task = get_pid_task(pid, PIDTYPE_PID);
|
||||||
|
if (!task)
|
||||||
|
return -ESRCH;
|
||||||
|
|
||||||
|
scoped_guard(task_lock, task) {
|
||||||
|
nsp = task->nsproxy;
|
||||||
|
if (nsp)
|
||||||
|
get_nsproxy(nsp);
|
||||||
|
}
|
||||||
|
if (!nsp)
|
||||||
|
return -ESRCH; /* just pretend it didn't exist */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We're trying to open a file descriptor to the namespace so perform a
|
||||||
|
* filesystem cred ptrace check. Also, we mirror nsfs behavior.
|
||||||
|
*/
|
||||||
|
if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
|
||||||
|
return -EACCES;
|
||||||
|
|
||||||
|
switch (cmd) {
|
||||||
|
/* Namespaces that hang off nsproxy. */
|
||||||
|
case PIDFD_GET_CGROUP_NAMESPACE:
|
||||||
|
get_cgroup_ns(nsp->cgroup_ns);
|
||||||
|
ns_common = to_ns_common(nsp->cgroup_ns);
|
||||||
|
break;
|
||||||
|
case PIDFD_GET_IPC_NAMESPACE:
|
||||||
|
get_ipc_ns(nsp->ipc_ns);
|
||||||
|
ns_common = to_ns_common(nsp->ipc_ns);
|
||||||
|
break;
|
||||||
|
case PIDFD_GET_MNT_NAMESPACE:
|
||||||
|
get_mnt_ns(nsp->mnt_ns);
|
||||||
|
ns_common = to_ns_common(nsp->mnt_ns);
|
||||||
|
break;
|
||||||
|
case PIDFD_GET_NET_NAMESPACE:
|
||||||
|
ns_common = to_ns_common(nsp->net_ns);
|
||||||
|
get_net_ns(ns_common);
|
||||||
|
break;
|
||||||
|
case PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE:
|
||||||
|
get_pid_ns(nsp->pid_ns_for_children);
|
||||||
|
ns_common = to_ns_common(nsp->pid_ns_for_children);
|
||||||
|
break;
|
||||||
|
case PIDFD_GET_TIME_NAMESPACE:
|
||||||
|
get_time_ns(nsp->time_ns);
|
||||||
|
ns_common = to_ns_common(nsp->time_ns);
|
||||||
|
break;
|
||||||
|
case PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE:
|
||||||
|
get_time_ns(nsp->time_ns_for_children);
|
||||||
|
ns_common = to_ns_common(nsp->time_ns_for_children);
|
||||||
|
break;
|
||||||
|
case PIDFD_GET_UTS_NAMESPACE:
|
||||||
|
get_uts_ns(nsp->uts_ns);
|
||||||
|
ns_common = to_ns_common(nsp->uts_ns);
|
||||||
|
break;
|
||||||
|
/* Namespaces that don't hang off nsproxy. */
|
||||||
|
case PIDFD_GET_USER_NAMESPACE:
|
||||||
|
rcu_read_lock();
|
||||||
|
ns_common = to_ns_common(get_user_ns(task_cred_xxx(task, user_ns)));
|
||||||
|
rcu_read_unlock();
|
||||||
|
break;
|
||||||
|
case PIDFD_GET_PID_NAMESPACE:
|
||||||
|
rcu_read_lock();
|
||||||
|
ns_common = to_ns_common(get_pid_ns(task_active_pid_ns(task)));
|
||||||
|
rcu_read_unlock();
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
return -ENOIOCTLCMD;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* open_namespace() unconditionally consumes the reference */
|
||||||
|
return open_namespace(ns_common);
|
||||||
|
}
|
||||||
|
|
||||||
static const struct file_operations pidfs_file_operations = {
|
static const struct file_operations pidfs_file_operations = {
|
||||||
.poll = pidfd_poll,
|
.poll = pidfd_poll,
|
||||||
#ifdef CONFIG_PROC_FS
|
#ifdef CONFIG_PROC_FS
|
||||||
.show_fdinfo = pidfd_show_fdinfo,
|
.show_fdinfo = pidfd_show_fdinfo,
|
||||||
#endif
|
#endif
|
||||||
|
.unlocked_ioctl = pidfd_ioctl,
|
||||||
|
.compat_ioctl = compat_ptr_ioctl,
|
||||||
};
|
};
|
||||||
|
|
||||||
struct pid *pidfd_pid(const struct file *file)
|
struct pid *pidfd_pid(const struct file *file)
|
||||||
|
|
|
@ -63,17 +63,20 @@
|
||||||
|
|
||||||
#define __free(_name) __cleanup(__free_##_name)
|
#define __free(_name) __cleanup(__free_##_name)
|
||||||
|
|
||||||
#define __get_and_null_ptr(p) \
|
#define __get_and_null(p, nullvalue) \
|
||||||
({ __auto_type __ptr = &(p); \
|
({ \
|
||||||
__auto_type __val = *__ptr; \
|
__auto_type __ptr = &(p); \
|
||||||
*__ptr = NULL; __val; })
|
__auto_type __val = *__ptr; \
|
||||||
|
*__ptr = nullvalue; \
|
||||||
|
__val; \
|
||||||
|
})
|
||||||
|
|
||||||
static inline __must_check
|
static inline __must_check
|
||||||
const volatile void * __must_check_fn(const volatile void *val)
|
const volatile void * __must_check_fn(const volatile void *val)
|
||||||
{ return val; }
|
{ return val; }
|
||||||
|
|
||||||
#define no_free_ptr(p) \
|
#define no_free_ptr(p) \
|
||||||
((typeof(p)) __must_check_fn(__get_and_null_ptr(p)))
|
((typeof(p)) __must_check_fn(__get_and_null(p, NULL)))
|
||||||
|
|
||||||
#define return_ptr(p) return no_free_ptr(p)
|
#define return_ptr(p) return no_free_ptr(p)
|
||||||
|
|
||||||
|
|
|
@ -97,6 +97,26 @@ extern void put_unused_fd(unsigned int fd);
|
||||||
DEFINE_CLASS(get_unused_fd, int, if (_T >= 0) put_unused_fd(_T),
|
DEFINE_CLASS(get_unused_fd, int, if (_T >= 0) put_unused_fd(_T),
|
||||||
get_unused_fd_flags(flags), unsigned flags)
|
get_unused_fd_flags(flags), unsigned flags)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* take_fd() will take care to set @fd to -EBADF ensuring that
|
||||||
|
* CLASS(get_unused_fd) won't call put_unused_fd(). This makes it
|
||||||
|
* easier to rely on CLASS(get_unused_fd):
|
||||||
|
*
|
||||||
|
* struct file *f;
|
||||||
|
*
|
||||||
|
* CLASS(get_unused_fd, fd)(O_CLOEXEC);
|
||||||
|
* if (fd < 0)
|
||||||
|
* return fd;
|
||||||
|
*
|
||||||
|
* f = dentry_open(&path, O_RDONLY, current_cred());
|
||||||
|
* if (IS_ERR(f))
|
||||||
|
* return PTR_ERR(f);
|
||||||
|
*
|
||||||
|
* fd_install(fd, f);
|
||||||
|
* return take_fd(fd);
|
||||||
|
*/
|
||||||
|
#define take_fd(fd) __get_and_null(fd, -EBADF)
|
||||||
|
|
||||||
extern void fd_install(unsigned int fd, struct file *file);
|
extern void fd_install(unsigned int fd, struct file *file);
|
||||||
|
|
||||||
int receive_fd(struct file *file, int __user *ufd, unsigned int o_flags);
|
int receive_fd(struct file *file, int __user *ufd, unsigned int o_flags);
|
||||||
|
|
|
@ -42,6 +42,17 @@ struct nsproxy {
|
||||||
};
|
};
|
||||||
extern struct nsproxy init_nsproxy;
|
extern struct nsproxy init_nsproxy;
|
||||||
|
|
||||||
|
#define to_ns_common(__ns) \
|
||||||
|
_Generic((__ns), \
|
||||||
|
struct cgroup_namespace *: &(__ns->ns), \
|
||||||
|
struct ipc_namespace *: &(__ns->ns), \
|
||||||
|
struct net *: &(__ns->ns), \
|
||||||
|
struct pid_namespace *: &(__ns->ns), \
|
||||||
|
struct mnt_namespace *: &(__ns->ns), \
|
||||||
|
struct time_namespace *: &(__ns->ns), \
|
||||||
|
struct user_namespace *: &(__ns->ns), \
|
||||||
|
struct uts_namespace *: &(__ns->ns))
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* A structure to encompass all bits needed to install
|
* A structure to encompass all bits needed to install
|
||||||
* a partial or complete new set of namespaces.
|
* a partial or complete new set of namespaces.
|
||||||
|
@ -112,4 +123,6 @@ static inline void get_nsproxy(struct nsproxy *ns)
|
||||||
refcount_inc(&ns->count);
|
refcount_inc(&ns->count);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
DEFINE_FREE(put_nsproxy, struct nsproxy *, if (_T) put_nsproxy(_T))
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -5,6 +5,7 @@
|
||||||
|
|
||||||
#include <linux/types.h>
|
#include <linux/types.h>
|
||||||
#include <linux/fcntl.h>
|
#include <linux/fcntl.h>
|
||||||
|
#include <linux/ioctl.h>
|
||||||
|
|
||||||
/* Flags for pidfd_open(). */
|
/* Flags for pidfd_open(). */
|
||||||
#define PIDFD_NONBLOCK O_NONBLOCK
|
#define PIDFD_NONBLOCK O_NONBLOCK
|
||||||
|
@ -15,4 +16,17 @@
|
||||||
#define PIDFD_SIGNAL_THREAD_GROUP (1UL << 1)
|
#define PIDFD_SIGNAL_THREAD_GROUP (1UL << 1)
|
||||||
#define PIDFD_SIGNAL_PROCESS_GROUP (1UL << 2)
|
#define PIDFD_SIGNAL_PROCESS_GROUP (1UL << 2)
|
||||||
|
|
||||||
|
#define PIDFS_IOCTL_MAGIC 0xFF
|
||||||
|
|
||||||
|
#define PIDFD_GET_CGROUP_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 1)
|
||||||
|
#define PIDFD_GET_IPC_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 2)
|
||||||
|
#define PIDFD_GET_MNT_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 3)
|
||||||
|
#define PIDFD_GET_NET_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 4)
|
||||||
|
#define PIDFD_GET_PID_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 5)
|
||||||
|
#define PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 6)
|
||||||
|
#define PIDFD_GET_TIME_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 7)
|
||||||
|
#define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8)
|
||||||
|
#define PIDFD_GET_USER_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 9)
|
||||||
|
#define PIDFD_GET_UTS_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 10)
|
||||||
|
|
||||||
#endif /* _UAPI_LINUX_PIDFD_H */
|
#endif /* _UAPI_LINUX_PIDFD_H */
|
||||||
|
|
Loading…
Add table
Reference in a new issue