1
0
Fork 0
mirror of https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git synced 2025-01-24 17:23:25 -05:00

fs: rcu-walk aware d_revalidate method

Require filesystems be aware of .d_revalidate being called in rcu-walk
mode (nd->flags & LOOKUP_RCU). For now do a simple push down, returning
-ECHILD from all implementations.

Signed-off-by: Nick Piggin <npiggin@kernel.dk>
This commit is contained in:
Nick Piggin 2011-01-07 17:49:57 +11:00
parent 44a7d7a878
commit 34286d6662
27 changed files with 215 additions and 61 deletions

View file

@ -9,7 +9,7 @@ be able to use diff(1).
--------------------------- dentry_operations --------------------------
prototypes:
int (*d_revalidate)(struct dentry *, int);
int (*d_revalidate)(struct dentry *, struct nameidata *);
int (*d_hash)(const struct dentry *, const struct inode *,
struct qstr *);
int (*d_compare)(const struct dentry *, const struct inode *,
@ -21,14 +21,14 @@ prototypes:
char *(*d_dname)((struct dentry *dentry, char *buffer, int buflen);
locking rules:
rename_lock ->d_lock may block
d_revalidate: no no yes
d_hash no no no
d_compare: yes no no
d_delete: no yes no
d_release: no no yes
d_iput: no no yes
d_dname: no no no
rename_lock ->d_lock may block rcu-walk
d_revalidate: no no yes (ref-walk) maybe
d_hash no no no maybe
d_compare: yes no no maybe
d_delete: no yes no no
d_release: no no yes no
d_iput: no no yes no
d_dname: no no no no
--------------------------- inode_operations ---------------------------
prototypes:

View file

@ -317,11 +317,10 @@ The detailed design for rcu-walk is like this:
The cases where rcu-walk cannot continue are:
* NULL dentry (ie. any uncached path element)
* parent with d_inode->i_op->permission or ACLs
* dentries with d_revalidate
* Following links
In future patches, permission checks and d_revalidate become rcu-walk aware. It
may be possible eventually to make following links rcu-walk aware.
In future patches, permission checks become rcu-walk aware. It may be possible
eventually to make following links rcu-walk aware.
Uncached path elements will always require dropping to ref-walk mode, at the
very least because i_mutex needs to be grabbed, and objects allocated.

View file

@ -360,3 +360,23 @@ i_dentry to be reinitialized before it is freed, so an:
INIT_LIST_HEAD(&inode->i_dentry);
must be done in the RCU callback.
--
[recommended]
vfs now tries to do path walking in "rcu-walk mode", which avoids
atomic operations and scalability hazards on dentries and inodes (see
Documentation/filesystems/path-walk.txt). d_hash and d_compare changes (above)
are examples of the changes required to support this. For more complex
filesystem callbacks, the vfs drops out of rcu-walk mode before the fs call, so
no changes are required to the filesystem. However, this is costly and loses
the benefits of rcu-walk mode. We will begin to add filesystem callbacks that
are rcu-walk aware, shown below. Filesystems should take advantage of this
where possible.
--
[mandatory]
d_revalidate is a callback that is made on every path element (if
the filesystem provides it), which requires dropping out of rcu-walk mode. This
may now be called in rcu-walk mode (nd->flags & LOOKUP_RCU). -ECHILD should be
returned if the filesystem cannot handle rcu-walk. See
Documentation/filesystems/vfs.txt for more details.

View file

@ -863,6 +863,15 @@ struct dentry_operations {
dcache. Most filesystems leave this as NULL, because all their
dentries in the dcache are valid
d_revalidate may be called in rcu-walk mode (nd->flags & LOOKUP_RCU).
If in rcu-walk mode, the filesystem must revalidate the dentry without
blocking or storing to the dentry, d_parent and d_inode should not be
used without care (because they can go NULL), instead nd->inode should
be used.
If a situation is encountered that rcu-walk cannot handle, return
-ECHILD and it will be called again in ref-walk mode.
d_hash: called when the VFS adds a dentry to the hash table. The first
dentry passed to d_hash is the parent directory that the name is
to be hashed into. The inode is the dentry's inode.

View file

@ -154,13 +154,16 @@ static int try_to_fill_dentry(struct dentry *dentry, struct super_block *sb, str
* yet completely filled in, and revalidate has to delay such
* lookups..
*/
static int autofs_revalidate(struct dentry * dentry, struct nameidata *nd)
static int autofs_revalidate(struct dentry *dentry, struct nameidata *nd)
{
struct inode * dir;
struct autofs_sb_info *sbi;
struct autofs_dir_ent *ent;
int res;
if (nd->flags & LOOKUP_RCU)
return -ECHILD;
lock_kernel();
dir = dentry->d_parent->d_inode;
sbi = autofs_sbi(dir->i_sb);

View file

@ -14,6 +14,7 @@
#include <linux/ctype.h>
#include <linux/net.h>
#include <linux/sched.h>
#include <linux/namei.h>
#include "smb_fs.h"
#include "smb_mount.h"
@ -301,13 +302,20 @@ static const struct dentry_operations smbfs_dentry_operations_case =
* This is the callback when the dcache has a lookup hit.
*/
static int
smb_lookup_validate(struct dentry * dentry, struct nameidata *nd)
smb_lookup_validate(struct dentry *dentry, struct nameidata *nd)
{
struct smb_sb_info *server = server_from_dentry(dentry);
struct inode * inode = dentry->d_inode;
unsigned long age = jiffies - dentry->d_time;
struct smb_sb_info *server;
struct inode *inode;
unsigned long age;
int valid;
if (nd->flags & LOOKUP_RCU)
return -ECHILD;
server = server_from_dentry(dentry);
inode = dentry->d_inode;
age = jiffies - dentry->d_time;
/*
* The default validation is based on dentry age:
* we believe in dentries for a few seconds. (But each

View file

@ -13,6 +13,7 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/pagemap.h>
#include <linux/ctype.h>
#include <linux/sched.h>
@ -607,6 +608,9 @@ static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
void *dir_version;
int ret;
if (nd->flags & LOOKUP_RCU)
return -ECHILD;
vnode = AFS_FS_I(dentry->d_inode);
if (dentry->d_inode)

View file

@ -315,12 +315,19 @@ out_error:
*/
static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
{
struct inode *dir = dentry->d_parent->d_inode;
struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb);
int oz_mode = autofs4_oz_mode(sbi);
struct inode *dir;
struct autofs_sb_info *sbi;
int oz_mode;
int flags = nd ? nd->flags : 0;
int status = 1;
if (flags & LOOKUP_RCU)
return -ECHILD;
dir = dentry->d_parent->d_inode;
sbi = autofs4_sbi(dir->i_sb);
oz_mode = autofs4_oz_mode(sbi);
/* Pending dentry */
spin_lock(&sbi->fs_lock);
if (autofs4_ispending(dentry)) {

View file

@ -990,7 +990,12 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry)
*/
static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd)
{
struct inode *dir = dentry->d_parent->d_inode;
struct inode *dir;
if (nd->flags & LOOKUP_RCU)
return -ECHILD;
dir = dentry->d_parent->d_inode;
dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry,
dentry->d_name.len, dentry->d_name.name, dentry->d_inode,

View file

@ -656,6 +656,9 @@ lookup_out:
static int
cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd)
{
if (nd->flags & LOOKUP_RCU)
return -ECHILD;
if (direntry->d_inode) {
if (cifs_revalidate_dentry(direntry))
return 0;

View file

@ -18,6 +18,7 @@
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/spinlock.h>
#include <linux/namei.h>
#include <asm/uaccess.h>
@ -541,9 +542,13 @@ out:
/* called when a cache lookup succeeds */
static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd)
{
struct inode *inode = de->d_inode;
struct inode *inode;
struct coda_inode_info *cii;
if (nd->flags & LOOKUP_RCU)
return -ECHILD;
inode = de->d_inode;
if (!inode || coda_isroot(inode))
goto out;
if (is_bad_inode(inode))

View file

@ -44,12 +44,17 @@
*/
static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
{
struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
struct dentry *lower_dentry;
struct vfsmount *lower_mnt;
struct dentry *dentry_save;
struct vfsmount *vfsmount_save;
int rc = 1;
if (nd->flags & LOOKUP_RCU)
return -ECHILD;
lower_dentry = ecryptfs_dentry_to_lower(dentry);
lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate)
goto out;
dentry_save = nd->path.dentry;

View file

@ -43,6 +43,9 @@ static int vfat_revalidate_shortname(struct dentry *dentry)
static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd)
{
if (nd->flags & LOOKUP_RCU)
return -ECHILD;
/* This is not negative dentry. Always valid. */
if (dentry->d_inode)
return 1;
@ -51,6 +54,9 @@ static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd)
static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd)
{
if (nd->flags & LOOKUP_RCU)
return -ECHILD;
/*
* This is not negative dentry. Always valid.
*

View file

@ -156,8 +156,12 @@ u64 fuse_get_attr_version(struct fuse_conn *fc)
*/
static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
{
struct inode *inode = entry->d_inode;
struct inode *inode;
if (nd->flags & LOOKUP_RCU)
return -ECHILD;
inode = entry->d_inode;
if (inode && is_bad_inode(inode))
return 0;
else if (fuse_dentry_time(entry) < get_jiffies_64()) {

View file

@ -11,6 +11,7 @@
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/gfs2_ondisk.h>
#include <linux/namei.h>
#include <linux/crc32.h>
#include "gfs2.h"
@ -34,15 +35,23 @@
static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
{
struct dentry *parent = dget_parent(dentry);
struct gfs2_sbd *sdp = GFS2_SB(parent->d_inode);
struct gfs2_inode *dip = GFS2_I(parent->d_inode);
struct inode *inode = dentry->d_inode;
struct dentry *parent;
struct gfs2_sbd *sdp;
struct gfs2_inode *dip;
struct inode *inode;
struct gfs2_holder d_gh;
struct gfs2_inode *ip = NULL;
int error;
int had_lock = 0;
if (nd->flags & LOOKUP_RCU)
return -ECHILD;
parent = dget_parent(dentry);
sdp = GFS2_SB(parent->d_inode);
dip = GFS2_I(parent->d_inode);
inode = dentry->d_inode;
if (inode) {
if (is_bad_inode(inode))
goto invalid;

View file

@ -8,15 +8,20 @@
* This file contains the code to do various system dependent things.
*/
#include <linux/namei.h>
#include "hfs_fs.h"
/* dentry case-handling: just lowercase everything */
static int hfs_revalidate_dentry(struct dentry *dentry, struct nameidata *nd)
{
struct inode *inode = dentry->d_inode;
struct inode *inode;
int diff;
if (nd->flags & LOOKUP_RCU)
return -ECHILD;
inode = dentry->d_inode;
if(!inode)
return 1;

View file

@ -1608,6 +1608,8 @@ out:
static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd)
{
if (nd->flags & LOOKUP_RCU)
return -ECHILD;
/*
* This is not negative dentry. Always valid.
*

View file

@ -563,10 +563,26 @@ void release_open_intent(struct nameidata *nd)
fput(nd->intent.open.file);
}
static int d_revalidate(struct dentry *dentry, struct nameidata *nd)
{
int status;
status = dentry->d_op->d_revalidate(dentry, nd);
if (status == -ECHILD) {
if (nameidata_dentry_drop_rcu(nd, dentry))
return status;
status = dentry->d_op->d_revalidate(dentry, nd);
}
return status;
}
static inline struct dentry *
do_revalidate(struct dentry *dentry, struct nameidata *nd)
{
int status = dentry->d_op->d_revalidate(dentry, nd);
int status;
status = d_revalidate(dentry, nd);
if (unlikely(status <= 0)) {
/*
* The dentry failed validation.
@ -574,14 +590,20 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd)
* the dentry otherwise d_revalidate is asking us
* to return a fail status.
*/
if (!status) {
if (status < 0) {
/* If we're in rcu-walk, we don't have a ref */
if (!(nd->flags & LOOKUP_RCU))
dput(dentry);
dentry = ERR_PTR(status);
} else {
/* Don't d_invalidate in rcu-walk mode */
if (nameidata_dentry_drop_rcu_maybe(nd, dentry))
return ERR_PTR(-ECHILD);
if (!d_invalidate(dentry)) {
dput(dentry);
dentry = NULL;
}
} else {
dput(dentry);
dentry = ERR_PTR(status);
}
}
return dentry;
@ -626,7 +648,7 @@ force_reval_path(struct path *path, struct nameidata *nd)
if (!need_reval_dot(dentry))
return 0;
status = dentry->d_op->d_revalidate(dentry, nd);
status = d_revalidate(dentry, nd);
if (status > 0)
return 0;
@ -1039,12 +1061,8 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
return -ECHILD;
nd->seq = seq;
if (dentry->d_flags & DCACHE_OP_REVALIDATE) {
/* We commonly drop rcu-walk here */
if (nameidata_dentry_drop_rcu(nd, dentry))
return -ECHILD;
if (dentry->d_flags & DCACHE_OP_REVALIDATE)
goto need_revalidate;
}
path->mnt = mnt;
path->dentry = dentry;
__follow_mount_rcu(nd, path, inode);
@ -1292,12 +1310,11 @@ return_reval:
* We may need to check the cached dentry for staleness.
*/
if (need_reval_dot(nd->path.dentry)) {
if (nameidata_drop_rcu_maybe(nd))
return -ECHILD;
err = -ESTALE;
/* Note: we do not d_invalidate() */
if (!nd->path.dentry->d_op->d_revalidate(
nd->path.dentry, nd))
err = d_revalidate(nd->path.dentry, nd);
if (!err)
err = -ESTALE;
if (err < 0)
break;
}
return_base:
@ -2080,10 +2097,11 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
dir = nd->path.dentry;
case LAST_DOT:
if (need_reval_dot(dir)) {
if (!dir->d_op->d_revalidate(dir, nd)) {
error = d_revalidate(nd->path.dentry, nd);
if (!error)
error = -ESTALE;
if (error < 0)
goto exit;
}
}
/* fallthrough */
case LAST_ROOT:

View file

@ -17,6 +17,7 @@
#include <linux/kernel.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/namei.h>
#include <asm/uaccess.h>
#include <asm/byteorder.h>
@ -308,6 +309,9 @@ ncp_lookup_validate(struct dentry *dentry, struct nameidata *nd)
int res, val = 0, len;
__u8 __name[NCP_MAXPATHLEN + 1];
if (nd->flags & LOOKUP_RCU)
return -ECHILD;
parent = dget_parent(dentry);
dir = parent->d_inode;

View file

@ -29,6 +29,7 @@
#include <linux/vfs.h>
#include <linux/mount.h>
#include <linux/seq_file.h>
#include <linux/namei.h>
#include <linux/ncp_fs.h>

View file

@ -938,7 +938,8 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry)
* component of the path.
* We check for this using LOOKUP_CONTINUE and LOOKUP_PARENT.
*/
static inline unsigned int nfs_lookup_check_intent(struct nameidata *nd, unsigned int mask)
static inline unsigned int nfs_lookup_check_intent(struct nameidata *nd,
unsigned int mask)
{
if (nd->flags & (LOOKUP_CONTINUE|LOOKUP_PARENT))
return 0;
@ -1018,7 +1019,7 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
* If the parent directory is seen to have changed, we throw out the
* cached dentry and do a new lookup.
*/
static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
{
struct inode *dir;
struct inode *inode;
@ -1027,6 +1028,9 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
struct nfs_fattr *fattr = NULL;
int error;
if (nd->flags & LOOKUP_RCU)
return -ECHILD;
parent = dget_parent(dentry);
dir = parent->d_inode;
nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);

View file

@ -52,9 +52,15 @@ void ocfs2_dentry_attach_gen(struct dentry *dentry)
static int ocfs2_dentry_revalidate(struct dentry *dentry,
struct nameidata *nd)
{
struct inode *inode = dentry->d_inode;
struct inode *inode;
int ret = 0; /* if all else fails, just return false */
struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
struct ocfs2_super *osb;
if (nd->flags & LOOKUP_RCU)
return -ECHILD;
inode = dentry->d_inode;
osb = OCFS2_SB(dentry->d_sb);
mlog_entry("(0x%p, '%.*s')\n", dentry,
dentry->d_name.len, dentry->d_name.name);

View file

@ -1719,10 +1719,16 @@ static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat
*/
static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
{
struct inode *inode = dentry->d_inode;
struct task_struct *task = get_proc_task(inode);
struct inode *inode;
struct task_struct *task;
const struct cred *cred;
if (nd && nd->flags & LOOKUP_RCU)
return -ECHILD;
inode = dentry->d_inode;
task = get_proc_task(inode);
if (task) {
if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
task_dumpable(task)) {
@ -1888,12 +1894,19 @@ static int proc_fd_link(struct inode *inode, struct path *path)
static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
{
struct inode *inode = dentry->d_inode;
struct task_struct *task = get_proc_task(inode);
int fd = proc_fd(inode);
struct inode *inode;
struct task_struct *task;
int fd;
struct files_struct *files;
const struct cred *cred;
if (nd && nd->flags & LOOKUP_RCU)
return -ECHILD;
inode = dentry->d_inode;
task = get_proc_task(inode);
fd = proc_fd(inode);
if (task) {
files = get_files_struct(task);
if (files) {
@ -2563,8 +2576,14 @@ static const struct pid_entry proc_base_stuff[] = {
*/
static int proc_base_revalidate(struct dentry *dentry, struct nameidata *nd)
{
struct inode *inode = dentry->d_inode;
struct task_struct *task = get_proc_task(inode);
struct inode *inode;
struct task_struct *task;
if (nd->flags & LOOKUP_RCU)
return -ECHILD;
inode = dentry->d_inode;
task = get_proc_task(inode);
if (task) {
put_task_struct(task);
return 1;

View file

@ -5,6 +5,7 @@
#include <linux/sysctl.h>
#include <linux/proc_fs.h>
#include <linux/security.h>
#include <linux/namei.h>
#include "internal.h"
static const struct dentry_operations proc_sys_dentry_operations;
@ -389,6 +390,8 @@ static const struct inode_operations proc_sys_dir_operations = {
static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd)
{
if (nd->flags & LOOKUP_RCU)
return -ECHILD;
return !PROC_I(dentry->d_inode)->sysctl->unregistering;
}

View file

@ -972,6 +972,8 @@ int reiserfs_permission(struct inode *inode, int mask)
static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd)
{
if (nd->flags & LOOKUP_RCU)
return -ECHILD;
return -EPERM;
}

View file

@ -239,9 +239,13 @@ static int sysfs_dentry_delete(const struct dentry *dentry)
static int sysfs_dentry_revalidate(struct dentry *dentry, struct nameidata *nd)
{
struct sysfs_dirent *sd = dentry->d_fsdata;
struct sysfs_dirent *sd;
int is_dir;
if (nd->flags & LOOKUP_RCU)
return -ECHILD;
sd = dentry->d_fsdata;
mutex_lock(&sysfs_mutex);
/* The sysfs dirent has been deleted */

View file

@ -190,7 +190,6 @@ struct dentry_operations {
#define DCACHE_OP_REVALIDATE 0x4000
#define DCACHE_OP_DELETE 0x8000
extern spinlock_t dcache_inode_lock;
extern seqlock_t rename_lock;