mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-23 08:35:19 -05:00
bcachefs: bch2_inode_or_descendents_is_open()
fsck can now correctly check if inodes in interior snapshot nodes are open/in use:

- Tweak the vfs inode rhashtable so that the subvolume ID isn't hashed, meaning inums in different subvolumes will hash to the same slot. Note that this is a hack, and will cause problems if anyone ever has the same file in many different snapshots open all at the same time.

- Then check if any of those subvolumes is a descendent of the snapshot ID being checked.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
84878e8245
commit
9d86178782
4 changed files with 103 additions and 21 deletions
106
fs/bcachefs/fs.c
106
fs/bcachefs/fs.c
|
@ -157,6 +157,20 @@ static bool subvol_inum_eq(subvol_inum a, subvol_inum b)
|
||||||
return a.subvol == b.subvol && a.inum == b.inum;
|
return a.subvol == b.subvol && a.inum == b.inum;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static u32 bch2_vfs_inode_hash_fn(const void *data, u32 len, u32 seed)
|
||||||
|
{
|
||||||
|
const subvol_inum *inum = data;
|
||||||
|
|
||||||
|
return jhash(&inum->inum, sizeof(inum->inum), seed);
|
||||||
|
}
|
||||||
|
|
||||||
|
static u32 bch2_vfs_inode_obj_hash_fn(const void *data, u32 len, u32 seed)
|
||||||
|
{
|
||||||
|
const struct bch_inode_info *inode = data;
|
||||||
|
|
||||||
|
return bch2_vfs_inode_hash_fn(&inode->ei_inum, sizeof(inode->ei_inum), seed);
|
||||||
|
}
|
||||||
|
|
||||||
static int bch2_vfs_inode_cmp_fn(struct rhashtable_compare_arg *arg,
|
static int bch2_vfs_inode_cmp_fn(struct rhashtable_compare_arg *arg,
|
||||||
const void *obj)
|
const void *obj)
|
||||||
{
|
{
|
||||||
|
@ -170,32 +184,93 @@ static const struct rhashtable_params bch2_vfs_inodes_params = {
|
||||||
.head_offset = offsetof(struct bch_inode_info, hash),
|
.head_offset = offsetof(struct bch_inode_info, hash),
|
||||||
.key_offset = offsetof(struct bch_inode_info, ei_inum),
|
.key_offset = offsetof(struct bch_inode_info, ei_inum),
|
||||||
.key_len = sizeof(subvol_inum),
|
.key_len = sizeof(subvol_inum),
|
||||||
|
.hashfn = bch2_vfs_inode_hash_fn,
|
||||||
|
.obj_hashfn = bch2_vfs_inode_obj_hash_fn,
|
||||||
.obj_cmpfn = bch2_vfs_inode_cmp_fn,
|
.obj_cmpfn = bch2_vfs_inode_cmp_fn,
|
||||||
.automatic_shrinking = true,
|
.automatic_shrinking = true,
|
||||||
};
|
};
|
||||||
|
|
||||||
static struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum)
|
int bch2_inode_or_descendents_is_open(struct btree_trans *trans, struct bpos p)
|
||||||
{
|
{
|
||||||
return rhashtable_lookup_fast(&c->vfs_inodes_table, &inum, bch2_vfs_inodes_params);
|
struct bch_fs *c = trans->c;
|
||||||
}
|
struct rhashtable *ht = &c->vfs_inodes_table;
|
||||||
|
subvol_inum inum = (subvol_inum) { .inum = p.offset };
|
||||||
|
DARRAY(u32) subvols;
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
bool bch2_inode_is_open(struct bch_fs *c, struct bpos p)
|
|
||||||
{
|
|
||||||
if (!test_bit(BCH_FS_started, &c->flags))
|
if (!test_bit(BCH_FS_started, &c->flags))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
subvol_inum inum = {
|
darray_init(&subvols);
|
||||||
.subvol = snapshot_t(c, p.snapshot)->subvol,
|
restart_from_top:
|
||||||
.inum = p.offset,
|
|
||||||
};
|
|
||||||
|
|
||||||
/* snapshot tree interior node, can't safely delete while online (yet) */
|
/*
|
||||||
if (!inum.subvol) {
|
* Tweaked version of __rhashtable_lookup(); we need to get a list of
|
||||||
bch_warn_ratelimited(c, "%s(): snapshot %u has no subvol, unlinked but can't safely delete", __func__, p.snapshot);
|
* subvolumes in which the given inode number is open.
|
||||||
return true;
|
*
|
||||||
|
* For this to work, we don't include the subvolume ID in the key that
|
||||||
|
* we hash - all inodes with the same inode number regardless of
|
||||||
|
* subvolume will hash to the same slot.
|
||||||
|
*
|
||||||
|
* This will be less than ideal if the same file is ever open
|
||||||
|
* simultaneously in many different snapshots:
|
||||||
|
*/
|
||||||
|
rcu_read_lock();
|
||||||
|
struct rhash_lock_head __rcu *const *bkt;
|
||||||
|
struct rhash_head *he;
|
||||||
|
unsigned int hash;
|
||||||
|
struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht);
|
||||||
|
restart:
|
||||||
|
hash = rht_key_hashfn(ht, tbl, &inum, bch2_vfs_inodes_params);
|
||||||
|
bkt = rht_bucket(tbl, hash);
|
||||||
|
do {
|
||||||
|
struct bch_inode_info *inode;
|
||||||
|
|
||||||
|
rht_for_each_entry_rcu_from(inode, he, rht_ptr_rcu(bkt), tbl, hash, hash) {
|
||||||
|
if (inode->ei_inum.inum == inum.inum) {
|
||||||
|
ret = darray_push_gfp(&subvols, inode->ei_inum.subvol,
|
||||||
|
GFP_NOWAIT|__GFP_NOWARN);
|
||||||
|
if (ret) {
|
||||||
|
rcu_read_unlock();
|
||||||
|
ret = darray_make_room(&subvols, 1);
|
||||||
|
if (ret)
|
||||||
|
goto err;
|
||||||
|
subvols.nr = 0;
|
||||||
|
goto restart_from_top;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* An object might have been moved to a different hash chain,
|
||||||
|
* while we walk along it - better check and retry.
|
||||||
|
*/
|
||||||
|
} while (he != RHT_NULLS_MARKER(bkt));
|
||||||
|
|
||||||
|
/* Ensure we see any new tables. */
|
||||||
|
smp_rmb();
|
||||||
|
|
||||||
|
tbl = rht_dereference_rcu(tbl->future_tbl, ht);
|
||||||
|
if (unlikely(tbl))
|
||||||
|
goto restart;
|
||||||
|
rcu_read_unlock();
|
||||||
|
|
||||||
|
darray_for_each(subvols, i) {
|
||||||
|
u32 snap;
|
||||||
|
ret = bch2_subvolume_get_snapshot(trans, *i, &snap);
|
||||||
|
if (ret)
|
||||||
|
goto err;
|
||||||
|
|
||||||
|
ret = bch2_snapshot_is_ancestor(c, snap, p.snapshot);
|
||||||
|
if (ret)
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
err:
|
||||||
|
darray_exit(&subvols);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
return __bch2_inode_hash_find(c, inum) != NULL;
|
static struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum)
|
||||||
|
{
|
||||||
|
return rhashtable_lookup_fast(&c->vfs_inodes_table, &inum, bch2_vfs_inodes_params);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void __wait_on_freeing_inode(struct bch_fs *c,
|
static void __wait_on_freeing_inode(struct bch_fs *c,
|
||||||
|
@ -271,7 +346,8 @@ static struct bch_inode_info *bch2_inode_hash_insert(struct bch_fs *c,
|
||||||
|
|
||||||
set_bit(EI_INODE_HASHED, &inode->ei_flags);
|
set_bit(EI_INODE_HASHED, &inode->ei_flags);
|
||||||
retry:
|
retry:
|
||||||
if (unlikely(rhashtable_lookup_insert_fast(&c->vfs_inodes_table,
|
if (unlikely(rhashtable_lookup_insert_key(&c->vfs_inodes_table,
|
||||||
|
&inode->ei_inum,
|
||||||
&inode->hash,
|
&inode->hash,
|
||||||
bch2_vfs_inodes_params))) {
|
bch2_vfs_inodes_params))) {
|
||||||
old = bch2_inode_hash_find(c, trans, inode->ei_inum);
|
old = bch2_inode_hash_find(c, trans, inode->ei_inum);
|
||||||
|
|
|
@ -146,6 +146,8 @@ struct bch_inode_info *
|
||||||
__bch2_create(struct mnt_idmap *, struct bch_inode_info *,
|
__bch2_create(struct mnt_idmap *, struct bch_inode_info *,
|
||||||
struct dentry *, umode_t, dev_t, subvol_inum, unsigned);
|
struct dentry *, umode_t, dev_t, subvol_inum, unsigned);
|
||||||
|
|
||||||
|
int bch2_inode_or_descendents_is_open(struct btree_trans *trans, struct bpos p);
|
||||||
|
|
||||||
int bch2_fs_quota_transfer(struct bch_fs *,
|
int bch2_fs_quota_transfer(struct bch_fs *,
|
||||||
struct bch_inode_info *,
|
struct bch_inode_info *,
|
||||||
struct bch_qid,
|
struct bch_qid,
|
||||||
|
@ -179,8 +181,6 @@ void bch2_inode_update_after_write(struct btree_trans *,
|
||||||
int __must_check bch2_write_inode(struct bch_fs *, struct bch_inode_info *,
|
int __must_check bch2_write_inode(struct bch_fs *, struct bch_inode_info *,
|
||||||
inode_set_fn, void *, unsigned);
|
inode_set_fn, void *, unsigned);
|
||||||
|
|
||||||
bool bch2_inode_is_open(struct bch_fs *c, struct bpos p);
|
|
||||||
|
|
||||||
int bch2_setattr_nonsize(struct mnt_idmap *,
|
int bch2_setattr_nonsize(struct mnt_idmap *,
|
||||||
struct bch_inode_info *,
|
struct bch_inode_info *,
|
||||||
struct iattr *);
|
struct iattr *);
|
||||||
|
@ -198,7 +198,7 @@ int bch2_vfs_init(void);
|
||||||
|
|
||||||
#define bch2_inode_update_after_write(_trans, _inode, _inode_u, _fields) ({ do {} while (0); })
|
#define bch2_inode_update_after_write(_trans, _inode, _inode_u, _fields) ({ do {} while (0); })
|
||||||
|
|
||||||
static inline bool bch2_inode_is_open(struct bch_fs *c, struct bpos p) { return false; }
|
static inline int bch2_inode_or_descendents_is_open(struct btree_trans *trans, struct bpos p) { return 0; }
|
||||||
|
|
||||||
static inline void bch2_evict_subvolume_inodes(struct bch_fs *c,
|
static inline void bch2_evict_subvolume_inodes(struct bch_fs *c,
|
||||||
snapshot_id_list *s) {}
|
snapshot_id_list *s) {}
|
||||||
|
|
|
@ -1213,7 +1213,11 @@ static int check_inode(struct btree_trans *trans,
|
||||||
if (ret)
|
if (ret)
|
||||||
goto err;
|
goto err;
|
||||||
} else {
|
} else {
|
||||||
if (fsck_err_on(!bch2_inode_is_open(c, k.k->p),
|
ret = bch2_inode_or_descendents_is_open(trans, k.k->p);
|
||||||
|
if (ret < 0)
|
||||||
|
goto err;
|
||||||
|
|
||||||
|
if (fsck_err_on(!ret,
|
||||||
trans, inode_unlinked_and_not_open,
|
trans, inode_unlinked_and_not_open,
|
||||||
"inode %llu%u unlinked and not open",
|
"inode %llu%u unlinked and not open",
|
||||||
u.bi_inum, u.bi_snapshot)) {
|
u.bi_inum, u.bi_snapshot)) {
|
||||||
|
@ -1221,6 +1225,7 @@ static int check_inode(struct btree_trans *trans,
|
||||||
bch_err_msg(c, ret, "in fsck deleting inode");
|
bch_err_msg(c, ret, "in fsck deleting inode");
|
||||||
goto err_noprint;
|
goto err_noprint;
|
||||||
}
|
}
|
||||||
|
ret = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1244,8 +1244,9 @@ static int delete_ancestor_snapshot_inodes(struct btree_trans *trans, struct bpo
|
||||||
if (!unlinked)
|
if (!unlinked)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
if (bch2_inode_is_open(trans->c, pos))
|
ret = lockrestart_do(trans, bch2_inode_or_descendents_is_open(trans, pos));
|
||||||
return 0;
|
if (ret)
|
||||||
|
return ret < 0 ? ret : 0;
|
||||||
|
|
||||||
ret = __bch2_inode_rm_snapshot(trans, pos.offset, pos.snapshot);
|
ret = __bch2_inode_rm_snapshot(trans, pos.offset, pos.snapshot);
|
||||||
if (ret)
|
if (ret)
|
||||||
|
|
Loading…
Reference in a new issue