mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-24 17:23:25 -05:00
ceph: choose readdir frag based on previous readdir reply
The dirfragtree is lazily updated, so it is not always accurate. An infinite loop can happen in the following circumstance:
- The client sends a request to read frag A.
- Frag A has been fragmented into frags B and C, so the MDS fills the reply with the contents of frag B.
- The client wants to read the next frag, C, but ceph_choose_frag(frag value of C) returns frag A.
The fix is to use the previous readdir reply to calculate the next readdir frag when possible.
Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
This commit is contained in:
parent
e010dd0ada
commit
b50c2de51e
1 changed file with 12 additions and 7 deletions
|
@ -294,7 +294,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
|
||||||
struct ceph_mds_client *mdsc = fsc->mdsc;
|
struct ceph_mds_client *mdsc = fsc->mdsc;
|
||||||
int i;
|
int i;
|
||||||
int err;
|
int err;
|
||||||
u32 ftype;
|
unsigned frag = -1;
|
||||||
struct ceph_mds_reply_info_parsed *rinfo;
|
struct ceph_mds_reply_info_parsed *rinfo;
|
||||||
|
|
||||||
dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos);
|
dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos);
|
||||||
|
@ -341,7 +341,6 @@ more:
|
||||||
/* do we have the correct frag content buffered? */
|
/* do we have the correct frag content buffered? */
|
||||||
if (need_send_readdir(fi, ctx->pos)) {
|
if (need_send_readdir(fi, ctx->pos)) {
|
||||||
struct ceph_mds_request *req;
|
struct ceph_mds_request *req;
|
||||||
unsigned frag;
|
|
||||||
int op = ceph_snap(inode) == CEPH_SNAPDIR ?
|
int op = ceph_snap(inode) == CEPH_SNAPDIR ?
|
||||||
CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR;
|
CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR;
|
||||||
|
|
||||||
|
@ -352,8 +351,11 @@ more:
|
||||||
}
|
}
|
||||||
|
|
||||||
if (is_hash_order(ctx->pos)) {
|
if (is_hash_order(ctx->pos)) {
|
||||||
frag = ceph_choose_frag(ci, fpos_hash(ctx->pos),
|
/* fragtree isn't always accurate. choose frag
|
||||||
NULL, NULL);
|
* based on previous reply when possible. */
|
||||||
|
if (frag == (unsigned)-1)
|
||||||
|
frag = ceph_choose_frag(ci, fpos_hash(ctx->pos),
|
||||||
|
NULL, NULL);
|
||||||
} else {
|
} else {
|
||||||
frag = fpos_frag(ctx->pos);
|
frag = fpos_frag(ctx->pos);
|
||||||
}
|
}
|
||||||
|
@ -480,6 +482,7 @@ more:
|
||||||
struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i;
|
struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i;
|
||||||
struct ceph_vino vino;
|
struct ceph_vino vino;
|
||||||
ino_t ino;
|
ino_t ino;
|
||||||
|
u32 ftype;
|
||||||
|
|
||||||
BUG_ON(rde->offset < ctx->pos);
|
BUG_ON(rde->offset < ctx->pos);
|
||||||
|
|
||||||
|
@ -502,15 +505,17 @@ more:
|
||||||
ctx->pos++;
|
ctx->pos++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ceph_mdsc_put_request(fi->last_readdir);
|
||||||
|
fi->last_readdir = NULL;
|
||||||
|
|
||||||
if (fi->next_offset > 2) {
|
if (fi->next_offset > 2) {
|
||||||
ceph_mdsc_put_request(fi->last_readdir);
|
frag = fi->frag;
|
||||||
fi->last_readdir = NULL;
|
|
||||||
goto more;
|
goto more;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* more frags? */
|
/* more frags? */
|
||||||
if (!ceph_frag_is_rightmost(fi->frag)) {
|
if (!ceph_frag_is_rightmost(fi->frag)) {
|
||||||
unsigned frag = ceph_frag_next(fi->frag);
|
frag = ceph_frag_next(fi->frag);
|
||||||
if (is_hash_order(ctx->pos)) {
|
if (is_hash_order(ctx->pos)) {
|
||||||
loff_t new_pos = ceph_make_fpos(ceph_frag_value(frag),
|
loff_t new_pos = ceph_make_fpos(ceph_frag_value(frag),
|
||||||
fi->next_offset, true);
|
fi->next_offset, true);
|
||||||
|
|
Loading…
Add table
Reference in a new issue