1
0
Fork 0
mirror of https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git synced 2025-01-24 17:23:25 -05:00

ceph: choose readdir frag based on previous readdir reply

The dirfragtree is lazily updated, so it is not always accurate. An infinite
loop happens in the following circumstance:

- the client sends a request to read frag A
- frag A has been fragmented into frags B and C, so the mds fills the reply
  with the contents of frag B
- the client wants to read the next frag, C, but ceph_choose_frag(frag value
  of C) returns frag A, so the client requests frag A again and the cycle
  repeats

The fix is to use the previous readdir reply to calculate the next readdir
frag when possible.

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
This commit is contained in:
Yan, Zheng 2017-04-24 11:56:50 +08:00 committed by Ilya Dryomov
parent e010dd0ada
commit b50c2de51e

View file

@ -294,7 +294,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_client *mdsc = fsc->mdsc;
int i; int i;
int err; int err;
u32 ftype; unsigned frag = -1;
struct ceph_mds_reply_info_parsed *rinfo; struct ceph_mds_reply_info_parsed *rinfo;
dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos); dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos);
@ -341,7 +341,6 @@ more:
/* do we have the correct frag content buffered? */ /* do we have the correct frag content buffered? */
if (need_send_readdir(fi, ctx->pos)) { if (need_send_readdir(fi, ctx->pos)) {
struct ceph_mds_request *req; struct ceph_mds_request *req;
unsigned frag;
int op = ceph_snap(inode) == CEPH_SNAPDIR ? int op = ceph_snap(inode) == CEPH_SNAPDIR ?
CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR; CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR;
@ -352,8 +351,11 @@ more:
} }
if (is_hash_order(ctx->pos)) { if (is_hash_order(ctx->pos)) {
frag = ceph_choose_frag(ci, fpos_hash(ctx->pos), /* fragtree isn't always accurate. choose frag
NULL, NULL); * based on previous reply when possible. */
if (frag == (unsigned)-1)
frag = ceph_choose_frag(ci, fpos_hash(ctx->pos),
NULL, NULL);
} else { } else {
frag = fpos_frag(ctx->pos); frag = fpos_frag(ctx->pos);
} }
@ -480,6 +482,7 @@ more:
struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i; struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i;
struct ceph_vino vino; struct ceph_vino vino;
ino_t ino; ino_t ino;
u32 ftype;
BUG_ON(rde->offset < ctx->pos); BUG_ON(rde->offset < ctx->pos);
@ -502,15 +505,17 @@ more:
ctx->pos++; ctx->pos++;
} }
ceph_mdsc_put_request(fi->last_readdir);
fi->last_readdir = NULL;
if (fi->next_offset > 2) { if (fi->next_offset > 2) {
ceph_mdsc_put_request(fi->last_readdir); frag = fi->frag;
fi->last_readdir = NULL;
goto more; goto more;
} }
/* more frags? */ /* more frags? */
if (!ceph_frag_is_rightmost(fi->frag)) { if (!ceph_frag_is_rightmost(fi->frag)) {
unsigned frag = ceph_frag_next(fi->frag); frag = ceph_frag_next(fi->frag);
if (is_hash_order(ctx->pos)) { if (is_hash_order(ctx->pos)) {
loff_t new_pos = ceph_make_fpos(ceph_frag_value(frag), loff_t new_pos = ceph_make_fpos(ceph_frag_value(frag),
fi->next_offset, true); fi->next_offset, true);