mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-23 16:53:58 -05:00
orangefs: delay freeing slot until cancel completes
Make cancels reuse the aborted read/write op, to make sure they do not fail on lack of memory. Don't issue a cancel unless the daemon has seen our read/write, has not replied and isn't being shut down. If cancel *is* issued, don't wait for it to complete; stash the slot in there and just have it freed when cancel is finally replied to or purged (and delay dropping the reference until then, obviously).

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
This commit is contained in:
parent
1357d06d49
commit
78699e29fd
7 changed files with 95 additions and 155 deletions
|
@ -438,6 +438,8 @@ wakeup:
|
|||
}
|
||||
}
|
||||
out:
|
||||
if (unlikely(op_is_cancel(op)))
|
||||
put_cancel(op);
|
||||
op_release(op);
|
||||
return ret;
|
||||
|
||||
|
@ -546,6 +548,11 @@ int is_daemon_in_service(void)
|
|||
return in_service;
|
||||
}
|
||||
|
||||
/*
 * Report whether the daemon counts as "in service": returns true exactly
 * when open_access_count equals 1, false otherwise.
 *
 * No lock is taken here.  The caller visible in this diff,
 * orangefs_cancel_op_in_progress(), invokes it while holding
 * orangefs_request_list_lock and relies on that lock as the barrier.
 *
 * NOTE(review): open_access_count is presumably the number of times the
 * client daemon has the device open - confirm against its definition.
 */
bool __is_daemon_in_service(void)
|
||||
{
|
||||
return open_access_count == 1;
|
||||
}
|
||||
|
||||
static inline long check_ioctl_command(unsigned int command)
|
||||
{
|
||||
/* Check for valid ioctl codes */
|
||||
|
|
|
@ -181,17 +181,6 @@ populate_shared_memory:
|
|||
}
|
||||
|
||||
if (ret < 0) {
|
||||
/*
|
||||
* XXX: needs to be optimized - we only need to cancel if it
|
||||
* had been seen by daemon and not completed
|
||||
*/
|
||||
if (!op_state_serviced(new_op)) {
|
||||
orangefs_cancel_op_in_progress(new_op->tag);
|
||||
} else {
|
||||
complete(&new_op->done);
|
||||
}
|
||||
orangefs_bufmap_put(buffer_index);
|
||||
buffer_index = -1;
|
||||
/*
|
||||
* don't write an error to syslog on signaled operation
|
||||
* termination unless we've got debugging turned on, as
|
||||
|
@ -207,7 +196,10 @@ populate_shared_memory:
|
|||
type == ORANGEFS_IO_READ ?
|
||||
"read from" : "write to",
|
||||
handle, ret);
|
||||
goto out;
|
||||
if (orangefs_cancel_op_in_progress(new_op))
|
||||
return ret;
|
||||
|
||||
goto done_copying;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -101,6 +101,15 @@ char *get_opname_string(struct orangefs_kernel_op_s *new_op)
|
|||
return "OP_UNKNOWN?";
|
||||
}
|
||||
|
||||
/*
 * Give @op a fresh tag.
 *
 * Under next_tag_value_lock, op->tag receives the current value of
 * next_tag_value, which is then incremented.  If the increment wraps the
 * counter to 0 it is restarted at 100, so 0 is never left as the next
 * tag to hand out.
 *
 * NOTE(review): why the restart value is 100 rather than 1 is not visible
 * in this diff - confirm before changing it.
 */
void orangefs_new_tag(struct orangefs_kernel_op_s *op)
|
||||
{
|
||||
spin_lock(&next_tag_value_lock);
|
||||
op->tag = next_tag_value++;
|
||||
if (next_tag_value == 0)
|
||||
next_tag_value = 100;
|
||||
spin_unlock(&next_tag_value_lock);
|
||||
}
|
||||
|
||||
struct orangefs_kernel_op_s *op_alloc(__s32 type)
|
||||
{
|
||||
struct orangefs_kernel_op_s *new_op = NULL;
|
||||
|
@ -120,14 +129,9 @@ struct orangefs_kernel_op_s *op_alloc(__s32 type)
|
|||
new_op->downcall.status = -1;
|
||||
|
||||
new_op->op_state = OP_VFS_STATE_UNKNOWN;
|
||||
new_op->tag = 0;
|
||||
|
||||
/* initialize the op specific tag and upcall credentials */
|
||||
spin_lock(&next_tag_value_lock);
|
||||
new_op->tag = next_tag_value++;
|
||||
if (next_tag_value == 0)
|
||||
next_tag_value = 100;
|
||||
spin_unlock(&next_tag_value_lock);
|
||||
orangefs_new_tag(new_op);
|
||||
new_op->upcall.type = type;
|
||||
new_op->attempts = 0;
|
||||
gossip_debug(GOSSIP_CACHE_DEBUG,
|
||||
|
|
|
@ -190,9 +190,14 @@ struct orangefs_kernel_op_s {
|
|||
/*
|
||||
* Set uses_shared_memory to 1 if this operation uses shared memory.
|
||||
* If true, then a retry on the op must also get a new shared memory
|
||||
* buffer and re-populate it.
|
||||
* buffer and re-populate it. Cancels don't care - it only matters
|
||||
* for service_operation() retry logics and cancels don't go through
|
||||
* it anymore.
|
||||
*/
|
||||
int uses_shared_memory;
|
||||
union {
|
||||
int uses_shared_memory;
|
||||
int slot_to_free;
|
||||
};
|
||||
|
||||
struct orangefs_upcall_s upcall;
|
||||
struct orangefs_downcall_s downcall;
|
||||
|
@ -219,17 +224,13 @@ static inline void set_op_state_serviced(struct orangefs_kernel_op_s *op)
|
|||
op->op_state = OP_VFS_STATE_SERVICED;
|
||||
wake_up_interruptible(&op->waitq);
|
||||
}
|
||||
/*
 * Mark @op as purged: OR OP_VFS_STATE_PURGED into op->op_state and wake
 * any sleeper on op->waitq.  This version takes no lock itself.
 *
 * NOTE(review): this page carries no +/- markers.  The hunk header (17
 * lines before, 13 after) and the second definition of the same name
 * further down suggest this is the removed, pre-change version - confirm
 * against the real tree.
 */
static inline void set_op_state_purged(struct orangefs_kernel_op_s *op)
|
||||
{
|
||||
op->op_state |= OP_VFS_STATE_PURGED;
|
||||
wake_up_interruptible(&op->waitq);
|
||||
}
|
||||
|
||||
/*
 * Op state predicates.  Each op_state_*() macro evaluates to non-zero when
 * the corresponding OP_VFS_STATE_* bit is set in (op)->op_state; the result
 * is the masked value, not a normalized 0/1.
 *
 * op_is_cancel() is different in kind: it compares (op)->upcall.type with
 * ORANGEFS_VFS_OP_CANCEL, i.e. it tests what the op is, not its state.
 */
#define op_state_waiting(op) ((op)->op_state & OP_VFS_STATE_WAITING)
|
||||
#define op_state_in_progress(op) ((op)->op_state & OP_VFS_STATE_INPROGR)
|
||||
#define op_state_serviced(op) ((op)->op_state & OP_VFS_STATE_SERVICED)
|
||||
#define op_state_purged(op) ((op)->op_state & OP_VFS_STATE_PURGED)
|
||||
#define op_state_given_up(op) ((op)->op_state & OP_VFS_STATE_GIVEN_UP)
|
||||
#define op_is_cancel(op) ((op)->upcall.type == ORANGEFS_VFS_OP_CANCEL)
|
||||
|
||||
static inline void get_op(struct orangefs_kernel_op_s *op)
|
||||
{
|
||||
|
@ -249,6 +250,27 @@ static inline void op_release(struct orangefs_kernel_op_s *op)
|
|||
}
|
||||
}
|
||||
|
||||
extern void orangefs_bufmap_put(int);
|
||||
/*
 * Finish off a cancel op: hand the slot stashed in op->slot_to_free back
 * through orangefs_bufmap_put(), then drop one reference on @op with
 * op_release().
 *
 * slot_to_free shares storage with uses_shared_memory (anonymous union in
 * struct orangefs_kernel_op_s) and is filled in by
 * orangefs_cancel_op_in_progress() from the aborted I/O's buf_index, so
 * this is only meaningful for ops that were converted into cancels.
 */
static inline void put_cancel(struct orangefs_kernel_op_s *op)
|
||||
{
|
||||
orangefs_bufmap_put(op->slot_to_free);
|
||||
op_release(op);
|
||||
}
|
||||
|
||||
/*
 * Purge @op.  Takes and releases op->lock itself, so callers must not
 * already hold it.
 *
 * - Cancel op (op_is_cancel()): nobody waits for a cancel to complete, so
 *   there is no one to wake.  The op is unlinked from whatever list it is
 *   on, the lock is dropped, and put_cancel() frees the stashed slot and
 *   drops a reference.
 * - Any other op: OP_VFS_STATE_PURGED is OR-ed into op_state and sleepers
 *   on op->waitq are woken before the lock is released.
 */
static inline void set_op_state_purged(struct orangefs_kernel_op_s *op)
|
||||
{
|
||||
spin_lock(&op->lock);
|
||||
if (unlikely(op_is_cancel(op))) {
|
||||
list_del(&op->list);
|
||||
spin_unlock(&op->lock);
|
||||
put_cancel(op);
|
||||
} else {
|
||||
op->op_state |= OP_VFS_STATE_PURGED;
|
||||
wake_up_interruptible(&op->waitq);
|
||||
spin_unlock(&op->lock);
|
||||
}
|
||||
}
|
||||
|
||||
/* per inode private orangefs info */
|
||||
struct orangefs_inode_s {
|
||||
struct orangefs_object_kref refn;
|
||||
|
@ -448,6 +470,7 @@ static inline int match_handle(struct orangefs_khandle resp_handle,
|
|||
int op_cache_initialize(void);
|
||||
int op_cache_finalize(void);
|
||||
struct orangefs_kernel_op_s *op_alloc(__s32 type);
|
||||
void orangefs_new_tag(struct orangefs_kernel_op_s *op);
|
||||
char *get_opname_string(struct orangefs_kernel_op_s *new_op);
|
||||
|
||||
int orangefs_inode_cache_initialize(void);
|
||||
|
@ -528,6 +551,7 @@ ssize_t orangefs_inode_read(struct inode *inode,
|
|||
int orangefs_dev_init(void);
|
||||
void orangefs_dev_cleanup(void);
|
||||
int is_daemon_in_service(void);
|
||||
bool __is_daemon_in_service(void);
|
||||
int fs_mount_pending(__s32 fsid);
|
||||
|
||||
/*
|
||||
|
@ -562,7 +586,7 @@ void orangefs_set_signals(sigset_t *);
|
|||
|
||||
int orangefs_unmount_sb(struct super_block *sb);
|
||||
|
||||
int orangefs_cancel_op_in_progress(__u64 tag);
|
||||
bool orangefs_cancel_op_in_progress(struct orangefs_kernel_op_s *op);
|
||||
|
||||
static inline __u64 orangefs_convert_time_field(const struct timespec *ts)
|
||||
{
|
||||
|
|
|
@ -260,14 +260,12 @@ void purge_inprogress_ops(void)
|
|||
next,
|
||||
&htable_ops_in_progress[i],
|
||||
list) {
|
||||
spin_lock(&op->lock);
|
||||
gossip_debug(GOSSIP_INIT_DEBUG,
|
||||
"pvfs2-client-core: purging in-progress op tag "
|
||||
"%llu %s\n",
|
||||
llu(op->tag),
|
||||
get_opname_string(op));
|
||||
set_op_state_purged(op);
|
||||
spin_unlock(&op->lock);
|
||||
}
|
||||
spin_unlock(&htable_ops_in_progress_lock);
|
||||
}
|
||||
|
|
|
@ -688,38 +688,6 @@ int orangefs_unmount_sb(struct super_block *sb)
|
|||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* NOTE: on successful cancellation, be sure to return -EINTR, as
|
||||
* that's the return value the caller expects
|
||||
*/
|
||||
/*
 * Tag-based cancel: allocate a fresh ORANGEFS_VFS_OP_CANCEL op with
 * op_alloc(), point it at @tag, push it through service_operation() with
 * the ORANGEFS_OP_CANCELLATION flag, release the op and return whatever
 * service_operation() returned.
 *
 * Returns -ENOMEM without issuing anything when op_alloc() fails.
 *
 * NOTE(review): this page carries no +/- markers.  The hunk header (38
 * lines before, 6 after) and the replacement taking a
 * struct orangefs_kernel_op_s * later in this diff suggest this is the
 * removed, pre-change version - confirm against the real tree.
 */
int orangefs_cancel_op_in_progress(__u64 tag)
|
||||
{
|
||||
int ret = -EINVAL;
|
||||
struct orangefs_kernel_op_s *new_op = NULL;
|
||||
|
||||
gossip_debug(GOSSIP_UTILS_DEBUG,
|
||||
"orangefs_cancel_op_in_progress called on tag %llu\n",
|
||||
llu(tag));
|
||||
|
||||
new_op = op_alloc(ORANGEFS_VFS_OP_CANCEL);
|
||||
if (!new_op)
|
||||
return -ENOMEM;
|
||||
new_op->upcall.req.cancel.op_tag = tag;
|
||||
|
||||
gossip_debug(GOSSIP_UTILS_DEBUG,
|
||||
"Attempting ORANGEFS operation cancellation of tag %llu\n",
|
||||
llu(new_op->upcall.req.cancel.op_tag));
|
||||
|
||||
ret = service_operation(new_op, "orangefs_cancel", ORANGEFS_OP_CANCELLATION);
|
||||
|
||||
gossip_debug(GOSSIP_UTILS_DEBUG,
|
||||
"orangefs_cancel_op_in_progress: got return value of %d\n",
|
||||
ret);
|
||||
|
||||
op_release(new_op);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void orangefs_make_bad_inode(struct inode *inode)
|
||||
{
|
||||
if (is_root_handle(inode)) {
|
||||
|
|
|
@ -16,7 +16,6 @@
|
|||
#include "orangefs-kernel.h"
|
||||
#include "orangefs-bufmap.h"
|
||||
|
||||
static int wait_for_cancellation_downcall(struct orangefs_kernel_op_s *);
|
||||
static int wait_for_matching_downcall(struct orangefs_kernel_op_s *);
|
||||
|
||||
/*
|
||||
|
@ -36,23 +35,27 @@ void purge_waiting_ops(void)
|
|||
"pvfs2-client-core: purging op tag %llu %s\n",
|
||||
llu(op->tag),
|
||||
get_opname_string(op));
|
||||
spin_lock(&op->lock);
|
||||
set_op_state_purged(op);
|
||||
spin_unlock(&op->lock);
|
||||
}
|
||||
spin_unlock(&orangefs_request_list_lock);
|
||||
}
|
||||
|
||||
/*
 * Queue @op at the tail of orangefs_request_list.
 *
 * Under op->lock the op is marked waiting and linked into the list; after
 * the lock is dropped, orangefs_request_list_waitq is woken.
 *
 * This helper does not take orangefs_request_list_lock.  Both callers
 * visible in this diff (add_op_to_request_list() and
 * orangefs_cancel_op_in_progress()) acquire it before calling.
 */
static inline void
|
||||
__add_op_to_request_list(struct orangefs_kernel_op_s *op)
|
||||
{
|
||||
spin_lock(&op->lock);
|
||||
set_op_state_waiting(op);
|
||||
list_add_tail(&op->list, &orangefs_request_list);
|
||||
spin_unlock(&op->lock);
|
||||
wake_up_interruptible(&orangefs_request_list_waitq);
|
||||
}
|
||||
|
||||
/*
 * Queue @op on orangefs_request_list while holding
 * orangefs_request_list_lock.
 *
 * NOTE(review): this page carries no +/- markers, and the body below
 * appears to interleave the old open-coded sequence (lock op, mark
 * waiting, link, unlock, wake) with the new single call to
 * __add_op_to_request_list().  Read literally it would take op->lock and
 * enqueue twice.  Presumably the post-change body is just: take
 * orangefs_request_list_lock, call __add_op_to_request_list(op), release
 * the lock - confirm against the real tree.
 */
static inline void
|
||||
add_op_to_request_list(struct orangefs_kernel_op_s *op)
|
||||
{
|
||||
spin_lock(&orangefs_request_list_lock);
|
||||
spin_lock(&op->lock);
|
||||
set_op_state_waiting(op);
|
||||
list_add_tail(&op->list, &orangefs_request_list);
|
||||
__add_op_to_request_list(op);
|
||||
spin_unlock(&orangefs_request_list_lock);
|
||||
spin_unlock(&op->lock);
|
||||
wake_up_interruptible(&orangefs_request_list_waitq);
|
||||
}
|
||||
|
||||
static inline
|
||||
|
@ -159,15 +162,7 @@ retry_servicing:
|
|||
if (flags & ORANGEFS_OP_ASYNC)
|
||||
return 0;
|
||||
|
||||
if (flags & ORANGEFS_OP_CANCELLATION) {
|
||||
gossip_debug(GOSSIP_WAIT_DEBUG,
|
||||
"%s:"
|
||||
"About to call wait_for_cancellation_downcall.\n",
|
||||
__func__);
|
||||
ret = wait_for_cancellation_downcall(op);
|
||||
} else {
|
||||
ret = wait_for_matching_downcall(op);
|
||||
}
|
||||
ret = wait_for_matching_downcall(op);
|
||||
|
||||
if (ret < 0) {
|
||||
/* failed to get matching downcall */
|
||||
|
@ -273,6 +268,36 @@ retry_servicing:
|
|||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Turn an aborted I/O op into a cancel request for itself and queue it,
 * without allocating a new op and without waiting for a reply.
 *
 * Returns false, with @op untouched, if @op is not in the in-progress
 * state.  Otherwise @op is rewritten in place:
 *   - the I/O's buf_index is stashed in op->slot_to_free (read before the
 *     upcall is wiped) so put_cancel() can free the slot later;
 *   - upcall and downcall are zeroed and re-initialised as an
 *     ORANGEFS_VFS_OP_CANCEL aimed at the op's old tag;
 *   - the op itself gets a new tag via orangefs_new_tag().
 *
 * Then, under orangefs_request_list_lock, the daemon is checked.  If it is
 * not in service the function returns false - note that @op has already
 * been rewritten as a cancel at that point.  If it is in service the op is
 * queued with __add_op_to_request_list() and true is returned.
 *
 * The debug message prints the old tag, i.e. the tag being cancelled.
 */
bool orangefs_cancel_op_in_progress(struct orangefs_kernel_op_s *op)
|
||||
{
|
||||
u64 tag = op->tag;
|
||||
if (!op_state_in_progress(op))
|
||||
return false;
|
||||
|
||||
op->slot_to_free = op->upcall.req.io.buf_index;
|
||||
memset(&op->upcall, 0, sizeof(op->upcall));
|
||||
memset(&op->downcall, 0, sizeof(op->downcall));
|
||||
op->upcall.type = ORANGEFS_VFS_OP_CANCEL;
|
||||
op->upcall.req.cancel.op_tag = tag;
|
||||
op->downcall.type = ORANGEFS_VFS_OP_INVALID;
|
||||
op->downcall.status = -1;
|
||||
orangefs_new_tag(op);
|
||||
|
||||
spin_lock(&orangefs_request_list_lock);
|
||||
/* orangefs_request_list_lock is enough of a barrier here */
|
||||
if (!__is_daemon_in_service()) {
|
||||
spin_unlock(&orangefs_request_list_lock);
|
||||
return false;
|
||||
}
|
||||
__add_op_to_request_list(op);
|
||||
spin_unlock(&orangefs_request_list_lock);
|
||||
|
||||
gossip_debug(GOSSIP_UTILS_DEBUG,
|
||||
"Attempting ORANGEFS operation cancellation of tag %llu\n",
|
||||
llu(tag));
|
||||
return true;
|
||||
}
|
||||
|
||||
static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s *op)
|
||||
{
|
||||
/*
|
||||
|
@ -426,81 +451,3 @@ static int wait_for_matching_downcall(struct orangefs_kernel_op_s *op)
|
|||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* similar to wait_for_matching_downcall(), but used in the special case
|
||||
* of I/O cancellations.
|
||||
*
|
||||
* Note we need a special wait function because if this is called we already
|
||||
* know that a signal is pending in current and need to service the
|
||||
* cancellation upcall anyway. the only way to exit this is to either
|
||||
* timeout or have the cancellation be serviced properly.
|
||||
*/
|
||||
/*
 * Return values, as the code below stands:
 *   0          - the op was already SERVICED when checked under op->lock;
 *   -EINTR     - a signal was pending for current;
 *   -ETIMEDOUT - everything else.
 *
 * Every path through the "loop" body ends in break, so it runs at most
 * once.  After schedule_timeout() returns, the op state is not re-checked:
 * both a real timeout (return value 0) and an early wake-up end with
 * -ETIMEDOUT.
 *
 * NOTE(review): the signal path calls
 * orangefs_clean_up_interrupted_operation() with op->lock held and does
 * not unlock here, and the timeout path re-takes op->lock before calling
 * it; op->lock is then taken again before finish_wait().  Presumably the
 * helper releases op->lock - confirm against its definition.
 *
 * NOTE(review): the hunk header (81 lines before, 3 after) suggests this
 * whole function is removed by the commit - confirm against the real tree.
 */
static int wait_for_cancellation_downcall(struct orangefs_kernel_op_s *op)
|
||||
{
|
||||
int ret = -EINVAL;
|
||||
DEFINE_WAIT(wait_entry);
|
||||
|
||||
while (1) {
|
||||
spin_lock(&op->lock);
|
||||
prepare_to_wait(&op->waitq, &wait_entry, TASK_INTERRUPTIBLE);
|
||||
if (op_state_serviced(op)) {
|
||||
gossip_debug(GOSSIP_WAIT_DEBUG,
|
||||
"%s:op-state is SERVICED.\n",
|
||||
__func__);
|
||||
spin_unlock(&op->lock);
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
if (signal_pending(current)) {
|
||||
gossip_debug(GOSSIP_WAIT_DEBUG,
|
||||
"%s:operation interrupted by a signal (tag"
|
||||
" %llu, op %p)\n",
|
||||
__func__,
|
||||
llu(op->tag),
|
||||
op);
|
||||
orangefs_clean_up_interrupted_operation(op);
|
||||
ret = -EINTR;
|
||||
break;
|
||||
}
|
||||
|
||||
gossip_debug(GOSSIP_WAIT_DEBUG,
|
||||
"%s:About to call schedule_timeout.\n",
|
||||
__func__);
|
||||
spin_unlock(&op->lock);
|
||||
ret = schedule_timeout(op_timeout_secs * HZ);
|
||||
|
||||
gossip_debug(GOSSIP_WAIT_DEBUG,
|
||||
"%s:Value returned from schedule_timeout(%d).\n",
|
||||
__func__,
|
||||
ret);
|
||||
if (!ret) {
|
||||
gossip_debug(GOSSIP_WAIT_DEBUG,
|
||||
"%s:*** operation timed out: %p\n",
|
||||
__func__,
|
||||
op);
|
||||
spin_lock(&op->lock);
|
||||
orangefs_clean_up_interrupted_operation(op);
|
||||
ret = -ETIMEDOUT;
|
||||
break;
|
||||
}
|
||||
|
||||
gossip_debug(GOSSIP_WAIT_DEBUG,
|
||||
"%s:Breaking out of loop, regardless of value returned by schedule_timeout.\n",
|
||||
__func__);
|
||||
ret = -ETIMEDOUT;
|
||||
break;
|
||||
}
|
||||
|
||||
spin_lock(&op->lock);
|
||||
finish_wait(&op->waitq, &wait_entry);
|
||||
spin_unlock(&op->lock);
|
||||
|
||||
gossip_debug(GOSSIP_WAIT_DEBUG,
|
||||
"%s:returning ret(%d)\n",
|
||||
__func__,
|
||||
ret);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue