summary | refs | log | tree | commit | diff
path: root/drivers
diff options
context:
space:
mode:
author Alexander.Boyko <alexander_boyko@xyratex.com> 2014-06-23 05:32:12 +0400
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2014-06-27 04:45:05 +0400
commit 5c689e689baa5a8a239a8b5d29c3fb77aa62f134 (patch)
tree 274d8d36f694edb090046e3ac1a129f11cf6a501 /drivers
parent a2ff0f973c11c7b42bf2497e5f66520e0f990332 (diff)
download linux-5c689e689baa5a8a239a8b5d29c3fb77aa62f134.tar.xz
staging/lustre/ptlrpc: race at req processing
There is a race between ptlrpc_resend_req() and ptlrpc_check_set(): thread 1 runs ptlrpc_check_set()->after_reply() while thread 2 runs ptlrpc_resend_req(). The result is a request with rq_resend = 1 and the MSG_REPLAY flag set. When this request reaches the server, it causes client eviction. The patch skips the ptlrpc_resend_req() logic if rq_replied is set, and clears the rq_resend flag in reply_in_callback() when the client gets a reply. Signed-off-by: Alexander Boyko <alexander_boyko@xyratex.com> Xyratex-bug-id: MRP-1888 Reviewed-on: http://review.whamcloud.com/10471 Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-5116 Reviewed-by: Andreas Dilger <andreas.dilger@intel.com> Reviewed-by: Mike Pershin <mike.pershin@intel.com> Reviewed-by: Chris Horn <hornc@cray.com> Signed-off-by: Oleg Drokin <oleg.drokin@intel.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/staging/lustre/lustre/ptlrpc/client.c 11
-rw-r--r-- drivers/staging/lustre/lustre/ptlrpc/events.c 2
-rw-r--r-- drivers/staging/lustre/lustre/ptlrpc/niobuf.c 2
3 files changed, 14 insertions, 1 deletions
diff --git a/drivers/staging/lustre/lustre/ptlrpc/client.c b/drivers/staging/lustre/lustre/ptlrpc/client.c
index 7246e8ce9c19..d806257f31ee 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/client.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/client.c
@@ -2530,10 +2530,19 @@ EXPORT_SYMBOL(ptlrpc_cleanup_client);
void ptlrpc_resend_req(struct ptlrpc_request *req)
{
DEBUG_REQ(D_HA, req, "going to resend");
+ spin_lock(&req->rq_lock);
+
+ /* Request got reply but linked to the import list still.
+ Let ptlrpc_check_set() to process it. */
+ if (ptlrpc_client_replied(req)) {
+ spin_unlock(&req->rq_lock);
+ DEBUG_REQ(D_HA, req, "it has reply, so skip it");
+ return;
+ }
+
lustre_msg_set_handle(req->rq_reqmsg, &(struct lustre_handle){ 0 });
req->rq_status = -EAGAIN;
- spin_lock(&req->rq_lock);
req->rq_resend = 1;
req->rq_net_err = 0;
req->rq_timedout = 0;
diff --git a/drivers/staging/lustre/lustre/ptlrpc/events.c b/drivers/staging/lustre/lustre/ptlrpc/events.c
index aa85239f6cd5..9f9b8d1f835e 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/events.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/events.c
@@ -145,6 +145,8 @@ void reply_in_callback(lnet_event_t *ev)
/* Real reply */
req->rq_rep_swab_mask = 0;
req->rq_replied = 1;
+ /* Got reply, no resend required */
+ req->rq_resend = 0;
req->rq_reply_off = ev->offset;
req->rq_nob_received = ev->mlength;
/* LNetMDUnlink can't be called under the LNET_LOCK,
diff --git a/drivers/staging/lustre/lustre/ptlrpc/niobuf.c b/drivers/staging/lustre/lustre/ptlrpc/niobuf.c
index ef18639036c5..f760504e0696 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/niobuf.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/niobuf.c
@@ -505,6 +505,8 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
/* If this is a re-transmit, we're required to have disengaged
* cleanly from the previous attempt */
LASSERT(!request->rq_receiving_reply);
+ LASSERT(!((lustre_msg_get_flags(request->rq_reqmsg) & MSG_REPLAY) &&
+ (request->rq_import->imp_state == LUSTRE_IMP_FULL)));
if (unlikely(obd != NULL && obd->obd_fail)) {
CDEBUG(D_HA, "muting rpc for failed imp obd %s\n",