From 5223a84cb0fc68ba98b95f51791f350b3932b7f4 Mon Sep 17 00:00:00 2001 From: ericm <ericm> Date: Mon, 26 May 2008 23:27:28 +0000 Subject: [PATCH] branch: b1_6 do not drop replay according to msg flags, instead we check the per-export recovery request queue for duplication of transno. b=15756 r=adilger r=rread --- lustre/ChangeLog | 7 +++ lustre/include/lustre_export.h | 1 + lustre/include/obd_support.h | 1 + lustre/ldlm/ldlm_lib.c | 82 +++++++++++++++++++++++++++++----- lustre/obdclass/genops.c | 2 + lustre/tests/replay-single.sh | 13 ++++++ 6 files changed, 95 insertions(+), 11 deletions(-) diff --git a/lustre/ChangeLog b/lustre/ChangeLog index c83935f86c..865ce021b5 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -24,6 +24,13 @@ tbd Sun Microsystems, Inc. 'tunefs.lustre --param="mdt.quota_type=ug1" $MDTDEV'. For more information, please refer to bugzilla 13904. +Severity : normal +Bugzilla : 15756 +Frequency : rare, replays get lost on server +Description: server incorrectly drops resent replays, leading to recovery failure. +Details : do not drop replays according to msg flags; instead, check the + per-export recovery request queue for duplication of transno. 
+ Severity : normal Bugzilla : 14835 Frequency : after recovery diff --git a/lustre/include/lustre_export.h b/lustre/include/lustre_export.h index 7ece7a25bf..6890dbd867 100644 --- a/lustre/include/lustre_export.h +++ b/lustre/include/lustre_export.h @@ -88,6 +88,7 @@ struct obd_export { struct ldlm_export_data exp_ldlm_data; struct list_head exp_outstanding_replies; time_t exp_last_request_time; + struct list_head exp_req_replay_queue; spinlock_t exp_lock; /* protects flags int below */ /* ^ protects exp_outstanding_replies too */ __u64 exp_connect_flags; diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 792a6d47ce..2ee8438d45 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -254,6 +254,7 @@ extern unsigned int obd_alloc_fail_rate; #define OBD_FAIL_TGT_DELAY_RECONNECT 0x704 #define OBD_FAIL_TGT_DELAY_PRECREATE 0x705 #define OBD_FAIL_TGT_TOOMANY_THREADS 0x706 +#define OBD_FAIL_TGT_REPLAY_DROP 0x707 #define OBD_FAIL_MDC_REVALIDATE_PAUSE 0x800 #define OBD_FAIL_MDC_ENQUEUE_PAUSE 0x801 diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 50e769ffb4..cb5c4ef75a 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -973,6 +973,47 @@ void target_destroy_export(struct obd_export *exp) * Recovery functions */ +static int target_exp_enqueue_req_replay(struct ptlrpc_request *req) +{ + __u64 transno = lustre_msg_get_transno(req->rq_reqmsg); + struct obd_export *exp = req->rq_export; + struct ptlrpc_request *reqiter; + int dup = 0; + + LASSERT(exp); + + spin_lock(&exp->exp_lock); + list_for_each_entry(reqiter, &exp->exp_req_replay_queue, + rq_replay_list) { + if (lustre_msg_get_transno(reqiter->rq_reqmsg) == transno) { + dup = 1; + break; + } + } + + if (dup) { + /* we expect it with RESENT and REPLAY flags */ + if ((lustre_msg_get_flags(req->rq_reqmsg) & + (MSG_RESENT | MSG_REPLAY)) != (MSG_RESENT | MSG_REPLAY)) + CERROR("invalid flags %x of resent replay\n", + 
lustre_msg_get_flags(req->rq_reqmsg)); + } else { + list_add_tail(&req->rq_replay_list, &exp->exp_req_replay_queue); + } + + spin_unlock(&exp->exp_lock); + return dup; +} + +static void target_exp_dequeue_req_replay(struct ptlrpc_request *req) +{ + LASSERT(!list_empty(&req->rq_replay_list)); + LASSERT(req->rq_export); + + spin_lock(&req->rq_export->exp_lock); + list_del_init(&req->rq_replay_list); + spin_unlock(&req->rq_export->exp_lock); +} static void target_release_saved_req(struct ptlrpc_request *req) { @@ -1017,6 +1058,7 @@ static void abort_recovery_queue(struct obd_device *obd) list_for_each_safe(tmp, n, &obd->obd_recovery_queue) { req = list_entry(tmp, struct ptlrpc_request, rq_list); + target_exp_dequeue_req_replay(req); list_del(&req->rq_list); DEBUG_REQ(D_ERROR, req, "aborted:"); req->rq_status = -ENOTCONN; @@ -1066,6 +1108,7 @@ void target_cleanup_recovery(struct obd_device *obd) list_for_each_safe(tmp, n, &obd->obd_recovery_queue) { req = list_entry(tmp, struct ptlrpc_request, rq_list); + target_exp_dequeue_req_replay(req); list_del(&req->rq_list); target_release_saved_req(req); } @@ -1278,6 +1321,7 @@ static void process_recovery_queue(struct obd_device *obd) } continue; } + target_exp_dequeue_req_replay(req); list_del_init(&req->rq_list); obd->obd_requests_queued_for_recovery--; spin_unlock_bh(&obd->obd_processing_task_lock); @@ -1314,6 +1358,7 @@ int target_queue_recovery_request(struct ptlrpc_request *req, __u64 transno = lustre_msg_get_transno(req->rq_reqmsg); struct ptlrpc_request *saved_req; struct lustre_msg *reqmsg; + int rc = 0; /* CAVEAT EMPTOR: The incoming request message has been swabbed * (i.e. buflens etc are in my own byte order), but type-dependent @@ -1351,20 +1396,12 @@ int target_queue_recovery_request(struct ptlrpc_request *req, /* Processing the queue right now, don't re-add. 
*/ LASSERT(list_empty(&req->rq_list)); spin_unlock_bh(&obd->obd_processing_task_lock); - OBD_FREE(reqmsg, req->rq_reqlen); - OBD_FREE(saved_req, sizeof *saved_req); - return 1; + GOTO(err_free, rc = 1); } - /* A resent, replayed request that is still on the queue; just drop it. - The queued request will handle this. */ - if ((lustre_msg_get_flags(req->rq_reqmsg) & (MSG_RESENT|MSG_REPLAY)) == - (MSG_RESENT | MSG_REPLAY)) { - DEBUG_REQ(D_ERROR, req, "dropping resent queued req"); + if (unlikely(OBD_FAIL_CHECK(OBD_FAIL_TGT_REPLAY_DROP))) { spin_unlock_bh(&obd->obd_processing_task_lock); - OBD_FREE(reqmsg, req->rq_reqlen); - OBD_FREE(saved_req, sizeof *saved_req); - return 0; + GOTO(err_free, rc = 0); } memcpy(saved_req, req, sizeof *req); @@ -1373,6 +1410,13 @@ int target_queue_recovery_request(struct ptlrpc_request *req, req->rq_reqmsg = reqmsg; class_export_get(req->rq_export); CFS_INIT_LIST_HEAD(&req->rq_list); + CFS_INIT_LIST_HEAD(&req->rq_replay_list); + + if (target_exp_enqueue_req_replay(req)) { + spin_unlock_bh(&obd->obd_processing_task_lock); + DEBUG_REQ(D_ERROR, req, "dropping resent queued req"); + GOTO(err_exp, rc = 0); + } /* XXX O(n^2) */ list_for_each(tmp, &obd->obd_recovery_queue) { @@ -1384,6 +1428,15 @@ int target_queue_recovery_request(struct ptlrpc_request *req, inserted = 1; break; } + + if (unlikely(lustre_msg_get_transno(reqiter->rq_reqmsg) == + transno)) { + spin_unlock_bh(&obd->obd_processing_task_lock); + DEBUG_REQ(D_ERROR, req, "dropping replay: transno " + "has been claimed by another client"); + target_exp_dequeue_req_replay(req); + GOTO(err_exp, rc = 0); + } } if (!inserted) { @@ -1409,6 +1462,13 @@ int target_queue_recovery_request(struct ptlrpc_request *req, process_recovery_queue(obd); return 0; + +err_exp: + class_export_put(req->rq_export); +err_free: + OBD_FREE(reqmsg, req->rq_reqlen); + OBD_FREE(saved_req, sizeof(*saved_req)); + return rc; } struct obd_device * target_req2obd(struct ptlrpc_request *req) diff --git 
a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index 63d52c26b5..f6fbf45f7c 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -640,6 +640,7 @@ void class_export_destroy(struct obd_export *exp) ptlrpc_put_connection_superhack(exp->exp_connection); LASSERT(list_empty(&exp->exp_outstanding_replies)); + LASSERT(list_empty(&exp->exp_req_replay_queue)); obd_destroy_export(exp); OBD_FREE_RCU(exp, sizeof(*exp), &exp->exp_handle); @@ -664,6 +665,7 @@ struct obd_export *class_new_export(struct obd_device *obd, atomic_set(&export->exp_rpc_count, 0); export->exp_obd = obd; CFS_INIT_LIST_HEAD(&export->exp_outstanding_replies); + CFS_INIT_LIST_HEAD(&export->exp_req_replay_queue); /* XXX this should be in LDLM init */ CFS_INIT_LIST_HEAD(&export->exp_ldlm_data.led_held_locks); spin_lock_init(&export->exp_ldlm_data.led_lock); diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh index 5142e45f96..b60f032d45 100755 --- a/lustre/tests/replay-single.sh +++ b/lustre/tests/replay-single.sh @@ -1367,6 +1367,19 @@ test_61c() { } run_test 61c "test race mds llog sync vs llog cleanup" +test_62() { # Bug 15756 - don't mis-drop resent replay + replay_barrier mds + createmany -o $DIR/$tdir/$tfile- 25 +#define OBD_FAIL_TGT_REPLAY_DROP 0x707 + do_facet mds "sysctl -w lustre.fail_loc=0x80000707" + facet_failover mds + df $MOUNT || return 1 + do_facet mds "sysctl -w lustre.fail_loc=0" + unlinkmany $DIR/$tdir/$tfile- 25 || return 2 + return 0 +} +run_test 62 "don't mis-drop resent replay" + #Adaptive Timeouts (bug 3055) AT_MAX_SET=0 -- GitLab