From 45569f8327d70593c2a77bf61819e299fdd1d4d5 Mon Sep 17 00:00:00 2001 From: tappro <tappro> Date: Wed, 24 Sep 2008 07:50:18 +0000 Subject: [PATCH] - prevent the growing of recovery timeout b:16389 i:nathan, rread --- lustre/ptlrpc/service.c | 20 +++++++++++++------- lustre/tests/replay-dual.sh | 29 +++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 7 deletions(-) diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c index 59cfe9799e..aaf891558f 100644 --- a/lustre/ptlrpc/service.c +++ b/lustre/ptlrpc/service.c @@ -699,15 +699,21 @@ static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req, RETURN(-ENOSYS); } - if (extra_time) { - /* Fake our processing time into the future to ask the - * clients for some extra amount of time */ - extra_time += cfs_time_current_sec() - - req->rq_arrival_time.tv_sec; - at_add(&svc->srv_at_estimate, extra_time); + if (req->rq_export && req->rq_export->exp_obd->obd_recovering) { + newdl = cfs_time_current_sec() + + req->rq_export->exp_obd->obd_recovery_timeout; + } else { + if (extra_time) { + /* Fake our processing time into the future to ask the + * clients for some extra amount of time */ + extra_time += cfs_time_current_sec() - + req->rq_arrival_time.tv_sec; + at_add(&svc->srv_at_estimate, extra_time); + } + newdl = req->rq_arrival_time.tv_sec + + at_get(&svc->srv_at_estimate); } - newdl = req->rq_arrival_time.tv_sec + at_get(&svc->srv_at_estimate); if (req->rq_deadline >= newdl) { /* We're not adding any time, no need to send an early reply (e.g. maybe at adaptive_max) */ diff --git a/lustre/tests/replay-dual.sh b/lustre/tests/replay-dual.sh index cf397cba53..ffa2186d29 100755 --- a/lustre/tests/replay-dual.sh +++ b/lustre/tests/replay-dual.sh @@ -369,6 +369,35 @@ test_19() { # Bug 10991 - resend of open request does not fail assertion. } run_test 19 "resend of open request" +test_20() { #16389 + BEFORE=`date +%s` + replay_barrier mds + touch $MOUNT1/a + touch $MOUNT2/b + umount $MOUNT2 + facet_failover mds + df $MOUNT1 || return 1 + rm $MOUNT1/a + zconf_mount `hostname` $MOUNT2 || error "mount $MOUNT2 fail" + TIER1=$((`date +%s` - BEFORE)) + echo "First recovery time is $TIER1 sec" + BEFORE=`date +%s` + replay_barrier mds + touch $MOUNT1/a + touch $MOUNT2/b + umount $MOUNT2 + facet_failover mds + df $MOUNT1 || return 1 + rm $MOUNT1/a + zconf_mount `hostname` $MOUNT2 || error "mount $MOUNT2 fail" + TIER2=$((`date +%s` - BEFORE)) + echo "Second recovery time is $TIER2 sec" + [ $TIER2 -ge $((TIER1 * 2)) ] && \ + error "recovery time is growing $TIER2 > $TIER1" + return 0 +} +run_test 20 "recovery time is not increasing" + equals_msg `basename $0`: test complete, cleaning up SLEEP=$((`date +%s` - $NOW)) [ $SLEEP -lt $TIMEOUT ] && sleep $SLEEP -- GitLab