diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c index 59cfe9799e9ec7ebfe154b0c399d8f77724709e8..aaf891558f2befc55237dff63a49774289026318 100644 --- a/lustre/ptlrpc/service.c +++ b/lustre/ptlrpc/service.c @@ -699,15 +699,21 @@ static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req, RETURN(-ENOSYS); } - if (extra_time) { - /* Fake our processing time into the future to ask the - * clients for some extra amount of time */ - extra_time += cfs_time_current_sec() - - req->rq_arrival_time.tv_sec; - at_add(&svc->srv_at_estimate, extra_time); + if (req->rq_export && req->rq_export->exp_obd->obd_recovering) { + newdl = cfs_time_current_sec() + + req->rq_export->exp_obd->obd_recovery_timeout; + } else { + if (extra_time) { + /* Fake our processing time into the future to ask the + * clients for some extra amount of time */ + extra_time += cfs_time_current_sec() - + req->rq_arrival_time.tv_sec; + at_add(&svc->srv_at_estimate, extra_time); + } + newdl = req->rq_arrival_time.tv_sec + + at_get(&svc->srv_at_estimate); } - newdl = req->rq_arrival_time.tv_sec + at_get(&svc->srv_at_estimate); if (req->rq_deadline >= newdl) { /* We're not adding any time, no need to send an early reply (e.g. maybe at adaptive_max) */ diff --git a/lustre/tests/replay-dual.sh b/lustre/tests/replay-dual.sh index cf397cba533eec9bed547560eb73988790bf91db..ffa2186d29199d71858f5821049ff33c5d54c679 100755 --- a/lustre/tests/replay-dual.sh +++ b/lustre/tests/replay-dual.sh @@ -369,6 +369,35 @@ test_19() { # Bug 10991 - resend of open request does not fail assertion. 
}
 run_test 19 "resend of open request"
 
+# Run two identical MDS failover cycles (see #16389) and check that the
+# second one is not much slower than the first.  Each cycle is timed from
+# replay_barrier until $MOUNT2 is mounted again; the test fails when the
+# second cycle takes at least twice as long as the first.
+test_20() { #16389
+    local before elapsed round tier1=0
+    for round in First Second; do
+        before=$(date +%s)
+        replay_barrier mds
+        touch "$MOUNT1/a"
+        touch "$MOUNT2/b"
+        # Unmount the second client before failing over the MDS; it is
+        umount "$MOUNT2"    # remounted once $MOUNT1 responds to df again.
+        facet_failover mds
+        df "$MOUNT1" || return 1
+        rm "$MOUNT1/a"
+        zconf_mount "$(hostname)" "$MOUNT2" || error "mount $MOUNT2 fail"
+        elapsed=$(($(date +%s) - before))
+        echo "$round recovery time is $elapsed sec"
+        # Keep the first cycle's duration as the baseline.
+        [ "$round" = First ] && tier1=$elapsed
+    done
+    # Here $elapsed holds the duration of the second cycle.
+    [ "$elapsed" -ge $((tier1 * 2)) ] &&
+        error "recovery time is growing $elapsed > $tier1"
+    return 0
+}
+run_test 20 "recovery time is not increasing"
+
 equals_msg `basename $0`: test complete, cleaning up
 SLEEP=$((`date +%s` - $NOW))
 [ $SLEEP -lt $TIMEOUT ] && sleep $SLEEP