From 45569f8327d70593c2a77bf61819e299fdd1d4d5 Mon Sep 17 00:00:00 2001
From: tappro <tappro>
Date: Wed, 24 Sep 2008 07:50:18 +0000
Subject: [PATCH] - prevent the growing of recovery timeout   b:16389  
 i:nathan, rread

---
 lustre/ptlrpc/service.c     | 20 +++++++++++++-------
 lustre/tests/replay-dual.sh | 29 +++++++++++++++++++++++++++++
 2 files changed, 42 insertions(+), 7 deletions(-)

diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c
index 59cfe9799e..aaf891558f 100644
--- a/lustre/ptlrpc/service.c
+++ b/lustre/ptlrpc/service.c
@@ -699,15 +699,21 @@ static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req,
                 RETURN(-ENOSYS);
         }
 
-        if (extra_time) {
-                /* Fake our processing time into the future to ask the
-                 * clients for some extra amount of time */
-                extra_time += cfs_time_current_sec() -
-                              req->rq_arrival_time.tv_sec;
-                at_add(&svc->srv_at_estimate, extra_time);
+        if (req->rq_export && req->rq_export->exp_obd->obd_recovering) {
+                newdl = cfs_time_current_sec() +
+                        req->rq_export->exp_obd->obd_recovery_timeout;
+        } else {
+                if (extra_time) {
+                        /* Fake our processing time into the future to ask the
+                         * clients for some extra amount of time */
+                        extra_time += cfs_time_current_sec() -
+                                      req->rq_arrival_time.tv_sec;
+                        at_add(&svc->srv_at_estimate, extra_time);
+                }
+                newdl = req->rq_arrival_time.tv_sec +
+                        at_get(&svc->srv_at_estimate);
         }
 
-        newdl = req->rq_arrival_time.tv_sec + at_get(&svc->srv_at_estimate);
         if (req->rq_deadline >= newdl) {
                 /* We're not adding any time, no need to send an early reply
                    (e.g. maybe at adaptive_max) */
diff --git a/lustre/tests/replay-dual.sh b/lustre/tests/replay-dual.sh
index cf397cba53..ffa2186d29 100755
--- a/lustre/tests/replay-dual.sh
+++ b/lustre/tests/replay-dual.sh
@@ -369,6 +369,35 @@ test_19() { # Bug 10991 - resend of open request does not fail assertion.
 }
 run_test 19 "resend of open request"
 
+test_20() { # Bug 16389 - recovery time must not grow between failovers
+    BEFORE=`date +%s` # start of the first failover round
+    replay_barrier mds
+    touch $MOUNT1/a
+    touch $MOUNT2/b
+    umount $MOUNT2 # second mount stays unmounted across the failover
+    facet_failover mds
+    df $MOUNT1 || return 1 # first access through $MOUNT1 after the failover
+    rm $MOUNT1/a
+    zconf_mount `hostname` $MOUNT2 || error "mount $MOUNT2 fail"
+    TIER1=$((`date +%s` - BEFORE)) # wall-clock seconds for round one
+    echo "First recovery time is $TIER1 sec"
+    BEFORE=`date +%s` # start of the second, identical round
+    replay_barrier mds
+    touch $MOUNT1/a
+    touch $MOUNT2/b
+    umount $MOUNT2
+    facet_failover mds
+    df $MOUNT1 || return 1
+    rm $MOUNT1/a
+    zconf_mount `hostname` $MOUNT2 || error "mount $MOUNT2 fail"
+    TIER2=$((`date +%s` - BEFORE)) # fails below once this is >= 2 * TIER1
+    echo "Second recovery time is $TIER2 sec"
+    [ $TIER2 -ge $((TIER1 * 2)) ] && \
+        error "recovery time is growing: $TIER2 >= 2 * $TIER1"
+    return 0
+}
+run_test 20 "recovery time is not increasing"
+
 equals_msg `basename $0`: test complete, cleaning up
 SLEEP=$((`date +%s` - $NOW))
 [ $SLEEP -lt $TIMEOUT ] && sleep $SLEEP
-- 
GitLab