From de8d4ef652ad7474e8deefbefbe22fd654ff76ea Mon Sep 17 00:00:00 2001
From: ericm <ericm>
Date: Tue, 25 Mar 2008 06:24:10 +0000
Subject: [PATCH] branch=b1_6 ping_evictor don't evict client too eagerly just
 because it's idle. b=15192 r=nathan r=adilger

---
 lustre/include/obd_support.h   | 8 ++++++--
 lustre/tests/recovery-small.sh | 4 ++--
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h
index ded394addb..b42a71efa6 100644
--- a/lustre/include/obd_support.h
+++ b/lustre/include/obd_support.h
@@ -63,8 +63,12 @@ extern unsigned int obd_alloc_fail_rate;
 #define OBD_RECOVERY_FACTOR (5 / 2) /* times obd_timeout */
 /* Change recovery-small 26b time if you change this */
 #define PING_INTERVAL max(obd_timeout / 4, 1U)
-/* Client may skip 1 ping; wait for 2.5 */
-#define PING_EVICT_TIMEOUT (PING_INTERVAL * 5 / 2)
+/* Client may skip 1 ping; we must wait at least 2.5 ping intervals. But with
+ * multiple failover targets the client only pings one server at a time, and
+ * pings can be lost on a loaded network. Since eviction has serious
+ * consequences, and there's no urgent need to evict a client just because
+ * it's idle, we should be very conservative here. */
+#define PING_EVICT_TIMEOUT (PING_INTERVAL * 6)
 #define DISK_TIMEOUT 50          /* Beyond this we warn about disk speed */
 #define CONNECTION_SWITCH_MIN 5U /* Connection switching rate limiter */
  /* Max connect interval for nonresponsive servers; ~50s to avoid building up
diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh
index c974ffe93e..6c106dbf0d 100755
--- a/lustre/tests/recovery-small.sh
+++ b/lustre/tests/recovery-small.sh
@@ -639,8 +639,8 @@ test_26b() {      # bug 10140 - evict dead exports by pinger
 	# evictor takes PING_EVICT_TIMEOUT + 3 * PING_INTERVAL to evict.  
         # But if there's a race to start the evictor from various obds, 
         # the loser might have to wait for the next ping.
-	echo Waiting for $(($TIMEOUT * 8)) secs
-	sleep $(($TIMEOUT * 8))
+	echo Waiting for $(($TIMEOUT * 3)) secs
+	sleep $(($TIMEOUT * 3))
         OST_NEXP2="`do_facet ost1 lctl get_param -n $OST_FILE | cut -d' ' -f2`"
         MDS_NEXP2="`do_facet mds lctl get_param -n $MDS_FILE | cut -d' ' -f2`"
 	echo ending with $OST_NEXP2 OST and $MDS_NEXP2 MDS exports
-- 
GitLab