From bba3020484ca94d164c1e15869626ec8a8d96e2d Mon Sep 17 00:00:00 2001
From: grev <grev>
Date: Thu, 6 Mar 2008 17:49:58 +0000
Subject: [PATCH] b=14957 i=Adilger

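Fix conf-sanity test_23a and re-enable it (drop 23a from ALWAYS_EXCEPT):
unmount the client directly with "umount -f", send SIGTERM to the
mount.lustre process, and poll for the mount processes to exit instead of
using "wait", because the mount PID is not a child of the test shell.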
---
 lustre/tests/conf-sanity.sh | 40 ++++++++++++++++++++++++++++---------
 1 file changed, 31 insertions(+), 9 deletions(-)

diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh
index b610870555..3e5a48f350 100644
--- a/lustre/tests/conf-sanity.sh
+++ b/lustre/tests/conf-sanity.sh
@@ -15,8 +15,8 @@ ONLY=${ONLY:-"$*"}
 #              xml xml xml xml xml xml dumb
 MOUNTCONFSKIP="10  11  12  13  13b 14  15 "
 
-# bug number for skipped test:                     14957  13369 12743
-ALWAYS_EXCEPT=" $CONF_SANITY_EXCEPT $MOUNTCONFSKIP 23a    34a   36"
+# bug number for skipped test:                     13369 12743
+ALWAYS_EXCEPT=" $CONF_SANITY_EXCEPT $MOUNTCONFSKIP 34a   36"
 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
 
 SRCDIR=`dirname $0`
@@ -825,25 +825,47 @@ test_23a() {	# was test_23
         # fail mds
 	stop mds   
 	# force down client so that recovering mds waits for reconnect
-	zconf_umount `hostname` $MOUNT -f
+	local running=$(grep -c $MOUNT /proc/mounts) || true
+    	if [ $running -ne 0 ]; then
+        	echo "Stopping client $MOUNT (opts: -f)"
+        	umount -f $MOUNT
+    	fi
+
 	# enter recovery on mds
 	start_mds
 	# try to start a new client
 	mount_client $MOUNT &
-	MOUNT_PID=$!
 	sleep 5
+	MOUNT_PID=$(ps -ef | grep "t lustre" | grep -v grep | awk '{print $2}')
 	MOUNT_LUSTRE_PID=`ps -ef | grep mount.lustre | grep -v grep | awk '{print $2}'`
 	echo mount pid is ${MOUNT_PID}, mount.lustre pid is ${MOUNT_LUSTRE_PID}
 	ps --ppid $MOUNT_PID
 	ps --ppid $MOUNT_LUSTRE_PID
 	# FIXME why o why can't I kill these? Manual "ctrl-c" works...
-	kill -TERM $MOUNT_PID
+	kill -TERM $MOUNT_LUSTRE_PID
 	echo "waiting for mount to finish"
 	ps -ef | grep mount
-	wait $MOUNT_PID
-
-	stop_mds
-	stop_ost
+	# we can not wait $MOUNT_PID because it is not a child of this shell
+	local PID1
+	local PID2
+	local WAIT=0
+	local MAX_WAIT=20
+	local sleep=1
+	while [ "$WAIT" -lt "$MAX_WAIT" ]; do
+		sleep $sleep
+		PID1=$(ps -ef | awk '{print $2}' | grep -w $MOUNT_PID)
+		PID2=$(ps -ef | awk '{print $2}' | grep -w $MOUNT_LUSTRE_PID)
+		echo PID1=$PID1
+		echo PID2=$PID2
+		[ -z "$PID1" -a -z "$PID2" ] && break
+		echo "waiting for mount to finish ... "
+		WAIT=$(( WAIT + sleep))
+	done
+	[ "$WAIT" -eq "$MAX_WAIT" ] && error "MOUNT_PID $MOUNT_PID and \
+		MOUNT_LUSTRE_PID $MOUNT_LUSTRE_PID still not killed in $WAIT secs"
+	ps -ef | grep mount
+	stop_mds || error
+	stop_ost || error
 }
 run_test 23a "interrupt client during recovery mount delay"
 
-- 
GitLab