From eddad0728def47823004e305fe64cee04b3bac8b Mon Sep 17 00:00:00 2001 From: grev <grev> Date: Wed, 30 Jul 2008 20:47:53 +0000 Subject: [PATCH] b=15864 i=Adilger test_6 fix: kill pdsh df; new t-f fn wait_remote_prog --- lustre/tests/insanity.sh | 8 +++++++- lustre/tests/test-framework.sh | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/lustre/tests/insanity.sh b/lustre/tests/insanity.sh index f4b7754225..018770c657 100755 --- a/lustre/tests/insanity.sh +++ b/lustre/tests/insanity.sh @@ -35,6 +35,8 @@ assert_env mds_HOST MDS_MKFS_OPTS assert_env ost_HOST OST_MKFS_OPTS OSTCOUNT assert_env LIVE_CLIENT FSNAME +# FAIL_CLIENTS list should not contain the LIVE_CLIENT +FAIL_CLIENTS=$(echo " $FAIL_CLIENTS " | sed -re "s/\s+$LIVE_CLIENT\s+/ /g") # This can be a regexp, to allow more clients CLIENTS=${CLIENTS:-"`comma_list $LIVE_CLIENT $FAIL_CLIENTS`"} @@ -371,6 +373,7 @@ test_6() { echo "Test Lustre stability after OST failure" client_df & DFPIDA=$! + echo DFPIDA=$DFPIDA sleep 5 #CLIENT Portion @@ -381,17 +384,20 @@ test_6() { echo "Test Lustre stability after CLIENTs failure" client_df & DFPIDB=$! + echo DFPIDB=$DFPIDB sleep 5 #Reintegration echo "Reintegrating OST/CLIENTs" wait_for ost1 start_ost 1 - reintegrate_clients + reintegrate_clients || return 1 sleep 5 + wait_remote_prog df $((TIMEOUT * 3 + 10)) wait $DFPIDA wait $DFPIDB + echo "Verifying mount" [ -z "$(mounted_lustre_filesystems)" ] && return 3 client_df diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 39518fc1df..321d8f3d2a 100644 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -674,6 +674,38 @@ wait_exit_ST () { return 1 } +wait_remote_prog () { + local prog=$1 + local WAIT=0 + local INTERVAL=5 + local rc=0 + + [ "$PDSH" = "no_dsh" ] && return 0 + + while [ $WAIT -lt $2 ]; do + running=$(ps uax | grep "$PDSH.*$prog.*$MOUNT" | grep -v grep) + [ -z "${running}" ] && return 0 + echo "waited $WAIT for: " + echo "$running" + [ $INTERVAL -lt 60 ] && INTERVAL=$((INTERVAL + INTERVAL)) + sleep $INTERVAL + WAIT=$((WAIT + INTERVAL)) + done + local pids=$(ps uax | grep "$PDSH.*$prog.*$MOUNT" | grep -v grep | awk '{print $2}') + [ -z "$pids" ] && return 0 + echo "$PDSH processes still exists after $WAIT seconds. Still running: $pids" + for pid in $pids; do + cat /proc/${pid}/status || true + cat /proc/${pid}/wchan || true + echo "Killing $pid" + kill -9 $pid || true + sleep 1 + ps -P $pid && rc=1 + done + + return $rc +} + client_df() { # not every config has many clients if [ ! -z "$CLIENTS" ]; then -- GitLab