diff --git a/lustre/tests/insanity.sh b/lustre/tests/insanity.sh
index f4b775422543d6a0fe23573a249ebfa52c63794a..018770c6573ca4fafad64255ec6c3e51533c3a3b 100755
--- a/lustre/tests/insanity.sh
+++ b/lustre/tests/insanity.sh
@@ -35,6 +35,8 @@ assert_env mds_HOST MDS_MKFS_OPTS
 assert_env ost_HOST OST_MKFS_OPTS OSTCOUNT
 assert_env LIVE_CLIENT FSNAME
 
+# Drop LIVE_CLIENT from FAIL_CLIENTS: list is space-padded so sed can match it as a whole whitespace-delimited word
+FAIL_CLIENTS=$(echo " $FAIL_CLIENTS " | sed -re "s/\s+$LIVE_CLIENT\s+/ /g")
 
 # This can be a regexp, to allow more clients
 CLIENTS=${CLIENTS:-"`comma_list $LIVE_CLIENT $FAIL_CLIENTS`"}
@@ -371,6 +373,7 @@ test_6() {
     echo "Test Lustre stability after OST failure"
     client_df &
     DFPIDA=$!
+    echo DFPIDA=$DFPIDA
     sleep 5
 
     #CLIENT Portion
@@ -381,17 +384,20 @@ test_6() {
     echo "Test Lustre stability after CLIENTs failure"
     client_df &
     DFPIDB=$!
+    echo DFPIDB=$DFPIDB
     sleep 5
     
     #Reintegration
     echo "Reintegrating OST/CLIENTs"
     wait_for ost1
     start_ost 1
-    reintegrate_clients
+    reintegrate_clients || return 1
     sleep 5 
 
+    wait_remote_prog df $((TIMEOUT * 3 + 10)) 
     wait $DFPIDA
     wait $DFPIDB
+
     echo "Verifying mount"
     [ -z "$(mounted_lustre_filesystems)" ] && return 3
     client_df
diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh
index 39518fc1df9f5434dc5560c18ceef129029b0cea..321d8f3d2af1bd6e569063f7da97397bf3ef0500 100644
--- a/lustre/tests/test-framework.sh
+++ b/lustre/tests/test-framework.sh
@@ -674,6 +674,38 @@ wait_exit_ST () {
     return 1
 }
 
+# Wait until no local "$PDSH ... <prog> ... $MOUNT" command shows up in ps.
+# $1 - program name to match; $2 - maximum number of seconds to wait.
+# Returns 0 when nothing matches (or PDSH is "no_dsh").  On timeout the
+# leftovers are dumped and SIGKILLed; returns 1 if ps still finds one.
+wait_remote_prog () {
+    local prog=$1 max_wait=${2:-0}
+    local WAIT=0 INTERVAL=5 rc=0 running pids pid
+
+    [ "$PDSH" = "no_dsh" ] && return 0
+    while [ $WAIT -lt $max_wait ]; do
+        running=$(ps uax | grep "$PDSH.*$prog.*$MOUNT" | grep -v grep)
+        [ -z "${running}" ] && return 0
+        echo "waited $WAIT for: "
+        echo "$running"
+        [ $INTERVAL -lt 60 ] && INTERVAL=$((INTERVAL + INTERVAL))  # doubles; stops growing at 80
+        sleep $INTERVAL
+        WAIT=$((WAIT + INTERVAL))
+    done
+    pids=$(ps uax | grep "$PDSH.*$prog.*$MOUNT" | grep -v grep | awk '{print $2}')
+    [ -z "$pids" ] && return 0
+    echo "$PDSH processes still exists after $WAIT seconds.  Still running: $pids"
+    for pid in $pids; do
+        cat /proc/${pid}/status || true
+        cat /proc/${pid}/wchan || true
+        echo "Killing $pid"
+        kill -9 $pid || true
+        sleep 1
+        ps -p $pid && rc=1    # -p selects by PID (procps -P only adds a PSR column)
+    done
+    return $rc
+}
+
 client_df() {
     # not every config has many clients
     if [ ! -z "$CLIENTS" ]; then