Skip to content
Snippets Groups Projects
Commit aa4f18eb authored by Elena Gryaznova
Browse files

b=16488

i=Oleg.Drokin
new runracer script
parent c930adb5
No related branches found
No related tags found
No related merge requests found
#!/bin/bash
#
# Run the "racer" script on a set of client nodes for a bounded time and
# clean up whatever it leaves behind.
#
# Environment read by this prologue:
#   LUSTRE    root of the tree; defaults to the parent of this script's dir
#   CONFIG    config file to source; defaults to $LUSTRE/tests/cfg/$NAME.sh
#   CLIENTS   comma-separated client list; defaults to $HOSTNAME
#   DURATION  run time in seconds; defaults to 120
#
#set -vx
set -e

LUSTRE=${LUSTRE:-$(cd "$(dirname "$0")/.."; echo "$PWD")}
. "$LUSTRE/tests/test-framework.sh"
init_test_env "$@"
. "${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}"

# With "set -e" a failing command substitution in a plain assignment aborts
# the script, which would skip the diagnostic below; "|| true" keeps the
# lookup failure non-fatal so the explicit check can report it.
racer=$(command -v racer.sh) || true
if [ -z "$racer" ]; then
    echo racer is not installed
    exit 1
fi

CLIENTS=${CLIENTS:-$HOSTNAME}
# Working directory handed to racer; it is also the pattern used later to
# find leftover racer processes in "ps" output.
RDIR=$DIR/racer
mkdir -p "$RDIR"
DURATION=${DURATION:-120}
# assert_env is not defined in this file (presumably test-framework.sh).
assert_env CLIENTS
#######################################
# Start a background watchdog that sends SIGALRM to this script.
# Globals:   TIMERPID (written) - PID of the background watchdog job
# Arguments: $1 - delay before the signal is sent (passed to sleep)
# Outputs:   prints "TIMERPID=<pid>" to stdout
#######################################
timer_on () {
    # $$ keeps the PID of the main shell even inside the background job,
    # so the ALRM is delivered to the script itself.
    { sleep "$1" && kill -s ALRM $$; } &
    TIMERPID=$!
    echo TIMERPID=$TIMERPID
}
#######################################
# Wait for racer processes on the clients to exit, then TERM any that remain.
# Globals:   CLIENTS (read) - comma-separated client list
#            RDIR    (read) - racer directory; used as the "ps" match pattern
#            racer   (read) - racer program passed to wait_remote_prog
#            RC      (written) - set to 1 if a process survives the TERM
# Outputs:   progress messages and process listings on stdout
# Notes:     do_nodes, do_node and wait_remote_prog are not defined in this
#            file (presumably they come from test-framework.sh).
#######################################
do_racer_cleanup () {
    # Reset the EXIT trap to its default so it does not fire during cleanup.
    trap - EXIT

    local WAIT=0
    local INTERVAL=5
    local clients=$CLIENTS
    local running pids pid C
    local rc=0

    echo "DOING RACER CLEANUP ... "

    # 1. Give racer a chance to kill all of its own processes.
    # FIXME: it is unclear how long racer needs for that; 80 seconds is
    # sometimes enough for 2 clients, sometimes it takes more than 150.
    while [ "$WAIT" -lt 90 ]; do
        # grep exits non-zero when nothing is left; "|| true" keeps that
        # from being treated as a failure.
        running=$(do_nodes "$clients" "ps uax | grep $RDIR " |
            grep -E -v "(acceptance|grep|pdsh|bash)" || true)
        if [ -z "$running" ]; then
            rc=0
            break
        fi
        echo "clients $clients are still running the racer processes. Waited $WAIT secs"
        # Quoted so the listing keeps one process per line.
        echo "$running"
        rc=1
        # Double the polling interval until it reaches 40 seconds.
        if [ "$INTERVAL" -lt 40 ]; then
            INTERVAL=$((INTERVAL + INTERVAL))
        fi
        sleep "$INTERVAL"
        WAIT=$((WAIT + INTERVAL))
    done

    # 2. Kill the remaining processes.
    if [ "$rc" -ne 0 ]; then
        for C in ${clients//,/ }; do
            pids=$(do_node "$C" "ps uax | grep $RDIR " |
                grep -E -v "(acceptance|grep|PATH)" | awk '{print $2}' || true)
            if [ -n "$pids" ]; then
                echo "client $C still running racer processes after $WAIT seconds. Killing $pids"
                # Informational listing only. The processes may have exited
                # since $pids was collected, in which case grep returns 1;
                # that must not abort the cleanup under "set -e".
                do_node "$C" "ps uax | grep $RDIR " |
                    grep -E -v "(acceptance|grep|PATH)" || true
                # $pids is intentionally unquoted: one argument per PID.
                do_node "$C" kill -TERM $pids || true
                # Give the processes time to die.
                sleep 2
                # 3. Check that the processes were killed; flag an error
                # in RC if any of them still exists. "-p" is the option
                # that selects by PID.
                for pid in $pids; do
                    if do_node "$C" "ps -p $pid"; then
                        RC=1
                    fi
                done
            else
                echo "All processes on client $C exited after $WAIT seconds. OK."
            fi
        done
    else
        echo "No racer processes running after $WAIT seconds. OK."
        wait_remote_prog "$racer" 10
    fi
}
#######################################
# Stop whichever of the racer job / watchdog timer is still running, then
# run do_racer_cleanup.
# Globals:   timeout  (read) - equals "timeout" when the ALRM handler called us
#            RACERPID (read) - PID of the background racer job
#            TIMERPID (read) - PID of the background watchdog
#            DURATION (read) - only used in the progress message
#######################################
racer_cleanup () {
    if [ "$timeout" = "timeout" ]; then
        echo "$timeout killing RACERPID=$RACERPID"
        kill "$RACERPID" || true
        # Give racer a chance to kill its own processes.
        sleep 2
    else
        echo "Racer completed before DURATION=$DURATION expired. Cleaning up..."
        # The watchdog may already be gone; under "set -e" a failing kill
        # would otherwise exit the script before the cleanup below runs.
        kill "$TIMERPID" || true
    fi
    do_racer_cleanup
}
#######################################
# Handler for the timer's ALRM: mark the run as timed out, clean up and
# terminate the script.
# Globals:   timeout (written) - set to "timeout" for racer_cleanup
#            RC      (read)    - used as the script's exit status
# Outputs:   "<script>: completed <RC>" on stdout
#######################################
racer_timeout () {
    timeout="timeout"
    racer_cleanup
    printf '%s: completed %s\n' "$0" "$RC"
    exit $RC
}
# run racer
#
# Sequence: install the ALRM handler, arm the timer, start racer on the
# clients in the background and wait for it. Either the wait returns first
# (normal path below) or the ALRM arrives first and racer_timeout takes over
# and exits the script.
log "Start racer on clients: $CLIENTS DURATION=$DURATION"
# RC is the script's exit status; do_racer_cleanup may set it to 1.
RC=0
trap racer_timeout ALRM
# The timer fires 5 seconds after DURATION has elapsed.
timer_on $((DURATION + 5))
do_nodes $CLIENTS "DURATION=$DURATION $racer $RDIR" &
RACERPID=$!
echo RACERPID=$RACERPID
# A non-zero status from the racer job is recorded as RC=2.
wait $RACERPID || RC=2
racer_cleanup
echo "$0: completed $RC"
exit $RC
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment