Commit eed1978d authored by d.bertini's avatar d.bertini
Browse files

adding utility script to cleanup local directories

parent e6734266
......@@ -112,3 +112,29 @@ removing local directory: /tmp/dbertini/51281895 on node: lxbk1037
removing local directory: /tmp/dbertini/51281895 on node: lxbk1034
```
## Cleanup scripts
If your job crashed, was cancelled, or otherwise ended before it could clean up after itself, you will need to remove the
`/tmp` directories it left behind on the cluster nodes it used.
To ease this process, a cleanup script is provided in the `utils` directory.
It takes two arguments:
- `-d <days>`: how many days to look back in time
- `-j <job_name>`: the name of the jobs whose nodes should be cleaned
For example, to clean up the `/tmp` directories used by all jobs named `artis_l` during the last 2 days:
```
./cleanup.sh -d 2 -j artis_l
```
giving the output:
```
Cleanup jobs with job_name: artis_l from date: 2022-05-28
cleanup will execute on nodelist: lxbk[1047-1056] corresponding to: 10 nodes.
Submitted batch job 52711719
```
#!/bin/bash
#
# cleanup.sh - clean up the /tmp directories left behind by earlier jobs.
#
# Asks Slurm accounting for every job named <job_name> since <days> days ago,
# collects the nodes those jobs ran on, and submits one batch job that runs
# ./rm_tmp.sh once on each of those nodes.
#
# Usage: cleanup.sh -d <days> -j <job_name>
#   -d <days>      how many days to look back in time (required)
#   -j <job_name>  name of the jobs whose nodes should be cleaned (required)
#
# Exit status: 2 on a usage error, 1 when date/sacct/sinfo fails or returns
# something unusable, 0 when there is nothing to clean, otherwise the exit
# status of sbatch.

OPTIND=1

usage() {
  echo "Usage: $0 [-d <days> -j <job_name>]" 1>&2
}

die() {
  echo "$0: $*" 1>&2
  exit 1
}

# Start from a clean slate so that stray d/j variables inherited from the
# environment cannot stand in for missing options.
d=
j=
while getopts ":d:j:" o; do
  case "${o}" in
    d)
      d=${OPTARG}
      ;;
    j)
      j=${OPTARG}
      ;;
    *)
      # Unknown option or missing option argument: stop instead of carrying on.
      usage
      exit 2
      ;;
  esac
done
shift $((OPTIND - 1))

# Both options are mandatory: without -d the date below is meaningless and
# without -j the sacct query has no job name to filter on.
if [[ -z "${d}" || -z "${j}" ]]; then
  usage
  exit 2
fi

pdate=$(date -d "${d} day ago" '+%Y-%m-%d') \
  || die "cannot derive a start date from -d '${d}'"
echo "Cleanup jobs with job_name: " "$j" "from date: " "$pdate"

# One row per job allocation (-X). -P prints the bare field value, so the node
# list is neither padded nor cut off at the default column width.
node_rows=$(sacct -n -X -P --name "$j" -S "$pdate" --format NodeList) \
  || die "sacct could not list the nodes of jobs named '${j}'"

# Join the per-job node lists into one comma separated list.
nlist=
while IFS= read -r used_nodes; do
  # NOTE(review): jobs that never started are expected to report
  # "None assigned" here - confirm against the local Slurm version.
  if [[ -z "$used_nodes" || "$used_nodes" == "None assigned" ]]; then
    continue
  fi
  nlist="${nlist:+${nlist},}${used_nodes}"
done <<<"$node_rows"

# Without this guard an empty filter would be handed to sinfo and sbatch.
if [[ -z "$nlist" ]]; then
  echo "No nodes found for job_name: $j from date: $pdate, nothing to clean up." 1>&2
  exit 0
fi

# get the number of nodes and the compact node list in a single query.
# -o with %D/%N prints the values unpadded, unlike -O whose fields default to
# fixed 20-character columns.
sinfo_out=$(sinfo -h -n "${nlist}" -o '%D %N') \
  || die "sinfo could not resolve node list '${nlist}'"
mapfile -t node_groups <<<"$sinfo_out"
if ((${#node_groups[@]} != 1)); then
  die "expected one line from sinfo for '${nlist}', got ${#node_groups[@]}: ${sinfo_out}"
fi
read -r t_nodes n_list <<<"${node_groups[0]}"
if ! [[ "$t_nodes" =~ ^[1-9][0-9]*$ && -n "$n_list" ]]; then
  die "unexpected sinfo output: '${node_groups[0]}'"
fi

echo "cleanup will execute on nodelist: " "$n_list" " corresponding to: " "$t_nodes" " nodes."

# A batch script itself only runs on the first node of the allocation, so
# rm_tmp.sh is launched through srun to get one task on every allocated node.
# Everything is quoted: a node list such as lxbk[1047-1056] is a valid glob.
sbatch -J artis_c \
  --nodes="$t_nodes" \
  --tasks-per-node=1 \
  --partition=main \
  --time=08:00:00 \
  --mail-type=ALL \
  --mail-user="${USER}@gsi.de" \
  --nodelist="$n_list" \
  --output='%j.log' \
  --wrap='srun ./rm_tmp.sh'
#!/bin/bash
#
# cleanup.sh - clean up the /tmp directories left behind by earlier jobs.
#
# Asks Slurm accounting for every job named <job_name> since <days> days ago,
# collects the nodes those jobs ran on, and submits one batch job that runs
# ./rm_tmp.sh once on each of those nodes.
#
# Usage: cleanup.sh -d <days> -j <job_name>
#   -d <days>      how many days to look back in time (required)
#   -j <job_name>  name of the jobs whose nodes should be cleaned (required)
#
# Exit status: 2 on a usage error, 1 when date/sacct/sinfo fails or returns
# something unusable, 0 when there is nothing to clean, otherwise the exit
# status of sbatch.

OPTIND=1

usage() {
  echo "Usage: $0 [-d <days> -j <job_name>]" 1>&2
}

die() {
  echo "$0: $*" 1>&2
  exit 1
}

# Start from a clean slate so that stray d/j variables inherited from the
# environment cannot stand in for missing options.
d=
j=
while getopts ":d:j:" o; do
  case "${o}" in
    d)
      d=${OPTARG}
      ;;
    j)
      j=${OPTARG}
      ;;
    *)
      # Unknown option or missing option argument: stop instead of carrying on.
      usage
      exit 2
      ;;
  esac
done
shift $((OPTIND - 1))

# Both options are mandatory: without -d the date below is meaningless and
# without -j the sacct query has no job name to filter on.
if [[ -z "${d}" || -z "${j}" ]]; then
  usage
  exit 2
fi

pdate=$(date -d "${d} day ago" '+%Y-%m-%d') \
  || die "cannot derive a start date from -d '${d}'"
echo "Cleanup jobs with job_name: " "$j" "from date: " "$pdate"

# One row per job allocation (-X). -P prints the bare field value, so the node
# list is neither padded nor cut off at the default column width.
node_rows=$(sacct -n -X -P --name "$j" -S "$pdate" --format NodeList) \
  || die "sacct could not list the nodes of jobs named '${j}'"

# Join the per-job node lists into one comma separated list.
nlist=
while IFS= read -r used_nodes; do
  # NOTE(review): jobs that never started are expected to report
  # "None assigned" here - confirm against the local Slurm version.
  if [[ -z "$used_nodes" || "$used_nodes" == "None assigned" ]]; then
    continue
  fi
  nlist="${nlist:+${nlist},}${used_nodes}"
done <<<"$node_rows"

# Without this guard an empty filter would be handed to sinfo and sbatch.
if [[ -z "$nlist" ]]; then
  echo "No nodes found for job_name: $j from date: $pdate, nothing to clean up." 1>&2
  exit 0
fi

# get the number of nodes and the compact node list in a single query.
# -o with %D/%N prints the values unpadded, unlike -O whose fields default to
# fixed 20-character columns.
sinfo_out=$(sinfo -h -n "${nlist}" -o '%D %N') \
  || die "sinfo could not resolve node list '${nlist}'"
mapfile -t node_groups <<<"$sinfo_out"
if ((${#node_groups[@]} != 1)); then
  die "expected one line from sinfo for '${nlist}', got ${#node_groups[@]}: ${sinfo_out}"
fi
read -r t_nodes n_list <<<"${node_groups[0]}"
if ! [[ "$t_nodes" =~ ^[1-9][0-9]*$ && -n "$n_list" ]]; then
  die "unexpected sinfo output: '${node_groups[0]}'"
fi

echo "cleanup will execute on nodelist: " "$n_list" " corresponding to: " "$t_nodes" " nodes."

# A batch script itself only runs on the first node of the allocation, so
# rm_tmp.sh is launched through srun to get one task on every allocated node.
# Everything is quoted: a node list such as lxbk[1047-1056] is a valid glob.
sbatch -J artis_c \
  --nodes="$t_nodes" \
  --tasks-per-node=1 \
  --partition=main \
  --time=08:00:00 \
  --mail-type=ALL \
  --mail-user="${USER}@gsi.de" \
  --nodelist="$n_list" \
  --output='%j.log' \
  --wrap='srun ./rm_tmp.sh'
#!/bin/bash
#
# rm_tmp.sh - remove the calling user's local directory /tmp/$USER on the node
# this script is executed on. It takes no arguments and is the script that
# cleanup.sh hands to sbatch.
#
# Exits non-zero without touching anything when USER is unset or empty.

myhost=$(hostname)

# check if /tmp/$USER exist on the node
# The :? guard aborts the script when USER is unset or empty; otherwise the
# path would collapse to "/tmp/" and the rm below would target all of /tmp.
MYTMP="/tmp/${USER:?USER must be set to locate the local directory}"

echo "look for local directory: " "$MYTMP" on Node: "$myhost"
if [ -d "$MYTMP" ]; then
  echo "found a local directory: " "$MYTMP" on Node: "$myhost" " removing ..."
  # Quoted and preceded by -- so the path is always taken as one literal operand.
  rm -rf -- "$MYTMP"
fi
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment