1
0
Fork 0
mirror of https://github.com/hcartiaux/dotfiles.git synced 2024-10-18 17:25:23 +02:00
dotfiles/bash/bash_slurm
2020-01-07 17:26:50 +01:00

83 lines
3.5 KiB
Bash
Executable file

# Job + Remaining time

# Convert Slurm time limits read from stdin (one per line, formatted as
# [days-]hours:minutes:seconds) into seconds, one result per line.
# Lines without a ':' (e.g. UNLIMITED) produce no output.
__slurm_timelimit_to_seconds() {
  awk -F: '/:/ {
    days = 0
    hours = $1
    # A "D-" prefix on the first field carries whole days, e.g. 2-12:00:00.
    if (split($1, dh, "-") == 2) {
      days = dh[1]
      hours = dh[2]
    }
    print (days * 86400) + (hours * 3600) + ($2 * 60) + $3
  }'
}

# Inside a job, compute the walltime once and keep it in
# SLURM_JOB_WALLTIME_SECONDS; the value is reused if it is already set.
if [[ -n $SLURM_JOB_ID && -z $SLURM_JOB_WALLTIME_SECONDS ]]; then
  SLURM_JOB_WALLTIME_SECONDS=$(
    scontrol show job "$SLURM_JOB_ID" \
      | grep -Po 'TimeLimit=\K[^ ]*' \
      | __slurm_timelimit_to_seconds
  )
fi
# Print a prompt fragment "[SLURM<jobid>-><minutes>]" giving the minutes left
# before the job's walltime expires. Prints nothing outside a job.
# Globals:  SLURM_JOB_ID, SLURM_JOB_WALLTIME_SECONDS, UID (all read)
# Outputs:  the fragment on stdout, without a trailing newline. When the start
#           time or the walltime is unavailable only "[SLURM<jobid>]" is
#           printed instead of a meaningless number.
__slurm_ps1_remaining_time() {
  [[ -n "$SLURM_JOB_ID" ]] || return 0

  # Locals keep the interactive shell free of helper variables.
  local now job_start remaining_min
  now=$(date +%s)
  # The modification time of the job's cgroup directory is used as the job
  # start time. stderr is silenced on purpose: this runs at every prompt.
  job_start=$(stat -c %Y "/sys/fs/cgroup/cpu/slurm/uid_${UID}/job_${SLURM_JOB_ID}" 2>/dev/null)

  if [[ -z "$job_start" || -z "$SLURM_JOB_WALLTIME_SECONDS" ]]; then
    printf '%s' "[SLURM${SLURM_JOB_ID}]"
    return 0
  fi

  remaining_min=$(((SLURM_JOB_WALLTIME_SECONDS - now + job_start) / 60))
  printf '%s' "[SLURM${SLURM_JOB_ID}->${remaining_min}]"
}
# Expand a SLURM_JOB_CPUS_PER_NODE value into one CPU count per line, in node
# order. Each comma-separated group is either "N" or "N(xM)", the latter
# meaning M consecutive nodes with N CPUs each: "72(x2),36" -> 72, 72, 36.
# Arguments: $1 - the SLURM_JOB_CPUS_PER_NODE string
__slurm_expand_cpus_per_node() {
  local spec=$1 group count repeat k
  local -a groups
  local group_re='^([^(]+)\(x([0-9]+)\)$'

  IFS=, read -ra groups <<<"$spec"
  for group in "${groups[@]}"; do
    count=$group
    repeat=1
    if [[ $group =~ $group_re ]]; then
      count=${BASH_REMATCH[1]}
      repeat=${BASH_REMATCH[2]}
    fi
    for ((k = 0; k < repeat; k++)); do
      printf '%s\n' "$count"
    done
  done
}

# When INTERACTIVE and SLURM_PTY_PORT are both set, print the job id and the
# allocated nodes with their CPU counts. The subshell keeps the helper
# variables out of the login shell; "|| true" keeps a failure here from
# becoming the status of this file.
if [[ -n $INTERACTIVE && -n $SLURM_PTY_PORT ]]; then
  (
    echo "[SLURM] SLURM_JOB_ID=${SLURM_JOB_ID}"
    echo "[SLURM] Your nodes are:"
    cpus=()
    while IFS= read -r count; do
      cpus+=("$count")
    done < <(__slurm_expand_cpus_per_node "$SLURM_JOB_CPUS_PER_NODE")
    idx=0
    # Hostnames are printed one per line by scontrol; pair the Nth hostname
    # with the Nth expanded CPU count.
    for node in $(scontrol show hostname "$SLURM_JOB_NODELIST"); do
      echo " ${node}*${cpus[idx]}"
      idx=$((idx + 1))
    done
  ) || true
fi
# Queue listings: every job (squeue -la), or only the current user's jobs.
# $USER stays single-quoted so it is expanded when the alias is used.
alias sql=' squeue -la' \
  sqlu='squeue -la -u $USER'

# Job details and priority listings (sprio, and sprio -l).
alias ssj=' scontrol show job' \
  sp=' sprio' \
  spl=' sprio -l'

# sinfo restricted by its -d flag.
alias sd=' sinfo -d'

# Interactive shells on the "interactive" partition with a --time=5:0 limit;
# ix additionally passes --x11.
alias i=' srun -p interactive --qos qos-interactive --time=5:0 --pty bash -i' \
  ix=' srun -p interactive --qos qos-interactive --time=5:0 --x11 --pty bash -i'
# Open an interactive bash inside an existing job.
# Arguments: $1 - job id (required)
#            $2 - node to run on (optional, passed to srun as -w <node>)
# Returns:   1 with a message when no job id is given, otherwise srun's status.
sjoin() {
  if [[ -z $1 ]]; then
    echo "Job ID not given."
    return 1
  fi

  local job_id=$1
  # A local array keeps "-w" and the node as two separate words, adds nothing
  # at all when no node is requested, and cannot leak a node from an earlier
  # call into this one.
  local -a node_opt=()
  if [[ -n $2 ]]; then
    node_opt=(-w "$2")
  fi

  srun --jobid "$job_id" "${node_opt[@]}" --pty bash -i
}
# Print a usage summary: CPU utilization overall and per partition, drained
# nodes, shared filesystem usage, and a count of jobs by state and reason.
# Arguments: $1 - -c to show compute only, -s to show storage only,
#                 (-a is default == all)
# Outputs:   the report on stdout. The job status section is always printed.
irisstat() {
  local opt=${1:-"-a"}
  local sep cluster_usage part usage gpu_info gpus_total gpus_used
  printf -v sep '%0.s-' {1..50}

  if [[ "$opt" == "-c" || "$opt" == "-a" ]]; then
    # Query the cluster-wide CPU counts once and reuse them for both lines.
    cluster_usage=$(sinfo -h --format=%C)
    # %C is alloc/idle/other/total; skip the percentage when there is no
    # usable total so awk never divides by zero.
    awk -F '/' 'NF >= 4 && $4 > 0 {printf "Utilization: %.2f%%\n", $1/$4*100}' <<<"$cluster_usage"
    printf '%s\n' "$sep"
    printf "%34s\n" "alloc/idle/other/total"
    printf "%11s %s\n" "IRIS" "$(paste -sd " " <<<"$cluster_usage")"
    for part in batch bigmem gpu interactive long; do
      gpu_info=""
      if [[ "$part" == "gpu" ]]; then
        # Total GPU count is hard-coded; a dynamic alternative was:
        #gpus_total=$(sinfo -h -N -p gpu -o %G | cut -d : -f3 | paste -sd + | bc)
        gpus_total=72
        # Build a "count*nodes+count*nodes..." expression from the running
        # GPU jobs and let bc sum it; a bare "gpu" request counts as 1.
        gpus_used=$(squeue -h -t R -p gpu -o "%b*%D" | grep gpu | cut -d : -f 2 | sed 's/gpu/1/g' | paste -sd + | bc)
        # bc prints nothing when no GPU job is running: show 0 instead.
        gpu_info=" GPU: ${gpus_used:-0}/${gpus_total}"
      fi
      usage=$(sinfo -h -p "$part" --format=%C | paste -sd " ")
      printf "%11s %s%s\n" "$part" "$usage" "$gpu_info"
    done
    printf '%s\n' "$sep"
    echo "Drained nodes: $(sinfo -h -t drain -o '%D')"
    printf '%s\n' "$sep"
  fi

  if [[ "$opt" == "-s" || "$opt" == "-a" ]]; then
    # Keep the df header plus the nfs/gpfs/lustre lines.
    # Columns printed: mount point, size, used, available, use%.
    df -Th | grep -E 'Filesystem|nfs|gpfs|lustre' | awk '{printf("%20s %5s %5s %5s %5s\n", $7,$3,$4,$5,$6)}'
  fi

  printf '%s\n' "$sep"
  printf "Jobs status: \n"
  squeue -h -o "%t,%r" | sort | uniq -c | sort -r
}
# Print, for every QOS whose name starts with "qos-" and that has a CPU limit
# in its GrpTRES, the CPU limit, the CPUs used by running jobs, and the rest.
# Outputs: a header line followed by one line per QOS, sorted, on stdout.
irisqosusage() {
  local qos grptres cpu_max cpu_used cpu_free
  # Pick the cpu entry out of a GrpTRES list such as "cpu=56,gres/gpu=4".
  local cpu_re='(^|,)cpu=([0-9]+)'

  printf "%20s %8s %9s %9s\n" "QOS" "CPU Max" "CPU Used" "CPU Free"
  # sacctmgr -n -P prints "name|grptres" lines without a header.
  while IFS='|' read -r qos grptres; do
    # Skip QOS entries without a CPU limit.
    [[ $grptres =~ $cpu_re ]] || continue
    cpu_max=${BASH_REMATCH[2]}
    # Sum the per-job CPU counts; "+ 0" yields 0 when nothing is running.
    cpu_used=$(squeue -h --qos "$qos" -t R -o %C | awk '{ sum += $1 } END { print sum + 0 }')
    cpu_free=$((cpu_max - cpu_used))
    printf "%20s %8s %9s %9s\n" "$qos" "$cpu_max" "$cpu_used" "$cpu_free"
  done < <(sacctmgr -n -P list qos format=name,grptres | grep '^qos-' | sort)
}