# Mirror of https://github.com/hcartiaux/dotfiles.git
# Synced 2024-10-18 17:25:23 +02:00
# Bash, executable file, 84 lines, 3.7 KiB
# Job + Remaining time

# Convert a Slurm time limit of the form [days-]hours:minutes:seconds into
# a number of seconds printed on stdout.
# Arguments: $1 - time limit string (e.g. "00:05:00" or "2-12:00:00")
# Returns:   0 on success; 1 (and prints nothing) when $1 does not have that
#            form, e.g. "UNLIMITED".
__slurm_timelimit_to_seconds() {
  local re='^(([0-9]+)-)?([0-9]+):([0-9]+):([0-9]+)$'
  [[ $1 =~ $re ]] || return 1
  # 10# forces base 10 so zero-padded fields such as "08" are not read as octal.
  echo $(( 10#${BASH_REMATCH[2]:-0} * 86400
         + 10#${BASH_REMATCH[3]} * 3600
         + 10#${BASH_REMATCH[4]} * 60
         + 10#${BASH_REMATCH[5]} ))
}

# Inside a job, cache the job's wall time (in seconds) once, unless the
# variable was already provided by the environment.
if [[ -n $SLURM_JOB_ID && -z $SLURM_JOB_WALLTIME_SECONDS ]]; then
  # Only the first TimeLimit= field is used so the value stays a single number
  # even if scontrol prints several records.
  SLURM_JOB_WALLTIME_SECONDS=$(
    __slurm_timelimit_to_seconds \
      "$(scontrol show job "$SLURM_JOB_ID" | grep -Po 'TimeLimit=\K[^ ]*' | head -n 1)"
  )
fi
# Print "[SLURM<jobid>-><minutes>]" (no trailing newline) for use in a prompt,
# where <minutes> is the job's remaining wall time in whole minutes.
# Prints nothing when SLURM_JOB_ID is empty.
# Globals: SLURM_JOB_ID, SLURM_JOB_WALLTIME_SECONDS, UID (all read only).
# The job start time is taken from the modification time of the job's cgroup
# directory; when that directory cannot be read, or the wall time is not a
# plain number, "?" is printed instead of a misleading value.
__slurm_ps1_remaining_time() {
  [[ -n $SLURM_JOB_ID ]] || return 0

  # Locals: this runs on every prompt and must not leak variables into the
  # interactive shell.
  local now started remaining='?'
  now=$(date +%s)
  started=$(stat -c %Y "/sys/fs/cgroup/cpu/slurm/uid_${UID}/job_${SLURM_JOB_ID}" 2>/dev/null)

  if [[ $SLURM_JOB_WALLTIME_SECONDS =~ ^[0-9]+$ && $started =~ ^[0-9]+$ ]]; then
    remaining=$(( (SLURM_JOB_WALLTIME_SECONDS - now + started) / 60 ))
  fi

  echo -n "[SLURM$SLURM_JOB_ID->$remaining]"
}
# Expand a SLURM_JOB_CPUS_PER_NODE style list into one CPU count per line,
# one line per node: "72(x2),36" becomes 72, 72, 36.
# Arguments: $1 - comma-separated list of COUNT or COUNT(xREPEAT) items
# Outputs:   one count per line on stdout (nothing for an empty list)
__slurm_expand_cpus_per_node() {
  local re='^([0-9]+)\(x([0-9]+)\)$'
  local item count repeat
  local -a items=()
  IFS=, read -r -a items <<< "$1"
  for item in "${items[@]}"; do
    if [[ $item =~ $re ]]; then
      count=${BASH_REMATCH[1]}
      repeat=${BASH_REMATCH[2]}
    else
      count=$item
      repeat=1
    fi
    while (( repeat-- > 0 )); do
      printf '%s\n' "$count"
    done
  done
}

# When an interactive shell is started inside a job (SLURM_PTY_PORT set), print
# the job ID and each allocated node with its CPU count.
# The subshell keeps the working variables out of the interactive shell and
# "|| true" keeps the status clean when the condition is false.
[[ -n $INTERACTIVE && -n $SLURM_PTY_PORT ]] && (
  echo "[SLURM] SLURM_JOB_ID=${SLURM_JOB_ID}"
  echo "[SLURM] Your nodes are:"
  cpus=()
  while IFS= read -r count; do
    cpus+=("$count")
  done < <(__slurm_expand_cpus_per_node "$SLURM_JOB_CPUS_PER_NODE")
  idx=0
  while IFS= read -r node; do
    # Node i of the expanded host list is paired with entry i of the CPU list.
    echo " ${node}*${cpus[idx]}"
    idx=$((idx + 1))
  done < <(scontrol show hostname "$SLURM_JOB_NODELIST")
) || true
# Shortcuts for common Slurm commands.
# Queue listings (sqlu expands $USER when the alias is used, not when defined).
alias sql=' squeue -la'
alias sqlu='squeue -la -u $USER'
# Job details, job priorities and node state.
alias ssj=' scontrol show job'
alias sp=' sprio'
alias spl=' sprio -l'
alias sd=' sinfo -d'
# Interactive shells on the "interactive" partition, limited to --time=5:0;
# ix additionally passes --x11.
alias i=' srun -p interactive --qos qos-interactive --time=5:0 --pty bash -i'
alias ix=' srun -p interactive --qos qos-interactive --time=5:0 --x11 --pty bash -i'
# Start an interactive bash through srun inside an existing job.
# Arguments: $1 - job ID (required)
#            $2 - node to run on (optional, passed to srun as -w <node>)
# Returns:   1 with a message on stderr when no job ID is given, otherwise
#            the exit status of srun.
sjoin() {
  if [[ -z $1 ]]; then
    echo "Job ID not given." >&2
    return 1
  fi

  local jobid=$1
  # An array keeps "-w" and the node name as two separate arguments and
  # expands to nothing at all when no node was requested.
  local -a node_opt=()
  if [[ -n $2 ]]; then
    node_opt=(-w "$2")
  fi

  srun --jobid "$jobid" "${node_opt[@]}" --pty bash -i
}
# Print a usage summary of the cluster: CPU utilization overall and per
# partition, GPU usage, drained nodes, shared filesystem usage and a count of
# jobs per state/reason.
# Arguments: $1 - -c to show compute only, -s to show storage only
#                 (-a is default == all)
# The job status summary at the end is printed for every option.
irisstat() {
  local opt=${1:-"-a"}
  local rule partition gpu_total gpu_used gpu_info usage df_flags

  # 50-dash separator line, built once.
  printf -v rule '%0.s-' {1..50}

  if [[ $opt == "-c" || $opt == "-a" ]]; then
    sinfo -h --format=%C | awk -F '/' '{printf "Utilization: %.2f%%\n", $1/$4*100}'
    printf '%s\n' "$rule"
    printf "%34s\n" "alloc/idle/other/total"
    printf "%11s %s\n" "IRIS" "$(sinfo -h --format=%C | paste -sd " ")"
    for partition in batch bigmem gpu interactive long; do
      gpu_info=""
      if [[ $partition == "gpu" ]]; then
        # Total number of GPUs is hard-coded; it could be computed with:
        #gpu_total=$(sinfo -h -N -p gpu -o %G | cut -d : -f3 | paste -sd + | bc)
        gpu_total=96
        # Sum of (GPUs per node * node count) over the running GPU jobs.
        gpu_used=$(squeue -h -t R -p gpu -o "%b*%D" | grep gpu | cut -d : -f 2 | sed 's/gpu/1/g' | paste -sd + | bc)
        # bc prints nothing when no GPU job is running: show 0 in that case.
        gpu_info=" GPU: ${gpu_used:-0}/$gpu_total"
      fi
      usage=$(sinfo -h -p "$partition" --format=%C | paste -sd " ")
      printf "%11s %s%s\n" "$partition" "$usage" "$gpu_info"
    done
    printf '%s\n' "$rule"
    echo "Drained nodes: $(sinfo -h -t drain -o '%D')"
    printf '%s\n' "$rule"
  fi

  if [[ $opt == "-s" || $opt == "-a" ]]; then
    # Same table twice: first with -Th, then with -Thi.
    for df_flags in -Th -Thi; do
      df "$df_flags" | grep -E 'Filesystem|nfs|gpfs|lustre' | awk '{printf("%20s %5s %5s %5s %5s\n", $7,$3,$4,$5,$6)}'
    done
  fi

  printf '%s\n' "$rule"
  printf "Jobs status: \n"
  squeue -h -o "%t,%r" | sort | uniq -c | sort -r
}
# Print, for every QOS whose name starts with "qos-" and that has a CPU limit
# in its GrpTRES, the CPU limit, the CPUs used by running jobs and the CPUs
# still free.
# Outputs: a header line followed by one line per QOS, sorted by name.
irisqosusage() {
  local qos tres cpu_max cpu_used cpu_free
  # Picks the cpu=N entry out of a TRES list such as "cpu=200,node=4".
  local cpu_re='(^|,)cpu=([0-9]+)(,|$)'

  printf "%20s %8s %9s %9s\n" "QOS" "CPU Max" "CPU Used" "CPU Free"
  while IFS='|' read -r qos tres; do
    # QOS without a CPU limit (empty GrpTRES or no cpu= entry) are skipped.
    [[ $tres =~ $cpu_re ]] || continue
    cpu_max=${BASH_REMATCH[2]}
    # Sum the CPU counts of the running jobs; yields 0 when there are none.
    cpu_used=$(squeue -h --qos "$qos" -t R -o %C | awk '{ sum += $1 } END { printf "%d\n", sum }')
    cpu_free=$(( 10#$cpu_max - cpu_used ))
    printf "%20s %8s %9s %9s\n" "$qos" "$cpu_max" "$cpu_used" "$cpu_free"
  done < <(sacctmgr -n -P list qos format=name,grptres | grep '^qos-' | sort)
}