#!/bin/bash
#
# Warewulf Node Health Check -- Node Offlining Helper
#
# Michael Jennings <mej@lbl.gov>
# 16 September 2011
#
# $Id: node-mark-offline 1194 2012-11-07 23:24:26Z mej $
#

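# This script is a simple pbsnodes wrapper that the node health check
# can run in the background to mark nodes offline.  It takes the node
# name as its first argument and the note text as the remaining
# arguments.  It first obtains the current node state information to
# avoid overwriting notes which were not placed by NHC:  a node that is
# already offline with no note is left untouched (unless
# IGNORE_EMPTY_NOTE is 1), and on a node that is down, offline, or
# unknown, an existing note that does not start with the NHC leader is
# kept in place of the one supplied.  Otherwise, the node is marked
# offline with the note supplied.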

# Tunables.  Each of the first four may be overridden from the
# environment; an unset or empty value falls back to the default here.
: "${PBSNODES:=pbsnodes}"                # command used to query and offline nodes
: "${PBSNODES_LIST_ARGS:=-n -l all}"     # arguments for the node state query
: "${PBSNODES_OFFLINE_ARGS:=-o -N}"      # arguments placed before the note text
: "${IGNORE_EMPTY_NOTE:=0}"              # "1" = act even if offline with no note
# First word of every note written by this script; used to tell our own
# notes apart from notes placed by someone else.
LEADER='NHC:'

# Log a timestamped record of how this script was invoked.
printf '%s %s %s\n' "$(date '+%Y%m%d %H:%M:%S')" "$0" "$*"

# Positional arguments: $1 is the node to act on; any remaining
# arguments are joined (by the first character of IFS, a space by
# default) to form the note text.
#
# Refuse to continue without a node name:  the pbsnodes commands later
# in this script would otherwise be run with no target node.
if [[ -z "$1" ]]; then
    echo "Usage: $0 <hostname> [note ...]" >&2
    exit 2
fi
HOSTNAME="$1"
shift
NOTE="$*"

# Query the current state of the node and split the reply on whitespace:
#   LINE[0]    first field of the reply (not used here)
#   LINE[1]    node state
#   LINE[2]    first word of the existing note, if any
#   LINE[3..]  remainder of the existing note
#
# The reply is read with "read -a" rather than an unquoted $(...) inside
# an array literal so that words containing glob characters (*, ?, [)
# are kept verbatim instead of being expanded against the current
# directory; otherwise a preserved note could be corrupted.  "-d ''"
# makes read consume the entire reply, so newlines act as ordinary field
# separators; read returns non-zero at end of input, which is expected.
# $PBSNODES and $PBSNODES_LIST_ARGS are deliberately unquoted so that
# they split into a command and its separate arguments.
LINE=()
IFS=$' \t\n' read -r -d '' -a LINE < <($PBSNODES $PBSNODES_LIST_ARGS "$HOSTNAME")
STATUS="${LINE[1]}"
OLD_NOTE_LEADER="${LINE[2]}"
OLD_NOTE="${LINE[*]:3}"
case "$STATUS" in
    *down*|*offline*|*unknown*)
        # If the node is already offline, and there is no old note, and
        # we've not been told to ignore that, do not touch the node.
        if [[ "$STATUS" == *offline* && -z "$OLD_NOTE_LEADER" && "$IGNORE_EMPTY_NOTE" != "1" ]]; then
            echo "$0:  Not offlining $HOSTNAME:  Already offline with no note set."
            exit 0
        fi
        # If there's an old note that wasn't set by NHC, preserve it by
        # substituting it for the note we were asked to set.
        if [[ -n "$OLD_NOTE_LEADER" && "$OLD_NOTE_LEADER" != "$LEADER" ]]; then
            LEADER="$OLD_NOTE_LEADER"
            NOTE="$OLD_NOTE"
        fi
        ;;
esac

# Report what is about to be done, then replace this shell with the
# offlining command.  The note is passed as a single argument made of
# the leader word and the note text.  $PBSNODES and
# $PBSNODES_OFFLINE_ARGS are deliberately unquoted so that they split
# into a command and its separate arguments; the node name is quoted so
# that it is always passed through as exactly one argument.
echo "$0:  Marking $STATUS $HOSTNAME offline:  $LEADER $NOTE"
exec $PBSNODES $PBSNODES_OFFLINE_ARGS "$LEADER $NOTE" "$HOSTNAME"
