#!/usr/bin/bash
#
# Nagios plugin reporting the SMART health of disks, queried with smartctl
# (locally or through SSH) or read from an SNMP OID.

# Disable pathname expansion: the script relies on unquoted expansions that
# must only be word-split, never globbed.
set -f
PROGNAME=${0##*/}

# Nagios status code -> label printed in front of the plugin output
NAGIOS_STATUS_TEXT=(
    [0]='OK'
    [1]='WARNING'
    [2]='CRITICAL'
    [3]='UNKNOWN'
)
# Nagios status code -> priority used by increase_prio when merging states:
# CRITICAL (3) > WARNING (2) > UNKNOWN (1) > OK (0)
NAGIOS_STATUS_PRIO_TR=( 0 2 3 1 )

# Option values, filled by the command line parser. They are all reset here
# so that nothing is inherited from the calling environment.
DEVICE=
IS_REGEX=
ENV=
HOST=
PORT=
SSH_CMD=
SSH_USER=
SSH_IDENTITY=
TIMEOUT=
SUDO_USER=
SNMP_OID=
SNMP_OPTIONS=()

# Print the plugin usage on stdout and exit with status 3 (UNKNOWN).
# Reads the global PROGNAME for the program name shown in the first line.
function nagios_exit_usage() {
    # printf '%s\n' keeps the text verbatim (the \t below must stay literal)
    printf '%s\n' "\
Usage: $PROGNAME [OPTION...] [-d DEVICE]
Nagios plugin to check the SMART health status of disks

Common options:
  -d, --device        PATH    Disk device path, eg: /dev/sda
  -r, --regex         IBOOL   Given device is a regex, 0 or 1 (default: 1)
  -S, --sudo-user     USER    Run smartctl command with sudo -u USER
  -E, --bash-env      STRING  Environment to export, eg: PATH=\$PATH:/sbin
  -h, --help                  Display this help

SSH options:
  -s, --ssh           CMD     Custom SSH command, see documentation below
  -H, --host          HOST    SSH host
  -p, --port          INT     SSH port
  -u, --ssh-user      USER    SSH username
  -i, --ssh-identity  FILE    SSH identity key file
  -t, --timeout       INT     SSH connect timeout

SNMP options:
  -H, --host          HOST    SNMP host
  -p, --port          INT     SNMP port
  -t, --timeout       INT     Alias for SNMP option -t (timeout)
  -O, --snmp-oid      OID     OID that returns the result in get_smart format
  -P, --snmp-options  ARGS    SNMP options, eg: -v 2c -c public, end with --
  -T, --snmp-sample-script    Display sample SNMP extend script

When using option -s, --ssh, classic SSH options get ignored. It is then
up to the caller to properly escape args and handle SSH options like
identity, port, user, connect timeout.
Usage example: check_disk_smart -s 'ssh -i key cmd@bastion host' ...

When using option -O, --snmp-oid, common options are ignored and the
result is retrieved from the given OID. The result is expected to be in
get_smart format, one device per line: <device>\t<status>, eg:
/dev/sda	PASSED
/dev/sdb	PASSED
A sample script for the snmpd extend OID is printed on stdout with option
-T, --snmp-sample-script. Put it on the target host and add an extend line
in your snmpd.conf to access its output.
"
    exit 3
}

# Write the sample snmpd "extend" script to stdout, then exit with status 3.
# The heredoc delimiter is quoted so the embedded script is emitted verbatim,
# without any expansion by this plugin.
function sample_snmp_script() {
    cat <<'SNMP_SAMPLE_SCRIPT'
#!/bin/bash

# sudoers sample for Debian
# root@sd-131018:~# cat /etc/sudoers.d/Debian-snmp
# Debian-snmp ALL=(ALL) NOPASSWD: /opt/ztools/bin/get-smart
#
# snmpd.conf extend line
# OID for script output .1.3.6.1.4.1.8072.1.3.2.3.1.2.5.115.109.97.114.116
# extend smart /path/to/script

set -f
export LC_ALL=C
[[ $UID == 0 ]] || exec sudo -n "$0" "$@"
OIFS=$IFS
IFS=$'\n'
disks=( $(smartctl --scan) )
IFS=$OIFS
(( ${#disks[@]} == 0 )) && exit 1

for i in "${disks[@]}"; do
    opts=${i%%#*}
    [[ $opts == *,* ]] && subdev=":${opts##*,}" || subdev=
    status=$(smartctl -H $opts |
        sed -nre 's,.*(Health Status|overall-health self-assessment test result): (.+),\2,p')
    printf '%s\t%s\n' "${i%% *}${subdev}" "$status"
done
SNMP_SAMPLE_SCRIPT
    exit 3
}

# Print the plugin result and terminate the script.
# $1: Nagios status code, also used as the exit status
# $2: Plugin output message, prefixed with the matching status label
# $3...: Optional lines of long plugin output, printed one per line
function nagios_die() {
    printf '%s: %s\n' "${NAGIOS_STATUS_TEXT[$1]}" "$2"
    if (( $# > 2 )); then
        # Joining on newline puts each extra argument on its own line
        local IFS=$'\n'
        echo "${*:3}"
    fi
    exit "$1"
}

# Merge two Nagios states, keeping the one with the highest priority
# according to NAGIOS_STATUS_PRIO_TR. The result is stored in REPLY.
# $1: current state
# $2: want state
function increase_prio() {
    if (( NAGIOS_STATUS_PRIO_TR[$2] > NAGIOS_STATUS_PRIO_TR[$1] )); then
        REPLY=$2
    else
        REPLY=$1
    fi
}

# Store an option value into a global variable.
# $1: name of the global variable to set
# $2: value; an empty value or a lone '$' leaves the variable untouched
function nagopt_var() {
    if [[ -n $2 && $2 != '$' ]]; then
        declare -g "$1=$2"
    fi
}

# Store a list of arguments into a global array.
# $1: name of the global array to set
# $2...: values; collection stops at the first '--' or at the last argument
function nagopt_arglist() {
    local var=$1
    local value=() arg
    for arg in "${@:2}"; do
        [[ $arg == -- ]] && break
        value+=( "$arg" )
    done
    # declare expands the compound assignment itself, so every collected
    # element is copied as-is into the global array
    declare -ga "$var=( \"\${value[@]}\" )"
}

# Run a command, either locally or through SSH, and print its output.
# $@: command and arguments to run
# Globals read: SSH_CMD, HOST, PORT, SSH_USER, SSH_IDENTITY, TIMEOUT
# The command runs locally unless SSH_CMD or HOST is set. In that case its
# words are shell-quoted and handed as one string to the custom SSH command
# (SSH_CMD) or to an ssh command line built from the SSH options.
# Output: the command's stdout, also left in REPLY
# Returns: the exit status of the command that was run
function get_cmd() {
    local cmd=( "$@" )
    local sshargs ret
    if [[ -n $SSH_CMD || -n $HOST ]]; then
        # Quote each word so the remote shell sees the same argument list
        printf -v sshargs '%q ' "${cmd[@]}"
        if [[ -n $SSH_CMD ]]; then
            # custom ssh command: it is up to the caller to properly escape args
            # and handle ssh options like identity, port, user, connect timeout
            cmd=( $SSH_CMD -- "$sshargs" )
        else
            cmd=(
                ssh ${SSH_IDENTITY:+-i "$SSH_IDENTITY"} ${SSH_USER:+-l "$SSH_USER"}
                ${PORT:+-p "$PORT"} -q -C -T -o StrictHostKeyChecking=no
                -o UserKnownHostsFile=/dev/null -o PreferredAuthentications=publickey
                ${TIMEOUT:+-o "ConnectTimeout=$TIMEOUT"} "$HOST" -- "$sshargs"
            )
        fi
    fi
    REPLY=$( "${cmd[@]}" ); ret=$?
    # printf rather than echo: output such as "-n" or "-e" must be printed,
    # not taken as an echo option
    printf '%s\n' "$REPLY"
    return "$ret"
}

# Print the SMART health of the selected disks, one "<device>\t<status>"
# line per disk.
# Globals read: DEVICE, IS_REGEX, SUDO_USER
# With IS_REGEX=1, DEVICE is an awk regex matched against the first field of
# each `smartctl --scan` line; otherwise DEVICE is queried directly.
# Returns: 1 when the regex selects no disk, 0 otherwise
# NOTE: this function is serialized with `declare -f` and run by another
# bash, so it must stay self-contained (no helper functions).
function get_smart() {
    # Every smartctl call goes through this array so the sudo prefix applies
    # to the health query as well as to the scan
    local smartctl=( ${SUDO_USER:+sudo -n -u "$SUDO_USER"} smartctl )
    local disks=() args=()
    local i opts subdev status

    if [[ $IS_REGEX == 1 ]]; then
        while IFS= read -r i; do
            [[ -n $i ]] && disks+=( "$i" )
        done < <("${smartctl[@]}" --scan |awk -v "DEVICE=$DEVICE" '$1 ~ DEVICE {print}')
        (( ${#disks[@]} == 0 )) && return 1
    else
        disks+=( "$DEVICE" )
    fi

    for i in "${disks[@]}"; do
        # Scan lines look like "<device> <options> # <comment>": drop the comment
        opts=${i%%#*}
        # A comma in the options (eg: megaraid,0) identifies a sub-device
        if [[ $opts == *,* ]]; then
            subdev=":${opts##*,}"
        else
            subdev=
        fi
        # Split into words without exposing them to pathname expansion
        read -r -a args <<<"$opts"
        status=$("${smartctl[@]}" -H "${args[@]}" |
            sed -nre 's,.*(Health Status|overall-health self-assessment test result): (.+),\2,p')
        printf '%s\t%s\n' "${i%% *}${subdev}" "$status"
    done
    return 0
}

# Fetch SNMP_OID from HOST with snmpget and print its value with double
# quotes removed and any line containing 'No Such' dropped.
# Globals read: SNMP_OPTIONS, TIMEOUT, HOST, PORT, SNMP_OID
# Returns: 2 when no line is left to print, 0 otherwise
function get_snmp() {
    local target="${HOST}${PORT:+:$PORT}"
    local args=( -OqevU "${SNMP_OPTIONS[@]}" )
    if [[ -n $TIMEOUT ]]; then
        args+=( -t "$TIMEOUT" )
    fi
    if ! snmpget "${args[@]}" "$target" "$SNMP_OID" |tr -d '"' |grep -Fv 'No Such'; then
        return 2
    fi
}

# Collect the SMART results into the global REPLY array, one output line
# ("<device>\t<status>") per element.
# Globals read: SNMP_OID, HOST, ENV, DEVICE, IS_REGEX, SUDO_USER
# When both SNMP_OID and HOST are set, the lines come from get_snmp.
# Otherwise a small script is generated (optional export of ENV, LC_ALL=C,
# the option variables, the get_smart function and a call to it) and piped to
# the bash started by get_cmd.
# The caller reads $? right after this function: the REPLY assignment is its
# last command, so the status seen is expected to be the one of the command
# substitution (get_snmp or get_cmd) — NOTE(review): relies on bash reporting
# the substitution status for an array assignment.
function reply_get_smart() {
    # Split the captured output on newlines only, so that each line is kept
    # whole; the previous IFS is saved to be handed back to get_cmd, which
    # word-splits unquoted expansions
    local OIFS=$IFS
    local IFS=$'\n'
    REPLY=($(
        if [[ -n $SNMP_OID && -n $HOST ]]; then
            get_snmp
        else
            {   echo ${ENV:+"export $ENV"}
                echo 'export LC_ALL=C'
                declare -p DEVICE IS_REGEX SUDO_USER
                declare -f get_smart
                echo get_smart
            } |IFS=$OIFS get_cmd bash
        fi
    ))
}

# Command line parsing. Each option taking a value consumes it with the
# extra shift in its arm; the shift closing the loop consumes the option
# itself. Unknown options print the usage.
while [[ $# -gt 0 ]]; do
    case "$1" in
        # common
        -d|--device)
            nagopt_var DEVICE "$2"
            shift
            ;;
        -r|--regex)
            nagopt_var IS_REGEX "$2"
            shift
            ;;
        -S|--sudo-user)
            nagopt_var SUDO_USER "$2"
            shift
            ;;
        -E|--bash-env)
            nagopt_var ENV "$2"
            shift
            ;;
        -h|--help)
            nagios_exit_usage
            ;;
        # ssh, snmp
        -H|--host)
            nagopt_var HOST "$2"
            shift
            ;;
        -p|--port)
            nagopt_var PORT "$2"
            shift
            ;;
        -t|--timeout)
            nagopt_var TIMEOUT "$2"
            shift
            ;;
        # ssh
        -s|--ssh)
            nagopt_var SSH_CMD "$2"
            shift
            ;;
        -u|--ssh-user)
            nagopt_var SSH_USER "$2"
            shift
            ;;
        -i|--ssh-identity)
            nagopt_var SSH_IDENTITY "$2"
            shift
            ;;
        # snmp
        -O|--snmp-oid)
            nagopt_var SNMP_OID "$2"
            shift
            ;;
        -P|--snmp-options)
            nagopt_arglist SNMP_OPTIONS "${@:2}"
            # Skip the collected SNMP options plus the option name; the
            # closing shift below then drops the '--' terminator
            shift $((${#SNMP_OPTIONS[@]}+1))
            ;;
        -T|--snmp-sample-script)
            sample_snmp_script
            ;;

        *)
            nagios_exit_usage
            ;;
    esac
    shift
done

# Option defaults and sanity checks; any failure ends the plugin as UNKNOWN.

# The device is treated as a regex unless told otherwise
IS_REGEX=${IS_REGEX:-1}

case $IS_REGEX in
    0|1) ;;
    *) nagios_die 3 'Invalid regex flag, 0 or 1 expected' ;;
esac
if [[ -z $DEVICE ]]; then
    if [[ $IS_REGEX == 0 ]]; then
        nagios_die 3 'Device required'
    fi
    # No device given in regex mode: '^' matches every scanned device
    DEVICE=^
fi
# Any character that is not a digit makes the timeout invalid
if [[ $TIMEOUT == *[!0-9]* ]]; then
    nagios_die 3 'Invalid SSH connect timeout, integer expected'
fi
if [[ -n $SSH_IDENTITY ]] && ! [[ -r $SSH_IDENTITY ]]; then
    nagios_die 3 'Cannot read SSH identity file'
fi

# Fetch the SMART results into REPLY and map failures to an UNKNOWN result.
reply_get_smart
ret=$?

# Status 255 is reported as an SSH connection failure, but only when SSH was
# actually used (custom command, or a host without SNMP OID)
if [[ -n $SSH_CMD || ( -n $HOST && -z $SNMP_OID ) ]] && (( ret == 255 )); then
    nagios_die 3 'SSH connect failed'
fi

# get_smart returns 1 when the regex selected no disk. Outside regex mode a
# status of 1 is a generic failure, handled below.
if [[ $IS_REGEX == 1 ]] && (( ret == 1 )); then
    nagios_die 3 'No disk matching given pattern'
fi

if (( ret != 0 )); then
    nagios_die 3 'Failed to get smart data'
fi

# Build the final plugin status and message from the result lines in REPLY
# ("<device>\t<status>" each). Disks whose status is missing raise UNKNOWN,
# disks whose status is neither PASSED nor OK raise CRITICAL; both are put
# in front of the message and highlighted, healthy disks are appended.
status=0
message=
# increase_prio stores its result in REPLY: work on a copy of the lines
lines=( "${REPLY[@]}" )

# Without any result line there is nothing to vouch for: do not report OK
if (( ${#lines[@]} == 0 )); then
    nagios_die 3 'Failed to get smart data'
fi

for i in "${lines[@]}"; do
    # Split on the tab only, so that a status made of several words is kept
    # whole
    IFS=$'\t' read -r dev state <<<"$i"
    # Drop trailing blanks from both fields
    dev=${dev%"${dev##*[![:space:]]}"}
    state=${state%"${state##*[![:space:]]}"}

    if [[ -z $state ]]; then
        increase_prio "$status" 3; status=$REPLY
        message="**$dev <unknown>**${message:+, $message}"
    elif [[ $state != PASSED && $state != OK ]]; then
        increase_prio "$status" 2; status=$REPLY
        message="**$dev $state**${message:+, $message}"
    else
        message+="${message:+, }$dev $state"
    fi
done

nagios_die "$status" "SMART status: $message"
