#!/usr/bin/bash

# Print the command-line help and terminate the script.
#
# Arguments: $1 - exit status (default: 0)
# Outputs:   the help text, on stdout when the status is 0 and on stderr
#            otherwise
# Exits:     always, with the given status
function exit_usage() {
    local status=${1:-0}
    # A non-zero status means the help is printed because of a usage error:
    # send everything that follows to stderr.
    if [[ $status != 0 ]]; then
        exec >&2
    fi
    # ${0##*/} removes the longest "*/" prefix, leaving only the script's base
    # name whatever the depth of the path it was invoked with.
    cat << EOF
Usage: ${0##*/} [OPTION...]
List elasticsearch indices or print disk usage by alias or datastream

Available options:
  -w, --watch [SEC]     Listing in watch mode, ie. with periodic refresh; it
                        prints the last indices of each alias or datastream,
                        and single indices
  -s, --size            Size mode, grouped by alias or datastream
  -r, --size-raw        Output sizes in bytes, for size mode
  -n, --size-asc        Sort by size ascending, for size mode
  -N, --size-desc       Sort by size descending for size mode
  -a, --all             Force to include all system hidden indices, aliases
                        or datastreams using an '*,.*' filter in API calls

  -E, --grep            Post filter passed to grep -E before pretty columns
  -v, --invert-match    Pass -v to post filter grep
  -h, --help            Print this help and exit
EOF
    exit "$status"
}

# List indices as returned by the _cat/indices API, sorted by index name.
#
# Globals (read):
#   ALL                 when non-empty, the '*,.*' filter is used in API calls
#   WATCHING            when non-empty, reduce the listing to the kept index
#                       of each alias/datastream plus standalone indices, and
#                       align the columns
#   POST_FILTER         extended regex applied to every output line
#   POST_FILTER_INVERT  when non-empty, invert the post filter (grep -v)
# Outputs: the selected index lines on stdout
# Returns: the status of the last command of the pipeline (grep or column)
# Requires: es-curl; in watch mode also gawk (ARGIND), jq and column -o
function mode_listing() {
    # The pattern is given with -e so that a filter starting with '-' is not
    # parsed as a grep option.
    local -a grep_args=(-E)
    if [[ -n $POST_FILTER_INVERT ]]; then
        grep_args+=(-v)
    fi
    grep_args+=(-e "$POST_FILTER")

    es-curl "_cat/indices/${ALL:+*,.*}?s=i" |
        if [[ -z $WATCHING ]]; then
            grep "${grep_args[@]}"
        else
            # Watch mode: awk reads the aliases, then the last index of each
            # data stream, then the index listing itself from stdin ("-").
            awk '
            # File 1 (_cat/aliases): $2 is presumably the index name and the
            # last field the is_write_index flag (confirm against the API).
            # Every index seen here is marked aliased; it is kept only when
            # the last field is "true".
            ARGIND == 1 { if ($(NF) == "true") { keep[$2]=1; } is_aliased[$2]=1; }
            # File 2: one index name per line, the last one of each data stream.
            ARGIND == 2 { keep[$1]=1; }
            # File 3 (stdin, _cat/indices): print a row when its third field
            # is kept, or when it is neither aliased nor prefixed with ".ds-".
            ARGIND == 3 && (keep[$3] || (!is_aliased[$3] && substr($3, 1, 4) != ".ds-")) { print $0; }
            '   <(es-curl "_cat/aliases/${ALL:+*,.*}?expand_wildcards=all&s=a") \
                <(es-curl "_data_stream/${ALL:+*,.*}?expand_wildcards=all" |
                    jq -r '.data_streams |map(.indices |last |.index_name)[]') \
                - |
            grep "${grep_args[@]}" |
            column -t -o ' '
        fi
}

# Print the cumulated store size and primary store size of every alias and
# data stream, one group per line: "<group> <store size> <primary store size>".
#
# Globals (read):
#   ALL                 when non-empty, the '*,.*' filter is used in API calls
#   SIZE_RAW            when non-empty, print sizes in bytes instead of
#                       human-readable units
#   SIZE_SORT           gawk sort specification given to asorti()
#   POST_FILTER         extended regex applied to every output line
#   POST_FILTER_INVERT  when non-empty, invert the post filter (grep -v)
# Outputs: the aligned table on stdout
# Returns: the status of column, last command of the pipeline
# Requires: es-curl, gawk (ARGIND, asorti), jq, column with -o and -R
function mode_size() {
    # The pattern is given with -e so that a filter starting with '-' is not
    # parsed as a grep option.
    local -a grep_args=(-E)
    if [[ -n $POST_FILTER_INVERT ]]; then
        grep_args+=(-v)
    fi
    grep_args+=(-e "$POST_FILTER")

    awk -v "SIZE_RAW=$SIZE_RAW" \
        -v "SIZE_SORT=$SIZE_SORT" \
    '
# Scale input down by successive divisions by mult and return it with one
# decimal, followed by sep and the matching symbol of HSYM.
# _sym is a local variable (extra parameter idiom).
function human(input, mult, sep, _sym) {
    _sym = 1;
    while (input >= mult && _sym < HSYM_LEN) {
        _sym++;
        input = input / mult;
    }
    return sprintf("%.1f%s%s", input, sep, HSYM[_sym]);
}
BEGIN {
    # Unit prefixes, the first one being empty (no scaling).
    HSYM_LEN = split(",K,M,G,T", HSYM, ",");
}
# Files 1 and 2: "<group> <index> ..." rows, from the aliases listing and from
# the data streams. An index may belong to several groups, so the groups of
# each index are stored as a numbered list.
ARGIND == 1 || ARGIND == 2 {
    num_groups_by_indice[$2]++;
    indice2group[$2,num_groups_by_indice[$2]] = $1;
}
# File 3 (_cat/indices with bytes=b): add the sizes of the index in $3 to
# every group it belongs to.
# NOTE(review): assumes store.size and pri.store.size are the third-to-last
# and second-to-last columns - confirm against the Elasticsearch version used.
ARGIND == 3 {
    for (i = 1; i <= num_groups_by_indice[$3]; i++) {
        group = indice2group[$3,i];
        store_size_by_group[group] += $(NF-2)
        pri_store_size_by_group[group] += $(NF-1)
    }
}
END {
    # gawk specific sort: group names ordered according to SIZE_SORT
    ordered_groups_len = asorti(store_size_by_group, ordered_groups, SIZE_SORT)
    for (i = 1; i <= ordered_groups_len; i++) {
        group = ordered_groups[i];
        store_size = store_size_by_group[group];
        pri_store_size = pri_store_size_by_group[group];
        printf("%s %s %s\n", group,
            SIZE_RAW ? store_size : (human(store_size, 1024) "B"),
            SIZE_RAW ? pri_store_size : (human(pri_store_size, 1024) "B"));
    }
}
    '   <(es-curl "_cat/aliases/${ALL:+*,.*}?expand_wildcards=all&s=a") \
        <(es-curl "_data_stream/${ALL:+*,.*}?expand_wildcards=all" |
            jq -r '.data_streams |map(.name as $ds |.indices |map(.index_name)[] |"\($ds) \(.)") []') \
        <(es-curl "_cat/indices/${ALL:+*,.*}?s=i&bytes=b") |
    grep "${grep_args[@]}" |
    column -t -o ' ' -R 2,3
}

# Option defaults. ALL, WATCHING, POST_FILTER and POST_FILTER_INVERT fall back
# to the environment: watch mode exports them and re-executes this script
# without arguments.
SIZE=
SIZE_RAW=
SIZE_SORT=@ind_str_asc
ALL=${ALL:-}
WATCH=
WATCH_INTERVAL=5
WATCHING=${WATCHING:-}
POST_FILTER=${POST_FILTER:-^}
POST_FILTER_INVERT=${POST_FILTER_INVERT:-}

# Parse the command line into the option globals above.
#
# Arguments: the script's arguments
# Globals (written): SIZE, SIZE_RAW, SIZE_SORT, ALL, WATCH, WATCH_INTERVAL,
#                    POST_FILTER, POST_FILTER_INVERT
# Exits: through exit_usage, with status 0 for -h/--help and status 1 for an
#        unknown option or a missing option argument
function parse_args() {
    local i
    while (( $# > 0 )); do
        case "$1" in
            -h|--help) exit_usage 0 ;;
            -s|--size) SIZE=1 ;;
            -r|--size-raw) SIZE_RAW=1 ;;
            -n|--size-asc) SIZE_SORT=@val_num_asc ;;
            -N|--size-desc) SIZE_SORT=@val_num_desc ;;
            -a|--all) ALL=1 ;;
            -E|--grep)
                # The pattern is mandatory: without this check a trailing -E
                # would silently set an empty filter matching every line.
                (( $# >= 2 )) || exit_usage 1
                POST_FILTER=$2
                shift
                ;;
            -v|--invert-match) POST_FILTER_INVERT=1 ;;
            -w|--watch)
                WATCH=1
                # The interval is optional: the next argument is consumed only
                # when it starts with a digit.
                if [[ ${2:-} == [0-9]* ]]; then
                    WATCH_INTERVAL=$2
                    shift
                fi
                ;;
            -[!-]*)
                # Cluster of short flags, eg. -srN. Every letter must be an
                # argument-less flag, otherwise the whole word is rejected
                # before any of its flags is applied.
                [[ ${1#-} == *[!srnNavwh]* ]] && exit_usage 1
                for (( i = 1; i < ${#1}; i++ )); do
                    case "${1:i:1}" in
                        s) SIZE=1 ;;
                        r) SIZE_RAW=1 ;;
                        n) SIZE_SORT=@val_num_asc ;;
                        N) SIZE_SORT=@val_num_desc ;;
                        a) ALL=1 ;;
                        v) POST_FILTER_INVERT=1 ;;
                        w) WATCH=1 ;;
                        h) exit_usage 0 ;;
                    esac
                done
                ;;
            *) exit_usage 1 ;;
        esac
        shift
    done
}

# Entry point: parse the arguments, then run the selected mode.
function main() {
    parse_args "$@"

    if [[ -n $WATCH ]]; then
        # Only digits are accepted as the refresh interval.
        [[ -z ${WATCH_INTERVAL//[0-9]} ]] || exit_usage 1
        # The re-executed script gets no arguments: its settings travel
        # through the environment, WATCHING selecting the watch listing.
        export WATCHING=1 ALL POST_FILTER POST_FILTER_INVERT
        exec watch -n "$WATCH_INTERVAL" "$0"
    elif [[ -n $SIZE ]]; then
        mode_size
    else
        mode_listing
    fi
}

main "$@"
