#!/usr/bin/bash
#
# meta-availability-calc - Calculate availability for a meta-indicator (group of entities)
#
# A meta-indicator aggregates multiple hosts/services using AND or OR logic:
#   - AND: all members must be available (takes worst state)
#   - OR:  at least one member must be available (takes best state)
#
# Input format (tab-separated, sorted by timestamp):
#   timestamp  entity_id  state
#
# Where:
#   timestamp - Unix epoch or ISO format
#   entity_id - Identifier for the entity (e.g., "sql", "www", "host1:HTTP")
#   state     - OK, WARNING, CRITICAL, UNKNOWN, UP, DOWN, UNREACHABLE, AVAILABLE, UNAVAILABLE
#
# Parameters:
#   -v SINCE=value   - Period start (unix epoch or ISO), required
#   -v BEFORE=value  - Period end (unix epoch or ISO), required
#   -v OP=value      - Aggregation operator: AND (default) or OR
#
# Output (tab-separated):
#   available  unavailable  unknown  availability%
#
# State hierarchy (worst to best):
#   UNAVAILABLE/CRITICAL/DOWN > UNKNOWN/UNREACHABLE > AVAILABLE/OK/WARNING/UP
#
# Examples:
#   # AND: sql AND www must both be available
#   echo -e "0\tsql\tOK\n0\twww\tCRITICAL\n300\twww\tOK\n500\tsql\tCRITICAL" | \
#     meta-availability-calc -v SINCE=0 -v BEFORE=1000 -v OP=AND
#   # Result: 200s available (300-500), 800s unavailable = 20%
#
#   # OR: sql OR www (redundancy)
#   echo -e "0\tsql\tOK\n0\twww\tCRITICAL\n300\twww\tOK\n500\tsql\tCRITICAL" | \
#     meta-availability-calc -v SINCE=0 -v BEFORE=1000 -v OP=OR
#   # Result: 1000s available (always at least one OK) = 100%
#

exec gawk "$@" '
# Map a raw state string to a numeric priority; lower means worse.
#   0 = unavailable (CRITICAL, DOWN, UNAVAILABLE)
#   1 = unknown     (UNKNOWN, UNREACHABLE, or an empty state)
#   2 = available   (anything else: OK, WARNING, UP, AVAILABLE, ...)
# The state is stripped of surrounding whitespace and upper-cased before the
# comparison, so "critical" or "CRITICAL\r" (the last field of a
# CRLF-terminated line) is no longer silently counted as available.
function state_priority(state) {
    # Scalars are passed by value, so normalising here leaves the stored
    # state of the caller untouched.
    gsub(/^[ \t\r\n]+|[ \t\r\n]+$/, "", state)
    state = toupper(state)

    if (state == "CRITICAL" || state == "DOWN" || state == "UNAVAILABLE") {
        return 0  # unavailable
    }
    if (state == "UNKNOWN" || state == "UNREACHABLE" || state == "") {
        return 1  # unknown
    }
    # OK, WARNING, UP, AVAILABLE and any unrecognised value
    return 2  # available
}

# Translate a numeric priority into the name of the counter it feeds:
# 0 -> "unavailable", 1 -> "unknown", any other value -> "available".
function priority_to_category(p) {
    return (p == 0) ? "unavailable" : ((p == 1) ? "unknown" : "available")
}

# Collapse the per-entity states held in the global current_states array
# into a single priority, according to the global op.
#   op == "OR" : the best (highest) member priority wins
#   otherwise  : the worst (lowest) member priority wins (AND)
# Returns 0 (unavailable), 1 (unknown) or 2 (available). When
# current_states has no members the result is 1 (unknown).
function aggregate_states(    key, prio, lowest, highest, seen) {
    seen = 0
    for (key in current_states) {
        prio = state_priority(current_states[key])
        if (!seen) {
            # First member seeds both extremes
            lowest = highest = prio
            seen = 1
            continue
        }
        if (prio < lowest) lowest = prio
        if (prio > highest) highest = prio
    }

    if (!seen) {
        return 1  # No data = unknown
    }
    return (op == "OR") ? highest : lowest
}

# Seconds since the Unix epoch for a calendar date and time given in UTC.
# Uses plain integer day counting (years are counted from 1 March so the
# leap day falls at the end), which makes the result independent of the
# local time zone.
function utc_epoch(y, m, d, hh, mi, ss,    era, yoe, doy, doe) {
    if (m <= 2) {
        y--
    }
    era = int(y / 400)
    yoe = y - era * 400
    doy = int((153 * (m > 2 ? m - 3 : m + 9) + 2) / 5) + d - 1
    doe = yoe * 365 + int(yoe / 4) - int(yoe / 100) + doy
    # 719468 = days from 0000-03-01 to 1970-01-01
    return (era * 146097 + doe - 719468) * 86400 + hh * 3600 + mi * 60 + ss
}

# Convert a timestamp string to Unix epoch seconds.
# Accepted forms:
#   - all digits                       : taken as epoch seconds
#   - YYYY-MM-DD HH:MM:SS              : local time (via mktime)
#   - YYYY-MM-DDTHH:MM:SS              : local time (via mktime)
#   - YYYY-MM-DDTHH:MM:SS[.frac]Z      : UTC
#   - YYYY-MM-DDTHH:MM:SS[.frac]+HH:MM : explicit UTC offset
#                                        (+HHMM and +HH are accepted too)
# Fractional seconds are discarded.
# Returns -1 when the string is not recognised; callers treat any negative
# result as invalid, which also covers UTC/offset times before 1970.
function parse_timestamp(ts,    datespec, zone, f, offset, minutes) {
    if (ts ~ /^[0-9]+$/) {
        return ts + 0
    }
    if (ts ~ /^[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}$/) {
        gsub(/[-:]/, " ", ts)
        return mktime(ts)
    }
    if (ts ~ /^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}/) {
        # The first 19 characters are the date and time; the rest is an
        # optional fraction followed by an optional zone designator.
        datespec = substr(ts, 1, 19)
        gsub(/[-:T]/, " ", datespec)
        zone = substr(ts, 20)
        sub(/^[.,][0-9]+/, "", zone)

        if (zone == "") {
            return mktime(datespec)  # no zone given: local time
        }

        if (zone == "Z" || zone == "z") {
            offset = 0
        } else if (zone ~ /^[+-][0-9]{2}(:?[0-9]{2})?$/) {
            minutes = substr(zone, 4)
            sub(/^:/, "", minutes)
            offset = substr(zone, 2, 2) * 3600 + minutes * 60
            if (substr(zone, 1, 1) == "-") {
                offset = -offset
            }
        } else {
            # Unknown suffix: reject instead of letting mktime read the
            # leftover digits as its optional DST flag.
            return -1
        }

        split(datespec, f, " ")
        if (f[2] < 1 || f[2] > 12 || f[3] < 1 || f[3] > 31 ||
            f[4] > 23 || f[5] > 59 || f[6] > 60) {
            return -1
        }
        # Wall-clock time minus its offset from UTC gives the UTC instant
        return utc_epoch(f[1] + 0, f[2] + 0, f[3] + 0, f[4] + 0, f[5] + 0, f[6] + 0) - offset
    }
    return -1
}

# Start-up: set the field separator, validate the reporting period given
# through -v SINCE / -v BEFORE, and resolve the aggregation operator from
# -v OP. Sets the globals since, before, op and n_events.
BEGIN {
    FS = "\t"

    since = parse_timestamp(SINCE)
    before = parse_timestamp(BEFORE)

    # NOTE: in awk, "exit" inside BEGIN skips the input but still runs any
    # END rule before the program terminates with the given status.
    if (since < 0 || before < 0) {
        print "ERROR: SINCE and BEFORE must be valid timestamps" > "/dev/stderr"
        exit 1
    }

    if (since >= before) {
        print "ERROR: SINCE must be before BEFORE" > "/dev/stderr"
        exit 1
    }

    # Operator is matched case-insensitively so that OP=or is not silently
    # treated as AND. Empty means the default (AND). Any other value keeps
    # the historical AND fallback but is reported on stderr.
    op = toupper(OP)
    if (op == "") {
        op = "AND"
    } else if (op != "AND" && op != "OR") {
        print "WARNING: Unknown OP \"" OP "\", using AND" > "/dev/stderr"
        op = "AND"
    }

    # Number of events stored by the per-record rule
    n_events = 0
}

# Per-record rule: parse one "timestamp<TAB>entity_id<TAB>state" line and
# append it to the events_* arrays (1-based, in input order). Records with
# an unparsable timestamp or without an entity_id are reported on stderr
# and skipped.
{
    ts = parse_timestamp($1)
    if (ts < 0) {
        print "WARNING: Invalid timestamp: " $1 > "/dev/stderr"
        next
    }

    entity = $2
    # A record without an entity id would register a phantom member named ""
    # that stays unknown and drags every AND aggregate down to unknown.
    if (entity == "") {
        print "WARNING: Missing entity_id on input line " NR > "/dev/stderr"
        next
    }
    state = $3

    # Track all entities seen
    all_entities[entity] = 1

    # Store event
    n_events++
    events_ts[n_events] = ts
    events_entity[n_events] = entity
    events_state[n_events] = state
}

# Final rule: replay the stored events over [since, before) and print
#   available<TAB>unavailable<TAB>unknown<TAB>availability%
# Durations are in seconds. The percentage is available / (available +
# unavailable); unknown time is excluded, and 100 is reported when no time
# was counted at all.
END {
    # awk runs END even after "exit 1" in BEGIN. Without this guard an
    # invalid period would still print a bogus "0 0 0 100.0000" line.
    if (since < 0 || before < 0 || since >= before) {
        exit 1
    }

    total["available"] = 0
    total["unavailable"] = 0
    total["unknown"] = 0

    # Every entity starts as unknown ("") until an event says otherwise
    for (e in all_entities) {
        current_states[e] = ""
    }

    # Single pass over the events, in input order:
    #   - events before the period set the initial state of their entity
    #     (the last one in input order wins);
    #   - events inside the period are indexed by timestamp, keeping their
    #     input order, and each distinct timestamp joins the timeline;
    #   - events at or after the period end are ignored.
    n_timeline = 0
    for (i = 1; i <= n_events; i++) {
        ts = events_ts[i]
        if (ts < since) {
            current_states[events_entity[i]] = events_state[i]
        } else if (ts < before) {
            if (ts in events_at) {
                events_at[ts] = events_at[ts] " " i
            } else {
                events_at[ts] = i
                timeline[++n_timeline] = ts
            }
        }
    }

    # Timeline values are numbers, so asort orders them numerically
    if (n_timeline > 1) {
        asort(timeline)
    }

    # Walk the timeline, crediting each interval to the aggregate state that
    # was in force during it.
    current_ts = since
    current_agg = aggregate_states()

    for (t = 1; t <= n_timeline; t++) {
        next_ts = timeline[t]

        if (next_ts > current_ts) {
            total[priority_to_category(current_agg)] += next_ts - current_ts
        }

        # Apply all state changes at this timestamp, in input order
        n_idx = split(events_at[next_ts], idx, " ")
        for (k = 1; k <= n_idx; k++) {
            i = idx[k]
            current_states[events_entity[i]] = events_state[i]
        }

        current_agg = aggregate_states()
        current_ts = next_ts
    }

    # Tail interval: from the last event to the period end
    if (before > current_ts) {
        total[priority_to_category(current_agg)] += before - current_ts
    }

    available = total["available"]
    unavailable = total["unavailable"]
    unknown = total["unknown"]

    # Calculate availability percentage
    counted = available + unavailable
    if (counted > 0) {
        availability = available * 100.0 / counted
    } else {
        availability = 100.0
    }

    printf "%d\t%d\t%d\t%.4f\n", available, unavailable, unknown, availability
}
'
