#!/usr/bin/bash

# Always run as the dedicated "zlccli" account: when started by any other
# user, replace this process with a sudo re-invocation of the same script and
# arguments (-n: never prompt for a password). Nothing after exec runs.
[[ $(id -un) == zlccli ]] || exec sudo -u zlccli -n "$0" "$@"

# Command tracing is switched on either by a non-zero XDEBUG variable or by
# passing --x-debug as the first argument.
if (( XDEBUG )) || [[ $1 == --x-debug ]]; then
    set -x
fi

# Load the site profile snippet when it is installed
# (presumably extends PATH so helper commands can be found - confirm).
if [[ -f /etc/profile.d/logcenter-path.sh ]]; then
    source /etc/profile.d/logcenter-path.sh
fi

# Disable pathname expansion: index patterns containing * and ? are passed
# around as plain strings in the rest of the script.
set -f

# Files created from here on (the database included) are owner-only.
umask 0077

# Script name without its directory part.
PROGNAME=${0##*/}

# Database configuration.
# DB_DIR may be preset in the environment; it defaults to a "stats" directory
# under the home of the user running the script.
DB_DIR="${DB_DIR:-$HOME/stats}"
DB_FILE="$DB_DIR/indices.db"

# Create the database directory if it doesn't exist. Stop right away when that
# is impossible: every later step needs the database file inside it.
if ! mkdir -p -- "$DB_DIR"; then
    echo "${0##*/}: cannot create database directory: $DB_DIR" >&2
    exit 1
fi

# Create the SQLite schema if it is not there yet.
#
# Every statement uses IF NOT EXISTS, so calling this on each run is harmless.
# The unique index over (indice, host, timestamp) is the key that the
# "REPLACE INTO" statements issued by the main loop overwrite on.
#
# Globals:   DB_FILE (read) - database file handed to sqlite3
# Arguments: none
init_database() {
    sqlite3 "$DB_FILE" <<'SQL'
    CREATE TABLE IF NOT EXISTS host_hour_data (
        indice TEXT,
        host TEXT,
        timestamp TEXT,
        doc_count INTEGER,
        size_bytes INTEGER
    );

    CREATE UNIQUE INDEX IF NOT EXISTS idx_host_hour_data_pk ON host_hour_data(indice,host,timestamp);
    CREATE INDEX IF NOT EXISTS idx_host_hour_data_indice ON host_hour_data(indice);
    CREATE INDEX IF NOT EXISTS idx_host_hour_data_host ON host_hour_data(host);
    CREATE INDEX IF NOT EXISTS idx_host_hour_data_timestamp ON host_hour_data(timestamp);
SQL
}

# Aggregate `_cat/indices` rows read from stdin.
#
# Sums field 7 and field 9 of every input line (presumably docs.count and
# store.size of the default _cat/indices columns - confirm against es-curl's
# output) and prints one line:
#
#     <total count> <total size> <size per document>
#
# The per-document size is int(size / count + 1), or 0 when the count is 0.
# With no input at all the result is "0 0 0": the totals are always printed as
# numbers, so callers that split the line into three fields never get an
# empty one. "%.0f" is used instead of plain print so that large byte totals
# are written out in full (some awk builds switch to exponent notation).
compute-metrics() {
    awk '
        { COUNT += $7; SIZE += $9 }
        END {
            printf "%.0f %.0f %.0f\n", COUNT, SIZE, (COUNT > 0 ? int(SIZE / COUNT + 1) : 0)
        }'
}

# Print the JSON body of the Elasticsearch search request.
#
# The body nests three aggregations: terms on _index, then terms on host.ip,
# then an hourly date_histogram on @timestamp. When a start time is given, a
# range filter on @timestamp is placed in front of them: inclusive from the
# start time, exclusive up to the end time rounded down to the hour ("||/h").
#
# Arguments: $1 - start of the time range (empty: no time filter)
#            $2 - end of the time range (only used together with $1)
# Outputs:   the JSON document on stdout
build_es_query() {
    local start_time=$1 end_time=$2
    local per_hour per_hosts body

    # Assemble from the innermost aggregation outwards.
    per_hour='"aggs": {"per_hour": {"date_histogram": {"field": "@timestamp", "calendar_interval": "1h"}}}'
    per_hosts='"aggs": {"per_hosts": {"terms": {"field": "host.ip", "size": 1000}, '"$per_hour"'}}'
    body='"aggs": {"per_index": {"terms": {"field": "_index", "size": 1000}, '"$per_hosts"'}}'

    # Optional time window.
    if [[ -n $start_time ]]; then
        body='"query": {"range": {"@timestamp": {"gte": "'"$start_time"'","lt": "'"$end_time"'||/h"}}}, '"$body"
    fi

    echo "{$body}"
}

# jq filter that flattens the nested aggregation response into one array per
# (index, host, hour) bucket:
#   [index key, host key, hour bucket key_as_string, doc_count, doc_count * $size_per_entry]
# It walks .aggregations.per_index -> per_hosts -> per_hour, i.e. the nesting
# produced by build_es_query. The jq variable $size_per_entry is not defined
# here; the caller must supply it (docs-size-per-host-per-hour passes it with
# --argjson and appends "| @tsv").
JQ_HOST_HOUR='.aggregations
    | .per_index.buckets[] | .key as $index
    | .per_hosts.buckets[] | .key as $host
    | .per_hour.buckets[]
    | [$index, $host, .key_as_string, .doc_count, (.doc_count * $size_per_entry)]
'

# Query Elasticsearch for one index prefix and print one TSV row per
# (index, host, hour) bucket: index, host, hour, doc_count, estimated bytes.
#
# Globals:   JQ_HOST_HOUR (read) - jq filter applied to the response
# Arguments: $1 - index prefix; "<prefix>-*" is searched
#            $2 - size per document, handed to jq as a JSON number
#            $3 - start of the time range (empty: no time filter)
#            $4 - end of the time range
# Outputs:   tab-separated rows on stdout
docs-size-per-host-per-hour() {
    local indice=$1 size_per_entry=$2 start_time=$3 end_time=$4
    local request_body

    # Request body: nested aggregations plus the optional time window.
    request_body=$(build_es_query "$start_time" "$end_time")

    # size=0: only the aggregations are wanted, no individual hits.
    es-curl "$indice-*/_search?size=0" -d "$request_body" \
        | jq --argjson size_per_entry "$size_per_entry" -r "$JQ_HOST_HOUR | @tsv"
}

# Make sure the schema exists before anything reads from the database.
init_database

# Newest hour bucket stored so far; empty when the table has no rows yet.
last_timestamp=$(sqlite3 "$DB_FILE" "SELECT MAX(timestamp) FROM host_hour_data;")

# Incremental run: restart from the newest stored bucket (it is fetched again
# and overwritten). Without a stored timestamp both variables stay unset and
# the query is sent without a time filter.
if [[ -n $last_timestamp ]]; then
    ts_format='%Y-%m-%dT%H:%M:%S.000Z'

    # Lower bound: the last stored timestamp, normalised to UTC.
    start_time=$(date --utc -d "$last_timestamp" +"$ts_format")
    echo "# Using last log timestamp: $last_timestamp" >&2

    # Upper bound: the current time. The query built by build_es_query appends
    # "||/h" and compares with "lt", so Elasticsearch rounds it down to the
    # start of the current hour and the hour still in progress is left out.
    end_time=$(date --utc +"$ts_format")
    echo "# Time range: $start_time to $end_time" >&2
fi

# Get indices data: the raw `_cat/indices` listing with sizes in bytes.
# The text stays in $output because the per-prefix metrics are computed from
# it further down. The URL is quoted so the "?" can never act as a glob.
output=$(es-curl '/_cat/indices?bytes=b')

# Names of all ".ds-logs-*" indices. The name is taken as the third
# whitespace-separated field rather than matched with a single-space regex:
# _cat output is column-aligned, so fields can be separated by several spaces
# (for example when health values of different length are listed together).
mapfile -t indices < <(awk '$3 ~ /^\.ds-logs-/ { print $3 }' <<<"$output" | sort -u)

(( DEBUG )) && echo "# INDICES: ${indices[*]}" >&2

# Delete entries for indices that no longer exist
echo "# Cleaning up obsolete indices from database..." >&2

if (( ${#indices[@]} > 0 )); then
    # Build the SQL IN list, one quoted literal per index, doubling any single
    # quote inside a name. The quote character is kept in a variable so the
    # substitution reads the same regardless of shell quoting rules.
    sq="'"
    indices_sql=
    for name in "${indices[@]}"; do
        indices_sql+="${indices_sql:+,}'${name//$sq/$sq$sq}'"
    done

    # Delete and count in ONE sqlite3 invocation: changes() reports the rows
    # modified on the current connection, so asking for it from a second
    # sqlite3 process would always print 0.
    deleted_count=$(sqlite3 "$DB_FILE" \
        "DELETE FROM host_hour_data WHERE indice NOT IN ($indices_sql); SELECT changes();")
    echo "# Deleted $deleted_count entries for obsolete indices" >&2
else
    # An empty list most likely means the listing itself failed; deleting
    # everything on that basis would throw away good data.
    echo "# Warning: No indices found, skipping cleanup" >&2
fi

# Build the indice prefixes: strip the trailing "-<yyyy.mm.dd>-<generation>"
# part of each index name and keep every distinct prefix once.
mapfile -t indices < <(
    printf '%s\n' "${indices[@]}" |
        sed -rn 's/^(\.ds-logs-.*)-....\...\...-.......*/\1/p' |
        sort -u
)

echo "# INDICES by prefix: ${indices[*]}" >&2

# Process each index prefix: estimate the size per document from the catalogue
# rows of that prefix, then fetch the per-host/per-hour document counts and
# store them (REPLACE overwrites rows already present for the same
# indice/host/timestamp).
sq="'"   # single quote, used to double quotes inside SQL string literals
for indice in "${indices[@]}"; do
    # Select the catalogue rows whose index name (3rd field) is exactly
    # "<prefix>-<yyyy.mm.dd>-<generation>". The prefix is compared as a plain
    # string, so its dots are not regex wildcards, and a longer prefix that
    # merely starts with this one (e.g. "x-y" while processing "x") is not
    # counted towards this prefix's totals.
    read -r -a metrics < <(
        awk -v prefix="$indice-" '
            index($3, prefix) == 1 &&
            substr($3, length(prefix) + 1) ~ /^....\...\...-....../
        ' <<<"$output" | compute-metrics
    )

    # Get and store host-hour data with or without time filter.
    # metrics[2] is the size per document. All statements of one prefix are
    # sent inside a single transaction so SQLite commits once instead of once
    # per row. Rows arrive as TSV, hence the tab-only field separator.
    {
        echo 'BEGIN;'
        docs-size-per-host-per-hour "$indice" "${metrics[2]}" "$start_time" "$end_time" |
            while IFS=$'\t' read -r i h t d s; do
                printf "REPLACE INTO host_hour_data (indice,host,timestamp,doc_count,size_bytes) VALUES ('%s','%s','%s',%d,%d);\n" \
                    "${i//$sq/$sq$sq}" "${h//$sq/$sq$sq}" "${t//$sq/$sq$sq}" "$d" "$s"
            done
        echo 'COMMIT;'
    } | sqlite3 "$DB_FILE"

    echo "# METRICS($indice): ${metrics[*]}" >&2
done
