#!/usr/bin/lua
--
-- Copyright(C) 2024 @ ZENETYS
-- This script is licensed under MIT License (http://opensource.org/licenses/MIT)
--

require 'json'

-- Verbosity level: verbose() prints at >= 1, debug() at >= 2.
VERBOSE = 1
-- Script name without its directory part, used as prefix by perr() and in
-- the usage text.
PROGNAME = string.match(arg[0], '([^/]*)$')

-- Fallback values used when a parsed line does not provide a field.
-- Any key can be overridden, and extra keys (e.g. src_ip) added, from the
-- command line with -d/--default.
DEFAULT = {
    -- message part
    msg = '',
    sd = '-',
    -- header fields
    msgid = '-',
    procid = '-',
    app_name = '-',
    hostname = 'localhost',
    -- numeric severity and facility combined into PRI
    severity = 6,
    facility = 1,
    -- year applied to legacy timestamps that carry none (string from os.date)
    year = os.date('%Y'),
}

-- Lua pattern (unanchored, no captures) for an RFC 3339 timestamp with a
-- numeric UTC offset, e.g. 2023-11-30T09:35:20.411141+01:00 or
-- 2023-11-30T09:35:20-05:00.
-- The optional part between the seconds and the offset sign (fractional
-- seconds) is matched loosely, but must not contain '+', '-' or '/':
--   * the offset sign may be '+' or '-' (a '+' only pattern rejects every
--     timestamp from a zone west of UTC);
--   * a single timestamp can never extend across the '/' separator of a
--     "received/reported" pair.
-- A literal "Z" offset is not supported.
PAT_DATE_RFC3339 = '%d%d%d%d%-%d%d%-%d%dT%d%d:%d%d:%d%d[^+%-/]*[+%-]%d%d:%d%d'

-- Parsers for the timestamp field, tried in order: a "received/reported"
-- pair first, then a single "reported" timestamp.
do
    local one_date = '('..PAT_DATE_RFC3339..')'
    PARSER_DATE = {
        {
            name = 'date-received/date-reported',
            match = {
                set = { received = 1, reported = 2 },
                pattern = '^'..one_date..'/'..one_date..'$',
            },
        },
        {
            name = 'date-reported',
            match = {
                set = { reported = 1 },
                pattern = '^'..one_date..'$',
            },
        },
    }
end

-- Parser for a legacy timestamp at the start of a line, such as
-- "Sep 17 03:40:06", optionally followed by a four-digit year.
PARSER_LEGACY_DATE = {
    {
        name = 'legacy-date',
        match = {
            -- On success, stores the timestamp in vars.reported (formatted
            -- with the local UTC offset as +hh:mm) and the remainder of the
            -- line in vars._rest. Returns false when the input does not
            -- start with a valid legacy timestamp.
            fn = function (input, def, vars)
                local mon, day, hour, min, sec, tail =
                    input:match('^(%u%l%l) ([%d ]%d) (%d%d):(%d%d):(%d%d)(.*)$')
                local month = LEGACY_MONTH2NUM[mon]
                if not month then return false end

                local function within(value, low, high)
                    return value ~= nil and value >= low and value <= high
                end
                day, hour, min, sec =
                    tonumber(day), tonumber(hour), tonumber(min), tonumber(sec)
                if not (within(day, 1, 31) and within(hour, 0, 23) and
                        within(min, 0, 59) and within(sec, 0, 59)) then
                    return false
                end

                -- try to get a year, which may conflict with hostname...
                local year = tail:match('^ (2%d%d%d) [^ ]')
                if year then tail = tail:sub(6) end
                vars._rest = tail

                local stamp = os.date('%Y-%m-%dT%H:%M:%S%z', os.time({
                    year = (year or DEFAULT.year), month = month, day = day,
                    hour = hour, min = min, sec = sec }))
                -- turn the trailing +hhmm offset into +hh:mm
                vars.reported = stamp:sub(1, -3)..':'..stamp:sub(-2)
                return true
            end,
        }
    }
}

-- Tell whether <input> looks like a hostname: only alphanumerics, '_', '.'
-- and '-', no empty label ("..") and no leading or trailing dot.
-- Returns true when valid; nil when a character is not allowed (or input is
-- empty), false when a dot rule is violated.
function is_hostname(input)
    if not input:match('^[%w_.-]+$') then return nil end
    if input:find('%.%.') then return false end
    if input:sub(1, 1) == '.' then return false end
    return input:sub(-1) ~= '.'
end

-- Parsers for the host field, tried in order: "hostname/ip" (sets hostname
-- and src_ip), then a bare hostname.
PARSER_HOSTNAME = {
    {
        name = 'hostname/ip',
        match = {
            set = { hostname = 1, src_ip = 2 },
            pattern = '^([^/]+)/([0-9a-fA-F:.]+)',
            -- the part before the slash must itself be a valid hostname
            fn = function (_input, _def, _vars, cap)
                return is_hostname(cap[1])
            end,
        },
    },
    {
        name = 'hostname',
        match = {
            -- index 0 stores the whole input
            set = { hostname = 0 },
            fn = function (candidate)
                return is_hostname(candidate)
            end,
        },
    },
}

-- Parsers for the syslog tag, tried in order: "program[pid]", "program[]"
-- (empty brackets, no procid set), then a bare "program".
PARSER_TAG = {
    {
        name = 'program[pid]',
        match = { set = { app_name = 1, procid = 2 }, pattern = '^([^%[]+)%[([^%]]+)%]$' },
    },
    {
        name = 'program[]',
        match = { set = { app_name = 1 }, pattern = '^([^%[]+)%[%]$' },
    },
    {
        name = 'program',
        match = { set = { app_name = 1 }, pattern = '^([^%[]+)$' },
    },
}

-- main parsers

-- Archive line, version 2: date(s), "facility.severity", host[/ip],
-- app-name, procid, msgid, then either "- <msg>" or a JSON object holding
-- "msg" and optionally "sd". Sample lines:
--   2023-11-30T09:35:20.411141+01:00 authpriv.info lb1/10.10.1.231 sshd 1341672 - - msg...
--   2023-11-30T09:35:20.411141+01:00/2018-10-31T18:00:00+01:00 authpriv.info lb1/10.10.1.231 sshd 1341672 - - msg...
--   2023-11-30T09:35:20.411141+01:00 authpriv.info lb1 sshd 1341672 - { "sd": "<rfc5424-sd>", "msg": "msg..." }
--   2023-11-30T09:35:20.411141+01:00 authpriv.info lb1 sshd 1341672 - { "msg": "multi\nlines\nmsg..." }
PARSER_MAIN_ARCHIVE_LINE_V2 = {
    name = 'archive-line-v2',
    match = {
        set = { _date = 1, facility = 2, severity = 3, _hostname = 4, app_name = 5, procid = 6, msgid = 7, _rest = 8 },
        pattern = '^([^ ]+) ([^.]+)%.([^ ]+) ([^ ]+) ([^ ]+) ([^ ]+) ([^ ]+) (.+)$',
        -- both the facility and the severity names must be known
        fn = function (_input, _def, _vars, cap)
            return not (FACILITY_TEXT2NUM[cap[2]] == nil or SEVERITY_TEXT2NUM[cap[3]] == nil)
        end,
    },
    chain = {
        { parser = PARSER_DATE, input = '_date' },
        { parser = PARSER_HOSTNAME, input = '_hostname' },
        {
            input = '_rest',
            parser = {
                {
                    -- "- " prefix: what follows is the plain message
                    name = 'msg',
                    match = {
                        fn = function (rest) return rest:find('- ', 1, true) == 1 end,
                    },
                    finalize = {
                        fn = function (rest, vars)
                            vars.msg = rest:sub(3)
                            return true
                        end,
                    },
                },
                {
                    -- "{" prefix: JSON object, "msg" is required
                    name = 'json-[sd-]msg',
                    match = {
                        fn = function (rest) return rest:find('{', 1, true) == 1 end,
                    },
                    finalize = {
                        fn = function (rest, vars)
                            local ok, decoded = pcall(json.decode, vars._rest)
                            if ok and decoded.msg then
                                vars.msg = decoded.msg
                                if decoded.sd then vars.sd = decoded.sd end
                                return true
                            end
                            return false
                        end,
                    },
                },
            },
        },
    },
}

-- Archive line, version 1: date(s), "facility.severity", host[/ip], then
-- "tag: msg" where tag is program or program[pid]. Sample lines:
--   2018-10-31T18:00:00.189614+01:00 local0.info lb1/10.10.1.231 haproxy: msg...
--   2018-10-31T18:00:00.189614+01:00 local0.info lb1/10.10.1.231 haproxy[32098]: msg...
--   2018-10-31T18:00:00.189614+01:00 local0.info lb1 haproxy: msg...
--   2018-10-31T18:00:00.189614+01:00 local0.info lb1 haproxy[32098]: msg...
--   2018-10-31T18:00:00.189614+01:00/2018-10-31T18:00:00+01:00 local0.info lb1/10.10.1.231 haproxy: msg...
--   2018-10-31T18:00:00.189614+01:00/2018-10-31T18:00:00+01:00 local0.info lb1/10.10.1.231 haproxy[32098]: msg...
--   2018-10-31T18:00:00.189614+01:00/2018-10-31T18:00:00+01:00 local0.info lb1 haproxy: msg...
--   2018-10-31T18:00:00.189614+01:00/2018-10-31T18:00:00+01:00 local0.info lb1 haproxy[32098]: msg...
PARSER_MAIN_ARCHIVE_LINE_V1 = {
    name = 'archive-line-v1',
    match = {
        set = { _date = 1, facility = 2, severity = 3, _hostname = 4, _tag = 5, msg = 6 },
        pattern = '^([^ ]+) ([^.]+)%.([^ ]+) ([^ ]+) ([^: ]+): (.*)$',
        -- both the facility and the severity names must be known
        fn = function (_input, _def, _vars, cap)
            return not (FACILITY_TEXT2NUM[cap[2]] == nil or SEVERITY_TEXT2NUM[cap[3]] == nil)
        end,
    },
    chain = {
        { parser = PARSER_DATE, input = '_date' },
        { parser = PARSER_HOSTNAME, input = '_hostname' },
        { parser = PARSER_TAG, input = '_tag' },
    },
}

-- Line with an RFC 3339 date, a host and "tag: msg", without
-- facility/severity. Sample line:
--   2024-09-16T03:55:17.235459+02:00 zvault-col-01.zenetys.loc rsyslogd-pstats: { "name": "resource-usage", ...
PARSER_MAIN_RSYSLOG_FileFormat = {
    name = 'rsyslog-file-format',
    match = {
        set = { _date = 1, _hostname = 2, _tag = 3, msg = 4 },
        pattern = '^([^ ]+) ([^ ]+) ([^: ]+): (.*)$',
    },
    chain = {
        { parser = PARSER_DATE, input = '_date' },
        { parser = PARSER_HOSTNAME, input = '_hostname' },
        { parser = PARSER_TAG, input = '_tag' },
    },
}

-- Line starting with a legacy timestamp, followed by host and "tag: msg".
-- Sample line:
--   Sep 17 03:40:06 zvault-col-01 systemd[157685]: Reached target Exit the Session.
-- There is no match stage: the first chain step receives the whole line
-- (it names no input variable) and leaves the remainder in _rest for the
-- second step.
PARSER_MAIN_RSYSLOG_TraditionalFileFormat = {
    name = 'rsyslog-traditional-file-format',
    chain = {
        { parser = PARSER_LEGACY_DATE },
        {
            input = '_rest',
            parser = {
                {
                    name = 'after-legacy-date',
                    match = {
                        set = { _hostname = 1, _tag = 2, msg = 3 },
                        pattern = '^ ([^ ]+) ([^: ]+): (.*)$',
                    },
                    chain = {
                        { parser = PARSER_HOSTNAME, input = '_hostname' },
                        { parser = PARSER_TAG, input = '_tag' },
                    },
                },
            },
        },
    },
}

-- Top-level line parsers, in the order process() hands them to parse():
-- the first definition accepting the line wins.
PARSER_MAIN = {
    [1] = PARSER_MAIN_ARCHIVE_LINE_V2,
    [2] = PARSER_MAIN_ARCHIVE_LINE_V1,
    [3] = PARSER_MAIN_RSYSLOG_FileFormat,
    [4] = PARSER_MAIN_RSYSLOG_TraditionalFileFormat,
}

-- Facility name => numeric facility code (0..23).
FACILITY_TEXT2NUM = {}
do
    -- position in this list minus one is the code of the named facility
    local named = {
        'kern', 'user', 'mail', 'daemon', 'auth', 'syslog', 'lpr', 'news',
        'uucp', 'cron', 'authpriv', 'ftp', 'ntp', 'audit', 'alert', 'clock',
    }
    for position, name in ipairs(named) do
        FACILITY_TEXT2NUM[name] = position - 1
    end
    -- local0..local7 take codes 16..23
    for n = 0, 7 do
        FACILITY_TEXT2NUM['local'..n] = 16 + n
    end
end

-- Severity name => numeric severity code (0 = emerg .. 7 = debug).
SEVERITY_TEXT2NUM = {}
do
    -- position in this list minus one is the code of the named severity
    local named = { 'emerg', 'alert', 'crit', 'err', 'warning', 'notice', 'info', 'debug' }
    for position, name in ipairs(named) do
        SEVERITY_TEXT2NUM[name] = position - 1
    end
end

-- Three-letter English month abbreviation => month number (1..12), used to
-- decode legacy timestamps.
LEGACY_MONTH2NUM = {}
do
    local abbreviations = {
        'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
        'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',
    }
    for number, abbreviation in ipairs(abbreviations) do
        LEGACY_MONTH2NUM[abbreviation] = number
    end
end

-- Write "<PROGNAME>: <message>" plus a newline to stderr and flush it.
function perr(message)
    local text = PROGNAME..': '..message..'\n'
    local stream = io.stderr
    stream:write(text);
    stream:flush()
end

-- Report <message> through perr() unless quiet mode lowered VERBOSE below 1.
function verbose(message)
    if VERBOSE >= 1 then
        perr(message)
    end
end

-- Report <message> through perr() only when VERBOSE is at least 2.
-- NOTE: being a global named "debug", this function replaces Lua's standard
-- debug library table for the rest of the script.
function debug(message)
    if VERBOSE >= 2 then
        perr(message)
    end
end

-- Combine numeric facility and severity into a syslog PRI value:
-- facility * 8 (done as a 3-bit left shift) plus severity.
function num2pri(facility, severity)
    local shifted = facility << 3
    return shifted + severity
end

-- Format timestamp <ts> (current time when nil) in local time as
-- YYYY-MM-DDTHH:MM:SS followed by the %z offset with a colon inserted
-- before its last two characters (+hhmm becomes +hh:mm).
function date_rfc3339(ts)
    local stamp = os.date('%Y-%m-%dT%H:%M:%S%z', ts)
    local cut = #stamp - 2
    return stamp:sub(1, cut)..':'..stamp:sub(cut + 1)
end

-- Helpers of parse(). They are file-local on purpose: declared as plain
-- "function name()" inside parse() they would be (re)created as globals on
-- every call.

-- Match stage of one definition: test def.match.pattern (if any), then
-- def.match.fn (if any); when both pass, copy captures into <vars> as told
-- by def.match.set (index 0 stores the whole input, index N capture N).
-- Returns true when the definition accepts <input>, false otherwise.
local function try_def_match(input, def, vars)
    local match = def.match
    local cap
    if match.pattern then
        cap = { input:match(match.pattern) }
        local verdict = (#cap > 0)
        debug('def='..def.name..', test pattern => '..tostring(verdict));
        if not verdict then return false end
    end
    if match.fn then
        -- cap is nil here when the definition has no pattern
        local verdict = match.fn(input, def, vars, cap, table.unpack(match.args or {}))
        debug('def='..def.name..', test fn => '..tostring(verdict));
        if not verdict then return false end
    end
    if match.set then
        for key, index in pairs(match.set) do
            if index == 0 then vars[key] = input
            else vars[key] = cap[index] end
        end
    end
    return true
end

-- Finalize stage of one definition: run def.finalize.fn(input, vars, ...)
-- and turn its result into a strict boolean.
local function try_def_finalize(input, def, vars)
    local verdict = def.finalize.fn(input, vars, table.unpack(def.finalize.args or {}))
    debug('def='..def.name..', finalize fn => '..tostring(verdict));
    if not verdict then return false end
    return true
end

-- Chain stage of one definition: run every sub-parser list of def.chain in
-- order, each on vars[<step.input>] (or on <input> when that variable is
-- unset or the step names none). Stops at the first failing step.
local function try_def_chain(input, def, vars)
    for i, step in ipairs(def.chain) do
        -- only the first result of parse() matters here
        local verdict = parse(vars[step.input] or input, step.parser, vars, true)
        debug('def='..def.name..', chain['..i..'] => '..tostring(verdict));
        if not verdict then return false end
    end
    return true
end

-- Try each parser definition of <defs> in order against <input>, filling
-- <vars> with the extracted fields. A definition succeeds when its match,
-- finalize and chain stages (each optional) all succeed.
--   input       string to parse
--   defs        list of parser definitions
--   vars        table receiving the extracted variables
--   is_recurse  true for nested (chained) calls; top-level calls wipe <vars>
--               before each definition so that a failed attempt leaves
--               nothing behind
-- Returns true on the first successful definition, otherwise
-- false, 'no match'. A failed definition that left vars.__is_fatal set
-- stops the search immediately.
function parse(input, defs, vars, is_recurse)
    for _, def in ipairs(defs) do
        -- reset <vars> container for next main parser
        if not is_recurse then for k in pairs(vars) do vars[k] = nil end end

        debug('def='..def.name..', input='..input);
        local verdict = (not def.match or try_def_match(input, def, vars)) and
            (not def.finalize or try_def_finalize(input, def, vars)) and
            (not def.chain or try_def_chain(input, def, vars))
        debug('def='..def.name..' => '..tostring(verdict)..(vars.__is_fatal and ' [fatal]' or ''));
        if verdict then return true
        elseif vars.__is_fatal then break end
    end
    return false, 'no match'
end


-- Escape <value> for use as an RFC 5424 PARAM-VALUE: the characters '"',
-- '\' and ']' must be preceded by a backslash.
local function sd_param_escape(value)
    return (tostring(value):gsub('[\\"%]]', '\\%0'))
end

-- Parse one input line with PARSER_MAIN and write it to stdout as an
-- RFC 5424 style line:
--   <PRI>1 TIMESTAMP HOSTNAME APP-NAME PROCID MSGID STRUCTURED-DATA MSG
-- Fields missing from the parsed line fall back to DEFAULT; the timestamp
-- falls back to the current time.
-- Returns true when the line was written, or false plus an error message
-- when no parser accepted it (nothing is written then).
function process(line)
    local vars = {}
    local ret, err = parse(line, PARSER_MAIN, vars)

    -- only pay for the JSON encoding when debug output is actually enabled
    if VERBOSE >= 2 then
        debug('Parse '..(ret and 'OK' or 'ERR')..', vars='..json.encode(vars));
    end
    if not ret then return ret, err end

    local pri = vars.pri
    if not pri then
        local facility = (vars.facility and FACILITY_TEXT2NUM[vars.facility] or DEFAULT.facility)
        local severity = (vars.severity and SEVERITY_TEXT2NUM[vars.severity] or DEFAULT.severity)
        pri = num2pri(facility, severity)
    end

    local reported = vars.reported or date_rfc3339()
    local hostname = vars.hostname or DEFAULT.hostname
    local app_name = vars.app_name or DEFAULT.app_name
    local procid = vars.procid or DEFAULT.procid
    local msgid = vars.msgid or DEFAULT.msgid
    local msg = vars.msg or DEFAULT.msg

    -- parameters of the extra "zlc@0" SD element, in output order; each one
    -- is emitted only when a value is available
    local zlc_params = {
        { 'srcIp', vars.src_ip or DEFAULT.src_ip },
        { 'srcHost', vars.src_host or DEFAULT.src_host },
        { 'timeReceived', vars.received or DEFAULT.received },
        { 'rjId', vars.rj_id or DEFAULT.rj_id },
    }
    local sd_zlc = ''
    for _, param in ipairs(zlc_params) do
        if param[2] then
            sd_zlc = sd_zlc..' '..param[1]..'="'..sd_param_escape(param[2])..'"'
        end
    end

    local sd = (vars.sd or '')
    if sd_zlc ~= '' then
        -- a NILVALUE ("-") cannot be followed by an SD element
        if sd == '-' then sd = '' end
        sd = sd..'[zlc@0'..sd_zlc..']'
    end
    if sd == '' then sd = DEFAULT.sd end

    io.write('<'..pri..'>1 '..reported..' '..hostname..' '..app_name..' '..procid..' '..msgid..' '..sd..' '..msg..'\n')
    io.flush()
    return true
end

-- Print the usage text on stdout, then terminate the script with <status>
-- (0 when omitted).
function exit_usage(status)
    local lines = {
        'Usage: '..PROGNAME..' [option]',
        'Read log archives from stdin, format for reinject on stdout',
        '',
        'Available options:',
        '  -d, --default     Set default, eg: -d src_ip 10.109.21.13',
        '  -q, --quiet       Set verbose = 0',
        '  -v, --verbose     Increase verbose',
        '  -h, --help        Display this help',
        '',
        'Expected input is a stream of lines in the following format:',
        '  @reported/@received facility.severity hostname/ip program[pid]: message',
        '  @reported/@received facility.severity hostname program[pid]: message',
    }
    print(table.concat(lines, '\n'))
    os.exit(status or 0)
end

-- Program body: parse command line options, then read stdin line by line
-- and hand each line to process().
--   arg  list of command line arguments (options only)
-- Options: -d/--default KEY VALUE, -q/--quiet, -v/--verbose, -h/--help.
-- An unknown option, or -d without both KEY and VALUE, prints the usage and
-- exits with status 1.
-- Lines that no parser accepts, or that make process() raise an error, are
-- reported through verbose() and skipped.
function main(arg)
    local i = 1
    while i <= #arg do
        local opt = arg[i]
        if opt == '-d' or opt == '--default' then
            -- both operands are mandatory: a missing key would raise an
            -- "index is nil" error and a missing value would silently
            -- remove the default instead of setting it
            local key, value = arg[i+1], arg[i+2]
            if key == nil or value == nil then exit_usage(1) end
            DEFAULT[key] = value
            i = i + 2
        elseif opt == '-q' or opt == '--quiet' then
            VERBOSE = 0
        elseif opt == '-v' or opt == '--verbose' then
            VERBOSE = VERBOSE + 1
        elseif opt == '-h' or opt == '--help' then
            exit_usage(0)
        else
            exit_usage(1)
        end
        i = i + 1
    end

    -- <file> is never assigned, so reports always show F=-
    local file, iline, line = nil, 0, nil
    -- report the current line (truncated to 100 characters) with its number
    local perr_line = function (head)
        verbose(head..': F='..(file or '-')..' L='..iline..': '..
            (#line <= 100 and line or line:sub(1, 100)..'...'))
    end

    while true do
        line = io.read('*line')
        if line == nil then break end
        iline = iline + 1
        local status, call_err, process_err = pcall(process, line)
        if status then
            -- if status is true (no exception), call_err is the return
            -- value #1 of process(line), which is true on successful parsing,
            -- false otherwise.
            if not call_err then
                perr_line('Unsupported format! '..tostring(process_err))
            end
        else
            perr_line('BUG! '..tostring(call_err))
        end
    end
end

-- Entry point: run main() on the command line arguments; on an uncaught
-- error, report it on stderr and exit with status 2.
-- The results are kept in locals so that no global is created, and the
-- error value is stringified because it is not necessarily a string.
local ok, failure = pcall(main, arg)
if not ok then
    perr(tostring(failure))
    os.exit(2)
end
