#!/usr/bin/python
# -*- encoding: utf-8; py-indent-offset: 4 -*-
# +------------------------------------------------------------------+
# |             ____ _               _        __  __ _  __           |
# |            / ___| |__   ___  ___| | __   |  \/  | |/ /           |
# |           | |   | '_ \ / _ \/ __| |/ /   | |\/| | ' /            |
# |           | |___| | | |  __/ (__|   <    | |  | | . \            |
# |            \____|_| |_|\___|\___|_|\_\___|_|  |_|_|\_\           |
# |                                                                  |
# | Copyright Mathias Kettner 2016             mk@mathias-kettner.de |
# +------------------------------------------------------------------+
#
# This file is part of Check_MK.
# The official homepage is at http://mathias-kettner.de/check_mk.
#
# check_mk is free software;  you can redistribute it and/or modify it
# under the  terms of the  GNU General Public License  as published by
# the Free Software Foundation in version 2.  check_mk is  distributed
# in the hope that it will be useful, but WITHOUT ANY WARRANTY;  with-
# out even the implied warranty of  MERCHANTABILITY  or  FITNESS FOR A
# PARTICULAR PURPOSE. See the  GNU General Public License for more de-
# tails. You should have  received  a copy of the  GNU  General Public
# License along with GNU Make; see the file  COPYING.  If  not,  write
# to the Free Software Foundation, Inc., 51 Franklin St,  Fifth Floor,
# Boston, MA 02110-1301 USA.

# This plugin was sponsored by BenV. Thanks!
# https://notes.benv.junerules.com/mtr/

# Concept:
# Read config mtr.cfg (plus any mtr.d/*.cfg)
# For every host:
# parse outstanding reports (and delete them)
# If current time > last check + config(time), start new mtr in background
#    MTR results are stored in $MK_VARDIR/mtr.report.${host}
# return previous host data

import ConfigParser
import glob
import os
import re
import subprocess
import sys
import time
from unicodedata import normalize

# Base directories; the environment variables take precedence when set and
# non-empty, otherwise the built-in defaults are used.
mk_confdir = os.getenv("MK_CONFDIR") or "/etc/check_mk"
mk_vardir = os.getenv("MK_VARDIR") or "/var/lib/check_mk_agent"

# Main config file and glob pattern for additional config snippets.
config_filename = mk_confdir + "/mtr.cfg"
config_dir = mk_confdir + "/mtr.d/*.cfg"
# Results of the previous run, one line per host.
status_filename = mk_vardir + "/mtr.state"
# Prefix of the per-host report files; the host slug is appended.
report_filepre = mk_vardir + "/mtr.report."

# Both spellings are honoured anywhere after the program name.
debug = '-d' in sys.argv[1:] or '--debug' in sys.argv[1:]


def which(program):
    """Return the path of the executable *program*, or None if not found.

    If *program* contains a directory part it is checked as given. Otherwise
    every directory listed in the PATH environment variable is searched in
    order and the first match is returned. When PATH is not set at all,
    os.defpath is searched instead of raising KeyError.
    """

    def is_exe(fpath):
        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)

    fpath, _fname = os.path.split(program)
    if fpath:
        return program if is_exe(program) else None

    search_path = os.environ.get("PATH", os.defpath)
    for path in search_path.split(os.pathsep):
        exe_file = os.path.join(path, program)
        if is_exe(exe_file):
            return exe_file

    return None


# Locate the mtr executable. Without it there is nothing to do, so the
# plugin ends with exit status 0; the reason is only printed in debug mode.
mtr_prog = which('mtr')
if mtr_prog is None and debug:
    sys.stdout.write("Could not find mtr binary\n")
if mtr_prog is None:
    sys.exit(0)


def read_config():
    """Load the mtr host configuration.

    Reads config_filename and every file matching the config_dir glob into a
    single parser. default_options supplies the value of any option that a
    section does not set itself.

    Files that cannot be read or parsed are reported on stdout and skipped.
    The plugin exits with status 0 when config_filename does not exist or
    when no section was defined at all.

    Returns the populated SafeConfigParser.
    """
    default_options = {
        'type': 'icmp',
        'count': "10",
        'force_ipv4': "0",
        'force_ipv6': "0",
        'size': "64",
        'time': "0",
        'dns': "0",
        'port': None,
        'address': None,
        'interval': None,
        'timeout': None
    }
    if not os.path.exists(config_filename):
        if debug:
            sys.stdout.write("Not configured, %s missing\n" % config_filename)
        sys.exit(0)

    cfg = ConfigParser.SafeConfigParser(default_options)
    # Let ConfigParser figure it out
    for config_file in [config_filename] + glob.glob(config_dir):
        try:
            # read() returns the list of files it managed to parse.
            if not cfg.read(config_file):
                sys.stdout.write("**ERROR** Failed to parse configuration file %s!\n" % config_file)
        except Exception as e:
            sys.stdout.write("**ERROR** Failed to parse config file %s: %s\n" %
                             (config_file, repr(e)))

    if not cfg.sections():
        sys.stdout.write("**ERROR** Configuration defines no hosts!\n")
        sys.exit(0)

    return cfg


# structure of statusfile
# # HOST        |LASTTIME |HOPCOUNT|HOP1|Loss%|Snt|Last|Avg|Best|Wrst|StDev|HOP2|...|HOP8|...|StdDev
# www.google.com|145122481|8|192.168.1.1|0.0%|10|32.6|3.6|0.3|32.6|10.2|192.168.0.1|...|9.8
def read_status():
    """Load the per-host results stored in the status file.

    Each line has the form ``host|lasttime|hopcount`` followed by eight
    fields per hop. Returns a dict mapping host to
    ``{'hops': {hop number: {field: value}}, 'lasttime': int}`` with hop
    numbers starting at 1; the dict is empty when the status file does not
    exist.

    Lines that cannot be parsed are reported on stdout. Whatever was parsed
    from such a line before the error occurred is kept.
    """
    current_status = {}
    if not os.path.exists(status_filename):
        return current_status

    hop_fields = ('hopname', 'loss', 'snt', 'last', 'avg', 'best', 'wrst', 'stddev')
    with open(status_filename) as status_file:
        for line in status_file:
            try:
                parts = line.split('|')
                if len(parts) < 2:
                    sys.stdout.write("**ERROR** (BUG) Status has less than 2 parts:\n")
                    sys.stdout.write("%s\n" % parts)
                    continue
                host = parts[0]
                lasttime = int(float(parts[1]))
                current_status[host] = {'hops': {}, 'lasttime': lasttime}
                hops = int(parts[2])
                for i in range(hops):
                    # The three host fields come first, then 8 fields per hop.
                    base = i * 8 + 3
                    current_status[host]["hops"][i + 1] = dict(
                        (field, parts[base + offset].rstrip())
                        for offset, field in enumerate(hop_fields))
            except Exception as e:
                # Same "**ERROR**" prefix as every other error line of this plugin.
                sys.stdout.write("**ERROR** (BUG) Could not parse status line: %s, reason: %s\n" %
                                 (line, repr(e)))
    return current_status


def save_status(current_status):
    """Write *current_status* to the status file, one line per host.

    A line consists of host, time of the last report and the number of hops,
    followed by the eight values of every hop, all separated by ``|``. Hops
    are written in ascending hop number so that reading the file back, which
    numbers hops by position, yields the same order. Only the 'hops' and
    'lasttime' entries of a host are stored.
    """
    hop_fields = ('hopname', 'loss', 'snt', 'last', 'avg', 'best', 'wrst', 'stddev')
    # The with-block makes sure the file is flushed and closed.
    with open(status_filename, "w") as status_file:
        for host, hostdict in current_status.items():
            hops = hostdict["hops"]
            fields = [host, hostdict["lasttime"], len(hops)]
            for hop in sorted(hops):
                hi = hops[hop]
                fields.extend(hi[field] for field in hop_fields)
            hoststring = "|".join("%s" % field for field in fields).rstrip()
            status_file.write("%s\n" % hoststring)


# Runs of these characters separate the words of a slug.
_punct_re = re.compile(r'[\t !"#$%&\'()*\-/<=>?@\[\\\]^_`{|},.:]+')


def host_to_filename(host, delim=u'-'):
    """Generate a slightly worse ASCII-only slug of *host*.

    *host* is lower-cased and split at runs of punctuation/whitespace. Each
    word is NFKD-normalized and stripped of everything that is not ASCII;
    the non-empty words are joined with *delim*.

    *host* may be a byte string (decoded as UTF-8) or already text. Returns
    a text (unicode) string.
    """
    # Get rid of gibberish chars, stolen from Django
    if isinstance(host, bytes):
        host = host.decode('UTF-8')
    result = []
    for word in _punct_re.split(host.lower()):
        # Round-trip through ASCII to drop accents and other non-ASCII chars.
        word = normalize('NFKD', word).encode('ascii', 'ignore').decode('ascii')
        if word:
            result.append(word)
    # type(u'') is the text type (unicode on Python 2).
    return type(u'')(delim.join(result))


def check_mtr_pid(pid):
    """Check for the existence of a unix pid and if the process matches.

    Returns True only if signal 0 can be sent to *pid* and the process'
    command line, read from /proc/<pid>/cmdline, is an mtr report run, i.e.
    the program name is ``mtr`` and the first two arguments are
    ``--report --report-wide``. Returns False otherwise, including when the
    command line cannot be read.

    Only the base name of the program is compared: start_mtr launches mtr
    through the path returned by which(), so the command line usually begins
    with a directory.
    """
    try:
        os.kill(pid, 0)
    except OSError:
        return False  # process does no longer exist

    try:
        # Arguments in /proc/<pid>/cmdline are separated by NUL bytes.
        with open("/proc/%d/cmdline" % pid, 'rb') as cmdline_file:
            argv = cmdline_file.read().split(b"\x00")
    except (IOError, OSError):
        return False  # no /proc entry (any more) or not readable

    return (os.path.basename(argv[0]) == b"mtr" and
            argv[1:3] == [b"--report", b"--report-wide"])


def _register_new_host(host):
    """Give *host* an empty status entry unless it already has one."""
    if host not in status:
        status[host] = {'hops': {}, 'lasttime': 0}


def parse_report(host):
    """Fold an outstanding mtr report for *host* into the global status.

    Outcomes:
    - no report file: the host is registered with an empty entry if unknown.
    - a pid file exists and that mtr is still running: the existing entry is
      kept and marked with 'running'; the report is left alone.
    - report shorter than 3 lines: reported as error and deleted.
    - otherwise the entry of the host is replaced by the hops found in the
      report and the report file is deleted. If the report has no "HOST:"
      header followed by at least one line, the entry stays empty and the
      file is left in place.

    If parsing fails, the error is reported, the report is deleted and the
    host gets an empty entry, so the later processing steps for this host
    still find it in status.
    """
    reportfile = report_filepre + host_to_filename(host)
    if not os.path.exists(reportfile):
        _register_new_host(host)
        return

    # 1451228358
    # Start: Sun Dec 27 14:35:18 2015
    #HOST: purple         Loss%   Snt   Last   Avg  Best  Wrst StDev
    #  1.|-- 80.69.76.120    0.0%    10    0.3   0.4   0.3   0.6   0.0
    #  2.|-- 80.249.209.100  0.0%    10    1.0   1.1   0.8   1.4   0.0
    #  3.|-- 209.85.240.63   0.0%    10    1.3   1.7   1.1   3.6   0.5
    #  4.|-- 209.85.253.242  0.0%    10    1.6   1.8   1.6   2.1   0.0
    #  5.|-- 209.85.253.201  0.0%    10    4.8   5.0   4.8   5.4   0.0
    #  6.|-- 216.239.56.6    0.0%    10    4.7   5.1   4.7   5.5   0.0
    #  7.|-- ???            100.0    10    0.0   0.0   0.0   0.0   0.0
    #  8.|-- 74.125.136.147  0.0%    10    4.5   4.6   4.3   5.2   0.0
    # See if pidfile exists and if mtr is still running
    pidfile = reportfile + ".pid"
    if os.path.exists(pidfile):
        # See if it's running
        try:
            with open(pidfile, 'r') as pid_fh:
                pid = int(pid_fh.readline().rstrip())
            if check_mtr_pid(pid):
                # Still running, we're done.
                _register_new_host(host)
                status[host]['running'] = True
                return
        except ValueError:
            # Pid file is broken. Process probably crashed..
            pass
        # Done running, get rid of pid file
        os.unlink(pidfile)

    # Parse the existing report
    with open(reportfile) as report_fh:
        lines = report_fh.readlines()
    if len(lines) < 3:
        sys.stdout.write("**ERROR** Report file %s has less than 3 lines, "
                         "expecting at least 1 hop! Throwing away invalid report\n" % reportfile)
        os.unlink(reportfile)
        _register_new_host(host)
        return
    status[host] = {'hops': {}, 'lasttime': 0}

    hopcount = 0
    try:
        # First line is the start timestamp of the mtr run.
        status[host]["lasttime"] = int(float(lines.pop(0)))
        while lines and not lines[0].startswith("HOST:"):
            lines.pop(0)
        if len(lines) < 2:  # Not enough lines
            return
        lines.pop(0)  # Get rid of HOST: header
        hopline = re.compile(
            r'^\s*\d+\.')  #  10.|-- 129.250.2.147   0.0%    10  325.6 315.5 310.3 325.6   5.0
        for line in lines:
            if not hopline.match(line):
                continue  #     |  `|-- 129.250.2.159
            hopcount += 1
            parts = line.split()
            # Hop index + hop name + 7 values = 9 columns, parts[0]..parts[8].
            if len(parts) < 9:
                sys.stdout.write("**ERROR** Bug parsing host/hop, "
                                 "line has less than 9 parts: %s\n" % line)
                continue
            status[host]['hops'][hopcount] = {
                'hopname': parts[1],
                'loss': parts[2],
                'snt': parts[3],
                'last': parts[4],
                'avg': parts[5],
                'best': parts[6],
                'wrst': parts[7],
                'stddev': parts[8],
            }
    except Exception as e:
        sys.stdout.write("**ERROR** Could not parse report file %s, "
                         "tossing away invalid data %s\n" % (reportfile, e))
        # Keep an (empty) entry: the main loop goes on to call start_mtr for
        # this host, which indexes status[host].
        status[host] = {'hops': {}, 'lasttime': 0}
    os.unlink(reportfile)


def output_report(host):
    """Write the last known result of *host* to stdout as one line.

    The line consists of host, time of the last report and the number of
    hops, followed by the eight values of every hop in ascending hop number,
    all separated by ``|``. Nothing is written if status has no (or an
    empty) entry for the host.
    """
    hostdict = status.get(host)
    if not hostdict:
        return

    hop_fields = ('hopname', 'loss', 'snt', 'last', 'avg', 'best', 'wrst', 'stddev')
    hops = hostdict["hops"]
    fields = [host, hostdict["lasttime"], len(hops)]
    # Sort explicitly instead of relying on the dict's iteration order.
    for hop in sorted(hops):
        hi = hops[hop]
        fields.extend(hi[field] for field in hop_fields)
    sys.stdout.write("%s\n" % "|".join("%s" % field for field in fields))


def _mtr_command(host):
    """Assemble the mtr command line for *host* from its config section."""
    options = [mtr_prog, '--report', '--report-wide']
    pingtype = config.get(host, "type")
    count = config.getint(host, "count")
    ipv4 = config.getboolean(host, "force_ipv4")
    ipv6 = config.getboolean(host, "force_ipv6")
    size = config.getint(host, "size")
    dns = config.getboolean(host, "dns")
    port = config.get(host, "port")
    address = config.get(host, "address")
    interval = config.get(host, "interval")
    timeout = config.get(host, "timeout")

    if pingtype == 'tcp':
        options.append("--tcp")
    if pingtype == 'udp':
        options.append("--udp")
    if port is not None:
        options += ["--port", str(port)]
    if ipv4:
        options.append("-4")
    if ipv6:
        options.append("-6")
    options += ["-s", str(size), "-c", str(count)]
    if not dns:
        options.append("--no-dns")
    if address is not None:
        options += ["--address", str(address)]
    if interval is not None:
        options += ["-i", str(interval)]
    if timeout is not None:
        options += ["--timeout", str(timeout)]

    options.append(str(host))
    return options


def start_mtr(host):
    """Start a background mtr run for *host* if one is due.

    Nothing is started while a previous run is still marked as running in
    status, or while fewer than config(time) seconds have passed since the
    last report.

    Otherwise the process forks. The parent returns immediately. The child
    detaches into its own session, writes the current timestamp as first
    line of the report file, starts mtr with stdout/stderr appended to that
    file, stores the pid of mtr in "<report file>.pid" and terminates.

    The child always ends through os._exit(), also on errors: it must never
    return into the calling script, and the stdout buffer it inherited from
    the parent must not be flushed a second time.
    """
    options = _mtr_command(host)
    min_interval = config.getint(host, "time")

    if "running" in status[host]:
        if debug:
            sys.stdout.write("MTR for host still running, not restarting MTR!\n")
        return

    now = time.time()
    last_run = status[host]["lasttime"]
    if now - last_run < min_interval:
        if debug:
            sys.stdout.write("%s - %s = %s is smaller than %s => mtr run not needed yet.\n" %
                             (now, last_run, now - last_run, min_interval))
        return

    # Written before the fork: buffered output of the child is discarded by
    # os._exit().
    if debug:
        sys.stdout.write("Startin MTR: %s\n" % (" ".join(options)))

    pid = os.fork()
    if pid > 0:
        # Parent process, return and keep running
        return

    exit_code = 1
    try:
        os.chdir("/")
        # NOTE(review): with umask 0 the report and pid file are created
        # world-writable - confirm this is intended.
        os.umask(0)
        os.setsid()

        # Close all fd except stdin,out,err
        for fd in range(3, 256):
            try:
                os.close(fd)
            except OSError:
                pass

        reportfile = report_filepre + host_to_filename(host)
        if os.path.exists(reportfile):
            os.unlink(reportfile)
        with open(reportfile, 'a+') as report:
            report.write(str(int(time.time())) + "\n")
            # The timestamp has to be in the file before mtr appends to it.
            report.flush()
            process = subprocess.Popen(options, stdout=report, stderr=report)
        # Write pid to report.pid
        with open(reportfile + ".pid", 'w') as pidfile:
            pidfile.write("%d\n" % process.pid)
        exit_code = os.EX_OK
    except Exception as e:
        sys.stderr.write("**ERROR** Failed to start mtr for %s: %r\n" % (host, e))
    finally:
        os._exit(exit_code)


# Main program: the section header is written first, then configuration and
# the results of the previous run are loaded.
sys.stdout.write("<<<mtr:sep(124)>>>\n")
config = read_config()
status = read_status()

# Per host, in this order: take in an outstanding report, print the last
# known values, start a new mtr run if needed.
for host_name in config.sections():
    for handle_host in (parse_report, output_report, start_mtr):
        handle_host(host_name)

# Store the results for the next run.
save_status(status)
