#!/usr/bin/python
# -*- encoding: utf-8; py-indent-offset: 4 -*-
# +------------------------------------------------------------------+
# |             ____ _               _        __  __ _  __           |
# |            / ___| |__   ___  ___| | __   |  \/  | |/ /           |
# |           | |   | '_ \ / _ \/ __| |/ /   | |\/| | ' /            |
# |           | |___| | | |  __/ (__|   <    | |  | | . \            |
# |            \____|_| |_|\___|\___|_|\_\___|_|  |_|_|\_\           |
# |                                                                  |
# | Copyright Mathias Kettner 2014             mk@mathias-kettner.de |
# +------------------------------------------------------------------+
#
# This file is part of Check_MK.
# The official homepage is at http://mathias-kettner.de/check_mk.
#
# check_mk is free software;  you can redistribute it and/or modify it
# under the  terms of the  GNU General Public License  as published by
# the Free Software Foundation in version 2.  check_mk is  distributed
# in the hope that it will be useful, but WITHOUT ANY WARRANTY;  with-
# out even the implied warranty of  MERCHANTABILITY  or  FITNESS FOR A
# PARTICULAR PURPOSE. See the  GNU General Public License for more de-
# tails. You should have  received  a copy of the  GNU  General Public
# License along with GNU Make; see the file  COPYING.  If  not,  write
# to the Free Software Foundation, Inc., 51 Franklin St,  Fifth Floor,
# Boston, MA 02110-1301 USA.

# First generation of agents output only the process command line:
# /usr/sbin/xinetd -pidfile /var/run/xinetd.pid -stayalive -inetd_compat -inetd_ipv6

# Second generation of agents output the user in brackets in the first columns:
# (root) /usr/sbin/xinetd -pidfile /var/run/xinetd.pid -stayalive -inetd_compat -inetd_ipv6

# Third generation (from 1.1.5) output also virtual memory, resident memory and %CPU:
# (class,122376,88128,0.0) /usr/jre1.6.0_13/bin/java -Dn=Cart_16TH13 -Dmcs.node=zbgh1ca -Dmcs.mdt.redundan

# Fourth generation (>=1.2.5), additional columns in bracket:
# (user, virtual_size, resident_size, %cpu, processID, pagefile_usage, usermodetime, kernelmodetime, openHandles, threadCount) name
# (\\KLAPPRECHNER\ab,29284,2948,0,3124,904,400576,901296,35,1)    NOTEPAD.EXE

# Sixth generation (>=1.2.7) adds an optional etime, joined by "/" with the CPU time

# The plugin "psperf.bat" is deprecated. As of version 1.2.5 all of this information
# is reported by the windows agent itself. However, we still support sections from psperf.bat
# if the agent version is lower than 1.2.5.
# Windows agent now ships a plugin "psperf.bat" that adds a section from wmic
# to the output:
# <<<ps:sep(44)>>>
# [wmic process]
# ^M
# Node,KernelModeTime,Name,PageFileUsage,ThreadCount,UserModeTime,VirtualSize,WorkingSetSize^M
# WINDOWSXP,43478281250,System Idle Process,0,2,0,0,28672^M
# WINDOWSXP,155781250,System,0,59,0,1957888,253952^M
# WINDOWSXP,468750,smss.exe,176128,3,156250,3928064,442368^M
# WINDOWSXP,56406250,csrss.exe,1863680,12,11406250,25780224,3956736^M
# WINDOWSXP,18593750,winlogon.exe,6832128,19,4843750,59314176,2686976^M
# WINDOWSXP,167500000,services.exe,1765376,16,13750000,22601728,4444160^M
# WINDOWSXP,16875000,lsass.exe,3964928,21,3906250,43462656,6647808^M
# WINDOWSXP,8750000,VBoxService.exe,1056768,8,468750,26652672,3342336^M

# New since 1.2.1i2: WATO compatible syntax
#
# Holds a list of rules which are matching hosts by names or tags and
# where each rule holds a dictionary.
#
# Each of those entries defines the following options:
#
# 1. descr:    item name to be used for the service description
# 2. match:    matching-definition
# 3. user:     user definition
# 5. perfdata: monitor with perfdata
# 4. levels:   four numbers (thresholds)
# Discovery rule set; handed to inventory_ps_common() by inventory_ps() below.
inventory_processes_rules = []

# Deprecated option since 1.6. cmk_base creates a config warning when finding rules
# for this ruleset. Can be dropped with 1.7.
inventory_processes = []

# NOTE(review): not referenced anywhere in this file; presumably a legacy
# configuration variable kept for the deprecated ps.perf check - confirm.
inventory_processes_perf = []
# NOTE(review): not referenced in this file; presumably a sentinel used by
# rule definitions / ps.include to mean "match any user" - confirm.
ANY_USER = None


def ps_cleanup_counters(parsed):
    """Drop item-state entries that belong to processes no longer reported.

    Collects the process ids present in ``parsed``, asks
    ps_get_counters_to_delete() for every stored key whose pid is not among
    them, and clears those keys together with one fixed legacy key.
    """
    current_pids = ps_get_current_pids(parsed)

    # "last.cleared.ps_" is a legacy key (once used for some kind of caching)
    # that is always scheduled for removal, ahead of the per-process keys.
    stale_keys = ["last.cleared.ps_"] + ps_get_counters_to_delete(current_pids)

    # Keeps the item state from growing with data of processes that are not
    # part of the current agent output anymore.
    clear_item_states_by_full_keys(stale_keys)


# Get the idents of the counters which can be deleted because the process id of
# the counter is not found anymore in the process table.
#
# Handle these formats of idents:
# Old string based keys: 'ps_stat.pcpu.669': (1448634267.875281, 1),
# New magic keys:        ('ps', None, 'ps_wmic.kernel.692'): (1448633487.573496, 1092007),
def ps_get_counters_to_delete(pids):
    """Return the item-state keys whose trailing process id is not in ``pids``.

    Two key layouts are recognised:
      * tuple keys whose first element is "ps" or "ps.perf"; the counter name
        is taken from the third element,
      * plain (non-tuple) keys starting with "ps_stat" or "ps_wmic".
    Every other key is ignored. The part after the last "." of the counter
    name is treated as the pid; keys where that part is not purely numeric
    are kept.
    """
    stale_keys = []
    for key in get_all_item_states():
        if isinstance(key, tuple):
            if key[0] not in ("ps", "ps.perf"):
                continue
            counter_name = key[2]
        elif key.startswith(("ps_stat", "ps_wmic")):
            counter_name = key
        else:
            continue

        pid_part = counter_name.rsplit(".", 1)[-1]
        if not pid_part.isdigit():
            continue
        if pid_part not in pids:
            stale_keys.append(key)

    return stale_keys


def ps_get_current_pids(parsed):
    """Return the set of truthy ``process_id`` values found in ``parsed``.

    Each entry of ``parsed`` is expected to carry, at index 1, an object with
    a ``process_id`` attribute. Entries whose ``process_id`` is falsy (for
    example None or an empty string) are skipped.
    """
    return {entry[1].process_id for entry in parsed if entry[1].process_id}


# This function is only concerned with deprecated output from psperf.bat,
# in case of all other output it just returns info unmodified. But if it is
# a windows output it will extract the number of cpu cores
def ps_merge_wmic_info(info):
    """Return ``(cpu_cores, info)`` for the raw section lines in ``info``.

    The lines are scanned in order and the first matching rule wins:
      * a line with more than two columns whose third column is
        "system idle process" (case-insensitive): the 10th comma separated
        field of the second column (with its first and last character
        stripped) is returned as the number of cores, ``info`` is returned
        unmodified;
      * a line whose last column contains "wmic process": the whole section
        is handed to extract_wmic_info() (deprecated psperf.bat output);
      * otherwise ``(1, info)`` is returned.
    """
    for entry in info:
        # Agent output of version cmk>1.2.5: [CLUSTER, PS_INFO, COMMAND]
        if len(entry) > 2 and entry[2].lower() == "system idle process":
            bracket_fields = entry[1][1:-1].split(",")
            return int(bracket_fields[9]), info

        # Data from windows with wmic info, cmk<1.2.5
        if "wmic process" in entry[-1]:
            return extract_wmic_info(info)

    # Data from other systems than windows
    return 1, info


def extract_wmic_info(info):
    """Separate plain process lines from wmic rows and merge them.

    Lines between a '[wmic process]' marker and a '[wmic process end]' marker
    (compared against the last column of each line) are treated as wmic data:
    the line directly following the start marker provides the column headers,
    every further line is turned into a dict keyed by those headers. All
    lines outside such a region are collected as plain process lines.

    Returns whatever merge_wmic() returns for the collected data.
    """
    ps_result = []
    wmic_info = {}
    # Bound up front: the caller triggers on the substring "wmic process", so
    # this function can be reached without ever seeing an exact
    # '[wmic process]' line followed by a header line. Previously that left
    # wmic_headers unbound and the final merge_wmic() call raised
    # UnboundLocalError.
    wmic_headers = []
    is_wmic = False

    lines = iter(info)
    for line in lines:
        marker = line[-1]
        if marker == '[wmic process]':
            is_wmic = True
            try:
                # First column is renamed to "node"; the rest are the header
                # names exactly as sent by the agent.
                wmic_headers = ["node"] + next(lines)[1:]
            except StopIteration:
                break  # start marker was the very last line
            continue
        if marker == '[wmic process end]':
            is_wmic = False
            continue

        if not is_wmic:
            ps_result.append(line)  # plain list of process names
            continue

        row = dict(zip(wmic_headers, line))
        # Row might be damaged. I've seen this agent output:
        # Node - TILE-BUILDER02
        # ERROR:
        # Description = Quota violation
        #
        # Node,
        if "Name" in row and "ProcessId" in row:
            wmic_info.setdefault((row["node"], row["Name"]), []).append(row)

    return merge_wmic(ps_result, wmic_info, wmic_headers)


def merge_wmic(ps_result, wmic_info, wmic_headers):
    """Enrich plain process lines with the matching wmic row, if any.

    ps_result:    list of lines; line[0] and line[1] form the lookup key.
    wmic_info:    dict mapping (line[0], line[1]) keys to lists of wmic row
                  dicts. Matching lists are consumed (popped from) here.
    wmic_headers: list of wmic column names; only consulted to decide whether
                  "ThreadCount" is available for the core count.

    For every line with a matching, not yet seen ProcessId a bracket string
      (unknown,virt,resident,0,pid,pagefile,usermode,kernelmode,handles,threads,)
    is inserted at index 1 of the line. The line is modified in place.

    Returns ``(cpu_cores, info)`` where cpu_cores is the ThreadCount of the
    "system idle process" row (default 1) and info is the list of all lines.
    """
    info = []
    seen_pids = set()  # Remove duplicate entries
    cpu_cores = 1
    for line in ps_result:
        psinfos = wmic_info.get((line[0], line[1]), [])
        if psinfos:
            psinfo = psinfos.pop()  # each info is used only once!
            # Get number of CPU cores from system idle process
            if "ThreadCount" in wmic_headers and psinfo["Name"].lower() == "system idle process":
                cpu_cores = int(psinfo["ThreadCount"])
            pid = int(psinfo["ProcessId"])
            if pid not in seen_pids:
                seen_pids.add(pid)
                # Floor division keeps these values integers on every Python
                # version; true division would go through a float and lose
                # precision for very large byte counts.
                virt = int(psinfo["VirtualSize"]) // 1024  # Bytes -> KB
                resi = int(psinfo["WorkingSetSize"]) // 1024  # Bytes -> KB
                pagefile = int(psinfo["PageFileUsage"]) // 1024  # Bytes -> KB
                userc = int(psinfo["UserModeTime"])  # do not resolve counter here!
                kernelc = int(psinfo["KernelModeTime"])  # do not resolve counter here!
                handlec = int(psinfo.get("HandleCount", 0))  # Only in newer psperf.bat versions
                threadc = int(psinfo["ThreadCount"])  # do not resolve counter here!
                # NOTE(review): the trailing comma yields one extra empty
                # field when split on ","; presumably intentional - confirm
                # against the consumer in ps.include before touching it.
                line[1:1] = [
                    "(unknown,%d,%d,0,%d,%d,%d,%d,%d,%d,)" %
                    (virt, resi, pid, pagefile, userc, kernelc, handlec, threadc)
                ]
        info.append(line)

    return cpu_cores, info


# This mainly formats the line[1] element which contains the process info (user,...)
def ps_parse_process_entries(pre_parsed):
    """Normalise every line to ``[node, process_info, command line parts...]``.

    line[0] is the node, line[1] is either the process info string or, if the
    agent sent no process info, already the first part of the command line.
    ps_info_tuple() decides which of the two applies; when it returns a falsy
    value a default ps_info() is inserted instead so that all resulting lines
    have the same column layout.

    Lines that end up without a (non-empty) third column are dropped.
    """
    entries = []
    for raw in pre_parsed:
        info_tuple = ps_info_tuple(raw[1])
        if not info_tuple:
            # Make number of columns in line consistent for discovery/check
            entry = [raw[0], ps_info()] + raw[1:]
        else:
            entry = [raw[0], info_tuple] + raw[2:]

        # Skip lines where no process command line is available, e.g.
        # [None, u'(<defunct>,,,)']
        # [None, u'(<defunct>,,,)', u'']
        if len(entry) <= 2 or not entry[2]:
            continue
        entries.append(entry)

    return entries


# Produces a list of lists where each sub list is built as follows:
# [
#     [None, (u'root', u'35156', u'4372', u'00:00:05/2-14:14:49', u'1'), u'/sbin/init'],
# ]
# First element:  The node the data comes from in a cluster or None
# Second element: The process info tuple (see ps.include: check_ps_common() for details on the elements)
# Third element:  The process command line
def parse_ps(info):
    """Return ``(cpu_cores, parsed)`` for the raw ps section ``info``.

    The section is first run through ps_merge_wmic_info(), whose line list is
    then converted by ps_parse_process_entries().
    """
    cpu_cores, merged_lines = ps_merge_wmic_info(info)
    return cpu_cores, ps_parse_process_entries(merged_lines)


def inventory_ps(info):
    """Discovery function: delegate to inventory_ps_common().

    ``info`` is a list of sections; only the first two are used here. If the
    second one is non-empty it is parsed with parse_ps_lnx(), otherwise the
    first one is parsed with parse_ps() (its core count is not needed here).
    """
    ps_section, ps_lnx_section = info[:2]
    if ps_lnx_section:
        return inventory_ps_common(inventory_processes_rules, parse_ps_lnx(ps_lnx_section))

    _unused_cores, parsed = parse_ps(ps_section)
    return inventory_ps_common(inventory_processes_rules, parsed)


def check_ps(item, params, info):
    """Check function: prepare process data, core count and total RAM, then
    delegate to check_ps_common().

    ``info`` is unpacked positionally into seven sections: the first two are
    the process sections, the remaining five are four memory sections and a
    cpu section. As a side effect, item-state counters of vanished processes
    are removed via ps_cleanup_counters().
    """
    ps_section, ps_lnx_section = info[:2]
    if ps_lnx_section:
        parsed = parse_ps_lnx(ps_lnx_section)
        cpu_cores = 1
    else:
        cpu_cores, parsed = parse_ps(ps_section)

    # Cleanup counters of processes which do not exist anymore
    ps_cleanup_counters(parsed)

    linux_mem, solaris_mem, statgrab_mem, aix_mem, cpu_section = info[2:]

    # cpu_info for non windows systems: a six column first line carries the
    # number of cores in its last column
    if cpu_section and len(cpu_section[0]) == 6:
        cpu_cores = int(cpu_section[0][5])

    total_ram = None
    if linux_mem:
        total_ram = parse_proc_meminfo_bytes(linux_mem).get("MemTotal")
    elif solaris_mem:
        total_ram = solaris_mem.get("MemTotal") * 1024
    elif statgrab_mem:
        total_ram = statgrab_mem.get("MemTotal") * 1024
    elif aix_mem:
        # NOTE(review): presumably a count of 4 KB pages - confirm
        total_ram = int(aix_mem[0][0]) * 4 * 1024

    return check_ps_common(item, params, parsed, cpu_cores=cpu_cores, total_ram=total_ram)


# Registration of the "ps" check: discovery is done by inventory_ps(), the
# check itself by check_ps().
check_info['ps'] = {
    "inventory_function": inventory_ps,
    "check_function": check_ps,
    "service_description": "Process %s",
    "includes": ["ps_lnx.include", "ps.include", "mem.include"],
    "has_perfdata": True,
    "node_info": True,  # add first column with actual host name
    "group": "ps",
    "default_levels_variable": "ps_default_levels",
    # Order matters: inventory_ps()/check_ps() unpack "info" positionally.
    # The first two elements are used as the ps and ps_lnx data, the next
    # five as mem, solaris_mem, statgrab_mem, aix_memory and cpu.
    # NOTE(review): presumably the section's own data is passed first,
    # followed by these extra sections in the listed order - confirm.
    "extra_sections": ["ps_lnx", "mem", "solaris_mem", "statgrab_mem", "aix_memory", "cpu"],
}

# NOTE: This check is deprecated and will be removed any decade now. The ps
# check now always produces performance data.
# Registration of the "ps.perf" subcheck. It reuses check_ps() and, unlike
# the 'ps' entry, defines no "inventory_function".
check_info['ps.perf'] = {
    "check_function": check_ps,
    "service_description": "Process %s",
    "includes": ["ps_lnx.include", "ps.include", "mem.include"],
    "has_perfdata": True,
    "node_info": True,  # add first column with actual host name
    "group": "ps",
    "default_levels_variable": "ps_default_levels",
    # Order matters: check_ps() unpacks "info" positionally (two process
    # sections followed by mem, solaris_mem, statgrab_mem, aix_memory, cpu).
    "extra_sections": ["ps_lnx", "mem", "solaris_mem", "statgrab_mem", "aix_memory", "cpu"],
}
