#!/usr/bin/python
# -*- encoding: utf-8; py-indent-offset: 4 -*-
# +------------------------------------------------------------------+
# |             ____ _               _        __  __ _  __           |
# |            / ___| |__   ___  ___| | __   |  \/  | |/ /           |
# |           | |   | '_ \ / _ \/ __| |/ /   | |\/| | ' /            |
# |           | |___| | | |  __/ (__|   <    | |  | | . \            |
# |            \____|_| |_|\___|\___|_|\_\___|_|  |_|_|\_\           |
# |                                                                  |
# | Copyright Mathias Kettner 2014             mk@mathias-kettner.de |
# +------------------------------------------------------------------+
#
# This file is part of Check_MK.
# The official homepage is at http://mathias-kettner.de/check_mk.
#
# check_mk is free software;  you can redistribute it and/or modify it
# under the  terms of the  GNU General Public License  as published by
# the Free Software Foundation in version 2.  check_mk is  distributed
# in the hope that it will be useful, but WITHOUT ANY WARRANTY;  with-
# out even the implied warranty of  MERCHANTABILITY  or  FITNESS FOR A
# PARTICULAR PURPOSE. See the  GNU General Public License for more de-
# tails. You should have  received  a copy of the  GNU  General Public
# License along with GNU Make; see the file  COPYING.  If  not,  write
# to the Free Software Foundation, Inc., 51 Franklin St,  Fifth Floor,
# Boston, MA 02110-1301 USA.

# EXAMPLE DATA FROM: WDC SSC-D0128SC-2100
#<<<smart>>>
#/dev/sda ATA WDC_SSC-D0128SC-   1 Raw_Read_Error_Rate     0x000b   100   100   050    Pre-fail  Always       -       16777215
#/dev/sda ATA WDC_SSC-D0128SC-   3 Spin_Up_Time            0x0007   100   100   050    Pre-fail  Always       -       0
#/dev/sda ATA WDC_SSC-D0128SC-   5 Reallocated_Sector_Ct   0x0013   100   100   050    Pre-fail  Always       -       0
#/dev/sda ATA WDC_SSC-D0128SC-   7 Seek_Error_Rate         0x000b   100   100   050    Pre-fail  Always       -       0
#/dev/sda ATA WDC_SSC-D0128SC-   9 Power_On_Hours          0x0012   100   100   000    Old_age   Always       -       1408
#/dev/sda ATA WDC_SSC-D0128SC-  10 Spin_Retry_Count        0x0013   100   100   050    Pre-fail  Always       -       0
#/dev/sda ATA WDC_SSC-D0128SC-  12 Power_Cycle_Count       0x0012   100   100   000    Old_age   Always       -       523
#/dev/sda ATA WDC_SSC-D0128SC- 168 Unknown_Attribute       0x0012   100   100   000    Old_age   Always       -       1
#/dev/sda ATA WDC_SSC-D0128SC- 175 Program_Fail_Count_Chip 0x0003   100   100   010    Pre-fail  Always       -       0
#/dev/sda ATA WDC_SSC-D0128SC- 192 Power-Off_Retract_Count 0x0012   100   100   000    Old_age   Always       -       0
#/dev/sda ATA WDC_SSC-D0128SC- 194 Temperature_Celsius     0x0022   040   100   000    Old_age   Always       -       40 (Lifetime Min/Max 30/60)
#/dev/sda ATA WDC_SSC-D0128SC- 197 Current_Pending_Sector  0x0012   100   100   000    Old_age   Always       -       0
#/dev/sda ATA WDC_SSC-D0128SC- 240 Head_Flying_Hours       0x0013   100   100   050    Pre-fail  Always       -       0
#/dev/sda ATA WDC_SSC-D0128SC- 170 Unknown_Attribute       0x0003   100   100   010    Pre-fail  Always       -       1769478
#/dev/sda ATA WDC_SSC-D0128SC- 173 Unknown_Attribute       0x0012   100   100   000    Old_age   Always       -       4217788040605

# TODO: The smart check needs a complete rework. Use IDs instead of the
# changing descriptions! But be careful: there is no standard for either IDs
# or descriptions. Only use those that are in common use.

# Default parameters of the smart.temp check; the registration of that check
# names this entry in its 'default_levels_variable'.
# NOTE(review): "levels" is presumably a (warn, crit) pair in degrees Celsius
# as interpreted by check_temperature, which is not visible here - confirm.
factory_settings["smart_temp_default_levels"] = {"levels": (35, 40)}

# Pairs of levels keyed by a short counter name. Nothing in the visible part
# of this file reads this table.
# NOTE(review): presumably (warn, crit) pairs kept for legacy configurations -
# confirm before removing.
smart_stats_default_levels = dict(
    realloc_events=(1, 1),
    realloc_sectors=(1, 1),
    spin_retries=(1, 1),
    pending_retries=(1, 1),
    pending_sectors=(1, 1),
    cmd_timeouts=(5, 10),
    e2e_errs=(1, 1),
    uncorr_errs=(1, 1),
    udma_crcs=(1, 1),
)


def parse_smart_raw_values(info):
    """Collect the raw SMART values of every disk found in the agent output.

    info is a list of rows, each row being a list of string tokens. Two row
    layouts are recognised:

    * ATA attribute rows (13 or more tokens): token 0 is the device, token 4
      the attribute name and token 12 the raw value. Attributes called
      "Unknown_Attribute" are ignored.
    * NVMe rows (3 to 6 tokens): a row whose first token contains "/dev"
      starts a new disk, every other row is a "Key: value" pair belonging to
      the most recently started disk. Spaces in the key are replaced by
      underscores.

    Rows of any other length are ignored. NVMe rows that carry no colon, or
    that show up before any disk header, are skipped instead of aborting the
    whole parse.

    Returns a dict mapping the device name to a dict of
    field name -> value as returned by saveint().
    """
    disks = {}
    disk_name = None
    disk = None

    for line in info:
        if len(line) >= 13:
            if line[0] != disk_name:
                disk_name = line[0]
                disk = {}
                disks[disk_name] = disk

            field = line[4]
            if field != "Unknown_Attribute":
                disk[field] = saveint(line[12])
        # nvme
        elif 3 <= len(line) <= 6:
            if "/dev" in line[0]:
                disk_name = line[0]
                disk = {}
                disks[disk_name] = disk
                continue

            # Split at the first colon only: a row with no colon or with
            # several colons must not blow up a two-name unpacking.
            field, separator, value = " ".join(line).partition(":")
            if disk is None or not separator:
                continue

            field = field.strip()
            # Drop the percent sign and the thousands separators.
            value = value.strip().replace("%", "").replace(".", "").replace(",", "")
            if field == "Temperature":
                # Keep only the number in front of the unit.
                value = value.split()[0]
            elif field == "Critical Warning":
                # Reported as a hexadecimal bit mask.
                value = int(value, 16)
            elif field in ("Data Units Read", "Data Units Written"):
                # One NVMe data unit is 1000 blocks of 512 bytes; the check
                # function renders these two fields as a byte count.
                value = int(value.split()[0]) * 512000
            disk[field.replace(" ", "_")] = saveint(value)

    return disks


def parse_smart_normalized_values(info):
    """Collect the normalized SMART values and thresholds of ATA disks.

    Only rows with 13 or more tokens are evaluated: token 0 is the device,
    token 4 the attribute name, token 6 the normalized value and token 8 the
    threshold. Attributes called "Unknown_Attribute" are ignored.

    Returns a dict mapping the device name to a dict of
    attribute name -> (value, threshold). A value that is not a number yields
    (None, None); a threshold that is not a number yields (value, None).
    """
    disks = {}
    disk_name = None
    disk = None

    for line in info:
        if len(line) < 13:
            continue

        if line[0] != disk_name:
            disk_name = line[0]
            disk = {}
            disks[disk_name] = disk

        field = line[4]
        if field == "Unknown_Attribute":
            continue

        try:
            value = int(line[6])
        except ValueError:
            disk[field] = None, None
            continue

        # The tokens arrive as strings (e.g. "050"), so an isinstance() test
        # against int never matches; the threshold has to be parsed instead.
        try:
            threshold = int(line[8])
        except ValueError:
            threshold = None
        disk[field] = value, threshold
    return disks


# Fields that make a disk eligible for the smart.stats check. The inventory
# function snapshots exactly these fields (as far as a disk reports them).
smart_stats_fields = [
    # ATA attributes
    'Reallocated_Sector_Ct',
    'Spin_Retry_Count',
    'Reallocated_Event_Count',
    'Current_Pending_Sector',
    'Command_Timeout',
    'End-to-End_Error',
    'Reported_Uncorrect',
    'Uncorrectable_Error_Cnt',
    'UDMA_CRC_Error_Count',
    'CRC_Error_Count',
] + [
    # NVMe
    'Critical_Warning',
]


def inventory_smart_stats(info):
    """Return a list of (disk name, snapshot) for the disks worth monitoring.

    The snapshot holds the current raw value of every field listed in
    smart_stats_fields that the disk reports. Disks reporting none of these
    fields are left out.
    """
    found = []
    for name, raw_values in parse_smart_raw_values(info).items():
        snapshot = dict(
            (key, raw_values[key]) for key in smart_stats_fields if key in raw_values)
        # An empty snapshot means the disk has no interesting field at all.
        if snapshot:
            found.append((name, snapshot))
    return found


def check_smart_stats(item, params, info):
    """Yield one result per known counter that the disk reports.

    item is the disk name, params the snapshot of the counters taken at
    inventory time and info the agent output. Nothing is yielded when the
    disk is not part of the agent output.

    Every result is a tuple (state, infotext, [(field, value)]) with state
    0 or 2:

    * "Available_Spare" is compared against the "Available_Spare_Threshold"
      reported by the disk itself and becomes 2 when it is below it.
    * Every other counter is compared against its value in params and
      becomes 2 when it is above it.
    * Counters without a reference value are reported with state 0.
    * "Reallocated_Event_Count" additionally becomes 2 when its normalized
      value is at or below its normalized threshold.
    """
    # params is a snapshot of all counters at the point of time of inventory

    disks = parse_smart_raw_values(info)
    normalized = parse_smart_normalized_values(info)

    if item not in disks:
        return
    disk = disks[item]

    for unit, field, descr in [
        (' hours', 'Power_On_Hours', 'Powered on'),
        ('', 'Power_Cycle_Count', 'Power cycles'),
        ('', 'Reallocated_Sector_Ct', 'Reallocated sectors'),
        ('', 'Reallocated_Event_Count', 'Reallocated events'),
        ('', 'Spin_Retry_Count', 'Spin retries'),
        ('', 'Current_Pending_Sector', 'Pending sectors'),
        ('', 'Command_Timeout', 'Command timeouts'),
        ('', 'End-to-End_Error', 'End-to-End errors'),
        ('', 'Reported_Uncorrect', 'Uncorrectable errors'),
        ('', 'Uncorrectable_Error_Cnt', 'Uncorrectable errors'),
        ('', 'UDMA_CRC_Error_Count', 'UDMA CRC errors'),
        ('', 'CRC_Error_Count', 'UDMA CRC errors'),
            #nvme
        ('', 'Power_Cycles', 'Power cycles'),
        ('', 'Critical_Warning', 'Critical warning'),
        ('%', 'Available_Spare', 'Available spare'),
        ('%', 'Percentage_Used', 'Percentage used'),
        ('', 'Media_and_Data_Integrity_Errors', 'Media and data integrity errors'),
        ('', 'Error_Information_Log_Entries', 'Error information log entries'),
        ('', 'Data_Units_Read', 'Data units read'),
        ('', 'Data_Units_Written', 'Data units written'),
    ]:
        value = disk.get(field)
        if value is None:
            continue

        if field.startswith("Data_Units_"):
            infotext = "%s: %s%s" % (descr, get_bytes_human_readable(value), unit)
        else:
            infotext = "%s: %d%s" % (descr, value, unit)

        is_spare = field == "Available_Spare"
        if is_spare:
            # The threshold row may be missing. Converting None with int()
            # would raise a TypeError, so only convert an existing value and
            # let the "no reference" branch below handle the missing one.
            ref_value = disk.get("Available_Spare_Threshold")
            if ref_value is not None:
                ref_value = int(ref_value)
        else:
            ref_value = params.get(field)

        if ref_value is None:
            yield 0, infotext, [(field, value)]
            continue

        # The available spare must not drop below its reference, every other
        # counter must not rise above it.
        if is_spare:
            exceeded = value < ref_value
        else:
            exceeded = value > ref_value
        state = 2 if exceeded else 0
        # NOTE(review): for Available_Spare the reference is the threshold of
        # the disk, not a discovery value. The wording is left untouched to
        # keep the check output stable.
        hints = ["during discovery: %d (!!)" % ref_value] if exceeded else []

        # For reallocated event counts we experienced too many reported errors for disks
        # which still seem to be OK. The raw value increased by a small amount but the
        # aggregated value remained at its initial/ok state. So we use the aggregated
        # value now. Only for this field.
        if field == "Reallocated_Event_Count":
            norm_value, norm_threshold = normalized[item][field]
            if norm_value is None:
                yield 0, infotext, [(field, value)]
                continue
            hints.append("normalized value: %d" % norm_value)
            # The threshold can be None when it could not be parsed; never
            # compare a number with None.
            if norm_threshold is not None and norm_value <= norm_threshold:
                state = 2
                hints[-1] += " (!!)"

        if hints:
            infotext += " (%s)" % ', '.join(hints)
        yield state, infotext, [(field, value)]


# Registration of the smart.stats check with its check and inventory function.
# NOTE(review): the %s in 'service_description' is presumably replaced by the
# item (the disk name) by the framework - confirm.
check_info["smart.stats"] = {
    'check_function': check_smart_stats,
    'inventory_function': inventory_smart_stats,
    'has_perfdata': True,
    'service_description': 'SMART %s Stats',
}


def inventory_smart_temp(info):
    """Yield (disk name, {}) for every disk that reports a temperature.

    A disk qualifies when its raw values contain "Temperature_Celsius",
    "Temperature_Internal" or "Temperature".
    """
    temperature_fields = ("Temperature_Celsius", "Temperature_Internal", "Temperature")
    # items() instead of iteritems(): same result, matches the other
    # inventory function of this file and is not limited to Python 2.
    for disk_name, disk in parse_smart_raw_values(info).items():
        if any(field in disk for field in temperature_fields):
            yield disk_name, {}


def check_smart_temp(item, params, info):
    """Check the temperature of the disk named by item.

    The first field present out of "Temperature_Celsius",
    "Temperature_Internal" and "Temperature" is handed to check_temperature()
    together with params and the identifier "smart_<item>"; its result is
    returned unchanged. Returns None when the disk is unknown or reports
    none of these fields.
    """
    raw_values = parse_smart_raw_values(info).get(item)
    if raw_values is None:
        return

    # Ordered by priority: the first match wins.
    for candidate in ("Temperature_Celsius", "Temperature_Internal", "Temperature"):
        if candidate in raw_values:
            return check_temperature(raw_values[candidate], params, "smart_%s" % item)


# Registration of the smart.temp check. check_temperature(), which the check
# function calls, is not defined in this file.
# NOTE(review): it presumably comes from the 'temperature.include' listed
# under 'includes' - confirm.
# 'default_levels_variable' names the factory_settings entry defined near the
# top of this file.
check_info["smart.temp"] = {
    'check_function': check_smart_temp,
    'inventory_function': inventory_smart_temp,
    'service_description': 'Temperature SMART %s',
    'group': 'temperature',
    'has_perfdata': True,
    'includes': ['temperature.include'],
    'default_levels_variable': "smart_temp_default_levels"
}
