Refactoring: Changed all check parameters starting with an 'o' to the new rulespec...
[check_mk.git] / checks / mkeventd_status
blob8a1b64ba4dd0bca5fc863b4749ba6d37e216a843
1 #!/usr/bin/python
2 # -*- encoding: utf-8; py-indent-offset: 4 -*-
3 # +------------------------------------------------------------------+
4 # | ____ _ _ __ __ _ __ |
5 # | / ___| |__ ___ ___| | __ | \/ | |/ / |
6 # | | | | '_ \ / _ \/ __| |/ / | |\/| | ' / |
7 # | | |___| | | | __/ (__| < | | | | . \ |
8 # | \____|_| |_|\___|\___|_|\_\___|_| |_|_|\_\ |
9 # | |
10 # | Copyright Mathias Kettner 2014 mk@mathias-kettner.de |
11 # +------------------------------------------------------------------+
13 # This file is part of Check_MK.
14 # The official homepage is at http://mathias-kettner.de/check_mk.
16 # check_mk is free software; you can redistribute it and/or modify it
17 # under the terms of the GNU General Public License as published by
18 # the Free Software Foundation in version 2. check_mk is distributed
19 # in the hope that it will be useful, but WITHOUT ANY WARRANTY; with-
20 # out even the implied warranty of MERCHANTABILITY or FITNESS FOR A
21 # PARTICULAR PURPOSE. See the GNU General Public License for more de-
22 # tails. You should have received a copy of the GNU General Public
23 # License along with GNU Make; see the file COPYING. If not, write
24 # to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
25 # Boston, MA 02110-1301 USA.
27 # Example output from agent:
28 # <<<mkeventd_status:sep(0)>>>
29 # ["heute"]
30 # [["status_config_load_time", "status_num_open_events", "status_messages", "status_message_rate", "status_average_message_rate", "status_connects", "status_connect_rate", "status_average_connect_rate", "status_rule_tries", "status_rule_trie_rate", "status_average_rule_trie_rate", "status_drops", "status_drop_rate", "status_average_drop_rate", "status_events", "status_event_rate", "status_average_event_rate", "status_rule_hits", "status_rule_hit_rate", "status_average_rule_hit_rate", "status_average_processing_time", "status_average_request_time", "status_average_sync_time", "status_replication_slavemode", "status_replication_last_sync", "status_replication_success", "status_event_limit_host", "status_event_limit_rule", "status_event_limit_overall", "status_event_limit_active_hosts", "status_event_limit_active_rules", "status_event_limit_active_overall"], [1474040901.678517, 19, 0, 0.0, 0.0, 2, 0.1998879393337847, 0.1998879393337847, 0, 0.0, 0.0, 0, 0.0, 0.0, 0, 0.0, 0.0, 0, 0.0, 0.0, 0.0, 0.002389192581176758, 0.0, "master", 0.0, false, 10, 5, 20, [], ["catch_w", "catch_y", "catch_x"], false]]
def parse_mkeventd_status(info):
    """Turn the agent section into a dict mapping site name to status columns.

    Every entry of ``info`` is expected to hold one JSON document in its
    first element. A one-element JSON list announces a site; the following
    two-element list ``[column_names, values]`` carries that site's status.

    Returns a dict ``{site: status}``. ``status`` is a dict of column name
    (without the leading ``status_``) to value, or ``None`` when a site was
    announced but no status record followed for it.
    """
    import json

    prefix = "status_"
    parsed, site = {}, None
    for line in info:
        if not line:
            continue  # nothing to decode on an empty line

        try:
            data = json.loads(line[0])
        except ValueError:
            # The agent plugin asks the event console for json OutputFormat, but
            # older versions always provide python format - even when other format
            # was requested. Skipping the site. Won't eval data from other systems.
            continue

        # Valid JSON that is not a list (null, number, string, object) can be
        # neither a site header nor a status record.
        if not isinstance(data, list):
            continue

        if len(data) == 1:
            site = data[0]
            parsed[site] = None  # Site is marked as down until overwritten later
        elif site and len(data) >= 2:
            # Strip "status_" from the column names, but only where it is
            # actually present so other names are not truncated.
            keys = [col[len(prefix):] if col.startswith(prefix) else col
                    for col in data[0]]
            parsed[site] = dict(zip(keys, data[1]))

    return parsed
def inventory_mkeventd_status(parsed):
    """Return one ``(site, {})`` entry for each site that delivered status data.

    Sites whose parsed status is ``None`` are left out.
    """
    discovered = []
    for site in parsed:
        if parsed[site] is None:
            continue
        discovered.append((site, {}))
    return discovered
def check_mkeventd_status(item, params, parsed):
    """Yield the check results for the Event Console of site ``item``.

    ``parsed`` is the dict produced by ``parse_mkeventd_status``. ``params``
    is accepted but not used. Each result is a ``(state, text)`` or
    ``(state, text, perfdata)`` tuple. Nothing is yielded when the site is
    not part of ``parsed``.

    ``get_rate``, ``get_bytes_human_readable`` and ``time`` are not defined
    in this function; they are expected to be in scope where it runs.
    """
    if item not in parsed:
        return

    status = parsed[item]

    # Ignore down sites. This happens on a regular basis due to restarts
    # of the core. The availability of a site is monitored with 'omd_status'.
    if status is None:
        yield 0, "Currently not running"
        return

    yield 0, "Current events: %d" % status["num_open_events"], \
        [("num_open_events", status["num_open_events"])]

    # Not every agent output carries this column (the example output in the
    # header of this file does not), so skip the line instead of failing
    # with a KeyError.
    virtual_size = status.get("virtual_memory_size")
    if virtual_size is not None:
        yield 0, "Virtual memory: %s" % \
            get_bytes_human_readable(virtual_size), \
            [("process_virtual_size", virtual_size)]

    # Event limits
    if status["event_limit_active_overall"]:
        yield 2, "Overall event limit active"
    else:
        yield 0, "Overall event limit inactive"

    for ty in ["hosts", "rules"]:
        limited = status["event_limit_active_%s" % ty]
        if limited:
            yield 1, "Event limit active for %d %s (%s)" % \
                (len(limited), ty, ", ".join(limited))
        else:
            yield 0, "No %s event limit active" % ty

    # Rates: each entry is (title, column stem, format). The counter is read
    # from the column "<stem>s" and converted with get_rate().
    columns = [
        ("Received messages", "message", "%.2f/s"),
        ("Rule hits", "rule_hit", "%.2f/s"),
        ("Rule tries", "rule_trie", "%.2f/s"),
        ("Message drops", "drop", "%.2f/s"),
        ("Created events", "event", "%.2f/s"),
        ("Client connects", "connect", "%.2f/s"),
    ]
    rates = {}
    this_time = time.time()
    for title, col, fmt in columns:
        counter_value = status[col + "s"]
        rate = get_rate(col, this_time, counter_value)
        rates[col] = rate
        yield 0, ("%s: " + fmt) % (title, rate), \
            [("average_%s_rate" % col, rate)]

    # Hit rate: only computable (and only reported as perfdata) when rules
    # were tried at all, otherwise the division would be by zero.
    if rates["rule_trie"] == 0.0:
        hit_rate_txt = "-"
    else:
        value = rates["rule_hit"] / rates["rule_trie"] * 100
        hit_rate_txt = "%.2f%%" % value
        yield 0, '', [("average_rule_hit_ratio", value)]
    yield 0, "%s: %s" % ("Rule hit ratio", hit_rate_txt)

    # Time columns
    time_columns = [
        ("Processing time per message", "processing"),
        ("Time per client request", "request"),
        ("Replication synchronization", "sync"),
    ]
    for title, name in time_columns:
        value = status.get("average_%s_time" % name)
        if value:
            # Displayed value is the column value multiplied by 1000; the
            # perfdata keeps the unscaled column value.
            txt = "%.2f ms" % (value * 1000)
            yield 0, '', [("average_%s_time" % name, value)]
        else:
            if name == "sync":
                continue  # skip if not available
            txt = "-"
        yield 0, "%s: %s" % (title, txt)
139 check_info["mkeventd_status"] = {
140 "parse_function": parse_mkeventd_status,
141 "inventory_function": inventory_mkeventd_status,
142 "check_function": check_mkeventd_status,
143 "service_description": "OMD %s Event Console",
144 "has_perfdata": True,