Refactoring: Changed all check parameters starting with an 'o' to the new rulespec...
[check_mk.git] / checks / aws_elb
blobe9809a775729c2b8fb0f4eddf00982b8ab94a7a1
1 #!/usr/bin/python
2 # -*- encoding: utf-8; py-indent-offset: 4 -*-
3 # +------------------------------------------------------------------+
4 # | ____ _ _ __ __ _ __ |
5 # | / ___| |__ ___ ___| | __ | \/ | |/ / |
6 # | | | | '_ \ / _ \/ __| |/ / | |\/| | ' / |
7 # | | |___| | | | __/ (__| < | | | | . \ |
8 # | \____|_| |_|\___|\___|_|\_\___|_| |_|_|\_\ |
9 # | |
10 # | Copyright Mathias Kettner 2019 mk@mathias-kettner.de |
11 # +------------------------------------------------------------------+
13 # This file is part of Check_MK.
14 # The official homepage is at http://mathias-kettner.de/check_mk.
16 # check_mk is free software; you can redistribute it and/or modify it
17 # under the terms of the GNU General Public License as published by
18 # the Free Software Foundation in version 2. check_mk is distributed
19 # in the hope that it will be useful, but WITHOUT ANY WARRANTY; with-
20 # out even the implied warranty of MERCHANTABILITY or FITNESS FOR A
21 # PARTICULAR PURPOSE. See the GNU General Public License for more de-
22 # tails. You should have received a copy of the GNU General Public
23 # License along with GNU Make; see the file COPYING. If not, write
24 # to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
25 # Boston, MA 02110-1301 USA.
def parse_aws_elb(info):
    """Parse the raw agent section and extract the ELB metrics used below.

    `info` is handed to parse_aws(); its result is passed, together with the
    list of metric names the sub-checks in this file look up, to
    _extract_aws_metrics(), whose return value is returned unchanged.

    NOTE(review): parse_aws and _extract_aws_metrics are not defined in this
    file; presumably they come from 'aws.include' -- confirm.
    """
    wanted_metrics = [
        "RequestCount",
        "SurgeQueueLength",
        "SpilloverCount",
        "Latency",
        "HTTPCode_ELB_4XX",
        "HTTPCode_ELB_5XX",
        "HTTPCode_Backend_2XX",
        "HTTPCode_Backend_3XX",
        "HTTPCode_Backend_4XX",
        "HTTPCode_Backend_5XX",
        "HealthyHostCount",
        "UnHealthyHostCount",
        "BackendConnectionErrors",
    ]
    return _extract_aws_metrics(wanted_metrics, parse_aws(info))
def inventory_aws_elb_generic(parsed, required_metrics):
    """Discover a single item-less service if all required metrics exist.

    Returns [(None, {})] when every name in `required_metrics` is contained
    in `parsed` (this includes the case of an empty requirement list), and an
    empty list as soon as one of them is missing.
    """
    if all(metric in parsed for metric in required_metrics):
        return [(None, {})]
    return []
53 # .--statistics----------------------------------------------------------.
54 # | _ _ _ _ _ |
55 # | ___| |_ __ _| |_(_)___| |_(_) ___ ___ |
56 # | / __| __/ _` | __| / __| __| |/ __/ __| |
57 # | \__ \ || (_| | |_| \__ \ |_| | (__\__ \ |
58 # | |___/\__\__,_|\__|_|___/\__|_|\___|___/ |
59 # | |
60 # '----------------------------------------------------------------------'
# SpilloverCount: When the SurgeQueueLength reaches the maximum of 1,024 queued
# requests, new requests are dropped, the user receives a 503 error, and the
# spillover count metric is incremented. In a healthy system, this metric is
# always equal to zero.
# Default parameters of the 'aws_elb' statistics check. Each value is passed
# unchanged to check_levels() by check_aws_elb_statistics.
# NOTE(review): the tuples are presumably (warn, crit) -- confirm.
factory_settings['aws_elb_statistics'] = {
    'levels_spillover': (1, 1),
    'levels_surge_queue_length': (1024, 1024),
}
def check_aws_elb_statistics(item, params, parsed):
    """Yield one check_levels() result per available statistics metric.

    'SurgeQueueLength' and 'SpilloverCount' are looked up in `parsed` (in
    that order); a metric that is absent or None is skipped silently. The
    levels are read with params[...], so both 'levels_surge_queue_length'
    and 'levels_spillover' must be present in `params` whenever the
    corresponding metric is available. `item` is not used.
    """
    # (key in parsed, perfdata name, key in params, label in the output)
    monitored = (
        ('SurgeQueueLength', 'aws_surge_queue_length', 'levels_surge_queue_length',
         'Surge queue length'),
        ('SpilloverCount', 'aws_spillover', 'levels_spillover', 'Spillover'),
    )
    for metric_key, perf_name, levels_key, label in monitored:
        value = parsed.get(metric_key)
        if value is None:
            continue
        yield check_levels(value, perf_name, params[levels_key], infoname=label)
# Main check of the section: registers the parse function and the
# 'AWS/ELB Statistics' service.
check_info['aws_elb'] = {
    'service_description': 'AWS/ELB Statistics',
    'parse_function': parse_aws_elb,
    # The service is only discovered if all three metrics are present.
    # NOTE(review): 'RequestCount' is required here although
    # check_aws_elb_statistics never reads it -- confirm this is intended.
    'inventory_function': lambda p: inventory_aws_elb_generic(
        p, ['RequestCount', 'SurgeQueueLength', 'SpilloverCount']),
    'check_function': check_aws_elb_statistics,
    'default_levels_variable': 'aws_elb_statistics',
    'group': 'aws_elb_statistics',
    'has_perfdata': True,
    'includes': ['aws.include'],
}
101 # .--latency-------------------------------------------------------------.
102 # | _ _ |
103 # | | | __ _| |_ ___ _ __ ___ _ _ |
104 # | | |/ _` | __/ _ \ '_ \ / __| | | | |
105 # | | | (_| | || __/ | | | (__| |_| | |
106 # | |_|\__,_|\__\___|_| |_|\___|\__, | |
107 # | |___/ |
108 # '----------------------------------------------------------------------'
def check_aws_elb_latency(item, params, parsed):
    """Yield a check_levels() result for the 'Latency' metric, if present.

    Nothing is yielded when `parsed` has no 'Latency' entry or it is None.
    The levels come from params.get('levels_latency') (None if not
    configured) and the value is rendered with get_age_human_readable.
    `item` is not used.
    """
    value = parsed.get("Latency")
    if value is None:
        return

    yield check_levels(
        value,
        'server_latency',
        params.get('levels_latency'),
        human_readable_func=get_age_human_readable,
        infoname="Latency",
    )
# Sub-check 'latency': discovered only when the 'Latency' metric is present.
check_info['aws_elb.latency'] = {
    'service_description': 'AWS/ELB Latency',
    'inventory_function': lambda p: inventory_aws_elb_generic(p, ['Latency']),
    'check_function': check_aws_elb_latency,
    'group': 'aws_elb_latency',
    'has_perfdata': True,
}
131 # .--HTTP ELB------------------------------------------------------------.
132 # | _ _ _____ _____ ____ _____ _ ____ |
133 # | | | | |_ _|_ _| _ \ | ____| | | __ ) |
134 # | | |_| | | | | | | |_) | | _| | | | _ \ |
135 # | | _ | | | | | | __/ | |___| |___| |_) | |
136 # | |_| |_| |_| |_| |_| |_____|_____|____/ |
137 # | |
138 # '----------------------------------------------------------------------'
def check_aws_elb_http_elb(item, params, parsed):
    """Report the request rate and the load balancer's own 4xx/5xx error rates.

    * If 'RequestCount' is available, its rate (via get_rate) is yielded with
      state 0 and the perfdata 'requests_per_second'; otherwise the request
      rate is taken to be 0.
    * For each of 'HTTPCode_ELB_4XX' and 'HTTPCode_ELB_5XX' that is available,
      the error rate is yielded with state 0, followed by a check_levels()
      result for the error share in percent of the request rate, using
      params.get('levels_http_4xx_perc') / params.get('levels_http_5xx_perc').
      The percentage is skipped when the request rate is zero.

    `item` is not used.
    """
    now = time.time()

    total_requests = parsed.get('RequestCount')
    if total_requests is None:
        request_rate = 0
    else:
        request_rate = get_rate('aws_elb_statistics', now, total_requests)
        yield 0, 'Requests: %s/s' % request_rate, [('requests_per_second', request_rate)]

    for status_class in ("4", "5"):
        error_count = parsed.get('HTTPCode_ELB_%sXX' % status_class)
        if error_count is None:
            continue

        error_rate = get_rate('aws_elb_http_backend.%sxx' % status_class, now, error_count)
        error_perfdata = [('http_%sxx_rate' % status_class, error_rate)]
        yield 0, '%s00-Errors: %s/s' % (status_class, error_rate), error_perfdata

        try:
            error_share = 100.0 * error_rate / request_rate
        except ZeroDivisionError:
            # No requests at all: a percentage cannot be computed.
            continue

        yield check_levels(
            error_share,
            'http_%sxx_perc' % status_class,
            params.get('levels_http_%sxx_perc' % status_class),
            unit='%',
            infoname="%s00-Errors of total requests" % status_class,
        )
# Sub-check 'http_elb': discovered only when the request count and both
# HTTPCode_ELB_* metrics are present.
check_info['aws_elb.http_elb'] = {
    'service_description': 'AWS/ELB HTTP ELB',
    'inventory_function': lambda p: inventory_aws_elb_generic(
        p, ['RequestCount', 'HTTPCode_ELB_4XX', 'HTTPCode_ELB_5XX']),
    'check_function': check_aws_elb_http_elb,
    'group': 'aws_elb_http',
    'has_perfdata': True,
}
182 # .--HTTP Backend--------------------------------------------------------.
183 # | _ _ _____ _____ ____ ____ _ _ |
184 # | | | | |_ _|_ _| _ \ | __ ) __ _ ___| | _____ _ __ __| | |
185 # | | |_| | | | | | | |_) | | _ \ / _` |/ __| |/ / _ \ '_ \ / _` | |
186 # | | _ | | | | | | __/ | |_) | (_| | (__| < __/ | | | (_| | |
187 # | |_| |_| |_| |_| |_| |____/ \__,_|\___|_|\_\___|_| |_|\__,_| |
188 # | |
189 # '----------------------------------------------------------------------'
def check_aws_elb_http_backend(item, params, parsed):
    """Report the request rate and the HTTP response rates of the backends.

    * If 'RequestCount' is available, its rate (via get_rate) is yielded with
      state 0 and the perfdata 'requests_per_second'; otherwise the request
      rate is taken to be 0.
    * For each of 'HTTPCode_Backend_4XX' and 'HTTPCode_Backend_5XX' that is
      available, the error rate is yielded with state 0, followed by a
      check_levels() result for the error share in percent of the request
      rate, using params.get('levels_http_4xx_perc') /
      params.get('levels_http_5xx_perc'). The percentage is skipped when the
      request rate is zero.
    * If available, the rates of 'HTTPCode_Backend_2XX' and
      'HTTPCode_Backend_3XX' are yielded with state 0 (text only).

    `item` is not used.
    """
    now = time.time()
    request_count = parsed.get('RequestCount')
    if request_count is not None:
        request_rate = get_rate('aws_elb_statistics', now, request_count)
        yield 0, 'Requests: %s/s' % request_rate, [('requests_per_second', request_rate)]
    else:
        request_rate = 0

    for http_errors_nr in ["4", "5"]:
        # Fix: this check is about the backends, so the backend metrics have
        # to be evaluated. The former lookup of 'HTTPCode_ELB_%sXX' made this
        # service duplicate the figures of the 'AWS/ELB HTTP ELB' service.
        http_errors = parsed.get('HTTPCode_Backend_%sXX' % http_errors_nr)
        if http_errors is None:
            continue

        http_errors_rate = get_rate('aws_elb_http_backend.%sxx' % http_errors_nr, now, http_errors)
        yield (0, '%s00-Errors: %s/s' % (http_errors_nr, http_errors_rate),
               [('http_%sxx_rate' % http_errors_nr, http_errors_rate)])

        try:
            http_errors_perc = 100.0 * http_errors_rate / request_rate
        except ZeroDivisionError:
            # No requests at all: a percentage cannot be computed.
            pass
        else:
            yield check_levels(
                http_errors_perc,
                'http_%sxx_perc' % http_errors_nr,
                params.get('levels_http_%sxx_perc' % http_errors_nr),
                unit='%',
                infoname="%s00-Errors of total requests" % http_errors_nr)

    http_backend_2xx = parsed.get('HTTPCode_Backend_2XX')
    if http_backend_2xx is not None:
        yield 0, '200-Requests: %s/s' % get_rate('aws_elb_http_backend.2xx', now, http_backend_2xx)

    http_backend_3xx = parsed.get('HTTPCode_Backend_3XX')
    if http_backend_3xx is not None:
        yield 0, '300-Requests: %s/s' % get_rate('aws_elb_http_backend.3xx', now, http_backend_3xx)
# Sub-check 'http_backend': discovered only when the request count and all
# four HTTPCode_Backend_* metrics are present.
check_info['aws_elb.http_backend'] = {
    'service_description': 'AWS/ELB HTTP Backend',
    'inventory_function': lambda p: inventory_aws_elb_generic(p, [
        'RequestCount',
        'HTTPCode_Backend_2XX',
        'HTTPCode_Backend_3XX',
        'HTTPCode_Backend_4XX',
        'HTTPCode_Backend_5XX',
    ]),
    'check_function': check_aws_elb_http_backend,
    'group': 'aws_elb_http',
    'has_perfdata': True,
}
240 # .--Healthy hosts-------------------------------------------------------.
241 # | _ _ _ _ _ _ _ |
242 # | | | | | ___ __ _| | |_| |__ _ _ | |__ ___ ___| |_ ___ |
243 # | | |_| |/ _ \/ _` | | __| '_ \| | | | | '_ \ / _ \/ __| __/ __| |
244 # | | _ | __/ (_| | | |_| | | | |_| | | | | | (_) \__ \ |_\__ \ |
245 # | |_| |_|\___|\__,_|_|\__|_| |_|\__, | |_| |_|\___/|___/\__|___/ |
246 # | |___/ |
247 # '----------------------------------------------------------------------'
def check_aws_elb_healthy_hosts(item, params, parsed):
    """Report healthy/unhealthy host counts and the share of healthy hosts.

    'HealthyHostCount' and 'UnHealthyHostCount' are converted with int(); a
    missing key or a value int() rejects with ValueError is treated as "not
    available". Each available count is yielded with state 0. If both are
    available, the total is yielded as well and, unless the total is zero,
    a check_levels() result for the percentage of healthy hosts using
    params.get('levels_overall_hosts_health_perc').

    `item` is not used.
    """
    counts = {}
    for metric_key in ("HealthyHostCount", "UnHealthyHostCount"):
        try:
            counts[metric_key] = int(parsed[metric_key])
        except (KeyError, ValueError):
            counts[metric_key] = None
    healthy = counts["HealthyHostCount"]
    unhealthy = counts["UnHealthyHostCount"]

    if healthy is not None:
        yield 0, 'Healthy hosts: %s' % healthy

    if unhealthy is not None:
        yield 0, 'Unhealthy hosts: %s' % unhealthy

    if healthy is None or unhealthy is None:
        return

    total = unhealthy + healthy
    yield 0, 'Total: %s' % total

    try:
        healthy_perc = 100.0 * healthy / total
    except ZeroDivisionError:
        # No hosts registered at all: there is no proportion to evaluate.
        return

    yield check_levels(
        healthy_perc,
        'aws_overall_hosts_health_perc',
        params.get('levels_overall_hosts_health_perc'),
        human_readable_func=get_percent_human_readable,
        infoname="Proportion of healthy hosts",
    )
# Sub-check 'healthy_hosts': discovered only when both host count metrics
# are present.
check_info['aws_elb.healthy_hosts'] = {
    'inventory_function':
        lambda p: inventory_aws_elb_generic(p, ['HealthyHostCount', 'UnHealthyHostCount']),
    'check_function': check_aws_elb_healthy_hosts,
    'service_description': 'AWS/ELB Healthy Hosts',
    # check_aws_elb_healthy_hosts passes the metric name
    # 'aws_overall_hosts_health_perc' to check_levels(), so the check is
    # declared as delivering performance data like its sibling sub-checks.
    'has_perfdata': True,
    'group': 'aws_elb_healthy_hosts',
}
294 # .--Backend errors------------------------------------------------------.
295 # | ____ _ _ |
296 # | | __ ) __ _ ___| | _____ _ __ __| | |
297 # | | _ \ / _` |/ __| |/ / _ \ '_ \ / _` | |
298 # | | |_) | (_| | (__| < __/ | | | (_| | |
299 # | |____/ \__,_|\___|_|\_\___|_| |_|\__,_| |
300 # | |
301 # | |
302 # | ___ _ __ _ __ ___ _ __ ___ |
303 # | / _ \ '__| '__/ _ \| '__/ __| |
304 # | | __/ | | | | (_) | | \__ \ |
305 # | \___|_| |_| \___/|_| |___/ |
306 # | |
307 # '----------------------------------------------------------------------'
def check_aws_elb_backend_connection_errors(item, params, parsed):
    """Yield a check_levels() result for the backend connection error rate.

    Nothing is yielded when `parsed` has no 'BackendConnectionErrors' entry
    or it is None. Otherwise the rate is obtained from get_rate() and
    checked against params.get('levels_backend_connection_errors_rate').
    `item` is not used.
    """
    now = time.time()
    error_count = parsed.get("BackendConnectionErrors")
    if error_count is None:
        return

    error_rate = get_rate('aws_elb_backend_connection_errors', now, error_count)
    yield check_levels(
        error_rate,
        'backend_connection_errors_rate',
        params.get('levels_backend_connection_errors_rate'),
    )
# Sub-check 'backend_connection_errors': discovered only when the
# 'BackendConnectionErrors' metric is present.
check_info['aws_elb.backend_connection_errors'] = {
    'service_description': 'AWS/ELB Backend Connection Errors',
    'inventory_function': lambda p: inventory_aws_elb_generic(
        p, ['BackendConnectionErrors']),
    'check_function': check_aws_elb_backend_connection_errors,
    'group': 'aws_elb_backend_connection_errors',
    'has_perfdata': True,
}