Refactoring: Changed all check parameters starting with an 'o' to the new rulespec...
[check_mk.git] / agents / plugins / mtr
blob1d6816da9d76dd893dd3c3d0007052cbfc279052
1 #!/usr/bin/python
2 # -*- encoding: utf-8; py-indent-offset: 4 -*-
3 # +------------------------------------------------------------------+
4 # | ____ _ _ __ __ _ __ |
5 # | / ___| |__ ___ ___| | __ | \/ | |/ / |
6 # | | | | '_ \ / _ \/ __| |/ / | |\/| | ' / |
7 # | | |___| | | | __/ (__| < | | | | . \ |
8 # | \____|_| |_|\___|\___|_|\_\___|_| |_|_|\_\ |
9 # | |
10 # | Copyright Mathias Kettner 2016 mk@mathias-kettner.de |
11 # +------------------------------------------------------------------+
13 # This file is part of Check_MK.
14 # The official homepage is at http://mathias-kettner.de/check_mk.
16 # check_mk is free software; you can redistribute it and/or modify it
17 # under the terms of the GNU General Public License as published by
18 # the Free Software Foundation in version 2. check_mk is distributed
19 # in the hope that it will be useful, but WITHOUT ANY WARRANTY; with-
20 # out even the implied warranty of MERCHANTABILITY or FITNESS FOR A
21 # PARTICULAR PURPOSE. See the GNU General Public License for more de-
22 # tails. You should have received a copy of the GNU General Public
23 # License along with GNU Make; see the file COPYING. If not, write
24 # to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
25 # Boston, MA 02110-1301 USA.
27 # This plugin was sponsored by BenV. Thanks!
28 # https://notes.benv.junerules.com/mtr/
30 # Concept:
31 # Read config mtr.cfg
32 # For every host:
33 # parse outstanding reports (and delete them)
34 # If current time > last check + config(time)//300 start new mtr in background
35 # MTR results are stored in $MK_VARDIR/mtr.report.<slug of host> (pid in <report>.pid)
36 # return previous host data
38 import ConfigParser
39 import glob
40 import os
41 import re
42 import subprocess
43 import sys
44 import time
45 from unicodedata import normalize
# Base directories are taken from the environment (MK_CONFDIR / MK_VARDIR);
# the literals are the fallbacks used when a variable is unset or empty.
mk_confdir = os.getenv("MK_CONFDIR") or "/etc/check_mk"
mk_vardir = os.getenv("MK_VARDIR") or "/var/lib/check_mk_agent"

# Main configuration file and glob pattern for additional drop-in files.
config_filename = mk_confdir + "/mtr.cfg"
config_dir = mk_confdir + "/mtr.d/*.cfg"
# State kept between plugin runs, and filename prefix of the per-host reports.
status_filename = mk_vardir + "/mtr.state"
report_filepre = mk_vardir + "/mtr.report."

# Debug output is enabled by "-d" or "--debug" anywhere on the command line.
# (The short flag used to be searched in sys.argv[2:] only, so "mtr -d" was
# silently ignored while "mtr --debug" worked.)
debug = '-d' in sys.argv[1:] or '--debug' in sys.argv[1:]
def which(program):
    """Locate an executable, similar to the shell's ``which``.

    If *program* contains a directory component only that exact location is
    checked.  Otherwise every directory listed in the PATH environment
    variable is searched in order; when PATH is not set at all, ``os.defpath``
    is searched instead of failing with a KeyError.

    Returns the path of the first match (a regular file with the execute
    permission), or None if nothing suitable was found.
    """

    def is_exe(fpath):
        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)

    fpath, _fname = os.path.split(program)
    if fpath:
        # Explicit location given: do not fall back to a PATH search.
        return program if is_exe(program) else None

    for path in os.environ.get("PATH", os.defpath).split(os.pathsep):
        exe_file = os.path.join(path, program)
        if is_exe(exe_file):
            return exe_file

    return None
# See if we have mtr. Without the binary there is nothing to report, so the
# plugin ends quietly with exit code 0 (the reason is only shown in debug mode).
mtr_prog = which('mtr')
if mtr_prog is None:
    if debug:
        sys.stdout.write("Could not find mtr binary\n")
    sys.exit(0)
def read_config():
    """Load mtr.cfg plus all drop-in files and return the parser object.

    Every section of the configuration describes one host; the options in
    ``default_options`` apply to hosts that do not override them.

    The process exits with code 0 when the main configuration file does not
    exist (message only in debug mode) or when no section was defined at all.
    Files that cannot be read or parsed are reported on stdout with an
    "**ERROR**" line and skipped.

    Returns:
        The populated ConfigParser.SafeConfigParser instance.
    """
    default_options = {
        'type': 'icmp',
        'count': "10",
        'force_ipv4': "0",
        'force_ipv6': "0",
        'size': "64",
        'time': "0",
        'dns': "0",
        'port': None,
        'address': None,
        'interval': None,
        'timeout': None,
    }
    if not os.path.exists(config_filename):
        if debug:
            sys.stdout.write("Not configured, %s missing\n" % config_filename)
        sys.exit(0)

    cfg = ConfigParser.SafeConfigParser(default_options)
    # Let ConfigParser figure it out. Drop-in files are read in sorted order:
    # glob() returns them in arbitrary order, and a later file wins when two
    # files set the same option, so the result must not depend on that order.
    for config_file in [config_filename] + sorted(glob.glob(config_dir)):
        try:
            # read() returns the list of files it managed to load
            if not cfg.read(config_file):
                sys.stdout.write("**ERROR** Failed to parse configuration file %s!\n" % config_file)
        except Exception as e:
            sys.stdout.write(
                "**ERROR** Failed to parse config file %s: %s\n" % (config_file, repr(e)))

    if not cfg.sections():
        sys.stdout.write("**ERROR** Configuration defines no hosts!\n")
        sys.exit(0)

    return cfg
# structure of statusfile
# # HOST        |LASTTIME |HOPCOUNT|HOP1|Loss%|Snt|Last|Avg|Best|Wrst|StDev|HOP2|...|HOP8|...|StdDev
# www.google.com|145122481|8|192.168.1.1|0.0%|10|32.6|3.6|0.3|32.6|10.2|192.168.0.1|...|9.8
def read_status():
    """Read the state file written by the previous plugin run.

    Returns:
        A dict mapping host name to ``{'lasttime': int, 'hops': {n: hop}}``
        where ``n`` counts from 1 and each hop is a dict with the string
        fields hopname, loss, snt, last, avg, best, wrst and stddev.
        An empty dict is returned when no state file exists.

    Lines that cannot be parsed are reported on stdout and skipped; a host
    whose hop list is truncated keeps its ``lasttime`` and the hops parsed
    up to that point.
    """
    current_status = {}
    if not os.path.exists(status_filename):
        return current_status

    # Per-hop columns in the order they appear in the file.
    hop_fields = ('hopname', 'loss', 'snt', 'last', 'avg', 'best', 'wrst', 'stddev')

    with open(status_filename) as status_file:
        for line in status_file:
            try:
                parts = line.split('|')
                if len(parts) < 2:
                    sys.stdout.write("**ERROR** (BUG) Status has less than 2 parts:\n")
                    sys.stdout.write("%s\n" % parts)
                    continue
                host = parts[0]
                lasttime = int(float(parts[1]))
                current_status[host] = {'hops': {}, 'lasttime': lasttime}
                hops = int(parts[2])
                for i in range(hops):
                    # 3 leading columns, then 8 columns per hop
                    first = i * len(hop_fields) + 3
                    current_status[host]["hops"][i + 1] = dict(
                        (field, parts[first + offset].rstrip())
                        for offset, field in enumerate(hop_fields))
            except Exception as e:
                sys.stdout.write(
                    "**ERROR** (BUG) Could not parse status line: %s, reason: %s\n" %
                    (line, repr(e)))
    return current_status
def save_status(current_status):
    """Write the per-host state to the state file, one line per host.

    Each line has the form ``host|lasttime|hopcount`` followed by eight
    ``|``-separated fields per hop (hopname, loss, snt, last, avg, best,
    wrst, stddev).  Hops are written in ascending hop-number order because
    the reader numbers them purely by their position in the line.

    Args:
        current_status: dict mapping host name to a dict with the keys
            'lasttime' and 'hops' (hop number -> dict of the eight fields).
    """
    hop_fields = ('hopname', 'loss', 'snt', 'last', 'avg', 'best', 'wrst', 'stddev')

    # The context manager guarantees the file is flushed and closed.
    with open(status_filename, "w") as status_file:
        for host, hostdict in current_status.items():
            hops = hostdict["hops"]
            columns = [host, hostdict["lasttime"], len(hops)]
            for hop in sorted(hops):
                hop_info = hops[hop]
                columns.extend(hop_info[field] for field in hop_fields)
            hoststring = "|".join("%s" % column for column in columns)
            status_file.write("%s\n" % hoststring.rstrip())
# Runs of these characters separate the "words" of a host name.
_punct_re = re.compile(r'[\t !"#$%&\'()*\-/<=>?@\[\\\]^_`{|},.:]+')


def host_to_filename(host, delim=u'-'):
    """Generates an slightly worse ASCII-only slug (idea stolen from Django).

    The host name is lower-cased and split at punctuation/whitespace; each
    piece is NFKD-normalized and stripped of everything that is not ASCII,
    and the non-empty pieces are joined with *delim*.

    Args:
        host: host name, either UTF-8 encoded bytes or already decoded text.
        delim: separator placed between the remaining pieces.

    Returns:
        The slug as a text (unicode) string; empty if nothing is left.
    """
    # Byte strings (the str type on Python 2) are decoded; text passes through.
    if isinstance(host, bytes):
        host = host.decode('UTF-8')
    result = []
    for word in _punct_re.split(host.lower()):
        # Decompose accented characters, then drop all non-ASCII code points.
        word = normalize('NFKD', word).encode('ascii', 'ignore').decode('ascii')
        if word:
            result.append(word)
    return delim.join(result)
192 def check_mtr_pid(pid):
193 """ Check for the existence of a unix pid and if the process matches. """
194 try:
195 os.kill(pid, 0)
196 except OSError:
197 return False # process does no longer exist
198 else:
199 pid_cmdline = "/proc/%d/cmdline" % pid
200 try:
201 return os.path.exists(pid_cmdline) and \
202 file(pid_cmdline).read().startswith("mtr\x00--report\x00--report-wide")
203 except:
204 return False # any error
def parse_report(host):
    """Fold a finished mtr report for *host* into the global ``status`` dict.

    Outcomes, all recorded in ``status[host]``:
      * no report file: a fresh empty entry is created for unknown hosts;
      * pid file present and mtr still running: the entry gets the key
        'running' and the report is left alone;
      * report shorter than 3 lines: the report is deleted, existing data
        for the host is kept;
      * otherwise the entry is replaced by the timestamp and hops found in
        the report.
    If the report cannot be parsed, it is deleted and the host is removed
    from ``status`` altogether.
    """
    reportfile = report_filepre + host_to_filename(host)
    pidfile = reportfile + ".pid"

    if not os.path.exists(reportfile):
        # New host
        status.setdefault(host, {'hops': {}, 'lasttime': 0})
        return

    # Layout of a report file (first line is written by this plugin):
    # 1451228358
    # Start: Sun Dec 27 14:35:18 2015
    # HOST: purple                    Loss%   Snt   Last   Avg  Best  Wrst StDev
    #   1.|-- 80.69.76.120             0.0%    10    0.3   0.4   0.3   0.6   0.0
    #   2.|-- 80.249.209.100           0.0%    10    1.0   1.1   0.8   1.4   0.0
    #   7.|-- ???                     100.0    10    0.0   0.0   0.0   0.0   0.0
    #   8.|-- 74.125.136.147           0.0%    10    4.5   4.6   4.3   5.2   0.0

    # See if pidfile exists and if mtr is still running
    if os.path.exists(pidfile):
        try:
            with open(pidfile, 'r') as pid_fh:
                pid = int(pid_fh.readline().rstrip())
            if check_mtr_pid(pid):
                # Still running, we're done.
                status.setdefault(host, {'hops': {}, 'lasttime': 0})
                status[host]['running'] = True
                return
        except ValueError:
            # Pid file is broken. Process probably crashed..
            pass
        # Done running, get rid of pid file
        os.unlink(pidfile)

    # Parse the existing report
    with open(reportfile) as report_fh:
        lines = report_fh.readlines()
    if len(lines) < 3:
        sys.stdout.write("**ERROR** Report file %s has less than 3 lines, "
                         "expecting at least 1 hop! Throwing away invalid report\n" % reportfile)
        os.unlink(reportfile)
        status.setdefault(host, {'hops': {}, 'lasttime': 0})
        return
    status[host] = {'hops': {}, 'lasttime': 0}

    hopcount = 0
    try:
        # The timestamp line is parsed inside the try block as well, so that a
        # garbled first line discards the report instead of aborting the plugin.
        status[host]["lasttime"] = int(float(lines.pop(0)))
        while lines and not lines[0].startswith("HOST:"):
            lines.pop(0)
        if len(lines) < 2:  # Not enough lines
            return
        lines.pop(0)  # Get rid of HOST: header
        hopline = re.compile(
            r'^\s*\d+\.')  #  10.|-- 129.250.2.147  0.0%  10  325.6 315.5 310.3 325.6   5.0
        for line in lines:
            if not hopline.match(line):
                continue  #     |  `|-- 129.250.2.159
            hopcount += 1
            parts = line.split()
            # Hop marker plus 8 value columns; parts[8] is read below, so at
            # least 9 parts are required.
            if len(parts) < 9:
                sys.stdout.write("**ERROR** Bug parsing host/hop, "
                                 "line has less than 9 parts: %s\n" % line)
                continue
            status[host]['hops'][hopcount] = {
                'hopname': parts[1],
                'loss': parts[2],
                'snt': parts[3],
                'last': parts[4],
                'avg': parts[5],
                'best': parts[6],
                'wrst': parts[7],
                'stddev': parts[8],
            }
    except Exception as e:
        sys.stdout.write("**ERROR** Could not parse report file %s, "
                         "tossing away invalid data %s\n" % (reportfile, e))
        del status[host]
        os.unlink(reportfile)
def output_report(host):
    """Print the last known mtr result of *host* as one agent output line.

    The line has the form ``host|lasttime|hopcount`` followed by eight
    ``|``-separated fields per hop, hops in ascending hop-number order.
    Nothing is printed for a host without an entry in the global ``status``.
    """
    hostdict = status.get(host)
    if not hostdict:
        return

    hop_fields = ('hopname', 'loss', 'snt', 'last', 'avg', 'best', 'wrst', 'stddev')
    hops = hostdict["hops"]
    columns = [host, hostdict["lasttime"], len(hops)]
    # Sort explicitly: plain dict iteration order is not guaranteed to follow
    # the hop numbers.
    for hop in sorted(hops):
        hop_info = hops[hop]
        columns.extend(hop_info[field] for field in hop_fields)
    sys.stdout.write("%s\n" % "|".join("%s" % column for column in columns))
def _mtr_command_line(host):
    """Assemble the mtr argument list for *host* from its config section."""
    pingtype = config.get(host, "type")
    count = config.getint(host, "count")
    ipv4 = config.getboolean(host, "force_ipv4")
    ipv6 = config.getboolean(host, "force_ipv6")
    size = config.getint(host, "size")
    dns = config.getboolean(host, "dns")
    port = config.get(host, "port")
    address = config.get(host, "address")
    interval = config.get(host, "interval")
    timeout = config.get(host, "timeout")

    options = [mtr_prog, '--report', '--report-wide']
    if pingtype == 'tcp':
        options.append("--tcp")
    if pingtype == 'udp':
        options.append("--udp")
    if port is not None:
        options += ["--port", str(port)]
    if ipv4:
        options.append("-4")
    if ipv6:
        options.append("-6")
    options += ["-s", str(size), "-c", str(count)]
    if not dns:
        options.append("--no-dns")
    if address is not None:
        options += ["--address", str(address)]
    if interval is not None:
        options += ["-i", str(interval)]
    if timeout is not None:
        options += ["--timeout", str(timeout)]
    options.append(str(host))
    return options


def start_mtr(host):
    """Start a background mtr run for *host* unless one is running or not due.

    No run is started when the status entry of the host carries the key
    'running', or when fewer seconds than the host's "time" option have
    passed since the 'lasttime' of the last report.

    Otherwise the process forks.  The parent returns immediately; the child
    detaches into its own session, writes the current timestamp as first
    line of the report file, launches mtr with stdout/stderr appended to that
    file, stores mtr's pid in "<report>.pid" and terminates via os._exit()
    without ever returning to the caller.
    """
    options = _mtr_command_line(host)
    min_interval = config.getint(host, "time")

    # The host entry is missing when its report was unparsable and discarded;
    # treat that like a host that has never been measured.
    host_status = status.get(host, {'hops': {}, 'lasttime': 0})

    if "running" in host_status:
        if debug:
            sys.stdout.write("MTR for host still running, not restarting MTR!\n")
        return

    now = time.time()
    elapsed = now - host_status["lasttime"]
    if elapsed < min_interval:
        if debug:
            sys.stdout.write("%s - %s = %s is smaller than %s => mtr run not needed yet.\n" %
                             (now, host_status["lasttime"], elapsed, min_interval))
        return

    # Written before forking: the child leaves through os._exit(), which does
    # not flush stdio buffers, so a message written there can get lost.
    if debug:
        sys.stdout.write("Startin MTR: %s\n" % (" ".join(options)))

    pid = os.fork()
    if pid > 0:
        # Parent process, return and keep running
        return

    # Child process from here on. Whatever happens, it must not fall back
    # into the caller's loop, hence the unconditional os._exit() below.
    exit_code = 1
    try:
        os.chdir("/")
        # NOTE(review): umask 0 makes the report and pid files created below
        # world-writable - confirm that this is intended.
        os.umask(0)
        os.setsid()

        # Close all fd except stdin,out,err
        for fd in range(3, 256):
            try:
                os.close(fd)
            except OSError:
                pass

        reportfile = report_filepre + host_to_filename(host)
        if os.path.exists(reportfile):
            os.unlink(reportfile)
        with open(reportfile, 'a+') as report:
            # First line: start time of this measurement
            report.write(str(int(time.time())) + "\n")
            report.flush()
            process = subprocess.Popen(options, stdout=report, stderr=report)
        # Write pid to report.pid
        with open(reportfile + ".pid", 'w') as pidfile:
            pidfile.write("%d\n" % process.pid)
        exit_code = os.EX_OK
    finally:
        os._exit(exit_code)
# Main program: print the section header (field separator is ASCII 124, "|"),
# then load the configuration and the state of the previous run.
sys.stdout.write("<<<mtr:sep(124)>>>\n")
config = read_config()
status = read_status()
for host_name in config.sections():
    # Parse outstanding report
    parse_report(host_name)
    # Output last known values
    output_report(host_name)
    # Start new if needed
    start_mtr(host_name)
# Persist the state once all hosts have been handled
save_status(status)