GUI CSS: Deployed view styles for layouts (CMK-1171)
[check_mk.git] / checks / diskstat
blobc286c68cc9e6fd4c78c4d8c0a378823f1756d391
1 #!/usr/bin/python
2 # -*- encoding: utf-8; py-indent-offset: 4 -*-
3 # +------------------------------------------------------------------+
4 # | ____ _ _ __ __ _ __ |
5 # | / ___| |__ ___ ___| | __ | \/ | |/ / |
6 # | | | | '_ \ / _ \/ __| |/ / | |\/| | ' / |
7 # | | |___| | | | __/ (__| < | | | | . \ |
8 # | \____|_| |_|\___|\___|_|\_\___|_| |_|_|\_\ |
9 # | |
10 # | Copyright Mathias Kettner 2014 mk@mathias-kettner.de |
11 # +------------------------------------------------------------------+
13 # This file is part of Check_MK.
14 # The official homepage is at http://mathias-kettner.de/check_mk.
16 # check_mk is free software; you can redistribute it and/or modify it
17 # under the terms of the GNU General Public License as published by
18 # the Free Software Foundation in version 2. check_mk is distributed
19 # in the hope that it will be useful, but WITHOUT ANY WARRANTY; with-
20 # out even the implied warranty of MERCHANTABILITY or FITNESS FOR A
21 # PARTICULAR PURPOSE. See the GNU General Public License for more de-
22 # tails. You should have received a copy of the GNU General Public
23 # License along with GNU Make; see the file COPYING. If not, write
24 # to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
25 # Boston, MA 02110-1301 USA.
27 # <<<diskstat>>>
28 # 1300264105
29 # 8 0 sda 691860 951191 13559915 491748 234686 197346 3359512 94944 0 56844 586312
30 # 8 32 sdb 791860 91191 23589915 491748 234686 197346 3359512 94944 0 56844 586312
32 # Newer agents also output dm-* and Veritas (VxVM) devices and, if
33 # available, the following additional information for name rewriting:
35 # <<<diskstat>>>
36 # 1338931242
37 # 8 0 sda 6142 327 219612 2244 3190 6233 74075 8206 0 6523 10446
38 # 253 0 dm-0 4579 0 181754 2343 9249 0 73960 259491 0 1208 261833
39 # 253 1 dm-1 342 0 2736 47 3 0 11796464 5016 0 5063 5063
40 # 253 2 dm-2 160 0 1274 27 11 0 56 3 0 27 30
41 # 8 16 sdb 464 858 7717 336 1033 0 311454 3899 0 3007 4231
42 # 8 32 sdc 855 13352 106777 1172 915 0 154467 2798 0 3012 3967
43 # 8 48 sdd 1217 861 109802 1646 118 0 56151 1775 0 2736 3420
44 # 8 80 sdf 359 1244 58323 792 66 0 4793 388 0 765 1178
45 # 8 64 sde 310 1242 6964 268 118 0 56151 1607 0 1307 1872
46 # 8 96 sdg 1393 1242 314835 3759 129 0 56172 1867 0 4027 5619
47 # 199 27000 VxVM27000 131 0 990 61 11 0 21 29 0 89 90
48 # 199 27001 VxVM27001 0 0 0 0 0 0 0 0 0 0 0
49 # [dmsetup_info]
50 # vg_zwei-lv_home 253:2 vg_zwei lv_home
51 # vg_zwei-lv_swap 253:1 vg_zwei lv_swap
52 # vg_zwei-lv_root 253:0 vg_zwei lv_root
53 # [vx_dsk]
54 # c7 6978 /dev/vx/dsk/datadg/lalavol
55 # c7 6979 /dev/vx/dsk/datadg/oravol
57 # output may have zeros appended
59 # 8 0 sda 111918756 929875 3960367050 349083041 20142495 1149711 1021234448 851284769 0 233177192 1197549009 0 0 0 0
60 # 8 1 sda1 226 0 27481 3388 381 3 31472 35862 0 8123 39260 0 0 0 0
61 # 8 2 sda2 111918500 929875 3960337473 349079568 20142114 1149708 1021202976 851248906 0 233176504 1197492420 0 0 0 0
62 # 253 0 dm-0 883953 0 92124097 10287533 108572 0 2251672 809814 0 7545567 11097424 0 0 0 0
63 # 253 1 dm-1 21046 0 172072 157766 164020 0 1312160 29292970 0 124138 29451007 0 0 0 0
64 # 253 2 dm-2 750714 0 19747073 7702216 1445987 0 36811608 9817313 0 7159271 17520030 0 0 0 0
66 # Fields in /proc/diskstats
67 # Index 0 -- major number
68 # Index 1 -- minor number
69 # Index 2 -- device name --> used by check
70 # Index 3 -- # of reads issued
71 # Index 4 -- # of reads merged
72 # Index 5 -- # of sectors read (a 512 Byte) --> used by check
73 # Index 6 -- # of milliseconds spent reading
74 # Index 7 -- # of writes completed
75 # Index 8 -- # of writes merged
76 # Index 9 -- # of sectors written (a 512 Byte) --> used by check
77 # Index 10 -- # of milliseconds spent writing
78 # Index 11 -- # of I/Os currently in progress
79 # Index 12 -- # of milliseconds spent doing I/Os
80 # Index 13 -- weighted # of milliseconds spent doing I/Os
82 # Convert information to generic format also generated
83 # by winperf_phydisk
84 # [ now, [( disk, readctr, writectr ), ... ]]
85 # where counters are in sectors (512 bytes)
87 # Parse /proc/diskstat and additional information into a nice canonical
88 # dictionary of the form:
89 # disks = {
90 # "hda" : {
91 # 'average_read_request_size' : 0.0,
92 # 'average_read_wait' : 0.0,
93 # 'average_request_size' : 40569.90476190476,
94 # 'average_wait' : 0.761904761904762,
95 # 'average_write_request_size' : 40569.90476190476,
96 # 'average_write_wait' : 0.0007619047619047619,
97 # 'node' : None,
98 # 'read_ios' : 0.0,
99 # 'read_throughput' : 0.0,
100 # 'latency' : 0.00038095238095238096,
101 # 'utilization' : 0.0006153846153846154,
102 # 'write_ios' : 1.6153846153846154,
103 # 'write_throughput' : 65536.0,
104 # },
105 # "LVM foobar" : {
106 # ...
110 # Returns that dictionary only. The timestamp of the section is used
111 # internally as reference time for the rate computations: parsed = disks
114 # Consideration for debugging purposes:
115 # Due to check_info['diskstat']['extra_sections']: ["multipath"])
116 # each info list is prefixed with '<node_name>'.
def parse_diskstat(info):
    """Parse the <<<diskstat>>> agent section into per-device rate metrics.

    `info` is the raw section table; every row is prefixed with the node
    name. The timestamp, the plain /proc/diskstats rows and the optional
    [dmsetup_info] / [vx_dsk] name mappings are separated by
    diskstat_extract_name_info().

    Returns a dictionary mapping the (possibly rewritten) device name to a
    dictionary with the keys "node", "read_ios", "write_ios",
    "read_throughput", "write_throughput", "utilization", "latency",
    "average_request_size", "average_wait", "average_read_wait",
    "average_read_request_size", "average_write_wait",
    "average_write_request_size" and "queue_length".

    Raises TypeError if the section contains no timestamp line and
    ValueError if a row has fewer fields than expected or a counter is not
    numeric.
    """
    timestamp_str, proc_diskstat, name_info = diskstat_extract_name_info(info)
    # The agent output may carry additional trailing columns. Keep only the
    # node name plus the 14 fields that are unpacked below.
    proc_diskstat = [ds[:15] for ds in proc_diskstat]
    timestamp = int(timestamp_str)

    # Here we discover real partitions and exclude them: a device whose name
    # matches the partition pattern is only a "real" partition if the disk it
    # belongs to (the name without trailing digits) is reported as well.
    # Partitions without disks - typical in XEN virtual setups, e.g. there
    # are xvda1, xvda2, but no xvda - must be kept.
    device_names = set(line[3] for line in proc_diskstat)
    real_partitions = {
        device_name for device_name in device_names
        if diskstat_diskless_pattern.match(device_name) and
        re.sub('[0-9]+$', '', device_name) in device_names
    }

    disks = {}
    for line in proc_diskstat:
        if line[3] in real_partitions:
            continue

        node_name, major, minor, device, \
            read_ios, _read_merges, read_sectors, read_ticks, \
            write_ios, _write_merges, write_sectors, write_ticks, \
            ios_in_prog, total_ticks, _rq_ticks = line

        # Replace generic names like "dm-0" by the LVM/DM/VxVM name, if known
        device = name_info.get((node_name, int(major), int(minor)), device)

        counter_base = "diskstat.%s." % device

        # Some of the following computations were learned from Munin. Thanks
        # to that project!

        # There are 1000 ticks per second
        # Note: we use onwrap=0.0 here because the parse function is being
        # used also during service discovery. If we raise a counter wrap
        # exception here, then nothing will be inventorized.
        read_ticks_rate = get_rate(
            counter_base + "read_ticks", timestamp, int(read_ticks), onwrap=0.0)
        write_ticks_rate = get_rate(
            counter_base + "write_ticks", timestamp, int(write_ticks), onwrap=0.0)
        total_ticks_rate = get_rate(
            counter_base + "total_ticks", timestamp, int(total_ticks), onwrap=0.0)
        read_ios_rate = get_rate(
            counter_base + "read_ios", timestamp, int(read_ios), onwrap=0.0)
        write_ios_rate = get_rate(
            counter_base + "write_ios", timestamp, int(write_ios), onwrap=0.0)
        total_ios_rate = read_ios_rate + write_ios_rate
        utilization = total_ticks_rate / 1000.0  # not percent, but 0...1

        # Counters are in sectors of 512 bytes
        read_bytes_rate = get_rate(
            counter_base + "read_sectors", timestamp, int(read_sectors), onwrap=0.0) * 512
        write_bytes_rate = get_rate(
            counter_base + "write_sectors", timestamp, int(write_sectors), onwrap=0.0) * 512
        total_bytes_rate = read_bytes_rate + write_bytes_rate

        # The service time is computed from the utilization. If we work
        # e.g. 0.34 (34%) of the time and we can do 17 operations in that
        # time then the average latency is time * 0.34 / 17
        if total_ios_rate:
            latency = utilization / total_ios_rate
            average_wait = (read_ticks_rate + write_ticks_rate) / total_ios_rate / 1000.0
            average_request_size = total_bytes_rate / total_ios_rate
        else:
            latency = 0.0
            average_wait = 0.0
            average_request_size = 0.0

        # Average read and write rate, from end to end, including queuing,
        # etc. and average size of one request
        if read_ticks_rate and read_ios_rate > 0:
            average_read_wait = read_ticks_rate / read_ios_rate / 1000.0
            average_read_size = read_bytes_rate / read_ios_rate
        else:
            average_read_wait = 0.0
            average_read_size = 0.0

        if write_ticks_rate and write_ios_rate > 0:
            average_write_wait = write_ticks_rate / write_ios_rate / 1000.0
            average_write_size = write_bytes_rate / write_ios_rate
        else:
            average_write_wait = 0.0
            average_write_size = 0.0

        disks[device] = {
            "node": node_name,
            "read_ios": read_ios_rate,
            "write_ios": write_ios_rate,
            "read_throughput": read_bytes_rate,
            "write_throughput": write_bytes_rate,
            "utilization": utilization,
            "latency": latency,
            "average_request_size": average_request_size,
            "average_wait": average_wait,
            "average_read_wait": average_read_wait,
            "average_read_request_size": average_read_size,
            "average_write_wait": average_write_wait,
            "average_write_request_size": average_write_size,
            "queue_length": int(ios_in_prog),
        }

    return disks
217 ### # Index 0 -- major number
218 ### # Index 1 -- minor number
219 ### # Index 2 -- device name --> used by check
220 ### # Index 3 -- # of reads issued
221 ### # Index 4 -- # of reads merged
222 ### # Index 5 -- # of sectors read (a 512 Byte) --> used by check
223 ### # Index 6 -- # of milliseconds spent reading
224 ### # Index 7 -- # of writes completed
225 ### # Index 8 -- # of writes merged
226 ### # Index 9 -- # of sectors written (a 512 Byte) --> used by check
227 ### # Index 10 -- # of milliseconds spent writing
228 ### # Index 11 -- # of I/Os currently in progress
229 ### # Index 12 -- # of milliseconds spent doing I/Os
230 ### # Index 13 -- weighted # of milliseconds spent doing I/Os
231 ### for line in proc_diskstat:
232 ### node = line[0]
236 ### # For multipath devices use the entries for dm-?? and rename
237 ### # them with their multipath UUID/alias - and drop the according
238 ### # sdXY that belong to the paths.
239 ### multipath_name_info = {}
240 ### skipped_devices = set([])
242 ### # The generic function takes the following values per line:
243 ### # 0: None or node name
244 ### # 1: devname
245 ### # 2: read bytes counter
246 ### # 3: write bytes counter
247 ### # Optional ones:
248 ### # 4: number of reads
249 ### # 5: number of writes
250 ### # 6: timems
251 ### # 7: read queue length *counters*
252 ### # 8: write queue length *counters*
253 ### rewritten = [
254 ### ( l[0], # node name or None
255 ### diskstat_rewrite_device(name_info, multipath_name_info, l[0:4]),
256 ### int(l[6]),
257 ### int(l[10]),
258 ### int(l[4]),
259 ### int(l[8]),
260 ### # int(l[13])
261 ### ) for l in info[1:] if len(l) >= 14
262 ### ]
264 ### # Remove device mapper devices without a translated name
265 ### return [ line for line in rewritten
266 ### if not line[1].startswith("dm-")
267 ### and not line[1] in skipped_devices ]
270 # Extract additional information from the diskstat section about
271 # LVM, DM and VxVM devices. This information is enclosed in the
272 # [dmsetup_info] and [vx_dsk] subsections. Example for
273 # name_info:
275 # (None, 253, 0): 'LVM vg00-rootvol',
276 # (None, 253, 1): 'LVM vg00-tmpvol',
277 # (None, 253, 2): 'LVM vg00-varvol',
278 # (None, 253, 3): 'LVM vg00-optvol',
279 # (None, 253, 4): 'LVM vg00-usrvol',
280 # (None, 253, 5): 'LVM vg00-swapvol',
281 # (None, 253, 6): 'LVM vgappl-applvol',
def diskstat_extract_name_info(info):
    """Split the raw diskstat section into timestamp, disk rows and names.

    Every row of `info` starts with the node name (or None). The section
    consists of a timestamp row, the /proc/diskstats rows and the optional
    subsections [dmsetup_info] and [vx_dsk] that map device numbers to
    LVM/DM and VxVM names.

    Returns a triple (timestamp, info_plain, name_info):
      timestamp  -- int taken from the last two-column row seen in the plain
                    part, or None if there was none
      info_plain -- list of the unmodified /proc/diskstats rows
      name_info  -- dict from (node, major, minor) to the item name, e.g.
                    (None, 253, 0): 'LVM vg00-rootvol'

    Raises IndexError for rows with less than two columns and
    ValueError/IndexError for malformed [vx_dsk] rows.
    """
    name_info = {}  # dict from (node, major, minor) to itemname
    timestamp = None

    info_plain = []
    phase = 'info'
    node = None
    for line in info:
        if node is None:
            node = line[0]

        if line[1] == '[dmsetup_info]':
            phase = 'dmsetup_info'

        elif line[1] == '[vx_dsk]':
            phase = 'vx_dsk'

        elif line[0] != node:
            # New node in case of a cluster, restart with info phase.
            # NOTE: the row that triggers the switch is not evaluated itself.
            phase = 'info'
            node = line[0]

        elif phase == 'info':
            if len(line) == 2:
                timestamp = int(line[1])
            else:
                info_plain.append(line)

        elif phase == 'dmsetup_info':
            try:
                major, minor = map(int, line[2].split(':'))
            except (IndexError, ValueError):
                continue  # ignore such crap as "No Devices Found"

            # Rows of five columns are labelled as LVM volumes, all others
            # as plain device mapper devices
            prefix = "LVM" if len(line) == 5 else "DM"
            name_info[(node, major, minor)] = "%s %s" % (prefix, line[1])

        elif phase == 'vx_dsk':
            # Major and minor numbers are given in hexadecimal notation here
            major = int(line[1], 16)
            minor = int(line[2], 16)
            group, disk = line[3].split('/')[-2:]
            name_info[(node, major, minor)] = "VxVM %s-%s" % (group, disk)

    return timestamp, info_plain, name_info
def diskstat_convert_info(parsed):
    """Merge the parsed diskstat data with the multipath information.

    `parsed` is a pair (disks, multipath_info): the dictionary created by
    the diskstat parse function and the data of the "multipath" section
    (may be empty or None). multipath_info maps a UUID to a dictionary with
    the keys "device", "paths" and optionally "alias".

    Returns a new dictionary of disks in which
      * the physical path devices of known multipath devices are removed,
      * multipath devices (found as e.g. "dm-8" or "DM <alias>") are
        renamed to their UUID,
      * all remaining "dm-*" devices are dropped.

    Neither `disks` nor `multipath_info` is modified.
    """
    disks, multipath_info = parsed
    converted_disks = dict(disks)  # shallow copy, we must not modify info!

    # If we have information about multipathing, then remove the
    # physical path devices from the disks array. But only do this,
    # when there are information for the multipath device available.
    #
    # For multipath entries: Rename the generic names like "dm-8"
    # with multipath names like "SDataCoreSANsymphony_DAT07-fscl"
    if multipath_info:
        for uuid, multipath in multipath_info.items():
            device = multipath["device"]
            # A missing alias is treated like an empty one, without
            # writing it back into the multipath data
            alias = "DM %s" % multipath.get("alias", "")

            if device in converted_disks or alias in converted_disks:
                for path in multipath["paths"]:
                    if path in converted_disks:
                        del converted_disks[path]

            if device in converted_disks:
                converted_disks[uuid] = converted_disks[device]
                del converted_disks[device]

            if alias in converted_disks:
                converted_disks[uuid] = converted_disks[alias]
                del converted_disks[alias]

    # Remove any left-over device mapper devices that are not part of a
    # known multipath device, LVM device or whatever. Iterate over a
    # snapshot of the keys since entries are deleted on the way.
    for device in list(converted_disks):
        if device.startswith("dm-"):
            del converted_disks[device]

    return converted_disks
def inventory_diskstat(parsed):
    """Discover the disk IO services of the diskstat section.

    The parsed data is first merged with the multipath information and
    then handed over to the generic diskstat inventory function that is
    used also for other Disk IO checks.
    """
    # The generic function expects a table of (node, device, ...)
    node_device_table = []
    for device, disk in diskstat_convert_info(parsed).items():
        node_device_table.append((disk["node"], device))

    return inventory_diskstat_generic(node_device_table)
def check_diskstat(item, params, parsed):
    """Check one disk: merge multipath info and delegate to the dict check."""
    converted_disks = diskstat_convert_info(parsed)
    return check_diskstat_dict(item, params, converted_disks)
380 check_info["diskstat"] = {
381 'parse_function': parse_diskstat,
382 'inventory_function': inventory_diskstat,
383 'check_function': check_diskstat,
384 'service_description': 'Disk IO %s',
385 'has_perfdata': True,
386 'group': 'diskstat',
387 "node_info": True, # add first column with actual host name
388 'includes': ["diskstat.include"],
389 'extra_sections': ["multipath"],