2 # -*- encoding: utf-8; py-indent-offset: 4 -*-
3 # +------------------------------------------------------------------+
4 # | ____ _ _ __ __ _ __ |
5 # | / ___| |__ ___ ___| | __ | \/ | |/ / |
6 # | | | | '_ \ / _ \/ __| |/ / | |\/| | ' / |
7 # | | |___| | | | __/ (__| < | | | | . \ |
8 # | \____|_| |_|\___|\___|_|\_\___|_| |_|_|\_\ |
10 # | Copyright Mathias Kettner 2014 mk@mathias-kettner.de |
11 # +------------------------------------------------------------------+
13 # This file is part of Check_MK.
14 # The official homepage is at http://mathias-kettner.de/check_mk.
16 # check_mk is free software; you can redistribute it and/or modify it
17 # under the terms of the GNU General Public License as published by
18 # the Free Software Foundation in version 2. check_mk is distributed
19 # in the hope that it will be useful, but WITHOUT ANY WARRANTY; with-
20 # out even the implied warranty of MERCHANTABILITY or FITNESS FOR A
21 # PARTICULAR PURPOSE. See the GNU General Public License for more de-
22 # tails. You should have received a copy of the GNU General Public
23 # License along with GNU Make; see the file COPYING. If not, write
24 # to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
25 # Boston, MA 02110-1301 USA.
29 # 8 0 sda 691860 951191 13559915 491748 234686 197346 3359512 94944 0 56844 586312
30 # 8 32 sdb 791860 91191 23589915 491748 234686 197346 3359512 94944 0 56844 586312
# Newer agents also output dm-* and Veritas devices and, if available,
# the following additional information used for name rewriting:
37 # 8 0 sda 6142 327 219612 2244 3190 6233 74075 8206 0 6523 10446
38 # 253 0 dm-0 4579 0 181754 2343 9249 0 73960 259491 0 1208 261833
39 # 253 1 dm-1 342 0 2736 47 3 0 11796464 5016 0 5063 5063
40 # 253 2 dm-2 160 0 1274 27 11 0 56 3 0 27 30
41 # 8 16 sdb 464 858 7717 336 1033 0 311454 3899 0 3007 4231
42 # 8 32 sdc 855 13352 106777 1172 915 0 154467 2798 0 3012 3967
43 # 8 48 sdd 1217 861 109802 1646 118 0 56151 1775 0 2736 3420
44 # 8 80 sdf 359 1244 58323 792 66 0 4793 388 0 765 1178
45 # 8 64 sde 310 1242 6964 268 118 0 56151 1607 0 1307 1872
46 # 8 96 sdg 1393 1242 314835 3759 129 0 56172 1867 0 4027 5619
47 # 199 27000 VxVM27000 131 0 990 61 11 0 21 29 0 89 90
48 # 199 27001 VxVM27001 0 0 0 0 0 0 0 0 0 0 0
50 # vg_zwei-lv_home 253:2 vg_zwei lv_home
51 # vg_zwei-lv_swap 253:1 vg_zwei lv_swap
52 # vg_zwei-lv_root 253:0 vg_zwei lv_root
54 # c7 6978 /dev/vx/dsk/datadg/lalavol
55 # c7 6979 /dev/vx/dsk/datadg/oravol
57 # output may have zeros appended
59 # 8 0 sda 111918756 929875 3960367050 349083041 20142495 1149711 1021234448 851284769 0 233177192 1197549009 0 0 0 0
60 # 8 1 sda1 226 0 27481 3388 381 3 31472 35862 0 8123 39260 0 0 0 0
61 # 8 2 sda2 111918500 929875 3960337473 349079568 20142114 1149708 1021202976 851248906 0 233176504 1197492420 0 0 0 0
62 # 253 0 dm-0 883953 0 92124097 10287533 108572 0 2251672 809814 0 7545567 11097424 0 0 0 0
63 # 253 1 dm-1 21046 0 172072 157766 164020 0 1312160 29292970 0 124138 29451007 0 0 0 0
64 # 253 2 dm-2 750714 0 19747073 7702216 1445987 0 36811608 9817313 0 7159271 17520030 0 0 0 0
66 # Fields in /proc/diskstats
67 # Index 0 -- major number
68 # Index 1 -- minor number
69 # Index 2 -- device name --> used by check
70 # Index 3 -- # of reads issued
71 # Index 4 -- # of reads merged
72 # Index 5 -- # of sectors read (a 512 Byte) --> used by check
73 # Index 6 -- # of milliseconds spent reading
74 # Index 7 -- # of writes completed
75 # Index 8 -- # of writes merged
76 # Index 9 -- # of sectors written (a 512 Byte) --> used by check
77 # Index 10 -- # of milliseconds spent writing
78 # Index 11 -- # of I/Os currently in progress
79 # Index 12 -- # of milliseconds spent doing I/Os
80 # Index 13 -- weighted # of milliseconds spent doing I/Os
82 # Convert information to generic format also generated
84 # [ now, [( disk, readctr, writectr ), ... ]]
85 # where counters are in sectors (512 bytes)
87 # Parse /proc/diskstat and additional information into a nice canonical
88 # dictionary of the form:
91 # 'average_read_request_size' : 0.0,
92 # 'average_read_wait' : 0.0,
93 # 'average_request_size' : 40569.90476190476,
94 # 'average_wait' : 0.761904761904762,
95 # 'average_write_request_size' : 40569.90476190476,
96 # 'average_write_wait' : 0.0007619047619047619,
99 # 'read_throughput' : 0.0,
100 # 'latency' : 0.00038095238095238096,
101 # 'utilization' : 0.0006153846153846154,
102 # 'write_ios' : 1.6153846153846154,
103 # 'write_throughput' : 65536.0,
110 # Returns a pair of the timestamp and that dictionary
111 # parsed = timestamp, disks
114 # Consideration for debugging purposes:
115 # Due to check_info['diskstat']['extra_sections']: ["multipath"])
116 # each info list is prefixed with '<node_name>'.
def parse_diskstat(info):
    """Turn the raw diskstat agent section into per-device I/O rates.

    `info` is passed unchanged to `diskstat_extract_name_info`, which yields
    the agent timestamp, the plain /proc/diskstats rows (each row prefixed
    with the node name) and a mapping from (node, major, minor) to a
    readable item name.

    Returns a dict mapping the device (item) name to a dict with the keys
    "node", "read_ios", "write_ios", "read_throughput", "write_throughput",
    "utilization", "latency", "average_request_size", "average_wait",
    "average_read_wait", "average_read_request_size", "average_write_wait",
    "average_write_request_size" and "queue_length".

    NOTE(review): returning the plain dict (without the timestamp) is
    inferred from diskstat_convert_info, which unpacks
    `disks, multipath_info = parsed` -- confirm against the check API.

    Raises whatever int() raises when the timestamp or a counter column is
    not numeric, and ValueError when a row does not have exactly 15 columns
    after truncation.
    """
    timestamp_str, proc_diskstat, name_info = diskstat_extract_name_info(info)
    # Limit each row to the node name plus the first 14 diskstats fields;
    # agent output may have further (zero) columns appended.
    proc_diskstat = [ds[:15] for ds in proc_diskstat]
    timestamp = int(timestamp_str)

    # Real partitions are excluded: a name that looks like a partition and
    # whose base name (trailing digits stripped) is itself a reported device.
    # Partitions without such a disk stay in - typical in XEN virtual
    # setups, e.g. there are xvda1 and xvda2, but no xvda.
    # A set keeps the membership tests below O(1).
    device_names = set(line[3] for line in proc_diskstat)
    real_partitions = set(
        device_name for device_name in device_names
        if diskstat_diskless_pattern.match(device_name) and
        re.sub('[0-9]+$', '', device_name) in device_names)

    disks = {}
    for line in proc_diskstat:
        if line[3] in real_partitions:
            continue

        (node_name, major, minor, device,
         read_ios, _read_merges, read_sectors, read_ticks,
         write_ios, _write_merges, write_sectors, write_ticks,
         ios_in_prog, total_ticks, _rq_ticks) = line

        # Prefer the LVM/DM/VxVM name if one is known for this device number
        device = name_info.get((node_name, int(major), int(minor)), device)

        counter_base = "diskstat.%s." % device

        # Some of the following computations were learned from Munin.
        #
        # There are 1000 ticks per second.
        # Note: we use onwrap=0.0 here because the parse function is also
        # used during service discovery, where a counter wrap exception
        # must not abort the parsing.
        read_ticks_rate = get_rate(
            counter_base + "read_ticks", timestamp, int(read_ticks), onwrap=0.0)
        write_ticks_rate = get_rate(
            counter_base + "write_ticks", timestamp, int(write_ticks), onwrap=0.0)
        total_ticks_rate = get_rate(
            counter_base + "total_ticks", timestamp, int(total_ticks), onwrap=0.0)
        read_ios_rate = get_rate(
            counter_base + "read_ios", timestamp, int(read_ios), onwrap=0.0)
        write_ios_rate = get_rate(
            counter_base + "write_ios", timestamp, int(write_ios), onwrap=0.0)
        total_ios_rate = read_ios_rate + write_ios_rate
        utilization = total_ticks_rate / 1000.0  # not percent, but 0...1
        # Counters are in sectors of 512 bytes
        read_bytes_rate = get_rate(
            counter_base + "read_sectors", timestamp, int(read_sectors), onwrap=0.0) * 512
        write_bytes_rate = get_rate(
            counter_base + "write_sectors", timestamp, int(write_sectors), onwrap=0.0) * 512
        total_bytes_rate = read_bytes_rate + write_bytes_rate

        # The service time is computed from the utilization. If we work
        # e.g. 0.34 (34%) of the time and we can do 17 operations in that
        # time then the average latency is time * 0.34 / 17
        if total_ios_rate > 0:
            latency = utilization / total_ios_rate
            average_wait = (read_ticks_rate + write_ticks_rate) / total_ios_rate / 1000.0
            average_request_size = total_bytes_rate / total_ios_rate
        else:
            # No I/O in the interval: avoid dividing by zero
            latency = 0.0
            average_wait = 0.0
            average_request_size = 0.0

        # Average read and write wait, from end to end, including queuing,
        # etc. and average size of one request
        if read_ticks_rate and read_ios_rate > 0:
            average_read_wait = read_ticks_rate / read_ios_rate / 1000.0
            average_read_size = read_bytes_rate / read_ios_rate
        else:
            average_read_wait = 0.0
            average_read_size = 0.0

        if write_ticks_rate and write_ios_rate > 0:
            average_write_wait = write_ticks_rate / write_ios_rate / 1000.0
            average_write_size = write_bytes_rate / write_ios_rate
        else:
            average_write_wait = 0.0
            average_write_size = 0.0

        disks[device] = {
            "node": node_name,
            "read_ios": read_ios_rate,
            "write_ios": write_ios_rate,
            "read_throughput": read_bytes_rate,
            "write_throughput": write_bytes_rate,
            "utilization": utilization,
            "latency": latency,
            "average_request_size": average_request_size,
            "average_wait": average_wait,
            "average_read_wait": average_read_wait,
            "average_read_request_size": average_read_size,
            "average_write_wait": average_write_wait,
            "average_write_request_size": average_write_size,
            "queue_length": int(ios_in_prog),
        }

    return disks
217 ### # Index 0 -- major number
218 ### # Index 1 -- minor number
219 ### # Index 2 -- device name --> used by check
220 ### # Index 3 -- # of reads issued
221 ### # Index 4 -- # of reads merged
222 ### # Index 5 -- # of sectors read (a 512 Byte) --> used by check
223 ### # Index 6 -- # of milliseconds spent reading
224 ### # Index 7 -- # of writes completed
225 ### # Index 8 -- # of writes merged
226 ### # Index 9 -- # of sectors written (a 512 Byte) --> used by check
227 ### # Index 10 -- # of milliseconds spent writing
228 ### # Index 11 -- # of I/Os currently in progress
229 ### # Index 12 -- # of milliseconds spent doing I/Os
230 ### # Index 13 -- weighted # of milliseconds spent doing I/Os
231 ### for line in proc_diskstat:
236 ### # For multipath devices use the entries for dm-?? and rename
237 ### # them with their multipath UUID/alias - and drop the according
238 ### # sdXY that belong to the paths.
239 ### multipath_name_info = {}
240 ### skipped_devices = set([])
242 ### # The generic function takes the following values per line:
243 ### # 0: None or node name
245 ### # 2: read bytes counter
246 ### # 3: write bytes counter
248 ### # 4: number of reads
249 ### # 5: number of writes
251 ### # 7: read queue length *counters*
252 ### # 8: write queue length *counters*
254 ### ( l[0], # node name or None
255 ### diskstat_rewrite_device(name_info, multipath_name_info, l[0:4]),
261 ### ) for l in info[1:] if len(l) >= 14
264 ### # Remove device mapper devices without a translated name
265 ### return [ line for line in rewritten
266 ### if not line[1].startswith("dm-")
267 ### and not line[1] in skipped_devices ]
# Extract additional information from the diskstat section about
# LVM and DM devices. This information is encapsulated in the
# [dmsetup_info] and [vx_dsk] subsections. Example of the resulting mapping:
275 # (None, 253, 0): 'LVM vg00-rootvol',
276 # (None, 253, 1): 'LVM vg00-tmpvol',
277 # (None, 253, 2): 'LVM vg00-varvol',
278 # (None, 253, 3): 'LVM vg00-optvol',
279 # (None, 253, 4): 'LVM vg00-usrvol',
280 # (None, 253, 5): 'LVM vg00-swapvol',
281 # (None, 253, 6): 'LVM vgappl-applvol',
# Splits the raw diskstat section into its parts and returns the triple
# (timestamp, info_plain, name_info).
# Visible behaviour: a `phase` state is switched to 'dmsetup_info' when the
# marker "[dmsetup_info]" shows up in column 1 of a line; the "[vx_dsk]"
# marker is tested the same way. Column 0 is compared against the current
# node. Lines seen in the 'dmsetup_info' and 'vx_dsk' phases add entries to
# name_info.
# NOTE(review): several statements of this function are not present in this
# copy of the file (initialisation of timestamp/info_plain/phase/node, the
# loop header, and the conditions guarding some assignments below) -- restore
# them from version control before relying on this block.
283 def diskstat_extract_name_info(info
):
284 name_info
= {} # dict from (node, major, minor) to itemname
# Subsection markers in column 1 switch the parsing phase.
294 if line
[1] == '[dmsetup_info]':
295 phase
= 'dmsetup_info'
296 elif line
[1] == '[vx_dsk]':
# NOTE(review): the body of the '[vx_dsk]' branch is missing here;
# presumably it sets phase to 'vx_dsk' -- confirm.
298 # new node in case of a cluster, restart with info phase
299 elif line
[0] != node
:
# NOTE(review): the body of the node-change branch and the guard that decides
# between a timestamp line and a regular data line are missing here.
305 timestamp
= int(line
[1])
307 info_plain
.append(line
)
308 elif phase
== 'dmsetup_info':
# Column 2 of a dmsetup line holds "major:minor" as decimal numbers.
310 major
, minor
= map(int, line
[2].split(':'))
# NOTE(review): the condition choosing between the "LVM" and the "DM" prefix
# is missing here.
312 name
= "LVM %s" % line
[1]
314 name
= "DM %s" % line
[1]
315 name_info
[node
, major
, minor
] = name
317 pass # ignore such crap as "No Devices Found"
318 elif phase
== 'vx_dsk':
# Veritas lines give major and minor as hexadecimal numbers in columns 1/2.
319 major
= int(line
[1], 16)
320 minor
= int(line
[2], 16)
# The last two components of the path in column 3 form the item name.
321 group
, disk
= line
[3].split('/')[-2:]
322 name
= "VxVM %s-%s" % (group
, disk
)
323 name_info
[(node
, major
, minor
)] = name
325 return timestamp
, info_plain
, name_info
def diskstat_convert_info(parsed):
    """Merge multipath information into the per-disk data.

    `parsed` is a pair `(disks, multipath_info)`:

    * `disks` maps a device name to its data,
    * `multipath_info` maps a multipath UUID to a dict with the keys
      "device", "paths" and optionally "alias"; it may be empty or None.

    For every multipath entry whose device (or "DM <alias>" name) is present
    in `disks`, the physical path devices are dropped and the entry is
    re-keyed under the UUID. Finally all left-over "dm-*" devices are
    removed.

    Returns a new dict; neither `disks` nor `multipath_info` is modified.
    """
    disks, multipath_info = parsed
    converted_disks = dict(disks)  # shallow copy, we must not modify info!

    # If we have information about multipathing, then remove the
    # physical path devices from the disks. But only do this when
    # information for the multipath device itself is available.
    #
    # For multipath entries: rename the generic names like "dm-8"
    # to multipath names like "SDataCoreSANsymphony_DAT07-fscl"
    for uuid, multipath in (multipath_info or {}).items():
        device = multipath["device"]
        # A missing alias is treated as the empty string, without writing
        # it back into the caller's data.
        dm_alias = "DM %s" % multipath.get("alias", "")

        if device in converted_disks or dm_alias in converted_disks:
            for path in multipath["paths"]:
                converted_disks.pop(path, None)

        if device in converted_disks:
            converted_disks[uuid] = converted_disks.pop(device)

        if dm_alias in converted_disks:
            converted_disks[uuid] = converted_disks.pop(dm_alias)

    # Remove any left-over device mapper devices that are not part of a
    # known multipath device, LVM device or whatever. Iterate over a
    # snapshot of the keys because entries are deleted while looping.
    for device in list(converted_disks):
        if device.startswith("dm-"):
            del converted_disks[device]

    return converted_disks
def inventory_diskstat(parsed):
    """Discover one item per disk left after multipath conversion.

    The conversion is done by diskstat_convert_info; the resulting
    (node, device) pairs are handed to the generic diskstat inventory
    function that is also used by other Disk IO checks.
    """
    node_device_pairs = []
    for device_name, disk_data in diskstat_convert_info(parsed).items():
        node_device_pairs.append((disk_data["node"], device_name))
    return inventory_diskstat_generic(node_device_pairs)
def check_diskstat(item, params, parsed):
    """Check one disk item against the converted (multipath-aware) disk data."""
    disks_by_item = diskstat_convert_info(parsed)
    return check_diskstat_dict(item, params, disks_by_item)
380 check_info
["diskstat"] = {
381 'parse_function': parse_diskstat
,
382 'inventory_function': inventory_diskstat
,
383 'check_function': check_diskstat
,
384 'service_description': 'Disk IO %s',
385 'has_perfdata': True,
387 "node_info": True, # add first column with actual host name
388 'includes': ["diskstat.include"],
389 'extra_sections': ["multipath"],