2 # -*- encoding: utf-8; py-indent-offset: 4 -*-
3 # +------------------------------------------------------------------+
4 # | ____ _ _ __ __ _ __ |
5 # | / ___| |__ ___ ___| | __ | \/ | |/ / |
6 # | | | | '_ \ / _ \/ __| |/ / | |\/| | ' / |
7 # | | |___| | | | __/ (__| < | | | | . \ |
8 # | \____|_| |_|\___|\___|_|\_\___|_| |_|_|\_\ |
10 # | Copyright Mathias Kettner 2014 mk@mathias-kettner.de |
11 # +------------------------------------------------------------------+
13 # This file is part of Check_MK.
14 # The official homepage is at http://mathias-kettner.de/check_mk.
16 # check_mk is free software; you can redistribute it and/or modify it
17 # under the terms of the GNU General Public License as published by
18 # the Free Software Foundation in version 2. check_mk is distributed
19 # in the hope that it will be useful, but WITHOUT ANY WARRANTY; with-
20 # out even the implied warranty of MERCHANTABILITY or FITNESS FOR A
21 # PARTICULAR PURPOSE. See the GNU General Public License for more de-
22 # tails. You should have received a copy of the GNU General Public
23 # License along with GNU Make; see the file COPYING. If not, write
24 # to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
25 # Boston, MA 02110-1301 USA.
27 # .--mem.linux-----------------------------------------------------------.
29 # | _ __ ___ ___ _ __ ___ | (_)_ __ _ ___ __ |
30 # | | '_ ` _ \ / _ \ '_ ` _ \ | | | '_ \| | | \ \/ / |
31 # | | | | | | | __/ | | | | |_| | | | | | |_| |> < |
32 # | |_| |_| |_|\___|_| |_| |_(_)_|_|_| |_|\__,_/_/\_\ |
34 # +----------------------------------------------------------------------+
35 # | Specialized memory check for Linux that takes into account |
36 # | all of its specific information in /proc/meminfo. |
37 # '----------------------------------------------------------------------'
# BEWARE: half of the information and blog entries about /proc/meminfo
# on the internet is imprecise or even totally wrong!
43 # MemTotal: 24707592 kB
49 # Inactive: 13360444 kB
50 # Active(anon): 1481236 kB
51 # Inactive(anon): 371260 kB
52 # Active(file): 7275640 kB
53 # Inactive(file): 12989184 kB
54 # Unevictable: 964808 kB
56 # SwapTotal: 16777212 kB
57 # SwapFree: 16703328 kB
60 # AnonPages: 2774444 kB
64 # SReclaimable: 756236 kB
65 # SUnreclaim: 104792 kB
66 # KernelStack: 4176 kB
67 # PageTables: 15892 kB
71 # CommitLimit: 39014044 kB
72 # Committed_AS: 3539808 kB
73 # VmallocTotal: 34359738367 kB
74 # VmallocUsed: 347904 kB
75 # VmallocChunk: 34346795572 kB
76 # HardwareCorrupted: 0 kB
82 # Hugepagesize: 2048 kB
83 # DirectMap4k: 268288 kB
84 # DirectMap2M: 8112128 kB
85 # DirectMap1G: 16777216 kB
87 # This is from an earlier kernel (CentOS 5.5). Some entries
101 # SwapTotal: 2064376 kB
102 # SwapFree: 2062756 kB
105 # AnonPages: 43080 kB
108 # PageTables: 3208 kB
111 # CommitLimit: 2252964 kB
112 # Committed_AS: 125968 kB
113 # VmallocTotal: 34359738367 kB
114 # VmallocUsed: 18112 kB
115 # VmallocChunk: 34359719415 kB
119 # Hugepagesize: 2048 kB
121 # Yet earlier kernel (SLES 9):
123 # MemTotal: 6224268 kB
124 # MemFree: 2913660 kB
129 # Inactive: 1276156 kB
130 # HighTotal: 5373824 kB
131 # HighFree: 2233984 kB
132 # LowTotal: 850444 kB
134 # SwapTotal: 1052280 kB
135 # SwapFree: 1052280 kB
140 # Committed_AS: 2758332 kB
141 # PageTables: 7672 kB
142 # VmallocTotal: 112632 kB
143 # VmallocUsed: 9324 kB
144 # VmallocChunk: 103180 kB
147 # Hugepagesize: 2048 kB
# Default parameters of the mem.linux check. Each visible entry maps a level
# name to a (mode, (warn, crit)) pair; the modes used here are "perc_used",
# "perc_free" and "abs_free".
# NOTE(review): the end of this dict literal (closing brace and possibly
# further entries) is not visible in this excerpt -- confirm against the
# full file.
149 factory_settings
["mem_linux_default_levels"] = {
150 "levels_virtual": ("perc_used", (80.0, 90.0)),
151 "levels_total": ("perc_used", (120.0, 150.0)),
152 "levels_shm": ("perc_used", (20.0, 30.0)),
153 "levels_pagetables": ("perc_used", (8.0, 16.0)),
154 "levels_committed": ("perc_used", (100.0, 150.0)),
155 "levels_commitlimit": ("perc_free", (20.0, 10.0)),
# Absolute thresholds on free space; presumably bytes (50 MiB / 30 MiB),
# since the check works on parse_proc_meminfo_bytes() output -- TODO confirm.
156 "levels_vmalloc": ("abs_free", (50 * 1024 * 1024, 30 * 1024 * 1024)),
def is_linux_meminfo(meminfo):
    """Tell whether *meminfo* carries the fields the mem.linux check needs.

    Args:
        meminfo: Container of meminfo field names (anything supporting
            ``in``, e.g. the dict produced by the meminfo parsers).

    Returns:
        True if "PageTables", "Writeback" and "Committed_AS" are all
        present, False otherwise.
    """
    required_fields = ("PageTables", "Writeback", "Committed_AS")
    return all(field in meminfo for field in required_fields)
# Inventory function of mem.linux: parses the agent section with
# parse_proc_meminfo_bytes() and tests the result with is_linux_meminfo().
# NOTE(review): the statement executed when that test succeeds is not
# visible in this excerpt -- confirm against the full file.
164 def inventory_mem_linux(info
):
165 meminfo
= parse_proc_meminfo_bytes(info
)
166 if is_linux_meminfo(meminfo
):
# Evaluate one memory figure against optional levels and build its text.
#
# Visible behaviour: the function ends by returning a (state, infotext)
# pair. When the level mode `how` is "predictive" it returns state 3 with a
# "not yet implemented" message instead. `levels[1]` is unpacked into
# (warn, crit). For modes starting with "perc_" the used percentage is
# computed relative to `of_value`; "perc_used", "perc_free" and "abs_used"
# branches are present.
#
# NOTE(review): large parts of this function are not visible in this
# excerpt (most of the signature, the assignments of `how`, `state`, `t`,
# `value`, `perc_shown`, and several branch bodies) -- confirm against the
# full file before relying on any detail.
170 def check_mem_levels(title
,
176 show_percentage
=False,
179 of_value
= total
# Reference for percentage levels
# Text variant showing a single value only.
186 infotext
= "%s: %s" % (title
, get_bytes_human_readable(value
))
# Text variant showing "used ... of ... total".
188 infotext
= "%s used: %s of %s" % (title
, get_bytes_human_readable(used
),
189 get_bytes_human_readable(total
))
# Levels are only evaluated when given and not set to "ignore".
192 if levels
and levels
!= "ignore":
194 if how
== "predictive":
195 return 3, "Predictive levels for memory check not yet implemented"
196 # TODO: We need the name of the RRD variable here! But that does not
197 # work if values have been added up here. We should allow
198 # predictive levels only for selected values.
199 # return check_levels(used, levels[1], unit = "GB")
201 warn
, crit
= levels
[1]
202 if how
.startswith("perc_"):
# NOTE(review): raises ZeroDivisionError if of_value is 0, unless a guard
# exists in a line not visible here.
203 perc_used
= 100.0 * float(used
) / of_value
204 perc_free
= 100 - perc_used
205 if how
== "perc_used":
210 levels_text
= " (%.1f%%%s, " % (perc_used
, t
)
211 if perc_used
>= crit
:
213 elif perc_used
>= warn
:
216 elif how
== "perc_free":
221 levels_text
= " (%.1f%% %s, " % (perc_free
, t
)
224 elif perc_free
< warn
:
229 infotext
+= levels_text
+ "warn/crit at %.1f%%/%.1f%%)" % (warn
, crit
)
232 if how
== "abs_used":
245 infotext
+= " (warn/crit at %s/%s)" % (get_bytes_human_readable(warn
),
246 get_bytes_human_readable(crit
))
# Append the plain percentage if requested and not already part of the text.
248 if not perc_shown
and show_percentage
:
249 infotext
+= " (%.1f%%)" % (100.0 * float(used
) / of_value
)
250 return state
, infotext
# Check function of mem.linux (a generator).
#
# Parses the agent section with parse_proc_meminfo_bytes(), derives several
# aggregate values (Caches, MemUsed, SwapUsed, TotalTotal, TotalUsed,
# Pending) and stores them back into `meminfo`, yields the results of
# check_mem_levels() for the individual aspects, and finally yields one
# (0, "", perfdata) result built from the fields of `meminfo`.
#
# NOTE(review): many lines of this function are not visible in this excerpt
# (guards around several `yield` statements, titles and arguments of some
# check_mem_levels() calls, the initialisation of `perfdata`, `continue`
# statements in the final loop) -- confirm against the full file.
253 def check_mem_linux(_no_item
, params
, info
):
254 meminfo
= parse_proc_meminfo_bytes(info
)
# NOTE(review): the condition guarding this "no data" result is not visible.
256 yield 3, "Data not found in agent output"
259 # SReclaimable is not available for older kernels
260 if "SReclaimable" not in meminfo
:
261 meminfo
["SReclaimable"] = 0
262 meminfo
["SUnreclaim"] = meminfo
["Slab"]
264 # Compute memory used by caches, that can be considered "free"
265 meminfo
["Caches"] = meminfo
["Cached"] + meminfo
["Buffers"] \
266 + meminfo
["SwapCached"] + meminfo
["SReclaimable"]
# MemUsed excludes both free memory and the cache total computed above.
269 meminfo
["MemUsed"] = meminfo
["MemTotal"] - meminfo
["MemFree"] - meminfo
["Caches"]
270 yield check_mem_levels(
274 params
.get("levels_ram"),
# The percentage is shown here only when SwapTotal is zero/falsy.
275 show_percentage
=not meminfo
["SwapTotal"])
277 # Swap - but only if available
278 meminfo
["SwapUsed"] = meminfo
["SwapTotal"] - meminfo
["SwapFree"]
279 if meminfo
["SwapTotal"]:
280 yield check_mem_levels("Swap", meminfo
["SwapUsed"], meminfo
["SwapTotal"],
281 params
.get("levels_swap"))
283 # Total virtual memory
284 meminfo
["TotalTotal"] = meminfo
["MemTotal"] + meminfo
["SwapTotal"]
285 meminfo
["TotalUsed"] = meminfo
["MemUsed"] + meminfo
["SwapUsed"]
286 r
= check_mem_levels(
287 "Total virtual memory",
288 meminfo
["TotalUsed"],
289 meminfo
["TotalTotal"],
290 params
.get("levels_virtual"),
291 show_percentage
=True)
292 if r
[0] or meminfo
["SwapTotal"]:
293 yield r
# only display if there is swap or status is non-OK
295 # Total memory / in relation to RAM
296 r
= check_mem_levels(
298 meminfo
["TotalUsed"],
299 meminfo
["TotalTotal"],
300 params
.get("levels_total"),
# Percentage levels for this aspect are computed relative to MemTotal.
302 of_value
=meminfo
["MemTotal"])
304 yield r
# only display if non-OK
307 if "Shmem" in meminfo
:
308 r
= check_mem_levels(
312 params
.get("levels_shm"),
315 yield r
# only display if non-OK
318 r
= check_mem_levels(
320 meminfo
["PageTables"],
322 params
.get("levels_pagetables"),
325 yield r
# only display if non-OK
# "Pending" is a sum of several fields; absent ones count as 0.
# NOTE(review): the first summand (original line 329) is not visible.
328 meminfo
["Pending"] = \
330 + meminfo
.get("Writeback", 0) \
331 + meminfo
.get("NFS_Unstable", 0) \
332 + meminfo
.get("Bounce", 0) \
333 + meminfo
.get("WritebackTmp", 0)
335 r
= check_mem_levels(
339 params
.get("levels_writeback"),
342 yield r
# only display if non-OK
345 r
= check_mem_levels(
347 meminfo
["Committed_AS"],
348 meminfo
["TotalTotal"],
349 params
.get("levels_committed"),
350 of_what
="RAM + Swap")
352 yield r
# only display if non-OK
355 if "CommitLimit" in meminfo
:
356 r
= check_mem_levels(
# The "used" value passed here is TotalTotal minus CommitLimit.
358 meminfo
["TotalTotal"] - meminfo
["CommitLimit"],
359 meminfo
["TotalTotal"],
360 params
.get("levels_commitlimit"),
361 of_what
="RAM + Swap")
363 yield r
# only display if non-OK
366 if "MemAvailable" in meminfo
:
367 r
= check_mem_levels(
369 meminfo
["MemTotal"] - meminfo
["MemAvailable"],
371 params
.get("levels_available"),
375 yield r
# only display if non-OK
378 # newer kernel versions report wrong data,
379 # i.e. both VmallocUsed and Chunk equal zero
380 if not (meminfo
["VmallocUsed"] == 0 and meminfo
["VmallocChunk"] == 0):
381 r
= check_mem_levels(
382 "Largest Free VMalloc Chunk",
383 meminfo
["VmallocTotal"] - meminfo
["VmallocChunk"],
384 meminfo
["VmallocTotal"],
385 params
.get("levels_vmalloc"),
386 of_what
="VMalloc Area",
389 yield r
# only display if non-OK
392 hwc
= meminfo
.get("HardwareCorrupted")
# State comes from the "handle_hw_corrupted_error" parameter, default 2.
# NOTE(review): the condition guarding this result is not visible.
394 yield params
.get("handle_hw_corrupted_error",
395 2), "Hardware defect of %s" % get_bytes_human_readable(hwc
)
397 # Now send performance data. We simply output *all* fields of meminfo
398 # except for a few really useless values
400 items
= meminfo
.items()
402 for name
, value
in items
:
403 if name
.startswith("DirectMap"):
406 "Vmalloc") and meminfo
["VmallocTotal"] > 2**40: # useless on 64 Bit system
408 if name
.startswith("Huge"):
409 if meminfo
["HugePages_Total"] == 0: # omit useless data
411 if name
== "Hugepagesize":
412 continue # not needed
413 value
= value
* meminfo
["Hugepagesize"] # convert number to actual memory size
# Metric names: parentheses removed/replaced, then converted to snake case.
414 perfdata
.append((camelcase_to_underscored(name
.replace("(", "_").replace(")", "")), value
))
415 yield 0, "", perfdata
# Convert a CamelCase name to snake case, as in the example below.
# The two flags track properties of the previously handled character
# (whether it was lower case / an underscore).
# NOTE(review): the loop over `name`, the branch conditions, the result
# accumulation and the return statement are not visible in this excerpt --
# confirm against the full file.
418 # ThisIsACamel -> this_is_a_camel
419 def camelcase_to_underscored(name
):
420 previous_lower
= False
421 previous_underscore
= True
425 if previous_lower
and not previous_underscore
:
427 previous_lower
= False
428 previous_underscore
= False
431 previous_lower
= False
432 previous_underscore
= True
435 previous_lower
= True
436 previous_underscore
= False
# Registration of the mem.linux check: wires up the inventory and check
# functions defined in this file and names the factory-settings variable
# that holds its default levels.
check_info["mem.linux"] = {
    'inventory_function': inventory_mem_linux,
    'check_function': check_mem_linux,
    'service_description': 'Memory',
    'default_levels_variable': 'mem_linux_default_levels',
    'has_perfdata': True,
    'group': 'memory_linux',
    "handle_real_time_checks": True,
    'includes': ['mem.include'],
}
453 # .--mem.used------------------------------------------------------------.
455 # | _ __ ___ ___ _ __ ___ _ _ ___ ___ __| | |
456 # | | '_ ` _ \ / _ \ '_ ` _ \ | | | / __|/ _ \/ _` | |
457 # | | | | | | | __/ | | | | || |_| \__ \ __/ (_| | |
458 # | |_| |_| |_|\___|_| |_| |_(_)__,_|___/\___|\__,_| |
460 # +----------------------------------------------------------------------+
# | Memory check that takes into account the swap space. This check is |
# | used for Unix-like operating systems. |
463 # '----------------------------------------------------------------------'
# Parse the agent's meminfo section into a dict.
# Visible behaviour: for each `line`, the first column with its last
# character stripped (presumably the trailing ":" of "MemTotal:") becomes
# the key and the second column, converted to int, becomes the value.
# ValueError and IndexError raised by that conversion are caught.
# NOTE(review): the initialisation of `parsed`, the loop header, the `try`,
# the body of the `except` clause and the return statement are not visible
# in this excerpt -- confirm against the full file.
466 def parse_proc_meminfo(info
):
470 parsed
[line
[0][:-1]] = int(line
[1])
471 except (ValueError, IndexError) as _exc
:
# The following variable is obsolete. It is kept here so that Check_MK
# won't fail if it's found in main.mk
mem_extended_perfdata = None
# Inventory function of mem.used: parses the section with
# parse_proc_meminfo() and matches only when "MemTotal" is present,
# "PageTotal" is absent and is_linux_meminfo() rejects the data.
# NOTE(review): the statement executed when the condition holds is not
# visible in this excerpt -- confirm against the full file.
481 def inventory_mem_used(info
):
482 meminfo
= parse_proc_meminfo(info
)
483 if "MemTotal" in meminfo \
484 and "PageTotal" not in meminfo \
485 and not is_linux_meminfo(meminfo
): # handled by more modern check
def check_mem_used(_no_item, params, info):
    """Check function of mem.used.

    Parses the agent section with parse_proc_meminfo() and delegates the
    evaluation to check_memory().

    Args:
        _no_item: Unused; the check has no item.
        params: Check parameters, passed through to check_memory().
        info: Raw agent section as handed over by the check framework.

    Returns:
        Whatever check_memory() returns. check_memory() is not defined in
        this file; presumably it comes from the "mem.include" file listed
        in this check's registration -- confirm.
    """
    meminfo = parse_proc_meminfo(info)
    return check_memory(params, meminfo)
# Registration of the mem.used check: wires up check_mem_used() and
# inventory_mem_used() and names "memory_default_levels" as the variable
# holding the default levels.
# NOTE(review): one entry (original line 499) and the closing brace of this
# dict literal are not visible in this excerpt -- confirm against the full
# file.
494 check_info
['mem.used'] = {
495 "check_function": check_mem_used
,
496 "inventory_function": inventory_mem_used
,
497 "service_description": "Memory used",
498 "has_perfdata": True,
500 "default_levels_variable": "memory_default_levels",
501 "includes": ["mem.include"],
502 "handle_real_time_checks": True,
506 # .--mem.win-------------------------------------------------------------.
508 # | _ __ ___ ___ _ __ ___ __ _(_)_ __ |
509 # | | '_ ` _ \ / _ \ '_ ` _ \\ \ /\ / / | '_ \ |
510 # | | | | | | | __/ | | | | |\ V V /| | | | | |
511 # | |_| |_| |_|\___|_| |_| |_(_)_/\_/ |_|_| |_| |
513 # +----------------------------------------------------------------------+
# | Windows now has a dedicated memory check that reflects the special |
# | nature of the page file. |
516 # '----------------------------------------------------------------------'
# Special memory and page file check for Windows
# Default (warn, crit) levels for both aspects. The values are floats, which
# the check interprets as percentages.
factory_settings["memory_win_default_levels"] = {
    "memory": (80.0, 90.0),
    "pagefile": (80.0, 90.0),
}
# Inventory function of mem.win: parses the section with
# parse_proc_meminfo() and matches when both "MemTotal" and "PageTotal"
# are present.
# NOTE(review): the statement executed when the condition holds is not
# visible in this excerpt -- confirm against the full file.
525 def inventory_mem_win(info
):
526 meminfo
= parse_proc_meminfo(info
)
527 if "MemTotal" in meminfo
and "PageTotal" in meminfo
:
# Check function of mem.win (a generator).
#
# For each of the two aspects ("Memory usage" from the Mem* fields and
# "Commit charge" from the Page* fields) it computes used/total values in
# kB and MB plus the used percentage, builds an info text and perfdata,
# optionally averages the used value when "average" is set in params, and
# yields one (state, infotext, perfdata) result per aspect.
#
# NOTE(review): several lines are not visible in this excerpt (assignment
# of `now`, `else` branches and returns of the nested helper, the state
# assignments in the level comparison, part of the check_levels() call) --
# confirm against the full file.
531 def check_mem_windows(_no_item
, params
, info
):
532 meminfo
= parse_proc_meminfo(info
)
# Helper: translate the configured levels for `param_key` into warn/crit
# thresholds on *used* MB.
535 def _get_levels_on_used_mb(param_key
, total_mb
):
536 levels
= params
.get(param_key
)
537 if not isinstance(levels
, tuple):
538 # Predictive levels have no level information in the performance data
541 if isinstance(levels
[0], float):
542 # float type means percent
543 warn
= total_mb
* levels
[0] / 100
545 # int means levels on *free* space
546 warn
= total_mb
- levels
[0]
547 if isinstance(levels
[1], float):
548 crit
= total_mb
* levels
[1] / 100
550 crit
= total_mb
- levels
[1]
553 for title
, prefix
, paramname
in [("Memory usage", "Mem", "memory"),
554 ("Commit charge", "Page", "pagefile")]:
556 total_kb
= meminfo
.get("%sTotal" % prefix
)
557 free_kb
= meminfo
.get("%sFree" % prefix
)
# Either value missing from the agent data.
# NOTE(review): the handling of this case is not visible.
558 if None in (total_kb
, free_kb
):
561 total_mb
= total_kb
/ 1024.0
562 free_mb
= free_kb
/ 1024.0
563 used_kb
= total_kb
- free_kb
564 used_mb
= total_mb
- free_mb
# NOTE(review): raises ZeroDivisionError if total_kb is 0, unless a guard
# exists in a line not visible here.
565 perc
= 100.0 * used_kb
/ total_kb
567 warn
, crit
= _get_levels_on_used_mb(paramname
, total_mb
)
569 infotext
= "%s: %s (%s/%s)" % (title
, get_percent_human_readable(perc
),
570 get_bytes_human_readable(used_kb
* 1024),
571 get_bytes_human_readable(total_kb
* 1024))
573 perfdata
= [(paramname
, used_mb
, warn
, crit
, 0, total_mb
)]
575 perfdata
.append(("mem_total", total_mb
))
576 elif prefix
== "Page":
577 perfdata
.append(("pagefile_total", total_mb
))
579 # Do averaging, if configured, just for matching the levels
580 if "average" in params
:
581 average_min
= params
["average"]
582 used_kb
= get_average(
583 "mem.win.%s" % paramname
, now
, used_kb
, average_min
, initialize_zero
=False)
# Recompute MB and percentage from the averaged value.
584 used_mb
= used_kb
/ 1024.0
585 perc
= 100.0 * used_kb
/ total_kb
586 infotext
+= ", %d min average: %s (%s)" % (average_min
,
587 get_percent_human_readable(perc
),
588 get_bytes_human_readable(used_kb
* 1024))
589 perfdata
.append((paramname
+ "_avg", used_mb
))
591 # Now check the levels
592 if (warn
, crit
) != (None, None):
596 elif used_mb
>= warn
:
601 state
, infoadd
, perfadd
= check_levels(
602 used_mb
, # Current value stored in MB in RRDs
603 "average" in params
and paramname
+ "_avg" or paramname
, # Name of RRD variable
605 unit
="GB", # Levels are specified in GB...
606 scale
=1024, # ... in WATO ValueSpec
609 infotext
+= ", " + infoadd
612 yield state
, infotext
, perfdata
# Registration of the mem.win check: wires up check_mem_windows() and
# inventory_mem_win() and names the factory-settings variable that holds
# its default levels.
check_info["mem.win"] = {
    'check_function': check_mem_windows,
    'inventory_function': inventory_mem_win,
    'service_description': 'Memory and pagefile',
    'has_perfdata': True,
    'group': 'memory_pagefile_win',
    'default_levels_variable': 'memory_win_default_levels',
    "handle_real_time_checks": True,
}
626 # .--mem.vmalloc---------------------------------------------------------.
628 # | _ __ ___ ___ _ __ ___ __ ___ __ ___ __ _| | | ___ ___ |
629 # | | '_ ` _ \ / _ \ '_ ` _ \\ \ / / '_ ` _ \ / _` | | |/ _ \ / __| |
630 # | | | | | | | __/ | | | | |\ V /| | | | | | (_| | | | (_) | (__ |
631 # | |_| |_| |_|\___|_| |_| |_(_)_/ |_| |_| |_|\__,_|_|_|\___/ \___| |
633 # +----------------------------------------------------------------------+
# | This very specific check checks the usage and fragmentation of the |
# | address space 'vmalloc' that can be problematic on 32-Bit systems. |
# | It is superseded by the new check mem.linux and will be removed |
638 # '----------------------------------------------------------------------'
# warn, crit, warn_chunk, crit_chunk. Integers are in MB, floats are in percent
mem_vmalloc_default_levels = (80.0, 90.0, 64, 32)
# Inventory function of mem.vmalloc.
# Returns nothing when is_linux_meminfo() accepts the data (the newer Linux
# memory check handles that case). Otherwise, if "VmallocTotal" is present
# and VmallocUsed/VmallocChunk are not both zero, the check may be
# inventorized with "mem_vmalloc_default_levels" as parameters.
# NOTE(review): the rest of the 64-bit comment (original line 654) and the
# condition on `vmalloc` that guards the final return (original line 656)
# are not visible in this excerpt -- confirm against the full file.
644 def inventory_mem_vmalloc(info
):
645 meminfo
= parse_proc_meminfo(info
)
646 if is_linux_meminfo(meminfo
):
647 return # handled by new Linux memory check
649 # newer kernel versions report wrong data,
650 # i.e. both VmallocUsed and Chunk equal zero
651 if "VmallocTotal" in meminfo
and \
652 not (meminfo
["VmallocUsed"] == 0 and meminfo
["VmallocChunk"] == 0):
653 # Do not check this on 64 Bit systems. They have almost
# NOTE(review): the divisor 1024.4 looks like a typo for 1024.0 (the check
# function of this same check divides by 1024.0) -- confirm before changing.
655 vmalloc
= meminfo
["VmallocTotal"] / 1024.4
657 return [(None, "mem_vmalloc_default_levels")]
# Check function of mem.vmalloc.
#
# Converts VmallocTotal/VmallocUsed/VmallocChunk to MB (division by
# 1024.0), unpacks params into (warn, crit, warn_chunk, crit_chunk) and
# evaluates two values in a loop: "used" (neg=False) and "chunk"
# (neg=True). Float levels are converted from percent of total_mb to MB.
# `(v >= level) != neg` inverts the comparison for the chunk value, so it
# triggers when the value is *below* the level. Returns a
# (state, infotext, perfdata) tuple.
#
# NOTE(review): several lines are not visible in this excerpt
# (initialisation of `state`, `infotxts`, `perfdata`, the `else` branches
# of the level conversion, the assignments of `s`, the end of the loop's
# tuple list) -- confirm against the full file.
660 def check_mem_vmalloc(item
, params
, info
):
661 meminfo
= parse_proc_meminfo(info
)
662 total_mb
= meminfo
["VmallocTotal"] / 1024.0
663 used_mb
= meminfo
["VmallocUsed"] / 1024.0
664 chunk_mb
= meminfo
["VmallocChunk"] / 1024.0
665 warn
, crit
, warn_chunk
, crit_chunk
= params
670 for var
, w
, c
, v
, neg
, what
in [("used", warn
, crit
, used_mb
, False, "used"),
671 ("chunk", warn_chunk
, crit_chunk
, chunk_mb
, True,
674 # convert levels from percentage to MB values
675 if isinstance(w
, float):
676 w_mb
= total_mb
* w
/ 100
680 if isinstance(c
, float):
681 c_mb
= total_mb
* c
/ 100
685 infotxt
= "%s %.1f MB" % (what
, v
)
686 if (v
>= c_mb
) != neg
:
688 infotxt
+= " (critical at %.1f MB!!)" % c_mb
689 elif (v
>= w_mb
) != neg
:
691 infotxt
+= " (warning at %.1f MB!)" % w_mb
# The overall state is the worst state of the evaluated values.
694 state
= max(state
, s
)
695 infotxts
.append(infotxt
)
696 perfdata
.append((var
, v
, w_mb
, c_mb
, 0, total_mb
))
697 return (state
, ("total %.1f MB, " % total_mb
) + ", ".join(infotxts
), perfdata
)
# Registration of the mem.vmalloc check: wires up the inventory and check
# functions defined in this file.
check_info["mem.vmalloc"] = {
    'inventory_function': inventory_mem_vmalloc,
    'check_function': check_mem_vmalloc,
    'service_description': 'Vmalloc address space',
    'has_perfdata': True,
    "handle_real_time_checks": True,
}