Refactoring: Changed remaining check parameters starting with an 's' to the new rules...
[check_mk.git] / checks / ps
bloba0a09cbd0b20a3cd544eabbbd52a612c121174cd
1 #!/usr/bin/python
2 # -*- encoding: utf-8; py-indent-offset: 4 -*-
3 # +------------------------------------------------------------------+
4 # | ____ _ _ __ __ _ __ |
5 # | / ___| |__ ___ ___| | __ | \/ | |/ / |
6 # | | | | '_ \ / _ \/ __| |/ / | |\/| | ' / |
7 # | | |___| | | | __/ (__| < | | | | . \ |
8 # | \____|_| |_|\___|\___|_|\_\___|_| |_|_|\_\ |
9 # | |
10 # | Copyright Mathias Kettner 2014 mk@mathias-kettner.de |
11 # +------------------------------------------------------------------+
13 # This file is part of Check_MK.
14 # The official homepage is at http://mathias-kettner.de/check_mk.
16 # check_mk is free software; you can redistribute it and/or modify it
17 # under the terms of the GNU General Public License as published by
18 # the Free Software Foundation in version 2. check_mk is distributed
19 # in the hope that it will be useful, but WITHOUT ANY WARRANTY; with-
20 # out even the implied warranty of MERCHANTABILITY or FITNESS FOR A
21 # PARTICULAR PURPOSE. See the GNU General Public License for more de-
22 # tails. You should have received a copy of the GNU General Public
23 # License along with GNU Make; see the file COPYING. If not, write
24 # to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
25 # Boston, MA 02110-1301 USA.
27 # First generation of agents output only the process command line:
28 # /usr/sbin/xinetd -pidfile /var/run/xinetd.pid -stayalive -inetd_compat -inetd_ipv6
30 # Second generation of agents output the user in brackets in the first columns:
31 # (root) /usr/sbin/xinetd -pidfile /var/run/xinetd.pid -stayalive -inetd_compat -inetd_ipv6
33 # Third generation (from 1.1.5) output also virtual memory, resident memory and %CPU:
34 # (class,122376,88128,0.0) /usr/jre1.6.0_13/bin/java -Dn=Cart_16TH13 -Dmcs.node=zbgh1ca -Dmcs.mdt.redundan
36 # Fourth generation (>=1.2.5), additional columns in bracket:
37 # (user, virtual_size, resident_size, %cpu, processID, pagefile_usage, usermodetime, kernelmodetime, openHandles, threadCount) name
38 # (\\KLAPPRECHNER\ab,29284,2948,0,3124,904,400576,901296,35,1) NOTEPAD.EXE
40 # Sixth generation (>=1.2.7) adds an optional etime, joined by "/" with the CPU time
42 # The plugin "psperf.bat" is deprecated. As of version 1.2.5 all of this information
43 # is reported by the windows agent itself. However, we still support sections from psperf.bat
44 # if the agent version is lower than 1.2.5.
45 # The plugin "psperf.bat" adds a section from wmic
46 # to the output:
47 # <<<ps:sep(44)>>>
48 # [wmic process]
49 # ^M
50 # Node,KernelModeTime,Name,PageFileUsage,ThreadCount,UserModeTime,VirtualSize,WorkingSetSize^M
51 # WINDOWSXP,43478281250,System Idle Process,0,2,0,0,28672^M
52 # WINDOWSXP,155781250,System,0,59,0,1957888,253952^M
53 # WINDOWSXP,468750,smss.exe,176128,3,156250,3928064,442368^M
54 # WINDOWSXP,56406250,csrss.exe,1863680,12,11406250,25780224,3956736^M
55 # WINDOWSXP,18593750,winlogon.exe,6832128,19,4843750,59314176,2686976^M
56 # WINDOWSXP,167500000,services.exe,1765376,16,13750000,22601728,4444160^M
57 # WINDOWSXP,16875000,lsass.exe,3964928,21,3906250,43462656,6647808^M
58 # WINDOWSXP,8750000,VBoxService.exe,1056768,8,468750,26652672,3342336^M
60 # New since 1.2.1i2: WATO compatible syntax
62 # Holds a list of rules which are matching hosts by names or tags and
63 # where each rule holds a dictionary.
65 # Each of those entries defines the following options:
67 # 1. descr: item name to be used for the service description
68 # 2. match: matching-definition
69 # 3. user: user definition
70 # 4. perfdata: monitor with perfdata
71 # 5. levels: four numbers (thresholds)
# WATO compatible discovery configuration: a list of rules that match hosts
# by name or tags, each rule carrying a dictionary of options.
inventory_processes_rules = []

# Further discovery configuration lists. inventory_processes is handed to
# inventory_ps_common() together with the rules above; presumably both lists
# are the pre-WATO configuration format -- TODO confirm.
inventory_processes = []
inventory_processes_perf = []

# NOTE(review): looks like the marker for "any process owner" -- confirm
# against ps.include.
ANY_USER = None
def ps_cleanup_counters(parsed):
    """Remove item states of processes that are gone from the agent output.

    Collects the process ids found in ``parsed``, asks
    ps_get_counters_to_delete() for every stored counter whose process id is
    not among them and clears those item states. This keeps the item state
    from growing with data about processes that are no longer running.
    """
    running_pids = ps_get_current_pids(parsed)

    # "last.cleared.ps_" is a legacy key that was used for some kind of
    # caching; it is always scheduled for removal.
    stale_keys = ["last.cleared.ps_"]
    stale_keys.extend(ps_get_counters_to_delete(running_pids))

    clear_item_states_by_full_keys(stale_keys)
92 # Get the idents of the counters which can be deleted because the process id of
93 # the counter is not found anymore in the process table.
95 # Handle these formats of idents:
96 # Old string based keys: 'ps_stat.pcpu.669': (1448634267.875281, 1),
97 # New magic keys: ('ps', None, 'ps_wmic.kernel.692'): (1448633487.573496, 1092007),
def ps_get_counters_to_delete(pids):
    """Return the item state keys whose process id is not in ``pids``.

    Two key formats are recognised:

    * plain strings starting with "ps_stat" or "ps_wmic",
      e.g. 'ps_stat.pcpu.669'
    * tuples whose first element is "ps"; the counter name is then the third
      element, e.g. ('ps', None, 'ps_wmic.kernel.692')

    The process id is the part after the last "." of the counter name. Keys
    in any other format, and keys whose last part is not numeric, are left
    alone.
    """
    legacy_prefixes = ("ps_stat", "ps_wmic")
    stale_idents = []

    for ident in get_all_item_states():
        if isinstance(ident, tuple):
            if ident[0] != "ps":
                continue
            counter_name = ident[2]
        elif ident.startswith(legacy_prefixes):
            counter_name = ident
        else:
            continue

        pid = counter_name.rsplit(".", 1)[-1]
        if pid.isdigit() and pid not in pids:
            stale_idents.append(ident)

    return stale_idents
def ps_get_current_pids(parsed):
    """Return the set of process ids present in the parsed agent output.

    The second element of every line is expected to expose a ``process_id``
    attribute. Lines whose process id is empty or otherwise falsy are
    skipped.
    """
    reported_pids = (entry[1].process_id for entry in parsed)
    return set(pid for pid in reported_pids if pid)
125 # This function is only concerned with deprecated output from psperf.bat,
126 # in case of all other output it just returns info unmodified. But if it is
127 # a windows output it will extract the number of cpu cores
def ps_merge_wmic_info(info):
    """Return ``(cpu_cores, info)`` and merge deprecated psperf.bat output.

    The lines are scanned in order and the first match decides:

    * A line with more than two columns whose third column is
      "system idle process" (case-insensitive): the number of cores is the
      tenth comma separated field of the bracketed second column, and
      ``info`` is returned unmodified. This is the agent format of
      cmk > 1.2.5, where a line is [CLUSTER, PS_INFO, COMMAND].
    * A line whose last column contains "wmic process": the output comes
      from an agent older than 1.2.5 with psperf.bat, and the work is
      delegated to extract_wmic_info().

    If no line matches, the data is not from Windows and ``(1, info)`` is
    returned.
    """
    for entry in info:
        if len(entry) > 2 and entry[2].lower() == "system idle process":
            # Strip the surrounding brackets before splitting the fields
            info_fields = entry[1][1:-1].split(",")
            return int(info_fields[9]), info

        if "wmic process" in entry[-1]:
            return extract_wmic_info(info)

    return 1, info
def extract_wmic_info(info):
    """Split agent output into process lines and wmic rows, then merge them.

    Lines between the markers '[wmic process]' and '[wmic process end]' are
    wmic rows; the line directly after the start marker holds the column
    headers (its first column is replaced by "node"). All other lines are the
    plain list of process names. Both parts are handed to merge_wmic(), whose
    result ``(cpu_cores, info)`` is returned.

    If the output ends right after the start marker, or the start marker is
    missing altogether, the headers stay empty instead of being undefined.
    """
    ps_result = []
    wmic_info = {}
    wmic_headers = []
    is_wmic = False

    lines = iter(info)
    for line in lines:
        marker = line[-1]

        if marker == '[wmic process]':
            is_wmic = True
            try:
                header_line = next(lines)
            except StopIteration:
                break  # Output is truncated directly after the marker
            wmic_headers = ["node"] + header_line[1:]
            continue

        if marker == '[wmic process end]':
            is_wmic = False
            continue

        if not is_wmic:
            ps_result.append(line)  # plain list of process names
            continue

        row = dict(zip(wmic_headers, line))
        # Row might be damaged. I've seen this agent output:
        # Node - TILE-BUILDER02
        # ERROR:
        # Description = Quota violation
        #
        # Node,
        if "Name" in row and "ProcessId" in row:
            wmic_info.setdefault((row["node"], row["Name"]), []).append(row)

    return merge_wmic(ps_result, wmic_info, wmic_headers)
def merge_wmic(ps_result, wmic_info, wmic_headers):
    """Decorate plain process lines with the data from the wmic section.

    ps_result:    list of lines [node, process name, ...]
    wmic_info:    dict mapping (node, process name) to a list of wmic rows,
                  each row being a dict of column name -> string value
    wmic_headers: list of the wmic column names

    For every line that has a wmic row left for its (node, name) pair, one row
    is consumed. If the row's process id has not been seen before, a process
    info string "(unknown,virt,resident,0,pid,pagefile,user,kernel,handles,
    threads,)" is inserted as second column of the line. The lines are
    modified in place.

    Returns ``(cpu_cores, info)``. The number of cores is the thread count of
    the "system idle process" row when the headers contain "ThreadCount",
    otherwise 1.
    """
    info = []
    seen_pids = set()  # A process info is inserted only once per process id
    cpu_cores = 1

    for line in ps_result:
        psinfos = wmic_info.get((line[0], line[1]), [])
        if psinfos:
            psinfo = psinfos.pop()  # each info is used only once!

            # Get number of CPU cores from system idle process
            if "ThreadCount" in wmic_headers and psinfo["Name"].lower() == "system idle process":
                cpu_cores = int(psinfo["ThreadCount"])

            pid = int(psinfo["ProcessId"])
            if pid not in seen_pids:
                seen_pids.add(pid)
                # Floor division keeps the values exact integers, regardless
                # of the interpreter's semantics for "/"
                virt = int(psinfo["VirtualSize"]) // 1024  # Bytes -> KB
                resi = int(psinfo["WorkingSetSize"]) // 1024  # Bytes -> KB
                pagefile = int(psinfo["PageFileUsage"]) // 1024  # Bytes -> KB
                userc = int(psinfo["UserModeTime"])  # do not resolve counter here!
                kernelc = int(psinfo["KernelModeTime"])  # do not resolve counter here!
                handlec = int(psinfo.get("HandleCount", 0))  # Only in newer psperf.bat versions
                threadc = int(psinfo["ThreadCount"])  # do not resolve counter here!
                line[1:1] = [
                    "(unknown,%d,%d,0,%d,%d,%d,%d,%d,%d,)" %
                    (virt, resi, pid, pagefile, userc, kernelc, handlec, threadc)
                ]
        info.append(line)

    return cpu_cores, info
213 # This mainly formats the line[1] element which contains the process info (user,...)
def ps_parse_process_entries(parsed):
    """Normalise the process info column of every line and drop empty lines.

    line[0] is the node, line[1] is either the process info or, if no process
    info is available, already the process name. When ps_info_tuple() yields
    a result for line[1] it replaces that column; otherwise a default
    ps_info() is inserted as second column, so that all lines have the same
    number of columns for discovery and check. The lines are modified in
    place.

    Returns a new list without the lines that carry no process command line,
    e.g.
      [None, u'(<defunct>,,,)']
      [None, u'(<defunct>,,,)', u'']
    """
    for entry in parsed:
        info_tuple = ps_info_tuple(entry[1])
        if not info_tuple:
            entry.insert(1, ps_info())
            continue
        entry[1] = info_tuple

    return [entry for entry in parsed if len(entry) > 2 and entry[2]]
232 # Produces a list of lists where each sub list is built as follows:
234 # [None, (u'root', u'35156', u'4372', u'00:00:05/2-14:14:49', u'1'), u'/sbin/init'],
236 # First element: The node the data comes from in a cluster or None
237 # Second element: The process info tuple (see ps.include: check_ps_common() for details on the elements)
238 # Third element: The process command line
def parse_ps(info):
    """Parse the agent section into ``(cpu_cores, process_lines)``.

    The steps are: merge possible wmic data and determine the number of CPU
    cores, normalise the process info column of every line, and finally clean
    up the counters of processes which do not exist anymore.
    """
    cpu_cores, merged_lines = ps_merge_wmic_info(info)
    process_lines = ps_parse_process_entries(merged_lines)

    # Must run on the normalised lines: the cleanup reads the process ids
    # from the process info column
    ps_cleanup_counters(process_lines)

    return cpu_cores, process_lines
def inventory_ps(info):
    """Discover process services from the parsed ps section.

    ``info[0]`` is the pair ``(cpu_cores, parsed)`` produced by parse_ps();
    only the parsed process lines are needed here.
    """
    (_cpu_cores, process_lines) = info[0]
    return inventory_ps_common(inventory_processes, inventory_processes_rules, process_lines)
def check_ps(item, params, info):
    """Check function of the ps check.

    ``info`` holds the parsed ps section ``(cpu_cores, parsed)`` followed by
    the extra sections mem, solaris_mem, statgrab_mem, aix_memory and cpu.
    The number of CPU cores and the total RAM are derived from the extra
    sections where available and passed on to check_ps_common(), whose result
    is returned.

    A memory section without "MemTotal" leads to ``total_ram=None`` for all
    of mem, solaris_mem and statgrab_mem.
    """
    (cpu_cores, parsed), mem_info, solaris_mem_info, statgrab_mem_info, \
        aix_memory_info, cpu_info = info

    # cpu_info for non windows systems
    if cpu_info and len(cpu_info[0]) == 6:
        cpu_cores = int(cpu_info[0][5])

    total_ram = None
    if mem_info:
        total_ram = parse_proc_meminfo_bytes(mem_info).get("MemTotal")
    elif solaris_mem_info or statgrab_mem_info:
        # Same precedence as before: solaris_mem wins over statgrab_mem
        mem_total = (solaris_mem_info or statgrab_mem_info).get("MemTotal")
        if mem_total is not None:
            # presumably the sections report kB, hence the factor -- TODO confirm
            total_ram = mem_total * 1024
    elif aix_memory_info:
        # NOTE(review): looks like a count of 4 kB pages -- confirm
        total_ram = int(aix_memory_info[0][0]) * 4 * 1024

    return check_ps_common(item, params, parsed, cpu_cores=cpu_cores, total_ram=total_ram)
# Registration of the ps check: parsing, discovery and check function plus
# the extra sections that check_ps() unpacks after the parsed ps data.
check_info["ps"] = {
    "parse_function": parse_ps,
    "inventory_function": inventory_ps,
    "check_function": check_ps,
    "service_description": "Process %s",
    "includes": ["ps.include", "mem.include"],
    "has_perfdata": True,
    # add first column with actual host name
    "node_info": True,
    "group": "ps",
    "default_levels_variable": "ps_default_levels",
    "extra_sections": ["mem", "solaris_mem", "statgrab_mem", "aix_memory", "cpu"],
}
290 # NOTE: This check is deprecated and will be removed any decade now. The ps
291 # check now always produces performance data.
# Registration of the ps.perf subcheck. It has no parse or inventory function
# of its own and uses the same check function as ps.
check_info["ps.perf"] = {
    "check_function": check_ps,
    "service_description": "Process %s",
    "includes": ["ps.include", "mem.include"],
    "has_perfdata": True,
    # add first column with actual host name
    "node_info": True,
    "group": "ps",
    "default_levels_variable": "ps_default_levels",
    "extra_sections": ["mem", "solaris_mem", "statgrab_mem", "aix_memory", "cpu"],
}