GUI CSS: Deployed view styles for layouts (CMK-1171)
[check_mk.git] / checks / ps
blob41e149c27f24e1d1c99e1b2047d93d699c9dd095
1 #!/usr/bin/python
2 # -*- encoding: utf-8; py-indent-offset: 4 -*-
3 # +------------------------------------------------------------------+
4 # | ____ _ _ __ __ _ __ |
5 # | / ___| |__ ___ ___| | __ | \/ | |/ / |
6 # | | | | '_ \ / _ \/ __| |/ / | |\/| | ' / |
7 # | | |___| | | | __/ (__| < | | | | . \ |
8 # | \____|_| |_|\___|\___|_|\_\___|_| |_|_|\_\ |
9 # | |
10 # | Copyright Mathias Kettner 2014 mk@mathias-kettner.de |
11 # +------------------------------------------------------------------+
13 # This file is part of Check_MK.
14 # The official homepage is at http://mathias-kettner.de/check_mk.
16 # check_mk is free software; you can redistribute it and/or modify it
17 # under the terms of the GNU General Public License as published by
18 # the Free Software Foundation in version 2. check_mk is distributed
19 # in the hope that it will be useful, but WITHOUT ANY WARRANTY; with-
20 # out even the implied warranty of MERCHANTABILITY or FITNESS FOR A
21 # PARTICULAR PURPOSE. See the GNU General Public License for more de-
22 # tails. You should have received a copy of the GNU General Public
23 # License along with GNU Make; see the file COPYING. If not, write
24 # to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
25 # Boston, MA 02110-1301 USA.
27 # First generation of agents output only the process command line:
28 # /usr/sbin/xinetd -pidfile /var/run/xinetd.pid -stayalive -inetd_compat -inetd_ipv6
30 # Second generation of agents output the user in brackets in the first columns:
31 # (root) /usr/sbin/xinetd -pidfile /var/run/xinetd.pid -stayalive -inetd_compat -inetd_ipv6
33 # Third generation (from 1.1.5) output also virtual memory, resident memory and %CPU:
34 # (class,122376,88128,0.0) /usr/jre1.6.0_13/bin/java -Dn=Cart_16TH13 -Dmcs.node=zbgh1ca -Dmcs.mdt.redundan
36 # Fourth generation (>=1.2.5), additional columns in brackets:
37 # (user, virtual_size, resident_size, %cpu, processID, pagefile_usage, usermodetime, kernelmodetime, openHandles, threadCount) name
38 # (\\KLAPPRECHNER\ab,29284,2948,0,3124,904,400576,901296,35,1) NOTEPAD.EXE
40 # Sixth generation (>=1.2.7) adds an optional etime, joined by "/" with the CPU time
42 # The plugin "psperf.bat" is deprecated. As of version 1.2.5 all of this information
43 # is reported by the windows agent itself. However, we still support sections from psperf.bat
44 # if the agent version is lower than 1.2.5.
45 # For those older agents the plugin "psperf.bat" adds a section from wmic
46 # to the output:
47 # <<<ps:sep(44)>>>
48 # [wmic process]
49 # ^M
50 # Node,KernelModeTime,Name,PageFileUsage,ThreadCount,UserModeTime,VirtualSize,WorkingSetSize^M
51 # WINDOWSXP,43478281250,System Idle Process,0,2,0,0,28672^M
52 # WINDOWSXP,155781250,System,0,59,0,1957888,253952^M
53 # WINDOWSXP,468750,smss.exe,176128,3,156250,3928064,442368^M
54 # WINDOWSXP,56406250,csrss.exe,1863680,12,11406250,25780224,3956736^M
55 # WINDOWSXP,18593750,winlogon.exe,6832128,19,4843750,59314176,2686976^M
56 # WINDOWSXP,167500000,services.exe,1765376,16,13750000,22601728,4444160^M
57 # WINDOWSXP,16875000,lsass.exe,3964928,21,3906250,43462656,6647808^M
58 # WINDOWSXP,8750000,VBoxService.exe,1056768,8,468750,26652672,3342336^M
60 # New since 1.2.1i2: WATO compatible syntax
62 # Holds a list of rules which are matching hosts by names or tags and
63 # where each rule holds a dictionary.
65 # Each of those entries defines the following options:
67 # 1. descr: item name to be used for the service description
68 # 2. match: matching-definition
69 # 3. user: user definition
70 # 4. perfdata: monitor with perfdata
71 # 5. levels: four numbers (thresholds)
# WATO compatible rule set: a list of rules, each holding a dictionary
# describing one process discovery specification.
inventory_processes_rules = []

# Deprecated option since 1.6. cmk_base creates a config warning when finding rules
# for this ruleset. Can be dropped with 1.7.
inventory_processes = []

inventory_processes_perf = []
ANY_USER = None
def ps_cleanup_counters(parsed):
    """Clear item states that belong to processes missing from `parsed`.

    `parsed` is the list of process lines as consumed by
    ps_get_current_pids(). The legacy key "last.cleared.ps_" is always
    scheduled for removal as well.
    """
    # remove legacy key used for some kind of caching
    cleanup_idents = ["last.cleared.ps_"]

    pids = ps_get_current_pids(parsed)
    cleanup_idents += ps_get_counters_to_delete(pids)

    # Avoid growing the item state with info about processes that are no
    # longer executing: everything that is not tied to a PID of the current
    # agent output gets cleared.
    clear_item_states_by_full_keys(cleanup_idents)
95 # Get the idents of the counters which can be deleted because the process id of
96 # the counter is not found anymore in the process table.
98 # Handle these formats of idents:
99 # Old string based keys: 'ps_stat.pcpu.669': (1448634267.875281, 1),
100 # New magic keys: ('ps', None, 'ps_wmic.kernel.692'): (1448633487.573496, 1092007),
def ps_get_counters_to_delete(pids):
    """Return the item state idents whose process id is not in `pids`.

    Two ident formats are handled:
      Old string based keys: 'ps_stat.pcpu.669'
      New tuple keys:        ('ps', None, 'ps_wmic.kernel.692')

    The process id is the last dot separated component of the counter name
    and is compared as a string, so `pids` has to contain strings. Idents
    of any other shape, or without a numeric last component, are left alone.
    """
    counters_to_delete = []
    for ident in get_all_item_states():
        if isinstance(ident, tuple):
            if ident[0] not in ("ps", "ps.perf"):
                continue
            check_ident = ident[2]
        elif ident.startswith(("ps_stat", "ps_wmic")):
            check_ident = ident
        else:
            continue

        pid = check_ident.split(".")[-1]
        if pid.isdigit() and pid not in pids:
            counters_to_delete.append(ident)

    return counters_to_delete
def ps_get_current_pids(parsed):
    """Return the set of process ids found in `parsed`.

    Every line is expected to carry its process info object at index 1;
    entries whose `process_id` attribute is falsy (e.g. None or an empty
    string) are skipped.
    """
    return set(line[1].process_id for line in parsed if line[1].process_id)
128 # This function is only concerned with deprecated output from psperf.bat,
129 # in case of all other output it just returns info unmodified. But if it is
130 # a windows output it will extract the number of cpu cores
def ps_merge_wmic_info(info):
    """Return a tuple (cpu_cores, info) for the raw agent section `info`.

    Three cases are distinguished while scanning the lines in order:

    * A line with more than two columns whose third column is
      "system idle process" (case insensitive): the bracketed process info
      in column 1 is split at "," and its tenth field (the thread count in
      the fourth generation agent format) is returned as the number of CPU
      cores together with the unmodified `info`.
    * A line whose last column contains "wmic process": the section comes
      from the deprecated psperf.bat plugin and is handed over to
      extract_wmic_info().
    * Neither of the above: (1, info) is returned unmodified.
    """
    # Agent output version cmk>1.2.5
    # Assumes line = [CLUSTER, PS_INFO, COMMAND]
    has_wmic = False
    for line in info:
        if len(line) > 2 and line[2].lower() == "system idle process":
            # Strip the surrounding brackets before splitting the fields
            fields = line[1][1:-1].split(",")
            return int(fields[9]), info
        if "wmic process" in line[-1]:
            has_wmic = True
            break

    # Data from other systems than windows
    if not has_wmic:
        return 1, info

    # Data from windows with wmic info, cmk<1.2.5
    return extract_wmic_info(info)
def extract_wmic_info(info):
    """Split a legacy psperf.bat section into process lines and wmic rows.

    Lines between a '[wmic process]' marker and a '[wmic process end]'
    marker are wmic rows; the first line after the opening marker is the
    header. Its first column is replaced by "node". All other lines are
    plain process lines. The collected data is passed on to merge_wmic()
    and its result (cpu_cores, info) is returned.
    """
    ps_result = []
    wmic_info = {}
    # Start with an empty header so that a section without an exact
    # '[wmic process]' marker does not end in a NameError below.
    wmic_headers = []
    is_wmic = False

    lines = iter(info)
    for line in lines:
        if line[-1] == '[wmic process]':
            is_wmic = True
            # The header line directly follows the marker and is consumed here
            header = next(lines, None)
            if header is None:
                break  # Section ended right after the marker
            wmic_headers = ["node"] + header[1:]
            continue
        if line[-1] == '[wmic process end]':
            is_wmic = False
            continue

        if is_wmic:
            row = dict(zip(wmic_headers, line))
            # Row might be damaged. I've seen this agent output:
            # Node - TILE-BUILDER02
            # ERROR:
            # Description = Quota violation
            #
            # Node,
            if "Name" in row and "ProcessId" in row:
                wmic_info.setdefault((row["node"], row["Name"]), []).append(row)
        else:
            ps_result.append(line)  # plain list of process names

    return merge_wmic(ps_result, wmic_info, wmic_headers)
def merge_wmic(ps_result, wmic_info, wmic_headers):
    """Enrich plain process lines with the matching wmic rows.

    ps_result:    list of lines [node, process name, ...]; modified in place
    wmic_info:    dict (node, process name) -> list of wmic row dicts; a row
                  is removed from its list once it has been used
    wmic_headers: list of the wmic column names

    For every line with a matching wmic row whose process id has not been
    seen before, a process info string in the bracket format
    "(unknown,virt,resident,0,pid,pagefile,user,kernel,handles,threads,)"
    is inserted at index 1. Lines without a match, or with an already seen
    process id, are passed through unchanged.

    Returns a tuple (cpu_cores, info). cpu_cores is the thread count of the
    "system idle process" row if there is one and the header contains
    "ThreadCount", otherwise 1.
    """
    info = []
    seen_pids = set()  # Remove duplicate entries
    cpu_cores = 1
    for line in ps_result:
        psinfos = wmic_info.get((line[0], line[1]), [])
        if psinfos:
            psinfo = psinfos.pop()  # each info is used only once!
            # Get number of CPU cores from system idle process
            if "ThreadCount" in wmic_headers and psinfo["Name"].lower() == "system idle process":
                cpu_cores = int(psinfo["ThreadCount"])
            pid = int(psinfo["ProcessId"])
            if pid not in seen_pids:
                seen_pids.add(pid)
                # Explicit floor division keeps the sizes integral
                virt = int(psinfo["VirtualSize"]) // 1024  # Bytes -> KB
                resi = int(psinfo["WorkingSetSize"]) // 1024  # Bytes -> KB
                pagefile = int(psinfo["PageFileUsage"]) // 1024  # Bytes -> KB
                userc = int(psinfo["UserModeTime"])  # do not resolve counter here!
                kernelc = int(psinfo["KernelModeTime"])  # do not resolve counter here!
                handlec = int(psinfo.get("HandleCount", 0))  # Only in newer psperf.bat versions
                threadc = int(psinfo["ThreadCount"])  # do not resolve counter here!
                line.insert(
                    1, "(unknown,%d,%d,0,%d,%d,%d,%d,%d,%d,)" %
                    (virt, resi, pid, pagefile, userc, kernelc, handlec, threadc))
        info.append(line)

    return cpu_cores, info
216 # This mainly formats the line[1] element which contains the process info (user,...)
def ps_parse_process_entries(parsed):
    """Convert the process info column of every line and drop empty entries.

    For every line:
      line[0] = node
      line[1] = process_info OR (if no process info available) = process name

    If ps_info_tuple() yields a result for line[1], it replaces line[1].
    Otherwise a default ps_info() is inserted at index 1, so that all lines
    have the same number of columns. The lines are modified in place.

    Returns a new list containing only the lines that have a non-empty
    command line at index 2.
    """
    for line in parsed:
        process_info = ps_info_tuple(line[1])
        if process_info:
            line[1] = process_info
        else:
            # Make number of columns in line consistent for discovery/check
            line.insert(1, ps_info())

    # Filter out any lines where no process command line is available, e.g.
    # [None, u'(<defunct>,,,)']
    # [None, u'(<defunct>,,,)', u'']
    return [line for line in parsed if len(line) > 2 and line[2]]
235 # Produces a list of lists where each sub list is built as follows:
237 # [None, (u'root', u'35156', u'4372', u'00:00:05/2-14:14:49', u'1'), u'/sbin/init'],
239 # First element: The node the data comes from in a cluster or None
240 # Second element: The process info tuple (see ps.include: check_ps_common() for details on the elements)
241 # Third element: The process command line
def parse_ps(info):
    """Parse function of the ps check.

    Returns a tuple (cpu_cores, parsed), where `parsed` is a list of lists
    built like this:

      [None, (u'root', u'35156', u'4372', u'00:00:05/2-14:14:49', u'1'), u'/sbin/init']

    First element:  The node the data comes from in a cluster or None
    Second element: The process info tuple
    Third element:  The process command line

    As a side effect the counters of processes that do not exist anymore
    are removed from the item state.
    """
    cpu_cores, parsed = ps_merge_wmic_info(info)

    parsed = ps_parse_process_entries(parsed)

    # Cleanup counters of processes which do not exist anymore
    ps_cleanup_counters(parsed)

    return cpu_cores, parsed
def inventory_ps(info):
    """Discovery function of the ps check.

    `info[0]` is the (cpu_cores, parsed) tuple produced by parse_ps(); the
    remaining elements of `info` are not used for the discovery. The parsed
    process lines are matched against `inventory_processes_rules` by
    inventory_ps_common().
    """
    _cpu_cores, parsed = info[0]
    return inventory_ps_common(inventory_processes_rules, parsed)
def check_ps(item, params, info):
    """Check function of the ps and ps.perf checks.

    `info` holds six elements: the (cpu_cores, parsed) tuple produced by
    parse_ps(), followed by the memory and cpu sections in the order
    mem, solaris_mem, statgrab_mem, aix_memory, cpu.

    The number of CPU cores from the parsed ps section is overridden by the
    sixth column of the first cpu line, if that line has exactly six
    columns. The total RAM is taken from the first available memory section
    and is None if there is none. The result of check_ps_common() is
    returned.
    """
    (cpu_cores,
     parsed), mem_info, solaris_mem_info, statgrab_mem_info, aix_memory_info, cpu_info = info

    # cpu_info for non windows systems
    if cpu_info and len(cpu_info[0]) == 6:
        cpu_cores = int(cpu_info[0][5])

    if mem_info:
        total_ram = parse_proc_meminfo_bytes(mem_info).get("MemTotal")
    elif solaris_mem_info:
        # NOTE(review): presumably the value is in kB and converted to bytes - confirm
        total_ram = solaris_mem_info.get("MemTotal") * 1024
    elif statgrab_mem_info:
        # NOTE(review): presumably the value is in kB and converted to bytes - confirm
        total_ram = statgrab_mem_info.get("MemTotal") * 1024
    elif aix_memory_info:
        # NOTE(review): looks like a count of 4 kB pages converted to bytes - confirm
        total_ram = int(aix_memory_info[0][0]) * 4 * 1024
    else:
        total_ram = None

    return check_ps_common(item, params, parsed, cpu_cores=cpu_cores, total_ram=total_ram)
# Registration of the ps check. The sections listed in "extra_sections" are
# handed to the check function in addition to the parsed ps section.
check_info['ps'] = {
    "parse_function": parse_ps,
    "inventory_function": inventory_ps,
    "check_function": check_ps,
    "service_description": "Process %s",
    "includes": ["ps.include", "mem.include"],
    "has_perfdata": True,
    "node_info": True,  # add first column with actual host name
    "group": "ps",
    "default_levels_variable": "ps_default_levels",
    "extra_sections": ["mem", "solaris_mem", "statgrab_mem", "aix_memory", "cpu"],
}
293 # NOTE: This check is deprecated and will be removed any decade now. ps now
294 # does always performance data.
# Registration of the deprecated ps.perf sub check. It uses the same check
# function as ps and declares neither a parse nor an inventory function.
check_info['ps.perf'] = {
    "check_function": check_ps,
    "service_description": "Process %s",
    "includes": ["ps.include", "mem.include"],
    "has_perfdata": True,
    "node_info": True,  # add first column with actual host name
    "group": "ps",
    "default_levels_variable": "ps_default_levels",
    "extra_sections": ["mem", "solaris_mem", "statgrab_mem", "aix_memory", "cpu"],
}