2 # -*- encoding: utf-8; py-indent-offset: 4 -*-
3 # +------------------------------------------------------------------+
4 # | ____ _ _ __ __ _ __ |
5 # | / ___| |__ ___ ___| | __ | \/ | |/ / |
6 # | | | | '_ \ / _ \/ __| |/ / | |\/| | ' / |
7 # | | |___| | | | __/ (__| < | | | | . \ |
8 # | \____|_| |_|\___|\___|_|\_\___|_| |_|_|\_\ |
10 # | Copyright Mathias Kettner 2014 mk@mathias-kettner.de |
11 # +------------------------------------------------------------------+
13 # This file is part of Check_MK.
14 # The official homepage is at http://mathias-kettner.de/check_mk.
16 # check_mk is free software; you can redistribute it and/or modify it
17 # under the terms of the GNU General Public License as published by
18 # the Free Software Foundation in version 2. check_mk is distributed
19 # in the hope that it will be useful, but WITHOUT ANY WARRANTY; with-
20 # out even the implied warranty of MERCHANTABILITY or FITNESS FOR A
21 # PARTICULAR PURPOSE. See the GNU General Public License for more de-
22 # tails. You should have received a copy of the GNU General Public
23 # License along with GNU Make; see the file COPYING. If not, write
24 # to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
25 # Boston, MA 02110-1301 USA.
26 """Performing the actual checks."""
37 import cmk
.utils
.defines
as defines
38 import cmk
.utils
.tty
as tty
39 import cmk
.utils
.cpu_tracking
as cpu_tracking
40 from cmk
.utils
.exceptions
import MKGeneralException
, MKTimeout
41 from cmk
.utils
.regex
import regex
42 import cmk
.utils
.debug
45 import cmk_base
.crash_reporting
46 import cmk_base
.console
as console
47 import cmk_base
.config
as config
48 import cmk_base
.ip_lookup
as ip_lookup
49 import cmk_base
.data_sources
as data_sources
50 import cmk_base
.item_state
as item_state
51 import cmk_base
.check_table
as check_table
52 from cmk_base
.exceptions
import MKParseFunctionError
53 import cmk_base
.check_utils
54 import cmk_base
.decorator
55 import cmk_base
.check_api_utils
as check_api_utils
58 import cmk_base
.cee
.keepalive
as keepalive
59 import cmk_base
.cee
.inline_snmp
as inline_snmp
61 keepalive
= None # type: ignore
62 inline_snmp
= None # type: ignore
# global variables used to cache temporary values that do not need
# to be reset after a configuration change.

# File object of the core command pipe (not a raw file descriptor):
# None = not opened yet, False = opening was tried but failed, otherwise the
# pipe opened for writing by _open_command_pipe().
_nagios_command_pipe = None
# OS-level descriptor and path of the temporary check result file as returned
# by tempfile.mkstemp(); the descriptor is reset to None again by
# _close_checkresult_file().
_checkresult_file_fd = None
_checkresult_file_path = None

# True by default; disable_submit() switches it to False.
_submit_to_core = True
# False by default; set to True further down in this module.
_show_perfdata = False
74 # .--Checking------------------------------------------------------------.
76 # | / ___| |__ ___ ___| | _(_)_ __ __ _ |
77 # | | | | '_ \ / _ \/ __| |/ / | '_ \ / _` | |
78 # | | |___| | | | __/ (__| <| | | | | (_| | |
79 # | \____|_| |_|\___|\___|_|\_\_|_| |_|\__, | |
81 # +----------------------------------------------------------------------+
82 # | Execute the Check_MK checks on hosts |
83 # '----------------------------------------------------------------------'
86 @cmk_base.decorator
.handle_check_mk_check_result("mk", "Check_MK")
87 def do_check(hostname
, ipaddress
, only_check_plugin_names
=None):
88 cpu_tracking
.start("busy")
89 console
.verbose("Check_MK version %s\n" % cmk
.__version
__)
91 # Exit state in various situations is configurable since 1.2.3i1
92 exit_spec
= config
.exit_code_spec(hostname
)
94 status
, infotexts
, long_infotexts
, perfdata
= 0, [], [], []
96 # In case of keepalive we always have an ipaddress (can be 0.0.0.0 or :: when
97 # address is unknown). When called as non keepalive ipaddress may be None or
98 # is already an address (2nd argument)
99 if ipaddress
is None and not config
.is_cluster(hostname
):
100 ipaddress
= ip_lookup
.lookup_ip_address(hostname
)
102 item_state
.load(hostname
)
104 sources
= data_sources
.DataSources(hostname
, ipaddress
)
106 num_success
, missing_sections
= \
107 _do_all_checks_on_host(sources
, hostname
, ipaddress
, only_check_plugin_names
)
110 item_state
.save(hostname
)
112 for source
in sources
.get_data_sources():
113 source_state
, source_output
, source_perfdata
= source
.get_summary_result_for_checking()
114 if source_output
!= "":
115 status
= max(status
, source_state
)
116 infotexts
.append("[%s] %s" % (source
.id(), source_output
))
117 perfdata
.extend(source_perfdata
)
119 if missing_sections
and num_success
> 0:
120 missing_sections_status
, missing_sections_infotext
= \
121 _check_missing_sections(missing_sections
, exit_spec
)
122 status
= max(status
, missing_sections_status
)
123 infotexts
.append(missing_sections_infotext
)
125 elif missing_sections
:
126 infotexts
.append("Got no information from host")
127 status
= max(status
, exit_spec
.get("empty_output", 2))
130 phase_times
= cpu_tracking
.get_times()
131 total_times
= phase_times
["TOTAL"]
132 run_time
= total_times
[4]
134 infotexts
.append("execution time %.1f sec" % run_time
)
135 if config
.check_mk_perfdata_with_times
:
137 "execution_time=%.3f" % run_time
,
138 "user_time=%.3f" % total_times
[0],
139 "system_time=%.3f" % total_times
[1],
140 "children_user_time=%.3f" % total_times
[2],
141 "children_system_time=%.3f" % total_times
[3],
144 for phase
, times
in phase_times
.items():
145 if phase
in ["agent", "snmp", "ds"]:
146 t
= times
[4] - sum(times
[:4]) # real time - CPU time
147 perfdata
.append("cmk_time_%s=%.3f" % (phase
, t
))
149 perfdata
.append("execution_time=%.3f" % run_time
)
151 return status
, infotexts
, long_infotexts
, perfdata
153 if _checkresult_file_fd
is not None:
154 _close_checkresult_file()
156 if config
.record_inline_snmp_stats
and config
.is_inline_snmp_host(hostname
):
157 inline_snmp
.save_snmp_stats()
160 def _check_missing_sections(missing_sections
, exit_spec
):
161 specific_missing_sections_spec
= exit_spec
.get("specific_missing_sections", [])
162 specific_missing_sections
, generic_missing_sections
= set(), set()
163 for section
in missing_sections
:
165 for pattern
, status
in specific_missing_sections_spec
:
167 if reg
.match(section
):
169 specific_missing_sections
.add((section
, status
))
172 generic_missing_sections
.add(section
)
174 generic_missing_sections_status
= exit_spec
.get("missing_sections", 1)
176 "Missing agent sections: %s%s" %
177 (", ".join(sorted(generic_missing_sections
)),
178 check_api_utils
.state_markers
[generic_missing_sections_status
])
181 for section
, status
in sorted(specific_missing_sections
):
182 infotexts
.append("%s%s" % (section
, check_api_utils
.state_markers
[status
]))
183 generic_missing_sections_status
= max(generic_missing_sections_status
, status
)
185 return generic_missing_sections_status
, ", ".join(infotexts
)
188 # Loops over all checks for ANY host (cluster, real host), gets the data, calls the check
189 # function that examines that data and sends the result to the Core.
190 def _do_all_checks_on_host(sources
, hostname
, ipaddress
, only_check_plugin_names
=None):
191 num_success
, missing_sections
= 0, set()
193 check_api_utils
.set_hostname(hostname
)
197 belongs_to_cluster
= len(config
.get_config_cache().clusters_of(hostname
)) > 0
198 if belongs_to_cluster
:
199 filter_mode
= "include_clustered"
201 table
= check_table
.get_precompiled_check_table(
202 hostname
, remove_duplicates
=True, filter_mode
=filter_mode
)
204 # When check types are specified via command line, enforce them. Otherwise use the
205 # list of checks defined by the check table.
206 if only_check_plugin_names
is None:
207 only_check_plugin_names
= set([e
[0] for e
in table
])
209 only_check_plugin_names
= set(only_check_plugin_names
)
211 sources
.enforce_check_plugin_names(only_check_plugin_names
)
213 # Gather the data from the sources
214 multi_host_sections
= sources
.get_host_sections()
216 # Filter out check types which are not used on the node
217 config_cache
= config
.get_config_cache()
218 if belongs_to_cluster
:
221 for check_plugin_name
, item
, params
, description
in table
:
222 if hostname
!= config_cache
.host_of_clustered_service(hostname
, description
):
223 pos_match
.add(check_plugin_name
)
225 neg_match
.add(check_plugin_name
)
226 only_check_plugin_names
-= (pos_match
- neg_match
)
228 for check_plugin_name
, item
, params
, description
in table
:
229 if only_check_plugin_names
is not None and check_plugin_name
not in only_check_plugin_names
:
232 if belongs_to_cluster
and hostname
!= config_cache
.host_of_clustered_service(
233 hostname
, description
):
236 success
= execute_check(multi_host_sections
, hostname
, ipaddress
, check_plugin_name
, item
,
240 elif success
is None:
241 # If the service is in any timeperiod we do not want to
242 # - increase num_success or
243 # - add to missing sections
246 missing_sections
.add(cmk_base
.check_utils
.section_name_of(check_plugin_name
))
248 import cmk_base
.inventory
as inventory
249 inventory
.do_inventory_actions_during_checking_for(sources
, multi_host_sections
, hostname
,
252 missing_section_list
= sorted(list(missing_sections
))
253 return num_success
, missing_section_list
256 def execute_check(multi_host_sections
, hostname
, ipaddress
, check_plugin_name
, item
, params
,
# Make a bit of context information globally available, so that functions
# called by checks know this context
260 check_api_utils
.set_service(check_plugin_name
, description
)
261 item_state
.set_item_state_prefix(check_plugin_name
, item
)
263 # Skip checks that are not in their check period
264 period
= config
.check_period_of(hostname
, description
)
265 if period
and not cmk_base
.core
.check_timeperiod(period
):
267 "Skipping service %s: currently not in timeperiod %s.\n" % (description
, period
))
271 console
.vverbose("Service %s: timeperiod %s is currently active.\n" % (description
, period
))
273 section_name
= cmk_base
.check_utils
.section_name_of(check_plugin_name
)
276 section_content
= None
278 # TODO: There is duplicate code with discovery._execute_discovery(). Find a common place!
280 section_content
= multi_host_sections
.get_section_content(
285 service_description
=description
)
286 except MKParseFunctionError
as e
:
# re-raise the original exception to not destroy the trace. This may raise a MKCounterWrapped
# exception which needs to lead to a skipped check instead of a crash
290 raise x
[0], x
[1], x
[2]
292 # TODO: Move this to a helper function
293 if section_content
is None: # No data for this check type
296 # In case of SNMP checks but missing agent response, skip this check.
297 # Special checks which still need to be called even with empty data
299 if not section_content
and cmk_base
.check_utils
.is_snmp_check(check_plugin_name
) \
300 and not config
.check_info
[check_plugin_name
]["handle_empty_info"]:
303 check_function
= config
.check_info
[check_plugin_name
].get("check_function")
304 if check_function
is None:
305 check_function
= lambda item
, params
, section_content
: (
306 3, 'UNKNOWN - Check not implemented')
308 # Call the actual check function
309 item_state
.reset_wrapped_counters()
311 raw_result
= check_function(item
, determine_check_params(params
), section_content
)
312 result
= sanitize_check_result(raw_result
,
313 cmk_base
.check_utils
.is_snmp_check(check_plugin_name
))
314 item_state
.raise_counter_wrap()
316 except item_state
.MKCounterWrapped
as e
:
317 # handle check implementations that do not yet support the
318 # handling of wrapped counters via exception on their own.
319 # Do not submit any check result in that case:
320 console
.verbose("%-20s PEND - Cannot compute check result: %s\n" % (description
, e
))
326 except Exception as e
:
327 if cmk
.utils
.debug
.enabled():
329 result
= 3, cmk_base
.crash_reporting
.create_crash_dump(
330 hostname
, check_plugin_name
, item
, is_manual_check(hostname
, check_plugin_name
, item
),
331 params
, description
, section_content
), []
334 # Now add information about the age of the data in the agent
335 # sections. This is in data_sources.g_agent_cache_info. For clusters we
336 # use the oldest of the timestamps, of course.
337 oldest_cached_at
= None
338 largest_interval
= None
347 for host_sections
in multi_host_sections
.get_host_sections().values():
348 section_entries
= host_sections
.cache_info
349 if section_name
in section_entries
:
350 cached_at
, cache_interval
= section_entries
[section_name
]
351 oldest_cached_at
= minn(oldest_cached_at
, cached_at
)
352 largest_interval
= max(largest_interval
, cache_interval
)
354 _submit_check_result(
358 cached_at
=oldest_cached_at
,
359 cache_interval
=largest_interval
)
363 def determine_check_params(entries
):
364 if not isinstance(entries
, cmk_base
.config
.TimespecificParamList
):
367 # Check if first entry is not dict based or if its dict based
368 # check if the tp_default_value is not a dict
369 if not isinstance(entries
[0], dict) or \
370 not isinstance(entries
[0].get("tp_default_value", {}), dict):
371 # This rule is tuple based, means no dict-key merging
372 if not isinstance(entries
[0], dict):
373 return entries
[0] # A tuple rule, simply return first match
374 return _evaluate_timespecific_entry(
375 entries
[0]) # A timespecific rule, determine the correct tuple
377 # This rule is dictionary based, evaluate all entries and merge matching keys
378 timespecific_entries
= {}
379 for entry
in entries
[::-1]:
380 timespecific_entries
.update(_evaluate_timespecific_entry(entry
))
382 return timespecific_entries
385 def _evaluate_timespecific_entry(entry
):
386 # Dictionary entries without timespecific settings
387 if "tp_default_value" not in entry
:
390 # Timespecific entry, start with default value and update with timespecific entry
391 # Note: This combined_entry may be a dict or tuple, so the update mechanism must handle this correctly
392 # A shallow copy is sufficient
393 combined_entry
= copy
.copy(entry
["tp_default_value"])
394 for timeperiod_name
, tp_entry
in entry
["tp_values"][::-1]:
396 tp_active
= cmk_base
.core
.timeperiod_active(timeperiod_name
)
399 if cmk
.utils
.debug
.enabled():
# If multiple timeperiods are active, their settings are also merged
# This follows the same logic as merging different rules
408 if isinstance(combined_entry
, dict):
409 combined_entry
.update(tp_entry
)
411 combined_entry
= tp_entry
413 return combined_entry
def is_manual_check(hostname, check_plugin_name, item):
    """Tell whether a service is part of the host's check table without autochecks.

    The check table of *hostname* is fetched with duplicates removed and
    autochecks skipped. The service is reported as a manual check when its
    ``(check_plugin_name, item)`` pair is contained in that table.
    """
    service_key = (check_plugin_name, item)
    table_without_autochecks = check_table.get_check_table(hostname,
                                                           remove_duplicates=True,
                                                           skip_autochecks=True)
    return service_key in table_without_autochecks
422 def sanitize_check_result(result
, is_snmp
):
423 if isinstance(result
, tuple):
424 return _sanitize_tuple_check_result(result
)
427 return _item_not_found(is_snmp
)
429 return _sanitize_yield_check_result(result
, is_snmp
)
432 # The check function may return an iterator (using yield) since 1.2.5i5.
433 # This function handles this case and converts them to tuple results
434 def _sanitize_yield_check_result(result
, is_snmp
):
435 subresults
= list(result
)
437 # Empty list? Check returned nothing
439 return _item_not_found(is_snmp
)
441 # Several sub results issued with multiple yields. Make that worst sub check
442 # decide the total state, join the texts and performance data. Subresults with
443 # an infotext of None are used for adding performance data.
448 for subresult
in subresults
:
449 st
, text
, perf
= _sanitize_tuple_check_result(subresult
, allow_missing_infotext
=True)
450 status
= cmk_base
.utils
.worst_service_state(st
, status
)
453 infotexts
.append(text
+ ["", "(!)", "(!!)", "(?)"][st
])
458 return status
, ", ".join(infotexts
), perfdata
461 def _item_not_found(is_snmp
):
463 return 3, "Item not found in SNMP data", []
465 return 3, "Item not found in agent output", []
468 def _sanitize_tuple_check_result(result
, allow_missing_infotext
=False):
470 state
, infotext
, perfdata
= result
[:3]
471 _validate_perf_data_values(perfdata
)
473 state
, infotext
= result
476 infotext
= _sanitize_check_result_infotext(infotext
, allow_missing_infotext
)
478 return state
, infotext
, perfdata
481 def _validate_perf_data_values(perfdata
):
482 if not isinstance(perfdata
, list):
484 for v
in [value
for entry
in perfdata
for value
in entry
[1:]]:
486 # See Nagios performance data spec for detailed information
487 raise MKGeneralException("Performance data values must not contain spaces")
490 def _sanitize_check_result_infotext(infotext
, allow_missing_infotext
):
491 if infotext
is None and not allow_missing_infotext
:
492 raise MKGeneralException("Invalid infotext from check: \"None\"")
494 if isinstance(infotext
, str):
495 return infotext
.decode('utf-8')
def _convert_perf_data(p):
    """Render one performance data entry as a single "a=b;c;d;e;f" string.

    Every element of *p* is converted with _convert_perf_value(). The
    converted values are padded with four empty strings and cut to exactly
    six fields, so up to four trailing fields may be left out by the caller.
    *p* needs at least two elements; a shorter entry cannot fill the six
    placeholders and makes the string formatting fail.

    NOTE(review): the six fields presumably are label, value, warn, crit,
    min and max as in the Nagios performance data format - confirm.
    """
    # Build a real list first: map() only returns a list on Python 2. On
    # Python 3 it returns an iterator, which cannot be concatenated with a list.
    fields = [_convert_perf_value(value) for value in p]
    # Fill up to six fields and drop anything beyond that.
    fields = (fields + ['', '', '', ''])[:6]
    return "%s=%s;%s;%s;%s;%s" % tuple(fields)
506 def _convert_perf_value(x
):
509 elif isinstance(x
, six
.string_types
):
511 elif isinstance(x
, float):
512 return ("%.6f" % x
).rstrip("0").rstrip(".")
518 # .--Submit to core------------------------------------------------------.
520 # | / ___| _ _| |__ _ __ ___ (_) |_ | |_ ___ ___ ___ _ __ ___ |
521 # | \___ \| | | | '_ \| '_ ` _ \| | __| | __/ _ \ / __/ _ \| '__/ _ \ |
522 # | ___) | |_| | |_) | | | | | | | |_ | || (_) | | (_| (_) | | | __/ |
523 # | |____/ \__,_|_.__/|_| |_| |_|_|\__| \__\___/ \___\___/|_| \___| |
525 # +----------------------------------------------------------------------+
526 # | Submit check results to the core. Care about different methods |
527 # | depending on the running core. |
528 # '----------------------------------------------------------------------'
529 # TODO: Put the core specific things to dedicated files
532 def _submit_check_result(host
, servicedesc
, result
, cached_at
=None, cache_interval
=None):
534 result
= 3, "Check plugin did not return any result"
537 raise MKGeneralException("Invalid check result: %s" % (result
,))
538 state
, infotext
, perfdata
= result
540 if not (infotext
.startswith("OK -") or infotext
.startswith("WARN -") or
541 infotext
.startswith("CRIT -") or infotext
.startswith("UNKNOWN -")):
542 infotext
= defines
.short_service_state_name(state
) + " - " + infotext
# make sure that plugin output does not contain a vertical bar. If that is the
# case then replace it with a Unicode "Light vertical bar" (U+2758)
546 if isinstance(infotext
, unicode):
547 # regular check results are unicode...
548 infotext
= infotext
.replace(u
"|", u
"\u2758")
550 # ...crash dumps, and hard-coded outputs are regular strings
551 infotext
= infotext
.replace("|", u
"\u2758".encode("utf8"))
553 # performance data - if any - is stored in the third part of the result
558 # Check may append the name of the check command to the
559 # list of perfdata. It is of type string. And it might be
560 # needed by the graphing tool in order to choose the correct
561 # template. Currently this is used only by mrpe.
562 if len(perfdata
) > 0 and isinstance(perfdata
[-1], six
.string_types
):
563 check_command
= perfdata
[-1]
569 perftexts
.append(_convert_perf_data(p
))
572 if check_command
and config
.perfdata_format
== "pnp":
573 perftexts
.append("[%s]" % check_command
)
574 perftext
= "|" + (" ".join(perftexts
))
577 _do_submit_to_core(host
, servicedesc
, state
, infotext
+ perftext
, cached_at
, cache_interval
)
579 _output_check_result(servicedesc
, state
, infotext
, perftexts
)
582 def _output_check_result(servicedesc
, state
, infotext
, perftexts
):
584 infotext_fmt
= "%-56s"
585 p
= ' (%s)' % (" ".join(perftexts
))
590 console
.verbose("%-20s %s%s" + infotext_fmt
+ "%s%s\n",
591 servicedesc
.encode('utf-8'), tty
.bold
, tty
.states
[state
],
592 cmk_base
.utils
.make_utf8(infotext
.split('\n')[0]), tty
.normal
,
593 cmk_base
.utils
.make_utf8(p
))
596 def _do_submit_to_core(host
, service
, state
, output
, cached_at
=None, cache_interval
=None):
597 if _in_keepalive_mode():
598 # Regular case for the CMC - check helpers are running in keepalive mode
599 keepalive
.add_keepalive_check_result(host
, service
, state
, output
, cached_at
,
602 elif config
.check_submission
== "pipe" or config
.monitoring_core
== "cmc":
603 # In case of CMC this is used when running "cmk" manually
604 _submit_via_command_pipe(host
, service
, state
, output
)
606 elif config
.check_submission
== "file":
607 _submit_via_check_result_file(host
, service
, state
, output
)
610 raise MKGeneralException("Invalid setting %r for check_submission. "
611 "Must be 'pipe' or 'file'" % config
.check_submission
)
614 def _submit_via_check_result_file(host
, service
, state
, output
):
615 output
= output
.replace("\n", "\\n")
616 _open_checkresult_file()
617 if _checkresult_file_fd
:
620 _checkresult_file_fd
, """host_name=%s
621 service_description=%s
631 """ % (host
, cmk_base
.utils
.make_utf8(service
), now
, now
, state
, cmk_base
.utils
.make_utf8(output
)))
634 def _open_checkresult_file():
635 global _checkresult_file_fd
636 global _checkresult_file_path
637 if _checkresult_file_fd
is None:
639 _checkresult_file_fd
, _checkresult_file_path
= \
640 tempfile
.mkstemp('', 'c', cmk
.utils
.paths
.check_result_path
)
641 except Exception as e
:
642 raise MKGeneralException("Cannot create check result file in %s: %s" %
643 (cmk
.utils
.paths
.check_result_path
, e
))
def _close_checkresult_file():
    """Close the temporary check result file and create its ".ok" companion.

    Does nothing when no check result file is open. Otherwise the file
    descriptor is closed, an empty file named like the check result file plus
    ".ok" is created and the module-level descriptor is reset to None.

    NOTE(review): the ".ok" file presumably tells the core that the result
    file is complete and may be processed - confirm.
    """
    global _checkresult_file_fd
    if _checkresult_file_fd is None:
        return

    os.close(_checkresult_file_fd)
    # Create the empty marker and close it right away instead of leaving the
    # open handle to the garbage collector. open() replaces the Python 2 only
    # builtin file().
    with open(_checkresult_file_path + ".ok", "w"):
        pass
    _checkresult_file_fd = None
654 def _submit_via_command_pipe(host
, service
, state
, output
):
655 output
= output
.replace("\n", "\\n")
657 if _nagios_command_pipe
:
658 # [<timestamp>] PROCESS_SERVICE_CHECK_RESULT;<host_name>;<svc_description>;<return_code>;<plugin_output>
659 _nagios_command_pipe
.write("[%d] PROCESS_SERVICE_CHECK_RESULT;%s;%s;%d;%s\n" %
660 (int(time
.time()), host
, cmk_base
.utils
.make_utf8(service
),
661 state
, cmk_base
.utils
.make_utf8(output
)))
662 # Important: Nagios needs the complete command in one single write() block!
663 # Python buffers and sends chunks of 4096 bytes, if we do not flush.
664 _nagios_command_pipe
.flush()
667 def _open_command_pipe():
668 global _nagios_command_pipe
669 if _nagios_command_pipe
is None:
670 if not os
.path
.exists(cmk
.utils
.paths
.nagios_command_pipe_path
):
671 _nagios_command_pipe
= False # False means: tried but failed to open
672 raise MKGeneralException(
673 "Missing core command pipe '%s'" % cmk
.utils
.paths
.nagios_command_pipe_path
)
676 signal
.signal(signal
.SIGALRM
, _core_pipe_open_timeout
)
677 signal
.alarm(3) # three seconds to open pipe
678 _nagios_command_pipe
= file(cmk
.utils
.paths
.nagios_command_pipe_path
, 'w')
679 signal
.alarm(0) # cancel alarm
680 except Exception as e
:
681 _nagios_command_pipe
= False
682 raise MKGeneralException("Error writing to command pipe: %s" % e
)
def _core_pipe_open_timeout(signum, stackframe):
    """Signal handler that aborts a hanging open of the core command pipe.

    It is installed for SIGALRM by _open_command_pipe(). Both handler
    arguments are ignored and an IOError is raised unconditionally.
    """
    timeout_error = IOError("Timeout while opening pipe")
    raise timeout_error
690 # .--Misc----------------------------------------------------------------.
692 # | | \/ (_)___ ___ |
693 # | | |\/| | / __|/ __| |
694 # | | | | | \__ \ (__ |
695 # | |_| |_|_|___/\___| |
697 # +----------------------------------------------------------------------+
698 # | Various helper functions |
699 # '----------------------------------------------------------------------'
703 global _show_perfdata
704 _show_perfdata
= True
def disable_submit():
    """Switch the module-wide _submit_to_core flag off.

    The flag is initialised to True at module level; this function only
    rebinds it to False and returns nothing.
    """
    global _submit_to_core
    _submit_to_core = False
712 def _in_keepalive_mode():
714 return keepalive
.enabled()