2 # -*- encoding: utf-8; py-indent-offset: 4 -*-
3 # +------------------------------------------------------------------+
4 # | ____ _ _ __ __ _ __ |
5 # | / ___| |__ ___ ___| | __ | \/ | |/ / |
6 # | | | | '_ \ / _ \/ __| |/ / | |\/| | ' / |
7 # | | |___| | | | __/ (__| < | | | | . \ |
8 # | \____|_| |_|\___|\___|_|\_\___|_| |_|_|\_\ |
10 # | Copyright Mathias Kettner 2014 mk@mathias-kettner.de |
11 # +------------------------------------------------------------------+
13 # This file is part of Check_MK.
14 # The official homepage is at http://mathias-kettner.de/check_mk.
16 # check_mk is free software; you can redistribute it and/or modify it
17 # under the terms of the GNU General Public License as published by
18 # the Free Software Foundation in version 2. check_mk is distributed
19 # in the hope that it will be useful, but WITHOUT ANY WARRANTY; with-
20 # out even the implied warranty of MERCHANTABILITY or FITNESS FOR A
21 # PARTICULAR PURPOSE. See the GNU General Public License for more de-
22 # tails. You should have received a copy of the GNU General Public
23 # License along with GNU Make; see the file COPYING. If not, write
24 # to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
25 # Boston, MA 02110-1301 USA.
33 from cmk
.utils
.regex
import regex
34 import cmk
.utils
.tty
as tty
35 import cmk
.utils
.debug
36 import cmk
.utils
.paths
37 from cmk
.utils
.exceptions
import MKGeneralException
, MKTimeout
39 import cmk_base
.crash_reporting
40 import cmk_base
.config
as config
41 import cmk_base
.console
as console
42 import cmk_base
.ip_lookup
as ip_lookup
43 import cmk_base
.check_api_utils
as check_api_utils
44 import cmk_base
.item_state
as item_state
45 import cmk_base
.checking
as checking
46 import cmk_base
.data_sources
as data_sources
47 import cmk_base
.check_table
as check_table
49 from cmk_base
.exceptions
import MKParseFunctionError
50 import cmk_base
.cleanup
51 import cmk_base
.check_utils
52 import cmk_base
.decorator
53 import cmk_base
.snmp_scan
as snmp_scan
55 # Run the discovery queued by check_discovery() - if any
56 _marked_host_discovery_timeout
= 120
58 # .--cmk -I--------------------------------------------------------------.
60 # | ___ _ __ ___ | | __ |_ _| |
61 # | / __| '_ ` _ \| |/ / _____| | |
62 # | | (__| | | | | | < |_____| | |
63 # | \___|_| |_| |_|_|\_\ |___| |
65 # +----------------------------------------------------------------------+
66 # | Functions for command line options -I and -II |
67 # '----------------------------------------------------------------------'
70 # Function implementing cmk -I and cmk -II. This is directly
71 # being called from the main option parsing code. The list of
72 # hostnames is already prepared by the main code. If it is
73 # empty then we use all hosts and switch to using cache files.
# Function implementing cmk -I and cmk -II. This is directly
# being called from the main option parsing code. The list of
# hostnames is already prepared by the main code. If it is
# empty then we use all hosts and switch to using cache files.
def do_discovery(hostnames, check_plugin_names, only_new):
    """Discover services on the given hosts (cmk -I / -II).

    NOTE(review): reconstructed from a fragmented source; several missing
    control-flow lines were restored from context -- verify against the
    original revision.
    """
    use_caches = data_sources.abstract.DataSource.get_may_use_cache_file()
    if not hostnames:
        console.verbose("Discovering services on all hosts\n")
        hostnames = config.all_active_realhosts()
        use_caches = True
    else:
        console.verbose("Discovering services on: %s\n" % ", ".join(hostnames))

    # For clusters add their nodes to the list. Clusters itself
    # cannot be discovered but the user is allowed to specify
    # them and we do discovery on the nodes instead.
    cluster_hosts = []
    for h in hostnames:
        nodes = config.nodes_of(h)
        if nodes:
            cluster_hosts.append(h)
            hostnames += nodes

    # Then remove clusters and make list unique
    hostnames = list(set([h for h in hostnames if not config.is_cluster(h)]))
    hostnames.sort()

    # Now loop through all hosts
    for hostname in hostnames:
        console.section_begin(hostname)

        try:
            # In debug mode let discovery exceptions escape, otherwise warn only.
            if cmk.utils.debug.enabled():
                on_error = "raise"
            else:
                on_error = "warn"

            ipaddress = ip_lookup.lookup_ip_address(hostname)

            # Usually we disable SNMP scan if cmk -I is used without a list of
            # explicity hosts. But for host that have never been service-discovered
            # yet (do not have autochecks), we enable SNMP scan.
            do_snmp_scan = not use_caches or not _has_autochecks(hostname)

            sources = _get_sources_for_discovery(hostname, ipaddress, check_plugin_names,
                                                 do_snmp_scan, on_error)
            multi_host_sections = _get_host_sections_for_discovery(sources, use_caches=use_caches)

            _do_discovery_for(hostname, ipaddress, sources, multi_host_sections, check_plugin_names,
                              only_new, on_error)

        except Exception as e:
            if cmk.utils.debug.enabled():
                raise
            console.section_error("%s" % e)
        finally:
            cmk_base.cleanup.cleanup_globals()

    # Check whether or not the cluster host autocheck files are still
    # existant. Remove them. The autochecks are only stored in the nodes
    # autochecks files these days.
    for hostname in cluster_hosts:
        _remove_autochecks_file(hostname)
def _do_discovery_for(hostname, ipaddress, sources, multi_host_sections, check_plugin_names,
                      only_new, on_error):
    """Run discovery for one host and write the merged autochecks file.

    NOTE(review): reconstructed from a fragmented source; missing lines
    (else-branches, counters, result initialization) restored from context.
    """
    if not check_plugin_names and not only_new:
        old_items = []  # do not even read old file
    else:
        old_items = parse_autochecks_file(hostname)

    if not check_plugin_names:
        # In 'multi_host_sections = _get_host_sections_for_discovery(..)'
        # we've already discovered the right check plugin names.
        # _discover_services(..) would discover check plugin names again.
        # In order to avoid a second discovery (SNMP data source would do
        # another SNMP scan) we enforce this selection to be used.
        check_plugin_names = multi_host_sections.get_check_plugin_names()
        sources.enforce_check_plugin_names(check_plugin_names)

    console.step("Executing discovery plugins (%d)" % len(check_plugin_names))
    console.vverbose("  Trying discovery with: %s\n" % ", ".join(check_plugin_names))
    new_items = _discover_services(
        hostname, ipaddress, sources, multi_host_sections, on_error=on_error)

    # There are three ways of how to merge existing and new discovered checks:
    # 1. -II without --checks=
    #        check_plugin_names is empty, only_new is False
    #        --> complete drop old services, only use new ones
    # 2. -II with --checks=
    #        --> drop old services of that types
    #        check_plugin_names is not empty, only_new is False
    # 3. -I
    #        --> just add new services

    # Parse old items into a dict (ct, item) -> paramstring
    result = {}
    for check_plugin_name, item, paramstring in old_items:
        # Take over old items if -I is selected or if -II
        # is selected with --checks= and the check type is not
        # one of the listed ones
        if only_new or (check_plugin_names and check_plugin_name not in check_plugin_names):
            result[(check_plugin_name, item)] = paramstring

    stats, num_services = {}, 0
    for check_plugin_name, item, paramstring in new_items:
        if (check_plugin_name, item) not in result:
            result[(check_plugin_name, item)] = paramstring
            stats.setdefault(check_plugin_name, 0)
            stats[check_plugin_name] += 1
            num_services += 1

    final_items = []
    for (check_plugin_name, item), paramstring in result.items():
        final_items.append((check_plugin_name, item, paramstring))

    _save_autochecks_file(hostname, final_items)

    # sorted() instead of list.sort() on dict keys: works on both Python 2
    # and 3 (dict.keys() is a view, not a list, on Python 3)
    found_check_plugin_names = sorted(stats.keys())

    if found_check_plugin_names:
        for check_plugin_name in found_check_plugin_names:
            console.verbose("%s%3d%s %s\n" % (tty.green + tty.bold, stats[check_plugin_name],
                                              tty.normal, check_plugin_name))
        console.section_success("Found %d services" % num_services)
    else:
        console.section_success("Found nothing%s" % (only_new and " new" or ""))
def discover_on_host(mode,
                     hostname,
                     do_snmp_scan,
                     use_caches,
                     on_error="ignore",
                     service_filter=None):
    """Determine changed services on host and update the autochecks.

    param mode: can be one of "new", "remove", "fixall", "refresh"
    param do_snmp_scan: if True, a snmp host will be scanned, otherwise uses only
                        the check types previously discovered
    param service_filter: if a filter is set, it controls whether items are touched
                          by the discovery. if it returns False for a new item it
                          will not be added, if it returns False for a vanished
                          item, that item is kept

    Returns ([added, removed, kept, total], error_message_or_None).

    NOTE(review): reconstructed from a fragmented source; the hidden middle
    parameters and several counter/branch lines were restored from the
    visible call sites -- verify against the original revision.
    """
    counts = {"added": 0, "removed": 0, "kept": 0}

    if hostname not in config.all_active_realhosts():
        return [0, 0, 0, 0], ""

    if service_filter is None:
        service_filter = lambda hostname, check_plugin_name, item: True

    err = None

    try:
        # in "refresh" mode we first need to remove all previously discovered
        # checks of the host, so that _get_host_services() does show us the
        # new discovered check parameters.
        if mode == "refresh":
            counts["removed"] += remove_autochecks_of(hostname)  # this is cluster-aware!

        if config.is_cluster(hostname):
            ipaddress = None
        else:
            ipaddress = ip_lookup.lookup_ip_address(hostname)

        sources = _get_sources_for_discovery(
            hostname,
            ipaddress,
            check_plugin_names=None,
            do_snmp_scan=do_snmp_scan,
            on_error=on_error)

        multi_host_sections = _get_host_sections_for_discovery(sources, use_caches=use_caches)

        # Compute current state of new and existing checks
        services = _get_host_services(
            hostname, ipaddress, sources, multi_host_sections, on_error=on_error)

        # Create new list of checks
        new_items = {}
        for (check_plugin_name, item), (check_source, paramstring) in services.items():
            if check_source in ("custom", "legacy", "active", "manual"):
                continue  # this is not an autocheck or ignored and currently not checked
                # Note discovered checks that are shadowed by manual checks will vanish
                # that way.

            if check_source == "new":
                if mode in ("new", "fixall", "refresh") and service_filter(
                        hostname, check_plugin_name, item):
                    counts["added"] += 1
                    new_items[(check_plugin_name, item)] = paramstring

            elif check_source in ("old", "ignored"):
                # keep currently existing valid services in any case
                new_items[(check_plugin_name, item)] = paramstring
                counts["kept"] += 1

            elif check_source == "vanished":
                # keep item, if we are currently only looking for new services
                # otherwise fix it: remove ignored and non-longer existing services
                if mode not in ("fixall",
                                "remove") or not service_filter(hostname, check_plugin_name, item):
                    new_items[(check_plugin_name, item)] = paramstring
                    counts["kept"] += 1
                else:
                    counts["removed"] += 1

            # Silently keep clustered services
            elif check_source.startswith("clustered_"):
                new_items[(check_plugin_name, item)] = paramstring

            else:
                raise MKGeneralException("Unknown check source '%s'" % check_source)
        set_autochecks_of(hostname, new_items)

    except MKTimeout:
        raise  # let general timeout through

    except Exception as e:
        if cmk.utils.debug.enabled():
            raise
        err = str(e)
    return [counts["added"], counts["removed"], counts["kept"],
            counts["added"] + counts["kept"]], err
300 # .--Discovery Check-----------------------------------------------------.
302 # | | _ \(_)___ ___ ___| |__ ___ ___| | __ |
303 # | | | | | / __|/ __| / __| '_ \ / _ \/ __| |/ / |
304 # | | |_| | \__ \ (__ _ | (__| | | | __/ (__| < |
305 # | |____/|_|___/\___(_) \___|_| |_|\___|\___|_|\_\ |
307 # +----------------------------------------------------------------------+
308 # | Active check for checking undiscovered services. |
309 # '----------------------------------------------------------------------'
@cmk_base.decorator.handle_check_mk_check_result("discovery", "Check_MK Discovery")
def check_discovery(hostname, ipaddress):
    """Active check reporting undiscovered / vanished services for a host.

    Returns (status, infotexts, long_infotexts, perfdata) which the decorator
    turns into a check result.

    NOTE(review): reconstructed from a fragmented source; loop/branch lines
    that were missing have been restored from context -- verify against the
    original revision.
    """
    params = discovery_check_parameters(hostname) or \
             default_discovery_check_parameters()

    status, infotexts, long_infotexts, perfdata = 0, [], [], []

    # In case of keepalive discovery we always have an ipaddress. When called as non keepalive
    # ipaddress is always None
    if ipaddress is None and not config.is_cluster(hostname):
        ipaddress = ip_lookup.lookup_ip_address(hostname)

    sources = _get_sources_for_discovery(
        hostname,
        ipaddress,
        check_plugin_names=None,
        do_snmp_scan=params["inventory_check_do_scan"],
        on_error="raise")

    multi_host_sections = _get_host_sections_for_discovery(
        sources, use_caches=data_sources.abstract.DataSource.get_may_use_cache_file())

    services = _get_host_services(
        hostname, ipaddress, sources, multi_host_sections, on_error="raise")

    need_rediscovery = False

    params_rediscovery = params.get("inventory_rediscovery", {})

    if params_rediscovery.get("service_whitelist", []) or\
            params_rediscovery.get("service_blacklist", []):
        # whitelist. if none is specified, this matches everything
        whitelist = regex("|".join(
            ["(%s)" % pat for pat in params_rediscovery.get("service_whitelist", [".*"])]))
        # blacklist. if none is specified, this matches nothing
        blacklist = regex("|".join(
            ["(%s)" % pat for pat in params_rediscovery.get("service_blacklist", ["(?!x)x"])]))

        item_filters = lambda hostname, check_plugin_name, item:\
                _discovery_filter_by_lists(hostname, check_plugin_name, item, whitelist, blacklist)
    else:
        item_filters = None

    for check_state, title, params_key, default_state in [
        ("new", "unmonitored", "severity_unmonitored", config.inventory_check_severity),
        ("vanished", "vanished", "severity_vanished", 0),
    ]:

        affected_check_plugin_names = {}
        count = 0
        unfiltered = False

        for (check_plugin_name, item), (check_source, _unused_paramstring) in services.items():
            if check_source == check_state:
                count += 1
                affected_check_plugin_names.setdefault(check_plugin_name, 0)
                affected_check_plugin_names[check_plugin_name] += 1

                if not unfiltered and\
                        (item_filters is None or item_filters(hostname, check_plugin_name, item)):
                    unfiltered = True

                long_infotexts.append(
                    "%s: %s: %s" % (title, check_plugin_name,
                                    config.service_description(hostname, check_plugin_name, item)))

        if affected_check_plugin_names:
            info = ", ".join(["%s:%d" % e for e in affected_check_plugin_names.items()])
            st = params.get(params_key, default_state)
            status = cmk_base.utils.worst_service_state(status, st)
            infotexts.append(
                "%d %s services (%s)%s" % (count, title, info, check_api_utils.state_markers[st]))

            if params.get("inventory_rediscovery", False):
                mode = params["inventory_rediscovery"]["mode"]
                if unfiltered and\
                        ((check_state == "new" and mode in ( 0, 2, 3 )) or
                         (check_state == "vanished" and mode in ( 1, 2, 3 ))):
                    need_rediscovery = True
        else:
            infotexts.append("no %s services found" % title)

    for (check_plugin_name, item), (check_source, _unused_paramstring) in services.items():
        if check_source == "ignored":
            long_infotexts.append(
                "ignored: %s: %s" % (check_plugin_name,
                                     config.service_description(hostname, check_plugin_name, item)))

    _set_rediscovery_flag(hostname, need_rediscovery)
    if need_rediscovery:
        infotexts.append("rediscovery scheduled")

    # Add data source information to check results
    for source in sources.get_data_sources():
        source_state, source_output, _source_perfdata = source.get_summary_result()
        # Do not output informational (state = 0) things. These information are shown by the "Check_MK" service
        if source_state != 0:
            status = max(status, source_state)
            infotexts.append("[%s] %s" % (source.id(), source_output))

    return status, infotexts, long_infotexts, perfdata
def discovery_check_parameters(hostname):
    """Compute the parameters for the discovery check for a host.

    Note: if the discovery check is disabled for that host, None is returned.

    NOTE(review): the first-match return branch was missing from the
    fragmented source and was reconstructed -- verify.
    """
    entries = config.host_extra_conf(hostname, config.periodic_discovery)
    if entries:
        return entries[0]

    elif config.inventory_check_interval:
        # Support legacy global configurations
        return default_discovery_check_parameters()

    return None
def default_discovery_check_parameters():
    """Return the discovery check parameters derived from the legacy global
    configuration variables."""
    return {
        "check_interval": config.inventory_check_interval,
        "severity_unmonitored": config.inventory_check_severity,
        "severity_vanished": 0,
        "inventory_check_do_scan": config.inventory_check_do_scan,
    }
def _set_rediscovery_flag(hostname, need_rediscovery):
    """Create (need_rediscovery=True) or remove (False) the per-host marker
    file that queues the host for the background rediscovery job."""

    def touch(filename):
        # Create the file if it does not exist yet; the content is irrelevant,
        # only the file's existence and mtime are used.
        if not os.path.exists(filename):
            f = open(filename, "w")
            f.close()

    # Consistency: use the shared helper instead of rebuilding the path
    # (same value as cmk.utils.paths.var_dir + '/autodiscovery').
    autodiscovery_dir = _get_autodiscovery_dir()
    discovery_filename = os.path.join(autodiscovery_dir, hostname)

    if need_rediscovery:
        if not os.path.exists(autodiscovery_dir):
            os.makedirs(autodiscovery_dir)
        touch(discovery_filename)
    else:
        if os.path.exists(discovery_filename):
            os.remove(discovery_filename)
459 class DiscoveryTimeout(Exception):
def _handle_discovery_timeout(signum=None, stack_frame=None):
    """SIGALRM handler: abort the marked-host discovery loop.

    Fix: signal.signal() invokes handlers with two positional arguments
    (signum, frame); a zero-argument handler would die with a TypeError
    instead of raising DiscoveryTimeout. The defaults keep any existing
    zero-argument caller working.
    """
    raise DiscoveryTimeout()
def _set_discovery_timeout():
    """Arm a SIGALRM so one marked-host discovery run cannot exceed its
    time budget (_marked_host_discovery_timeout)."""
    signal.signal(signal.SIGALRM, _handle_discovery_timeout)
    # Add an additional 10 seconds as grace period
    signal.alarm(_marked_host_discovery_timeout + 10)
473 def _clear_discovery_timeout():
def _get_autodiscovery_dir():
    """Return the directory holding one marker file per host that is queued
    for automatic rediscovery."""
    return cmk.utils.paths.var_dir + '/autodiscovery'
def discover_marked_hosts(core):
    """Run the queued rediscovery for all hosts marked by check_discovery(),
    then reload/restart the core if any discovery changed services.

    NOTE(review): reconstructed from a fragmented source; early returns,
    try/finally frame and the restart else-branch restored from context --
    verify against the original revision.
    """
    console.verbose("Doing discovery for all marked hosts:\n")
    autodiscovery_dir = _get_autodiscovery_dir()

    if not os.path.exists(autodiscovery_dir):
        # there is obviously nothing to do
        console.verbose("  Nothing to do. %s is missing.\n" % autodiscovery_dir)
        return

    now_ts = time.time()
    end_time_ts = now_ts + _marked_host_discovery_timeout  # don't run for more than 2 minutes
    oldest_queued = _queue_age()
    all_hosts = config.all_configured_hosts()
    hosts = os.listdir(autodiscovery_dir)
    if not hosts:
        console.verbose("  Nothing to do. No hosts marked by discovery check.\n")
        return

    activation_required = False

    try:
        _set_discovery_timeout()
        for hostname in hosts:
            if _discover_marked_host(hostname, all_hosts, now_ts, oldest_queued):
                activation_required = True

            if time.time() > end_time_ts:
                console.verbose(
                    "  Timeout of %d seconds reached. Lets do the remaining hosts next time." %
                    _marked_host_discovery_timeout)
                break
    except DiscoveryTimeout:
        pass
    finally:
        _clear_discovery_timeout()

    if activation_required:
        console.verbose("\nRestarting monitoring core with updated configuration...\n")
        if config.monitoring_core == "cmc":
            cmk_base.core.do_reload(core)
        else:
            cmk_base.core.do_restart(core)
def _discover_marked_host(hostname, all_hosts, now_ts, oldest_queued):
    """Run the queued rediscovery for one marked host.

    Returns True if services changed and the configured rediscovery mode
    requires an activation of the changes.

    NOTE(review): reconstructed from a fragmented source; try/except frames
    around os.remove() and several else-branches restored from context --
    verify against the original revision.
    """
    services_changed = False

    mode_table = {0: "new", 1: "remove", 2: "fixall", 3: "refresh"}

    console.verbose("%s%s%s:\n" % (tty.bold, hostname, tty.normal))
    host_flag_path = os.path.join(_get_autodiscovery_dir(), hostname)
    if hostname not in all_hosts:
        try:
            os.remove(host_flag_path)
        except OSError:
            pass
        console.verbose("  Skipped. Host does not exist in configuration. Removing mark.\n")
        return services_changed

    params = discovery_check_parameters(hostname) or default_discovery_check_parameters()
    params_rediscovery = params.get("inventory_rediscovery", {})
    if "service_blacklist" in params_rediscovery or "service_whitelist" in params_rediscovery:
        # whitelist. if none is specified, this matches everything
        whitelist = regex("|".join(
            ["(%s)" % pat for pat in params_rediscovery.get("service_whitelist", [".*"])]))
        # blacklist. if none is specified, this matches nothing
        blacklist = regex("|".join(
            ["(%s)" % pat for pat in params_rediscovery.get("service_blacklist", ["(?!x)x"])]))
        item_filters = lambda hostname, check_plugin_name, item:\
                _discovery_filter_by_lists(hostname, check_plugin_name, item, whitelist, blacklist)
    else:
        item_filters = None

    why_not = _may_rediscover(params, now_ts, oldest_queued)
    if not why_not:
        redisc_params = params["inventory_rediscovery"]
        console.verbose("  Doing discovery with mode '%s'...\n" % mode_table[redisc_params["mode"]])
        result, error = discover_on_host(
            mode_table[redisc_params["mode"]],
            hostname,
            do_snmp_scan=params["inventory_check_do_scan"],
            use_caches=True,
            service_filter=item_filters)
        if error is not None:
            if error:
                console.verbose("failed: %s\n" % error)
            else:
                # for offline hosts the error message is empty. This is to remain
                # compatible with the automation code
                console.verbose("  failed: host is offline\n")
        else:
            new_services, removed_services, kept_services, total_services = result
            if new_services == 0 and removed_services == 0 and kept_services == total_services:
                console.verbose("  nothing changed.\n")
            else:
                console.verbose(
                    "  %d new, %d removed, %d kept, %d total services.\n" % (tuple(result)))
                if redisc_params["activation"]:
                    services_changed = True

                # Now ensure that the discovery service is updated right after the changes
                schedule_discovery_check(hostname)

        # delete the file even in error case, otherwise we might be causing the same error
        # every time the cron job runs
        try:
            os.remove(host_flag_path)
        except OSError:
            pass
    else:
        console.verbose("  skipped: %s\n" % why_not)

    return services_changed
def _queue_age():
    """Return the mtime of the oldest queued autodiscovery marker file.

    NOTE(review): the 'def' header line itself was missing from the
    fragmented source; the name and contract were reconstructed from the
    caller (oldest_queued = _queue_age()) -- verify against the original.
    """
    autodiscovery_dir = _get_autodiscovery_dir()
    oldest = time.time()
    for filename in os.listdir(autodiscovery_dir):
        oldest = min(oldest, os.path.getmtime(autodiscovery_dir + "/" + filename))
    return oldest
602 def _may_rediscover(params
, now_ts
, oldest_queued
):
603 if "inventory_rediscovery" not in params
:
604 return "automatic discovery disabled for this host"
606 now
= time
.gmtime(now_ts
)
607 for start_hours_mins
, end_hours_mins
in params
["inventory_rediscovery"]["excluded_time"]:
608 start_time
= time
.struct_time(
609 (now
.tm_year
, now
.tm_mon
, now
.tm_mday
, start_hours_mins
[0], start_hours_mins
[1], 0,
610 now
.tm_wday
, now
.tm_yday
, now
.tm_isdst
))
612 end_time
= time
.struct_time((now
.tm_year
, now
.tm_mon
, now
.tm_mday
, end_hours_mins
[0],
613 end_hours_mins
[1], 0, now
.tm_wday
, now
.tm_yday
, now
.tm_isdst
))
615 if start_time
<= now
<= end_time
:
616 return "we are currently in a disallowed time of day"
618 if now_ts
- oldest_queued
< params
["inventory_rediscovery"]["group_time"]:
619 return "last activation is too recent"
def _discovery_filter_by_lists(hostname, check_plugin_name, item, whitelist, blacklist):
    """Return True if the service description matches the whitelist regex
    and does not match the blacklist regex (both anchored via .match)."""
    description = config.service_description(hostname, check_plugin_name, item)
    return whitelist.match(description) is not None and \
        blacklist.match(description) is None
631 # .--Helpers-------------------------------------------------------------.
633 # | | | | | ___| |_ __ ___ _ __ ___ |
634 # | | |_| |/ _ \ | '_ \ / _ \ '__/ __| |
635 # | | _ | __/ | |_) | __/ | \__ \ |
636 # | |_| |_|\___|_| .__/ \___|_| |___/ |
638 # +----------------------------------------------------------------------+
639 # | Various helper functions |
640 # '----------------------------------------------------------------------'
# TODO: Move to livestatus module!
def schedule_discovery_check(hostname):
    """Force an immediate run of the discovery service via the livestatus
    command socket (best effort; errors are swallowed unless debugging).

    NOTE(review): reconstructed from a fragmented source; the try/except
    frame, else-branch and ';TRY' suffix were restored from context --
    verify against the original revision.
    """
    try:
        s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        s.connect(cmk.utils.paths.livestatus_unix_socket)
        now = int(time.time())
        if 'cmk-inventory' in config.use_new_descriptions_for:
            command = "SCHEDULE_FORCED_SVC_CHECK;%s;Check_MK Discovery;%d" % (hostname, now)
        else:
            # TODO: Remove this old name handling one day
            command = "SCHEDULE_FORCED_SVC_CHECK;%s;Check_MK inventory;%d" % (hostname, now)

        # Ignore missing check and avoid warning in cmc.log
        if config.monitoring_core == "cmc":
            command += ";TRY"

        s.send("COMMAND [%d] %s\n" % (now, command))
    except Exception:
        if cmk.utils.debug.enabled():
            raise
666 # .--Discovery-----------------------------------------------------------.
668 # | | _ \(_)___ ___ _____ _____ _ __ _ _ |
669 # | | | | | / __|/ __/ _ \ \ / / _ \ '__| | | | |
670 # | | |_| | \__ \ (_| (_) \ V / __/ | | |_| | |
671 # | |____/|_|___/\___\___/ \_/ \___|_| \__, | |
673 # +----------------------------------------------------------------------+
674 # | Core code of actual service discovery |
675 # '----------------------------------------------------------------------'
678 # Create a table of autodiscovered services of a host. Do not save
679 # this table anywhere. Do not read any previously discovered
680 # services. The table has the following columns:
683 # 3. Parameter string (not evaluated)
685 # This function does not handle:
687 # - disabled services
689 # This function *does* handle:
690 # - disabled check typess
692 # on_error is one of:
693 # "ignore" -> silently ignore any exception
694 # "warn" -> output a warning on stderr
695 # "raise" -> let the exception come through
def _discover_services(hostname, ipaddress, sources, multi_host_sections, on_error):
    """Create a table of autodiscovered services of a host.

    Returns a list of (check_plugin_name, item, paramstring) tuples. Does
    not save the table anywhere and does not read previously discovered
    services. on_error is one of "ignore", "warn", "raise".
    """
    # Make hostname available as global variable in discovery functions
    # (used e.g. by ps-discovery)
    check_api_utils.set_hostname(hostname)

    discovered_services = []
    try:
        for check_plugin_name in sources.get_check_plugin_names():
            try:
                for item, paramstring in _execute_discovery(multi_host_sections, hostname,
                                                            ipaddress, check_plugin_name, on_error):
                    discovered_services.append((check_plugin_name, item, paramstring))
            except (KeyboardInterrupt, MKTimeout):
                raise
            except Exception as e:
                if on_error == "raise":
                    raise
                elif on_error == "warn":
                    console.error("Discovery of '%s' failed: %s\n" % (check_plugin_name, e))

        return discovered_services

    except KeyboardInterrupt:
        raise MKGeneralException("Interrupted by Ctrl-C.")
def _get_sources_for_discovery(hostname, ipaddress, check_plugin_names, do_snmp_scan, on_error):
    """Build the DataSources object for a discovery run, configuring SNMP
    sources for scanning and optionally enforcing check plugin names."""
    sources = data_sources.DataSources(hostname, ipaddress)

    for source in sources.get_data_sources():
        if isinstance(source, data_sources.SNMPDataSource):
            source.set_on_error(on_error)
            source.set_do_snmp_scan(do_snmp_scan)
            source.set_use_snmpwalk_cache(False)
            source.set_ignore_check_interval(True)
            source.set_check_plugin_name_filter(snmp_scan.gather_snmp_check_plugin_names)

    # When check types are specified via command line, enforce them and disable auto detection
    if check_plugin_names:
        sources.enforce_check_plugin_names(check_plugin_names)

    return sources
def _get_host_sections_for_discovery(sources, use_caches):
    """Fetch the host sections, honoring cache files only when use_caches
    is set (a max age of 0 disables cache usage)."""
    max_cachefile_age = config.inventory_max_cachefile_age if use_caches else 0
    return sources.get_host_sections(max_cachefile_age)
745 def _execute_discovery(multi_host_sections
, hostname
, ipaddress
, check_plugin_name
, on_error
):
746 # Skip this check type if is ignored for that host
747 if config
.service_ignored(hostname
, check_plugin_name
, None):
751 discovery_function
= config
.check_info
[check_plugin_name
]["inventory_function"]
752 if discovery_function
is None:
753 discovery_function
= check_api_utils
.no_discovery_possible
755 raise MKGeneralException("No such check type '%s'" % check_plugin_name
)
757 # Now do the actual discovery
759 # TODO: There is duplicate code with checking.execute_check(). Find a common place!
761 section_content
= multi_host_sections
.get_section_content(
762 hostname
, ipaddress
, check_plugin_name
, for_discovery
=True)
763 except MKParseFunctionError
as e
:
764 if cmk
.utils
.debug
.enabled() or on_error
== "raise":
766 if x
[0] == item_state
.MKCounterWrapped
:
769 # re-raise the original exception to not destory the trace. This may raise a MKCounterWrapped
770 # exception which need to lead to a skipped check instead of a crash
771 raise x
[0], x
[1], x
[2]
773 elif on_error
== "warn":
774 section_name
= cmk_base
.check_utils
.section_name_of(check_plugin_name
)
776 "Exception while parsing agent section '%s': %s\n" % (section_name
, e
))
780 if section_content
is None: # No data for this check type
783 # In case of SNMP checks but missing agent response, skip this check.
784 # Special checks which still need to be called even with empty data
786 if not section_content
and cmk_base
.check_utils
.is_snmp_check(check_plugin_name
) \
787 and not config
.check_info
[check_plugin_name
]["handle_empty_info"]:
790 # Check number of arguments of discovery function. Note: This
791 # check for the legacy API will be removed after 1.2.6.
792 if len(inspect
.getargspec(discovery_function
).args
) == 2:
793 discovered_items
= discovery_function(
795 section_content
) # discovery is a list of pairs (item, current_value)
797 # New preferred style since 1.1.11i3: only one argument: section_content
798 discovered_items
= discovery_function(section_content
)
800 # tolerate function not explicitely returning []
801 if discovered_items
is None:
802 discovered_items
= []
804 # New yield based api style
805 elif not isinstance(discovered_items
, list):
806 discovered_items
= list(discovered_items
)
809 for entry
in discovered_items
:
810 if not isinstance(entry
, tuple):
812 "%s: Check %s returned invalid discovery data (entry not a tuple): %r\n" %
813 (hostname
, check_plugin_name
, repr(entry
)))
816 if len(entry
) == 2: # comment is now obsolete
817 item
, paramstring
= entry
818 elif len(entry
) == 3: # allow old school
819 item
, __
, paramstring
= entry
820 else: # we really don't want longer tuples (or 1-tuples).
822 "%s: Check %s returned invalid discovery data (not 2 or 3 elements): %r\n" %
823 (hostname
, check_plugin_name
, repr(entry
)))
826 # Check_MK 1.2.7i3 defines items to be unicode strings. Convert non unicode
827 # strings here seamless. TODO remove this conversion one day and replace it
828 # with a validation that item needs to be of type unicode
829 if isinstance(item
, str):
830 item
= config
.decode_incoming_string(item
)
832 description
= config
.service_description(hostname
, check_plugin_name
, item
)
834 if len(description
) == 0:
835 console
.error("%s: Check %s returned empty service description - ignoring it.\n" %
836 (hostname
, check_plugin_name
))
839 result
.append((item
, paramstring
))
841 except Exception as e
:
842 if on_error
== "warn":
844 " Exception in discovery function of check type '%s': %s" % (check_plugin_name
, e
))
845 elif on_error
== "raise":
852 # Creates a table of all services that a host has or could have according
853 # to service discovery. The result is a dictionary of the form
854 # (check_plugin_name, item) -> (check_source, paramstring)
855 # check_source is the reason/state/source of the service:
856 # "new" : Check is discovered but currently not yet monitored
857 # "old" : Check is discovered and already monitored (most common)
858 # "vanished" : Check had been discovered previously, but item has vanished
859 # "legacy" : Check is defined via legacy_checks
860 # "active" : Check is defined via active_checks
861 # "custom" : Check is defined via custom_checks
862 # "manual" : Check is a manual Check_MK check without service discovery
863 # "ignored" : discovered or static, but disabled via ignored_services
864 # "clustered_new" : New service found on a node that belongs to a cluster
865 # "clustered_old" : Old service found on a node that belongs to a cluster
866 # This function is cluster-aware
def _get_host_services(hostname, ipaddress, sources, multi_host_sections, on_error):
    """Create a table of all services a host has or could have.

    Returns a dict (check_plugin_name, item) -> (check_source, paramstring);
    see the comment block above for the possible check_source values.
    This function is cluster-aware.
    """
    if config.is_cluster(hostname):
        return _get_cluster_services(hostname, ipaddress, sources, multi_host_sections, on_error)

    return _get_node_services(hostname, ipaddress, sources, multi_host_sections, on_error)
874 # Do the actual work for a non-cluster host or node
# Do the actual work for a non-cluster host or node
def _get_node_services(hostname, ipaddress, sources, multi_host_sections, on_error):
    """Build the service table for a node, re-labelling services that belong
    to a cluster as 'clustered_*'.

    NOTE(review): the exception else-branch was missing from the fragmented
    source; reconstructed as skip-on-ignore -- verify against the original.
    """
    services = _get_discovered_services(hostname, ipaddress, sources, multi_host_sections, on_error)

    # Identify clustered services
    for (check_plugin_name, item), (check_source, paramstring) in services.items():
        try:
            descr = config.service_description(hostname, check_plugin_name, item)
        except Exception as e:
            if on_error == "raise":
                raise
            if on_error == "warn":
                console.error("Invalid service description: %s\n" % e)
            continue  # ignore

        if hostname != config.host_of_clustered_service(hostname, descr):
            if check_source == "vanished":
                del services[(check_plugin_name,
                              item)]  # do not show vanished clustered services here
            else:
                services[(check_plugin_name, item)] = ("clustered_" + check_source, paramstring)

    _merge_manual_services(services, hostname, on_error)
    return services
901 # Part of _get_node_services that deals with discovered services
# Part of _get_node_services that deals with discovered services
def _get_discovered_services(hostname, ipaddress, sources, multi_host_sections, on_error):
    """Return a dict (check_plugin_name, item) -> (check_source, paramstring)
    with check_source one of "new", "old" or "vanished"."""
    # Create a dict from check_plugin_name/item to check_source/paramstring
    services = {}

    # In 'multi_host_sections = _get_host_sections_for_discovery(..)'
    # we've already discovered the right check plugin names.
    # _discover_services(..) would discover check plugin names again.
    # In order to avoid a second discovery (SNMP data source would do
    # another SNMP scan) we enforce this selection to be used.
    check_plugin_names = multi_host_sections.get_check_plugin_names()
    sources.enforce_check_plugin_names(check_plugin_names)

    # Handle discovered services -> "new"
    new_items = _discover_services(hostname, ipaddress, sources, multi_host_sections, on_error)
    for check_plugin_name, item, paramstring in new_items:
        services.setdefault((check_plugin_name, item), ("new", paramstring))

    # Match with existing items -> "old" and "vanished"
    old_items = parse_autochecks_file(hostname)
    for check_plugin_name, item, paramstring in old_items:
        if (check_plugin_name, item) not in services:
            services[(check_plugin_name, item)] = ("vanished", paramstring)
        else:
            services[(check_plugin_name, item)] = ("old", paramstring)

    return services
# To a list of discovered services add/replace manual and active
# checks and handle ignoration
def _merge_manual_services(services, hostname, on_error):
    # Mutates 'services' in place and also returns it.

    # Find manual checks. These can override discovered checks -> "manual"
    manual_items = check_table.get_check_table(hostname, skip_autochecks=True)
    for (check_plugin_name, item), (params, descr, _unused_deps) in manual_items.items():
        services[(check_plugin_name, item)] = ('manual', repr(params))

    # Add legacy checks -> "legacy"
    legchecks = config.host_extra_conf(hostname, config.legacy_checks)
    for _unused_cmd, descr, _unused_perf in legchecks:
        services[('legacy', descr)] = ('legacy', 'None')

    # Add custom checks -> "custom"
    custchecks = config.host_extra_conf(hostname, config.custom_checks)
    for entry in custchecks:
        services[('custom', entry['service_description'])] = ('custom', 'None')

    # Similar for 'active_checks', but here we have parameters
    for acttype, rules in config.active_checks.items():
        entries = config.host_extra_conf(hostname, rules)
        for params in entries:
            descr = config.active_check_service_description(hostname, acttype, params)
            services[(acttype, descr)] = ('active', repr(params))

    # Handle disabled services -> "ignored"
    for (check_plugin_name, item), (check_source, paramstring) in services.items():
        if check_source in ["legacy", "active", "custom"]:
            # These are ignored later in get_check_preview
            # TODO: This needs to be cleaned up. The problem here is that service_description() can not
            # calculate the description of active checks and the active checks need to be put into
            # "[source]_ignored" instead of ignored.
            continue

        try:
            descr = config.service_description(hostname, check_plugin_name, item)
        except Exception as e:
            if on_error == "raise":
                raise
            elif on_error == "warn":
                console.error("Invalid service description: %s\n" % e)
            else:
                continue  # ignore

        if config.service_ignored(hostname, check_plugin_name, descr):
            services[(check_plugin_name, item)] = ("ignored", paramstring)

    return services
# Do the work for a cluster
def _get_cluster_services(hostname, ipaddress, sources, multi_host_sections, on_error):
    nodes = config.nodes_of(hostname)

    # Get setting from cluster SNMP data source
    do_snmp_scan = False
    for source in sources.get_data_sources():
        if isinstance(source, data_sources.SNMPDataSource):
            do_snmp_scan = source.get_do_snmp_scan()

    # Get services of the nodes. We are only interested in "old", "new" and "vanished"
    # From the states and parameters of these we construct the final state per service.
    cluster_items = {}
    for node in nodes:
        node_ipaddress = ip_lookup.lookup_ip_address(node)
        node_sources = _get_sources_for_discovery(
            node,
            node_ipaddress,
            check_plugin_names=sources.get_enforced_check_plugin_names(),
            do_snmp_scan=do_snmp_scan,
            on_error=on_error,
        )

        services = _get_discovered_services(node, node_ipaddress, node_sources,
                                            multi_host_sections, on_error)
        for (check_plugin_name, item), (check_source, paramstring) in services.items():
            descr = config.service_description(hostname, check_plugin_name, item)
            if hostname == config.host_of_clustered_service(node, descr):
                if (check_plugin_name, item) not in cluster_items:
                    cluster_items[(check_plugin_name, item)] = (check_source, paramstring)
                else:
                    # Merge the state found on this node with the state found on
                    # previously visited nodes: "old" wins, and a "new"/"vanished"
                    # pair means the service moved between nodes -> treat as "old".
                    first_check_source, first_paramstring = cluster_items[(check_plugin_name,
                                                                           item)]
                    if first_check_source == "old":
                        pass
                    elif check_source == "old":
                        cluster_items[(check_plugin_name, item)] = (check_source, paramstring)
                    elif first_check_source == "vanished" and check_source == "new":
                        cluster_items[(check_plugin_name, item)] = ("old", first_paramstring)
                    elif check_source == "vanished" and first_check_source == "new":
                        cluster_items[(check_plugin_name, item)] = ("old", paramstring)
                    # In all other cases either both must be "new" or "vanished" -> let it be

    # Now add manual and active service and handle ignored services
    _merge_manual_services(cluster_items, hostname, on_error)
    return cluster_items
# Translates a parameter string (read from autochecks) to it's final value
# (according to the current configuration)
def resolve_paramstring(check_plugin_name, paramstring):
    ctx = config.get_check_context(check_plugin_name)
    # TODO: Can't we simply access check_context[paramstring]?
    # NOTE: eval() is applied to locally stored autochecks data within the
    # check's own context - it must never be fed untrusted input.
    return eval(paramstring, ctx, ctx)
1035 # Get the list of service of a host or cluster and guess the current state of
1036 # all services if possible
1037 # TODO: Can't we reduce the duplicate code here? Check out the "checking" code
1038 def get_check_preview(hostname
, use_caches
, do_snmp_scan
, on_error
):
1039 if config
.is_cluster(hostname
):
1042 ipaddress
= ip_lookup
.lookup_ip_address(hostname
)
1044 sources
= _get_sources_for_discovery(
1045 hostname
, ipaddress
, check_plugin_names
=None, do_snmp_scan
=do_snmp_scan
, on_error
=on_error
)
1047 multi_host_sections
= _get_host_sections_for_discovery(sources
, use_caches
=use_caches
)
1049 services
= _get_host_services(hostname
, ipaddress
, sources
, multi_host_sections
, on_error
)
1052 for (check_plugin_name
, item
), (check_source
, paramstring
) in services
.items():
1056 if check_source
not in ['legacy', 'active', 'custom']:
1057 # apply check_parameters
1059 if isinstance(paramstring
, str):
1060 params
= resolve_paramstring(check_plugin_name
, paramstring
)
1062 params
= paramstring
1064 raise MKGeneralException("Invalid check parameter string '%s'" % paramstring
)
1067 descr
= config
.service_description(hostname
, check_plugin_name
, item
)
1068 except Exception as e
:
1069 if on_error
== "raise":
1071 elif on_error
== "warn":
1072 console
.error("Invalid service description: %s\n" % e
)
1076 check_api_utils
.set_service(check_plugin_name
, descr
)
1077 section_name
= cmk_base
.check_utils
.section_name_of(check_plugin_name
)
1079 if check_plugin_name
not in config
.check_info
:
1080 continue # Skip not existing check silently
1084 section_content
= multi_host_sections
.get_section_content(
1085 hostname
, ipaddress
, section_name
, for_discovery
=True)
1086 except MKParseFunctionError
as e
:
1087 if cmk
.utils
.debug
.enabled() or on_error
== "raise":
1089 # re-raise the original exception to not destory the trace. This may raise a MKCounterWrapped
1090 # exception which need to lead to a skipped check instead of a crash
1091 raise x
[0], x
[1], x
[2]
1094 except Exception as e
:
1095 if cmk
.utils
.debug
.enabled():
1098 output
= "Error: %s" % e
1100 # TODO: Move this to a helper function
1101 if section_content
is None: # No data for this check type
1103 output
= "Received no data"
1105 if not section_content
and cmk_base
.check_utils
.is_snmp_check(check_plugin_name
) \
1106 and not config
.check_info
[check_plugin_name
]["handle_empty_info"]:
1108 output
= "Received no data"
1110 item_state
.set_item_state_prefix(check_plugin_name
, item
)
1112 if exitcode
is None:
1113 check_function
= config
.check_info
[check_plugin_name
]["check_function"]
1114 if check_source
!= 'manual':
1115 params
= check_table
.get_precompiled_check_parameters(
1117 config
.compute_check_parameters(hostname
, check_plugin_name
, item
, params
),
1120 params
= check_table
.get_precompiled_check_parameters(
1121 hostname
, item
, params
, check_plugin_name
)
1124 item_state
.reset_wrapped_counters()
1125 result
= checking
.sanitize_check_result(
1126 check_function(item
, checking
.determine_check_params(params
),
1128 cmk_base
.check_utils
.is_snmp_check(check_plugin_name
))
1129 item_state
.raise_counter_wrap()
1130 except item_state
.MKCounterWrapped
as e
:
1131 result
= (None, "WAITING - Counter based check, cannot be done offline")
1132 except Exception as e
:
1133 if cmk
.utils
.debug
.enabled():
1136 3, "UNKNOWN - invalid output from agent or error in check implementation")
1137 if len(result
) == 2:
1138 result
= (result
[0], result
[1], [])
1139 exitcode
, output
, perfdata
= result
1143 output
= "WAITING - %s check, cannot be done offline" % check_source
.title()
1146 if check_source
== "active":
1147 params
= resolve_paramstring(check_plugin_name
, paramstring
)
1149 if check_source
in ["legacy", "active", "custom"]:
1151 if config
.service_ignored(hostname
, None, descr
):
1152 check_source
= "%s_ignored" % check_source
1154 checkgroup
= config
.check_info
[check_plugin_name
]["group"]
1156 table
.append((check_source
, check_plugin_name
, checkgroup
, item
, paramstring
, params
, descr
,
1157 exitcode
, output
, perfdata
))
#.
#   .--Autochecks----------------------------------------------------------.
#   |            _         _             _               _                 |
#   |           / \  _   _| |_ ___   ___| |__   ___  ___| | _____          |
#   |          / _ \| | | | __/ _ \ / __| '_ \ / _ \/ __| |/ / __|         |
#   |         / ___ \ |_| | || (_) | (__| | | | __/ (__| <\__ \            |
#   |        /_/   \_\__,_|\__\___/ \___|_| |_|\___|\___|_|\_\___/         |
#   |                                                                      |
#   +----------------------------------------------------------------------+
#   |  Reading, parsing, writing, modifying autochecks files               |
#   '----------------------------------------------------------------------'
# Read autochecks, but do not compute final check parameters.
# Returns a table of 3-tuples:
#   1. check_plugin_name
#   2. item
#   3. parameter string, not yet evaluated!
# TODO: use store.load_data_from_file()
def parse_autochecks_file(hostname):
    def split_python_tuple(line):
        # Split off the first element of a comma separated Python tuple
        # literal, honouring quoting, backslash escapes and nested brackets.
        # Returns (first_element, rest) or (whole_line, None) when no
        # top-level comma is left.
        quote = None
        bracklev = 0
        backslash = False
        for i, c in enumerate(line):
            if backslash:
                backslash = False
                continue
            elif c == '\\':
                backslash = True
            elif c == quote:
                quote = None  # end of quoted string
            elif c in ['"', "'"] and not quote:
                quote = c  # begin of quoted string
            elif quote:
                continue  # ignore everything within quotes
            elif c in ['(', '{', '[']:
                bracklev += 1
            elif c in [')', '}', ']']:
                bracklev -= 1
            elif bracklev > 0:
                continue  # ignore commas within brackets
            elif c == ',':
                value = line[0:i]
                rest = line[i + 1:]
                return value.strip(), rest
        return line.strip(), None

    path = "%s/%s.mk" % (cmk.utils.paths.autochecks_dir, hostname)
    if not os.path.exists(path):
        return []

    lineno = 0
    table = []
    for line in file(path):
        lineno += 1
        try:
            line = line.strip()
            if not line.startswith("("):
                continue

            # drop everything after potential '#' (from older versions)
            i = line.rfind('#')
            if i > 0:  # make sure # is not contained in string
                rest = line[i:]
                if '"' not in rest and "'" not in rest:
                    line = line[:i].strip()

            if line.endswith(","):
                line = line[:-1]
            line = line[1:-1]  # drop brackets

            # First try old format - with hostname
            parts = []
            while True:
                part, line = split_python_tuple(line)
                parts.append(part)
                if line is None:
                    break
            if len(parts) == 4:
                parts = parts[1:]  # drop hostname, legacy format with host in first column
            elif len(parts) != 3:
                raise Exception("Invalid number of parts: %d (%r)" % (len(parts), parts))

            checktypestring, itemstring, paramstring = parts

            item = eval(itemstring)
            # With Check_MK 1.2.7i3 items are now defined to be unicode strings. Convert
            # items from existing autocheck files for compatibility. TODO remove this one day
            if isinstance(item, str):
                item = config.decode_incoming_string(item)

            table.append((eval(checktypestring), item, paramstring))
        except:
            if cmk.utils.debug.enabled():
                raise
            raise Exception("Invalid line %d in autochecks file %s" % (lineno, path))

    return table
def _has_autochecks(hostname):
    # True if an autochecks file exists for this host.
    autochecks_path = "%s/%s.mk" % (cmk.utils.paths.autochecks_dir, hostname)
    return os.path.exists(autochecks_path)
def _remove_autochecks_file(hostname):
    # Delete the autochecks file of this host. A missing file is not an
    # error: removal is best-effort.
    filepath = cmk.utils.paths.autochecks_dir + "/" + hostname + ".mk"
    try:
        os.remove(filepath)
    except OSError:
        pass
# FIXME TODO: Consolidate with automation.py automation_write_autochecks_file()
def _save_autochecks_file(hostname, items):
    # Write the autochecks of one host as a Python list literal of
    # (check_plugin_name, item, paramstring) tuples. The paramstring is
    # written verbatim (not repr()ed) - it is already a Python expression.
    if not os.path.exists(cmk.utils.paths.autochecks_dir):
        os.makedirs(cmk.utils.paths.autochecks_dir)
    filepath = "%s/%s.mk" % (cmk.utils.paths.autochecks_dir, hostname)
    out = file(filepath, "w")
    out.write("[\n")
    for check_plugin_name, item, paramstring in items:
        out.write("  (%r, %r, %s),\n" % (check_plugin_name, item, paramstring))
    out.write("]\n")
    out.close()  # flush to disk explicitly instead of relying on GC
def set_autochecks_of(hostname, new_items):
    # Replace the autochecks of a host with 'new_items', a dict mapping
    # (check_plugin_name, item) -> paramstring. May consume entries from
    # 'new_items' (non-cluster case deletes kept entries from the dict).
    #
    # A Cluster does not have an autochecks file
    # All of its services are located in the nodes instead
    # So we cycle through all nodes remove all clustered service
    # and add the ones we've got from stdin
    if config.is_cluster(hostname):
        for node in config.nodes_of(hostname):
            new_autochecks = []
            existing = parse_autochecks_file(node)
            for check_plugin_name, item, paramstring in existing:
                descr = config.service_description(node, check_plugin_name, item)
                if hostname != config.host_of_clustered_service(node, descr):
                    new_autochecks.append((check_plugin_name, item, paramstring))

            for (check_plugin_name, item), paramstring in new_items.items():
                new_autochecks.append((check_plugin_name, item, paramstring))

            # write new autochecks file for that host
            _save_autochecks_file(node, new_autochecks)

        # Check whether or not the cluster host autocheck files are still
        # existant. Remove them. The autochecks are only stored in the nodes
        # autochecks files these days.
        _remove_autochecks_file(hostname)
    else:
        existing = parse_autochecks_file(hostname)
        # write new autochecks file, but take paramstrings from existing ones
        # for those checks which are kept
        new_autochecks = []
        for ct, item, paramstring in existing:
            if (ct, item) in new_items:
                new_autochecks.append((ct, item, paramstring))
                del new_items[(ct, item)]

        for (ct, item), paramstring in new_items.items():
            new_autochecks.append((ct, item, paramstring))

        # write new autochecks file for that host
        _save_autochecks_file(hostname, new_autochecks)
# Remove all autochecks of a host while being cluster-aware!
def remove_autochecks_of(hostname):
    # Returns the number of removed services. For a cluster the removal is
    # applied to each node, since clusters have no own autochecks file.
    removed = 0
    nodes = config.nodes_of(hostname)
    if nodes:
        for node in nodes:
            removed += _remove_autochecks_of_host(node)
    else:
        removed += _remove_autochecks_of_host(hostname)
    return removed
def _remove_autochecks_of_host(hostname):
    # Rewrite the autochecks file of this host keeping only the services
    # that are assigned to another (cluster) host. Returns the number of
    # removed services.
    old_items = parse_autochecks_file(hostname)
    removed = 0
    new_items = []
    for check_plugin_name, item, paramstring in old_items:
        descr = config.service_description(hostname, check_plugin_name, item)
        if hostname != config.host_of_clustered_service(hostname, descr):
            # Skip services owned by a cluster - they must be kept
            new_items.append((check_plugin_name, item, paramstring))
        else:
            removed += 1
    _save_autochecks_file(hostname, new_items)
    return removed