Refactoring: Changed all check parameters starting with an 'o' to the new rulespec...
[check_mk.git] / checks / heartbeat_crm
blob81681dab5bf5803922a87c49eabec2d5c7a6d1dc
1 #!/usr/bin/python
2 # -*- encoding: utf-8; py-indent-offset: 4 -*-
3 # +------------------------------------------------------------------+
4 # | ____ _ _ __ __ _ __ |
5 # | / ___| |__ ___ ___| | __ | \/ | |/ / |
6 # | | | | '_ \ / _ \/ __| |/ / | |\/| | ' / |
7 # | | |___| | | | __/ (__| < | | | | . \ |
8 # | \____|_| |_|\___|\___|_|\_\___|_| |_|_|\_\ |
9 # | |
10 # | Copyright Mathias Kettner 2014 mk@mathias-kettner.de |
11 # +------------------------------------------------------------------+
13 # This file is part of Check_MK.
14 # The official homepage is at http://mathias-kettner.de/check_mk.
16 # check_mk is free software; you can redistribute it and/or modify it
17 # under the terms of the GNU General Public License as published by
18 # the Free Software Foundation in version 2. check_mk is distributed
19 # in the hope that it will be useful, but WITHOUT ANY WARRANTY; with-
20 # out even the implied warranty of MERCHANTABILITY or FITNESS FOR A
21 # PARTICULAR PURPOSE. See the GNU General Public License for more de-
22 # tails. You should have received a copy of the GNU General Public
23 # License along with GNU Make; see the file COPYING. If not, write
24 # to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
25 # Boston, MA 02110-1301 USA.
27 # Example outputs from agent:
28 # <<<heartbeat_crm>>>
29 # ============
30 # Last updated: Thu Jul 1 07:48:19 2010
31 # Current DC: mwp (118cc1e7-bbf3-4550-b820-cac372885be1)
32 # 2 Nodes configured.
33 # 2 Resources configured.
34 # ============
35 # Node: smwp (2395453b-d647-48ff-a908-a7cd76062265): online
36 # Node: mwp (118cc1e7-bbf3-4550-b820-cac372885be1): online
37 # Full list of resources:
38 # Resource Group: group_slapmaster
39 # resource_virtip1 (ocf::heartbeat:IPaddr): Started mwp
40 # resource_virtip2 (ocf::heartbeat:IPaddr): Started mwp
41 # resource_pingnodes (ocf::heartbeat:pingd): Started mwp
42 # resource_slapmaster (ocf::heartbeat:OpenLDAP): Started mwp
43 # resource_slapslave (ocf::heartbeat:OpenLDAP): Started smwp
45 # Nails down the DC to the node which is the DC during inventory. The check
46 # will report CRITICAL when another node becomes the DC during later checks.
47 # If set to "False" the check will be passed.
49 # Keep this option for compatibility with checks inventorized before 1.2.5i6
50 heartbeat_crm_naildown = True
52 # Max age of "last updated"
54 # Keep this option for compatibility with checks inventorized before 1.2.5i6
55 heartbeat_crm_default_max_age = 60
57 # Naildown the resources to the nodes which care about the resources during
58 # the inventory run
60 # Keep this option for compatibility with checks inventorized before 1.2.5i6
61 heartbeat_crm_resources_naildown = True
63 # Holds a dict of settings which tell the inventory functions whether or not
64 # some options like the resources and DC role shall be nailed down to the
65 # node which holds these resources during inventory.
66 inventory_heartbeat_crm_rules = []
68 factory_settings["heartbeat_crm_default_levels"] = {
69 "max_age": heartbeat_crm_default_max_age,
def heartbeat_crm_parse_general(info):
    """Extract the general cluster facts from the agent section.

    `info` is the agent output as a list of lines, each line being a list
    of whitespace-separated words.

    Returns the tuple (last_updated, dc, num_nodes, num_resources):
      last_updated  - text following "Last updated:" (without a combined
                      "Last change:" part)
      dc            - third word of the "Current DC:" line
      num_nodes     - number of configured nodes as int
      num_resources - number of configured resources as int
    An element is None when no matching line is present. A count that is
    not numeric makes int() raise ValueError.
    """
    last_updated = None
    dc = None
    num_nodes = None
    num_resources = None
    for line in info:
        line_txt = ' '.join(line)

        if line_txt.startswith("Last updated:"):
            if "Last change:" in line_txt:
                # Some versions seem to combine both lines
                cut = line_txt.index("Last change:")
                last_updated = line_txt[:cut].split(": ")[1].strip()
            else:
                last_updated = ' '.join(line[2:])

        elif line_txt.startswith('Current DC:'):
            # A line without a DC name must not abort parsing of the
            # remaining lines; dc simply stays None in that case.
            if len(line) > 2:
                dc = line[2]

        elif "nodes and" in line_txt and "resources configured" in line_txt:
            # Some versions put number of nodes and resources in one line
            num_nodes = int(line[0])
            num_resources = int(line[3])

        else:
            # "<N> Nodes configured." / "<N> Resources configured.", with or
            # without trailing punctuation and in any letter case
            what = ' '.join(line[1:3]).rstrip('.,').lower()
            if what == 'nodes configured':
                num_nodes = int(line[0])
            elif what == 'resources configured':
                num_resources = int(line[0])

    return (last_updated, dc, num_nodes, num_resources)
103 # .--CRM-----------------------------------------------------------------.
104 # | ____ ____ __ __ |
105 # | / ___| _ \| \/ | |
106 # | | | | |_) | |\/| | |
107 # | | |___| _ <| | | | |
108 # | \____|_| \_\_| |_| |
109 # | |
110 # '----------------------------------------------------------------------'
def inventory_heartbeat_crm(info):
    """Inventory function of the general CRM check.

    Uses these lines of the agent output:
      ============
      Last updated: Thu Jul 1 07:48:19 2010
      Current DC: mwp (118cc1e7-bbf3-4550-b820-cac372885be1)
      2 Nodes configured.
      2 Resources configured.
      ============

    The discovered parameters always contain the number of nodes and
    resources found during inventory. The current DC is added only when
    the merged inventory rules enable 'naildown_dc'.

    Returns a list with exactly one entry: (None, params).
    """
    settings = host_extra_conf_merged(host_name(), inventory_heartbeat_crm_rules)
    try:
        _last_updated, dc, num_nodes, num_resources = heartbeat_crm_parse_general(info)
    except Exception:
        # In the case that CRM is not working, add it as a service and show
        # the error later. Only regular errors are caught here, so that
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        dc, num_nodes, num_resources = 0, 0, 0

    params = {
        'num_nodes': num_nodes,
        'num_resources': num_resources,
    }
    if settings.get('naildown_dc', False):
        params['dc'] = dc
    return [(None, params)]
def check_heartbeat_crm(item, params, info):
    """Check function of the general CRM check.

    params is either a dict with the keys 'max_age', 'dc', 'num_nodes' and
    'num_resources', or an old-style tuple (max_age, dc, num_nodes,
    num_resources) which is converted to such a dict first.

    Returns None for an empty section, otherwise (state, text):
      - state 2 with the first line as text when that line starts with
        "critical" or contains "connection to cluster failed"
      - state 3 when the "Last updated" timestamp is older than max_age
      - state 2 when the DC, the number of nodes or the number of
        resources differs from an expected value that is set
      - state 0 otherwise
    """
    if not info:
        return

    first_line = ' '.join(info[0])
    first_line_lower = first_line.lower()
    if first_line_lower.startswith("critical") or \
            'connection to cluster failed' in first_line_lower:
        return 2, first_line

    last_updated, dc, numNodes, numResources = heartbeat_crm_parse_general(info)

    # Convert old tuple params (pre 1.2.5i6). As before, "" / -1 and any
    # other false value (e.g. 0) disable the corresponding comparison.
    if isinstance(params, tuple):
        params = {
            'max_age': params[0],
            'dc': params[1] if (params[1] != "" and params[1]) else None,
            'num_nodes': params[2] if (params[2] != -1 and params[2]) else None,
            'num_resources': params[3] if (params[3] != -1 and params[3]) else None,
        }

    # Check the freshness of the crm_mon output and terminate with state 3
    # when the information is too old.
    # NOTE(review): last_updated is None when the section has no
    # "Last updated:" line; strptime raises in that case - confirm whether
    # a dedicated result is wanted here.
    dt = utc_mktime(time.strptime(last_updated, '%a %b %d %H:%M:%S %Y'))
    now = time.time()
    delta = now - dt
    if delta > params['max_age']:
        return 3, 'Ignoring reported data (Status output too old: %s)' % \
            get_age_human_readable(delta)

    status = 0
    details = []

    # Check for correct DC when enabled
    expected_dc = params.get('dc')
    if expected_dc is None or dc == expected_dc:
        details.append('DC: %s' % dc)
    else:
        details.append('DC: %s (Expected %s (!!))' % (dc, expected_dc))
        status = 2

    # Check for number of nodes / resources when enabled. The found value
    # is formatted with %s because it is None when the agent output lacks
    # the corresponding line; '%d' would raise a TypeError then.
    for label, found, expected in (
            ('Nodes', numNodes, params.get('num_nodes')),
            ('Resources', numResources, params.get('num_resources')),
    ):
        if expected is None:
            continue
        if found == expected:
            details.append('%s: %s' % (label, found))
        else:
            details.append('%s: %s (Expected %d (!!))' % (label, found, expected))
            status = 2

    return (status, ', '.join(details))
197 check_info["heartbeat_crm"] = {
198 'check_function': check_heartbeat_crm,
199 'inventory_function': inventory_heartbeat_crm,
200 'service_description': 'Heartbeat CRM General',
201 'group': 'heartbeat_crm',
202 'default_levels_variable': 'heartbeat_crm_default_levels',
206 # .--Resources-----------------------------------------------------------.
207 # | ____ |
208 # | | _ \ ___ ___ ___ _ _ _ __ ___ ___ ___ |
209 # | | |_) / _ \/ __|/ _ \| | | | '__/ __/ _ \/ __| |
210 # | | _ < __/\__ \ (_) | |_| | | | (_| __/\__ \ |
211 # | |_| \_\___||___/\___/ \__,_|_| \___\___||___/ |
212 # | |
213 # '----------------------------------------------------------------------'
def heartbeat_crm_parse_resources(info):
    """Parse the resource list of the agent section.

    Only lines after "Full list of resources:" are evaluated; a
    "Failed actions:" line (any letter case) ends the evaluated part.

    Returns a dict mapping a name to a list of entries:
      - single resource: name is the first word, the only entry is the
        complete list of words of that line
      - "Resource Group:": one entry per member line (lines starting with
        "_"), being the words of that line without the leading "_"
      - "Clone Set:": for a "Started:" member line one entry
        [name, 'Clone', 'Started', <words between the brackets>]
      - "Master/Slave Set:": for a "Masters:" / "Slaves:" member line one
        entry [name, 'Master' or 'Slave', 'Started', <third word>]
    """
    # Header prefix of each container type and the mode it switches to
    container_modes = (
        ('Resource Group:', 'resourcegroup'),
        ('Clone Set:', 'cloneset'),
        ('Master/Slave Set:', 'masterslaveset'),
    )
    # Member line keyword of a master/slave set and the reported role
    set_roles = {'Masters:': 'Master', 'Slaves:': 'Slave'}

    parsed = {}
    in_block = False
    current = ''
    mode = 'single'
    for words in info:
        text = " ".join(words)

        if text.lower() == 'failed actions:':
            in_block = False
            continue

        if not in_block:
            if text == 'Full list of resources:':
                in_block = True
            continue

        # Header line of a group or set: start a new, empty container
        new_mode = None
        for prefix, candidate in container_modes:
            if text.startswith(prefix):
                new_mode = candidate
                break
        if new_mode is not None:
            current = words[2]
            parsed[current] = []
            mode = new_mode
            continue

        # Single resource
        if not text.startswith('_'):
            parsed[words[0]] = [words]
            continue

        # Member line. Consider inconsistent agent output in clone set
        # lines: the "_" may be a word of its own or glued to the first word.
        member = words[1:]
        if words[0] != "_":
            member.insert(0, words[0].lstrip('_'))

        if mode == 'resourcegroup':
            parsed[current].append(member)
        elif mode == 'cloneset':
            if member[0] == 'Started:':
                parsed[current].append([current, 'Clone', 'Started', member[2:-1]])
        elif mode == 'masterslaveset':
            role = set_roles.get(member[0])
            if role is not None:
                parsed[current].append([current, role, 'Started', member[2]])

    return parsed
def inventory_heartbeat_crm_resources(info):
    """Inventory function of the CRM resources check.

    Creates one service per resource, resource group or set found in:
      Full list of resources:
      Resource Group: group_slapmaster
      resource_virtip1 (ocf::heartbeat:IPaddr): Started mwp
      resource_virtip2 (ocf::heartbeat:IPaddr): Started mwp
      resource_pingnodes (ocf::heartbeat:pingd): Started mwp
      resource_slapmaster (ocf::heartbeat:OpenLDAP): Started mwp
      resource_slapslave (ocf::heartbeat:OpenLDAP): Started smwp

    Returns a list of (name, params). When the merged inventory rules
    enable 'naildown_resources', params is the quoted fourth field of the
    first entry of the resource; otherwise, or when nothing can be nailed
    down, params is None.
    """
    inventory = []
    settings = host_extra_conf_merged(host_name(), inventory_heartbeat_crm_rules)
    naildown = settings.get('naildown_resources', False)
    # .items() works on Python 2 and 3 and matches the check function
    for name, resources in heartbeat_crm_parse_resources(info).items():
        # In naildown mode only resources which are started somewhere can be
        # inventorized. A group or set without any parsed member has no
        # entry to look at and is inventorized without a target node.
        if naildown and resources and resources[0][2] != 'Stopped':
            inventory.append((name, '"%s"' % resources[0][3]))
        else:
            inventory.append((name, None))
    return inventory
def check_heartbeat_crm_resources(item, target_node, info):
    """Check function of the CRM resources check.

    Looks up `item` in the parsed resource list and reports every entry
    found for it. Returns None when the item is not present, otherwise
    (state, text). The state is 2 when
      - an entry has exactly three fields or its third field is not
        'Started', or
      - target_node is set and differs from the fourth field of an entry
        whose second field is neither 'Slave' nor 'Clone'.
    Without any entry the text is "no resources found" with state 0.
    """
    entries = heartbeat_crm_parse_resources(info).get(item)
    if entries is None:
        return

    state = 0
    texts = []
    for entry in entries:
        # List-valued fields (node lists of clone sets) are shown via repr
        text = ' '.join([repr(field) if isinstance(field, list) else field
                         for field in entry])

        if len(entry) == 3 or entry[2] != 'Started':
            state = 2
            text += ' (Resource is in state "%s" (!!))' % entry[2]
        elif target_node and target_node != entry[3] \
                and entry[1] != 'Slave' and entry[1] != 'Clone':
            state = 2
            text += ' (Expected node: %s (!!))' % target_node

        texts.append(text)

    if not texts:
        return (state, "no resources found")
    return (state, ', '.join(texts).rstrip(', '))
316 check_info["heartbeat_crm.resources"] = {
317 'check_function': check_heartbeat_crm_resources,
318 'inventory_function': inventory_heartbeat_crm_resources,
319 'service_description': 'Heartbeat CRM %s',
320 'group': 'heartbeat_crm_resources',