Refactoring: Changed all check parameters starting with an 'o' to the new rulespec...
[check_mk.git] / checks / oracle_asm_diskgroup
blobc8d36d04f1bb4e088cbf98f57d47d24b666c2fb4
1 #!/usr/bin/python
2 # -*- encoding: utf-8; py-indent-offset: 4 -*-
3 # +------------------------------------------------------------------+
4 # | ____ _ _ __ __ _ __ |
5 # | / ___| |__ ___ ___| | __ | \/ | |/ / |
6 # | | | | '_ \ / _ \/ __| |/ / | |\/| | ' / |
7 # | | |___| | | | __/ (__| < | | | | . \ |
8 # | \____|_| |_|\___|\___|_|\_\___|_| |_|_|\_\ |
9 # | |
10 # | Copyright Mathias Kettner 2014 mk@mathias-kettner.de |
11 # +------------------------------------------------------------------+
13 # This file is part of Check_MK.
14 # The official homepage is at http://mathias-kettner.de/check_mk.
16 # check_mk is free software; you can redistribute it and/or modify it
17 # under the terms of the GNU General Public License as published by
18 # the Free Software Foundation in version 2. check_mk is distributed
19 # in the hope that it will be useful, but WITHOUT ANY WARRANTY; with-
20 # out even the implied warranty of MERCHANTABILITY or FITNESS FOR A
21 # PARTICULAR PURPOSE. See the GNU General Public License for more de-
22 # tails. You should have received a copy of the GNU General Public
23 # License along with GNU Make; see the file COPYING. If not, write
24 # to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
25 # Boston, MA 02110-1301 USA.
27 # future todos in checkcode
28 # - RAC: 1 of 3 nodes has a DISMOUNTED DG. This is not a CRIT!
30 # Example output from asmcmd lsdg:
31 # State Type Rebal Sector Block AU Total_MB Free_MB Req_mir_free_MB Usable_file_MB Offline_disks Voting_files Name
32 # MOUNTED NORMAL N 512 4096 1048576 512000 92888 0 46444 0 N DATA/
33 # MOUNTED NORMAL N 512 4096 1048576 3072 2146 309 918 0 Y OCR_VOTE/
34 # DISMOUNTED N 0 0 0 0 0 0 0 0 N DB_DG1/
35 # DISMOUNTED N 0 4096 0 0 0 0 0 0 N ABC/
36 # MOUNTED EXTERN N 512 4096 1048576 2047984 163379 0 163379 0 N XYZ/
37 # MOUNTED EXTERN N 512 4096 1048576 307092 291710 0 291710 0 N HUHU/
38 # DISMOUNTED N 0 4096 0 0 0 0 0 0 N FOO/
39 # DISMOUNTED N 0 4096 0 0 0 0 0 0 N BAR/
41 # The agent section <<<oracle_asm_diskgroup>>> does not output the header line
43 # new format with Failuregroup details:
44 # state type dgname block au req_mir_free_mb total_mb free_mb fg_name voting_files fg_type offline_disks fg_min_repair_tima fg_disks
46 # MOUNTED|EXTERN|FRA|4096|4194304|0|10236|4468|FRA01|N|REGULAR|0|8640000|1
47 # MOUNTED|EXTERN|GRID|4096|4194304|0|5112|5016|GRID01|N|REGULAR|0|8640000|1
48 # MOUNTED|NORMAL|DATA|4096|4194304|102396|614376|476280|NS1|N|REGULAR|0|8640000|3
49 # MOUNTED|NORMAL|DATA|4096|4194304|102396|614376|476280|NS2|N|REGULAR|0|8640000|3
# Default parameters of the check. They are registered under the name that
# check_info refers to via 'default_levels_variable'.
factory_settings["asm_diskgroup_default_levels"] = {
    "levels": (80.0, 90.0),  # warn/crit in percent
    "magic_normsize": 20,  # Standard size is 20 GB
    "levels_low": (50.0, 60.0),  # Never move warn level below 50% due to magic factor
    "trend_range": 24,
    "trend_perfdata": True,  # do send performance data for trends
    "req_mir_free": False,  # Ignore required mirror free space in DG
}
def parse_oracle_asm_diskgroup(info):
    """Turn the agent section into a dict keyed by disk group name.

    Each line of ``info`` is a list whose first element is the node name,
    followed by the columns of one of the supported agent formats:

    * old ``asmcmd lsdg`` format: 10 or 11 data columns after the state (and,
      for MOUNTED groups, the type) column,
    * new '|' separated format: 12 data columns with failure group details.

    Lines with an unknown state, an unexpected column count, or from any node
    other than the first one seen are ignored.

    Returns a dict ``{dgname: attributes}``. All attribute values are kept as
    strings. Entries built from the new format additionally carry a
    "failgroups" list with one dict per failure group line, holding integer
    sizes and counts; the list holds a single empty dict for a DISMOUNTED
    group.
    """
    parsed = {}
    first_node = ''

    for line in info:
        # Every supported format has many more columns than this; skip
        # truncated lines instead of failing with an IndexError below.
        if len(line) < 3:
            continue

        dgstate = line[1]

        if dgstate == "DISMOUNTED":
            dgtype = None
            # Old format: the type column is missing for dismounted groups,
            # so the data starts one column earlier. The new '|' format
            # always delivers all 15 columns (including the node name), so
            # no correction is needed there.
            index = 3 if len(line) == 15 else 2

        elif dgstate == "MOUNTED":
            dgtype = line[2]
            index = 3

        else:
            continue

        stripped_line = line[index:]
        new_format = len(stripped_line) == 12

        if len(stripped_line) == 10:
            _rebal, _sector, _block, _au, total_mb, free_mb, req_mir_free_mb, \
                _usable_file_mb, offline_disks, dgname = stripped_line
            voting_files = "N"

        elif len(stripped_line) == 11:
            _rebal, _sector, _block, _au, total_mb, free_mb, req_mir_free_mb, \
                _usable_file_mb, offline_disks, voting_files, dgname = stripped_line

        elif new_format:
            # new format with failure group details
            dgname, _block, _au, req_mir_free_mb, total_mb, free_mb, \
                fg_name, voting_files, fg_type, offline_disks, fg_min_repair_tima, \
                fg_disks = stripped_line

        else:
            continue

        dgname = dgname.rstrip("/")

        # Only use data from the first node in the agent output
        # => later calculation of the DG size is much easier.
        #
        # todo: RAC with mounted DG on 2 of 3 nodes. => Problem when first_node
        # has the DISMOUNTED DG. The old agent formats without '|' are really
        # painful here, because we need the DG at this point to find a possible
        # node with mounted DG.
        node_name = line[0]
        if first_node == '':
            first_node = node_name
        elif first_node != node_name:
            continue

        entry = {
            "dgstate": dgstate,
            "dgtype": dgtype,
            "total_mb": total_mb,
            "free_mb": free_mb,
            "req_mir_free_mb": req_mir_free_mb,
            "offline_disks": offline_disks,
            "voting_files": voting_files,
        }

        if not new_format:
            # old format without failure group data: first line per DG wins
            parsed.setdefault(dgname, entry)
            continue

        if dgstate == "DISMOUNTED":
            # We don't have any detail data for the failure group
            # => add a dummy failure group for format detection in the check.
            this_failgroup = {}
        else:
            this_failgroup = {
                "fg_name": fg_name,
                "fg_voting_files": voting_files,
                "fg_type": fg_type,
                "fg_free_mb": int(free_mb),
                "fg_total_mb": int(total_mb),
                "fg_disks": int(fg_disks),
                "fg_min_repair_tima": int(fg_min_repair_tima),
            }

        # The first line of a DG creates the entry; every line (including the
        # first) appends its failure group to that entry's list.
        entry["failgroups"] = []
        parsed.setdefault(dgname, entry)["failgroups"].append(this_failgroup)

    return parsed
def inventory_oracle_asm_diskgroup(parsed):
    """Yield one service (name, empty parameter dict) per known disk group.

    A disk group is inventorized when its parsed state is either MOUNTED or
    DISMOUNTED.
    """
    known_states = ["MOUNTED", "DISMOUNTED"]
    for dg_name in parsed:
        if parsed[dg_name]["dgstate"] in known_states:
            yield dg_name, {}
def check_oracle_asm_diskgroup(item, params, parsed):
    """Check one ASM disk group: state, usage, failure groups and votings.

    item   -- name of the disk group (key in ``parsed``)
    params -- check parameters; 'req_mir_free' is read here, the whole dict
              is handed on to df_check_filesystem_single
    parsed -- result of parse_oracle_asm_diskgroup

    Returns ``(2, "Diskgroup dismounted")`` for a dismounted disk group,
    otherwise ``(status, infotext, perfdata)`` based on the result of
    df_check_filesystem_single.

    Raises MKCounterWrapped when the disk group is not part of the agent
    output.
    """
    if item not in parsed:
        # In case of missing information we assume that the ASM instance is
        # checked at a later time. This reduces false notifications for
        # ASM instances that are not running.
        raise MKCounterWrapped("Diskgroup %s not found" % item)

    data = parsed[item]

    dgstate = data["dgstate"]
    dgtype = data["dgtype"]
    req_mir_free_mb = data["req_mir_free_mb"]
    offline_disks = data["offline_disks"]
    voting_files = data["voting_files"]

    if dgstate == "DISMOUNTED":
        return 2, "Diskgroup dismounted"

    # 100 days => no disk in repair time
    no_repair_timer = 8640000

    add_text = ""
    has_failgroups = "failgroups" in data

    # NOTE(review): a dgtype other than EXTERN/NORMAL/HIGH leaves dg_sizefactor
    # unassigned and the division below fails - unchanged from the original.
    if has_failgroups:
        # => New agent format!
        failgroups = data["failgroups"]
        fg_count = len(failgroups)

        # dg_sizefactor depends on dgtype and fg_count
        if dgtype == 'EXTERN':
            dg_sizefactor = 1

        elif dgtype == 'NORMAL':
            if fg_count == 1:
                # We miss the 2nd required failure group.
                # => factor is down from 2 to 1
                dg_sizefactor = 1
            else:
                dg_sizefactor = 2

        elif dgtype == 'HIGH':
            # Below the minimum of 3 required failure groups the factor
            # shrinks to the number of groups that are present.
            dg_sizefactor = min(fg_count, 3)

        dg_votecount = 0
        dg_disks = 0
        dg_min_repair = no_repair_timer
        fg_uniform_size = True
        reference_total = None

        # These are the sizes without the dg_sizefactor.
        total_mb = 0
        free_mb = 0

        # check some details against the failure groups
        for fgitem in failgroups:
            # count number of disks over all failure groups
            dg_disks += fgitem["fg_disks"]

            if fgitem['fg_voting_files'] == 'Y':
                dg_votecount += 1

            dg_min_repair = min(dg_min_repair, fgitem['fg_min_repair_tima'])

            free_mb += fgitem["fg_free_mb"]
            total_mb += fgitem["fg_total_mb"]

            # Check uniform size of the failure groups, 5% difference is ok.
            # Ignore failure groups with voting files
            # => Exadata uses special failure groups for voting with a
            #    different size.
            # => Ignore QUORUM failure groups. They cannot store regular data!
            if fgitem['fg_type'] != 'REGULAR' or fgitem['fg_voting_files'] != 'N':
                continue

            fg_total = fgitem["fg_total_mb"]
            if reference_total is None:
                # The first comparable failure group is the reference.
                reference_total = fg_total
            elif not (fg_total * 0.95 <= reference_total <= fg_total * 1.05):
                fg_uniform_size = False

    else:
        # Work on the old agent format
        # => some estimates with possible errors are expected. Use the new
        #    agent format for correct results.
        total_mb = data["total_mb"]
        free_mb = data["free_mb"]

        if dgtype == 'EXTERN':
            dg_sizefactor = 1

        elif dgtype in ('NORMAL', 'HIGH'):
            # old plugin format has limitations when NORMAL or HIGH
            # redundancy is found
            add_text += ', old plugin data, possible wrong used and free space'

            if dgtype == 'NORMAL':
                # NORMAL redundancy disk groups with voting require 3 failgroups
                dg_sizefactor = 3 if voting_files == 'Y' else 2
            else:
                # HIGH redundancy disk groups with voting require 5 failgroups
                dg_sizefactor = 5 if voting_files == 'Y' else 3

    # Floor division keeps the integer results the original '/' produced on
    # Python 2 integers.
    total_mb = int(total_mb) // dg_sizefactor
    free_space_mb = int(free_mb) // dg_sizefactor

    if params.get('req_mir_free'):
        # NOTE(review): the value is normalised here but never applied to the
        # free space handed to df_check_filesystem_single - confirm intent.
        req_mir_free_mb = int(req_mir_free_mb)
        if req_mir_free_mb < 0:
            # required mirror free space could be negative!
            req_mir_free_mb = 0

        # Append, so that the note about old plugin data is not lost.
        add_text += ', required mirror free space used'

    status, infotext, perfdata = df_check_filesystem_single(item, int(total_mb), free_space_mb,
                                                            0, None, None, params)
    if dgtype is not None:
        infotext += ', %s redundancy' % dgtype.lower()

    if has_failgroups:
        # => New agent format!
        infotext += ', %i disks' % dg_disks

        if dgtype != 'EXTERN':
            # EXTERN redundancy has only 1 failure group => useless information
            infotext += ' in %i failgroups' % fg_count

        if not fg_uniform_size:
            infotext += ', failgroups with unequal size'

        if dg_votecount > 0:
            votemarker = ''
            if dgtype == 'HIGH':
                if dg_votecount < 4:
                    status = max(status, 2)
                    votemarker = ', not enough votings, 5 expected (!!)'
                elif dg_votecount < 5:
                    # HIGH redundancy allows a loss of 2 votes.
                    # => 1 missing vote is only a WARN
                    status = max(status, 1)
                    votemarker = ', not enough votings, 5 expected (!)'

            elif dgtype == 'NORMAL' and dg_votecount < 3:
                status = max(status, 2)
                votemarker = ', not enough votings, 3 expected (!!)'

            infotext += ', %i votings' % dg_votecount
            infotext += votemarker

        if dg_min_repair < no_repair_timer:
            # no need to set a state due to offline disks
            infotext += ', disk repair timer for offline disks at %s (!)' % get_age_human_readable(
                dg_min_repair)

    infotext += add_text

    offline_disks = int(offline_disks)
    if offline_disks > 0:
        status = max(2, status)
        infotext += ', %d Offline disks found(!!)' % offline_disks

    return status, infotext, perfdata
# Registration of the check with its parse, inventory and check functions.
check_info["oracle_asm_diskgroup"] = {
    'parse_function': parse_oracle_asm_diskgroup,
    'inventory_function': inventory_oracle_asm_diskgroup,
    'check_function': check_oracle_asm_diskgroup,
    'service_description': 'ASM Diskgroup %s',
    'has_perfdata': True,
    # The parse function reads the node name from the first column of each line.
    'node_info': True,
    'group': 'asm_diskgroup',
    # Name of the factory_settings entry holding the default parameters.
    'default_levels_variable': 'asm_diskgroup_default_levels',
    # The check function calls df_check_filesystem_single, which is not
    # defined in this file - presumably provided by these includes.
    "includes": ["df.include", "size_trend.include"],
}