Cleanup config.nodes_of
[check_mk.git] / cmk_base / item_state.py
blob3d22d2f18ad9c4dd7abc4c47c44163adbd69ee9a
1 #!/usr/bin/env python
2 # -*- encoding: utf-8; py-indent-offset: 4 -*-
3 # +------------------------------------------------------------------+
4 # | ____ _ _ __ __ _ __ |
5 # | / ___| |__ ___ ___| | __ | \/ | |/ / |
6 # | | | | '_ \ / _ \/ __| |/ / | |\/| | ' / |
7 # | | |___| | | | __/ (__| < | | | | . \ |
8 # | \____|_| |_|\___|\___|_|\_\___|_| |_|_|\_\ |
9 # | |
10 # | Copyright Mathias Kettner 2014 mk@mathias-kettner.de |
11 # +------------------------------------------------------------------+
13 # This file is part of Check_MK.
14 # The official homepage is at http://mathias-kettner.de/check_mk.
16 # check_mk is free software; you can redistribute it and/or modify it
17 # under the terms of the GNU General Public License as published by
18 # the Free Software Foundation in version 2. check_mk is distributed
19 # in the hope that it will be useful, but WITHOUT ANY WARRANTY; with-
20 # out even the implied warranty of MERCHANTABILITY or FITNESS FOR A
21 # PARTICULAR PURPOSE. See the GNU General Public License for more de-
22 # tails. You should have received a copy of the GNU General Public
23 # License along with GNU Make; see the file COPYING. If not, write
24 # to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
25 # Boston, MA 02110-1301 USA.
26 """
27 These functions allow checks to keep a memory until the next time
28 the check is being executed. The most frequent use case is compu-
29 tation of rates from two succeeding counter values. This is done
30 via the helper function get_rate(). Averaging is another example
31 and done by get_average().
33 While a host is being checked this memory is kept in _cached_item_states.
34 That is a dictionary. The keys are unique to one check type and
35 item. The value is free form.
37 Note: The item state is kept in tmpfs and is not reboot-persistent.
38 Do not store long-time things here. Also do not store complex
39 structures like log files or stuff.
40 """
42 import os
43 import traceback
45 import cmk.utils.paths
46 import cmk.utils.store
47 from cmk.utils.exceptions import MKGeneralException
48 import cmk_base.cleanup
# Constants for counters
# These are the values understood by the `onwrap` argument of get_rate():
SKIP = None  # remember the counter wrap in g_last_counter_wrap and return 0.0
RAISE = False  # re-raise the MKCounterWrapped exception to the caller
ZERO = 0.0  # any other value is handed back as the rate; this one yields 0.0

g_last_counter_wrap = None  # last MKCounterWrapped recorded by get_rate() with onwrap=SKIP
g_suppress_on_wrap = True  # Suppress check on wrap (raise an exception)
# e.g. do not suppress this check on check_mk -nv
class MKCounterWrapped(Exception):
    """Signals that no rate can be computed for a counter right now.

    The textual cause is available both as the `reason` attribute and as
    the string representation of the exception.
    """

    def __init__(self, reason):
        super(MKCounterWrapped, self).__init__(reason)
        self.reason = reason

    def __str__(self):
        return self.reason
class CachedItemStates(object):
    """Item states of one host held in memory, plus the pending changes.

    Besides the current states the object tracks which keys were updated
    and which were removed since loading, so that save() can apply exactly
    these modifications to the data on disk.
    """

    def __init__(self):
        super(CachedItemStates, self).__init__()
        self.reset()

    def clear_all_item_states(self):
        """Drop all cached states and mark every former key as removed."""
        removed_item_state_keys = list(self._item_states)
        self.reset()
        self._removed_item_state_keys = removed_item_state_keys

    def reset(self):
        """Forget all cached data, the key prefix and all pending changes."""
        # The actual cached data
        self._item_states = {}
        self._item_state_prefix = ()
        self._last_mtime = None  # timestamp of last modification
        self._removed_item_state_keys = []
        self._updated_item_states = {}

    def load(self, hostname):
        """Read the item states of `hostname` from the counters directory.

        The file's modification time is recorded so that save() can detect
        whether the file was changed in the meantime.
        """
        filename = cmk.utils.paths.counters_dir + "/" + hostname
        try:
            # TODO: refactoring. put these two values into a named tuple
            self._item_states = cmk.utils.store.load_data_from_file(
                filename, default={}, lock=True)
            self._last_mtime = os.stat(filename).st_mtime
        finally:
            cmk.utils.store.release_lock(filename)

    # TODO: self._last_mtime needs be updated accordingly after the save_data_to_file operation
    #       right now, the current mechanism is sufficient enough, since the save() function is only
    #       called as the final operation, just before the lifecycle of the CachedItemState ends
    def save(self, hostname):
        """ The job of the save function is to update the item state on disk.
        It simply returns, if it detects that the data wasn't changed at all since the last loading
        If the data on disk has been changed in the meantime, the cached data is updated from disk.
        Afterwards only the actual modifications (update/remove) are applied to the updated cached
        data before it is written back to disk.

        Raises MKGeneralException if anything goes wrong while writing.
        """
        filename = cmk.utils.paths.counters_dir + "/" + hostname
        if not self._removed_item_state_keys and not self._updated_item_states:
            return

        try:
            if not os.path.exists(cmk.utils.paths.counters_dir):
                os.makedirs(cmk.utils.paths.counters_dir)

            cmk.utils.store.aquire_lock(filename)
            last_mtime = os.stat(filename).st_mtime
            if last_mtime != self._last_mtime:
                self._item_states = cmk.utils.store.load_data_from_file(filename, default={})

                # Remove obsolete keys
                for key in self._removed_item_state_keys:
                    try:
                        del self._item_states[key]
                    except KeyError:
                        pass

                # Add updated keys
                self._item_states.update(self._updated_item_states)

            cmk.utils.store.save_data_to_file(filename, self._item_states, pretty=False)
        except Exception:
            raise MKGeneralException("Cannot write to %s: %s" % (filename, traceback.format_exc()))
        finally:
            cmk.utils.store.release_lock(filename)

    def clear_item_state(self, user_key):
        """Remove the state stored under `user_key` for the current prefix."""
        key = self.get_unique_item_state_key(user_key)
        self.remove_full_key(key)

    def clear_item_states_by_full_keys(self, full_keys):
        """Remove all states whose full (prefixed) keys are given."""
        for key in full_keys:
            self.remove_full_key(key)

    def remove_full_key(self, full_key):
        """Remove the state stored under the full (prefixed) key.

        The key is recorded as removed even if no such state is cached.
        """
        self._removed_item_state_keys.append(full_key)
        # A value set earlier in this run must not survive the removal:
        # save() applies the pending updates after the removals and would
        # otherwise write the key back.
        self._updated_item_states.pop(full_key, None)
        self._item_states.pop(full_key, None)

    def get_item_state(self, user_key, default=None):
        """Return the state stored under `user_key`, or `default` if absent."""
        key = self.get_unique_item_state_key(user_key)
        return self._item_states.get(key, default)

    def set_item_state(self, user_key, state):
        """Store `state` under `user_key` and mark it as a pending update."""
        key = self.get_unique_item_state_key(user_key)
        self._item_states[key] = state
        self._updated_item_states[key] = state

    def get_all_item_states(self):
        """Return the dictionary of all cached states (not a copy)."""
        return self._item_states

    def get_item_state_prefix(self):
        """Return the tuple that is currently prepended to every user key."""
        return self._item_state_prefix

    def set_item_state_prefix(self, args):
        """Set the tuple that is prepended to every user key."""
        self._item_state_prefix = args

    def get_unique_item_state_key(self, user_key):
        """Build the full key: the current prefix extended by `user_key`."""
        return self._item_state_prefix + (user_key,)
# Module-wide instance that all the module-level functions below operate on
_cached_item_states = CachedItemStates()
def load(hostname):
    """Discard everything cached so far and read the item states of
    the given host from disk."""
    states = _cached_item_states
    states.reset()
    states.load(hostname)
def save(hostname):
    """Write the pending item state changes of the given host to disk."""
    _cached_item_states.save(hostname)
def set_item_state(user_key, state):
    """Remember an arbitrary value until the check is executed again.

    The value is filed under user_key, which has to be unique within
    one service."""
    _cached_item_states.set_item_state(user_key, state)
def get_item_state(user_key, default=None):
    """Look up the value currently stored under user_key.

    If nothing is stored under that key, the given default (None
    unless specified) is returned instead."""
    return _cached_item_states.get_item_state(user_key, default)
def get_all_item_states():
    """Hand out every item stored for the host that is currently being checked."""
    return _cached_item_states.get_all_item_states()
def clear_item_state(user_key):
    """Delete the stored item matching the given key, which has to be
    the very key that was passed to set_item_state().

    Nothing is modified if no such item exists."""
    _cached_item_states.clear_item_state(user_key)
def clear_item_states_by_full_keys(full_keys):
    """Delete every stored item listed in full_keys.

    Each entry must be the full identifier of an item rather than just
    the name handed to set_item_state(). For checks this is
    normally (<check_plugin_name>, <item>, <user_key>).
    """
    _cached_item_states.clear_item_states_by_full_keys(full_keys)
def cleanup_item_states():
    """Drop all stored items of the host that is currently being checked."""
    _cached_item_states.clear_all_item_states()
def set_item_state_prefix(*args):
    """Use the given positional arguments, as a tuple, as the prefix of
    all item state keys from now on."""
    _cached_item_states.set_item_state_prefix(args)
def get_item_state_prefix():
    """Return the tuple currently used as prefix of all item state keys."""
    return _cached_item_states.get_item_state_prefix()
def _unique_item_state_key(user_key):
    """Return the full item state key for user_key, i.e. the current
    item state prefix extended by user_key."""
    # The result used to be computed but dropped, so callers always got None.
    return _cached_item_states.get_unique_item_state_key(user_key)
def continue_on_counter_wrap():
    """Switch off the suppression on counter wraps: from now on
    _get_counter() returns (1.0, 0.0) instead of raising MKCounterWrapped."""
    global g_suppress_on_wrap
    g_suppress_on_wrap = False
247 # Idea (2): Check_MK should fetch a time stamp for each info. This should also be
248 # available as a global variable, so that this_time would be an optional argument.
def get_rate(user_key, this_time, this_val, allow_negative=False, onwrap=SKIP, is_rate=False):
    """Return the per-second rate of a counter since its previous value.

    The arguments user_key, this_time, this_val, allow_negative and is_rate
    are passed on to _get_counter(). If that raises MKCounterWrapped, the
    outcome is decided by onwrap:

    RAISE     : the exception is re-raised
    SKIP      : the exception is remembered in g_last_counter_wrap and
                0.0 is returned
    otherwise : onwrap itself is returned
    """
    global g_last_counter_wrap
    try:
        counter = _get_counter(user_key, this_time, this_val, allow_negative, is_rate)
    except MKCounterWrapped as wrap:
        if onwrap is RAISE:
            raise
        if onwrap is SKIP:
            g_last_counter_wrap = wrap
            return 0.0
        return onwrap
    return counter[1]
263 # Helper for get_rate(). Note: this function has been part of the official check API
264 # for a long time. So we cannot change its call syntax or remove it for the while.
def _get_counter(countername, this_time, this_val, allow_negative=False, is_rate=False):
    """Store the new counter value and compute the rate since the old one.

    Returns the pair (time difference, rate per second). The new value is
    stored in every case, even if no rate can be computed.

    No rate can be computed if there is no previous value, if no time has
    passed since the previous value, or if the value went down while
    allow_negative is not set. In these cases MKCounterWrapped is raised,
    unless g_suppress_on_wrap has been switched off, in which case
    (1.0, 0.0) is returned.

    With is_rate set, this_val is divided by the time difference as it is
    instead of its difference to the previous value.
    """
    previous = get_item_state(countername, None)
    set_item_state(countername, (this_time, this_val))

    wrap_reason = None
    timedif = None
    valuedif = None

    if previous is None:
        # First time we see this counter? Do not return
        # any data!
        wrap_reason = 'Counter initialization'
    else:
        last_time, last_val = previous
        timedif = this_time - last_time
        if timedif <= 0:
            wrap_reason = 'No time difference'
        else:
            valuedif = this_val if is_rate else this_val - last_val
            if valuedif < 0 and not allow_negative:
                # Do not try to handle wrapped counters. We do not know
                # whether they are 32 or 64 bit. It could also be a counter
                # reset (reboot, etc.). Better is to leave this value
                # undefined and wait for the next check interval.
                wrap_reason = 'Value overflow'

    if wrap_reason is not None:
        if g_suppress_on_wrap:
            raise MKCounterWrapped(wrap_reason)
        return 1.0, 0.0

    return timedif, float(valuedif) / timedif
def reset_wrapped_counters():
    """Forget the counter wrap that has been remembered last."""
    global g_last_counter_wrap
    g_last_counter_wrap = None
306 # TODO: Can we remove this? (check API)
def last_counter_wrap():
    """Return the counter wrap remembered last, or None if there is none."""
    return g_last_counter_wrap
def raise_counter_wrap():
    """Raise the counter wrap remembered last; do nothing if there is none."""
    pending_wrap = g_last_counter_wrap
    if pending_wrap:
        raise pending_wrap  # pylint: disable=raising-bad-type
def get_average(itemname, this_time, this_val, backlog_minutes, initialize_zero=True):
    """Compute average by gliding exponential algorithm

    itemname        : unique ID for storing this average until the next check
    this_time       : timestamp of new value
    this_val        : the new value to merge into the average
    backlog_minutes : averaging horizon in minutes. With a horizon of 0 the
                      new value replaces the average.
    initialize_zero : assume average of 0.0 when no previous average is stored

    Returns the new average, which is also stored together with this_time
    under itemname.
    """
    old_state = get_item_state(itemname, None)

    # first call: take current value as average or assume 0.0
    if old_state is None:
        if initialize_zero:
            this_val = 0
        set_item_state(itemname, (this_time, this_val))
        return this_val  # avoid time diff of 0.0 -> avoid division by zero

    # Get previous value and time difference
    last_time, last_val = old_state
    timedif = this_time - last_time

    # Gracefully handle time-anomaly of target systems. We lose
    # one value, but what the heck..
    if timedif < 0:
        timedif = 0

    # Overflow error occurs if weight exceeds 1e308 or falls below 1e-308
    # Under the conditions 0<=percentile<=1, backlog_minutes>=1 and timedif>=0
    # first case is not possible because weight is max. 1.
    # In the second case weight goes to zero.
    try:
        # Compute the weight: We do it like this: First we assume that
        # we get one sample per minute. And that backlog_minutes is the number
        # of minutes we should average over. Then we want that the weight
        # of the values of the last average minutes have a fraction of W%
        # in the result and the rest until infinity the rest (1-W%).
        # Then the weight can be computed as backlog_minutes'th root of 1-W
        percentile = 0.50

        weight_per_minute = (1 - percentile)**(1.0 / backlog_minutes)

        # now let's compute the weight per second. This is done
        weight = weight_per_minute**(timedif / 60.0)

    except (OverflowError, ZeroDivisionError):
        # ZeroDivisionError: a horizon of 0 minutes. The weight approaches
        # zero when the horizon approaches zero, so no averaging takes place.
        weight = 0

    new_val = last_val * weight + this_val * (1 - weight)

    set_item_state(itemname, (this_time, new_val))
    return new_val
# Hand cleanup_item_states() over to the cleanup registry of cmk_base.cleanup.
# NOTE(review): presumably it is then invoked whenever cmk_base cleans up its
# global caches - confirm in cmk_base/cleanup.py
cmk_base.cleanup.register_cleanup(cleanup_item_states)