Refactoring: Changed all check parameters starting with an 'o' to the new rulespec...
[check_mk.git] / agents / plugins / mk_filestats.py
blobc85a56b0e123febac582f2832edc8917cdc3038e
1 #!/usr/bin/env python
2 # -*- encoding: utf-8; py-indent-offset: 4 -*-
3 # +------------------------------------------------------------------+
4 # | ____ _ _ __ __ _ __ |
5 # | / ___| |__ ___ ___| | __ | \/ | |/ / |
6 # | | | | '_ \ / _ \/ __| |/ / | |\/| | ' / |
7 # | | |___| | | | __/ (__| < | | | | . \ |
8 # | \____|_| |_|\___|\___|_|\_\___|_| |_|_|\_\ |
9 # | |
10 # | Copyright Mathias Kettner 2018 mk@mathias-kettner.de |
11 # +------------------------------------------------------------------+
13 # This file is part of Check_MK.
14 # The official homepage is at http://mathias-kettner.de/check_mk.
16 # check_mk is free software; you can redistribute it and/or modify it
17 # under the terms of the GNU General Public License as published by
18 # the Free Software Foundation in version 2. check_mk is distributed
19 # in the hope that it will be useful, but WITHOUT ANY WARRANTY; with-
20 # out even the implied warranty of MERCHANTABILITY or FITNESS FOR A
21 # PARTICULAR PURPOSE. See the GNU General Public License for more de-
22 # tails. You should have received a copy of the GNU General Public
23 # License along with GNU Make; see the file COPYING. If not, write
24 # to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
25 # Boston, MA 02110-1301 USA.
26 r"""Check_MK Agent Plugin: mk_filestats
28 This is a Check_MK Agent plugin. If configured, it will be called by the
29 agent without any arguments.
31 usage: mk_filestats [OPTIONS]
33 Options:
34 -h, --help Show this help message and exit
35 -v, -vv Increase verbosity
36 -c, --config-file Read config file
37 (default: $MK_CONFDIR/filestats.cfg)
39 Details:
41 This plugin is configured using ini-style configuration files, i.e. a file with
42 sections started by lines of the form '[Section Name]' and consisting of
43 'key: value' (key-colon-space-value) lines.
45 Every section will be processed individually, and this processing can be
46 described by the following four phases:
48 1. Input:
49 This phase will gather (iterators returning) the files the plugin will
50 initially be aware of.
51 Option keys concerning this phase are prefixed 'input_'. Currently, there
52 is only one option available (which therefore must be present):
53 * ``input_patterns'':
54 Here you can specify one or more *globbing* patterns. If more than one
55 pattern is provided, they will be split according to shell rules
56 (using shlex.split). Every matching file will be dealt with, every
57 matching folder will be recursively searched for *all* files.
58 2. Filtering:
59 This phase will filter the input files according to filters provided using
60 the option keys starting with 'filter_' of the corresponding configuration
61 section. The following are available (note that regex filters will always
62 be applied before other types of filters)
63 * ``filter_regex: regular_expression''
64 Only further process a file, if its full path matches the given regular
65 expression. Everything following the characters 'filter_regex: ' will
66 be considered one single regular expression.
67 * ``filter_regex_inverse: regular_expression''
68 Only further process a file, if its full path *does not* match the given
69 regular expression.
70 * ``filter_size: specification''
71 Only further process a file, if its size in bytes matches the provided
72 specification. The specification consists of one of the operators '>',
73 '<', '>=', '<=' and '==', directly followed by an integer.
74 E.g.: 'filter_size: <43008' will only match files smaller than 42 KB.
75 * ``filter_age: specification''
76 Only further process a file, if its age in seconds matches the filter.
77 See ``filter_size''.
78 3. Grouping
79 Currently every section in the configuration file will result in one
80 group in the produced output (indicated by '[[[output_type group_name]]]',
81 where the group name will be taken from the section's name in the config
82 file).
83 Future versions may provide means to create more than one group per
84 section (grouped by subfolder, for instance).
85 4. Output
86 You can choose from three different ways the output will be aggregated:
87 * ``output: file_stats''
88 Output the full information for every single file that is processed.
89 This is the default.
90 * ``output: count_only''
91 Only count the files matching all of the provided filters. Unless
92 required for the filtering operation, no stat call on the files is
93 made.
94 * ``output: extremes_only''
95 Only report the youngest, oldest, smallest, and biggest files. In case
96 checks only require this information, we can significantly reduce data.
98 You should find an example configuration file at
99 '../cfg_examples/mk_filestats.cfg' relative to this file.
101 import re
102 import os
103 import sys
104 import time
105 import glob
106 import shlex
107 import logging
try:
    import ConfigParser as configparser  # Python 2 module name
except ImportError:  # Python 3: a failed import raises ImportError, not NameError
    import configparser

# Configuration file used when no -c/--config-file option is given. If
# MK_CONFDIR is not set this is the relative path "filestats.cfg".
DEFAULT_CFG_FILE = os.path.join(os.getenv('MK_CONFDIR', ''), "filestats.cfg")

# Defaults handed to the ConfigParser, so they apply to every section.
DEFAULT_CFG_SECTION = {"output": "file_stats"}

# Splits a numeric filter specification such as '>=1024' into its operator
# part (any run of '<', '>', '=') and the remaining value part.
FILTER_SPEC_PATTERN = re.compile('(?P<operator>[<>=]+)(?P<value>.+)')

LOGGER = logging.getLogger(__name__)
def parse_arguments(argv=None):
    """Evaluate the command line options.

    `argv` is the list of arguments to inspect; if it is None, sys.argv[1:]
    is used. Besides returning the parsed options this has side effects:

    * -h/--help writes the module docstring to stderr and exits with 0
    * -v/--verbose configures logging at INFO level, -vv at DEBUG level;
      without either, propagation of LOGGER is switched off
    * a -c/--config-file option without a value exits with 1

    Returns a dict with the single key 'cfg_file'.
    """
    args = sys.argv[1:] if argv is None else argv

    if "-h" in args or "--help" in args:
        sys.stderr.write(__doc__)
        sys.exit(0)

    if "-v" in args or "--verbose" in args:
        logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
    elif "-vv" in args:
        logging.basicConfig(level=logging.DEBUG, format="%(levelname)s: %(lineno)s: %(message)s")
    else:
        LOGGER.propagate = False

    cfg_file = DEFAULT_CFG_FILE
    # Both spellings are looked at in this order, so if both are present
    # the value given with the long option wins.
    for opt in ("-c", "--config-file"):
        if opt not in args:
            continue
        try:
            cfg_file = args[args.index(opt) + 1]
        except IndexError:
            sys.stderr.write("missing value for option %r\n" % opt)
            sys.exit(1)

    return {'cfg_file': cfg_file}
class LazyFileStats(object):
    """Wrapper around os.stat

    Only call os.stat once, and not until corresponding attributes
    are actually needed.

    Attributes:
        path: absolute path of the file, as a text (unicode) string
        stat_status: None as long as no stat was attempted; afterwards 'ok',
            'file vanished', or the message of the error that occurred
    """

    def __init__(self, path):
        super(LazyFileStats, self).__init__()
        LOGGER.debug("Creating LazyFileStats(%r)", path)
        # Byte strings (the native `str` on Python 2) are decoded to text;
        # `bytes` exists on both Python 2 and 3, unlike `unicode`.
        if isinstance(path, bytes):
            path = path.decode('utf8')
        self.path = os.path.abspath(path)
        self.stat_status = None
        self._size = None
        self._age = None
        self._m_time = None

    def _stat(self):
        """Call os.stat unless already done, and cache size, mtime and age.

        Never raises for a failing stat; the outcome is recorded in
        `stat_status` and the cached values stay None on failure.
        """
        if self.stat_status is not None:
            return

        LOGGER.debug("os.stat(%r)", self.path)

        path = self.path.encode('utf8')
        try:
            stat = os.stat(path)
        except OSError as exc:
            # errno 2 is ENOENT ("No such file or directory")
            self.stat_status = "file vanished" if exc.errno == 2 else str(exc)
            return

        try:
            self._size = int(stat.st_size)
        except ValueError as exc:
            self.stat_status = str(exc)
            return

        try:
            self._m_time = int(stat.st_mtime)
            self._age = int(time.time()) - self._m_time
        except ValueError as exc:
            self.stat_status = str(exc)
            return

        self.stat_status = 'ok'

    @property
    def size(self):
        """File size in bytes, or None if the file could not be stat'ed"""
        self._stat()
        return self._size

    @property
    def age(self):
        """Seconds since the last modification, or None if stat failed"""
        self._stat()
        return self._age

    def __repr__(self):
        return "LazyFileStats(%r)" % self.path

    def dumps(self):
        """Return the repr of a dict describing this file"""
        # The dict below reads stat_status *before* size and age. Without
        # this explicit call a file that was never stat'ed before would be
        # reported with a stat_status of None.
        self._stat()
        data = {
            "type": "file",
            "path": self.path,
            "stat_status": self.stat_status,
            "size": self.size,
            "age": self.age,
            "mtime": self._m_time
        }
        return repr(data)
223 # .--Input---------------------------------------------------------------.
224 # | ___ _ |
225 # | |_ _|_ __ _ __ _ _| |_ |
226 # | | || '_ \| '_ \| | | | __| |
227 # | | || | | | |_) | |_| | |_ |
228 # | |___|_| |_| .__/ \__,_|\__| |
229 # | |_| |
230 # +----------------------------------------------------------------------+
231 # | |
232 # '----------------------------------------------------------------------'
class PatternIterator(object):
    """Iterable over the files found via a list of globbing patterns

    Every match that is a regular file is yielded as a LazyFileStats;
    every other match is searched recursively.
    """

    def __init__(self, pattern_list):
        super(PatternIterator, self).__init__()
        # resolve a leading '~' once, up front
        self._patterns = list(map(os.path.expanduser, pattern_list))

    def _iter_files(self, pattern):
        """Yield the files matching `pattern`, descending into non-files"""
        for match in glob.iglob(pattern):
            if not os.path.isfile(match):
                # for now, we recurse unconditionally
                for nested_file in self._iter_files(os.path.join(match, '*')):
                    yield nested_file
                continue
            yield LazyFileStats(match)

    def __iter__(self):
        for pattern in self._patterns:
            LOGGER.info("processing pattern: %r", pattern)
            for found_file in self._iter_files(pattern):
                yield found_file
def get_file_iterator(config):
    """Create the LazyFileStats iterator for one configuration section.

    `config` is the dict of options of the section. Exactly one option
    starting with 'input_' must be present, and it must be
    'input_patterns'; its value is split using shell rules.

    Raises ValueError if the input definition is missing, given more than
    once, or of an unknown type.
    """
    prefix = 'input_'
    input_specs = [
        (key[len(prefix):], value) for key, value in config.items() if key.startswith(prefix)
    ]

    if not input_specs:
        raise ValueError("missing input definition")
    if len(input_specs) > 1:  # currently not supported
        raise ValueError("multiple input definitions: %r" % input_specs)

    variety, spec_string = input_specs[0]
    if variety != "patterns":
        raise ValueError("unknown input type: %r" % variety)

    return PatternIterator(shlex.split(spec_string))
273 # .--Filtering-----------------------------------------------------------.
274 # | _____ _ _ _ _ |
275 # | | ___(_) | |_ ___ _ __(_)_ __ __ _ |
276 # | | |_ | | | __/ _ \ '__| | '_ \ / _` | |
277 # | | _| | | | || __/ | | | | | | (_| | |
278 # | |_| |_|_|\__\___|_| |_|_| |_|\__, | |
279 # | |___/ |
280 # +----------------------------------------------------------------------+
281 # | |
282 # '----------------------------------------------------------------------'
class AbstractFilter(object):
    """Interface that every file filter has to provide"""

    def matches(self, lazy_file):
        """Tell whether `lazy_file` passes the filter (return a boolean).

        Must be overridden; this base implementation always raises
        NotImplementedError.
        """
        raise NotImplementedError
class AbstractNumericFilter(AbstractFilter):
    """Common code for filtering by comparing integers

    The filter is created from a specification string consisting of one
    of the operators '<', '<=', '>', '>=' and '==', followed by an integer
    (e.g. '>=1024'). Subclasses decide which property of a file is
    compared by implementing `matches`.
    """

    def __init__(self, spec_string):
        """Parse `spec_string`; raise ValueError if that is not possible"""
        super(AbstractNumericFilter, self).__init__()
        match = FILTER_SPEC_PATTERN.match(spec_string)
        if match is None:
            raise ValueError("unable to parse filter spec: %r" % spec_string)
        op_symbol, value = match.group('operator'), match.group('value')
        self._value = int(value)
        if op_symbol not in ('<', '<=', '>', '>=', '=='):
            raise ValueError("unknown operator for numeric filter: %r" % op_symbol)
        # Outcomes of the three-way comparison "configured value vs. value
        # of the file" that count as a match. The operator describes the
        # file's value, hence '<' (file is smaller) corresponds to 1.
        self._positive_cmp_results = []
        if '<' in op_symbol:
            self._positive_cmp_results.append(1)
        if '>' in op_symbol:
            self._positive_cmp_results.append(-1)
        if '=' in op_symbol:
            self._positive_cmp_results.append(0)

    def _matches_value(self, other_value):
        """decide whether an integer value matches"""
        other_value = int(other_value)
        # Three-way comparison yielding -1, 0 or 1. This replaces
        # int.__cmp__, which does not exist on Python 3 and returns
        # NotImplemented when comparing int with long on Python 2.
        cmp_result = (self._value > other_value) - (self._value < other_value)
        return cmp_result in self._positive_cmp_results

    def matches(self, lazy_file):
        raise NotImplementedError()
class SizeFilter(AbstractNumericFilter):
    """Numeric filter comparing the size of a file in bytes"""

    def matches(self, lazy_file):
        """apply AbstractNumericFilter to file size"""
        file_size = lazy_file.size
        if file_size is None:
            # No size available. Don't return vanished files.
            # Other cases are a problem, and should be included
            return lazy_file.stat_status != "file vanished"
        return self._matches_value(file_size)
class AgeFilter(AbstractNumericFilter):
    """Numeric filter comparing the age of a file in seconds"""

    def matches(self, lazy_file):
        """apply AbstractNumericFilter to file age"""
        file_age = lazy_file.age
        if file_age is None:
            # No age available. Don't return vanished files.
            # Other cases are a problem, and should be included
            return lazy_file.stat_status != "file vanished"
        return self._matches_value(file_age)
class RegexFilter(AbstractFilter):
    """Filter accepting files whose path matches a regular expression

    The expression is applied using `match`, i.e. it has to match from
    the beginning of the file's path.
    """

    def __init__(self, regex_pattern):
        super(RegexFilter, self).__init__()
        LOGGER.debug("initializing with pattern: %r", regex_pattern)
        # Byte strings (the native `str` on Python 2) are decoded to text;
        # `bytes` exists on both Python 2 and 3, unlike `unicode`.
        if isinstance(regex_pattern, bytes):
            regex_pattern = regex_pattern.decode('utf8')
        self._regex = re.compile(regex_pattern, re.UNICODE)

    def matches(self, lazy_file):
        """Return True if the path of `lazy_file` matches the expression"""
        return bool(self._regex.match(lazy_file.path))
class InverseRegexFilter(RegexFilter):
    """Filter accepting files whose path does *not* match the expression"""

    def matches(self, lazy_file):
        """Return True if the path of `lazy_file` does not match"""
        return self._regex.match(lazy_file.path) is None
def get_file_filters(config):
    """Create the filters configured in one configuration section.

    Every option of `config` starting with 'filter_' results in one filter
    object; the remainder of the option name selects the type of filter.

    Returns a list of filters in which all regex based filters precede the
    others. Raises ValueError for an unknown filter type.
    """
    prefix = 'filter_'
    filters = []
    for key, spec_string in config.items():
        if not key.startswith(prefix):
            continue
        variety = key[len(prefix):]
        LOGGER.debug("found filter spec: %r", (variety, spec_string))
        known_types = {
            "regex": RegexFilter,
            "regex_inverse": InverseRegexFilter,
            "size": SizeFilter,
            "age": AgeFilter,
        }
        if variety not in known_types:
            raise ValueError("unknown filter type: %r" % variety)
        filters.append(known_types[variety](spec_string))

    # add regex filters first to save stat calls
    regex_filters = [flt for flt in filters if isinstance(flt, RegexFilter)]
    other_filters = [flt for flt in filters if not isinstance(flt, RegexFilter)]
    return regex_filters + other_filters
def iter_filtered_files(file_filters, iterator):
    """Yield those items of `iterator` that are matched by all filters.

    The filters are applied in the given order, and a file is dropped as
    soon as the first filter does not match it.
    """
    for candidate in iterator:
        if not all(flt.matches(candidate) for flt in file_filters):
            continue
        LOGGER.debug("matched all filters: %r", candidate)
        yield candidate
390 # .--Grouping------------------------------------------------------------.
391 # | ____ _ |
392 # | / ___|_ __ ___ _ _ _ __ (_)_ __ __ _ |
393 # | | | _| '__/ _ \| | | | '_ \| | '_ \ / _` | |
394 # | | |_| | | | (_) | |_| | |_) | | | | | (_| | |
395 # | \____|_| \___/ \__,_| .__/|_|_| |_|\__, | |
396 # | |_| |___/ |
397 # +----------------------------------------------------------------------+
398 # | |
399 # '----------------------------------------------------------------------'
def grouping_single_group(section_name, files_iter):
    """Put all files of a section into one group named like the section.

    Yields exactly one (group name, files iterator) pair.
    """
    single_group = (section_name, files_iter)
    yield single_group
408 # .--Output--------------------------------------------------------------.
409 # | ___ _ _ |
410 # | / _ \ _ _| |_ _ __ _ _| |_ |
411 # | | | | | | | | __| '_ \| | | | __| |
412 # | | |_| | |_| | |_| |_) | |_| | |_ |
413 # | \___/ \__,_|\__| .__/ \__,_|\__| |
414 # | |_| |
415 # +----------------------------------------------------------------------+
416 # | |
417 # '----------------------------------------------------------------------'
def output_aggregator_count_only(group_name, files_iter):
    """Yield the output lines of a group for which files are only counted.

    The first line is the group header, the second one the repr of the
    summary dict containing the number of files.
    """
    yield '[[[count_only %s]]]' % group_name
    count = 0
    for _unused in files_iter:
        count += 1
    yield repr({"type": "summary", "count": count})
def output_aggregator_file_stats(group_name, files_iter):
    """Yield the output lines of a group reporting every single file.

    After the group header, one line is yielded per file (whatever its
    `dumps` method returns), followed by the repr of the summary dict
    containing the number of files.
    """
    yield "[[[file_stats %s]]]" % group_name
    total = 0
    for lazy_file in files_iter:
        total += 1
        yield lazy_file.dumps()
    yield repr({"type": "summary", "count": total})
def output_aggregator_extremes_only(group_name, files_iter):
    """Yield the output lines of a group reporting only the extreme files.

    After the group header, the youngest, the oldest, the smallest and the
    biggest file are reported, in this order and every file only once.
    The last line is the repr of the summary dict containing the number of
    *all* files of the group.

    A file without age or size (i.e. its stat failed) is ranked below all
    other files, so it is reported as youngest / smallest file.
    """
    yield "[[[extremes_only %s]]]" % group_name

    def rank(value):
        # Make None sort below every integer without ever comparing None to
        # an integer: Python 2 orders None first implicitly, while Python 3
        # raises a TypeError for that comparison.
        return (value is not None, value)

    count = 0
    min_age = max_age = min_size = max_size = None
    for count, lazy_file in enumerate(files_iter, 1):
        if count == 1:  # init
            min_age = max_age = min_size = max_size = lazy_file

        age = rank(lazy_file.age)
        if age < rank(min_age.age):
            min_age = lazy_file
        elif age > rank(max_age.age):
            max_age = lazy_file

        size = rank(lazy_file.size)
        if size < rank(min_size.size):
            min_size = lazy_file
        elif size > rank(max_size.size):
            max_size = lazy_file

    # Drop duplicates, but keep a fixed order (iterating over a set would
    # result in an arbitrary order of the output lines).
    extremes = []
    if count:
        for candidate in (min_age, max_age, min_size, max_size):
            if candidate not in extremes:
                extremes.append(candidate)

    for extreme_file in extremes:
        yield extreme_file.dumps()
    yield repr({"type": "summary", "count": count})
def get_output_aggregator(config):
    """Return the output aggregator selected by the 'output' option.

    Raises ValueError if the option is missing from `config` or does not
    name one of 'count_only', 'extremes_only' and 'file_stats'.
    """
    output_spec = config.get("output")
    aggregators = {
        "count_only": output_aggregator_count_only,
        "extremes_only": output_aggregator_extremes_only,
        "file_stats": output_aggregator_file_stats,
    }
    if output_spec not in aggregators:
        raise ValueError("unknown 'output' spec: %r" % output_spec)
    return aggregators[output_spec]
def write_output(groups, output_aggregator):
    """Write the aggregated output of all groups to stdout.

    `groups` is an iterable of (group name, files iterator) pairs. Every
    line created by `output_aggregator` for a group is written to stdout,
    terminated by a newline.
    """
    for name, files in groups:
        for output_line in output_aggregator(name, files):
            sys.stdout.write("%s\n" % output_line)
475 # .--Main----------------------------------------------------------------.
476 # | __ __ _ |
477 # | | \/ | __ _(_)_ __ |
478 # | | |\/| |/ _` | | '_ \ |
479 # | | | | | (_| | | | | | |
480 # | |_| |_|\__,_|_|_| |_| |
481 # | |
482 # +----------------------------------------------------------------------+
483 # | |
484 # '----------------------------------------------------------------------'
def iter_config_section_dicts(cfg_file=None):
    """Yield (section name, options dict) for every section of the config.

    `cfg_file` is the path of the configuration file, with DEFAULT_CFG_FILE
    being used if it is None. The options of every section include the
    defaults given by DEFAULT_CFG_SECTION.
    """
    path = DEFAULT_CFG_FILE if cfg_file is None else cfg_file
    parser = configparser.ConfigParser(DEFAULT_CFG_SECTION)
    LOGGER.debug("trying to read %r", path)
    files_read = parser.read(path)
    LOGGER.info("read configration file(s): %r", files_read)

    for section_name in parser.sections():
        section = dict(
            (key, parser.get(section_name, key)) for key in parser.options(section_name)
        )
        yield section_name, section
def main():
    """Run the plugin: write one filestats agent section to stdout.

    After the section header, every section of the configuration file is
    processed in the four phases input, filtering, grouping and output.
    """
    options = parse_arguments()

    sys.stdout.write('<<<filestats:sep(0)>>>\n')
    for section_name, config in iter_config_section_dicts(options['cfg_file']):

        # phase 1: input
        all_files = get_file_iterator(config)

        # phase 2: filtering
        file_filters = get_file_filters(config)
        matching_files = iter_filtered_files(file_filters, all_files)

        # phase 3: grouping (currently always one group per section)
        groups = grouping_single_group(section_name, matching_files)

        # phase 4: output
        aggregator = get_output_aggregator(config)
        write_output(groups, aggregator)


if __name__ == "__main__":
    main()