2 # -*- encoding: utf-8; py-indent-offset: 4 -*-
3 # +------------------------------------------------------------------+
4 # | ____ _ _ __ __ _ __ |
5 # | / ___| |__ ___ ___| | __ | \/ | |/ / |
6 # | | | | '_ \ / _ \/ __| |/ / | |\/| | ' / |
7 # | | |___| | | | __/ (__| < | | | | . \ |
8 # | \____|_| |_|\___|\___|_|\_\___|_| |_|_|\_\ |
10 # | Copyright Mathias Kettner 2018 mk@mathias-kettner.de |
11 # +------------------------------------------------------------------+
13 # This file is part of Check_MK.
14 # The official homepage is at http://mathias-kettner.de/check_mk.
16 # check_mk is free software; you can redistribute it and/or modify it
17 # under the terms of the GNU General Public License as published by
18 # the Free Software Foundation in version 2. check_mk is distributed
19 # in the hope that it will be useful, but WITHOUT ANY WARRANTY; with-
20 # out even the implied warranty of MERCHANTABILITY or FITNESS FOR A
21 # PARTICULAR PURPOSE. See the GNU General Public License for more de-
22 # tails. You should have received a copy of the GNU General Public
23 # License along with GNU Make; see the file COPYING. If not, write
24 # to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
25 # Boston, MA 02110-1301 USA.
26 r
"""Check_MK Agent Plugin: mk_filestats
28 This is a Check_MK Agent plugin. If configured, it will be called by the
29 agent without any arguments.
31 usage: mk_filestats [OPTIONS]
34 -h, --help Show this help message and exit
35 -v, -vv Increase verbosity
36 -c, --config-file Read config file
37 (default: $MK_CONFDIR/filestats.cfg)
41 This plugin is configured using ini-style configuration files, i.e. a file with
42 sections started by lines of the form '[Section Name]' and consisting of
43 'key: value' (key-colon-space-value) lines.
45 Every section will be processed individually, and this processing can be
46 described by the following four phases:
This phase will gather (iterators returning) the files the plugin will
initially be aware of.
Option keys concerning this phase are prefixed with 'input_'. Currently, there
is only one option available (which therefore must be present):
Here you can specify one or more *globbing* patterns. If more than one
pattern is provided, they will be split according to shell rules
(using shlex.split). Every matching file will be dealt with, every
matching folder will be searched recursively for *all* files.
This phase will filter the input files according to filters provided using
the option keys starting with 'filter_' of the corresponding configuration
section. The following are available (note that regex filters will always
be applied before other types of filters):
63 * ``filter_regex: regular_expression''
Only further process a file, if its full path matches the given regular
expression. Everything following the characters 'filter_regex: ' will
be considered one single regular expression.
67 * ``filter_regex_inverse: regular_expression''
68 Only further process a file, if its full path *does not* match the given
70 * ``filter_size: specification''
71 Only further process a file, if its size in bytes matches the provided
72 specification. The specification consists of one of the operators '>',
73 '<', '>=', '<=' and '==', directly followed by an integer.
74 E.g.: 'filter_size: <43008' will only match files smaller than 42 KB.
75 * ``filter_age: specification''
76 Only further process a file, if its age in seconds matches the filter.
Currently every section in the configuration file will result in one
group in the produced output (indicated by '[[[output_type group_name]]]',
where the group name will be taken from the section's name in the config
83 Future versions may provide means to create more than one group per
84 section (grouped by subfolder, for instance).
86 You can choose from three different ways the output will be aggregated:
87 * ``output: file_stats''
Output the full information for every single file that is processed.
90 * ``output: count_only''
91 Only count the files matching all of the provided filters. Unless
92 required for the filtering operation, no stat call on the files is
94 * ``output: extremes_only''
Only report the youngest, oldest, smallest, and biggest files. In case
checks only require this information, we can significantly reduce data.
98 You should find an example configuration file at
99 '../cfg_examples/mk_filestats.cfg' relative to this file.
# A missing module raises ImportError (not NameError), so that is what has
# to be caught for the Python 3 fallback to take effect.
try:
    import ConfigParser as configparser
except ImportError:  # Python3
    import configparser
# Configuration file used when none is given on the command line; the
# directory is taken from the MK_CONFDIR environment variable (empty if unset).
DEFAULT_CFG_FILE = os.path.join(os.getenv('MK_CONFDIR', ''), "filestats.cfg")

# Option defaults applied to every configuration section.
DEFAULT_CFG_SECTION = {"output": "file_stats"}

# Numeric filter specification: a run of '<', '>', '=' followed by the value.
FILTER_SPEC_PATTERN = re.compile('(?P<operator>[<>=]+)(?P<value>.+)')

LOGGER = logging.getLogger(__name__)
def parse_arguments(argv=None):
    """Evaluate the command line and set up logging accordingly.

    Args:
        argv: list of command line arguments; defaults to ``sys.argv[1:]``.

    Returns:
        A dictionary with the key 'cfg_file' holding the path of the
        configuration file to read.

    Side effects:
        Writes the module documentation to stderr and exits for -h/--help,
        configures the logging module for -v/--verbose/-vv, and exits with
        status 1 if -c/--config-file is given without a value.
    """
    if argv is None:
        argv = sys.argv[1:]

    parsed_args = {}

    if "-h" in argv or "--help" in argv:
        sys.stderr.write(__doc__)
        sys.exit(0)

    if "-v" in argv or "--verbose" in argv:
        logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
    elif "-vv" in argv:
        # "--verbose" is already handled by the branch above.
        logging.basicConfig(level=logging.DEBUG,
                            format="%(levelname)s: %(lineno)s: %(message)s")
    else:
        # No verbosity requested: keep our records away from the root logger.
        LOGGER.propagate = False

    parsed_args['cfg_file'] = DEFAULT_CFG_FILE
    for opt in ("-c", "--config-file"):
        if opt not in argv:
            continue
        try:
            parsed_args['cfg_file'] = argv[argv.index(opt) + 1]
        except IndexError:
            sys.stderr.write("missing value for option %r\n" % opt)
            sys.exit(1)

    return parsed_args
class LazyFileStats(object):
    """Wrapper around os.stat

    Only call os.stat once, and not until corresponding attributes
    are actually needed.

    Attributes:
        path: absolute path of the file (text).
        stat_status: None until os.stat was attempted; afterwards 'ok',
            'file vanished' or the text of the error that occurred.
    """

    def __init__(self, path):
        super(LazyFileStats, self).__init__()
        LOGGER.debug("Creating LazyFileStats(%r)", path)
        # Byte strings are decoded so the path is always held as text
        # (``bytes`` is ``str`` on Python 2, so this covers both versions).
        if isinstance(path, bytes):
            path = path.decode('utf8')
        self.path = os.path.abspath(path)
        self.stat_status = None
        self._size = None
        self._age = None
        self._m_time = None

    def _stat(self):
        """Call os.stat unless that has been attempted before."""
        if self.stat_status is not None:
            return

        LOGGER.debug("os.stat(%r)", self.path)

        path = self.path.encode('utf8')
        try:
            stat = os.stat(path)
        except OSError as exc:
            # errno 2 is ENOENT: the file is gone
            self.stat_status = "file vanished" if exc.errno == 2 else str(exc)
            return

        try:
            self._size = int(stat.st_size)
        except ValueError as exc:
            self.stat_status = str(exc)
            return

        try:
            self._m_time = int(stat.st_mtime)
            self._age = int(time.time()) - self._m_time
        except ValueError as exc:
            self.stat_status = str(exc)
            return

        self.stat_status = 'ok'

    @property
    def size(self):
        """File size as reported by os.stat, or None if stat failed."""
        self._stat()
        return self._size

    @property
    def age(self):
        """Seconds since the last modification, or None if stat failed."""
        self._stat()
        return self._age

    def __repr__(self):
        return "LazyFileStats(%r)" % self.path

    def dumps(self):
        """Return the repr of a dictionary describing this file."""
        # NOTE(review): only the "stat_status" and "mtime" entries were
        # visible when this block was reviewed; the remaining keys are
        # reconstructed -- confirm against the consumer of this output.
        data = {
            "type": "file",
            "path": self.path,
            "stat_status": self.stat_status,
            "size": self.size,
            "age": self.age,
            "mtime": self._m_time,
        }
        return repr(data)
223 # .--Input---------------------------------------------------------------.
225 # | |_ _|_ __ _ __ _ _| |_ |
226 # | | || '_ \| '_ \| | | | __| |
227 # | | || | | | |_) | |_| | |_ |
228 # | |___|_| |_| .__/ \__,_|\__| |
230 # +----------------------------------------------------------------------+
232 # '----------------------------------------------------------------------'
class PatternIterator(object):
    """Recursively iterate over all files

    Iterating over an instance yields one LazyFileStats object for every
    file matched by one of the globbing patterns, descending into matched
    non-file entries.
    """

    def __init__(self, pattern_list):
        """Store the patterns, expanding a leading '~' in each of them."""
        super(PatternIterator, self).__init__()
        self._patterns = [os.path.expanduser(p) for p in pattern_list]

    def _iter_files(self, pattern):
        """Yield LazyFileStats for all files below the given glob pattern."""
        for item in glob.iglob(pattern):
            if os.path.isfile(item):
                yield LazyFileStats(item)
            # for now, we recurse unconditionally
            else:
                for lazy_file in self._iter_files(os.path.join(item, '*')):
                    yield lazy_file

    def __iter__(self):
        for pat in self._patterns:
            LOGGER.info("processing pattern: %r", pat)
            for lazy_file in self._iter_files(pat):
                yield lazy_file
def get_file_iterator(config):
    """get a LazyFileStats iterator

    Args:
        config: dictionary of the options of one configuration section.

    Returns:
        A PatternIterator over the patterns given by 'input_patterns'.

    Raises:
        ValueError: if there is no 'input_*' option, more than one, or one
            of an unknown type.
    """
    prefix = 'input_'
    input_specs = [(k[len(prefix):], v) for k, v in config.items() if k.startswith(prefix)]
    if not input_specs:
        raise ValueError("missing input definition")
    if len(input_specs) != 1:  # currently not supported
        raise ValueError("multiple input definitions: %r" % input_specs)
    variety, spec_string = input_specs[0]
    if variety != "patterns":
        raise ValueError("unknown input type: %r" % variety)
    # several patterns are separated according to shell rules
    patterns = shlex.split(spec_string)
    return PatternIterator(patterns)
273 # .--Filtering-----------------------------------------------------------.
275 # | | ___(_) | |_ ___ _ __(_)_ __ __ _ |
276 # | | |_ | | | __/ _ \ '__| | '_ \ / _` | |
277 # | | _| | | | || __/ | | | | | | (_| | |
278 # | |_| |_|_|\__\___|_| |_|_| |_|\__, | |
280 # +----------------------------------------------------------------------+
282 # '----------------------------------------------------------------------'
class AbstractFilter(object):
    """Interface every file filter has to implement"""

    def matches(self, lazy_file):
        """Tell whether the given file passes this filter (boolean)"""
        raise NotImplementedError()
class AbstractNumericFilter(AbstractFilter):
    """Common code for filtering by comparing integers"""

    def __init__(self, spec_string):
        """Parse a specification like '<43008' or '>=10'.

        Raises:
            ValueError: if the specification cannot be parsed, its value is
                not an integer or the operator is unknown.
        """
        super(AbstractNumericFilter, self).__init__()
        match = FILTER_SPEC_PATTERN.match(spec_string)
        if match is None:
            raise ValueError("unable to parse filter spec: %r" % spec_string)
        comparator, value = match.group('operator'), match.group('value')
        self._value = int(value)
        if comparator not in ('<', '<=', '>', '>=', '=='):
            raise ValueError("unknown operator for numeric filter: %r" % comparator)
        # Results of comparing the reference value against a file's value
        # (1: reference is greater, -1: smaller, 0: equal) that count as match.
        self._positive_cmp_results = []
        if '<' in comparator:
            self._positive_cmp_results.append(1)
        if '>' in comparator:
            self._positive_cmp_results.append(-1)
        if '=' in comparator:
            self._positive_cmp_results.append(0)

    def _matches_value(self, other_value):
        """decide whether an integer value matches"""
        other = int(other_value)
        # portable three-way comparison (int.__cmp__ is gone in Python 3)
        cmp_result = (self._value > other) - (self._value < other)
        return cmp_result in self._positive_cmp_results

    def matches(self, lazy_file):
        raise NotImplementedError()
class SizeFilter(AbstractNumericFilter):
    """Filter files by comparing their size to the configured value"""

    def matches(self, lazy_file):
        """apply AbstractNumericFilter to file size"""
        size = lazy_file.size
        if size is not None:
            return self._matches_value(size)
        # Don't return vanished files.
        # Other cases are a problem, and should be included
        return lazy_file.stat_status != "file vanished"
class AgeFilter(AbstractNumericFilter):
    """Filter files by comparing their age to the configured value"""

    def matches(self, lazy_file):
        """apply AbstractNumericFilter to file age"""
        age = lazy_file.age
        if age is not None:
            return self._matches_value(age)
        # Don't return vanished files.
        # Other cases are a problem, and should be included
        return lazy_file.stat_status != "file vanished"
class RegexFilter(AbstractFilter):
    """Let files pass whose path matches a regular expression"""

    def __init__(self, regex_pattern):
        """Compile the pattern; byte strings are decoded as UTF-8 first."""
        super(RegexFilter, self).__init__()
        LOGGER.debug("initializing with pattern: %r", regex_pattern)
        # ``bytes`` is ``str`` on Python 2, so this check works on 2 and 3,
        # unlike a reference to the Python 2-only ``unicode`` builtin.
        if isinstance(regex_pattern, bytes):
            regex_pattern = regex_pattern.decode('utf8')
        self._regex = re.compile(regex_pattern, re.UNICODE)

    def matches(self, lazy_file):
        # re.match: the expression has to match from the start of the path
        return bool(self._regex.match(lazy_file.path))
class InverseRegexFilter(RegexFilter):
    """Let files pass whose path does *not* match the regular expression"""

    def matches(self, lazy_file):
        found = self._regex.match(lazy_file.path)
        return found is None
def get_file_filters(config):
    """Create the filter objects configured for one section.

    Args:
        config: dictionary of the options of one configuration section;
            every key starting with 'filter_' defines one filter.

    Returns:
        List of filter instances, regex based filters first.

    Raises:
        ValueError: for an unknown filter type (the individual filters may
            raise on invalid specifications as well).
    """
    filter_types = {
        "regex": RegexFilter,
        "regex_inverse": InverseRegexFilter,
        "size": SizeFilter,
        "age": AgeFilter,
    }
    prefix = 'filter_'
    filter_specs = ((k[len(prefix):], v) for k, v in config.items() if k.startswith(prefix))

    filters = []
    for variety, spec_string in filter_specs:
        LOGGER.debug("found filter spec: %r", (variety, spec_string))
        try:
            filter_type = filter_types[variety]
        except KeyError:
            raise ValueError("unknown filter type: %r" % variety)
        filters.append(filter_type(spec_string))

    # add regex filters first to save stat calls
    return sorted(filters, key=lambda x: not isinstance(x, RegexFilter))
def iter_filtered_files(file_filters, iterator):
    """Yield those items of iterator that are matched by every filter.

    Args:
        file_filters: iterable of objects providing ``matches(lazy_file)``;
            it is evaluated once per file, so it must be re-iterable.
        iterator: iterable of the files to examine.
    """
    for lazy_file in iterator:
        if all(f.matches(lazy_file) for f in file_filters):
            LOGGER.debug("matched all filters: %r", lazy_file)
            yield lazy_file
390 # .--Grouping------------------------------------------------------------.
392 # | / ___|_ __ ___ _ _ _ __ (_)_ __ __ _ |
393 # | | | _| '__/ _ \| | | | '_ \| | '_ \ / _` | |
394 # | | |_| | | | (_) | |_| | |_) | | | | | (_| | |
395 # | \____|_| \___/ \__,_| .__/|_|_| |_|\__, | |
397 # +----------------------------------------------------------------------+
399 # '----------------------------------------------------------------------'
def grouping_single_group(section_name, files_iter):
    """Put all files of a section into one group named like the section"""
    single_group = (section_name, files_iter)
    yield single_group
408 # .--Output--------------------------------------------------------------.
410 # | / _ \ _ _| |_ _ __ _ _| |_ |
411 # | | | | | | | | __| '_ \| | | | __| |
412 # | | |_| | |_| | |_| |_) | |_| | |_ |
413 # | \___/ \__,_|\__| .__/ \__,_|\__| |
415 # +----------------------------------------------------------------------+
417 # '----------------------------------------------------------------------'
def output_aggregator_count_only(group_name, files_iter):
    """Yield the group header followed by a summary holding the file count"""
    yield '[[[count_only %s]]]' % group_name
    count = 0
    for _unused in files_iter:
        count += 1
    yield repr({"type": "summary", "count": count})
def output_aggregator_file_stats(group_name, files_iter):
    """Yield the group header, one line per file and a final summary.

    Args:
        group_name: name to put into the group header.
        files_iter: iterable of objects providing ``dumps()``.
    """
    yield "[[[file_stats %s]]]" % group_name
    # must be defined even if files_iter turns out to be empty
    count = 0
    for count, lazy_file in enumerate(files_iter, 1):
        yield lazy_file.dumps()
    yield repr({"type": "summary", "count": count})
def output_aggregator_extremes_only(group_name, files_iter):
    """Yield the group header, the extreme files and a final summary.

    Only the files with the lowest/highest age and the smallest/biggest size
    are reported (each file once, even if it is extreme in several
    respects), followed by a summary holding the number of files seen.

    Args:
        group_name: name to put into the group header.
        files_iter: iterable of objects providing ``age``, ``size`` and
            ``dumps()``.
    """
    yield "[[[extremes_only %s]]]" % group_name

    # must be defined even if files_iter turns out to be empty
    count = 0
    for count, lazy_file in enumerate(files_iter, 1):
        if count == 1:  # init
            min_age = max_age = min_size = max_size = lazy_file
        if lazy_file.age < min_age.age:
            min_age = lazy_file
        elif lazy_file.age > max_age.age:
            max_age = lazy_file
        if lazy_file.size < min_size.size:
            min_size = lazy_file
        elif lazy_file.size > max_size.size:
            max_size = lazy_file

    # Report every extreme file once, in a reproducible order.
    extremes = []
    if count:
        for candidate in (min_age, max_age, min_size, max_size):
            if candidate not in extremes:
                extremes.append(candidate)
    for extreme_file in extremes:
        yield extreme_file.dumps()
    yield repr({"type": "summary", "count": count})
def get_output_aggregator(config):
    """Return the aggregator function selected by the 'output' option.

    Args:
        config: dictionary of the options of one configuration section.

    Raises:
        ValueError: if the 'output' option is missing or unknown.
    """
    output_spec = config.get("output")
    aggregators = {
        "count_only": output_aggregator_count_only,
        "extremes_only": output_aggregator_extremes_only,
        "file_stats": output_aggregator_file_stats,
    }
    try:
        return aggregators[output_spec]
    except KeyError:
        raise ValueError("unknown 'output' spec: %r" % output_spec)
def write_output(groups, output_aggregator):
    """Write the aggregated lines of every group to stdout, one per line"""
    for name, files in groups:
        rendered = output_aggregator(name, files)
        for entry in rendered:
            sys.stdout.write("%s\n" % entry)
475 # .--Main----------------------------------------------------------------.
477 # | | \/ | __ _(_)_ __ |
478 # | | |\/| |/ _` | | '_ \ |
479 # | | | | | (_| | | | | | |
480 # | |_| |_|\__,_|_|_| |_| |
482 # +----------------------------------------------------------------------+
484 # '----------------------------------------------------------------------'
def iter_config_section_dicts(cfg_file=None):
    """Iterate over the sections of the configuration file.

    Args:
        cfg_file: path of the configuration file; DEFAULT_CFG_FILE is used
            if it is None.

    Yields:
        Tuples of the section name and a dictionary of the section's
        options (including the defaults from DEFAULT_CFG_SECTION).
    """
    if cfg_file is None:
        cfg_file = DEFAULT_CFG_FILE
    config = configparser.ConfigParser(DEFAULT_CFG_SECTION)
    LOGGER.debug("trying to read %r", cfg_file)
    files_read = config.read(cfg_file)
    LOGGER.info("read configration file(s): %r", files_read)

    for section_name in config.sections():
        options = config.options(section_name)
        yield section_name, {k: config.get(section_name, k) for k in options}
def main():
    """Process every configuration section and write the agent output.

    The section header is written first; then, for every section of the
    configuration file, the files are gathered, filtered, grouped and
    finally aggregated and written to stdout.
    """
    args = parse_arguments()

    sys.stdout.write('<<<filestats:sep(0)>>>\n')
    for section_name, config in iter_config_section_dicts(args['cfg_file']):
        # 1 input
        files_iter = get_file_iterator(config)

        # 2 filtering
        filters = get_file_filters(config)
        filtered_files = iter_filtered_files(filters, files_iter)

        # 3 grouping
        grouper = grouping_single_group
        groups = grouper(section_name, filtered_files)

        # 4 output
        output_aggregator = get_output_aggregator(config)
        write_output(groups, output_aggregator)


if __name__ == "__main__":
    main()