agents/plugins/mk_sap

   1 #!/usr/bin/python
   2 # -*- encoding: utf-8; py-indent-offset: 4 -*-
   3 # +------------------------------------------------------------------+
   4 # |             ____ _               _        __  __ _  __           |
   5 # |            / ___| |__   ___  ___| | __   |  \/  | |/ /           |
   6 # |           | |   | '_ \ / _ \/ __| |/ /   | |\/| | ' /            |
   7 # |           | |___| | | |  __/ (__|   <    | |  | | . \            |
   8 # |            \____|_| |_|\___|\___|_|\_\___|_|  |_|_|\_\           |
   9 # |                                                                  |
  10 # | Copyright Mathias Kettner 2014             mk@mathias-kettner.de |
  11 # +------------------------------------------------------------------+
  12 #
  13 # This file is part of Check_MK.
  14 # The official homepage is at http://mathias-kettner.de/check_mk.
  15 #
  16 # check_mk is free software;  you can redistribute it and/or modify it
  17 # under the  terms of the  GNU General Public License  as published by
  18 # the Free Software Foundation in version 2.  check_mk is  distributed
  19 # in the hope that it will be useful, but WITHOUT ANY WARRANTY;  with-
  20 # out even the implied warranty of  MERCHANTABILITY  or  FITNESS FOR A
  21 # PARTICULAR PURPOSE. See the  GNU General Public License for more de-
  22 # tails. You should have  received  a copy of the  GNU  General Public
  23 # License along with GNU Make; see the file  COPYING.  If  not,  write
  24 # to the Free Software Foundation, Inc., 51 Franklin St,  Fifth Floor,
  25 # Boston, MA 02110-1301 USA.
  26
  27 # This agent plugin has been built to collect information from SAP R/3 systems
  28 # using RFC calls. It needs the python module sapnwrfc (available in Check_MK
  29 # git at agents/sap/sapnwrfc) and the nwrfcsdk (can be downloaded from SAP
  30 # download portal) installed to be working. You can configure the agent plugin
  31 # using the configuration file /etc/check_mk/sap.cfg (a sample file can be
  32 # found in Check_MK git at agents/sap/sap.cfg) to tell it how to connect to
  33 # your SAP instance and which values you want to fetch from your system to be
  34 # forwarded to and checked by Check_MK.
  35 #
  36 # This current agent has been developed and tested with:
  37 #   python-sapnwrfc-0.19
  38 #
  39 # During development the "CCMS_Doku.pdf" was really helpful.
  40
  41 import os
  42 import sys
  43 import time
  44 import fcntl
  45 import fnmatch
  46 import datetime
  47
  48 # sapnwrfc needs to know where the libs are located. During
  49 # development the import failed, since the module did not
  50 # find the libraries. So we preload the library to have it
  51 # already loaded.
  52 try:
  53     import sapnwrfc
  54 except ImportError, e:
  55     if 'sapnwrfc.so' in str(e):
  56         sys.stderr.write(
  57             'Unable to find the library sapnwrfc.so. Maybe you need to put a file pointing to\n'
  58             'the sapnwrfc library directory into the /etc/ld.so.conf.d directory. For example\n'
  59             'create the file /etc/ld.so.conf.d/sapnwrfc.conf containing the path\n'
  60             '"/usr/sap/nwrfcsdk/lib" and run "ldconfig" afterwards.\n')
  61         sys.exit(1)
  62     elif 'No module named sapnwrfc' in str(e):
  63         sys.stderr.write("Missing the Python module sapnwfrc.\n")
  64         sys.exit(1)
  65     else:
  66         raise
  67
  68 # #############################################################################
  69
  70 # This sign is used to separate the path parts given in the config
  71 SEPARATOR = '/'
  72
  73 # This are the different classes of monitoring objects which
  74 # can be found in the tree.
  75 #
  76 # Summarizs information from several subnodes
  77 MTE_SUMMARY = '050'
  78 # A monitoring object which has several subnodes which lead to the status
  79 # of this object. For example it is the "CPU" object on a host
  80 MTE_MON_OBJ = '070'
  81 # Contains performance information (which can be used to create graphs from)
  82 MTE_PERFORMANCE = '100'
  83 # Might contain several messages
  84 MTE_MSG_CONTAINER = '101'
  85 # Contains a single status message
  86 MTE_SINGLE_MSG = '102'
  87 # This is a long text label without status
  88 MTE_LONG_TXT = '110'
  89 # This is a short text label without status
  90 MTE_SHORT_TXT = '111'
  91 # Is a "folder" which has no own state, just computed by its childs
  92 MTE_VIRTUAL = '199'
  93
  94 # This map converts between the SAP color codes (key values) and the
  95 # nagios state codes and strings
  96 STATE_VALUE_MAP = {
  97     0: (0, 'OK'),  # GRAY  (inactive or no current info available) -> OK
  98     1: (0, 'OK'),  # GREEN  -> OK
  99     2: (1, 'WARN'),  # YELLOW -> WARNING
 100     3: (2, 'CRIT'),  # RED    -> CRITICAL
 101 }
 102
 103 STATE_LOGWATCH_MAP = ['O', 'O', 'W', 'C']
 104
 105 # Monitoring objects of these classes are skipped during processing
 106 SKIP_MTCLASSES = [
 107     MTE_VIRTUAL,
 108     MTE_SUMMARY,
 109     MTE_MON_OBJ,
 110     MTE_SHORT_TXT,
 111     MTE_LONG_TXT,
 112 ]
 113
 114 MK_CONFDIR = os.getenv("MK_CONFDIR") or "/etc/check_mk"
 115 MK_VARDIR = os.getenv("MK_VARDIR") or "/var/lib/check_mk_agent"
 116
 117 STATE_FILE = MK_VARDIR + '/sap.state'
 118 state_file_changed = False
 119
 120 # #############################################################################
 121
 122 # Settings to be used to connect to the SAP R/3 host.
 123 local_cfg = {
 124     'ashost': 'localhost',
 125     'sysnr': '00',
 126     'client': '100',
 127     'user': '',
 128     'passwd': '',
 129     'trace': '3',
 130     'loglevel': 'warn',
 131     #'lang':     'EN',
 132     #'host_prefix': 'FOOBAR_',
 133 }
 134
 135 # A list of strings, while the string must match the full path to one or
 136 # several monitor objects. We use unix shell patterns during matching, so
 137 # you can use several chars as placeholders:
 138 #
 139 # *      matches everything
 140 # ?      matches any single character
 141 # [seq]  matches any character in seq
 142 # [!seq] matches any character not in seq
 143 #
 144 # The * matches the whole following string and does not end on next "/".
 145 # For examples, take a look at the default config file (/etc/check_mk/sap.cfg).
 146 monitor_paths = [
 147     'SAP CCMS Monitor Templates/Dialog Overview/*',
 148 ]
 149 monitor_types = []
 150 config_file = MK_CONFDIR + '/sap.cfg'
 151
 152 cfg = {}
 153 if os.path.exists(config_file):
 154     execfile(config_file)
 155     if isinstance(cfg, dict):
 156         cfg = [cfg]
 157 else:
 158     cfg = [local_cfg]
 159
 160 # Load the state file into memory
 161 try:
 162     states = eval(file(STATE_FILE).read())
 163 except IOError:
 164     states = {}
 165
 166 # index of all logfiles which have been found in a run. This is used to
 167 # remove logfiles which are not available anymore from the states dict.
 168 logfiles = []
 169 conn = None
 170
 171 # #############################################################################
 172
 173 #
 174 # HELPERS
 175 #
 176
 177
 178 def to_be_monitored(path, toplevel_match=False):
 179     for rule in monitor_paths:
 180         if toplevel_match and rule.count('/') > 1:
 181             rule = '/'.join(rule.split('/')[:2])
 182
 183         if fnmatch.fnmatch(path, rule):
 184             return True
 185     return False
 186
 187
 188 def node_path(tree, node, path=''):
 189     if path:
 190         path = node['MTNAMESHRT'].rstrip() + SEPARATOR + path
 191     else:
 192         path = node['MTNAMESHRT'].rstrip()
 193
 194     if node['ALPARINTRE'] > 0:
 195         parent_node = tree[node['ALPARINTRE'] - 1]
 196         return node_path(tree, parent_node, path)
 197     return path
 198
 199
 200 #
 201 # API ACCESS FUNCTIONS
 202 #
 203
 204
 205 def query(what, params, debug=False):
 206     fd = conn.discover(what)
 207
 208     if debug:
 209         sys.stdout.write("Name: %s Params: %s\n" % (fd.name, fd.handle.parameters))
 210         sys.stdout.write("Given-Params: %s\n" % params)
 211
 212     f = fd.create_function_call()
 213     for param_key, val in params.items():
 214         getattr(f, param_key)(val)
 215     f.invoke()
 216
 217     ret = f.RETURN.value
 218     if ret['TYPE'] == 'E':
 219         sys.stderr.write("ERROR: %s\n" % ret['MESSAGE'].strip())
 220
 221     return f
 222
 223
 224 def login():
 225     f = query(
 226         'BAPI_XMI_LOGON', {
 227             'EXTCOMPANY': 'Mathias Kettner GmbH',
 228             'EXTPRODUCT': 'Check_MK SAP Agent',
 229             'INTERFACE': 'XAL',
 230             'VERSION': '1.0',
 231         })
 232     #sys.stdout.write("%s\n" % f.RETURN)
 233     return f.SESSIONID.value
 234
 235
 236 def logout():
 237     query('BAPI_XMI_LOGOFF', {
 238         'INTERFACE': 'XAL',
 239     })
 240
 241
 242 def mon_list(cfg_entry):
 243     f = query("BAPI_SYSTEM_MON_GETLIST", {
 244         'EXTERNAL_USER_NAME': cfg_entry['user'],
 245     })
 246     l = []
 247     for mon in f.MONITOR_NAMES.value:
 248         l.append((mon["MS_NAME"].rstrip(), mon["MONI_NAME"].rstrip()))
 249     return l
 250
 251
 252 #def ms_list( cfg ):
 253 #    f = query("BAPI_SYSTEM_MS_GETLIST", {
 254 #        'EXTERNAL_USER_NAME': cfg['user'],
 255 #    })
 256 #    l = []
 257 #    for ms in f.MONITOR_SETS.value:
 258 #        l.append(ms['NAME'].rstrip())
 259 #    return l
 260
 261
 262 def mon_tree(cfg_entry, ms_name, mon_name):
 263     f = query(
 264         "BAPI_SYSTEM_MON_GETTREE", {
 265             'EXTERNAL_USER_NAME': cfg_entry['user'],
 266             'MONITOR_NAME': {
 267                 "MS_NAME": ms_name,
 268                 "MONI_NAME": mon_name
 269             },
 270         })
 271     tree = f.TREE_NODES.value
 272     for node in tree:
 273         node['PATH'] = ms_name + SEPARATOR + node_path(tree, node)
 274     return tree
 275
 276
 277 def tid(node):
 278     return {
 279         'MTSYSID': node['MTSYSID'].strip(),
 280         'MTMCNAME': node['MTMCNAME'].strip(),
 281         'MTNUMRANGE': node['MTNUMRANGE'].strip(),
 282         'MTUID': node['MTUID'].strip(),
 283         'MTCLASS': node['MTCLASS'].strip(),
 284         'MTINDEX': node['MTINDEX'].strip(),
 285         'EXTINDEX': node['EXTINDEX'].strip(),
 286     }
 287
 288
 289 def mon_perfdata(cfg_entry, node):
 290     f = query('BAPI_SYSTEM_MTE_GETPERFCURVAL', {
 291         'EXTERNAL_USER_NAME': cfg_entry['user'],
 292         'TID': tid(node),
 293     })
 294     value = f.CURRENT_VALUE.value['LASTPERVAL']
 295
 296     f = query('BAPI_SYSTEM_MTE_GETPERFPROP', {
 297         'EXTERNAL_USER_NAME': cfg_entry['user'],
 298         'TID': tid(node),
 299     })
 300     if f.PROPERTIES.value['DECIMALS'] != 0:
 301         value = (value + 0.0) / 10**f.PROPERTIES.value['DECIMALS']
 302     uom = f.PROPERTIES.value['VALUNIT'].strip()
 303
 304     return value, uom
 305
 306
 307 def mon_msg(cfg_entry, node):
 308     f = query('BAPI_SYSTEM_MTE_GETSMVALUE', {
 309         'EXTERNAL_USER_NAME': cfg_entry['user'],
 310         'TID': tid(node),
 311     })
 312     data = f.VALUE.value
 313     dt = parse_dt(data['SMSGDATE'], data['SMSGTIME'])
 314     return (dt, data['MSG'].strip())
 315
 316
 317 def parse_dt(d, t):
 318     d = d.strip()
 319     t = t.strip()
 320     if not d or not t:
 321         return None
 322     return datetime.datetime(*time.strptime(d + t, '%Y%m%d%H%M%S')[:6])
 323
 324
 325 def mon_alerts(cfg_entry, node):
 326     f = query('BAPI_SYSTEM_MTE_GETALERTS', {
 327         'EXTERNAL_USER_NAME': cfg_entry['user'],
 328         'TID': tid(node),
 329     })
 330     return f.ALERTS.value
 331
 332
 333 def aid(alert):
 334     return {
 335         "ALSYSID": alert["ALSYSID"],
 336         "MSEGNAME": alert["MSEGNAME"],
 337         "ALUNIQNUM": alert["ALUNIQNUM"],
 338         "ALINDEX": alert["ALINDEX"],
 339         "ALERTDATE": alert["ALERTDATE"],
 340         "ALERTTIME": alert["ALERTTIME"],
 341     }
 342
 343
 344 def alert_details(cfg_entry, alert):
 345     f = query('BAPI_SYSTEM_ALERT_GETDETAILS', {
 346         'EXTERNAL_USER_NAME': cfg_entry['user'],
 347         'AID': aid(alert),
 348     })
 349     #prop  = f.PROPERTIES.value
 350     state = f.VALUE.value
 351     msg = f.XMI_EXT_MSG.value['MSG'].strip()
 352     return state, msg
 353
 354
 355 def process_alerts(cfg_entry, logs, ms_name, mon_name, node, alerts):
 356     global state_file_changed
 357
 358     sid = node["MTSYSID"].strip() or 'Other'
 359     context = node["MTMCNAME"].strip() or 'Other'
 360     path = node["PATH"]
 361
 362     # Use the sid as hostname for the logs
 363     hostname = sid
 364     logfile = context + "/" + path
 365
 366     logfiles.append((hostname, logfile))
 367
 368     logs.setdefault(sid, {})
 369     logs[hostname][logfile] = []
 370     newest_log_dt = None
 371     for alert in alerts:
 372         dt = parse_dt(alert['ALERTDATE'], alert['ALERTTIME'])
 373
 374         if (hostname, logfile) in states and states[(hostname, logfile)] >= dt:
 375             continue  # skip log messages which are older than the last cached date
 376
 377         if not newest_log_dt or dt > newest_log_dt:
 378             newest_log_dt = dt  # store the newest log of this run
 379
 380         alert_state, alert_msg = alert_details(cfg_entry, alert)
 381         # Format lines to "logwatch" format
 382         logs[hostname][logfile].append('%s %s %s' % (STATE_LOGWATCH_MAP[alert_state['VALUE']],
 383                                                      dt.strftime("%Y-%m-%d %H:%M:%S"), alert_msg))
 384
 385     if newest_log_dt:
 386         # Write newest log age to cache to prevent double processing of logs
 387         states[(hostname, logfile)] = newest_log_dt
 388         state_file_changed = True
 389     return logs
 390
 391
 392 def check(cfg_entry):
 393     global conn
 394     conn = sapnwrfc.base.rfc_connect(cfg_entry)
 395     login()
 396
 397     logs = {}
 398     sap_data = {}
 399
 400     # This loop is used to collect all information from SAP
 401     for ms_name, mon_name in mon_list(cfg_entry):
 402         path = ms_name + SEPARATOR + mon_name
 403         if not to_be_monitored(path, True):
 404             continue
 405
 406         tree = mon_tree(cfg_entry, ms_name, mon_name)
 407         for node in tree:
 408             if not to_be_monitored(node['PATH']):
 409                 continue
 410             #sys.stdout.write("%s\n" % node["PATH"])
 411
 412             status_details = ''
 413             perfvalue = '-'
 414             uom = '-'
 415
 416             # Use precalculated states
 417             state = {
 418                 'VALUE': node['ACTUALVAL'],
 419                 'SEVERITY': node['ACTUALSEV'],
 420             }
 421
 422             if state['VALUE'] not in STATE_VALUE_MAP:
 423                 sys.stdout.write('UNHANDLED STATE VALUE\n')
 424                 sys.exit(1)
 425
 426             #
 427             # Handle different object classes individually
 428             # to get details about them
 429             #
 430
 431             if monitor_types and node['MTCLASS'] not in monitor_types:
 432                 continue  # Skip unwanted classes if class filtering is enabled
 433
 434             if node['MTCLASS'] == MTE_PERFORMANCE:
 435                 perfvalue, this_uom = mon_perfdata(cfg_entry, node)
 436                 uom = this_uom if this_uom else uom
 437
 438             elif node['MTCLASS'] == MTE_SINGLE_MSG:
 439                 status_details = "%s: %s" % mon_msg(cfg_entry, node)
 440
 441             elif node['MTCLASS'] == MTE_MSG_CONTAINER:
 442
 443                 alerts = mon_alerts(cfg_entry, node)
 444                 logs = process_alerts(cfg_entry, logs, ms_name, mon_name, node, alerts)
 445                 if len(alerts) > 0:
 446                     last_alert = alerts[-1]
 447                     dt = parse_dt(last_alert["ALERTDATE"], last_alert["ALERTTIME"])
 448                     alert_state, alert_msg = alert_details(cfg_entry, last_alert)
 449                     last_msg = '%s: %s - %s' % (dt, STATE_VALUE_MAP[alert_state['VALUE']][1],
 450                                                 alert_msg)
 451
 452                     status_details = '%d Messages, Last: %s' % (len(alerts), last_msg)
 453                 else:
 454                     status_details = 'The log is empty'
 455
 456             elif node['MTCLASS'] not in SKIP_MTCLASSES:
 457                 # Add an error to output on unhandled classes
 458                 status_details = "UNHANDLED MTCLASS", node['MTCLASS']
 459
 460             if node['MTCLASS'] not in SKIP_MTCLASSES:
 461                 sid = node["MTSYSID"].strip() or 'Other'
 462                 context = node["MTMCNAME"].strip() or 'Other'
 463                 path = node["PATH"]
 464
 465                 sap_data.setdefault(sid, [])
 466                 sap_data[sid].append(
 467                     "%s\t%d\t%3d\t%s\t%s\t%s\t%s" % (context, state['VALUE'], state['SEVERITY'],
 468                                                      path, perfvalue, uom, status_details))
 469
 470     for host, host_sap in sap_data.items():
 471         sys.stdout.write('<<<<%s%s>>>>\n' % (cfg_entry.get("host_prefix", ""), host))
 472         sys.stdout.write('<<<sap:sep(9)>>>\n')
 473         sys.stdout.write('%s\n' % '\n'.join(host_sap))
 474     sys.stdout.write('<<<<>>>>\n')
 475
 476     for host, host_logs in logs.items():
 477         sys.stdout.write('<<<<%s>>>>\n' % host)
 478         sys.stdout.write('<<<logwatch>>>\n')
 479         for log, lines in host_logs.items():
 480             sys.stdout.write('[[[%s]]]\n' % log)
 481             if lines:
 482                 sys.stdout.write('\n'.join(lines) + '\n')
 483         sys.stdout.write('<<<<>>>>\n')
 484
 485     logout()
 486     conn.close()
 487
 488
 489 # It is possible to configure multiple SAP instances to monitor. Loop them all, but
 490 # do not terminate when one connection failed
 491 processed_all = True
 492 try:
 493     for entry in cfg:
 494         try:
 495             check(entry)
 496             sys.stdout.write('<<<sap_state:sep(9)>>>\n%s\tOK\n' % entry['ashost'])
 497         except sapnwrfc.RFCCommunicationError, e:
 498             sys.stderr.write('ERROR: Unable to connect (%s)\n' % e)
 499             sys.stdout.write('<<<sap_state:sep(9)>>>\n%s\tUnable to connect (%s)\n' %\
 500                              (entry['ashost'], e))
 501             processed_all = False
 502         except Exception, e:
 503             sys.stderr.write('ERROR: Unhandled exception (%s)\n' % e)
 504             sys.stdout.write('<<<sap_state:sep(9)>>>\n%s\tUnhandled exception (%s)\n' %\
 505                              (entry['ashost'], e))
 506             processed_all = False
 507
 508     # Now check whether or not an old logfile needs to be removed. This can only
 509     # be done this way, when all hosts have been reached. Otherwise the cleanup
 510     # is skipped.
 511     if processed_all:
 512         for key in states.keys():
 513             if key not in logfiles:
 514                 state_file_changed = True
 515                 del states[key]
 516
 517     # Only write the state file once per run. And only when it has been changed
 518     if state_file_changed:
 519         new_file = STATE_FILE + '.new'
 520         state_fd = os.open(new_file, os.O_WRONLY | os.O_CREAT)
 521         fcntl.flock(state_fd, fcntl.LOCK_EX)
 522         os.write(state_fd, repr(states))
 523         os.close(state_fd)
 524         os.rename(STATE_FILE + '.new', STATE_FILE)
 525
 526 except Exception, e:
 527     sys.stderr.write('ERROR: Unhandled exception (%s)\n' % e)
 528
 529 sys.exit(0)