Refactoring: Changed all check parameters starting with an 'o' to the new rulespec...
[check_mk.git] / agents / plugins / mk_mongodb
blobd8fae9d7226403acc098afb20ca0eede9bded3d1
#!/usr/bin/python
# Monitor MongoDB on Linux
#
# This agent plugin creates various sections out of the MongoDB server status
# information.
# Important: 1) If MongoDB runs as a single instance, the agent data is assigned
#               to the same host where the plugin resides.
#
#            2) If MongoDB is deployed as a replica set, the agent data is
#               piggybacked to a different hostname, named after the replica
#               set. You have to create a new host in the monitoring system
#               matching the replica set name, or use the piggyback translation
#               rule to modify the hostname according to your needs.

import os
import pprint
import sys
import time

try:
    import pymongo
except ImportError:
    # The missing module is reported by main() (stderr message, exit code 2),
    # so that merely loading this file has no side effects.
    pymongo = None

# TODO: host, port and authentication might become configurable in the future.
host = None
port = None

# strptime() patterns tried in order by get_timestamp(): a weekday/month/day
# form without a year, and an ISO-like form with a year.
TIMESTAMP_PATTERNS = ("%a %b %d %H:%M:%S", "%Y-%m-%dT%H:%M:%S")


def get_timestamp(text):
    """Return the local epoch time for a log timestamp, or None if unparsable.

    `text` is the part of a log line in front of the first "." (the fraction
    of a second). Each pattern in TIMESTAMP_PATTERNS is tried in turn.
    """
    for pattern in TIMESTAMP_PATTERNS:
        try:
            return time.mktime(time.strptime(text, pattern))
        except (ValueError, OverflowError):
            # ValueError: pattern mismatch; OverflowError/ValueError: mktime()
            # cannot represent the date (e.g. a pattern without a year).
            continue
    return None


def classify_log_line(line):
    """Return the logwatch state ("C", "W" or ".") for one startup log line.

    The message is taken to start two characters after the first "]".
    An empty message or one starting with "** " is context ("."), any line
    containing "** WARNING:" is a warning ("W"), everything else is "C".
    """
    state = "C"
    state_index = line.find("]") + 2
    if len(line) == state_index or line[state_index:].startswith("** "):
        state = "."
    if "** WARNING:" in line:
        state = "W"
    return state


def is_new_log_line(timestamp, last_timestamp):
    """Tell whether a line with `timestamp` is newer than `last_timestamp`.

    Lines without a parsable timestamp (None) are never new. Without a
    previous timestamp (None) every parsable line is new. This spells out
    the None ordering the comparison relied on under Python 2.
    """
    if timestamp is None:
        return False
    return last_timestamp is None or timestamp > last_timestamp


def read_last_timestamp(state_file):
    """Return the integer timestamp stored in `state_file`, or None.

    None is returned when the file is missing, unreadable or does not contain
    an integer, so a damaged state file behaves like a first run instead of
    aborting the plugin.
    """
    if not os.path.exists(state_file):
        return None
    try:
        with open(state_file) as handle:
            return int(handle.read())
    except (IOError, OSError, ValueError):
        return None


def connect():
    """Return a MongoClient that is able to read from this instance.

    A default client is probed with database_names(). If the probe fails
    (presumably because the instance is a secondary - confirm against the
    pymongo version in use), a client with SECONDARY read preference is
    used instead.
    """
    con = pymongo.MongoClient(host, port)
    try:
        # pylint: disable=no-member
        con.database_names()
    except Exception:
        con = pymongo.MongoClient(host, port,
                                  read_preference=pymongo.ReadPreference.SECONDARY)

    # NOTE(review): kept from the original. Whether this assignment has any
    # effect (or is allowed at all) depends on the pymongo version - confirm.
    con.admin.read_preference = pymongo.ReadPreference.SECONDARY
    return con


def section_instance(server_status, repl_info):
    """Write the mongodb_instance section (mode, address, version, pid)."""
    sys.stdout.write("<<<mongodb_instance:sep(9)>>>\n")
    if not repl_info:
        sys.stdout.write("mode\tSingle Instance\n")
    else:
        if repl_info.get("ismaster"):
            sys.stdout.write("mode\tPrimary\n")
        elif repl_info.get("secondary"):
            sys.stdout.write("mode\tSecondary\n")
        else:
            sys.stdout.write("mode\tArbiter\n")
        sys.stdout.write("address\t%s\n" % repl_info["me"])

    sys.stdout.write("version\t%s\n" % server_status["version"])
    sys.stdout.write("pid\t%s\n" % server_status["pid"])


def section_replica(con, repl_info):
    """Open the piggyback block for the replica set and describe the set."""
    sys.stdout.write("<<<<%s>>>>\n" % repl_info["setName"])
    sys.stdout.write("<<<mongodb_replica:sep(9)>>>\n")
    sys.stdout.write("primary\t%s\n" % repl_info.get("primary"))
    # "hosts"/"arbiters" may be missing (e.g. a set without arbiters).
    sys.stdout.write("hosts\t%s\n" % " ".join(repl_info.get("hosts") or []))
    sys.stdout.write("arbiters\t%s\n" % " ".join(repl_info.get("arbiters") or []))

    sys.stdout.write("<<<mongodb_replstatus>>>\n")
    # pformat() does not end with a newline; without one the next section
    # header would be appended to the last line of this section.
    sys.stdout.write("%s\n" % pprint.pformat(con.admin.command("replSetGetStatus")))


def section_asserts(server_status):
    """Write the mongodb_asserts section."""
    sys.stdout.write("<<<mongodb_asserts>>>\n")
    for key, value in server_status.get("asserts", {}).items():
        sys.stdout.write("%s %s\n" % (key, value))


def section_connections(server_status):
    """Write the mongodb_connections section."""
    sys.stdout.write("<<<mongodb_connections>>>\n")
    connections = server_status.get("connections", {})
    sys.stdout.write("%s\n" % "\n".join("%s %s" % x for x in connections.items()))


def collect_databases(con):
    """Return {database name: {"collections", "stats", "collstats"}}."""
    databases = {}
    for name in con.database_names():
        database = con[name]
        collections = database.collection_names()
        stats = database.command("dbstats")
        collstats = {}
        for collection in collections:
            collstats[collection] = database.command("collstats", collection)
        databases[name] = {
            "collections": collections,
            "stats": stats,
            "collstats": collstats,
        }
    return databases


def section_chunks(con, databases):
    """Write the mongodb_chunks section from the config.chunks collection."""
    sys.stdout.write("<<<mongodb_chunks>>>\n")
    col = con.config.chunks
    # The shard list does not depend on the database: query it once.
    shards = col.distinct("shard")
    for db_name, db_data in databases.items():
        # Still written once per database to keep the output format unchanged.
        sys.stdout.write("shardcount %d\n" % len(shards))
        for collection in db_data.get("collections"):
            nsfilter = "%s.%s" % (db_name, collection)
            sys.stdout.write("nscount %s %s\n" %
                             (nsfilter, col.find({"ns": nsfilter}).count()))
            for shard in shards:
                matches = col.find({"ns": nsfilter, "shard": shard}).count()
                sys.stdout.write("shardmatches %s#%s %s\n" % (nsfilter, shard, matches))


def section_locks(server_status):
    """Write the mongodb_locks section from the globalLock status."""
    sys.stdout.write("<<<mongodb_locks>>>\n")
    global_lock_info = server_status.get("globalLock")
    if not global_lock_info:
        return
    for what in ("activeClients", "currentQueue"):
        for key, value in global_lock_info.get(what, {}).items():
            sys.stdout.write("%s %s %s\n" % (what, key, value))


def section_flushing(server_status):
    """Write the mongodb_flushing section.

    The data lines are skipped when the server status has no
    "backgroundFlushing" entry, instead of aborting the whole plugin.
    """
    sys.stdout.write("<<<mongodb_flushing>>>\n")
    flushing = server_status.get("backgroundFlushing")
    if not flushing:
        return
    sys.stdout.write("average_ms %s\n" % flushing["average_ms"])
    sys.stdout.write("last_ms %s\n" % flushing["last_ms"])
    sys.stdout.write("flushed %s\n" % flushing["flushes"])


def section_mem(server_status):
    """Write the mongodb_mem section ("mem" followed by "extra_info")."""
    sys.stdout.write("<<<mongodb_mem>>>\n")
    for what in ("mem", "extra_info"):
        for key, value in server_status.get(what, {}).items():
            sys.stdout.write("%s %s\n" % (key, value))


def section_counters(server_status):
    """Write the mongodb_counters section."""
    sys.stdout.write("<<<mongodb_counters>>>\n")
    for what in ("opcounters", "opcountersRepl"):
        for key, value in server_status.get(what, {}).items():
            sys.stdout.write("%s %s %s\n" % (what, key, value))


def section_collections(databases):
    """Write the mongodb_collections section, one line per collstats value."""
    sys.stdout.write("<<<mongodb_collections:sep(9)>>>\n")
    for dbname, dbdata in databases.items():
        for collname, colldata in dbdata.get("collstats", {}).items():
            for what, value in colldata.items():
                sys.stdout.write("%s\t%s\t%s\t%s\n" % (dbname, collname, what, value))


def section_logwatch(con):
    """Write new startup warnings as a logwatch section.

    The timestamp of the newest reported line is remembered in
    $MK_VARDIR/mongodb.state. Without MK_VARDIR only the headers are written.
    """
    sys.stdout.write("<<<logwatch>>>\n")
    sys.stdout.write("[[[MongoDB startupWarnings]]]\n")
    startup_warnings = con.admin.command({"getLog": "startupWarnings"})

    var_dir = os.environ.get("MK_VARDIR")
    if not var_dir:
        return

    state_file = "%s/mongodb.state" % var_dir
    log_lines = startup_warnings.get("log") or []
    last_timestamp = read_last_timestamp(state_file)

    output_all = False
    if last_timestamp is not None and time.localtime(last_timestamp).tm_year < 2015:
        # Note: there is no year information in these log lines.
        # As a workaround we look at the change time (year) of the state file.
        # If it differs from the current year we start from the beginning.
        statefile_year = time.localtime(os.stat(state_file).st_ctime).tm_year
        output_all = time.localtime().tm_year != statefile_year

    for line in log_lines:
        if output_all or is_new_log_line(get_timestamp(line.split(".")[0]), last_timestamp):
            sys.stdout.write("%s %s\n" % (classify_log_line(line), line))

    # Update the state file, but only with a timestamp that could be parsed.
    if log_lines:
        newest = get_timestamp(log_lines[-1].split(".")[0])
        if newest is not None:
            with open(state_file, "w") as handle:
                handle.write("%d" % newest)


def main():
    """Query the local MongoDB instance and print all agent sections.

    Exits with code 2 if pymongo is missing. Exits with code 0 after an
    error line if the instance cannot be queried, and after the instance
    section on replica set members that are not the primary.
    """
    if pymongo is None:
        sys.stderr.write("ERROR: Unable to import pymongo module\n")
        sys.exit(2)

    try:
        con = connect()
        server_status = con.admin.command("serverStatus")
    except Exception:
        sys.stdout.write("<<<mongodb_instance:sep(9)>>>\n")
        sys.stdout.write("error\tInstance is down\n")
        sys.exit(0)

    repl_info = server_status.get("repl")
    section_instance(server_status, repl_info)

    if repl_info:
        # Only the primary reports the data of the replica set.
        if not repl_info.get("ismaster"):
            sys.exit(0)
        section_replica(con, repl_info)

    section_asserts(server_status)
    section_connections(server_status)

    databases = collect_databases(con)
    section_chunks(con, databases)
    section_locks(server_status)
    section_flushing(server_status)
    # The indexCounters section is intentionally not emitted (unused).
    section_mem(server_status)
    section_counters(server_status)
    section_collections(databases)
    section_logwatch(con)

    # End of the piggyback block (if one was opened).
    sys.stdout.write("<<<<>>>>\n")


if __name__ == "__main__":
    main()