Cleanup config.nodes_of
[check_mk.git] / livestatus / src / module.cc
blob88363bc2d7f39adadebf0d3fa4d8348746373717
1 // +------------------------------------------------------------------+
2 // | ____ _ _ __ __ _ __ |
3 // | / ___| |__ ___ ___| | __ | \/ | |/ / |
4 // | | | | '_ \ / _ \/ __| |/ / | |\/| | ' / |
5 // | | |___| | | | __/ (__| < | | | | . \ |
6 // | \____|_| |_|\___|\___|_|\_\___|_| |_|_|\_\ |
7 // | |
8 // | Copyright Mathias Kettner 2014 mk@mathias-kettner.de |
9 // +------------------------------------------------------------------+
11 // This file is part of Check_MK.
12 // The official homepage is at http://mathias-kettner.de/check_mk.
14 // check_mk is free software; you can redistribute it and/or modify it
15 // under the terms of the GNU General Public License as published by
16 // the Free Software Foundation in version 2. check_mk is distributed
17 // in the hope that it will be useful, but WITHOUT ANY WARRANTY; with-
18 // out even the implied warranty of MERCHANTABILITY or FITNESS FOR A
19 // PARTICULAR PURPOSE. See the GNU General Public License for more de-
20 // tails. You should have received a copy of the GNU General Public
21 // License along with GNU Make; see the file COPYING. If not, write
22 // to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
23 // Boston, MA 02110-1301 USA.
25 // Needed for S_ISSOCK
26 #define _XOPEN_SOURCE 500
28 // https://github.com/include-what-you-use/include-what-you-use/issues/166
29 // IWYU pragma: no_include <ext/alloc_traits.h>
30 #include "config.h"
31 #include <fcntl.h>
32 #include <pthread.h>
33 #include <sys/socket.h>
34 #include <sys/stat.h>
35 #include <sys/un.h>
36 #include <unistd.h>
37 #include <atomic>
38 #include <chrono>
39 #include <cstddef>
40 #include <cstdlib>
41 #include <cstring>
42 #include <memory>
43 #include <sstream>
44 #include <string>
45 #include <vector>
46 #include "ChronoUtils.h"
47 #include "ClientQueue.h"
48 #include "InputBuffer.h"
49 #include "Logger.h"
50 #include "NagiosCore.h"
51 #include "OutputBuffer.h"
52 #include "Poller.h"
53 #include "RegExp.h"
54 #include "TimeperiodsCache.h"
55 #include "Triggers.h"
56 #include "auth.h"
57 #include "data_encoding.h"
58 #include "global_counters.h"
59 #include "nagios.h"
60 #include "strutil.h"
62 NEB_API_VERSION(CURRENT_NEB_API_VERSION)
63 #ifndef NAGIOS4
64 extern int event_broker_options;
65 #else
66 extern unsigned long event_broker_options;
67 #endif // NAGIOS4
68 extern int enable_environment_macros;
70 // maximum idle time for connection in keep alive state
71 static std::chrono::milliseconds fl_idle_timeout = std::chrono::minutes(5);
73 // maximum time for reading a query
74 static std::chrono::milliseconds fl_query_timeout = std::chrono::seconds(10);
76 // allow 10 concurrent connections per default
77 size_t g_livestatus_threads = 10;
78 // current number of queued connections (for statistics)
79 int g_num_queued_connections = 0;
80 // current number of active connections (for statistics)
81 std::atomic_int32_t g_livestatus_active_connections{0};
82 size_t g_thread_stack_size = 1024 * 1024; /* stack size of threads */
84 void *g_nagios_handle;
85 int g_unix_socket = -1;
86 int g_max_fd_ever = 0;
88 static NagiosPaths fl_paths;
90 static bool fl_should_terminate = false;
// Bookkeeping record kept for every thread this module starts.
struct ThreadInfo {
    // Handle written by pthread_create() and later passed to pthread_join().
    pthread_t id;
    // Human readable label, shown in the Livestatus log.
    std::string name;
};
97 static std::vector<ThreadInfo> fl_thread_info;
98 static thread_local ThreadInfo *tl_info;
100 static NagiosLimits fl_limits;
102 int g_thread_running = 0;
104 static NagiosAuthorization fl_authorization;
106 Encoding fl_data_encoding{Encoding::utf8};
108 static Logger *fl_logger_nagios = nullptr;
109 static LogLevel fl_livestatus_log_level = LogLevel::notice;
110 static ClientQueue *fl_client_queue = nullptr;
111 TimeperiodsCache *g_timeperiods_cache = nullptr;
113 /* simple statistics data for TableStatus */
114 extern service *service_list;
115 extern int log_initial_states;
117 int g_num_hosts;
118 int g_num_services;
120 static NagiosCore *fl_core = nullptr;
122 void count_hosts() {
123 extern host *host_list;
124 g_num_hosts = 0;
125 for (host *h = host_list; h != nullptr; h = h->next) {
126 g_num_hosts++;
130 void count_services() {
131 g_num_services = 0;
132 for (service *s = service_list; s != nullptr; s = s->next) {
133 g_num_services++;
137 void *voidp;
139 void livestatus_count_fork() { counterIncrement(Counter::forks); }
141 void livestatus_cleanup_after_fork() {
142 // 4.2.2010: Deactivate the cleanup function. It might cause
143 // more trouble than it tries to avoid. It might lead to a deadlock
144 // with Nagios' fork()-mechanism...
145 // store_deinit();
146 struct stat st;
148 int i;
149 // We need to close our server and client sockets. Otherwise
150 // our connections are inherited to host and service checks.
151 // If we close our client connection in such a situation,
152 // the connection will still be open since and the client will
153 // hang while trying to read further data. And the CLOEXEC is
154 // not atomic :-(
156 // Eventuell sollte man hier anstelle von store_deinit() nicht
157 // darauf verlassen, dass die ClientQueue alle Verbindungen zumacht.
158 // Es sind ja auch Dateideskriptoren offen, die von Threads gehalten
159 // werden und nicht mehr in der Queue sind. Und in store_deinit()
160 // wird mit mutexes rumgemacht....
161 for (i = 3; i < g_max_fd_ever; i++) {
162 if (0 == fstat(i, &st) && S_ISSOCK(st.st_mode)) {
163 close(i);
168 void *main_thread(void *data) {
169 tl_info = static_cast<ThreadInfo *>(data);
170 auto logger = fl_core->loggerLivestatus();
171 while (!fl_should_terminate) {
172 do_statistics();
174 Poller poller;
175 poller.addFileDescriptor(g_unix_socket, PollEvents::in);
176 int retval = poller.poll(std::chrono::milliseconds(2500));
177 if (retval > 0 &&
178 poller.isFileDescriptorSet(g_unix_socket, PollEvents::in)) {
179 #if HAVE_ACCEPT4
180 int cc = accept4(g_unix_socket, nullptr, nullptr, SOCK_CLOEXEC);
181 #else
182 int cc = accept(g_unix_socket, nullptr, nullptr);
183 #endif
184 if (cc == -1) {
185 generic_error ge("cannot accept client connection");
186 Warning(logger) << ge;
187 continue;
189 #if !HAVE_ACCEPT4
190 if (fcntl(cc, F_SETFD, FD_CLOEXEC) == -1) {
191 generic_error ge(
192 "cannot set close-on-exec bit on client socket");
193 Alert(logger) << ge;
194 break;
196 #endif
197 if (cc > g_max_fd_ever) {
198 g_max_fd_ever = cc;
200 fl_client_queue->addConnection(cc); // closes fd
201 g_num_queued_connections++;
202 counterIncrement(Counter::connections);
205 Notice(logger) << "socket thread has terminated";
206 return voidp;
209 void *client_thread(void *data) {
210 tl_info = static_cast<ThreadInfo *>(data);
211 auto logger = fl_core->loggerLivestatus();
212 while (!fl_should_terminate) {
213 int cc = fl_client_queue->popConnection();
214 g_num_queued_connections--;
215 g_livestatus_active_connections++;
216 if (cc >= 0) {
217 Debug(logger) << "accepted client connection on fd " << cc;
218 InputBuffer input_buffer(cc, fl_should_terminate, logger,
219 fl_query_timeout, fl_idle_timeout);
220 bool keepalive = true;
221 unsigned requestnr = 0;
222 while (keepalive && !fl_should_terminate) {
223 if (++requestnr > 1) {
224 Debug(logger) << "handling request " << requestnr
225 << " on same connection";
227 counterIncrement(Counter::requests);
228 OutputBuffer output_buffer(cc, fl_should_terminate, logger);
229 keepalive = fl_core->answerRequest(input_buffer, output_buffer);
231 close(cc);
233 g_livestatus_active_connections--;
235 return voidp;
238 namespace {
239 class NagiosHandler : public Handler {
240 public:
241 NagiosHandler() { setFormatter(std::make_unique<NagiosFormatter>()); }
243 private:
244 class NagiosFormatter : public Formatter {
245 void format(std::ostream &os, const LogRecord &record) override {
246 os << "livestatus: " << record.getMessage();
250 void publish(const LogRecord &record) override {
251 std::ostringstream os;
252 getFormatter()->format(os, record);
253 // TODO(sp) The Nagios headers are (once again) not const-correct...
254 write_to_all_logs(const_cast<char *>(os.str().c_str()),
255 NSLOG_INFO_MESSAGE);
259 class LivestatusHandler : public FileHandler {
260 public:
261 explicit LivestatusHandler(const std::string &filename)
262 : FileHandler(filename) {
263 setFormatter(std::make_unique<LivestatusFormatter>());
266 private:
267 class LivestatusFormatter : public Formatter {
268 void format(std::ostream &os, const LogRecord &record) override {
269 os << FormattedTimePoint(record.getTimePoint()) << " ["
270 << tl_info->name << "] " << record.getMessage();
272 } _formatter;
274 } // namespace
276 void start_threads() {
277 count_hosts();
278 count_services();
280 if (g_thread_running == 0) {
281 auto logger = fl_core->loggerLivestatus();
282 logger->setLevel(fl_livestatus_log_level);
283 logger->setUseParentHandlers(false);
284 try {
285 logger->setHandler(
286 std::make_unique<LivestatusHandler>(fl_paths._logfile));
287 } catch (const generic_error &ex) {
288 Warning(fl_logger_nagios) << ex;
291 Informational(fl_logger_nagios)
292 << "starting main thread and " << g_livestatus_threads
293 << " client threads";
295 pthread_atfork(livestatus_count_fork, nullptr,
296 livestatus_cleanup_after_fork);
298 pthread_attr_t attr;
299 pthread_attr_init(&attr);
300 size_t defsize;
301 if (pthread_attr_getstacksize(&attr, &defsize) == 0) {
302 Debug(fl_logger_nagios) << "default stack size is " << defsize;
304 if (pthread_attr_setstacksize(&attr, g_thread_stack_size) != 0) {
305 Warning(fl_logger_nagios)
306 << "cannot set thread stack size to " << g_thread_stack_size;
307 } else {
308 Debug(fl_logger_nagios)
309 << "setting thread stack size to " << g_thread_stack_size;
312 fl_thread_info.resize(g_livestatus_threads + 1);
313 for (auto &info : fl_thread_info) {
314 ptrdiff_t idx = &info - &fl_thread_info[0];
315 if (idx == 0) {
316 // start thread that listens on socket
317 info.name = "main";
318 pthread_create(&info.id, nullptr, main_thread, &info);
319 // Our current thread (i.e. the main one, confusing terminology)
320 // needs thread-local infos for logging, too.
321 tl_info = &info;
322 } else {
323 info.name = "client " + std::to_string(idx);
324 pthread_create(&info.id, &attr, client_thread, &info);
328 g_thread_running = 1;
329 pthread_attr_destroy(&attr);
333 void terminate_threads() {
334 if (g_thread_running != 0) {
335 fl_should_terminate = true;
336 Informational(fl_logger_nagios) << "waiting for main to terminate...";
337 pthread_join(fl_thread_info[0].id, nullptr);
338 Informational(fl_logger_nagios)
339 << "waiting for client threads to terminate...";
340 fl_client_queue->terminate();
341 for (const auto &info : fl_thread_info) {
342 if (pthread_join(info.id, nullptr) != 0) {
343 Warning(fl_logger_nagios)
344 << "could not join thread " << info.name;
347 Informational(fl_logger_nagios)
348 << "main thread + " << g_livestatus_threads
349 << " client threads have finished";
350 g_thread_running = 0;
351 fl_should_terminate = false;
355 bool open_unix_socket() {
356 struct stat st;
357 if (stat(fl_paths._socket.c_str(), &st) == 0) {
358 if (unlink(fl_paths._socket.c_str()) == 0) {
359 Debug(fl_logger_nagios)
360 << "removed old socket file " << fl_paths._socket;
361 } else {
362 generic_error ge("cannot remove old socket file " +
363 fl_paths._socket);
364 Alert(fl_logger_nagios) << ge;
365 return false;
369 g_unix_socket = socket(PF_UNIX, SOCK_STREAM, 0);
370 g_max_fd_ever = g_unix_socket;
371 if (g_unix_socket < 0) {
372 generic_error ge("cannot create UNIX socket");
373 Critical(fl_logger_nagios) << ge;
374 return false;
377 // Imortant: close on exec -> check plugins must not inherit it!
378 if (fcntl(g_unix_socket, F_SETFD, FD_CLOEXEC) == -1) {
379 generic_error ge("cannot set close-on-exec bit on socket");
380 Alert(fl_logger_nagios) << ge;
381 close(g_unix_socket);
382 return false;
385 // Bind it to its address. This creates the file with the name
386 // fl_paths._socket
387 struct sockaddr_un sockaddr;
388 sockaddr.sun_family = AF_UNIX;
389 strncpy(sockaddr.sun_path, fl_paths._socket.c_str(),
390 sizeof(sockaddr.sun_path) - 1);
391 sockaddr.sun_path[sizeof(sockaddr.sun_path) - 1] = '\0';
392 if (bind(g_unix_socket, reinterpret_cast<struct sockaddr *>(&sockaddr),
393 sizeof(sockaddr)) < 0) {
394 generic_error ge("cannot bind UNIX socket to address " +
395 fl_paths._socket);
396 Error(fl_logger_nagios) << ge;
397 close(g_unix_socket);
398 return false;
401 // Make writable group members (fchmod didn't do nothing for me. Don't know
402 // why!)
403 if (0 != chmod(fl_paths._socket.c_str(), 0660)) {
404 generic_error ge("cannot change file permissions for UNIX socket at " +
405 fl_paths._socket + " to 0660");
406 Error(fl_logger_nagios) << ge;
407 close(g_unix_socket);
408 return false;
411 if (0 != listen(g_unix_socket, 3 /* backlog */)) {
412 generic_error ge("cannot listen to UNIX socket at " + fl_paths._socket);
413 Error(fl_logger_nagios) << ge;
414 close(g_unix_socket);
415 return false;
418 Informational(fl_logger_nagios)
419 << "opened UNIX socket at " << fl_paths._socket;
420 return true;
423 void close_unix_socket() {
424 unlink(fl_paths._socket.c_str());
425 if (g_unix_socket >= 0) {
426 close(g_unix_socket);
427 g_unix_socket = -1;
431 int broker_host(int event_type __attribute__((__unused__)),
432 void *data __attribute__((__unused__))) {
433 counterIncrement(Counter::neb_callbacks);
434 return 0;
437 int broker_check(int event_type, void *data) {
438 int result = NEB_OK;
439 if (event_type == NEBCALLBACK_SERVICE_CHECK_DATA) {
440 auto c = static_cast<nebstruct_service_check_data *>(data);
441 if (c->type == NEBTYPE_SERVICECHECK_PROCESSED) {
442 counterIncrement(Counter::service_checks);
444 } else if (event_type == NEBCALLBACK_HOST_CHECK_DATA) {
445 auto c = static_cast<nebstruct_host_check_data *>(data);
446 if (c->type == NEBTYPE_HOSTCHECK_PROCESSED) {
447 counterIncrement(Counter::host_checks);
450 fl_core->triggers().notify_all(Triggers::Kind::check);
451 return result;
454 int broker_comment(int event_type __attribute__((__unused__)), void *data) {
455 auto co = static_cast<nebstruct_comment_data *>(data);
456 fl_core->registerComment(co);
457 counterIncrement(Counter::neb_callbacks);
458 fl_core->triggers().notify_all(Triggers::Kind::comment);
459 return 0;
462 int broker_downtime(int event_type __attribute__((__unused__)), void *data) {
463 auto dt = static_cast<nebstruct_downtime_data *>(data);
464 fl_core->registerDowntime(dt);
465 counterIncrement(Counter::neb_callbacks);
466 fl_core->triggers().notify_all(Triggers::Kind::downtime);
467 return 0;
470 int broker_log(int event_type __attribute__((__unused__)),
471 void *data __attribute__((__unused__))) {
472 counterIncrement(Counter::neb_callbacks);
473 counterIncrement(Counter::log_messages);
474 // NOTE: We use logging very early, even before the core is instantiated!
475 if (fl_core != nullptr) {
476 fl_core->triggers().notify_all(Triggers::Kind::log);
478 return 0;
481 // called twice (start/end) for each external command, even builtin ones
482 int broker_command(int event_type __attribute__((__unused__)), void *data) {
483 auto sc = static_cast<nebstruct_external_command_data *>(data);
484 if (sc->type == NEBTYPE_EXTERNALCOMMAND_START) {
485 counterIncrement(Counter::commands);
486 if (sc->command_type == CMD_CUSTOM_COMMAND &&
487 strcmp(sc->command_string, "_LOG") == 0) {
488 write_to_all_logs(sc->command_args, -1);
489 counterIncrement(Counter::log_messages);
490 fl_core->triggers().notify_all(Triggers::Kind::log);
493 counterIncrement(Counter::neb_callbacks);
494 fl_core->triggers().notify_all(Triggers::Kind::command);
495 return 0;
498 int broker_state(int event_type __attribute__((__unused__)),
499 void *data __attribute__((__unused__))) {
500 counterIncrement(Counter::neb_callbacks);
501 fl_core->triggers().notify_all(Triggers::Kind::state);
502 return 0;
505 int broker_program(int event_type __attribute__((__unused__)),
506 void *data __attribute__((__unused__))) {
507 counterIncrement(Counter::neb_callbacks);
508 fl_core->triggers().notify_all(Triggers::Kind::program);
509 return 0;
512 void livestatus_log_initial_states() {
513 extern scheduled_downtime *scheduled_downtime_list;
514 // It's a bit unclear if we need to log downtimes of hosts *before* their
515 // corresponding service downtimes, so let's play safe...
516 for (auto dt = scheduled_downtime_list; dt != nullptr; dt = dt->next) {
517 if (dt->is_in_effect != 0 && dt->type == HOST_DOWNTIME) {
518 Informational(fl_logger_nagios)
519 << "HOST DOWNTIME ALERT: " << dt->host_name << ";STARTED;"
520 << dt->comment;
523 for (auto dt = scheduled_downtime_list; dt != nullptr; dt = dt->next) {
524 if (dt->is_in_effect != 0 && dt->type == SERVICE_DOWNTIME) {
525 Informational(fl_logger_nagios)
526 << "SERVICE DOWNTIME ALERT: " << dt->host_name << ";"
527 << dt->service_description << ";STARTED;" << dt->comment;
530 g_timeperiods_cache->logCurrentTimeperiods();
533 int broker_event(int event_type __attribute__((__unused__)), void *data) {
534 counterIncrement(Counter::neb_callbacks);
535 auto ts = static_cast<struct nebstruct_timed_event_struct *>(data);
536 if (ts->event_type == EVENT_LOG_ROTATION) {
537 if (g_thread_running == 1) {
538 livestatus_log_initial_states();
539 } else if (log_initial_states == 1) {
540 // initial info during startup
541 Informational(fl_logger_nagios) << "logging initial states";
544 g_timeperiods_cache->update(from_timeval(ts->timestamp));
545 return 0;
548 int broker_process(int event_type __attribute__((__unused__)), void *data) {
549 auto ps = static_cast<struct nebstruct_process_struct *>(data);
550 switch (ps->type) {
551 case NEBTYPE_PROCESS_START:
552 fl_core = new NagiosCore(fl_paths, fl_limits, fl_authorization,
553 fl_data_encoding);
554 fl_client_queue = new ClientQueue();
555 g_timeperiods_cache = new TimeperiodsCache(fl_logger_nagios);
556 break;
557 case NEBTYPE_PROCESS_EVENTLOOPSTART:
558 g_timeperiods_cache->update(from_timeval(ps->timestamp));
559 start_threads();
560 break;
561 default:
562 break;
564 return 0;
567 int verify_event_broker_options() {
568 int errors = 0;
569 if ((event_broker_options & BROKER_PROGRAM_STATE) == 0) {
570 Critical(fl_logger_nagios)
571 << "need BROKER_PROGRAM_STATE (" << BROKER_PROGRAM_STATE
572 << ") event_broker_option enabled to work.";
573 errors++;
575 if ((event_broker_options & BROKER_TIMED_EVENTS) == 0) {
576 Critical(fl_logger_nagios)
577 << "need BROKER_TIMED_EVENTS (" << BROKER_TIMED_EVENTS
578 << ") event_broker_option enabled to work.";
579 errors++;
581 if ((event_broker_options & BROKER_SERVICE_CHECKS) == 0) {
582 Critical(fl_logger_nagios)
583 << "need BROKER_SERVICE_CHECKS (" << BROKER_SERVICE_CHECKS
584 << ") event_broker_option enabled to work.";
585 errors++;
587 if ((event_broker_options & BROKER_HOST_CHECKS) == 0) {
588 Critical(fl_logger_nagios)
589 << "need BROKER_HOST_CHECKS (" << BROKER_HOST_CHECKS
590 << ") event_broker_option enabled to work.";
591 errors++;
593 if ((event_broker_options & BROKER_LOGGED_DATA) == 0) {
594 Critical(fl_logger_nagios)
595 << "need BROKER_LOGGED_DATA (" << BROKER_LOGGED_DATA
596 << ") event_broker_option enabled to work.",
597 errors++;
599 if ((event_broker_options & BROKER_COMMENT_DATA) == 0) {
600 Critical(fl_logger_nagios)
601 << "need BROKER_COMMENT_DATA (" << BROKER_COMMENT_DATA
602 << ") event_broker_option enabled to work.";
603 errors++;
605 if ((event_broker_options & BROKER_DOWNTIME_DATA) == 0) {
606 Critical(fl_logger_nagios)
607 << "need BROKER_DOWNTIME_DATA (" << BROKER_DOWNTIME_DATA
608 << ") event_broker_option enabled to work.";
609 errors++;
611 if ((event_broker_options & BROKER_STATUS_DATA) == 0) {
612 Critical(fl_logger_nagios)
613 << "need BROKER_STATUS_DATA (" << BROKER_STATUS_DATA
614 << ") event_broker_option enabled to work.";
615 errors++;
617 if ((event_broker_options & BROKER_ADAPTIVE_DATA) == 0) {
618 Critical(fl_logger_nagios)
619 << "need BROKER_ADAPTIVE_DATA (" << BROKER_ADAPTIVE_DATA
620 << ") event_broker_option enabled to work.";
621 errors++;
623 if ((event_broker_options & BROKER_EXTERNALCOMMAND_DATA) == 0) {
624 Critical(fl_logger_nagios) << "need BROKER_EXTERNALCOMMAND_DATA ("
625 << BROKER_EXTERNALCOMMAND_DATA
626 << ") event_broker_option enabled to work.";
627 errors++;
629 if ((event_broker_options & BROKER_STATECHANGE_DATA) == 0) {
630 Critical(fl_logger_nagios)
631 << "need BROKER_STATECHANGE_DATA (" << BROKER_STATECHANGE_DATA
632 << ") event_broker_option enabled to work.";
633 errors++;
636 return static_cast<int>(errors == 0);
639 void register_callbacks() {
640 neb_register_callback(NEBCALLBACK_HOST_STATUS_DATA, g_nagios_handle, 0,
641 broker_host); // Needed to start threads
642 neb_register_callback(NEBCALLBACK_COMMENT_DATA, g_nagios_handle, 0,
643 broker_comment); // dynamic data
644 neb_register_callback(NEBCALLBACK_DOWNTIME_DATA, g_nagios_handle, 0,
645 broker_downtime); // dynamic data
646 neb_register_callback(NEBCALLBACK_SERVICE_CHECK_DATA, g_nagios_handle, 0,
647 broker_check); // only for statistics
648 neb_register_callback(NEBCALLBACK_HOST_CHECK_DATA, g_nagios_handle, 0,
649 broker_check); // only for statistics
650 neb_register_callback(NEBCALLBACK_LOG_DATA, g_nagios_handle, 0,
651 broker_log); // only for trigger 'log'
652 neb_register_callback(NEBCALLBACK_EXTERNAL_COMMAND_DATA, g_nagios_handle, 0,
653 broker_command); // only for trigger 'command'
654 neb_register_callback(NEBCALLBACK_STATE_CHANGE_DATA, g_nagios_handle, 0,
655 broker_state); // only for trigger 'state'
656 neb_register_callback(NEBCALLBACK_ADAPTIVE_PROGRAM_DATA, g_nagios_handle, 0,
657 broker_program); // only for trigger 'program'
658 neb_register_callback(NEBCALLBACK_PROCESS_DATA, g_nagios_handle, 0,
659 broker_process); // used for starting threads
660 neb_register_callback(NEBCALLBACK_TIMED_EVENT_DATA, g_nagios_handle, 0,
661 broker_event); // used for timeperiods cache
664 void deregister_callbacks() {
665 neb_deregister_callback(NEBCALLBACK_HOST_STATUS_DATA, broker_host);
666 neb_deregister_callback(NEBCALLBACK_COMMENT_DATA, broker_comment);
667 neb_deregister_callback(NEBCALLBACK_DOWNTIME_DATA, broker_downtime);
668 neb_deregister_callback(NEBCALLBACK_SERVICE_CHECK_DATA, broker_check);
669 neb_deregister_callback(NEBCALLBACK_HOST_CHECK_DATA, broker_check);
670 neb_deregister_callback(NEBCALLBACK_LOG_DATA, broker_log);
671 neb_deregister_callback(NEBCALLBACK_EXTERNAL_COMMAND_DATA, broker_command);
672 neb_deregister_callback(NEBCALLBACK_STATE_CHANGE_DATA, broker_state);
673 neb_deregister_callback(NEBCALLBACK_ADAPTIVE_PROGRAM_DATA, broker_program);
674 neb_deregister_callback(NEBCALLBACK_PROCESS_DATA, broker_program);
675 neb_deregister_callback(NEBCALLBACK_TIMED_EVENT_DATA, broker_event);
678 std::string check_path(const std::string &name, const std::string &path) {
679 struct stat st;
680 if (stat(path.c_str(), &st) != 0) {
681 Error(fl_logger_nagios) << name << " '" << path << "' not existing!";
682 return {}; // disable
684 if (access(path.c_str(), R_OK) != 0) {
685 Error(fl_logger_nagios) << name << " '" << path
686 << "' not readable, please fix permissions.";
687 return {}; // disable
689 return path;
692 void livestatus_parse_arguments(Logger *logger, const char *args_orig) {
693 // set default path to our logfile to be in the same path as nagios.log
694 extern char *log_file;
695 std::string lf{log_file};
696 auto slash = lf.rfind('/');
697 fl_paths._logfile =
698 (slash == std::string::npos ? "/tmp/" : lf.substr(0, slash + 1)) +
699 "livestatus.log";
701 if (args_orig == nullptr) {
702 return; // no arguments, use default options
705 // TODO(sp) Nuke next_field and friends. Use C++ strings everywhere.
706 std::vector<char> args_buf(args_orig, args_orig + strlen(args_orig) + 1);
707 char *args = &args_buf[0];
708 while (char *token = next_field(&args)) {
709 /* find = */
710 char *part = token;
711 std::string left = safe_next_token(&part, '=');
712 const char *right_token = next_token(&part, 0);
713 if (right_token == nullptr) {
714 fl_paths._socket = left;
715 } else {
716 std::string right{right_token};
717 if (left == "debug") {
718 int debug_level = atoi(right.c_str());
719 if (debug_level >= 2) {
720 fl_livestatus_log_level = LogLevel::debug;
721 } else if (debug_level >= 1) {
722 fl_livestatus_log_level = LogLevel::informational;
723 } else {
724 fl_livestatus_log_level = LogLevel::notice;
726 Notice(logger)
727 << "setting debug level to " << fl_livestatus_log_level;
728 } else if (left == "log_file") {
729 fl_paths._logfile = right;
730 } else if (left == "mkeventd_socket") {
731 fl_paths._mkeventd_socket = right;
732 } else if (left == "max_cached_messages") {
733 fl_limits._max_cached_messages =
734 strtoul(right.c_str(), nullptr, 10);
735 Notice(logger)
736 << "setting max number of cached log messages to "
737 << fl_limits._max_cached_messages;
738 } else if (left == "max_lines_per_logfile") {
739 fl_limits._max_lines_per_logfile =
740 strtoul(right.c_str(), nullptr, 10);
741 Notice(logger) << "setting max number lines per logfile to "
742 << fl_limits._max_lines_per_logfile;
743 } else if (left == "thread_stack_size") {
744 g_thread_stack_size = strtoul(right.c_str(), nullptr, 10);
745 Notice(logger) << "setting size of thread stacks to "
746 << g_thread_stack_size;
747 } else if (left == "max_response_size") {
748 fl_limits._max_response_size =
749 strtoul(right.c_str(), nullptr, 10);
750 Notice(logger)
751 << "setting maximum response size to "
752 << fl_limits._max_response_size << " bytes ("
753 << (fl_limits._max_response_size / (1024.0 * 1024.0))
754 << " MB)";
755 } else if (left == "num_client_threads") {
756 int c = atoi(right.c_str());
757 if (c <= 0 || c > 1000) {
758 Warning(logger) << "cannot set num_client_threads to " << c
759 << ", must be > 0 and <= 1000";
760 } else {
761 Notice(logger)
762 << "setting number of client threads to " << c;
763 g_livestatus_threads = c;
765 } else if (left == "query_timeout") {
766 int c = atoi(right.c_str());
767 if (c < 0) {
768 Warning(logger) << "query_timeout must be >= 0";
769 } else {
770 fl_query_timeout = std::chrono::milliseconds(c);
771 if (c == 0) {
772 Notice(logger) << "disabled query timeout!";
773 } else {
774 Notice(logger)
775 << "Setting timeout for reading a query to " << c
776 << " ms";
779 } else if (left == "idle_timeout") {
780 int c = atoi(right.c_str());
781 if (c < 0) {
782 Warning(logger) << "idle_timeout must be >= 0";
783 } else {
784 fl_idle_timeout = std::chrono::milliseconds(c);
785 if (c == 0) {
786 Notice(logger) << "disabled idle timeout!";
787 } else {
788 Notice(logger)
789 << "setting idle timeout to " << c << " ms";
792 } else if (left == "service_authorization") {
793 if (right == "strict") {
794 fl_authorization._service = AuthorizationKind::strict;
795 } else if (right == "loose") {
796 fl_authorization._service = AuthorizationKind::loose;
797 } else {
798 Warning(logger) << "invalid service authorization mode, "
799 "allowed are strict and loose";
801 } else if (left == "group_authorization") {
802 if (right == "strict") {
803 fl_authorization._group = AuthorizationKind::strict;
804 } else if (right == "loose") {
805 fl_authorization._group = AuthorizationKind::loose;
806 } else {
807 Warning(logger)
808 << "invalid group authorization mode, allowed are strict and loose";
810 } else if (left == "pnp_path") {
811 fl_paths._pnp = check_path("PNP perfdata directory", right);
812 } else if (left == "mk_inventory_path") {
813 fl_paths._mk_inventory =
814 check_path("Check_MK Inventory directory", right);
815 } else if (left == "structured_status_path") {
816 fl_paths._structured_status =
817 check_path("Check_MK structured status directory", right);
818 } else if (left == "mk_logwatch_path") {
819 fl_paths._mk_logwatch =
820 check_path("Check_MK logwatch directory", right);
821 } else if (left == "data_encoding") {
822 if (right == "utf8") {
823 fl_data_encoding = Encoding::utf8;
824 } else if (right == "latin1") {
825 fl_data_encoding = Encoding::latin1;
826 } else if (right == "mixed") {
827 fl_data_encoding = Encoding::mixed;
828 } else {
829 Warning(logger) << "invalid data_encoding " << right
830 << ", allowed are utf8, latin1 and mixed";
832 } else if (left == "livecheck") {
833 Warning(logger)
834 << "livecheck has been removed from Livestatus, sorry.";
835 } else if (left == "disable_statehist_filtering") {
836 Warning(logger)
837 << "the disable_statehist_filtering option has been removed, filtering is always active now.";
838 } else {
839 Warning(logger)
840 << "ignoring invalid option " << left << "=" << right;
845 if (fl_paths._mkeventd_socket.empty()) {
846 std::string sp{fl_paths._socket};
847 auto slash = sp.rfind('/');
848 fl_paths._mkeventd_socket =
849 (slash == std::string::npos ? "" : sp.substr(0, slash + 1)) +
850 "mkeventd/status";
854 void omd_advertize(Logger *logger) {
855 Notice(logger) << "Livestatus by Mathias Kettner started with PID "
856 << getpid();
857 Notice(logger) << "version " << VERSION << " compiled " << BUILD_DATE
858 << " on " << BUILD_HOSTNAME;
859 Notice(logger) << "built with " << BUILD_CXX << ", using "
860 << RegExp::engine() << " regex engine";
861 Notice(logger) << "please visit us at http://mathias-kettner.de/";
862 fl_paths.dump(logger);
863 if (char *omd_site = getenv("OMD_SITE")) {
864 Informational(logger)
865 << "running on OMD site " << omd_site << ", cool.";
866 } else {
867 Notice(logger)
868 << "Hint: Please try out OMD - the Open Monitoring Distribution";
869 Notice(logger) << "Please visit OMD at http://omdistro.org";
873 // Called from Nagios after we have been loaded.
874 extern "C" int nebmodule_init(int flags __attribute__((__unused__)), char *args,
875 void *handle) {
876 fl_logger_nagios = Logger::getLogger("nagios");
877 fl_logger_nagios->setHandler(std::make_unique<NagiosHandler>());
878 fl_logger_nagios->setUseParentHandlers(false);
880 g_nagios_handle = handle;
881 livestatus_parse_arguments(fl_logger_nagios, args);
882 omd_advertize(fl_logger_nagios);
884 if (!open_unix_socket()) {
885 return 1;
888 if (verify_event_broker_options() == 0) {
889 Critical(fl_logger_nagios)
890 << "bailing out, please fix event_broker_options.";
891 Critical(fl_logger_nagios)
892 << "hint: your event_broker_options are set to "
893 << event_broker_options << ", try setting it to -1.";
894 return 1;
896 Informational(fl_logger_nagios)
897 << "your event_broker_options are sufficient for livestatus.";
899 if (enable_environment_macros == 1) {
900 Notice(fl_logger_nagios)
901 << "environment_macros are enabled, this might decrease the "
902 "overall nagios performance";
905 register_callbacks();
907 /* Unfortunately, we cannot start our socket thread right now.
908 Nagios demonizes *after* having loaded the NEB modules. When
909 demonizing we are losing our thread. Therefore, we create the
910 thread the first time one of our callbacks is called. Before
911 that happens, we haven't got any data anyway... */
913 Notice(fl_logger_nagios)
914 << "finished initialization, further log messages go to "
915 << fl_paths._logfile;
916 return 0;
919 // Called from Nagios after before we are unloaded.
920 extern "C" int nebmodule_deinit(int flags __attribute__((__unused__)),
921 int reason __attribute__((__unused__))) {
922 Notice(fl_logger_nagios) << "deinitializing";
923 terminate_threads();
924 close_unix_socket();
925 deregister_callbacks();
927 delete g_timeperiods_cache;
928 g_timeperiods_cache = nullptr;
930 delete fl_client_queue;
931 fl_client_queue = nullptr;
933 delete fl_core;
934 fl_core = nullptr;
936 return 0;