1 /*****************************************************************************
3 * Nagios check_procs plugin
6 * Copyright (c) 2000-2008 Nagios Plugins Development Team
10 * This file contains the check_procs plugin
12 * Checks all processes and generates WARNING or CRITICAL states if the
13 * specified metric is outside the required threshold ranges. The metric
14 * defaults to number of processes. Search filters can be applied to limit
15 * the processes to check.
18 * This program is free software: you can redistribute it and/or modify
19 * it under the terms of the GNU General Public License as published by
20 * the Free Software Foundation, either version 3 of the License, or
21 * (at your option) any later version.
23 * This program is distributed in the hope that it will be useful,
24 * but WITHOUT ANY WARRANTY; without even the implied warranty of
25 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26 * GNU General Public License for more details.
28 * You should have received a copy of the GNU General Public License
29 * along with this program. If not, see <http://www.gnu.org/licenses/>.
32 *****************************************************************************/
/* Plugin identity strings. In this listing progname is handed to
 * np_extra_opts(), print_revision() and the usage line, while copyright and
 * email are fed to the COPYRIGHT format printed by the help output. */
34 const char *progname
= "check_procs";
35 const char *program_name
= "check_procs"; /* Required for coreutils libs */
36 const char *copyright
= "2000-2008";
37 const char *email
= "nagiosplug-devel@lists.sourceforge.net";
41 #include "utils_cmd.h"
/* struct_stat_t is the buffer type handed to stat_exe(). With
 * HAVE_SYS_STAT_H it aliases struct stat; the second typedef is a placeholder
 * that only carries dev and ino members.
 * NOTE(review): the preprocessor lines separating the two typedefs are not
 * visible in this listing - presumably an else branch; confirm. */
46 #ifdef HAVE_SYS_STAT_H
48 typedef struct stat struct_stat_t
;
50 /* won't be used anyway */
51 typedef struct { dev_t dev
; ino_t ino
; } struct_stat_t
;
/* Forward declarations of the helpers defined further down in this file. */
/* Parses argc/argv; main() compares the result against ERROR. */
54 int process_arguments (int, char **);
/* Called at the end of process_arguments(), which returns its result. */
55 int validate_arguments (void);
/* Converts an elapsed-time string from ps into a number of seconds. */
56 int convert_to_seconds (char *);
57 void print_help (void);
58 void print_usage (void);
/* Raw threshold strings. They are set from -w / -c (or from leading
 * positional arguments) in process_arguments() and handed to
 * set_thresholds(); main() also echoes them in the perfdata output. */
60 char *warning_range
= NULL
;
61 char *critical_range
= NULL
;
/* Parsed thresholds: filled by set_thresholds(), consulted by get_status(). */
62 thresholds
*procs_thresholds
= NULL
;
64 int options
= 0; /* bitmask of filter criteria to test against */
/* Filter bit recorded in options and resultsum for the regex args filter. */
75 #define EREG_ARGS 1024
77 #define KTHREAD_PARENT "kthreadd" /* the parent process of kernel threads:
78 ppid of procs are compared to pid of this proc*/
80 /* Different metrics */
/* Metric the thresholds are applied to; -m / --metric changes it. */
89 enum metric metric
= METRIC_PROCS
;
/* When non-NULL, main() reads the process list from this file through
 * cmd_file_read() instead of running PS_COMMAND. */
100 char *input_filename
= NULL
;
/* Scratch buffer that receives trailing characters in the sscanf() based
 * validation of numeric option values. */
104 char tmp
[MAX_INPUT_BUFFER
];
/* When 1, main() skips processes whose ppid equals the pid of the process
 * named KTHREAD_PARENT. */
105 int kthread_filter
= 0;
106 int usepid
= 0; /* whether to test for pid or /proc/pid/exe */
/* NOTE(review): no use of ps_input is visible in this listing - possibly a
 * leftover; confirm before removing. */
108 FILE *ps_input
= NULL
;
/* stat_exe: stat() the /proc/<pid>/exe entry of process pid into buf.
 * The visible body is compiled only when HAVE_PROC_PID_EXE and
 * HAVE_SYS_STAT_H are both defined. main() treats a result of -1 as failure
 * and otherwise reads st_dev / st_ino from the buffer.
 * NOTE(review): the local declarations, the release of path and the fallback
 * branch are not visible in this listing - confirm that path is freed after
 * the stat() call. */
111 stat_exe (const pid_t pid
, struct_stat_t
*buf
) {
112 #if defined(HAVE_PROC_PID_EXE) && defined(HAVE_SYS_STAT_H)
/* Build the per-process path, then stat the executable it refers to. */
115 xasprintf(&path
, "/proc/%d/exe", pid
);
116 ret
= stat(path
, buf
);
/* main: plugin entry point.
 *
 * Flow, as far as this listing shows it:
 *   1. set up locale and translation domain, allocate work buffers;
 *   2. parse extra options and the command line (process_arguments);
 *   3. work out how to recognise the plugin's own process (by pid, or by the
 *      device/inode of its executable obtained through stat_exe);
 *   4. arm the SIGALRM timeout;
 *   5. obtain the process list from PS_COMMAND or from input_filename;
 *   6. for every line after the header: parse it, skip the plugin itself and
 *      optionally kernel threads, apply the filters, and evaluate the
 *      per-process metric against the thresholds;
 *   7. print the status line plus perfdata.
 * Returns STATE_UNKNOWN when no process line could be parsed; the remaining
 * return paths are not visible in this listing.
 *
 * Change in this revision: the Not parseable diagnostic now prints
 * input_line (the ps line being processed) instead of input_buffer, which
 * the visible code only allocates and never fills. */
126 main (int argc
, char **argv
)
133 struct_stat_t statbuf
;
139 pid_t kthread_ppid
= 0;
145 char procetime
[MAX_INPUT_BUFFER
] = { '\0' };
148 const char *zombie
= "Z";
150 int resultsum
= 0; /* bitmask of the filter criteria met by a process */
151 int found
= 0; /* counter for number of lines returned in `ps` output */
152 int procs
= 0; /* counter for number of processes meeting filter criteria */
153 int pos
; /* number of spaces before 'args' in `ps` output */
154 int cols
; /* number of columns in ps output */
155 int expected_cols
= PS_COLS
- 1;
156 int warn
= 0; /* number of processes in warn state */
157 int crit
= 0; /* number of processes in crit state */
159 int result
= STATE_UNKNOWN
;
160 output chld_out
, chld_err
;
/* Use the user locale for messages but force POSIX number formatting. */
162 setlocale (LC_ALL
, "");
163 bindtextdomain (PACKAGE
, LOCALEDIR
);
164 textdomain (PACKAGE
);
165 setlocale(LC_NUMERIC
, "POSIX");
/* NOTE(review): neither malloc() result is checked in the visible code. */
167 input_buffer
= malloc (MAX_INPUT_BUFFER
);
168 procprog
= malloc (MAX_INPUT_BUFFER
);
170 xasprintf (&metric_name
, "PROCS");
171 metric
= METRIC_PROCS
;
173 /* Parse extra opts if any */
174 argv
=np_extra_opts (&argc
, argv
, progname
);
176 if (process_arguments (argc
, argv
) == ERROR
)
177 usage4 (_("Could not parse arguments"));
/* Decide how the plugin recognises itself in the ps output: by pid when
 * usepid is set or stat_exe() fails, otherwise by the device and inode of
 * its own executable. */
181 if (usepid
|| stat_exe(mypid
, &statbuf
) == -1) {
182 /* usepid might have been set by -T */
186 mydev
= statbuf
.st_dev
;
187 myino
= statbuf
.st_ino
;
190 /* Set signal handling and alarm timeout */
191 if (signal (SIGALRM
, timeout_alarm_handler
) == SIG_ERR
) {
192 die (STATE_UNKNOWN
, _("Cannot catch SIGALRM"));
194 (void) alarm ((unsigned) timeout_interval
);
197 printf (_("CMD: %s\n"), PS_COMMAND
);
/* Without an input file the process list comes from running PS_COMMAND;
 * the first line written to stderr, if any, is echoed. */
199 if (input_filename
== NULL
) {
200 result
= cmd_run( PS_COMMAND
, &chld_out
, &chld_err
, 0);
201 if (chld_err
.lines
> 0) {
202 printf ("%s: %s", _("System call sent warnings to stderr"), chld_err
.line
[0]);
206 result
= cmd_file_read( input_filename
, &chld_out
, 0);
209 /* flush first line: j starts at 1 */
210 for (j
= 1; j
< chld_out
.lines
; j
++) {
211 input_line
= chld_out
.line
[j
];
214 printf ("%s", input_line
);
/* Reset the per-line program name and argument string before parsing. */
216 strcpy (procprog
, "");
217 xasprintf (&procargs
, "%s", "");
219 cols
= sscanf (input_line
, PS_FORMAT
, PS_VARLIST
);
221 /* Zombie processes do not give a procprog command */
222 if ( cols
< expected_cols
&& strstr(procstat
, zombie
) ) {
223 cols
= expected_cols
;
225 if ( cols
>= expected_cols
) {
227 xasprintf (&procargs
, "%s", input_line
+ pos
);
230 /* Some ps return full pathname for command. This removes path */
/* NOTE(review): if base_name() returns a pointer into procprog, source and
 * destination of this strcpy() overlap - TODO confirm against the base_name
 * implementation in use. */
231 strcpy(procprog
, base_name(procprog
));
233 /* we need to convert the elapsed time to seconds */
234 procseconds
= convert_to_seconds(procetime
);
237 printf ("proc#=%d uid=%d vsz=%d rss=%d pid=%d ppid=%d pcpu=%.2f stat=%s etime=%s prog=%s args=%s\n",
238 procs
, procuid
, procvsz
, procrss
,
239 procpid
, procppid
, procpcpu
, procstat
,
240 procetime
, procprog
, procargs
);
/* Ignore the plugin's own process: same pid in pid mode, otherwise same
 * device and inode as the plugin's executable. */
243 if ((usepid
&& mypid
== procpid
) ||
244 (!usepid
&& stat_exe(procpid
, &statbuf
) != -1 && statbuf
.st_dev
== mydev
&& statbuf
.st_ino
== myino
)) {
246 printf("not considering - is myself\n");
250 /* filter kernel threads (children of KTHREAD_PARENT)*/
251 /* TODO adapt for other OSes than GNU/Linux
252 sorry for not doing that, but I've no other OSes to test :-( */
253 if (kthread_filter
== 1) {
254 /* get pid KTHREAD_PARENT */
255 if (kthread_ppid
== 0 && !strcmp(procprog
, KTHREAD_PARENT
) )
256 kthread_ppid
= procpid
;
258 if (kthread_ppid
== procppid
) {
260 printf ("Ignore kernel thread: pid=%d ppid=%d prog=%s args=%s\n", procpid
, procppid
, procprog
, procargs
);
/* Filter evaluation: each criterion requested in options is tested here.
 * The EREG_ARGS case visibly sets its bit in resultsum; the statements of
 * the other cases are not visible in this listing. */
265 if ((options
& STAT
) && (strstr (statopts
, procstat
)))
267 if ((options
& ARGS
) && procargs
&& (strstr (procargs
, args
) != NULL
))
269 if ((options
& EREG_ARGS
) && procargs
&& (regexec(&re_args
, procargs
, (size_t) 0, NULL
, 0) == 0))
270 resultsum
|= EREG_ARGS
;
271 if ((options
& PROG
) && procprog
&& (strcmp (prog
, procprog
) == 0))
273 if ((options
& PPID
) && (procppid
== ppid
))
275 if ((options
& USER
) && (procuid
== uid
))
277 if ((options
& VSZ
) && (procvsz
>= vsz
))
279 if ((options
& RSS
) && (procrss
>= rss
))
281 if ((options
& PCPU
) && (procpcpu
>= pcpu
))
286 /* Next line if filters not matched */
287 if (!(options
== resultsum
|| options
== ALL
))
292 printf ("Matched: uid=%d vsz=%d rss=%d pid=%d ppid=%d pcpu=%.2f stat=%s etime=%s prog=%s args=%s\n",
293 procuid
, procvsz
, procrss
,
294 procpid
, procppid
, procpcpu
, procstat
,
295 procetime
, procprog
, procargs
);
/* Per-process metrics are judged one process at a time; the PROCS metric
 * is judged once after the loop from the final count. */
298 if (metric
== METRIC_VSZ
)
299 i
= get_status ((double)procvsz
, procs_thresholds
);
300 else if (metric
== METRIC_RSS
)
301 i
= get_status ((double)procrss
, procs_thresholds
);
302 /* TODO? float thresholds for --metric=CPU */
303 else if (metric
== METRIC_CPU
)
304 i
= get_status (procpcpu
, procs_thresholds
);
305 else if (metric
== METRIC_ELAPSED
)
306 i
= get_status ((double)procseconds
, procs_thresholds
);
308 if (metric
!= METRIC_PROCS
) {
309 if (i
== STATE_WARNING
) {
/* Append the offending program name to the comma separated fails list.
 * NOTE(review): fails is both an input and the output pointer here;
 * presumably xasprintf allocates a fresh string, so the previous one is not
 * released - confirm. */
311 xasprintf (&fails
, "%s%s%s", fails
, (strcmp(fails
,"") ? ", " : ""), procprog
);
312 result
= max_state (result
, i
);
314 if (i
== STATE_CRITICAL
) {
316 xasprintf (&fails
, "%s%s%s", fails
, (strcmp(fails
,"") ? ", " : ""), procprog
);
317 result
= max_state (result
, i
);
321 /* This should not happen */
/* Show the ps line that could not be parsed. input_line is the line taken
 * from chld_out for this iteration; input_buffer is only allocated in the
 * visible code, so printing it would show indeterminate bytes. */
323 printf(_("Not parseable: %s"), input_line
);
327 if (found
== 0) { /* no process lines parsed so return STATE_UNKNOWN */
328 printf (_("Unable to read output\n"));
329 return STATE_UNKNOWN
;
332 if ( result
== STATE_UNKNOWN
)
335 /* Needed if procs found, but none match filter */
336 if ( metric
== METRIC_PROCS
) {
337 result
= max_state (result
, get_status ((double)procs
, procs_thresholds
) );
/* Status line: metric name and state, then for per-process metrics the
 * number of processes in warn / crit state. */
340 if ( result
== STATE_OK
) {
341 printf ("%s %s: ", metric_name
, _("OK"));
342 } else if (result
== STATE_WARNING
) {
343 printf ("%s %s: ", metric_name
, _("WARNING"));
344 if ( metric
!= METRIC_PROCS
) {
345 printf (_("%d warn out of "), warn
);
347 } else if (result
== STATE_CRITICAL
) {
348 printf ("%s %s: ", metric_name
, _("CRITICAL"));
349 if (metric
!= METRIC_PROCS
) {
350 printf (_("%d crit, %d warn out of "), crit
, warn
);
353 printf (ngettext ("%d process", "%d processes", (unsigned long) procs
), procs
);
/* fmt holds the textual description of the active filters. */
355 if (strcmp(fmt
,"") != 0) {
356 printf (_(" with %s"), fmt
);
359 if ( verbose
>= 1 && strcmp(fails
,"") )
360 printf (" [%s]", fails
);
/* Perfdata: for the PROCS metric the thresholds are echoed; otherwise the
 * warn and crit counters are reported as separate values. */
362 if (metric
== METRIC_PROCS
)
363 printf (" | procs=%d;%s;%s;0;", procs
,
364 warning_range
? warning_range
: "",
365 critical_range
? critical_range
: "");
367 printf (" | procs=%d;;;0; procs_warn=%d;;;0; procs_crit=%d;;;0;", procs
, warn
, crit
);
375 /* process command-line arguments */
/* process_arguments: parse argc/argv into the file-scope option state.
 *
 * Fills warning_range / critical_range, metric and metric_name, the filter
 * values (ppid, uid, prog, args, re_args, rss, vsz, pcpu, statopts), the
 * options bitmask and fmt, the human readable description of the active
 * filters. Leading positional arguments are accepted as warning range,
 * critical range and state filter when the corresponding option was not
 * given. Invalid values end the program through usage2() / usage4() / die().
 * Returns the result of validate_arguments().
 *
 * Change in this revision: the long option no-kthreads is declared with
 * no_argument. The short option string lists k without a colon and the help
 * text shows it as a bare flag, so required_argument made the long form
 * swallow the following command-line word. */
377 process_arguments (int argc
, char **argv
)
384 int cflags
= REG_NOSUB
| REG_EXTENDED
;
385 char errbuf
[MAX_INPUT_BUFFER
];
388 static struct option longopts
[] = {
389 {"warning", required_argument
, 0, 'w'},
390 {"critical", required_argument
, 0, 'c'},
391 {"metric", required_argument
, 0, 'm'},
392 {"timeout", required_argument
, 0, 't'},
393 {"status", required_argument
, 0, 's'},
394 {"ppid", required_argument
, 0, 'p'},
395 {"user", required_argument
, 0, 'u'},
396 {"command", required_argument
, 0, 'C'},
397 {"vsz", required_argument
, 0, 'z'},
398 {"rss", required_argument
, 0, 'r'},
399 {"pcpu", required_argument
, 0, 'P'},
400 {"elapsed", required_argument
, 0, 'e'},
401 {"argument-array", required_argument
, 0, 'a'},
402 {"help", no_argument
, 0, 'h'},
403 {"version", no_argument
, 0, 'V'},
404 {"verbose", no_argument
, 0, 'v'},
405 {"ereg-argument-array", required_argument
, 0, CHAR_MAX
+1},
406 {"input-file", required_argument
, 0, CHAR_MAX
+2},
/* Plain flag, consistent with the k entry of the short option string. */
407 {"no-kthreads", no_argument
, 0, 'k'},
408 {"traditional-filter", no_argument
, 0, 'T'},
/* Rewrite the legacy spelling -to into -t before getopt sees it. */
412 for (c
= 1; c
< argc
; c
++)
413 if (strcmp ("-to", argv
[c
]) == 0)
414 strcpy (argv
[c
], "-t");
/* NOTE(review): the table above maps elapsed to the letter e, but e does
 * not appear in the short option string below and no handler for it is
 * visible in this listing - confirm whether the option is usable. */
417 c
= getopt_long (argc
, argv
, "Vvhkt:c:w:p:s:u:C:a:z:r:m:P:T",
420 if (c
== -1 || c
== EOF
)
429 case 'V': /* version */
430 print_revision (progname
, NP_VERSION
);
432 case 't': /* timeout period */
433 if (!is_integer (optarg
))
434 usage2 (_("Timeout interval must be a positive integer"), optarg
);
436 timeout_interval
= atoi (optarg
);
438 case 'c': /* critical threshold */
439 critical_range
= optarg
;
441 case 'w': /* warning threshold */
442 warning_range
= optarg
;
444 case 'p': /* process id */
/* A result of exactly 1 means the number was read and nothing matched the
 * trailing scanset, i.e. the value is a clean integer.
 * NOTE(review): the scanset conversion has no field width, so a long
 * argument can write past the end of tmp - consider a bounded check. The
 * same pattern is used for the rss, vsz and pcpu values below. */
445 if (sscanf (optarg
, "%d%[^0-9]", &ppid
, tmp
) == 1) {
446 xasprintf (&fmt
, "%s%sPPID = %d", (fmt
? fmt
: "") , (options
? ", " : ""), ppid
);
450 usage4 (_("Parent Process ID must be an integer!"));
451 case 's': /* status */
456 xasprintf (&fmt
, _("%s%sSTATE = %s"), (fmt
? fmt
: ""), (options
? ", " : ""), statopts
);
459 case 'u': /* user or user id */
/* Numeric values are looked up with getpwuid(), names with getpwnam(). */
460 if (is_integer (optarg
)) {
462 pw
= getpwuid ((uid_t
) uid
);
463 /* check to be sure user exists */
465 usage2 (_("UID was not found"), optarg
);
468 pw
= getpwnam (optarg
);
469 /* check to be sure user exists */
471 usage2 (_("User name was not found"), optarg
);
476 xasprintf (&fmt
, "%s%sUID = %d (%s)", (fmt
? fmt
: ""), (options
? ", " : ""),
480 case 'C': /* command */
481 /* TODO: allow this to be passed in with --metric */
486 xasprintf (&fmt
, _("%s%scommand name '%s'"), (fmt
? fmt
: ""), (options
? ", " : ""),
490 case 'a': /* args (full path name with args) */
491 /* TODO: allow this to be passed in with --metric */
496 xasprintf (&fmt
, "%s%sargs '%s'", (fmt
? fmt
: ""), (options
? ", " : ""), args
);
/* Regex args filter: compile the pattern and abort with the regerror()
 * text when compilation fails. */
500 err
= regcomp(&re_args
, optarg
, cflags
);
502 regerror (err
, &re_args
, errbuf
, MAX_INPUT_BUFFER
);
503 die (STATE_UNKNOWN
, "PROCS %s: %s - %s\n", _("UNKNOWN"), _("Could not compile regular expression"), errbuf
);
505 /* Strip off any | within the regex optarg */
506 temp_string
= strdup(optarg
);
507 while(temp_string
[i
]!='\0'){
508 if(temp_string
[i
]=='|')
512 xasprintf (&fmt
, "%s%sregex args '%s'", (fmt
? fmt
: ""), (options
? ", " : ""), temp_string
);
513 options
|= EREG_ARGS
;
516 if (sscanf (optarg
, "%d%[^0-9]", &rss
, tmp
) == 1) {
517 xasprintf (&fmt
, "%s%sRSS >= %d", (fmt
? fmt
: ""), (options
? ", " : ""), rss
);
521 usage4 (_("RSS must be an integer!"));
523 if (sscanf (optarg
, "%d%[^0-9]", &vsz
, tmp
) == 1) {
524 xasprintf (&fmt
, "%s%sVSZ >= %d", (fmt
? fmt
: ""), (options
? ", " : ""), vsz
);
528 usage4 (_("VSZ must be an integer!"));
530 /* TODO: -P 1.5.5 is accepted */
531 if (sscanf (optarg
, "%f%[^0-9.]", &pcpu
, tmp
) == 1) {
532 xasprintf (&fmt
, "%s%sPCPU >= %.2f", (fmt
? fmt
: ""), (options
? ", " : ""), pcpu
);
536 usage4 (_("PCPU must be a float!"));
/* Metric selection: the option text becomes the metric name shown in the
 * output and is mapped onto the metric enum. */
538 xasprintf (&metric_name
, "%s", optarg
);
539 if ( strcmp(optarg
, "PROCS") == 0) {
540 metric
= METRIC_PROCS
;
543 else if ( strcmp(optarg
, "VSZ") == 0) {
547 else if ( strcmp(optarg
, "RSS") == 0 ) {
551 else if ( strcmp(optarg
, "CPU") == 0 ) {
555 else if ( strcmp(optarg
, "ELAPSED") == 0) {
556 metric
= METRIC_ELAPSED
;
560 usage4 (_("Metric must be one of PROCS, VSZ, RSS, CPU, ELAPSED!"));
561 case 'k': /* linux kernel thread filter */
564 case 'v': /* command */
571 input_filename
= optarg
;
/* Positional fallbacks: remaining arguments are taken, in order, as the
 * warning range, the critical range and the state filter when those were
 * not supplied as options. */
577 if ((! warning_range
) && argv
[c
])
578 warning_range
= argv
[c
++];
579 if ((! critical_range
) && argv
[c
])
580 critical_range
= argv
[c
++];
581 if (statopts
== NULL
&& argv
[c
]) {
582 xasprintf (&statopts
, "%s", argv
[c
++]);
583 xasprintf (&fmt
, _("%s%sSTATE = %s"), (fmt
? fmt
: ""), (options
? ", " : ""), statopts
);
587 /* this will abort in case of invalid ranges */
588 set_thresholds (&procs_thresholds
, warning_range
, critical_range
);
590 return validate_arguments ();
/* validate_arguments: final step of process_arguments(), which returns this
 * function's result directly. The single statement visible here gives
 * statopts an empty-string default; the condition guarding it and any other
 * checks are not visible in this listing. */
596 validate_arguments ()
602 statopts
= strdup("");
620 /* convert the elapsed time to seconds */
/* convert_to_seconds: turn the elapsed-time string reported by ps into a
 * number of seconds.
 *
 * etime is walked character by character first; the visible code later
 * branches on coloncnt, so that walk presumably counts the separators.
 * Three layouts are then parsed with sscanf(): days-hours:minutes:seconds,
 * hours:minutes:seconds and a two-field colon form. The fields are combined
 * into total, starting with days * 86400. When verbose is 3 or more and the
 * ELAPSED metric is active the total is printed.
 * NOTE(review): the return statement is not visible in this listing;
 * main() stores the result in procseconds. */
622 convert_to_seconds(char *etime
) {
641 for (ptr
= etime
; *ptr
!= '\0'; ptr
++) {
654 sscanf(etime
, "%d-%d:%d:%d",
655 &days
, &hours
, &minutes
, &seconds
);
656 /* linux 2.6.5/2.6.6 reporting some processes with infinite
657 * elapsed times for some reason */
663 sscanf(etime
, "%d:%d:%d",
664 &hours
, &minutes
, &seconds
);
665 } else if (coloncnt
== 1) {
/* NOTE(review): the arguments of this call are not visible in this
 * listing - presumably minutes and seconds; confirm. */
666 sscanf(etime
, "%d:%d",
671 total
= (days
* 86400) +
676 if (verbose
>= 3 && metric
== METRIC_ELAPSED
) {
677 printf("seconds: %d\n", total
);
/* Body of print_help (its header line is not visible in this listing):
 * prints the revision, copyright, a short description, every option with
 * its explanation, the notes on RANGE syntax and a few usage examples.
 *
 * Changes in this revision: the option names shown for -s and -T now match
 * the long option table of process_arguments(), which declares status and
 * traditional-filter. The previously advertised --state is not a prefix of
 * status, so getopt_long cannot resolve it. */
686 print_revision (progname
, NP_VERSION
);
688 printf ("Copyright (c) 1999 Ethan Galstad <nagios@nagios.org>\n");
689 printf (COPYRIGHT
, copyright
, email
);
691 printf ("%s\n", _("Checks all processes and generates WARNING or CRITICAL states if the specified"));
692 printf ("%s\n", _("metric is outside the required threshold ranges. The metric defaults to number"));
693 printf ("%s\n", _("of processes. Search filters can be applied to limit the processes to check."));
699 printf (UT_HELP_VRSN
);
700 printf (UT_EXTRA_OPTS
);
701 printf (" %s\n", "-w, --warning=RANGE");
702 printf (" %s\n", _("Generate warning state if metric is outside this range"));
703 printf (" %s\n", "-c, --critical=RANGE");
704 printf (" %s\n", _("Generate critical state if metric is outside this range"));
705 printf (" %s\n", "-m, --metric=TYPE");
706 printf (" %s\n", _("Check thresholds against metric. Valid types:"));
707 printf (" %s\n", _("PROCS - number of processes (default)"));
708 printf (" %s\n", _("VSZ - virtual memory size"));
709 printf (" %s\n", _("RSS - resident set memory size"));
710 printf (" %s\n", _("CPU - percentage CPU"));
711 /* only linux etime is supported currently */
712 #if defined( __linux__ )
713 printf (" %s\n", _("ELAPSED - time elapsed in seconds"));
714 #endif /* defined(__linux__) */
715 printf (UT_TIMEOUT
, DEFAULT_SOCKET_TIMEOUT
);
717 printf (" %s\n", "-v, --verbose");
718 printf (" %s\n", _("Extra information. Up to 3 verbosity levels"));
/* Full long option name as declared in the option table. */
720 printf (" %s\n", "-T, --traditional-filter");
721 printf (" %s\n", _("Filter own process the traditional way by PID instead of /proc/pid/exe"));
724 printf ("%s\n", "Filters:");
/* The long option is declared as status in the option table. */
725 printf (" %s\n", "-s, --status=STATUSFLAGS");
726 printf (" %s\n", _("Only scan for processes that have, in the output of `ps`, one or"));
727 printf (" %s\n", _("more of the status flags you specify (for example R, Z, S, RS,"));
728 printf (" %s\n", _("RSZDT, plus others based on the output of your 'ps' command)."));
729 printf (" %s\n", "-p, --ppid=PPID");
730 printf (" %s\n", _("Only scan for children of the parent process ID indicated."));
731 printf (" %s\n", "-z, --vsz=VSZ");
732 printf (" %s\n", _("Only scan for processes with VSZ higher than indicated."));
733 printf (" %s\n", "-r, --rss=RSS");
734 printf (" %s\n", _("Only scan for processes with RSS higher than indicated."));
735 printf (" %s\n", "-P, --pcpu=PCPU");
736 printf (" %s\n", _("Only scan for processes with PCPU higher than indicated."));
737 printf (" %s\n", "-u, --user=USER");
738 printf (" %s\n", _("Only scan for processes with user name or ID indicated."));
739 printf (" %s\n", "-a, --argument-array=STRING");
740 printf (" %s\n", _("Only scan for processes with args that contain STRING."));
741 printf (" %s\n", "--ereg-argument-array=STRING");
742 printf (" %s\n", _("Only scan for processes with args that contain the regex STRING."));
743 printf (" %s\n", "-C, --command=COMMAND");
744 printf (" %s\n", _("Only scan for exact matches of COMMAND (without path)."));
745 printf (" %s\n", "-k, --no-kthreads");
746 printf (" %s\n", _("Only scan for non kernel threads (works on Linux only)."));
749 RANGEs are specified 'min:max' or 'min:' or ':max' (or 'max'). If\n\
750 specified 'max:min', a warning status will be generated if the\n\
751 count is inside the specified range\n\n"));
754 This plugin checks the number of currently running processes and\n\
755 generates WARNING or CRITICAL states if the process count is outside\n\
756 the specified threshold ranges. The process count can be filtered by\n\
757 process owner, parent process PID, current state (e.g., 'Z'), or may\n\
758 be the total number of running processes\n\n"));
760 printf ("%s\n", _("Examples:"));
761 printf (" %s\n", "check_procs -w 2:2 -c 2:1024 -C portsentry");
762 printf (" %s\n", _("Warning if not two processes with command name portsentry."));
763 printf (" %s\n\n", _("Critical if < 2 or > 1024 processes"));
764 printf (" %s\n", "check_procs -w 10 -a '/usr/local/bin/perl' -u root");
765 printf (" %s\n", _("Warning alert if > 10 processes with command arguments containing"));
766 printf (" %s\n\n", _("'/usr/local/bin/perl' and owned by root"));
767 printf (" %s\n", "check_procs -w 50000 -c 100000 --metric=VSZ");
768 printf (" %s\n\n", _("Alert if VSZ of any processes over 50K or 100K"));
769 printf (" %s\n", "check_procs -w 10 -c 20 --metric=CPU");
770 printf (" %s\n", _("Alert if CPU of any processes over 10%% or 20%%"));
/* Body of print_usage (its header line is not visible in this listing):
 * prints the one-screen synopsis, with progname as the command name.
 * NOTE(review): the synopsis does not mention -T, the ereg-argument-array
 * option or the input-file option, all of which process_arguments()
 * accepts - consider adding them. */
778 printf ("%s\n", _("Usage:"));
779 printf ("%s -w <range> -c <range> [-m metric] [-s state] [-p ppid]\n", progname
);
780 printf (" [-u user] [-r rss] [-z vsz] [-P %%cpu] [-a argument-array]\n");
781 printf (" [-C command] [-k] [-t timeout] [-v]\n");