1 /*****************************************************************************
3 * Nagios check_procs plugin
6 * Copyright (c) 2000-2008 Nagios Plugins Development Team
10 * This file contains the check_procs plugin
12 * Checks all processes and generates WARNING or CRITICAL states if the
13 * specified metric is outside the required threshold ranges. The metric
14 * defaults to number of processes. Search filters can be applied to limit
15 * the processes to check.
18 * This program is free software: you can redistribute it and/or modify
19 * it under the terms of the GNU General Public License as published by
20 * the Free Software Foundation, either version 3 of the License, or
21 * (at your option) any later version.
23 * This program is distributed in the hope that it will be useful,
24 * but WITHOUT ANY WARRANTY; without even the implied warranty of
25 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26 * GNU General Public License for more details.
28 * You should have received a copy of the GNU General Public License
29 * along with this program. If not, see <http://www.gnu.org/licenses/>.
32 *****************************************************************************/
/* Plugin name; passed to np_extra_opts() and print_revision() and printed in the usage line. */
34 const char *progname
= "check_procs";
35 const char *program_name
= "check_procs"; /* Required for coreutils libs */
/* Copyright years and contact address handed to the COPYRIGHT format in print_help(). */
36 const char *copyright
= "2000-2008";
37 const char *email
= "nagiosplug-devel@lists.sourceforge.net";
41 #include "utils_cmd.h"
47 #ifdef HAVE_SYS_STAT_H
/* Forward declarations of the helpers defined further down in this file. */
/* process_arguments() returns the result of validate_arguments(); main() compares it with ERROR. */
51 int process_arguments (int, char **);
52 int validate_arguments (void);
/* Takes the elapsed-time string read from ps (procetime in main()). */
53 int convert_to_seconds (char *);
54 void print_help (void);
55 void print_usage (void);
/* Raw warning/critical range strings taken from -w/-c or from positional arguments; NULL until set. */
57 char *warning_range
= NULL
;
58 char *critical_range
= NULL
;
/* Filled in by set_thresholds() at the end of process_arguments() and consulted through get_status(). */
59 thresholds
*procs_thresholds
= NULL
;
61 int options
= 0; /* bitmask of filter criteria to test against */
72 #define EREG_ARGS 1024
74 #define KTHREAD_PARENT "kthreadd" /* the parent process of kernel threads:
75 ppid of procs are compared to pid of this proc*/
77 /* Different metrics */
/* Metric the thresholds are evaluated against; starts out as the process count. */
86 enum metric metric
= METRIC_PROCS
;
/* When non-NULL, main() obtains the ps output through cmd_file_read() on this file instead of cmd_run(). */
97 char *input_filename
= NULL
;
/* Scratch buffer that receives trailing non-numeric characters in the sscanf() checks of numeric options. */
101 char tmp
[MAX_INPUT_BUFFER
];
/* main() applies the kernel-thread filter only when this equals 1. */
102 int kthread_filter
= 0;
103 int usepid
= 0; /* whether to test for pid or /proc/pid/exe */
105 FILE *ps_input
= NULL
;
/*
 * Formats the path "/proc/<pid>/exe" for the given pid and calls stat() on
 * it; the stat data goes into buf and stat()'s return value into ret.
 * NOTE(review): pid is printed with "%d"; this assumes pid_t promotes to
 * int on the target platform -- confirm.
 */
108 stat_exe (const pid_t pid
, struct stat
*buf
) {
111 xasprintf(&path
, "/proc/%d/exe", pid
);
112 ret
= stat(path
, buf
);
/*
 * Program entry point: initialises locale and buffers, parses the command
 * line, obtains ps output, filters and evaluates each process line and
 * prints the plugin result.
 */
119 main (int argc
, char **argv
)
/* pid of the KTHREAD_PARENT process once it has been seen in the ps output; 0 until then. */
132 pid_t kthread_ppid
= 0;
/* Elapsed-time column of the current ps line, later passed to convert_to_seconds(). */
138 char procetime
[MAX_INPUT_BUFFER
] = { '\0' };
/* State string searched for in procstat to recognise zombie lines that lack a command column. */
141 const char *zombie
= "Z";
143 int resultsum
= 0; /* bitmask of the filter criteria met by a process */
144 int found
= 0; /* counter for number of lines returned in `ps` output */
145 int procs
= 0; /* counter for number of processes meeting filter criteria */
146 int pos
; /* number of spaces before 'args' in `ps` output */
147 int cols
; /* number of columns in ps output */
/* A line counts as parsed when sscanf() converts at least this many fields. */
148 int expected_cols
= PS_COLS
- 1;
149 int warn
= 0; /* number of processes in warn state */
150 int crit
= 0; /* number of processes in crit state */
152 int result
= STATE_UNKNOWN
;
154 output chld_out
, chld_err
;
/* Load the user's locale for messages, then pin LC_NUMERIC to "POSIX". */
156 setlocale (LC_ALL
, "");
157 bindtextdomain (PACKAGE
, LOCALEDIR
);
158 textdomain (PACKAGE
);
159 setlocale(LC_NUMERIC
, "POSIX");
/* NOTE(review): neither malloc() result is checked before use in the code shown -- confirm. */
161 input_buffer
= malloc (MAX_INPUT_BUFFER
);
162 procprog
= malloc (MAX_INPUT_BUFFER
);
/* Defaults; process_arguments() overwrites both when a metric option is parsed. */
164 xasprintf (&metric_name
, "PROCS");
165 metric
= METRIC_PROCS
;
167 /* Parse extra opts if any */
168 argv
=np_extra_opts (&argc
, argv
, progname
);
/* Parse the command line; a result of ERROR is reported through usage4(). */
170 if (process_arguments (argc
, argv
) == ERROR
)
171 usage4 (_("Could not parse arguments"));
/* Decide how the plugin recognises itself: by pid, or by the device/inode of its own /proc exe entry. */
175 if (usepid
|| stat_exe(mypid
, &statbuf
) == -1) {
176 /* usepid might have been set by -T */
/* Remember this plugin's executable identity for the self-detection test in the ps loop. */
180 mydev
= statbuf
.st_dev
;
181 myino
= statbuf
.st_ino
;
184 /* Set signal handling and alarm timeout */
185 if (signal (SIGALRM
, timeout_alarm_handler
) == SIG_ERR
) {
186 die (STATE_UNKNOWN
, _("Cannot catch SIGALRM"));
188 (void) alarm ((unsigned) timeout_interval
);
191 printf (_("CMD: %s\n"), PS_COMMAND
);
/* Without an input file the ps command is run through cmd_run(); the first stderr line, if any, is echoed. */
193 if (input_filename
== NULL
) {
194 result
= cmd_run( PS_COMMAND
, &chld_out
, &chld_err
, 0);
195 if (chld_err
.lines
> 0) {
196 printf ("%s: %s", _("System call sent warnings to stderr"), chld_err
.line
[0]);
/* With an input file the ps output is read from that file instead. */
200 result
= cmd_file_read( input_filename
, &chld_out
, 0);
/*
 * Walk the captured ps output one line at a time and split each line into
 * the proc* fields with sscanf(PS_FORMAT, PS_VARLIST).
 */
203 /* flush first line: j starts at 1 */
204 for (j
= 1; j
< chld_out
.lines
; j
++) {
205 input_line
= chld_out
.line
[j
];
208 printf ("%s", input_line
);
/* Reset the per-line command name and argument string before parsing. */
210 strcpy (procprog
, "");
211 xasprintf (&procargs
, "%s", "");
213 cols
= sscanf (input_line
, PS_FORMAT
, PS_VARLIST
);
215 /* Zombie processes do not give a procprog command */
216 if ( cols
< expected_cols
&& strstr(procstat
, zombie
) ) {
217 cols
= expected_cols
;
219 if ( cols
>= expected_cols
) {
/* The argument string is everything in the line from offset pos onwards. */
221 xasprintf (&procargs
, "%s", input_line
+ pos
);
224 /* Some ps return full pathname for command. This removes path */
/* NOTE(review): if base_name() returns a pointer into procprog, source and
 * destination of this strcpy() overlap -- confirm, and consider memmove(). */
225 strcpy(procprog
, base_name(procprog
));
227 /* we need to convert the elapsed time to seconds */
228 procseconds
= convert_to_seconds(procetime
);
231 printf ("proc#=%d uid=%d vsz=%d rss=%d pid=%d ppid=%d pcpu=%.2f stat=%s etime=%s prog=%s args=%s\n",
232 procs
, procuid
, procvsz
, procrss
,
233 procpid
, procppid
, procpcpu
, procstat
,
234 procetime
, procprog
, procargs
);
/*
 * Drop the plugin's own process from consideration: by pid when usepid is
 * set, otherwise by comparing st_dev/st_ino of the candidate's /proc exe
 * entry with the pair stored in mydev/myino.
 * NOTE(review): in "ret = stat_exe(...) != -1" the comparison is evaluated
 * before the assignment, so ret receives 0 or 1 rather than stat_exe()'s
 * return value, and the "ret == -1 && errno == ENOENT" alternative cannot
 * match through this assignment.  Parenthesising the assignment would turn
 * that alternative on for every process whose exe entry cannot be stat()ed
 * with ENOENT, which changes which processes are counted -- decide and test
 * that deliberately rather than fixing it in passing.
 */
237 if ((usepid
&& mypid
== procpid
) ||
238 (!usepid
&& ((ret
= stat_exe(procpid
, &statbuf
) != -1) && statbuf
.st_dev
== mydev
&& statbuf
.st_ino
== myino
) ||
239 (ret
== -1 && errno
== ENOENT
))) {
241 printf("not considering - is myself or gone\n");
245 /* filter kernel threads (children of KTHREAD_PARENT) */
246 /* TODO adapt for other OSes than GNU/Linux
247 sorry for not doing that, but I've no other OSes to test :-( */
248 if (kthread_filter
== 1) {
249 /* get pid KTHREAD_PARENT */
/* The first process whose command name equals KTHREAD_PARENT supplies the reference pid. */
250 if (kthread_ppid
== 0 && !strcmp(procprog
, KTHREAD_PARENT
) )
251 kthread_ppid
= procpid
;
/* A process whose parent pid equals that reference pid is reported as a kernel thread. */
253 if (kthread_ppid
== procppid
) {
255 printf ("Ignore kernel thread: pid=%d ppid=%d prog=%s args=%s\n", procpid
, procppid
, procprog
, procargs
);
/*
 * Test the current process against every filter selected in options.  Each
 * test is only evaluated when its bit is set in options; resultsum collects
 * the bits of the filters that matched.
 */
/* NOTE(review): strstr(statopts, procstat) looks for the whole procstat string
 * inside statopts, not for individual flag characters -- confirm this is intended. */
260 if ((options
& STAT
) && (strstr (statopts
, procstat
)))
/* Substring match of the requested text inside the process arguments. */
262 if ((options
& ARGS
) && procargs
&& (strstr (procargs
, args
) != NULL
))
/* Regular-expression match of the process arguments against the compiled re_args. */
264 if ((options
& EREG_ARGS
) && procargs
&& (regexec(&re_args
, procargs
, (size_t) 0, NULL
, 0) == 0))
265 resultsum
|= EREG_ARGS
;
/* Exact comparison of the command name. */
266 if ((options
& PROG
) && procprog
&& (strcmp (prog
, procprog
) == 0))
268 if ((options
& PPID
) && (procppid
== ppid
))
270 if ((options
& USER
) && (procuid
== uid
))
/* The three numeric filters match when the process value is greater than or equal to the limit. */
272 if ((options
& VSZ
) && (procvsz
>= vsz
))
274 if ((options
& RSS
) && (procrss
>= rss
))
276 if ((options
& PCPU
) && (procpcpu
>= pcpu
))
281 /* Next line if filters not matched */
/* A process passes when every requested filter matched, or when options equals ALL. */
282 if (!(options
== resultsum
|| options
== ALL
))
287 printf ("Matched: uid=%d vsz=%d rss=%d pid=%d ppid=%d pcpu=%.2f stat=%s etime=%s prog=%s args=%s\n",
288 procuid
, procvsz
, procrss
,
289 procpid
, procppid
, procpcpu
, procstat
,
290 procetime
, procprog
, procargs
);
/*
 * For the per-process metrics, evaluate the matching value of the current
 * process against procs_thresholds; the state is kept in i.
 */
293 if (metric
== METRIC_VSZ
)
294 i
= get_status ((double)procvsz
, procs_thresholds
);
295 else if (metric
== METRIC_RSS
)
296 i
= get_status ((double)procrss
, procs_thresholds
);
297 /* TODO? float thresholds for --metric=CPU */
298 else if (metric
== METRIC_CPU
)
299 i
= get_status (procpcpu
, procs_thresholds
);
300 else if (metric
== METRIC_ELAPSED
)
301 i
= get_status ((double)procseconds
, procs_thresholds
);
/* Only the per-process metrics contribute to fails and result here; the
 * process-count metric is evaluated once after the loop. */
303 if (metric
!= METRIC_PROCS
) {
304 if (i
== STATE_WARNING
) {
/* Append the command name to the comma-separated list in fails.
 * NOTE(review): fails is both destination and argument; if xasprintf()
 * allocates a fresh string the previous one is not freed here -- confirm. */
306 xasprintf (&fails
, "%s%s%s", fails
, (strcmp(fails
,"") ? ", " : ""), procprog
);
307 result
= max_state (result
, i
);
309 if (i
== STATE_CRITICAL
) {
311 xasprintf (&fails
, "%s%s%s", fails
, (strcmp(fails
,"") ? ", " : ""), procprog
);
312 result
= max_state (result
, i
);
316 /* This should not happen */
318 printf(_("Not parseable: %s"), input_buffer
);
/* Final evaluation and output of the plugin result line. */
322 if (found
== 0) { /* no process lines parsed so return STATE_UNKNOWN */
323 printf (_("Unable to read output\n"));
324 return STATE_UNKNOWN
;
327 if ( result
== STATE_UNKNOWN
)
330 /* Needed if procs found, but none match filter */
/* For the process-count metric the number of matching processes is what gets checked against the thresholds. */
331 if ( metric
== METRIC_PROCS
) {
332 result
= max_state (result
, get_status ((double)procs
, procs_thresholds
) );
/* Status prefix: "<metric name> <state>: "; for per-process metrics the
 * warn/crit counters are printed ahead of the total. */
335 if ( result
== STATE_OK
) {
336 printf ("%s %s: ", metric_name
, _("OK"));
337 } else if (result
== STATE_WARNING
) {
338 printf ("%s %s: ", metric_name
, _("WARNING"));
339 if ( metric
!= METRIC_PROCS
) {
340 printf (_("%d warn out of "), warn
);
342 } else if (result
== STATE_CRITICAL
) {
343 printf ("%s %s: ", metric_name
, _("CRITICAL"));
344 if (metric
!= METRIC_PROCS
) {
345 printf (_("%d crit, %d warn out of "), crit
, warn
);
/* Number of matching processes, with singular/plural chosen by ngettext(). */
348 printf (ngettext ("%d process", "%d processes", (unsigned long) procs
), procs
);
/* fmt holds the human-readable description of the active filters built in process_arguments(). */
350 if (strcmp(fmt
,"") != 0) {
351 printf (_(" with %s"), fmt
);
/* With verbosity the names of the processes that breached a threshold are listed. */
354 if ( verbose
>= 1 && strcmp(fails
,"") )
355 printf (" [%s]", fails
);
/* Performance data: the count with its ranges for the process-count metric,
 * otherwise the count plus the warn/crit counters. */
357 if (metric
== METRIC_PROCS
)
358 printf (" | procs=%d;%s;%s;0;", procs
,
359 warning_range
? warning_range
: "",
360 critical_range
? critical_range
: "");
362 printf (" | procs=%d;;;0; procs_warn=%d;;;0; procs_crit=%d;;;0;", procs
, warn
, crit
);
370 /* process command-line arguments */
/*
 * Parses argv with getopt_long(), records the selected filters in options
 * and in the human-readable fmt string, builds the thresholds and finally
 * returns the result of validate_arguments().
 */
372 process_arguments (int argc
, char **argv
)
/* Flags for regcomp(): extended syntax, no sub-match reporting. */
379 int cflags
= REG_NOSUB
| REG_EXTENDED
;
/* Receives the regerror() text when the regular expression does not compile. */
380 char errbuf
[MAX_INPUT_BUFFER
];
/*
 * Long-option table for getopt_long().  Each entry maps a long name to the
 * option character handled by the case labels of the option loop; the two
 * options without a short form use values above CHAR_MAX.
 *
 * "no-kthreads" is declared no_argument so that it agrees with the
 * short-option string, where 'k' carries no ':', and with the help and
 * usage text, which show -k without a value.  Declared as
 * required_argument, "--no-kthreads" would swallow the next command-line
 * word as its value.
 *
 * The all-zero entry terminates the table, as getopt_long() requires.
 *
 * NOTE(review): "elapsed" maps to 'e', but 'e' does not appear in the
 * short-option string passed to getopt_long() -- confirm it is handled.
 */
static struct option longopts[] = {
	{"warning", required_argument, 0, 'w'},
	{"critical", required_argument, 0, 'c'},
	{"metric", required_argument, 0, 'm'},
	{"timeout", required_argument, 0, 't'},
	{"status", required_argument, 0, 's'},
	{"ppid", required_argument, 0, 'p'},
	{"user", required_argument, 0, 'u'},
	{"command", required_argument, 0, 'C'},
	{"vsz", required_argument, 0, 'z'},
	{"rss", required_argument, 0, 'r'},
	{"pcpu", required_argument, 0, 'P'},
	{"elapsed", required_argument, 0, 'e'},
	{"argument-array", required_argument, 0, 'a'},
	{"help", no_argument, 0, 'h'},
	{"version", no_argument, 0, 'V'},
	{"verbose", no_argument, 0, 'v'},
	{"ereg-argument-array", required_argument, 0, CHAR_MAX+1},
	{"input-file", required_argument, 0, CHAR_MAX+2},
	{"no-kthreads", no_argument, 0, 'k'},
	{"traditional-filter", no_argument, 0, 'T'},
	{0, 0, 0, 0}
};
/* Rewrite the legacy spelling "-to" to "-t" in place before option parsing. */
407 for (c
= 1; c
< argc
; c
++)
408 if (strcmp ("-to", argv
[c
]) == 0)
409 strcpy (argv
[c
], "-t");
/* In the short-option string a trailing ':' marks an option that takes a value. */
412 c
= getopt_long (argc
, argv
, "Vvhkt:c:w:p:s:u:C:a:z:r:m:P:T",
415 if (c
== -1 || c
== EOF
)
424 case 'V': /* version */
425 print_revision (progname
, NP_VERSION
);
427 case 't': /* timeout period */
428 if (!is_integer (optarg
))
429 usage2 (_("Timeout interval must be a positive integer"), optarg
);
431 timeout_interval
= atoi (optarg
);
/* The range strings are only stored here; they are parsed by set_thresholds() after the option loop. */
433 case 'c': /* critical threshold */
434 critical_range
= optarg
;
436 case 'w': /* warning threshold */
437 warning_range
= optarg
;
439 case 'p': /* parent process id */
/* sscanf() returns 1 only when a number was read and no non-digit text followed it.
 * NOTE(review): the "%[^0-9]" conversion into tmp has no field width -- confirm optarg cannot exceed the buffer. */
440 if (sscanf (optarg
, "%d%[^0-9]", &ppid
, tmp
) == 1) {
/* Each accepted filter appends its description to fmt, comma-separated once options is non-zero. */
441 xasprintf (&fmt
, "%s%sPPID = %d", (fmt
? fmt
: "") , (options
? ", " : ""), ppid
);
445 usage4 (_("Parent Process ID must be an integer!"));
446 case 's': /* status */
451 xasprintf (&fmt
, _("%s%sSTATE = %s"), (fmt
? fmt
: ""), (options
? ", " : ""), statopts
);
454 case 'u': /* user or user id */
/* A numeric argument is looked up as a uid, anything else as a user name. */
455 if (is_integer (optarg
)) {
457 pw
= getpwuid ((uid_t
) uid
);
458 /* check to be sure user exists */
460 usage2 (_("UID was not found"), optarg
);
463 pw
= getpwnam (optarg
);
464 /* check to be sure user exists */
466 usage2 (_("User name was not found"), optarg
);
471 xasprintf (&fmt
, "%s%sUID = %d (%s)", (fmt
? fmt
: ""), (options
? ", " : ""),
475 case 'C': /* command */
476 /* TODO: allow this to be passed in with --metric */
481 xasprintf (&fmt
, _("%s%scommand name '%s'"), (fmt
? fmt
: ""), (options
? ", " : ""),
485 case 'a': /* args (full path name with args) */
486 /* TODO: allow this to be passed in with --metric */
491 xasprintf (&fmt
, "%s%sargs '%s'", (fmt
? fmt
: ""), (options
? ", " : ""), args
);
/* Compile the argument regular expression; the regerror() text is passed to die() with STATE_UNKNOWN. */
495 err
= regcomp(&re_args
, optarg
, cflags
);
497 regerror (err
, &re_args
, errbuf
, MAX_INPUT_BUFFER
);
498 die (STATE_UNKNOWN
, "PROCS %s: %s - %s\n", _("UNKNOWN"), _("Could not compile regular expression"), errbuf
);
500 /* Strip off any | within the regex optarg */
/* Works on a strdup() copy so that optarg itself stays untouched; the copy is what ends up in fmt. */
501 temp_string
= strdup(optarg
);
502 while(temp_string
[i
]!='\0'){
503 if(temp_string
[i
]=='|')
507 xasprintf (&fmt
, "%s%sregex args '%s'", (fmt
? fmt
: ""), (options
? ", " : ""), temp_string
);
508 options
|= EREG_ARGS
;
/* RSS limit: accepted only when the argument is a bare integer. */
511 if (sscanf (optarg
, "%d%[^0-9]", &rss
, tmp
) == 1) {
512 xasprintf (&fmt
, "%s%sRSS >= %d", (fmt
? fmt
: ""), (options
? ", " : ""), rss
);
516 usage4 (_("RSS must be an integer!"));
/* VSZ limit: same validation as for RSS. */
518 if (sscanf (optarg
, "%d%[^0-9]", &vsz
, tmp
) == 1) {
519 xasprintf (&fmt
, "%s%sVSZ >= %d", (fmt
? fmt
: ""), (options
? ", " : ""), vsz
);
523 usage4 (_("VSZ must be an integer!"));
525 /* TODO: -P 1.5.5 is accepted */
526 if (sscanf (optarg
, "%f%[^0-9.]", &pcpu
, tmp
) == 1) {
527 xasprintf (&fmt
, "%s%sPCPU >= %.2f", (fmt
? fmt
: ""), (options
? ", " : ""), pcpu
);
531 usage4 (_("PCPU must be a float!"));
/* Metric selection: the argument text becomes metric_name and must be one of the names compared below. */
533 xasprintf (&metric_name
, "%s", optarg
);
534 if ( strcmp(optarg
, "PROCS") == 0) {
535 metric
= METRIC_PROCS
;
538 else if ( strcmp(optarg
, "VSZ") == 0) {
542 else if ( strcmp(optarg
, "RSS") == 0 ) {
546 else if ( strcmp(optarg
, "CPU") == 0 ) {
550 else if ( strcmp(optarg
, "ELAPSED") == 0) {
551 metric
= METRIC_ELAPSED
;
555 usage4 (_("Metric must be one of PROCS, VSZ, RSS, CPU, ELAPSED!"));
556 case 'k': /* linux kernel thread filter */
559 case 'v': /* verbose */
/* The ps output is later read from this file instead of running the ps command. */
566 input_filename
= optarg
;
/*
 * Positional fallbacks: arguments starting at index c fill, in this order,
 * the warning range, the critical range and the state filter, each only
 * when the corresponding option did not already set it.
 */
572 if ((! warning_range
) && argv
[c
])
573 warning_range
= argv
[c
++];
574 if ((! critical_range
) && argv
[c
])
575 critical_range
= argv
[c
++];
576 if (statopts
== NULL
&& argv
[c
]) {
577 xasprintf (&statopts
, "%s", argv
[c
++]);
578 xasprintf (&fmt
, _("%s%sSTATE = %s"), (fmt
? fmt
: ""), (options
? ", " : ""), statopts
);
582 /* this will abort in case of invalid ranges */
583 set_thresholds (&procs_thresholds
, warning_range
, critical_range
);
585 return validate_arguments ();
/*
 * Final sanity pass over the parsed options; its result is what
 * process_arguments() returns.
 * NOTE(review): the empty-string assignment below is presumably a default
 * applied only while statopts is still unset -- confirm.
 */
591 validate_arguments ()
597 statopts
= strdup("");
615 /* convert the elapsed time to seconds */
/*
 * Parses the elapsed-time string reported by ps.  The sscanf() calls below
 * accept the layouts "D-H:M:S", "H:M:S" and "M:S"; the parts are combined
 * into total, which starts from days * 86400.
 */
617 convert_to_seconds(char *etime
) {
/* Scan the string character by character before choosing a layout. */
636 for (ptr
= etime
; *ptr
!= '\0'; ptr
++) {
649 sscanf(etime
, "%d-%d:%d:%d",
650 &days
, &hours
, &minutes
, &seconds
);
651 /* linux 2.6.5/2.6.6 reporting some processes with infinite
652 * elapsed times for some reason */
658 sscanf(etime
, "%d:%d:%d",
659 &hours
, &minutes
, &seconds
);
660 } else if (coloncnt
== 1) {
661 sscanf(etime
, "%d:%d",
666 total
= (days
* 86400) +
/* Debug output, only at verbosity 3 and only when the elapsed metric is selected. */
671 if (verbose
>= 3 && metric
== METRIC_ELAPSED
) {
672 printf("seconds: %d\n", total
);
/* Help text, first part: revision and copyright, summary, then the threshold, metric and general options. */
/* NOTE(review): the table passed to getopt_long() registers the long name "traditional-filter"; the
 * "--traditional" spelling printed below only works as an abbreviation of it. */
681 print_revision (progname
, NP_VERSION
);
683 printf ("Copyright (c) 1999 Ethan Galstad <nagios@nagios.org>\n");
684 printf (COPYRIGHT
, copyright
, email
);
686 printf ("%s\n", _("Checks all processes and generates WARNING or CRITICAL states if the specified"));
687 printf ("%s\n", _("metric is outside the required threshold ranges. The metric defaults to number"));
688 printf ("%s\n", _("of processes. Search filters can be applied to limit the processes to check."));
694 printf (UT_HELP_VRSN
);
695 printf (UT_EXTRA_OPTS
);
696 printf (" %s\n", "-w, --warning=RANGE");
697 printf (" %s\n", _("Generate warning state if metric is outside this range"));
698 printf (" %s\n", "-c, --critical=RANGE");
699 printf (" %s\n", _("Generate critical state if metric is outside this range"));
700 printf (" %s\n", "-m, --metric=TYPE");
701 printf (" %s\n", _("Check thresholds against metric. Valid types:"));
702 printf (" %s\n", _("PROCS - number of processes (default)"));
703 printf (" %s\n", _("VSZ - virtual memory size"));
704 printf (" %s\n", _("RSS - resident set memory size"));
705 printf (" %s\n", _("CPU - percentage CPU"));
706 /* only Linux etime is supported currently */
707 #if defined( __linux__ )
708 printf (" %s\n", _("ELAPSED - time elapsed in seconds"));
709 #endif /* defined(__linux__) */
710 printf (UT_TIMEOUT
, DEFAULT_SOCKET_TIMEOUT
);
712 printf (" %s\n", "-v, --verbose");
713 printf (" %s\n", _("Extra information. Up to 3 verbosity levels"));
715 printf (" %s\n", "-T, --traditional");
716 printf (" %s\n", _("Filter own process the traditional way by PID instead of /proc/pid/exe"));
719 printf ("%s\n", "Filters:");
720 printf (" %s\n", "-s, --state=STATUSFLAGS");
721 printf (" %s\n", _("Only scan for processes that have, in the output of `ps`, one or"));
722 printf (" %s\n", _("more of the status flags you specify (for example R, Z, S, RS,"));
723 printf (" %s\n", _("RSZDT, plus others based on the output of your 'ps' command)."));
/* Help text, second part: the remaining filter options, the RANGE explanation and usage examples. */
/* NOTE(review): the VSZ/RSS/PCPU entries say "higher than", while main() matches with ">=" -- confirm the wording. */
724 printf (" %s\n", "-p, --ppid=PPID");
725 printf (" %s\n", _("Only scan for children of the parent process ID indicated."));
726 printf (" %s\n", "-z, --vsz=VSZ");
727 printf (" %s\n", _("Only scan for processes with VSZ higher than indicated."));
728 printf (" %s\n", "-r, --rss=RSS");
729 printf (" %s\n", _("Only scan for processes with RSS higher than indicated."));
730 printf (" %s\n", "-P, --pcpu=PCPU");
731 printf (" %s\n", _("Only scan for processes with PCPU higher than indicated."));
732 printf (" %s\n", "-u, --user=USER");
733 printf (" %s\n", _("Only scan for processes with user name or ID indicated."));
734 printf (" %s\n", "-a, --argument-array=STRING");
735 printf (" %s\n", _("Only scan for processes with args that contain STRING."));
736 printf (" %s\n", "--ereg-argument-array=STRING");
737 printf (" %s\n", _("Only scan for processes with args that contain the regex STRING."));
738 printf (" %s\n", "-C, --command=COMMAND");
739 printf (" %s\n", _("Only scan for exact matches of COMMAND (without path)."));
740 printf (" %s\n", "-k, --no-kthreads");
741 printf (" %s\n", _("Only scan for non kernel threads (works on Linux only)."));
744 RANGEs are specified 'min:max' or 'min:' or ':max' (or 'max'). If\n\
745 specified 'max:min', a warning status will be generated if the\n\
746 count is inside the specified range\n\n"));
749 This plugin checks the number of currently running processes and\n\
750 generates WARNING or CRITICAL states if the process count is outside\n\
751 the specified threshold ranges. The process count can be filtered by\n\
752 process owner, parent process PID, current state (e.g., 'Z'), or may\n\
753 be the total number of running processes\n\n"));
755 printf ("%s\n", _("Examples:"));
756 printf (" %s\n", "check_procs -w 2:2 -c 2:1024 -C portsentry");
757 printf (" %s\n", _("Warning if not two processes with command name portsentry."));
758 printf (" %s\n\n", _("Critical if < 2 or > 1024 processes"));
759 printf (" %s\n", "check_procs -w 10 -a '/usr/local/bin/perl' -u root");
760 printf (" %s\n", _("Warning alert if > 10 processes with command arguments containing"));
761 printf (" %s\n\n", _("'/usr/local/bin/perl' and owned by root"));
762 printf (" %s\n", "check_procs -w 50000 -c 100000 --metric=VSZ");
763 printf (" %s\n\n", _("Alert if VSZ of any processes over 50K or 100K"));
764 printf (" %s\n", "check_procs -w 10 -c 20 --metric=CPU");
765 printf (" %s\n", _("Alert if CPU of any processes over 10%% or 20%%"));
/* One-screen synopsis of the command line, prefixed with the plugin name. */
/* NOTE(review): the synopsis omits -T and the long-only options (--ereg-argument-array,
 * --input-file) that the option table accepts -- confirm whether they should be listed. */
773 printf ("%s\n", _("Usage:"));
774 printf ("%s -w <range> -c <range> [-m metric] [-s state] [-p ppid]\n", progname
);
775 printf (" [-u user] [-r rss] [-z vsz] [-P %%cpu] [-a argument-array]\n");
776 printf (" [-C command] [-k] [-t timeout] [-v]\n");