1 /*****************************************************************************
3 * Nagios check_procs plugin
6 * Copyright (c) 2000-2008 Nagios Plugins Development Team
10 * This file contains the check_procs plugin
12 * Checks all processes and generates WARNING or CRITICAL states if the
13 * specified metric is outside the required threshold ranges. The metric
14 * defaults to number of processes. Search filters can be applied to limit
15 * the processes to check.
18 * This program is free software: you can redistribute it and/or modify
19 * it under the terms of the GNU General Public License as published by
20 * the Free Software Foundation, either version 3 of the License, or
21 * (at your option) any later version.
23 * This program is distributed in the hope that it will be useful,
24 * but WITHOUT ANY WARRANTY; without even the implied warranty of
25 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26 * GNU General Public License for more details.
28 * You should have received a copy of the GNU General Public License
29 * along with this program. If not, see <http://www.gnu.org/licenses/>.
32 *****************************************************************************/
/* Identification strings for this plugin. progname is handed to
 * np_extra_opts and print_revision; copyright and email are printed by the
 * help output. */
34 const char *progname
= "check_procs";
35 const char *program_name
= "check_procs"; /* Required for coreutils libs */
36 const char *copyright
= "2000-2008";
37 const char *email
= "nagiosplug-devel@lists.sourceforge.net";
41 #include "utils_cmd.h"
/* Forward declarations for the helpers defined further down in this file. */
46 int process_arguments (int, char **);
47 int validate_arguments (void);
/* Takes the elapsed-time text taken from the process listing. */
48 int convert_to_seconds (char *);
49 void print_help (void);
50 void print_usage (void);
/* Raw threshold strings. They are assigned from the command line in
 * process_arguments and later passed to set_thresholds. */
52 char *warning_range
= NULL
;
53 char *critical_range
= NULL
;
/* Parsed thresholds; filled in by set_thresholds and read by get_status. */
54 thresholds
*procs_thresholds
= NULL
;
56 int options
= 0; /* bitmask of filter criteria to test against */
/* Filter bit that is OR-ed into options when a regex argument filter is set. */
67 #define EREG_ARGS 1024
68 /* Different metrics */
/* Metric currently evaluated against the thresholds; starts as METRIC_PROCS. */
77 enum metric metric
= METRIC_PROCS
;
/* When non-NULL the process listing is read from this file instead of
 * running PS_COMMAND. Assigned from optarg in process_arguments. */
88 char *input_filename
= NULL
;
/* Scratch buffer; receives the trailing non-numeric text matched by the
 * sscanf calls that validate numeric option values. */
92 char tmp
[MAX_INPUT_BUFFER
];
94 FILE *ps_input
= NULL
;
/*
 * Program entry point.
 *
 * NOTE: this extract is fragmentary; several original lines (braces, some
 * declarations and statements) are not present here.
 *
 * Visible flow:
 *   1. set up the locale and allocate the work buffers,
 *   2. parse the command line via process_arguments,
 *   3. install the SIGALRM handler and arm the timeout,
 *   4. obtain the process listing, either by running PS_COMMAND or by
 *      reading input_filename,
 *   5. for every listing line after the first, parse the fields, apply the
 *      selected filters and evaluate the chosen metric with get_status,
 *   6. print the status line followed by the performance data.
 */
98 main (int argc
, char **argv
)
113 char procetime
[MAX_INPUT_BUFFER
] = { '\0' };
116 const char *zombie
= "Z";
118 int resultsum
= 0; /* bitmask of the filter criteria met by a process */
119 int found
= 0; /* counter for number of lines returned in `ps` output */
120 int procs
= 0; /* counter for number of processes meeting filter criteria */
121 int pos
; /* number of spaces before 'args' in `ps` output */
122 int cols
; /* number of columns in ps output */
123 int expected_cols
= PS_COLS
- 1;
124 int warn
= 0; /* number of processes in warn state */
125 int crit
= 0; /* number of processes in crit state */
127 int result
= STATE_UNKNOWN
;
128 output chld_out
, chld_err
;
130 setlocale (LC_ALL
, "");
131 bindtextdomain (PACKAGE
, LOCALEDIR
);
132 textdomain (PACKAGE
);
/* Override only the numeric category with the POSIX locale after LC_ALL was
 * taken from the environment. */
133 setlocale(LC_NUMERIC
, "POSIX");
135 input_buffer
= malloc (MAX_INPUT_BUFFER
);
136 procprog
= malloc (MAX_INPUT_BUFFER
);
/* Defaults: metric name PROCS and metric METRIC_PROCS. */
138 xasprintf (&metric_name
, "PROCS");
139 metric
= METRIC_PROCS
;
141 /* Parse extra opts if any */
142 argv
=np_extra_opts (&argc
, argv
, progname
);
144 if (process_arguments (argc
, argv
) == ERROR
)
145 usage4 (_("Could not parse arguments"));
150 /* Set signal handling and alarm timeout */
151 if (signal (SIGALRM
, timeout_alarm_handler
) == SIG_ERR
) {
152 die (STATE_UNKNOWN
, _("Cannot catch SIGALRM"));
154 (void) alarm ((unsigned) timeout_interval
);
157 printf (_("CMD: %s\n"), PS_COMMAND
);
/* Without an input file the listing comes from running PS_COMMAND;
 * otherwise it is read from input_filename. */
159 if (input_filename
== NULL
) {
160 result
= cmd_run( PS_COMMAND
, &chld_out
, &chld_err
, 0);
161 if (chld_err
.lines
> 0) {
162 printf ("%s: %s", _("System call sent warnings to stderr"), chld_err
.line
[0]);
166 result
= cmd_file_read( input_filename
, &chld_out
, 0);
169 /* flush first line: j starts at 1 */
170 for (j
= 1; j
< chld_out
.lines
; j
++) {
171 input_line
= chld_out
.line
[j
];
174 printf ("%s", input_line
);
/* Reset the per-line command name and argument string before parsing. */
176 strcpy (procprog
, "");
177 xasprintf (&procargs
, "%s", "");
/* cols is the number of fields sscanf managed to convert. */
179 cols
= sscanf (input_line
, PS_FORMAT
, PS_VARLIST
);
181 /* Zombie processes do not give a procprog command */
182 if ( cols
< expected_cols
&& strstr(procstat
, zombie
) ) {
183 cols
= expected_cols
;
185 if ( cols
>= expected_cols
) {
/* The argument string is the remainder of the line starting at offset pos. */
187 xasprintf (&procargs
, "%s", input_line
+ pos
);
190 /* Some ps return full pathname for command. This removes path */
191 strcpy(procprog
, base_name(procprog
));
193 /* we need to convert the elapsed time to seconds */
194 procseconds
= convert_to_seconds(procetime
);
197 printf ("proc#=%d uid=%d vsz=%d rss=%d pid=%d ppid=%d pcpu=%.2f stat=%s etime=%s prog=%s args=%s\n",
198 procs
, procuid
, procvsz
, procrss
,
199 procpid
, procppid
, procpcpu
, procstat
,
200 procetime
, procprog
, procargs
);
/* Skip the line whose pid equals mypid (presumably the pid of this plugin;
 * the assignment of mypid is not visible in this extract - TODO confirm). */
203 if (mypid
== procpid
) continue;
/* Filter tests: each test only applies when its bit is set in options.
 * The statements executed on a match are mostly not visible here; the
 * EREG_ARGS test shows the pattern of OR-ing the bit into resultsum. */
205 if ((options
& STAT
) && (strstr (statopts
, procstat
)))
207 if ((options
& ARGS
) && procargs
&& (strstr (procargs
, args
) != NULL
))
209 if ((options
& EREG_ARGS
) && procargs
&& (regexec(&re_args
, procargs
, (size_t) 0, NULL
, 0) == 0))
210 resultsum
|= EREG_ARGS
;
211 if ((options
& PROG
) && procprog
&& (strcmp (prog
, procprog
) == 0))
213 if ((options
& PPID
) && (procppid
== ppid
))
215 if ((options
& USER
) && (procuid
== uid
))
217 if ((options
& VSZ
) && (procvsz
>= vsz
))
219 if ((options
& RSS
) && (procrss
>= rss
))
221 if ((options
& PCPU
) && (procpcpu
>= pcpu
))
226 /* Next line if filters not matched */
227 if (!(options
== resultsum
|| options
== ALL
))
232 printf ("Matched: uid=%d vsz=%d rss=%d pid=%d ppid=%d pcpu=%.2f stat=%s etime=%s prog=%s args=%s\n",
233 procuid
, procvsz
, procrss
,
234 procpid
, procppid
, procpcpu
, procstat
,
235 procetime
, procprog
, procargs
);
/* Per-process metrics: compare the value of this process with the thresholds. */
238 if (metric
== METRIC_VSZ
)
239 i
= get_status ((double)procvsz
, procs_thresholds
);
240 else if (metric
== METRIC_RSS
)
241 i
= get_status ((double)procrss
, procs_thresholds
);
242 /* TODO? float thresholds for --metric=CPU */
243 else if (metric
== METRIC_CPU
)
244 i
= get_status (procpcpu
, procs_thresholds
);
245 else if (metric
== METRIC_ELAPSED
)
246 i
= get_status ((double)procseconds
, procs_thresholds
);
248 if (metric
!= METRIC_PROCS
) {
249 if (i
== STATE_WARNING
) {
/* Append this command name to the comma-separated list kept in fails. */
251 xasprintf (&fails
, "%s%s%s", fails
, (strcmp(fails
,"") ? ", " : ""), procprog
);
252 result
= max_state (result
, i
);
254 if (i
== STATE_CRITICAL
) {
/* Same bookkeeping as for the warning case above. */
256 xasprintf (&fails
, "%s%s%s", fails
, (strcmp(fails
,"") ? ", " : ""), procprog
);
257 result
= max_state (result
, i
);
261 /* This should not happen */
/* NOTE(review): input_buffer is allocated above but no visible line writes
 * to it, while the loop reads its text from input_line. Confirm that this
 * message prints the intended line. */
263 printf(_("Not parseable: %s"), input_buffer
);
267 if (found
== 0) { /* no process lines parsed so return STATE_UNKNOWN */
268 printf (_("Unable to read output\n"));
269 return STATE_UNKNOWN
;
272 if ( result
== STATE_UNKNOWN
)
275 /* Needed if procs found, but none match filter */
276 if ( metric
== METRIC_PROCS
) {
/* For the PROCS metric the thresholds apply to the count of matches. */
277 result
= max_state (result
, get_status ((double)procs
, procs_thresholds
) );
/* Status line: metric name and state, then the matching process count. */
280 if ( result
== STATE_OK
) {
281 printf ("%s %s: ", metric_name
, _("OK"));
282 } else if (result
== STATE_WARNING
) {
283 printf ("%s %s: ", metric_name
, _("WARNING"));
284 if ( metric
!= METRIC_PROCS
) {
285 printf (_("%d warn out of "), warn
);
287 } else if (result
== STATE_CRITICAL
) {
288 printf ("%s %s: ", metric_name
, _("CRITICAL"));
289 if (metric
!= METRIC_PROCS
) {
290 printf (_("%d crit, %d warn out of "), crit
, warn
);
293 printf (ngettext ("%d process", "%d processes", (unsigned long) procs
), procs
);
/* fmt holds the description of the active filters built during option parsing. */
295 if (strcmp(fmt
,"") != 0) {
296 printf (_(" with %s"), fmt
);
299 if ( verbose
>= 1 && strcmp(fails
,"") )
300 printf (" [%s]", fails
);
/* Performance data: the threshold strings are only reported for the PROCS
 * metric; the other form reports the warn and crit counters instead. */
302 if (metric
== METRIC_PROCS
)
303 printf (" | procs=%d;%s;%s;0;", procs
,
304 warning_range
? warning_range
: "",
305 critical_range
? critical_range
: "");
307 printf (" | procs=%d;;;0; procs_warn=%d;;;0; procs_crit=%d;;;0;", procs
, warn
, crit
);
315 /* process command-line arguments */
/*
 * Parses argv with getopt_long.
 *
 * NOTE: this extract is fragmentary; most case labels and several statements
 * are not present here.
 *
 * Visible behaviour: each filter option appends a human-readable description
 * to fmt (separated by a comma when options is already non-zero), numeric
 * option values are validated with sscanf, the thresholds are handed to
 * set_thresholds, and the function returns the result of validate_arguments.
 */
317 process_arguments (int argc
, char **argv
)
324 int cflags
= REG_NOSUB
| REG_EXTENDED
;
325 char errbuf
[MAX_INPUT_BUFFER
];
328 static struct option longopts
[] = {
329 {"warning", required_argument
, 0, 'w'},
330 {"critical", required_argument
, 0, 'c'},
331 {"metric", required_argument
, 0, 'm'},
332 {"timeout", required_argument
, 0, 't'},
/* NOTE(review): the help text advertises this option as --state while the
 * long name declared here is status. Confirm which spelling is intended. */
333 {"status", required_argument
, 0, 's'},
334 {"ppid", required_argument
, 0, 'p'},
335 {"command", required_argument
, 0, 'C'},
336 {"vsz", required_argument
, 0, 'z'},
337 {"rss", required_argument
, 0, 'r'},
338 {"pcpu", required_argument
, 0, 'P'},
339 {"elapsed", required_argument
, 0, 'e'},
340 {"argument-array", required_argument
, 0, 'a'},
341 {"help", no_argument
, 0, 'h'},
342 {"version", no_argument
, 0, 'V'},
343 {"verbose", no_argument
, 0, 'v'},
/* Long-only options use values above CHAR_MAX so they cannot collide with
 * any short option character. */
344 {"ereg-argument-array", required_argument
, 0, CHAR_MAX
+1},
345 {"input-file", required_argument
, 0, CHAR_MAX
+2},
/* Rewrite any -to argument in place to -t before option parsing. */
349 for (c
= 1; c
< argc
; c
++)
350 if (strcmp ("-to", argv
[c
]) == 0)
351 strcpy (argv
[c
], "-t");
/* NOTE(review): longopts maps --elapsed to e, but e is not part of this
 * short-option string and no case for it is visible in this extract.
 * Confirm how that option is handled. */
354 c
= getopt_long (argc
, argv
, "Vvht:c:w:p:s:u:C:a:z:r:m:P:",
357 if (c
== -1 || c
== EOF
)
366 case 'V': /* version */
367 print_revision (progname
, NP_VERSION
);
369 case 't': /* timeout period */
370 if (!is_integer (optarg
))
371 usage2 (_("Timeout interval must be a positive integer"), optarg
);
373 timeout_interval
= atoi (optarg
);
375 case 'c': /* critical threshold */
376 critical_range
= optarg
;
378 case 'w': /* warning threshold */
379 warning_range
= optarg
;
381 case 'p': /* parent process id */
/* Exactly one conversion means an integer with no trailing non-digit text;
 * any such trailing text would be captured in tmp as a second conversion. */
382 if (sscanf (optarg
, "%d%[^0-9]", &ppid
, tmp
) == 1) {
383 xasprintf (&fmt
, "%s%sPPID = %d", (fmt
? fmt
: "") , (options
? ", " : ""), ppid
);
387 usage4 (_("Parent Process ID must be an integer!"));
388 case 's': /* status */
393 xasprintf (&fmt
, _("%s%sSTATE = %s"), (fmt
? fmt
: ""), (options
? ", " : ""), statopts
);
396 case 'u': /* user or user id */
/* A numeric value is looked up as a uid, anything else as a user name. */
397 if (is_integer (optarg
)) {
399 pw
= getpwuid ((uid_t
) uid
);
400 /* check to be sure user exists */
402 usage2 (_("UID was not found"), optarg
);
405 pw
= getpwnam (optarg
);
406 /* check to be sure user exists */
408 usage2 (_("User name was not found"), optarg
);
413 xasprintf (&fmt
, "%s%sUID = %d (%s)", (fmt
? fmt
: ""), (options
? ", " : ""),
417 case 'C': /* command */
418 /* TODO: allow this to be passed in with --metric */
423 xasprintf (&fmt
, _("%s%scommand name '%s'"), (fmt
? fmt
: ""), (options
? ", " : ""),
427 case 'a': /* args (full path name with args) */
428 /* TODO: allow this to be passed in with --metric */
433 xasprintf (&fmt
, "%s%sargs '%s'", (fmt
? fmt
: ""), (options
? ", " : ""), args
);
/* Regex argument filter: compile the pattern and abort with UNKNOWN plus
 * the regerror text when compilation fails. */
437 err
= regcomp(&re_args
, optarg
, cflags
);
439 regerror (err
, &re_args
, errbuf
, MAX_INPUT_BUFFER
);
440 die (STATE_UNKNOWN
, "PROCS %s: %s - %s\n", _("UNKNOWN"), _("Could not compile regular expression"), errbuf
);
442 /* Strip off any | within the regex optarg */
/* The stripping is done on a copy, which is what ends up in fmt. */
443 temp_string
= strdup(optarg
);
444 while(temp_string
[i
]!='\0'){
445 if(temp_string
[i
]=='|')
449 xasprintf (&fmt
, "%s%sregex args '%s'", (fmt
? fmt
: ""), (options
? ", " : ""), temp_string
);
450 options
|= EREG_ARGS
;
/* RSS, VSZ and PCPU minimums use the same validation as the ppid value. */
453 if (sscanf (optarg
, "%d%[^0-9]", &rss
, tmp
) == 1) {
454 xasprintf (&fmt
, "%s%sRSS >= %d", (fmt
? fmt
: ""), (options
? ", " : ""), rss
);
458 usage4 (_("RSS must be an integer!"));
460 if (sscanf (optarg
, "%d%[^0-9]", &vsz
, tmp
) == 1) {
461 xasprintf (&fmt
, "%s%sVSZ >= %d", (fmt
? fmt
: ""), (options
? ", " : ""), vsz
);
465 usage4 (_("VSZ must be an integer!"));
467 /* TODO: -P 1.5.5 is accepted */
468 if (sscanf (optarg
, "%f%[^0-9.]", &pcpu
, tmp
) == 1) {
469 xasprintf (&fmt
, "%s%sPCPU >= %.2f", (fmt
? fmt
: ""), (options
? ", " : ""), pcpu
);
473 usage4 (_("PCPU must be a float!"));
/* Metric selection: the option text becomes metric_name and is matched
 * case-sensitively against the known metric names. */
475 xasprintf (&metric_name
, "%s", optarg
);
476 if ( strcmp(optarg
, "PROCS") == 0) {
477 metric
= METRIC_PROCS
;
480 else if ( strcmp(optarg
, "VSZ") == 0) {
484 else if ( strcmp(optarg
, "RSS") == 0 ) {
488 else if ( strcmp(optarg
, "CPU") == 0 ) {
492 else if ( strcmp(optarg
, "ELAPSED") == 0) {
493 metric
= METRIC_ELAPSED
;
497 usage4 (_("Metric must be one of PROCS, VSZ, RSS, CPU, ELAPSED!"));
498 case 'v': /* verbose */
502 input_filename
= optarg
;
/* Remaining positional arguments fill, in this order, any unset warning
 * range, critical range and state filter. */
508 if ((! warning_range
) && argv
[c
])
509 warning_range
= argv
[c
++];
510 if ((! critical_range
) && argv
[c
])
511 critical_range
= argv
[c
++];
512 if (statopts
== NULL
&& argv
[c
]) {
513 xasprintf (&statopts
, "%s", argv
[c
++]);
514 xasprintf (&fmt
, _("%s%sSTATE = %s"), (fmt
? fmt
: ""), (options
? ", " : ""), statopts
);
518 /* this will abort in case of invalid ranges */
519 set_thresholds (&procs_thresholds
, warning_range
, critical_range
);
521 return validate_arguments ();
/* Post-parse fix-ups of the option state. Only a fragment is visible in this
 * extract: statopts is set to a strdup-allocated empty string (the condition
 * guarding that assignment is not shown). */
527 validate_arguments ()
533 statopts
= strdup("");
551 /* convert the elapsed time to seconds */
/*
 * etime: elapsed-time text as captured from the process listing.
 *
 * NOTE: parts of the body are not present in this extract.
 * Visible parsing paths: a days-hours:minutes:seconds form, an
 * hours:minutes:seconds form and, when coloncnt is 1, a two-field colon
 * form. The parsed fields are accumulated into total.
 */
553 convert_to_seconds(char *etime
) {
/* Walk the string one character at a time (the loop body is not shown;
 * presumably it counts the separators that select the format below). */
572 for (ptr
= etime
; *ptr
!= '\0'; ptr
++) {
585 sscanf(etime
, "%d-%d:%d:%d",
586 &days
, &hours
, &minutes
, &seconds
);
587 /* linux 2.6.5/2.6.6 reporting some processes with infinite
588 * elapsed times for some reason */
594 sscanf(etime
, "%d:%d:%d",
595 &hours
, &minutes
, &seconds
);
596 } else if (coloncnt
== 1) {
597 sscanf(etime
, "%d:%d",
/* One day contributes 86400 seconds. */
602 total
= (days
* 86400) +
/* Debug trace, only at verbosity 3 or higher and for the ELAPSED metric. */
607 if (verbose
>= 3 && metric
== METRIC_ELAPSED
) {
608 printf("seconds: %d\n", total
);
617 print_revision (progname
, NP_VERSION
);
619 printf ("Copyright (c) 1999 Ethan Galstad <nagios@nagios.org>\n");
620 printf (COPYRIGHT
, copyright
, email
);
622 printf ("%s\n", _("Checks all processes and generates WARNING or CRITICAL states if the specified"));
623 printf ("%s\n", _("metric is outside the required threshold ranges. The metric defaults to number"));
624 printf ("%s\n", _("of processes. Search filters can be applied to limit the processes to check."));
630 printf (UT_HELP_VRSN
);
631 printf (UT_EXTRA_OPTS
);
632 printf (" %s\n", "-w, --warning=RANGE");
633 printf (" %s\n", _("Generate warning state if metric is outside this range"));
634 printf (" %s\n", "-c, --critical=RANGE");
635 printf (" %s\n", _("Generate critical state if metric is outside this range"));
636 printf (" %s\n", "-m, --metric=TYPE");
637 printf (" %s\n", _("Check thresholds against metric. Valid types:"));
638 printf (" %s\n", _("PROCS - number of processes (default)"));
639 printf (" %s\n", _("VSZ - virtual memory size"));
640 printf (" %s\n", _("RSS - resident set memory size"));
641 printf (" %s\n", _("CPU - percentage CPU"));
642 /* only linux etime is supported currently */
643 #if defined( __linux__ )
644 printf (" %s\n", _("ELAPSED - time elapsed in seconds"));
645 #endif /* defined(__linux__) */
646 printf (UT_TIMEOUT
, DEFAULT_SOCKET_TIMEOUT
);
648 printf (" %s\n", "-v, --verbose");
649 printf (" %s\n", _("Extra information. Up to 3 verbosity levels"));
652 printf ("%s\n", "Filters:");
653 printf (" %s\n", "-s, --state=STATUSFLAGS");
654 printf (" %s\n", _("Only scan for processes that have, in the output of `ps`, one or"));
655 printf (" %s\n", _("more of the status flags you specify (for example R, Z, S, RS,"));
656 printf (" %s\n", _("RSZDT, plus others based on the output of your 'ps' command)."));
657 printf (" %s\n", "-p, --ppid=PPID");
658 printf (" %s\n", _("Only scan for children of the parent process ID indicated."));
659 printf (" %s\n", "-z, --vsz=VSZ");
660 printf (" %s\n", _("Only scan for processes with VSZ higher than indicated."));
661 printf (" %s\n", "-r, --rss=RSS");
662 printf (" %s\n", _("Only scan for processes with RSS higher than indicated."));
663 printf (" %s\n", "-P, --pcpu=PCPU");
664 printf (" %s\n", _("Only scan for processes with PCPU higher than indicated."));
665 printf (" %s\n", "-u, --user=USER");
666 printf (" %s\n", _("Only scan for processes with user name or ID indicated."));
667 printf (" %s\n", "-a, --argument-array=STRING");
668 printf (" %s\n", _("Only scan for processes with args that contain STRING."));
669 printf (" %s\n", "--ereg-argument-array=STRING");
670 printf (" %s\n", _("Only scan for processes with args that contain the regex STRING."));
671 printf (" %s\n", "-C, --command=COMMAND");
672 printf (" %s\n", _("Only scan for exact matches of COMMAND (without path)."));
675 RANGEs are specified 'min:max' or 'min:' or ':max' (or 'max'). If\n\
676 specified 'max:min', a warning status will be generated if the\n\
677 count is inside the specified range\n\n"));
680 This plugin checks the number of currently running processes and\n\
681 generates WARNING or CRITICAL states if the process count is outside\n\
682 the specified threshold ranges. The process count can be filtered by\n\
683 process owner, parent process PID, current state (e.g., 'Z'), or may\n\
684 be the total number of running processes\n\n"));
686 printf ("%s\n", _("Examples:"));
687 printf (" %s\n", "check_procs -w 2:2 -c 2:1024 -C portsentry");
688 printf (" %s\n", _("Warning if not two processes with command name portsentry."));
689 printf (" %s\n\n", _("Critical if < 2 or > 1024 processes"));
690 printf (" %s\n", "check_procs -w 10 -a '/usr/local/bin/perl' -u root");
691 printf (" %s\n", _("Warning alert if > 10 processes with command arguments containing"));
692 printf (" %s\n\n", _("'/usr/local/bin/perl' and owned by root"));
693 printf (" %s\n", "check_procs -w 50000 -c 100000 --metric=VSZ");
694 printf (" %s\n\n", _("Alert if VSZ of any processes over 50K or 100K"));
695 printf (" %s\n", "check_procs -w 10 -c 20 --metric=CPU");
696 printf (" %s\n", _("Alert if CPU of any processes over 10%% or 20%%"));
704 printf ("%s\n", _("Usage:"));
705 printf ("%s -w <range> -c <range> [-m metric] [-s state] [-p ppid]\n", progname
);
706 printf (" [-u user] [-r rss] [-z vsz] [-P %%cpu] [-a argument-array]\n");
707 printf (" [-C command] [-t timeout] [-v]\n");