4 * Actions taken when something happens to the UPS.
8 * Copyright (C) 2000-2004 Kern Sibbald
9 * Copyright (C) 1996-1999 Andre M. Hedrick <andre@suse.com>
10 * Copyright (C) 1999-2000 Riccardo Facchetti <riccardo@master.oasi.gpa.it>
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of version 2 of the GNU General
14 * Public License as published by the Free Software Foundation.
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * General Public License for more details.
21 * You should have received a copy of the GNU General Public
22 * License along with this program; if not, write to the Free
23 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
29 extern int kill_on_powerfail
;
30 static void do_shutdown(UPSINFO
*ups
, int cmdtype
);
33 * These are the commands understood by the apccontrol shell script.
34 * You _must_ keep the commands[] array in sync with the defines in
35 * include/apc_defines.h
37 UPSCOMMANDS ups_event
[] = {
38 {"powerout", 0}, /* CMDPOWEROUT */
39 {"onbattery", 0}, /* CMDONBATTERY */
40 {"failing", 0}, /* CMDFAILING */
41 {"timeout", 0}, /* CMDTIMEOUT */
42 {"loadlimit", 0}, /* CMDLOADLIMIT */
43 {"runlimit", 0}, /* CMDRUNLIMIT */
44 {"doshutdown", 0}, /* CMDDOSHUTDOWN */
45 {"mainsback", 0}, /* CMDMAINSBACK */
46 {"annoyme", 0}, /* CMDANNOYME */
47 {"emergency", 0}, /* CMDEMERGENCY */
48 {"changeme", 0}, /* CMDCHANGEME */
49 {"remotedown", 0}, /* CMDREMOTEDOWN */
50 {"commfailure", 0}, /* CMDCOMMFAILURE */
51 {"commok", 0}, /* CMDCOMMOK */
52 {"startselftest", 0}, /* CMDSTARTSELFTEST */
53 {"endselftest", 0}, /* CMDENDSELFTEST */
54 {"offbattery", 0}, /* CMDOFFBATTERY */
55 {"battdetach", 0}, /* CMDBATTDETACH */
56 {"battattach", 0} /* CMDBATTATTACH */
60 * These messages must be kept in sync with the above array
61 * and the defines in include/apc_defines.h
63 UPSCMDMSG event_msg
[] = {
64 {LOG_CRIT
, "Power failure."},
65 {LOG_CRIT
, "Running on UPS batteries."},
66 {LOG_ALERT
, "Battery power exhausted."},
67 {LOG_ALERT
, "Reached run time limit on batteries."},
68 {LOG_ALERT
, "Battery charge below low limit."},
69 {LOG_ALERT
, "Reached remaining time percentage limit on batteries."},
70 {LOG_ALERT
, "Initiating system shutdown!"},
71 {LOG_ALERT
, "Power is back. UPS running on mains."},
72 {LOG_ALERT
, "Users requested to logoff."},
73 {LOG_ALERT
, "Battery failure. Emergency."},
74 {LOG_CRIT
, "UPS battery must be replaced."},
75 {LOG_CRIT
, "Remote shutdown requested."},
76 {LOG_WARNING
, "Communications with UPS lost."},
77 {LOG_WARNING
, "Communications with UPS restored."},
78 {LOG_WARNING
, "UPS Self Test switch to battery."},
79 {LOG_WARNING
, "UPS Self Test completed."},
80 {LOG_CRIT
, "Mains returned. No longer on UPS batteries."},
81 {LOG_CRIT
, "Battery disconnected."},
82 {LOG_CRIT
, "Battery reattached."}
85 void generate_event(UPSINFO
*ups
, int event
)
87 /* Log message and execute script for this event */
88 log_event(ups
, event_msg
[event
].level
, event_msg
[event
].msg
);
89 Dmsg2(80, "calling execute_ups_event %s event=%d\n", ups_event
[event
], event
);
90 execute_command(ups
, ups_event
[event
]);
93 * Additional possible actions. For certain, we now do a
98 * For the following, in addition to the basic,
99 * message logged and executed above, we do a
108 log_event(ups
, event_msg
[CMDDOSHUTDOWN
].level
,
109 event_msg
[CMDDOSHUTDOWN
].msg
);
110 do_shutdown(ups
, CMDDOSHUTDOWN
);
113 /* For the following, everything is already done. */
114 case CMDSTARTSELFTEST
:
121 case CMDDOSHUTDOWN
: /* Already shutdown, don't recall */
134 * Closes procfile and logfile to preserve information.
136 * ok = 1 => power is back
137 * ok = 2 => power failure
138 * ok = 3 => remote shutdown
140 static void powerfail(int ok
)
143 * If apcupsd terminates here, it will never get a chance to
144 * report the event of returning mains-power. I think apcupsd
145 * has no need to force terminate() by itself. It will receive
146 * a SIGTERM from init, when system goes down. This signal is
147 * trapped and will trigger apcupsd's terminate() function.
152 if (terminate_on_powerfail
) {
154 * This sends a SIGTERM signal to itself.
155 * The SIGTERM is bound to apcupsd_ or apctest_terminate(),
156 * depending on which program is running this code, so it will
157 * do the right thing either way.
164 * For network slaves, apcupsd needs to terminate here for now.
165 * This is sloppy, but it works. If you are networked, then the
166 * master must fall also. This is required so that the UPS
167 * can reboot the slaves.
174 * If called with zero, prevent users from logging in.
175 * If called with one, allow users to log in.
177 static void logonfail(UPSINFO
*ups
, int ok
)
181 unlink(ups
->nologinpath
);
184 ((lgnfd
= open(ups
->nologinpath
, O_CREAT
| O_WRONLY
, 0644)) >= 0)) {
185 write(lgnfd
, POWERFAIL
, strlen(POWERFAIL
));
190 static void prohibit_logins(UPSINFO
*ups
)
192 if (ups
->nologin_file
)
193 return; /* already done */
196 ups
->nologin_file
= true;
198 log_event(ups
, LOG_ALERT
, "User logins prohibited");
201 static void do_shutdown(UPSINFO
*ups
, int cmdtype
)
203 if (ups
->is_shutdown())
204 return; /* already done */
206 ups
->ShutDown
= time(NULL
);
208 delete_lockfile(ups
);
210 make_file(ups
, ups
->pwrfailpath
);
211 prohibit_logins(ups
);
213 if (!ups
->is_slave()) {
215 * Note, try to avoid using this option if at all possible
216 * as it will shut off the UPS power, and you cannot
217 * be guaranteed that the shutdown command will have
218 * succeeded. This PROBABLY should be executed AFTER
219 * the shutdown command is given (the execute_command below).
221 if (kill_on_powerfail
)
222 initiate_hibernate(ups
);
225 /* Now execute the shutdown command */
226 execute_command(ups
, ups_event
[cmdtype
]);
229 * On some systems we may stop on the previous
230 * line if a SIGTERM signal is sent to us.
233 if (cmdtype
== CMDREMOTEDOWN
)
239 /* These are the different "states" that the UPS can be in. */
249 * Figure out what "state" the UPS is in and
250 * return it for use in do_action()
252 static enum a_state
get_state(UPSINFO
*ups
, time_t now
)
256 /* If we're on battery for calibration, treat as not on battery */
257 if (ups
->is_onbatt() && !ups
->is_calibration()) {
258 if (ups
->chg_onbatt()) {
259 state
= st_PowerFailure
; /* Power failure just detected */
261 if (ups
->SelfTest
) /* see if UPS is doing self test */
262 state
= st_SelfTest
; /* yes */
264 state
= st_OnBattery
; /* No, this must be real power failure */
267 if (ups
->chg_onbatt()) /* if we were on batteries */
268 state
= st_MainsBack
; /* then we just got power back */
270 state
= st_OnMains
; /* Solid on mains, normal condition */
275 static const char *testresult_to_string(SelfTestResult res
)
279 return "Not supported";
281 return "No test results available";
283 return "Test failed";
286 case TEST_INPROGRESS
:
287 return "In progress";
291 return "Test failed -- insufficient battery capacity";
293 return "Test failed -- battery overloaded";
301 * Carl Lindberg <lindberg@clindberg.org> patch applied 24Dec04
303 * The APC network management cards have options to shut down, reboot, or
304 * "sleep" (really just a delayed reboot) the UPS. For all of these, it
305 * has a "graceful" option, meaning it gives the PowerChute software a
306 * chance to cleanly shutdown the machine before the UPS is shut down. To
307 * do this, the card sets the ONBATT and LOWBATT statuses at the same
308 * time, waits several minutes, then cuts power. PowerChute (presumably)
309 * notices this and shuts the machine down, but unfortunately apcupsd did
312 * The problem happens because in this situation, apcupsd sets the
313 * UPS_prev_battlow status before testing for it. In the do_action()
314 * function, apcupsd notices the ONBATT status, and uses the
315 * "st_PowerFailure" state to send off an initial power failure event.
316 * After a short delay, do_action() is invoked again. If ONBATT is
317 * still set, the "st_OnBattery" state is used, and the onbattery event
318 * (among other things) is sent.
320 * The test for LOWBATT to see if shutdown is needed is only done in the
321 * st_OnBattery state, and it's done if LOWBATT is set but
322 * UPS_prev_battlow is not set yet. In normal operation, LOWBATT will
323 * only come on after a period of ONBATT, and this situation works fine.
324 * However, since ONBATT and LOWBATT were set simultaneously, the
325 * UPS_prev_battlow was set the first time through, when the
326 * st_PowerFailure was used, meaning the test for LOWBATT was not
327 * performed. The second time through in st_OnBattery, UPS_prev_battlow
328 * is already set, meaning apcupsd is assuming that the needed shutdown
329 * has already been invoked.
331 * The code fix just moves setting of the UPS_prev_battlow status to
332 * inside the block that tests for it, ensuring that LOWBATT will never be
333 * ignored. Clearing the UPS_prev_battlow status remains where it is in
334 * the code, and it will always be turned off if LOWBATT is no longer set.
336 * After the fix, UPS_prev_battlow is not prematurely set, and apcupsd
337 * catches the signal from the management card to shut down. I've had the
338 * code in for over a month, and it's worked fine, both from using the
339 * management card and regular pull-the-plug tests as well. This was
340 * only tested with a serial UPS, but I assume it would be a problem with
341 * USB and SNMP connections as well.
344 /*********************************************************************/
345 void do_action(UPSINFO
*ups
)
348 static int requested_logoff
= 0; /* asked user to logoff */
349 static int first
= 1;
354 time(&now
); /* get current time */
357 ups
->last_time_nologon
= ups
->last_time_annoy
= now
;
358 ups
->last_time_on_line
= now
;
361 * This is cheating slightly. We want to initialize the previous
362 * status to zero so all set bits in current status will appear
363 * as changes, thus allowing us to handle starting up when power
364 * has already failed, for instance. However, we don't want to
365 * get a BATTATTACHED event every time the daemon starts, so we
366 * set the UPS_battpresent bit in the previous status.
368 ups
->PrevStatus
= UPS_battpresent
;
371 if (ups
->is_replacebatt()) { /* Replace battery */
373 * Complain every 9 hours, this causes the complaint to
374 * cycle around the clock and hopefully be more noticeable
375 * without being too annoying. Also, ignore all change battery
376 * indications for the first 10 minutes of running time to
377 * prevent false alerts. Finally, issue the event 5 times, then
378 * clear the flag to silence false alarms. If the battery is
379 * really dead, the flag will be reset in apcsmart.c
381 * UPS_replacebatt is a flag. To count, use the ups->ChangeBattCounter field.
382 * The counter is initialized only one time at startup.
384 if (now
- ups
->start_time
< 60 * 10 || ups
->ChangeBattCounter
> 5) {
385 ups
->clear_replacebatt();
386 ups
->ChangeBattCounter
= 0;
387 } else if (now
- ups
->last_time_changeme
> 60 * 60 * 9) {
388 generate_event(ups
, CMDCHANGEME
);
389 ups
->last_time_changeme
= now
;
390 ups
->ChangeBattCounter
++;
394 /* Remote is shutting down, so must we. */
395 if (ups
->is_shut_remote()) {
396 if (ups
->chg_shut_remote()) {
397 generate_event(ups
, CMDREMOTEDOWN
);
399 ups
->PrevStatus
= ups
->Status
;
404 /* Generate event if battery is disconnected or reattached */
405 if (ups
->chg_battpresent()) {
406 if (ups
->is_battpresent())
407 generate_event(ups
, CMDBATTATTACH
);
409 generate_event(ups
, CMDBATTDETACH
);
413 * Did BattLow bit go high? If so, start the battlow shutdown
414 * timer. We will only act on this timer if we switch to battery
415 * (or are already on battery). It is possible that this event occurs
416 * at the same time as or even slightly before we switch to battery.
417 * Therefore we must check it every time we get new status.
419 if (ups
->chg_battlow()) {
420 if (ups
->is_battlow()) {
421 Dmsg0(100, "BATTLOW asserted\n");
422 ups
->start_shut_lbatt
= now
;
424 Dmsg0(100, "BATTLOW glitch\n");
428 state
= get_state(ups
, now
);
431 /* If power is good, update the timers. */
432 ups
->last_time_nologon
= ups
->last_time_annoy
= now
;
433 ups
->last_time_on_line
= now
;
434 ups
->clear_fastpoll();
437 case st_PowerFailure
:
438 /* This is our first indication of a power problem */
439 ups
->set_fastpoll(); /* speed up polling */
441 /* Check if selftest */
442 Dmsg1(80, "Power failure detected. 0x%x\n", ups
->Status
);
443 device_entry_point(ups
, DEVICE_CMD_CHECK_SELFTEST
, NULL
);
446 generate_event(ups
, CMDSTARTSELFTEST
);
448 generate_event(ups
, CMDPOWEROUT
);
450 ups
->last_time_nologon
= ups
->last_time_annoy
= now
;
451 ups
->last_time_on_line
= now
;
452 ups
->last_onbatt_time
= now
;
455 /* Enable DTR on dumb UPSes with CUSTOM_SIMPLE cable. */
456 device_entry_point(ups
, DEVICE_CMD_DTR_ENABLE
, NULL
);
460 /* allow 40 seconds max for selftest */
461 if (now
- ups
->SelfTest
< 40 && !ups
->is_battlow())
464 /* Cancel self test, announce power failure */
466 Dmsg1(80, "UPS Self Test cancelled, fall-thru to On Battery. 0x%x\n",
469 /* ...FALL-THRU to st_OnBattery... */
472 /* Did the second test verify the power is failing? */
473 if (!ups
->is_onbatt_msg() &&
474 time(NULL
) - ups
->last_time_on_line
>= ups
->onbattdelay
) {
475 ups
->set_onbatt_msg(); /* it is confirmed, we are on batteries */
476 generate_event(ups
, CMDONBATTERY
);
477 ups
->last_time_nologon
= ups
->last_time_annoy
= now
;
478 ups
->last_time_on_line
= now
;
482 /* shutdown requested but still running */
483 if (ups
->is_shutdown()) {
484 if (ups
->killdelay
&& now
- ups
->ShutDown
>= ups
->killdelay
) {
485 if (!ups
->is_slave())
486 initiate_hibernate(ups
);
487 ups
->ShutDown
= now
; /* wait a bit before doing again */
490 } else { /* not shutdown yet */
492 * Did MaxTimeOnBattery Expire? (TIMEOUT in apcupsd.conf)
493 * Normal Power down during Power Failure: Shutdown immediately.
495 if ((ups
->maxtime
> 0) && ((now
- ups
->last_time_on_line
) > ups
->maxtime
)) {
496 ups
->set_shut_btime();
497 generate_event(ups
, CMDTIMEOUT
);
502 * Did Battery Charge or Runtime go below percent cutoff?
503 * Normal Power down during Power Failure: Start shutdown timer.
505 if (ups
->UPS_Cap
[CI_BATTLEV
] && ups
->BattChg
<= ups
->percent
) {
506 if (!ups
->is_shut_load()) {
507 Dmsg0(100, "CI_BATTLEV shutdown\n");
508 ups
->set_shut_load();
509 ups
->start_shut_load
= now
;
512 if (ups
->UPS_Cap
[CI_BATTLEV
] && ups
->is_shut_load())
513 Dmsg0(100, "CI_BATTLEV glitch\n");
514 ups
->clear_shut_load();
517 if (ups
->UPS_Cap
[CI_RUNTIM
] && ups
->TimeLeft
<= ups
->runtime
) {
518 if (!ups
->is_shut_ltime()) {
519 Dmsg0(100, "CI_RUNTIM shutdown\n");
520 ups
->set_shut_ltime();
521 ups
->start_shut_ltime
= now
;
524 if (ups
->UPS_Cap
[CI_RUNTIM
] && ups
->is_shut_ltime())
525 Dmsg0(100, "CI_RUNTIM glitch\n");
526 ups
->clear_shut_ltime();
530 * Check for expired shutdown timers and act on them.
532 if (ups
->is_battlow() && ((now
- ups
->start_shut_lbatt
) >= 5)) {
533 generate_event(ups
, CMDFAILING
);
536 if (ups
->is_shut_load() && ((now
- ups
->start_shut_load
) >= 5)) {
537 generate_event(ups
, CMDLOADLIMIT
);
540 if (ups
->is_shut_ltime() && ((now
- ups
->start_shut_ltime
) >= 5)) {
541 generate_event(ups
, CMDRUNLIMIT
);
546 * We are on batteries, the battery is low, and the power is not
547 * down ==> the battery is dead. KES Sept 2000
549 * Then the battery has failed!!!
550 * Must do Emergency Shutdown NOW
552 if (ups
->is_battlow() && ups
->is_online()) {
553 ups
->set_shut_emerg();
554 generate_event(ups
, CMDEMERGENCY
);
557 /* Announce to LogOff, with initial delay. */
558 if (((now
- ups
->last_time_on_line
) > ups
->annoydelay
) &&
559 ((now
- ups
->last_time_annoy
) > ups
->annoy
) && ups
->nologin_file
) {
560 if (!requested_logoff
) {
561 /* generate log message once */
562 generate_event(ups
, CMDANNOYME
);
564 /* but execute script every time */
565 execute_command(ups
, ups_event
[CMDANNOYME
]);
568 time(&ups
->last_time_annoy
);
569 requested_logoff
= true;
572 /* Delay NoLogons. */
573 if (!ups
->nologin_file
) {
574 switch (ups
->nologin
.type
) {
578 if ((now
- ups
->last_time_nologon
) > ups
->nologin_time
)
579 prohibit_logins(ups
);
582 if (ups
->UPS_Cap
[CI_BATTLEV
] && ups
->nologin_time
>= ups
->BattChg
)
583 prohibit_logins(ups
);
586 if (ups
->UPS_Cap
[CI_RUNTIM
] && ups
->nologin_time
>= ups
->TimeLeft
)
587 prohibit_logins(ups
);
591 prohibit_logins(ups
);
599 /* The power is back after a power failure or a self test */
600 if (ups
->is_onbatt_msg()) {
601 ups
->clear_onbatt_msg();
602 generate_event(ups
, CMDOFFBATTERY
);
606 ups
->LastSelfTest
= ups
->SelfTest
;
609 /* Get last selftest results, only for smart UPSes. */
610 device_entry_point(ups
, DEVICE_CMD_GET_SELFTEST_MSG
, NULL
);
611 log_event(ups
, LOG_ALERT
, "UPS Self Test completed: %s",
612 testresult_to_string(ups
->testresult
));
613 execute_command(ups
, ups_event
[CMDENDSELFTEST
]);
615 generate_event(ups
, CMDMAINSBACK
);
618 if (ups
->nologin_file
)
619 log_event(ups
, LOG_ALERT
, "Allowing logins");
622 ups
->nologin_file
= false;
623 requested_logoff
= false;
624 device_entry_point(ups
, DEVICE_CMD_DTR_ST_DISABLE
, NULL
);
625 ups
->last_offbatt_time
= now
;
628 * Sanity check. Sometimes only the first power problem trips,
629 * so last_onbatt_time is not set when we get here.
631 if (ups
->last_onbatt_time
<= 0)
632 ups
->last_onbatt_time
= ups
->last_offbatt_time
;
634 ups
->cum_time_on_batt
+= (ups
->last_offbatt_time
- ups
->last_onbatt_time
);
641 /* Do a non-blocking wait on any exec()ed children */
642 if (ups
->num_execed_children
> 0) {
643 while (waitpid(-1, NULL
, WNOHANG
) > 0)
644 ups
->num_execed_children
--;
647 /* Remember status */
648 ups
->PrevStatus
= ups
->Status
;