UPS: apcupsd clean sources
[tomato.git] / release / src / router / apcupsd / src / action.c
blobdc13e55fb22f4df14d25ea62902d7940b2e9312f
1 /*
2 * apcaction.c
4 * Actions taken when something happens to the UPS.
5 */
7 /*
8 * Copyright (C) 2000-2004 Kern Sibbald
9 * Copyright (C) 1996-1999 Andre M. Hedrick <andre@suse.com>
10 * Copyright (C) 1999-2000 Riccardo Facchetti <riccardo@master.oasi.gpa.it>
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of version 2 of the GNU General
14 * Public License as published by the Free Software Foundation.
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * General Public License for more details.
21 * You should have received a copy of the GNU General Public
22 * License along with this program; if not, write to the Free
23 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
24 * MA 02111-1307, USA.
27 #include "apc.h"
29 extern int kill_on_powerfail;
30 static void do_shutdown(UPSINFO *ups, int cmdtype);
33 * These are the commands understood by the apccontrol shell script.
34 * You _must_ keep the the commands[] array in sync with the defines in
35 * include/apc_defines.h
37 UPSCOMMANDS ups_event[] = {
38 {"powerout", 0}, /* CMDPOWEROUT */
39 {"onbattery", 0}, /* CMDONBATTERY */
40 {"failing", 0}, /* CMDFAILING */
41 {"timeout", 0}, /* CMDTIMEOUT */
42 {"loadlimit", 0}, /* CMDLOADLIMIT */
43 {"runlimit", 0}, /* CMDRUNLIMIT */
44 {"doshutdown", 0}, /* CMDDOSHUTDOWN */
45 {"mainsback", 0}, /* CMDMAINSBACK */
46 {"annoyme", 0}, /* CMDANNOYME */
47 {"emergency", 0}, /* CMDEMERGENCY */
48 {"changeme", 0}, /* CMDCHANGEME */
49 {"remotedown", 0}, /* CMDREMOTEDOWN */
50 {"commfailure", 0}, /* CMDCOMMFAILURE */
51 {"commok", 0}, /* CMDCOMMOK */
52 {"startselftest", 0}, /* CMDSTARTSELFTEST */
53 {"endselftest", 0}, /* CMDENDSELFTEST */
54 {"offbattery", 0}, /* CMDOFFBATTERY */
55 {"battdetach", 0}, /* CMDBATTDETACH */
56 {"battattach", 0} /* CMDBATTATTACH */
60 * These messages must be kept in sync with the above array
61 * and the defines in include/apc_defines.h
63 UPSCMDMSG event_msg[] = {
64 {LOG_CRIT, "Power failure."},
65 {LOG_CRIT, "Running on UPS batteries."},
66 {LOG_ALERT, "Battery power exhausted."},
67 {LOG_ALERT, "Reached run time limit on batteries."},
68 {LOG_ALERT, "Battery charge below low limit."},
69 {LOG_ALERT, "Reached remaining time percentage limit on batteries."},
70 {LOG_ALERT, "Initiating system shutdown!"},
71 {LOG_ALERT, "Power is back. UPS running on mains."},
72 {LOG_ALERT, "Users requested to logoff."},
73 {LOG_ALERT, "Battery failure. Emergency."},
74 {LOG_CRIT, "UPS battery must be replaced."},
75 {LOG_CRIT, "Remote shutdown requested."},
76 {LOG_WARNING, "Communications with UPS lost."},
77 {LOG_WARNING, "Communications with UPS restored."},
78 {LOG_WARNING, "UPS Self Test switch to battery."},
79 {LOG_WARNING, "UPS Self Test completed."},
80 {LOG_CRIT, "Mains returned. No longer on UPS batteries."},
81 {LOG_CRIT, "Battery disconnected."},
82 {LOG_CRIT, "Battery reattached."}
85 void generate_event(UPSINFO *ups, int event)
87 /* Log message and execute script for this event */
88 log_event(ups, event_msg[event].level, event_msg[event].msg);
89 Dmsg2(80, "calling execute_ups_event %s event=%d\n", ups_event[event], event);
90 execute_command(ups, ups_event[event]);
93 * Additional possible actions. For certain, we now do a
94 * shutdown
96 switch (event) {
98 * For the following, in addition to the basic,
99 * message logged and executed above, we do a
100 * system shutdown.
102 case CMDFAILING:
103 case CMDTIMEOUT:
104 case CMDRUNLIMIT:
105 case CMDLOADLIMIT:
106 case CMDEMERGENCY:
107 case CMDREMOTEDOWN:
108 log_event(ups, event_msg[CMDDOSHUTDOWN].level,
109 event_msg[CMDDOSHUTDOWN].msg);
110 do_shutdown(ups, CMDDOSHUTDOWN);
111 break;
113 /* For the following, everything is already done. */
114 case CMDSTARTSELFTEST:
115 case CMDENDSELFTEST:
116 case CMDCOMMFAILURE:
117 case CMDCOMMOK:
118 case CMDCHANGEME:
119 case CMDANNOYME:
120 case CMDMAINSBACK:
121 case CMDDOSHUTDOWN: /* Already shutdown, don't recall */
122 case CMDPOWEROUT:
123 case CMDONBATTERY:
124 case CMDOFFBATTERY:
125 case CMDBATTDETACH:
126 case CMDBATTATTACH:
127 default:
128 break;
134 * Closes procfile and logfile to preserve information.
136 * ok = 1 => power is back
137 * ok = 2 => power failure
138 * ok = 3 => remote shutdown
140 static void powerfail(int ok)
143 * If apcupsd terminates here, it will never get a chance to
144 * report the event of returning mains-power. I think apcupsd
145 * has no need to force terminate() by itself. It will receive
146 * a SIGTERM from init, when system goes down. This signal is
147 * trapped and will trigger apcupsd's terminate() function.
150 if (ok == 2) {
151 clear_files();
152 if (terminate_on_powerfail) {
154 * This sends a SIGTERM signal to itself.
155 * The SIGTERM is bound to apcupsd_ or apctest_terminate(),
156 * depending on which program is running this code, so it will
157 * do in anyway the right thing.
159 sendsig_terminate();
164 * For network slaves, apcupsd needs to terminate here for now.
165 * This is sloppy, but it works. If you are networked, then the
166 * master must fall also. This is required so that the UPS
167 * can reboot the slaves.
169 if (ok == 3)
170 sendsig_terminate();
174 * If called with zero, prevent users from logging in.
175 * If called with one, allow users to login.
177 static void logonfail(UPSINFO *ups, int ok)
179 int lgnfd;
181 unlink(ups->nologinpath);
183 if (ok == 0 &&
184 ((lgnfd = open(ups->nologinpath, O_CREAT | O_WRONLY, 0644)) >= 0)) {
185 write(lgnfd, POWERFAIL, strlen(POWERFAIL));
186 close(lgnfd);
190 static void prohibit_logins(UPSINFO *ups)
192 if (ups->nologin_file)
193 return; /* already done */
195 logonfail(ups, 0);
196 ups->nologin_file = true;
198 log_event(ups, LOG_ALERT, "User logins prohibited");
201 static void do_shutdown(UPSINFO *ups, int cmdtype)
203 if (ups->is_shutdown())
204 return; /* already done */
206 ups->ShutDown = time(NULL);
207 ups->set_shutdown();
208 delete_lockfile(ups);
209 ups->set_fastpoll();
210 make_file(ups, ups->pwrfailpath);
211 prohibit_logins(ups);
213 if (!ups->is_slave()) {
215 * Note, try avoid using this option if at all possible
216 * as it will shutoff the UPS power, and you cannot
217 * be guaranteed that the shutdown command will have
218 * succeeded. This PROBABLY should be executed AFTER
219 * the shutdown command is given (the execute_command below).
221 if (kill_on_powerfail)
222 initiate_hibernate(ups);
225 /* Now execute the shutdown command */
226 execute_command(ups, ups_event[cmdtype]);
229 * On some systems we may stop on the previous
230 * line if a SIGTERM signal is sent to us.
233 if (cmdtype == CMDREMOTEDOWN)
234 powerfail(3);
235 else
236 powerfail(2);
/* The "states" that get_state() can report to do_action(). */
enum a_state {
   st_PowerFailure,                /* switch to battery just detected */
   st_SelfTest,                    /* on battery because of a self test */
   st_OnBattery,                   /* on battery, not a self test */
   st_MainsBack,                   /* just came off battery */
   st_OnMains                      /* on mains, no change */
};
249 * Figure out what "state" the UPS is in and
250 * return it for use in do_action()
252 static enum a_state get_state(UPSINFO *ups, time_t now)
254 enum a_state state;
256 /* If we're on battery for calibration, treat as not on battery */
257 if (ups->is_onbatt() && !ups->is_calibration()) {
258 if (ups->chg_onbatt()) {
259 state = st_PowerFailure; /* Power failure just detected */
260 } else {
261 if (ups->SelfTest) /* see if UPS is doing self test */
262 state = st_SelfTest; /* yes */
263 else
264 state = st_OnBattery; /* No, this must be real power failure */
266 } else {
267 if (ups->chg_onbatt()) /* if we were on batteries */
268 state = st_MainsBack; /* then we just got power back */
269 else
270 state = st_OnMains; /* Solid on mains, normal condition */
272 return state;
275 static const char *testresult_to_string(SelfTestResult res)
277 switch (res) {
278 case TEST_NA:
279 return "Not supported";
280 case TEST_NONE:
281 return "No test results available";
282 case TEST_FAILED:
283 return "Test failed";
284 case TEST_WARNING:
285 return "Warning";
286 case TEST_INPROGRESS:
287 return "In progress";
288 case TEST_PASSED:
289 return "Battery OK";
290 case TEST_FAILCAP:
291 return "Test failed -- insufficient battery capacity";
292 case TEST_FAILLOAD:
293 return "Test failed -- battery overloaded";
294 case TEST_UNKNOWN:
295 default:
296 return "Unknown";
301 * Carl Lindberg <lindberg@clindberg.org> patch applied 24Dec04
303 * The APC network management cards have options to shut down, reboot, or
304 * "sleep" (really just a delayed reboot) the UPS. For all of these, it
305 * has a "graceful" option, meaning it gives the PowerChute software a
306 * chance to cleanly shutdown the machine before the UPS is shut down. To
307 * do this, the card sets the ONBATT and LOWBATT statuses at the same
308 * time, waits several minutes, then cuts power. PowerChute (presumably)
309 * notices this and shuts the machine down, but unfortunately apcupsd did
310 * not.
312 * The problem happens because in this situation, apcupsd sets the
313 * UPS_prev_battlow status before testing for it. In the do_action()
314 * function, apcupsd notices the ONBATT status, and uses the
315 * "st_PowerFailure" state to send off an initial power failure event.
316 * After a short delay, do_action() is invoked again. If ONBATT is
317 * still set, the "st_OnBattery" state is used, and the onbattery event
318 * (among other things) is sent.
320 * The test for LOWBATT to see if shutdown is needed is only done in the
321 * st_OnBattery state, and it's done if LOWBATT is set but
322 * UPS_prev_battlow is not set yet. In normal operation, LOWBATT will
323 * only come on after a period of ONBATT, and this situation works fine.
324 * However, since ONBATT and LOWBATT were set simultaneously, the
325 * UPS_prev_battlow was set the first time through, when the
326 * st_PowerFailure was used, meaning the test for LOWBATT was not
327 * performed. The second time through in st_OnBattery, UPS_prev_battlow
328 * is already set, meaning apcupsd is assuming that the needed shutdown
329 * has already been invoked.
331 * The code fix just moves setting of the UPS_prev_battlow status to
332 * inside the block that tests for it, ensuring that LOWBATT will never be
333 * ignored. Clearing the UPS_prev_battlow status remains where it is in
334 * the code, and it will always be turned off if LOWBATT is no longer set.
336 * After the fix, UPS_prev_battlow is not prematurely set, and apcupsd
337 * catches the signal from the management card to shut down. I've had the
338 * code in for over a month, and it's worked fine, both from using the
339 * management card and regular pull-the-plug tests as well. This was
340 * only tested with a serial UPS, but I assume it would be a problem with
341 * USB and SNMP connections as well.
344 /*********************************************************************/
345 void do_action(UPSINFO *ups)
347 time_t now;
348 static int requested_logoff = 0; /* asked user to logoff */
349 static int first = 1;
350 enum a_state state;
352 write_lock(ups);
354 time(&now); /* get current time */
355 if (first) {
356 first = 0;
357 ups->last_time_nologon = ups->last_time_annoy = now;
358 ups->last_time_on_line = now;
361 * This is cheating slightly. We want to initialize the previous
362 * status to zero so all set bits in current status will appear
363 * as changes, thus allowing us to handle starting up when power
364 * has already failed, for instance. However, we don't want to
365 * get a BATTATTACHED event every time the daemon starts, so we
366 * set the UPS_battpresent bit in the previous status.
368 ups->PrevStatus = UPS_battpresent;
371 if (ups->is_replacebatt()) { /* Replace battery */
373 * Complain every 9 hours, this causes the complaint to
374 * cycle around the clock and hopefully be more noticable
375 * without being too annoying. Also, ignore all change battery
376 * indications for the first 10 minutes of running time to
377 * prevent false alerts. Finally, issue the event 5 times, then
378 * clear the flag to silence false alarms. If the battery is
379 * really dead, the flag will be reset in apcsmart.c
381 * UPS_replacebatt is a flag. To count use a static local counter.
382 * The counter is initialized only one time at startup.
384 if (now - ups->start_time < 60 * 10 || ups->ChangeBattCounter > 5) {
385 ups->clear_replacebatt();
386 ups->ChangeBattCounter = 0;
387 } else if (now - ups->last_time_changeme > 60 * 60 * 9) {
388 generate_event(ups, CMDCHANGEME);
389 ups->last_time_changeme = now;
390 ups->ChangeBattCounter++;
394 /* Remote is shutting down, so must we. */
395 if (ups->is_shut_remote()) {
396 if (ups->chg_shut_remote()) {
397 generate_event(ups, CMDREMOTEDOWN);
399 ups->PrevStatus = ups->Status;
400 write_unlock(ups);
401 return;
404 /* Generate event if battery is disconnected or reattached */
405 if (ups->chg_battpresent()) {
406 if (ups->is_battpresent())
407 generate_event(ups, CMDBATTATTACH);
408 else
409 generate_event(ups, CMDBATTDETACH);
413 * Did BattLow bit go high? If so, start the battlow shutdown
414 * timer. We will only act on this timer if we switch to battery
415 * (or are already on battery). It is possible that this event occurs
416 * at the same time as or even slightly before we switch to battery.
417 * Therefore we must check it every time we get new status.
419 if (ups->chg_battlow()) {
420 if (ups->is_battlow()) {
421 Dmsg0(100, "BATTLOW asserted\n");
422 ups->start_shut_lbatt = now;
423 } else {
424 Dmsg0(100, "BATTLOW glitch\n");
428 state = get_state(ups, now);
429 switch (state) {
430 case st_OnMains:
431 /* If power is good, update the timers. */
432 ups->last_time_nologon = ups->last_time_annoy = now;
433 ups->last_time_on_line = now;
434 ups->clear_fastpoll();
435 break;
437 case st_PowerFailure:
438 /* This is our first indication of a power problem */
439 ups->set_fastpoll(); /* speed up polling */
441 /* Check if selftest */
442 Dmsg1(80, "Power failure detected. 0x%x\n", ups->Status);
443 device_entry_point(ups, DEVICE_CMD_CHECK_SELFTEST, NULL);
445 if (ups->SelfTest)
446 generate_event(ups, CMDSTARTSELFTEST);
447 else
448 generate_event(ups, CMDPOWEROUT);
450 ups->last_time_nologon = ups->last_time_annoy = now;
451 ups->last_time_on_line = now;
452 ups->last_onbatt_time = now;
453 ups->num_xfers++;
455 /* Enable DTR on dumb UPSes with CUSTOM_SIMPLE cable. */
456 device_entry_point(ups, DEVICE_CMD_DTR_ENABLE, NULL);
457 break;
459 case st_SelfTest:
460 /* allow 40 seconds max for selftest */
461 if (now - ups->SelfTest < 40 && !ups->is_battlow())
462 break;
464 /* Cancel self test, announce power failure */
465 ups->SelfTest = 0;
466 Dmsg1(80, "UPS Self Test cancelled, fall-thru to On Battery. 0x%x\n",
467 ups->Status);
469 /* ...FALL-THRU to st_OnBattery... */
471 case st_OnBattery:
472 /* Did the second test verify the power is failing? */
473 if (!ups->is_onbatt_msg() &&
474 time(NULL) - ups->last_time_on_line >= ups->onbattdelay) {
475 ups->set_onbatt_msg(); /* it is confirmed, we are on batteries */
476 generate_event(ups, CMDONBATTERY);
477 ups->last_time_nologon = ups->last_time_annoy = now;
478 ups->last_time_on_line = now;
479 break;
482 /* shutdown requested but still running */
483 if (ups->is_shutdown()) {
484 if (ups->killdelay && now - ups->ShutDown >= ups->killdelay) {
485 if (!ups->is_slave())
486 initiate_hibernate(ups);
487 ups->ShutDown = now; /* wait a bit before doing again */
488 ups->set_shutdown();
490 } else { /* not shutdown yet */
492 * Did MaxTimeOnBattery Expire? (TIMEOUT in apcupsd.conf)
493 * Normal Power down during Power Failure: Shutdown immediately.
495 if ((ups->maxtime > 0) && ((now - ups->last_time_on_line) > ups->maxtime)) {
496 ups->set_shut_btime();
497 generate_event(ups, CMDTIMEOUT);
498 break;
502 * Did Battery Charge or Runtime go below percent cutoff?
503 * Normal Power down during Power Failure: Start shutdown timer.
505 if (ups->UPS_Cap[CI_BATTLEV] && ups->BattChg <= ups->percent) {
506 if (!ups->is_shut_load()) {
507 Dmsg0(100, "CI_BATTLEV shutdown\n");
508 ups->set_shut_load();
509 ups->start_shut_load = now;
511 } else {
512 if (ups->UPS_Cap[CI_BATTLEV] && ups->is_shut_load())
513 Dmsg0(100, "CI_BATTLEV glitch\n");
514 ups->clear_shut_load();
517 if (ups->UPS_Cap[CI_RUNTIM] && ups->TimeLeft <= ups->runtime) {
518 if (!ups->is_shut_ltime()) {
519 Dmsg0(100, "CI_RUNTIM shutdown\n");
520 ups->set_shut_ltime();
521 ups->start_shut_ltime = now;
523 } else {
524 if (ups->UPS_Cap[CI_RUNTIM] && ups->is_shut_ltime())
525 Dmsg0(100, "CI_RUNTIM glitch\n");
526 ups->clear_shut_ltime();
530 * Check for expired shutdown timers and act on them.
532 if (ups->is_battlow() && ((now - ups->start_shut_lbatt) >= 5)) {
533 generate_event(ups, CMDFAILING);
534 break;
536 if (ups->is_shut_load() && ((now - ups->start_shut_load) >= 5)) {
537 generate_event(ups, CMDLOADLIMIT);
538 break;
540 if (ups->is_shut_ltime() && ((now - ups->start_shut_ltime) >= 5)) {
541 generate_event(ups, CMDRUNLIMIT);
542 break;
546 * We are on batteries, the battery is low, and the power is not
547 * down ==> the battery is dead. KES Sept 2000
549 * Then the battery has failed!!!
550 * Must do Emergency Shutdown NOW
552 if (ups->is_battlow() && ups->is_online()) {
553 ups->set_shut_emerg();
554 generate_event(ups, CMDEMERGENCY);
557 /* Announce to LogOff, with initial delay. */
558 if (((now - ups->last_time_on_line) > ups->annoydelay) &&
559 ((now - ups->last_time_annoy) > ups->annoy) && ups->nologin_file) {
560 if (!requested_logoff) {
561 /* generate log message once */
562 generate_event(ups, CMDANNOYME);
563 } else {
564 /* but execute script every time */
565 execute_command(ups, ups_event[CMDANNOYME]);
568 time(&ups->last_time_annoy);
569 requested_logoff = true;
572 /* Delay NoLogons. */
573 if (!ups->nologin_file) {
574 switch (ups->nologin.type) {
575 case NEVER:
576 break;
577 case TIMEOUT:
578 if ((now - ups->last_time_nologon) > ups->nologin_time)
579 prohibit_logins(ups);
580 break;
581 case PERCENT:
582 if (ups->UPS_Cap[CI_BATTLEV] && ups->nologin_time >= ups->BattChg)
583 prohibit_logins(ups);
584 break;
585 case MINUTES:
586 if (ups->UPS_Cap[CI_RUNTIM] && ups->nologin_time >= ups->TimeLeft)
587 prohibit_logins(ups);
588 break;
589 case ALWAYS:
590 default:
591 prohibit_logins(ups);
592 break;
596 break;
598 case st_MainsBack:
599 /* The power is back after a power failure or a self test */
600 if (ups->is_onbatt_msg()) {
601 ups->clear_onbatt_msg();
602 generate_event(ups, CMDOFFBATTERY);
605 if (ups->SelfTest) {
606 ups->LastSelfTest = ups->SelfTest;
607 ups->SelfTest = 0;
609 /* Get last selftest results, only for smart UPSes. */
610 device_entry_point(ups, DEVICE_CMD_GET_SELFTEST_MSG, NULL);
611 log_event(ups, LOG_ALERT, "UPS Self Test completed: %s",
612 testresult_to_string(ups->testresult));
613 execute_command(ups, ups_event[CMDENDSELFTEST]);
614 } else {
615 generate_event(ups, CMDMAINSBACK);
618 if (ups->nologin_file)
619 log_event(ups, LOG_ALERT, "Allowing logins");
621 logonfail(ups, 1);
622 ups->nologin_file = false;
623 requested_logoff = false;
624 device_entry_point(ups, DEVICE_CMD_DTR_ST_DISABLE, NULL);
625 ups->last_offbatt_time = now;
628 * Sanity check. Sometimes only first power problem trips
629 * thus last_onbatt_time is not set when we get here.
631 if (ups->last_onbatt_time <= 0)
632 ups->last_onbatt_time = ups->last_offbatt_time;
634 ups->cum_time_on_batt += (ups->last_offbatt_time - ups->last_onbatt_time);
635 break;
637 default:
638 break;
641 /* Do a non-blocking wait on any exec()ed children */
642 if (ups->num_execed_children > 0) {
643 while (waitpid(-1, NULL, WNOHANG) > 0)
644 ups->num_execed_children--;
647 /* Remember status */
648 ups->PrevStatus = ups->Status;
650 write_unlock(ups);