timed(8): master() never returns, so don't pretend it does.
[dragonfly.git] / usr.sbin / timed / timed / master.c
blob8e3391abfa2301452842c8c1c5577de6ac234f4d
1 /*-
2 * Copyright (c) 1985, 1993
3 * The Regents of the University of California. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the University nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
29 * @(#)master.c 8.1 (Berkeley) 6/6/93
30 * $FreeBSD: src/usr.sbin/timed/timed/master.c,v 1.6 1999/08/28 01:20:17 peter Exp $
33 #include "globals.h"
34 #include <sys/file.h>
35 #include <sys/types.h>
36 #include <sys/times.h>
37 #include <setjmp.h>
38 #include <utmp.h>
39 #include "pathnames.h"
/* Objects defined in other translation units of timed. */
extern int measure_delta;
extern jmp_buf jmpenv;
extern int Mflag;
extern int justquit;

/*
 * Number of upcoming synch() rounds in which the delta handed to synch()
 * is used as-is instead of being replaced by networkdelta().  It is set by
 * mchgdate() and spreadtime() and counted down by synch().
 */
static int dictate;

/* slaves listening to our clock; maintained by addmach() and remmach() */
static int slvcount;

static void mchgdate(struct tsp *);
52 * The main function of `master' is to periodically compute the differences
53 * (deltas) between its clock and the clocks of the slaves, to compute the
54 * network average delta, and to send to the slaves the differences between
55 * their individual deltas and the network delta.
56 * While waiting, it receives messages from the slaves (i.e. requests for
57 * master's name, remote requests to set the network time, ...), and
58 * takes the appropriate action.
60 void
61 master(void)
63 struct hosttbl *htp;
64 long pollingtime;
65 #define POLLRATE 4
66 int polls;
67 struct timeval wait, ntime;
68 time_t tsp_time_sec;
69 struct tsp *msg, *answer, to;
70 char newdate[32];
71 struct sockaddr_in taddr;
72 char tname[MAXHOSTNAMELEN];
73 struct netinfo *ntp;
74 int i;
76 syslog(LOG_NOTICE, "This machine is master");
77 if (trace)
78 fprintf(fd, "This machine is master\n");
79 for (ntp = nettab; ntp != NULL; ntp = ntp->next) {
80 if (ntp->status == MASTER)
81 masterup(ntp);
83 gettimeofday(&ntime, 0);
84 pollingtime = ntime.tv_sec+3;
85 if (justquit)
86 polls = 0;
87 else
88 polls = POLLRATE-1;
90 /* Process all outstanding messages before spending the long time necessary
91 * to update all timers.
93 loop:
94 gettimeofday(&ntime, 0);
95 wait.tv_sec = pollingtime - ntime.tv_sec;
96 if (wait.tv_sec < 0)
97 wait.tv_sec = 0;
98 wait.tv_usec = 0;
99 msg = readmsg(TSP_ANY, ANYADDR, &wait, 0);
100 if (!msg) {
101 gettimeofday(&ntime, 0);
102 if (ntime.tv_sec >= pollingtime) {
103 pollingtime = ntime.tv_sec + SAMPLEINTVL;
104 get_goodgroup(0);
106 /* If a bogus master told us to quit, we can have decided to ignore a
107 * network. Therefore, periodically try to take over everything.
109 polls = (polls + 1) % POLLRATE;
110 if (0 == polls && nignorednets > 0) {
111 trace_msg("Looking for nets to re-master\n");
112 for (ntp = nettab; ntp; ntp = ntp->next) {
113 if (ntp->status == IGNORE
114 || ntp->status == NOMASTER) {
115 lookformaster(ntp);
116 if (ntp->status == MASTER) {
117 masterup(ntp);
118 polls = POLLRATE-1;
121 if (ntp->status == MASTER
122 && --ntp->quit_count < 0)
123 ntp->quit_count = 0;
125 if (polls != 0)
126 setstatus();
129 synch(0L);
131 for (ntp = nettab; ntp != NULL; ntp = ntp->next) {
132 to.tsp_type = TSP_LOOP;
133 to.tsp_vers = TSPVERSION;
134 to.tsp_seq = sequence++;
135 to.tsp_hopcnt = MAX_HOPCNT;
136 strlcpy(to.tsp_name, hostname,
137 sizeof(to.tsp_name));
138 bytenetorder(&to);
139 if (sendto(sock, (char *)&to,
140 sizeof(struct tsp), 0,
141 (struct sockaddr*)&ntp->dest_addr,
142 sizeof(ntp->dest_addr)) < 0) {
143 trace_sendto_err(ntp->dest_addr.sin_addr);
149 } else {
150 switch (msg->tsp_type) {
152 case TSP_MASTERREQ:
153 break;
155 case TSP_SLAVEUP:
156 newslave(msg);
157 break;
159 case TSP_SETDATE:
161 * XXX check to see it is from ourself
163 tsp_time_sec = msg->tsp_time.tv_sec;
164 strlcpy(newdate, ctime(&tsp_time_sec), sizeof(newdate));
165 if (!good_host_name(msg->tsp_name)) {
166 syslog(LOG_NOTICE,
167 "attempted date change by %s to %s",
168 msg->tsp_name, newdate);
169 spreadtime();
170 break;
173 mchgdate(msg);
174 gettimeofday(&ntime, 0);
175 pollingtime = ntime.tv_sec + SAMPLEINTVL;
176 break;
178 case TSP_SETDATEREQ:
179 if (!fromnet || fromnet->status != MASTER)
180 break;
181 tsp_time_sec = msg->tsp_time.tv_sec;
182 strlcpy(newdate, ctime(&tsp_time_sec), sizeof(newdate));
183 htp = findhost(msg->tsp_name);
184 if (htp == NULL) {
185 syslog(LOG_ERR,
186 "attempted SET DATEREQ by uncontrolled %s to %s",
187 msg->tsp_name, newdate);
188 break;
190 if (htp->seq == msg->tsp_seq)
191 break;
192 htp->seq = msg->tsp_seq;
193 if (!htp->good) {
194 syslog(LOG_NOTICE,
195 "attempted SET DATEREQ by untrusted %s to %s",
196 msg->tsp_name, newdate);
197 spreadtime();
198 break;
201 mchgdate(msg);
202 gettimeofday(&ntime, 0);
203 pollingtime = ntime.tv_sec + SAMPLEINTVL;
204 break;
206 case TSP_MSITE:
207 xmit(TSP_ACK, msg->tsp_seq, &from);
208 break;
210 case TSP_MSITEREQ:
211 break;
213 case TSP_TRACEON:
214 traceon();
215 break;
217 case TSP_TRACEOFF:
218 traceoff("Tracing ended at %s\n");
219 break;
221 case TSP_ELECTION:
222 if (!fromnet)
223 break;
224 if (fromnet->status == MASTER) {
225 pollingtime = 0;
226 addmach(msg->tsp_name, &from,fromnet);
228 taddr = from;
229 strlcpy(tname, msg->tsp_name, sizeof(tname));
230 to.tsp_type = TSP_QUIT;
231 strlcpy(to.tsp_name, hostname, sizeof(to.tsp_name));
232 answer = acksend(&to, &taddr, tname,
233 TSP_ACK, 0, 1);
234 if (answer == NULL) {
235 syslog(LOG_ERR, "election error by %s",
236 tname);
238 break;
240 case TSP_CONFLICT:
242 * After a network partition, there can be
243 * more than one master: the first slave to
244 * come up will notify here the situation.
246 if (!fromnet || fromnet->status != MASTER)
247 break;
248 strlcpy(to.tsp_name, hostname, sizeof(to.tsp_name));
250 /* The other master often gets into the same state,
251 * with boring results if we stay at it forever.
253 ntp = fromnet; /* (acksend() can leave fromnet=0 */
254 for (i = 0; i < 3; i++) {
255 to.tsp_type = TSP_RESOLVE;
256 strlcpy(to.tsp_name, hostname,
257 sizeof(to.tsp_name));
258 answer = acksend(&to, &ntp->dest_addr,
259 ANYADDR, TSP_MASTERACK,
260 ntp, 0);
261 if (!answer)
262 break;
263 htp = addmach(answer->tsp_name,&from,ntp);
264 to.tsp_type = TSP_QUIT;
265 msg = acksend(&to, &htp->addr, htp->name,
266 TSP_ACK, 0, htp->noanswer);
267 if (msg == NULL) {
268 syslog(LOG_ERR,
269 "no response from %s to CONFLICT-QUIT",
270 htp->name);
273 masterup(ntp);
274 pollingtime = 0;
275 break;
277 case TSP_RESOLVE:
278 if (!fromnet || fromnet->status != MASTER)
279 break;
281 * do not want to call synch() while waiting
282 * to be killed!
284 gettimeofday(&ntime, NULL);
285 pollingtime = ntime.tv_sec + SAMPLEINTVL;
286 break;
288 case TSP_QUIT:
289 doquit(msg); /* become a slave */
290 break;
292 case TSP_LOOP:
293 if (!fromnet || fromnet->status != MASTER
294 || !strcmp(msg->tsp_name, hostname))
295 break;
297 * We should not have received this from a net
298 * we are master on. There must be two masters.
300 htp = addmach(msg->tsp_name, &from,fromnet);
301 to.tsp_type = TSP_QUIT;
302 strlcpy(to.tsp_name, hostname, sizeof(to.tsp_name));
303 answer = acksend(&to, &htp->addr, htp->name,
304 TSP_ACK, 0, 1);
305 if (!answer) {
306 syslog(LOG_WARNING,
307 "loop breakage: no reply from %s=%s to QUIT",
308 htp->name, inet_ntoa(htp->addr.sin_addr));
309 remmach(htp);
312 case TSP_TEST:
313 if (trace) {
314 fprintf(fd,
315 "\tnets = %d, masters = %d, slaves = %d, ignored = %d\n",
316 nnets, nmasternets, nslavenets, nignorednets);
317 setstatus();
319 pollingtime = 0;
320 polls = POLLRATE-1;
321 break;
323 default:
324 if (trace) {
325 fprintf(fd, "garbage message: ");
326 print(msg, &from);
328 break;
331 goto loop;
336 * change the system date on the master
338 static void
339 mchgdate(struct tsp *msg)
341 char tname[MAXHOSTNAMELEN];
342 char olddate[32];
343 struct timeval otime, ntime;
345 strlcpy(tname, msg->tsp_name, sizeof(tname));
347 xmit(TSP_DATEACK, msg->tsp_seq, &from);
349 strlcpy(olddate, date(), sizeof(olddate));
351 /* adjust time for residence on the queue */
352 gettimeofday(&otime, 0);
353 adj_msg_time(msg,&otime);
355 timevalsub(&ntime, &msg->tsp_time, &otime);
356 if (ntime.tv_sec < MAXADJ && ntime.tv_sec > -MAXADJ) {
358 * do not change the clock if we can adjust it
360 dictate = 3;
361 synch(tvtomsround(ntime));
362 } else {
363 logwtmp("|", "date", "");
364 settimeofday(&msg->tsp_time, 0);
365 logwtmp("{", "date", "");
366 spreadtime();
369 syslog(LOG_NOTICE, "date changed by %s from %s",
370 tname, olddate);
375 * synchronize all of the slaves
377 void
378 synch(long mydelta)
380 struct hosttbl *htp;
381 int measure_status;
382 struct timeval check, stop, wait;
384 if (slvcount > 0) {
385 if (trace)
386 fprintf(fd, "measurements starting at %s\n", date());
387 gettimeofday(&check, 0);
388 for (htp = self.l_fwd; htp != &self; htp = htp->l_fwd) {
389 if (htp->noanswer != 0) {
390 measure_status = measure(500, 100,
391 htp->name,
392 &htp->addr,0);
393 } else {
394 measure_status = measure(3000, 100,
395 htp->name,
396 &htp->addr,0);
398 if (measure_status != GOOD) {
399 /* The slave did not respond. We have
400 * just wasted lots of time on it.
402 htp->delta = HOSTDOWN;
403 if (++htp->noanswer >= LOSTHOST) {
404 if (trace) {
405 fprintf(fd,
406 "purging %s for not answering ICMP\n",
407 htp->name);
408 fflush(fd);
410 htp = remmach(htp);
412 } else {
413 htp->delta = measure_delta;
415 gettimeofday(&stop, 0);
416 timevalsub(&stop, &stop, &check);
417 if (stop.tv_sec >= 1) {
418 if (trace)
419 fflush(fd);
421 * ack messages periodically
423 wait.tv_sec = 0;
424 wait.tv_usec = 0;
425 if (0 != readmsg(TSP_TRACEON,ANYADDR,
426 &wait,0))
427 traceon();
428 gettimeofday(&check, 0);
431 if (trace)
432 fprintf(fd, "measurements finished at %s\n", date());
434 if (!(status & SLAVE)) {
435 if (!dictate) {
436 mydelta = networkdelta();
437 } else {
438 dictate--;
441 if (trace && (mydelta != 0 || (status & SLAVE)))
442 fprintf(fd,"local correction of %ld ms.\n", mydelta);
443 correct(mydelta);
447 * sends the time to each slave after the master
448 * has received the command to set the network time
450 void
451 spreadtime(void)
453 struct hosttbl *htp;
454 struct tsp to;
455 struct tsp *answer;
457 /* Do not listen to the consensus after forcing the time. This is because
458 * the consensus takes a while to reach the time we are dictating.
460 dictate = 2;
461 for (htp = self.l_fwd; htp != &self; htp = htp->l_fwd) {
462 to.tsp_type = TSP_SETTIME;
463 strlcpy(to.tsp_name, hostname, sizeof(to.tsp_name));
464 gettimeofday(&to.tsp_time, 0);
465 answer = acksend(&to, &htp->addr, htp->name,
466 TSP_ACK, 0, htp->noanswer);
467 if (answer == NULL) {
468 /* We client does not respond, then we have
469 * just wasted lots of time on it.
471 syslog(LOG_WARNING,
472 "no reply to SETTIME from %s", htp->name);
473 if (++htp->noanswer >= LOSTHOST) {
474 if (trace) {
475 fprintf(fd,
476 "purging %s for not answering",
477 htp->name);
478 fflush(fd);
480 htp = remmach(htp);
486 void
487 prthp(clock_t delta)
489 static time_t next_time;
490 time_t this_time;
491 struct tms tm;
492 struct hosttbl *htp;
493 int length, l;
494 int i;
496 if (!fd) /* quit if tracing already off */
497 return;
499 this_time = times(&tm);
500 if (this_time + (time_t)delta < next_time)
501 return;
502 next_time = this_time + CLK_TCK;
504 fprintf(fd, "host table: %d entries at %s\n", slvcount, date());
505 htp = self.l_fwd;
506 length = 1;
507 for (i = 1; i <= slvcount; i++, htp = htp->l_fwd) {
508 l = strlen(htp->name) + 1;
509 if (length+l >= 80) {
510 fprintf(fd, "\n");
511 length = 0;
513 length += l;
514 fprintf(fd, " %s", htp->name);
516 fprintf(fd, "\n");
520 static struct hosttbl *newhost_hash;
521 static struct hosttbl *lasthfree = &hosttbl[0];
524 struct hosttbl * /* answer or 0 */
525 findhost(char *name)
527 int i, j;
528 struct hosttbl *htp;
529 char *p;
531 j= 0;
532 for (p = name, i = 0; i < 8 && *p != '\0'; i++, p++)
533 j = (j << 2) ^ *p;
534 newhost_hash = &hosttbl[j % NHOSTS];
536 htp = newhost_hash;
537 if (htp->name[0] == '\0')
538 return(0);
539 do {
540 if (!strcmp(name, htp->name))
541 return(htp);
542 htp = htp->h_fwd;
543 } while (htp != newhost_hash);
544 return(0);
548 * add a host to the list of controlled machines if not already there
550 struct hosttbl *
551 addmach(char *name, struct sockaddr_in *addr, struct netinfo *ntp)
553 struct hosttbl *ret, *p, *b, *f;
555 ret = findhost(name);
556 if (ret == NULL) {
557 if (slvcount >= NHOSTS) {
558 if (trace) {
559 fprintf(fd, "no more slots in host table\n");
560 prthp(CLK_TCK);
562 syslog(LOG_ERR, "no more slots in host table");
563 Mflag = 0;
564 longjmp(jmpenv, 2); /* give up and be a slave */
567 /* if our home hash slot is occupied, find a free entry
568 * in the hash table
570 if (newhost_hash->name[0] != '\0') {
571 do {
572 ret = lasthfree;
573 if (++lasthfree > &hosttbl[NHOSTS])
574 lasthfree = &hosttbl[1];
575 } while (ret->name[0] != '\0');
577 if (!newhost_hash->head) {
578 /* Move an interloper using our home. Use
579 * scratch pointers in case the new head is
580 * pointing to itself.
582 f = newhost_hash->h_fwd;
583 b = newhost_hash->h_bak;
584 f->h_bak = ret;
585 b->h_fwd = ret;
586 f = newhost_hash->l_fwd;
587 b = newhost_hash->l_bak;
588 f->l_bak = ret;
589 b->l_fwd = ret;
590 bcopy(newhost_hash,ret,sizeof(*ret));
591 ret = newhost_hash;
592 ret->head = 1;
593 ret->h_fwd = ret;
594 ret->h_bak = ret;
595 } else {
596 /* link to an existing chain in our home
598 ret->head = 0;
599 p = newhost_hash->h_bak;
600 ret->h_fwd = newhost_hash;
601 ret->h_bak = p;
602 p->h_fwd = ret;
603 newhost_hash->h_bak = ret;
605 } else {
606 ret = newhost_hash;
607 ret->head = 1;
608 ret->h_fwd = ret;
609 ret->h_bak = ret;
611 ret->addr = *addr;
612 ret->ntp = ntp;
613 strlcpy(ret->name, name, sizeof(ret->name));
614 ret->good = good_host_name(name);
615 ret->l_fwd = &self;
616 ret->l_bak = self.l_bak;
617 self.l_bak->l_fwd = ret;
618 self.l_bak = ret;
619 slvcount++;
621 ret->noanswer = 0;
622 ret->need_set = 1;
624 } else {
625 ret->noanswer = (ret->noanswer != 0);
628 /* need to clear sequence number anyhow */
629 ret->seq = 0;
630 return(ret);
634 * remove the machine with the given index in the host table.
636 struct hosttbl *
637 remmach(struct hosttbl *htp)
639 struct hosttbl *lprv, *hnxt, *f, *b;
641 if (trace)
642 fprintf(fd, "remove %s\n", htp->name);
644 /* get out of the lists */
645 htp->l_fwd->l_bak = lprv = htp->l_bak;
646 htp->l_bak->l_fwd = htp->l_fwd;
647 htp->h_fwd->h_bak = htp->h_bak;
648 htp->h_bak->h_fwd = hnxt = htp->h_fwd;
650 /* If we are in the home slot, pull up the chain */
651 if (htp->head && hnxt != htp) {
652 if (lprv == hnxt)
653 lprv = htp;
655 /* Use scratch pointers in case the new head is pointing to
656 * itself.
658 f = hnxt->h_fwd;
659 b = hnxt->h_bak;
660 f->h_bak = htp;
661 b->h_fwd = htp;
662 f = hnxt->l_fwd;
663 b = hnxt->l_bak;
664 f->l_bak = htp;
665 b->l_fwd = htp;
666 hnxt->head = 1;
667 bcopy(hnxt, htp, sizeof(*htp));
668 lasthfree = hnxt;
669 } else {
670 lasthfree = htp;
673 lasthfree->name[0] = '\0';
674 lasthfree->h_fwd = 0;
675 lasthfree->l_fwd = 0;
676 slvcount--;
678 return lprv;
683 * Remove all the machines from the host table that exist on the given
684 * network. This is called when a master transitions to a slave on a
685 * given network.
687 void
688 rmnetmachs(struct netinfo *ntp)
690 struct hosttbl *htp;
692 if (trace)
693 prthp(CLK_TCK);
694 for (htp = self.l_fwd; htp != &self; htp = htp->l_fwd) {
695 if (ntp == htp->ntp)
696 htp = remmach(htp);
698 if (trace)
699 prthp(CLK_TCK);
702 void
703 masterup(struct netinfo *net)
706 xmit(TSP_MASTERUP, 0, &net->dest_addr);
709 * Do not tell new slaves our time for a while. This ensures
710 * we do not tell them to start using our time, before we have
711 * found a good master.
713 gettimeofday(&net->slvwait, 0);
716 void
717 newslave(struct tsp *msg)
719 struct hosttbl *htp;
720 struct tsp *answer, to;
721 struct timeval now;
723 if (!fromnet || fromnet->status != MASTER)
724 return;
726 htp = addmach(msg->tsp_name, &from,fromnet);
727 htp->seq = msg->tsp_seq;
728 if (trace)
729 prthp(0);
732 * If we are stable, send our time to the slave.
733 * Do not go crazy if the date has been changed.
735 gettimeofday(&now, 0);
736 if (now.tv_sec >= fromnet->slvwait.tv_sec+3
737 || now.tv_sec < fromnet->slvwait.tv_sec) {
738 to.tsp_type = TSP_SETTIME;
739 strlcpy(to.tsp_name, hostname, sizeof(to.tsp_name));
740 gettimeofday(&to.tsp_time, 0);
741 answer = acksend(&to, &htp->addr,
742 htp->name, TSP_ACK,
743 0, htp->noanswer);
744 if (answer) {
745 htp->need_set = 0;
746 } else {
747 syslog(LOG_WARNING,
748 "no reply to initial SETTIME from %s",
749 htp->name);
750 htp->noanswer = LOSTHOST;
757 * react to a TSP_QUIT:
759 void
760 doquit(struct tsp *msg)
763 if (fromnet->status == MASTER) {
764 if (!good_host_name(msg->tsp_name)) {
765 if (fromnet->quit_count <= 0) {
766 syslog(LOG_NOTICE,"untrusted %s told us QUIT",
767 msg->tsp_name);
768 suppress(&from, msg->tsp_name, fromnet);
769 fromnet->quit_count = 1;
770 return;
772 syslog(LOG_NOTICE, "untrusted %s told us QUIT twice",
773 msg->tsp_name);
774 fromnet->quit_count = 2;
775 fromnet->status = NOMASTER;
776 } else {
777 fromnet->status = SLAVE;
779 rmnetmachs(fromnet);
780 longjmp(jmpenv, 2); /* give up and be a slave */
782 } else {
783 if (!good_host_name(msg->tsp_name)) {
784 syslog(LOG_NOTICE, "untrusted %s told us QUIT",
785 msg->tsp_name);
786 fromnet->quit_count = 2;
791 void
792 traceon(void)
795 if (!fd) {
796 fd = fopen(_PATH_TIMEDLOG, "w");
797 if (!fd) {
798 trace = 0;
799 return;
801 fprintf(fd,"Tracing started at %s\n", date());
803 trace = 1;
804 get_goodgroup(1);
805 setstatus();
806 prthp(CLK_TCK);
810 void
811 traceoff(char *msg)
814 get_goodgroup(1);
815 setstatus();
816 prthp(CLK_TCK);
817 if (trace) {
818 fprintf(fd, msg, date());
819 fclose(fd);
820 fd = 0;
822 #ifdef GPROF
823 moncontrol(0);
824 _mcleanup();
825 moncontrol(1);
826 #endif
827 trace = OFF;