2 * Copyright (c) 1985, 1993
3 * The Regents of the University of California. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the University nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * @(#)master.c 8.1 (Berkeley) 6/6/93
30 * $FreeBSD: src/usr.sbin/timed/timed/master.c,v 1.6 1999/08/28 01:20:17 peter Exp $
35 #include <sys/types.h>
36 #include <sys/times.h>
39 #include "pathnames.h"
41 extern int measure_delta
;
42 extern jmp_buf jmpenv
;
47 static int slvcount
; /* slaves listening to our clock */
49 static void mchgdate(struct tsp
*);
52 * The main function of `master' is to periodically compute the differences
53 * (deltas) between its clock and the clocks of the slaves, to compute the
54 * network average delta, and to send to the slaves the differences between
55 * their individual deltas and the network delta.
56 * While waiting, it receives messages from the slaves (i.e. requests for
57 * master's name, remote requests to set the network time, ...), and
58 * takes the appropriate action.
67 struct timeval wait
, ntime
;
69 struct tsp
*msg
, *answer
, to
;
71 struct sockaddr_in taddr
;
72 char tname
[MAXHOSTNAMELEN
];
76 syslog(LOG_NOTICE
, "This machine is master");
78 fprintf(fd
, "This machine is master\n");
79 for (ntp
= nettab
; ntp
!= NULL
; ntp
= ntp
->next
) {
80 if (ntp
->status
== MASTER
)
83 gettimeofday(&ntime
, 0);
84 pollingtime
= ntime
.tv_sec
+3;
90 /* Process all outstanding messages before spending the long time necessary
91 * to update all timers.
94 gettimeofday(&ntime
, 0);
95 wait
.tv_sec
= pollingtime
- ntime
.tv_sec
;
99 msg
= readmsg(TSP_ANY
, ANYADDR
, &wait
, 0);
101 gettimeofday(&ntime
, 0);
102 if (ntime
.tv_sec
>= pollingtime
) {
103 pollingtime
= ntime
.tv_sec
+ SAMPLEINTVL
;
106 /* If a bogus master told us to quit, we may have decided to ignore a
107 * network. Therefore, periodically try to take over everything.
109 polls
= (polls
+ 1) % POLLRATE
;
110 if (0 == polls
&& nignorednets
> 0) {
111 trace_msg("Looking for nets to re-master\n");
112 for (ntp
= nettab
; ntp
; ntp
= ntp
->next
) {
113 if (ntp
->status
== IGNORE
114 || ntp
->status
== NOMASTER
) {
116 if (ntp
->status
== MASTER
) {
121 if (ntp
->status
== MASTER
122 && --ntp
->quit_count
< 0)
131 for (ntp
= nettab
; ntp
!= NULL
; ntp
= ntp
->next
) {
132 to
.tsp_type
= TSP_LOOP
;
133 to
.tsp_vers
= TSPVERSION
;
134 to
.tsp_seq
= sequence
++;
135 to
.tsp_hopcnt
= MAX_HOPCNT
;
136 strlcpy(to
.tsp_name
, hostname
,
137 sizeof(to
.tsp_name
));
139 if (sendto(sock
, (char *)&to
,
140 sizeof(struct tsp
), 0,
141 (struct sockaddr
*)&ntp
->dest_addr
,
142 sizeof(ntp
->dest_addr
)) < 0) {
143 trace_sendto_err(ntp
->dest_addr
.sin_addr
);
150 switch (msg
->tsp_type
) {
161 * XXX check to see it is from ourself
163 tsp_time_sec
= msg
->tsp_time
.tv_sec
;
164 strlcpy(newdate
, ctime(&tsp_time_sec
), sizeof(newdate
));
165 if (!good_host_name(msg
->tsp_name
)) {
167 "attempted date change by %s to %s",
168 msg
->tsp_name
, newdate
);
174 gettimeofday(&ntime
, 0);
175 pollingtime
= ntime
.tv_sec
+ SAMPLEINTVL
;
179 if (!fromnet
|| fromnet
->status
!= MASTER
)
181 tsp_time_sec
= msg
->tsp_time
.tv_sec
;
182 strlcpy(newdate
, ctime(&tsp_time_sec
), sizeof(newdate
));
183 htp
= findhost(msg
->tsp_name
);
186 "attempted SET DATEREQ by uncontrolled %s to %s",
187 msg
->tsp_name
, newdate
);
190 if (htp
->seq
== msg
->tsp_seq
)
192 htp
->seq
= msg
->tsp_seq
;
195 "attempted SET DATEREQ by untrusted %s to %s",
196 msg
->tsp_name
, newdate
);
202 gettimeofday(&ntime
, 0);
203 pollingtime
= ntime
.tv_sec
+ SAMPLEINTVL
;
207 xmit(TSP_ACK
, msg
->tsp_seq
, &from
);
218 traceoff("Tracing ended at %s\n");
224 if (fromnet
->status
== MASTER
) {
226 addmach(msg
->tsp_name
, &from
,fromnet
);
229 strlcpy(tname
, msg
->tsp_name
, sizeof(tname
));
230 to
.tsp_type
= TSP_QUIT
;
231 strlcpy(to
.tsp_name
, hostname
, sizeof(to
.tsp_name
));
232 answer
= acksend(&to
, &taddr
, tname
,
234 if (answer
== NULL
) {
235 syslog(LOG_ERR
, "election error by %s",
242 * After a network partition, there can be
243 * more than one master: the first slave to
244 * come up will notify us of the situation.
246 if (!fromnet
|| fromnet
->status
!= MASTER
)
248 strlcpy(to
.tsp_name
, hostname
, sizeof(to
.tsp_name
));
250 /* The other master often gets into the same state,
251 * with boring results if we stay at it forever.
253 ntp
= fromnet
; /* (acksend() can leave fromnet=0) */
254 for (i
= 0; i
< 3; i
++) {
255 to
.tsp_type
= TSP_RESOLVE
;
256 strlcpy(to
.tsp_name
, hostname
,
257 sizeof(to
.tsp_name
));
258 answer
= acksend(&to
, &ntp
->dest_addr
,
259 ANYADDR
, TSP_MASTERACK
,
263 htp
= addmach(answer
->tsp_name
,&from
,ntp
);
264 to
.tsp_type
= TSP_QUIT
;
265 msg
= acksend(&to
, &htp
->addr
, htp
->name
,
266 TSP_ACK
, 0, htp
->noanswer
);
269 "no response from %s to CONFLICT-QUIT",
278 if (!fromnet
|| fromnet
->status
!= MASTER
)
281 * do not want to call synch() while waiting
284 gettimeofday(&ntime
, NULL
);
285 pollingtime
= ntime
.tv_sec
+ SAMPLEINTVL
;
289 doquit(msg
); /* become a slave */
293 if (!fromnet
|| fromnet
->status
!= MASTER
294 || !strcmp(msg
->tsp_name
, hostname
))
297 * We should not have received this from a net
298 * we are master on. There must be two masters.
300 htp
= addmach(msg
->tsp_name
, &from
,fromnet
);
301 to
.tsp_type
= TSP_QUIT
;
302 strlcpy(to
.tsp_name
, hostname
, sizeof(to
.tsp_name
));
303 answer
= acksend(&to
, &htp
->addr
, htp
->name
,
307 "loop breakage: no reply from %s=%s to QUIT",
308 htp
->name
, inet_ntoa(htp
->addr
.sin_addr
));
315 "\tnets = %d, masters = %d, slaves = %d, ignored = %d\n",
316 nnets
, nmasternets
, nslavenets
, nignorednets
);
325 fprintf(fd
, "garbage message: ");
336 * change the system date on the master
339 mchgdate(struct tsp
*msg
)
341 char tname
[MAXHOSTNAMELEN
];
343 struct timeval otime
, ntime
;
345 strlcpy(tname
, msg
->tsp_name
, sizeof(tname
));
347 xmit(TSP_DATEACK
, msg
->tsp_seq
, &from
);
349 strlcpy(olddate
, date(), sizeof(olddate
));
351 /* adjust time for residence on the queue */
352 gettimeofday(&otime
, 0);
353 adj_msg_time(msg
,&otime
);
355 timevalsub(&ntime
, &msg
->tsp_time
, &otime
);
356 if (ntime
.tv_sec
< MAXADJ
&& ntime
.tv_sec
> -MAXADJ
) {
358 * do not change the clock if we can adjust it
361 synch(tvtomsround(ntime
));
363 logwtmp("|", "date", "");
364 settimeofday(&msg
->tsp_time
, 0);
365 logwtmp("{", "date", "");
369 syslog(LOG_NOTICE
, "date changed by %s from %s",
375 * synchronize all of the slaves
382 struct timeval check
, stop
, wait
;
386 fprintf(fd
, "measurements starting at %s\n", date());
387 gettimeofday(&check
, 0);
388 for (htp
= self
.l_fwd
; htp
!= &self
; htp
= htp
->l_fwd
) {
389 if (htp
->noanswer
!= 0) {
390 measure_status
= measure(500, 100,
394 measure_status
= measure(3000, 100,
398 if (measure_status
!= GOOD
) {
399 /* The slave did not respond. We have
400 * just wasted lots of time on it.
402 htp
->delta
= HOSTDOWN
;
403 if (++htp
->noanswer
>= LOSTHOST
) {
406 "purging %s for not answering ICMP\n",
413 htp
->delta
= measure_delta
;
415 gettimeofday(&stop
, 0);
416 timevalsub(&stop
, &stop
, &check
);
417 if (stop
.tv_sec
>= 1) {
421 * ack messages periodically
425 if (0 != readmsg(TSP_TRACEON
,ANYADDR
,
428 gettimeofday(&check
, 0);
432 fprintf(fd
, "measurements finished at %s\n", date());
434 if (!(status
& SLAVE
)) {
436 mydelta
= networkdelta();
441 if (trace
&& (mydelta
!= 0 || (status
& SLAVE
)))
442 fprintf(fd
,"local correction of %ld ms.\n", mydelta
);
447 * sends the time to each slave after the master
448 * has received the command to set the network time
457 /* Do not listen to the consensus after forcing the time. This is because
458 * the consensus takes a while to reach the time we are dictating.
461 for (htp
= self
.l_fwd
; htp
!= &self
; htp
= htp
->l_fwd
) {
462 to
.tsp_type
= TSP_SETTIME
;
463 strlcpy(to
.tsp_name
, hostname
, sizeof(to
.tsp_name
));
464 gettimeofday(&to
.tsp_time
, 0);
465 answer
= acksend(&to
, &htp
->addr
, htp
->name
,
466 TSP_ACK
, 0, htp
->noanswer
);
467 if (answer
== NULL
) {
468 /* If the client does not respond, then we have
469 * just wasted lots of time on it.
472 "no reply to SETTIME from %s", htp
->name
);
473 if (++htp
->noanswer
>= LOSTHOST
) {
476 "purging %s for not answering",
489 static time_t next_time
;
496 if (!fd
) /* quit if tracing already off */
499 this_time
= times(&tm
);
500 if (this_time
+ (time_t)delta
< next_time
)
502 next_time
= this_time
+ CLK_TCK
;
504 fprintf(fd
, "host table: %d entries at %s\n", slvcount
, date());
507 for (i
= 1; i
<= slvcount
; i
++, htp
= htp
->l_fwd
) {
508 l
= strlen(htp
->name
) + 1;
509 if (length
+l
>= 80) {
514 fprintf(fd
, " %s", htp
->name
);
520 static struct hosttbl
*newhost_hash
;
521 static struct hosttbl
*lasthfree
= &hosttbl
[0];
524 struct hosttbl
* /* answer or 0 */
532 for (p
= name
, i
= 0; i
< 8 && *p
!= '\0'; i
++, p
++)
534 newhost_hash
= &hosttbl
[j
% NHOSTS
];
537 if (htp
->name
[0] == '\0')
540 if (!strcmp(name
, htp
->name
))
543 } while (htp
!= newhost_hash
);
548 * add a host to the list of controlled machines if not already there
551 addmach(char *name
, struct sockaddr_in
*addr
, struct netinfo
*ntp
)
553 struct hosttbl
*ret
, *p
, *b
, *f
;
555 ret
= findhost(name
);
557 if (slvcount
>= NHOSTS
) {
559 fprintf(fd
, "no more slots in host table\n");
562 syslog(LOG_ERR
, "no more slots in host table");
564 longjmp(jmpenv
, 2); /* give up and be a slave */
567 /* if our home hash slot is occupied, find a free entry
570 if (newhost_hash
->name
[0] != '\0') {
573 if (++lasthfree
> &hosttbl
[NHOSTS
])
574 lasthfree
= &hosttbl
[1];
575 } while (ret
->name
[0] != '\0');
577 if (!newhost_hash
->head
) {
578 /* Move an interloper using our home. Use
579 * scratch pointers in case the new head is
580 * pointing to itself.
582 f
= newhost_hash
->h_fwd
;
583 b
= newhost_hash
->h_bak
;
586 f
= newhost_hash
->l_fwd
;
587 b
= newhost_hash
->l_bak
;
590 bcopy(newhost_hash
,ret
,sizeof(*ret
));
596 /* link to an existing chain in our home
599 p
= newhost_hash
->h_bak
;
600 ret
->h_fwd
= newhost_hash
;
603 newhost_hash
->h_bak
= ret
;
613 strlcpy(ret
->name
, name
, sizeof(ret
->name
));
614 ret
->good
= good_host_name(name
);
616 ret
->l_bak
= self
.l_bak
;
617 self
.l_bak
->l_fwd
= ret
;
625 ret
->noanswer
= (ret
->noanswer
!= 0);
628 /* need to clear sequence number anyhow */
634 * remove the given machine (host table entry) from the host table.
637 remmach(struct hosttbl
*htp
)
639 struct hosttbl
*lprv
, *hnxt
, *f
, *b
;
642 fprintf(fd
, "remove %s\n", htp
->name
);
644 /* get out of the lists */
645 htp
->l_fwd
->l_bak
= lprv
= htp
->l_bak
;
646 htp
->l_bak
->l_fwd
= htp
->l_fwd
;
647 htp
->h_fwd
->h_bak
= htp
->h_bak
;
648 htp
->h_bak
->h_fwd
= hnxt
= htp
->h_fwd
;
650 /* If we are in the home slot, pull up the chain */
651 if (htp
->head
&& hnxt
!= htp
) {
655 /* Use scratch pointers in case the new head is pointing to
667 bcopy(hnxt
, htp
, sizeof(*htp
));
673 lasthfree
->name
[0] = '\0';
674 lasthfree
->h_fwd
= 0;
675 lasthfree
->l_fwd
= 0;
683 * Remove all the machines from the host table that exist on the given
684 * network. This is called when a master transitions to a slave on a
688 rmnetmachs(struct netinfo
*ntp
)
694 for (htp
= self
.l_fwd
; htp
!= &self
; htp
= htp
->l_fwd
) {
703 masterup(struct netinfo
*net
)
706 xmit(TSP_MASTERUP
, 0, &net
->dest_addr
);
709 * Do not tell new slaves our time for a while. This ensures
710 * we do not tell them to start using our time, before we have
711 * found a good master.
713 gettimeofday(&net
->slvwait
, 0);
717 newslave(struct tsp
*msg
)
720 struct tsp
*answer
, to
;
723 if (!fromnet
|| fromnet
->status
!= MASTER
)
726 htp
= addmach(msg
->tsp_name
, &from
,fromnet
);
727 htp
->seq
= msg
->tsp_seq
;
732 * If we are stable, send our time to the slave.
733 * Do not go crazy if the date has been changed.
735 gettimeofday(&now
, 0);
736 if (now
.tv_sec
>= fromnet
->slvwait
.tv_sec
+3
737 || now
.tv_sec
< fromnet
->slvwait
.tv_sec
) {
738 to
.tsp_type
= TSP_SETTIME
;
739 strlcpy(to
.tsp_name
, hostname
, sizeof(to
.tsp_name
));
740 gettimeofday(&to
.tsp_time
, 0);
741 answer
= acksend(&to
, &htp
->addr
,
748 "no reply to initial SETTIME from %s",
750 htp
->noanswer
= LOSTHOST
;
757 * react to a TSP_QUIT:
760 doquit(struct tsp
*msg
)
763 if (fromnet
->status
== MASTER
) {
764 if (!good_host_name(msg
->tsp_name
)) {
765 if (fromnet
->quit_count
<= 0) {
766 syslog(LOG_NOTICE
,"untrusted %s told us QUIT",
768 suppress(&from
, msg
->tsp_name
, fromnet
);
769 fromnet
->quit_count
= 1;
772 syslog(LOG_NOTICE
, "untrusted %s told us QUIT twice",
774 fromnet
->quit_count
= 2;
775 fromnet
->status
= NOMASTER
;
777 fromnet
->status
= SLAVE
;
780 longjmp(jmpenv
, 2); /* give up and be a slave */
783 if (!good_host_name(msg
->tsp_name
)) {
784 syslog(LOG_NOTICE
, "untrusted %s told us QUIT",
786 fromnet
->quit_count
= 2;
796 fd
= fopen(_PATH_TIMEDLOG
, "w");
801 fprintf(fd
,"Tracing started at %s\n", date());
818 fprintf(fd
, msg
, date());