iscontrol(8): Fix synopsis, sync usage() & improve markup
[dragonfly.git] / sbin / routed / table.c
blobb6b175c833a1d93bf50aac25c8941949833cf22c
1 /*
2 * Copyright (c) 1983, 1988, 1993
3 * The Regents of the University of California. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgment:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
33 * $FreeBSD: src/sbin/routed/table.c,v 1.9.2.2 2000/08/14 17:00:04 sheldonh Exp $
34 * $DragonFly: src/sbin/routed/table.c,v 1.6 2005/03/16 21:21:34 cpressey Exp $
37 #include "defs.h"
39 #if !defined(sgi) && !defined(__NetBSD__)
40 static char sccsid[] __attribute__((unused)) = "@(#)tables.c 8.1 (Berkeley) 6/5/93";
41 #elif defined(__NetBSD__)
42 __RCSID("$NetBSD$");
43 #endif
45 static struct rt_spare *rts_better(struct rt_entry *);
46 static struct rt_spare rts_empty = {0,0,0,HOPCNT_INFINITY,0,0,0};
47 static void set_need_flash(void);
48 #ifdef _HAVE_SIN_LEN
49 static void masktrim(struct sockaddr_in *ap);
50 #else
51 static void masktrim(struct sockaddr_in_new *ap);
52 #endif
55 struct radix_node_head *rhead; /* root of the radix tree */
57 int need_flash = 1; /* flash update needed
58 * start =1 to suppress the 1st
61 struct timeval age_timer; /* next check of old routes */
62 struct timeval need_kern = { /* need to update kernel table */
63 EPOCH+MIN_WAITTIME-1, 0
66 int stopint;
68 int total_routes;
70 /* zap any old routes through this gateway */
71 naddr age_bad_gate;
74 /* It is desirable to "aggregate" routes, to combine differing routes of
75 * the same metric and next hop into a common route with a smaller netmask
76 * or to suppress redundant routes, routes that add no information to
77 * routes with smaller netmasks.
79 * A route is redundant if and only if any and all routes with smaller
80 * but matching netmasks and nets are the same. Since routes are
81 * kept sorted in the radix tree, redundant routes always come second.
83 * There are two kinds of aggregations. First, two routes of the same bit
84 * mask and differing only in the least significant bit of the network
85 * number can be combined into a single route with a coarser mask.
87 * Second, a route can be suppressed in favor of another route with a more
88 * coarse mask provided no incompatible routes with intermediate masks
89 * are present. The second kind of aggregation involves suppressing routes.
90 * A route must not be suppressed if an incompatible route exists with
91 * an intermediate mask, since the suppressed route would be covered
92 * by the intermediate.
94 * This code relies on the radix tree walk encountering routes
95 * sorted first by address, with the smallest address first.
98 struct ag_info ag_slots[NUM_AG_SLOTS], *ag_avail, *ag_corsest, *ag_finest;
100 /* #define DEBUG_AG */
101 #ifdef DEBUG_AG
102 #define CHECK_AG() {int acnt = 0; struct ag_info *cag; \
103 for (cag = ag_avail; cag != 0; cag = cag->ag_fine) \
104 acnt++; \
105 for (cag = ag_corsest; cag != 0; cag = cag->ag_fine) \
106 acnt++; \
107 if (acnt != NUM_AG_SLOTS) { \
108 fflush(stderr); \
109 abort(); \
112 #else
113 #define CHECK_AG()
114 #endif
117 /* Output the contents of an aggregation table slot.
118 * This function must always be immediately followed with the deletion
119 * of the target slot.
121 static void
122 ag_out(struct ag_info *ag,
123 void (*out)(struct ag_info *))
125 struct ag_info *ag_cors;
126 naddr bit;
129 /* Forget it if this route should not be output for split-horizon. */
130 if (ag->ag_state & AGS_SPLIT_HZ)
131 return;
133 /* If we output both the even and odd twins, then the immediate parent,
134 * if it is present, is redundant, unless the parent manages to
135 * aggregate into something coarser.
136 * On successive calls, this code detects the even and odd twins,
137 * and marks the parent.
139 * Note that the order in which the radix tree code emits routes
140 * ensures that the twins are seen before the parent is emitted.
142 ag_cors = ag->ag_cors;
143 if (ag_cors != 0
144 && ag_cors->ag_mask == ag->ag_mask<<1
145 && ag_cors->ag_dst_h == (ag->ag_dst_h & ag_cors->ag_mask)) {
146 ag_cors->ag_state |= ((ag_cors->ag_dst_h == ag->ag_dst_h)
147 ? AGS_REDUN0
148 : AGS_REDUN1);
151 /* Skip it if this route is itself redundant.
153 * It is ok to change the contents of the slot here, since it is
154 * always deleted next.
156 if (ag->ag_state & AGS_REDUN0) {
157 if (ag->ag_state & AGS_REDUN1)
158 return; /* quit if fully redundant */
159 /* make it finer if it is half-redundant */
160 bit = (-ag->ag_mask) >> 1;
161 ag->ag_dst_h |= bit;
162 ag->ag_mask |= bit;
164 } else if (ag->ag_state & AGS_REDUN1) {
165 /* make it finer if it is half-redundant */
166 bit = (-ag->ag_mask) >> 1;
167 ag->ag_mask |= bit;
169 out(ag);
173 static void
174 ag_del(struct ag_info *ag)
176 CHECK_AG();
178 if (ag->ag_cors == 0)
179 ag_corsest = ag->ag_fine;
180 else
181 ag->ag_cors->ag_fine = ag->ag_fine;
183 if (ag->ag_fine == 0)
184 ag_finest = ag->ag_cors;
185 else
186 ag->ag_fine->ag_cors = ag->ag_cors;
188 ag->ag_fine = ag_avail;
189 ag_avail = ag;
191 CHECK_AG();
195 /* Flush routes waiting for aggregation.
196 * This must not suppress a route unless it is known that among all
197 * routes with coarser masks that match it, the one with the longest
198 * mask is appropriate. This is ensured by scanning the routes
199 * in lexical order, and with the most restrictive mask first
200 * among routes to the same destination.
202 void
203 ag_flush(naddr lim_dst_h, /* flush routes to here */
204 naddr lim_mask, /* matching this mask */
205 void (*out)(struct ag_info *))
207 struct ag_info *ag, *ag_cors;
208 naddr dst_h;
211 for (ag = ag_finest;
212 ag != 0 && ag->ag_mask >= lim_mask;
213 ag = ag_cors) {
214 ag_cors = ag->ag_cors;
216 /* work on only the specified routes */
217 dst_h = ag->ag_dst_h;
218 if ((dst_h & lim_mask) != lim_dst_h)
219 continue;
221 if (!(ag->ag_state & AGS_SUPPRESS))
222 ag_out(ag, out);
224 else for ( ; ; ag_cors = ag_cors->ag_cors) {
225 /* Look for a route that can suppress the
226 * current route */
227 if (ag_cors == 0) {
228 /* failed, so output it and look for
229 * another route to work on
231 ag_out(ag, out);
232 break;
235 if ((dst_h & ag_cors->ag_mask) == ag_cors->ag_dst_h) {
236 /* We found a route with a coarser mask that
237 * aggregates the current target.
239 * If it has a different next hop, it
240 * cannot replace the target, so output
241 * the target.
243 if (ag->ag_gate != ag_cors->ag_gate
244 && !(ag->ag_state & AGS_FINE_GATE)
245 && !(ag_cors->ag_state & AGS_CORS_GATE)) {
246 ag_out(ag, out);
247 break;
250 /* If the coarse route has a good enough
251 * metric, it suppresses the target.
252 * If the suppressed target was redundant,
253 * then mark the suppressor redundant.
255 if (ag_cors->ag_pref <= ag->ag_pref) {
256 if (ag_cors->ag_seqno > ag->ag_seqno)
257 ag_cors->ag_seqno = ag->ag_seqno;
258 if (AG_IS_REDUN(ag->ag_state)
259 && ag_cors->ag_mask==ag->ag_mask<<1) {
260 if (ag_cors->ag_dst_h == dst_h)
261 ag_cors->ag_state |= AGS_REDUN0;
262 else
263 ag_cors->ag_state |= AGS_REDUN1;
265 if (ag->ag_tag != ag_cors->ag_tag)
266 ag_cors->ag_tag = 0;
267 if (ag->ag_nhop != ag_cors->ag_nhop)
268 ag_cors->ag_nhop = 0;
269 break;
274 /* That route has either been output or suppressed */
275 ag_cors = ag->ag_cors;
276 ag_del(ag);
279 CHECK_AG();
283 /* Try to aggregate a route with previous routes.
 *
 * dst is converted with ntohl() on entry; mask is used as passed in,
 * without any conversion.
 * A mask that fails the contiguity test below is handed straight to
 * out() through a temporary ag_info and never enters the table.
 * Otherwise the function
 *   1. walks from ag_corsest toward finer slots looking for the slot
 *      with this mask, deleting suppressible entries on the way,
 *   2. repeatedly combines or promotes even/odd twins, shifting the
 *      mask left by one bit on each pass,
 *   3. flushes whatever still occupies the target slot via ag_flush(),
 *   4. takes a slot from ag_avail, fills it in and links it between
 *      ag_cors and ag.
 * NOTE(review): ag_avail is dereferenced without a null check, which
 * presumably relies on NUM_AG_SLOTS being large enough - confirm.
285 void
286 ag_check(naddr dst,
287 naddr mask,
288 naddr gate,
289 naddr nhop,
290 char metric,
291 char pref,
292 u_int seqnum,
293 u_short tag,
294 u_short state,
295 void (*out)(struct ag_info *)) /* output using this */
297 struct ag_info *ag, *nag, *ag_cors;
298 naddr xaddr;
299 int x;
301 dst = ntohl(dst);
303 /* Punt non-contiguous subnet masks.
305 * (X & -X) contains a single bit if and only if X is a power of 2.
306 * (X + (X & -X)) == 0 if and only if X is a power of 2.
308 if ((mask & -mask) + mask != 0) {
309 struct ag_info nc_ag;
311 nc_ag.ag_dst_h = dst;
312 nc_ag.ag_mask = mask;
313 nc_ag.ag_gate = gate;
314 nc_ag.ag_nhop = nhop;
315 nc_ag.ag_metric = metric;
316 nc_ag.ag_pref = pref;
317 nc_ag.ag_tag = tag;
318 nc_ag.ag_state = state;
319 nc_ag.ag_seqno = seqnum;
320 out(&nc_ag);
321 return;
324 /* Search for the right slot in the aggregation table.
326 ag_cors = 0;
327 ag = ag_corsest;
328 while (ag != 0) {
329 if (ag->ag_mask >= mask)
330 break;
332 /* Suppress old routes (i.e. combine with compatible routes
333 * with coarser masks) as we look for the right slot in the
334 * aggregation table for the new route.
335 * A route to an address less than the current destination
336 * will not be affected by the current route or any route
337 * seen hereafter. That means it is safe to suppress it.
338 * This check keeps poor routes (e.g. with large hop counts)
339 * from preventing suppression of finer routes.
341 if (ag_cors != 0
342 && ag->ag_dst_h < dst
343 && (ag->ag_state & AGS_SUPPRESS)
344 && ag_cors->ag_pref <= ag->ag_pref
345 && (ag->ag_dst_h & ag_cors->ag_mask) == ag_cors->ag_dst_h
346 && (ag_cors->ag_gate == ag->ag_gate
347 || (ag->ag_state & AGS_FINE_GATE)
348 || (ag_cors->ag_state & AGS_CORS_GATE))) {
349 if (ag_cors->ag_seqno > ag->ag_seqno)
350 ag_cors->ag_seqno = ag->ag_seqno;
351 /* If the suppressed target was redundant,
352 * then mark the suppressor redundant.
354 if (AG_IS_REDUN(ag->ag_state)
355 && ag_cors->ag_mask == ag->ag_mask<<1) {
356 if (ag_cors->ag_dst_h == dst)
357 ag_cors->ag_state |= AGS_REDUN0;
358 else
359 ag_cors->ag_state |= AGS_REDUN1;
361 if (ag->ag_tag != ag_cors->ag_tag)
362 ag_cors->ag_tag = 0;
363 if (ag->ag_nhop != ag_cors->ag_nhop)
364 ag_cors->ag_nhop = 0;
365 ag_del(ag);
366 CHECK_AG();
367 } else {
368 ag_cors = ag;
370 ag = ag_cors->ag_fine;
373 /* If we find the even/odd twin of the new route, and if the
374 * masks and so forth are equal, we can aggregate them.
375 * We can probably promote one of the pair.
377 * Since the routes are encountered in lexical order,
378 * the new route must be odd. However, the second or later
379 * times around this loop, it could be the even twin promoted
380 * from the even/odd pair of twins of the finer route.
382 while (ag != 0
383 && ag->ag_mask == mask
384 && ((ag->ag_dst_h ^ dst) & (mask<<1)) == 0) {
386 /* Here we know the target route and the route in the current
387 * slot have the same netmasks and differ by at most the
388 * last bit. They are either for the same destination, or
389 * for an even/odd pair of destinations.
391 if (ag->ag_dst_h == dst) {
392 /* We have two routes to the same destination.
393 * Routes are encountered in lexical order, so a
394 * route is never promoted until the parent route is
395 * already present. So we know that the new route is
396 * a promoted (or aggregated) pair and the route
397 * already in the slot is the explicit route.
399 * Prefer the best route if their metrics differ,
400 * or the aggregated one if not, following a sort
401 * of longest-match rule.
403 if (pref <= ag->ag_pref) {
404 ag->ag_gate = gate;
405 ag->ag_nhop = nhop;
406 ag->ag_tag = tag;
407 ag->ag_metric = metric;
408 ag->ag_pref = pref;
409 x = ag->ag_state;
410 ag->ag_state = state;
411 state = x;
414 /* The sequence number controls flash updating,
415 * and should be the smaller of the two.
417 if (ag->ag_seqno > seqnum)
418 ag->ag_seqno = seqnum;
420 /* Some bits are set if they are set on either route,
421 * except when the route is for an interface.
423 if (!(ag->ag_state & AGS_IF))
424 ag->ag_state |= (state & (AGS_AGGREGATE_EITHER
425 | AGS_REDUN0
426 | AGS_REDUN1));
427 return;
430 /* If one of the routes can be promoted and the other can
431 * be suppressed, it may be possible to combine them or
432 * worthwhile to promote one.
434 * Any route that can be promoted is always
435 * marked to be eligible to be suppressed.
437 if (!((state & AGS_AGGREGATE)
438 && (ag->ag_state & AGS_SUPPRESS))
439 && !((ag->ag_state & AGS_AGGREGATE)
440 && (state & AGS_SUPPRESS)))
441 break;
443 /* A pair of even/odd twin routes can be combined
444 * if either is redundant, or if they are via the
445 * same gateway and have the same metric.
447 if (AG_IS_REDUN(ag->ag_state)
448 || AG_IS_REDUN(state)
449 || (ag->ag_gate == gate
450 && ag->ag_pref == pref
451 && (state & ag->ag_state & AGS_AGGREGATE) != 0)) {
453 /* We have both the even and odd pairs.
454 * Since the routes are encountered in order,
455 * the route in the slot must be the even twin.
457 * Combine and promote (aggregate) the pair of routes.
459 if (seqnum > ag->ag_seqno)
460 seqnum = ag->ag_seqno;
461 if (!AG_IS_REDUN(state))
462 state &= ~AGS_REDUN1;
463 if (AG_IS_REDUN(ag->ag_state))
464 state |= AGS_REDUN0;
465 else
466 state &= ~AGS_REDUN0;
467 state |= (ag->ag_state & AGS_AGGREGATE_EITHER);
468 if (ag->ag_tag != tag)
469 tag = 0;
470 if (ag->ag_nhop != nhop)
471 nhop = 0;
473 /* Get rid of the even twin that was already
474 * in the slot.
476 ag_del(ag);
478 } else if (ag->ag_pref >= pref
479 && (ag->ag_state & AGS_AGGREGATE)) {
480 /* If we cannot combine the pair, maybe the route
481 * with the worse metric can be promoted.
483 * Promote the old, even twin, by giving its slot
484 * in the table to the new, odd twin.
486 ag->ag_dst_h = dst;
488 xaddr = ag->ag_gate;
489 ag->ag_gate = gate;
490 gate = xaddr;
492 xaddr = ag->ag_nhop;
493 ag->ag_nhop = nhop;
494 nhop = xaddr;
496 x = ag->ag_tag;
497 ag->ag_tag = tag;
498 tag = x;
500 /* The promoted route is even-redundant only if the
501 * even twin was fully redundant. It is not
502 * odd-redundant because the odd-twin will still be
503 * in the table.
505 x = ag->ag_state;
506 if (!AG_IS_REDUN(x))
507 x &= ~AGS_REDUN0;
508 x &= ~AGS_REDUN1;
509 ag->ag_state = state;
510 state = x;
512 x = ag->ag_metric;
513 ag->ag_metric = metric;
514 metric = x;
516 x = ag->ag_pref;
517 ag->ag_pref = pref;
518 pref = x;
520 /* take the newest sequence number */
521 if (seqnum >= ag->ag_seqno)
522 seqnum = ag->ag_seqno;
523 else
524 ag->ag_seqno = seqnum;
526 } else {
527 if (!(state & AGS_AGGREGATE))
528 break; /* cannot promote either twin */
530 /* Promote the new, odd twin by shaving its
531 * mask and address.
532 * The promoted route is odd-redundant only if the
533 * odd twin was fully redundant. It is not
534 * even-redundant because the even twin is still in
535 * the table.
537 if (!AG_IS_REDUN(state))
538 state &= ~AGS_REDUN1;
539 state &= ~AGS_REDUN0;
540 if (seqnum > ag->ag_seqno)
541 seqnum = ag->ag_seqno;
542 else
543 ag->ag_seqno = seqnum;
546 mask <<= 1;
547 dst &= mask;
549 if (ag_cors == 0) {
550 ag = ag_corsest;
551 break;
553 ag = ag_cors;
554 ag_cors = ag->ag_cors;
557 /* When we can no longer promote and combine routes,
558 * flush the old route in the target slot. Also flush
559 * any finer routes that we know will never be aggregated by
560 * the new route.
562 * In case we moved toward coarser masks,
563 * get back where we belong
565 if (ag != 0
566 && ag->ag_mask < mask) {
567 ag_cors = ag;
568 ag = ag->ag_fine;
571 /* Empty the target slot
573 if (ag != 0 && ag->ag_mask == mask) {
574 ag_flush(ag->ag_dst_h, ag->ag_mask, out);
575 ag = (ag_cors == 0) ? ag_corsest : ag_cors->ag_fine;
578 #ifdef DEBUG_AG
579 fflush(stderr);
580 if (ag == 0 && ag_cors != ag_finest)
581 abort();
582 if (ag_cors == 0 && ag != ag_corsest)
583 abort();
584 if (ag != 0 && ag->ag_cors != ag_cors)
585 abort();
586 if (ag_cors != 0 && ag_cors->ag_fine != ag)
587 abort();
588 CHECK_AG();
589 #endif
591 /* Save the new route on the end of the table.
593 nag = ag_avail;
594 ag_avail = nag->ag_fine;
596 nag->ag_dst_h = dst;
597 nag->ag_mask = mask;
598 nag->ag_gate = gate;
599 nag->ag_nhop = nhop;
600 nag->ag_metric = metric;
601 nag->ag_pref = pref;
602 nag->ag_tag = tag;
603 nag->ag_state = state;
604 nag->ag_seqno = seqnum;
606 nag->ag_fine = ag;
607 if (ag != 0)
608 ag->ag_cors = nag;
609 else
610 ag_finest = nag;
611 nag->ag_cors = ag_cors;
612 if (ag_cors == 0)
613 ag_corsest = nag;
614 else
615 ag_cors->ag_fine = nag;
616 CHECK_AG();
#define NAME0_LEN 14
/* Map a routing-socket message type to its symbolic name.
 * Types 1 through the size of the table map to fixed strings.
 * Anything else (including 0) is formatted into a static buffer, so
 * that result is overwritten by the next such call.
 */
static const char *
rtm_type_name(u_char type)
{
	static const char *rtm_types[] = {
		"RTM_ADD",
		"RTM_DELETE",
		"RTM_CHANGE",
		"RTM_GET",
		"RTM_LOSING",
		"RTM_REDIRECT",
		"RTM_MISS",
		"RTM_LOCK",
		"RTM_OLDADD",
		"RTM_OLDDEL",
		"RTM_RESOLVE",
		"RTM_NEWADDR",
		"RTM_DELADDR",
		"RTM_IFINFO",
		"RTM_NEWMADDR",
		"RTM_DELMADDR"
	};
#define NEW_RTM_PAT "RTM type %#x"
	static char name0[sizeof(NEW_RTM_PAT)+2];


	/* the table is indexed by type-1 */
	if (type != 0
	    && type <= sizeof(rtm_types)/sizeof(rtm_types[0]))
		return rtm_types[type-1];

	snprintf(name0, sizeof(name0), NEW_RTM_PAT, type);
	return name0;
#undef NEW_RTM_PAT
}
657 /* Trim a mask in a sockaddr
658 * Produce a length of 0 for an address of 0.
659 * Otherwise produce the index of the first zero byte.
661 void
662 #ifdef _HAVE_SIN_LEN
663 masktrim(struct sockaddr_in *ap)
664 #else
665 masktrim(struct sockaddr_in_new *ap)
666 #endif
668 char *cp;
670 if (ap->sin_addr.s_addr == 0) {
671 ap->sin_len = 0;
672 return;
674 cp = (char *)(&ap->sin_addr.s_addr+1);
675 while (*--cp == 0)
676 continue;
677 ap->sin_len = cp - (char*)ap + 1;
681 /* Tell the kernel to add, delete or change a route
683 static void
684 rtioctl(int action, /* RTM_DELETE, etc */
685 naddr dst,
686 naddr gate,
687 naddr mask,
688 int metric,
689 int flags)
691 struct {
692 struct rt_msghdr w_rtm;
693 struct sockaddr_in w_dst;
694 struct sockaddr_in w_gate;
695 #ifdef _HAVE_SA_LEN
696 struct sockaddr_in w_mask;
697 #else
698 struct sockaddr_in_new w_mask;
699 #endif
700 } w;
701 long cc;
702 # define PAT " %-10s %s metric=%d flags=%#x"
703 # define ARGS rtm_type_name(action), rtname(dst,mask,gate), metric, flags
705 again:
706 memset(&w, 0, sizeof(w));
707 w.w_rtm.rtm_msglen = sizeof(w);
708 w.w_rtm.rtm_version = RTM_VERSION;
709 w.w_rtm.rtm_type = action;
710 w.w_rtm.rtm_flags = flags;
711 w.w_rtm.rtm_seq = ++rt_sock_seqno;
712 w.w_rtm.rtm_addrs = RTA_DST|RTA_GATEWAY;
713 if (metric != 0 || action == RTM_CHANGE) {
714 w.w_rtm.rtm_rmx.rmx_hopcount = metric;
715 w.w_rtm.rtm_inits |= RTV_HOPCOUNT;
717 w.w_dst.sin_family = AF_INET;
718 w.w_dst.sin_addr.s_addr = dst;
719 w.w_gate.sin_family = AF_INET;
720 w.w_gate.sin_addr.s_addr = gate;
721 #ifdef _HAVE_SA_LEN
722 w.w_dst.sin_len = sizeof(w.w_dst);
723 w.w_gate.sin_len = sizeof(w.w_gate);
724 #endif
725 if (mask == HOST_MASK) {
726 w.w_rtm.rtm_flags |= RTF_HOST;
727 w.w_rtm.rtm_msglen -= sizeof(w.w_mask);
728 } else {
729 w.w_rtm.rtm_addrs |= RTA_NETMASK;
730 w.w_mask.sin_addr.s_addr = htonl(mask);
731 #ifdef _HAVE_SA_LEN
732 masktrim(&w.w_mask);
733 if (w.w_mask.sin_len == 0)
734 w.w_mask.sin_len = sizeof(long);
735 w.w_rtm.rtm_msglen -= (sizeof(w.w_mask) - w.w_mask.sin_len);
736 #endif
739 #ifndef NO_INSTALL
740 cc = write(rt_sock, &w, w.w_rtm.rtm_msglen);
741 if (cc < 0) {
742 if (errno == ESRCH
743 && (action == RTM_CHANGE || action == RTM_DELETE)) {
744 trace_act("route disappeared before" PAT, ARGS);
745 if (action == RTM_CHANGE) {
746 action = RTM_ADD;
747 goto again;
749 return;
751 msglog("write(rt_sock)" PAT ": %s", ARGS, strerror(errno));
752 return;
753 } else if (cc != w.w_rtm.rtm_msglen) {
754 msglog("write(rt_sock) wrote %ld instead of %d for" PAT,
755 cc, w.w_rtm.rtm_msglen, ARGS);
756 return;
758 #endif
759 if (TRACEKERNEL)
760 trace_misc("write kernel" PAT, ARGS);
761 #undef PAT
762 #undef ARGS
766 #define KHASH_SIZE 71 /* should be prime */
767 #define KHASH(a,m) khash_bins[((a) ^ (m)) % KHASH_SIZE]
768 static struct khash {
769 struct khash *k_next;
770 naddr k_dst;
771 naddr k_mask;
772 naddr k_gate;
773 short k_metric;
774 u_short k_state;
775 #define KS_NEW 0x001
776 #define KS_DELETE 0x002 /* need to delete the route */
777 #define KS_ADD 0x004 /* add to the kernel */
778 #define KS_CHANGE 0x008 /* tell kernel to change the route */
779 #define KS_DEL_ADD 0x010 /* delete & add to change the kernel */
780 #define KS_STATIC 0x020 /* Static flag in kernel */
781 #define KS_GATEWAY 0x040 /* G flag in kernel */
782 #define KS_DYNAMIC 0x080 /* result of redirect */
783 #define KS_DELETED 0x100 /* already deleted from kernel */
784 #define KS_CHECK 0x200
785 time_t k_keep;
786 #define K_KEEP_LIM 30
787 time_t k_redirect_time; /* when redirected route 1st seen */
788 } *khash_bins[KHASH_SIZE];
791 static struct khash*
792 kern_find(naddr dst, naddr mask, struct khash ***ppk)
794 struct khash *k, **pk;
796 for (pk = &KHASH(dst,mask); (k = *pk) != 0; pk = &k->k_next) {
797 if (k->k_dst == dst && k->k_mask == mask)
798 break;
800 if (ppk != 0)
801 *ppk = pk;
802 return k;
806 static struct khash*
807 kern_add(naddr dst, naddr mask)
809 struct khash *k, **pk;
811 k = kern_find(dst, mask, &pk);
812 if (k != 0)
813 return k;
815 k = (struct khash *)rtmalloc(sizeof(*k), "kern_add");
817 memset(k, 0, sizeof(*k));
818 k->k_dst = dst;
819 k->k_mask = mask;
820 k->k_state = KS_NEW;
821 k->k_keep = now.tv_sec;
822 *pk = k;
824 return k;
828 /* If a kernel route has a non-zero metric, check that it is still in the
829 * daemon table, and not deleted by interfaces coming and going.
831 static void
832 kern_check_static(struct khash *k,
833 struct interface *ifp)
835 struct rt_entry *rt;
836 struct rt_spare new;
838 if (k->k_metric == 0)
839 return;
841 memset(&new, 0, sizeof(new));
842 new.rts_ifp = ifp;
843 new.rts_gate = k->k_gate;
844 new.rts_router = (ifp != 0) ? ifp->int_addr : loopaddr;
845 new.rts_metric = k->k_metric;
846 new.rts_time = now.tv_sec;
848 rt = rtget(k->k_dst, k->k_mask);
849 if (rt != 0) {
850 if (!(rt->rt_state & RS_STATIC))
851 rtchange(rt, rt->rt_state | RS_STATIC, &new, 0);
852 } else {
853 rtadd(k->k_dst, k->k_mask, RS_STATIC, &new);
858 /* operate on a kernel entry
860 static void
861 kern_ioctl(struct khash *k,
862 int action, /* RTM_DELETE, etc */
863 int flags)
866 switch (action) {
867 case RTM_DELETE:
868 k->k_state &= ~KS_DYNAMIC;
869 if (k->k_state & KS_DELETED)
870 return;
871 k->k_state |= KS_DELETED;
872 break;
873 case RTM_ADD:
874 k->k_state &= ~KS_DELETED;
875 break;
876 case RTM_CHANGE:
877 if (k->k_state & KS_DELETED) {
878 action = RTM_ADD;
879 k->k_state &= ~KS_DELETED;
881 break;
884 rtioctl(action, k->k_dst, k->k_gate, k->k_mask, k->k_metric, flags);
888 /* add a route the kernel told us
890 static void
891 rtm_add(struct rt_msghdr *rtm,
892 struct rt_addrinfo *info,
893 time_t keep)
895 struct khash *k;
896 struct interface *ifp;
897 naddr mask;
900 if (rtm->rtm_flags & RTF_HOST) {
901 mask = HOST_MASK;
902 } else if (INFO_MASK(info) != 0) {
903 mask = ntohl(S_ADDR(INFO_MASK(info)));
904 } else {
905 msglog("ignore %s without mask", rtm_type_name(rtm->rtm_type));
906 return;
909 k = kern_add(S_ADDR(INFO_DST(info)), mask);
910 if (k->k_state & KS_NEW)
911 k->k_keep = now.tv_sec+keep;
912 if (INFO_GATE(info) == 0) {
913 trace_act("note %s without gateway",
914 rtm_type_name(rtm->rtm_type));
915 k->k_metric = HOPCNT_INFINITY;
916 } else if (INFO_GATE(info)->sa_family != AF_INET) {
917 trace_act("note %s with gateway AF=%d",
918 rtm_type_name(rtm->rtm_type),
919 INFO_GATE(info)->sa_family);
920 k->k_metric = HOPCNT_INFINITY;
921 } else {
922 k->k_gate = S_ADDR(INFO_GATE(info));
923 k->k_metric = rtm->rtm_rmx.rmx_hopcount;
924 if (k->k_metric < 0)
925 k->k_metric = 0;
926 else if (k->k_metric > HOPCNT_INFINITY-1)
927 k->k_metric = HOPCNT_INFINITY-1;
929 k->k_state &= ~(KS_DELETE | KS_ADD | KS_CHANGE | KS_DEL_ADD
930 | KS_DELETED | KS_GATEWAY | KS_STATIC
931 | KS_NEW | KS_CHECK);
932 if (rtm->rtm_flags & RTF_GATEWAY)
933 k->k_state |= KS_GATEWAY;
934 if (rtm->rtm_flags & RTF_STATIC)
935 k->k_state |= KS_STATIC;
937 if (0 != (rtm->rtm_flags & (RTF_DYNAMIC | RTF_MODIFIED))) {
938 if (INFO_AUTHOR(info) != 0
939 && INFO_AUTHOR(info)->sa_family == AF_INET)
940 ifp = iflookup(S_ADDR(INFO_AUTHOR(info)));
941 else
942 ifp = 0;
943 if (supplier
944 && (ifp == 0 || !(ifp->int_state & IS_REDIRECT_OK))) {
945 /* Routers are not supposed to listen to redirects,
946 * so delete it if it came via an unknown interface
947 * or the interface does not have special permission.
949 k->k_state &= ~KS_DYNAMIC;
950 k->k_state |= KS_DELETE;
951 LIM_SEC(need_kern, 0);
952 trace_act("mark for deletion redirected %s --> %s"
953 " via %s",
954 addrname(k->k_dst, k->k_mask, 0),
955 naddr_ntoa(k->k_gate),
956 ifp ? ifp->int_name : "unknown interface");
957 } else {
958 k->k_state |= KS_DYNAMIC;
959 k->k_redirect_time = now.tv_sec;
960 trace_act("accept redirected %s --> %s via %s",
961 addrname(k->k_dst, k->k_mask, 0),
962 naddr_ntoa(k->k_gate),
963 ifp ? ifp->int_name : "unknown interface");
965 return;
968 /* If it is not a static route, quit until the next comparison
969 * between the kernel and daemon tables, when it will be deleted.
971 if (!(k->k_state & KS_STATIC)) {
972 k->k_state |= KS_DELETE;
973 LIM_SEC(need_kern, k->k_keep);
974 return;
977 /* Put static routes with real metrics into the daemon table so
978 * they can be advertised.
980 * Find the interface toward the gateway.
982 ifp = iflookup(k->k_gate);
983 if (ifp == 0)
984 msglog("static route %s --> %s impossibly lacks ifp",
985 addrname(S_ADDR(INFO_DST(info)), mask, 0),
986 naddr_ntoa(k->k_gate));
988 kern_check_static(k, ifp);
992 /* deal with packet loss
994 static void
995 rtm_lose(struct rt_msghdr *rtm,
996 struct rt_addrinfo *info)
998 if (INFO_GATE(info) == 0
999 || INFO_GATE(info)->sa_family != AF_INET) {
1000 trace_act("ignore %s without gateway",
1001 rtm_type_name(rtm->rtm_type));
1002 return;
1005 if (rdisc_ok)
1006 rdisc_age(S_ADDR(INFO_GATE(info)));
1007 age(S_ADDR(INFO_GATE(info)));
1011 /* Make the gateway slot of an info structure point to something
1012 * useful. If it is not already useful, but it specifies an interface,
1013 * then fill in the sockaddr_in provided and point it there.
1015 static int
1016 get_info_gate(struct sockaddr **sap,
1017 struct sockaddr_in *in)
1019 struct sockaddr_dl *sdl = (struct sockaddr_dl *)*sap;
1020 struct interface *ifp;
1022 if (sdl == 0)
1023 return 0;
1024 if ((sdl)->sdl_family == AF_INET)
1025 return 1;
1026 if ((sdl)->sdl_family != AF_LINK)
1027 return 0;
1029 ifp = ifwithindex(sdl->sdl_index, 1);
1030 if (ifp == 0)
1031 return 0;
1033 in->sin_addr.s_addr = ifp->int_addr;
1034 #ifdef _HAVE_SA_LEN
1035 in->sin_len = sizeof(*in);
1036 #endif
1037 in->sin_family = AF_INET;
1038 *sap = (struct sockaddr *)in;
1040 return 1;
1044 /* Clean the kernel table by copying it to the daemon image.
1045 * Eventually the daemon will delete any extra routes.
1047 void
1048 flush_kern(void)
1050 static char *sysctl_buf;
1051 static size_t sysctl_buf_size = 0;
1052 size_t needed;
1053 int mib[6];
1054 char *next, *lim;
1055 struct rt_msghdr *rtm;
1056 struct sockaddr_in gate_sin;
1057 struct rt_addrinfo info;
1058 int i;
1059 struct khash *k;
1062 for (i = 0; i < KHASH_SIZE; i++) {
1063 for (k = khash_bins[i]; k != 0; k = k->k_next) {
1064 k->k_state |= KS_CHECK;
1068 mib[0] = CTL_NET;
1069 mib[1] = PF_ROUTE;
1070 mib[2] = 0; /* protocol */
1071 mib[3] = 0; /* wildcard address family */
1072 mib[4] = NET_RT_DUMP;
1073 mib[5] = 0; /* no flags */
1074 for (;;) {
1075 if ((needed = sysctl_buf_size) != 0) {
1076 if (sysctl(mib, 6, sysctl_buf,&needed, 0, 0) >= 0)
1077 break;
1078 if (errno != ENOMEM && errno != EFAULT)
1079 BADERR(1,"flush_kern: sysctl(RT_DUMP)");
1080 free(sysctl_buf);
1081 needed = 0;
1083 if (sysctl(mib, 6, 0, &needed, 0, 0) < 0)
1084 BADERR(1,"flush_kern: sysctl(RT_DUMP) estimate");
1085 /* Kludge around the habit of some systems, such as
1086 * BSD/OS 3.1, to not admit how many routes are in the
1087 * kernel, or at least to be quite wrong.
1089 needed += 50*(sizeof(*rtm)+5*sizeof(struct sockaddr));
1090 sysctl_buf = rtmalloc(sysctl_buf_size = needed,
1091 "flush_kern sysctl(RT_DUMP)");
1094 lim = sysctl_buf + needed;
1095 for (next = sysctl_buf; next < lim; next += rtm->rtm_msglen) {
1096 rtm = (struct rt_msghdr *)next;
1097 if (rtm->rtm_msglen == 0) {
1098 msglog("zero length kernel route at "
1099 " %#lx in buffer %#lx before %#lx",
1100 (u_long)rtm, (u_long)sysctl_buf, (u_long)lim);
1101 break;
1104 rt_xaddrs(&info,
1105 (struct sockaddr *)(rtm+1),
1106 (struct sockaddr *)(next + rtm->rtm_msglen),
1107 rtm->rtm_addrs);
1109 if (INFO_DST(&info) == 0
1110 || INFO_DST(&info)->sa_family != AF_INET)
1111 continue;
1113 /* ignore ARP table entries on systems with a merged route
1114 * and ARP table.
1116 if (rtm->rtm_flags & RTF_LLINFO)
1117 continue;
1119 /* ignore multicast addresses
1121 if (IN_MULTICAST(ntohl(S_ADDR(INFO_DST(&info)))))
1122 continue;
1124 if (!get_info_gate(&INFO_GATE(&info), &gate_sin))
1125 continue;
1127 /* Note static routes and interface routes, and also
1128 * preload the image of the kernel table so that
1129 * we can later clean it, as well as avoid making
1130 * unneeded changes. Keep the old kernel routes for a
1131 * few seconds to allow a RIP or router-discovery
1132 * response to be heard.
1134 rtm_add(rtm,&info,MIN_WAITTIME);
1137 for (i = 0; i < KHASH_SIZE; i++) {
1138 for (k = khash_bins[i]; k != 0; k = k->k_next) {
1139 if (k->k_state & KS_CHECK) {
1140 msglog("%s --> %s disappeared from kernel",
1141 addrname(k->k_dst, k->k_mask, 0),
1142 naddr_ntoa(k->k_gate));
1143 del_static(k->k_dst, k->k_mask, k->k_gate, 1);
1150 /* Listen to announcements from the kernel
1152 void
1153 read_rt(void)
1155 long cc;
1156 struct interface *ifp;
1157 struct sockaddr_in gate_sin;
1158 naddr mask, gate;
1159 union {
1160 struct {
1161 struct rt_msghdr rtm;
1162 struct sockaddr addrs[RTAX_MAX];
1163 } r;
1164 struct if_msghdr ifm;
1165 } m;
1166 char str[100], *strp;
1167 struct rt_addrinfo info;
1170 for (;;) {
1171 cc = read(rt_sock, &m, sizeof(m));
1172 if (cc <= 0) {
1173 if (cc < 0 && errno != EWOULDBLOCK)
1174 LOGERR("read(rt_sock)");
1175 return;
1178 if (m.r.rtm.rtm_version != RTM_VERSION) {
1179 msglog("bogus routing message version %d",
1180 m.r.rtm.rtm_version);
1181 continue;
1184 /* Ignore our own results.
1186 if (m.r.rtm.rtm_type <= RTM_CHANGE
1187 && m.r.rtm.rtm_pid == mypid) {
1188 static int complained = 0;
1189 if (!complained) {
1190 msglog("receiving our own change messages");
1191 complained = 1;
1193 continue;
1196 if (m.r.rtm.rtm_type == RTM_IFINFO
1197 || m.r.rtm.rtm_type == RTM_NEWADDR
1198 || m.r.rtm.rtm_type == RTM_DELADDR) {
1199 ifp = ifwithindex(m.ifm.ifm_index,
1200 m.r.rtm.rtm_type != RTM_DELADDR);
1201 if (ifp == 0)
1202 trace_act("note %s with flags %#x"
1203 " for unknown interface index #%d",
1204 rtm_type_name(m.r.rtm.rtm_type),
1205 m.ifm.ifm_flags,
1206 m.ifm.ifm_index);
1207 else
1208 trace_act("note %s with flags %#x for %s",
1209 rtm_type_name(m.r.rtm.rtm_type),
1210 m.ifm.ifm_flags,
1211 ifp->int_name);
1213 /* After being informed of a change to an interface,
1214 * check them all now if the check would otherwise
1215 * be a long time from now, if the interface is
1216 * not known, or if the interface has been turned
1217 * off or on.
1219 if (ifinit_timer.tv_sec-now.tv_sec>=CHECK_BAD_INTERVAL
1220 || ifp == 0
1221 || ((ifp->int_if_flags ^ m.ifm.ifm_flags)
1222 & IFF_UP) != 0)
1223 ifinit_timer.tv_sec = now.tv_sec;
1224 continue;
1227 strcpy(str, rtm_type_name(m.r.rtm.rtm_type));
1228 strp = &str[strlen(str)];
1229 if (m.r.rtm.rtm_type <= RTM_CHANGE)
1230 strp += sprintf(strp," from pid %d",m.r.rtm.rtm_pid);
1232 rt_xaddrs(&info, m.r.addrs, &m.r.addrs[RTAX_MAX],
1233 m.r.rtm.rtm_addrs);
1235 if (INFO_DST(&info) == 0) {
1236 trace_act("ignore %s without dst", str);
1237 continue;
1240 if (INFO_DST(&info)->sa_family != AF_INET) {
1241 trace_act("ignore %s for AF %d", str,
1242 INFO_DST(&info)->sa_family);
1243 continue;
1246 mask = ((INFO_MASK(&info) != 0)
1247 ? ntohl(S_ADDR(INFO_MASK(&info)))
1248 : (m.r.rtm.rtm_flags & RTF_HOST)
1249 ? HOST_MASK
1250 : std_mask(S_ADDR(INFO_DST(&info))));
1252 strp += sprintf(strp, ": %s",
1253 addrname(S_ADDR(INFO_DST(&info)), mask, 0));
1255 if (IN_MULTICAST(ntohl(S_ADDR(INFO_DST(&info))))) {
1256 trace_act("ignore multicast %s", str);
1257 continue;
1260 if (m.r.rtm.rtm_flags & RTF_LLINFO) {
1261 trace_act("ignore ARP %s", str);
1262 continue;
1265 if (get_info_gate(&INFO_GATE(&info), &gate_sin)) {
1266 gate = S_ADDR(INFO_GATE(&info));
1267 strp += sprintf(strp, " --> %s", naddr_ntoa(gate));
1268 } else {
1269 gate = 0;
1272 if (INFO_AUTHOR(&info) != 0)
1273 strp += sprintf(strp, " by authority of %s",
1274 saddr_ntoa(INFO_AUTHOR(&info)));
1276 switch (m.r.rtm.rtm_type) {
1277 case RTM_ADD:
1278 case RTM_CHANGE:
1279 case RTM_REDIRECT:
1280 if (m.r.rtm.rtm_errno != 0) {
1281 trace_act("ignore %s with \"%s\" error",
1282 str, strerror(m.r.rtm.rtm_errno));
1283 } else {
1284 trace_act("%s", str);
1285 rtm_add(&m.r.rtm,&info,0);
1287 break;
1289 case RTM_DELETE:
1290 if (m.r.rtm.rtm_errno != 0
1291 && m.r.rtm.rtm_errno != ESRCH) {
1292 trace_act("ignore %s with \"%s\" error",
1293 str, strerror(m.r.rtm.rtm_errno));
1294 } else {
1295 trace_act("%s", str);
1296 del_static(S_ADDR(INFO_DST(&info)), mask,
1297 gate, 1);
1299 break;
1301 case RTM_LOSING:
1302 trace_act("%s", str);
1303 rtm_lose(&m.r.rtm,&info);
1304 break;
1306 default:
1307 trace_act("ignore %s", str);
1308 break;
1314 /* after aggregating, note routes that belong in the kernel
1316 static void
1317 kern_out(struct ag_info *ag)
1319 struct khash *k;
1322 /* Do not install bad routes if they are not already present.
1323 * This includes routes that had RS_NET_SYN for interfaces that
1324 * recently died.
1326 if (ag->ag_metric == HOPCNT_INFINITY) {
1327 k = kern_find(htonl(ag->ag_dst_h), ag->ag_mask, 0);
1328 if (k == 0)
1329 return;
1330 } else {
1331 k = kern_add(htonl(ag->ag_dst_h), ag->ag_mask);
1334 if (k->k_state & KS_NEW) {
1335 /* will need to add new entry to the kernel table */
1336 k->k_state = KS_ADD;
1337 if (ag->ag_state & AGS_GATEWAY)
1338 k->k_state |= KS_GATEWAY;
1339 k->k_gate = ag->ag_gate;
1340 k->k_metric = ag->ag_metric;
1341 return;
1344 if (k->k_state & KS_STATIC)
1345 return;
1347 /* modify existing kernel entry if necessary */
1348 if (k->k_gate != ag->ag_gate
1349 || k->k_metric != ag->ag_metric) {
1350 /* Must delete bad interface routes etc. to change them. */
1351 if (k->k_metric == HOPCNT_INFINITY)
1352 k->k_state |= KS_DEL_ADD;
1353 k->k_gate = ag->ag_gate;
1354 k->k_metric = ag->ag_metric;
1355 k->k_state |= KS_CHANGE;
1358 /* If the daemon thinks the route should exist, forget
1359 * about any redirections.
1360 * If the daemon thinks the route should exist, eventually
1361 * override manual intervention by the operator.
1363 if ((k->k_state & (KS_DYNAMIC | KS_DELETED)) != 0) {
1364 k->k_state &= ~KS_DYNAMIC;
1365 k->k_state |= (KS_ADD | KS_DEL_ADD);
1368 if ((k->k_state & KS_GATEWAY)
1369 && !(ag->ag_state & AGS_GATEWAY)) {
1370 k->k_state &= ~KS_GATEWAY;
1371 k->k_state |= (KS_ADD | KS_DEL_ADD);
1372 } else if (!(k->k_state & KS_GATEWAY)
1373 && (ag->ag_state & AGS_GATEWAY)) {
1374 k->k_state |= KS_GATEWAY;
1375 k->k_state |= (KS_ADD | KS_DEL_ADD);
1378 /* Deleting-and-adding is necessary to change aspects of a route.
1379 * Just delete instead of deleting and then adding a bad route.
1380 * Otherwise, we want to keep the route in the kernel.
1382 if (k->k_metric == HOPCNT_INFINITY
1383 && (k->k_state & KS_DEL_ADD))
1384 k->k_state |= KS_DELETE;
1385 else
1386 k->k_state &= ~KS_DELETE;
1387 #undef RT
1391 /* ARGSUSED */
1392 static int
1393 walk_kern(struct radix_node *rn,
1394 struct walkarg *argp UNUSED)
1396 #define RT ((struct rt_entry *)rn)
1397 char metric, pref;
1398 u_int ags = 0;
1401 /* Do not install synthetic routes */
1402 if (RT->rt_state & RS_NET_SYN)
1403 return 0;
1405 if (!(RT->rt_state & RS_IF)) {
1406 /* This is an ordinary route, not for an interface.
1409 /* aggregate, ordinary good routes without regard to
1410 * their metric
1412 pref = 1;
1413 ags |= (AGS_GATEWAY | AGS_SUPPRESS | AGS_AGGREGATE);
1415 /* Do not install host routes directly to hosts, to avoid
1416 * interfering with ARP entries in the kernel table.
1418 if (RT_ISHOST(RT)
1419 && ntohl(RT->rt_dst) == RT->rt_gate)
1420 return 0;
1422 } else {
1423 /* This is an interface route.
1424 * Do not install routes for "external" remote interfaces.
1426 if (RT->rt_ifp != 0 && (RT->rt_ifp->int_state & IS_EXTERNAL))
1427 return 0;
1429 /* Interfaces should override received routes.
1431 pref = 0;
1432 ags |= (AGS_IF | AGS_CORS_GATE);
1434 /* If it is not an interface, or an alias for an interface,
1435 * it must be a "gateway."
1437 * If it is a "remote" interface, it is also a "gateway" to
1438 * the kernel if is not a alias.
1440 if (RT->rt_ifp == 0
1441 || (RT->rt_ifp->int_state & IS_REMOTE))
1442 ags |= (AGS_GATEWAY | AGS_SUPPRESS | AGS_AGGREGATE);
1445 /* If RIP is off and IRDP is on, let the route to the discovered
1446 * route suppress any RIP routes. Eventually the RIP routes
1447 * will time-out and be deleted. This reaches the steady-state
1448 * quicker.
1450 if ((RT->rt_state & RS_RDISC) && rip_sock < 0)
1451 ags |= AGS_CORS_GATE;
1453 metric = RT->rt_metric;
1454 if (metric == HOPCNT_INFINITY) {
1455 /* if the route is dead, so try hard to aggregate. */
1456 pref = HOPCNT_INFINITY;
1457 ags |= (AGS_FINE_GATE | AGS_SUPPRESS);
1458 ags &= ~(AGS_IF | AGS_CORS_GATE);
1461 ag_check(RT->rt_dst, RT->rt_mask, RT->rt_gate, 0,
1462 metric,pref, 0, 0, ags, kern_out);
1463 return 0;
1464 #undef RT
1468 /* Update the kernel table to match the daemon table.
1470 static void
1471 fix_kern(void)
1473 int i;
1474 struct khash *k, **pk;
1477 need_kern = age_timer;
1479 /* Walk daemon table, updating the copy of the kernel table.
1481 rn_walktree(rhead, walk_kern, 0);
1482 ag_flush(0,0,kern_out);
1484 for (i = 0; i < KHASH_SIZE; i++) {
1485 for (pk = &khash_bins[i]; (k = *pk) != 0; ) {
1486 /* Do not touch static routes */
1487 if (k->k_state & KS_STATIC) {
1488 kern_check_static(k,0);
1489 pk = &k->k_next;
1490 continue;
1493 /* check hold on routes deleted by the operator */
1494 if (k->k_keep > now.tv_sec) {
1495 /* ensure we check when the hold is over */
1496 LIM_SEC(need_kern, k->k_keep);
1497 /* mark for the next cycle */
1498 k->k_state |= KS_DELETE;
1499 pk = &k->k_next;
1500 continue;
1503 if ((k->k_state & KS_DELETE)
1504 && !(k->k_state & KS_DYNAMIC)) {
1505 kern_ioctl(k, RTM_DELETE, 0);
1506 *pk = k->k_next;
1507 free(k);
1508 continue;
1511 if (k->k_state & KS_DEL_ADD)
1512 kern_ioctl(k, RTM_DELETE, 0);
1514 if (k->k_state & KS_ADD) {
1515 kern_ioctl(k, RTM_ADD,
1516 ((0 != (k->k_state & (KS_GATEWAY
1517 | KS_DYNAMIC)))
1518 ? RTF_GATEWAY : 0));
1519 } else if (k->k_state & KS_CHANGE) {
1520 kern_ioctl(k, RTM_CHANGE,
1521 ((0 != (k->k_state & (KS_GATEWAY
1522 | KS_DYNAMIC)))
1523 ? RTF_GATEWAY : 0));
1525 k->k_state &= ~(KS_ADD|KS_CHANGE|KS_DEL_ADD);
1527 /* Mark this route to be deleted in the next cycle.
1528 * This deletes routes that disappear from the
1529 * daemon table, since the normal aging code
1530 * will clear the bit for routes that have not
1531 * disappeared from the daemon table.
1533 k->k_state |= KS_DELETE;
1534 pk = &k->k_next;
1540 /* Delete a static route in the image of the kernel table.
1542 void
1543 del_static(naddr dst,
1544 naddr mask,
1545 naddr gate,
1546 int gone)
1548 struct khash *k;
1549 struct rt_entry *rt;
1551 /* Just mark it in the table to be deleted next time the kernel
1552 * table is updated.
1553 * If it has already been deleted, mark it as such, and set its
1554 * keep-timer so that it will not be deleted again for a while.
1555 * This lets the operator delete a route added by the daemon
1556 * and add a replacement.
1558 k = kern_find(dst, mask, 0);
1559 if (k != 0 && (gate == 0 || k->k_gate == gate)) {
1560 k->k_state &= ~(KS_STATIC | KS_DYNAMIC | KS_CHECK);
1561 k->k_state |= KS_DELETE;
1562 if (gone) {
1563 k->k_state |= KS_DELETED;
1564 k->k_keep = now.tv_sec + K_KEEP_LIM;
1568 rt = rtget(dst, mask);
1569 if (rt != 0 && (rt->rt_state & RS_STATIC))
1570 rtbad(rt);
1574 /* Delete all routes generated from ICMP Redirects that use a given gateway,
1575 * as well as old redirected routes.
1577 void
1578 del_redirects(naddr bad_gate,
1579 time_t old)
1581 int i;
1582 struct khash *k;
1585 for (i = 0; i < KHASH_SIZE; i++) {
1586 for (k = khash_bins[i]; k != 0; k = k->k_next) {
1587 if (!(k->k_state & KS_DYNAMIC)
1588 || (k->k_state & KS_STATIC))
1589 continue;
1591 if (k->k_gate != bad_gate
1592 && k->k_redirect_time > old
1593 && !supplier)
1594 continue;
1596 k->k_state |= KS_DELETE;
1597 k->k_state &= ~KS_DYNAMIC;
1598 need_kern.tv_sec = now.tv_sec;
1599 trace_act("mark redirected %s --> %s for deletion",
1600 addrname(k->k_dst, k->k_mask, 0),
1601 naddr_ntoa(k->k_gate));
1607 /* Start the daemon tables.
1609 extern int max_keylen;
1611 void
1612 rtinit(void)
1614 int i;
1615 struct ag_info *ag;
1617 /* Initialize the radix trees */
1618 max_keylen = sizeof(struct sockaddr_in);
1619 rn_init();
1620 rn_inithead(&rhead, 32);
1622 /* mark all of the slots in the table free */
1623 ag_avail = ag_slots;
1624 for (ag = ag_slots, i = 1; i < NUM_AG_SLOTS; i++) {
1625 ag->ag_fine = ag+1;
1626 ag++;
1631 #ifdef _HAVE_SIN_LEN
1632 static struct sockaddr_in dst_sock = {sizeof(dst_sock), AF_INET, 0, {0}, {0}};
1633 static struct sockaddr_in mask_sock = {sizeof(mask_sock), AF_INET, 0, {0}, {0}};
1634 #else
1635 static struct sockaddr_in_new dst_sock = {_SIN_ADDR_SIZE, AF_INET};
1636 static struct sockaddr_in_new mask_sock = {_SIN_ADDR_SIZE, AF_INET};
1637 #endif
1640 static void
1641 set_need_flash(void)
1643 if (!need_flash) {
1644 need_flash = 1;
1645 /* Do not send the flash update immediately. Wait a little
1646 * while to hear from other routers.
1648 no_flash.tv_sec = now.tv_sec + MIN_WAITTIME;
1653 /* Get a particular routing table entry
1655 struct rt_entry *
1656 rtget(naddr dst, naddr mask)
1658 struct rt_entry *rt;
1660 dst_sock.sin_addr.s_addr = dst;
1661 mask_sock.sin_addr.s_addr = htonl(mask);
1662 masktrim(&mask_sock);
1663 rt = (struct rt_entry *)rhead->rnh_lookup(&dst_sock,&mask_sock,rhead);
1664 if (!rt
1665 || rt->rt_dst != dst
1666 || rt->rt_mask != mask)
1667 return 0;
1669 return rt;
1673 /* Find a route to dst as the kernel would.
1675 struct rt_entry *
1676 rtfind(naddr dst)
1678 dst_sock.sin_addr.s_addr = dst;
1679 return (struct rt_entry *)rhead->rnh_matchaddr(&dst_sock, rhead);
1683 /* add a route to the table
1685 void
1686 rtadd(naddr dst,
1687 naddr mask,
1688 u_int state, /* rt_state for the entry */
1689 struct rt_spare *new)
1691 struct rt_entry *rt;
1692 naddr smask;
1693 int i;
1694 struct rt_spare *rts;
1696 rt = (struct rt_entry *)rtmalloc(sizeof (*rt), "rtadd");
1697 memset(rt, 0, sizeof(*rt));
1698 for (rts = rt->rt_spares, i = NUM_SPARES; i != 0; i--, rts++)
1699 rts->rts_metric = HOPCNT_INFINITY;
1701 rt->rt_nodes->rn_key = (caddr_t)&rt->rt_dst_sock;
1702 rt->rt_dst = dst;
1703 rt->rt_dst_sock.sin_family = AF_INET;
1704 #ifdef _HAVE_SIN_LEN
1705 rt->rt_dst_sock.sin_len = dst_sock.sin_len;
1706 #endif
1707 if (mask != HOST_MASK) {
1708 smask = std_mask(dst);
1709 if ((smask & ~mask) == 0 && mask > smask)
1710 state |= RS_SUBNET;
1712 mask_sock.sin_addr.s_addr = htonl(mask);
1713 masktrim(&mask_sock);
1714 rt->rt_mask = mask;
1715 rt->rt_state = state;
1716 rt->rt_spares[0] = *new;
1717 rt->rt_time = now.tv_sec;
1718 rt->rt_poison_metric = HOPCNT_INFINITY;
1719 rt->rt_seqno = update_seqno;
1721 if (++total_routes == MAX_ROUTES)
1722 msglog("have maximum (%d) routes", total_routes);
1723 if (TRACEACTIONS)
1724 trace_add_del("Add", rt);
1726 need_kern.tv_sec = now.tv_sec;
1727 set_need_flash();
1729 if (0 == rhead->rnh_addaddr(&rt->rt_dst_sock, &mask_sock,
1730 rhead, rt->rt_nodes)) {
1731 msglog("rnh_addaddr() failed for %s mask=%#lx",
1732 naddr_ntoa(dst), (u_long)mask);
1733 free(rt);
1738 /* notice a changed route
1740 void
1741 rtchange(struct rt_entry *rt,
1742 u_int state, /* new state bits */
1743 struct rt_spare *new,
1744 char *label)
1746 if (rt->rt_metric != new->rts_metric) {
1747 /* Fix the kernel immediately if it seems the route
1748 * has gone bad, since there may be a working route that
1749 * aggregates this route.
1751 if (new->rts_metric == HOPCNT_INFINITY) {
1752 need_kern.tv_sec = now.tv_sec;
1753 if (new->rts_time >= now.tv_sec - EXPIRE_TIME)
1754 new->rts_time = now.tv_sec - EXPIRE_TIME;
1756 rt->rt_seqno = update_seqno;
1757 set_need_flash();
1760 if (rt->rt_gate != new->rts_gate) {
1761 need_kern.tv_sec = now.tv_sec;
1762 rt->rt_seqno = update_seqno;
1763 set_need_flash();
1766 state |= (rt->rt_state & RS_SUBNET);
1768 /* Keep various things from deciding ageless routes are stale.
1770 if (!AGE_RT(state, new->rts_ifp))
1771 new->rts_time = now.tv_sec;
1773 if (TRACEACTIONS)
1774 trace_change(rt, state, new,
1775 label ? label : "Chg ");
1777 rt->rt_state = state;
1778 rt->rt_spares[0] = *new;
1782 /* check for a better route among the spares
1784 static struct rt_spare *
1785 rts_better(struct rt_entry *rt)
1787 struct rt_spare *rts, *rts1;
1788 int i;
1790 /* find the best alternative among the spares */
1791 rts = rt->rt_spares+1;
1792 for (i = NUM_SPARES, rts1 = rts+1; i > 2; i--, rts1++) {
1793 if (BETTER_LINK(rt,rts1,rts))
1794 rts = rts1;
1797 return rts;
1801 /* switch to a backup route
1803 void
1804 rtswitch(struct rt_entry *rt,
1805 struct rt_spare *rts)
1807 struct rt_spare swap;
1808 char label[10];
1811 /* Do not change permanent routes */
1812 if (0 != (rt->rt_state & (RS_MHOME | RS_STATIC | RS_RDISC
1813 | RS_NET_SYN | RS_IF)))
1814 return;
1816 /* find the best alternative among the spares */
1817 if (rts == 0)
1818 rts = rts_better(rt);
1820 /* Do not bother if it is not worthwhile.
1822 if (!BETTER_LINK(rt, rts, rt->rt_spares))
1823 return;
1825 swap = rt->rt_spares[0];
1826 sprintf(label, "Use #%d", (int)(rts - rt->rt_spares));
1827 rtchange(rt, rt->rt_state & ~(RS_NET_SYN | RS_RDISC), rts, label);
1828 if (swap.rts_metric == HOPCNT_INFINITY) {
1829 *rts = rts_empty;
1830 } else {
1831 *rts = swap;
1836 void
1837 rtdelete(struct rt_entry *rt)
1839 struct khash *k;
1842 if (TRACEACTIONS)
1843 trace_add_del("Del", rt);
1845 k = kern_find(rt->rt_dst, rt->rt_mask, 0);
1846 if (k != 0) {
1847 k->k_state |= KS_DELETE;
1848 need_kern.tv_sec = now.tv_sec;
1851 dst_sock.sin_addr.s_addr = rt->rt_dst;
1852 mask_sock.sin_addr.s_addr = htonl(rt->rt_mask);
1853 masktrim(&mask_sock);
1854 if (rt != (struct rt_entry *)rhead->rnh_deladdr(&dst_sock, &mask_sock,
1855 rhead)) {
1856 msglog("rnh_deladdr() failed");
1857 } else {
1858 free(rt);
1859 total_routes--;
1864 void
1865 rts_delete(struct rt_entry *rt,
1866 struct rt_spare *rts)
1868 trace_upslot(rt, rts, &rts_empty);
1869 *rts = rts_empty;
1873 /* Get rid of a bad route, and try to switch to a replacement.
1875 void
1876 rtbad(struct rt_entry *rt)
1878 struct rt_spare new;
1880 /* Poison the route */
1881 new = rt->rt_spares[0];
1882 new.rts_metric = HOPCNT_INFINITY;
1883 rtchange(rt, rt->rt_state & ~(RS_IF | RS_LOCAL | RS_STATIC), &new, 0);
1884 rtswitch(rt, 0);
1888 /* Junk a RS_NET_SYN or RS_LOCAL route,
1889 * unless it is needed by another interface.
1891 void
1892 rtbad_sub(struct rt_entry *rt)
1894 struct interface *ifp, *ifp1;
1895 struct intnet *intnetp;
1896 u_int state;
1899 ifp1 = 0;
1900 state = 0;
1902 if (rt->rt_state & RS_LOCAL) {
1903 /* Is this the route through loopback for the interface?
1904 * If so, see if it is used by any other interfaces, such
1905 * as a point-to-point interface with the same local address.
1907 for (ifp = ifnet; ifp != 0; ifp = ifp->int_next) {
1908 /* Retain it if another interface needs it.
1910 if (ifp->int_addr == rt->rt_ifp->int_addr) {
1911 state |= RS_LOCAL;
1912 ifp1 = ifp;
1913 break;
1919 if (!(state & RS_LOCAL)) {
1920 /* Retain RIPv1 logical network route if there is another
1921 * interface that justifies it.
1923 if (rt->rt_state & RS_NET_SYN) {
1924 for (ifp = ifnet; ifp != 0; ifp = ifp->int_next) {
1925 if ((ifp->int_state & IS_NEED_NET_SYN)
1926 && rt->rt_mask == ifp->int_std_mask
1927 && rt->rt_dst == ifp->int_std_addr) {
1928 state |= RS_NET_SYN;
1929 ifp1 = ifp;
1930 break;
1935 /* or if there is an authority route that needs it. */
1936 for (intnetp = intnets;
1937 intnetp != 0;
1938 intnetp = intnetp->intnet_next) {
1939 if (intnetp->intnet_addr == rt->rt_dst
1940 && intnetp->intnet_mask == rt->rt_mask) {
1941 state |= (RS_NET_SYN | RS_NET_INT);
1942 break;
1947 if (ifp1 != 0 || (state & RS_NET_SYN)) {
1948 struct rt_spare new = rt->rt_spares[0];
1949 new.rts_ifp = ifp1;
1950 rtchange(rt, ((rt->rt_state & ~(RS_NET_SYN|RS_LOCAL)) | state),
1951 &new, 0);
1952 } else {
1953 rtbad(rt);
1958 /* Called while walking the table looking for sick interfaces
1959 * or after a time change.
1961 /* ARGSUSED */
1963 walk_bad(struct radix_node *rn,
1964 struct walkarg *argp UNUSED)
1966 #define RT ((struct rt_entry *)rn)
1967 struct rt_spare *rts;
1968 int i;
1971 /* fix any spare routes through the interface
1973 rts = RT->rt_spares;
1974 for (i = NUM_SPARES; i != 1; i--) {
1975 rts++;
1976 if (rts->rts_metric < HOPCNT_INFINITY
1977 && (rts->rts_ifp == 0
1978 || (rts->rts_ifp->int_state & IS_BROKE)))
1979 rts_delete(RT, rts);
1982 /* Deal with the main route
1984 /* finished if it has been handled before or if its interface is ok
1986 if (RT->rt_ifp == 0 || !(RT->rt_ifp->int_state & IS_BROKE))
1987 return 0;
1989 /* Bad routes for other than interfaces are easy.
1991 if (0 == (RT->rt_state & (RS_IF | RS_NET_SYN | RS_LOCAL))) {
1992 rtbad(RT);
1993 return 0;
1996 rtbad_sub(RT);
1997 return 0;
1998 #undef RT
2002 /* Check the age of an individual route.
2004 /* ARGSUSED */
2005 static int
2006 walk_age(struct radix_node *rn,
2007 struct walkarg *argp UNUSED)
2009 #define RT ((struct rt_entry *)rn)
2010 struct interface *ifp;
2011 struct rt_spare *rts;
2012 int i;
2015 /* age all of the spare routes, including the primary route
2016 * currently in use
2018 rts = RT->rt_spares;
2019 for (i = NUM_SPARES; i != 0; i--, rts++) {
2021 ifp = rts->rts_ifp;
2022 if (i == NUM_SPARES) {
2023 if (!AGE_RT(RT->rt_state, ifp)) {
2024 /* Keep various things from deciding ageless
2025 * routes are stale
2027 rts->rts_time = now.tv_sec;
2028 continue;
2031 /* forget RIP routes after RIP has been turned off.
2033 if (rip_sock < 0) {
2034 rtdelete(RT);
2035 return 0;
2039 /* age failing routes
2041 if (age_bad_gate == rts->rts_gate
2042 && rts->rts_time >= now_stale) {
2043 rts->rts_time -= SUPPLY_INTERVAL;
2046 /* trash the spare routes when they go bad */
2047 if (rts->rts_metric < HOPCNT_INFINITY
2048 && now_garbage > rts->rts_time
2049 && i != NUM_SPARES)
2050 rts_delete(RT, rts);
2054 /* finished if the active route is still fresh */
2055 if (now_stale <= RT->rt_time)
2056 return 0;
2058 /* try to switch to an alternative */
2059 rtswitch(RT, 0);
2061 /* Delete a dead route after it has been publically mourned. */
2062 if (now_garbage > RT->rt_time) {
2063 rtdelete(RT);
2064 return 0;
2067 /* Start poisoning a bad route before deleting it. */
2068 if (now.tv_sec - RT->rt_time > EXPIRE_TIME) {
2069 struct rt_spare new = RT->rt_spares[0];
2070 new.rts_metric = HOPCNT_INFINITY;
2071 rtchange(RT, RT->rt_state, &new, 0);
2073 return 0;
2077 /* Watch for dead routes and interfaces.
2079 void
2080 age(naddr bad_gate)
2082 struct interface *ifp;
2083 int need_query = 0;
2085 /* If not listening to RIP, there is no need to age the routes in
2086 * the table.
2088 age_timer.tv_sec = (now.tv_sec
2089 + ((rip_sock < 0) ? NEVER : SUPPLY_INTERVAL));
2091 /* Check for dead IS_REMOTE interfaces by timing their
2092 * transmissions.
2094 for (ifp = ifnet; ifp; ifp = ifp->int_next) {
2095 if (!(ifp->int_state & IS_REMOTE))
2096 continue;
2098 /* ignore unreachable remote interfaces */
2099 if (!check_remote(ifp))
2100 continue;
2102 /* Restore remote interface that has become reachable
2104 if (ifp->int_state & IS_BROKE)
2105 if_ok(ifp, "remote ");
2107 if (ifp->int_act_time != NEVER
2108 && now.tv_sec - ifp->int_act_time > EXPIRE_TIME) {
2109 msglog("remote interface %s to %s timed out after"
2110 " %ld:%ld",
2111 ifp->int_name,
2112 naddr_ntoa(ifp->int_dstaddr),
2113 (now.tv_sec - ifp->int_act_time)/60,
2114 (now.tv_sec - ifp->int_act_time)%60);
2115 if_sick(ifp);
2118 /* If we have not heard from the other router
2119 * recently, ask it.
2121 if (now.tv_sec >= ifp->int_query_time) {
2122 ifp->int_query_time = NEVER;
2123 need_query = 1;
2127 /* Age routes. */
2128 age_bad_gate = bad_gate;
2129 rn_walktree(rhead, walk_age, 0);
2131 /* delete old redirected routes to keep the kernel table small
2132 * and prevent blackholes
2134 del_redirects(bad_gate, now.tv_sec-STALE_TIME);
2136 /* Update the kernel routing table. */
2137 fix_kern();
2139 /* poke reticent remote gateways */
2140 if (need_query)
2141 rip_query();