Pre-2.0 release: Sync with HAMMER 64 - NFS and cross-device link fixes.
[dragonfly.git] / sbin / routed / table.c
blobf74110a9d21ebd738868508156abf49142932e25
1 /*
2 * Copyright (c) 1983, 1988, 1993
3 * The Regents of the University of California. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgment:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
33 * $FreeBSD: src/sbin/routed/table.c,v 1.9.2.2 2000/08/14 17:00:04 sheldonh Exp $
34 * $DragonFly: src/sbin/routed/table.c,v 1.6 2005/03/16 21:21:34 cpressey Exp $
37 #include "defs.h"
39 #if !defined(sgi) && !defined(__NetBSD__)
40 static char sccsid[] __attribute__((unused)) = "@(#)tables.c 8.1 (Berkeley) 6/5/93";
41 #elif defined(__NetBSD__)
42 __RCSID("$NetBSD$");
43 #endif
44 #ident "$FreeBSD: src/sbin/routed/table.c,v 1.9.2.2 2000/08/14 17:00:04 sheldonh Exp $"
46 static struct rt_spare *rts_better(struct rt_entry *);
47 static struct rt_spare rts_empty = {0,0,0,HOPCNT_INFINITY,0,0,0};
48 static void set_need_flash(void);
49 #ifdef _HAVE_SIN_LEN
50 static void masktrim(struct sockaddr_in *ap);
51 #else
52 static void masktrim(struct sockaddr_in_new *ap);
53 #endif
56 struct radix_node_head *rhead; /* root of the radix tree */
58 int need_flash = 1; /* flash update needed
59 * start =1 to suppress the 1st
62 struct timeval age_timer; /* next check of old routes */
63 struct timeval need_kern = { /* need to update kernel table */
64 EPOCH+MIN_WAITTIME-1, 0
67 int stopint;
69 int total_routes;
71 /* zap any old routes through this gateway */
72 naddr age_bad_gate;
75 /* It is desirable to "aggregate" routes, to combine differing routes of
76 * the same metric and next hop into a common route with a smaller netmask
77 * or to suppress redundant routes, routes that add no information to
78 * routes with smaller netmasks.
80 * A route is redundant if and only if any and all routes with smaller
81 * but matching netmasks and nets are the same. Since routes are
82 * kept sorted in the radix tree, redundant routes always come second.
84 * There are two kinds of aggregations. First, two routes of the same bit
85 * mask and differing only in the least significant bit of the network
86 * number can be combined into a single route with a coarser mask.
88 * Second, a route can be suppressed in favor of another route with a more
89 * coarse mask provided no incompatible routes with intermediate masks
90 * are present. The second kind of aggregation involves suppressing routes.
91 * A route must not be suppressed if an incompatible route exists with
92 * an intermediate mask, since the suppressed route would be covered
93 * by the intermediate.
95 * This code relies on the radix tree walk encountering routes
96 * sorted first by address, with the smallest address first.
99 struct ag_info ag_slots[NUM_AG_SLOTS], *ag_avail, *ag_corsest, *ag_finest;
/* #define DEBUG_AG */
#ifdef DEBUG_AG
/* Consistency check for the aggregation table: every slot must be on
 * exactly one of the two lists, so walking the free list (from ag_avail)
 * and the active list (from ag_corsest) along ag_fine must visit
 * NUM_AG_SLOTS slots in total.  Abort otherwise.
 *
 * Wrapped in do { } while (0) so that "CHECK_AG();" is a single statement
 * and stays safe in an unbraced if/else.
 */
#define CHECK_AG() do {							\
	int acnt = 0;							\
	struct ag_info *cag;						\
	for (cag = ag_avail; cag != 0; cag = cag->ag_fine)		\
		acnt++;							\
	for (cag = ag_corsest; cag != 0; cag = cag->ag_fine)		\
		acnt++;							\
	if (acnt != NUM_AG_SLOTS) {					\
		fflush(stderr);					\
		abort();						\
	}								\
} while (0)
#else
#define CHECK_AG()
#endif
118 /* Output the contents of an aggregation table slot.
119 * This function must always be immediately followed with the deletion
120 * of the target slot.
122 static void
123 ag_out(struct ag_info *ag,
124 void (*out)(struct ag_info *))
126 struct ag_info *ag_cors;
127 naddr bit;
130 /* Forget it if this route should not be output for split-horizon. */
131 if (ag->ag_state & AGS_SPLIT_HZ)
132 return;
134 /* If we output both the even and odd twins, then the immediate parent,
135 * if it is present, is redundant, unless the parent manages to
136 * aggregate into something coarser.
137 * On successive calls, this code detects the even and odd twins,
138 * and marks the parent.
140 * Note that the order in which the radix tree code emits routes
141 * ensures that the twins are seen before the parent is emitted.
143 ag_cors = ag->ag_cors;
144 if (ag_cors != 0
145 && ag_cors->ag_mask == ag->ag_mask<<1
146 && ag_cors->ag_dst_h == (ag->ag_dst_h & ag_cors->ag_mask)) {
147 ag_cors->ag_state |= ((ag_cors->ag_dst_h == ag->ag_dst_h)
148 ? AGS_REDUN0
149 : AGS_REDUN1);
152 /* Skip it if this route is itself redundant.
154 * It is ok to change the contents of the slot here, since it is
155 * always deleted next.
157 if (ag->ag_state & AGS_REDUN0) {
158 if (ag->ag_state & AGS_REDUN1)
159 return; /* quit if fully redundant */
160 /* make it finer if it is half-redundant */
161 bit = (-ag->ag_mask) >> 1;
162 ag->ag_dst_h |= bit;
163 ag->ag_mask |= bit;
165 } else if (ag->ag_state & AGS_REDUN1) {
166 /* make it finer if it is half-redundant */
167 bit = (-ag->ag_mask) >> 1;
168 ag->ag_mask |= bit;
170 out(ag);
174 static void
175 ag_del(struct ag_info *ag)
177 CHECK_AG();
179 if (ag->ag_cors == 0)
180 ag_corsest = ag->ag_fine;
181 else
182 ag->ag_cors->ag_fine = ag->ag_fine;
184 if (ag->ag_fine == 0)
185 ag_finest = ag->ag_cors;
186 else
187 ag->ag_fine->ag_cors = ag->ag_cors;
189 ag->ag_fine = ag_avail;
190 ag_avail = ag;
192 CHECK_AG();
196 /* Flush routes waiting for aggregation.
197 * This must not suppress a route unless it is known that among all
198 * routes with coarser masks that match it, the one with the longest
199 * mask is appropriate. This is ensured by scanning the routes
200 * in lexical order, and with the most restrictive mask first
201 * among routes to the same destination.
203 void
204 ag_flush(naddr lim_dst_h, /* flush routes to here */
205 naddr lim_mask, /* matching this mask */
206 void (*out)(struct ag_info *))
208 struct ag_info *ag, *ag_cors;
209 naddr dst_h;
212 for (ag = ag_finest;
213 ag != 0 && ag->ag_mask >= lim_mask;
214 ag = ag_cors) {
215 ag_cors = ag->ag_cors;
217 /* work on only the specified routes */
218 dst_h = ag->ag_dst_h;
219 if ((dst_h & lim_mask) != lim_dst_h)
220 continue;
222 if (!(ag->ag_state & AGS_SUPPRESS))
223 ag_out(ag, out);
225 else for ( ; ; ag_cors = ag_cors->ag_cors) {
226 /* Look for a route that can suppress the
227 * current route */
228 if (ag_cors == 0) {
229 /* failed, so output it and look for
230 * another route to work on
232 ag_out(ag, out);
233 break;
236 if ((dst_h & ag_cors->ag_mask) == ag_cors->ag_dst_h) {
237 /* We found a route with a coarser mask that
238 * aggregates the current target.
240 * If it has a different next hop, it
241 * cannot replace the target, so output
242 * the target.
244 if (ag->ag_gate != ag_cors->ag_gate
245 && !(ag->ag_state & AGS_FINE_GATE)
246 && !(ag_cors->ag_state & AGS_CORS_GATE)) {
247 ag_out(ag, out);
248 break;
251 /* If the coarse route has a good enough
252 * metric, it suppresses the target.
253 * If the suppressed target was redundant,
254 * then mark the suppressor redundant.
256 if (ag_cors->ag_pref <= ag->ag_pref) {
257 if (ag_cors->ag_seqno > ag->ag_seqno)
258 ag_cors->ag_seqno = ag->ag_seqno;
259 if (AG_IS_REDUN(ag->ag_state)
260 && ag_cors->ag_mask==ag->ag_mask<<1) {
261 if (ag_cors->ag_dst_h == dst_h)
262 ag_cors->ag_state |= AGS_REDUN0;
263 else
264 ag_cors->ag_state |= AGS_REDUN1;
266 if (ag->ag_tag != ag_cors->ag_tag)
267 ag_cors->ag_tag = 0;
268 if (ag->ag_nhop != ag_cors->ag_nhop)
269 ag_cors->ag_nhop = 0;
270 break;
275 /* That route has either been output or suppressed */
276 ag_cors = ag->ag_cors;
277 ag_del(ag);
280 CHECK_AG();
284 /* Try to aggregate a route with previous routes.
286 void
287 ag_check(naddr dst,
288 naddr mask,
289 naddr gate,
290 naddr nhop,
291 char metric,
292 char pref,
293 u_int seqnum,
294 u_short tag,
295 u_short state,
296 void (*out)(struct ag_info *)) /* output using this */
298 struct ag_info *ag, *nag, *ag_cors;
299 naddr xaddr;
300 int x;
302 dst = ntohl(dst);
304 /* Punt non-contiguous subnet masks.
306 * (X & -X) contains a single bit if and only if X is a power of 2.
307 * (X + (X & -X)) == 0 if and only if X is a power of 2.
309 if ((mask & -mask) + mask != 0) {
310 struct ag_info nc_ag;
312 nc_ag.ag_dst_h = dst;
313 nc_ag.ag_mask = mask;
314 nc_ag.ag_gate = gate;
315 nc_ag.ag_nhop = nhop;
316 nc_ag.ag_metric = metric;
317 nc_ag.ag_pref = pref;
318 nc_ag.ag_tag = tag;
319 nc_ag.ag_state = state;
320 nc_ag.ag_seqno = seqnum;
321 out(&nc_ag);
322 return;
325 /* Search for the right slot in the aggregation table.
327 ag_cors = 0;
328 ag = ag_corsest;
329 while (ag != 0) {
330 if (ag->ag_mask >= mask)
331 break;
333 /* Suppress old routes (i.e. combine with compatible routes
334 * with coarser masks) as we look for the right slot in the
335 * aggregation table for the new route.
336 * A route to an address less than the current destination
337 * will not be affected by the current route or any route
338 * seen hereafter. That means it is safe to suppress it.
339 * This check keeps poor routes (e.g. with large hop counts)
340 * from preventing suppression of finer routes.
342 if (ag_cors != 0
343 && ag->ag_dst_h < dst
344 && (ag->ag_state & AGS_SUPPRESS)
345 && ag_cors->ag_pref <= ag->ag_pref
346 && (ag->ag_dst_h & ag_cors->ag_mask) == ag_cors->ag_dst_h
347 && (ag_cors->ag_gate == ag->ag_gate
348 || (ag->ag_state & AGS_FINE_GATE)
349 || (ag_cors->ag_state & AGS_CORS_GATE))) {
350 if (ag_cors->ag_seqno > ag->ag_seqno)
351 ag_cors->ag_seqno = ag->ag_seqno;
352 /* If the suppressed target was redundant,
353 * then mark the suppressor redundant.
355 if (AG_IS_REDUN(ag->ag_state)
356 && ag_cors->ag_mask == ag->ag_mask<<1) {
357 if (ag_cors->ag_dst_h == dst)
358 ag_cors->ag_state |= AGS_REDUN0;
359 else
360 ag_cors->ag_state |= AGS_REDUN1;
362 if (ag->ag_tag != ag_cors->ag_tag)
363 ag_cors->ag_tag = 0;
364 if (ag->ag_nhop != ag_cors->ag_nhop)
365 ag_cors->ag_nhop = 0;
366 ag_del(ag);
367 CHECK_AG();
368 } else {
369 ag_cors = ag;
371 ag = ag_cors->ag_fine;
374 /* If we find the even/odd twin of the new route, and if the
375 * masks and so forth are equal, we can aggregate them.
376 * We can probably promote one of the pair.
378 * Since the routes are encountered in lexical order,
379 * the new route must be odd. However, the second or later
380 * times around this loop, it could be the even twin promoted
381 * from the even/odd pair of twins of the finer route.
383 while (ag != 0
384 && ag->ag_mask == mask
385 && ((ag->ag_dst_h ^ dst) & (mask<<1)) == 0) {
387 /* Here we know the target route and the route in the current
388 * slot have the same netmasks and differ by at most the
389 * last bit. They are either for the same destination, or
390 * for an even/odd pair of destinations.
392 if (ag->ag_dst_h == dst) {
393 /* We have two routes to the same destination.
394 * Routes are encountered in lexical order, so a
395 * route is never promoted until the parent route is
396 * already present. So we know that the new route is
397 * a promoted (or aggregated) pair and the route
398 * already in the slot is the explicit route.
400 * Prefer the best route if their metrics differ,
401 * or the aggregated one if not, following a sort
402 * of longest-match rule.
404 if (pref <= ag->ag_pref) {
405 ag->ag_gate = gate;
406 ag->ag_nhop = nhop;
407 ag->ag_tag = tag;
408 ag->ag_metric = metric;
409 ag->ag_pref = pref;
410 x = ag->ag_state;
411 ag->ag_state = state;
412 state = x;
415 /* The sequence number controls flash updating,
416 * and should be the smaller of the two.
418 if (ag->ag_seqno > seqnum)
419 ag->ag_seqno = seqnum;
421 /* Some bits are set if they are set on either route,
422 * except when the route is for an interface.
424 if (!(ag->ag_state & AGS_IF))
425 ag->ag_state |= (state & (AGS_AGGREGATE_EITHER
426 | AGS_REDUN0
427 | AGS_REDUN1));
428 return;
431 /* If one of the routes can be promoted and the other can
432 * be suppressed, it may be possible to combine them or
433 * worthwhile to promote one.
435 * Any route that can be promoted is always
436 * marked to be eligible to be suppressed.
438 if (!((state & AGS_AGGREGATE)
439 && (ag->ag_state & AGS_SUPPRESS))
440 && !((ag->ag_state & AGS_AGGREGATE)
441 && (state & AGS_SUPPRESS)))
442 break;
444 /* A pair of even/odd twin routes can be combined
445 * if either is redundant, or if they are via the
446 * same gateway and have the same metric.
448 if (AG_IS_REDUN(ag->ag_state)
449 || AG_IS_REDUN(state)
450 || (ag->ag_gate == gate
451 && ag->ag_pref == pref
452 && (state & ag->ag_state & AGS_AGGREGATE) != 0)) {
454 /* We have both the even and odd pairs.
455 * Since the routes are encountered in order,
456 * the route in the slot must be the even twin.
458 * Combine and promote (aggregate) the pair of routes.
460 if (seqnum > ag->ag_seqno)
461 seqnum = ag->ag_seqno;
462 if (!AG_IS_REDUN(state))
463 state &= ~AGS_REDUN1;
464 if (AG_IS_REDUN(ag->ag_state))
465 state |= AGS_REDUN0;
466 else
467 state &= ~AGS_REDUN0;
468 state |= (ag->ag_state & AGS_AGGREGATE_EITHER);
469 if (ag->ag_tag != tag)
470 tag = 0;
471 if (ag->ag_nhop != nhop)
472 nhop = 0;
474 /* Get rid of the even twin that was already
475 * in the slot.
477 ag_del(ag);
479 } else if (ag->ag_pref >= pref
480 && (ag->ag_state & AGS_AGGREGATE)) {
481 /* If we cannot combine the pair, maybe the route
482 * with the worse metric can be promoted.
484 * Promote the old, even twin, by giving its slot
485 * in the table to the new, odd twin.
487 ag->ag_dst_h = dst;
489 xaddr = ag->ag_gate;
490 ag->ag_gate = gate;
491 gate = xaddr;
493 xaddr = ag->ag_nhop;
494 ag->ag_nhop = nhop;
495 nhop = xaddr;
497 x = ag->ag_tag;
498 ag->ag_tag = tag;
499 tag = x;
501 /* The promoted route is even-redundant only if the
502 * even twin was fully redundant. It is not
503 * odd-redundant because the odd-twin will still be
504 * in the table.
506 x = ag->ag_state;
507 if (!AG_IS_REDUN(x))
508 x &= ~AGS_REDUN0;
509 x &= ~AGS_REDUN1;
510 ag->ag_state = state;
511 state = x;
513 x = ag->ag_metric;
514 ag->ag_metric = metric;
515 metric = x;
517 x = ag->ag_pref;
518 ag->ag_pref = pref;
519 pref = x;
521 /* take the newest sequence number */
522 if (seqnum >= ag->ag_seqno)
523 seqnum = ag->ag_seqno;
524 else
525 ag->ag_seqno = seqnum;
527 } else {
528 if (!(state & AGS_AGGREGATE))
529 break; /* cannot promote either twin */
531 /* Promote the new, odd twin by shaving its
532 * mask and address.
533 * The promoted route is odd-redundant only if the
534 * odd twin was fully redundant. It is not
535 * even-redundant because the even twin is still in
536 * the table.
538 if (!AG_IS_REDUN(state))
539 state &= ~AGS_REDUN1;
540 state &= ~AGS_REDUN0;
541 if (seqnum > ag->ag_seqno)
542 seqnum = ag->ag_seqno;
543 else
544 ag->ag_seqno = seqnum;
547 mask <<= 1;
548 dst &= mask;
550 if (ag_cors == 0) {
551 ag = ag_corsest;
552 break;
554 ag = ag_cors;
555 ag_cors = ag->ag_cors;
558 /* When we can no longer promote and combine routes,
559 * flush the old route in the target slot. Also flush
560 * any finer routes that we know will never be aggregated by
561 * the new route.
563 * In case we moved toward coarser masks,
564 * get back where we belong
566 if (ag != 0
567 && ag->ag_mask < mask) {
568 ag_cors = ag;
569 ag = ag->ag_fine;
572 /* Empty the target slot
574 if (ag != 0 && ag->ag_mask == mask) {
575 ag_flush(ag->ag_dst_h, ag->ag_mask, out);
576 ag = (ag_cors == 0) ? ag_corsest : ag_cors->ag_fine;
579 #ifdef DEBUG_AG
580 fflush(stderr);
581 if (ag == 0 && ag_cors != ag_finest)
582 abort();
583 if (ag_cors == 0 && ag != ag_corsest)
584 abort();
585 if (ag != 0 && ag->ag_cors != ag_cors)
586 abort();
587 if (ag_cors != 0 && ag_cors->ag_fine != ag)
588 abort();
589 CHECK_AG();
590 #endif
592 /* Save the new route on the end of the table.
594 nag = ag_avail;
595 ag_avail = nag->ag_fine;
597 nag->ag_dst_h = dst;
598 nag->ag_mask = mask;
599 nag->ag_gate = gate;
600 nag->ag_nhop = nhop;
601 nag->ag_metric = metric;
602 nag->ag_pref = pref;
603 nag->ag_tag = tag;
604 nag->ag_state = state;
605 nag->ag_seqno = seqnum;
607 nag->ag_fine = ag;
608 if (ag != 0)
609 ag->ag_cors = nag;
610 else
611 ag_finest = nag;
612 nag->ag_cors = ag_cors;
613 if (ag_cors == 0)
614 ag_corsest = nag;
615 else
616 ag_cors->ag_fine = nag;
617 CHECK_AG();
#define NAME0_LEN	14
/* Return a printable name for a routing message type.
 *
 * Types 1 through 16 map to their RTM_* names.  Any other value,
 * including 0, is formatted as "RTM type 0x.." into a static buffer,
 * which the next such call overwrites.
 */
static const char *
rtm_type_name(u_char type)
{
#define NEW_RTM_PAT "RTM type %#x"
	/* indexed by type-1 */
	static const char *rtm_types[] = {
		"RTM_ADD",
		"RTM_DELETE",
		"RTM_CHANGE",
		"RTM_GET",
		"RTM_LOSING",
		"RTM_REDIRECT",
		"RTM_MISS",
		"RTM_LOCK",
		"RTM_OLDADD",
		"RTM_OLDDEL",
		"RTM_RESOLVE",
		"RTM_NEWADDR",
		"RTM_DELADDR",
		"RTM_IFINFO",
		"RTM_NEWMADDR",
		"RTM_DELMADDR"
	};
	static char name0[sizeof(NEW_RTM_PAT)+2];


	if (type != 0
	    && type <= sizeof(rtm_types)/sizeof(rtm_types[0]))
		return rtm_types[type-1];

	snprintf(name0, sizeof(name0), NEW_RTM_PAT, type);
	return name0;
#undef NEW_RTM_PAT
}
658 /* Trim a mask in a sockaddr
659 * Produce a length of 0 for an address of 0.
660 * Otherwise produce the index of the first zero byte.
662 void
663 #ifdef _HAVE_SIN_LEN
664 masktrim(struct sockaddr_in *ap)
665 #else
666 masktrim(struct sockaddr_in_new *ap)
667 #endif
669 char *cp;
671 if (ap->sin_addr.s_addr == 0) {
672 ap->sin_len = 0;
673 return;
675 cp = (char *)(&ap->sin_addr.s_addr+1);
676 while (*--cp == 0)
677 continue;
678 ap->sin_len = cp - (char*)ap + 1;
682 /* Tell the kernel to add, delete or change a route
684 static void
685 rtioctl(int action, /* RTM_DELETE, etc */
686 naddr dst,
687 naddr gate,
688 naddr mask,
689 int metric,
690 int flags)
692 struct {
693 struct rt_msghdr w_rtm;
694 struct sockaddr_in w_dst;
695 struct sockaddr_in w_gate;
696 #ifdef _HAVE_SA_LEN
697 struct sockaddr_in w_mask;
698 #else
699 struct sockaddr_in_new w_mask;
700 #endif
701 } w;
702 long cc;
703 # define PAT " %-10s %s metric=%d flags=%#x"
704 # define ARGS rtm_type_name(action), rtname(dst,mask,gate), metric, flags
706 again:
707 memset(&w, 0, sizeof(w));
708 w.w_rtm.rtm_msglen = sizeof(w);
709 w.w_rtm.rtm_version = RTM_VERSION;
710 w.w_rtm.rtm_type = action;
711 w.w_rtm.rtm_flags = flags;
712 w.w_rtm.rtm_seq = ++rt_sock_seqno;
713 w.w_rtm.rtm_addrs = RTA_DST|RTA_GATEWAY;
714 if (metric != 0 || action == RTM_CHANGE) {
715 w.w_rtm.rtm_rmx.rmx_hopcount = metric;
716 w.w_rtm.rtm_inits |= RTV_HOPCOUNT;
718 w.w_dst.sin_family = AF_INET;
719 w.w_dst.sin_addr.s_addr = dst;
720 w.w_gate.sin_family = AF_INET;
721 w.w_gate.sin_addr.s_addr = gate;
722 #ifdef _HAVE_SA_LEN
723 w.w_dst.sin_len = sizeof(w.w_dst);
724 w.w_gate.sin_len = sizeof(w.w_gate);
725 #endif
726 if (mask == HOST_MASK) {
727 w.w_rtm.rtm_flags |= RTF_HOST;
728 w.w_rtm.rtm_msglen -= sizeof(w.w_mask);
729 } else {
730 w.w_rtm.rtm_addrs |= RTA_NETMASK;
731 w.w_mask.sin_addr.s_addr = htonl(mask);
732 #ifdef _HAVE_SA_LEN
733 masktrim(&w.w_mask);
734 if (w.w_mask.sin_len == 0)
735 w.w_mask.sin_len = sizeof(long);
736 w.w_rtm.rtm_msglen -= (sizeof(w.w_mask) - w.w_mask.sin_len);
737 #endif
740 #ifndef NO_INSTALL
741 cc = write(rt_sock, &w, w.w_rtm.rtm_msglen);
742 if (cc < 0) {
743 if (errno == ESRCH
744 && (action == RTM_CHANGE || action == RTM_DELETE)) {
745 trace_act("route disappeared before" PAT, ARGS);
746 if (action == RTM_CHANGE) {
747 action = RTM_ADD;
748 goto again;
750 return;
752 msglog("write(rt_sock)" PAT ": %s", ARGS, strerror(errno));
753 return;
754 } else if (cc != w.w_rtm.rtm_msglen) {
755 msglog("write(rt_sock) wrote %ld instead of %d for" PAT,
756 cc, w.w_rtm.rtm_msglen, ARGS);
757 return;
759 #endif
760 if (TRACEKERNEL)
761 trace_misc("write kernel" PAT, ARGS);
762 #undef PAT
763 #undef ARGS
767 #define KHASH_SIZE 71 /* should be prime */
768 #define KHASH(a,m) khash_bins[((a) ^ (m)) % KHASH_SIZE]
769 static struct khash {
770 struct khash *k_next;
771 naddr k_dst;
772 naddr k_mask;
773 naddr k_gate;
774 short k_metric;
775 u_short k_state;
776 #define KS_NEW 0x001
777 #define KS_DELETE 0x002 /* need to delete the route */
778 #define KS_ADD 0x004 /* add to the kernel */
779 #define KS_CHANGE 0x008 /* tell kernel to change the route */
780 #define KS_DEL_ADD 0x010 /* delete & add to change the kernel */
781 #define KS_STATIC 0x020 /* Static flag in kernel */
782 #define KS_GATEWAY 0x040 /* G flag in kernel */
783 #define KS_DYNAMIC 0x080 /* result of redirect */
784 #define KS_DELETED 0x100 /* already deleted from kernel */
785 #define KS_CHECK 0x200
786 time_t k_keep;
787 #define K_KEEP_LIM 30
788 time_t k_redirect_time; /* when redirected route 1st seen */
789 } *khash_bins[KHASH_SIZE];
792 static struct khash*
793 kern_find(naddr dst, naddr mask, struct khash ***ppk)
795 struct khash *k, **pk;
797 for (pk = &KHASH(dst,mask); (k = *pk) != 0; pk = &k->k_next) {
798 if (k->k_dst == dst && k->k_mask == mask)
799 break;
801 if (ppk != 0)
802 *ppk = pk;
803 return k;
807 static struct khash*
808 kern_add(naddr dst, naddr mask)
810 struct khash *k, **pk;
812 k = kern_find(dst, mask, &pk);
813 if (k != 0)
814 return k;
816 k = (struct khash *)rtmalloc(sizeof(*k), "kern_add");
818 memset(k, 0, sizeof(*k));
819 k->k_dst = dst;
820 k->k_mask = mask;
821 k->k_state = KS_NEW;
822 k->k_keep = now.tv_sec;
823 *pk = k;
825 return k;
829 /* If a kernel route has a non-zero metric, check that it is still in the
830 * daemon table, and not deleted by interfaces coming and going.
832 static void
833 kern_check_static(struct khash *k,
834 struct interface *ifp)
836 struct rt_entry *rt;
837 struct rt_spare new;
839 if (k->k_metric == 0)
840 return;
842 memset(&new, 0, sizeof(new));
843 new.rts_ifp = ifp;
844 new.rts_gate = k->k_gate;
845 new.rts_router = (ifp != 0) ? ifp->int_addr : loopaddr;
846 new.rts_metric = k->k_metric;
847 new.rts_time = now.tv_sec;
849 rt = rtget(k->k_dst, k->k_mask);
850 if (rt != 0) {
851 if (!(rt->rt_state & RS_STATIC))
852 rtchange(rt, rt->rt_state | RS_STATIC, &new, 0);
853 } else {
854 rtadd(k->k_dst, k->k_mask, RS_STATIC, &new);
859 /* operate on a kernel entry
861 static void
862 kern_ioctl(struct khash *k,
863 int action, /* RTM_DELETE, etc */
864 int flags)
867 switch (action) {
868 case RTM_DELETE:
869 k->k_state &= ~KS_DYNAMIC;
870 if (k->k_state & KS_DELETED)
871 return;
872 k->k_state |= KS_DELETED;
873 break;
874 case RTM_ADD:
875 k->k_state &= ~KS_DELETED;
876 break;
877 case RTM_CHANGE:
878 if (k->k_state & KS_DELETED) {
879 action = RTM_ADD;
880 k->k_state &= ~KS_DELETED;
882 break;
885 rtioctl(action, k->k_dst, k->k_gate, k->k_mask, k->k_metric, flags);
889 /* add a route the kernel told us
891 static void
892 rtm_add(struct rt_msghdr *rtm,
893 struct rt_addrinfo *info,
894 time_t keep)
896 struct khash *k;
897 struct interface *ifp;
898 naddr mask;
901 if (rtm->rtm_flags & RTF_HOST) {
902 mask = HOST_MASK;
903 } else if (INFO_MASK(info) != 0) {
904 mask = ntohl(S_ADDR(INFO_MASK(info)));
905 } else {
906 msglog("ignore %s without mask", rtm_type_name(rtm->rtm_type));
907 return;
910 k = kern_add(S_ADDR(INFO_DST(info)), mask);
911 if (k->k_state & KS_NEW)
912 k->k_keep = now.tv_sec+keep;
913 if (INFO_GATE(info) == 0) {
914 trace_act("note %s without gateway",
915 rtm_type_name(rtm->rtm_type));
916 k->k_metric = HOPCNT_INFINITY;
917 } else if (INFO_GATE(info)->sa_family != AF_INET) {
918 trace_act("note %s with gateway AF=%d",
919 rtm_type_name(rtm->rtm_type),
920 INFO_GATE(info)->sa_family);
921 k->k_metric = HOPCNT_INFINITY;
922 } else {
923 k->k_gate = S_ADDR(INFO_GATE(info));
924 k->k_metric = rtm->rtm_rmx.rmx_hopcount;
925 if (k->k_metric < 0)
926 k->k_metric = 0;
927 else if (k->k_metric > HOPCNT_INFINITY-1)
928 k->k_metric = HOPCNT_INFINITY-1;
930 k->k_state &= ~(KS_DELETE | KS_ADD | KS_CHANGE | KS_DEL_ADD
931 | KS_DELETED | KS_GATEWAY | KS_STATIC
932 | KS_NEW | KS_CHECK);
933 if (rtm->rtm_flags & RTF_GATEWAY)
934 k->k_state |= KS_GATEWAY;
935 if (rtm->rtm_flags & RTF_STATIC)
936 k->k_state |= KS_STATIC;
938 if (0 != (rtm->rtm_flags & (RTF_DYNAMIC | RTF_MODIFIED))) {
939 if (INFO_AUTHOR(info) != 0
940 && INFO_AUTHOR(info)->sa_family == AF_INET)
941 ifp = iflookup(S_ADDR(INFO_AUTHOR(info)));
942 else
943 ifp = 0;
944 if (supplier
945 && (ifp == 0 || !(ifp->int_state & IS_REDIRECT_OK))) {
946 /* Routers are not supposed to listen to redirects,
947 * so delete it if it came via an unknown interface
948 * or the interface does not have special permission.
950 k->k_state &= ~KS_DYNAMIC;
951 k->k_state |= KS_DELETE;
952 LIM_SEC(need_kern, 0);
953 trace_act("mark for deletion redirected %s --> %s"
954 " via %s",
955 addrname(k->k_dst, k->k_mask, 0),
956 naddr_ntoa(k->k_gate),
957 ifp ? ifp->int_name : "unknown interface");
958 } else {
959 k->k_state |= KS_DYNAMIC;
960 k->k_redirect_time = now.tv_sec;
961 trace_act("accept redirected %s --> %s via %s",
962 addrname(k->k_dst, k->k_mask, 0),
963 naddr_ntoa(k->k_gate),
964 ifp ? ifp->int_name : "unknown interface");
966 return;
969 /* If it is not a static route, quit until the next comparison
970 * between the kernel and daemon tables, when it will be deleted.
972 if (!(k->k_state & KS_STATIC)) {
973 k->k_state |= KS_DELETE;
974 LIM_SEC(need_kern, k->k_keep);
975 return;
978 /* Put static routes with real metrics into the daemon table so
979 * they can be advertised.
981 * Find the interface toward the gateway.
983 ifp = iflookup(k->k_gate);
984 if (ifp == 0)
985 msglog("static route %s --> %s impossibly lacks ifp",
986 addrname(S_ADDR(INFO_DST(info)), mask, 0),
987 naddr_ntoa(k->k_gate));
989 kern_check_static(k, ifp);
993 /* deal with packet loss
995 static void
996 rtm_lose(struct rt_msghdr *rtm,
997 struct rt_addrinfo *info)
999 if (INFO_GATE(info) == 0
1000 || INFO_GATE(info)->sa_family != AF_INET) {
1001 trace_act("ignore %s without gateway",
1002 rtm_type_name(rtm->rtm_type));
1003 return;
1006 if (rdisc_ok)
1007 rdisc_age(S_ADDR(INFO_GATE(info)));
1008 age(S_ADDR(INFO_GATE(info)));
1012 /* Make the gateway slot of an info structure point to something
1013 * useful. If it is not already useful, but it specifies an interface,
1014 * then fill in the sockaddr_in provided and point it there.
1016 static int
1017 get_info_gate(struct sockaddr **sap,
1018 struct sockaddr_in *in)
1020 struct sockaddr_dl *sdl = (struct sockaddr_dl *)*sap;
1021 struct interface *ifp;
1023 if (sdl == 0)
1024 return 0;
1025 if ((sdl)->sdl_family == AF_INET)
1026 return 1;
1027 if ((sdl)->sdl_family != AF_LINK)
1028 return 0;
1030 ifp = ifwithindex(sdl->sdl_index, 1);
1031 if (ifp == 0)
1032 return 0;
1034 in->sin_addr.s_addr = ifp->int_addr;
1035 #ifdef _HAVE_SA_LEN
1036 in->sin_len = sizeof(*in);
1037 #endif
1038 in->sin_family = AF_INET;
1039 *sap = (struct sockaddr *)in;
1041 return 1;
1045 /* Clean the kernel table by copying it to the daemon image.
1046 * Eventually the daemon will delete any extra routes.
1048 void
1049 flush_kern(void)
1051 static char *sysctl_buf;
1052 static size_t sysctl_buf_size = 0;
1053 size_t needed;
1054 int mib[6];
1055 char *next, *lim;
1056 struct rt_msghdr *rtm;
1057 struct sockaddr_in gate_sin;
1058 struct rt_addrinfo info;
1059 int i;
1060 struct khash *k;
1063 for (i = 0; i < KHASH_SIZE; i++) {
1064 for (k = khash_bins[i]; k != 0; k = k->k_next) {
1065 k->k_state |= KS_CHECK;
1069 mib[0] = CTL_NET;
1070 mib[1] = PF_ROUTE;
1071 mib[2] = 0; /* protocol */
1072 mib[3] = 0; /* wildcard address family */
1073 mib[4] = NET_RT_DUMP;
1074 mib[5] = 0; /* no flags */
1075 for (;;) {
1076 if ((needed = sysctl_buf_size) != 0) {
1077 if (sysctl(mib, 6, sysctl_buf,&needed, 0, 0) >= 0)
1078 break;
1079 if (errno != ENOMEM && errno != EFAULT)
1080 BADERR(1,"flush_kern: sysctl(RT_DUMP)");
1081 free(sysctl_buf);
1082 needed = 0;
1084 if (sysctl(mib, 6, 0, &needed, 0, 0) < 0)
1085 BADERR(1,"flush_kern: sysctl(RT_DUMP) estimate");
1086 /* Kludge around the habit of some systems, such as
1087 * BSD/OS 3.1, to not admit how many routes are in the
1088 * kernel, or at least to be quite wrong.
1090 needed += 50*(sizeof(*rtm)+5*sizeof(struct sockaddr));
1091 sysctl_buf = rtmalloc(sysctl_buf_size = needed,
1092 "flush_kern sysctl(RT_DUMP)");
1095 lim = sysctl_buf + needed;
1096 for (next = sysctl_buf; next < lim; next += rtm->rtm_msglen) {
1097 rtm = (struct rt_msghdr *)next;
1098 if (rtm->rtm_msglen == 0) {
1099 msglog("zero length kernel route at "
1100 " %#lx in buffer %#lx before %#lx",
1101 (u_long)rtm, (u_long)sysctl_buf, (u_long)lim);
1102 break;
1105 rt_xaddrs(&info,
1106 (struct sockaddr *)(rtm+1),
1107 (struct sockaddr *)(next + rtm->rtm_msglen),
1108 rtm->rtm_addrs);
1110 if (INFO_DST(&info) == 0
1111 || INFO_DST(&info)->sa_family != AF_INET)
1112 continue;
1114 /* ignore ARP table entries on systems with a merged route
1115 * and ARP table.
1117 if (rtm->rtm_flags & RTF_LLINFO)
1118 continue;
1120 /* ignore multicast addresses
1122 if (IN_MULTICAST(ntohl(S_ADDR(INFO_DST(&info)))))
1123 continue;
1125 if (!get_info_gate(&INFO_GATE(&info), &gate_sin))
1126 continue;
1128 /* Note static routes and interface routes, and also
1129 * preload the image of the kernel table so that
1130 * we can later clean it, as well as avoid making
1131 * unneeded changes. Keep the old kernel routes for a
1132 * few seconds to allow a RIP or router-discovery
1133 * response to be heard.
1135 rtm_add(rtm,&info,MIN_WAITTIME);
1138 for (i = 0; i < KHASH_SIZE; i++) {
1139 for (k = khash_bins[i]; k != 0; k = k->k_next) {
1140 if (k->k_state & KS_CHECK) {
1141 msglog("%s --> %s disappeared from kernel",
1142 addrname(k->k_dst, k->k_mask, 0),
1143 naddr_ntoa(k->k_gate));
1144 del_static(k->k_dst, k->k_mask, k->k_gate, 1);
1151 /* Listen to announcements from the kernel
1153 void
1154 read_rt(void)
1156 long cc;
1157 struct interface *ifp;
1158 struct sockaddr_in gate_sin;
1159 naddr mask, gate;
1160 union {
1161 struct {
1162 struct rt_msghdr rtm;
1163 struct sockaddr addrs[RTAX_MAX];
1164 } r;
1165 struct if_msghdr ifm;
1166 } m;
1167 char str[100], *strp;
1168 struct rt_addrinfo info;
1171 for (;;) {
1172 cc = read(rt_sock, &m, sizeof(m));
1173 if (cc <= 0) {
1174 if (cc < 0 && errno != EWOULDBLOCK)
1175 LOGERR("read(rt_sock)");
1176 return;
1179 if (m.r.rtm.rtm_version != RTM_VERSION) {
1180 msglog("bogus routing message version %d",
1181 m.r.rtm.rtm_version);
1182 continue;
1185 /* Ignore our own results.
1187 if (m.r.rtm.rtm_type <= RTM_CHANGE
1188 && m.r.rtm.rtm_pid == mypid) {
1189 static int complained = 0;
1190 if (!complained) {
1191 msglog("receiving our own change messages");
1192 complained = 1;
1194 continue;
1197 if (m.r.rtm.rtm_type == RTM_IFINFO
1198 || m.r.rtm.rtm_type == RTM_NEWADDR
1199 || m.r.rtm.rtm_type == RTM_DELADDR) {
1200 ifp = ifwithindex(m.ifm.ifm_index,
1201 m.r.rtm.rtm_type != RTM_DELADDR);
1202 if (ifp == 0)
1203 trace_act("note %s with flags %#x"
1204 " for unknown interface index #%d",
1205 rtm_type_name(m.r.rtm.rtm_type),
1206 m.ifm.ifm_flags,
1207 m.ifm.ifm_index);
1208 else
1209 trace_act("note %s with flags %#x for %s",
1210 rtm_type_name(m.r.rtm.rtm_type),
1211 m.ifm.ifm_flags,
1212 ifp->int_name);
1214 /* After being informed of a change to an interface,
1215 * check them all now if the check would otherwise
1216 * be a long time from now, if the interface is
1217 * not known, or if the interface has been turned
1218 * off or on.
1220 if (ifinit_timer.tv_sec-now.tv_sec>=CHECK_BAD_INTERVAL
1221 || ifp == 0
1222 || ((ifp->int_if_flags ^ m.ifm.ifm_flags)
1223 & IFF_UP) != 0)
1224 ifinit_timer.tv_sec = now.tv_sec;
1225 continue;
1228 strcpy(str, rtm_type_name(m.r.rtm.rtm_type));
1229 strp = &str[strlen(str)];
1230 if (m.r.rtm.rtm_type <= RTM_CHANGE)
1231 strp += sprintf(strp," from pid %d",m.r.rtm.rtm_pid);
1233 rt_xaddrs(&info, m.r.addrs, &m.r.addrs[RTAX_MAX],
1234 m.r.rtm.rtm_addrs);
1236 if (INFO_DST(&info) == 0) {
1237 trace_act("ignore %s without dst", str);
1238 continue;
1241 if (INFO_DST(&info)->sa_family != AF_INET) {
1242 trace_act("ignore %s for AF %d", str,
1243 INFO_DST(&info)->sa_family);
1244 continue;
1247 mask = ((INFO_MASK(&info) != 0)
1248 ? ntohl(S_ADDR(INFO_MASK(&info)))
1249 : (m.r.rtm.rtm_flags & RTF_HOST)
1250 ? HOST_MASK
1251 : std_mask(S_ADDR(INFO_DST(&info))));
1253 strp += sprintf(strp, ": %s",
1254 addrname(S_ADDR(INFO_DST(&info)), mask, 0));
1256 if (IN_MULTICAST(ntohl(S_ADDR(INFO_DST(&info))))) {
1257 trace_act("ignore multicast %s", str);
1258 continue;
1261 if (m.r.rtm.rtm_flags & RTF_LLINFO) {
1262 trace_act("ignore ARP %s", str);
1263 continue;
1266 if (get_info_gate(&INFO_GATE(&info), &gate_sin)) {
1267 gate = S_ADDR(INFO_GATE(&info));
1268 strp += sprintf(strp, " --> %s", naddr_ntoa(gate));
1269 } else {
1270 gate = 0;
1273 if (INFO_AUTHOR(&info) != 0)
1274 strp += sprintf(strp, " by authority of %s",
1275 saddr_ntoa(INFO_AUTHOR(&info)));
1277 switch (m.r.rtm.rtm_type) {
1278 case RTM_ADD:
1279 case RTM_CHANGE:
1280 case RTM_REDIRECT:
1281 if (m.r.rtm.rtm_errno != 0) {
1282 trace_act("ignore %s with \"%s\" error",
1283 str, strerror(m.r.rtm.rtm_errno));
1284 } else {
1285 trace_act("%s", str);
1286 rtm_add(&m.r.rtm,&info,0);
1288 break;
1290 case RTM_DELETE:
1291 if (m.r.rtm.rtm_errno != 0
1292 && m.r.rtm.rtm_errno != ESRCH) {
1293 trace_act("ignore %s with \"%s\" error",
1294 str, strerror(m.r.rtm.rtm_errno));
1295 } else {
1296 trace_act("%s", str);
1297 del_static(S_ADDR(INFO_DST(&info)), mask,
1298 gate, 1);
1300 break;
1302 case RTM_LOSING:
1303 trace_act("%s", str);
1304 rtm_lose(&m.r.rtm,&info);
1305 break;
1307 default:
1308 trace_act("ignore %s", str);
1309 break;
1315 /* after aggregating, note routes that belong in the kernel
1317 static void
1318 kern_out(struct ag_info *ag)
1320 struct khash *k;
1323 /* Do not install bad routes if they are not already present.
1324 * This includes routes that had RS_NET_SYN for interfaces that
1325 * recently died.
1327 if (ag->ag_metric == HOPCNT_INFINITY) {
1328 k = kern_find(htonl(ag->ag_dst_h), ag->ag_mask, 0);
1329 if (k == 0)
1330 return;
1331 } else {
1332 k = kern_add(htonl(ag->ag_dst_h), ag->ag_mask);
1335 if (k->k_state & KS_NEW) {
1336 /* will need to add new entry to the kernel table */
1337 k->k_state = KS_ADD;
1338 if (ag->ag_state & AGS_GATEWAY)
1339 k->k_state |= KS_GATEWAY;
1340 k->k_gate = ag->ag_gate;
1341 k->k_metric = ag->ag_metric;
1342 return;
1345 if (k->k_state & KS_STATIC)
1346 return;
1348 /* modify existing kernel entry if necessary */
1349 if (k->k_gate != ag->ag_gate
1350 || k->k_metric != ag->ag_metric) {
1351 /* Must delete bad interface routes etc. to change them. */
1352 if (k->k_metric == HOPCNT_INFINITY)
1353 k->k_state |= KS_DEL_ADD;
1354 k->k_gate = ag->ag_gate;
1355 k->k_metric = ag->ag_metric;
1356 k->k_state |= KS_CHANGE;
1359 /* If the daemon thinks the route should exist, forget
1360 * about any redirections.
1361 * If the daemon thinks the route should exist, eventually
1362 * override manual intervention by the operator.
1364 if ((k->k_state & (KS_DYNAMIC | KS_DELETED)) != 0) {
1365 k->k_state &= ~KS_DYNAMIC;
1366 k->k_state |= (KS_ADD | KS_DEL_ADD);
1369 if ((k->k_state & KS_GATEWAY)
1370 && !(ag->ag_state & AGS_GATEWAY)) {
1371 k->k_state &= ~KS_GATEWAY;
1372 k->k_state |= (KS_ADD | KS_DEL_ADD);
1373 } else if (!(k->k_state & KS_GATEWAY)
1374 && (ag->ag_state & AGS_GATEWAY)) {
1375 k->k_state |= KS_GATEWAY;
1376 k->k_state |= (KS_ADD | KS_DEL_ADD);
1379 /* Deleting-and-adding is necessary to change aspects of a route.
1380 * Just delete instead of deleting and then adding a bad route.
1381 * Otherwise, we want to keep the route in the kernel.
1383 if (k->k_metric == HOPCNT_INFINITY
1384 && (k->k_state & KS_DEL_ADD))
1385 k->k_state |= KS_DELETE;
1386 else
1387 k->k_state &= ~KS_DELETE;
1388 #undef RT
1392 /* ARGSUSED */
1393 static int
1394 walk_kern(struct radix_node *rn,
1395 struct walkarg *argp UNUSED)
1397 #define RT ((struct rt_entry *)rn)
1398 char metric, pref;
1399 u_int ags = 0;
1402 /* Do not install synthetic routes */
1403 if (RT->rt_state & RS_NET_SYN)
1404 return 0;
1406 if (!(RT->rt_state & RS_IF)) {
1407 /* This is an ordinary route, not for an interface.
1410 /* aggregate, ordinary good routes without regard to
1411 * their metric
1413 pref = 1;
1414 ags |= (AGS_GATEWAY | AGS_SUPPRESS | AGS_AGGREGATE);
1416 /* Do not install host routes directly to hosts, to avoid
1417 * interfering with ARP entries in the kernel table.
1419 if (RT_ISHOST(RT)
1420 && ntohl(RT->rt_dst) == RT->rt_gate)
1421 return 0;
1423 } else {
1424 /* This is an interface route.
1425 * Do not install routes for "external" remote interfaces.
1427 if (RT->rt_ifp != 0 && (RT->rt_ifp->int_state & IS_EXTERNAL))
1428 return 0;
1430 /* Interfaces should override received routes.
1432 pref = 0;
1433 ags |= (AGS_IF | AGS_CORS_GATE);
1435 /* If it is not an interface, or an alias for an interface,
1436 * it must be a "gateway."
1438 * If it is a "remote" interface, it is also a "gateway" to
1439 * the kernel if is not a alias.
1441 if (RT->rt_ifp == 0
1442 || (RT->rt_ifp->int_state & IS_REMOTE))
1443 ags |= (AGS_GATEWAY | AGS_SUPPRESS | AGS_AGGREGATE);
1446 /* If RIP is off and IRDP is on, let the route to the discovered
1447 * route suppress any RIP routes. Eventually the RIP routes
1448 * will time-out and be deleted. This reaches the steady-state
1449 * quicker.
1451 if ((RT->rt_state & RS_RDISC) && rip_sock < 0)
1452 ags |= AGS_CORS_GATE;
1454 metric = RT->rt_metric;
1455 if (metric == HOPCNT_INFINITY) {
1456 /* if the route is dead, so try hard to aggregate. */
1457 pref = HOPCNT_INFINITY;
1458 ags |= (AGS_FINE_GATE | AGS_SUPPRESS);
1459 ags &= ~(AGS_IF | AGS_CORS_GATE);
1462 ag_check(RT->rt_dst, RT->rt_mask, RT->rt_gate, 0,
1463 metric,pref, 0, 0, ags, kern_out);
1464 return 0;
1465 #undef RT
1469 /* Update the kernel table to match the daemon table.
1471 static void
1472 fix_kern(void)
1474 int i;
1475 struct khash *k, **pk;
1478 need_kern = age_timer;
1480 /* Walk daemon table, updating the copy of the kernel table.
1482 rn_walktree(rhead, walk_kern, 0);
1483 ag_flush(0,0,kern_out);
1485 for (i = 0; i < KHASH_SIZE; i++) {
1486 for (pk = &khash_bins[i]; (k = *pk) != 0; ) {
1487 /* Do not touch static routes */
1488 if (k->k_state & KS_STATIC) {
1489 kern_check_static(k,0);
1490 pk = &k->k_next;
1491 continue;
1494 /* check hold on routes deleted by the operator */
1495 if (k->k_keep > now.tv_sec) {
1496 /* ensure we check when the hold is over */
1497 LIM_SEC(need_kern, k->k_keep);
1498 /* mark for the next cycle */
1499 k->k_state |= KS_DELETE;
1500 pk = &k->k_next;
1501 continue;
1504 if ((k->k_state & KS_DELETE)
1505 && !(k->k_state & KS_DYNAMIC)) {
1506 kern_ioctl(k, RTM_DELETE, 0);
1507 *pk = k->k_next;
1508 free(k);
1509 continue;
1512 if (k->k_state & KS_DEL_ADD)
1513 kern_ioctl(k, RTM_DELETE, 0);
1515 if (k->k_state & KS_ADD) {
1516 kern_ioctl(k, RTM_ADD,
1517 ((0 != (k->k_state & (KS_GATEWAY
1518 | KS_DYNAMIC)))
1519 ? RTF_GATEWAY : 0));
1520 } else if (k->k_state & KS_CHANGE) {
1521 kern_ioctl(k, RTM_CHANGE,
1522 ((0 != (k->k_state & (KS_GATEWAY
1523 | KS_DYNAMIC)))
1524 ? RTF_GATEWAY : 0));
1526 k->k_state &= ~(KS_ADD|KS_CHANGE|KS_DEL_ADD);
1528 /* Mark this route to be deleted in the next cycle.
1529 * This deletes routes that disappear from the
1530 * daemon table, since the normal aging code
1531 * will clear the bit for routes that have not
1532 * disappeared from the daemon table.
1534 k->k_state |= KS_DELETE;
1535 pk = &k->k_next;
1541 /* Delete a static route in the image of the kernel table.
1543 void
1544 del_static(naddr dst,
1545 naddr mask,
1546 naddr gate,
1547 int gone)
1549 struct khash *k;
1550 struct rt_entry *rt;
1552 /* Just mark it in the table to be deleted next time the kernel
1553 * table is updated.
1554 * If it has already been deleted, mark it as such, and set its
1555 * keep-timer so that it will not be deleted again for a while.
1556 * This lets the operator delete a route added by the daemon
1557 * and add a replacement.
1559 k = kern_find(dst, mask, 0);
1560 if (k != 0 && (gate == 0 || k->k_gate == gate)) {
1561 k->k_state &= ~(KS_STATIC | KS_DYNAMIC | KS_CHECK);
1562 k->k_state |= KS_DELETE;
1563 if (gone) {
1564 k->k_state |= KS_DELETED;
1565 k->k_keep = now.tv_sec + K_KEEP_LIM;
1569 rt = rtget(dst, mask);
1570 if (rt != 0 && (rt->rt_state & RS_STATIC))
1571 rtbad(rt);
1575 /* Delete all routes generated from ICMP Redirects that use a given gateway,
1576 * as well as old redirected routes.
1578 void
1579 del_redirects(naddr bad_gate,
1580 time_t old)
1582 int i;
1583 struct khash *k;
1586 for (i = 0; i < KHASH_SIZE; i++) {
1587 for (k = khash_bins[i]; k != 0; k = k->k_next) {
1588 if (!(k->k_state & KS_DYNAMIC)
1589 || (k->k_state & KS_STATIC))
1590 continue;
1592 if (k->k_gate != bad_gate
1593 && k->k_redirect_time > old
1594 && !supplier)
1595 continue;
1597 k->k_state |= KS_DELETE;
1598 k->k_state &= ~KS_DYNAMIC;
1599 need_kern.tv_sec = now.tv_sec;
1600 trace_act("mark redirected %s --> %s for deletion",
1601 addrname(k->k_dst, k->k_mask, 0),
1602 naddr_ntoa(k->k_gate));
1608 /* Start the daemon tables.
1610 extern int max_keylen;
1612 void
1613 rtinit(void)
1615 int i;
1616 struct ag_info *ag;
1618 /* Initialize the radix trees */
1619 max_keylen = sizeof(struct sockaddr_in);
1620 rn_init();
1621 rn_inithead(&rhead, 32);
1623 /* mark all of the slots in the table free */
1624 ag_avail = ag_slots;
1625 for (ag = ag_slots, i = 1; i < NUM_AG_SLOTS; i++) {
1626 ag->ag_fine = ag+1;
1627 ag++;
1632 #ifdef _HAVE_SIN_LEN
1633 static struct sockaddr_in dst_sock = {sizeof(dst_sock), AF_INET, 0, {0}, {0}};
1634 static struct sockaddr_in mask_sock = {sizeof(mask_sock), AF_INET, 0, {0}, {0}};
1635 #else
1636 static struct sockaddr_in_new dst_sock = {_SIN_ADDR_SIZE, AF_INET};
1637 static struct sockaddr_in_new mask_sock = {_SIN_ADDR_SIZE, AF_INET};
1638 #endif
1641 static void
1642 set_need_flash(void)
1644 if (!need_flash) {
1645 need_flash = 1;
1646 /* Do not send the flash update immediately. Wait a little
1647 * while to hear from other routers.
1649 no_flash.tv_sec = now.tv_sec + MIN_WAITTIME;
1654 /* Get a particular routing table entry
1656 struct rt_entry *
1657 rtget(naddr dst, naddr mask)
1659 struct rt_entry *rt;
1661 dst_sock.sin_addr.s_addr = dst;
1662 mask_sock.sin_addr.s_addr = htonl(mask);
1663 masktrim(&mask_sock);
1664 rt = (struct rt_entry *)rhead->rnh_lookup(&dst_sock,&mask_sock,rhead);
1665 if (!rt
1666 || rt->rt_dst != dst
1667 || rt->rt_mask != mask)
1668 return 0;
1670 return rt;
1674 /* Find a route to dst as the kernel would.
1676 struct rt_entry *
1677 rtfind(naddr dst)
1679 dst_sock.sin_addr.s_addr = dst;
1680 return (struct rt_entry *)rhead->rnh_matchaddr(&dst_sock, rhead);
1684 /* add a route to the table
1686 void
1687 rtadd(naddr dst,
1688 naddr mask,
1689 u_int state, /* rt_state for the entry */
1690 struct rt_spare *new)
1692 struct rt_entry *rt;
1693 naddr smask;
1694 int i;
1695 struct rt_spare *rts;
1697 rt = (struct rt_entry *)rtmalloc(sizeof (*rt), "rtadd");
1698 memset(rt, 0, sizeof(*rt));
1699 for (rts = rt->rt_spares, i = NUM_SPARES; i != 0; i--, rts++)
1700 rts->rts_metric = HOPCNT_INFINITY;
1702 rt->rt_nodes->rn_key = (caddr_t)&rt->rt_dst_sock;
1703 rt->rt_dst = dst;
1704 rt->rt_dst_sock.sin_family = AF_INET;
1705 #ifdef _HAVE_SIN_LEN
1706 rt->rt_dst_sock.sin_len = dst_sock.sin_len;
1707 #endif
1708 if (mask != HOST_MASK) {
1709 smask = std_mask(dst);
1710 if ((smask & ~mask) == 0 && mask > smask)
1711 state |= RS_SUBNET;
1713 mask_sock.sin_addr.s_addr = htonl(mask);
1714 masktrim(&mask_sock);
1715 rt->rt_mask = mask;
1716 rt->rt_state = state;
1717 rt->rt_spares[0] = *new;
1718 rt->rt_time = now.tv_sec;
1719 rt->rt_poison_metric = HOPCNT_INFINITY;
1720 rt->rt_seqno = update_seqno;
1722 if (++total_routes == MAX_ROUTES)
1723 msglog("have maximum (%d) routes", total_routes);
1724 if (TRACEACTIONS)
1725 trace_add_del("Add", rt);
1727 need_kern.tv_sec = now.tv_sec;
1728 set_need_flash();
1730 if (0 == rhead->rnh_addaddr(&rt->rt_dst_sock, &mask_sock,
1731 rhead, rt->rt_nodes)) {
1732 msglog("rnh_addaddr() failed for %s mask=%#lx",
1733 naddr_ntoa(dst), (u_long)mask);
1734 free(rt);
1739 /* notice a changed route
1741 void
1742 rtchange(struct rt_entry *rt,
1743 u_int state, /* new state bits */
1744 struct rt_spare *new,
1745 char *label)
1747 if (rt->rt_metric != new->rts_metric) {
1748 /* Fix the kernel immediately if it seems the route
1749 * has gone bad, since there may be a working route that
1750 * aggregates this route.
1752 if (new->rts_metric == HOPCNT_INFINITY) {
1753 need_kern.tv_sec = now.tv_sec;
1754 if (new->rts_time >= now.tv_sec - EXPIRE_TIME)
1755 new->rts_time = now.tv_sec - EXPIRE_TIME;
1757 rt->rt_seqno = update_seqno;
1758 set_need_flash();
1761 if (rt->rt_gate != new->rts_gate) {
1762 need_kern.tv_sec = now.tv_sec;
1763 rt->rt_seqno = update_seqno;
1764 set_need_flash();
1767 state |= (rt->rt_state & RS_SUBNET);
1769 /* Keep various things from deciding ageless routes are stale.
1771 if (!AGE_RT(state, new->rts_ifp))
1772 new->rts_time = now.tv_sec;
1774 if (TRACEACTIONS)
1775 trace_change(rt, state, new,
1776 label ? label : "Chg ");
1778 rt->rt_state = state;
1779 rt->rt_spares[0] = *new;
1783 /* check for a better route among the spares
1785 static struct rt_spare *
1786 rts_better(struct rt_entry *rt)
1788 struct rt_spare *rts, *rts1;
1789 int i;
1791 /* find the best alternative among the spares */
1792 rts = rt->rt_spares+1;
1793 for (i = NUM_SPARES, rts1 = rts+1; i > 2; i--, rts1++) {
1794 if (BETTER_LINK(rt,rts1,rts))
1795 rts = rts1;
1798 return rts;
1802 /* switch to a backup route
1804 void
1805 rtswitch(struct rt_entry *rt,
1806 struct rt_spare *rts)
1808 struct rt_spare swap;
1809 char label[10];
1812 /* Do not change permanent routes */
1813 if (0 != (rt->rt_state & (RS_MHOME | RS_STATIC | RS_RDISC
1814 | RS_NET_SYN | RS_IF)))
1815 return;
1817 /* find the best alternative among the spares */
1818 if (rts == 0)
1819 rts = rts_better(rt);
1821 /* Do not bother if it is not worthwhile.
1823 if (!BETTER_LINK(rt, rts, rt->rt_spares))
1824 return;
1826 swap = rt->rt_spares[0];
1827 sprintf(label, "Use #%d", (int)(rts - rt->rt_spares));
1828 rtchange(rt, rt->rt_state & ~(RS_NET_SYN | RS_RDISC), rts, label);
1829 if (swap.rts_metric == HOPCNT_INFINITY) {
1830 *rts = rts_empty;
1831 } else {
1832 *rts = swap;
1837 void
1838 rtdelete(struct rt_entry *rt)
1840 struct khash *k;
1843 if (TRACEACTIONS)
1844 trace_add_del("Del", rt);
1846 k = kern_find(rt->rt_dst, rt->rt_mask, 0);
1847 if (k != 0) {
1848 k->k_state |= KS_DELETE;
1849 need_kern.tv_sec = now.tv_sec;
1852 dst_sock.sin_addr.s_addr = rt->rt_dst;
1853 mask_sock.sin_addr.s_addr = htonl(rt->rt_mask);
1854 masktrim(&mask_sock);
1855 if (rt != (struct rt_entry *)rhead->rnh_deladdr(&dst_sock, &mask_sock,
1856 rhead)) {
1857 msglog("rnh_deladdr() failed");
1858 } else {
1859 free(rt);
1860 total_routes--;
1865 void
1866 rts_delete(struct rt_entry *rt,
1867 struct rt_spare *rts)
1869 trace_upslot(rt, rts, &rts_empty);
1870 *rts = rts_empty;
1874 /* Get rid of a bad route, and try to switch to a replacement.
1876 void
1877 rtbad(struct rt_entry *rt)
1879 struct rt_spare new;
1881 /* Poison the route */
1882 new = rt->rt_spares[0];
1883 new.rts_metric = HOPCNT_INFINITY;
1884 rtchange(rt, rt->rt_state & ~(RS_IF | RS_LOCAL | RS_STATIC), &new, 0);
1885 rtswitch(rt, 0);
1889 /* Junk a RS_NET_SYN or RS_LOCAL route,
1890 * unless it is needed by another interface.
1892 void
1893 rtbad_sub(struct rt_entry *rt)
1895 struct interface *ifp, *ifp1;
1896 struct intnet *intnetp;
1897 u_int state;
1900 ifp1 = 0;
1901 state = 0;
1903 if (rt->rt_state & RS_LOCAL) {
1904 /* Is this the route through loopback for the interface?
1905 * If so, see if it is used by any other interfaces, such
1906 * as a point-to-point interface with the same local address.
1908 for (ifp = ifnet; ifp != 0; ifp = ifp->int_next) {
1909 /* Retain it if another interface needs it.
1911 if (ifp->int_addr == rt->rt_ifp->int_addr) {
1912 state |= RS_LOCAL;
1913 ifp1 = ifp;
1914 break;
1920 if (!(state & RS_LOCAL)) {
1921 /* Retain RIPv1 logical network route if there is another
1922 * interface that justifies it.
1924 if (rt->rt_state & RS_NET_SYN) {
1925 for (ifp = ifnet; ifp != 0; ifp = ifp->int_next) {
1926 if ((ifp->int_state & IS_NEED_NET_SYN)
1927 && rt->rt_mask == ifp->int_std_mask
1928 && rt->rt_dst == ifp->int_std_addr) {
1929 state |= RS_NET_SYN;
1930 ifp1 = ifp;
1931 break;
1936 /* or if there is an authority route that needs it. */
1937 for (intnetp = intnets;
1938 intnetp != 0;
1939 intnetp = intnetp->intnet_next) {
1940 if (intnetp->intnet_addr == rt->rt_dst
1941 && intnetp->intnet_mask == rt->rt_mask) {
1942 state |= (RS_NET_SYN | RS_NET_INT);
1943 break;
1948 if (ifp1 != 0 || (state & RS_NET_SYN)) {
1949 struct rt_spare new = rt->rt_spares[0];
1950 new.rts_ifp = ifp1;
1951 rtchange(rt, ((rt->rt_state & ~(RS_NET_SYN|RS_LOCAL)) | state),
1952 &new, 0);
1953 } else {
1954 rtbad(rt);
1959 /* Called while walking the table looking for sick interfaces
1960 * or after a time change.
1962 /* ARGSUSED */
1964 walk_bad(struct radix_node *rn,
1965 struct walkarg *argp UNUSED)
1967 #define RT ((struct rt_entry *)rn)
1968 struct rt_spare *rts;
1969 int i;
1972 /* fix any spare routes through the interface
1974 rts = RT->rt_spares;
1975 for (i = NUM_SPARES; i != 1; i--) {
1976 rts++;
1977 if (rts->rts_metric < HOPCNT_INFINITY
1978 && (rts->rts_ifp == 0
1979 || (rts->rts_ifp->int_state & IS_BROKE)))
1980 rts_delete(RT, rts);
1983 /* Deal with the main route
1985 /* finished if it has been handled before or if its interface is ok
1987 if (RT->rt_ifp == 0 || !(RT->rt_ifp->int_state & IS_BROKE))
1988 return 0;
1990 /* Bad routes for other than interfaces are easy.
1992 if (0 == (RT->rt_state & (RS_IF | RS_NET_SYN | RS_LOCAL))) {
1993 rtbad(RT);
1994 return 0;
1997 rtbad_sub(RT);
1998 return 0;
1999 #undef RT
2003 /* Check the age of an individual route.
2005 /* ARGSUSED */
2006 static int
2007 walk_age(struct radix_node *rn,
2008 struct walkarg *argp UNUSED)
2010 #define RT ((struct rt_entry *)rn)
2011 struct interface *ifp;
2012 struct rt_spare *rts;
2013 int i;
2016 /* age all of the spare routes, including the primary route
2017 * currently in use
2019 rts = RT->rt_spares;
2020 for (i = NUM_SPARES; i != 0; i--, rts++) {
2022 ifp = rts->rts_ifp;
2023 if (i == NUM_SPARES) {
2024 if (!AGE_RT(RT->rt_state, ifp)) {
2025 /* Keep various things from deciding ageless
2026 * routes are stale
2028 rts->rts_time = now.tv_sec;
2029 continue;
2032 /* forget RIP routes after RIP has been turned off.
2034 if (rip_sock < 0) {
2035 rtdelete(RT);
2036 return 0;
2040 /* age failing routes
2042 if (age_bad_gate == rts->rts_gate
2043 && rts->rts_time >= now_stale) {
2044 rts->rts_time -= SUPPLY_INTERVAL;
2047 /* trash the spare routes when they go bad */
2048 if (rts->rts_metric < HOPCNT_INFINITY
2049 && now_garbage > rts->rts_time
2050 && i != NUM_SPARES)
2051 rts_delete(RT, rts);
2055 /* finished if the active route is still fresh */
2056 if (now_stale <= RT->rt_time)
2057 return 0;
2059 /* try to switch to an alternative */
2060 rtswitch(RT, 0);
2062 /* Delete a dead route after it has been publically mourned. */
2063 if (now_garbage > RT->rt_time) {
2064 rtdelete(RT);
2065 return 0;
2068 /* Start poisoning a bad route before deleting it. */
2069 if (now.tv_sec - RT->rt_time > EXPIRE_TIME) {
2070 struct rt_spare new = RT->rt_spares[0];
2071 new.rts_metric = HOPCNT_INFINITY;
2072 rtchange(RT, RT->rt_state, &new, 0);
2074 return 0;
2078 /* Watch for dead routes and interfaces.
2080 void
2081 age(naddr bad_gate)
2083 struct interface *ifp;
2084 int need_query = 0;
2086 /* If not listening to RIP, there is no need to age the routes in
2087 * the table.
2089 age_timer.tv_sec = (now.tv_sec
2090 + ((rip_sock < 0) ? NEVER : SUPPLY_INTERVAL));
2092 /* Check for dead IS_REMOTE interfaces by timing their
2093 * transmissions.
2095 for (ifp = ifnet; ifp; ifp = ifp->int_next) {
2096 if (!(ifp->int_state & IS_REMOTE))
2097 continue;
2099 /* ignore unreachable remote interfaces */
2100 if (!check_remote(ifp))
2101 continue;
2103 /* Restore remote interface that has become reachable
2105 if (ifp->int_state & IS_BROKE)
2106 if_ok(ifp, "remote ");
2108 if (ifp->int_act_time != NEVER
2109 && now.tv_sec - ifp->int_act_time > EXPIRE_TIME) {
2110 msglog("remote interface %s to %s timed out after"
2111 " %ld:%ld",
2112 ifp->int_name,
2113 naddr_ntoa(ifp->int_dstaddr),
2114 (now.tv_sec - ifp->int_act_time)/60,
2115 (now.tv_sec - ifp->int_act_time)%60);
2116 if_sick(ifp);
2119 /* If we have not heard from the other router
2120 * recently, ask it.
2122 if (now.tv_sec >= ifp->int_query_time) {
2123 ifp->int_query_time = NEVER;
2124 need_query = 1;
2128 /* Age routes. */
2129 age_bad_gate = bad_gate;
2130 rn_walktree(rhead, walk_age, 0);
2132 /* delete old redirected routes to keep the kernel table small
2133 * and prevent blackholes
2135 del_redirects(bad_gate, now.tv_sec-STALE_TIME);
2137 /* Update the kernel routing table. */
2138 fix_kern();
2140 /* poke reticent remote gateways */
2141 if (need_query)
2142 rip_query();