<sys/vfscache.h>: Sync enum vtagtype with what we have.
[dragonfly.git] / sbin / routed / table.c
blob0065bb2c66c318cfbf4bdb68e02b1bd2600a90ac
/*
 * Copyright (c) 1983, 1988, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/sbin/routed/table.c,v 1.9.2.2 2000/08/14 17:00:04 sheldonh Exp $
 */
32 #include "defs.h"
34 #if !defined(__NetBSD__)
35 static char sccsid[] __attribute__((unused)) = "@(#)tables.c 8.1 (Berkeley) 6/5/93";
36 #elif defined(__NetBSD__)
37 __RCSID("$NetBSD$");
38 #endif
40 static struct rt_spare *rts_better(struct rt_entry *);
41 static struct rt_spare rts_empty = {0,0,0,HOPCNT_INFINITY,0,0,0};
42 static void set_need_flash(void);
43 #ifdef _HAVE_SIN_LEN
44 static void masktrim(struct sockaddr_in *ap);
45 #else
46 static void masktrim(struct sockaddr_in_new *ap);
47 #endif
50 struct radix_node_head *rhead; /* root of the radix tree */
52 int need_flash = 1; /* flash update needed
53 * start =1 to suppress the 1st
56 struct timeval age_timer; /* next check of old routes */
57 struct timeval need_kern = { /* need to update kernel table */
58 EPOCH+MIN_WAITTIME-1, 0
61 int stopint;
63 int total_routes;
65 /* zap any old routes through this gateway */
66 naddr age_bad_gate;
69 /* It is desirable to "aggregate" routes, to combine differing routes of
70 * the same metric and next hop into a common route with a smaller netmask
71 * or to suppress redundant routes, routes that add no information to
72 * routes with smaller netmasks.
74 * A route is redundant if and only if any and all routes with smaller
75 * but matching netmasks and nets are the same. Since routes are
76 * kept sorted in the radix tree, redundant routes always come second.
78 * There are two kinds of aggregations. First, two routes of the same bit
79 * mask and differing only in the least significant bit of the network
80 * number can be combined into a single route with a coarser mask.
82 * Second, a route can be suppressed in favor of another route with a more
83 * coarse mask provided no incompatible routes with intermediate masks
84 * are present. The second kind of aggregation involves suppressing routes.
85 * A route must not be suppressed if an incompatible route exists with
86 * an intermediate mask, since the suppressed route would be covered
87 * by the intermediate.
89 * This code relies on the radix tree walk encountering routes
90 * sorted first by address, with the smallest address first.
93 struct ag_info ag_slots[NUM_AG_SLOTS], *ag_avail, *ag_corsest, *ag_finest;
95 /* #define DEBUG_AG */
96 #ifdef DEBUG_AG
97 #define CHECK_AG() {int acnt = 0; struct ag_info *cag; \
98 for (cag = ag_avail; cag != NULL; cag = cag->ag_fine) \
99 acnt++; \
100 for (cag = ag_corsest; cag != NULL; cag = cag->ag_fine) \
101 acnt++; \
102 if (acnt != NUM_AG_SLOTS) { \
103 fflush(stderr); \
104 abort(); \
107 #else
108 #define CHECK_AG()
109 #endif
112 /* Output the contents of an aggregation table slot.
113 * This function must always be immediately followed with the deletion
114 * of the target slot.
116 static void
117 ag_out(struct ag_info *ag,
118 void (*out)(struct ag_info *))
120 struct ag_info *ag_cors;
121 naddr bit;
124 /* Forget it if this route should not be output for split-horizon. */
125 if (ag->ag_state & AGS_SPLIT_HZ)
126 return;
128 /* If we output both the even and odd twins, then the immediate parent,
129 * if it is present, is redundant, unless the parent manages to
130 * aggregate into something coarser.
131 * On successive calls, this code detects the even and odd twins,
132 * and marks the parent.
134 * Note that the order in which the radix tree code emits routes
135 * ensures that the twins are seen before the parent is emitted.
137 ag_cors = ag->ag_cors;
138 if (ag_cors != NULL
139 && ag_cors->ag_mask == ag->ag_mask<<1
140 && ag_cors->ag_dst_h == (ag->ag_dst_h & ag_cors->ag_mask)) {
141 ag_cors->ag_state |= ((ag_cors->ag_dst_h == ag->ag_dst_h)
142 ? AGS_REDUN0
143 : AGS_REDUN1);
146 /* Skip it if this route is itself redundant.
148 * It is ok to change the contents of the slot here, since it is
149 * always deleted next.
151 if (ag->ag_state & AGS_REDUN0) {
152 if (ag->ag_state & AGS_REDUN1)
153 return; /* quit if fully redundant */
154 /* make it finer if it is half-redundant */
155 bit = (-ag->ag_mask) >> 1;
156 ag->ag_dst_h |= bit;
157 ag->ag_mask |= bit;
159 } else if (ag->ag_state & AGS_REDUN1) {
160 /* make it finer if it is half-redundant */
161 bit = (-ag->ag_mask) >> 1;
162 ag->ag_mask |= bit;
164 out(ag);
168 static void
169 ag_del(struct ag_info *ag)
171 CHECK_AG();
173 if (ag->ag_cors == 0)
174 ag_corsest = ag->ag_fine;
175 else
176 ag->ag_cors->ag_fine = ag->ag_fine;
178 if (ag->ag_fine == 0)
179 ag_finest = ag->ag_cors;
180 else
181 ag->ag_fine->ag_cors = ag->ag_cors;
183 ag->ag_fine = ag_avail;
184 ag_avail = ag;
186 CHECK_AG();
190 /* Flush routes waiting for aggregation.
191 * This must not suppress a route unless it is known that among all
192 * routes with coarser masks that match it, the one with the longest
193 * mask is appropriate. This is ensured by scanning the routes
194 * in lexical order, and with the most restrictive mask first
195 * among routes to the same destination.
197 void
198 ag_flush(naddr lim_dst_h, /* flush routes to here */
199 naddr lim_mask, /* matching this mask */
200 void (*out)(struct ag_info *))
202 struct ag_info *ag, *ag_cors;
203 naddr dst_h;
206 for (ag = ag_finest;
207 ag != NULL && ag->ag_mask >= lim_mask;
208 ag = ag_cors) {
209 ag_cors = ag->ag_cors;
211 /* work on only the specified routes */
212 dst_h = ag->ag_dst_h;
213 if ((dst_h & lim_mask) != lim_dst_h)
214 continue;
216 if (!(ag->ag_state & AGS_SUPPRESS))
217 ag_out(ag, out);
219 else for ( ; ; ag_cors = ag_cors->ag_cors) {
220 /* Look for a route that can suppress the
221 * current route */
222 if (ag_cors == NULL) {
223 /* failed, so output it and look for
224 * another route to work on
226 ag_out(ag, out);
227 break;
230 if ((dst_h & ag_cors->ag_mask) == ag_cors->ag_dst_h) {
231 /* We found a route with a coarser mask that
232 * aggregates the current target.
234 * If it has a different next hop, it
235 * cannot replace the target, so output
236 * the target.
238 if (ag->ag_gate != ag_cors->ag_gate
239 && !(ag->ag_state & AGS_FINE_GATE)
240 && !(ag_cors->ag_state & AGS_CORS_GATE)) {
241 ag_out(ag, out);
242 break;
245 /* If the coarse route has a good enough
246 * metric, it suppresses the target.
247 * If the suppressed target was redundant,
248 * then mark the suppressor redundant.
250 if (ag_cors->ag_pref <= ag->ag_pref) {
251 if (ag_cors->ag_seqno > ag->ag_seqno)
252 ag_cors->ag_seqno = ag->ag_seqno;
253 if (AG_IS_REDUN(ag->ag_state)
254 && ag_cors->ag_mask==ag->ag_mask<<1) {
255 if (ag_cors->ag_dst_h == dst_h)
256 ag_cors->ag_state |= AGS_REDUN0;
257 else
258 ag_cors->ag_state |= AGS_REDUN1;
260 if (ag->ag_tag != ag_cors->ag_tag)
261 ag_cors->ag_tag = 0;
262 if (ag->ag_nhop != ag_cors->ag_nhop)
263 ag_cors->ag_nhop = 0;
264 break;
269 /* That route has either been output or suppressed */
270 ag_cors = ag->ag_cors;
271 ag_del(ag);
274 CHECK_AG();
278 /* Try to aggregate a route with previous routes.
280 void
281 ag_check(naddr dst,
282 naddr mask,
283 naddr gate,
284 naddr nhop,
285 char metric,
286 char pref,
287 u_int seqnum,
288 u_short tag,
289 u_short state,
290 void (*out)(struct ag_info *)) /* output using this */
292 struct ag_info *ag, *nag, *ag_cors;
293 naddr xaddr;
294 int x;
296 dst = ntohl(dst);
298 /* Punt non-contiguous subnet masks.
300 * (X & -X) contains a single bit if and only if X is a power of 2.
301 * (X + (X & -X)) == 0 if and only if X is a power of 2.
303 if ((mask & -mask) + mask != 0) {
304 struct ag_info nc_ag;
306 nc_ag.ag_dst_h = dst;
307 nc_ag.ag_mask = mask;
308 nc_ag.ag_gate = gate;
309 nc_ag.ag_nhop = nhop;
310 nc_ag.ag_metric = metric;
311 nc_ag.ag_pref = pref;
312 nc_ag.ag_tag = tag;
313 nc_ag.ag_state = state;
314 nc_ag.ag_seqno = seqnum;
315 out(&nc_ag);
316 return;
319 /* Search for the right slot in the aggregation table.
321 ag_cors = NULL;
322 ag = ag_corsest;
323 while (ag != NULL) {
324 if (ag->ag_mask >= mask)
325 break;
327 /* Suppress old routes (i.e. combine with compatible routes
328 * with coarser masks) as we look for the right slot in the
329 * aggregation table for the new route.
330 * A route to an address less than the current destination
331 * will not be affected by the current route or any route
332 * seen hereafter. That means it is safe to suppress it.
333 * This check keeps poor routes (e.g. with large hop counts)
334 * from preventing suppression of finer routes.
336 if (ag_cors != NULL
337 && ag->ag_dst_h < dst
338 && (ag->ag_state & AGS_SUPPRESS)
339 && ag_cors->ag_pref <= ag->ag_pref
340 && (ag->ag_dst_h & ag_cors->ag_mask) == ag_cors->ag_dst_h
341 && (ag_cors->ag_gate == ag->ag_gate
342 || (ag->ag_state & AGS_FINE_GATE)
343 || (ag_cors->ag_state & AGS_CORS_GATE))) {
344 if (ag_cors->ag_seqno > ag->ag_seqno)
345 ag_cors->ag_seqno = ag->ag_seqno;
346 /* If the suppressed target was redundant,
347 * then mark the suppressor redundant.
349 if (AG_IS_REDUN(ag->ag_state)
350 && ag_cors->ag_mask == ag->ag_mask<<1) {
351 if (ag_cors->ag_dst_h == dst)
352 ag_cors->ag_state |= AGS_REDUN0;
353 else
354 ag_cors->ag_state |= AGS_REDUN1;
356 if (ag->ag_tag != ag_cors->ag_tag)
357 ag_cors->ag_tag = 0;
358 if (ag->ag_nhop != ag_cors->ag_nhop)
359 ag_cors->ag_nhop = 0;
360 ag_del(ag);
361 CHECK_AG();
362 } else {
363 ag_cors = ag;
365 ag = ag_cors->ag_fine;
368 /* If we find the even/odd twin of the new route, and if the
369 * masks and so forth are equal, we can aggregate them.
370 * We can probably promote one of the pair.
372 * Since the routes are encountered in lexical order,
373 * the new route must be odd. However, the second or later
374 * times around this loop, it could be the even twin promoted
375 * from the even/odd pair of twins of the finer route.
377 while (ag != NULL
378 && ag->ag_mask == mask
379 && ((ag->ag_dst_h ^ dst) & (mask<<1)) == 0) {
381 /* Here we know the target route and the route in the current
382 * slot have the same netmasks and differ by at most the
383 * last bit. They are either for the same destination, or
384 * for an even/odd pair of destinations.
386 if (ag->ag_dst_h == dst) {
387 /* We have two routes to the same destination.
388 * Routes are encountered in lexical order, so a
389 * route is never promoted until the parent route is
390 * already present. So we know that the new route is
391 * a promoted (or aggregated) pair and the route
392 * already in the slot is the explicit route.
394 * Prefer the best route if their metrics differ,
395 * or the aggregated one if not, following a sort
396 * of longest-match rule.
398 if (pref <= ag->ag_pref) {
399 ag->ag_gate = gate;
400 ag->ag_nhop = nhop;
401 ag->ag_tag = tag;
402 ag->ag_metric = metric;
403 ag->ag_pref = pref;
404 x = ag->ag_state;
405 ag->ag_state = state;
406 state = x;
409 /* The sequence number controls flash updating,
410 * and should be the smaller of the two.
412 if (ag->ag_seqno > seqnum)
413 ag->ag_seqno = seqnum;
415 /* Some bits are set if they are set on either route,
416 * except when the route is for an interface.
418 if (!(ag->ag_state & AGS_IF))
419 ag->ag_state |= (state & (AGS_AGGREGATE_EITHER
420 | AGS_REDUN0
421 | AGS_REDUN1));
422 return;
425 /* If one of the routes can be promoted and the other can
426 * be suppressed, it may be possible to combine them or
427 * worthwhile to promote one.
429 * Any route that can be promoted is always
430 * marked to be eligible to be suppressed.
432 if (!((state & AGS_AGGREGATE)
433 && (ag->ag_state & AGS_SUPPRESS))
434 && !((ag->ag_state & AGS_AGGREGATE)
435 && (state & AGS_SUPPRESS)))
436 break;
438 /* A pair of even/odd twin routes can be combined
439 * if either is redundant, or if they are via the
440 * same gateway and have the same metric.
442 if (AG_IS_REDUN(ag->ag_state)
443 || AG_IS_REDUN(state)
444 || (ag->ag_gate == gate
445 && ag->ag_pref == pref
446 && (state & ag->ag_state & AGS_AGGREGATE) != 0)) {
448 /* We have both the even and odd pairs.
449 * Since the routes are encountered in order,
450 * the route in the slot must be the even twin.
452 * Combine and promote (aggregate) the pair of routes.
454 if (seqnum > ag->ag_seqno)
455 seqnum = ag->ag_seqno;
456 if (!AG_IS_REDUN(state))
457 state &= ~AGS_REDUN1;
458 if (AG_IS_REDUN(ag->ag_state))
459 state |= AGS_REDUN0;
460 else
461 state &= ~AGS_REDUN0;
462 state |= (ag->ag_state & AGS_AGGREGATE_EITHER);
463 if (ag->ag_tag != tag)
464 tag = 0;
465 if (ag->ag_nhop != nhop)
466 nhop = 0;
468 /* Get rid of the even twin that was already
469 * in the slot.
471 ag_del(ag);
473 } else if (ag->ag_pref >= pref
474 && (ag->ag_state & AGS_AGGREGATE)) {
475 /* If we cannot combine the pair, maybe the route
476 * with the worse metric can be promoted.
478 * Promote the old, even twin, by giving its slot
479 * in the table to the new, odd twin.
481 ag->ag_dst_h = dst;
483 xaddr = ag->ag_gate;
484 ag->ag_gate = gate;
485 gate = xaddr;
487 xaddr = ag->ag_nhop;
488 ag->ag_nhop = nhop;
489 nhop = xaddr;
491 x = ag->ag_tag;
492 ag->ag_tag = tag;
493 tag = x;
495 /* The promoted route is even-redundant only if the
496 * even twin was fully redundant. It is not
497 * odd-redundant because the odd-twin will still be
498 * in the table.
500 x = ag->ag_state;
501 if (!AG_IS_REDUN(x))
502 x &= ~AGS_REDUN0;
503 x &= ~AGS_REDUN1;
504 ag->ag_state = state;
505 state = x;
507 x = ag->ag_metric;
508 ag->ag_metric = metric;
509 metric = x;
511 x = ag->ag_pref;
512 ag->ag_pref = pref;
513 pref = x;
515 /* take the newest sequence number */
516 if (seqnum >= ag->ag_seqno)
517 seqnum = ag->ag_seqno;
518 else
519 ag->ag_seqno = seqnum;
521 } else {
522 if (!(state & AGS_AGGREGATE))
523 break; /* cannot promote either twin */
525 /* Promote the new, odd twin by shaving its
526 * mask and address.
527 * The promoted route is odd-redundant only if the
528 * odd twin was fully redundant. It is not
529 * even-redundant because the even twin is still in
530 * the table.
532 if (!AG_IS_REDUN(state))
533 state &= ~AGS_REDUN1;
534 state &= ~AGS_REDUN0;
535 if (seqnum > ag->ag_seqno)
536 seqnum = ag->ag_seqno;
537 else
538 ag->ag_seqno = seqnum;
541 mask <<= 1;
542 dst &= mask;
544 if (ag_cors == NULL) {
545 ag = ag_corsest;
546 break;
548 ag = ag_cors;
549 ag_cors = ag->ag_cors;
552 /* When we can no longer promote and combine routes,
553 * flush the old route in the target slot. Also flush
554 * any finer routes that we know will never be aggregated by
555 * the new route.
557 * In case we moved toward coarser masks,
558 * get back where we belong
560 if (ag != NULL
561 && ag->ag_mask < mask) {
562 ag_cors = ag;
563 ag = ag->ag_fine;
566 /* Empty the target slot
568 if (ag != NULL && ag->ag_mask == mask) {
569 ag_flush(ag->ag_dst_h, ag->ag_mask, out);
570 ag = (ag_cors == NULL) ? ag_corsest : ag_cors->ag_fine;
573 #ifdef DEBUG_AG
574 fflush(stderr);
575 if (ag == NULL && ag_cors != ag_finest)
576 abort();
577 if (ag_cors == NULL && ag != ag_corsest)
578 abort();
579 if (ag != NULL && ag->ag_cors != ag_cors)
580 abort();
581 if (ag_cors != NULL && ag_cors->ag_fine != ag)
582 abort();
583 CHECK_AG();
584 #endif
586 /* Save the new route on the end of the table.
588 nag = ag_avail;
589 ag_avail = nag->ag_fine;
591 nag->ag_dst_h = dst;
592 nag->ag_mask = mask;
593 nag->ag_gate = gate;
594 nag->ag_nhop = nhop;
595 nag->ag_metric = metric;
596 nag->ag_pref = pref;
597 nag->ag_tag = tag;
598 nag->ag_state = state;
599 nag->ag_seqno = seqnum;
601 nag->ag_fine = ag;
602 if (ag != NULL)
603 ag->ag_cors = nag;
604 else
605 ag_finest = nag;
606 nag->ag_cors = ag_cors;
607 if (ag_cors == NULL)
608 ag_corsest = nag;
609 else
610 ag_cors->ag_fine = nag;
611 CHECK_AG();
#define NAME0_LEN 14
/* Map a routing-socket message type to a printable name.
 *
 *	Types 1 through the number of table entries yield the matching
 *	string from the table below.  Anything else, including 0, is
 *	formatted as "RTM type <hex>" into a static buffer, so that result
 *	is overwritten by the next call that takes the same path.
 */
static const char *
rtm_type_name(u_char type)
{
	static const char * const rtm_types[] = {
		"RTM_ADD",
		"RTM_DELETE",
		"RTM_CHANGE",
		"RTM_GET",
		"RTM_LOSING",
		"RTM_REDIRECT",
		"RTM_MISS",
		"RTM_LOCK",
		"RTM_OLDADD",
		"RTM_OLDDEL",
		"RTM_RESOLVE",
		"RTM_NEWADDR",
		"RTM_DELADDR",
		"RTM_IFINFO",
		"RTM_NEWMADDR",
		"RTM_DELMADDR"
	};
#define NEW_RTM_PAT "RTM type %#x"
	static char name0[sizeof(NEW_RTM_PAT)+2];


	/* the table is indexed by type-1, so type == count is still valid */
	if (type > sizeof(rtm_types)/sizeof(rtm_types[0])
	    || type == 0) {
		snprintf(name0, sizeof(name0), NEW_RTM_PAT, type);
		return name0;
	} else {
		return rtm_types[type-1];
	}
#undef NEW_RTM_PAT
}
652 /* Trim a mask in a sockaddr
653 * Produce a length of 0 for an address of 0.
654 * Otherwise produce the index of the first zero byte.
656 void
657 #ifdef _HAVE_SIN_LEN
658 masktrim(struct sockaddr_in *ap)
659 #else
660 masktrim(struct sockaddr_in_new *ap)
661 #endif
663 char *cp;
665 if (ap->sin_addr.s_addr == 0) {
666 ap->sin_len = 0;
667 return;
669 cp = (char *)(&ap->sin_addr.s_addr+1);
670 while (*--cp == 0)
671 continue;
672 ap->sin_len = cp - (char*)ap + 1;
676 /* Tell the kernel to add, delete or change a route
678 static void
679 rtioctl(int action, /* RTM_DELETE, etc */
680 naddr dst,
681 naddr gate,
682 naddr mask,
683 int metric,
684 int flags)
686 struct {
687 struct rt_msghdr w_rtm;
688 struct sockaddr_in w_dst;
689 struct sockaddr_in w_gate;
690 #ifdef _HAVE_SA_LEN
691 struct sockaddr_in w_mask;
692 #else
693 struct sockaddr_in_new w_mask;
694 #endif
695 } w;
696 long cc;
697 # define PAT " %-10s %s metric=%d flags=%#x"
698 # define ARGS rtm_type_name(action), rtname(dst,mask,gate), metric, flags
700 again:
701 memset(&w, 0, sizeof(w));
702 w.w_rtm.rtm_msglen = sizeof(w);
703 w.w_rtm.rtm_version = RTM_VERSION;
704 w.w_rtm.rtm_type = action;
705 w.w_rtm.rtm_flags = flags;
706 w.w_rtm.rtm_seq = ++rt_sock_seqno;
707 w.w_rtm.rtm_addrs = RTA_DST|RTA_GATEWAY;
708 if (metric != 0 || action == RTM_CHANGE) {
709 w.w_rtm.rtm_rmx.rmx_hopcount = metric;
710 w.w_rtm.rtm_inits |= RTV_HOPCOUNT;
712 w.w_dst.sin_family = AF_INET;
713 w.w_dst.sin_addr.s_addr = dst;
714 w.w_gate.sin_family = AF_INET;
715 w.w_gate.sin_addr.s_addr = gate;
716 #ifdef _HAVE_SA_LEN
717 w.w_dst.sin_len = sizeof(w.w_dst);
718 w.w_gate.sin_len = sizeof(w.w_gate);
719 #endif
720 if (mask == HOST_MASK) {
721 w.w_rtm.rtm_flags |= RTF_HOST;
722 w.w_rtm.rtm_msglen -= sizeof(w.w_mask);
723 } else {
724 w.w_rtm.rtm_addrs |= RTA_NETMASK;
725 w.w_mask.sin_addr.s_addr = htonl(mask);
726 #ifdef _HAVE_SA_LEN
727 masktrim(&w.w_mask);
728 if (w.w_mask.sin_len == 0)
729 w.w_mask.sin_len = sizeof(long);
730 w.w_rtm.rtm_msglen -= (sizeof(w.w_mask) - w.w_mask.sin_len);
731 #endif
734 #ifndef NO_INSTALL
735 cc = write(rt_sock, &w, w.w_rtm.rtm_msglen);
736 if (cc < 0) {
737 if (errno == ESRCH
738 && (action == RTM_CHANGE || action == RTM_DELETE)) {
739 trace_act("route disappeared before" PAT, ARGS);
740 if (action == RTM_CHANGE) {
741 action = RTM_ADD;
742 goto again;
744 return;
746 msglog("write(rt_sock)" PAT ": %s", ARGS, strerror(errno));
747 return;
748 } else if (cc != w.w_rtm.rtm_msglen) {
749 msglog("write(rt_sock) wrote %ld instead of %d for" PAT,
750 cc, w.w_rtm.rtm_msglen, ARGS);
751 return;
753 #endif
754 if (TRACEKERNEL)
755 trace_misc("write kernel" PAT, ARGS);
756 #undef PAT
757 #undef ARGS
761 #define KHASH_SIZE 71 /* should be prime */
762 #define KHASH(a,m) khash_bins[((a) ^ (m)) % KHASH_SIZE]
763 static struct khash {
764 struct khash *k_next;
765 naddr k_dst;
766 naddr k_mask;
767 naddr k_gate;
768 short k_metric;
769 u_short k_state;
770 #define KS_NEW 0x001
771 #define KS_DELETE 0x002 /* need to delete the route */
772 #define KS_ADD 0x004 /* add to the kernel */
773 #define KS_CHANGE 0x008 /* tell kernel to change the route */
774 #define KS_DEL_ADD 0x010 /* delete & add to change the kernel */
775 #define KS_STATIC 0x020 /* Static flag in kernel */
776 #define KS_GATEWAY 0x040 /* G flag in kernel */
777 #define KS_DYNAMIC 0x080 /* result of redirect */
778 #define KS_DELETED 0x100 /* already deleted from kernel */
779 #define KS_CHECK 0x200
780 time_t k_keep;
781 #define K_KEEP_LIM 30
782 time_t k_redirect_time; /* when redirected route 1st seen */
783 } *khash_bins[KHASH_SIZE];
786 static struct khash*
787 kern_find(naddr dst, naddr mask, struct khash ***ppk)
789 struct khash *k, **pk;
791 for (pk = &KHASH(dst,mask); (k = *pk) != NULL; pk = &k->k_next) {
792 if (k->k_dst == dst && k->k_mask == mask)
793 break;
795 if (ppk != NULL)
796 *ppk = pk;
797 return k;
801 static struct khash*
802 kern_add(naddr dst, naddr mask)
804 struct khash *k, **pk;
806 k = kern_find(dst, mask, &pk);
807 if (k != NULL)
808 return k;
810 k = (struct khash *)rtmalloc(sizeof(*k), "kern_add");
812 memset(k, 0, sizeof(*k));
813 k->k_dst = dst;
814 k->k_mask = mask;
815 k->k_state = KS_NEW;
816 k->k_keep = now.tv_sec;
817 *pk = k;
819 return k;
823 /* If a kernel route has a non-zero metric, check that it is still in the
824 * daemon table, and not deleted by interfaces coming and going.
826 static void
827 kern_check_static(struct khash *k,
828 struct interface *ifp)
830 struct rt_entry *rt;
831 struct rt_spare new;
833 if (k->k_metric == 0)
834 return;
836 memset(&new, 0, sizeof(new));
837 new.rts_ifp = ifp;
838 new.rts_gate = k->k_gate;
839 new.rts_router = (ifp != NULL) ? ifp->int_addr : loopaddr;
840 new.rts_metric = k->k_metric;
841 new.rts_time = now.tv_sec;
843 rt = rtget(k->k_dst, k->k_mask);
844 if (rt != NULL) {
845 if (!(rt->rt_state & RS_STATIC))
846 rtchange(rt, rt->rt_state | RS_STATIC, &new, 0);
847 } else {
848 rtadd(k->k_dst, k->k_mask, RS_STATIC, &new);
853 /* operate on a kernel entry
855 static void
856 kern_ioctl(struct khash *k,
857 int action, /* RTM_DELETE, etc */
858 int flags)
861 switch (action) {
862 case RTM_DELETE:
863 k->k_state &= ~KS_DYNAMIC;
864 if (k->k_state & KS_DELETED)
865 return;
866 k->k_state |= KS_DELETED;
867 break;
868 case RTM_ADD:
869 k->k_state &= ~KS_DELETED;
870 break;
871 case RTM_CHANGE:
872 if (k->k_state & KS_DELETED) {
873 action = RTM_ADD;
874 k->k_state &= ~KS_DELETED;
876 break;
879 rtioctl(action, k->k_dst, k->k_gate, k->k_mask, k->k_metric, flags);
883 /* add a route the kernel told us
885 static void
886 rtm_add(struct rt_msghdr *rtm,
887 struct rt_addrinfo *info,
888 time_t keep)
890 struct khash *k;
891 struct interface *ifp;
892 naddr mask;
895 if (rtm->rtm_flags & RTF_HOST) {
896 mask = HOST_MASK;
897 } else if (INFO_MASK(info) != 0) {
898 mask = ntohl(S_ADDR(INFO_MASK(info)));
899 } else {
900 msglog("ignore %s without mask", rtm_type_name(rtm->rtm_type));
901 return;
904 k = kern_add(S_ADDR(INFO_DST(info)), mask);
905 if (k->k_state & KS_NEW)
906 k->k_keep = now.tv_sec+keep;
907 if (INFO_GATE(info) == 0) {
908 trace_act("note %s without gateway",
909 rtm_type_name(rtm->rtm_type));
910 k->k_metric = HOPCNT_INFINITY;
911 } else if (INFO_GATE(info)->sa_family != AF_INET) {
912 trace_act("note %s with gateway AF=%d",
913 rtm_type_name(rtm->rtm_type),
914 INFO_GATE(info)->sa_family);
915 k->k_metric = HOPCNT_INFINITY;
916 } else {
917 k->k_gate = S_ADDR(INFO_GATE(info));
918 k->k_metric = rtm->rtm_rmx.rmx_hopcount;
919 if (k->k_metric < 0)
920 k->k_metric = 0;
921 else if (k->k_metric > HOPCNT_INFINITY-1)
922 k->k_metric = HOPCNT_INFINITY-1;
924 k->k_state &= ~(KS_DELETE | KS_ADD | KS_CHANGE | KS_DEL_ADD
925 | KS_DELETED | KS_GATEWAY | KS_STATIC
926 | KS_NEW | KS_CHECK);
927 if (rtm->rtm_flags & RTF_GATEWAY)
928 k->k_state |= KS_GATEWAY;
929 if (rtm->rtm_flags & RTF_STATIC)
930 k->k_state |= KS_STATIC;
932 if (0 != (rtm->rtm_flags & (RTF_DYNAMIC | RTF_MODIFIED))) {
933 if (INFO_AUTHOR(info) != 0
934 && INFO_AUTHOR(info)->sa_family == AF_INET)
935 ifp = iflookup(S_ADDR(INFO_AUTHOR(info)));
936 else
937 ifp = NULL;
938 if (supplier
939 && (ifp == NULL || !(ifp->int_state & IS_REDIRECT_OK))) {
940 /* Routers are not supposed to listen to redirects,
941 * so delete it if it came via an unknown interface
942 * or the interface does not have special permission.
944 k->k_state &= ~KS_DYNAMIC;
945 k->k_state |= KS_DELETE;
946 LIM_SEC(need_kern, 0);
947 trace_act("mark for deletion redirected %s --> %s"
948 " via %s",
949 addrname(k->k_dst, k->k_mask, 0),
950 naddr_ntoa(k->k_gate),
951 ifp ? ifp->int_name : "unknown interface");
952 } else {
953 k->k_state |= KS_DYNAMIC;
954 k->k_redirect_time = now.tv_sec;
955 trace_act("accept redirected %s --> %s via %s",
956 addrname(k->k_dst, k->k_mask, 0),
957 naddr_ntoa(k->k_gate),
958 ifp ? ifp->int_name : "unknown interface");
960 return;
963 /* If it is not a static route, quit until the next comparison
964 * between the kernel and daemon tables, when it will be deleted.
966 if (!(k->k_state & KS_STATIC)) {
967 k->k_state |= KS_DELETE;
968 LIM_SEC(need_kern, k->k_keep);
969 return;
972 /* Put static routes with real metrics into the daemon table so
973 * they can be advertised.
975 * Find the interface toward the gateway.
977 ifp = iflookup(k->k_gate);
978 if (ifp == NULL)
979 msglog("static route %s --> %s impossibly lacks ifp",
980 addrname(S_ADDR(INFO_DST(info)), mask, 0),
981 naddr_ntoa(k->k_gate));
983 kern_check_static(k, ifp);
987 /* deal with packet loss
989 static void
990 rtm_lose(struct rt_msghdr *rtm,
991 struct rt_addrinfo *info)
993 if (INFO_GATE(info) == 0
994 || INFO_GATE(info)->sa_family != AF_INET) {
995 trace_act("ignore %s without gateway",
996 rtm_type_name(rtm->rtm_type));
997 return;
1000 if (rdisc_ok)
1001 rdisc_age(S_ADDR(INFO_GATE(info)));
1002 age(S_ADDR(INFO_GATE(info)));
1006 /* Make the gateway slot of an info structure point to something
1007 * useful. If it is not already useful, but it specifies an interface,
1008 * then fill in the sockaddr_in provided and point it there.
1010 static int
1011 get_info_gate(struct sockaddr **sap,
1012 struct sockaddr_in *in)
1014 struct sockaddr_dl *sdl = (struct sockaddr_dl *)*sap;
1015 struct interface *ifp;
1017 if (sdl == NULL)
1018 return 0;
1019 if ((sdl)->sdl_family == AF_INET)
1020 return 1;
1021 if ((sdl)->sdl_family != AF_LINK)
1022 return 0;
1024 ifp = ifwithindex(sdl->sdl_index, 1);
1025 if (ifp == NULL)
1026 return 0;
1028 in->sin_addr.s_addr = ifp->int_addr;
1029 #ifdef _HAVE_SA_LEN
1030 in->sin_len = sizeof(*in);
1031 #endif
1032 in->sin_family = AF_INET;
1033 *sap = (struct sockaddr *)in;
1035 return 1;
1039 /* Clean the kernel table by copying it to the daemon image.
1040 * Eventually the daemon will delete any extra routes.
1042 void
1043 flush_kern(void)
1045 static char *sysctl_buf;
1046 static size_t sysctl_buf_size = 0;
1047 size_t needed;
1048 int mib[6];
1049 char *next, *lim;
1050 struct rt_msghdr *rtm;
1051 struct sockaddr_in gate_sin;
1052 struct rt_addrinfo info;
1053 int i;
1054 struct khash *k;
1057 for (i = 0; i < KHASH_SIZE; i++) {
1058 for (k = khash_bins[i]; k != NULL; k = k->k_next) {
1059 k->k_state |= KS_CHECK;
1063 mib[0] = CTL_NET;
1064 mib[1] = PF_ROUTE;
1065 mib[2] = 0; /* protocol */
1066 mib[3] = 0; /* wildcard address family */
1067 mib[4] = NET_RT_DUMP;
1068 mib[5] = 0; /* no flags */
1069 for (;;) {
1070 if ((needed = sysctl_buf_size) != 0) {
1071 if (sysctl(mib, 6, sysctl_buf,&needed, 0, 0) >= 0)
1072 break;
1073 if (errno != ENOMEM && errno != EFAULT)
1074 BADERR(1,"flush_kern: sysctl(RT_DUMP)");
1075 free(sysctl_buf);
1076 needed = 0;
1078 if (sysctl(mib, 6, 0, &needed, 0, 0) < 0)
1079 BADERR(1,"flush_kern: sysctl(RT_DUMP) estimate");
1080 /* Kludge around the habit of some systems, such as
1081 * BSD/OS 3.1, to not admit how many routes are in the
1082 * kernel, or at least to be quite wrong.
1084 needed += 50*(sizeof(*rtm)+5*sizeof(struct sockaddr));
1085 sysctl_buf = rtmalloc(sysctl_buf_size = needed,
1086 "flush_kern sysctl(RT_DUMP)");
1089 lim = sysctl_buf + needed;
1090 for (next = sysctl_buf; next < lim; next += rtm->rtm_msglen) {
1091 rtm = (struct rt_msghdr *)next;
1092 if (rtm->rtm_msglen == 0) {
1093 msglog("zero length kernel route at "
1094 " %#lx in buffer %#lx before %#lx",
1095 (u_long)rtm, (u_long)sysctl_buf, (u_long)lim);
1096 break;
1099 rt_xaddrs(&info,
1100 (struct sockaddr *)(rtm+1),
1101 (struct sockaddr *)(next + rtm->rtm_msglen),
1102 rtm->rtm_addrs);
1104 if (INFO_DST(&info) == 0
1105 || INFO_DST(&info)->sa_family != AF_INET)
1106 continue;
1108 /* ignore ARP table entries on systems with a merged route
1109 * and ARP table.
1111 if (rtm->rtm_flags & RTF_LLINFO)
1112 continue;
1114 /* ignore multicast addresses
1116 if (IN_MULTICAST(ntohl(S_ADDR(INFO_DST(&info)))))
1117 continue;
1119 if (!get_info_gate(&INFO_GATE(&info), &gate_sin))
1120 continue;
1122 /* Note static routes and interface routes, and also
1123 * preload the image of the kernel table so that
1124 * we can later clean it, as well as avoid making
1125 * unneeded changes. Keep the old kernel routes for a
1126 * few seconds to allow a RIP or router-discovery
1127 * response to be heard.
1129 rtm_add(rtm,&info,MIN_WAITTIME);
1132 for (i = 0; i < KHASH_SIZE; i++) {
1133 for (k = khash_bins[i]; k != NULL; k = k->k_next) {
1134 if (k->k_state & KS_CHECK) {
1135 msglog("%s --> %s disappeared from kernel",
1136 addrname(k->k_dst, k->k_mask, 0),
1137 naddr_ntoa(k->k_gate));
1138 del_static(k->k_dst, k->k_mask, k->k_gate, 1);
1145 /* Listen to announcements from the kernel
1147 void
1148 read_rt(void)
1150 long cc;
1151 struct interface *ifp;
1152 struct sockaddr_in gate_sin;
1153 naddr mask, gate;
1154 union {
1155 struct {
1156 struct rt_msghdr rtm;
1157 struct sockaddr addrs[RTAX_MAX];
1158 } r;
1159 struct if_msghdr ifm;
1160 } m;
1161 char str[100], *strp;
1162 struct rt_addrinfo info;
1165 for (;;) {
1166 cc = read(rt_sock, &m, sizeof(m));
1167 if (cc <= 0) {
1168 if (cc < 0 && errno != EWOULDBLOCK)
1169 LOGERR("read(rt_sock)");
1170 return;
1173 if (m.r.rtm.rtm_version != RTM_VERSION) {
1174 msglog("bogus routing message version %d",
1175 m.r.rtm.rtm_version);
1176 continue;
1179 /* Ignore our own results.
1181 if (m.r.rtm.rtm_type <= RTM_CHANGE
1182 && m.r.rtm.rtm_pid == mypid) {
1183 static int complained = 0;
1184 if (!complained) {
1185 msglog("receiving our own change messages");
1186 complained = 1;
1188 continue;
1191 if (m.r.rtm.rtm_type == RTM_IFINFO
1192 || m.r.rtm.rtm_type == RTM_NEWADDR
1193 || m.r.rtm.rtm_type == RTM_DELADDR) {
1194 ifp = ifwithindex(m.ifm.ifm_index,
1195 m.r.rtm.rtm_type != RTM_DELADDR);
1196 if (ifp == NULL)
1197 trace_act("note %s with flags %#x"
1198 " for unknown interface index #%d",
1199 rtm_type_name(m.r.rtm.rtm_type),
1200 m.ifm.ifm_flags,
1201 m.ifm.ifm_index);
1202 else
1203 trace_act("note %s with flags %#x for %s",
1204 rtm_type_name(m.r.rtm.rtm_type),
1205 m.ifm.ifm_flags,
1206 ifp->int_name);
1208 /* After being informed of a change to an interface,
1209 * check them all now if the check would otherwise
1210 * be a long time from now, if the interface is
1211 * not known, or if the interface has been turned
1212 * off or on.
1214 if (ifinit_timer.tv_sec-now.tv_sec>=CHECK_BAD_INTERVAL
1215 || ifp == NULL
1216 || ((ifp->int_if_flags ^ m.ifm.ifm_flags)
1217 & IFF_UP) != 0)
1218 ifinit_timer.tv_sec = now.tv_sec;
1219 continue;
1222 strcpy(str, rtm_type_name(m.r.rtm.rtm_type));
1223 strp = &str[strlen(str)];
1224 if (m.r.rtm.rtm_type <= RTM_CHANGE)
1225 strp += sprintf(strp," from pid %d",m.r.rtm.rtm_pid);
1227 rt_xaddrs(&info, m.r.addrs, &m.r.addrs[RTAX_MAX],
1228 m.r.rtm.rtm_addrs);
1230 if (INFO_DST(&info) == 0) {
1231 trace_act("ignore %s without dst", str);
1232 continue;
1235 if (INFO_DST(&info)->sa_family != AF_INET) {
1236 trace_act("ignore %s for AF %d", str,
1237 INFO_DST(&info)->sa_family);
1238 continue;
1241 mask = ((INFO_MASK(&info) != 0)
1242 ? ntohl(S_ADDR(INFO_MASK(&info)))
1243 : (m.r.rtm.rtm_flags & RTF_HOST)
1244 ? HOST_MASK
1245 : std_mask(S_ADDR(INFO_DST(&info))));
1247 strp += sprintf(strp, ": %s",
1248 addrname(S_ADDR(INFO_DST(&info)), mask, 0));
1250 if (IN_MULTICAST(ntohl(S_ADDR(INFO_DST(&info))))) {
1251 trace_act("ignore multicast %s", str);
1252 continue;
1255 if (m.r.rtm.rtm_flags & RTF_LLINFO) {
1256 trace_act("ignore ARP %s", str);
1257 continue;
1260 if (get_info_gate(&INFO_GATE(&info), &gate_sin)) {
1261 gate = S_ADDR(INFO_GATE(&info));
1262 strp += sprintf(strp, " --> %s", naddr_ntoa(gate));
1263 } else {
1264 gate = 0;
1267 if (INFO_AUTHOR(&info) != 0)
1268 strp += sprintf(strp, " by authority of %s",
1269 saddr_ntoa(INFO_AUTHOR(&info)));
1271 switch (m.r.rtm.rtm_type) {
1272 case RTM_ADD:
1273 case RTM_CHANGE:
1274 case RTM_REDIRECT:
1275 if (m.r.rtm.rtm_errno != 0) {
1276 trace_act("ignore %s with \"%s\" error",
1277 str, strerror(m.r.rtm.rtm_errno));
1278 } else {
1279 trace_act("%s", str);
1280 rtm_add(&m.r.rtm,&info,0);
1282 break;
1284 case RTM_DELETE:
1285 if (m.r.rtm.rtm_errno != 0
1286 && m.r.rtm.rtm_errno != ESRCH) {
1287 trace_act("ignore %s with \"%s\" error",
1288 str, strerror(m.r.rtm.rtm_errno));
1289 } else {
1290 trace_act("%s", str);
1291 del_static(S_ADDR(INFO_DST(&info)), mask,
1292 gate, 1);
1294 break;
1296 case RTM_LOSING:
1297 trace_act("%s", str);
1298 rtm_lose(&m.r.rtm,&info);
1299 break;
1301 default:
1302 trace_act("ignore %s", str);
1303 break;
1309 /* after aggregating, note routes that belong in the kernel
1311 static void
1312 kern_out(struct ag_info *ag)
1314 struct khash *k;
1317 /* Do not install bad routes if they are not already present.
1318 * This includes routes that had RS_NET_SYN for interfaces that
1319 * recently died.
1321 if (ag->ag_metric == HOPCNT_INFINITY) {
1322 k = kern_find(htonl(ag->ag_dst_h), ag->ag_mask, 0);
1323 if (k == NULL)
1324 return;
1325 } else {
1326 k = kern_add(htonl(ag->ag_dst_h), ag->ag_mask);
1329 if (k->k_state & KS_NEW) {
1330 /* will need to add new entry to the kernel table */
1331 k->k_state = KS_ADD;
1332 if (ag->ag_state & AGS_GATEWAY)
1333 k->k_state |= KS_GATEWAY;
1334 k->k_gate = ag->ag_gate;
1335 k->k_metric = ag->ag_metric;
1336 return;
1339 if (k->k_state & KS_STATIC)
1340 return;
1342 /* modify existing kernel entry if necessary */
1343 if (k->k_gate != ag->ag_gate
1344 || k->k_metric != ag->ag_metric) {
1345 /* Must delete bad interface routes etc. to change them. */
1346 if (k->k_metric == HOPCNT_INFINITY)
1347 k->k_state |= KS_DEL_ADD;
1348 k->k_gate = ag->ag_gate;
1349 k->k_metric = ag->ag_metric;
1350 k->k_state |= KS_CHANGE;
1353 /* If the daemon thinks the route should exist, forget
1354 * about any redirections.
1355 * If the daemon thinks the route should exist, eventually
1356 * override manual intervention by the operator.
1358 if ((k->k_state & (KS_DYNAMIC | KS_DELETED)) != 0) {
1359 k->k_state &= ~KS_DYNAMIC;
1360 k->k_state |= (KS_ADD | KS_DEL_ADD);
1363 if ((k->k_state & KS_GATEWAY)
1364 && !(ag->ag_state & AGS_GATEWAY)) {
1365 k->k_state &= ~KS_GATEWAY;
1366 k->k_state |= (KS_ADD | KS_DEL_ADD);
1367 } else if (!(k->k_state & KS_GATEWAY)
1368 && (ag->ag_state & AGS_GATEWAY)) {
1369 k->k_state |= KS_GATEWAY;
1370 k->k_state |= (KS_ADD | KS_DEL_ADD);
1373 /* Deleting-and-adding is necessary to change aspects of a route.
1374 * Just delete instead of deleting and then adding a bad route.
1375 * Otherwise, we want to keep the route in the kernel.
1377 if (k->k_metric == HOPCNT_INFINITY
1378 && (k->k_state & KS_DEL_ADD))
1379 k->k_state |= KS_DELETE;
1380 else
1381 k->k_state &= ~KS_DELETE;
1382 #undef RT
1386 /* ARGSUSED */
1387 static int
1388 walk_kern(struct radix_node *rn,
1389 struct walkarg *argp UNUSED)
1391 #define RT ((struct rt_entry *)rn)
1392 char metric, pref;
1393 u_int ags = 0;
1396 /* Do not install synthetic routes */
1397 if (RT->rt_state & RS_NET_SYN)
1398 return 0;
1400 if (!(RT->rt_state & RS_IF)) {
1401 /* This is an ordinary route, not for an interface.
1404 /* aggregate, ordinary good routes without regard to
1405 * their metric
1407 pref = 1;
1408 ags |= (AGS_GATEWAY | AGS_SUPPRESS | AGS_AGGREGATE);
1410 /* Do not install host routes directly to hosts, to avoid
1411 * interfering with ARP entries in the kernel table.
1413 if (RT_ISHOST(RT)
1414 && ntohl(RT->rt_dst) == RT->rt_gate)
1415 return 0;
1417 } else {
1418 /* This is an interface route.
1419 * Do not install routes for "external" remote interfaces.
1421 if (RT->rt_ifp != 0 && (RT->rt_ifp->int_state & IS_EXTERNAL))
1422 return 0;
1424 /* Interfaces should override received routes.
1426 pref = 0;
1427 ags |= (AGS_IF | AGS_CORS_GATE);
1429 /* If it is not an interface, or an alias for an interface,
1430 * it must be a "gateway."
1432 * If it is a "remote" interface, it is also a "gateway" to
1433 * the kernel if is not a alias.
1435 if (RT->rt_ifp == 0
1436 || (RT->rt_ifp->int_state & IS_REMOTE))
1437 ags |= (AGS_GATEWAY | AGS_SUPPRESS | AGS_AGGREGATE);
1440 /* If RIP is off and IRDP is on, let the route to the discovered
1441 * route suppress any RIP routes. Eventually the RIP routes
1442 * will time-out and be deleted. This reaches the steady-state
1443 * quicker.
1445 if ((RT->rt_state & RS_RDISC) && rip_sock < 0)
1446 ags |= AGS_CORS_GATE;
1448 metric = RT->rt_metric;
1449 if (metric == HOPCNT_INFINITY) {
1450 /* if the route is dead, so try hard to aggregate. */
1451 pref = HOPCNT_INFINITY;
1452 ags |= (AGS_FINE_GATE | AGS_SUPPRESS);
1453 ags &= ~(AGS_IF | AGS_CORS_GATE);
1456 ag_check(RT->rt_dst, RT->rt_mask, RT->rt_gate, 0,
1457 metric,pref, 0, 0, ags, kern_out);
1458 return 0;
1459 #undef RT
1463 /* Update the kernel table to match the daemon table.
1465 static void
1466 fix_kern(void)
1468 int i;
1469 struct khash *k, **pk;
1472 need_kern = age_timer;
1474 /* Walk daemon table, updating the copy of the kernel table.
1476 rn_walktree(rhead, walk_kern, 0);
1477 ag_flush(0,0,kern_out);
1479 for (i = 0; i < KHASH_SIZE; i++) {
1480 for (pk = &khash_bins[i]; (k = *pk) != NULL; ) {
1481 /* Do not touch static routes */
1482 if (k->k_state & KS_STATIC) {
1483 kern_check_static(k,0);
1484 pk = &k->k_next;
1485 continue;
1488 /* check hold on routes deleted by the operator */
1489 if (k->k_keep > now.tv_sec) {
1490 /* ensure we check when the hold is over */
1491 LIM_SEC(need_kern, k->k_keep);
1492 /* mark for the next cycle */
1493 k->k_state |= KS_DELETE;
1494 pk = &k->k_next;
1495 continue;
1498 if ((k->k_state & KS_DELETE)
1499 && !(k->k_state & KS_DYNAMIC)) {
1500 kern_ioctl(k, RTM_DELETE, 0);
1501 *pk = k->k_next;
1502 free(k);
1503 continue;
1506 if (k->k_state & KS_DEL_ADD)
1507 kern_ioctl(k, RTM_DELETE, 0);
1509 if (k->k_state & KS_ADD) {
1510 kern_ioctl(k, RTM_ADD,
1511 ((0 != (k->k_state & (KS_GATEWAY
1512 | KS_DYNAMIC)))
1513 ? RTF_GATEWAY : 0));
1514 } else if (k->k_state & KS_CHANGE) {
1515 kern_ioctl(k, RTM_CHANGE,
1516 ((0 != (k->k_state & (KS_GATEWAY
1517 | KS_DYNAMIC)))
1518 ? RTF_GATEWAY : 0));
1520 k->k_state &= ~(KS_ADD|KS_CHANGE|KS_DEL_ADD);
1522 /* Mark this route to be deleted in the next cycle.
1523 * This deletes routes that disappear from the
1524 * daemon table, since the normal aging code
1525 * will clear the bit for routes that have not
1526 * disappeared from the daemon table.
1528 k->k_state |= KS_DELETE;
1529 pk = &k->k_next;
1535 /* Delete a static route in the image of the kernel table.
1537 void
1538 del_static(naddr dst,
1539 naddr mask,
1540 naddr gate,
1541 int gone)
1543 struct khash *k;
1544 struct rt_entry *rt;
1546 /* Just mark it in the table to be deleted next time the kernel
1547 * table is updated.
1548 * If it has already been deleted, mark it as such, and set its
1549 * keep-timer so that it will not be deleted again for a while.
1550 * This lets the operator delete a route added by the daemon
1551 * and add a replacement.
1553 k = kern_find(dst, mask, 0);
1554 if (k != NULL && (gate == 0 || k->k_gate == gate)) {
1555 k->k_state &= ~(KS_STATIC | KS_DYNAMIC | KS_CHECK);
1556 k->k_state |= KS_DELETE;
1557 if (gone) {
1558 k->k_state |= KS_DELETED;
1559 k->k_keep = now.tv_sec + K_KEEP_LIM;
1563 rt = rtget(dst, mask);
1564 if (rt != NULL && (rt->rt_state & RS_STATIC))
1565 rtbad(rt);
1569 /* Delete all routes generated from ICMP Redirects that use a given gateway,
1570 * as well as old redirected routes.
1572 void
1573 del_redirects(naddr bad_gate,
1574 time_t old)
1576 int i;
1577 struct khash *k;
1580 for (i = 0; i < KHASH_SIZE; i++) {
1581 for (k = khash_bins[i]; k != NULL; k = k->k_next) {
1582 if (!(k->k_state & KS_DYNAMIC)
1583 || (k->k_state & KS_STATIC))
1584 continue;
1586 if (k->k_gate != bad_gate
1587 && k->k_redirect_time > old
1588 && !supplier)
1589 continue;
1591 k->k_state |= KS_DELETE;
1592 k->k_state &= ~KS_DYNAMIC;
1593 need_kern.tv_sec = now.tv_sec;
1594 trace_act("mark redirected %s --> %s for deletion",
1595 addrname(k->k_dst, k->k_mask, 0),
1596 naddr_ntoa(k->k_gate));
1602 /* Start the daemon tables.
1604 extern int max_keylen;
1606 void
1607 rtinit(void)
1609 int i;
1610 struct ag_info *ag;
1612 /* Initialize the radix trees */
1613 max_keylen = sizeof(struct sockaddr_in);
1614 rn_init();
1615 rn_inithead(&rhead, 32);
1617 /* mark all of the slots in the table free */
1618 ag_avail = ag_slots;
1619 for (ag = ag_slots, i = 1; i < NUM_AG_SLOTS; i++) {
1620 ag->ag_fine = ag+1;
1621 ag++;
/* Scratch destination and mask socket addresses.  rtget(), rtfind(),
 * rtadd() and rtdelete() fill in sin_addr before handing these to the
 * radix tree routines.  Only the length and address family are set
 * here; which initializer form is used depends on _HAVE_SIN_LEN.
 */
#ifdef _HAVE_SIN_LEN
static struct sockaddr_in dst_sock = {sizeof(dst_sock), AF_INET, 0, {0}, {0}};
static struct sockaddr_in mask_sock = {sizeof(mask_sock), AF_INET, 0, {0}, {0}};
#else
static struct sockaddr_in_new dst_sock = {_SIN_ADDR_SIZE, AF_INET};
static struct sockaddr_in_new mask_sock = {_SIN_ADDR_SIZE, AF_INET};
#endif
1635 static void
1636 set_need_flash(void)
1638 if (!need_flash) {
1639 need_flash = 1;
1640 /* Do not send the flash update immediately. Wait a little
1641 * while to hear from other routers.
1643 no_flash.tv_sec = now.tv_sec + MIN_WAITTIME;
1648 /* Get a particular routing table entry
1650 struct rt_entry *
1651 rtget(naddr dst, naddr mask)
1653 struct rt_entry *rt;
1655 dst_sock.sin_addr.s_addr = dst;
1656 mask_sock.sin_addr.s_addr = htonl(mask);
1657 masktrim(&mask_sock);
1658 rt = (struct rt_entry *)rhead->rnh_lookup(&dst_sock,&mask_sock,rhead);
1659 if (!rt
1660 || rt->rt_dst != dst
1661 || rt->rt_mask != mask)
1662 return 0;
1664 return rt;
1668 /* Find a route to dst as the kernel would.
1670 struct rt_entry *
1671 rtfind(naddr dst)
1673 dst_sock.sin_addr.s_addr = dst;
1674 return (struct rt_entry *)rhead->rnh_matchaddr(&dst_sock, rhead);
1678 /* add a route to the table
1680 void
1681 rtadd(naddr dst,
1682 naddr mask,
1683 u_int state, /* rt_state for the entry */
1684 struct rt_spare *new)
1686 struct rt_entry *rt;
1687 naddr smask;
1688 int i;
1689 struct rt_spare *rts;
1691 rt = (struct rt_entry *)rtmalloc(sizeof (*rt), "rtadd");
1692 memset(rt, 0, sizeof(*rt));
1693 for (rts = rt->rt_spares, i = NUM_SPARES; i != 0; i--, rts++)
1694 rts->rts_metric = HOPCNT_INFINITY;
1696 rt->rt_nodes->rn_key = (caddr_t)&rt->rt_dst_sock;
1697 rt->rt_dst = dst;
1698 rt->rt_dst_sock.sin_family = AF_INET;
1699 #ifdef _HAVE_SIN_LEN
1700 rt->rt_dst_sock.sin_len = dst_sock.sin_len;
1701 #endif
1702 if (mask != HOST_MASK) {
1703 smask = std_mask(dst);
1704 if ((smask & ~mask) == 0 && mask > smask)
1705 state |= RS_SUBNET;
1707 mask_sock.sin_addr.s_addr = htonl(mask);
1708 masktrim(&mask_sock);
1709 rt->rt_mask = mask;
1710 rt->rt_state = state;
1711 rt->rt_spares[0] = *new;
1712 rt->rt_time = now.tv_sec;
1713 rt->rt_poison_metric = HOPCNT_INFINITY;
1714 rt->rt_seqno = update_seqno;
1716 if (++total_routes == MAX_ROUTES)
1717 msglog("have maximum (%d) routes", total_routes);
1718 if (TRACEACTIONS)
1719 trace_add_del("Add", rt);
1721 need_kern.tv_sec = now.tv_sec;
1722 set_need_flash();
1724 if (0 == rhead->rnh_addaddr(&rt->rt_dst_sock, &mask_sock,
1725 rhead, rt->rt_nodes)) {
1726 msglog("rnh_addaddr() failed for %s mask=%#lx",
1727 naddr_ntoa(dst), (u_long)mask);
1728 free(rt);
1733 /* notice a changed route
1735 void
1736 rtchange(struct rt_entry *rt,
1737 u_int state, /* new state bits */
1738 struct rt_spare *new,
1739 char *label)
1741 if (rt->rt_metric != new->rts_metric) {
1742 /* Fix the kernel immediately if it seems the route
1743 * has gone bad, since there may be a working route that
1744 * aggregates this route.
1746 if (new->rts_metric == HOPCNT_INFINITY) {
1747 need_kern.tv_sec = now.tv_sec;
1748 if (new->rts_time >= now.tv_sec - EXPIRE_TIME)
1749 new->rts_time = now.tv_sec - EXPIRE_TIME;
1751 rt->rt_seqno = update_seqno;
1752 set_need_flash();
1755 if (rt->rt_gate != new->rts_gate) {
1756 need_kern.tv_sec = now.tv_sec;
1757 rt->rt_seqno = update_seqno;
1758 set_need_flash();
1761 state |= (rt->rt_state & RS_SUBNET);
1763 /* Keep various things from deciding ageless routes are stale.
1765 if (!AGE_RT(state, new->rts_ifp))
1766 new->rts_time = now.tv_sec;
1768 if (TRACEACTIONS)
1769 trace_change(rt, state, new,
1770 label ? label : "Chg ");
1772 rt->rt_state = state;
1773 rt->rt_spares[0] = *new;
1777 /* check for a better route among the spares
1779 static struct rt_spare *
1780 rts_better(struct rt_entry *rt)
1782 struct rt_spare *rts, *rts1;
1783 int i;
1785 /* find the best alternative among the spares */
1786 rts = rt->rt_spares+1;
1787 for (i = NUM_SPARES, rts1 = rts+1; i > 2; i--, rts1++) {
1788 if (BETTER_LINK(rt,rts1,rts))
1789 rts = rts1;
1792 return rts;
1796 /* switch to a backup route
1798 void
1799 rtswitch(struct rt_entry *rt,
1800 struct rt_spare *rts)
1802 struct rt_spare swap;
1803 char label[10];
1806 /* Do not change permanent routes */
1807 if (0 != (rt->rt_state & (RS_MHOME | RS_STATIC | RS_RDISC
1808 | RS_NET_SYN | RS_IF)))
1809 return;
1811 /* find the best alternative among the spares */
1812 if (rts == NULL)
1813 rts = rts_better(rt);
1815 /* Do not bother if it is not worthwhile.
1817 if (!BETTER_LINK(rt, rts, rt->rt_spares))
1818 return;
1820 swap = rt->rt_spares[0];
1821 sprintf(label, "Use #%d", (int)(rts - rt->rt_spares));
1822 rtchange(rt, rt->rt_state & ~(RS_NET_SYN | RS_RDISC), rts, label);
1823 if (swap.rts_metric == HOPCNT_INFINITY) {
1824 *rts = rts_empty;
1825 } else {
1826 *rts = swap;
1831 void
1832 rtdelete(struct rt_entry *rt)
1834 struct khash *k;
1837 if (TRACEACTIONS)
1838 trace_add_del("Del", rt);
1840 k = kern_find(rt->rt_dst, rt->rt_mask, 0);
1841 if (k != NULL) {
1842 k->k_state |= KS_DELETE;
1843 need_kern.tv_sec = now.tv_sec;
1846 dst_sock.sin_addr.s_addr = rt->rt_dst;
1847 mask_sock.sin_addr.s_addr = htonl(rt->rt_mask);
1848 masktrim(&mask_sock);
1849 if (rt != (struct rt_entry *)rhead->rnh_deladdr(&dst_sock, &mask_sock,
1850 rhead)) {
1851 msglog("rnh_deladdr() failed");
1852 } else {
1853 free(rt);
1854 total_routes--;
1859 void
1860 rts_delete(struct rt_entry *rt,
1861 struct rt_spare *rts)
1863 trace_upslot(rt, rts, &rts_empty);
1864 *rts = rts_empty;
1868 /* Get rid of a bad route, and try to switch to a replacement.
1870 void
1871 rtbad(struct rt_entry *rt)
1873 struct rt_spare new;
1875 /* Poison the route */
1876 new = rt->rt_spares[0];
1877 new.rts_metric = HOPCNT_INFINITY;
1878 rtchange(rt, rt->rt_state & ~(RS_IF | RS_LOCAL | RS_STATIC), &new, 0);
1879 rtswitch(rt, 0);
1883 /* Junk a RS_NET_SYN or RS_LOCAL route,
1884 * unless it is needed by another interface.
1886 void
1887 rtbad_sub(struct rt_entry *rt)
1889 struct interface *ifp, *ifp1;
1890 struct intnet *intnetp;
1891 u_int state;
1894 ifp1 = NULL;
1895 state = 0;
1897 if (rt->rt_state & RS_LOCAL) {
1898 /* Is this the route through loopback for the interface?
1899 * If so, see if it is used by any other interfaces, such
1900 * as a point-to-point interface with the same local address.
1902 for (ifp = ifnet; ifp != NULL; ifp = ifp->int_next) {
1903 /* Retain it if another interface needs it.
1905 if (ifp->int_addr == rt->rt_ifp->int_addr) {
1906 state |= RS_LOCAL;
1907 ifp1 = ifp;
1908 break;
1914 if (!(state & RS_LOCAL)) {
1915 /* Retain RIPv1 logical network route if there is another
1916 * interface that justifies it.
1918 if (rt->rt_state & RS_NET_SYN) {
1919 for (ifp = ifnet; ifp != NULL; ifp = ifp->int_next) {
1920 if ((ifp->int_state & IS_NEED_NET_SYN)
1921 && rt->rt_mask == ifp->int_std_mask
1922 && rt->rt_dst == ifp->int_std_addr) {
1923 state |= RS_NET_SYN;
1924 ifp1 = ifp;
1925 break;
1930 /* or if there is an authority route that needs it. */
1931 for (intnetp = intnets;
1932 intnetp != NULL;
1933 intnetp = intnetp->intnet_next) {
1934 if (intnetp->intnet_addr == rt->rt_dst
1935 && intnetp->intnet_mask == rt->rt_mask) {
1936 state |= (RS_NET_SYN | RS_NET_INT);
1937 break;
1942 if (ifp1 != NULL || (state & RS_NET_SYN)) {
1943 struct rt_spare new = rt->rt_spares[0];
1944 new.rts_ifp = ifp1;
1945 rtchange(rt, ((rt->rt_state & ~(RS_NET_SYN|RS_LOCAL)) | state),
1946 &new, 0);
1947 } else {
1948 rtbad(rt);
1953 /* Called while walking the table looking for sick interfaces
1954 * or after a time change.
1956 /* ARGSUSED */
1958 walk_bad(struct radix_node *rn,
1959 struct walkarg *argp UNUSED)
1961 #define RT ((struct rt_entry *)rn)
1962 struct rt_spare *rts;
1963 int i;
1966 /* fix any spare routes through the interface
1968 rts = RT->rt_spares;
1969 for (i = NUM_SPARES; i != 1; i--) {
1970 rts++;
1971 if (rts->rts_metric < HOPCNT_INFINITY
1972 && (rts->rts_ifp == 0
1973 || (rts->rts_ifp->int_state & IS_BROKE)))
1974 rts_delete(RT, rts);
1977 /* Deal with the main route
1979 /* finished if it has been handled before or if its interface is ok
1981 if (RT->rt_ifp == 0 || !(RT->rt_ifp->int_state & IS_BROKE))
1982 return 0;
1984 /* Bad routes for other than interfaces are easy.
1986 if (0 == (RT->rt_state & (RS_IF | RS_NET_SYN | RS_LOCAL))) {
1987 rtbad(RT);
1988 return 0;
1991 rtbad_sub(RT);
1992 return 0;
1993 #undef RT
1997 /* Check the age of an individual route.
1999 /* ARGSUSED */
2000 static int
2001 walk_age(struct radix_node *rn,
2002 struct walkarg *argp UNUSED)
2004 #define RT ((struct rt_entry *)rn)
2005 struct interface *ifp;
2006 struct rt_spare *rts;
2007 int i;
2010 /* age all of the spare routes, including the primary route
2011 * currently in use
2013 rts = RT->rt_spares;
2014 for (i = NUM_SPARES; i != 0; i--, rts++) {
2016 ifp = rts->rts_ifp;
2017 if (i == NUM_SPARES) {
2018 if (!AGE_RT(RT->rt_state, ifp)) {
2019 /* Keep various things from deciding ageless
2020 * routes are stale
2022 rts->rts_time = now.tv_sec;
2023 continue;
2026 /* forget RIP routes after RIP has been turned off.
2028 if (rip_sock < 0) {
2029 rtdelete(RT);
2030 return 0;
2034 /* age failing routes
2036 if (age_bad_gate == rts->rts_gate
2037 && rts->rts_time >= now_stale) {
2038 rts->rts_time -= SUPPLY_INTERVAL;
2041 /* trash the spare routes when they go bad */
2042 if (rts->rts_metric < HOPCNT_INFINITY
2043 && now_garbage > rts->rts_time
2044 && i != NUM_SPARES)
2045 rts_delete(RT, rts);
2049 /* finished if the active route is still fresh */
2050 if (now_stale <= RT->rt_time)
2051 return 0;
2053 /* try to switch to an alternative */
2054 rtswitch(RT, 0);
2056 /* Delete a dead route after it has been publically mourned. */
2057 if (now_garbage > RT->rt_time) {
2058 rtdelete(RT);
2059 return 0;
2062 /* Start poisoning a bad route before deleting it. */
2063 if (now.tv_sec - RT->rt_time > EXPIRE_TIME) {
2064 struct rt_spare new = RT->rt_spares[0];
2065 new.rts_metric = HOPCNT_INFINITY;
2066 rtchange(RT, RT->rt_state, &new, 0);
2068 return 0;
2072 /* Watch for dead routes and interfaces.
2074 void
2075 age(naddr bad_gate)
2077 struct interface *ifp;
2078 int need_query = 0;
2080 /* If not listening to RIP, there is no need to age the routes in
2081 * the table.
2083 age_timer.tv_sec = (now.tv_sec
2084 + ((rip_sock < 0) ? NEVER : SUPPLY_INTERVAL));
2086 /* Check for dead IS_REMOTE interfaces by timing their
2087 * transmissions.
2089 for (ifp = ifnet; ifp; ifp = ifp->int_next) {
2090 if (!(ifp->int_state & IS_REMOTE))
2091 continue;
2093 /* ignore unreachable remote interfaces */
2094 if (!check_remote(ifp))
2095 continue;
2097 /* Restore remote interface that has become reachable
2099 if (ifp->int_state & IS_BROKE)
2100 if_ok(ifp, "remote ");
2102 if (ifp->int_act_time != NEVER
2103 && now.tv_sec - ifp->int_act_time > EXPIRE_TIME) {
2104 msglog("remote interface %s to %s timed out after"
2105 " %ld:%ld",
2106 ifp->int_name,
2107 naddr_ntoa(ifp->int_dstaddr),
2108 (now.tv_sec - ifp->int_act_time)/60,
2109 (now.tv_sec - ifp->int_act_time)%60);
2110 if_sick(ifp);
2113 /* If we have not heard from the other router
2114 * recently, ask it.
2116 if (now.tv_sec >= ifp->int_query_time) {
2117 ifp->int_query_time = NEVER;
2118 need_query = 1;
2122 /* Age routes. */
2123 age_bad_gate = bad_gate;
2124 rn_walktree(rhead, walk_age, 0);
2126 /* delete old redirected routes to keep the kernel table small
2127 * and prevent blackholes
2129 del_redirects(bad_gate, now.tv_sec-STALE_TIME);
2131 /* Update the kernel routing table. */
2132 fix_kern();
2134 /* poke reticent remote gateways */
2135 if (need_query)
2136 rip_query();