hammer2 - Shift inode xop functions into hammer2_xop.c
[dragonfly.git] / sbin / routed / table.c
blobf0a97dd761fc518e27c130ed823358f4bbe71b07
1 /*
2 * Copyright (c) 1983, 1988, 1993
3 * The Regents of the University of California. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the University nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
29 * @(#)tables.c 8.1 (Berkeley) 6/5/93
30 * $FreeBSD: src/sbin/routed/table.c,v 1.9.2.2 2000/08/14 17:00:04 sheldonh Exp $
33 #include "defs.h"
35 static struct rt_spare *rts_better(struct rt_entry *);
36 static struct rt_spare rts_empty = {0,0,0,HOPCNT_INFINITY,0,0,0};
37 static void set_need_flash(void);
38 #ifdef _HAVE_SIN_LEN
39 static void masktrim(struct sockaddr_in *ap);
40 #else
41 static void masktrim(struct sockaddr_in_new *ap);
42 #endif
45 struct radix_node_head *rhead; /* root of the radix tree */
47 int need_flash = 1; /* flash update needed
48 * start =1 to suppress the 1st
51 struct timeval age_timer; /* next check of old routes */
52 struct timeval need_kern = { /* need to update kernel table */
53 EPOCH+MIN_WAITTIME-1, 0
56 int stopint;
58 int total_routes;
60 /* zap any old routes through this gateway */
61 naddr age_bad_gate;
/* It is desirable to "aggregate" routes, to combine differing routes of
 * the same metric and next hop into a common route with a smaller netmask
 * or to suppress redundant routes, routes that add no information to
 * routes with smaller netmasks.
 *
 * A route is redundant if and only if any and all routes with smaller
 * but matching netmasks and nets are the same.  Since routes are
 * kept sorted in the radix tree, redundant routes always come second.
 *
 * There are two kinds of aggregations.  First, two routes of the same bit
 * mask and differing only in the least significant bit of the network
 * number can be combined into a single route with a coarser mask.
 *
 * Second, a route can be suppressed in favor of another route with a more
 * coarse mask provided no incompatible routes with intermediate masks
 * are present.  The second kind of aggregation involves suppressing routes.
 * A route must not be suppressed if an incompatible route exists with
 * an intermediate mask, since the suppressed route would be covered
 * by the intermediate.
 *
 * This code relies on the radix tree walk encountering routes
 * sorted first by address, with the smallest address first.
 */
88 struct ag_info ag_slots[NUM_AG_SLOTS], *ag_avail, *ag_corsest, *ag_finest;
90 /* #define DEBUG_AG */
91 #ifdef DEBUG_AG
92 #define CHECK_AG() {int acnt = 0; struct ag_info *cag; \
93 for (cag = ag_avail; cag != NULL; cag = cag->ag_fine) \
94 acnt++; \
95 for (cag = ag_corsest; cag != NULL; cag = cag->ag_fine) \
96 acnt++; \
97 if (acnt != NUM_AG_SLOTS) { \
98 fflush(stderr); \
99 abort(); \
102 #else
103 #define CHECK_AG()
104 #endif
107 /* Output the contents of an aggregation table slot.
108 * This function must always be immediately followed with the deletion
109 * of the target slot.
111 static void
112 ag_out(struct ag_info *ag,
113 void (*out)(struct ag_info *))
115 struct ag_info *ag_cors;
116 naddr bit;
119 /* Forget it if this route should not be output for split-horizon. */
120 if (ag->ag_state & AGS_SPLIT_HZ)
121 return;
123 /* If we output both the even and odd twins, then the immediate parent,
124 * if it is present, is redundant, unless the parent manages to
125 * aggregate into something coarser.
126 * On successive calls, this code detects the even and odd twins,
127 * and marks the parent.
129 * Note that the order in which the radix tree code emits routes
130 * ensures that the twins are seen before the parent is emitted.
132 ag_cors = ag->ag_cors;
133 if (ag_cors != NULL
134 && ag_cors->ag_mask == ag->ag_mask<<1
135 && ag_cors->ag_dst_h == (ag->ag_dst_h & ag_cors->ag_mask)) {
136 ag_cors->ag_state |= ((ag_cors->ag_dst_h == ag->ag_dst_h)
137 ? AGS_REDUN0
138 : AGS_REDUN1);
141 /* Skip it if this route is itself redundant.
143 * It is ok to change the contents of the slot here, since it is
144 * always deleted next.
146 if (ag->ag_state & AGS_REDUN0) {
147 if (ag->ag_state & AGS_REDUN1)
148 return; /* quit if fully redundant */
149 /* make it finer if it is half-redundant */
150 bit = (-ag->ag_mask) >> 1;
151 ag->ag_dst_h |= bit;
152 ag->ag_mask |= bit;
154 } else if (ag->ag_state & AGS_REDUN1) {
155 /* make it finer if it is half-redundant */
156 bit = (-ag->ag_mask) >> 1;
157 ag->ag_mask |= bit;
159 out(ag);
163 static void
164 ag_del(struct ag_info *ag)
166 CHECK_AG();
168 if (ag->ag_cors == 0)
169 ag_corsest = ag->ag_fine;
170 else
171 ag->ag_cors->ag_fine = ag->ag_fine;
173 if (ag->ag_fine == 0)
174 ag_finest = ag->ag_cors;
175 else
176 ag->ag_fine->ag_cors = ag->ag_cors;
178 ag->ag_fine = ag_avail;
179 ag_avail = ag;
181 CHECK_AG();
185 /* Flush routes waiting for aggregation.
186 * This must not suppress a route unless it is known that among all
187 * routes with coarser masks that match it, the one with the longest
188 * mask is appropriate. This is ensured by scanning the routes
189 * in lexical order, and with the most restrictive mask first
190 * among routes to the same destination.
192 void
193 ag_flush(naddr lim_dst_h, /* flush routes to here */
194 naddr lim_mask, /* matching this mask */
195 void (*out)(struct ag_info *))
197 struct ag_info *ag, *ag_cors;
198 naddr dst_h;
201 for (ag = ag_finest;
202 ag != NULL && ag->ag_mask >= lim_mask;
203 ag = ag_cors) {
204 ag_cors = ag->ag_cors;
206 /* work on only the specified routes */
207 dst_h = ag->ag_dst_h;
208 if ((dst_h & lim_mask) != lim_dst_h)
209 continue;
211 if (!(ag->ag_state & AGS_SUPPRESS))
212 ag_out(ag, out);
214 else for ( ; ; ag_cors = ag_cors->ag_cors) {
215 /* Look for a route that can suppress the
216 * current route */
217 if (ag_cors == NULL) {
218 /* failed, so output it and look for
219 * another route to work on
221 ag_out(ag, out);
222 break;
225 if ((dst_h & ag_cors->ag_mask) == ag_cors->ag_dst_h) {
226 /* We found a route with a coarser mask that
227 * aggregates the current target.
229 * If it has a different next hop, it
230 * cannot replace the target, so output
231 * the target.
233 if (ag->ag_gate != ag_cors->ag_gate
234 && !(ag->ag_state & AGS_FINE_GATE)
235 && !(ag_cors->ag_state & AGS_CORS_GATE)) {
236 ag_out(ag, out);
237 break;
240 /* If the coarse route has a good enough
241 * metric, it suppresses the target.
242 * If the suppressed target was redundant,
243 * then mark the suppressor redundant.
245 if (ag_cors->ag_pref <= ag->ag_pref) {
246 if (ag_cors->ag_seqno > ag->ag_seqno)
247 ag_cors->ag_seqno = ag->ag_seqno;
248 if (AG_IS_REDUN(ag->ag_state)
249 && ag_cors->ag_mask==ag->ag_mask<<1) {
250 if (ag_cors->ag_dst_h == dst_h)
251 ag_cors->ag_state |= AGS_REDUN0;
252 else
253 ag_cors->ag_state |= AGS_REDUN1;
255 if (ag->ag_tag != ag_cors->ag_tag)
256 ag_cors->ag_tag = 0;
257 if (ag->ag_nhop != ag_cors->ag_nhop)
258 ag_cors->ag_nhop = 0;
259 break;
264 /* That route has either been output or suppressed */
265 ag_cors = ag->ag_cors;
266 ag_del(ag);
269 CHECK_AG();
273 /* Try to aggregate a route with previous routes.
275 void
276 ag_check(naddr dst,
277 naddr mask,
278 naddr gate,
279 naddr nhop,
280 char metric,
281 char pref,
282 u_int seqnum,
283 u_short tag,
284 u_short state,
285 void (*out)(struct ag_info *)) /* output using this */
287 struct ag_info *ag, *nag, *ag_cors;
288 naddr xaddr;
289 int x;
291 dst = ntohl(dst);
293 /* Punt non-contiguous subnet masks.
295 * (X & -X) contains a single bit if and only if X is a power of 2.
296 * (X + (X & -X)) == 0 if and only if X is a power of 2.
298 if ((mask & -mask) + mask != 0) {
299 struct ag_info nc_ag;
301 nc_ag.ag_dst_h = dst;
302 nc_ag.ag_mask = mask;
303 nc_ag.ag_gate = gate;
304 nc_ag.ag_nhop = nhop;
305 nc_ag.ag_metric = metric;
306 nc_ag.ag_pref = pref;
307 nc_ag.ag_tag = tag;
308 nc_ag.ag_state = state;
309 nc_ag.ag_seqno = seqnum;
310 out(&nc_ag);
311 return;
314 /* Search for the right slot in the aggregation table.
316 ag_cors = NULL;
317 ag = ag_corsest;
318 while (ag != NULL) {
319 if (ag->ag_mask >= mask)
320 break;
322 /* Suppress old routes (i.e. combine with compatible routes
323 * with coarser masks) as we look for the right slot in the
324 * aggregation table for the new route.
325 * A route to an address less than the current destination
326 * will not be affected by the current route or any route
327 * seen hereafter. That means it is safe to suppress it.
328 * This check keeps poor routes (e.g. with large hop counts)
329 * from preventing suppression of finer routes.
331 if (ag_cors != NULL
332 && ag->ag_dst_h < dst
333 && (ag->ag_state & AGS_SUPPRESS)
334 && ag_cors->ag_pref <= ag->ag_pref
335 && (ag->ag_dst_h & ag_cors->ag_mask) == ag_cors->ag_dst_h
336 && (ag_cors->ag_gate == ag->ag_gate
337 || (ag->ag_state & AGS_FINE_GATE)
338 || (ag_cors->ag_state & AGS_CORS_GATE))) {
339 if (ag_cors->ag_seqno > ag->ag_seqno)
340 ag_cors->ag_seqno = ag->ag_seqno;
341 /* If the suppressed target was redundant,
342 * then mark the suppressor redundant.
344 if (AG_IS_REDUN(ag->ag_state)
345 && ag_cors->ag_mask == ag->ag_mask<<1) {
346 if (ag_cors->ag_dst_h == dst)
347 ag_cors->ag_state |= AGS_REDUN0;
348 else
349 ag_cors->ag_state |= AGS_REDUN1;
351 if (ag->ag_tag != ag_cors->ag_tag)
352 ag_cors->ag_tag = 0;
353 if (ag->ag_nhop != ag_cors->ag_nhop)
354 ag_cors->ag_nhop = 0;
355 ag_del(ag);
356 CHECK_AG();
357 } else {
358 ag_cors = ag;
360 ag = ag_cors->ag_fine;
363 /* If we find the even/odd twin of the new route, and if the
364 * masks and so forth are equal, we can aggregate them.
365 * We can probably promote one of the pair.
367 * Since the routes are encountered in lexical order,
368 * the new route must be odd. However, the second or later
369 * times around this loop, it could be the even twin promoted
370 * from the even/odd pair of twins of the finer route.
372 while (ag != NULL
373 && ag->ag_mask == mask
374 && ((ag->ag_dst_h ^ dst) & (mask<<1)) == 0) {
376 /* Here we know the target route and the route in the current
377 * slot have the same netmasks and differ by at most the
378 * last bit. They are either for the same destination, or
379 * for an even/odd pair of destinations.
381 if (ag->ag_dst_h == dst) {
382 /* We have two routes to the same destination.
383 * Routes are encountered in lexical order, so a
384 * route is never promoted until the parent route is
385 * already present. So we know that the new route is
386 * a promoted (or aggregated) pair and the route
387 * already in the slot is the explicit route.
389 * Prefer the best route if their metrics differ,
390 * or the aggregated one if not, following a sort
391 * of longest-match rule.
393 if (pref <= ag->ag_pref) {
394 ag->ag_gate = gate;
395 ag->ag_nhop = nhop;
396 ag->ag_tag = tag;
397 ag->ag_metric = metric;
398 ag->ag_pref = pref;
399 x = ag->ag_state;
400 ag->ag_state = state;
401 state = x;
404 /* The sequence number controls flash updating,
405 * and should be the smaller of the two.
407 if (ag->ag_seqno > seqnum)
408 ag->ag_seqno = seqnum;
410 /* Some bits are set if they are set on either route,
411 * except when the route is for an interface.
413 if (!(ag->ag_state & AGS_IF))
414 ag->ag_state |= (state & (AGS_AGGREGATE_EITHER
415 | AGS_REDUN0
416 | AGS_REDUN1));
417 return;
420 /* If one of the routes can be promoted and the other can
421 * be suppressed, it may be possible to combine them or
422 * worthwhile to promote one.
424 * Any route that can be promoted is always
425 * marked to be eligible to be suppressed.
427 if (!((state & AGS_AGGREGATE)
428 && (ag->ag_state & AGS_SUPPRESS))
429 && !((ag->ag_state & AGS_AGGREGATE)
430 && (state & AGS_SUPPRESS)))
431 break;
433 /* A pair of even/odd twin routes can be combined
434 * if either is redundant, or if they are via the
435 * same gateway and have the same metric.
437 if (AG_IS_REDUN(ag->ag_state)
438 || AG_IS_REDUN(state)
439 || (ag->ag_gate == gate
440 && ag->ag_pref == pref
441 && (state & ag->ag_state & AGS_AGGREGATE) != 0)) {
443 /* We have both the even and odd pairs.
444 * Since the routes are encountered in order,
445 * the route in the slot must be the even twin.
447 * Combine and promote (aggregate) the pair of routes.
449 if (seqnum > ag->ag_seqno)
450 seqnum = ag->ag_seqno;
451 if (!AG_IS_REDUN(state))
452 state &= ~AGS_REDUN1;
453 if (AG_IS_REDUN(ag->ag_state))
454 state |= AGS_REDUN0;
455 else
456 state &= ~AGS_REDUN0;
457 state |= (ag->ag_state & AGS_AGGREGATE_EITHER);
458 if (ag->ag_tag != tag)
459 tag = 0;
460 if (ag->ag_nhop != nhop)
461 nhop = 0;
463 /* Get rid of the even twin that was already
464 * in the slot.
466 ag_del(ag);
468 } else if (ag->ag_pref >= pref
469 && (ag->ag_state & AGS_AGGREGATE)) {
470 /* If we cannot combine the pair, maybe the route
471 * with the worse metric can be promoted.
473 * Promote the old, even twin, by giving its slot
474 * in the table to the new, odd twin.
476 ag->ag_dst_h = dst;
478 xaddr = ag->ag_gate;
479 ag->ag_gate = gate;
480 gate = xaddr;
482 xaddr = ag->ag_nhop;
483 ag->ag_nhop = nhop;
484 nhop = xaddr;
486 x = ag->ag_tag;
487 ag->ag_tag = tag;
488 tag = x;
490 /* The promoted route is even-redundant only if the
491 * even twin was fully redundant. It is not
492 * odd-redundant because the odd-twin will still be
493 * in the table.
495 x = ag->ag_state;
496 if (!AG_IS_REDUN(x))
497 x &= ~AGS_REDUN0;
498 x &= ~AGS_REDUN1;
499 ag->ag_state = state;
500 state = x;
502 x = ag->ag_metric;
503 ag->ag_metric = metric;
504 metric = x;
506 x = ag->ag_pref;
507 ag->ag_pref = pref;
508 pref = x;
510 /* take the newest sequence number */
511 if (seqnum >= ag->ag_seqno)
512 seqnum = ag->ag_seqno;
513 else
514 ag->ag_seqno = seqnum;
516 } else {
517 if (!(state & AGS_AGGREGATE))
518 break; /* cannot promote either twin */
520 /* Promote the new, odd twin by shaving its
521 * mask and address.
522 * The promoted route is odd-redundant only if the
523 * odd twin was fully redundant. It is not
524 * even-redundant because the even twin is still in
525 * the table.
527 if (!AG_IS_REDUN(state))
528 state &= ~AGS_REDUN1;
529 state &= ~AGS_REDUN0;
530 if (seqnum > ag->ag_seqno)
531 seqnum = ag->ag_seqno;
532 else
533 ag->ag_seqno = seqnum;
536 mask <<= 1;
537 dst &= mask;
539 if (ag_cors == NULL) {
540 ag = ag_corsest;
541 break;
543 ag = ag_cors;
544 ag_cors = ag->ag_cors;
547 /* When we can no longer promote and combine routes,
548 * flush the old route in the target slot. Also flush
549 * any finer routes that we know will never be aggregated by
550 * the new route.
552 * In case we moved toward coarser masks,
553 * get back where we belong
555 if (ag != NULL
556 && ag->ag_mask < mask) {
557 ag_cors = ag;
558 ag = ag->ag_fine;
561 /* Empty the target slot
563 if (ag != NULL && ag->ag_mask == mask) {
564 ag_flush(ag->ag_dst_h, ag->ag_mask, out);
565 ag = (ag_cors == NULL) ? ag_corsest : ag_cors->ag_fine;
568 #ifdef DEBUG_AG
569 fflush(stderr);
570 if (ag == NULL && ag_cors != ag_finest)
571 abort();
572 if (ag_cors == NULL && ag != ag_corsest)
573 abort();
574 if (ag != NULL && ag->ag_cors != ag_cors)
575 abort();
576 if (ag_cors != NULL && ag_cors->ag_fine != ag)
577 abort();
578 CHECK_AG();
579 #endif
581 /* Save the new route on the end of the table.
583 nag = ag_avail;
584 ag_avail = nag->ag_fine;
586 nag->ag_dst_h = dst;
587 nag->ag_mask = mask;
588 nag->ag_gate = gate;
589 nag->ag_nhop = nhop;
590 nag->ag_metric = metric;
591 nag->ag_pref = pref;
592 nag->ag_tag = tag;
593 nag->ag_state = state;
594 nag->ag_seqno = seqnum;
596 nag->ag_fine = ag;
597 if (ag != NULL)
598 ag->ag_cors = nag;
599 else
600 ag_finest = nag;
601 nag->ag_cors = ag_cors;
602 if (ag_cors == NULL)
603 ag_corsest = nag;
604 else
605 ag_cors->ag_fine = nag;
606 CHECK_AG();
#define NAME0_LEN 14
/* Return a printable name for a routing socket message type.
 *
 * Types 1 through 16 map to entries of the table below.  Any other value,
 * including 0, is formatted into a static buffer that the next such call
 * overwrites.
 */
static const char *
rtm_type_name(u_char type)
{
	static const char *rtm_types[] = {
		"RTM_ADD",
		"RTM_DELETE",
		"RTM_CHANGE",
		"RTM_GET",
		"RTM_LOSING",
		"RTM_REDIRECT",
		"RTM_MISS",
		"RTM_LOCK",
		"unused 0x9",
		"unused 0xa",
		"RTM_RESOLVE",
		"RTM_NEWADDR",
		"RTM_DELADDR",
		"RTM_IFINFO",
		"RTM_NEWMADDR",
		"RTM_DELMADDR"
	};
#define NEW_RTM_PAT "RTM type %#x"
	static char name0[sizeof(NEW_RTM_PAT)+2];
	const size_t ntypes = sizeof(rtm_types)/sizeof(rtm_types[0]);

	/* The table is indexed by type-1 because message types start at 1. */
	if (type != 0 && type <= ntypes)
		return rtm_types[type-1];

	snprintf(name0, sizeof(name0), NEW_RTM_PAT, type);
	return name0;
#undef NEW_RTM_PAT
}
647 /* Trim a mask in a sockaddr
648 * Produce a length of 0 for an address of 0.
649 * Otherwise produce the index of the first zero byte.
651 void
652 #ifdef _HAVE_SIN_LEN
653 masktrim(struct sockaddr_in *ap)
654 #else
655 masktrim(struct sockaddr_in_new *ap)
656 #endif
658 char *cp;
660 if (ap->sin_addr.s_addr == 0) {
661 ap->sin_len = 0;
662 return;
664 cp = (char *)(&ap->sin_addr.s_addr+1);
665 while (*--cp == 0)
666 continue;
667 ap->sin_len = cp - (char*)ap + 1;
671 /* Tell the kernel to add, delete or change a route
673 static void
674 rtioctl(int action, /* RTM_DELETE, etc */
675 naddr dst,
676 naddr gate,
677 naddr mask,
678 int metric,
679 int flags)
681 struct {
682 struct rt_msghdr w_rtm;
683 struct sockaddr_in w_dst;
684 struct sockaddr_in w_gate;
685 #ifdef _HAVE_SA_LEN
686 struct sockaddr_in w_mask;
687 #else
688 struct sockaddr_in_new w_mask;
689 #endif
690 } w;
691 long cc;
692 # define PAT " %-10s %s metric=%d flags=%#x"
693 # define ARGS rtm_type_name(action), rtname(dst,mask,gate), metric, flags
695 again:
696 memset(&w, 0, sizeof(w));
697 w.w_rtm.rtm_msglen = sizeof(w);
698 w.w_rtm.rtm_version = RTM_VERSION;
699 w.w_rtm.rtm_type = action;
700 w.w_rtm.rtm_flags = flags;
701 w.w_rtm.rtm_seq = ++rt_sock_seqno;
702 w.w_rtm.rtm_addrs = RTA_DST|RTA_GATEWAY;
703 if (metric != 0 || action == RTM_CHANGE) {
704 w.w_rtm.rtm_rmx.rmx_hopcount = metric;
705 w.w_rtm.rtm_inits |= RTV_HOPCOUNT;
707 w.w_dst.sin_family = AF_INET;
708 w.w_dst.sin_addr.s_addr = dst;
709 w.w_gate.sin_family = AF_INET;
710 w.w_gate.sin_addr.s_addr = gate;
711 #ifdef _HAVE_SA_LEN
712 w.w_dst.sin_len = sizeof(w.w_dst);
713 w.w_gate.sin_len = sizeof(w.w_gate);
714 #endif
715 if (mask == HOST_MASK) {
716 w.w_rtm.rtm_flags |= RTF_HOST;
717 w.w_rtm.rtm_msglen -= sizeof(w.w_mask);
718 } else {
719 w.w_rtm.rtm_addrs |= RTA_NETMASK;
720 w.w_mask.sin_addr.s_addr = htonl(mask);
721 #ifdef _HAVE_SA_LEN
722 masktrim(&w.w_mask);
723 if (w.w_mask.sin_len == 0)
724 w.w_mask.sin_len = sizeof(long);
725 w.w_rtm.rtm_msglen -= (sizeof(w.w_mask) - w.w_mask.sin_len);
726 #endif
729 #ifndef NO_INSTALL
730 cc = write(rt_sock, &w, w.w_rtm.rtm_msglen);
731 if (cc < 0) {
732 if (errno == ESRCH
733 && (action == RTM_CHANGE || action == RTM_DELETE)) {
734 trace_act("route disappeared before" PAT, ARGS);
735 if (action == RTM_CHANGE) {
736 action = RTM_ADD;
737 goto again;
739 return;
741 msglog("write(rt_sock)" PAT ": %s", ARGS, strerror(errno));
742 return;
743 } else if (cc != w.w_rtm.rtm_msglen) {
744 msglog("write(rt_sock) wrote %ld instead of %d for" PAT,
745 cc, w.w_rtm.rtm_msglen, ARGS);
746 return;
748 #endif
749 if (TRACEKERNEL)
750 trace_misc("write kernel" PAT, ARGS);
751 #undef PAT
752 #undef ARGS
756 #define KHASH_SIZE 71 /* should be prime */
757 #define KHASH(a,m) khash_bins[((a) ^ (m)) % KHASH_SIZE]
758 static struct khash {
759 struct khash *k_next;
760 naddr k_dst;
761 naddr k_mask;
762 naddr k_gate;
763 short k_metric;
764 u_short k_state;
765 #define KS_NEW 0x001
766 #define KS_DELETE 0x002 /* need to delete the route */
767 #define KS_ADD 0x004 /* add to the kernel */
768 #define KS_CHANGE 0x008 /* tell kernel to change the route */
769 #define KS_DEL_ADD 0x010 /* delete & add to change the kernel */
770 #define KS_STATIC 0x020 /* Static flag in kernel */
771 #define KS_GATEWAY 0x040 /* G flag in kernel */
772 #define KS_DYNAMIC 0x080 /* result of redirect */
773 #define KS_DELETED 0x100 /* already deleted from kernel */
774 #define KS_CHECK 0x200
775 time_t k_keep;
776 #define K_KEEP_LIM 30
777 time_t k_redirect_time; /* when redirected route 1st seen */
778 } *khash_bins[KHASH_SIZE];
781 static struct khash*
782 kern_find(naddr dst, naddr mask, struct khash ***ppk)
784 struct khash *k, **pk;
786 for (pk = &KHASH(dst,mask); (k = *pk) != NULL; pk = &k->k_next) {
787 if (k->k_dst == dst && k->k_mask == mask)
788 break;
790 if (ppk != NULL)
791 *ppk = pk;
792 return k;
796 static struct khash*
797 kern_add(naddr dst, naddr mask)
799 struct khash *k, **pk;
801 k = kern_find(dst, mask, &pk);
802 if (k != NULL)
803 return k;
805 k = (struct khash *)rtmalloc(sizeof(*k), "kern_add");
807 memset(k, 0, sizeof(*k));
808 k->k_dst = dst;
809 k->k_mask = mask;
810 k->k_state = KS_NEW;
811 k->k_keep = now.tv_sec;
812 *pk = k;
814 return k;
818 /* If a kernel route has a non-zero metric, check that it is still in the
819 * daemon table, and not deleted by interfaces coming and going.
821 static void
822 kern_check_static(struct khash *k,
823 struct interface *ifp)
825 struct rt_entry *rt;
826 struct rt_spare new;
828 if (k->k_metric == 0)
829 return;
831 memset(&new, 0, sizeof(new));
832 new.rts_ifp = ifp;
833 new.rts_gate = k->k_gate;
834 new.rts_router = (ifp != NULL) ? ifp->int_addr : loopaddr;
835 new.rts_metric = k->k_metric;
836 new.rts_time = now.tv_sec;
838 rt = rtget(k->k_dst, k->k_mask);
839 if (rt != NULL) {
840 if (!(rt->rt_state & RS_STATIC))
841 rtchange(rt, rt->rt_state | RS_STATIC, &new, 0);
842 } else {
843 rtadd(k->k_dst, k->k_mask, RS_STATIC, &new);
848 /* operate on a kernel entry
850 static void
851 kern_ioctl(struct khash *k,
852 int action, /* RTM_DELETE, etc */
853 int flags)
856 switch (action) {
857 case RTM_DELETE:
858 k->k_state &= ~KS_DYNAMIC;
859 if (k->k_state & KS_DELETED)
860 return;
861 k->k_state |= KS_DELETED;
862 break;
863 case RTM_ADD:
864 k->k_state &= ~KS_DELETED;
865 break;
866 case RTM_CHANGE:
867 if (k->k_state & KS_DELETED) {
868 action = RTM_ADD;
869 k->k_state &= ~KS_DELETED;
871 break;
874 rtioctl(action, k->k_dst, k->k_gate, k->k_mask, k->k_metric, flags);
878 /* add a route the kernel told us
880 static void
881 rtm_add(struct rt_msghdr *rtm,
882 struct rt_addrinfo *info,
883 time_t keep)
885 struct khash *k;
886 struct interface *ifp;
887 naddr mask;
890 if (rtm->rtm_flags & RTF_HOST) {
891 mask = HOST_MASK;
892 } else if (INFO_MASK(info) != 0) {
893 mask = ntohl(S_ADDR(INFO_MASK(info)));
894 } else {
895 msglog("ignore %s without mask", rtm_type_name(rtm->rtm_type));
896 return;
899 k = kern_add(S_ADDR(INFO_DST(info)), mask);
900 if (k->k_state & KS_NEW)
901 k->k_keep = now.tv_sec+keep;
902 if (INFO_GATE(info) == 0) {
903 trace_act("note %s without gateway",
904 rtm_type_name(rtm->rtm_type));
905 k->k_metric = HOPCNT_INFINITY;
906 } else if (INFO_GATE(info)->sa_family != AF_INET) {
907 trace_act("note %s with gateway AF=%d",
908 rtm_type_name(rtm->rtm_type),
909 INFO_GATE(info)->sa_family);
910 k->k_metric = HOPCNT_INFINITY;
911 } else {
912 k->k_gate = S_ADDR(INFO_GATE(info));
913 k->k_metric = rtm->rtm_rmx.rmx_hopcount;
914 if (k->k_metric < 0)
915 k->k_metric = 0;
916 else if (k->k_metric > HOPCNT_INFINITY-1)
917 k->k_metric = HOPCNT_INFINITY-1;
919 k->k_state &= ~(KS_DELETE | KS_ADD | KS_CHANGE | KS_DEL_ADD
920 | KS_DELETED | KS_GATEWAY | KS_STATIC
921 | KS_NEW | KS_CHECK);
922 if (rtm->rtm_flags & RTF_GATEWAY)
923 k->k_state |= KS_GATEWAY;
924 if (rtm->rtm_flags & RTF_STATIC)
925 k->k_state |= KS_STATIC;
927 if (0 != (rtm->rtm_flags & (RTF_DYNAMIC | RTF_MODIFIED))) {
928 if (INFO_AUTHOR(info) != 0
929 && INFO_AUTHOR(info)->sa_family == AF_INET)
930 ifp = iflookup(S_ADDR(INFO_AUTHOR(info)));
931 else
932 ifp = NULL;
933 if (supplier
934 && (ifp == NULL || !(ifp->int_state & IS_REDIRECT_OK))) {
935 /* Routers are not supposed to listen to redirects,
936 * so delete it if it came via an unknown interface
937 * or the interface does not have special permission.
939 k->k_state &= ~KS_DYNAMIC;
940 k->k_state |= KS_DELETE;
941 LIM_SEC(need_kern, 0);
942 trace_act("mark for deletion redirected %s --> %s"
943 " via %s",
944 addrname(k->k_dst, k->k_mask, 0),
945 naddr_ntoa(k->k_gate),
946 ifp ? ifp->int_name : "unknown interface");
947 } else {
948 k->k_state |= KS_DYNAMIC;
949 k->k_redirect_time = now.tv_sec;
950 trace_act("accept redirected %s --> %s via %s",
951 addrname(k->k_dst, k->k_mask, 0),
952 naddr_ntoa(k->k_gate),
953 ifp ? ifp->int_name : "unknown interface");
955 return;
958 /* If it is not a static route, quit until the next comparison
959 * between the kernel and daemon tables, when it will be deleted.
961 if (!(k->k_state & KS_STATIC)) {
962 k->k_state |= KS_DELETE;
963 LIM_SEC(need_kern, k->k_keep);
964 return;
967 /* Put static routes with real metrics into the daemon table so
968 * they can be advertised.
970 * Find the interface toward the gateway.
972 ifp = iflookup(k->k_gate);
973 if (ifp == NULL)
974 msglog("static route %s --> %s impossibly lacks ifp",
975 addrname(S_ADDR(INFO_DST(info)), mask, 0),
976 naddr_ntoa(k->k_gate));
978 kern_check_static(k, ifp);
982 /* deal with packet loss
984 static void
985 rtm_lose(struct rt_msghdr *rtm,
986 struct rt_addrinfo *info)
988 if (INFO_GATE(info) == 0
989 || INFO_GATE(info)->sa_family != AF_INET) {
990 trace_act("ignore %s without gateway",
991 rtm_type_name(rtm->rtm_type));
992 return;
995 if (rdisc_ok)
996 rdisc_age(S_ADDR(INFO_GATE(info)));
997 age(S_ADDR(INFO_GATE(info)));
1001 /* Make the gateway slot of an info structure point to something
1002 * useful. If it is not already useful, but it specifies an interface,
1003 * then fill in the sockaddr_in provided and point it there.
1005 static int
1006 get_info_gate(struct sockaddr **sap,
1007 struct sockaddr_in *in)
1009 struct sockaddr_dl *sdl = (struct sockaddr_dl *)*sap;
1010 struct interface *ifp;
1012 if (sdl == NULL)
1013 return 0;
1014 if ((sdl)->sdl_family == AF_INET)
1015 return 1;
1016 if ((sdl)->sdl_family != AF_LINK)
1017 return 0;
1019 ifp = ifwithindex(sdl->sdl_index, 1);
1020 if (ifp == NULL)
1021 return 0;
1023 in->sin_addr.s_addr = ifp->int_addr;
1024 #ifdef _HAVE_SA_LEN
1025 in->sin_len = sizeof(*in);
1026 #endif
1027 in->sin_family = AF_INET;
1028 *sap = (struct sockaddr *)in;
1030 return 1;
1034 /* Clean the kernel table by copying it to the daemon image.
1035 * Eventually the daemon will delete any extra routes.
1037 void
1038 flush_kern(void)
1040 static char *sysctl_buf;
1041 static size_t sysctl_buf_size = 0;
1042 size_t needed;
1043 int mib[6];
1044 char *next, *lim;
1045 struct rt_msghdr *rtm;
1046 struct sockaddr_in gate_sin;
1047 struct rt_addrinfo info;
1048 int i;
1049 struct khash *k;
1052 for (i = 0; i < KHASH_SIZE; i++) {
1053 for (k = khash_bins[i]; k != NULL; k = k->k_next) {
1054 k->k_state |= KS_CHECK;
1058 mib[0] = CTL_NET;
1059 mib[1] = PF_ROUTE;
1060 mib[2] = 0; /* protocol */
1061 mib[3] = 0; /* wildcard address family */
1062 mib[4] = NET_RT_DUMP;
1063 mib[5] = 0; /* no flags */
1064 for (;;) {
1065 if ((needed = sysctl_buf_size) != 0) {
1066 if (sysctl(mib, 6, sysctl_buf,&needed, 0, 0) >= 0)
1067 break;
1068 if (errno != ENOMEM && errno != EFAULT)
1069 BADERR(1,"flush_kern: sysctl(RT_DUMP)");
1070 free(sysctl_buf);
1071 needed = 0;
1073 if (sysctl(mib, 6, 0, &needed, 0, 0) < 0)
1074 BADERR(1,"flush_kern: sysctl(RT_DUMP) estimate");
1075 /* Kludge around the habit of some systems, such as
1076 * BSD/OS 3.1, to not admit how many routes are in the
1077 * kernel, or at least to be quite wrong.
1079 needed += 50*(sizeof(*rtm)+5*sizeof(struct sockaddr));
1080 sysctl_buf = rtmalloc(sysctl_buf_size = needed,
1081 "flush_kern sysctl(RT_DUMP)");
1084 lim = sysctl_buf + needed;
1085 for (next = sysctl_buf; next < lim; next += rtm->rtm_msglen) {
1086 rtm = (struct rt_msghdr *)next;
1087 if (rtm->rtm_msglen == 0) {
1088 msglog("zero length kernel route at "
1089 " %#lx in buffer %#lx before %#lx",
1090 (u_long)rtm, (u_long)sysctl_buf, (u_long)lim);
1091 break;
1094 rt_xaddrs(&info,
1095 (struct sockaddr *)(rtm+1),
1096 (struct sockaddr *)(next + rtm->rtm_msglen),
1097 rtm->rtm_addrs);
1099 if (INFO_DST(&info) == 0
1100 || INFO_DST(&info)->sa_family != AF_INET)
1101 continue;
1103 /* ignore ARP table entries on systems with a merged route
1104 * and ARP table.
1106 if (rtm->rtm_flags & RTF_LLINFO)
1107 continue;
1109 /* ignore multicast addresses
1111 if (IN_MULTICAST(ntohl(S_ADDR(INFO_DST(&info)))))
1112 continue;
1114 if (!get_info_gate(&INFO_GATE(&info), &gate_sin))
1115 continue;
1117 /* Note static routes and interface routes, and also
1118 * preload the image of the kernel table so that
1119 * we can later clean it, as well as avoid making
1120 * unneeded changes. Keep the old kernel routes for a
1121 * few seconds to allow a RIP or router-discovery
1122 * response to be heard.
1124 rtm_add(rtm,&info,MIN_WAITTIME);
1127 for (i = 0; i < KHASH_SIZE; i++) {
1128 for (k = khash_bins[i]; k != NULL; k = k->k_next) {
1129 if (k->k_state & KS_CHECK) {
1130 msglog("%s --> %s disappeared from kernel",
1131 addrname(k->k_dst, k->k_mask, 0),
1132 naddr_ntoa(k->k_gate));
1133 del_static(k->k_dst, k->k_mask, k->k_gate, 1);
1140 /* Listen to announcements from the kernel
1142 void
1143 read_rt(void)
1145 long cc;
1146 struct interface *ifp;
1147 struct sockaddr_in gate_sin;
1148 naddr mask, gate;
1149 union {
1150 struct {
1151 struct rt_msghdr rtm;
1152 struct sockaddr addrs[RTAX_MAX];
1153 } r;
1154 struct if_msghdr ifm;
1155 } m;
1156 char str[100], *strp;
1157 struct rt_addrinfo info;
1160 for (;;) {
1161 cc = read(rt_sock, &m, sizeof(m));
1162 if (cc <= 0) {
1163 if (cc < 0 && errno != EWOULDBLOCK)
1164 LOGERR("read(rt_sock)");
1165 return;
1168 if (m.r.rtm.rtm_version != RTM_VERSION) {
1169 msglog("bogus routing message version %d",
1170 m.r.rtm.rtm_version);
1171 continue;
1174 /* Ignore our own results.
1176 if (m.r.rtm.rtm_type <= RTM_CHANGE
1177 && m.r.rtm.rtm_pid == mypid) {
1178 static int complained = 0;
1179 if (!complained) {
1180 msglog("receiving our own change messages");
1181 complained = 1;
1183 continue;
1186 if (m.r.rtm.rtm_type == RTM_IFINFO
1187 || m.r.rtm.rtm_type == RTM_NEWADDR
1188 || m.r.rtm.rtm_type == RTM_DELADDR) {
1189 ifp = ifwithindex(m.ifm.ifm_index,
1190 m.r.rtm.rtm_type != RTM_DELADDR);
1191 if (ifp == NULL)
1192 trace_act("note %s with flags %#x"
1193 " for unknown interface index #%d",
1194 rtm_type_name(m.r.rtm.rtm_type),
1195 m.ifm.ifm_flags,
1196 m.ifm.ifm_index);
1197 else
1198 trace_act("note %s with flags %#x for %s",
1199 rtm_type_name(m.r.rtm.rtm_type),
1200 m.ifm.ifm_flags,
1201 ifp->int_name);
1203 /* After being informed of a change to an interface,
1204 * check them all now if the check would otherwise
1205 * be a long time from now, if the interface is
1206 * not known, or if the interface has been turned
1207 * off or on.
1209 if (ifinit_timer.tv_sec-now.tv_sec>=CHECK_BAD_INTERVAL
1210 || ifp == NULL
1211 || ((ifp->int_if_flags ^ m.ifm.ifm_flags)
1212 & IFF_UP) != 0)
1213 ifinit_timer.tv_sec = now.tv_sec;
1214 continue;
1217 strcpy(str, rtm_type_name(m.r.rtm.rtm_type));
1218 strp = &str[strlen(str)];
1219 if (m.r.rtm.rtm_type <= RTM_CHANGE)
1220 strp += sprintf(strp," from pid %d",m.r.rtm.rtm_pid);
1222 rt_xaddrs(&info, m.r.addrs, &m.r.addrs[RTAX_MAX],
1223 m.r.rtm.rtm_addrs);
1225 if (INFO_DST(&info) == 0) {
1226 trace_act("ignore %s without dst", str);
1227 continue;
1230 if (INFO_DST(&info)->sa_family != AF_INET) {
1231 trace_act("ignore %s for AF %d", str,
1232 INFO_DST(&info)->sa_family);
1233 continue;
1236 mask = ((INFO_MASK(&info) != 0)
1237 ? ntohl(S_ADDR(INFO_MASK(&info)))
1238 : (m.r.rtm.rtm_flags & RTF_HOST)
1239 ? HOST_MASK
1240 : std_mask(S_ADDR(INFO_DST(&info))));
1242 strp += sprintf(strp, ": %s",
1243 addrname(S_ADDR(INFO_DST(&info)), mask, 0));
1245 if (IN_MULTICAST(ntohl(S_ADDR(INFO_DST(&info))))) {
1246 trace_act("ignore multicast %s", str);
1247 continue;
1250 if (m.r.rtm.rtm_flags & RTF_LLINFO) {
1251 trace_act("ignore ARP %s", str);
1252 continue;
1255 if (get_info_gate(&INFO_GATE(&info), &gate_sin)) {
1256 gate = S_ADDR(INFO_GATE(&info));
1257 strp += sprintf(strp, " --> %s", naddr_ntoa(gate));
1258 } else {
1259 gate = 0;
1262 if (INFO_AUTHOR(&info) != 0)
1263 strp += sprintf(strp, " by authority of %s",
1264 saddr_ntoa(INFO_AUTHOR(&info)));
1266 switch (m.r.rtm.rtm_type) {
1267 case RTM_ADD:
1268 case RTM_CHANGE:
1269 case RTM_REDIRECT:
1270 if (m.r.rtm.rtm_errno != 0) {
1271 trace_act("ignore %s with \"%s\" error",
1272 str, strerror(m.r.rtm.rtm_errno));
1273 } else {
1274 trace_act("%s", str);
1275 rtm_add(&m.r.rtm,&info,0);
1277 break;
1279 case RTM_DELETE:
1280 if (m.r.rtm.rtm_errno != 0
1281 && m.r.rtm.rtm_errno != ESRCH) {
1282 trace_act("ignore %s with \"%s\" error",
1283 str, strerror(m.r.rtm.rtm_errno));
1284 } else {
1285 trace_act("%s", str);
1286 del_static(S_ADDR(INFO_DST(&info)), mask,
1287 gate, 1);
1289 break;
1291 case RTM_LOSING:
1292 trace_act("%s", str);
1293 rtm_lose(&m.r.rtm,&info);
1294 break;
1296 default:
1297 trace_act("ignore %s", str);
1298 break;
1304 /* after aggregating, note routes that belong in the kernel
1306 static void
1307 kern_out(struct ag_info *ag)
1309 struct khash *k;
1312 /* Do not install bad routes if they are not already present.
1313 * This includes routes that had RS_NET_SYN for interfaces that
1314 * recently died.
1316 if (ag->ag_metric == HOPCNT_INFINITY) {
1317 k = kern_find(htonl(ag->ag_dst_h), ag->ag_mask, 0);
1318 if (k == NULL)
1319 return;
1320 } else {
1321 k = kern_add(htonl(ag->ag_dst_h), ag->ag_mask);
1324 if (k->k_state & KS_NEW) {
1325 /* will need to add new entry to the kernel table */
1326 k->k_state = KS_ADD;
1327 if (ag->ag_state & AGS_GATEWAY)
1328 k->k_state |= KS_GATEWAY;
1329 k->k_gate = ag->ag_gate;
1330 k->k_metric = ag->ag_metric;
1331 return;
1334 if (k->k_state & KS_STATIC)
1335 return;
1337 /* modify existing kernel entry if necessary */
1338 if (k->k_gate != ag->ag_gate
1339 || k->k_metric != ag->ag_metric) {
1340 /* Must delete bad interface routes etc. to change them. */
1341 if (k->k_metric == HOPCNT_INFINITY)
1342 k->k_state |= KS_DEL_ADD;
1343 k->k_gate = ag->ag_gate;
1344 k->k_metric = ag->ag_metric;
1345 k->k_state |= KS_CHANGE;
1348 /* If the daemon thinks the route should exist, forget
1349 * about any redirections.
1350 * If the daemon thinks the route should exist, eventually
1351 * override manual intervention by the operator.
1353 if ((k->k_state & (KS_DYNAMIC | KS_DELETED)) != 0) {
1354 k->k_state &= ~KS_DYNAMIC;
1355 k->k_state |= (KS_ADD | KS_DEL_ADD);
1358 if ((k->k_state & KS_GATEWAY)
1359 && !(ag->ag_state & AGS_GATEWAY)) {
1360 k->k_state &= ~KS_GATEWAY;
1361 k->k_state |= (KS_ADD | KS_DEL_ADD);
1362 } else if (!(k->k_state & KS_GATEWAY)
1363 && (ag->ag_state & AGS_GATEWAY)) {
1364 k->k_state |= KS_GATEWAY;
1365 k->k_state |= (KS_ADD | KS_DEL_ADD);
1368 /* Deleting-and-adding is necessary to change aspects of a route.
1369 * Just delete instead of deleting and then adding a bad route.
1370 * Otherwise, we want to keep the route in the kernel.
1372 if (k->k_metric == HOPCNT_INFINITY
1373 && (k->k_state & KS_DEL_ADD))
1374 k->k_state |= KS_DELETE;
1375 else
1376 k->k_state &= ~KS_DELETE;
1377 #undef RT
1381 /* ARGSUSED */
1382 static int
1383 walk_kern(struct radix_node *rn, __unused struct walkarg *argp)
1385 #define RT ((struct rt_entry *)rn)
1386 char metric, pref;
1387 u_int ags = 0;
1390 /* Do not install synthetic routes */
1391 if (RT->rt_state & RS_NET_SYN)
1392 return 0;
1394 if (!(RT->rt_state & RS_IF)) {
1395 /* This is an ordinary route, not for an interface.
1398 /* aggregate, ordinary good routes without regard to
1399 * their metric
1401 pref = 1;
1402 ags |= (AGS_GATEWAY | AGS_SUPPRESS | AGS_AGGREGATE);
1404 /* Do not install host routes directly to hosts, to avoid
1405 * interfering with ARP entries in the kernel table.
1407 if (RT_ISHOST(RT)
1408 && ntohl(RT->rt_dst) == RT->rt_gate)
1409 return 0;
1411 } else {
1412 /* This is an interface route.
1413 * Do not install routes for "external" remote interfaces.
1415 if (RT->rt_ifp != 0 && (RT->rt_ifp->int_state & IS_EXTERNAL))
1416 return 0;
1418 /* Interfaces should override received routes.
1420 pref = 0;
1421 ags |= (AGS_IF | AGS_CORS_GATE);
1423 /* If it is not an interface, or an alias for an interface,
1424 * it must be a "gateway."
1426 * If it is a "remote" interface, it is also a "gateway" to
1427 * the kernel if is not a alias.
1429 if (RT->rt_ifp == 0
1430 || (RT->rt_ifp->int_state & IS_REMOTE))
1431 ags |= (AGS_GATEWAY | AGS_SUPPRESS | AGS_AGGREGATE);
1434 /* If RIP is off and IRDP is on, let the route to the discovered
1435 * route suppress any RIP routes. Eventually the RIP routes
1436 * will time-out and be deleted. This reaches the steady-state
1437 * quicker.
1439 if ((RT->rt_state & RS_RDISC) && rip_sock < 0)
1440 ags |= AGS_CORS_GATE;
1442 metric = RT->rt_metric;
1443 if (metric == HOPCNT_INFINITY) {
1444 /* if the route is dead, so try hard to aggregate. */
1445 pref = HOPCNT_INFINITY;
1446 ags |= (AGS_FINE_GATE | AGS_SUPPRESS);
1447 ags &= ~(AGS_IF | AGS_CORS_GATE);
1450 ag_check(RT->rt_dst, RT->rt_mask, RT->rt_gate, 0,
1451 metric,pref, 0, 0, ags, kern_out);
1452 return 0;
1453 #undef RT
1457 /* Update the kernel table to match the daemon table.
1459 static void
1460 fix_kern(void)
1462 int i;
1463 struct khash *k, **pk;
1466 need_kern = age_timer;
1468 /* Walk daemon table, updating the copy of the kernel table.
1470 rn_walktree(rhead, walk_kern, 0);
1471 ag_flush(0,0,kern_out);
1473 for (i = 0; i < KHASH_SIZE; i++) {
1474 for (pk = &khash_bins[i]; (k = *pk) != NULL; ) {
1475 /* Do not touch static routes */
1476 if (k->k_state & KS_STATIC) {
1477 kern_check_static(k,0);
1478 pk = &k->k_next;
1479 continue;
1482 /* check hold on routes deleted by the operator */
1483 if (k->k_keep > now.tv_sec) {
1484 /* ensure we check when the hold is over */
1485 LIM_SEC(need_kern, k->k_keep);
1486 /* mark for the next cycle */
1487 k->k_state |= KS_DELETE;
1488 pk = &k->k_next;
1489 continue;
1492 if ((k->k_state & KS_DELETE)
1493 && !(k->k_state & KS_DYNAMIC)) {
1494 kern_ioctl(k, RTM_DELETE, 0);
1495 *pk = k->k_next;
1496 free(k);
1497 continue;
1500 if (k->k_state & KS_DEL_ADD)
1501 kern_ioctl(k, RTM_DELETE, 0);
1503 if (k->k_state & KS_ADD) {
1504 kern_ioctl(k, RTM_ADD,
1505 ((0 != (k->k_state & (KS_GATEWAY
1506 | KS_DYNAMIC)))
1507 ? RTF_GATEWAY : 0));
1508 } else if (k->k_state & KS_CHANGE) {
1509 kern_ioctl(k, RTM_CHANGE,
1510 ((0 != (k->k_state & (KS_GATEWAY
1511 | KS_DYNAMIC)))
1512 ? RTF_GATEWAY : 0));
1514 k->k_state &= ~(KS_ADD|KS_CHANGE|KS_DEL_ADD);
1516 /* Mark this route to be deleted in the next cycle.
1517 * This deletes routes that disappear from the
1518 * daemon table, since the normal aging code
1519 * will clear the bit for routes that have not
1520 * disappeared from the daemon table.
1522 k->k_state |= KS_DELETE;
1523 pk = &k->k_next;
1529 /* Delete a static route in the image of the kernel table.
1531 void
1532 del_static(naddr dst,
1533 naddr mask,
1534 naddr gate,
1535 int gone)
1537 struct khash *k;
1538 struct rt_entry *rt;
1540 /* Just mark it in the table to be deleted next time the kernel
1541 * table is updated.
1542 * If it has already been deleted, mark it as such, and set its
1543 * keep-timer so that it will not be deleted again for a while.
1544 * This lets the operator delete a route added by the daemon
1545 * and add a replacement.
1547 k = kern_find(dst, mask, 0);
1548 if (k != NULL && (gate == 0 || k->k_gate == gate)) {
1549 k->k_state &= ~(KS_STATIC | KS_DYNAMIC | KS_CHECK);
1550 k->k_state |= KS_DELETE;
1551 if (gone) {
1552 k->k_state |= KS_DELETED;
1553 k->k_keep = now.tv_sec + K_KEEP_LIM;
1557 rt = rtget(dst, mask);
1558 if (rt != NULL && (rt->rt_state & RS_STATIC))
1559 rtbad(rt);
1563 /* Delete all routes generated from ICMP Redirects that use a given gateway,
1564 * as well as old redirected routes.
1566 void
1567 del_redirects(naddr bad_gate,
1568 time_t old)
1570 int i;
1571 struct khash *k;
1574 for (i = 0; i < KHASH_SIZE; i++) {
1575 for (k = khash_bins[i]; k != NULL; k = k->k_next) {
1576 if (!(k->k_state & KS_DYNAMIC)
1577 || (k->k_state & KS_STATIC))
1578 continue;
1580 if (k->k_gate != bad_gate
1581 && k->k_redirect_time > old
1582 && !supplier)
1583 continue;
1585 k->k_state |= KS_DELETE;
1586 k->k_state &= ~KS_DYNAMIC;
1587 need_kern.tv_sec = now.tv_sec;
1588 trace_act("mark redirected %s --> %s for deletion",
1589 addrname(k->k_dst, k->k_mask, 0),
1590 naddr_ntoa(k->k_gate));
1596 /* Start the daemon tables.
1598 extern int max_keylen;
1600 void
1601 rtinit(void)
1603 int i;
1604 struct ag_info *ag;
1606 /* Initialize the radix trees */
1607 max_keylen = sizeof(struct sockaddr_in);
1608 rn_init();
1609 rn_inithead(&rhead, 32);
1611 /* mark all of the slots in the table free */
1612 ag_avail = ag_slots;
1613 for (ag = ag_slots, i = 1; i < NUM_AG_SLOTS; i++) {
1614 ag->ag_fine = ag+1;
1615 ag++;
1620 #ifdef _HAVE_SIN_LEN
1621 static struct sockaddr_in dst_sock = {sizeof(dst_sock), AF_INET, 0, {0}, {0}};
1622 static struct sockaddr_in mask_sock = {sizeof(mask_sock), AF_INET, 0, {0}, {0}};
1623 #else
1624 static struct sockaddr_in_new dst_sock = {_SIN_ADDR_SIZE, AF_INET};
1625 static struct sockaddr_in_new mask_sock = {_SIN_ADDR_SIZE, AF_INET};
1626 #endif
1629 static void
1630 set_need_flash(void)
1632 if (!need_flash) {
1633 need_flash = 1;
1634 /* Do not send the flash update immediately. Wait a little
1635 * while to hear from other routers.
1637 no_flash.tv_sec = now.tv_sec + MIN_WAITTIME;
1642 /* Get a particular routing table entry
1644 struct rt_entry *
1645 rtget(naddr dst, naddr mask)
1647 struct rt_entry *rt;
1649 dst_sock.sin_addr.s_addr = dst;
1650 mask_sock.sin_addr.s_addr = htonl(mask);
1651 masktrim(&mask_sock);
1652 rt = (struct rt_entry *)rhead->rnh_lookup(&dst_sock,&mask_sock,rhead);
1653 if (!rt
1654 || rt->rt_dst != dst
1655 || rt->rt_mask != mask)
1656 return 0;
1658 return rt;
1662 /* Find a route to dst as the kernel would.
1664 struct rt_entry *
1665 rtfind(naddr dst)
1667 dst_sock.sin_addr.s_addr = dst;
1668 return (struct rt_entry *)rhead->rnh_matchaddr(&dst_sock, rhead);
1672 /* add a route to the table
1674 void
1675 rtadd(naddr dst,
1676 naddr mask,
1677 u_int state, /* rt_state for the entry */
1678 struct rt_spare *new)
1680 struct rt_entry *rt;
1681 naddr smask;
1682 int i;
1683 struct rt_spare *rts;
1685 rt = (struct rt_entry *)rtmalloc(sizeof (*rt), "rtadd");
1686 memset(rt, 0, sizeof(*rt));
1687 for (rts = rt->rt_spares, i = NUM_SPARES; i != 0; i--, rts++)
1688 rts->rts_metric = HOPCNT_INFINITY;
1690 rt->rt_nodes->rn_key = (caddr_t)&rt->rt_dst_sock;
1691 rt->rt_dst = dst;
1692 rt->rt_dst_sock.sin_family = AF_INET;
1693 #ifdef _HAVE_SIN_LEN
1694 rt->rt_dst_sock.sin_len = dst_sock.sin_len;
1695 #endif
1696 if (mask != HOST_MASK) {
1697 smask = std_mask(dst);
1698 if ((smask & ~mask) == 0 && mask > smask)
1699 state |= RS_SUBNET;
1701 mask_sock.sin_addr.s_addr = htonl(mask);
1702 masktrim(&mask_sock);
1703 rt->rt_mask = mask;
1704 rt->rt_state = state;
1705 rt->rt_spares[0] = *new;
1706 rt->rt_time = now.tv_sec;
1707 rt->rt_poison_metric = HOPCNT_INFINITY;
1708 rt->rt_seqno = update_seqno;
1710 if (++total_routes == MAX_ROUTES)
1711 msglog("have maximum (%d) routes", total_routes);
1712 if (TRACEACTIONS)
1713 trace_add_del("Add", rt);
1715 need_kern.tv_sec = now.tv_sec;
1716 set_need_flash();
1718 if (0 == rhead->rnh_addaddr(&rt->rt_dst_sock, &mask_sock,
1719 rhead, rt->rt_nodes)) {
1720 msglog("rnh_addaddr() failed for %s mask=%#lx",
1721 naddr_ntoa(dst), (u_long)mask);
1722 free(rt);
1727 /* notice a changed route
1729 void
1730 rtchange(struct rt_entry *rt,
1731 u_int state, /* new state bits */
1732 struct rt_spare *new,
1733 char *label)
1735 if (rt->rt_metric != new->rts_metric) {
1736 /* Fix the kernel immediately if it seems the route
1737 * has gone bad, since there may be a working route that
1738 * aggregates this route.
1740 if (new->rts_metric == HOPCNT_INFINITY) {
1741 need_kern.tv_sec = now.tv_sec;
1742 if (new->rts_time >= now.tv_sec - EXPIRE_TIME)
1743 new->rts_time = now.tv_sec - EXPIRE_TIME;
1745 rt->rt_seqno = update_seqno;
1746 set_need_flash();
1749 if (rt->rt_gate != new->rts_gate) {
1750 need_kern.tv_sec = now.tv_sec;
1751 rt->rt_seqno = update_seqno;
1752 set_need_flash();
1755 state |= (rt->rt_state & RS_SUBNET);
1757 /* Keep various things from deciding ageless routes are stale.
1759 if (!AGE_RT(state, new->rts_ifp))
1760 new->rts_time = now.tv_sec;
1762 if (TRACEACTIONS)
1763 trace_change(rt, state, new,
1764 label ? label : "Chg ");
1766 rt->rt_state = state;
1767 rt->rt_spares[0] = *new;
1771 /* check for a better route among the spares
1773 static struct rt_spare *
1774 rts_better(struct rt_entry *rt)
1776 struct rt_spare *rts, *rts1;
1777 int i;
1779 /* find the best alternative among the spares */
1780 rts = rt->rt_spares+1;
1781 for (i = NUM_SPARES, rts1 = rts+1; i > 2; i--, rts1++) {
1782 if (BETTER_LINK(rt,rts1,rts))
1783 rts = rts1;
1786 return rts;
1790 /* switch to a backup route
1792 void
1793 rtswitch(struct rt_entry *rt,
1794 struct rt_spare *rts)
1796 struct rt_spare swap;
1797 char label[10];
1800 /* Do not change permanent routes */
1801 if (0 != (rt->rt_state & (RS_MHOME | RS_STATIC | RS_RDISC
1802 | RS_NET_SYN | RS_IF)))
1803 return;
1805 /* find the best alternative among the spares */
1806 if (rts == NULL)
1807 rts = rts_better(rt);
1809 /* Do not bother if it is not worthwhile.
1811 if (!BETTER_LINK(rt, rts, rt->rt_spares))
1812 return;
1814 swap = rt->rt_spares[0];
1815 sprintf(label, "Use #%d", (int)(rts - rt->rt_spares));
1816 rtchange(rt, rt->rt_state & ~(RS_NET_SYN | RS_RDISC), rts, label);
1817 if (swap.rts_metric == HOPCNT_INFINITY) {
1818 *rts = rts_empty;
1819 } else {
1820 *rts = swap;
1825 void
1826 rtdelete(struct rt_entry *rt)
1828 struct khash *k;
1831 if (TRACEACTIONS)
1832 trace_add_del("Del", rt);
1834 k = kern_find(rt->rt_dst, rt->rt_mask, 0);
1835 if (k != NULL) {
1836 k->k_state |= KS_DELETE;
1837 need_kern.tv_sec = now.tv_sec;
1840 dst_sock.sin_addr.s_addr = rt->rt_dst;
1841 mask_sock.sin_addr.s_addr = htonl(rt->rt_mask);
1842 masktrim(&mask_sock);
1843 if (rt != (struct rt_entry *)rhead->rnh_deladdr(&dst_sock, &mask_sock,
1844 rhead)) {
1845 msglog("rnh_deladdr() failed");
1846 } else {
1847 free(rt);
1848 total_routes--;
1853 void
1854 rts_delete(struct rt_entry *rt,
1855 struct rt_spare *rts)
1857 trace_upslot(rt, rts, &rts_empty);
1858 *rts = rts_empty;
1862 /* Get rid of a bad route, and try to switch to a replacement.
1864 void
1865 rtbad(struct rt_entry *rt)
1867 struct rt_spare new;
1869 /* Poison the route */
1870 new = rt->rt_spares[0];
1871 new.rts_metric = HOPCNT_INFINITY;
1872 rtchange(rt, rt->rt_state & ~(RS_IF | RS_LOCAL | RS_STATIC), &new, 0);
1873 rtswitch(rt, 0);
1877 /* Junk a RS_NET_SYN or RS_LOCAL route,
1878 * unless it is needed by another interface.
1880 void
1881 rtbad_sub(struct rt_entry *rt)
1883 struct interface *ifp, *ifp1;
1884 struct intnet *intnetp;
1885 u_int state;
1888 ifp1 = NULL;
1889 state = 0;
1891 if (rt->rt_state & RS_LOCAL) {
1892 /* Is this the route through loopback for the interface?
1893 * If so, see if it is used by any other interfaces, such
1894 * as a point-to-point interface with the same local address.
1896 for (ifp = ifnet; ifp != NULL; ifp = ifp->int_next) {
1897 /* Retain it if another interface needs it.
1899 if (ifp->int_addr == rt->rt_ifp->int_addr) {
1900 state |= RS_LOCAL;
1901 ifp1 = ifp;
1902 break;
1908 if (!(state & RS_LOCAL)) {
1909 /* Retain RIPv1 logical network route if there is another
1910 * interface that justifies it.
1912 if (rt->rt_state & RS_NET_SYN) {
1913 for (ifp = ifnet; ifp != NULL; ifp = ifp->int_next) {
1914 if ((ifp->int_state & IS_NEED_NET_SYN)
1915 && rt->rt_mask == ifp->int_std_mask
1916 && rt->rt_dst == ifp->int_std_addr) {
1917 state |= RS_NET_SYN;
1918 ifp1 = ifp;
1919 break;
1924 /* or if there is an authority route that needs it. */
1925 for (intnetp = intnets;
1926 intnetp != NULL;
1927 intnetp = intnetp->intnet_next) {
1928 if (intnetp->intnet_addr == rt->rt_dst
1929 && intnetp->intnet_mask == rt->rt_mask) {
1930 state |= (RS_NET_SYN | RS_NET_INT);
1931 break;
1936 if (ifp1 != NULL || (state & RS_NET_SYN)) {
1937 struct rt_spare new = rt->rt_spares[0];
1938 new.rts_ifp = ifp1;
1939 rtchange(rt, ((rt->rt_state & ~(RS_NET_SYN|RS_LOCAL)) | state),
1940 &new, 0);
1941 } else {
1942 rtbad(rt);
1947 /* Called while walking the table looking for sick interfaces
1948 * or after a time change.
1950 /* ARGSUSED */
1952 walk_bad(struct radix_node *rn, __unused struct walkarg *argp)
1954 #define RT ((struct rt_entry *)rn)
1955 struct rt_spare *rts;
1956 int i;
1959 /* fix any spare routes through the interface
1961 rts = RT->rt_spares;
1962 for (i = NUM_SPARES; i != 1; i--) {
1963 rts++;
1964 if (rts->rts_metric < HOPCNT_INFINITY
1965 && (rts->rts_ifp == 0
1966 || (rts->rts_ifp->int_state & IS_BROKE)))
1967 rts_delete(RT, rts);
1970 /* Deal with the main route
1972 /* finished if it has been handled before or if its interface is ok
1974 if (RT->rt_ifp == 0 || !(RT->rt_ifp->int_state & IS_BROKE))
1975 return 0;
1977 /* Bad routes for other than interfaces are easy.
1979 if (0 == (RT->rt_state & (RS_IF | RS_NET_SYN | RS_LOCAL))) {
1980 rtbad(RT);
1981 return 0;
1984 rtbad_sub(RT);
1985 return 0;
1986 #undef RT
1990 /* Check the age of an individual route.
1992 /* ARGSUSED */
1993 static int
1994 walk_age(struct radix_node *rn, __unused struct walkarg *argp)
1996 #define RT ((struct rt_entry *)rn)
1997 struct interface *ifp;
1998 struct rt_spare *rts;
1999 int i;
2002 /* age all of the spare routes, including the primary route
2003 * currently in use
2005 rts = RT->rt_spares;
2006 for (i = NUM_SPARES; i != 0; i--, rts++) {
2008 ifp = rts->rts_ifp;
2009 if (i == NUM_SPARES) {
2010 if (!AGE_RT(RT->rt_state, ifp)) {
2011 /* Keep various things from deciding ageless
2012 * routes are stale
2014 rts->rts_time = now.tv_sec;
2015 continue;
2018 /* forget RIP routes after RIP has been turned off.
2020 if (rip_sock < 0) {
2021 rtdelete(RT);
2022 return 0;
2026 /* age failing routes
2028 if (age_bad_gate == rts->rts_gate
2029 && rts->rts_time >= now_stale) {
2030 rts->rts_time -= SUPPLY_INTERVAL;
2033 /* trash the spare routes when they go bad */
2034 if (rts->rts_metric < HOPCNT_INFINITY
2035 && now_garbage > rts->rts_time
2036 && i != NUM_SPARES)
2037 rts_delete(RT, rts);
2041 /* finished if the active route is still fresh */
2042 if (now_stale <= RT->rt_time)
2043 return 0;
2045 /* try to switch to an alternative */
2046 rtswitch(RT, 0);
2048 /* Delete a dead route after it has been publically mourned. */
2049 if (now_garbage > RT->rt_time) {
2050 rtdelete(RT);
2051 return 0;
2054 /* Start poisoning a bad route before deleting it. */
2055 if (now.tv_sec - RT->rt_time > EXPIRE_TIME) {
2056 struct rt_spare new = RT->rt_spares[0];
2057 new.rts_metric = HOPCNT_INFINITY;
2058 rtchange(RT, RT->rt_state, &new, 0);
2060 return 0;
2064 /* Watch for dead routes and interfaces.
2066 void
2067 age(naddr bad_gate)
2069 struct interface *ifp;
2070 int need_query = 0;
2072 /* If not listening to RIP, there is no need to age the routes in
2073 * the table.
2075 age_timer.tv_sec = (now.tv_sec
2076 + ((rip_sock < 0) ? NEVER : SUPPLY_INTERVAL));
2078 /* Check for dead IS_REMOTE interfaces by timing their
2079 * transmissions.
2081 for (ifp = ifnet; ifp; ifp = ifp->int_next) {
2082 if (!(ifp->int_state & IS_REMOTE))
2083 continue;
2085 /* ignore unreachable remote interfaces */
2086 if (!check_remote(ifp))
2087 continue;
2089 /* Restore remote interface that has become reachable
2091 if (ifp->int_state & IS_BROKE)
2092 if_ok(ifp, "remote ");
2094 if (ifp->int_act_time != NEVER
2095 && now.tv_sec - ifp->int_act_time > EXPIRE_TIME) {
2096 msglog("remote interface %s to %s timed out after"
2097 " %ld:%ld",
2098 ifp->int_name,
2099 naddr_ntoa(ifp->int_dstaddr),
2100 (now.tv_sec - ifp->int_act_time)/60,
2101 (now.tv_sec - ifp->int_act_time)%60);
2102 if_sick(ifp);
2105 /* If we have not heard from the other router
2106 * recently, ask it.
2108 if (now.tv_sec >= ifp->int_query_time) {
2109 ifp->int_query_time = NEVER;
2110 need_query = 1;
2114 /* Age routes. */
2115 age_bad_gate = bad_gate;
2116 rn_walktree(rhead, walk_age, 0);
2118 /* delete old redirected routes to keep the kernel table small
2119 * and prevent blackholes
2121 del_redirects(bad_gate, now.tv_sec-STALE_TIME);
2123 /* Update the kernel routing table. */
2124 fix_kern();
2126 /* poke reticent remote gateways */
2127 if (need_query)
2128 rip_query();