Merge branch 'master' of ssh://crater.dragonflybsd.org/repository/git/dragonfly
[dragonfly.git] / sys / net / route.c
blobf03c68c0e8e316a0be411eb78491f317336071e4
1 /*
2 * Copyright (c) 2004, 2005 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Jeffrey M. Hsu.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of The DragonFly Project nor the names of its
16 * contributors may be used to endorse or promote products derived
17 * from this software without specific, prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
34 * Copyright (c) 1980, 1986, 1991, 1993
35 * The Regents of the University of California. All rights reserved.
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
65 * @(#)route.c 8.3 (Berkeley) 1/9/95
66 * $FreeBSD: src/sys/net/route.c,v 1.59.2.10 2003/01/17 08:04:00 ru Exp $
67 * $DragonFly: src/sys/net/route.c,v 1.41 2008/11/09 10:50:15 sephe Exp $
70 #include "opt_inet.h"
71 #include "opt_mpls.h"
73 #include <sys/param.h>
74 #include <sys/systm.h>
75 #include <sys/malloc.h>
76 #include <sys/mbuf.h>
77 #include <sys/socket.h>
78 #include <sys/domain.h>
79 #include <sys/kernel.h>
80 #include <sys/sysctl.h>
81 #include <sys/globaldata.h>
82 #include <sys/thread.h>
84 #include <net/if.h>
85 #include <net/route.h>
86 #include <net/netisr.h>
88 #include <netinet/in.h>
89 #include <net/ip_mroute/ip_mroute.h>
91 #include <sys/thread2.h>
92 #include <sys/msgport2.h>
93 #include <net/netmsg2.h>
95 #ifdef MPLS
96 #include <netproto/mpls/mpls.h>
97 #endif
99 static struct rtstatistics rtstatistics_percpu[MAXCPU];
100 #ifdef SMP
101 #define rtstat rtstatistics_percpu[mycpuid]
102 #else
103 #define rtstat rtstatistics_percpu[0]
104 #endif
106 struct radix_node_head *rt_tables[MAXCPU][AF_MAX+1];
107 struct lwkt_port *rt_ports[MAXCPU];
109 static void rt_maskedcopy (struct sockaddr *, struct sockaddr *,
110 struct sockaddr *);
111 static void rtable_init(void);
112 static void rtable_service_loop(void *dummy);
113 static void rtinit_rtrequest_callback(int, int, struct rt_addrinfo *,
114 struct rtentry *, void *);
116 #ifdef SMP
117 static void rtredirect_msghandler(struct netmsg *netmsg);
118 static void rtrequest1_msghandler(struct netmsg *netmsg);
119 #endif
121 static int rt_setshims(struct rtentry *, struct sockaddr **);
123 SYSCTL_NODE(_net, OID_AUTO, route, CTLFLAG_RW, 0, "Routing");
125 #ifdef ROUTE_DEBUG
126 static int route_debug = 1;
127 SYSCTL_INT(_net_route, OID_AUTO, route_debug, CTLFLAG_RW,
128 &route_debug, 0, "");
129 #endif
131 int route_assert_owner_access = 0;
132 SYSCTL_INT(_net_route, OID_AUTO, assert_owner_access, CTLFLAG_RW,
133 &route_assert_owner_access, 0, "");
136 * Initialize the route table(s) for protocol domains and
137 * create a helper thread which will be responsible for updating
138 * route table entries on each cpu.
140 void
141 route_init(void)
143 int cpu;
144 thread_t rtd;
146 for (cpu = 0; cpu < ncpus; ++cpu)
147 bzero(&rtstatistics_percpu[cpu], sizeof(struct rtstatistics));
148 rn_init(); /* initialize all zeroes, all ones, mask table */
149 rtable_init(); /* call dom_rtattach() on each cpu */
151 for (cpu = 0; cpu < ncpus; cpu++) {
152 lwkt_create(rtable_service_loop, NULL, &rtd, NULL,
153 0, cpu, "rtable_cpu %d", cpu);
154 rt_ports[cpu] = &rtd->td_msgport;
158 static void
159 rtable_init_oncpu(struct netmsg *nmsg)
161 struct domain *dom;
162 int cpu = mycpuid;
164 SLIST_FOREACH(dom, &domains, dom_next) {
165 if (dom->dom_rtattach) {
166 dom->dom_rtattach(
167 (void **)&rt_tables[cpu][dom->dom_family],
168 dom->dom_rtoffset);
171 ifnet_forwardmsg(&nmsg->nm_lmsg, cpu + 1);
174 static void
175 rtable_init(void)
177 struct netmsg nmsg;
179 netmsg_init(&nmsg, &curthread->td_msgport, 0, rtable_init_oncpu);
180 ifnet_domsg(&nmsg.nm_lmsg, 0);
184 * Our per-cpu table management protocol thread. All route table operations
185 * are sequentially chained through all cpus starting at cpu #0 in order to
186 * maintain duplicate route tables on each cpu. Having a spearate route
187 * table management thread allows the protocol and interrupt threads to
188 * issue route table changes.
190 static void
191 rtable_service_loop(void *dummy __unused)
193 struct netmsg *netmsg;
194 thread_t td = curthread;
196 while ((netmsg = lwkt_waitport(&td->td_msgport, 0)) != NULL) {
197 netmsg->nm_dispatch(netmsg);
202 * Routing statistics.
204 #ifdef SMP
205 static int
206 sysctl_rtstatistics(SYSCTL_HANDLER_ARGS)
208 int cpu, error = 0;
210 for (cpu = 0; cpu < ncpus; ++cpu) {
211 if ((error = SYSCTL_OUT(req, &rtstatistics_percpu[cpu],
212 sizeof(struct rtstatistics))))
213 break;
214 if ((error = SYSCTL_IN(req, &rtstatistics_percpu[cpu],
215 sizeof(struct rtstatistics))))
216 break;
219 return (error);
221 SYSCTL_PROC(_net_route, OID_AUTO, stats, (CTLTYPE_OPAQUE|CTLFLAG_RW),
222 0, 0, sysctl_rtstatistics, "S,rtstatistics", "Routing statistics");
223 #else
224 SYSCTL_STRUCT(_net_route, OID_AUTO, stats, CTLFLAG_RW, &rtstat, rtstatistics,
225 "Routing statistics");
226 #endif
229 * Packet routing routines.
233 * Look up and fill in the "ro_rt" rtentry field in a route structure given
234 * an address in the "ro_dst" field. Always send a report on a miss and
235 * always clone routes.
237 void
238 rtalloc(struct route *ro)
240 rtalloc_ign(ro, 0UL);
244 * Look up and fill in the "ro_rt" rtentry field in a route structure given
245 * an address in the "ro_dst" field. Always send a report on a miss and
246 * optionally clone routes when RTF_CLONING or RTF_PRCLONING are not being
247 * ignored.
249 void
250 rtalloc_ign(struct route *ro, u_long ignoreflags)
252 if (ro->ro_rt != NULL) {
253 if (ro->ro_rt->rt_ifp != NULL && ro->ro_rt->rt_flags & RTF_UP)
254 return;
255 rtfree(ro->ro_rt);
256 ro->ro_rt = NULL;
258 ro->ro_rt = _rtlookup(&ro->ro_dst, RTL_REPORTMSG, ignoreflags);
262 * Look up the route that matches the given "dst" address.
264 * Route lookup can have the side-effect of creating and returning
265 * a cloned route instead when "dst" matches a cloning route and the
266 * RTF_CLONING and RTF_PRCLONING flags are not being ignored.
268 * Any route returned has its reference count incremented.
270 struct rtentry *
271 _rtlookup(struct sockaddr *dst, boolean_t generate_report, u_long ignore)
273 struct radix_node_head *rnh = rt_tables[mycpuid][dst->sa_family];
274 struct rtentry *rt;
276 if (rnh == NULL)
277 goto unreach;
280 * Look up route in the radix tree.
282 rt = (struct rtentry *) rnh->rnh_matchaddr((char *)dst, rnh);
283 if (rt == NULL)
284 goto unreach;
287 * Handle cloning routes.
289 if ((rt->rt_flags & ~ignore & (RTF_CLONING | RTF_PRCLONING)) != 0) {
290 struct rtentry *clonedroute;
291 int error;
293 clonedroute = rt; /* copy in/copy out parameter */
294 error = rtrequest(RTM_RESOLVE, dst, NULL, NULL, 0,
295 &clonedroute); /* clone the route */
296 if (error != 0) { /* cloning failed */
297 if (generate_report)
298 rt_dstmsg(RTM_MISS, dst, error);
299 rt->rt_refcnt++;
300 return (rt); /* return the uncloned route */
302 if (generate_report) {
303 if (clonedroute->rt_flags & RTF_XRESOLVE)
304 rt_dstmsg(RTM_RESOLVE, dst, 0);
305 else
306 rt_rtmsg(RTM_ADD, clonedroute,
307 clonedroute->rt_ifp, 0);
309 return (clonedroute); /* return cloned route */
313 * Increment the reference count of the matched route and return.
315 rt->rt_refcnt++;
316 return (rt);
318 unreach:
319 rtstat.rts_unreach++;
320 if (generate_report)
321 rt_dstmsg(RTM_MISS, dst, 0);
322 return (NULL);
325 void
326 rtfree(struct rtentry *rt)
328 if (rt->rt_cpuid == mycpuid)
329 rtfree_oncpu(rt);
330 else
331 rtfree_remote(rt, 1);
334 void
335 rtfree_oncpu(struct rtentry *rt)
337 KKASSERT(rt->rt_cpuid == mycpuid);
338 KASSERT(rt->rt_refcnt > 0, ("rtfree: rt_refcnt %ld", rt->rt_refcnt));
340 --rt->rt_refcnt;
341 if (rt->rt_refcnt == 0) {
342 struct radix_node_head *rnh =
343 rt_tables[mycpuid][rt_key(rt)->sa_family];
345 if (rnh->rnh_close)
346 rnh->rnh_close((struct radix_node *)rt, rnh);
347 if (!(rt->rt_flags & RTF_UP)) {
348 /* deallocate route */
349 if (rt->rt_ifa != NULL)
350 IFAFREE(rt->rt_ifa);
351 if (rt->rt_parent != NULL)
352 RTFREE(rt->rt_parent); /* recursive call! */
353 Free(rt_key(rt));
354 Free(rt);
359 static void
360 rtfree_remote_dispatch(struct netmsg *nmsg)
362 struct lwkt_msg *lmsg = &nmsg->nm_lmsg;
363 struct rtentry *rt = lmsg->u.ms_resultp;
365 rtfree_oncpu(rt);
366 lwkt_replymsg(lmsg, 0);
369 void
370 rtfree_remote(struct rtentry *rt, int allow_panic)
372 struct netmsg nmsg;
373 struct lwkt_msg *lmsg;
375 KKASSERT(rt->rt_cpuid != mycpuid);
377 if (route_assert_owner_access && allow_panic) {
378 panic("rt remote free rt_cpuid %d, mycpuid %d\n",
379 rt->rt_cpuid, mycpuid);
380 } else {
381 kprintf("rt remote free rt_cpuid %d, mycpuid %d\n",
382 rt->rt_cpuid, mycpuid);
383 backtrace();
386 netmsg_init(&nmsg, &curthread->td_msgport, 0, rtfree_remote_dispatch);
387 lmsg = &nmsg.nm_lmsg;
388 lmsg->u.ms_resultp = rt;
390 lwkt_domsg(rtable_portfn(rt->rt_cpuid), lmsg, 0);
393 static int
394 rtredirect_oncpu(struct sockaddr *dst, struct sockaddr *gateway,
395 struct sockaddr *netmask, int flags, struct sockaddr *src)
397 struct rtentry *rt = NULL;
398 struct rt_addrinfo rtinfo;
399 struct ifaddr *ifa;
400 u_long *stat = NULL;
401 int error;
403 /* verify the gateway is directly reachable */
404 if ((ifa = ifa_ifwithnet(gateway)) == NULL) {
405 error = ENETUNREACH;
406 goto out;
410 * If the redirect isn't from our current router for this destination,
411 * it's either old or wrong.
413 if (!(flags & RTF_DONE) && /* XXX JH */
414 (rt = rtpurelookup(dst)) != NULL &&
415 (!sa_equal(src, rt->rt_gateway) || rt->rt_ifa != ifa)) {
416 error = EINVAL;
417 goto done;
421 * If it redirects us to ourselves, we have a routing loop,
422 * perhaps as a result of an interface going down recently.
424 if (ifa_ifwithaddr(gateway)) {
425 error = EHOSTUNREACH;
426 goto done;
430 * Create a new entry if the lookup failed or if we got back
431 * a wildcard entry for the default route. This is necessary
432 * for hosts which use routing redirects generated by smart
433 * gateways to dynamically build the routing tables.
435 if (rt == NULL)
436 goto create;
437 if ((rt_mask(rt) != NULL && rt_mask(rt)->sa_len < 2)) {
438 rtfree(rt);
439 goto create;
442 /* Ignore redirects for directly connected hosts. */
443 if (!(rt->rt_flags & RTF_GATEWAY)) {
444 error = EHOSTUNREACH;
445 goto done;
448 if (!(rt->rt_flags & RTF_HOST) && (flags & RTF_HOST)) {
450 * Changing from a network route to a host route.
451 * Create a new host route rather than smashing the
452 * network route.
454 create:
455 flags |= RTF_GATEWAY | RTF_DYNAMIC;
456 bzero(&rtinfo, sizeof(struct rt_addrinfo));
457 rtinfo.rti_info[RTAX_DST] = dst;
458 rtinfo.rti_info[RTAX_GATEWAY] = gateway;
459 rtinfo.rti_info[RTAX_NETMASK] = netmask;
460 rtinfo.rti_flags = flags;
461 rtinfo.rti_ifa = ifa;
462 rt = NULL; /* copy-in/copy-out parameter */
463 error = rtrequest1(RTM_ADD, &rtinfo, &rt);
464 if (rt != NULL)
465 flags = rt->rt_flags;
466 stat = &rtstat.rts_dynamic;
467 } else {
469 * Smash the current notion of the gateway to this destination.
470 * Should check about netmask!!!
472 rt->rt_flags |= RTF_MODIFIED;
473 flags |= RTF_MODIFIED;
474 rt_setgate(rt, rt_key(rt), gateway);
475 error = 0;
476 stat = &rtstat.rts_newgateway;
479 done:
480 if (rt != NULL)
481 rtfree(rt);
482 out:
483 if (error != 0)
484 rtstat.rts_badredirect++;
485 else if (stat != NULL)
486 (*stat)++;
488 return error;
#ifdef SMP

/*
 * Message used by rtredirect() to carry its arguments from cpu to cpu.
 * The netmsg header must stay first: the handler casts the netmsg
 * pointer it receives back to this structure.
 */
struct netmsg_rtredirect {
	struct netmsg	netmsg;		/* message header */
	struct sockaddr	*dst;		/* rtredirect() "dst" argument */
	struct sockaddr	*gateway;	/* rtredirect() "gateway" argument */
	struct sockaddr	*netmask;	/* rtredirect() "netmask" argument */
	int		flags;		/* rtredirect() "flags" argument */
	struct sockaddr	*src;		/* rtredirect() "src" argument */
};

#endif
505 * Force a routing table entry to the specified
506 * destination to go through the given gateway.
507 * Normally called as a result of a routing redirect
508 * message from the network layer.
510 * N.B.: must be called at splnet
512 void
513 rtredirect(struct sockaddr *dst, struct sockaddr *gateway,
514 struct sockaddr *netmask, int flags, struct sockaddr *src)
516 struct rt_addrinfo rtinfo;
517 int error;
518 #ifdef SMP
519 struct netmsg_rtredirect msg;
521 netmsg_init(&msg.netmsg, &curthread->td_msgport, 0,
522 rtredirect_msghandler);
523 msg.dst = dst;
524 msg.gateway = gateway;
525 msg.netmask = netmask;
526 msg.flags = flags;
527 msg.src = src;
528 error = lwkt_domsg(rtable_portfn(0), &msg.netmsg.nm_lmsg, 0);
529 #else
530 error = rtredirect_oncpu(dst, gateway, netmask, flags, src);
531 #endif
532 bzero(&rtinfo, sizeof(struct rt_addrinfo));
533 rtinfo.rti_info[RTAX_DST] = dst;
534 rtinfo.rti_info[RTAX_GATEWAY] = gateway;
535 rtinfo.rti_info[RTAX_NETMASK] = netmask;
536 rtinfo.rti_info[RTAX_AUTHOR] = src;
537 rt_missmsg(RTM_REDIRECT, &rtinfo, flags, error);
#ifdef SMP

/*
 * Per-cpu handler for the redirect message: apply the redirect to this
 * cpu's table, then forward the message to the next cpu's route table
 * port, or reply once the last cpu has been handled.
 *
 * The value returned by rtredirect_oncpu() is not propagated; the
 * reply always carries 0.
 */
static void
rtredirect_msghandler(struct netmsg *netmsg)
{
	struct netmsg_rtredirect *rmsg = (void *)netmsg;
	int next;

	rtredirect_oncpu(rmsg->dst, rmsg->gateway, rmsg->netmask,
			 rmsg->flags, rmsg->src);

	next = mycpuid + 1;
	if (next >= ncpus) {
		lwkt_replymsg(&netmsg->nm_lmsg, 0);
		return;
	}
	lwkt_forwardmsg(rtable_portfn(next), &netmsg->nm_lmsg);
}

#endif
560 * Routing table ioctl interface.
563 rtioctl(u_long req, caddr_t data, struct ucred *cred)
565 #ifdef INET
566 /* Multicast goop, grrr... */
567 return mrt_ioctl ? mrt_ioctl(req, data) : EOPNOTSUPP;
568 #else
569 return ENXIO;
570 #endif
573 struct ifaddr *
574 ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway)
576 struct ifaddr *ifa;
578 if (!(flags & RTF_GATEWAY)) {
580 * If we are adding a route to an interface,
581 * and the interface is a point-to-point link,
582 * we should search for the destination
583 * as our clue to the interface. Otherwise
584 * we can use the local address.
586 ifa = NULL;
587 if (flags & RTF_HOST) {
588 ifa = ifa_ifwithdstaddr(dst);
590 if (ifa == NULL)
591 ifa = ifa_ifwithaddr(gateway);
592 } else {
594 * If we are adding a route to a remote net
595 * or host, the gateway may still be on the
596 * other end of a pt to pt link.
598 ifa = ifa_ifwithdstaddr(gateway);
600 if (ifa == NULL)
601 ifa = ifa_ifwithnet(gateway);
602 if (ifa == NULL) {
603 struct rtentry *rt;
605 rt = rtpurelookup(gateway);
606 if (rt == NULL)
607 return (NULL);
608 rt->rt_refcnt--;
609 if ((ifa = rt->rt_ifa) == NULL)
610 return (NULL);
612 if (ifa->ifa_addr->sa_family != dst->sa_family) {
613 struct ifaddr *oldifa = ifa;
615 ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
616 if (ifa == NULL)
617 ifa = oldifa;
619 return (ifa);
/* Tree-walk callbacks used by rtrequest1(). */
static int	rt_fixdelete (struct radix_node *, void *);
static int	rt_fixchange (struct radix_node *, void *);

/* Argument handed to rt_fixchange() through rnh_walktree_from(). */
struct rtfc_arg {
	struct rtentry		*rt0;	/* the route being added */
	struct radix_node_head	*rnh;	/* the table being walked */
};
631 * Set rtinfo->rti_ifa and rtinfo->rti_ifp.
634 rt_getifa(struct rt_addrinfo *rtinfo)
636 struct sockaddr *gateway = rtinfo->rti_info[RTAX_GATEWAY];
637 struct sockaddr *dst = rtinfo->rti_info[RTAX_DST];
638 struct sockaddr *ifaaddr = rtinfo->rti_info[RTAX_IFA];
639 int flags = rtinfo->rti_flags;
642 * ifp may be specified by sockaddr_dl
643 * when protocol address is ambiguous.
645 if (rtinfo->rti_ifp == NULL) {
646 struct sockaddr *ifpaddr;
648 ifpaddr = rtinfo->rti_info[RTAX_IFP];
649 if (ifpaddr != NULL && ifpaddr->sa_family == AF_LINK) {
650 struct ifaddr *ifa;
652 ifa = ifa_ifwithnet(ifpaddr);
653 if (ifa != NULL)
654 rtinfo->rti_ifp = ifa->ifa_ifp;
658 if (rtinfo->rti_ifa == NULL && ifaaddr != NULL)
659 rtinfo->rti_ifa = ifa_ifwithaddr(ifaaddr);
660 if (rtinfo->rti_ifa == NULL) {
661 struct sockaddr *sa;
663 sa = ifaaddr != NULL ? ifaaddr :
664 (gateway != NULL ? gateway : dst);
665 if (sa != NULL && rtinfo->rti_ifp != NULL)
666 rtinfo->rti_ifa = ifaof_ifpforaddr(sa, rtinfo->rti_ifp);
667 else if (dst != NULL && gateway != NULL)
668 rtinfo->rti_ifa = ifa_ifwithroute(flags, dst, gateway);
669 else if (sa != NULL)
670 rtinfo->rti_ifa = ifa_ifwithroute(flags, sa, sa);
672 if (rtinfo->rti_ifa == NULL)
673 return (ENETUNREACH);
675 if (rtinfo->rti_ifp == NULL)
676 rtinfo->rti_ifp = rtinfo->rti_ifa->ifa_ifp;
677 return (0);
681 * Do appropriate manipulations of a routing tree given
682 * all the bits of info needed
685 rtrequest(
686 int req,
687 struct sockaddr *dst,
688 struct sockaddr *gateway,
689 struct sockaddr *netmask,
690 int flags,
691 struct rtentry **ret_nrt)
693 struct rt_addrinfo rtinfo;
695 bzero(&rtinfo, sizeof(struct rt_addrinfo));
696 rtinfo.rti_info[RTAX_DST] = dst;
697 rtinfo.rti_info[RTAX_GATEWAY] = gateway;
698 rtinfo.rti_info[RTAX_NETMASK] = netmask;
699 rtinfo.rti_flags = flags;
700 return rtrequest1(req, &rtinfo, ret_nrt);
704 rtrequest_global(
705 int req,
706 struct sockaddr *dst,
707 struct sockaddr *gateway,
708 struct sockaddr *netmask,
709 int flags)
711 struct rt_addrinfo rtinfo;
713 bzero(&rtinfo, sizeof(struct rt_addrinfo));
714 rtinfo.rti_info[RTAX_DST] = dst;
715 rtinfo.rti_info[RTAX_GATEWAY] = gateway;
716 rtinfo.rti_info[RTAX_NETMASK] = netmask;
717 rtinfo.rti_flags = flags;
718 return rtrequest1_global(req, &rtinfo, NULL, NULL);
#ifdef SMP

/*
 * Message used by rtrequest1_global() to carry a route request from
 * cpu to cpu.  The netmsg header must stay first: the handler casts
 * the netmsg pointer it receives back to this structure.
 */
struct netmsg_rtq {
	struct netmsg	netmsg;		/* message header */
	int		req;		/* RTM_* request */
	struct rt_addrinfo *rtinfo;	/* caller's request description */
	rtrequest1_callback_func_t callback; /* optional per-cpu callback */
	void		*arg;		/* opaque argument for callback */
};

#endif
734 rtrequest1_global(int req, struct rt_addrinfo *rtinfo,
735 rtrequest1_callback_func_t callback, void *arg)
737 int error;
738 #ifdef SMP
739 struct netmsg_rtq msg;
741 netmsg_init(&msg.netmsg, &curthread->td_msgport, 0,
742 rtrequest1_msghandler);
743 msg.netmsg.nm_lmsg.ms_error = -1;
744 msg.req = req;
745 msg.rtinfo = rtinfo;
746 msg.callback = callback;
747 msg.arg = arg;
748 error = lwkt_domsg(rtable_portfn(0), &msg.netmsg.nm_lmsg, 0);
749 #else
750 struct rtentry *rt = NULL;
752 error = rtrequest1(req, rtinfo, &rt);
753 if (rt)
754 --rt->rt_refcnt;
755 if (callback)
756 callback(req, error, rtinfo, rt, arg);
757 #endif
758 return (error);
/*
 * Handle a route table request on the current cpu.  Since the route
 * tables are supposed to be identical on each cpu, an error occurring
 * later in the message chain is considered system-fatal.
 */
#ifdef SMP

static void
rtrequest1_msghandler(struct netmsg *netmsg)
{
	struct netmsg_rtq *rtq = (void *)netmsg;
	struct rtentry *result = NULL;
	int failed_hard;
	int nextcpu;
	int error;

	error = rtrequest1(rtq->req, rtq->rtinfo, &result);
	if (result != NULL)
		--result->rt_refcnt;	/* we do not keep the route */
	if (rtq->callback != NULL) {
		rtq->callback(rtq->req, error, rtq->rtinfo, result,
			      rtq->arg);
	}

	/*
	 * RTM_DELETE's are propagated even if an error occurs, since a
	 * cloned route might be undergoing deletion and cloned routes
	 * are not necessarily replicated.  An overall error is returned
	 * only if no cpus have the route in question.
	 */
	if (rtq->netmsg.nm_lmsg.ms_error < 0 || error == 0)
		rtq->netmsg.nm_lmsg.ms_error = error;

	nextcpu = mycpuid + 1;
	failed_hard = (error && rtq->req != RTM_DELETE);

	if (failed_hard) {
		/* Only the first cpu in the chain may legitimately fail. */
		if (mycpuid != 0) {
			panic("rtrequest1_msghandler: rtrequest table "
			      "error was not on cpu #0: %p", rtq->rtinfo);
		}
		lwkt_replymsg(&rtq->netmsg.nm_lmsg, error);
		return;
	}

	if (nextcpu < ncpus) {
		lwkt_forwardmsg(rtable_portfn(nextcpu),
				&rtq->netmsg.nm_lmsg);
		return;
	}

	/* Last cpu: report the accumulated result. */
	lwkt_replymsg(&rtq->netmsg.nm_lmsg, rtq->netmsg.nm_lmsg.ms_error);
}

#endif
809 rtrequest1(int req, struct rt_addrinfo *rtinfo, struct rtentry **ret_nrt)
811 struct sockaddr *dst = rtinfo->rti_info[RTAX_DST];
812 struct rtentry *rt;
813 struct radix_node *rn;
814 struct radix_node_head *rnh;
815 struct ifaddr *ifa;
816 struct sockaddr *ndst;
817 int error = 0;
819 #define gotoerr(x) { error = x ; goto bad; }
821 #ifdef ROUTE_DEBUG
822 if (route_debug)
823 rt_addrinfo_print(req, rtinfo);
824 #endif
826 crit_enter();
828 * Find the correct routing tree to use for this Address Family
830 if ((rnh = rt_tables[mycpuid][dst->sa_family]) == NULL)
831 gotoerr(EAFNOSUPPORT);
834 * If we are adding a host route then we don't want to put
835 * a netmask in the tree, nor do we want to clone it.
837 if (rtinfo->rti_flags & RTF_HOST) {
838 rtinfo->rti_info[RTAX_NETMASK] = NULL;
839 rtinfo->rti_flags &= ~(RTF_CLONING | RTF_PRCLONING);
842 switch (req) {
843 case RTM_DELETE:
844 /* Remove the item from the tree. */
845 rn = rnh->rnh_deladdr((char *)rtinfo->rti_info[RTAX_DST],
846 (char *)rtinfo->rti_info[RTAX_NETMASK],
847 rnh);
848 if (rn == NULL)
849 gotoerr(ESRCH);
850 KASSERT(!(rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)),
851 ("rnh_deladdr returned flags 0x%x", rn->rn_flags));
852 rt = (struct rtentry *)rn;
854 /* ref to prevent a deletion race */
855 ++rt->rt_refcnt;
857 /* Free any routes cloned from this one. */
858 if ((rt->rt_flags & (RTF_CLONING | RTF_PRCLONING)) &&
859 rt_mask(rt) != NULL) {
860 rnh->rnh_walktree_from(rnh, (char *)rt_key(rt),
861 (char *)rt_mask(rt),
862 rt_fixdelete, rt);
865 if (rt->rt_gwroute != NULL) {
866 RTFREE(rt->rt_gwroute);
867 rt->rt_gwroute = NULL;
871 * NB: RTF_UP must be set during the search above,
872 * because we might delete the last ref, causing
873 * rt to get freed prematurely.
875 rt->rt_flags &= ~RTF_UP;
877 #ifdef ROUTE_DEBUG
878 if (route_debug)
879 rt_print(rtinfo, rt);
880 #endif
882 /* Give the protocol a chance to keep things in sync. */
883 if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest)
884 ifa->ifa_rtrequest(RTM_DELETE, rt, rtinfo);
887 * If the caller wants it, then it can have it,
888 * but it's up to it to free the rtentry as we won't be
889 * doing it.
891 KASSERT(rt->rt_refcnt >= 0,
892 ("rtrequest1(DELETE): refcnt %ld", rt->rt_refcnt));
893 if (ret_nrt != NULL) {
894 /* leave ref intact for return */
895 *ret_nrt = rt;
896 } else {
897 /* deref / attempt to destroy */
898 rtfree(rt);
900 break;
902 case RTM_RESOLVE:
903 if (ret_nrt == NULL || (rt = *ret_nrt) == NULL)
904 gotoerr(EINVAL);
905 ifa = rt->rt_ifa;
906 rtinfo->rti_flags =
907 rt->rt_flags & ~(RTF_CLONING | RTF_PRCLONING | RTF_STATIC);
908 rtinfo->rti_flags |= RTF_WASCLONED;
909 rtinfo->rti_info[RTAX_GATEWAY] = rt->rt_gateway;
910 if ((rtinfo->rti_info[RTAX_NETMASK] = rt->rt_genmask) == NULL)
911 rtinfo->rti_flags |= RTF_HOST;
912 rtinfo->rti_info[RTAX_MPLS1] = rt->rt_shim[0];
913 rtinfo->rti_info[RTAX_MPLS2] = rt->rt_shim[1];
914 rtinfo->rti_info[RTAX_MPLS3] = rt->rt_shim[2];
915 goto makeroute;
917 case RTM_ADD:
918 KASSERT(!(rtinfo->rti_flags & RTF_GATEWAY) ||
919 rtinfo->rti_info[RTAX_GATEWAY] != NULL,
920 ("rtrequest: GATEWAY but no gateway"));
922 if (rtinfo->rti_ifa == NULL && (error = rt_getifa(rtinfo)))
923 gotoerr(error);
924 ifa = rtinfo->rti_ifa;
925 makeroute:
926 R_Malloc(rt, struct rtentry *, sizeof(struct rtentry));
927 if (rt == NULL)
928 gotoerr(ENOBUFS);
929 bzero(rt, sizeof(struct rtentry));
930 rt->rt_flags = RTF_UP | rtinfo->rti_flags;
931 rt->rt_cpuid = mycpuid;
932 error = rt_setgate(rt, dst, rtinfo->rti_info[RTAX_GATEWAY]);
933 if (error != 0) {
934 Free(rt);
935 gotoerr(error);
938 ndst = rt_key(rt);
939 if (rtinfo->rti_info[RTAX_NETMASK] != NULL)
940 rt_maskedcopy(dst, ndst,
941 rtinfo->rti_info[RTAX_NETMASK]);
942 else
943 bcopy(dst, ndst, dst->sa_len);
945 if (rtinfo->rti_info[RTAX_MPLS1] != NULL)
946 rt_setshims(rt, rtinfo->rti_info);
949 * Note that we now have a reference to the ifa.
950 * This moved from below so that rnh->rnh_addaddr() can
951 * examine the ifa and ifa->ifa_ifp if it so desires.
953 IFAREF(ifa);
954 rt->rt_ifa = ifa;
955 rt->rt_ifp = ifa->ifa_ifp;
956 /* XXX mtu manipulation will be done in rnh_addaddr -- itojun */
958 rn = rnh->rnh_addaddr((char *)ndst,
959 (char *)rtinfo->rti_info[RTAX_NETMASK],
960 rnh, rt->rt_nodes);
961 if (rn == NULL) {
962 struct rtentry *oldrt;
965 * We already have one of these in the tree.
966 * We do a special hack: if the old route was
967 * cloned, then we blow it away and try
968 * re-inserting the new one.
970 oldrt = rtpurelookup(ndst);
971 if (oldrt != NULL) {
972 --oldrt->rt_refcnt;
973 if (oldrt->rt_flags & RTF_WASCLONED) {
974 rtrequest(RTM_DELETE, rt_key(oldrt),
975 oldrt->rt_gateway,
976 rt_mask(oldrt),
977 oldrt->rt_flags, NULL);
978 rn = rnh->rnh_addaddr((char *)ndst,
979 (char *)
980 rtinfo->rti_info[RTAX_NETMASK],
981 rnh, rt->rt_nodes);
987 * If it still failed to go into the tree,
988 * then un-make it (this should be a function).
990 if (rn == NULL) {
991 if (rt->rt_gwroute != NULL)
992 rtfree(rt->rt_gwroute);
993 IFAFREE(ifa);
994 Free(rt_key(rt));
995 Free(rt);
996 gotoerr(EEXIST);
1000 * If we got here from RESOLVE, then we are cloning
1001 * so clone the rest, and note that we
1002 * are a clone (and increment the parent's references)
1004 if (req == RTM_RESOLVE) {
1005 rt->rt_rmx = (*ret_nrt)->rt_rmx; /* copy metrics */
1006 rt->rt_rmx.rmx_pksent = 0; /* reset packet counter */
1007 if ((*ret_nrt)->rt_flags &
1008 (RTF_CLONING | RTF_PRCLONING)) {
1009 rt->rt_parent = *ret_nrt;
1010 (*ret_nrt)->rt_refcnt++;
1015 * if this protocol has something to add to this then
1016 * allow it to do that as well.
1018 if (ifa->ifa_rtrequest != NULL)
1019 ifa->ifa_rtrequest(req, rt, rtinfo);
1022 * We repeat the same procedure from rt_setgate() here because
1023 * it doesn't fire when we call it there because the node
1024 * hasn't been added to the tree yet.
1026 if (req == RTM_ADD && !(rt->rt_flags & RTF_HOST) &&
1027 rt_mask(rt) != NULL) {
1028 struct rtfc_arg arg = { rt, rnh };
1030 rnh->rnh_walktree_from(rnh, (char *)rt_key(rt),
1031 (char *)rt_mask(rt),
1032 rt_fixchange, &arg);
1035 #ifdef ROUTE_DEBUG
1036 if (route_debug)
1037 rt_print(rtinfo, rt);
1038 #endif
1040 * Return the resulting rtentry,
1041 * increasing the number of references by one.
1043 if (ret_nrt != NULL) {
1044 rt->rt_refcnt++;
1045 *ret_nrt = rt;
1047 break;
1048 default:
1049 error = EOPNOTSUPP;
1051 bad:
1052 #ifdef ROUTE_DEBUG
1053 if (route_debug) {
1054 if (error)
1055 kprintf("rti %p failed error %d\n", rtinfo, error);
1056 else
1057 kprintf("rti %p succeeded\n", rtinfo);
1059 #endif
1060 crit_exit();
1061 return (error);
1065 * Called from rtrequest(RTM_DELETE, ...) to fix up the route's ``family''
1066 * (i.e., the routes related to it by the operation of cloning). This
1067 * routine is iterated over all potential former-child-routes by way of
1068 * rnh->rnh_walktree_from() above, and those that actually are children of
1069 * the late parent (passed in as VP here) are themselves deleted.
1071 static int
1072 rt_fixdelete(struct radix_node *rn, void *vp)
1074 struct rtentry *rt = (struct rtentry *)rn;
1075 struct rtentry *rt0 = vp;
1077 if (rt->rt_parent == rt0 &&
1078 !(rt->rt_flags & (RTF_PINNED | RTF_CLONING | RTF_PRCLONING))) {
1079 return rtrequest(RTM_DELETE, rt_key(rt), NULL, rt_mask(rt),
1080 rt->rt_flags, NULL);
1082 return 0;
1086 * This routine is called from rt_setgate() to do the analogous thing for
1087 * adds and changes. There is the added complication in this case of a
1088 * middle insert; i.e., insertion of a new network route between an older
1089 * network route and (cloned) host routes. For this reason, a simple check
1090 * of rt->rt_parent is insufficient; each candidate route must be tested
1091 * against the (mask, value) of the new route (passed as before in vp)
1092 * to see if the new route matches it.
1094 * XXX - it may be possible to do fixdelete() for changes and reserve this
1095 * routine just for adds. I'm not sure why I thought it was necessary to do
1096 * changes this way.
1098 #ifdef DEBUG
1099 static int rtfcdebug = 0;
1100 #endif
1102 static int
1103 rt_fixchange(struct radix_node *rn, void *vp)
1105 struct rtentry *rt = (struct rtentry *)rn;
1106 struct rtfc_arg *ap = vp;
1107 struct rtentry *rt0 = ap->rt0;
1108 struct radix_node_head *rnh = ap->rnh;
1109 u_char *xk1, *xm1, *xk2, *xmp;
1110 int i, len, mlen;
1112 #ifdef DEBUG
1113 if (rtfcdebug)
1114 kprintf("rt_fixchange: rt %p, rt0 %p\n", rt, rt0);
1115 #endif
1117 if (rt->rt_parent == NULL ||
1118 (rt->rt_flags & (RTF_PINNED | RTF_CLONING | RTF_PRCLONING))) {
1119 #ifdef DEBUG
1120 if (rtfcdebug) kprintf("no parent, pinned or cloning\n");
1121 #endif
1122 return 0;
1125 if (rt->rt_parent == rt0) {
1126 #ifdef DEBUG
1127 if (rtfcdebug) kprintf("parent match\n");
1128 #endif
1129 return rtrequest(RTM_DELETE, rt_key(rt), NULL, rt_mask(rt),
1130 rt->rt_flags, NULL);
1134 * There probably is a function somewhere which does this...
1135 * if not, there should be.
1137 len = imin(rt_key(rt0)->sa_len, rt_key(rt)->sa_len);
1139 xk1 = (u_char *)rt_key(rt0);
1140 xm1 = (u_char *)rt_mask(rt0);
1141 xk2 = (u_char *)rt_key(rt);
1143 /* avoid applying a less specific route */
1144 xmp = (u_char *)rt_mask(rt->rt_parent);
1145 mlen = rt_key(rt->rt_parent)->sa_len;
1146 if (mlen > rt_key(rt0)->sa_len) {
1147 #ifdef DEBUG
1148 if (rtfcdebug)
1149 kprintf("rt_fixchange: inserting a less "
1150 "specific route\n");
1151 #endif
1152 return 0;
1154 for (i = rnh->rnh_treetop->rn_offset; i < mlen; i++) {
1155 if ((xmp[i] & ~(xmp[i] ^ xm1[i])) != xmp[i]) {
1156 #ifdef DEBUG
1157 if (rtfcdebug)
1158 kprintf("rt_fixchange: inserting a less "
1159 "specific route\n");
1160 #endif
1161 return 0;
1165 for (i = rnh->rnh_treetop->rn_offset; i < len; i++) {
1166 if ((xk2[i] & xm1[i]) != xk1[i]) {
1167 #ifdef DEBUG
1168 if (rtfcdebug) kprintf("no match\n");
1169 #endif
1170 return 0;
1175 * OK, this node is a clone, and matches the node currently being
1176 * changed/added under the node's mask. So, get rid of it.
1178 #ifdef DEBUG
1179 if (rtfcdebug) kprintf("deleting\n");
1180 #endif
1181 return rtrequest(RTM_DELETE, rt_key(rt), NULL, rt_mask(rt),
1182 rt->rt_flags, NULL);
/*
 * Round a length up to the next multiple of sizeof(long).  A length that
 * is not positive yields sizeof(long).  The argument is fully
 * parenthesized so that expressions containing low-precedence operators
 * (for example a conditional expression) expand correctly.
 */
#define ROUNDUP(a) \
	((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
1188 rt_setgate(struct rtentry *rt0, struct sockaddr *dst, struct sockaddr *gate)
1190 char *space, *oldspace;
1191 int dlen = ROUNDUP(dst->sa_len), glen = ROUNDUP(gate->sa_len);
1192 struct rtentry *rt = rt0;
1193 struct radix_node_head *rnh = rt_tables[mycpuid][dst->sa_family];
1196 * A host route with the destination equal to the gateway
1197 * will interfere with keeping LLINFO in the routing
1198 * table, so disallow it.
1200 if (((rt0->rt_flags & (RTF_HOST | RTF_GATEWAY | RTF_LLINFO)) ==
1201 (RTF_HOST | RTF_GATEWAY)) &&
1202 dst->sa_len == gate->sa_len &&
1203 sa_equal(dst, gate)) {
1205 * The route might already exist if this is an RTM_CHANGE
1206 * or a routing redirect, so try to delete it.
1208 if (rt_key(rt0) != NULL)
1209 rtrequest(RTM_DELETE, rt_key(rt0), rt0->rt_gateway,
1210 rt_mask(rt0), rt0->rt_flags, NULL);
1211 return EADDRNOTAVAIL;
1215 * Both dst and gateway are stored in the same malloc'ed chunk
1216 * (If I ever get my hands on....)
1217 * if we need to malloc a new chunk, then keep the old one around
1218 * till we don't need it any more.
1220 if (rt->rt_gateway == NULL || glen > ROUNDUP(rt->rt_gateway->sa_len)) {
1221 oldspace = (char *)rt_key(rt);
1222 R_Malloc(space, char *, dlen + glen);
1223 if (space == NULL)
1224 return ENOBUFS;
1225 rt->rt_nodes->rn_key = space;
1226 } else {
1227 space = (char *)rt_key(rt); /* Just use the old space. */
1228 oldspace = NULL;
1231 /* Set the gateway value. */
1232 rt->rt_gateway = (struct sockaddr *)(space + dlen);
1233 bcopy(gate, rt->rt_gateway, glen);
1235 if (oldspace != NULL) {
1237 * If we allocated a new chunk, preserve the original dst.
1238 * This way, rt_setgate() really just sets the gate
1239 * and leaves the dst field alone.
1241 bcopy(dst, space, dlen);
1242 Free(oldspace);
1246 * If there is already a gwroute, it's now almost definitely wrong
1247 * so drop it.
1249 if (rt->rt_gwroute != NULL) {
1250 RTFREE(rt->rt_gwroute);
1251 rt->rt_gwroute = NULL;
1253 if (rt->rt_flags & RTF_GATEWAY) {
1255 * Cloning loop avoidance: In the presence of
1256 * protocol-cloning and bad configuration, it is
1257 * possible to get stuck in bottomless mutual recursion
1258 * (rtrequest rt_setgate rtlookup). We avoid this
1259 * by not allowing protocol-cloning to operate for
1260 * gateways (which is probably the correct choice
1261 * anyway), and avoid the resulting reference loops
1262 * by disallowing any route to run through itself as
1263 * a gateway. This is obviously mandatory when we
1264 * get rt->rt_output().
1266 * This breaks TTCP for hosts outside the gateway! XXX JH
1268 rt->rt_gwroute = _rtlookup(gate, RTL_REPORTMSG, RTF_PRCLONING);
1269 if (rt->rt_gwroute == rt) {
1270 rt->rt_gwroute = NULL;
1271 --rt->rt_refcnt;
1272 return EDQUOT; /* failure */
1277 * This isn't going to do anything useful for host routes, so
1278 * don't bother. Also make sure we have a reasonable mask
1279 * (we don't yet have one during adds).
1281 if (!(rt->rt_flags & RTF_HOST) && rt_mask(rt) != NULL) {
1282 struct rtfc_arg arg = { rt, rnh };
1284 rnh->rnh_walktree_from(rnh, (char *)rt_key(rt),
1285 (char *)rt_mask(rt),
1286 rt_fixchange, &arg);
1289 return 0;
/*
 * Copy `src' into `dst' with every address byte ANDed against `netmask'.
 *
 * The first byte of src and of netmask is taken as that sockaddr's
 * length.  The two header bytes (length and family) are copied verbatim,
 * the following bytes are masked up to the shorter of the two lengths,
 * and whatever remains of dst up to src's length is zeroed.
 */
static void
rt_maskedcopy(
	struct sockaddr *src,
	struct sockaddr *dst,
	struct sockaddr *netmask)
{
	const unsigned char *from = (const unsigned char *)src;
	const unsigned char *mask = (const unsigned char *)netmask;
	unsigned char *to = (unsigned char *)dst;
	int masklen = mask[0];
	int srclen = from[0];
	int limit = (masklen > srclen) ? srclen : masklen;
	int pos;

	/* copies sa_len & sa_family */
	to[0] = from[0];
	to[1] = from[1];

	for (pos = 2; pos < limit; pos++)
		to[pos] = from[pos] & mask[pos];

	if (pos < srclen)
		bzero(to + pos, srclen - pos);
}
1315 rt_llroute(struct sockaddr *dst, struct rtentry *rt0, struct rtentry **drt)
1317 struct rtentry *up_rt, *rt;
1319 if (!(rt0->rt_flags & RTF_UP)) {
1320 up_rt = rtlookup(dst);
1321 if (up_rt == NULL)
1322 return (EHOSTUNREACH);
1323 up_rt->rt_refcnt--;
1324 } else
1325 up_rt = rt0;
1326 if (up_rt->rt_flags & RTF_GATEWAY) {
1327 if (up_rt->rt_gwroute == NULL) {
1328 up_rt->rt_gwroute = rtlookup(up_rt->rt_gateway);
1329 if (up_rt->rt_gwroute == NULL)
1330 return (EHOSTUNREACH);
1331 } else if (!(up_rt->rt_gwroute->rt_flags & RTF_UP)) {
1332 rtfree(up_rt->rt_gwroute);
1333 up_rt->rt_gwroute = rtlookup(up_rt->rt_gateway);
1334 if (up_rt->rt_gwroute == NULL)
1335 return (EHOSTUNREACH);
1337 rt = up_rt->rt_gwroute;
1338 } else
1339 rt = up_rt;
1340 if (rt->rt_flags & RTF_REJECT &&
1341 (rt->rt_rmx.rmx_expire == 0 || /* rt doesn't expire */
1342 time_second < rt->rt_rmx.rmx_expire)) /* rt not expired */
1343 return (rt->rt_flags & RTF_HOST ? EHOSTDOWN : EHOSTUNREACH);
1344 *drt = rt;
1345 return 0;
1348 static int
1349 rt_setshims(struct rtentry *rt, struct sockaddr **rt_shim){
1350 int i;
1352 for (i=0; i<3; i++) {
1353 struct sockaddr *shim = rt_shim[RTAX_MPLS1 + i];
1354 int shimlen;
1356 if (shim == NULL)
1357 break;
1359 shimlen = ROUNDUP(shim->sa_len);
1360 R_Malloc(rt->rt_shim[i], struct sockaddr *, shimlen);
1361 bcopy(shim, rt->rt_shim[i], shimlen);
1364 return 0;
1367 #ifdef ROUTE_DEBUG
1370 * Print out a route table entry
1372 void
1373 rt_print(struct rt_addrinfo *rtinfo, struct rtentry *rn)
1375 kprintf("rti %p cpu %d route %p flags %08lx: ",
1376 rtinfo, mycpuid, rn, rn->rt_flags);
1377 sockaddr_print(rt_key(rn));
1378 kprintf(" mask ");
1379 sockaddr_print(rt_mask(rn));
1380 kprintf(" gw ");
1381 sockaddr_print(rn->rt_gateway);
1382 kprintf(" ifc \"%s\"", rn->rt_ifp ? rn->rt_ifp->if_dname : "?");
1383 kprintf(" ifa %p\n", rn->rt_ifa);
1386 void
1387 rt_addrinfo_print(int cmd, struct rt_addrinfo *rti)
1389 int didit = 0;
1390 int i;
1392 #ifdef ROUTE_DEBUG
1393 if (cmd == RTM_DELETE && route_debug > 1)
1394 backtrace();
1395 #endif
1397 switch(cmd) {
1398 case RTM_ADD:
1399 kprintf("ADD ");
1400 break;
1401 case RTM_RESOLVE:
1402 kprintf("RES ");
1403 break;
1404 case RTM_DELETE:
1405 kprintf("DEL ");
1406 break;
1407 default:
1408 kprintf("C%02d ", cmd);
1409 break;
1411 kprintf("rti %p cpu %d ", rti, mycpuid);
1412 for (i = 0; i < rti->rti_addrs; ++i) {
1413 if (rti->rti_info[i] == NULL)
1414 continue;
1415 if (didit)
1416 kprintf(" ,");
1417 switch(i) {
1418 case RTAX_DST:
1419 kprintf("(DST ");
1420 break;
1421 case RTAX_GATEWAY:
1422 kprintf("(GWY ");
1423 break;
1424 case RTAX_NETMASK:
1425 kprintf("(MSK ");
1426 break;
1427 case RTAX_GENMASK:
1428 kprintf("(GEN ");
1429 break;
1430 case RTAX_IFP:
1431 kprintf("(IFP ");
1432 break;
1433 case RTAX_IFA:
1434 kprintf("(IFA ");
1435 break;
1436 case RTAX_AUTHOR:
1437 kprintf("(AUT ");
1438 break;
1439 case RTAX_BRD:
1440 kprintf("(BRD ");
1441 break;
1442 default:
1443 kprintf("(?%02d ", i);
1444 break;
1446 sockaddr_print(rti->rti_info[i]);
1447 kprintf(")");
1448 didit = 1;
1450 kprintf("\n");
1453 void
1454 sockaddr_print(struct sockaddr *sa)
1456 struct sockaddr_in *sa4;
1457 struct sockaddr_in6 *sa6;
1458 int len;
1459 int i;
1461 if (sa == NULL) {
1462 kprintf("NULL");
1463 return;
1466 len = sa->sa_len - offsetof(struct sockaddr, sa_data[0]);
1468 switch(sa->sa_family) {
1469 case AF_INET:
1470 case AF_INET6:
1471 default:
1472 switch(sa->sa_family) {
1473 case AF_INET:
1474 sa4 = (struct sockaddr_in *)sa;
1475 kprintf("INET %d %d.%d.%d.%d",
1476 ntohs(sa4->sin_port),
1477 (ntohl(sa4->sin_addr.s_addr) >> 24) & 255,
1478 (ntohl(sa4->sin_addr.s_addr) >> 16) & 255,
1479 (ntohl(sa4->sin_addr.s_addr) >> 8) & 255,
1480 (ntohl(sa4->sin_addr.s_addr) >> 0) & 255
1482 break;
1483 case AF_INET6:
1484 sa6 = (struct sockaddr_in6 *)sa;
1485 kprintf("INET6 %d %04x:%04x%04x:%04x:%04x:%04x:%04x:%04x",
1486 ntohs(sa6->sin6_port),
1487 sa6->sin6_addr.s6_addr16[0],
1488 sa6->sin6_addr.s6_addr16[1],
1489 sa6->sin6_addr.s6_addr16[2],
1490 sa6->sin6_addr.s6_addr16[3],
1491 sa6->sin6_addr.s6_addr16[4],
1492 sa6->sin6_addr.s6_addr16[5],
1493 sa6->sin6_addr.s6_addr16[6],
1494 sa6->sin6_addr.s6_addr16[7]
1496 break;
1497 default:
1498 kprintf("AF%d ", sa->sa_family);
1499 while (len > 0 && sa->sa_data[len-1] == 0)
1500 --len;
1502 for (i = 0; i < len; ++i) {
1503 if (i)
1504 kprintf(".");
1505 kprintf("%d", (unsigned char)sa->sa_data[i]);
1507 break;
1512 #endif
1515 * Set up a routing table entry, normally for an interface.
1518 rtinit(struct ifaddr *ifa, int cmd, int flags)
1520 struct sockaddr *dst, *deldst, *netmask;
1521 struct mbuf *m = NULL;
1522 struct radix_node_head *rnh;
1523 struct radix_node *rn;
1524 struct rt_addrinfo rtinfo;
1525 int error;
1527 if (flags & RTF_HOST) {
1528 dst = ifa->ifa_dstaddr;
1529 netmask = NULL;
1530 } else {
1531 dst = ifa->ifa_addr;
1532 netmask = ifa->ifa_netmask;
1535 * If it's a delete, check that if it exists, it's on the correct
1536 * interface or we might scrub a route to another ifa which would
1537 * be confusing at best and possibly worse.
1539 if (cmd == RTM_DELETE) {
1541 * It's a delete, so it should already exist..
1542 * If it's a net, mask off the host bits
1543 * (Assuming we have a mask)
1545 if (netmask != NULL) {
1546 m = m_get(MB_DONTWAIT, MT_SONAME);
1547 if (m == NULL)
1548 return (ENOBUFS);
1549 mbuftrackid(m, 34);
1550 deldst = mtod(m, struct sockaddr *);
1551 rt_maskedcopy(dst, deldst, netmask);
1552 dst = deldst;
1555 * Look up an rtentry that is in the routing tree and
1556 * contains the correct info.
1558 if ((rnh = rt_tables[mycpuid][dst->sa_family]) == NULL ||
1559 (rn = rnh->rnh_lookup((char *)dst,
1560 (char *)netmask, rnh)) == NULL ||
1561 ((struct rtentry *)rn)->rt_ifa != ifa ||
1562 !sa_equal((struct sockaddr *)rn->rn_key, dst)) {
1563 if (m != NULL)
1564 m_free(m);
1565 return (flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
1567 /* XXX */
1568 #if 0
1569 else {
1571 * One would think that as we are deleting, and we know
1572 * it doesn't exist, we could just return at this point
1573 * with an "ELSE" clause, but apparently not..
1575 return (flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
1577 #endif
1580 * Do the actual request
1582 bzero(&rtinfo, sizeof(struct rt_addrinfo));
1583 rtinfo.rti_info[RTAX_DST] = dst;
1584 rtinfo.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
1585 rtinfo.rti_info[RTAX_NETMASK] = netmask;
1586 rtinfo.rti_flags = flags | ifa->ifa_flags;
1587 rtinfo.rti_ifa = ifa;
1588 error = rtrequest1_global(cmd, &rtinfo, rtinit_rtrequest_callback, ifa);
1589 if (m != NULL)
1590 m_free(m);
1591 return (error);
1594 static void
1595 rtinit_rtrequest_callback(int cmd, int error,
1596 struct rt_addrinfo *rtinfo, struct rtentry *rt,
1597 void *arg)
1599 struct ifaddr *ifa = arg;
1601 if (error == 0 && rt) {
1602 if (mycpuid == 0) {
1603 ++rt->rt_refcnt;
1604 rt_newaddrmsg(cmd, ifa, error, rt);
1605 --rt->rt_refcnt;
1607 if (cmd == RTM_DELETE) {
1608 if (rt->rt_refcnt == 0) {
1609 ++rt->rt_refcnt;
1610 rtfree(rt);
1616 /* This must be before ip6_init2(), which is now SI_ORDER_MIDDLE */
1617 SYSINIT(route, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, route_init, 0);