2 * Copyright (c) 1980, 1986, 1991, 1993
3 * The Regents of the University of California. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * @(#)route.c 8.3.1.1 (Berkeley) 2/23/95
32 /************************************************************************
33 * Note: In this file a 'fib' is a "forwarding information base" *
34 * Which is the new name for an in kernel routing (next hop) table. *
35 ***********************************************************************/
38 #include "opt_inet6.h"
39 #include "opt_route.h"
41 #include "opt_mrouting.h"
42 #include "opt_mpath.h"
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/malloc.h>
48 #include <sys/socket.h>
49 #include <sys/sysctl.h>
50 #include <sys/syslog.h>
51 #include <sys/sysproto.h>
53 #include <sys/domain.h>
54 #include <sys/kernel.h>
57 #include <net/if_var.h>
58 #include <net/if_dl.h>
59 #include <net/route.h>
60 #include <net/route_var.h>
62 #include <net/flowtable.h>
65 #include <net/radix_mpath.h>
68 #include <netinet/in.h>
69 #include <netinet/ip_mroute.h>
73 #define RT_MAXFIBS UINT16_MAX
75 /* Kernel config default option. */
78 #error "ROUTETABLES defined too low"
80 #if ROUTETABLES > RT_MAXFIBS
81 #error "ROUTETABLES defined too big"
83 #define RT_NUMFIBS ROUTETABLES
84 #endif /* ROUTETABLES */
85 /* Initialize to default if not otherwise set. */
90 #if defined(INET) || defined(INET6)
92 extern void sctp_addr_change(struct ifaddr
*ifa
, int cmd
);
97 /* This is read-only.. */
98 u_int rt_numfibs
= RT_NUMFIBS
;
99 SYSCTL_UINT(_net
, OID_AUTO
, fibs
, CTLFLAG_RDTUN
, &rt_numfibs
, 0, "");
102 * By default add routes to all fibs for new interfaces.
103 * Once this is set to 0 then only allocate routes on interface
104 * changes for the FIB of the caller when adding a new set of addresses
105 * to an interface. XXX this is a shotgun approach to a problem that needs
106 * a more fine grained solution.. that will come.
107 * XXX also has the problems getting the FIB from curthread which will not
108 * always work given the fib can be overridden and prefixes can be added
109 * from the network stack context.
111 VNET_DEFINE(u_int
, rt_add_addr_allfibs
) = 1;
112 SYSCTL_UINT(_net
, OID_AUTO
, add_addr_allfibs
, CTLFLAG_RWTUN
| CTLFLAG_VNET
,
113 &VNET_NAME(rt_add_addr_allfibs
), 0, "");
115 VNET_DEFINE(struct rtstat
, rtstat
);
116 #define V_rtstat VNET(rtstat)
118 VNET_DEFINE(struct rib_head
*, rt_tables
);
119 #define V_rt_tables VNET(rt_tables)
121 VNET_DEFINE(int, rttrash
); /* routes not in table but not freed */
122 #define V_rttrash VNET(rttrash)
126 * Convert a 'struct radix_node *' to a 'struct rtentry *'.
127 * The operation can be done safely (in this code) because a
128 * 'struct rtentry' starts with two 'struct radix_node''s, the first
129 * one representing leaf nodes in the routing tree, which is
130 * what the code in radix.c passes us as a 'struct radix_node'.
132 * But because there are a lot of assumptions in this conversion,
133 * do not cast explicitly, but always use the macro below.
135 #define RNTORT(p) ((struct rtentry *)(p))
137 static VNET_DEFINE(uma_zone_t
, rtzone
); /* Routing table UMA zone. */
138 #define V_rtzone VNET(rtzone)
140 static int rtrequest1_fib_change(struct rib_head
*, struct rt_addrinfo
*,
141 struct rtentry
**, u_int
);
142 static void rt_setmetrics(const struct rt_addrinfo
*, struct rtentry
*);
143 static int rt_ifdelroute(const struct rtentry
*rt
, void *arg
);
144 static struct rtentry
*rt_unlinkrte(struct rib_head
*rnh
,
145 struct rt_addrinfo
*info
, int *perror
);
146 static void rt_notifydelete(struct rtentry
*rt
, struct rt_addrinfo
*info
);
148 static struct radix_node
*rt_mpath_unlink(struct rib_head
*rnh
,
149 struct rt_addrinfo
*info
, struct rtentry
*rto
, int *perror
);
151 static int rt_exportinfo(struct rtentry
*rt
, struct rt_addrinfo
*info
,
160 static int if_updatemtu_cb(struct radix_node
*, void *);
163 * handler for net.my_fibnum
166 sysctl_my_fibnum(SYSCTL_HANDLER_ARGS
)
171 fibnum
= curthread
->td_proc
->p_fibnum
;
172 error
= sysctl_handle_int(oidp
, &fibnum
, 0, req
);
176 SYSCTL_PROC(_net
, OID_AUTO
, my_fibnum
, CTLTYPE_INT
|CTLFLAG_RD
,
177 NULL
, 0, &sysctl_my_fibnum
, "I", "default FIB of caller");
/*
 * Compute the address of the rib_head pointer slot for (table, fam).
 * V_rt_tables is treated as one flat array of pointers indexed by
 * table * (AF_MAX+1) + fam.  Both indices are range-checked with KASSERT:
 * table against rt_numfibs, fam against AF_MAX+1.
 * NOTE(review): the return statement is not visible in this listing;
 * callers dereference the result, so it presumably returns rnh.
 */
179 static __inline
struct rib_head
**
180 rt_tables_get_rnh_ptr(int table
, int fam
)
182 struct rib_head
**rnh
;
184 KASSERT(table
>= 0 && table
< rt_numfibs
, ("%s: table out of bounds.",
186 KASSERT(fam
>= 0 && fam
< (AF_MAX
+1), ("%s: fam out of bounds.",
189 /* rnh is [fib=0][af=0]. */
190 rnh
= (struct rib_head
**)V_rt_tables
;
191 /* Get the offset to the requested table and fam. */
192 rnh
+= table
* (AF_MAX
+1) + fam
;
/*
 * Return the rib_head pointer stored in the slot for (table, fam), i.e.
 * the dereferenced result of rt_tables_get_rnh_ptr().
 * NOTE(review): other code in this file asserts or tests the result
 * against NULL, so the slot may presumably be empty -- confirm.
 */
198 rt_tables_get_rnh(int table
, int fam
)
201 return (*rt_tables_get_rnh_ptr(table
, fam
));
/*
 * Return the rnh_gen field of the rib_head for (table, fam).
 * Asserts (KASSERT) that a rib_head is present in that slot before
 * reading the field.
 */
205 rt_tables_get_gen(int table
, int fam
)
207 struct rib_head
*rnh
;
209 rnh
= *rt_tables_get_rnh_ptr(table
, fam
);
210 KASSERT(rnh
!= NULL
, ("%s: NULL rib_head pointer table %d fam %d",
211 __func__
, table
, fam
));
212 return (rnh
->rnh_gen
);
217 * route initialization must occur before ip6_init2(), which happens at
224 /* whack the tunable ints into line. */
225 if (rt_numfibs
> RT_MAXFIBS
)
226 rt_numfibs
= RT_MAXFIBS
;
230 SYSINIT(route_init
, SI_SUB_PROTO_DOMAIN
, SI_ORDER_THIRD
, route_init
, 0);
/*
 * Zone hook passed to uma_zcreate("rtentry", ...) in vnet_route_init().
 * Allocates the rt_pksent counter of the rtentry at 'mem', passing the
 * caller's 'how' flags to counter_u64_alloc(), and tests the result
 * against NULL.
 * NOTE(review): the statements following the NULL test (the return
 * values) are not visible in this listing.
 */
233 rtentry_zinit(void *mem
, int size
, int how
)
235 struct rtentry
*rt
= mem
;
237 rt
->rt_pksent
= counter_u64_alloc(how
);
238 if (rt
->rt_pksent
== NULL
)
/*
 * Zone hook passed to uma_zcreate("rtentry", ...) in vnet_route_init().
 * Frees the rt_pksent counter of the rtentry at 'mem', i.e. the counter
 * that rtentry_zinit() allocates.
 */
247 rtentry_zfini(void *mem
, int size
)
249 struct rtentry
*rt
= mem
;
252 counter_u64_free(rt
->rt_pksent
);
/*
 * Zone hook passed to uma_zcreate("rtentry", ...) in vnet_route_init().
 * Zeroes the leading part of the rtentry (every byte before the
 * rt_endzero member) and resets the rt_pksent counter to zero.  The
 * counter itself is not reallocated here.
 * NOTE(review): any further statements and the return value are not
 * visible in this listing.
 */
256 rtentry_ctor(void *mem
, int size
, void *arg
, int how
)
258 struct rtentry
*rt
= mem
;
260 bzero(rt
, offsetof(struct rtentry
, rt_endzero
));
261 counter_u64_zero(rt
->rt_pksent
);
268 rtentry_dtor(void *mem
, int size
, void *arg
)
270 struct rtentry
*rt
= mem
;
276 vnet_route_init(const void *unused __unused
)
279 struct rib_head
**rnh
;
283 V_rt_tables
= malloc(rt_numfibs
* (AF_MAX
+1) *
284 sizeof(struct rib_head
*), M_RTABLE
, M_WAITOK
|M_ZERO
);
286 V_rtzone
= uma_zcreate("rtentry", sizeof(struct rtentry
),
287 rtentry_ctor
, rtentry_dtor
,
288 rtentry_zinit
, rtentry_zfini
, UMA_ALIGN_PTR
, 0);
289 for (dom
= domains
; dom
; dom
= dom
->dom_next
) {
290 if (dom
->dom_rtattach
== NULL
)
293 for (table
= 0; table
< rt_numfibs
; table
++) {
294 fam
= dom
->dom_family
;
295 if (table
!= 0 && fam
!= AF_INET6
&& fam
!= AF_INET
)
298 rnh
= rt_tables_get_rnh_ptr(table
, fam
);
300 panic("%s: rnh NULL", __func__
);
301 dom
->dom_rtattach((void **)rnh
, 0);
305 VNET_SYSINIT(vnet_route_init
, SI_SUB_PROTO_DOMAIN
, SI_ORDER_FOURTH
,
310 vnet_route_uninit(const void *unused __unused
)
315 struct rib_head
**rnh
;
317 for (dom
= domains
; dom
; dom
= dom
->dom_next
) {
318 if (dom
->dom_rtdetach
== NULL
)
321 for (table
= 0; table
< rt_numfibs
; table
++) {
322 fam
= dom
->dom_family
;
324 if (table
!= 0 && fam
!= AF_INET6
&& fam
!= AF_INET
)
327 rnh
= rt_tables_get_rnh_ptr(table
, fam
);
329 panic("%s: rnh NULL", __func__
);
330 dom
->dom_rtdetach((void **)rnh
, 0);
334 free(V_rt_tables
, M_RTABLE
);
335 uma_zdestroy(V_rtzone
);
337 VNET_SYSUNINIT(vnet_route_uninit
, SI_SUB_PROTO_DOMAIN
, SI_ORDER_FIRST
,
338 vnet_route_uninit
, 0);
/*
 * Allocate and initialise a struct rib_head.
 *
 * The structure is allocated zeroed with M_WAITOK.  Two radix heads are
 * then set up with rn_inithead_internal(): the main tree (rh->head),
 * which receives the caller's 'offset', and the mask tree (rh->rmhead),
 * which uses offset 0.  The mask tree is linked into the main head via
 * rnh_masks, rib_lock is initialised, and the rn_* routines from radix.c
 * are installed as the default callbacks.
 * NOTE(review): the return statement is not visible in this listing.
 */
342 rt_table_init(int offset
)
346 rh
= malloc(sizeof(struct rib_head
), M_RTABLE
, M_WAITOK
| M_ZERO
);
348 /* TODO: These details should be hidden inside radix.c */
349 /* Init masks tree */
350 rn_inithead_internal(&rh
->head
, rh
->rnh_nodes
, offset
);
351 rn_inithead_internal(&rh
->rmhead
.head
, rh
->rmhead
.mask_nodes
, 0);
352 rh
->head
.rnh_masks
= &rh
->rmhead
;
355 rw_init(&rh
->rib_lock
, "rib head lock");
357 /* Finally, set base callbacks */
358 rh
->rnh_addaddr
= rn_addroute
;
359 rh
->rnh_deladdr
= rn_delete
;
360 rh
->rnh_matchaddr
= rn_match
;
361 rh
->rnh_lookup
= rn_lookup
;
362 rh
->rnh_walktree
= rn_walktree
;
363 rh
->rnh_walktree_from
= rn_walktree_from
;
/*
 * Tree-walk callback used by rt_table_destroy() on the mask tree.
 * 'arg' is the radix_head being walked; the node is removed from it by
 * calling rn_delete() with 'rn + 2' as the key argument and no mask.
 * NOTE(review): what is done with the returned node 'x' and the
 * callback's return value are not visible in this listing.
 */
369 rt_freeentry(struct radix_node
*rn
, void *arg
)
371 struct radix_head
* const rnh
= arg
;
372 struct radix_node
*x
;
374 x
= (struct radix_node
*)rn_delete(rn
+ 2, NULL
, rnh
);
/*
 * Tear down a rib_head: walks the mask tree (rh->rmhead.head) with
 * rt_freeentry() to delete its nodes, then destroys rib_lock.  The main
 * tree is not walked here; per the comment below it is assumed to be
 * empty already.
 * NOTE(review): release of 'rh' itself is not visible in this listing.
 */
381 rt_table_destroy(struct rib_head
*rh
)
384 rn_walktree(&rh
->rmhead
.head
, rt_freeentry
, &rh
->rmhead
.head
);
386 /* Assume table is already empty */
387 rw_destroy(&rh
->rib_lock
);
392 #ifndef _SYS_SYSPROTO_H_
/*
 * setfib system call handler: range-checks uap->fibnum against
 * [0, rt_numfibs) and stores it as the calling process's p_fibnum.
 * NOTE(review): the return statements (the error for an out-of-range
 * fibnum and the success value) are not visible in this listing.
 */
398 sys_setfib(struct thread
*td
, struct setfib_args
*uap
)
400 if (uap
->fibnum
< 0 || uap
->fibnum
>= rt_numfibs
)
402 td
->td_proc
->p_fibnum
= uap
->fibnum
;
407 * Packet routing routines.
410 rtalloc_ign_fib(struct route
*ro
, u_long ignore
, u_int fibnum
)
414 if ((rt
= ro
->ro_rt
) != NULL
) {
415 if (rt
->rt_ifp
!= NULL
&& rt
->rt_flags
& RTF_UP
)
420 ro
->ro_rt
= rtalloc1_fib(&ro
->ro_dst
, 1, ignore
, fibnum
);
422 RT_UNLOCK(ro
->ro_rt
);
426 * Look up the route that matches the address given
427 * Or, at least try.. Create a cloned route if needed.
429 * The returned route, if any, is locked.
/*
 * Wrapper around rtalloc1_fib() that always uses RT_DEFAULT_FIB;
 * dst, report and ignflags are passed through unchanged.
 */
432 rtalloc1(struct sockaddr
*dst
, int report
, u_long ignflags
)
435 return (rtalloc1_fib(dst
, report
, ignflags
, RT_DEFAULT_FIB
));
439 rtalloc1_fib(struct sockaddr
*dst
, int report
, u_long ignflags
,
443 struct radix_node
*rn
;
444 struct rtentry
*newrt
;
445 struct rt_addrinfo info
;
446 int err
= 0, msgtype
= RTM_MISS
;
448 KASSERT((fibnum
< rt_numfibs
), ("rtalloc1_fib: bad fibnum"));
449 rh
= rt_tables_get_rnh(fibnum
, dst
->sa_family
);
455 * Look up the address in the table for that Address Family
458 rn
= rh
->rnh_matchaddr(dst
, &rh
->head
);
459 if (rn
&& ((rn
->rn_flags
& RNF_ROOT
) == 0)) {
470 * Either we hit the root or couldn't find any match,
471 * Which basically means
472 * "caint get there frm here"
475 V_rtstat
.rts_unreach
++;
479 * If required, report the failure to the supervising
481 * For a delete, this is not an error. (report == 0)
483 bzero(&info
, sizeof(info
));
484 info
.rti_info
[RTAX_DST
] = dst
;
485 rt_missmsg_fib(msgtype
, &info
, 0, err
, fibnum
);
491 * Remove a reference count from an rtentry.
492 * If the count gets low enough, take it out of the routing table
495 rtfree(struct rtentry
*rt
)
497 struct rib_head
*rnh
;
499 KASSERT(rt
!= NULL
,("%s: NULL rt", __func__
));
500 rnh
= rt_tables_get_rnh(rt
->rt_fibnum
, rt_key(rt
)->sa_family
);
501 KASSERT(rnh
!= NULL
,("%s: NULL rnh", __func__
));
506 * The callers should use RTFREE_LOCKED() or RTFREE(), so
507 * we should come here exactly with the last reference.
510 if (rt
->rt_refcnt
> 0) {
511 log(LOG_DEBUG
, "%s: %p has %d refs\n", __func__
, rt
, rt
->rt_refcnt
);
516 * On last reference give the "close method" a chance
517 * to cleanup private state. This also permits (for
518 * IPv4 and IPv6) a chance to decide if the routing table
519 * entry should be purged immediately or at a later time.
520 * When an immediate purge is to happen the close routine
521 * typically calls rtexpunge which clears the RTF_UP flag
522 * on the entry so that the code below reclaims the storage.
524 if (rt
->rt_refcnt
== 0 && rnh
->rnh_close
)
525 rnh
->rnh_close((struct radix_node
*)rt
, &rnh
->head
);
528 * If we are no longer "up" (and ref == 0)
529 * then we can free the resources associated
532 if ((rt
->rt_flags
& RTF_UP
) == 0) {
533 if (rt
->rt_nodes
->rn_flags
& (RNF_ACTIVE
| RNF_ROOT
))
536 * the rtentry must have been removed from the routing table
537 * so it is represented in rttrash.. remove that now.
541 if (rt
->rt_refcnt
< 0) {
542 printf("rtfree: %p not freed (neg refs)\n", rt
);
547 * release references on items we hold them on..
548 * e.g other routes and ifaddrs.
551 ifa_free(rt
->rt_ifa
);
553 * The key is separately alloc'd so free it (see rt_setgate()).
554 * This also frees the gateway, as they are always malloc'd
560 * and the rtentry itself of course
562 uma_zfree(V_rtzone
, rt
);
571 * Force a routing table entry to the specified
572 * destination to go through the given gateway.
573 * Normally called as a result of a routing redirect
574 * message from the network layer.
577 rtredirect_fib(struct sockaddr
*dst
,
578 struct sockaddr
*gateway
,
579 struct sockaddr
*netmask
,
581 struct sockaddr
*src
,
587 struct rt_addrinfo info
;
589 struct rib_head
*rnh
;
592 rnh
= rt_tables_get_rnh(fibnum
, dst
->sa_family
);
594 error
= EAFNOSUPPORT
;
598 /* verify the gateway is directly reachable */
599 if ((ifa
= ifa_ifwithnet(gateway
, 0, fibnum
)) == NULL
) {
603 rt
= rtalloc1_fib(dst
, 0, 0UL, fibnum
); /* NB: rt is locked */
605 * If the redirect isn't from our current router for this dst,
606 * it's either old or wrong. If it redirects us to ourselves,
607 * we have a routing loop, perhaps as a result of an interface
608 * going down recently.
610 if (!(flags
& RTF_DONE
) && rt
) {
611 if (!sa_equal(src
, rt
->rt_gateway
)) {
615 if (rt
->rt_ifa
!= ifa
&& ifa
->ifa_addr
->sa_family
!= AF_LINK
) {
620 if ((flags
& RTF_GATEWAY
) && ifa_ifwithaddr_check(gateway
)) {
621 error
= EHOSTUNREACH
;
625 * Create a new entry if we just got back a wildcard entry
626 * or the lookup failed. This is necessary for hosts
627 * which use routing redirects generated by smart gateways
628 * to dynamically build the routing tables.
630 if (rt
== NULL
|| (rt_mask(rt
) && rt_mask(rt
)->sa_len
< 2))
633 * Don't listen to the redirect if it's
634 * for a route to an interface.
636 if (rt
->rt_flags
& RTF_GATEWAY
) {
637 if (((rt
->rt_flags
& RTF_HOST
) == 0) && (flags
& RTF_HOST
)) {
639 * Changing from route to net => route to host.
640 * Create new route, rather than smashing route to net.
646 flags
|= RTF_DYNAMIC
;
647 bzero((caddr_t
)&info
, sizeof(info
));
648 info
.rti_info
[RTAX_DST
] = dst
;
649 info
.rti_info
[RTAX_GATEWAY
] = gateway
;
650 info
.rti_info
[RTAX_NETMASK
] = netmask
;
652 info
.rti_flags
= flags
;
653 error
= rtrequest1_fib(RTM_ADD
, &info
, &rt
, fibnum
);
656 flags
= rt
->rt_flags
;
659 stat
= &V_rtstat
.rts_dynamic
;
663 * Smash the current notion of the gateway to
664 * this destination. Should check about netmask!!!
666 if ((flags
& RTF_GATEWAY
) == 0)
667 rt
->rt_flags
&= ~RTF_GATEWAY
;
668 rt
->rt_flags
|= RTF_MODIFIED
;
669 flags
|= RTF_MODIFIED
;
670 stat
= &V_rtstat
.rts_newgateway
;
672 * add the key and gateway (in one malloc'd chunk).
677 rt_setgate(rt
, rt_key(rt
), gateway
);
681 error
= EHOSTUNREACH
;
687 V_rtstat
.rts_badredirect
++;
688 else if (stat
!= NULL
)
690 bzero((caddr_t
)&info
, sizeof(info
));
691 info
.rti_info
[RTAX_DST
] = dst
;
692 info
.rti_info
[RTAX_GATEWAY
] = gateway
;
693 info
.rti_info
[RTAX_NETMASK
] = netmask
;
694 info
.rti_info
[RTAX_AUTHOR
] = src
;
695 rt_missmsg_fib(RTM_REDIRECT
, &info
, flags
, error
, fibnum
);
701 * Routing table ioctl interface.
/*
 * Routing ioctl entry point.  The visible code forwards req, data and
 * fibnum to the mrt_ioctl hook when that pointer is non-NULL, and
 * returns EOPNOTSUPP otherwise.
 * NOTE(review): any preprocessor conditionals around the return are not
 * visible in this listing.
 */
704 rtioctl_fib(u_long req
, caddr_t data
, u_int fibnum
)
708 * If more ioctl commands are added here, make sure the proper
709 * super-user checks are being performed because it is possible for
710 * prison-root to make it this far if raw sockets have been enabled
714 /* Multicast goop, grrr... */
715 return mrt_ioctl
? mrt_ioctl(req
, data
, fibnum
) : EOPNOTSUPP
;
722 ifa_ifwithroute(int flags
, const struct sockaddr
*dst
, struct sockaddr
*gateway
,
728 if ((flags
& RTF_GATEWAY
) == 0) {
730 * If we are adding a route to an interface,
731 * and the interface is a pt to pt link
732 * we should search for the destination
733 * as our clue to the interface. Otherwise
734 * we can use the local address.
737 if (flags
& RTF_HOST
)
738 ifa
= ifa_ifwithdstaddr(dst
, fibnum
);
740 ifa
= ifa_ifwithaddr(gateway
);
743 * If we are adding a route to a remote net
744 * or host, the gateway may still be on the
745 * other end of a pt to pt link.
747 ifa
= ifa_ifwithdstaddr(gateway
, fibnum
);
750 ifa
= ifa_ifwithnet(gateway
, 0, fibnum
);
752 struct rtentry
*rt
= rtalloc1_fib(gateway
, 0, 0, fibnum
);
756 * dismiss a gateway that is reachable only
757 * through the default router
759 switch (gateway
->sa_family
) {
761 if (satosin(rt_key(rt
))->sin_addr
.s_addr
== INADDR_ANY
)
765 if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(rt_key(rt
))->sin6_addr
))
771 if (!not_found
&& rt
->rt_ifa
!= NULL
) {
777 if (not_found
|| ifa
== NULL
)
780 if (ifa
->ifa_addr
->sa_family
!= dst
->sa_family
) {
781 struct ifaddr
*oifa
= ifa
;
782 ifa
= ifaof_ifpforaddr(dst
, ifa
->ifa_ifp
);
792 * Do appropriate manipulations of a routing tree given
793 * all the bits of info needed
/*
 * Convenience wrapper around rtrequest1_fib(): zeroes a local
 * struct rt_addrinfo, fills in rti_flags and the RTAX_DST,
 * RTAX_GATEWAY and RTAX_NETMASK slots from the arguments, and forwards
 * req, ret_nrt and fibnum unchanged.
 * NOTE(review): dst->sa_len == 0 is tested first, but the action taken
 * in that case is not visible in this listing; the 'flags' and 'fibnum'
 * parameter declarations are also missing from the visible header.
 */
796 rtrequest_fib(int req
,
797 struct sockaddr
*dst
,
798 struct sockaddr
*gateway
,
799 struct sockaddr
*netmask
,
801 struct rtentry
**ret_nrt
,
804 struct rt_addrinfo info
;
806 if (dst
->sa_len
== 0)
809 bzero((caddr_t
)&info
, sizeof(info
));
810 info
.rti_flags
= flags
;
811 info
.rti_info
[RTAX_DST
] = dst
;
812 info
.rti_info
[RTAX_GATEWAY
] = gateway
;
813 info
.rti_info
[RTAX_NETMASK
] = netmask
;
814 return rtrequest1_fib(req
, &info
, ret_nrt
, fibnum
);
819 * Copy most of @rt data into @info.
821 * If @flags contains NHR_COPY, copies dst,netmask and gw to the
822 * pointers specified by @info structure. Assume such pointers
823 * are zeroed sockaddr-like structures with sa_len field initialized
824 * to reflect size of the provided buffer. if no NHR_COPY is specified,
825 * point dst,netmask and gw @info fields to appropriate @rt values.
827 * if @flags contains NHR_REF, do refcounting on rt_ifp.
829 * Returns 0 on success.
832 rt_exportinfo(struct rtentry
*rt
, struct rt_addrinfo
*info
, int flags
)
834 struct rt_metrics
*rmx
;
835 struct sockaddr
*src
, *dst
;
838 if (flags
& NHR_COPY
) {
839 /* Copy destination if dst is non-zero */
841 dst
= info
->rti_info
[RTAX_DST
];
842 sa_len
= src
->sa_len
;
844 if (src
->sa_len
> dst
->sa_len
)
846 memcpy(dst
, src
, src
->sa_len
);
847 info
->rti_addrs
|= RTA_DST
;
850 /* Copy mask if set && dst is non-zero */
852 dst
= info
->rti_info
[RTAX_NETMASK
];
853 if (src
!= NULL
&& dst
!= NULL
) {
856 * Radix stores different value in sa_len,
857 * assume rt_mask() to have the same length
860 if (sa_len
> dst
->sa_len
)
862 memcpy(dst
, src
, src
->sa_len
);
863 info
->rti_addrs
|= RTA_NETMASK
;
866 /* Copy gateway if set && dst is non-zero */
867 src
= rt
->rt_gateway
;
868 dst
= info
->rti_info
[RTAX_GATEWAY
];
869 if ((rt
->rt_flags
& RTF_GATEWAY
) && src
!= NULL
&& dst
!= NULL
){
870 if (src
->sa_len
> dst
->sa_len
)
872 memcpy(dst
, src
, src
->sa_len
);
873 info
->rti_addrs
|= RTA_GATEWAY
;
876 info
->rti_info
[RTAX_DST
] = rt_key(rt
);
877 info
->rti_addrs
|= RTA_DST
;
878 if (rt_mask(rt
) != NULL
) {
879 info
->rti_info
[RTAX_NETMASK
] = rt_mask(rt
);
880 info
->rti_addrs
|= RTA_NETMASK
;
882 if (rt
->rt_flags
& RTF_GATEWAY
) {
883 info
->rti_info
[RTAX_GATEWAY
] = rt
->rt_gateway
;
884 info
->rti_addrs
|= RTA_GATEWAY
;
890 info
->rti_mflags
|= RTV_MTU
;
891 rmx
->rmx_mtu
= rt
->rt_mtu
;
894 info
->rti_flags
= rt
->rt_flags
;
895 info
->rti_ifp
= rt
->rt_ifp
;
896 info
->rti_ifa
= rt
->rt_ifa
;
898 if (flags
& NHR_REF
) {
899 /* Do 'traditional' refcounting */
900 if_ref(info
->rti_ifp
);
907 * Looks up route entry for @dst in RIB database for fib @fibnum.
908 * Exports entry data to @info using rt_exportinfo().
910 * if @flags contains NHR_REF, refcounting is performed on rt_ifp.
911 * All references can be released later by calling rib_free_info()
913 * Returns 0 on success.
914 * Returns ENOENT for lookup failure, ENOMEM for export failure.
917 rib_lookup_info(uint32_t fibnum
, const struct sockaddr
*dst
, uint32_t flags
,
918 uint32_t flowid
, struct rt_addrinfo
*info
)
921 struct radix_node
*rn
;
925 KASSERT((fibnum
< rt_numfibs
), ("rib_lookup_rte: bad fibnum"));
926 rh
= rt_tables_get_rnh(fibnum
, dst
->sa_family
);
931 rn
= rh
->rnh_matchaddr(__DECONST(void *, dst
), &rh
->head
);
932 if (rn
!= NULL
&& ((rn
->rn_flags
& RNF_ROOT
) == 0)) {
934 /* Ensure route & ifp is UP */
935 if (RT_LINK_IS_UP(rt
->rt_ifp
)) {
936 flags
= (flags
& NHR_REF
) | NHR_COPY
;
937 error
= rt_exportinfo(rt
, info
, flags
);
949 * Releases all references acquired by rib_lookup_info() when
950 * called with NHR_REF flags.
/*
 * Drop the interface reference held in @info by calling if_rele() on
 * info->rti_ifp.  This pairs with the if_ref() that rt_exportinfo()
 * performs when NHR_REF is set.
 */
953 rib_free_info(struct rt_addrinfo
*info
)
956 if_rele(info
->rti_ifp
);
960 * Iterates over all existing fibs in system calling
961 * @setwa_f function prior to traversing each fib.
962 * Calls @wa_f function for each element in current fib.
963 * If af is not AF_UNSPEC, iterates over fibs in particular
967 rt_foreach_fib_walk(int af
, rt_setwarg_t
*setwa_f
, rt_walktree_f_t
*wa_f
,
970 struct rib_head
*rnh
;
974 for (fibnum
= 0; fibnum
< rt_numfibs
; fibnum
++) {
975 /* Do we want some specific family? */
976 if (af
!= AF_UNSPEC
) {
977 rnh
= rt_tables_get_rnh(fibnum
, af
);
981 setwa_f(rnh
, fibnum
, af
, arg
);
984 rnh
->rnh_walktree(&rnh
->head
, (walktree_f_t
*)wa_f
,arg
);
989 for (i
= 1; i
<= AF_MAX
; i
++) {
990 rnh
= rt_tables_get_rnh(fibnum
, i
);
994 setwa_f(rnh
, fibnum
, i
, arg
);
997 rnh
->rnh_walktree(&rnh
->head
, (walktree_f_t
*)wa_f
,arg
);
1005 struct rt_addrinfo info
;
1006 struct rib_head
*rnh
;
1007 struct rtentry
*head
;
1011 * Conditionally unlinks @rn from radix tree based
1012 * on info data passed in @arg.
1015 rt_checkdelroute(struct radix_node
*rn
, void *arg
)
1017 struct rt_delinfo
*di
;
1018 struct rt_addrinfo
*info
;
1022 di
= (struct rt_delinfo
*)arg
;
1023 rt
= (struct rtentry
*)rn
;
1027 info
->rti_info
[RTAX_DST
] = rt_key(rt
);
1028 info
->rti_info
[RTAX_NETMASK
] = rt_mask(rt
);
1029 info
->rti_info
[RTAX_GATEWAY
] = rt
->rt_gateway
;
1031 rt
= rt_unlinkrte(di
->rnh
, info
, &error
);
1033 /* Either not allowed or not matched. Skip entry */
1037 /* Entry was unlinked. Add to the list and return */
1038 rt
->rt_chain
= di
->head
;
1045 * Iterates over all existing fibs in system.
1046 * Deletes each element for which @filter_f function returned
1048 * If @af is not AF_UNSPEC, iterates over fibs in particular
1052 rt_foreach_fib_walk_del(int af
, rt_filter_f_t
*filter_f
, void *arg
)
1054 struct rib_head
*rnh
;
1055 struct rt_delinfo di
;
1060 bzero(&di
, sizeof(di
));
1061 di
.info
.rti_filter
= filter_f
;
1062 di
.info
.rti_filterdata
= arg
;
1064 for (fibnum
= 0; fibnum
< rt_numfibs
; fibnum
++) {
1065 /* Do we want some specific family? */
1066 if (af
!= AF_UNSPEC
) {
1074 for (i
= start
; i
<= end
; i
++) {
1075 rnh
= rt_tables_get_rnh(fibnum
, i
);
1081 rnh
->rnh_walktree(&rnh
->head
, rt_checkdelroute
, &di
);
1084 if (di
.head
== NULL
)
1087 /* We might have something to reclaim */
1088 while (di
.head
!= NULL
) {
1090 di
.head
= rt
->rt_chain
;
1091 rt
->rt_chain
= NULL
;
1093 /* TODO std rt -> rt_addrinfo export */
1094 di
.info
.rti_info
[RTAX_DST
] = rt_key(rt
);
1095 di
.info
.rti_info
[RTAX_NETMASK
] = rt_mask(rt
);
1097 rt_notifydelete(rt
, &di
.info
);
1106 * Delete Routes for a Network Interface
1108 * Called for each routing entry via the rnh->rnh_walktree() call above
1109 * to delete all route entries referencing a detaching network interface.
1112 * rt pointer to rtentry
1113 * arg argument passed to rnh->rnh_walktree() - detaching interface
1117 * errno failed - reason indicated
/*
 * Filter passed to rt_foreach_fib_walk_del() by rt_flushifroutes() and
 * rt_flushifroutes_af(); 'arg' is the struct ifnet being flushed.
 * Two tests are visible: whether the route's rt_ifp differs from that
 * interface, and whether the route has RTF_UP clear.
 * NOTE(review): the value returned for each test is not visible in this
 * listing; per rt_unlinkrte()'s comment a filter result of 0 means
 * "not matched".
 */
1120 rt_ifdelroute(const struct rtentry
*rt
, void *arg
)
1122 struct ifnet
*ifp
= arg
;
1124 if (rt
->rt_ifp
!= ifp
)
1128 * Protect (sorta) against walktree recursion problems
1129 * with cloned routes
1131 if ((rt
->rt_flags
& RTF_UP
) == 0)
1138 * Delete all remaining routes using this interface
1139 * Unfortunately the only way to do this is to slog through
1140 * the entire routing table looking for routes which point
1141 * to this interface...oh well...
/*
 * Flush routes that use @ifp for a single address family: asserts that
 * af is within [1, AF_MAX], then runs rt_foreach_fib_walk_del() for that
 * family with rt_ifdelroute() as the filter and @ifp as its argument.
 */
1144 rt_flushifroutes_af(struct ifnet
*ifp
, int af
)
1146 KASSERT((af
>= 1 && af
<= AF_MAX
), ("%s: af %d not >= 1 and <= %d",
1147 __func__
, af
, AF_MAX
));
1149 rt_foreach_fib_walk_del(af
, rt_ifdelroute
, ifp
);
/*
 * Flush routes that use @ifp across all address families: runs
 * rt_foreach_fib_walk_del() with AF_UNSPEC, using rt_ifdelroute() as the
 * filter and @ifp as its argument.
 */
1153 rt_flushifroutes(struct ifnet
*ifp
)
1156 rt_foreach_fib_walk_del(AF_UNSPEC
, rt_ifdelroute
, ifp
);
1160 * Conditionally unlinks rtentry matching data inside @info from @rnh.
1161 * Returns unlinked, locked and referenced @rtentry on success,
1162 * Returns NULL and sets @perror to:
1163 * ESRCH - if prefix was not found,
1164 * EADDRINUSE - if trying to delete PINNED route without appropriate flag.
1165 * ENOENT - if supplied filter function returned 0 (not matched).
1167 static struct rtentry
*
1168 rt_unlinkrte(struct rib_head
*rnh
, struct rt_addrinfo
*info
, int *perror
)
1170 struct sockaddr
*dst
, *netmask
;
1172 struct radix_node
*rn
;
1174 dst
= info
->rti_info
[RTAX_DST
];
1175 netmask
= info
->rti_info
[RTAX_NETMASK
];
1177 rt
= (struct rtentry
*)rnh
->rnh_lookup(dst
, netmask
, &rnh
->head
);
1183 if ((info
->rti_flags
& RTF_PINNED
) == 0) {
1184 /* Check if target route can be deleted */
1185 if (rt
->rt_flags
& RTF_PINNED
) {
1186 *perror
= EADDRINUSE
;
1191 if (info
->rti_filter
!= NULL
) {
1192 if (info
->rti_filter(rt
, info
->rti_filterdata
) == 0) {
1199 * Filter function requested rte deletion.
1200 * Ease the caller work by filling in remaining info
1201 * from that particular entry.
1203 info
->rti_info
[RTAX_GATEWAY
] = rt
->rt_gateway
;
1207 * Remove the item from the tree and return it.
1208 * Complain if it is not there and do no more processing.
1212 if (rt_mpath_capable(rnh
))
1213 rn
= rt_mpath_unlink(rnh
, info
, rt
, perror
);
1216 rn
= rnh
->rnh_deladdr(dst
, netmask
, &rnh
->head
);
1220 if (rn
->rn_flags
& (RNF_ACTIVE
| RNF_ROOT
))
1221 panic ("rtrequest delete");
1226 rt
->rt_flags
&= ~RTF_UP
;
1234 rt_notifydelete(struct rtentry
*rt
, struct rt_addrinfo
*info
)
1239 * give the protocol a chance to keep things in sync.
1242 if (ifa
!= NULL
&& ifa
->ifa_rtrequest
!= NULL
)
1243 ifa
->ifa_rtrequest(RTM_DELETE
, rt
, info
);
1246 * One more rtentry floating around that is not
1247 * linked to the routing table. rttrash will be decremented
1248 * when RTFREE(rt) is eventually called.
1255 * These (questionable) definitions of apparent local variables apply
1256 * to the next two functions. XXXXXX!!!
1258 #define dst info->rti_info[RTAX_DST]
1259 #define gateway info->rti_info[RTAX_GATEWAY]
1260 #define netmask info->rti_info[RTAX_NETMASK]
1261 #define ifaaddr info->rti_info[RTAX_IFA]
1262 #define ifpaddr info->rti_info[RTAX_IFP]
1263 #define flags info->rti_flags
1266 * Look up rt_addrinfo for a specific fib. Note that if rti_ifa is defined,
1267 * it will be referenced so the caller must free it.
1270 rt_getifa_fib(struct rt_addrinfo
*info
, u_int fibnum
)
1276 * ifp may be specified by sockaddr_dl
1277 * when protocol address is ambiguous.
1279 if (info
->rti_ifp
== NULL
&& ifpaddr
!= NULL
&&
1280 ifpaddr
->sa_family
== AF_LINK
&&
1281 (ifa
= ifa_ifwithnet(ifpaddr
, 0, fibnum
)) != NULL
) {
1282 info
->rti_ifp
= ifa
->ifa_ifp
;
1285 if (info
->rti_ifa
== NULL
&& ifaaddr
!= NULL
)
1286 info
->rti_ifa
= ifa_ifwithaddr(ifaaddr
);
1287 if (info
->rti_ifa
== NULL
) {
1288 struct sockaddr
*sa
;
1290 sa
= ifaaddr
!= NULL
? ifaaddr
:
1291 (gateway
!= NULL
? gateway
: dst
);
1292 if (sa
!= NULL
&& info
->rti_ifp
!= NULL
)
1293 info
->rti_ifa
= ifaof_ifpforaddr(sa
, info
->rti_ifp
);
1294 else if (dst
!= NULL
&& gateway
!= NULL
)
1295 info
->rti_ifa
= ifa_ifwithroute(flags
, dst
, gateway
,
1297 else if (sa
!= NULL
)
1298 info
->rti_ifa
= ifa_ifwithroute(flags
, sa
, sa
,
1301 if ((ifa
= info
->rti_ifa
) != NULL
) {
1302 if (info
->rti_ifp
== NULL
)
1303 info
->rti_ifp
= ifa
->ifa_ifp
;
1305 error
= ENETUNREACH
;
/*
 * Tree-walk callback used by rt_updatemtu(); 'arg' is a
 * struct if_mtuinfo carrying the interface (ifp) and its new MTU (mtu).
 *
 * The route's rt_ifp is first compared with ifmtu->ifp.  When the
 * route's current rt_mtu is >= the new value, rt_mtu is set to the new
 * value regardless of flags.  Otherwise (the new MTU is larger) the
 * route flags are tested against RTF_FIXEDMTU | RTF_GATEWAY | RTF_HOST
 * before rt_mtu is raised.
 * NOTE(review): the return statements, including what happens on an
 * interface mismatch or when one of those flags is set, are not visible
 * in this listing.
 */
1310 if_updatemtu_cb(struct radix_node
*rn
, void *arg
)
1313 struct if_mtuinfo
*ifmtu
;
1315 rt
= (struct rtentry
*)rn
;
1316 ifmtu
= (struct if_mtuinfo
*)arg
;
1318 if (rt
->rt_ifp
!= ifmtu
->ifp
)
1321 if (rt
->rt_mtu
>= ifmtu
->mtu
) {
1322 /* We have to decrease mtu regardless of flags */
1323 rt
->rt_mtu
= ifmtu
->mtu
;
1328 * New MTU is bigger. Check if are allowed to alter it
1330 if ((rt
->rt_flags
& (RTF_FIXEDMTU
| RTF_GATEWAY
| RTF_HOST
)) != 0) {
1333 * Skip routes with user-supplied MTU and
1334 * non-interface routes
1339 /* We are safe to update route MTU */
1340 rt
->rt_mtu
= ifmtu
->mtu
;
1346 rt_updatemtu(struct ifnet
*ifp
)
1348 struct if_mtuinfo ifmtu
;
1349 struct rib_head
*rnh
;
1355 * Try to update rt_mtu for all routes using this interface
1356 * Unfortunately the only way to do this is to traverse all
1357 * routing tables in all fibs/domains.
1359 for (i
= 1; i
<= AF_MAX
; i
++) {
1360 ifmtu
.mtu
= if_getmtu_family(ifp
, i
);
1361 for (j
= 0; j
< rt_numfibs
; j
++) {
1362 rnh
= rt_tables_get_rnh(j
, i
);
1366 rnh
->rnh_walktree(&rnh
->head
, if_updatemtu_cb
, &ifmtu
);
1374 int p_sockaddr(char *buf
, int buflen
, struct sockaddr
*s
);
1375 int rt_print(char *buf
, int buflen
, struct rtentry
*rt
);
1378 p_sockaddr(char *buf
, int buflen
, struct sockaddr
*s
)
1382 switch (s
->sa_family
) {
1384 paddr
= &((struct sockaddr_in
*)s
)->sin_addr
;
1387 paddr
= &((struct sockaddr_in6
*)s
)->sin6_addr
;
1394 if (inet_ntop(s
->sa_family
, paddr
, buf
, buflen
) == NULL
)
1397 return (strlen(buf
));
1401 rt_print(char *buf
, int buflen
, struct rtentry
*rt
)
1403 struct sockaddr
*addr
, *mask
;
1409 i
= p_sockaddr(buf
, buflen
, addr
);
1410 if (!(rt
->rt_flags
& RTF_HOST
)) {
1412 i
+= p_sockaddr(buf
+ i
, buflen
- i
, mask
);
1415 if (rt
->rt_flags
& RTF_GATEWAY
) {
1417 i
+= p_sockaddr(buf
+ i
, buflen
- i
, rt
->rt_gateway
);
1426 * Deletes key for single-path routes, unlinks rtentry with
1427 * gateway specified in @info from multi-path routes.
1429 * Returns unlinked entry. In case of failure, returns NULL
1430 * and sets @perror to ESRCH.
1432 static struct radix_node
*
1433 rt_mpath_unlink(struct rib_head
*rnh
, struct rt_addrinfo
*info
,
1434 struct rtentry
*rto
, int *perror
)
1437 * if we got multipath routes, we require users to specify
1438 * a matching RTAX_GATEWAY.
1440 struct rtentry
*rt
; // *rto = NULL;
1441 struct radix_node
*rn
;
1442 struct sockaddr
*gw
;
1444 gw
= info
->rti_info
[RTAX_GATEWAY
];
1445 rt
= rt_mpath_matchgate(rto
, gw
);
1452 * this is the first entry in the chain
1455 rn
= rn_mpath_next((struct radix_node
*)rt
);
1457 * there is another entry, now it's active
1462 rto
->rt_flags
|= RTF_UP
;
1464 } else if (rt
->rt_flags
& RTF_GATEWAY
) {
1466 * For gateway routes, we need to
1467 * make sure that we are deleting
1468 * the correct gateway.
1469 * rt_mpath_matchgate() does not
1470 * check the case when there is only
1471 * one route in the chain.
1474 (rt
->rt_gateway
->sa_len
!= gw
->sa_len
||
1475 memcmp(rt
->rt_gateway
, gw
, gw
->sa_len
))) {
1482 * use the normal delete code to remove
1485 rn
= rnh
->rnh_deladdr(dst
, netmask
, &rnh
->head
);
1491 * if the entry is 2nd and on up
1493 if (rt_mpath_deldup(rto
, rt
) == 0)
1494 panic ("rtrequest1: rt_mpath_deldup");
1496 rn
= (struct radix_node
*)rt
;
/*
 * Compare the route being added (described by @info) with the entry
 * that currently matches its destination in @rnh.
 *
 * The lookup logic is compiled only when INET or INET6 is defined.  The
 * current best match is fetched with rnh->rnh_matchaddr(); root nodes
 * (RNF_ROOT) are ignored.  An existing host route (RTF_HOST on rt0) is
 * handled on its own branch; otherwise, when the new route is not a
 * host route and carries a netmask, the new mask bytes (n) are compared
 * against rt_mask(rt0) (m).
 * NOTE(review): the definitions of dst/netmask/flags, the byte loop and
 * the statements choosing the returned rtentry are not visible in this
 * listing; presumably the result identifies a route whose flow-table
 * entries the caller should flush -- confirm.
 */
1502 static struct rtentry
*
1503 rt_flowtable_check_route(struct rib_head
*rnh
, struct rt_addrinfo
*info
)
1505 #if defined(INET6) || defined(INET)
1506 struct radix_node
*rn
;
1508 struct rtentry
*rt0
;
1511 /* "flow-table" only supports IPv6 and IPv4 at the moment. */
1512 switch (dst
->sa_family
) {
1519 #if defined(INET6) || defined(INET)
1520 rn
= rnh
->rnh_matchaddr(dst
, &rnh
->head
);
1521 if (rn
&& ((rn
->rn_flags
& RNF_ROOT
) == 0)) {
1522 struct sockaddr
*mask
;
1527 * compare mask to see if the new route is
1528 * more specific than the existing one
1535 * A host route is already present, so
1536 * leave the flow-table entries as is.
1538 if (rt0
->rt_flags
& RTF_HOST
) {
1541 } else if (!(flags
& RTF_HOST
) && netmask
) {
1542 mask
= rt_mask(rt0
);
1545 n
= (u_char
*)netmask
;
1552 if (len
== 0 || (*n
< *m
)) {
1558 #endif/* INET6 || INET */
/*
 * Carry out routing request @req, described by @info, against the
 * routing table of a single FIB.  The comments below indicate that the
 * resulting rtentry can be handed back to the caller through @ret_nrt.
 *
 * As visible below: fibnum and the flags are sanity-checked with
 * KASSERT, the rib head is obtained from rt_tables_get_rnh() (with an
 * EAFNOSUPPORT return on that path), and for non-host routes the
 * destination is masked into the on-stack mdst.  The delete path
 * unlinks the entry with rt_unlinkrte() and announces it through
 * rt_notifydelete().  The add path resolves the ifaddr with
 * rt_getifa_fib() when info->rti_ifa is NULL, allocates the rtentry from
 * V_rtzone with M_NOWAIT, stores key and gateway via rt_setgate(),
 * applies metrics, frees the new entry again if a multipath-capable
 * table already holds the same dst/mask/gw, and inserts the node with
 * rnh->rnh_addaddr().  For RTF_PINNED requests a failed insert is
 * retried after unlinking the existing entry (with RTF_PINNED
 * temporarily cleared in info->rti_flags), and the replaced entry is
 * later reported with rt_notifydelete().  rnh_gen is bumped once the
 * table has been updated.  Change requests are delegated to
 * rtrequest1_fib_change().
 * NOTE(review): the switch on req, the locking calls, the definitions
 * of dst/netmask/gateway/flags and several error paths are not visible
 * in this listing.
 */
1566 rtrequest1_fib(int req
, struct rt_addrinfo
*info
, struct rtentry
**ret_nrt
,
1570 struct rtentry
*rt
, *rt_old
;
1572 struct rtentry
*rt0
;
1574 struct radix_node
*rn
;
1575 struct rib_head
*rnh
;
1577 struct sockaddr
*ndst
;
1578 struct sockaddr_storage mdst
;
1580 KASSERT((fibnum
< rt_numfibs
), ("rtrequest1_fib: bad fibnum"));
1581 KASSERT((flags
& RTF_RNH_LOCKED
) == 0, ("rtrequest1_fib: locked"));
1582 switch (dst
->sa_family
) {
1585 /* We support multiple FIBs. */
1588 fibnum
= RT_DEFAULT_FIB
;
1593 * Find the correct routing tree to use for this Address Family
1595 rnh
= rt_tables_get_rnh(fibnum
, dst
->sa_family
);
1597 return (EAFNOSUPPORT
);
1600 * If we are adding a host route then we don't want to put
1601 * a netmask in the tree, nor do we want to clone it.
1603 if (flags
& RTF_HOST
)
1609 rt_maskedcopy(dst
, (struct sockaddr
*)&mdst
, netmask
);
1610 dst
= (struct sockaddr
*)&mdst
;
1614 rt
= rt_unlinkrte(rnh
, info
, &error
);
1619 rt_notifydelete(rt
, info
);
1622 * If the caller wants it, then it can have it,
1623 * but it's up to it to free the rtentry as we won't be
1634 * resolve was only used for route cloning
1639 if ((flags
& RTF_GATEWAY
) && !gateway
)
1641 if (dst
&& gateway
&& (dst
->sa_family
!= gateway
->sa_family
) &&
1642 (gateway
->sa_family
!= AF_UNSPEC
) && (gateway
->sa_family
!= AF_LINK
))
1645 if (info
->rti_ifa
== NULL
) {
1646 error
= rt_getifa_fib(info
, fibnum
);
1650 ifa_ref(info
->rti_ifa
);
1651 ifa
= info
->rti_ifa
;
1652 rt
= uma_zalloc(V_rtzone
, M_NOWAIT
);
1657 rt
->rt_flags
= RTF_UP
| flags
;
1658 rt
->rt_fibnum
= fibnum
;
1660 * Add the gateway. Possibly re-malloc-ing the storage for it.
1662 if ((error
= rt_setgate(rt
, dst
, gateway
)) != 0) {
1664 uma_zfree(V_rtzone
, rt
);
1669 * point to the (possibly newly malloc'd) dest address.
1671 ndst
= (struct sockaddr
*)rt_key(rt
);
1674 * make sure it contains the value we want (masked if needed).
1677 rt_maskedcopy(dst
, ndst
, netmask
);
1679 bcopy(dst
, ndst
, dst
->sa_len
);
1682 * We use the ifa reference returned by rt_getifa_fib().
1683 * This moved from below so that rnh->rnh_addaddr() can
1684 * examine the ifa and ifa->ifa_ifp if it so desires.
1687 rt
->rt_ifp
= ifa
->ifa_ifp
;
1690 rt_setmetrics(info
, rt
);
1695 /* do not permit exactly the same dst/mask/gw pair */
1696 if (rt_mpath_capable(rnh
) &&
1697 rt_mpath_conflict(rnh
, rt
, netmask
)) {
1700 ifa_free(rt
->rt_ifa
);
1702 uma_zfree(V_rtzone
, rt
);
1708 rt0
= rt_flowtable_check_route(rnh
, info
);
1709 #endif /* FLOWTABLE */
1711 /* XXX mtu manipulation will be done in rnh_addaddr -- itojun */
1712 rn
= rnh
->rnh_addaddr(ndst
, netmask
, &rnh
->head
, rt
->rt_nodes
);
1715 if (rn
== NULL
&& (info
->rti_flags
& RTF_PINNED
) != 0) {
1718 * Force removal and re-try addition
1719 * TODO: better multipath&pinned support
1721 struct sockaddr
*info_dst
= info
->rti_info
[RTAX_DST
];
1722 info
->rti_info
[RTAX_DST
] = ndst
;
1723 /* Do not delete existing PINNED(interface) routes */
1724 info
->rti_flags
&= ~RTF_PINNED
;
1725 rt_old
= rt_unlinkrte(rnh
, info
, &error
);
1726 info
->rti_flags
|= RTF_PINNED
;
1727 info
->rti_info
[RTAX_DST
] = info_dst
;
1729 rn
= rnh
->rnh_addaddr(ndst
, netmask
, &rnh
->head
,
1738 * If it still failed to go into the tree,
1739 * then un-make it (this should be a function)
1742 ifa_free(rt
->rt_ifa
);
1744 uma_zfree(V_rtzone
, rt
);
1752 else if (rt0
!= NULL
) {
1753 flowtable_route_flush(dst
->sa_family
, rt0
);
1758 if (rt_old
!= NULL
) {
1759 rt_notifydelete(rt_old
, info
);
1764 * If this protocol has something to add to this then
1765 * allow it to do that as well.
1767 if (ifa
->ifa_rtrequest
)
1768 ifa
->ifa_rtrequest(req
, rt
, info
);
1771 * actually return a resultant rtentry and
1772 * give the caller a single reference.
1778 rnh
->rnh_gen
++; /* Routing table updated */
1783 error
= rtrequest1_fib_change(rnh
, info
, ret_nrt
, fibnum
);
/*
 * Apply a change request, described by @info, to an existing route in
 * @rnh for fib @fibnum.
 *
 * The entry is found with rnh->rnh_lookup() using RTAX_DST and
 * RTAX_NETMASK from @info and, on multipath-capable tables, narrowed
 * with rt_mpath_matchgate() on RTAX_GATEWAY.  Metrics are applied with
 * rt_setmetrics().  rt_getifa_fib() re-resolves info->rti_ifa when the
 * route is a gateway route and a gateway was supplied, when RTAX_IFP
 * was supplied, or when RTAX_IFA was supplied and differs from the
 * current ifa address.  If the ifa changes, the old ifa receives an
 * RTM_DELETE callback and its reference is dropped; later the new ifa
 * is referenced and installed together with info->rti_ifp.  A supplied
 * gateway is stored with rt_setgate() and the RTF_GATEWAY bit is taken
 * from info->rti_flags, as are the RTF_FMASK bits.  The current ifa
 * then receives an RTM_ADD callback, and the route MTU is compared with
 * if_getmtu_family() and updated through if_updatemtu_cb() when they
 * differ.
 * NOTE(review): error handling, locking, the declarations of
 * error/family/mtu and the use of @ret_nrt are not visible in this
 * listing.
 */
1801 rtrequest1_fib_change(struct rib_head
*rnh
, struct rt_addrinfo
*info
,
1802 struct rtentry
**ret_nrt
, u_int fibnum
)
1804 struct rtentry
*rt
= NULL
;
1808 struct if_mtuinfo ifmtu
;
1810 rt
= (struct rtentry
*)rnh
->rnh_lookup(info
->rti_info
[RTAX_DST
],
1811 info
->rti_info
[RTAX_NETMASK
], &rnh
->head
);
1818 * If we got multipath routes,
1819 * we require users to specify a matching RTAX_GATEWAY.
1821 if (rt_mpath_capable(rnh
)) {
1822 rt
= rt_mpath_matchgate(rt
, info
->rti_info
[RTAX_GATEWAY
]);
1830 rt_setmetrics(info
, rt
);
1833 * New gateway could require new ifaddr, ifp;
1834 * flags may also be different; ifp may be specified
1835 * by ll sockaddr when protocol address is ambiguous
1837 if (((rt
->rt_flags
& RTF_GATEWAY
) &&
1838 info
->rti_info
[RTAX_GATEWAY
] != NULL
) ||
1839 info
->rti_info
[RTAX_IFP
] != NULL
||
1840 (info
->rti_info
[RTAX_IFA
] != NULL
&&
1841 !sa_equal(info
->rti_info
[RTAX_IFA
], rt
->rt_ifa
->ifa_addr
))) {
1843 error
= rt_getifa_fib(info
, fibnum
);
1844 if (info
->rti_ifa
!= NULL
)
1851 /* Check if outgoing interface has changed */
1852 if (info
->rti_ifa
!= NULL
&& info
->rti_ifa
!= rt
->rt_ifa
&&
1853 rt
->rt_ifa
!= NULL
&& rt
->rt_ifa
->ifa_rtrequest
!= NULL
) {
1854 rt
->rt_ifa
->ifa_rtrequest(RTM_DELETE
, rt
, info
);
1855 ifa_free(rt
->rt_ifa
);
1857 /* Update gateway address */
1858 if (info
->rti_info
[RTAX_GATEWAY
] != NULL
) {
1859 error
= rt_setgate(rt
, rt_key(rt
), info
->rti_info
[RTAX_GATEWAY
]);
1863 rt
->rt_flags
&= ~RTF_GATEWAY
;
1864 rt
->rt_flags
|= (RTF_GATEWAY
& info
->rti_flags
);
1867 if (info
->rti_ifa
!= NULL
&& info
->rti_ifa
!= rt
->rt_ifa
) {
1868 ifa_ref(info
->rti_ifa
);
1869 rt
->rt_ifa
= info
->rti_ifa
;
1870 rt
->rt_ifp
= info
->rti_ifp
;
1872 /* Allow some flags to be toggled on change. */
1873 rt
->rt_flags
&= ~RTF_FMASK
;
1874 rt
->rt_flags
|= info
->rti_flags
& RTF_FMASK
;
1876 if (rt
->rt_ifa
&& rt
->rt_ifa
->ifa_rtrequest
!= NULL
)
1877 rt
->rt_ifa
->ifa_rtrequest(RTM_ADD
, rt
, info
);
1879 /* Alter route MTU if necessary */
1880 if (rt
->rt_ifp
!= NULL
) {
1881 family
= info
->rti_info
[RTAX_DST
]->sa_family
;
1882 mtu
= if_getmtu_family(rt
->rt_ifp
, family
);
1883 /* Set default MTU */
1884 if (rt
->rt_mtu
== 0)
1886 if (rt
->rt_mtu
!= mtu
) {
1887 /* Check if we really need to update */
1888 ifmtu
.ifp
= rt
->rt_ifp
;
1890 if_updatemtu_cb(rt
->rt_nodes
, &ifmtu
);
1901 ifa_free(info
->rti_ifa
);
/*
 * Copy the metrics selected by info->rti_mflags from info->rti_rmx into
 * @rt:
 *   RTV_MTU    - rt_mtu is set from rmx_mtu; RTF_FIXEDMTU is set when
 *                the supplied MTU is non-zero and cleared when it is 0.
 *   RTV_WEIGHT - rt_weight is set from rmx_weight.
 *   RTV_EXPIRE - rt_expire is set from rmx_expire, rebased from
 *                time_second to time_uptime; an rmx_expire of 0 is
 *                stored as 0 without rebasing.
 * Metrics whose bit is not set in rti_mflags are left untouched.
 */
1906 rt_setmetrics(const struct rt_addrinfo
*info
, struct rtentry
*rt
)
1909 if (info
->rti_mflags
& RTV_MTU
) {
1910 if (info
->rti_rmx
->rmx_mtu
!= 0) {
1913 * MTU was explicitly provided by user.
1916 rt
->rt_flags
|= RTF_FIXEDMTU
;
1920 * User explicitly sets MTU to 0.
1921 * Assume rollback to default.
1923 rt
->rt_flags
&= ~RTF_FIXEDMTU
;
1925 rt
->rt_mtu
= info
->rti_rmx
->rmx_mtu
;
1927 if (info
->rti_mflags
& RTV_WEIGHT
)
1928 rt
->rt_weight
= info
->rti_rmx
->rmx_weight
;
1929 /* Userland -> kernel timebase conversion (time_second base to time_uptime base). */
1930 if (info
->rti_mflags
& RTV_EXPIRE
)
1931 rt
->rt_expire
= info
->rti_rmx
->rmx_expire
?
1932 info
->rti_rmx
->rmx_expire
- time_second
+ time_uptime
: 0;
/*
 * Store gateway @gate in route @rt, (re)allocating the key/gateway
 * storage when needed.
 *
 * Key and gateway are kept back to back in one R_Malloc-allocated
 * chunk.  When rt has no gateway yet, or SA_SIZE(gate) exceeds the size
 * of the current gateway, a chunk of dlen + glen bytes is allocated,
 * @dst is copied to its start and becomes the new rt_key(rt), the old
 * key chunk is released with R_Free(), and rt_gateway is pointed at
 * new + dlen.  @gate is then copied into rt_gateway (glen bytes).
 * dlen and glen are sampled from @dst and @gate on entry, before any
 * buffer is freed.
 * NOTE(review): the allocation-failure path and the return statement
 * are not visible in this listing; rtrequest1_fib() treats a non-zero
 * return as an error.
 */
1936 rt_setgate(struct rtentry
*rt
, struct sockaddr
*dst
, struct sockaddr
*gate
)
1938 /* XXX dst may be overwritten, can we move this to below */
1939 int dlen
= SA_SIZE(dst
), glen
= SA_SIZE(gate
);
1942 * Prepare to store the gateway in rt->rt_gateway.
1943 * Both dst and gateway are stored one after the other in the same
1944 * malloc'd chunk. If we have room, we can reuse the old buffer,
1945 * rt_gateway already points to the right place.
1946 * Otherwise, malloc a new block and update the 'dst' address.
1948 if (rt
->rt_gateway
== NULL
|| glen
> SA_SIZE(rt
->rt_gateway
)) {
1951 R_Malloc(new, caddr_t
, dlen
+ glen
);
1955 * XXX note, we copy from *dst and not *rt_key(rt) because
1956 * rt_setgate() can be called to initialize a newly
1957 * allocated route entry, in which case rt_key(rt) == NULL
1958 * (and also rt->rt_gateway == NULL).
1959 * Free()/free() handle a NULL argument just fine.
1961 bcopy(dst
, new, dlen
);
1962 R_Free(rt_key(rt
)); /* free old block, if any */
1963 rt_key(rt
) = (struct sockaddr
*)new;
1964 rt
->rt_gateway
= (struct sockaddr
*)(new + dlen
);
1968 * Copy the new gateway value into the memory chunk.
1970 bcopy(gate
, rt
->rt_gateway
, glen
);
/*
 * Build in @dst a copy of @src with @netmask applied.
 *
 * All three sockaddrs are walked as byte arrays.  cplim is dst plus the
 * first byte of the mask and cplim2 is dst plus the first byte of src
 * (the first byte being sa_len, per the comment on the copy below).
 * The first two bytes, sa_len and sa_family, are copied unmasked; the
 * following bytes are copied ANDed with mask bytes; whatever remains of
 * @dst up to cplim2 is zeroed with bzero().
 * NOTE(review): the loop header and any adjustment of cp3/cplim are not
 * visible in this listing; presumably masking stops at the shorter of
 * the two limits -- confirm.
 */
1976 rt_maskedcopy(struct sockaddr
*src
, struct sockaddr
*dst
, struct sockaddr
*netmask
)
1978 u_char
*cp1
= (u_char
*)src
;
1979 u_char
*cp2
= (u_char
*)dst
;
1980 u_char
*cp3
= (u_char
*)netmask
;
1981 u_char
*cplim
= cp2
+ *cp3
;
1982 u_char
*cplim2
= cp2
+ *cp1
;
1984 *cp2
++ = *cp1
++; *cp2
++ = *cp1
++; /* copies sa_len & sa_family */
1989 *cp2
++ = *cp1
++ & *cp3
++;
1991 bzero((caddr_t
)cp2
, (unsigned)(cplim2
- cp2
));
1995 * Set up a routing table entry, normally
1998 #define _SOCKADDR_TMPSIZE 128 /* Not too big.. kernel stack size is limited */
/*
 * Add or delete (per @cmd) the route that belongs to interface address
 * @ifa, in one FIB or across a range of FIBs.
 *
 * The destination is ifa_dstaddr for RTF_HOST requests; ifa_addr and
 * ifa_netmask are the other visible source.  With fibnum == RT_ALL_FIBS
 * a range [startfib, endfib] is walked; when V_rt_add_addr_allfibs is 0
 * an RTM_ADD is restricted to the fib of the interface
 * (ifa->ifa_ifp->if_fib).  For RTM_DELETE with a netmask the
 * destination is first masked into the on-stack tempbuf
 * (_SOCKADDR_TMPSIZE bytes), and each table is checked so that the
 * request only proceeds for a non-root node whose rt_ifa is @ifa.
 * The request itself is built in a zeroed rt_addrinfo (flags always
 * include RTF_PINNED, IFA_RTSELF is masked out of ifa_flags) and issued
 * through rtrequest1_fib().  On success the ifa/ifp stored in the route
 * are replaced when the address differs from @ifa, the sockaddr_dl
 * gateway gets sdl_type/sdl_index filled in on RTM_ADD, and the change
 * is announced with rt_newaddrmsg_fib().  Per the comments below, a
 * delete reports EHOSTUNREACH/ENETUNREACH only if the route was in no
 * table, while other commands report an error if any table failed.
 * NOTE(review): many lines of the body (braces, else branches, the
 * bookkeeping of per-table results and the final return) are not
 * visible in this listing.
 */
2000 rtinit1(struct ifaddr
*ifa
, int cmd
, int flags
, int fibnum
)
2002 struct sockaddr
*dst
;
2003 struct sockaddr
*netmask
;
2004 struct rtentry
*rt
= NULL
;
2005 struct rt_addrinfo info
;
2007 int startfib
, endfib
;
2008 char tempbuf
[_SOCKADDR_TMPSIZE
];
2011 static struct sockaddr_dl null_sdl
= {sizeof(null_sdl
), AF_LINK
};
2012 struct rib_head
*rnh
;
2014 if (flags
& RTF_HOST
) {
2015 dst
= ifa
->ifa_dstaddr
;
2018 dst
= ifa
->ifa_addr
;
2019 netmask
= ifa
->ifa_netmask
;
2021 if (dst
->sa_len
== 0)
2023 switch (dst
->sa_family
) {
2026 /* We support multiple FIBs. */
2029 fibnum
= RT_DEFAULT_FIB
;
2032 if (fibnum
== RT_ALL_FIBS
) {
2033 if (V_rt_add_addr_allfibs
== 0 && cmd
== (int)RTM_ADD
)
2034 startfib
= endfib
= ifa
->ifa_ifp
->if_fib
;
2037 endfib
= rt_numfibs
- 1;
2040 KASSERT((fibnum
< rt_numfibs
), ("rtinit1: bad fibnum"));
2046 * If it's a delete, check that if it exists,
2047 * it's on the correct interface or we might scrub
2048 * a route to another ifa which would
2049 * be confusing at best and possibly worse.
2051 if (cmd
== RTM_DELETE
) {
2053 * It's a delete, so it should already exist..
2054 * If it's a net, mask off the host bits
2055 * (Assuming we have a mask)
2056 * XXX this is kinda inet specific..
2058 if (netmask
!= NULL
) {
2059 rt_maskedcopy(dst
, (struct sockaddr
*)tempbuf
, netmask
);
2060 dst
= (struct sockaddr
*)tempbuf
;
2064 * Now go through all the requested tables (fibs) and do the
2065 * requested action. Realistically, this will either be fib 0
2066 * for protocols that don't do multiple tables or all the
2067 * tables for those that do.
2069 for ( fibnum
= startfib
; fibnum
<= endfib
; fibnum
++) {
2070 if (cmd
== RTM_DELETE
) {
2071 struct radix_node
*rn
;
2073 * Look up an rtentry that is in the routing tree and
2074 * contains the correct info.
2076 rnh
= rt_tables_get_rnh(fibnum
, dst
->sa_family
);
2078 /* this table doesn't exist but others might */
2081 rn
= rnh
->rnh_lookup(dst
, netmask
, &rnh
->head
);
2083 if (rt_mpath_capable(rnh
)) {
2090 * for interface route the
2091 * rt->rt_gateway is sockaddr_intf
2092 * for cloning ARP entries, so
2093 * rt_mpath_matchgate must use the
2096 rt
= rt_mpath_matchgate(rt
,
2103 error
= (rn
== NULL
||
2104 (rn
->rn_flags
& RNF_ROOT
) ||
2105 RNTORT(rn
)->rt_ifa
!= ifa
);
2108 /* this is only an error if bad on ALL tables */
2113 * Do the actual request
2115 bzero((caddr_t
)&info
, sizeof(info
));
2117 info
.rti_flags
= flags
|
2118 (ifa
->ifa_flags
& ~IFA_RTSELF
) | RTF_PINNED
;
2119 info
.rti_info
[RTAX_DST
] = dst
;
2121 * doing this for compatibility reasons
2124 info
.rti_info
[RTAX_GATEWAY
] =
2125 (struct sockaddr
*)&null_sdl
;
2127 info
.rti_info
[RTAX_GATEWAY
] = ifa
->ifa_addr
;
2128 info
.rti_info
[RTAX_NETMASK
] = netmask
;
2129 error
= rtrequest1_fib(cmd
, &info
, &rt
, fibnum
);
2131 if (error
== 0 && rt
!= NULL
) {
2133 * notify any listening routing agents of the change
2138 * in case address alias finds the first address
2139 * e.g. ifconfig bge0 192.0.2.246/24
2140 * e.g. ifconfig bge0 192.0.2.247/24
2141 * the address set in the route is 192.0.2.246
2142 * so we need to replace it with 192.0.2.247
2144 if (memcmp(rt
->rt_ifa
->ifa_addr
,
2145 ifa
->ifa_addr
, ifa
->ifa_addr
->sa_len
)) {
2146 ifa_free(rt
->rt_ifa
);
2148 rt
->rt_ifp
= ifa
->ifa_ifp
;
2153 * doing this for compatibility reasons
2155 if (cmd
== RTM_ADD
) {
2156 ((struct sockaddr_dl
*)rt
->rt_gateway
)->sdl_type
=
2157 rt
->rt_ifp
->if_type
;
2158 ((struct sockaddr_dl
*)rt
->rt_gateway
)->sdl_index
=
2159 rt
->rt_ifp
->if_index
;
2163 rt_newaddrmsg_fib(cmd
, ifa
, error
, rt
, fibnum
);
2166 if (cmd
== RTM_DELETE
) {
2168 * If we are deleting, and we found an entry,
2169 * then it's been removed from the tree..
2170 * now throw it away.
2174 if (cmd
== RTM_ADD
) {
2176 * We just wanted to add it..
2177 * we don't actually need a reference.
2188 if (cmd
== RTM_DELETE
) {
2192 /* we only give an error if it wasn't in any table */
2193 error
= ((flags
& RTF_HOST
) ?
2194 EHOSTUNREACH
: ENETUNREACH
);
2198 /* return an error if any of them failed */
2206 * Set up a routing table entry, normally
/*
 * Front end for rtinit1(): selects the destination address of @ifa
 * (ifa_dstaddr for RTF_HOST requests; ifa_addr is the other visible
 * source), inspects its address family, and forwards @ifa, @cmd and
 * @flags to rtinit1() together with a fib number that starts out as
 * RT_DEFAULT_FIB.  The return value is that of rtinit1().
 * NOTE(review): the case labels of the switch, which presumably change
 * fib for families that support multiple FIBs, are not visible in this
 * listing -- confirm.
 */
2210 rtinit(struct ifaddr
*ifa
, int cmd
, int flags
)
2212 struct sockaddr
*dst
;
2213 int fib
= RT_DEFAULT_FIB
;
2215 if (flags
& RTF_HOST
) {
2216 dst
= ifa
->ifa_dstaddr
;
2218 dst
= ifa
->ifa_addr
;
2221 switch (dst
->sa_family
) {
2224 /* We do support multiple FIBs. */
2228 return (rtinit1(ifa
, cmd
, flags
, fib
));
2232 * Announce interface address arrival/withdraw
2233 * Returns 0 on success.
/*
 * @cmd must be RTM_ADD or RTM_DELETE and @fibnum must be RT_ALL_FIBS or
 * lie in [0, rt_numfibs); both conditions are enforced with KASSERT.
 * In the INET/INET6 section the SCTP stack is informed through
 * sctp_addr_change() before the routing-socket message is produced.
 * The return value is that of rtsock_addrmsg().
 */
2236 rt_addrmsg(int cmd
, struct ifaddr
*ifa
, int fibnum
)
2239 KASSERT(cmd
== RTM_ADD
|| cmd
== RTM_DELETE
,
2240 ("unexpected cmd %d", cmd
));
2242 KASSERT(fibnum
== RT_ALL_FIBS
|| (fibnum
>= 0 && fibnum
< rt_numfibs
),
2243 ("%s: fib out of range 0 <=%d<%d", __func__
, fibnum
, rt_numfibs
));
2245 #if defined(INET) || defined(INET6)
2248 * notify the SCTP stack
2249 * this will only get called when an address is added/deleted
2250 * XXX pass the ifaddr struct instead if ifa->ifa_addr...
2252 sctp_addr_change(ifa
, cmd
);
2255 return (rtsock_addrmsg(cmd
, ifa
, fibnum
));
2259 * Announce route addition/removal.
2260 * Users of this function MUST validate input data BEFORE calling.
2261 * However we have to be able to handle invalid data:
2262 * if some userland app sends us "invalid" route message (invalid mask,
2263 * no dst, wrong address families, etc...) we need to pass it back
2264 * to app (and any other rtsock consumers) with rtm_errno field set to
2266 * Returns 0 on success.
/*
 * Preconditions enforced with KASSERT: @cmd is RTM_ADD or RTM_DELETE,
 * @fibnum is RT_ALL_FIBS or lies in [0, rt_numfibs), and rt_key(@rt) is
 * non-NULL.  The message itself is produced by rtsock_routemsg(), whose
 * return value is passed through; @error is forwarded to it unchanged.
 */
2269 rt_routemsg(int cmd
, struct ifnet
*ifp
, int error
, struct rtentry
*rt
,
2273 KASSERT(cmd
== RTM_ADD
|| cmd
== RTM_DELETE
,
2274 ("unexpected cmd %d", cmd
));
2276 KASSERT(fibnum
== RT_ALL_FIBS
|| (fibnum
>= 0 && fibnum
< rt_numfibs
),
2277 ("%s: fib out of range 0 <=%d<%d", __func__
, fibnum
, rt_numfibs
));
2279 KASSERT(rt_key(rt
) != NULL
, (":%s: rt_key must be supplied", __func__
));
2281 return (rtsock_routemsg(cmd
, ifp
, error
, rt
, fibnum
));
/*
 * Convenience wrapper: forwards all arguments to rt_newaddrmsg_fib()
 * with RT_ALL_FIBS as the fib number.
 */
2285 rt_newaddrmsg(int cmd
, struct ifaddr
*ifa
, int error
, struct rtentry
*rt
)
2288 rt_newaddrmsg_fib(cmd
, ifa
, error
, rt
, RT_ALL_FIBS
);
2292 * This is called to generate messages from the routing socket
2293 * indicating a network interface has had addresses associated with it.
/*
 * Emits both an address message (rt_addrmsg()) and a route message
 * (rt_routemsg(), for the interface ifa->ifa_ifp).  The order depends on
 * @cmd: for RTM_ADD the address is announced before the route; in the
 * other branch the route message comes first and the address message
 * last.  @cmd and @fibnum are checked with KASSERT in the same way as
 * in rt_addrmsg() and rt_routemsg().
 * NOTE(review): cmd is an int but the first KASSERT message formats it
 * with %u, whereas the sibling functions use %d.  Any condition guarding
 * the rt_routemsg() calls is not visible in this listing.
 */
2296 rt_newaddrmsg_fib(int cmd
, struct ifaddr
*ifa
, int error
, struct rtentry
*rt
,
2300 KASSERT(cmd
== RTM_ADD
|| cmd
== RTM_DELETE
,
2301 ("unexpected cmd %u", cmd
));
2302 KASSERT(fibnum
== RT_ALL_FIBS
|| (fibnum
>= 0 && fibnum
< rt_numfibs
),
2303 ("%s: fib out of range 0 <=%d<%d", __func__
, fibnum
, rt_numfibs
));
2305 if (cmd
== RTM_ADD
) {
2306 rt_addrmsg(cmd
, ifa
, fibnum
);
2308 rt_routemsg(cmd
, ifa
->ifa_ifp
, error
, rt
, fibnum
);
2311 rt_routemsg(cmd
, ifa
->ifa_ifp
, error
, rt
, fibnum
);
2312 rt_addrmsg(cmd
, ifa
, fibnum
);