2 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
6 * Copyright (c) 1988, 1991, 1993
7 * The Regents of the University of California. All rights reserved.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the University of
20 * California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * @(#)rtsock.c 8.6 (Berkeley) 2/11/95
41 * This file contains routines that process routing socket requests.
44 #include <sys/types.h>
45 #include <sys/stream.h>
46 #include <sys/stropts.h>
48 #include <sys/strsubr.h>
49 #include <sys/cmn_err.h>
50 #include <sys/debug.h>
51 #include <sys/policy.h>
54 #include <sys/systm.h>
55 #include <sys/param.h>
56 #include <sys/socket.h>
57 #include <sys/strsun.h>
59 #include <net/route.h>
60 #include <netinet/in.h>
61 #include <net/if_dl.h>
62 #include <netinet/ip6.h>
64 #include <inet/common.h>
67 #include <inet/ip_if.h>
68 #include <inet/ip_ire.h>
69 #include <inet/ip_ftable.h>
70 #include <inet/ip_rts.h>
72 #include <inet/ipclassifier.h>
/*
 * Sum of rts_header_msg_size(type) and rts_data_msg_size(rtm_addrs, af).
 * Presumably the total byte size of a routing message of the given type
 * carrying the given set of addresses -- confirm against the two helpers.
 */
#define RTS_MSG_SIZE(type, rtm_addrs, af) \
	(rts_header_msg_size(type) + rts_data_msg_size(rtm_addrs, af))
77 static size_t rts_copyfromsockaddr(struct sockaddr
*sa
, in6_addr_t
*addrp
);
78 static void rts_fill_msg(int type
, int rtm_addrs
, ipaddr_t dst
,
79 ipaddr_t mask
, ipaddr_t gateway
, ipaddr_t src_addr
, ipaddr_t brd_addr
,
80 ipaddr_t author
, ipaddr_t ifaddr
, const ill_t
*ill
, mblk_t
*mp
);
81 static int rts_getaddrs(rt_msghdr_t
*rtm
, in6_addr_t
*dst_addrp
,
82 in6_addr_t
*gw_addrp
, in6_addr_t
*net_maskp
, in6_addr_t
*authorp
,
83 in6_addr_t
*if_addrp
, in6_addr_t
*src_addrp
, ushort_t
*indexp
,
84 sa_family_t
*afp
, int *error
);
85 static void rts_getifdata(if_data_t
*if_data
, const ipif_t
*ipif
);
86 static int rts_getmetrics(ire_t
*ire
, ill_t
*ill
, rt_metrics_t
*metrics
);
87 static mblk_t
*rts_rtmget(mblk_t
*mp
, ire_t
*ire
, ire_t
*ifire
,
88 const in6_addr_t
*setsrc
, sa_family_t af
);
89 static void rts_setmetrics(ire_t
*ire
, uint_t which
, rt_metrics_t
*metrics
);
90 static ire_t
*ire_lookup_v4(ipaddr_t dst_addr
, ipaddr_t net_mask
,
91 ipaddr_t gw_addr
, const ill_t
*ill
, zoneid_t zoneid
, int match_flags
,
92 ip_stack_t
*ipst
, ire_t
**pifire
, ipaddr_t
*v4setsrcp
);
93 static ire_t
*ire_lookup_v6(const in6_addr_t
*dst_addr_v6
,
94 const in6_addr_t
*net_mask_v6
, const in6_addr_t
*gw_addr_v6
,
95 const ill_t
*ill
, zoneid_t zoneid
, int match_flags
, ip_stack_t
*ipst
, ire_t
96 **pifire
, in6_addr_t
*v6setsrcp
);
99 * Send `mp' to all eligible routing queues. A queue is ineligible if:
101 * 1. SO_USELOOPBACK is off and it is not the originating queue.
102 * 2. RTA_UNDER_IPMP is on and RTSQ_UNDER_IPMP is not set in `flags'.
103 * 3. RTA_UNDER_IPMP is off and RTSQ_NORMAL is not set in `flags'.
104 * 4. It is not the same address family as `af', and `af' isn't AF_UNSPEC.
107 rts_queue_input(mblk_t
*mp
, conn_t
*o_connp
, sa_family_t af
, uint_t flags
,
111 conn_t
*connp
, *next_connp
;
114 * Since we don't have an ill_t here, RTSQ_DEFAULT must already be
115 * resolved to one or more of RTSQ_NORMAL|RTSQ_UNDER_IPMP at this point.
117 ASSERT(!(flags
& RTSQ_DEFAULT
));
119 mutex_enter(&ipst
->ips_rts_clients
->connf_lock
);
120 connp
= ipst
->ips_rts_clients
->connf_head
;
122 for (; connp
!= NULL
; connp
= next_connp
) {
123 next_connp
= connp
->conn_next
;
125 * If there was a family specified when this routing socket was
126 * created and it doesn't match the family of the message to
127 * copy, then continue.
129 if ((connp
->conn_proto
!= AF_UNSPEC
) &&
130 (connp
->conn_proto
!= af
))
134 * Queue the message only if the conn_t and flags match.
136 if (connp
->conn_rtaware
& RTAW_UNDER_IPMP
) {
137 if (!(flags
& RTSQ_UNDER_IPMP
))
140 if (!(flags
& RTSQ_NORMAL
))
144 * For the originating queue, we only copy the message upstream
145 * if loopback is set. For others reading on the routing
146 * socket, we check if there is room upstream for a copy of the
149 if ((o_connp
== connp
) && connp
->conn_useloopback
== 0) {
150 connp
= connp
->conn_next
;
154 mutex_exit(&ipst
->ips_rts_clients
->connf_lock
);
155 /* Pass to rts_input */
156 if (IPCL_IS_NONSTR(connp
) ? !connp
->conn_flow_cntrld
:
157 canputnext(connp
->conn_rq
)) {
161 /* Note that we pass a NULL ira to rts_input */
163 (connp
->conn_recv
)(connp
, mp1
, NULL
, NULL
);
166 mutex_enter(&ipst
->ips_rts_clients
->connf_lock
);
167 /* reload next_connp since conn_next may have changed */
168 next_connp
= connp
->conn_next
;
171 mutex_exit(&ipst
->ips_rts_clients
->connf_lock
);
176 * Takes an ire and sends an ack to all the routing sockets. This
178 * - when a route is created/deleted through the ioctl interface.
179 * - when a stale redirect is deleted
182 ip_rts_rtmsg(int type
, ire_t
*ire
, int error
, ip_stack_t
*ipst
)
186 int rtm_addrs
= (RTA_DST
| RTA_NETMASK
| RTA_GATEWAY
);
188 in6_addr_t gw_addr_v6
;
192 ASSERT(ire
->ire_ipversion
== IPV4_VERSION
||
193 ire
->ire_ipversion
== IPV6_VERSION
);
195 ASSERT(!(ire
->ire_type
& IRE_IF_CLONE
));
197 if (ire
->ire_flags
& RTF_SETSRC
)
198 rtm_addrs
|= RTA_SRC
;
200 switch (ire
->ire_ipversion
) {
203 mp
= rts_alloc_msg(type
, rtm_addrs
, af
);
206 rts_fill_msg(type
, rtm_addrs
, ire
->ire_addr
, ire
->ire_mask
,
207 ire
->ire_gateway_addr
, ire
->ire_setsrc_addr
, 0, 0, 0, NULL
,
212 mp
= rts_alloc_msg(type
, rtm_addrs
, af
);
215 mutex_enter(&ire
->ire_lock
);
216 gw_addr_v6
= ire
->ire_gateway_addr_v6
;
217 mutex_exit(&ire
->ire_lock
);
218 rts_fill_msg_v6(type
, rtm_addrs
, &ire
->ire_addr_v6
,
219 &ire
->ire_mask_v6
, &gw_addr_v6
,
220 &ire
->ire_setsrc_addr_v6
, &ipv6_all_zeros
, &ipv6_all_zeros
,
221 &ipv6_all_zeros
, NULL
, mp
);
224 rtm
= (rt_msghdr_t
*)mp
->b_rptr
;
225 mp
->b_wptr
= (uchar_t
*)&mp
->b_rptr
[rtm
->rtm_msglen
];
226 rtm
->rtm_addrs
= rtm_addrs
;
227 rtm
->rtm_flags
= ire
->ire_flags
;
229 rtm
->rtm_errno
= error
;
231 rtm
->rtm_flags
|= RTF_DONE
;
232 rts_queue_input(mp
, NULL
, af
, RTSQ_ALL
, ipst
);
236 * This is a call from the RTS module
237 * indicating that this is a Routing Socket
238 * Stream. Insert this conn_t in routing
239 * socket client list.
242 ip_rts_register(conn_t
*connp
)
244 ip_stack_t
*ipst
= connp
->conn_netstack
->netstack_ip
;
246 connp
->conn_useloopback
= 1;
247 ipcl_hash_insert_wildcard(ipst
->ips_rts_clients
, connp
);
251 * This is a call from the RTS module indicating that it is closing.
254 ip_rts_unregister(conn_t
*connp
)
256 ipcl_hash_remove(connp
);
260 * Processes requests received on a routing socket. It extracts all the
261 * arguments and calls the appropriate function to process the request.
263 * RTA_SRC bit flag requests are sent by 'route -setsrc'.
265 * In general, this function does not consume the message supplied but rather
266 * sends the message upstream with an appropriate UNIX errno.
269 ip_rts_request_common(mblk_t
*mp
, conn_t
*connp
, cred_t
*ioc_cr
)
271 rt_msghdr_t
*rtm
= NULL
;
272 in6_addr_t dst_addr_v6
;
273 in6_addr_t src_addr_v6
;
274 in6_addr_t gw_addr_v6
;
275 in6_addr_t net_mask_v6
;
276 in6_addr_t author_v6
;
277 in6_addr_t if_addr_v6
;
282 in6_addr_t v6setsrc
= ipv6_all_zeros
;
284 int match_flags
= MATCH_IRE_DSTONLY
;
285 int match_flags_local
= MATCH_IRE_TYPE
| MATCH_IRE_GW
;
297 zoneid
= connp
->conn_zoneid
;
298 ipst
= connp
->conn_netstack
->netstack_ip
;
300 if (mp
->b_cont
!= NULL
&& !pullupmsg(mp
, -1)) {
305 if ((mp
->b_wptr
- mp
->b_rptr
) < sizeof (rt_msghdr_t
)) {
312 * Check the routing message for basic consistency including the
313 * version number and that the number of octets written is the same
314 * as specified by the rtm_msglen field.
316 * At this point, an error can be delivered back via rtm_errno.
318 rtm
= (rt_msghdr_t
*)mp
->b_rptr
;
319 if ((mp
->b_wptr
- mp
->b_rptr
) != rtm
->rtm_msglen
) {
323 if (rtm
->rtm_version
!= RTM_VERSION
) {
324 error
= EPROTONOSUPPORT
;
328 /* Only allow RTM_GET or RTM_RESOLVE for unprivileged process */
329 if (rtm
->rtm_type
!= RTM_GET
&&
330 rtm
->rtm_type
!= RTM_RESOLVE
&&
332 secpolicy_ip_config(ioc_cr
, B_FALSE
) != 0)) {
337 found_addrs
= rts_getaddrs(rtm
, &dst_addr_v6
, &gw_addr_v6
, &net_mask_v6
,
338 &author_v6
, &if_addr_v6
, &src_addr_v6
, &index
, &af
, &error
);
343 if ((found_addrs
& RTA_DST
) == 0) {
349 * Based on the address family of the destination address, determine
350 * the destination, gateway and netmask and return the appropriate error
351 * if an unknown address family was specified (following the errno
352 * values that 4.4BSD-Lite2 returns.)
356 IN6_V4MAPPED_TO_IPADDR(&dst_addr_v6
, dst_addr
);
357 IN6_V4MAPPED_TO_IPADDR(&src_addr_v6
, src_addr
);
358 IN6_V4MAPPED_TO_IPADDR(&gw_addr_v6
, gw_addr
);
359 if (((found_addrs
& RTA_NETMASK
) == 0) ||
360 (rtm
->rtm_flags
& RTF_HOST
))
361 net_mask
= IP_HOST_MASK
;
363 IN6_V4MAPPED_TO_IPADDR(&net_mask_v6
, net_mask
);
366 if (((found_addrs
& RTA_NETMASK
) == 0) ||
367 (rtm
->rtm_flags
& RTF_HOST
))
368 net_mask_v6
= ipv6_all_ones
;
372 * These errno values are meant to be compatible with
373 * 4.4BSD-Lite2 for the given message types.
375 switch (rtm
->rtm_type
) {
382 error
= EAFNOSUPPORT
;
391 * At this point, the address family must be something known.
393 ASSERT(af
== AF_INET
|| af
== AF_INET6
);
399 ill
= ill_lookup_on_ifindex(index
, af
== AF_INET6
, ipst
);
406 * Since all interfaces in an IPMP group must be equivalent,
407 * we prevent changes to a specific underlying interface's
408 * routing configuration. However, for backward compatibility,
409 * we interpret a request to add a route on an underlying
410 * interface as a request to add a route on its IPMP interface.
412 if (IS_UNDER_IPMP(ill
)) {
413 switch (rtm
->rtm_type
) {
419 index
= ipmp_ill_get_ipmp_ifindex(ill
);
422 ill
= NULL
; /* already refrele'd */
430 match_flags
|= MATCH_IRE_ILL
;
432 * This provides the same zoneid as in Solaris 10
433 * that -ifp picks the zoneid from the first ipif on the ill.
434 * But it might not be useful since the first ipif will always
435 * have the same zoneid as the ill.
437 ipif
= ipif_get_next_ipif(NULL
, ill
);
439 zoneid
= ipif
->ipif_zoneid
;
445 * If a netmask was supplied in the message, then subsequent route
446 * lookups will attempt to match on the netmask as well.
448 if ((found_addrs
& RTA_NETMASK
) != 0)
449 match_flags
|= MATCH_IRE_MASK
;
451 switch (rtm
->rtm_type
) {
453 /* if we are adding a route, gateway is a must */
454 if ((found_addrs
& RTA_GATEWAY
) == 0) {
460 * User-specified source addresses
461 * do not support interface based routing.
462 * Assigning a source address to an interface based
463 * route is achievable by plumbing a new ipif and
464 * setting up the interface route via this ipif,
467 if (rtm
->rtm_flags
& RTF_SETSRC
) {
468 if ((rtm
->rtm_flags
& RTF_GATEWAY
) == 0) {
469 error
= EADDRNOTAVAIL
;
476 if (src_addr
!= INADDR_ANY
) {
480 * The RTF_SETSRC flag is present, check that
481 * the supplied src address is not the loopback
482 * address. This would produce martian packets.
484 if (src_addr
== htonl(INADDR_LOOPBACK
)) {
489 * Also check that the supplied address is a
490 * valid, local one. Only allow IFF_UP ones
492 type
= ip_type_v4(src_addr
, ipst
);
493 if (!(type
& (IRE_LOCAL
|IRE_LOOPBACK
))) {
494 error
= EADDRNOTAVAIL
;
499 * The RTF_SETSRC modifier must be associated
500 * to a non-null source address.
502 if (rtm
->rtm_flags
& RTF_SETSRC
) {
508 error
= ip_rt_add(dst_addr
, net_mask
, gw_addr
, src_addr
,
509 rtm
->rtm_flags
, ill
, &ire
, B_FALSE
, ipst
, zoneid
);
511 ASSERT(!MUTEX_HELD(&ill
->ill_lock
));
514 if (!IN6_IS_ADDR_UNSPECIFIED(&src_addr_v6
)) {
518 * The RTF_SETSRC flag is present, check that
519 * the supplied src address is not the loopback
520 * address. This would produce martian packets.
522 if (IN6_IS_ADDR_LOOPBACK(&src_addr_v6
)) {
527 * Also check that the supplied address is a
528 * valid, local one. Only allow UP ones.
530 type
= ip_type_v6(&src_addr_v6
, ipst
);
531 if (!(type
& (IRE_LOCAL
|IRE_LOOPBACK
))) {
532 error
= EADDRNOTAVAIL
;
536 error
= ip_rt_add_v6(&dst_addr_v6
, &net_mask_v6
,
537 &gw_addr_v6
, &src_addr_v6
, rtm
->rtm_flags
,
538 ill
, &ire
, ipst
, zoneid
);
542 * The RTF_SETSRC modifier must be associated
543 * to a non-null source address.
545 if (rtm
->rtm_flags
& RTF_SETSRC
) {
549 error
= ip_rt_add_v6(&dst_addr_v6
, &net_mask_v6
,
550 &gw_addr_v6
, NULL
, rtm
->rtm_flags
,
551 ill
, &ire
, ipst
, zoneid
);
553 ASSERT(!MUTEX_HELD(&ill
->ill_lock
));
559 rts_setmetrics(ire
, rtm
->rtm_inits
, &rtm
->rtm_rmx
);
562 /* if we are deleting a route, gateway is a must */
563 if ((found_addrs
& RTA_GATEWAY
) == 0) {
568 * The RTF_SETSRC modifier does not make sense
569 * when deleting a route.
571 if (rtm
->rtm_flags
& RTF_SETSRC
) {
578 error
= ip_rt_delete(dst_addr
, net_mask
, gw_addr
,
579 found_addrs
, rtm
->rtm_flags
, ill
, B_FALSE
,
583 error
= ip_rt_delete_v6(&dst_addr_v6
, &net_mask_v6
,
584 &gw_addr_v6
, found_addrs
, rtm
->rtm_flags
, ill
,
592 * In the case of RTM_GET, the forwarding table should be
593 * searched recursively. Also, if a gateway was
594 * specified then the gateway address must also be matched.
596 * In the case of RTM_CHANGE, the gateway address (if supplied)
597 * is the new gateway address so matching on the gateway address
598 * is not done. This can lead to ambiguity when looking up the
599 * route to change as usually only the destination (and netmask,
600 * if supplied) is used for the lookup. However if a RTA_IFP
601 * sockaddr is also supplied, it can disambiguate which route to
602 * change provided the ambiguous routes are tied to distinct
603 * ill's (or interface indices). If the routes are not tied to
604 * any particular interfaces (for example, with traditional
605 * gateway routes), then a RTA_IFP sockaddr will be of no use as
606 * it won't match any such routes.
607 * RTA_SRC is not supported for RTM_GET and RTM_CHANGE,
608 * except when RTM_CHANGE is combined to RTF_SETSRC.
610 if (((found_addrs
& RTA_SRC
) != 0) &&
611 ((rtm
->rtm_type
== RTM_GET
) ||
612 !(rtm
->rtm_flags
& RTF_SETSRC
))) {
617 if (rtm
->rtm_type
== RTM_GET
) {
618 if ((found_addrs
& RTA_GATEWAY
) != 0)
619 match_flags
|= MATCH_IRE_GW
;
621 if (rtm
->rtm_type
== RTM_CHANGE
) {
622 if ((found_addrs
& RTA_GATEWAY
) &&
623 (rtm
->rtm_flags
& RTF_SETSRC
)) {
625 * Do not want to change the gateway,
626 * but rather the source address.
628 match_flags
|= MATCH_IRE_GW
;
633 * If the netmask is all ones (either as supplied or as derived
634 * above), then first check for an IRE_LOOPBACK or
637 * If we didn't check for or find an IRE_LOOPBACK or IRE_LOCAL
638 * entry, then look for any other type of IRE.
642 if (net_mask
== IP_HOST_MASK
) {
643 ire
= ire_ftable_lookup_v4(dst_addr
, 0, gw_addr
,
644 IRE_LOCAL
| IRE_LOOPBACK
, NULL
, zoneid
,
645 match_flags_local
, 0, ipst
, NULL
);
648 ire
= ire_lookup_v4(dst_addr
, net_mask
,
649 gw_addr
, ill
, zoneid
, match_flags
,
650 ipst
, &ifire
, &v4setsrc
);
651 IN6_IPADDR_TO_V4MAPPED(v4setsrc
, &v6setsrc
);
655 if (IN6_ARE_ADDR_EQUAL(&net_mask_v6
, &ipv6_all_ones
)) {
656 ire
= ire_ftable_lookup_v6(&dst_addr_v6
, NULL
,
657 &gw_addr_v6
, IRE_LOCAL
| IRE_LOOPBACK
, NULL
,
658 zoneid
, match_flags_local
, 0, ipst
, NULL
);
661 ire
= ire_lookup_v6(&dst_addr_v6
,
662 &net_mask_v6
, &gw_addr_v6
, ill
, zoneid
,
663 match_flags
, ipst
, &ifire
, &v6setsrc
);
673 * Want to return failure if we get an IRE_NOROUTE from
674 * ire_route_recursive
676 if (ire
->ire_type
& IRE_NOROUTE
) {
683 /* we know the IRE before we come here */
684 switch (rtm
->rtm_type
) {
686 mp1
= rts_rtmget(mp
, ire
, ifire
, &v6setsrc
, af
);
693 rtm
= (rt_msghdr_t
*)mp
->b_rptr
;
697 * Note that we do not need to do
698 * ire_flush_cache_*(IRE_FLUSH_ADD) as a change
699 * in metrics or gateway will not affect existing
700 * routes since it does not create a more specific
705 if ((found_addrs
& RTA_GATEWAY
) != 0 &&
706 (ire
->ire_gateway_addr
!= gw_addr
)) {
707 ire
->ire_gateway_addr
= gw_addr
;
710 if ((found_addrs
& RTA_SRC
) != 0 &&
711 (rtm
->rtm_flags
& RTF_SETSRC
) != 0 &&
712 (ire
->ire_setsrc_addr
!= src_addr
)) {
713 if (src_addr
!= INADDR_ANY
) {
717 * The RTF_SETSRC flag is
718 * present, check that the
719 * supplied src address is not
720 * the loopback address. This
721 * would produce martian
725 htonl(INADDR_LOOPBACK
)) {
730 * Also check that the
731 * supplied addr is a valid
734 type
= ip_type_v4(src_addr
,
737 (IRE_LOCAL
|IRE_LOOPBACK
))) {
738 error
= EADDRNOTAVAIL
;
741 ire
->ire_flags
|= RTF_SETSRC
;
742 ire
->ire_setsrc_addr
=
745 ire
->ire_flags
&= ~RTF_SETSRC
;
746 ire
->ire_setsrc_addr
=
750 * Let conn_ixa caching know that
751 * source address selection changed
753 ip_update_source_selection(ipst
);
755 ire_flush_cache_v4(ire
, IRE_FLUSH_GWCHANGE
);
758 mutex_enter(&ire
->ire_lock
);
759 if ((found_addrs
& RTA_GATEWAY
) != 0 &&
761 &ire
->ire_gateway_addr_v6
, &gw_addr_v6
)) {
762 ire
->ire_gateway_addr_v6
= gw_addr_v6
;
764 mutex_exit(&ire
->ire_lock
);
766 if ((found_addrs
& RTA_SRC
) != 0 &&
767 (rtm
->rtm_flags
& RTF_SETSRC
) != 0 &&
769 &ire
->ire_setsrc_addr_v6
, &src_addr_v6
)) {
770 if (!IN6_IS_ADDR_UNSPECIFIED(
775 * The RTF_SETSRC flag is
776 * present, check that the
777 * supplied src address is not
778 * the loopback address. This
779 * would produce martian
782 if (IN6_IS_ADDR_LOOPBACK(
788 * Also check that the
789 * supplied addr is a valid
792 type
= ip_type_v6(&src_addr_v6
,
795 (IRE_LOCAL
|IRE_LOOPBACK
))) {
796 error
= EADDRNOTAVAIL
;
799 mutex_enter(&ire
->ire_lock
);
800 ire
->ire_flags
|= RTF_SETSRC
;
801 ire
->ire_setsrc_addr_v6
=
803 mutex_exit(&ire
->ire_lock
);
805 mutex_enter(&ire
->ire_lock
);
806 ire
->ire_flags
&= ~RTF_SETSRC
;
807 ire
->ire_setsrc_addr_v6
=
809 mutex_exit(&ire
->ire_lock
);
812 * Let conn_ixa caching know that
813 * source address selection changed
815 ip_update_source_selection(ipst
);
817 ire_flush_cache_v6(ire
, IRE_FLUSH_GWCHANGE
);
821 rts_setmetrics(ire
, rtm
->rtm_inits
, &rtm
->rtm_rmx
);
838 ASSERT(mp
->b_wptr
<= mp
->b_datap
->db_lim
);
840 rtm
->rtm_errno
= error
;
842 ip1dbg(("ip_rts_request: error %d\n", error
));
844 rtm
->rtm_flags
|= RTF_DONE
;
845 /* OK ACK already set up by caller except this */
846 ip2dbg(("ip_rts_request: OK ACK\n"));
848 rts_queue_input(mp
, connp
, af
, RTSQ_ALL
, ipst
);
854 * Helper function that can do recursive lookups including when
855 * MATCH_IRE_GW and/or MATCH_IRE_MASK is set.
858 ire_lookup_v4(ipaddr_t dst_addr
, ipaddr_t net_mask
, ipaddr_t gw_addr
,
859 const ill_t
*ill
, zoneid_t zoneid
, int match_flags
, ip_stack_t
*ipst
,
860 ire_t
**pifire
, ipaddr_t
*v4setsrcp
)
867 *v4setsrcp
= INADDR_ANY
;
869 /* Skip IRE_IF_CLONE */
870 match_flags
|= MATCH_IRE_TYPE
;
871 ire_type
= (IRE_ONLINK
|IRE_OFFLINK
) & ~IRE_IF_CLONE
;
874 * ire_route_recursive can't match gateway or mask thus if they are
875 * set we have to do two steps of lookups
877 if (match_flags
& (MATCH_IRE_GW
|MATCH_IRE_MASK
)) {
878 ire
= ire_ftable_lookup_v4(dst_addr
, net_mask
, gw_addr
,
879 ire_type
, ill
, zoneid
, match_flags
, 0, ipst
, NULL
);
881 if (ire
== NULL
||(ire
->ire_flags
& (RTF_REJECT
|RTF_BLACKHOLE
)))
884 if (ire
->ire_type
& IRE_ONLINK
)
887 if (ire
->ire_flags
& RTF_SETSRC
) {
888 ASSERT(ire
->ire_setsrc_addr
!= INADDR_ANY
);
889 *v4setsrcp
= ire
->ire_setsrc_addr
;
893 /* Look for an interface ire recursively based on the gateway */
894 dst_addr
= ire
->ire_gateway_addr
;
895 match_flags
&= ~(MATCH_IRE_GW
|MATCH_IRE_MASK
);
897 * Don't allow anything unusual past the first iteration.
898 * After the first lookup, we should no longer look for
899 * (IRE_LOCAL|IRE_LOOPBACK|IRE_BROADCAST) or RTF_INDIRECT
902 * In addition, after we have found a direct IRE_OFFLINK,
903 * we should only look for interface or clone routes.
905 match_flags
|= MATCH_IRE_DIRECT
; /* no more RTF_INDIRECTs */
907 if ((ire
->ire_type
& IRE_OFFLINK
) &&
908 !(ire
->ire_flags
& RTF_INDIRECT
)) {
909 ire_type
= IRE_IF_ALL
;
912 * no more local, loopback, broadcast routes
914 if (!(match_flags
& MATCH_IRE_TYPE
))
915 ire_type
= (IRE_OFFLINK
|IRE_ONLINK
);
916 ire_type
&= ~(IRE_LOCAL
|IRE_LOOPBACK
|IRE_BROADCAST
);
918 match_flags
|= MATCH_IRE_TYPE
;
920 ifire
= ire_route_recursive_v4(dst_addr
, ire_type
, ill
, zoneid
,
921 match_flags
, IRR_INCOMPLETE
, 0, ipst
, v4setsrcp
,
924 ire
= ire_route_recursive_v4(dst_addr
, ire_type
, ill
, zoneid
,
925 match_flags
, IRR_INCOMPLETE
, 0, ipst
, v4setsrcp
,
933 ire_lookup_v6(const in6_addr_t
*dst_addr_v6
,
934 const in6_addr_t
*net_mask_v6
, const in6_addr_t
*gw_addr_v6
,
935 const ill_t
*ill
, zoneid_t zoneid
, int match_flags
, ip_stack_t
*ipst
,
936 ire_t
**pifire
, in6_addr_t
*v6setsrcp
)
943 *v6setsrcp
= ipv6_all_zeros
;
945 /* Skip IRE_IF_CLONE */
946 match_flags
|= MATCH_IRE_TYPE
;
947 ire_type
= (IRE_ONLINK
|IRE_OFFLINK
) & ~IRE_IF_CLONE
;
950 * ire_route_recursive can't match gateway or mask thus if they are
951 * set we have to do two steps of lookups
953 if (match_flags
& (MATCH_IRE_GW
|MATCH_IRE_MASK
)) {
956 ire
= ire_ftable_lookup_v6(dst_addr_v6
, net_mask_v6
,
957 gw_addr_v6
, ire_type
, ill
, zoneid
, match_flags
, 0,
960 if (ire
== NULL
||(ire
->ire_flags
& (RTF_REJECT
|RTF_BLACKHOLE
)))
963 if (ire
->ire_type
& IRE_ONLINK
)
966 if (ire
->ire_flags
& RTF_SETSRC
) {
967 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(
968 &ire
->ire_setsrc_addr_v6
));
969 *v6setsrcp
= ire
->ire_setsrc_addr_v6
;
973 mutex_enter(&ire
->ire_lock
);
974 dst
= ire
->ire_gateway_addr_v6
;
975 mutex_exit(&ire
->ire_lock
);
976 match_flags
&= ~(MATCH_IRE_GW
|MATCH_IRE_MASK
);
978 * Don't allow anything unusual past the first iteration.
979 * After the first lookup, we should no longer look for
980 * (IRE_LOCAL|IRE_LOOPBACK|IRE_BROADCAST) or RTF_INDIRECT
983 * In addition, after we have found a direct IRE_OFFLINK,
984 * we should only look for interface or clone routes.
986 match_flags
|= MATCH_IRE_DIRECT
; /* no more RTF_INDIRECTs */
988 if ((ire
->ire_type
& IRE_OFFLINK
) &&
989 !(ire
->ire_flags
& RTF_INDIRECT
)) {
990 ire_type
= IRE_IF_ALL
;
993 * no more local, loopback routes
995 if (!(match_flags
& MATCH_IRE_TYPE
))
996 ire_type
= (IRE_OFFLINK
|IRE_ONLINK
);
997 ire_type
&= ~(IRE_LOCAL
|IRE_LOOPBACK
);
999 match_flags
|= MATCH_IRE_TYPE
;
1001 ifire
= ire_route_recursive_v6(&dst
, ire_type
, ill
, zoneid
,
1002 match_flags
, IRR_INCOMPLETE
, 0, ipst
, v6setsrcp
, NULL
);
1004 ire
= ire_route_recursive_v6(dst_addr_v6
, ire_type
, ill
, zoneid
,
1005 match_flags
, IRR_INCOMPLETE
, 0, ipst
, v6setsrcp
, NULL
);
1013 * Handle IP_IOC_RTS_REQUEST ioctls
1016 ip_rts_request(queue_t
*q
, mblk_t
*mp
, cred_t
*ioc_cr
)
1018 conn_t
*connp
= Q_TO_CONN(q
);
1019 IOCP iocp
= (IOCP
)mp
->b_rptr
;
1020 mblk_t
*mp1
, *ioc_mp
= mp
;
1024 ipst
= connp
->conn_netstack
->netstack_ip
;
1026 ASSERT(mp
->b_cont
!= NULL
);
1027 /* ioc_mp holds mp */
1031 * The Routing Socket data starts on
1032 * next block. If there is no next block
1033 * this is an indication from routing module
1034 * that it is a routing socket stream queue.
1035 * We need to support that for compatibility with SDP since
1036 * it has a contract private interface to use IP_IOC_RTS_REQUEST.
1037 * Note: SDP no longer uses IP_IOC_RTS_REQUEST - we can remove this.
1039 if (mp
->b_cont
== NULL
) {
1041 * This is a message from SDP
1042 * indicating that this is a Routing Socket
1043 * Stream. Insert this conn_t in routing
1044 * socket client list.
1046 connp
->conn_useloopback
= 1;
1047 ipcl_hash_insert_wildcard(ipst
->ips_rts_clients
, connp
);
1050 mp1
= dupmsg(mp
->b_cont
);
1057 error
= ip_rts_request_common(mp
, connp
, ioc_cr
);
1059 iocp
->ioc_error
= error
;
1060 ioc_mp
->b_datap
->db_type
= M_IOCACK
;
1061 if (iocp
->ioc_error
!= 0)
1062 iocp
->ioc_count
= 0;
1063 /* Note that we pass a NULL ira to rts_input */
1064 (connp
->conn_recv
)(connp
, ioc_mp
, NULL
, NULL
);
1066 /* conn was refheld in ip_wput_ioctl. */
1067 CONN_DEC_IOCTLREF(connp
);
1068 CONN_OPER_PENDING_DONE(connp
);
1074 * Build a reply to the RTM_GET request contained in the given message block
1075 * using the retrieved IRE of the destination address, the parent IRE (if it
1076 * exists) and the address family.
1078 * Returns a pointer to a message block containing the reply if successful,
1079 * otherwise NULL is returned.
1082 rts_rtmget(mblk_t
*mp
, ire_t
*ire
, ire_t
*ifire
, const in6_addr_t
*setsrc
,
1086 rt_msghdr_t
*new_rtm
;
1091 ipif_t
*ipif
= NULL
;
1092 ipaddr_t brdaddr
; /* IFF_POINTOPOINT destination */
1094 in6_addr_t brdaddr6
; /* IFF_POINTOPOINT destination */
1098 rtm
= (rt_msghdr_t
*)mp
->b_rptr
;
1101 * Find the ill used to send packets. This will be NULL in case
1102 * of a reject or blackhole.
1105 ill
= ire_nexthop_ill(ifire
);
1107 ill
= ire_nexthop_ill(ire
);
1110 * Always return RTA_DST, RTA_GATEWAY and RTA_NETMASK.
1112 * The 4.4BSD-Lite2 code (net/rtsock.c) returns both
1113 * RTA_IFP and RTA_IFA if either is defined, and also
1114 * returns RTA_BRD if the appropriate interface is
1117 rtm_addrs
= (RTA_DST
| RTA_GATEWAY
| RTA_NETMASK
);
1118 if ((rtm
->rtm_addrs
& (RTA_IFP
| RTA_IFA
)) && ill
!= NULL
) {
1119 rtm_addrs
|= (RTA_IFP
| RTA_IFA
);
1121 * We associate an IRE with an ILL, hence we don't exactly
1122 * know what might make sense for RTA_IFA and RTA_BRD. We
1123 * pick the first ipif on the ill.
1125 ipif
= ipif_get_next_ipif(NULL
, ill
);
1127 if (ipif
->ipif_isv6
)
1128 ifaddr6
= ipif
->ipif_v6lcl_addr
;
1130 ifaddr
= ipif
->ipif_lcl_addr
;
1131 if (ipif
->ipif_flags
& IPIF_POINTOPOINT
) {
1132 rtm_addrs
|= RTA_BRD
;
1133 if (ipif
->ipif_isv6
)
1134 brdaddr6
= ipif
->ipif_v6pp_dst_addr
;
1136 brdaddr
= ipif
->ipif_pp_dst_addr
;
1142 new_mp
= rts_alloc_msg(RTM_GET
, rtm_addrs
, af
);
1143 if (new_mp
== NULL
) {
1150 * We set the destination address, gateway address,
1151 * netmask and flags in the RTM_GET response depending
1152 * on whether we found a parent IRE or not.
1153 * In particular, if we did find a parent IRE during the
1154 * recursive search, use that IRE's gateway address.
1155 * Otherwise, we use the IRE's source address for the
1158 ASSERT(af
== AF_INET
|| af
== AF_INET6
);
1161 IN6_V4MAPPED_TO_IPADDR(setsrc
, v4setsrc
);
1162 if (v4setsrc
!= INADDR_ANY
)
1163 rtm_addrs
|= RTA_SRC
;
1165 rtm_flags
= ire
->ire_flags
;
1166 rts_fill_msg(RTM_GET
, rtm_addrs
, ire
->ire_addr
,
1167 ire
->ire_mask
, ire
->ire_gateway_addr
, v4setsrc
,
1168 brdaddr
, 0, ifaddr
, ill
, new_mp
);
1171 if (!IN6_IS_ADDR_UNSPECIFIED(setsrc
))
1172 rtm_addrs
|= RTA_SRC
;
1174 rtm_flags
= ire
->ire_flags
;
1175 rts_fill_msg_v6(RTM_GET
, rtm_addrs
, &ire
->ire_addr_v6
,
1176 &ire
->ire_mask_v6
, &ire
->ire_gateway_addr_v6
,
1177 setsrc
, &brdaddr6
, &ipv6_all_zeros
,
1178 &ifaddr6
, ill
, new_mp
);
1182 new_rtm
= (rt_msghdr_t
*)new_mp
->b_rptr
;
1185 * The rtm_msglen, rtm_version and rtm_type fields in
1186 * RTM_GET response are filled in by rts_fill_msg.
1188 * rtm_addrs and rtm_flags are filled in based on what
1189 * was requested and the state of the IREs looked up
1192 * rtm_inits and rtm_rmx are filled in with metrics
1193 * based on whether a parent IRE was found or not.
1195 * TODO: rtm_index and rtm_use should probably be
1196 * filled in with something reasonable here and not just
1197 * copied from the request.
1199 new_rtm
->rtm_index
= rtm
->rtm_index
;
1200 new_rtm
->rtm_pid
= rtm
->rtm_pid
;
1201 new_rtm
->rtm_seq
= rtm
->rtm_seq
;
1202 new_rtm
->rtm_use
= rtm
->rtm_use
;
1203 new_rtm
->rtm_addrs
= rtm_addrs
;
1204 new_rtm
->rtm_flags
= rtm_flags
;
1205 new_rtm
->rtm_inits
= rts_getmetrics(ire
, ill
, &new_rtm
->rtm_rmx
);
1212 * Fill the given if_data_t with interface statistics.
1215 rts_getifdata(if_data_t
*if_data
, const ipif_t
*ipif
)
1217 if_data
->ifi_type
= ipif
->ipif_ill
->ill_type
;
1218 /* ethernet, tokenring, etc */
1219 if_data
->ifi_addrlen
= 0; /* media address length */
1220 if_data
->ifi_hdrlen
= 0; /* media header length */
1221 if_data
->ifi_mtu
= ipif
->ipif_ill
->ill_mtu
; /* mtu */
1222 /* metric (external only) */
1223 if_data
->ifi_metric
= ipif
->ipif_ill
->ill_metric
;
1224 if_data
->ifi_baudrate
= 0; /* linespeed */
1226 if_data
->ifi_ipackets
= 0; /* packets received on if */
1227 if_data
->ifi_ierrors
= 0; /* input errors on interface */
1228 if_data
->ifi_opackets
= 0; /* packets sent on interface */
1229 if_data
->ifi_oerrors
= 0; /* output errors on if */
1230 if_data
->ifi_collisions
= 0; /* collisions on csma if */
1231 if_data
->ifi_ibytes
= 0; /* total number received */
1232 if_data
->ifi_obytes
= 0; /* total number sent */
1233 if_data
->ifi_imcasts
= 0; /* multicast packets received */
1234 if_data
->ifi_omcasts
= 0; /* multicast packets sent */
1235 if_data
->ifi_iqdrops
= 0; /* dropped on input */
1236 if_data
->ifi_noproto
= 0; /* destined for unsupported */
1241 * Set the metrics on a forwarding table route.
/*
 * rts_setmetrics: store the metrics selected by the which bitmask (RTV_MTU,
 * RTV_RTT, RTV_SSTHRESH, RTV_RTTVAR, RTV_SPIPE, RTV_RPIPE) from metrics
 * into ire->ire_metrics, then walk ill->ill_saved_ire_mp and store them
 * into the ifrt_metrics of the saved entries as well.
 *
 * NOTE(review): several lines of this function are not visible in this
 * listing (local declarations, early exits, braces). The comments below
 * describe only the statements that can be seen.
 */
1244 rts_setmetrics(ire_t
*ire
, uint_t which
, rt_metrics_t
*metrics
)
1251 in6_addr_t gw_addr_v6
;
1253 /* Need to add back some metrics to the IRE? */
1255 * Bypass obtaining the lock and searching ill_saved_ire_mp in the
1256 * common case of no metrics.
/* Flag the IRE metrics as having been set. */
1260 ire
->ire_metrics
.iulp_set
= B_TRUE
;
1263 * iulp_rtt and iulp_rtt_sd are in milliseconds, but 4.4BSD-Lite2's
1264 * <net/route.h> says: rmx_rtt and rmx_rttvar are stored as
/* Divide the supplied rtt and rttvar by 1000 before they are stored. */
1267 if (which
& RTV_RTT
)
1268 rtt
= metrics
->rmx_rtt
/ 1000;
1269 if (which
& RTV_RTTVAR
)
1270 rtt_sd
= metrics
->rmx_rttvar
/ 1000;
1273 * Update the metrics in the IRE itself.
/* All IRE metric stores below happen with ire_lock held. */
1275 mutex_enter(&ire
->ire_lock
);
1276 if (which
& RTV_MTU
)
1277 ire
->ire_metrics
.iulp_mtu
= metrics
->rmx_mtu
;
1278 if (which
& RTV_RTT
)
1279 ire
->ire_metrics
.iulp_rtt
= rtt
;
1280 if (which
& RTV_SSTHRESH
)
1281 ire
->ire_metrics
.iulp_ssthresh
= metrics
->rmx_ssthresh
;
1282 if (which
& RTV_RTTVAR
)
1283 ire
->ire_metrics
.iulp_rtt_sd
= rtt_sd
;
1284 if (which
& RTV_SPIPE
)
1285 ire
->ire_metrics
.iulp_spipe
= metrics
->rmx_sendpipe
;
1286 if (which
& RTV_RPIPE
)
1287 ire
->ire_metrics
.iulp_rpipe
= metrics
->rmx_recvpipe
;
1288 mutex_exit(&ire
->ire_lock
);
1291 * Search through the ifrt_t chain hanging off the ILL in order to
1292 * reflect the metric change there.
/*
 * The ill and the IRE must agree on IP version.
 * NOTE(review): the assignment of ill is not visible in this listing.
 */
1297 ASSERT((ill
->ill_isv6
&& ire
->ire_ipversion
== IPV6_VERSION
) ||
1298 ((!ill
->ill_isv6
&& ire
->ire_ipversion
== IPV4_VERSION
)));
/* For IPv6, take a copy of the gateway address under ire_lock. */
1299 if (ill
->ill_isv6
) {
1300 mutex_enter(&ire
->ire_lock
);
1301 gw_addr_v6
= ire
->ire_gateway_addr_v6
;
1302 mutex_exit(&ire
->ire_lock
);
/* Walk the saved-IRE mblk chain (linked by b_cont) under ill_saved_ire_lock. */
1304 mutex_enter(&ill
->ill_saved_ire_lock
);
1305 for (mp
= ill
->ill_saved_ire_mp
; mp
!= NULL
; mp
= mp
->b_cont
) {
1307 * On a given ill, the tuple of address, gateway, mask,
1308 * ire_type and zoneid unique for each saved IRE.
1310 ifrt
= (ifrt_t
*)mp
->b_rptr
;
/*
 * IPv6 comparison of address, gateway and mask.
 * NOTE(review): the second operands of the gateway and mask comparisons
 * and the action taken on mismatch are not visible in this listing.
 */
1311 if (ill
->ill_isv6
) {
1312 if (!IN6_ARE_ADDR_EQUAL(&ifrt
->ifrt_v6addr
,
1313 &ire
->ire_addr_v6
) ||
1314 !IN6_ARE_ADDR_EQUAL(&ifrt
->ifrt_v6gateway_addr
,
1316 !IN6_ARE_ADDR_EQUAL(&ifrt
->ifrt_v6mask
,
/* IPv4 comparison of address, gateway and mask. */
1320 if (ifrt
->ifrt_addr
!= ire
->ire_addr
||
1321 ifrt
->ifrt_gateway_addr
!= ire
->ire_gateway_addr
||
1322 ifrt
->ifrt_mask
!= ire
->ire_mask
)
/* Zone id and IRE type are compared for both address families. */
1325 if (ifrt
->ifrt_zoneid
!= ire
->ire_zoneid
||
1326 ifrt
->ifrt_type
!= ire
->ire_type
)
/*
 * Store the selected metrics into the saved copy.
 * NOTE(review): presumably reached only for the matching entry; the
 * statements that skip non-matching entries are not visible here.
 */
1329 if (which
& RTV_MTU
)
1330 ifrt
->ifrt_metrics
.iulp_mtu
= metrics
->rmx_mtu
;
1331 if (which
& RTV_RTT
)
1332 ifrt
->ifrt_metrics
.iulp_rtt
= rtt
;
1333 if (which
& RTV_SSTHRESH
) {
1334 ifrt
->ifrt_metrics
.iulp_ssthresh
=
1335 metrics
->rmx_ssthresh
;
/*
 * NOTE(review): the IRE update above stores rtt_sd (rmx_rttvar / 1000) in
 * iulp_rtt_sd, but this saved copy stores the raw rmx_rttvar. The units
 * look inconsistent with the RTV_RTT handling just above; confirm whether
 * rtt_sd was intended here.
 */
1337 if (which
& RTV_RTTVAR
)
1338 ifrt
->ifrt_metrics
.iulp_rtt_sd
= metrics
->rmx_rttvar
;
1339 if (which
& RTV_SPIPE
)
1340 ifrt
->ifrt_metrics
.iulp_spipe
= metrics
->rmx_sendpipe
;
1341 if (which
& RTV_RPIPE
)
1342 ifrt
->ifrt_metrics
.iulp_rpipe
= metrics
->rmx_recvpipe
;
1345 mutex_exit(&ill
->ill_saved_ire_lock
);
1348 * Update any IRE_IF_CLONE hanging created from this IRE_IF so they
1349 * get any new iulp_mtu.
1350 * We do that by deleting them; ire_create_if_clone will pick
1351 * up the new metrics.
/* Only interface IREs that have dependent children are affected. */
1353 if ((ire
->ire_type
& IRE_INTERFACE
) && ire
->ire_dep_children
!= 0)
1354 ire_dep_delete_if_clone(ire
);
1358 * Get the metrics from a forwarding table route.
/*
 * rts_getmetrics: zero *metrics, fill it from ire->ire_metrics and return
 * metrics_set, the OR of the RTV_* bits for every field that was filled.
 *
 * ire     - route whose ire_metrics are read.
 * ill     - may be NULL; when non-NULL its ill_mtu is the MTU fallback.
 * metrics - output; fully overwritten.
 *
 * NOTE(review): some lines (braces, comment delimiters) are not visible in
 * this listing.
 */
1361 rts_getmetrics(ire_t
*ire
, ill_t
*ill
, rt_metrics_t
*metrics
)
1363 int metrics_set
= 0;
1365 bzero(metrics
, sizeof (rt_metrics_t
));
1368 * iulp_rtt and iulp_rtt_sd are in milliseconds, but 4.4BSD-Lite2's
1369 * <net/route.h> says: rmx_rtt and rmx_rttvar are stored as
/* The stored rtt is multiplied by 1000 on the way out. */
1372 metrics
->rmx_rtt
= ire
->ire_metrics
.iulp_rtt
* 1000;
1373 metrics_set
|= RTV_RTT
;
/*
 * Use the MTU stored on the route when it is non-zero; otherwise fall back
 * to the ill MTU if an ill was supplied. With neither, rmx_mtu stays zero
 * and RTV_MTU is not reported.
 */
1374 if (ire
->ire_metrics
.iulp_mtu
!= 0) {
1375 metrics
->rmx_mtu
= ire
->ire_metrics
.iulp_mtu
;
1376 metrics_set
|= RTV_MTU
;
1377 } else if (ill
!= NULL
) {
1378 metrics
->rmx_mtu
= ill
->ill_mtu
;
1379 metrics_set
|= RTV_MTU
;
/* The remaining fields are copied and reported without a zero test. */
1381 metrics
->rmx_ssthresh
= ire
->ire_metrics
.iulp_ssthresh
;
1382 metrics_set
|= RTV_SSTHRESH
;
/* rtt_sd gets the same multiply-by-1000 treatment as rtt. */
1383 metrics
->rmx_rttvar
= ire
->ire_metrics
.iulp_rtt_sd
* 1000;
1384 metrics_set
|= RTV_RTTVAR
;
1385 metrics
->rmx_sendpipe
= ire
->ire_metrics
.iulp_spipe
;
1386 metrics_set
|= RTV_SPIPE
;
1387 metrics
->rmx_recvpipe
= ire
->ire_metrics
.iulp_rpipe
;
1388 metrics_set
|= RTV_RPIPE
;
1389 return (metrics_set
);
1393 * Given two sets of metrics (src and dst), use the dst values if they are
1394 * set. If a dst value is not set but the src value is set, then we use the src value.
1396 * dst is updated with the new values.
1397 * This is used to merge information from a dce_t and ire_metrics, where the
1398 * dce values take precedence.
/*
 * rts_merge_metrics: for each iulp_t field tested below, copy the value
 * from src into dst when the dst field is zero. Non-zero dst fields are
 * left untouched; src is never written.
 *
 * NOTE(review): the lines between the signature and the first test are not
 * visible in this listing, so any precondition checked there is unknown.
 */
1401 rts_merge_metrics(iulp_t
*dst
, const iulp_t
*src
)
1406 if (dst
->iulp_ssthresh
== 0)
1407 dst
->iulp_ssthresh
= src
->iulp_ssthresh
;
1408 if (dst
->iulp_rtt
== 0)
1409 dst
->iulp_rtt
= src
->iulp_rtt
;
1410 if (dst
->iulp_rtt_sd
== 0)
1411 dst
->iulp_rtt_sd
= src
->iulp_rtt_sd
;
1412 if (dst
->iulp_spipe
== 0)
1413 dst
->iulp_spipe
= src
->iulp_spipe
;
1414 if (dst
->iulp_rpipe
== 0)
1415 dst
->iulp_rpipe
= src
->iulp_rpipe
;
1416 if (dst
->iulp_rtomax
== 0)
1417 dst
->iulp_rtomax
= src
->iulp_rtomax
;
/* The *_ok and sack fields follow the same rule: zero in dst means take src. */
1418 if (dst
->iulp_sack
== 0)
1419 dst
->iulp_sack
= src
->iulp_sack
;
1420 if (dst
->iulp_tstamp_ok
== 0)
1421 dst
->iulp_tstamp_ok
= src
->iulp_tstamp_ok
;
1422 if (dst
->iulp_wscale_ok
== 0)
1423 dst
->iulp_wscale_ok
= src
->iulp_wscale_ok
;
1424 if (dst
->iulp_ecn_ok
== 0)
1425 dst
->iulp_ecn_ok
= src
->iulp_ecn_ok
;
1426 if (dst
->iulp_pmtud_ok
== 0)
1427 dst
->iulp_pmtud_ok
= src
->iulp_pmtud_ok
;
1428 if (dst
->iulp_mtu
== 0)
1429 dst
->iulp_mtu
= src
->iulp_mtu
;
1434 * Takes a pointer to a routing message and extracts necessary info by looking
1435 * at the rtm->rtm_addrs bits and stores the requested sockaddrs in the pointers
1436 * passed (all of which must be valid).
1438 * The bitmask of sockaddrs actually found in the message is returned, or zero
1439 * is returned in the case of an error.
/*
 * rts_getaddrs: walk the sockaddrs that follow the rt_msghdr_t in rtm and
 * copy the ones selected by rtm->rtm_addrs into the caller-supplied
 * in6_addr_t outputs. The interface index of a link-level sockaddr goes to
 * *indexp and the working address family to *afp.
 *
 * All six address outputs are first set to ipv6_all_zeros. The function
 * returns found_addrs, into which the handled address bits are ORed.
 *
 * NOTE(review): the case labels, the per-case failure paths, the advance
 * of cp and every use of the error parameter are not visible in this
 * listing. The pairing of each output pointer with an RTA_* bit is
 * therefore inferred from the pointer names only.
 */
1442 rts_getaddrs(rt_msghdr_t
*rtm
, in6_addr_t
*dst_addrp
, in6_addr_t
*gw_addrp
,
1443 in6_addr_t
*net_maskp
, in6_addr_t
*authorp
, in6_addr_t
*if_addrp
,
1444 in6_addr_t
*in_src_addrp
, ushort_t
*indexp
, sa_family_t
*afp
, int *error
)
1446 struct sockaddr
*sa
;
1450 int found_addrs
= 0;
1453 struct sockaddr_dl
*sdl
;
/* Start every address output from the all-zeros address. */
1455 *dst_addrp
= ipv6_all_zeros
;
1456 *gw_addrp
= ipv6_all_zeros
;
1457 *net_maskp
= ipv6_all_zeros
;
1458 *authorp
= ipv6_all_zeros
;
1459 *if_addrp
= ipv6_all_zeros
;
1460 *in_src_addrp
= ipv6_all_zeros
;
1466 * At present we handle only RTA_DST, RTA_GATEWAY, RTA_NETMASK, RTA_IFP,
1467 * RTA_IFA and RTA_AUTHOR. The rest will be added as we need them.
/* The sockaddrs begin immediately after the fixed header. */
1469 cp
= (caddr_t
)&rtm
[1];
1470 length
= rtm
->rtm_msglen
;
/*
 * One iteration per address bit, stopping once cp has moved past
 * rtm_msglen bytes from the start of the message.
 * NOTE(review): the visible test only shows that cp lies inside
 * rtm_msglen; no visible check confirms that a whole sockaddr fits before
 * it is read. Confirm that callers or hidden lines validate this.
 */
1471 for (i
= 0; (i
< RTA_NUMBITS
) && ((cp
- (caddr_t
)rtm
) < length
); i
++) {
1473 * The address family we are working with starts out as
1474 * AF_UNSPEC, but is set to the one specified with the
1475 * destination address.
1477 * If the "working" address family that has been set to
1478 * something other than AF_UNSPEC, then the address family of
1479 * subsequent sockaddrs must either be AF_UNSPEC (for
1480 * compatibility with older programs) or must be the same as our
1483 * This code assumes that RTA_DST (1) comes first in the loop.
1485 sa
= (struct sockaddr
*)cp
;
/* addr_bits is the single bit for this iteration, or zero if not requested. */
1486 addr_bits
= (rtm
->rtm_addrs
& (1 << i
));
1489 switch (addr_bits
) {
/* Destination: copy it and take its family as the working family. */
1491 size
= rts_copyfromsockaddr(sa
, dst_addrp
);
1492 *afp
= sa
->sa_family
;
/* Gateway: family must equal the working family or be AF_UNSPEC. */
1495 if (sa
->sa_family
!= *afp
&& sa
->sa_family
!= AF_UNSPEC
)
1497 size
= rts_copyfromsockaddr(sa
, gw_addrp
);
/* Netmask: same family rule as the gateway. */
1500 if (sa
->sa_family
!= *afp
&& sa
->sa_family
!= AF_UNSPEC
)
1502 size
= rts_copyfromsockaddr(sa
, net_maskp
);
/*
 * Link-level sockaddr: family must be AF_LINK or AF_UNSPEC. Only the
 * interface index is extracted and the size is a fixed sockaddr_dl.
 */
1505 if (sa
->sa_family
!= AF_LINK
&&
1506 sa
->sa_family
!= AF_UNSPEC
)
1508 sdl
= (struct sockaddr_dl
*)cp
;
1509 *indexp
= sdl
->sdl_index
;
1510 size
= sizeof (struct sockaddr_dl
);
1513 /* Source address of the incoming packet */
/* This case also overwrites the working family, as the destination does. */
1514 size
= rts_copyfromsockaddr(sa
, in_src_addrp
);
1515 *afp
= sa
->sa_family
;
/* Interface address: same family rule as the gateway. */
1518 if (sa
->sa_family
!= *afp
&& sa
->sa_family
!= AF_UNSPEC
)
1520 size
= rts_copyfromsockaddr(sa
, if_addrp
);
/* Author: same family rule as the gateway. */
1523 if (sa
->sa_family
!= *afp
&& sa
->sa_family
!= AF_UNSPEC
)
1525 size
= rts_copyfromsockaddr(sa
, authorp
);
/* Record the bit that was just handled. */
1533 found_addrs
|= addr_bits
;
1536 return (found_addrs
);
1540 * Fills the message with the given info.
/*
 * rts_fill_msg: write an IPv4 routing socket message into mp. The header
 * size is chosen by type, and one sockaddr is emitted for each bit set in
 * rtm_addrs, in bit order. Each emitted sin_t gets family AF_INET and the
 * matching ipaddr_t argument (dst, gateway, mask, ifaddr, src_addr, author,
 * brd_addr). The link-level entry is produced by ill_dls_info() from ill.
 *
 * On return mp->b_wptr has been set to just past the header, and
 * rtm_msglen, rtm_version and rtm_type have been filled in.
 *
 * NOTE(review): the case labels and the assignments of cp and sin are not
 * visible in this listing. The mapping of each argument to an RTA_* bit is
 * inferred from the argument names only.
 */
1543 rts_fill_msg(int type
, int rtm_addrs
, ipaddr_t dst
, ipaddr_t mask
,
1544 ipaddr_t gateway
, ipaddr_t src_addr
, ipaddr_t brd_addr
, ipaddr_t author
,
1545 ipaddr_t ifaddr
, const ill_t
*ill
, mblk_t
*mp
)
1549 size_t data_size
, header_size
;
1555 * First find the type of the message
1558 header_size
= rts_header_msg_size(type
);
1560 * Now find the size of the data
1561 * that follows the message header.
/* This routine is IPv4 only, so sizes are computed for AF_INET. */
1563 data_size
= rts_data_msg_size(rtm_addrs
, AF_INET
);
1565 rtm
= (rt_msghdr_t
*)mp
->b_rptr
;
1566 mp
->b_wptr
= &mp
->b_rptr
[header_size
];
/* Clear the whole sockaddr area before filling individual entries. */
1568 bzero(cp
, data_size
);
1569 for (i
= 0; i
< RTA_NUMBITS
; i
++) {
1571 switch (rtm_addrs
& (1 << i
)) {
/* Destination address. */
1573 sin
->sin_addr
.s_addr
= dst
;
1574 sin
->sin_family
= AF_INET
;
1575 cp
+= sizeof (sin_t
);
/* Gateway address. */
1578 sin
->sin_addr
.s_addr
= gateway
;
1579 sin
->sin_family
= AF_INET
;
1580 cp
+= sizeof (sin_t
);
/* Network mask. */
1583 sin
->sin_addr
.s_addr
= mask
;
1584 sin
->sin_family
= AF_INET
;
1585 cp
+= sizeof (sin_t
);
/* Link-level entry: ill_dls_info() fills it and returns the bytes to skip. */
1588 cp
+= ill_dls_info((struct sockaddr_dl
*)cp
, ill
);
/* Interface address. */
1591 sin
->sin_addr
.s_addr
= ifaddr
;
1592 sin
->sin_family
= AF_INET
;
1593 cp
+= sizeof (sin_t
);
/* Source address. */
1596 sin
->sin_addr
.s_addr
= src_addr
;
1597 sin
->sin_family
= AF_INET
;
1598 cp
+= sizeof (sin_t
);
/* Author address. */
1601 sin
->sin_addr
.s_addr
= author
;
1602 sin
->sin_family
= AF_INET
;
1603 cp
+= sizeof (sin_t
);
1607 * RTA_BRD is used typically to specify a point-to-point
1608 * destination address.
1610 sin
->sin_addr
.s_addr
= brd_addr
;
1611 sin
->sin_family
= AF_INET
;
1612 cp
+= sizeof (sin_t
);
1620 * set the fields that are common to
1621 * to different messages.
/* The total length is narrowed from size_t to short when stored. */
1623 rtm
->rtm_msglen
= (short)(header_size
+ data_size
);
1624 rtm
->rtm_version
= RTM_VERSION
;
1625 rtm
->rtm_type
= (uchar_t
)type
;
1629 * Allocates and initializes a routing socket message.
/*
 * rts_alloc_msg: allocate an mblk sized by RTS_MSG_SIZE(type, rtm_addrs, af)
 * with allocb() at BPRI_MED priority and zero its data area.
 *
 * NOTE(review): the handling of an allocb() failure and the return
 * statement are not visible in this listing.
 */
1632 rts_alloc_msg(int type
, int rtm_addrs
, sa_family_t af
)
1637 length
= RTS_MSG_SIZE(type
, rtm_addrs
, af
);
1638 mp
= allocb(length
, BPRI_MED
);
/* Zero length bytes starting at b_rptr. */
1641 bzero(mp
->b_rptr
, length
);
1646 * Returns the size of the routing
1647 * socket message header.
/*
 * rts_header_msg_size: return the size of the message header structure
 * used for the given message type. The result is one of
 * sizeof (ifa_msghdr_t), sizeof (if_msghdr_t) or sizeof (rt_msghdr_t).
 *
 * NOTE(review): the case labels that select between the three sizes are
 * not visible in this listing.
 */
1650 rts_header_msg_size(int type
)
1657 return (sizeof (ifa_msghdr_t
));
1659 return (sizeof (if_msghdr_t
));
1661 return (sizeof (rt_msghdr_t
));
1666 * Returns the size of the message needed with the given rtm_addrs and family.
1668 * It is assumed that all of the sockaddrs (with the exception of RTA_IFP) are
1669 * of the same family (currently either AF_INET or AF_INET6).
/*
 * rts_data_msg_size: add up the space needed for the sockaddrs selected by
 * rtm_addrs. A link-level entry contributes sizeof (struct sockaddr_dl);
 * the other handled entries contribute sizeof (sin_t) or sizeof (sin6_t).
 * af is asserted to be AF_INET or AF_INET6.
 *
 * NOTE(review): the case labels, the test that chooses between sin_t and
 * sin6_t, and the return statement are not visible in this listing.
 */
1672 rts_data_msg_size(int rtm_addrs
, sa_family_t af
)
/* Examine each address bit in turn. */
1677 for (i
= 0; i
< RTA_NUMBITS
; i
++) {
1678 switch (rtm_addrs
& (1 << i
)) {
1680 length
+= sizeof (struct sockaddr_dl
);
1689 ASSERT(af
== AF_INET
|| af
== AF_INET6
);
1692 length
+= sizeof (sin_t
);
1695 length
+= sizeof (sin6_t
);
1706 * This routine is called to generate a message to the routing
1707 * socket indicating that a redirect has occurred, a routing lookup
1708 * has failed, or that a protocol has detected timeouts to a particular
1709 * destination. This routine is called for message types RTM_LOSING,
1710 * RTM_REDIRECT, and RTM_MISS.
/*
 * ip_rts_change: build an IPv4 routing socket message of the given type
 * and queue it with rts_queue_input() using RTSQ_ALL.
 *
 * The message is allocated with rts_alloc_msg() and filled by
 * rts_fill_msg() from dst_addr, net_mask, gw_addr, source and author, with
 * zero for the broadcast and interface addresses and a NULL ill.
 * rtm_flags is set to flags with RTF_DONE added, rtm_errno to error and
 * rtm_addrs to rtm_addrs.
 *
 * NOTE(review): the end of the parameter list (which supplies ipst) and
 * the handling of an allocation failure are not visible in this listing.
 */
1713 ip_rts_change(int type
, ipaddr_t dst_addr
, ipaddr_t gw_addr
, ipaddr_t net_mask
,
1714 ipaddr_t source
, ipaddr_t author
, int flags
, int error
, int rtm_addrs
,
1722 mp
= rts_alloc_msg(type
, rtm_addrs
, AF_INET
);
/* Argument order for rts_fill_msg is dst, mask, gateway, src, brd, author, ifaddr. */
1725 rts_fill_msg(type
, rtm_addrs
, dst_addr
, net_mask
, gw_addr
, source
, 0,
1726 author
, 0, NULL
, mp
);
1727 rtm
= (rt_msghdr_t
*)mp
->b_rptr
;
1728 rtm
->rtm_flags
= flags
;
1729 rtm
->rtm_errno
= error
;
1730 rtm
->rtm_flags
|= RTF_DONE
;
1731 rtm
->rtm_addrs
= rtm_addrs
;
1732 rts_queue_input(mp
, NULL
, AF_INET
, RTSQ_ALL
, ipst
);
1736 * This routine is called to generate a message to the routing
1737 * socket indicating that the status of a network interface has changed.
1738 * The message type generated is RTM_IFINFO.
/*
 * ip_rts_ifmsg: convenience wrapper that calls ip_rts_xifmsg() with zero
 * for both the set and clear flag masks.
 */
1741 ip_rts_ifmsg(const ipif_t
*ipif
, uint_t flags
)
1743 ip_rts_xifmsg(ipif
, 0, 0, flags
);
/*
 * ip_rts_xifmsg: build an RTM_IFINFO message carrying only RTA_IFP for the
 * ill behind ipif and queue it with rts_queue_input().
 *
 * ifm_flags is the OR of the ipif, ill and phyint flags plus the bits in
 * set, with the bits in clear removed. ifm_index is the phyint ifindex and
 * ifm_data is filled by rts_getifdata().
 *
 * NOTE(review): the assignments of af, the action taken when ipif_id is
 * non-zero, allocation-failure handling and the statements directly under
 * the RTSQ_DEFAULT test are not visible in this listing.
 */
1747 ip_rts_xifmsg(const ipif_t
*ipif
, uint64_t set
, uint64_t clear
, uint_t flags
)
1752 ip_stack_t
*ipst
= ipif
->ipif_ill
->ill_ipst
;
1755 * This message should be generated only
1756 * when the physical device is changing
1759 if (ipif
->ipif_id
!= 0)
/* IPv6 and IPv4 differ only in which fill routine is used. */
1761 if (ipif
->ipif_isv6
) {
1763 mp
= rts_alloc_msg(RTM_IFINFO
, RTA_IFP
, af
);
/* Every address argument is the all-zeros address; only the ill matters. */
1766 rts_fill_msg_v6(RTM_IFINFO
, RTA_IFP
, &ipv6_all_zeros
,
1767 &ipv6_all_zeros
, &ipv6_all_zeros
, &ipv6_all_zeros
,
1768 &ipv6_all_zeros
, &ipv6_all_zeros
, &ipv6_all_zeros
,
1769 ipif
->ipif_ill
, mp
);
1772 mp
= rts_alloc_msg(RTM_IFINFO
, RTA_IFP
, af
);
1775 rts_fill_msg(RTM_IFINFO
, RTA_IFP
, 0, 0, 0, 0, 0, 0, 0,
1776 ipif
->ipif_ill
, mp
);
/* The message is an if_msghdr_t; fill in its interface fields. */
1778 ifm
= (if_msghdr_t
*)mp
->b_rptr
;
1779 ifm
->ifm_index
= ipif
->ipif_ill
->ill_phyint
->phyint_ifindex
;
1780 ifm
->ifm_flags
= (ipif
->ipif_flags
| ipif
->ipif_ill
->ill_flags
|
1781 ipif
->ipif_ill
->ill_phyint
->phyint_flags
| set
) & ~clear
;
1782 rts_getifdata(&ifm
->ifm_data
, ipif
);
1783 ifm
->ifm_addrs
= RTA_IFP
;
1785 if (flags
& RTSQ_DEFAULT
) {
1788 * If this message is for an underlying interface, prevent
1789 * "normal" (IPMP-unaware) routing sockets from seeing it.
1791 if (IS_UNDER_IPMP(ipif
->ipif_ill
))
1792 flags
&= ~RTSQ_NORMAL
;
1795 rts_queue_input(mp
, NULL
, af
, flags
, ipst
);
1799 * If cmd is RTM_ADD or RTM_DELETE, generate the rt_msghdr_t message;
1800 * otherwise (RTM_NEWADDR, RTM_DELADDR, RTM_CHGADDR and RTM_FREEADDR)
1801 * generate the ifa_msghdr_t message.
/*
 * rts_new_rtsmsg: build one routing socket message for ipif and queue it
 * with rts_queue_input().
 *
 * For RTM_ADD and RTM_DELETE the address set is RTA_DST | RTA_NETMASK and
 * the message is accessed as an rt_msghdr_t. For every other cmd the set is
 * RTA_IFA | RTA_NETMASK | RTA_BRD | RTA_IFP and the message is accessed as
 * an ifa_msghdr_t. IPv4 ipifs are filled with rts_fill_msg(), IPv6 ipifs
 * with rts_fill_msg_v6().
 *
 * NOTE(review): the assignments of af, several else branches, the early
 * exits and the left-hand sides of the two ifindex assignments are not
 * visible in this listing.
 */
1804 rts_new_rtsmsg(int cmd
, int error
, const ipif_t
*ipif
, uint_t flags
)
1811 ip_stack_t
*ipst
= ipif
->ipif_ill
->ill_ipst
;
1814 * Do not report unspecified address if this is the RTM_CHGADDR or
1815 * RTM_FREEADDR message.
1817 if (cmd
== RTM_CHGADDR
|| cmd
== RTM_FREEADDR
) {
/* IPv4 tests for INADDR_ANY, IPv6 for the unspecified address. */
1818 if (!ipif
->ipif_isv6
) {
1819 if (ipif
->ipif_lcl_addr
== INADDR_ANY
)
1821 } else if (IN6_IS_ADDR_UNSPECIFIED(&ipif
->ipif_v6lcl_addr
)) {
1826 if (ipif
->ipif_isv6
)
/* Route messages carry dst and netmask; address messages carry four entries. */
1831 if (cmd
== RTM_ADD
|| cmd
== RTM_DELETE
)
1832 rtm_addrs
= (RTA_DST
| RTA_NETMASK
);
1834 rtm_addrs
= (RTA_IFA
| RTA_NETMASK
| RTA_BRD
| RTA_IFP
);
1836 mp
= rts_alloc_msg(cmd
, rtm_addrs
, af
);
/* Address message path (cmd is neither RTM_ADD nor RTM_DELETE). */
1840 if (cmd
!= RTM_ADD
&& cmd
!= RTM_DELETE
) {
/* The local address is passed as both the source and interface address. */
1843 rts_fill_msg(cmd
, rtm_addrs
, 0,
1844 ipif
->ipif_net_mask
, 0, ipif
->ipif_lcl_addr
,
1845 ipif
->ipif_pp_dst_addr
, 0,
1846 ipif
->ipif_lcl_addr
, ipif
->ipif_ill
,
1850 rts_fill_msg_v6(cmd
, rtm_addrs
,
1851 &ipv6_all_zeros
, &ipif
->ipif_v6net_mask
,
1852 &ipv6_all_zeros
, &ipif
->ipif_v6lcl_addr
,
1853 &ipif
->ipif_v6pp_dst_addr
, &ipv6_all_zeros
,
1854 &ipif
->ipif_v6lcl_addr
, ipif
->ipif_ill
,
1858 ifam
= (ifa_msghdr_t
*)mp
->b_rptr
;
1860 ipif
->ipif_ill
->ill_phyint
->phyint_ifindex
;
1861 ifam
->ifam_metric
= ipif
->ipif_ill
->ill_metric
;
/* Only RTM_NEWADDR is reported with RTF_UP set. */
1862 ifam
->ifam_flags
= ((cmd
== RTM_NEWADDR
) ? RTF_UP
: 0);
1863 ifam
->ifam_addrs
= rtm_addrs
;
/* Route message path: local address as destination, plus the netmask. */
1867 rts_fill_msg(cmd
, rtm_addrs
,
1868 ipif
->ipif_lcl_addr
, ipif
->ipif_net_mask
, 0,
1869 0, 0, 0, 0, NULL
, mp
);
1872 rts_fill_msg_v6(cmd
, rtm_addrs
,
1873 &ipif
->ipif_v6lcl_addr
,
1874 &ipif
->ipif_v6net_mask
, &ipv6_all_zeros
,
1875 &ipv6_all_zeros
, &ipv6_all_zeros
,
1876 &ipv6_all_zeros
, &ipv6_all_zeros
,
1880 rtm
= (rt_msghdr_t
*)mp
->b_rptr
;
1882 ipif
->ipif_ill
->ill_phyint
->phyint_ifindex
;
/* Only RTM_ADD is reported with RTF_UP set; RTF_DONE is added below. */
1883 rtm
->rtm_flags
= ((cmd
== RTM_ADD
) ? RTF_UP
: 0);
1884 rtm
->rtm_errno
= error
;
1886 rtm
->rtm_flags
|= RTF_DONE
;
1887 rtm
->rtm_addrs
= rtm_addrs
;
1889 rts_queue_input(mp
, NULL
, af
, flags
, ipst
);
1893 * This is called to generate messages to the routing socket
1894 * indicating a network interface has had addresses associated with it.
1895 * The structure of the code is based on the 4.4BSD-Lite2 <net/rtsock.c>.
/*
 * ip_rts_newaddrmsg: emit the routing socket messages for an address
 * change on ipif by calling rts_new_rtsmsg() once or twice.
 *
 * The visible calls send RTM_NEWADDR followed by RTM_ADD, or RTM_DELETE
 * followed by RTM_DELADDR, or a single message with cmd unchanged. Before
 * that, an RTM_ADD or RTM_DELETE cmd triggers
 * ip_update_source_selection(ipst).
 *
 * NOTE(review): the construct that chooses between the three sequences and
 * the statements directly under the RTSQ_DEFAULT test are not visible in
 * this listing.
 */
1898 ip_rts_newaddrmsg(int cmd
, int error
, const ipif_t
*ipif
, uint_t flags
)
1900 ip_stack_t
*ipst
= ipif
->ipif_ill
->ill_ipst
;
1902 if (flags
& RTSQ_DEFAULT
) {
1905 * If this message is for an underlying interface, prevent
1906 * "normal" (IPMP-unaware) routing sockets from seeing it.
1908 if (IS_UNDER_IPMP(ipif
->ipif_ill
))
1909 flags
&= ~RTSQ_NORMAL
;
1913 * Let conn_ixa caching know that source address selection
1916 if (cmd
== RTM_ADD
|| cmd
== RTM_DELETE
)
1917 ip_update_source_selection(ipst
);
1920 * If the request is DELETE, send RTM_DELETE and RTM_DELADDR.
1921 * if the request is ADD, send RTM_NEWADDR and RTM_ADD.
1922 * otherwise simply send the request.
/* Add sequence: address message first, then the route message. */
1926 rts_new_rtsmsg(RTM_NEWADDR
, error
, ipif
, flags
);
1927 rts_new_rtsmsg(RTM_ADD
, error
, ipif
, flags
);
/* Delete sequence: route message first, then the address message. */
1930 rts_new_rtsmsg(RTM_DELETE
, error
, ipif
, flags
);
1931 rts_new_rtsmsg(RTM_DELADDR
, error
, ipif
, flags
);
/* Any other cmd is passed through unchanged. */
1934 rts_new_rtsmsg(cmd
, error
, ipif
, flags
);
1940 * Based on the address family specified in a sockaddr, copy the address field
1941 * into an in6_addr_t.
1943 * In the case of AF_UNSPEC, we assume the family is actually AF_INET for
1944 * compatibility with programs that leave the family cleared in the sockaddr.
1945 * Callers of rts_copyfromsockaddr should check the family themselves if they
1946 * wish to verify its value.
1948 * In the case of AF_INET6, a check is made to ensure that address is not an
1949 * IPv4-mapped address.
1952 rts_copyfromsockaddr(struct sockaddr
*sa
, in6_addr_t
*addrp
)
1954 switch (sa
->sa_family
) {
1957 IN6_IPADDR_TO_V4MAPPED(((sin_t
*)sa
)->sin_addr
.s_addr
, addrp
);
1958 return (sizeof (sin_t
));
1960 *addrp
= ((sin6_t
*)sa
)->sin6_addr
;
1961 if (IN6_IS_ADDR_V4MAPPED(addrp
))
1963 return (sizeof (sin6_t
));