4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Copyright 2017 Sebastian Wiedenroth
24 * Use is subject to license terms.
27 #include <sys/types.h>
28 #include <sys/socket.h>
29 #include <sys/ksynch.h>
31 #include <sys/errno.h>
32 #include <sys/systm.h>
33 #include <sys/sysmacros.h>
34 #include <sys/cmn_err.h>
35 #include <sys/strsun.h>
37 #include <netinet/in.h>
38 #include <inet/common.h>
41 #include <inet/ip6_asp.h>
42 #include <inet/ip_ire.h>
43 #include <inet/ip_if.h>
44 #include <inet/ipclassifier.h>
/*
 * Netmask initializers used by the default policy table below.  Each one
 * expands to IN6ADDR_INITIALIZER() with four 32-bit words; the number in the
 * macro name is how many leading bits are set, counting from the first
 * argument.
 */
#define	IN6ADDR_MASK128_INIT \
	IN6ADDR_INITIALIZER(0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu)
#define	IN6ADDR_MASK96_INIT \
	IN6ADDR_INITIALIZER(0xffffffffu, 0xffffffffu, 0xffffffffu, 0)
#define	IN6ADDR_MASK32_INIT \
	IN6ADDR_INITIALIZER(0xffffffffu, 0, 0, 0)
#define	IN6ADDR_MASK16_INIT \
	IN6ADDR_INITIALIZER(0xffff0000u, 0, 0, 0)
#define	IN6ADDR_MASK10_INIT \
	IN6ADDR_INITIALIZER(0xffc00000u, 0, 0, 0)
#define	IN6ADDR_MASK7_INIT \
	IN6ADDR_INITIALIZER(0xfe000000u, 0, 0, 0)
/*
 * This table is ordered such that longest prefix matches are hit first
 * (longer prefix lengths first).  The last entry must be the "default"
 * entry, i.e. the one whose prefix and mask are both the unspecified
 * address.
 */
/*
 * Built-in policy table; ip6_asp_init() points each IP stack at it.
 *
 * NOTE(review): this copy of the initializer is visibly incomplete.  Only
 * the second entry still carries its label and precedence
 * ("IPv4_Compatible", 1); the label/precedence pairs of the other entries
 * and the closing brace of the array are not shown and must be restored
 * from the authoritative source.  The prefix annotations below assume that
 * IN6ADDR_INITIALIZER() takes the address as four 32-bit words, most
 * significant first - TODO confirm.
 */
64 static ip6_asp_t default_ip6_asp_table
[] = {
/* Loopback address, 128-bit mask. */
65 { IN6ADDR_LOOPBACK_INIT
, IN6ADDR_MASK128_INIT
,
/* ::/96 */
67 { IN6ADDR_ANY_INIT
, IN6ADDR_MASK96_INIT
,
68 "IPv4_Compatible", 1 },
/* ::ffff:0:0/96 */
69 { IN6ADDR_INITIALIZER(0, 0, 0x0000ffffu
, 0), IN6ADDR_MASK96_INIT
,
/* 2001::/32 */
71 { IN6ADDR_INITIALIZER(0x20010000U
, 0, 0, 0), IN6ADDR_MASK32_INIT
,
/* 2002::/16 */
73 { IN6ADDR_INITIALIZER(0x20020000u
, 0, 0, 0), IN6ADDR_MASK16_INIT
,
/* 3ffe::/16 */
75 { IN6ADDR_INITIALIZER(0x3ffe0000U
, 0, 0, 0), IN6ADDR_MASK16_INIT
,
/* fec0::/10 */
77 { IN6ADDR_INITIALIZER(0xfec00000U
, 0, 0, 0), IN6ADDR_MASK10_INIT
,
/* fc00::/7 */
79 { IN6ADDR_INITIALIZER(0xfc000000U
, 0, 0, 0), IN6ADDR_MASK7_INIT
,
/* Default entry: unspecified prefix and unspecified mask; must stay last. */
81 { IN6ADDR_ANY_INIT
, IN6ADDR_ANY_INIT
,
86 * The IPv6 Default Address Selection policy table.
87 * Until someone up above reconfigures the policy table, use the global
88 * default. The table needs no lock since the only way to alter it is
89 * through the SIOCSIP6ADDRPOLICY which is exclusive in ip.
91 static void ip6_asp_copy(ip6_asp_t
*, ip6_asp_t
*, uint_t
);
92 static void ip6_asp_check_for_updates(ip_stack_t
*);
95 ip6_asp_init(ip_stack_t
*ipst
)
97 /* Initialize the table lock */
98 mutex_init(&ipst
->ips_ip6_asp_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
100 ipst
->ips_ip6_asp_table
= default_ip6_asp_table
;
102 ipst
->ips_ip6_asp_table_count
=
103 sizeof (default_ip6_asp_table
) / sizeof (ip6_asp_t
);
107 ip6_asp_free(ip_stack_t
*ipst
)
109 if (ipst
->ips_ip6_asp_table
!= default_ip6_asp_table
) {
110 kmem_free(ipst
->ips_ip6_asp_table
,
111 ipst
->ips_ip6_asp_table_count
* sizeof (ip6_asp_t
));
112 ipst
->ips_ip6_asp_table
= NULL
;
114 mutex_destroy(&ipst
->ips_ip6_asp_lock
);
118 * Return false if the table is being updated. Else, increment the ref
119 * count and return true.
122 ip6_asp_can_lookup(ip_stack_t
*ipst
)
124 mutex_enter(&ipst
->ips_ip6_asp_lock
);
125 if (ipst
->ips_ip6_asp_uip
) {
126 mutex_exit(&ipst
->ips_ip6_asp_lock
);
129 IP6_ASP_TABLE_REFHOLD(ipst
);
130 mutex_exit(&ipst
->ips_ip6_asp_lock
);
136 ip6_asp_pending_op(queue_t
*q
, mblk_t
*mp
, aspfunc_t func
)
138 conn_t
*connp
= Q_TO_CONN(q
);
139 ip_stack_t
*ipst
= connp
->conn_netstack
->netstack_ip
;
141 ASSERT((mp
->b_prev
== NULL
) && (mp
->b_queue
== NULL
) &&
142 (mp
->b_next
== NULL
));
143 mp
->b_queue
= (void *)q
;
144 mp
->b_prev
= (void *)func
;
147 mutex_enter(&ipst
->ips_ip6_asp_lock
);
148 if (ipst
->ips_ip6_asp_pending_ops
== NULL
) {
149 ASSERT(ipst
->ips_ip6_asp_pending_ops_tail
== NULL
);
150 ipst
->ips_ip6_asp_pending_ops
=
151 ipst
->ips_ip6_asp_pending_ops_tail
= mp
;
153 ipst
->ips_ip6_asp_pending_ops_tail
->b_next
= mp
;
154 ipst
->ips_ip6_asp_pending_ops_tail
= mp
;
156 mutex_exit(&ipst
->ips_ip6_asp_lock
);
160 ip6_asp_complete_op(ip_stack_t
*ipst
)
166 mutex_enter(&ipst
->ips_ip6_asp_lock
);
167 while (ipst
->ips_ip6_asp_pending_ops
!= NULL
) {
168 mp
= ipst
->ips_ip6_asp_pending_ops
;
169 ipst
->ips_ip6_asp_pending_ops
= mp
->b_next
;
171 if (ipst
->ips_ip6_asp_pending_ops
== NULL
)
172 ipst
->ips_ip6_asp_pending_ops_tail
= NULL
;
173 mutex_exit(&ipst
->ips_ip6_asp_lock
);
175 q
= (queue_t
*)mp
->b_queue
;
176 func
= (aspfunc_t
)mp
->b_prev
;
182 (*func
)(NULL
, q
, mp
, NULL
);
183 mutex_enter(&ipst
->ips_ip6_asp_lock
);
185 mutex_exit(&ipst
->ips_ip6_asp_lock
);
189 * Decrement reference count. When it gets to 0, we check for (pending)
190 * saved update to the table, if any.
193 ip6_asp_table_refrele(ip_stack_t
*ipst
)
195 IP6_ASP_TABLE_REFRELE(ipst
);
199 * This function is guaranteed never to return a NULL pointer. It
200 * will always return information from one of the entries in the
201 * asp_table (which will never be empty). If a pointer is passed
202 * in for the precedence, the precedence value will be set; a
203 * pointer to the label will be returned by the function.
205 * Since the table is only anticipated to have about 10 entries
206 * total, the lookup algorithm hasn't been optimized to anything
210 ip6_asp_lookup(const in6_addr_t
*addr
, uint32_t *precedence
, ip_stack_t
*ipst
)
213 ip6_asp_t
*match
= NULL
;
214 ip6_asp_t
*default_policy
;
216 aspp
= ipst
->ips_ip6_asp_table
;
217 /* The default entry must always be the last one */
218 default_policy
= aspp
+ ipst
->ips_ip6_asp_table_count
- 1;
220 while (match
== NULL
) {
221 if (aspp
== default_policy
) {
224 if (V6_MASK_EQ(*addr
, aspp
->ip6_asp_mask
,
225 aspp
->ip6_asp_prefix
))
232 if (precedence
!= NULL
)
233 *precedence
= match
->ip6_asp_precedence
;
234 return (match
->ip6_asp_label
);
238 * If we had deferred updating the table because of outstanding references,
239 * do it now. Note, we don't do error checking on the queued IOCTL mblk, since
240 * ip_sioctl_ip6addrpolicy() has already done it for us.
243 ip6_asp_check_for_updates(ip_stack_t
*ipst
)
247 mblk_t
*data_mp
, *mp
;
250 mutex_enter(&ipst
->ips_ip6_asp_lock
);
251 if (ipst
->ips_ip6_asp_pending_update
== NULL
||
252 ipst
->ips_ip6_asp_refcnt
> 0) {
253 mutex_exit(&ipst
->ips_ip6_asp_lock
);
257 mp
= ipst
->ips_ip6_asp_pending_update
;
258 ipst
->ips_ip6_asp_pending_update
= NULL
;
259 ASSERT(mp
->b_prev
!= NULL
);
261 ipst
->ips_ip6_asp_uip
= B_TRUE
;
263 iocp
= (struct iocblk
*)mp
->b_rptr
;
264 data_mp
= mp
->b_cont
;
265 if (data_mp
== NULL
) {
267 table_size
= iocp
->ioc_count
;
269 table
= (ip6_asp_t
*)data_mp
->b_rptr
;
270 table_size
= iocp
->ioc_count
;
273 ip6_asp_replace(mp
, table
, table_size
, B_TRUE
, ipst
,
274 iocp
->ioc_flag
& IOC_MODELS
);
/*
 * ip6_asp_replace replaces the contents of the IPv6 address selection
 * policy table with those specified in new_table.  If new_table is NULL,
 * this indicates that the caller wishes ip to use the default policy
 * table.  The caller is responsible for making sure that new_size is the
 * size of new_table in bytes; a size that is not a whole number of policy
 * entries is reported as an invalid table size.
 */
/*
 * NOTE(review): this copy of the function is visibly incomplete - the
 * return type, the braces, several local declarations (count, ret_val, q,
 * iocp, dst, src and i are used below but never declared here) and the
 * statements that end each error path are absent.  The comments added
 * below describe only what the surviving lines show.
 */
286 ip6_asp_replace(mblk_t
*mp
, ip6_asp_t
*new_table
, size_t new_size
,
287 boolean_t locked
, ip_stack_t
*ipst
, model_t datamodel
)
290 ip6_asp_t
*tmp_table
;
/*
 * Size of one policy entry as laid out by the caller.
 * NOTE(review): the two declarations below presumably sit in the two arms
 * of this #if; the #else and #endif lines are not shown.
 */
294 #if defined(_SYSCALL32_IMPL) && _LONG_LONG_ALIGNMENT_32 == 4
295 size_t ip6_asp_size
= SIZEOF_STRUCT(ip6_asp
, datamodel
);
297 const size_t ip6_asp_size
= sizeof (ip6_asp_t
);
/* new_size must be a whole number of entries. */
300 if (new_size
% ip6_asp_size
!= 0) {
301 ip1dbg(("ip6_asp_replace: invalid table size\n"));
/* Number of entries supplied by the caller. */
307 count
= new_size
/ ip6_asp_size
;
/*
 * NOTE(review): as shown, the lock is taken unconditionally, yet
 * ip6_asp_check_for_updates() calls this function with locked == B_TRUE
 * while it already holds the lock.  Confirm whether a guard line is
 * missing here.
 */
312 mutex_enter(&ipst
->ips_ip6_asp_lock
);
314 * Check if we are in the process of creating any IRE using the
315 * current information. If so, wait till that is done.
317 if (!locked
&& ipst
->ips_ip6_asp_refcnt
> 0) {
318 /* Save this request for later processing */
319 if (ipst
->ips_ip6_asp_pending_update
== NULL
) {
320 ipst
->ips_ip6_asp_pending_update
= mp
;
322 /* Let's not queue multiple requests for now */
323 ip1dbg(("ip6_asp_replace: discarding request\n"));
324 mutex_exit(&ipst
->ips_ip6_asp_lock
);
328 mutex_exit(&ipst
->ips_ip6_asp_lock
);
332 /* Prevent lookups till the table has been updated */
/*
 * NOTE(review): ip6_asp_check_for_updates() has already set this flag
 * before calling with locked == B_TRUE.
 */
334 ipst
->ips_ip6_asp_uip
= B_TRUE
;
336 ASSERT(ipst
->ips_ip6_asp_refcnt
== 0);
/* A NULL new_table asks for the built-in default table. */
338 if (new_table
== NULL
) {
340 * This is a special case. The user wants to revert
341 * back to using the default table.
/* NOTE(review): the statement controlled by this test is not shown. */
343 if (ipst
->ips_ip6_asp_table
== default_ip6_asp_table
)
/* Free the current (non-default) table, then point back at the default. */
346 kmem_free(ipst
->ips_ip6_asp_table
,
347 ipst
->ips_ip6_asp_table_count
* sizeof (ip6_asp_t
));
348 ipst
->ips_ip6_asp_table
= default_ip6_asp_table
;
349 ipst
->ips_ip6_asp_table_count
=
350 sizeof (default_ip6_asp_table
) / sizeof (ip6_asp_t
);
/* NOTE(review): the test guarding this message is not shown. */
356 ip1dbg(("ip6_asp_replace: empty table\n"));
/*
 * Build the replacement in a scratch table.  NOTE(review): the rest of
 * this condition and its failure handling are not shown.
 */
360 if ((tmp_table
= kmem_alloc(count
* sizeof (ip6_asp_t
), KM_NOSLEEP
)) ==
366 #if defined(_SYSCALL32_IMPL) && _LONG_LONG_ALIGNMENT_32 == 4
369 * If 'new_table' -actually- originates from a 32-bit process
370 * then the nicely aligned ip6_asp_label array will be
371 * subtlely misaligned on this kernel, because the structure
372 * is 8 byte aligned in the kernel, but only 4 byte aligned in
373 * userland. Fix it up here.
375 * XX64 See the notes in ip_sioctl_ip6addrpolicy. Perhaps we could
376 * do the datamodel transformation (below) there instead of here?
/* 32-bit caller: widen the entries into a zeroed staging table first. */
378 if (datamodel
== IOC_ILP32
) {
383 if ((dst
= kmem_zalloc(count
* sizeof (*dst
),
384 KM_NOSLEEP
)) == NULL
) {
/* Staging allocation failed: give back the scratch table. */
385 kmem_free(tmp_table
, count
* sizeof (ip6_asp_t
));
391 * Copy each element of the table from ip6_asp32_t
392 * format into ip6_asp_t format. Fortunately, since
393 * we're just dealing with a trailing structure pad,
394 * we can do this straightforwardly with a flurry of
397 src
= (void *)new_table
;
398 for (i
= 0; i
< count
; i
++)
399 bcopy(src
+ i
, dst
+ i
, sizeof (*src
));
/* Copy the widened entries into the scratch table, then drop the staging copy. */
401 ip6_asp_copy(dst
, tmp_table
, count
);
402 kmem_free(dst
, count
* sizeof (*dst
));
/*
 * Otherwise copy straight from new_table.  NOTE(review): the else / #endif
 * lines that select between the two copies are not shown.
 */
405 ip6_asp_copy(new_table
, tmp_table
, count
);
407 /* Make sure the last entry is the default entry */
408 if (!IN6_IS_ADDR_UNSPECIFIED(&tmp_table
[count
- 1].ip6_asp_prefix
) ||
409 !IN6_IS_ADDR_UNSPECIFIED(&tmp_table
[count
- 1].ip6_asp_mask
)) {
411 kmem_free(tmp_table
, count
* sizeof (ip6_asp_t
));
412 ip1dbg(("ip6_asp_replace: bad table: no default entry\n"));
/* Free the table being replaced unless it is the static default. */
415 if (ipst
->ips_ip6_asp_table
!= default_ip6_asp_table
) {
416 kmem_free(ipst
->ips_ip6_asp_table
,
417 ipst
->ips_ip6_asp_table_count
* sizeof (ip6_asp_t
));
/* Install the new table. */
419 ipst
->ips_ip6_asp_table
= tmp_table
;
420 ipst
->ips_ip6_asp_table_count
= count
;
/* Re-enable lookups and drop the table lock. */
423 ipst
->ips_ip6_asp_uip
= B_FALSE
;
424 mutex_exit(&ipst
->ips_ip6_asp_lock
);
426 /* Let conn_ixa caching know that source address selection changed */
427 ip_update_source_selection(ipst
);
430 /* Reply to the ioctl */
/* NOTE(review): b_prev presumably carries the reply queue; verify against caller. */
431 q
= (queue_t
*)mp
->b_prev
;
437 iocp
= (struct iocblk
*)mp
->b_rptr
;
438 iocp
->ioc_error
= ret_val
;
/* ACK when ioc_error is 0, NAK otherwise. */
440 DB_TYPE(mp
) = (iocp
->ioc_error
== 0) ? M_IOCACK
: M_IOCNAK
;
/* Drain the operations parked through ip6_asp_pending_op(). */
443 ip6_asp_complete_op(ipst
);
447 * Copies the contents of src_table to dst_table, and sorts the
448 * entries in decending order of prefix lengths. It assumes that both
449 * tables are appropriately sized to contain count entries.
452 ip6_asp_copy(ip6_asp_t
*src_table
, ip6_asp_t
*dst_table
, uint_t count
)
454 ip6_asp_t
*src_ptr
, *src_limit
, *dst_ptr
, *dst_limit
, *dp
;
456 dst_table
[0] = src_table
[0];
461 * Sort the entries in descending order of prefix lengths.
463 * Note: this should be a small table. In 99% of cases, we
464 * expect the table to have 9 entries. In the remaining 1%
465 * of cases, we expect the table to have one or two more
468 src_limit
= src_table
+ count
;
469 dst_limit
= dst_table
+ 1;
470 for (src_ptr
= src_table
+ 1; src_ptr
!= src_limit
;
471 src_ptr
++, dst_limit
++) {
472 for (dst_ptr
= dst_table
; dst_ptr
< dst_limit
; dst_ptr
++) {
473 if (ip_mask_to_plen_v6(&src_ptr
->ip6_asp_mask
) >
474 ip_mask_to_plen_v6(&dst_ptr
->ip6_asp_mask
)) {
476 * Make room to insert the source entry
477 * before dst_ptr by shifting entries to
480 for (dp
= dst_limit
- 1; dp
>= dst_ptr
; dp
--)
/*
 * This function copies as many entries from ip6_asp_table as will fit
 * into dtable.  The dtable_size parameter is the size of dtable
 * in bytes.  This function returns the number of entries in
 * ip6_asp_table, even if it's not able to fit all of the entries into
 * dtable.
 */
/*
 * NOTE(review): this copy of the function is visibly incomplete - the
 * return type, the braces, the declaration of dtable_count, the statement
 * taken when dtable is too small and the tail of the bcopy() length
 * expression are not shown.
 */
497 ip6_asp_get(ip6_asp_t
*dtable
, size_t dtable_size
, ip_stack_t
*ipst
)
/* A NULL dtable skips the copy; only the entry count is returned. */
501 if (dtable
!= NULL
) {
/*
 * dtable cannot hold even one entry.
 * NOTE(review): the statement for this case is not shown.
 */
502 if (dtable_size
< sizeof (ip6_asp_t
))
/* Number of whole entries that fit in the caller's buffer. */
505 dtable_count
= dtable_size
/ sizeof (ip6_asp_t
);
/* Copy no more entries than the smaller of the table and the buffer. */
506 bcopy(ipst
->ips_ip6_asp_table
, dtable
,
507 MIN(ipst
->ips_ip6_asp_table_count
, dtable_count
) *
/* Report the number of entries in the table, not the number copied. */
511 return (ipst
->ips_ip6_asp_table_count
);
/*
 * Compare two labels.  Return B_TRUE if they are equal, B_FALSE
 * otherwise.
 */
519 ip6_asp_labelcmp(const char *label1
, const char *label2
)
521 int64_t *llptr1
, *llptr2
;
524 * The common case, the two labels are actually the same string
525 * from the policy table.
527 if (label1
== label2
)
/*
 * Since we know the labels are at most 16 bytes long, compare
 * the two strings as two 8-byte long integers.  The ip6_asp_t
 * structure guarantees that the labels are 8 byte aligned.
 */
535 llptr1
= (int64_t *)label1
;
536 llptr2
= (int64_t *)label2
;
537 if (llptr1
[0] == llptr2
[0] && llptr1
[1] == llptr2
[1])