Merge commit '4213c2d8f7d24a0383e863621115570a68fb016c'
[unleashed.git] / kernel / net / ip / ip6_asp.c
blob8a0628511b5a36c1394a8e3b2eb54586a3fb3dfe
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Copyright 2017 Sebastian Wiedenroth
24 * Use is subject to license terms.
27 #include <sys/types.h>
28 #include <sys/socket.h>
29 #include <sys/ksynch.h>
30 #include <sys/kmem.h>
31 #include <sys/errno.h>
32 #include <sys/systm.h>
33 #include <sys/sysmacros.h>
34 #include <sys/cmn_err.h>
35 #include <sys/strsun.h>
36 #include <sys/zone.h>
37 #include <netinet/in.h>
38 #include <inet/common.h>
39 #include <inet/ip.h>
40 #include <inet/ip6.h>
41 #include <inet/ip6_asp.h>
42 #include <inet/ip_ire.h>
43 #include <inet/ip_if.h>
44 #include <inet/ipclassifier.h>
/*
 * Netmask initializers for the default policy table that follows.  The
 * number in each macro name is the prefix length in bits: that many leading
 * one-bits are set across the four 32-bit words given to
 * IN6ADDR_INITIALIZER, and every remaining bit is zero.
 */
#define IN6ADDR_MASK128_INIT \
	IN6ADDR_INITIALIZER(0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu)
#define IN6ADDR_MASK96_INIT \
	IN6ADDR_INITIALIZER(0xffffffffu, 0xffffffffu, 0xffffffffu, 0)
#define IN6ADDR_MASK32_INIT \
	IN6ADDR_INITIALIZER(0xffffffffu, 0, 0, 0)
#define IN6ADDR_MASK16_INIT \
	IN6ADDR_INITIALIZER(0xffff0000u, 0, 0, 0)
#define IN6ADDR_MASK10_INIT \
	IN6ADDR_INITIALIZER(0xffc00000u, 0, 0, 0)
#define IN6ADDR_MASK7_INIT \
	IN6ADDR_INITIALIZER(0xfe000000u, 0, 0, 0)
60 * This table is ordered such that longest prefix matches are hit first
61 * (longer prefix lengths first). The last entry must be the "default"
62 * entry (::0/0).
64 static ip6_asp_t default_ip6_asp_table[] = {
65 { IN6ADDR_LOOPBACK_INIT, IN6ADDR_MASK128_INIT,
66 "Loopback", 50 },
67 { IN6ADDR_ANY_INIT, IN6ADDR_MASK96_INIT,
68 "IPv4_Compatible", 1 },
69 { IN6ADDR_INITIALIZER(0, 0, 0x0000ffffu, 0), IN6ADDR_MASK96_INIT,
70 "IPv4", 35 },
71 { IN6ADDR_INITIALIZER(0x20010000U, 0, 0, 0), IN6ADDR_MASK32_INIT,
72 "Teredo", 5 },
73 { IN6ADDR_INITIALIZER(0x20020000u, 0, 0, 0), IN6ADDR_MASK16_INIT,
74 "6to4", 30 },
75 { IN6ADDR_INITIALIZER(0x3ffe0000U, 0, 0, 0), IN6ADDR_MASK16_INIT,
76 "6bone", 1 },
77 { IN6ADDR_INITIALIZER(0xfec00000U, 0, 0, 0), IN6ADDR_MASK10_INIT,
78 "Site_Local", 1 },
79 { IN6ADDR_INITIALIZER(0xfc000000U, 0, 0, 0), IN6ADDR_MASK7_INIT,
80 "ULA", 3 },
81 { IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT,
82 "Default", 40 }
/*
 * The IPv6 Default Address Selection policy table.
 * Until someone up above reconfigures the policy table, use the global
 * default.  The table needs no lock since the only way to alter it is
 * through the SIOCSIP6ADDRPOLICY which is exclusive in ip.
 *
 * NOTE(review): the "needs no lock" statement looks stale -- the functions
 * in this file guard table replacement with ips_ip6_asp_lock together with
 * a reference count and an update-in-progress flag.  Confirm and reword.
 */

/* Forward declarations for helpers that are used before their definitions. */
static void ip6_asp_copy(ip6_asp_t *, ip6_asp_t *, uint_t);
static void ip6_asp_check_for_updates(ip_stack_t *);
94 void
95 ip6_asp_init(ip_stack_t *ipst)
97 /* Initialize the table lock */
98 mutex_init(&ipst->ips_ip6_asp_lock, NULL, MUTEX_DEFAULT, NULL);
100 ipst->ips_ip6_asp_table = default_ip6_asp_table;
102 ipst->ips_ip6_asp_table_count =
103 sizeof (default_ip6_asp_table) / sizeof (ip6_asp_t);
106 void
107 ip6_asp_free(ip_stack_t *ipst)
109 if (ipst->ips_ip6_asp_table != default_ip6_asp_table) {
110 kmem_free(ipst->ips_ip6_asp_table,
111 ipst->ips_ip6_asp_table_count * sizeof (ip6_asp_t));
112 ipst->ips_ip6_asp_table = NULL;
114 mutex_destroy(&ipst->ips_ip6_asp_lock);
118 * Return false if the table is being updated. Else, increment the ref
119 * count and return true.
121 boolean_t
122 ip6_asp_can_lookup(ip_stack_t *ipst)
124 mutex_enter(&ipst->ips_ip6_asp_lock);
125 if (ipst->ips_ip6_asp_uip) {
126 mutex_exit(&ipst->ips_ip6_asp_lock);
127 return (B_FALSE);
129 IP6_ASP_TABLE_REFHOLD(ipst);
130 mutex_exit(&ipst->ips_ip6_asp_lock);
131 return (B_TRUE);
135 void
136 ip6_asp_pending_op(queue_t *q, mblk_t *mp, aspfunc_t func)
138 conn_t *connp = Q_TO_CONN(q);
139 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
141 ASSERT((mp->b_prev == NULL) && (mp->b_queue == NULL) &&
142 (mp->b_next == NULL));
143 mp->b_queue = (void *)q;
144 mp->b_prev = (void *)func;
145 mp->b_next = NULL;
147 mutex_enter(&ipst->ips_ip6_asp_lock);
148 if (ipst->ips_ip6_asp_pending_ops == NULL) {
149 ASSERT(ipst->ips_ip6_asp_pending_ops_tail == NULL);
150 ipst->ips_ip6_asp_pending_ops =
151 ipst->ips_ip6_asp_pending_ops_tail = mp;
152 } else {
153 ipst->ips_ip6_asp_pending_ops_tail->b_next = mp;
154 ipst->ips_ip6_asp_pending_ops_tail = mp;
156 mutex_exit(&ipst->ips_ip6_asp_lock);
159 static void
160 ip6_asp_complete_op(ip_stack_t *ipst)
162 mblk_t *mp;
163 queue_t *q;
164 aspfunc_t func;
166 mutex_enter(&ipst->ips_ip6_asp_lock);
167 while (ipst->ips_ip6_asp_pending_ops != NULL) {
168 mp = ipst->ips_ip6_asp_pending_ops;
169 ipst->ips_ip6_asp_pending_ops = mp->b_next;
170 mp->b_next = NULL;
171 if (ipst->ips_ip6_asp_pending_ops == NULL)
172 ipst->ips_ip6_asp_pending_ops_tail = NULL;
173 mutex_exit(&ipst->ips_ip6_asp_lock);
175 q = (queue_t *)mp->b_queue;
176 func = (aspfunc_t)mp->b_prev;
178 mp->b_prev = NULL;
179 mp->b_queue = NULL;
182 (*func)(NULL, q, mp, NULL);
183 mutex_enter(&ipst->ips_ip6_asp_lock);
185 mutex_exit(&ipst->ips_ip6_asp_lock);
189 * Decrement reference count. When it gets to 0, we check for (pending)
190 * saved update to the table, if any.
192 void
193 ip6_asp_table_refrele(ip_stack_t *ipst)
195 IP6_ASP_TABLE_REFRELE(ipst);
199 * This function is guaranteed never to return a NULL pointer. It
200 * will always return information from one of the entries in the
201 * asp_table (which will never be empty). If a pointer is passed
202 * in for the precedence, the precedence value will be set; a
203 * pointer to the label will be returned by the function.
205 * Since the table is only anticipated to have about 10 entries
206 * total, the lookup algorithm hasn't been optimized to anything
207 * better than O(n).
209 char *
210 ip6_asp_lookup(const in6_addr_t *addr, uint32_t *precedence, ip_stack_t *ipst)
212 ip6_asp_t *aspp;
213 ip6_asp_t *match = NULL;
214 ip6_asp_t *default_policy;
216 aspp = ipst->ips_ip6_asp_table;
217 /* The default entry must always be the last one */
218 default_policy = aspp + ipst->ips_ip6_asp_table_count - 1;
220 while (match == NULL) {
221 if (aspp == default_policy) {
222 match = aspp;
223 } else {
224 if (V6_MASK_EQ(*addr, aspp->ip6_asp_mask,
225 aspp->ip6_asp_prefix))
226 match = aspp;
227 else
228 aspp++;
232 if (precedence != NULL)
233 *precedence = match->ip6_asp_precedence;
234 return (match->ip6_asp_label);
238 * If we had deferred updating the table because of outstanding references,
239 * do it now. Note, we don't do error checking on the queued IOCTL mblk, since
240 * ip_sioctl_ip6addrpolicy() has already done it for us.
242 void
243 ip6_asp_check_for_updates(ip_stack_t *ipst)
245 ip6_asp_t *table;
246 size_t table_size;
247 mblk_t *data_mp, *mp;
248 struct iocblk *iocp;
250 mutex_enter(&ipst->ips_ip6_asp_lock);
251 if (ipst->ips_ip6_asp_pending_update == NULL ||
252 ipst->ips_ip6_asp_refcnt > 0) {
253 mutex_exit(&ipst->ips_ip6_asp_lock);
254 return;
257 mp = ipst->ips_ip6_asp_pending_update;
258 ipst->ips_ip6_asp_pending_update = NULL;
259 ASSERT(mp->b_prev != NULL);
261 ipst->ips_ip6_asp_uip = B_TRUE;
263 iocp = (struct iocblk *)mp->b_rptr;
264 data_mp = mp->b_cont;
265 if (data_mp == NULL) {
266 table = NULL;
267 table_size = iocp->ioc_count;
268 } else {
269 table = (ip6_asp_t *)data_mp->b_rptr;
270 table_size = iocp->ioc_count;
273 ip6_asp_replace(mp, table, table_size, B_TRUE, ipst,
274 iocp->ioc_flag & IOC_MODELS);
278 * ip6_asp_replace replaces the contents of the IPv6 address selection
279 * policy table with those specified in new_table. If new_table is NULL,
280 * this indicates that the caller wishes ip to use the default policy
281 * table. The caller is responsible for making sure that there are exactly
282 * new_count policy entries in new_table.
284 /*ARGSUSED5*/
285 void
286 ip6_asp_replace(mblk_t *mp, ip6_asp_t *new_table, size_t new_size,
287 boolean_t locked, ip_stack_t *ipst, model_t datamodel)
289 int ret_val = 0;
290 ip6_asp_t *tmp_table;
291 uint_t count;
292 queue_t *q;
293 struct iocblk *iocp;
294 #if defined(_SYSCALL32_IMPL) && _LONG_LONG_ALIGNMENT_32 == 4
295 size_t ip6_asp_size = SIZEOF_STRUCT(ip6_asp, datamodel);
296 #else
297 const size_t ip6_asp_size = sizeof (ip6_asp_t);
298 #endif
300 if (new_size % ip6_asp_size != 0) {
301 ip1dbg(("ip6_asp_replace: invalid table size\n"));
302 ret_val = EINVAL;
303 if (locked)
304 goto unlock_end;
305 goto replace_end;
306 } else {
307 count = new_size / ip6_asp_size;
311 if (!locked)
312 mutex_enter(&ipst->ips_ip6_asp_lock);
314 * Check if we are in the process of creating any IRE using the
315 * current information. If so, wait till that is done.
317 if (!locked && ipst->ips_ip6_asp_refcnt > 0) {
318 /* Save this request for later processing */
319 if (ipst->ips_ip6_asp_pending_update == NULL) {
320 ipst->ips_ip6_asp_pending_update = mp;
321 } else {
322 /* Let's not queue multiple requests for now */
323 ip1dbg(("ip6_asp_replace: discarding request\n"));
324 mutex_exit(&ipst->ips_ip6_asp_lock);
325 ret_val = EAGAIN;
326 goto replace_end;
328 mutex_exit(&ipst->ips_ip6_asp_lock);
329 return;
332 /* Prevent lookups till the table have been updated */
333 if (!locked)
334 ipst->ips_ip6_asp_uip = B_TRUE;
336 ASSERT(ipst->ips_ip6_asp_refcnt == 0);
338 if (new_table == NULL) {
340 * This is a special case. The user wants to revert
341 * back to using the default table.
343 if (ipst->ips_ip6_asp_table == default_ip6_asp_table)
344 goto unlock_end;
346 kmem_free(ipst->ips_ip6_asp_table,
347 ipst->ips_ip6_asp_table_count * sizeof (ip6_asp_t));
348 ipst->ips_ip6_asp_table = default_ip6_asp_table;
349 ipst->ips_ip6_asp_table_count =
350 sizeof (default_ip6_asp_table) / sizeof (ip6_asp_t);
351 goto unlock_end;
354 if (count == 0) {
355 ret_val = EINVAL;
356 ip1dbg(("ip6_asp_replace: empty table\n"));
357 goto unlock_end;
360 if ((tmp_table = kmem_alloc(count * sizeof (ip6_asp_t), KM_NOSLEEP)) ==
361 NULL) {
362 ret_val = ENOMEM;
363 goto unlock_end;
366 #if defined(_SYSCALL32_IMPL) && _LONG_LONG_ALIGNMENT_32 == 4
369 * If 'new_table' -actually- originates from a 32-bit process
370 * then the nicely aligned ip6_asp_label array will be
371 * subtlely misaligned on this kernel, because the structure
372 * is 8 byte aligned in the kernel, but only 4 byte aligned in
373 * userland. Fix it up here.
375 * XX64 See the notes in ip_sioctl_ip6addrpolicy. Perhaps we could
376 * do the datamodel transformation (below) there instead of here?
378 if (datamodel == IOC_ILP32) {
379 ip6_asp_t *dst;
380 ip6_asp32_t *src;
381 int i;
383 if ((dst = kmem_zalloc(count * sizeof (*dst),
384 KM_NOSLEEP)) == NULL) {
385 kmem_free(tmp_table, count * sizeof (ip6_asp_t));
386 ret_val = ENOMEM;
387 goto unlock_end;
391 * Copy each element of the table from ip6_asp32_t
392 * format into ip6_asp_t format. Fortunately, since
393 * we're just dealing with a trailing structure pad,
394 * we can do this straightforwardly with a flurry of
395 * bcopying.
397 src = (void *)new_table;
398 for (i = 0; i < count; i++)
399 bcopy(src + i, dst + i, sizeof (*src));
401 ip6_asp_copy(dst, tmp_table, count);
402 kmem_free(dst, count * sizeof (*dst));
403 } else
404 #endif
405 ip6_asp_copy(new_table, tmp_table, count);
407 /* Make sure the last entry is the default entry */
408 if (!IN6_IS_ADDR_UNSPECIFIED(&tmp_table[count - 1].ip6_asp_prefix) ||
409 !IN6_IS_ADDR_UNSPECIFIED(&tmp_table[count - 1].ip6_asp_mask)) {
410 ret_val = EINVAL;
411 kmem_free(tmp_table, count * sizeof (ip6_asp_t));
412 ip1dbg(("ip6_asp_replace: bad table: no default entry\n"));
413 goto unlock_end;
415 if (ipst->ips_ip6_asp_table != default_ip6_asp_table) {
416 kmem_free(ipst->ips_ip6_asp_table,
417 ipst->ips_ip6_asp_table_count * sizeof (ip6_asp_t));
419 ipst->ips_ip6_asp_table = tmp_table;
420 ipst->ips_ip6_asp_table_count = count;
422 unlock_end:
423 ipst->ips_ip6_asp_uip = B_FALSE;
424 mutex_exit(&ipst->ips_ip6_asp_lock);
426 /* Let conn_ixa caching know that source address selection changed */
427 ip_update_source_selection(ipst);
429 replace_end:
430 /* Reply to the ioctl */
431 q = (queue_t *)mp->b_prev;
432 mp->b_prev = NULL;
433 if (q == NULL) {
434 freemsg(mp);
435 goto check_binds;
437 iocp = (struct iocblk *)mp->b_rptr;
438 iocp->ioc_error = ret_val;
439 iocp->ioc_count = 0;
440 DB_TYPE(mp) = (iocp->ioc_error == 0) ? M_IOCACK : M_IOCNAK;
441 qreply(q, mp);
442 check_binds:
443 ip6_asp_complete_op(ipst);
447 * Copies the contents of src_table to dst_table, and sorts the
448 * entries in decending order of prefix lengths. It assumes that both
449 * tables are appropriately sized to contain count entries.
451 static void
452 ip6_asp_copy(ip6_asp_t *src_table, ip6_asp_t *dst_table, uint_t count)
454 ip6_asp_t *src_ptr, *src_limit, *dst_ptr, *dst_limit, *dp;
456 dst_table[0] = src_table[0];
457 if (count == 1)
458 return;
461 * Sort the entries in descending order of prefix lengths.
463 * Note: this should be a small table. In 99% of cases, we
464 * expect the table to have 9 entries. In the remaining 1%
465 * of cases, we expect the table to have one or two more
466 * entries.
468 src_limit = src_table + count;
469 dst_limit = dst_table + 1;
470 for (src_ptr = src_table + 1; src_ptr != src_limit;
471 src_ptr++, dst_limit++) {
472 for (dst_ptr = dst_table; dst_ptr < dst_limit; dst_ptr++) {
473 if (ip_mask_to_plen_v6(&src_ptr->ip6_asp_mask) >
474 ip_mask_to_plen_v6(&dst_ptr->ip6_asp_mask)) {
476 * Make room to insert the source entry
477 * before dst_ptr by shifting entries to
478 * the right.
480 for (dp = dst_limit - 1; dp >= dst_ptr; dp--)
481 *(dp + 1) = *dp;
482 break;
485 *dst_ptr = *src_ptr;
490 * This function copies as many entries from ip6_asp_table as will fit
491 * into dtable. The dtable_size parameter is the size of dtable
492 * in bytes. This function returns the number of entries in
493 * ip6_asp_table, even if it's not able to fit all of the entries into
494 * dtable.
497 ip6_asp_get(ip6_asp_t *dtable, size_t dtable_size, ip_stack_t *ipst)
499 uint_t dtable_count;
501 if (dtable != NULL) {
502 if (dtable_size < sizeof (ip6_asp_t))
503 return (-1);
505 dtable_count = dtable_size / sizeof (ip6_asp_t);
506 bcopy(ipst->ips_ip6_asp_table, dtable,
507 MIN(ipst->ips_ip6_asp_table_count, dtable_count) *
508 sizeof (ip6_asp_t));
511 return (ipst->ips_ip6_asp_table_count);
515 * Compare two labels. Return B_TRUE if they are equal, B_FALSE
516 * otherwise.
518 boolean_t
519 ip6_asp_labelcmp(const char *label1, const char *label2)
521 int64_t *llptr1, *llptr2;
524 * The common case, the two labels are actually the same string
525 * from the policy table.
527 if (label1 == label2)
528 return (B_TRUE);
531 * Since we know the labels are at most 16 bytes long, compare
532 * the two strings as two 8-byte long integers. The ip6_asp_t
533 * structure guarantees that the labels are 8 byte alligned.
535 llptr1 = (int64_t *)label1;
536 llptr2 = (int64_t *)label2;
537 if (llptr1[0] == llptr2[0] && llptr1[1] == llptr2[1])
538 return (B_TRUE);
539 return (B_FALSE);