4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
26 * Copyright (c) 2019, Joyent, Inc.
29 #include <sys/types.h>
30 #include <sys/stream.h>
31 #include <sys/stropts.h>
32 #include <sys/strsun.h>
33 #include <sys/sysmacros.h>
34 #include <sys/errno.h>
36 #include <sys/socket.h>
38 #include <sys/sunddi.h>
39 #include <sys/cmn_err.h>
40 #include <sys/debug.h>
41 #include <sys/vtrace.h>
44 #include <sys/ethernet.h>
49 #include <net/if_types.h>
50 #include <net/if_dl.h>
51 #include <net/route.h>
52 #include <netinet/in.h>
53 #include <netinet/ip6.h>
54 #include <netinet/icmp6.h>
56 #include <inet/common.h>
58 #include <inet/mib2.h>
61 #include <inet/ip_impl.h>
62 #include <inet/ipclassifier.h>
63 #include <inet/ip_if.h>
64 #include <inet/ip_ire.h>
65 #include <inet/ip_rts.h>
67 #include <inet/ip_ndp.h>
68 #include <inet/sctp_ip.h>
69 #include <inet/ip_arp.h>
70 #include <inet/ip2mac_impl.h>
/*
 * Pick the per-stack announce interval: ips_ip_ndp_unsolicit_interval when
 * isv6 is true, ips_ip_arp_publish_interval otherwise.
 *
 * The expansion refers to a variable named `ipst`, which must be in scope
 * wherever the macro is used.  The argument is parenthesized so that any
 * expression (including another conditional) selects the branch as a whole.
 */
#define	ANNOUNCE_INTERVAL(isv6) \
	((isv6) ? ipst->ips_ip_ndp_unsolicit_interval : \
	ipst->ips_ip_arp_publish_interval)
/*
 * Pick the per-stack defense interval: ips_ndp_defend_interval when isv6 is
 * true, ips_arp_defend_interval otherwise.
 *
 * The expansion refers to a variable named `ipst`, which must be in scope
 * wherever the macro is used.  The argument is parenthesized so that any
 * expression (including another conditional) selects the branch as a whole.
 */
#define	DEFENSE_INTERVAL(isv6) \
	((isv6) ? ipst->ips_ndp_defend_interval : \
	ipst->ips_arp_defend_interval)
/*
 * Fixed (non-tunable) probe interval, chosen from the link's capabilities:
 * 150 when the ill has ill_note_link set, 1500 when it does not.
 */
#define	ILL_PROBE_INTERVAL(ill)	(!(ill)->ill_note_link ? 1500 : 150)
/*
 * The IPv4 Link Local address space is special; we do extra duplicate checking
 * there, as the entire assignment mechanism rests on random numbers.
 *
 * True when the first two bytes at ptr are 169 and 254.  The argument is
 * parenthesized before the cast so that pointer expressions such as `p + 1`
 * are evaluated in their own type first and only then viewed as bytes.
 */
#define	IS_IPV4_LL_SPACE(ptr)	(((uchar_t *)(ptr))[0] == 169 && \
	((uchar_t *)(ptr))[1] == 254)
/*
 * NCE_EXTERNAL_FLAGS_MASK is the set of ncec_flags bits that callers may
 * hand to the ncec*add* functions.
 *
 * Two of them deserve a note:
 *  - NCE_F_AUTHORITY: incoming adverts for the mapping are ignored (DAD is
 *    still performed for the mapping).
 *  - NCE_F_PUBLISH: we respond to requests for the protocol address.
 */
#define	NCE_EXTERNAL_FLAGS_MASK		\
	(NCE_F_AUTHORITY |		\
	NCE_F_PUBLISH |			\
	NCE_F_STATIC |			\
	NCE_F_MYADDR |			\
	NCE_F_ISROUTER |		\
	NCE_F_NONUD |			\
	NCE_F_ANYCAST |			\
	NCE_F_UNSOL_ADV |		\
	NCE_F_BCAST |			\
	NCE_F_MCAST)
106 * ndp_g_lock -> ill_lock -> ncec_lock
108 * The ndp_g_lock protects the NCE hash (nce_hash_tbl, NCE_HASH_PTR) and
109 * ncec_next. ncec_lock protects the contents of the NCE (particularly
113 static void nce_cleanup_list(ncec_t
*ncec
);
114 static void nce_set_ll(ncec_t
*ncec
, uchar_t
*ll_addr
);
115 static ncec_t
*ncec_lookup_illgrp(ill_t
*, const in6_addr_t
*,
117 static nce_t
*nce_lookup_addr(ill_t
*, const in6_addr_t
*);
118 static int nce_set_multicast_v6(ill_t
*ill
, const in6_addr_t
*addr
,
119 uint16_t ncec_flags
, nce_t
**newnce
);
120 static int nce_set_multicast_v4(ill_t
*ill
, const in_addr_t
*dst
,
121 uint16_t ncec_flags
, nce_t
**newnce
);
122 static boolean_t
ndp_xmit(ill_t
*ill
, uint32_t operation
,
123 uint8_t *hwaddr
, uint_t hwaddr_len
, const in6_addr_t
*sender
,
124 const in6_addr_t
*target
, int flag
);
125 static void ncec_refhold_locked(ncec_t
*);
126 static boolean_t
ill_defend_rate_limit(ill_t
*, ncec_t
*);
127 static void nce_queue_mp_common(ncec_t
*, mblk_t
*, boolean_t
);
128 static int nce_add_common(ill_t
*, uchar_t
*, uint_t
, const in6_addr_t
*,
129 uint16_t, uint16_t, nce_t
**);
130 static nce_t
*nce_add_impl(ill_t
*, ncec_t
*, nce_t
*, mblk_t
*, list_t
*);
131 static nce_t
*nce_add(ill_t
*, ncec_t
*, list_t
*);
132 static void nce_inactive(nce_t
*);
133 extern nce_t
*nce_lookup(ill_t
*, const in6_addr_t
*);
134 static nce_t
*nce_ill_lookup_then_add(ill_t
*, ncec_t
*);
135 static int nce_add_v6(ill_t
*, uchar_t
*, uint_t
, const in6_addr_t
*,
136 uint16_t, uint16_t, nce_t
**);
137 static int nce_add_v4(ill_t
*, uchar_t
*, uint_t
, const in_addr_t
*,
138 uint16_t, uint16_t, nce_t
**);
139 static int nce_add_v6_postprocess(nce_t
*);
140 static int nce_add_v4_postprocess(nce_t
*);
141 static ill_t
*nce_resolve_src(ncec_t
*, in6_addr_t
*);
142 static clock_t nce_fuzz_interval(clock_t, boolean_t
);
143 static void nce_resolv_ipmp_ok(ncec_t
*);
144 static void nce_walk_common(ill_t
*, pfi_t
, void *);
145 static void nce_start_timer(ncec_t
*, uint_t
);
146 static nce_t
*nce_fastpath_create(ill_t
*, ncec_t
*);
147 static void nce_fastpath_trigger(nce_t
*);
148 static nce_t
*nce_fastpath(ncec_t
*, boolean_t
, nce_t
*);
151 static void ncec_trace_cleanup(const ncec_t
*);
/*
 * Address of the IPv4 hash-table slot for addr: the entry of
 * ipst->ips_ndp4->nce_hash_tbl selected by IRE_ADDR_HASH(addr, NCE_TABLE_SIZE).
 */
#define	NCE_HASH_PTR_V4(ipst, addr) \
	((ipst)->ips_ndp4->nce_hash_tbl + IRE_ADDR_HASH(addr, NCE_TABLE_SIZE))
157 #define NCE_HASH_PTR_V6(ipst, addr) \
158 (&((ipst)->ips_ndp6->nce_hash_tbl[NCE_ADDR_HASH_V6(addr, \
161 extern kmem_cache_t
*ncec_cache
;
162 extern kmem_cache_t
*nce_cache
;
165 * Send out an IPv6 (unicast) or IPv4 (broadcast) DAD probe.
166 * If src_ill is not null, the ncec_addr is bound to src_ill. The
167 * src_ill is ignored by nce_dad for IPv4 Neighbor Cache entries where
168 * the probe is sent on the ncec_ill (in the non-IPMP case) or the
169 * IPMP cast_ill (in the IPMP case).
171 * Note that the probe interval is based on the src_ill for IPv6, and
172 * the ncec_xmit_interval for IPv4.
175 nce_dad(ncec_t
*ncec
, ill_t
*src_ill
, boolean_t send_probe
)
178 uint32_t probe_interval
;
180 ASSERT(!(ncec
->ncec_flags
& NCE_F_MCAST
));
181 ASSERT(!(ncec
->ncec_flags
& NCE_F_BCAST
));
182 if (ncec
->ncec_ipversion
== IPV6_VERSION
) {
183 dropped
= ndp_xmit(src_ill
, ND_NEIGHBOR_SOLICIT
,
184 ncec
->ncec_lladdr
, ncec
->ncec_lladdr_length
,
185 &ipv6_all_zeros
, &ncec
->ncec_addr
, NDP_PROBE
);
186 probe_interval
= ILL_PROBE_INTERVAL(src_ill
);
188 /* IPv4 DAD delay the initial probe. */
190 dropped
= arp_probe(ncec
);
193 probe_interval
= nce_fuzz_interval(ncec
->ncec_xmit_interval
,
197 mutex_enter(&ncec
->ncec_lock
);
199 mutex_exit(&ncec
->ncec_lock
);
201 nce_restart_timer(ncec
, probe_interval
);
205 * Compute default flags to use for an advertisement of this ncec's address.
208 nce_advert_flags(const ncec_t
*ncec
)
212 if (ncec
->ncec_flags
& NCE_F_ISROUTER
)
213 flag
|= NDP_ISROUTER
;
214 if (!(ncec
->ncec_flags
& NCE_F_ANYCAST
))
221 * NDP Cache Entry creation routine.
222 * This routine must always be called with ndp6->ndp_g_lock held.
225 nce_add_v6(ill_t
*ill
, uchar_t
*hw_addr
, uint_t hw_addr_len
,
226 const in6_addr_t
*addr
, uint16_t flags
, uint16_t state
, nce_t
**newnce
)
231 ASSERT(MUTEX_HELD(&ill
->ill_ipst
->ips_ndp6
->ndp_g_lock
));
232 ASSERT(ill
!= NULL
&& ill
->ill_isv6
);
234 err
= nce_add_common(ill
, hw_addr
, hw_addr_len
, addr
, flags
, state
,
238 ASSERT(newnce
!= NULL
);
244 * Post-processing routine to be executed after nce_add_v6(). This function
245 * triggers fastpath (if appropriate) and DAD on the newly added nce entry
246 * and must be called without any locks held.
249 nce_add_v6_postprocess(nce_t
*nce
)
251 ncec_t
*ncec
= nce
->nce_common
;
252 boolean_t dropped
= B_FALSE
;
253 uchar_t
*hw_addr
= ncec
->ncec_lladdr
;
254 uint_t hw_addr_len
= ncec
->ncec_lladdr_length
;
255 ill_t
*ill
= ncec
->ncec_ill
;
257 uint16_t flags
= ncec
->ncec_flags
;
258 ip_stack_t
*ipst
= ill
->ill_ipst
;
259 boolean_t trigger_fastpath
= B_TRUE
;
262 * If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then
263 * we call nce_fastpath as soon as the ncec is resolved in nce_process.
264 * We call nce_fastpath from nce_update if the link layer address of
265 * the peer changes.
267 if (NCE_PUBLISH(ncec
) || !NCE_ISREACHABLE(ncec
) ||
268 (hw_addr
== NULL
&& ill
->ill_net_type
!= IRE_IF_NORESOLVER
))
269 trigger_fastpath
= B_FALSE
;
271 if (trigger_fastpath
)
272 nce_fastpath_trigger(nce
);
273 if (NCE_PUBLISH(ncec
) && ncec
->ncec_state
== ND_PROBE
) {
276 * Unicast entry that needs DAD.
279 hwaddr_ill
= ipmp_illgrp_find_ill(ill
->ill_grp
,
280 hw_addr
, hw_addr_len
);
284 nce_dad(ncec
, hwaddr_ill
, B_TRUE
);
286 } else if (flags
& NCE_F_UNSOL_ADV
) {
288 * We account for the transmit below by assigning one
289 * less than the ndd variable. Subsequent decrements
290 * are done in nce_timer.
292 mutex_enter(&ncec
->ncec_lock
);
293 ncec
->ncec_unsolicit_count
=
294 ipst
->ips_ip_ndp_unsolicit_count
- 1;
295 mutex_exit(&ncec
->ncec_lock
);
296 dropped
= ndp_xmit(ill
,
300 &ncec
->ncec_addr
, /* Source and target of the adv */
301 &ipv6_all_hosts_mcast
, /* Destination of the packet */
302 nce_advert_flags(ncec
));
303 mutex_enter(&ncec
->ncec_lock
);
305 ncec
->ncec_unsolicit_count
++;
307 ncec
->ncec_last_time_defended
= ddi_get_lbolt();
308 if (ncec
->ncec_unsolicit_count
!= 0) {
309 nce_start_timer(ncec
,
310 ipst
->ips_ip_ndp_unsolicit_interval
);
312 mutex_exit(&ncec
->ncec_lock
);
318 * Atomically lookup and add (if needed) Neighbor Cache information for
321 * IPMP notes: the ncecs for non-local (i.e., !NCE_MYADDR(ncec)) addresses
322 * are always added pointing at the ipmp_ill. Thus, when the ill passed
323 * to nce_add_v6 is an under_ill (i.e., IS_UNDER_IPMP(ill)) two nce_t
324 * entries will be created, both pointing at the same ncec_t. The nce_t
325 * entries will have their nce_ill set to the ipmp_ill and the under_ill
326 * respectively, with the ncec_t having its ncec_ill pointing at the ipmp_ill.
327 * Local addresses are always created on the ill passed to nce_add_v6.
330 nce_lookup_then_add_v6(ill_t
*ill
, uchar_t
*hw_addr
, uint_t hw_addr_len
,
331 const in6_addr_t
*addr
, uint16_t flags
, uint16_t state
, nce_t
**newnce
)
334 ip_stack_t
*ipst
= ill
->ill_ipst
;
335 nce_t
*nce
, *upper_nce
= NULL
;
337 boolean_t need_ill_refrele
= B_FALSE
;
339 if (flags
& NCE_F_MCAST
) {
341 * hw_addr will be figured out in nce_set_multicast_v6;
342 * caller has to select the cast_ill
344 ASSERT(hw_addr
== NULL
);
345 ASSERT(!IS_IPMP(ill
));
346 err
= nce_set_multicast_v6(ill
, addr
, flags
, newnce
);
349 ASSERT(ill
->ill_isv6
);
350 if (IS_UNDER_IPMP(ill
) && !(flags
& NCE_F_MYADDR
)) {
351 ill
= ipmp_ill_hold_ipmp_ill(ill
);
354 need_ill_refrele
= B_TRUE
;
357 mutex_enter(&ipst
->ips_ndp6
->ndp_g_lock
);
358 nce
= nce_lookup_addr(ill
, addr
);
360 err
= nce_add_v6(ill
, hw_addr
, hw_addr_len
, addr
, flags
, state
,
365 mutex_exit(&ipst
->ips_ndp6
->ndp_g_lock
);
367 err
= nce_add_v6_postprocess(nce
);
368 if (in_ill
!= ill
&& nce
!= NULL
) {
369 nce_t
*under_nce
= NULL
;
372 * in_ill was the under_ill. Try to create the under_nce.
373 * Hold the ill_g_lock to prevent changes to group membership
376 rw_enter(&ipst
->ips_ill_g_lock
, RW_READER
);
377 if (!IS_IN_SAME_ILLGRP(in_ill
, ill
)) {
378 DTRACE_PROBE2(ill__not__in__group
, nce_t
*, nce
,
380 rw_exit(&ipst
->ips_ill_g_lock
);
386 under_nce
= nce_fastpath_create(in_ill
, nce
->nce_common
);
387 if (under_nce
== NULL
) {
388 rw_exit(&ipst
->ips_ill_g_lock
);
394 rw_exit(&ipst
->ips_ill_g_lock
);
396 nce
= under_nce
; /* will be returned to caller */
397 if (NCE_ISREACHABLE(nce
->nce_common
))
398 nce_fastpath_trigger(under_nce
);
400 /* nce_refrele is deferred until the lock is dropped */
408 if (upper_nce
!= NULL
)
409 nce_refrele(upper_nce
);
410 if (need_ill_refrele
)
416 * Remove all the CONDEMNED nces from the appropriate hash table.
417 * We create a private list of NCEs; these may have ires pointing
418 * to them, so the list will be passed through to clean up dependent
419 * ires; only then can we do ncec_refrele(), which can make the NCE inactive.
422 nce_remove(ndp_g_t
*ndp
, ncec_t
*ncec
, ncec_t
**free_nce_list
)
427 ASSERT(MUTEX_HELD(&ndp
->ndp_g_lock
));
428 ASSERT(ndp
->ndp_g_walker
== 0);
429 for (; ncec
; ncec
= ncec1
) {
430 ncec1
= ncec
->ncec_next
;
431 mutex_enter(&ncec
->ncec_lock
);
432 if (NCE_ISCONDEMNED(ncec
)) {
433 ptpn
= ncec
->ncec_ptpn
;
434 ncec1
= ncec
->ncec_next
;
436 ncec1
->ncec_ptpn
= ptpn
;
438 ncec
->ncec_ptpn
= NULL
;
439 ncec
->ncec_next
= NULL
;
440 ncec
->ncec_next
= *free_nce_list
;
441 *free_nce_list
= ncec
;
443 mutex_exit(&ncec
->ncec_lock
);
448 * 1. Mark the entry CONDEMNED. This ensures that no new nce_lookup()
449 * will return this NCE. Also no new timeouts will
450 * be started (See nce_restart_timer).
451 * 2. Cancel any currently running timeouts.
452 * 3. If there is an ndp walker, return. The walker will do the cleanup.
453 * This ensures that walkers see a consistent list of NCEs while walking.
454 * 4. Otherwise remove the NCE from the list of NCEs
457 ncec_delete(ncec_t
*ncec
)
461 int ipversion
= ncec
->ncec_ipversion
;
463 ip_stack_t
*ipst
= ncec
->ncec_ipst
;
465 if (ipversion
== IPV4_VERSION
)
466 ndp
= ipst
->ips_ndp4
;
468 ndp
= ipst
->ips_ndp6
;
470 /* Serialize deletes */
471 mutex_enter(&ncec
->ncec_lock
);
472 if (NCE_ISCONDEMNED(ncec
)) {
473 /* Some other thread is doing the delete */
474 mutex_exit(&ncec
->ncec_lock
);
478 * Caller has a refhold. Also 1 ref for being in the list. Thus
479 * refcnt has to be >= 2
481 ASSERT(ncec
->ncec_refcnt
>= 2);
482 ncec
->ncec_flags
|= NCE_F_CONDEMNED
;
483 mutex_exit(&ncec
->ncec_lock
);
485 /* Count how many condemned nces for kmem_cache callback */
486 atomic_inc_32(&ipst
->ips_num_nce_condemned
);
487 nce_fastpath_list_delete(ncec
->ncec_ill
, ncec
, NULL
);
489 /* Complete any waiting callbacks */
490 ncec_cb_dispatch(ncec
);
493 * Cancel any running timer. Timeout can't be restarted
494 * since CONDEMNED is set. Can't hold ncec_lock across untimeout.
495 * Passing invalid timeout id is fine.
497 if (ncec
->ncec_timeout_id
!= 0) {
498 (void) untimeout(ncec
->ncec_timeout_id
);
499 ncec
->ncec_timeout_id
= 0;
502 mutex_enter(&ndp
->ndp_g_lock
);
503 if (ncec
->ncec_ptpn
== NULL
) {
505 * The last ndp walker has already removed this ncec from
506 * the list after we marked the ncec CONDEMNED and before
507 * we grabbed the global lock.
509 mutex_exit(&ndp
->ndp_g_lock
);
512 if (ndp
->ndp_g_walker
> 0) {
514 * Can't unlink. The walker will clean up
516 ndp
->ndp_g_walker_cleanup
= B_TRUE
;
517 mutex_exit(&ndp
->ndp_g_lock
);
522 * Now remove the ncec from the list. nce_restart_timer won't restart
523 * the timer since it is marked CONDEMNED.
525 ptpn
= ncec
->ncec_ptpn
;
526 ncec1
= ncec
->ncec_next
;
528 ncec1
->ncec_ptpn
= ptpn
;
530 ncec
->ncec_ptpn
= NULL
;
531 ncec
->ncec_next
= NULL
;
532 mutex_exit(&ndp
->ndp_g_lock
);
534 /* Removed from ncec_ptpn/ncec_next list */
535 ncec_refrele_notr(ncec
);
539 ncec_inactive(ncec_t
*ncec
)
542 ill_t
*ill
= ncec
->ncec_ill
;
543 ip_stack_t
*ipst
= ncec
->ncec_ipst
;
545 ASSERT(ncec
->ncec_refcnt
== 0);
546 ASSERT(MUTEX_HELD(&ncec
->ncec_lock
));
548 /* Count how many condemned nces for kmem_cache callback */
549 if (NCE_ISCONDEMNED(ncec
))
550 atomic_add_32(&ipst
->ips_num_nce_condemned
, -1);
552 /* Free all allocated messages */
553 mpp
= &ncec
->ncec_qd_mp
;
554 while (*mpp
!= NULL
) {
563 * must have been cleaned up in ncec_delete
565 ASSERT(list_is_empty(&ncec
->ncec_cb
));
566 list_destroy(&ncec
->ncec_cb
);
568 * free the ncec_lladdr if one was allocated in nce_add_common()
570 if (ncec
->ncec_lladdr_length
> 0)
571 kmem_free(ncec
->ncec_lladdr
, ncec
->ncec_lladdr_length
);
574 ncec_trace_cleanup(ncec
);
577 mutex_enter(&ill
->ill_lock
);
578 DTRACE_PROBE3(ill__decr__cnt
, (ill_t
*), ill
,
579 (char *), "ncec", (void *), ncec
);
581 ncec
->ncec_ill
= NULL
;
583 * If the number of ncecs associated with this ill has dropped
584 * to zero, check whether we need to restart any operation that
585 * is waiting for this to happen.
587 if (ILL_DOWN_OK(ill
)) {
588 /* ipif_ill_refrele_tail drops the ill_lock */
589 ipif_ill_refrele_tail(ill
);
591 mutex_exit(&ill
->ill_lock
);
594 mutex_destroy(&ncec
->ncec_lock
);
595 kmem_cache_free(ncec_cache
, ncec
);
599 * ncec_walk routine. Delete the ncec if it is associated with the ill
600 * that is going away. Always called as a writer.
603 ncec_delete_per_ill(ncec_t
*ncec
, void *arg
)
605 if ((ncec
!= NULL
) && ncec
->ncec_ill
== arg
) {
611 * Neighbor Cache cleanup logic for a list of ncec_t entries.
614 nce_cleanup_list(ncec_t
*ncec
)
618 ASSERT(ncec
!= NULL
);
619 while (ncec
!= NULL
) {
620 ncec_next
= ncec
->ncec_next
;
621 ncec
->ncec_next
= NULL
;
624 * It is possible for the last ndp walker (this thread)
625 * to come here after ncec_delete has marked the ncec CONDEMNED
626 * and before it has removed the ncec from the fastpath list
627 * or called untimeout. So we need to do it here. It is safe
628 * for both ncec_delete and this thread to do it twice or
629 * even simultaneously since each of the threads has a
630 * reference on the ncec.
632 nce_fastpath_list_delete(ncec
->ncec_ill
, ncec
, NULL
);
634 * Cancel any running timer. Timeout can't be restarted
635 * since CONDEMNED is set. The ncec_lock can't be
636 * held across untimeout though passing invalid timeout
639 if (ncec
->ncec_timeout_id
!= 0) {
640 (void) untimeout(ncec
->ncec_timeout_id
);
641 ncec
->ncec_timeout_id
= 0;
643 /* Removed from ncec_ptpn/ncec_next list */
644 ncec_refrele_notr(ncec
);
650 * Restart DAD on given NCE. Returns B_TRUE if DAD has been restarted.
653 nce_restart_dad(ncec_t
*ncec
)
656 ill_t
*ill
, *hwaddr_ill
;
660 ill
= ncec
->ncec_ill
;
661 mutex_enter(&ncec
->ncec_lock
);
662 if (ncec
->ncec_state
== ND_PROBE
) {
663 mutex_exit(&ncec
->ncec_lock
);
665 } else if (ncec
->ncec_state
== ND_REACHABLE
) {
666 ASSERT(ncec
->ncec_lladdr
!= NULL
);
667 ncec
->ncec_state
= ND_PROBE
;
668 ncec
->ncec_pcnt
= ND_MAX_UNICAST_SOLICIT
;
670 * Slight cheat here: we don't use the initial probe delay
671 * for IPv4 in this obscure case.
673 mutex_exit(&ncec
->ncec_lock
);
675 hwaddr_ill
= ipmp_illgrp_find_ill(ill
->ill_grp
,
676 ncec
->ncec_lladdr
, ncec
->ncec_lladdr_length
);
680 nce_dad(ncec
, hwaddr_ill
, B_TRUE
);
683 mutex_exit(&ncec
->ncec_lock
);
690 * IPv6 Cache entry lookup. Try to find an ncec matching the parameters passed.
691 * If one is found, the refcnt on the ncec will be incremented.
694 ncec_lookup_illgrp_v6(ill_t
*ill
, const in6_addr_t
*addr
)
697 ip_stack_t
*ipst
= ill
->ill_ipst
;
699 rw_enter(&ipst
->ips_ill_g_lock
, RW_READER
);
700 mutex_enter(&ipst
->ips_ndp6
->ndp_g_lock
);
702 /* Get head of v6 hash table */
703 ncec
= *((ncec_t
**)NCE_HASH_PTR_V6(ipst
, *addr
));
704 ncec
= ncec_lookup_illgrp(ill
, addr
, ncec
);
705 mutex_exit(&ipst
->ips_ndp6
->ndp_g_lock
);
706 rw_exit(&ipst
->ips_ill_g_lock
);
710 * IPv4 Cache entry lookup. Try to find an ncec matching the parameters passed.
711 * If one is found, the refcnt on the ncec will be incremented.
714 ncec_lookup_illgrp_v4(ill_t
*ill
, const in_addr_t
*addr
)
718 ip_stack_t
*ipst
= ill
->ill_ipst
;
720 rw_enter(&ipst
->ips_ill_g_lock
, RW_READER
);
721 mutex_enter(&ipst
->ips_ndp4
->ndp_g_lock
);
723 /* Get head of v4 hash table */
724 ncec
= *((ncec_t
**)NCE_HASH_PTR_V4(ipst
, *addr
));
725 IN6_IPADDR_TO_V4MAPPED(*addr
, &addr6
);
726 ncec
= ncec_lookup_illgrp(ill
, &addr6
, ncec
);
727 mutex_exit(&ipst
->ips_ndp4
->ndp_g_lock
);
728 rw_exit(&ipst
->ips_ill_g_lock
);
733 * Cache entry lookup. Try to find an ncec matching the parameters passed.
734 * If an ncec is found, increment the hold count on that ncec.
735 * The caller passes in the start of the appropriate hash table, and must
736 * be holding the appropriate global lock (ndp_g_lock). In addition, since
737 * this function matches ncec_t entries across the illgrp, the ips_ill_g_lock
738 * must be held as reader.
740 * This function always matches across the ipmp group.
743 ncec_lookup_illgrp(ill_t
*ill
, const in6_addr_t
*addr
, ncec_t
*ncec
)
746 ip_stack_t
*ipst
= ill
->ill_ipst
;
749 ndp
= ipst
->ips_ndp6
;
751 ndp
= ipst
->ips_ndp4
;
754 ASSERT(MUTEX_HELD(&ndp
->ndp_g_lock
));
755 if (IN6_IS_ADDR_UNSPECIFIED(addr
))
757 for (; ncec
!= NULL
; ncec
= ncec
->ncec_next
) {
758 if (ncec
->ncec_ill
== ill
||
759 IS_IN_SAME_ILLGRP(ill
, ncec
->ncec_ill
)) {
760 if (IN6_ARE_ADDR_EQUAL(&ncec
->ncec_addr
, addr
)) {
761 mutex_enter(&ncec
->ncec_lock
);
762 if (!NCE_ISCONDEMNED(ncec
)) {
763 ncec_refhold_locked(ncec
);
764 mutex_exit(&ncec
->ncec_lock
);
767 mutex_exit(&ncec
->ncec_lock
);
775 * Find an nce_t on ill with nce_addr == addr. Lookup the nce_t
776 * entries for ill only, i.e., when ill is part of an ipmp group,
777 * nce_lookup_v4 will never try to match across the group.
780 nce_lookup_v4(ill_t
*ill
, const in_addr_t
*addr
)
784 ip_stack_t
*ipst
= ill
->ill_ipst
;
786 mutex_enter(&ipst
->ips_ndp4
->ndp_g_lock
);
787 IN6_IPADDR_TO_V4MAPPED(*addr
, &addr6
);
788 nce
= nce_lookup_addr(ill
, &addr6
);
789 mutex_exit(&ipst
->ips_ndp4
->ndp_g_lock
);
794 * Find an nce_t on ill with nce_addr == addr. Lookup the nce_t
795 * entries for ill only, i.e., when ill is part of an ipmp group,
796 * nce_lookup_v6 will never try to match across the group.
799 nce_lookup_v6(ill_t
*ill
, const in6_addr_t
*addr6
)
802 ip_stack_t
*ipst
= ill
->ill_ipst
;
804 mutex_enter(&ipst
->ips_ndp6
->ndp_g_lock
);
805 nce
= nce_lookup_addr(ill
, addr6
);
806 mutex_exit(&ipst
->ips_ndp6
->ndp_g_lock
);
811 nce_lookup_addr(ill_t
*ill
, const in6_addr_t
*addr
)
818 ASSERT(MUTEX_HELD(&ill
->ill_ipst
->ips_ndp6
->ndp_g_lock
));
820 ASSERT(MUTEX_HELD(&ill
->ill_ipst
->ips_ndp4
->ndp_g_lock
));
822 mutex_enter(&ill
->ill_lock
);
823 nce
= nce_lookup(ill
, addr
);
824 mutex_exit(&ill
->ill_lock
);
830 * Router turned to host. We need to make sure that cached copies of the ncec
831 * are not used for forwarding packets if they were derived from the default
832 * route, and that the default route itself is removed, as required by
833 * section 7.2.5 of RFC 2461.
835 * Note that the ncec itself probably has valid link-layer information for the
836 * nexthop, so that there is no reason to delete the ncec, as long as the
837 * ISROUTER flag is turned off.
840 ncec_router_to_host(ncec_t
*ncec
)
843 ip_stack_t
*ipst
= ncec
->ncec_ipst
;
845 mutex_enter(&ncec
->ncec_lock
);
846 ncec
->ncec_flags
&= ~NCE_F_ISROUTER
;
847 mutex_exit(&ncec
->ncec_lock
);
849 ire
= ire_ftable_lookup_v6(&ipv6_all_zeros
, &ipv6_all_zeros
,
850 &ncec
->ncec_addr
, IRE_DEFAULT
, ncec
->ncec_ill
, ALL_ZONES
, NULL
,
851 MATCH_IRE_ILL
| MATCH_IRE_TYPE
| MATCH_IRE_GW
, 0, ipst
, NULL
);
853 ip_rts_rtmsg(RTM_DELETE
, ire
, 0, ipst
);
860 * Process passed in parameters either from an incoming packet or via
864 nce_process(ncec_t
*ncec
, uchar_t
*hw_addr
, uint32_t flag
, boolean_t is_adv
)
866 ill_t
*ill
= ncec
->ncec_ill
;
867 uint32_t hw_addr_len
= ill
->ill_phys_addr_length
;
868 boolean_t ll_updated
= B_FALSE
;
869 boolean_t ll_changed
;
872 ASSERT(ncec
->ncec_ipversion
== IPV6_VERSION
);
874 * No updates of link layer address or the neighbor state is
875 * allowed, when the cache is in NONUD state. This still
876 * allows for responding to reachability solicitation.
878 mutex_enter(&ncec
->ncec_lock
);
879 if (ncec
->ncec_state
== ND_INCOMPLETE
) {
880 if (hw_addr
== NULL
) {
881 mutex_exit(&ncec
->ncec_lock
);
884 nce_set_ll(ncec
, hw_addr
);
886 * Update ncec state and send the queued packets
887 * back to ip; this time the ire will be added.
889 if (flag
& ND_NA_FLAG_SOLICITED
) {
890 nce_update(ncec
, ND_REACHABLE
, NULL
);
892 nce_update(ncec
, ND_STALE
, NULL
);
894 mutex_exit(&ncec
->ncec_lock
);
895 nce
= nce_fastpath(ncec
, B_TRUE
, NULL
);
901 ll_changed
= nce_cmp_ll_addr(ncec
, hw_addr
, hw_addr_len
);
903 /* If this is a SOLICITATION request only */
905 nce_update(ncec
, ND_STALE
, hw_addr
);
906 mutex_exit(&ncec
->ncec_lock
);
907 ncec_cb_dispatch(ncec
);
910 if (!(flag
& ND_NA_FLAG_OVERRIDE
) && ll_changed
) {
911 /* If in any other state than REACHABLE, ignore */
912 if (ncec
->ncec_state
== ND_REACHABLE
) {
913 nce_update(ncec
, ND_STALE
, NULL
);
915 mutex_exit(&ncec
->ncec_lock
);
916 ncec_cb_dispatch(ncec
);
920 nce_update(ncec
, ND_UNCHANGED
, hw_addr
);
923 if (flag
& ND_NA_FLAG_SOLICITED
) {
924 nce_update(ncec
, ND_REACHABLE
, NULL
);
927 nce_update(ncec
, ND_STALE
, NULL
);
930 mutex_exit(&ncec
->ncec_lock
);
931 if (!(flag
& ND_NA_FLAG_ROUTER
) && (ncec
->ncec_flags
&
933 ncec_router_to_host(ncec
);
935 ncec_cb_dispatch(ncec
);
941 * Pass arg1 to the cbf supplied, along with each ncec in existence.
942 * ncec_walk() places a REFHOLD on the ncec and drops the lock when
943 * walking the hash list.
946 ncec_walk_common(ndp_g_t
*ndp
, ill_t
*ill
, ncec_walk_cb_t cbf
,
947 void *arg1
, boolean_t trace
)
952 ncec_t
*free_nce_list
= NULL
;
954 mutex_enter(&ndp
->ndp_g_lock
);
955 /* Prevent ncec_delete from unlink and free of NCE */
957 mutex_exit(&ndp
->ndp_g_lock
);
958 for (ncep
= ndp
->nce_hash_tbl
;
959 ncep
< A_END(ndp
->nce_hash_tbl
); ncep
++) {
960 for (ncec
= *ncep
; ncec
!= NULL
; ncec
= ncec1
) {
961 ncec1
= ncec
->ncec_next
;
962 if (ill
== NULL
|| ncec
->ncec_ill
== ill
) {
968 ncec_refhold_notr(ncec
);
970 ncec_refrele_notr(ncec
);
975 mutex_enter(&ndp
->ndp_g_lock
);
977 if (ndp
->ndp_g_walker_cleanup
&& ndp
->ndp_g_walker
== 0) {
978 /* Time to delete condemned entries */
979 for (ncep
= ndp
->nce_hash_tbl
;
980 ncep
< A_END(ndp
->nce_hash_tbl
); ncep
++) {
983 nce_remove(ndp
, ncec
, &free_nce_list
);
986 ndp
->ndp_g_walker_cleanup
= B_FALSE
;
989 mutex_exit(&ndp
->ndp_g_lock
);
991 if (free_nce_list
!= NULL
) {
992 nce_cleanup_list(free_nce_list
);
998 * Note that ill can be NULL hence can't derive the ipst from it.
1001 ncec_walk(ill_t
*ill
, ncec_walk_cb_t cbf
, void *arg1
, ip_stack_t
*ipst
)
1003 ncec_walk_common(ipst
->ips_ndp4
, ill
, cbf
, arg1
, B_TRUE
);
1004 ncec_walk_common(ipst
->ips_ndp6
, ill
, cbf
, arg1
, B_TRUE
);
1008 * Cheesy globals (i.e. all netstacks) for both a limit on per-ill multicast
1009 * NCEs, and the number to reclaim if we hit the limit. Used by
1010 * nce_set_multicast_v[46]() to limit the linked-list length of ill_nce. Until
1011 * we solve the multicast-mappings-shouldn't-be-NCEs problem, use this.
1014 /* Maximum number of multicast NCEs on an ill. */
1015 uint_t ip_max_ill_mcast_nces
= 16384;
1017 * Number of NCEs to delete if we hit the maximum above. 0 means *don't* and
1018 * return an error. Non-zero means delete so many, and if the number is >=
1019 * the max above, that means delete them all.
1021 uint_t ip_ill_mcast_reclaim
= 256;
1024 * Encapsulate multicast ill capping in a function, for easier DTrace
1025 * detections. Return a list of refheld NCEs to destroy-via-refrele. That
1026 * list can be NULL, but can only be non-NULL if we successfully reclaimed.
1028 * NOTE: This function must be called while holding the ill_lock AND
1029 * JUST PRIOR to making the insertion into the ill_nce list.
1031 * We can't release the ones we delete ourselves because the ill_lock is held
1032 * by the caller. They are, instead, passed back in a list_t for deletion
1033 * outside of the ill_lock hold. nce_graveyard_free() actually frees them.
1035 * While this covers nce_t, ncec_t gets done even further down the road. See
1036 * nce_graveyard_free() for why.
1039 nce_too_many_mcast(ill_t
*ill
, list_t
*graveyard
)
1041 uint_t reclaim_count
, max_count
, reclaimed
= 0;
1043 nce_t
*nce
, *deadman
;
1045 ASSERT(graveyard
!= NULL
);
1046 ASSERT(list_is_empty(graveyard
));
1047 ASSERT(MUTEX_HELD(&ill
->ill_lock
));
1050 * NOTE: Some grinning weirdo may have lowered the global max beyond
1051 * what this ill currently has. The behavior in this case will be
1052 * trim-back just by the reclaim amount for any new ones.
1054 max_count
= ip_max_ill_mcast_nces
;
1055 reclaim_count
= min(ip_ill_mcast_reclaim
, max_count
);
1058 if (ill
->ill_mcast_nces
< max_count
)
1059 return (B_FALSE
); /* Yes, all good. */
1061 if (reclaim_count
== 0)
1062 return (B_TRUE
); /* Don't bother - we're stuck. */
1064 /* We need to reclaim now. Exploit our held ill_lock. */
1067 * Start at the tail and work backwards, new nces are head-inserted,
1068 * so we'll be reaping the oldest entries.
1070 nce
= list_tail(&ill
->ill_nce
);
1071 while (reclaimed
< reclaim_count
) {
1072 /* Skip ahead to a multicast NCE. */
1073 while (nce
!= NULL
&&
1074 (nce
->nce_common
->ncec_flags
& NCE_F_MCAST
) == 0) {
1075 nce
= list_prev(&ill
->ill_nce
, nce
);
1081 * NOTE: For now, we just delete the first one(s) we find.
1082 * This is not optimal, and may require some inspection of nce
1083 * & its ncec to be better.
1086 nce
= list_prev(&ill
->ill_nce
, nce
);
1088 /* nce_delete() requires caller holds... */
1089 nce_refhold(deadman
);
1090 nce_delete(deadman
); /* Bumps down ill_mcast_nces. */
1092 /* Link the dead ones singly, still refheld... */
1093 list_insert_tail(graveyard
, deadman
);
1097 if (reclaimed
!= reclaim_count
) {
1098 /* We didn't have enough to reach reclaim_count. Why?!? */
1099 DTRACE_PROBE3(ill__mcast__nce__reclaim__mismatch
, ill_t
*, ill
,
1100 uint_t
, reclaimed
, uint_t
, reclaim_count
);
1102 /* In case for some REALLY weird reason we found none! */
1103 too_many
= (reclaimed
== 0);
1112 ncec_mcast_reap_one(ncec_t
*ncec
, void *arg
)
1115 ill_t
*ill
= (ill_t
*)arg
;
1117 /* Obvious no-lock-needed checks... */
1118 if (ncec
== NULL
|| ncec
->ncec_ill
!= ill
||
1119 (ncec
->ncec_flags
& NCE_F_MCAST
) == 0)
1122 mutex_enter(&ncec
->ncec_lock
);
1124 * It's refheld by the walk infrastructure. It has one reference for
1125 * being in the ndp_g_hash, and if an nce_t exists, that's one more.
1126 * We want ones without an nce_t, so 2 is the magic number. If it's
1127 * LESS than 2, we have much bigger problems anyway.
1129 ASSERT(ncec
->ncec_refcnt
>= 2);
1130 reapit
= (ncec
->ncec_refcnt
== 2);
1131 mutex_exit(&ncec
->ncec_lock
);
1134 IP_STAT(ill
->ill_ipst
, ip_nce_mcast_reclaim_deleted
);
1140 * Attempt to reap stray multicast ncec_t structures left in the wake of
1141 * nce_graveyard_free(). This is a taskq servicing routine, as it's well
1142 * outside any netstack-global locks being held - ndp_g_lock in this case. We
1143 * have a reference hold on the ill, which will prevent any unplumbing races.
1146 ncec_mcast_reap(void *arg
)
1148 ill_t
*ill
= (ill_t
*)arg
;
1150 IP_STAT(ill
->ill_ipst
, ip_nce_mcast_reclaim_calls
);
1151 ncec_walk(ill
, ncec_mcast_reap_one
, ill
, ill
->ill_ipst
);
1152 mutex_enter(&ill
->ill_lock
);
1153 ill
->ill_mcast_ncec_cleanup
= B_FALSE
;
1155 * Inline a _notr() version of ill_refrele. See nce_graveyard_free()
1159 if (ill
->ill_refcnt
== 0)
1160 ipif_ill_refrele_tail(ill
); /* Drops ill_lock. */
1162 mutex_exit(&ill
->ill_lock
);
1166 * Free a list (including handling an empty list or NULL list) of
1167 * reference-held NCEs that were reaped from a nce_too_many_mcast()
1168 * call. Separate because the caller must have dropped ndp_g_lock first.
1170 * This also schedules a taskq task to unlink underlying NCECs from the
1171 * ndp_g_hash, which are protected by ndp_g_lock.
1174 nce_graveyard_free(list_t
*graveyard
)
1176 nce_t
*deadman
, *current
;
1180 if (graveyard
== NULL
)
1183 current
= list_head(graveyard
);
1184 if (current
== NULL
) {
1185 list_destroy(graveyard
);
1189 ill
= current
->nce_ill
;
1191 * Normally one should ill_refhold(ill) here. There's no _notr()
1192 * variant like there is for ire_t, dce_t, or even ncec_t, but this is
1193 * the ONLY case that'll break the mh_trace that IP debugging uses for
1194 * reference counts (i.e. they assume same thread releases as
1195 * holds). Instead, we inline ill_refhold() here. We must do the same
1196 * in the release done by the ncec_mcast_reap() above.
1198 mutex_enter(&ill
->ill_lock
);
1200 mutex_exit(&ill
->ill_lock
);
1202 while (current
!= NULL
) {
1203 ASSERT3P(ill
, ==, current
->nce_ill
);
1205 current
= list_next(graveyard
, deadman
);
1206 list_remove(graveyard
, deadman
);
1207 ASSERT3U((deadman
->nce_common
->ncec_flags
& NCE_F_MCAST
), !=,
1209 nce_refrele(deadman
);
1211 list_destroy(graveyard
);
1213 mutex_enter(&ill
->ill_lock
);
1214 if (ill
->ill_mcast_ncec_cleanup
)
1217 ill
->ill_mcast_ncec_cleanup
= B_TRUE
;
1220 mutex_exit(&ill
->ill_lock
);
1221 if (!doit
|| taskq_dispatch(system_taskq
, ncec_mcast_reap
,
1222 ill
, TQ_NOSLEEP
) == TASKQID_INVALID
) {
1223 mutex_enter(&ill
->ill_lock
);
1225 IP_STAT(ill
->ill_ipst
, ip_nce_mcast_reclaim_tqfail
);
1226 ill
->ill_mcast_ncec_cleanup
= B_FALSE
;
1228 /* There's no _notr() for ill_refrele(), so inline it here. */
1230 if (ill
->ill_refcnt
== 0)
1231 ipif_ill_refrele_tail(ill
); /* Drops ill_lock */
1233 mutex_exit(&ill
->ill_lock
);
1238 * For each interface an entry is added for the unspecified multicast group.
1239 * Here that mapping is used to form the multicast cache entry for a particular
1240 * multicast destination.
1243 nce_set_multicast_v6(ill_t
*ill
, const in6_addr_t
*dst
,
1244 uint16_t flags
, nce_t
**newnce
)
1248 ip_stack_t
*ipst
= ill
->ill_ipst
;
1251 ASSERT(ill
!= NULL
);
1252 ASSERT(ill
->ill_isv6
);
1253 ASSERT(!(IN6_IS_ADDR_UNSPECIFIED(dst
)));
1255 mutex_enter(&ipst
->ips_ndp6
->ndp_g_lock
);
1256 nce
= nce_lookup_addr(ill
, dst
);
1258 mutex_exit(&ipst
->ips_ndp6
->ndp_g_lock
);
1261 if (ill
->ill_net_type
== IRE_IF_RESOLVER
) {
1263 * For IRE_IF_RESOLVER a hardware mapping can be
1266 hw_addr
= kmem_alloc(ill
->ill_nd_lla_len
, KM_NOSLEEP
);
1267 if (hw_addr
== NULL
) {
1268 mutex_exit(&ipst
->ips_ndp6
->ndp_g_lock
);
1271 ip_mcast_mapping(ill
, (uchar_t
*)dst
, hw_addr
);
1273 /* No hw_addr is needed for IRE_IF_NORESOLVER. */
1276 ASSERT((flags
& NCE_F_MCAST
) != 0);
1277 ASSERT((flags
& NCE_F_NONUD
) != 0);
1278 /* nce_state will be computed by nce_add_common() */
1279 err
= nce_add_v6(ill
, hw_addr
, ill
->ill_phys_addr_length
, dst
, flags
,
1280 ND_UNCHANGED
, &nce
);
1281 mutex_exit(&ipst
->ips_ndp6
->ndp_g_lock
);
1283 err
= (nce
!= NULL
) ? nce_add_v6_postprocess(nce
) : ENOMEM
;
1284 if (hw_addr
!= NULL
)
1285 kmem_free(hw_addr
, ill
->ill_nd_lla_len
);
1287 ip1dbg(("nce_set_multicast_v6: create failed" "%d\n", err
));
1291 ASSERT(nce
->nce_common
->ncec_state
== ND_REACHABLE
);
1300 * Return the link layer address, and any flags of a ncec.
1303 ndp_query(ill_t
*ill
, struct lif_nd_req
*lnr
)
1309 ASSERT(ill
!= NULL
&& ill
->ill_isv6
);
1310 sin6
= (sin6_t
*)&lnr
->lnr_addr
;
1311 addr
= &sin6
->sin6_addr
;
1314 * NOTE: if the ill is an IPMP interface, then match against the whole
1315 * illgrp. This e.g. allows in.ndpd to retrieve the link layer
1316 * addresses for the data addresses on an IPMP interface even though
1317 * ipif_ndp_up() created them with an ncec_ill of ipif_bound_ill.
1319 ncec
= ncec_lookup_illgrp_v6(ill
, addr
);
1322 /* If no link layer address is available yet, return ESRCH */
1323 if (!NCE_ISREACHABLE(ncec
)) {
1327 lnr
->lnr_hdw_len
= ill
->ill_phys_addr_length
;
1328 bcopy(ncec
->ncec_lladdr
, (uchar_t
*)&lnr
->lnr_hdw_addr
,
1330 if (ncec
->ncec_flags
& NCE_F_ISROUTER
)
1331 lnr
->lnr_flags
= NDF_ISROUTER_ON
;
1332 if (ncec
->ncec_flags
& NCE_F_ANYCAST
)
1333 lnr
->lnr_flags
|= NDF_ANYCAST_ON
;
1334 if (ncec
->ncec_flags
& NCE_F_STATIC
)
1335 lnr
->lnr_flags
|= NDF_STATIC
;
1341 * Finish setting up the Enable/Disable multicast for the driver.
1344 ndp_mcastreq(ill_t
*ill
, const in6_addr_t
*v6group
, uint32_t hw_addr_len
,
1345 uint32_t hw_addr_offset
, mblk_t
*mp
)
1351 ASSERT(ill
->ill_net_type
== IRE_IF_RESOLVER
);
1352 if (IN6_IS_ADDR_V4MAPPED(v6group
)) {
1353 IN6_V4MAPPED_TO_IPADDR(v6group
, v4group
);
1355 ASSERT(CLASSD(v4group
));
1356 ASSERT(!(ill
->ill_isv6
));
1358 addr
= (uchar_t
*)&v4group
;
1360 ASSERT(IN6_IS_ADDR_MULTICAST(v6group
));
1361 ASSERT(ill
->ill_isv6
);
1363 addr
= (uchar_t
*)v6group
;
1365 hw_addr
= mi_offset_paramc(mp
, hw_addr_offset
, hw_addr_len
);
1366 if (hw_addr
== NULL
) {
1367 ip0dbg(("ndp_mcastreq NULL hw_addr\n"));
1372 ip_mcast_mapping(ill
, addr
, hw_addr
);
1377 ip_ndp_resolve(ncec_t
*ncec
)
1379 in_addr_t sender4
= INADDR_ANY
;
1380 in6_addr_t sender6
= ipv6_all_zeros
;
1384 src_ill
= nce_resolve_src(ncec
, &sender6
);
1385 if (src_ill
== NULL
) {
1386 /* Make sure we try again later */
1387 ms
= ncec
->ncec_ill
->ill_reachable_retrans_time
;
1388 nce_restart_timer(ncec
, (clock_t)ms
);
1391 if (ncec
->ncec_ipversion
== IPV4_VERSION
)
1392 IN6_V4MAPPED_TO_IPADDR(&sender6
, sender4
);
1393 mutex_enter(&ncec
->ncec_lock
);
1394 if (ncec
->ncec_ipversion
== IPV6_VERSION
)
1395 ms
= ndp_solicit(ncec
, sender6
, src_ill
);
1397 ms
= arp_request(ncec
, sender4
, src_ill
);
1398 mutex_exit(&ncec
->ncec_lock
);
1400 if (ncec
->ncec_state
!= ND_REACHABLE
) {
1401 if (ncec
->ncec_ipversion
== IPV6_VERSION
)
1402 ndp_resolv_failed(ncec
);
1404 arp_resolv_failed(ncec
);
1405 ASSERT((ncec
->ncec_flags
& NCE_F_STATIC
) == 0);
1406 nce_make_unreachable(ncec
);
1410 nce_restart_timer(ncec
, (clock_t)ms
);
1413 ill_refrele(src_ill
);
1417 * Send an IPv6 neighbor solicitation.
1418 * Returns number of milliseconds after which we should either rexmit or abort.
1419 * Return of zero means we should abort.
1420 * The caller holds the ncec_lock to protect ncec_qd_mp and ncec_rcnt.
1421 * The optional source address is used as a hint to ndp_solicit for
1422 * which source to use in the packet.
1424 * NOTE: This routine drops ncec_lock (and later reacquires it) when sending
1428 ndp_solicit(ncec_t
*ncec
, in6_addr_t src
, ill_t
*ill
)
1431 boolean_t dropped
= B_FALSE
;
1433 ASSERT(ncec
->ncec_ipversion
== IPV6_VERSION
);
1434 ASSERT(MUTEX_HELD(&ncec
->ncec_lock
));
1436 if (ncec
->ncec_rcnt
== 0)
1439 dst
= ncec
->ncec_addr
;
1441 mutex_exit(&ncec
->ncec_lock
);
1442 dropped
= ndp_xmit(ill
, ND_NEIGHBOR_SOLICIT
, ill
->ill_phys_addr
,
1443 ill
->ill_phys_addr_length
, &src
, &dst
, 0);
1444 mutex_enter(&ncec
->ncec_lock
);
1447 return (ncec
->ncec_ill
->ill_reachable_retrans_time
);
1451 * Attempt to recover an address on an interface that's been marked as a
1452 * duplicate. Because NCEs are destroyed when the interface goes down, there's
1453 * no easy way to just probe the address and have the right thing happen if
1454 * it's no longer in use. Instead, we just bring it up normally and allow the
1455 * regular interface start-up logic to probe for a remaining duplicate and take
1456 * us back down if necessary.
1457 * Neither DHCP nor temporary addresses arrive here; they're excluded by
1462 ip_addr_recover(ipsq_t
*ipsq
, queue_t
*rq
, mblk_t
*mp
, void *dummy_arg
)
1464 ill_t
*ill
= rq
->q_ptr
;
1466 in6_addr_t
*addr6
= (in6_addr_t
*)mp
->b_rptr
;
1467 in_addr_t
*addr4
= (in_addr_t
*)mp
->b_rptr
;
1468 boolean_t addr_equal
;
1470 for (ipif
= ill
->ill_ipif
; ipif
!= NULL
; ipif
= ipif
->ipif_next
) {
1472 * We do not support recovery of proxy ARP'd interfaces,
1473 * because the system lacks a complete proxy ARP mechanism.
1475 if (ill
->ill_isv6
) {
1476 addr_equal
= IN6_ARE_ADDR_EQUAL(&ipif
->ipif_v6lcl_addr
,
1479 addr_equal
= (ipif
->ipif_lcl_addr
== *addr4
);
1482 if ((ipif
->ipif_flags
& IPIF_POINTOPOINT
) || !addr_equal
)
1486 * If we have already recovered or if the interface is going
1487 * away, then ignore.
1489 mutex_enter(&ill
->ill_lock
);
1490 if (!(ipif
->ipif_flags
& IPIF_DUPLICATE
) ||
1491 (ipif
->ipif_state_flags
& IPIF_CONDEMNED
)) {
1492 mutex_exit(&ill
->ill_lock
);
1496 ipif
->ipif_flags
&= ~IPIF_DUPLICATE
;
1497 ill
->ill_ipif_dup_count
--;
1498 mutex_exit(&ill
->ill_lock
);
1499 ipif
->ipif_was_dup
= B_TRUE
;
1501 if (ill
->ill_isv6
) {
1502 VERIFY(ipif_ndp_up(ipif
, B_TRUE
) != EINPROGRESS
);
1503 (void) ipif_up_done_v6(ipif
);
1505 VERIFY(ipif_arp_up(ipif
, Res_act_initial
, B_TRUE
) !=
1507 (void) ipif_up_done(ipif
);
1514 * Attempt to recover an IPv6 interface that's been shut down as a duplicate.
1515 * As long as someone else holds the address, the interface will stay down.
1516 * When that conflict goes away, the interface is brought back up. This is
1517 * done so that accidental shutdowns of addresses aren't made permanent. Your
1518 * server will recover from a failure.
1520 * For DHCP and temporary addresses, recovery is not done in the kernel.
1521 * Instead, it's handled by user space processes (dhcpagent and in.ndpd).
1523 * This function is entered on a timer expiry; the ID is in ipif_recovery_id.
1526 ipif_dup_recovery(void *arg
)
1530 ipif
->ipif_recovery_id
= 0;
1531 if (!(ipif
->ipif_flags
& IPIF_DUPLICATE
))
1535 * No lock, because this is just an optimization.
1537 if (ipif
->ipif_state_flags
& IPIF_CONDEMNED
)
1540 /* If the link is down, we'll retry this later */
1541 if (!(ipif
->ipif_ill
->ill_phyint
->phyint_flags
& PHYI_RUNNING
))
1544 ipif_do_recovery(ipif
);
1548 * Perform interface recovery by forcing the duplicate interfaces up and
1549 * allowing the system to determine which ones should stay up.
1551 * Called both by recovery timer expiry and link-up notification.
1554 ipif_do_recovery(ipif_t
*ipif
)
1556 ill_t
*ill
= ipif
->ipif_ill
;
1558 ip_stack_t
*ipst
= ill
->ill_ipst
;
1561 if (ipif
->ipif_isv6
)
1562 mp_size
= sizeof (ipif
->ipif_v6lcl_addr
);
1564 mp_size
= sizeof (ipif
->ipif_lcl_addr
);
1565 mp
= allocb(mp_size
, BPRI_MED
);
1567 mutex_enter(&ill
->ill_lock
);
1568 if (ipst
->ips_ip_dup_recovery
> 0 &&
1569 ipif
->ipif_recovery_id
== 0 &&
1570 !(ipif
->ipif_state_flags
& IPIF_CONDEMNED
)) {
1571 ipif
->ipif_recovery_id
= timeout(ipif_dup_recovery
,
1572 ipif
, MSEC_TO_TICK(ipst
->ips_ip_dup_recovery
));
1574 mutex_exit(&ill
->ill_lock
);
1577 * A recovery timer may still be running if we got here from
1578 * ill_restart_dad(); cancel that timer.
1580 if (ipif
->ipif_recovery_id
!= 0)
1581 (void) untimeout(ipif
->ipif_recovery_id
);
1582 ipif
->ipif_recovery_id
= 0;
1584 if (ipif
->ipif_isv6
) {
1585 bcopy(&ipif
->ipif_v6lcl_addr
, mp
->b_rptr
,
1586 sizeof (ipif
->ipif_v6lcl_addr
));
1588 bcopy(&ipif
->ipif_lcl_addr
, mp
->b_rptr
,
1589 sizeof (ipif
->ipif_lcl_addr
));
1592 qwriter_ip(ill
, ill
->ill_rq
, mp
, ip_addr_recover
, NEW_OP
,
1598 * Find the MAC and IP addresses in an NA/NS message.
1601 ip_ndp_find_addresses(mblk_t
*mp
, ip_recv_attr_t
*ira
, ill_t
*ill
,
1602 in6_addr_t
*targp
, uchar_t
**haddr
, uint_t
*haddrlenp
)
1604 icmp6_t
*icmp6
= (icmp6_t
*)(mp
->b_rptr
+ IPV6_HDR_LEN
);
1605 nd_neighbor_solicit_t
*ns
= (nd_neighbor_solicit_t
*)icmp6
;
1609 /* icmp_inbound_v6 ensures this */
1610 ASSERT(ira
->ira_flags
& IRAF_L2SRC_SET
);
1612 addr
= ira
->ira_l2src
;
1613 alen
= ill
->ill_phys_addr_length
;
1622 /* nd_ns_target and nd_na_target are at the same offset, so we cheat */
1623 *targp
= ns
->nd_ns_target
;
1627 * This is for exclusive changes due to NDP duplicate address detection
1632 ip_ndp_excl(ipsq_t
*ipsq
, queue_t
*rq
, mblk_t
*mp
, void *dummy_arg
)
1634 ill_t
*ill
= rq
->q_ptr
;
1638 ip_stack_t
*ipst
= ill
->ill_ipst
;
1640 ip_recv_attr_t iras
;
1645 attrmp
->b_cont
= NULL
;
1646 if (!ip_recv_attr_from_mblk(attrmp
, &iras
)) {
1647 /* The ill or ip_stack_t disappeared on us */
1648 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInDiscards
);
1649 ip_drop_input("ip_recv_attr_from_mblk", mp
, ill
);
1651 ira_cleanup(&iras
, B_TRUE
);
1655 ASSERT(ill
== iras
.ira_rill
);
1657 ip_ndp_find_addresses(mp
, &iras
, ill
, &targ
, &haddr
, &haddrlen
);
1658 if (haddr
!= NULL
&& haddrlen
== ill
->ill_phys_addr_length
) {
1660 * Ignore conflicts generated by misbehaving switches that
1661 * just reflect our own messages back to us. For IPMP, we may
1662 * see reflections across any ill in the illgrp.
1664 * RFC2462 and revisions tried to detect both the case
1665 * when a statically configured IPv6 address is a duplicate,
1666 * and the case when the L2 address itself is a duplicate. The
1667 * latter is important because, with stateless address autoconf,
1668 * if the L2 address is a duplicate, the resulting IPv6
1669 * address(es) would also be duplicates. We rely on DAD of the
1670 * IPv6 address itself to detect the latter case.
1672 /* For an under-IPMP ill, ill_grp can change, so examine it under ill_g_lock */
1673 rw_enter(&ipst
->ips_ill_g_lock
, RW_READER
);
1674 if (bcmp(haddr
, ill
->ill_phys_addr
, haddrlen
) == 0 ||
1675 IS_UNDER_IPMP(ill
) &&
1676 ipmp_illgrp_find_ill(ill
->ill_grp
, haddr
,
1677 haddrlen
) != NULL
) {
1678 rw_exit(&ipst
->ips_ill_g_lock
);
1679 goto ignore_conflict
;
1681 rw_exit(&ipst
->ips_ill_g_lock
);
1685 * Look up the appropriate ipif.
1687 ipif
= ipif_lookup_addr_v6(&targ
, ill
, ALL_ZONES
, ipst
);
1689 goto ignore_conflict
;
1691 /* Reload the ill to match the ipif */
1692 ill
= ipif
->ipif_ill
;
1694 /* If it's already duplicate or ineligible, then don't do anything. */
1695 if (ipif
->ipif_flags
& (IPIF_POINTOPOINT
|IPIF_DUPLICATE
)) {
1697 goto ignore_conflict
;
1701 * If this is a failure during duplicate recovery, then don't
1702 * complain. It may take a long time to recover.
1704 if (!ipif
->ipif_was_dup
) {
1705 char ibuf
[LIFNAMSIZ
];
1706 char hbuf
[MAC_STR_LEN
];
1707 char sbuf
[INET6_ADDRSTRLEN
];
1709 ipif_get_name(ipif
, ibuf
, sizeof (ibuf
));
1710 cmn_err(CE_WARN
, "%s has duplicate address %s (in use by %s);"
1712 inet_ntop(AF_INET6
, &targ
, sbuf
, sizeof (sbuf
)),
1713 mac_colon_addr(haddr
, haddrlen
, hbuf
, sizeof (hbuf
)));
1715 mutex_enter(&ill
->ill_lock
);
1716 ASSERT(!(ipif
->ipif_flags
& IPIF_DUPLICATE
));
1717 ipif
->ipif_flags
|= IPIF_DUPLICATE
;
1718 ill
->ill_ipif_dup_count
++;
1719 mutex_exit(&ill
->ill_lock
);
1720 (void) ipif_down(ipif
, NULL
, NULL
);
1721 (void) ipif_down_tail(ipif
);
1722 mutex_enter(&ill
->ill_lock
);
1723 if (!(ipif
->ipif_flags
& (IPIF_DHCPRUNNING
|IPIF_TEMPORARY
)) &&
1724 ill
->ill_net_type
== IRE_IF_RESOLVER
&&
1725 !(ipif
->ipif_state_flags
& IPIF_CONDEMNED
) &&
1726 ipst
->ips_ip_dup_recovery
> 0) {
1727 ASSERT(ipif
->ipif_recovery_id
== 0);
1728 ipif
->ipif_recovery_id
= timeout(ipif_dup_recovery
,
1729 ipif
, MSEC_TO_TICK(ipst
->ips_ip_dup_recovery
));
1731 mutex_exit(&ill
->ill_lock
);
1736 ira_cleanup(&iras
, B_TRUE
);
1740 * Handle failure by tearing down the ipifs with the specified address. Note
1741 * that tearing down the ipif also means deleting the ncec through ipif_down, so
1742 * it's not possible to do recovery by just restarting the ncec timer. Instead,
1743 * we start a timer on the ipif.
1744 * Caller has to free mp;
1747 ndp_failure(mblk_t
*mp
, ip_recv_attr_t
*ira
)
1749 const uchar_t
*haddr
;
1750 ill_t
*ill
= ira
->ira_rill
;
1753 * Ignore conflicts generated by misbehaving switches that just
1754 * reflect our own messages back to us.
1757 /* icmp_inbound_v6 ensures this */
1758 ASSERT(ira
->ira_flags
& IRAF_L2SRC_SET
);
1759 haddr
= ira
->ira_l2src
;
1760 if (haddr
!= NULL
&&
1761 bcmp(haddr
, ill
->ill_phys_addr
, ill
->ill_phys_addr_length
) == 0) {
1765 if ((mp
= copymsg(mp
)) != NULL
) {
1768 attrmp
= ip_recv_attr_to_mblk(ira
);
1769 if (attrmp
== NULL
) {
1770 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInDiscards
);
1771 ip_drop_input("ipIfStatsInDiscards", mp
, ill
);
1774 ASSERT(attrmp
->b_cont
== NULL
);
1775 attrmp
->b_cont
= mp
;
1778 qwriter_ip(ill
, ill
->ill_rq
, mp
, ip_ndp_excl
, NEW_OP
,
1785 * Handle a discovered conflict: some other system is advertising that it owns
1786 * one of our IP addresses. We need to defend ourselves, or just shut down the
1789 * Handles both IPv4 and IPv6
1792 ip_nce_conflict(mblk_t
*mp
, ip_recv_attr_t
*ira
, ncec_t
*ncec
)
1798 ill_t
*ill
= ira
->ira_ill
;
1799 ip_stack_t
*ipst
= ill
->ill_ipst
;
1801 boolean_t isv6
= ill
->ill_isv6
;
1805 ipif
= ipif_lookup_addr_v6(&ncec
->ncec_addr
, ill
, ALL_ZONES
,
1808 if (arp_no_defense
) {
1810 * Yes, there is a conflict, but no, we do not
1815 IN6_V4MAPPED_TO_IPADDR(&ncec
->ncec_addr
, ncec_addr
);
1816 ipif
= ipif_lookup_addr(ncec_addr
, ill
, ALL_ZONES
,
1823 * First, figure out if this address is disposable.
1825 if (ipif
->ipif_flags
& (IPIF_DHCPRUNNING
| IPIF_TEMPORARY
))
1826 maxdefense
= ipst
->ips_ip_max_temp_defend
;
1828 maxdefense
= ipst
->ips_ip_max_defend
;
1831 * Now figure out how many times we've defended ourselves. Ignore
1832 * defenses that happened long in the past.
1834 now
= ddi_get_lbolt();
1835 elapsed
= (drv_hztousec(now
- ncec
->ncec_last_time_defended
))/1000000;
1836 mutex_enter(&ncec
->ncec_lock
);
1837 if ((defs
= ncec
->ncec_defense_count
) > 0 &&
1838 elapsed
> ipst
->ips_ip_defend_interval
) {
1840 * ip_defend_interval has elapsed.
1841 * reset the defense count.
1843 ncec
->ncec_defense_count
= defs
= 0;
1845 ncec
->ncec_defense_count
++;
1846 ncec
->ncec_last_time_defended
= now
;
1847 mutex_exit(&ncec
->ncec_lock
);
1851 * If we've defended ourselves too many times already, then give up and
1852 * tear down the interface(s) using this address.
1853 * Otherwise, caller has to defend by sending out an announce.
1855 if (defs
>= maxdefense
) {
1857 ndp_failure(mp
, ira
);
1859 arp_failure(mp
, ira
);
1861 return (B_TRUE
); /* caller must defend this address */
1867 * Handle reception of Neighbor Solicitation messages.
1870 ndp_input_solicit(mblk_t
*mp
, ip_recv_attr_t
*ira
)
1872 ill_t
*ill
= ira
->ira_ill
, *under_ill
;
1873 nd_neighbor_solicit_t
*ns
;
1874 uint32_t hlen
= ill
->ill_phys_addr_length
;
1875 uchar_t
*haddr
= NULL
;
1878 ncec_t
*our_ncec
= NULL
;
1883 nd_opt_hdr_t
*opt
= NULL
;
1884 boolean_t bad_solicit
= B_FALSE
;
1885 mib2_ipv6IfIcmpEntry_t
*mib
= ill
->ill_icmp6_mib
;
1886 boolean_t need_ill_refrele
= B_FALSE
;
1888 ip6h
= (ip6_t
*)mp
->b_rptr
;
1889 icmp_nd
= (icmp6_t
*)(mp
->b_rptr
+ IPV6_HDR_LEN
);
1890 len
= mp
->b_wptr
- mp
->b_rptr
- IPV6_HDR_LEN
;
1891 src
= ip6h
->ip6_src
;
1892 ns
= (nd_neighbor_solicit_t
*)icmp_nd
;
1893 target
= ns
->nd_ns_target
;
1894 if (IN6_IS_ADDR_MULTICAST(&target
) || IN6_IS_ADDR_V4MAPPED(&target
) ||
1895 IN6_IS_ADDR_LOOPBACK(&target
)) {
1898 pr_addr_dbg("ndp_input_solicit: Martian Target %s\n",
1901 bad_solicit
= B_TRUE
;
1904 if (len
> sizeof (nd_neighbor_solicit_t
)) {
1905 /* Options present */
1906 opt
= (nd_opt_hdr_t
*)&ns
[1];
1907 len
-= sizeof (nd_neighbor_solicit_t
);
1908 if (!ndp_verify_optlen(opt
, len
)) {
1909 ip1dbg(("ndp_input_solicit: Bad opt len\n"));
1910 bad_solicit
= B_TRUE
;
1914 if (IN6_IS_ADDR_UNSPECIFIED(&src
)) {
1915 /* Check to see if this is a valid DAD solicitation */
1916 if (!IN6_IS_ADDR_MC_SOLICITEDNODE(&ip6h
->ip6_dst
)) {
1919 pr_addr_dbg("ndp_input_solicit: IPv6 "
1920 "Destination is not solicited node "
1921 "multicast %s\n", AF_INET6
,
1924 bad_solicit
= B_TRUE
;
1930 * NOTE: with IPMP, it's possible the nominated multicast ill (which
1931 * received this packet if it's multicast) is not the ill tied to
1932 * e.g. the IPMP ill's data link-local. So we match across the illgrp
1933 * to ensure we find the associated NCE.
1935 our_ncec
= ncec_lookup_illgrp_v6(ill
, &target
);
1937 * If this is a valid Solicitation for an address we are publishing,
1938 * then a PUBLISH entry should exist in the cache
1940 if (our_ncec
== NULL
|| !NCE_PUBLISH(our_ncec
)) {
1941 ip1dbg(("ndp_input_solicit: Wrong target in NS?!"
1942 "ifname=%s ", ill
->ill_name
));
1945 pr_addr_dbg(" dst %s\n", AF_INET6
, &target
);
1947 if (our_ncec
== NULL
)
1948 bad_solicit
= B_TRUE
;
1952 /* At this point we should have a verified NS per spec */
1954 opt
= ndp_get_option(opt
, len
, ND_OPT_SOURCE_LINKADDR
);
1956 haddr
= (uchar_t
*)&opt
[1];
1957 if (hlen
> opt
->nd_opt_len
* 8 - sizeof (*opt
) ||
1959 ip1dbg(("ndp_input_advert: bad SLLA\n"));
1960 bad_solicit
= B_TRUE
;
1966 /* If sending directly to peer, set the unicast flag */
1967 if (!IN6_IS_ADDR_MULTICAST(&ip6h
->ip6_dst
))
1968 flag
|= NDP_UNICAST
;
1971 * Create/update the entry for the soliciting node on the ipmp_ill,
1972 * or respond to outstanding queries. Don't do either if
1973 * the source is the unspecified address.
1975 if (!IN6_IS_ADDR_UNSPECIFIED(&src
)) {
1979 ASSERT(ill
->ill_isv6
);
1981 * Regular solicitations *must* include the Source Link-Layer
1982 * Address option. Ignore messages that do not.
1984 if (haddr
== NULL
&& IN6_IS_ADDR_MULTICAST(&ip6h
->ip6_dst
)) {
1985 ip1dbg(("ndp_input_solicit: source link-layer address "
1986 "option missing with a specified source.\n"));
1987 bad_solicit
= B_TRUE
;
1992 * This is a regular solicitation. If we're still in the
1993 * process of verifying the address, then don't respond at all
1994 * and don't keep track of the sender.
1996 if (our_ncec
->ncec_state
== ND_PROBE
)
2000 * If the solicitation doesn't have sender hardware address
2001 * (legal for unicast solicitation), then process without
2002 * installing the return NCE. Either we already know it, or
2003 * we'll be forced to look it up when (and if) we reply to the
2010 if (IS_UNDER_IPMP(under_ill
)) {
2011 ill
= ipmp_ill_hold_ipmp_ill(under_ill
);
2015 need_ill_refrele
= B_TRUE
;
2017 err
= nce_lookup_then_add_v6(ill
,
2019 &src
, /* Soliciting nodes address */
2024 if (need_ill_refrele
) {
2027 need_ill_refrele
= B_FALSE
;
2031 /* done with this entry */
2036 * B_FALSE indicates this is not an advertisement.
2038 nce_process(nnce
->nce_common
, haddr
, 0, B_FALSE
);
2042 ip1dbg(("ndp_input_solicit: Can't create NCE %d\n",
2047 flag
|= NDP_SOLICITED
;
2050 * No source link layer address option should be present in a
2051 * valid DAD request.
2053 if (haddr
!= NULL
) {
2054 ip1dbg(("ndp_input_solicit: source link-layer address "
2055 "option present with an unspecified source.\n"));
2056 bad_solicit
= B_TRUE
;
2059 if (our_ncec
->ncec_state
== ND_PROBE
) {
2061 * Internally looped-back probes will have
2062 * IRAF_L2SRC_LOOPBACK set so we can ignore our own
2065 if (!(ira
->ira_flags
& IRAF_L2SRC_LOOPBACK
)) {
2067 * If someone else is probing our address, then
2068 * we've crossed wires. Declare failure.
2070 ndp_failure(mp
, ira
);
2075 * This is a DAD probe. Multicast the advertisement to the
2076 * all-nodes address.
2078 src
= ipv6_all_hosts_mcast
;
2080 flag
|= nce_advert_flags(our_ncec
);
2081 (void) ndp_xmit(ill
,
2083 our_ncec
->ncec_lladdr
,
2084 our_ncec
->ncec_lladdr_length
,
2085 &target
, /* Source and target of the advertisement pkt */
2086 &src
, /* IP Destination (source of original pkt) */
2090 BUMP_MIB(mib
, ipv6IfIcmpInBadNeighborSolicitations
);
2091 if (our_ncec
!= NULL
)
2092 ncec_refrele(our_ncec
);
2096 * Handle reception of Neighbor Advertisement messages
2099 ndp_input_advert(mblk_t
*mp
, ip_recv_attr_t
*ira
)
2101 ill_t
*ill
= ira
->ira_ill
;
2102 nd_neighbor_advert_t
*na
;
2103 uint32_t hlen
= ill
->ill_phys_addr_length
;
2104 uchar_t
*haddr
= NULL
;
2107 ncec_t
*dst_ncec
= NULL
;
2109 nd_opt_hdr_t
*opt
= NULL
;
2111 ip_stack_t
*ipst
= ill
->ill_ipst
;
2112 mib2_ipv6IfIcmpEntry_t
*mib
= ill
->ill_icmp6_mib
;
2114 ip6h
= (ip6_t
*)mp
->b_rptr
;
2115 icmp_nd
= (icmp6_t
*)(mp
->b_rptr
+ IPV6_HDR_LEN
);
2116 len
= mp
->b_wptr
- mp
->b_rptr
- IPV6_HDR_LEN
;
2117 na
= (nd_neighbor_advert_t
*)icmp_nd
;
2119 if (IN6_IS_ADDR_MULTICAST(&ip6h
->ip6_dst
) &&
2120 (na
->nd_na_flags_reserved
& ND_NA_FLAG_SOLICITED
)) {
2121 ip1dbg(("ndp_input_advert: Target is multicast but the "
2122 "solicited flag is not zero\n"));
2123 BUMP_MIB(mib
, ipv6IfIcmpInBadNeighborAdvertisements
);
2126 target
= na
->nd_na_target
;
2127 if (IN6_IS_ADDR_MULTICAST(&target
) || IN6_IS_ADDR_V4MAPPED(&target
) ||
2128 IN6_IS_ADDR_LOOPBACK(&target
)) {
2131 pr_addr_dbg("ndp_input_solicit: Martian Target %s\n",
2134 BUMP_MIB(mib
, ipv6IfIcmpInBadNeighborAdvertisements
);
2137 if (len
> sizeof (nd_neighbor_advert_t
)) {
2138 opt
= (nd_opt_hdr_t
*)&na
[1];
2139 if (!ndp_verify_optlen(opt
,
2140 len
- sizeof (nd_neighbor_advert_t
))) {
2141 ip1dbg(("ndp_input_advert: cannot verify SLLA\n"));
2142 BUMP_MIB(mib
, ipv6IfIcmpInBadNeighborAdvertisements
);
2145 /* At this point we have a verified NA per spec */
2146 len
-= sizeof (nd_neighbor_advert_t
);
2147 opt
= ndp_get_option(opt
, len
, ND_OPT_TARGET_LINKADDR
);
2149 haddr
= (uchar_t
*)&opt
[1];
2150 if (hlen
> opt
->nd_opt_len
* 8 - sizeof (*opt
) ||
2152 ip1dbg(("ndp_input_advert: bad SLLA\n"));
2154 ipv6IfIcmpInBadNeighborAdvertisements
);
2161 * NOTE: we match across the illgrp since we need to do DAD for all of
2162 * our local addresses, and those are spread across all the active
2163 * ills in the group.
2165 if ((dst_ncec
= ncec_lookup_illgrp_v6(ill
, &target
)) == NULL
)
2168 if (NCE_PUBLISH(dst_ncec
)) {
2170 * Someone just advertised an address that we publish. First,
2171 * check if it was us -- if so, we can safely ignore it.
2172 * We don't get the haddr from the ira_l2src because, in the
2173 * case that the packet originated from us, on an IPMP group,
2174 * the ira_l2src would be the link-layer address of the
2175 * cast_ill used to send the packet, which may not be the same
2176 * as the dst_ncec->ncec_lladdr of the address.
2178 if (haddr
!= NULL
) {
2179 if (ira
->ira_flags
& IRAF_L2SRC_LOOPBACK
)
2182 if (!nce_cmp_ll_addr(dst_ncec
, haddr
, hlen
))
2183 goto out
; /* from us -- no conflict */
2186 * If we're in an IPMP group, check if this is an echo
2187 * from another ill in the group. Use the double-
2188 * checked locking pattern to avoid grabbing
2189 * ill_g_lock in the non-IPMP case.
2191 if (IS_UNDER_IPMP(ill
)) {
2192 rw_enter(&ipst
->ips_ill_g_lock
, RW_READER
);
2193 if (IS_UNDER_IPMP(ill
) && ipmp_illgrp_find_ill(
2194 ill
->ill_grp
, haddr
, hlen
) != NULL
) {
2195 rw_exit(&ipst
->ips_ill_g_lock
);
2198 rw_exit(&ipst
->ips_ill_g_lock
);
2203 * This appears to be a real conflict. If we're trying to
2204 * configure this NCE (ND_PROBE), then shut it down.
2205 * Otherwise, handle the discovered conflict.
2207 if (dst_ncec
->ncec_state
== ND_PROBE
) {
2208 ndp_failure(mp
, ira
);
2210 if (ip_nce_conflict(mp
, ira
, dst_ncec
)) {
2211 char hbuf
[MAC_STR_LEN
];
2212 char sbuf
[INET6_ADDRSTRLEN
];
2215 "node '%s' is using %s on %s",
2216 inet_ntop(AF_INET6
, &target
, sbuf
,
2218 haddr
== NULL
? "<none>" :
2219 mac_colon_addr(haddr
, hlen
, hbuf
,
2220 sizeof (hbuf
)), ill
->ill_name
);
2222 * RFC 4862, Section 5.4.4 does not mandate
2223 * any specific behavior when an NA matches
2224 * a non-tentative address assigned to the
2225 * receiver. We make the choice of defending
2226 * our address, based on the assumption that
2227 * the sender has not detected the Duplicate.
2229 * ncec_last_time_defended has been adjusted
2230 * in ip_nce_conflict()
2232 (void) ndp_announce(dst_ncec
);
2236 if (na
->nd_na_flags_reserved
& ND_NA_FLAG_ROUTER
)
2237 dst_ncec
->ncec_flags
|= NCE_F_ISROUTER
;
2239 /* B_TRUE indicates this is an advertisement */
2240 nce_process(dst_ncec
, haddr
, na
->nd_na_flags_reserved
, B_TRUE
);
2243 ncec_refrele(dst_ncec
);
2247 * Process NDP neighbor solicitation/advertisement messages.
2248 * The checksum has already checked o.k before reaching here.
2249 * Information about the datalink header is contained in ira_l2src, but
2250 * that should be ignored for loopback packets.
2253 ndp_input(mblk_t
*mp
, ip_recv_attr_t
*ira
)
2255 ill_t
*ill
= ira
->ira_rill
;
2259 mib2_ipv6IfIcmpEntry_t
*mib
= ill
->ill_icmp6_mib
;
2260 ill_t
*orig_ill
= NULL
;
2263 * Since ira_ill is where the IRE_LOCAL was hosted we use ira_rill
2264 * and make it be the IPMP upper so as to avoid being confused by a packet
2265 * addressed to a unicast address on a different ill.
2267 if (IS_UNDER_IPMP(ill
)) {
2269 ill
= ipmp_ill_hold_ipmp_ill(orig_ill
);
2272 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInDiscards
);
2273 ip_drop_input("ipIfStatsInDiscards - IPMP ill",
2278 ASSERT(ill
!= orig_ill
);
2279 orig_ill
= ira
->ira_ill
;
2281 mib
= ill
->ill_icmp6_mib
;
2283 if (!pullupmsg(mp
, -1)) {
2284 ip1dbg(("ndp_input: pullupmsg failed\n"));
2285 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInDiscards
);
2286 ip_drop_input("ipIfStatsInDiscards - pullupmsg", mp
, ill
);
2289 ip6h
= (ip6_t
*)mp
->b_rptr
;
2290 if (ip6h
->ip6_hops
!= IPV6_MAX_HOPS
) {
2291 ip1dbg(("ndp_input: hoplimit != IPV6_MAX_HOPS\n"));
2292 ip_drop_input("ipv6IfIcmpBadHoplimit", mp
, ill
);
2293 BUMP_MIB(mib
, ipv6IfIcmpBadHoplimit
);
2297 * NDP does not accept any extension headers between the
2298 * IP header and the ICMP header since e.g. a routing
2299 * header could be dangerous.
2300 * This assumes that any AH or ESP headers are removed
2301 * by ip prior to passing the packet to ndp_input.
2303 if (ip6h
->ip6_nxt
!= IPPROTO_ICMPV6
) {
2304 ip1dbg(("ndp_input: Wrong next header 0x%x\n",
2306 ip_drop_input("Wrong next header", mp
, ill
);
2307 BUMP_MIB(mib
, ipv6IfIcmpInErrors
);
2310 icmp_nd
= (icmp6_t
*)(mp
->b_rptr
+ IPV6_HDR_LEN
);
2311 ASSERT(icmp_nd
->icmp6_type
== ND_NEIGHBOR_SOLICIT
||
2312 icmp_nd
->icmp6_type
== ND_NEIGHBOR_ADVERT
);
2313 if (icmp_nd
->icmp6_code
!= 0) {
2314 ip1dbg(("ndp_input: icmp6 code != 0 \n"));
2315 ip_drop_input("code non-zero", mp
, ill
);
2316 BUMP_MIB(mib
, ipv6IfIcmpInErrors
);
2319 len
= mp
->b_wptr
- mp
->b_rptr
- IPV6_HDR_LEN
;
2321 * Make sure packet length is large enough for either
2322 * a NS or a NA icmp packet.
2324 if (len
< sizeof (struct icmp6_hdr
) + sizeof (struct in6_addr
)) {
2325 ip1dbg(("ndp_input: packet too short\n"));
2326 ip_drop_input("packet too short", mp
, ill
);
2327 BUMP_MIB(mib
, ipv6IfIcmpInErrors
);
2330 if (icmp_nd
->icmp6_type
== ND_NEIGHBOR_SOLICIT
) {
2331 ndp_input_solicit(mp
, ira
);
2333 ndp_input_advert(mp
, ira
);
2337 if (orig_ill
!= NULL
) {
2339 ira
->ira_ill
= orig_ill
;
2344 * ndp_xmit is called to form and transmit a ND solicitation or
2345 * advertisement ICMP packet.
2347 * If the source address is unspecified and this isn't a probe (used for
2348 * duplicate address detection), an appropriate source address and link layer
2349 * address will be chosen here. The link layer address option is included if
2350 * the source is specified (i.e., all non-probe packets), and omitted (per the
2351 * specification) otherwise.
2353 * It returns B_FALSE only if it does a successful put() to the
2354 * corresponding ill's ill_wq otherwise returns B_TRUE.
2357 ndp_xmit(ill_t
*ill
, uint32_t operation
, uint8_t *hw_addr
, uint_t hw_addr_len
,
2358 const in6_addr_t
*sender
, const in6_addr_t
*target
, int flag
)
2366 zoneid_t zoneid
= GLOBAL_ZONEID
;
2367 ill_t
*hwaddr_ill
= ill
;
2368 ip_xmit_attr_t ixas
;
2369 ip_stack_t
*ipst
= ill
->ill_ipst
;
2370 boolean_t need_refrele
= B_FALSE
;
2371 boolean_t probe
= B_FALSE
;
2373 if (IS_UNDER_IPMP(ill
)) {
2374 probe
= ipif_lookup_testaddr_v6(ill
, sender
, NULL
);
2376 * We send non-probe packets on the upper IPMP interface.
2377 * ip_output_simple() will use cast_ill for sending any
2378 * multicast packets. Note that we can't follow the same
2379 * logic for probe packets because all interfaces in the ipmp
2380 * group may have failed, so that we really want to only try
2381 * to send the ND packet on the ill corresponding to the src
2385 ill
= ipmp_ill_hold_ipmp_ill(ill
);
2387 need_refrele
= B_TRUE
;
2394 * If we have an unspecified source (sender) address, select a
2395 * proper source address for the solicitation here itself so
2396 * that we can initialize the h/w address correctly.
2398 * If the sender is specified then we use this address in order
2399 * to lookup the zoneid before calling ip_output_v6(). This is to
2400 * enable unicast ND_NEIGHBOR_ADVERT packets to be routed correctly
2401 * by IP (we cannot guarantee that the global zone has an interface
2402 * route to the destination).
2404 * Note that the NA never comes here with the unspecified source
2409 * Probes will have unspec src at this point.
2411 if (!(IN6_IS_ADDR_UNSPECIFIED(sender
))) {
2412 zoneid
= ipif_lookup_addr_zoneid_v6(sender
, ill
, ipst
);
2414 * It's possible for ipif_lookup_addr_zoneid_v6() to return
2415 * ALL_ZONES if it cannot find a matching ipif for the address
2416 * we are trying to use. In this case we err on the side of
2417 * trying to send the packet by defaulting to the GLOBAL_ZONEID.
2419 if (zoneid
== ALL_ZONES
)
2420 zoneid
= GLOBAL_ZONEID
;
2423 plen
= (sizeof (nd_opt_hdr_t
) + hw_addr_len
+ 7) / 8;
2424 len
= IPV6_HDR_LEN
+ sizeof (nd_neighbor_advert_t
) + plen
* 8;
2425 mp
= allocb(len
, BPRI_LO
);
2432 bzero((char *)mp
->b_rptr
, len
);
2433 mp
->b_wptr
= mp
->b_rptr
+ len
;
2435 bzero(&ixas
, sizeof (ixas
));
2436 ixas
.ixa_flags
= IXAF_SET_ULP_CKSUM
| IXAF_NO_HW_CKSUM
;
2438 ixas
.ixa_ifindex
= ill
->ill_phyint
->phyint_ifindex
;
2439 ixas
.ixa_ipst
= ipst
;
2440 ixas
.ixa_cred
= kcred
;
2441 ixas
.ixa_cpid
= NOPID
;
2442 ixas
.ixa_tsl
= NULL
;
2443 ixas
.ixa_zoneid
= zoneid
;
2445 ip6h
= (ip6_t
*)mp
->b_rptr
;
2446 ip6h
->ip6_vcf
= IPV6_DEFAULT_VERS_AND_FLOW
;
2447 ip6h
->ip6_plen
= htons(len
- IPV6_HDR_LEN
);
2448 ip6h
->ip6_nxt
= IPPROTO_ICMPV6
;
2449 ip6h
->ip6_hops
= IPV6_MAX_HOPS
;
2450 ixas
.ixa_multicast_ttl
= ip6h
->ip6_hops
;
2451 ip6h
->ip6_dst
= *target
;
2452 icmp6
= (icmp6_t
*)&ip6h
[1];
2454 if (hw_addr_len
!= 0) {
2455 opt
= (nd_opt_hdr_t
*)((uint8_t *)ip6h
+ IPV6_HDR_LEN
+
2456 sizeof (nd_neighbor_advert_t
));
2460 if (operation
== ND_NEIGHBOR_SOLICIT
) {
2461 nd_neighbor_solicit_t
*ns
= (nd_neighbor_solicit_t
*)icmp6
;
2463 if (opt
!= NULL
&& !(flag
& NDP_PROBE
)) {
2465 * Note that we don't send out SLLA for ND probes
2466 * per RFC 4862, even though we do send out the src
2467 * haddr for IPv4 DAD probes, even though both IPv4
2468 * and IPv6 go out with the unspecified/INADDR_ANY
2471 opt
->nd_opt_type
= ND_OPT_SOURCE_LINKADDR
;
2473 ip6h
->ip6_src
= *sender
;
2474 ns
->nd_ns_target
= *target
;
2475 if (!(flag
& NDP_UNICAST
)) {
2476 /* Form multicast address of the target */
2477 ip6h
->ip6_dst
= ipv6_solicited_node_mcast
;
2478 ip6h
->ip6_dst
.s6_addr32
[3] |=
2479 ns
->nd_ns_target
.s6_addr32
[3];
2482 nd_neighbor_advert_t
*na
= (nd_neighbor_advert_t
*)icmp6
;
2484 ASSERT(!(flag
& NDP_PROBE
));
2486 opt
->nd_opt_type
= ND_OPT_TARGET_LINKADDR
;
2487 ip6h
->ip6_src
= *sender
;
2488 na
->nd_na_target
= *sender
;
2489 if (flag
& NDP_ISROUTER
)
2490 na
->nd_na_flags_reserved
|= ND_NA_FLAG_ROUTER
;
2491 if (flag
& NDP_SOLICITED
)
2492 na
->nd_na_flags_reserved
|= ND_NA_FLAG_SOLICITED
;
2493 if (flag
& NDP_ORIDE
)
2494 na
->nd_na_flags_reserved
|= ND_NA_FLAG_OVERRIDE
;
2497 if (!(flag
& NDP_PROBE
)) {
2498 if (hw_addr
!= NULL
&& opt
!= NULL
) {
2499 /* Fill in link layer address and option len */
2500 opt
->nd_opt_len
= (uint8_t)plen
;
2501 bcopy(hw_addr
, &opt
[1], hw_addr_len
);
2504 if (opt
!= NULL
&& opt
->nd_opt_type
== 0) {
2505 /* If there's no link layer address option, then strip it. */
2507 mp
->b_wptr
= mp
->b_rptr
+ len
;
2508 ip6h
->ip6_plen
= htons(len
- IPV6_HDR_LEN
);
2511 icmp6
->icmp6_type
= (uint8_t)operation
;
2512 icmp6
->icmp6_code
= 0;
2514 * Prepare for checksum by putting icmp length in the icmp
2515 * checksum field. The checksum is calculated in ip_output.c.
2517 icmp6
->icmp6_cksum
= ip6h
->ip6_plen
;
2519 (void) ip_output_simple(mp
, &ixas
);
2527 * Used to set ND_UNREACHABLE before ncec_delete sets it NCE_F_CONDEMNED.
2528 * The datapath uses this as an indication that there
2529 * is a problem (as opposed to an NCE that was just
2530 * reclaimed due to lack of memory).
2531 * Note that static ARP entries never become unreachable.
2534 nce_make_unreachable(ncec_t
*ncec
)
2536 mutex_enter(&ncec
->ncec_lock
);
2537 ncec
->ncec_state
= ND_UNREACHABLE
;
2538 mutex_exit(&ncec
->ncec_lock
);
2542 * NCE retransmit timer. Common to IPv4 and IPv6.
2543 * This timer goes off when:
2544 * a. It is time to retransmit a resolution for resolver.
2545 * b. It is time to send reachability probes.
2548 nce_timer(void *arg
)
2551 ill_t
*ill
= ncec
->ncec_ill
, *src_ill
;
2552 char addrbuf
[INET6_ADDRSTRLEN
];
2553 boolean_t dropped
= B_FALSE
;
2554 ip_stack_t
*ipst
= ncec
->ncec_ipst
;
2555 boolean_t isv6
= (ncec
->ncec_ipversion
== IPV6_VERSION
);
2556 in_addr_t sender4
= INADDR_ANY
;
2557 in6_addr_t sender6
= ipv6_all_zeros
;
2560 * The timer has to be cancelled by ncec_delete before doing the final
2561 * refrele. So the NCE is guaranteed to exist when the timer runs
2562 * until it clears the timeout_id. Before clearing the timeout_id
2563 * bump up the refcnt so that we can continue to use the ncec
2565 ASSERT(ncec
!= NULL
);
2566 mutex_enter(&ncec
->ncec_lock
);
2567 ncec_refhold_locked(ncec
);
2568 ncec
->ncec_timeout_id
= 0;
2569 mutex_exit(&ncec
->ncec_lock
);
2571 src_ill
= nce_resolve_src(ncec
, &sender6
);
2572 /* if we could not find a sender address, return */
2573 if (src_ill
== NULL
) {
2575 IN6_V4MAPPED_TO_IPADDR(&ncec
->ncec_addr
, sender4
);
2576 ip1dbg(("no src ill for %s\n", inet_ntop(AF_INET
,
2577 &sender4
, addrbuf
, sizeof (addrbuf
))));
2579 ip1dbg(("no src ill for %s\n", inet_ntop(AF_INET6
,
2580 &ncec
->ncec_addr
, addrbuf
, sizeof (addrbuf
))));
2582 nce_restart_timer(ncec
, ill
->ill_reachable_retrans_time
);
2587 IN6_V4MAPPED_TO_IPADDR(&sender6
, sender4
);
2589 mutex_enter(&ncec
->ncec_lock
);
2591 * Check the reachability state.
2593 switch (ncec
->ncec_state
) {
2595 ASSERT(ncec
->ncec_lladdr
!= NULL
);
2596 ncec
->ncec_state
= ND_PROBE
;
2597 ncec
->ncec_pcnt
= ND_MAX_UNICAST_SOLICIT
;
2599 mutex_exit(&ncec
->ncec_lock
);
2600 dropped
= ndp_xmit(src_ill
, ND_NEIGHBOR_SOLICIT
,
2601 src_ill
->ill_phys_addr
,
2602 src_ill
->ill_phys_addr_length
,
2603 &sender6
, &ncec
->ncec_addr
,
2606 dropped
= (arp_request(ncec
, sender4
, src_ill
) == 0);
2607 mutex_exit(&ncec
->ncec_lock
);
2610 mutex_enter(&ncec
->ncec_lock
);
2612 mutex_exit(&ncec
->ncec_lock
);
2616 pr_addr_dbg("nce_timer: state for %s changed "
2617 "to PROBE\n", AF_INET6
, &ncec
->ncec_addr
);
2619 nce_restart_timer(ncec
, ill
->ill_reachable_retrans_time
);
2622 /* must be retransmit timer */
2623 ASSERT(ncec
->ncec_pcnt
>= -1);
2624 if (ncec
->ncec_pcnt
> 0) {
2626 * As per RFC2461, the ncec gets deleted after
2627 * MAX_UNICAST_SOLICIT unsuccessful re-transmissions.
2628 * Note that the first unicast solicitation is sent
2629 * during the DELAY state.
2631 ip2dbg(("nce_timer: pcount=%x dst %s\n",
2633 inet_ntop((isv6
? AF_INET6
: AF_INET
),
2634 &ncec
->ncec_addr
, addrbuf
, sizeof (addrbuf
))));
2635 if (NCE_PUBLISH(ncec
)) {
2636 mutex_exit(&ncec
->ncec_lock
);
2638 * send out a probe; note that src_ill
2639 * is ignored by nce_dad() for all
2640 * DAD message types other than IPv6
2643 nce_dad(ncec
, src_ill
, B_TRUE
);
2645 ASSERT(src_ill
!= NULL
);
2647 mutex_exit(&ncec
->ncec_lock
);
2648 dropped
= ndp_xmit(src_ill
,
2649 ND_NEIGHBOR_SOLICIT
,
2650 src_ill
->ill_phys_addr
,
2651 src_ill
->ill_phys_addr_length
,
2652 &sender6
, &ncec
->ncec_addr
,
2656 * since the nce is REACHABLE,
2657 * the ARP request will be sent out
2658 * as a link-layer unicast.
2660 dropped
= (arp_request(ncec
, sender4
,
2662 mutex_exit(&ncec
->ncec_lock
);
2665 mutex_enter(&ncec
->ncec_lock
);
2667 mutex_exit(&ncec
->ncec_lock
);
2669 nce_restart_timer(ncec
,
2670 ill
->ill_reachable_retrans_time
);
2672 } else if (ncec
->ncec_pcnt
< 0) {
2673 /* No hope, delete the ncec */
2674 /* Tell datapath it went bad */
2675 ncec
->ncec_state
= ND_UNREACHABLE
;
2676 mutex_exit(&ncec
->ncec_lock
);
2679 pr_addr_dbg("nce_timer: Delete NCE for"
2680 " dst %s\n", (isv6
? AF_INET6
: AF_INET
),
2683 /* if static ARP can't delete. */
2684 if ((ncec
->ncec_flags
& NCE_F_STATIC
) == 0)
2687 } else if (!NCE_PUBLISH(ncec
)) {
2689 * Probe count is 0 for a dynamic entry (one that we
2690 * ourselves are not publishing). We should never get
2691 * here if NONUD was requested, hence the ASSERT below.
2693 ASSERT((ncec
->ncec_flags
& NCE_F_NONUD
) == 0);
2694 ip2dbg(("nce_timer: pcount=%x dst %s\n",
2695 ncec
->ncec_pcnt
, inet_ntop(AF_INET6
,
2696 &ncec
->ncec_addr
, addrbuf
, sizeof (addrbuf
))));
2698 mutex_exit(&ncec
->ncec_lock
);
2699 /* Wait one interval before killing */
2700 nce_restart_timer(ncec
,
2701 ill
->ill_reachable_retrans_time
);
2702 } else if (ill
->ill_phyint
->phyint_flags
& PHYI_RUNNING
) {
2707 * We're done probing, and we can now declare this
2708 * address to be usable. Let IP know that it's ok to
2711 ncec
->ncec_state
= ND_REACHABLE
;
2712 ncec
->ncec_flags
&= ~NCE_F_UNVERIFIED
;
2713 mutex_exit(&ncec
->ncec_lock
);
2715 ipif
= ipif_lookup_addr_exact_v6(
2716 &ncec
->ncec_addr
, ill
, ipst
);
2718 IN6_V4MAPPED_TO_IPADDR(&ncec
->ncec_addr
,
2720 ipif
= ipif_lookup_addr_exact(ncec_addr
, ill
,
2724 if (ipif
->ipif_was_dup
) {
2725 char ibuf
[LIFNAMSIZ
];
2726 char sbuf
[INET6_ADDRSTRLEN
];
2728 ipif
->ipif_was_dup
= B_FALSE
;
2729 (void) inet_ntop(AF_INET6
,
2730 &ipif
->ipif_v6lcl_addr
,
2731 sbuf
, sizeof (sbuf
));
2732 ipif_get_name(ipif
, ibuf
,
2734 cmn_err(CE_NOTE
, "recovered address "
2735 "%s on %s", sbuf
, ibuf
);
2737 if ((ipif
->ipif_flags
& IPIF_UP
) &&
2738 !ipif
->ipif_addr_ready
)
2739 ipif_up_notify(ipif
);
2740 ipif
->ipif_addr_ready
= 1;
2743 if (!isv6
&& arp_no_defense
)
2745 /* Begin defending our new address */
2746 if (ncec
->ncec_unsolicit_count
> 0) {
2747 ncec
->ncec_unsolicit_count
--;
2749 dropped
= ndp_announce(ncec
);
2751 dropped
= arp_announce(ncec
);
2755 ncec
->ncec_unsolicit_count
++;
2757 ncec
->ncec_last_time_defended
=
2760 if (ncec
->ncec_unsolicit_count
> 0) {
2761 nce_restart_timer(ncec
,
2762 ANNOUNCE_INTERVAL(isv6
));
2763 } else if (DEFENSE_INTERVAL(isv6
) != 0) {
2764 nce_restart_timer(ncec
, DEFENSE_INTERVAL(isv6
));
2768 * This is an address we're probing to be our own, but
2769 * the ill is down. Wait until it comes back before
2770 * doing anything, but switch to reachable state so
2771 * that the restart will work.
2773 ncec
->ncec_state
= ND_REACHABLE
;
2774 mutex_exit(&ncec
->ncec_lock
);
2777 case ND_INCOMPLETE
: {
2778 mblk_t
*mp
, *nextmp
;
2782 * Per case (2) in the nce_queue_mp() comments, scan ncec_qd_mp
2783 * for any IPMP probe packets, and toss them. IPMP probe
2784 * packets will always be at the head of ncec_qd_mp, so that
2785 * we can stop at the first queued ND packet that is
2786 * not a probe packet.
2788 prevmpp
= &ncec
->ncec_qd_mp
;
2789 for (mp
= ncec
->ncec_qd_mp
; mp
!= NULL
; mp
= nextmp
) {
2790 nextmp
= mp
->b_next
;
2792 if (IS_UNDER_IPMP(ill
) && ncec
->ncec_nprobes
> 0) {
2794 ncec
->ncec_nprobes
--;
2797 prevmpp
= &mp
->b_next
;
2802 * Must be resolver's retransmit timer.
2804 mutex_exit(&ncec
->ncec_lock
);
2805 ip_ndp_resolve(ncec
);
2809 if (((ncec
->ncec_flags
& NCE_F_UNSOL_ADV
) &&
2810 ncec
->ncec_unsolicit_count
!= 0) ||
2811 (NCE_PUBLISH(ncec
) && DEFENSE_INTERVAL(isv6
) != 0)) {
2812 if (ncec
->ncec_unsolicit_count
> 0) {
2813 ncec
->ncec_unsolicit_count
--;
2814 mutex_exit(&ncec
->ncec_lock
);
2816 * When we get to zero announcements left,
2817 * switch to address defense
2820 boolean_t rate_limit
;
2822 mutex_exit(&ncec
->ncec_lock
);
2823 rate_limit
= ill_defend_rate_limit(ill
, ncec
);
2825 nce_restart_timer(ncec
,
2826 DEFENSE_INTERVAL(isv6
));
2831 dropped
= ndp_announce(ncec
);
2833 dropped
= arp_announce(ncec
);
2835 mutex_enter(&ncec
->ncec_lock
);
2837 ncec
->ncec_unsolicit_count
++;
2839 ncec
->ncec_last_time_defended
=
2842 mutex_exit(&ncec
->ncec_lock
);
2843 if (ncec
->ncec_unsolicit_count
!= 0) {
2844 nce_restart_timer(ncec
,
2845 ANNOUNCE_INTERVAL(isv6
));
2847 nce_restart_timer(ncec
, DEFENSE_INTERVAL(isv6
));
2850 mutex_exit(&ncec
->ncec_lock
);
2854 mutex_exit(&ncec
->ncec_lock
);
2859 ill_refrele(src_ill
);
2863 * Set a link layer address from the ll_addr passed in.
2864 * Copy SAP from ill.
2867 nce_set_ll(ncec_t
*ncec
, uchar_t
*ll_addr
)
2869 ill_t
*ill
= ncec
->ncec_ill
;
2871 ASSERT(ll_addr
!= NULL
);
2872 if (ill
->ill_phys_addr_length
> 0) {
2874 * The bcopy() below used to be called for the physical address
2875 * length rather than the link layer address length. For
2876 * ethernet and many other media, the phys_addr and lla are
2879 * The phys_addr and lla may not be the same for devices that
2880 * support DL_IPV6_LINK_LAYER_ADDR, though there are currently
2881 * no known instances of these.
2883 * For PPP or other interfaces with a zero length
2884 * physical address, don't do anything here.
2885 * The bcopy() with a zero phys_addr length was previously
2886 * a no-op for interfaces with a zero-length physical address.
2887 * Using the lla for them would change the way they operate.
2888 * Doing nothing in such cases preserves expected behavior.
2890 bcopy(ll_addr
, ncec
->ncec_lladdr
, ill
->ill_nd_lla_len
);
2895 nce_cmp_ll_addr(const ncec_t
*ncec
, const uchar_t
*ll_addr
,
2896 uint32_t ll_addr_len
)
2898 ASSERT(ncec
->ncec_lladdr
!= NULL
);
2899 if (ll_addr
== NULL
)
2901 if (bcmp(ll_addr
, ncec
->ncec_lladdr
, ll_addr_len
) != 0)
2907 * Updates the link layer address or the reachability state of
2908 * a cache entry. Reset probe counter if needed.
2911 nce_update(ncec_t
*ncec
, uint16_t new_state
, uchar_t
*new_ll_addr
)
2913 ill_t
*ill
= ncec
->ncec_ill
;
2914 boolean_t need_stop_timer
= B_FALSE
;
2915 boolean_t need_fastpath_update
= B_FALSE
;
2919 ASSERT(MUTEX_HELD(&ncec
->ncec_lock
));
2921 * If this interface does not do NUD, there is no point
2922 * in allowing an update to the cache entry. Although
2923 * we will respond to NS.
2924 * The only time we accept an update for a resolver when
2925 * NUD is turned off is when it has just been created.
2926 * Non-Resolvers will always be created as REACHABLE.
2928 if (new_state
!= ND_UNCHANGED
) {
2929 if ((ncec
->ncec_flags
& NCE_F_NONUD
) &&
2930 (ncec
->ncec_state
!= ND_INCOMPLETE
))
2932 ASSERT((int16_t)new_state
>= ND_STATE_VALID_MIN
);
2933 ASSERT((int16_t)new_state
<= ND_STATE_VALID_MAX
);
2934 need_stop_timer
= B_TRUE
;
2935 if (new_state
== ND_REACHABLE
)
2936 ncec
->ncec_last
= TICK_TO_MSEC(ddi_get_lbolt64());
2938 /* We force NUD in this case */
2939 ncec
->ncec_last
= 0;
2941 ncec
->ncec_state
= new_state
;
2942 ncec
->ncec_pcnt
= ND_MAX_UNICAST_SOLICIT
;
2943 ASSERT(ncec
->ncec_lladdr
!= NULL
|| new_state
== ND_INITIAL
||
2944 new_state
== ND_INCOMPLETE
);
2948 if (need_stop_timer
|| (ncec
->ncec_flags
& NCE_F_STATIC
)) {
2949 tid
= ncec
->ncec_timeout_id
;
2950 ncec
->ncec_timeout_id
= 0;
2953 * Re-trigger fastpath probe and
2954 * overwrite the DL_UNITDATA_REQ data, noting we'll lose
2955 * whatever packets that happens to be transmitting at the time.
2957 if (new_ll_addr
!= NULL
) {
2958 bcopy(new_ll_addr
, ncec
->ncec_lladdr
,
2959 ill
->ill_phys_addr_length
);
2960 need_fastpath_update
= B_TRUE
;
2962 mutex_exit(&ncec
->ncec_lock
);
2963 if (need_stop_timer
|| (ncec
->ncec_flags
& NCE_F_STATIC
)) {
2965 (void) untimeout(tid
);
2967 if (need_fastpath_update
) {
2969 * Delete any existing dlur_mp and fp_mp information.
2970 * For IPMP interfaces, all underlying ill's must be checked
2973 nce_fastpath_list_delete(ncec
->ncec_ill
, ncec
, NULL
);
2975 * add the new dlur_mp and fp_mp
2977 nce
= nce_fastpath(ncec
, B_TRUE
, NULL
);
2981 mutex_enter(&ncec
->ncec_lock
);
2985 nce_queue_mp_common(ncec_t
*ncec
, mblk_t
*mp
, boolean_t head_insert
)
2990 ASSERT(MUTEX_HELD(&ncec
->ncec_lock
));
2992 for (mpp
= &ncec
->ncec_qd_mp
; *mpp
!= NULL
; mpp
= &(*mpp
)->b_next
) {
2993 if (++count
> ncec
->ncec_ill
->ill_max_buf
) {
2994 tmp
= ncec
->ncec_qd_mp
->b_next
;
2995 ncec
->ncec_qd_mp
->b_next
= NULL
;
2997 * if we never create data addrs on the under_ill
3000 BUMP_MIB(ncec
->ncec_ill
->ill_ip_mib
,
3001 ipIfStatsOutDiscards
);
3002 ip_drop_output("ipIfStatsOutDiscards", ncec
->ncec_qd_mp
,
3004 freemsg(ncec
->ncec_qd_mp
);
3005 ncec
->ncec_qd_mp
= tmp
;
3010 ncec
->ncec_nprobes
++;
3011 mp
->b_next
= ncec
->ncec_qd_mp
;
3012 ncec
->ncec_qd_mp
= mp
;
3019 * nce_queue_mp will queue the packet into the ncec_qd_mp. The packet will be
3020 * queued at the head or tail of the queue based on the input argument
3021 * 'head_insert'. The caller should specify this argument as B_TRUE if this
3022 * packet is an IPMP probe packet, in which case the following happens:
3024 * 1. Insert it at the head of the ncec_qd_mp list. Consider the normal
3025 * (non-ipmp_probe) load-spreading case where the source address of the ND
3026 * packet is not tied to ncec_ill. If the ill bound to the source address
3027 * cannot receive, the response to the ND packet will not be received.
3028 * However, if ND packets for ncec_ill's probes are queued behind that ND
3029 * packet, those probes will also fail to be sent, and thus in.mpathd will
3030 * erroneously conclude that ncec_ill has also failed.
3032 * 2. Drop the ipmp_probe packet in nce_timer() if the ND did not succeed on
3033 * the first attempt. This ensures that ND problems do not manifest as
3036 * We achieve this by inserting ipmp_probe() packets at the head of the
3039 * The ncec for the probe target is created with ncec_ill set to the ipmp_ill,
3040 * but the caller needs to set head_insert to B_TRUE if this is a probe packet.
3043 nce_queue_mp(ncec_t
*ncec
, mblk_t
*mp
, boolean_t head_insert
)
3045 ASSERT(MUTEX_HELD(&ncec
->ncec_lock
));
3046 nce_queue_mp_common(ncec
, mp
, head_insert
);
3050 * Called when address resolution failed due to a timeout.
3051 * Send an ICMP unreachable in response to all queued packets.
3054 ndp_resolv_failed(ncec_t
*ncec
)
3056 mblk_t
*mp
, *nxt_mp
;
3057 char buf
[INET6_ADDRSTRLEN
];
3058 ill_t
*ill
= ncec
->ncec_ill
;
3059 ip_recv_attr_t iras
;
3061 bzero(&iras
, sizeof (iras
));
3064 * we are setting the ira_rill to the ipmp_ill (instead of
3065 * the actual ill on which the packet was received), but this
3066 * is ok because we don't actually need the real ira_rill
3067 * to send the icmp unreachable to the sender.
3069 iras
.ira_ill
= iras
.ira_rill
= ill
;
3070 iras
.ira_ruifindex
= ill
->ill_phyint
->phyint_ifindex
;
3071 iras
.ira_rifindex
= iras
.ira_ruifindex
;
3073 ip1dbg(("ndp_resolv_failed: dst %s\n",
3074 inet_ntop(AF_INET6
, (char *)&ncec
->ncec_addr
, buf
, sizeof (buf
))));
3075 mutex_enter(&ncec
->ncec_lock
);
3076 mp
= ncec
->ncec_qd_mp
;
3077 ncec
->ncec_qd_mp
= NULL
;
3078 ncec
->ncec_nprobes
= 0;
3079 mutex_exit(&ncec
->ncec_lock
);
3080 while (mp
!= NULL
) {
3081 nxt_mp
= mp
->b_next
;
3084 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsOutDiscards
);
3085 ip_drop_output("ipIfStatsOutDiscards - address unreachable",
3087 icmp_unreachable_v6(mp
,
3088 ICMP6_DST_UNREACH_ADDR
, B_FALSE
, &iras
);
3089 ASSERT(!(iras
.ira_flags
& IRAF_IPSEC_SECURE
));
3092 ncec_cb_dispatch(ncec
); /* finish off waiting callbacks */
3096 * Handle the completion of NDP and ARP resolution.
3099 nce_resolv_ok(ncec_t
*ncec
)
3103 iaflags_t ixaflags
= IXAF_NO_TRACE
;
3105 ill_t
*ill
= ncec
->ncec_ill
;
3106 boolean_t isv6
= (ncec
->ncec_ipversion
== IPV6_VERSION
);
3107 ip_stack_t
*ipst
= ill
->ill_ipst
;
3109 if (IS_IPMP(ncec
->ncec_ill
)) {
3110 nce_resolv_ipmp_ok(ncec
);
3115 mutex_enter(&ncec
->ncec_lock
);
3116 ASSERT(ncec
->ncec_nprobes
== 0);
3117 mp
= ncec
->ncec_qd_mp
;
3118 ncec
->ncec_qd_mp
= NULL
;
3119 mutex_exit(&ncec
->ncec_lock
);
3121 while (mp
!= NULL
) {
3124 if (ill
->ill_isv6
) {
3125 ip6_t
*ip6h
= (ip6_t
*)mp
->b_rptr
;
3127 pkt_len
= ntohs(ip6h
->ip6_plen
) + IPV6_HDR_LEN
;
3129 ipha_t
*ipha
= (ipha_t
*)mp
->b_rptr
;
3131 ixaflags
|= IXAF_IS_IPV4
;
3132 pkt_len
= ntohs(ipha
->ipha_length
);
3134 nxt_mp
= mp
->b_next
;
3137 * IXAF_NO_DEV_FLOW_CTL information for TCP packets is no
3138 * longer available, but it's ok to drop this flag because TCP
3139 * has its own flow-control in effect, so TCP packets
3140 * are not likely to get here when flow-control is in effect.
3142 mutex_enter(&ill
->ill_lock
);
3143 nce
= nce_lookup(ill
, &ncec
->ncec_addr
);
3144 mutex_exit(&ill
->ill_lock
);
3148 BUMP_MIB(&ipst
->ips_ip6_mib
,
3149 ipIfStatsOutDiscards
);
3151 BUMP_MIB(&ipst
->ips_ip_mib
,
3152 ipIfStatsOutDiscards
);
3154 ip_drop_output("ipIfStatsOutDiscards - no nce",
3159 * We don't know the zoneid, but
3160 * ip_xmit does not care since IXAF_NO_TRACE
3161 * is set. (We traced the packet the first
3162 * time through ip_xmit.)
3164 (void) ip_xmit(mp
, nce
, ixaflags
, pkt_len
, 0,
3165 ALL_ZONES
, 0, NULL
);
3171 ncec_cb_dispatch(ncec
); /* complete callbacks */
3175 * Called by SIOCSNDP* ioctl to add/change an ncec entry
3176 * and the corresponding attributes.
3177 * Disallow states other than ND_REACHABLE or ND_STALE.
3180 ndp_sioc_update(ill_t
*ill
, lif_nd_req_t
*lnr
)
3187 uint16_t new_flags
= 0;
3188 uint16_t old_flags
= 0;
3189 int inflags
= lnr
->lnr_flags
;
3190 ip_stack_t
*ipst
= ill
->ill_ipst
;
3191 boolean_t do_postprocess
= B_FALSE
;
3193 ASSERT(ill
->ill_isv6
);
3194 if ((lnr
->lnr_state_create
!= ND_REACHABLE
) &&
3195 (lnr
->lnr_state_create
!= ND_STALE
))
3198 sin6
= (sin6_t
*)&lnr
->lnr_addr
;
3199 addr
= &sin6
->sin6_addr
;
3201 mutex_enter(&ipst
->ips_ndp6
->ndp_g_lock
);
3202 ASSERT(!IS_UNDER_IPMP(ill
));
3203 nce
= nce_lookup_addr(ill
, addr
);
3205 new_flags
= nce
->nce_common
->ncec_flags
;
3207 switch (inflags
& (NDF_ISROUTER_ON
|NDF_ISROUTER_OFF
)) {
3208 case NDF_ISROUTER_ON
:
3209 new_flags
|= NCE_F_ISROUTER
;
3211 case NDF_ISROUTER_OFF
:
3212 new_flags
&= ~NCE_F_ISROUTER
;
3214 case (NDF_ISROUTER_OFF
|NDF_ISROUTER_ON
):
3215 mutex_exit(&ipst
->ips_ndp6
->ndp_g_lock
);
3220 if (inflags
& NDF_STATIC
)
3221 new_flags
|= NCE_F_STATIC
;
3223 switch (inflags
& (NDF_ANYCAST_ON
|NDF_ANYCAST_OFF
)) {
3224 case NDF_ANYCAST_ON
:
3225 new_flags
|= NCE_F_ANYCAST
;
3227 case NDF_ANYCAST_OFF
:
3228 new_flags
&= ~NCE_F_ANYCAST
;
3230 case (NDF_ANYCAST_OFF
|NDF_ANYCAST_ON
):
3231 mutex_exit(&ipst
->ips_ndp6
->ndp_g_lock
);
3238 err
= nce_add_v6(ill
,
3239 (uchar_t
*)lnr
->lnr_hdw_addr
,
3240 ill
->ill_phys_addr_length
,
3243 lnr
->lnr_state_create
,
3246 mutex_exit(&ipst
->ips_ndp6
->ndp_g_lock
);
3247 ip1dbg(("ndp_sioc_update: Can't create NCE %d\n", err
));
3250 do_postprocess
= B_TRUE
;
3253 ncec
= nce
->nce_common
;
3254 old_flags
= ncec
->ncec_flags
;
3255 if (old_flags
& NCE_F_ISROUTER
&& !(new_flags
& NCE_F_ISROUTER
)) {
3256 ncec_router_to_host(ncec
);
3257 mutex_exit(&ipst
->ips_ndp6
->ndp_g_lock
);
3259 err
= nce_add_v6_postprocess(nce
);
3263 mutex_exit(&ipst
->ips_ndp6
->ndp_g_lock
);
3266 err
= nce_add_v6_postprocess(nce
);
3268 * err cannot be anything other than 0 because we don't support
3269 * proxy arp of static addresses.
3273 mutex_enter(&ncec
->ncec_lock
);
3274 ncec
->ncec_flags
= new_flags
;
3275 mutex_exit(&ncec
->ncec_lock
);
3277 * Note that we ignore the state at this point, which
3278 * should be either STALE or REACHABLE. Instead we let
3279 * the link layer address passed in to determine the state
3280 * much like incoming packets.
3282 nce_process(ncec
, (uchar_t
*)lnr
->lnr_hdw_addr
, 0, B_FALSE
);
3288 * Create an nce_t structure for ill using the ncec->ncec_lladdr to set up
3289 * the nce_dlur_mp. If ill != ncec->ncec_ill, then the ips_ill_g_lock must
3290 * be held to ensure that they are in the same group.
3293 nce_fastpath_create(ill_t
*ill
, ncec_t
*ncec
)
3298 nce
= nce_ill_lookup_then_add(ill
, ncec
);
3300 if (nce
== NULL
|| IS_LOOPBACK(nce
->nce_ill
) || IS_VNI(nce
->nce_ill
))
3304 * hold the ncec_lock to synchronize with nce_update() so that,
3305 * at the end of this function, the contents of nce_dlur_mp are
3306 * consistent with ncec->ncec_lladdr, even though some intermediate
3307 * packet may have been sent out with a mangled address, which would
3308 * only be a transient condition.
3310 mutex_enter(&ncec
->ncec_lock
);
3311 if (ncec
->ncec_lladdr
!= NULL
) {
3312 bcopy(ncec
->ncec_lladdr
, nce
->nce_dlur_mp
->b_rptr
+
3313 NCE_LL_ADDR_OFFSET(ill
), ill
->ill_phys_addr_length
);
3315 nce
->nce_dlur_mp
= ill_dlur_gen(NULL
, 0, ill
->ill_sap
,
3316 ill
->ill_sap_length
);
3318 mutex_exit(&ncec
->ncec_lock
);
3323 * we make nce_fp_mp to have an M_DATA prepend.
3324 * The caller ensures there is hold on ncec for this function.
3325 * Note that since ill_fastpath_probe() copies the mblk there is
3326 * no need to hold the nce or ncec beyond this function.
3328 * If the caller has passed in a non-null ncec_nce to nce_fastpath() that
3329 * ncec_nce must correspond to the nce for ncec with nce_ill == ncec->ncec_ill
3330 * and will be returned back by this function, so that no extra nce_refrele
3331 * is required for the caller. The calls from nce_add_common() use this
3332 * method. All other callers (that pass in NULL ncec_nce) will have to do a
3333 * nce_refrele of the returned nce (when it is non-null).
3336 nce_fastpath(ncec_t
*ncec
, boolean_t trigger_fp_req
, nce_t
*ncec_nce
)
3339 ill_t
*ill
= ncec
->ncec_ill
;
3341 ASSERT(ill
!= NULL
);
3343 if (IS_IPMP(ill
) && trigger_fp_req
) {
3344 trigger_fp_req
= B_FALSE
;
3345 ipmp_ncec_refresh_nce(ncec
);
3349 * If the caller already has the nce corresponding to the ill, use
3350 * that one. Otherwise we have to lookup/add the nce. Calls from
3351 * nce_add_common() fall in the former category, and have just done
3352 * the nce lookup/add that can be reused.
3354 if (ncec_nce
== NULL
)
3355 nce
= nce_fastpath_create(ill
, ncec
);
3359 if (nce
== NULL
|| IS_LOOPBACK(nce
->nce_ill
) || IS_VNI(nce
->nce_ill
))
3363 nce_fastpath_trigger(nce
);
3368 * Trigger fastpath on nce. No locks may be held.
3371 nce_fastpath_trigger(nce_t
*nce
)
3374 ill_t
*ill
= nce
->nce_ill
;
3375 ncec_t
*ncec
= nce
->nce_common
;
3377 res
= ill_fastpath_probe(ill
, nce
->nce_dlur_mp
);
3379 * EAGAIN is an indication of a transient error
3380 * i.e. allocation failure etc. leave the ncec in the list it
3381 * will be updated when another probe happens for another ire
3382 * if not it will be taken out of the list when the ire is
3385 if (res
!= 0 && res
!= EAGAIN
&& res
!= ENOTSUP
)
3386 nce_fastpath_list_delete(ill
, ncec
, NULL
);
3390 * Add ncec to the nce fastpath list on ill.
3393 nce_ill_lookup_then_add_locked(ill_t
*ill
, ncec_t
*ncec
, list_t
*graveyard
)
3397 ASSERT(MUTEX_HELD(&ill
->ill_lock
));
3399 * Atomically ensure that the ill is not CONDEMNED and is not going
3400 * down, before adding the NCE.
3402 if (ill
->ill_state_flags
& ILL_CONDEMNED
)
3404 mutex_enter(&ncec
->ncec_lock
);
3406 * if ncec has not been deleted and
3407 * is not already in the list add it.
3409 if (!NCE_ISCONDEMNED(ncec
)) {
3410 nce
= nce_lookup(ill
, &ncec
->ncec_addr
);
3413 nce
= nce_add(ill
, ncec
, graveyard
);
3416 mutex_exit(&ncec
->ncec_lock
);
3421 nce_ill_lookup_then_add(ill_t
*ill
, ncec_t
*ncec
)
3426 list_create(&graveyard
, sizeof (nce_t
), offsetof(nce_t
, nce_node
));
3427 mutex_enter(&ill
->ill_lock
);
3428 nce
= nce_ill_lookup_then_add_locked(ill
, ncec
, &graveyard
);
3429 mutex_exit(&ill
->ill_lock
);
3430 nce_graveyard_free(&graveyard
);
3436 * remove ncec from the ill_nce list. If 'dead' is non-null, the deleted
3437 * nce is added to the 'dead' list, and the caller must nce_refrele() the
3438 * entry after all locks have been dropped.
3441 nce_fastpath_list_delete(ill_t
*ill
, ncec_t
*ncec
, list_t
*dead
)
3445 ASSERT(ill
!= NULL
);
3447 /* delete any nces referencing the ncec from underlying ills */
3449 ipmp_ncec_delete_nce(ncec
);
3451 /* now the ill itself */
3452 mutex_enter(&ill
->ill_lock
);
3453 for (nce
= list_head(&ill
->ill_nce
); nce
!= NULL
;
3454 nce
= list_next(&ill
->ill_nce
, nce
)) {
3455 if (nce
->nce_common
== ncec
) {
3461 mutex_exit(&ill
->ill_lock
);
3466 list_insert_tail(dead
, nce
);
3471 * when the fastpath response does not fit in the datab
3472 * associated with the existing nce_fp_mp, we delete and
3473 * add the nce to retrigger fastpath based on the information
3477 nce_delete_then_add(nce_t
*nce
)
3479 ill_t
*ill
= nce
->nce_ill
;
3480 nce_t
*newnce
= NULL
;
3483 list_create(&graveyard
, sizeof (nce_t
), offsetof(nce_t
, nce_node
));
3484 ip0dbg(("nce_delete_then_add nce %p ill %s\n",
3485 (void *)nce
, ill
->ill_name
));
3486 mutex_enter(&ill
->ill_lock
);
3487 mutex_enter(&nce
->nce_common
->ncec_lock
);
3490 * Make sure that ncec is not condemned before adding. We hold the
3491 * ill_lock and ncec_lock to synchronize with ncec_delete() and
3492 * ipmp_ncec_delete_nce()
3494 if (!NCE_ISCONDEMNED(nce
->nce_common
))
3495 newnce
= nce_add(ill
, nce
->nce_common
, &graveyard
);
3496 mutex_exit(&nce
->nce_common
->ncec_lock
);
3497 mutex_exit(&ill
->ill_lock
);
3498 nce_graveyard_free(&graveyard
);
3500 return (newnce
); /* could be null if nomem */
3503 typedef struct nce_fp_match_s
{
3504 nce_t
*nce_fp_match_res
;
3505 mblk_t
*nce_fp_match_ack_mp
;
3510 nce_fastpath_match_dlur(ill_t
*ill
, nce_t
*nce
, void *arg
)
3512 nce_fp_match_t
*nce_fp_marg
= arg
;
3513 ncec_t
*ncec
= nce
->nce_common
;
3514 mblk_t
*mp
= nce_fp_marg
->nce_fp_match_ack_mp
;
3515 uchar_t
*mp_rptr
, *ud_mp_rptr
;
3516 mblk_t
*ud_mp
= nce
->nce_dlur_mp
;
3520 * mp is the mp associated with the fastpath ack.
3521 * ud_mp is the outstanding DL_UNITDATA_REQ on the nce_t
3522 * under consideration. If the contents match, then the
3523 * fastpath ack is used to update the nce.
3527 mp_rptr
= mp
->b_rptr
;
3528 cmplen
= mp
->b_wptr
- mp_rptr
;
3529 ASSERT(cmplen
>= 0);
3531 ud_mp_rptr
= ud_mp
->b_rptr
;
3533 * The ncec is locked here to prevent any other threads from accessing
3534 * and changing nce_dlur_mp when the address becomes resolved to an
3535 * lla while we're in the middle of looking at and comparing the
3536 * hardware address (lla). It is also locked to prevent multiple
3537 * threads in nce_fastpath() from examining nce_dlur_mp at the same
3540 mutex_enter(&ncec
->ncec_lock
);
3541 if (ud_mp
->b_wptr
- ud_mp_rptr
!= cmplen
||
3542 bcmp((char *)mp_rptr
, (char *)ud_mp_rptr
, cmplen
) == 0) {
3543 nce_fp_marg
->nce_fp_match_res
= nce
;
3544 mutex_exit(&ncec
->ncec_lock
);
3548 mutex_exit(&ncec
->ncec_lock
);
3553 * Update all NCE's that are not in fastpath mode and
3554 * have an nce_fp_mp that matches mp. mp->b_cont contains
3555 * the fastpath header.
3557 * Returns TRUE if entry should be dequeued, or FALSE otherwise.
3560 nce_fastpath_update(ill_t
*ill
, mblk_t
*mp
)
3562 nce_fp_match_t nce_fp_marg
;
3564 mblk_t
*nce_fp_mp
, *fp_mp
;
3566 nce_fp_marg
.nce_fp_match_res
= NULL
;
3567 nce_fp_marg
.nce_fp_match_ack_mp
= mp
;
3569 nce_walk(ill
, nce_fastpath_match_dlur
, &nce_fp_marg
);
3571 if ((nce
= nce_fp_marg
.nce_fp_match_res
) == NULL
)
3574 mutex_enter(&nce
->nce_lock
);
3575 nce_fp_mp
= nce
->nce_fp_mp
;
3577 if (nce_fp_mp
!= NULL
) {
3579 if (nce_fp_mp
->b_rptr
+ MBLKL(fp_mp
) >
3580 nce_fp_mp
->b_datap
->db_lim
) {
3581 mutex_exit(&nce
->nce_lock
);
3582 nce
= nce_delete_then_add(nce
);
3586 mutex_enter(&nce
->nce_lock
);
3587 nce_fp_mp
= nce
->nce_fp_mp
;
3591 /* Matched - install mp as the fastpath mp */
3592 if (nce_fp_mp
== NULL
) {
3593 fp_mp
= dupb(mp
->b_cont
);
3594 nce
->nce_fp_mp
= fp_mp
;
3597 bcopy(fp_mp
->b_rptr
, nce_fp_mp
->b_rptr
, MBLKL(fp_mp
));
3598 nce
->nce_fp_mp
->b_wptr
= nce
->nce_fp_mp
->b_rptr
3601 mutex_exit(&nce
->nce_lock
);
3606 * Return a pointer to a given option in the packet.
3607 * Assumes that the option part of the packet has already been validated.
3610 ndp_get_option(nd_opt_hdr_t
*opt
, int optlen
, int opt_type
)
3612 while (optlen
> 0) {
3613 if (opt
->nd_opt_type
== opt_type
)
3615 optlen
-= 8 * opt
->nd_opt_len
;
3616 opt
= (struct nd_opt_hdr
*)((char *)opt
+ 8 * opt
->nd_opt_len
);
3622 * Verify all option lengths present are > 0, also check to see
3623 * if the option lengths and packet length are consistent.
3626 ndp_verify_optlen(nd_opt_hdr_t
*opt
, int optlen
)
3628 ASSERT(opt
!= NULL
);
3629 while (optlen
> 0) {
3630 if (opt
->nd_opt_len
== 0)
3632 optlen
-= 8 * opt
->nd_opt_len
;
3635 opt
= (struct nd_opt_hdr
*)((char *)opt
+ 8 * opt
->nd_opt_len
);
3641 * ncec_walk function.
3642 * Free a fraction of the NCE cache entries.
3644 * A possible optimization here would be to use ncec_last where possible, and
3645 * delete the least-frequently used entry, which would require more complex
3646 * computation as we walk through the ncec's (e.g., track ncec entries by
3647 * order of ncec_last and/or maintain state)
3650 ncec_cache_reclaim(ncec_t
*ncec
, void *arg
)
3652 ip_stack_t
*ipst
= ncec
->ncec_ipst
;
3653 uint_t fraction
= *(uint_t
*)arg
;
3656 if ((ncec
->ncec_flags
&
3657 (NCE_F_MYADDR
| NCE_F_STATIC
| NCE_F_BCAST
)) != 0) {
3661 rand
= (uint_t
)ddi_get_lbolt() +
3662 NCE_ADDR_HASH_V6(ncec
->ncec_addr
, NCE_TABLE_SIZE
);
3663 if ((rand
/fraction
)*fraction
== rand
) {
3664 IP_STAT(ipst
, ip_nce_reclaim_deleted
);
3670 * kmem_cache callback to free up memory.
3672 * For now we just delete a fixed fraction.
3675 ip_nce_reclaim_stack(ip_stack_t
*ipst
)
3677 uint_t fraction
= ipst
->ips_ip_nce_reclaim_fraction
;
3679 IP_STAT(ipst
, ip_nce_reclaim_calls
);
3681 ncec_walk(NULL
, ncec_cache_reclaim
, &fraction
, ipst
);
3684 * Walk all CONNs that can have a reference on an ire, ncec or dce.
3685 * Get them to update any stale references to drop any refholds they
3688 ipcl_walk(conn_ixa_cleanup
, (void *)B_FALSE
, ipst
);
3692 * Called by the memory allocator subsystem directly, when the system
3693 * is running low on memory.
3697 ip_nce_reclaim(void *args
)
3699 netstack_handle_t nh
;
3703 netstack_next_init(&nh
);
3704 while ((ns
= netstack_next(&nh
)) != NULL
) {
3706 * netstack_next() can return a netstack_t with a NULL
3707 * netstack_ip at boot time.
3709 if ((ipst
= ns
->netstack_ip
) == NULL
) {
3713 ip_nce_reclaim_stack(ipst
);
3716 netstack_next_fini(&nh
);
#ifdef DEBUG
/*
 * Record a traced reference on `ncec'.  The caller must hold ncec_lock.
 * If th_trace_ref() fails, tracing is switched off for this ncec and the
 * trace state is cleaned up.
 */
void
ncec_trace_ref(ncec_t *ncec)
{
	ASSERT(MUTEX_HELD(&ncec->ncec_lock));

	if (!ncec->ncec_trace_disable &&
	    !th_trace_ref(ncec, ncec->ncec_ipst)) {
		ncec->ncec_trace_disable = B_TRUE;
		ncec_trace_cleanup(ncec);
	}
}

/*
 * Drop a traced reference on `ncec', unless tracing has been disabled for
 * it.  The caller must hold ncec_lock.
 */
void
ncec_untrace_ref(ncec_t *ncec)
{
	ASSERT(MUTEX_HELD(&ncec->ncec_lock));

	if (ncec->ncec_trace_disable)
		return;
	th_trace_unref(ncec);
}

/*
 * Hand the ncec and its current ncec_trace_disable setting to
 * th_trace_cleanup().
 */
static void
ncec_trace_cleanup(const ncec_t *ncec)
{
	th_trace_cleanup(ncec, ncec->ncec_trace_disable);
}
#endif
3751 * Called when address resolution fails due to a timeout.
3752 * Send an ICMP unreachable in response to all queued packets.
3755 arp_resolv_failed(ncec_t
*ncec
)
3757 mblk_t
*mp
, *nxt_mp
;
3758 char buf
[INET6_ADDRSTRLEN
];
3759 struct in_addr ipv4addr
;
3760 ill_t
*ill
= ncec
->ncec_ill
;
3761 ip_stack_t
*ipst
= ncec
->ncec_ipst
;
3762 ip_recv_attr_t iras
;
3764 bzero(&iras
, sizeof (iras
));
3765 iras
.ira_flags
= IRAF_IS_IPV4
;
3767 * we are setting the ira_rill to the ipmp_ill (instead of
3768 * the actual ill on which the packet was received), but this
3769 * is ok because we don't actually need the real ira_rill
3770 * to send the icmp unreachable to the sender.
3772 iras
.ira_ill
= iras
.ira_rill
= ill
;
3773 iras
.ira_ruifindex
= ill
->ill_phyint
->phyint_ifindex
;
3774 iras
.ira_rifindex
= iras
.ira_ruifindex
;
3776 IN6_V4MAPPED_TO_INADDR(&ncec
->ncec_addr
, &ipv4addr
);
3777 ip3dbg(("arp_resolv_failed: dst %s\n",
3778 inet_ntop(AF_INET
, &ipv4addr
, buf
, sizeof (buf
))));
3779 mutex_enter(&ncec
->ncec_lock
);
3780 mp
= ncec
->ncec_qd_mp
;
3781 ncec
->ncec_qd_mp
= NULL
;
3782 ncec
->ncec_nprobes
= 0;
3783 mutex_exit(&ncec
->ncec_lock
);
3784 while (mp
!= NULL
) {
3785 nxt_mp
= mp
->b_next
;
3788 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsOutDiscards
);
3789 ip_drop_output("ipIfStatsOutDiscards - address unreachable",
3791 if (ipst
->ips_ip_arp_icmp_error
) {
3792 ip3dbg(("arp_resolv_failed: "
3793 "Calling icmp_unreachable\n"));
3794 icmp_unreachable(mp
, ICMP_HOST_UNREACHABLE
, &iras
);
3798 ASSERT(!(iras
.ira_flags
& IRAF_IPSEC_SECURE
));
3801 ncec_cb_dispatch(ncec
); /* finish off waiting callbacks */
3805 * if ill is an under_ill, translate it to the ipmp_ill and add the
3806 * nce on the ipmp_ill. Two nce_t entries (one on the ipmp_ill, and
3807 * one on the underlying in_ill) will be created for the
3808 * ncec_t in this case. The ncec_t itself will be created on the ipmp_ill.
3811 nce_lookup_then_add_v4(ill_t
*ill
, uchar_t
*hw_addr
, uint_t hw_addr_len
,
3812 const in_addr_t
*addr
, uint16_t flags
, uint16_t state
, nce_t
**newnce
)
3816 ip_stack_t
*ipst
= ill
->ill_ipst
;
3817 nce_t
*nce
, *upper_nce
= NULL
;
3818 ill_t
*in_ill
= ill
, *under
= NULL
;
3819 boolean_t need_ill_refrele
= B_FALSE
;
3821 if (flags
& NCE_F_MCAST
) {
3823 * hw_addr will be figured out in nce_set_multicast_v4;
3824 * caller needs to pass in the cast_ill for ipmp
3826 ASSERT(hw_addr
== NULL
);
3827 ASSERT(!IS_IPMP(ill
));
3828 err
= nce_set_multicast_v4(ill
, addr
, flags
, newnce
);
3832 if (IS_UNDER_IPMP(ill
) && !(flags
& NCE_F_MYADDR
)) {
3833 ill
= ipmp_ill_hold_ipmp_ill(ill
);
3836 need_ill_refrele
= B_TRUE
;
3838 if ((flags
& NCE_F_BCAST
) != 0) {
3840 * IPv4 broadcast ncec: compute the hwaddr.
3843 under
= ipmp_ill_hold_xmit_ill(ill
, B_FALSE
);
3844 if (under
== NULL
) {
3845 if (need_ill_refrele
)
3849 hw_addr
= under
->ill_bcast_mp
->b_rptr
+
3850 NCE_LL_ADDR_OFFSET(under
);
3851 hw_addr_len
= under
->ill_phys_addr_length
;
3853 hw_addr
= ill
->ill_bcast_mp
->b_rptr
+
3854 NCE_LL_ADDR_OFFSET(ill
),
3855 hw_addr_len
= ill
->ill_phys_addr_length
;
3859 mutex_enter(&ipst
->ips_ndp4
->ndp_g_lock
);
3860 IN6_IPADDR_TO_V4MAPPED(*addr
, &addr6
);
3861 nce
= nce_lookup_addr(ill
, &addr6
);
3863 err
= nce_add_v4(ill
, hw_addr
, hw_addr_len
, addr
, flags
,
3868 mutex_exit(&ipst
->ips_ndp4
->ndp_g_lock
);
3870 err
= nce_add_v4_postprocess(nce
);
3872 if (in_ill
!= ill
&& nce
!= NULL
) {
3873 nce_t
*under_nce
= NULL
;
3876 * in_ill was the under_ill. Try to create the under_nce.
3877 * Hold the ill_g_lock to prevent changes to group membership
3878 * until we are done.
3880 rw_enter(&ipst
->ips_ill_g_lock
, RW_READER
);
3881 if (!IS_IN_SAME_ILLGRP(in_ill
, ill
)) {
3882 DTRACE_PROBE2(ill__not__in__group
, nce_t
*, nce
,
3884 rw_exit(&ipst
->ips_ill_g_lock
);
3890 under_nce
= nce_fastpath_create(in_ill
, nce
->nce_common
);
3891 if (under_nce
== NULL
) {
3892 rw_exit(&ipst
->ips_ill_g_lock
);
3898 rw_exit(&ipst
->ips_ill_g_lock
);
3900 nce
= under_nce
; /* will be returned to caller */
3901 if (NCE_ISREACHABLE(nce
->nce_common
))
3902 nce_fastpath_trigger(under_nce
);
3913 if (upper_nce
!= NULL
)
3914 nce_refrele(upper_nce
);
3915 if (need_ill_refrele
)
3922 * NDP Cache Entry creation routine for IPv4.
3923 * This routine must always be called with ndp4->ndp_g_lock held.
3924 * Prior to return, ncec_refcnt is incremented.
3926 * IPMP notes: the ncec for non-local (i.e., !NCE_MYADDR(ncec) addresses
3927 * are always added pointing at the ipmp_ill. Thus, when the ill passed
3928 * to nce_add_v4 is an under_ill (i.e., IS_UNDER_IPMP(ill)) two nce_t
3929 * entries will be created, both pointing at the same ncec_t. The nce_t
3930 * entries will have their nce_ill set to the ipmp_ill and the under_ill
3931 * respectively, with the ncec_t having its ncec_ill pointing at the ipmp_ill.
3932 * Local addresses are always created on the ill passed to nce_add_v4.
3935 nce_add_v4(ill_t
*ill
, uchar_t
*hw_addr
, uint_t hw_addr_len
,
3936 const in_addr_t
*addr
, uint16_t flags
, uint16_t state
, nce_t
**newnce
)
3939 boolean_t is_multicast
= (flags
& NCE_F_MCAST
);
3940 struct in6_addr addr6
;
3943 ASSERT(MUTEX_HELD(&ill
->ill_ipst
->ips_ndp4
->ndp_g_lock
));
3944 ASSERT(!ill
->ill_isv6
);
3945 ASSERT(!IN_MULTICAST(htonl(*addr
)) || is_multicast
);
3947 IN6_IPADDR_TO_V4MAPPED(*addr
, &addr6
);
3948 err
= nce_add_common(ill
, hw_addr
, hw_addr_len
, &addr6
, flags
, state
,
3950 ASSERT(newnce
!= NULL
);
3956 * Post-processing routine to be executed after nce_add_v4(). This function
3957 * triggers fastpath (if appropriate) and DAD on the newly added nce entry
3958 * and must be called without any locks held.
3960 * Always returns 0, but we return an int to keep this symmetric with the
3961 * IPv6 counter-part.
3964 nce_add_v4_postprocess(nce_t
*nce
)
3966 ncec_t
*ncec
= nce
->nce_common
;
3967 uint16_t flags
= ncec
->ncec_flags
;
3968 boolean_t ndp_need_dad
= B_FALSE
;
3971 ip_stack_t
*ipst
= ncec
->ncec_ill
->ill_ipst
;
3972 uchar_t
*hw_addr
= ncec
->ncec_lladdr
;
3973 boolean_t trigger_fastpath
= B_TRUE
;
3976 * If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then
3977 * we call nce_fastpath as soon as the ncec is resolved in nce_process.
3978 * We call nce_fastpath from nce_update if the link layer address of
3979 * the peer changes from nce_update
3981 if (NCE_PUBLISH(ncec
) || !NCE_ISREACHABLE(ncec
) || (hw_addr
== NULL
&&
3982 ncec
->ncec_ill
->ill_net_type
!= IRE_IF_NORESOLVER
))
3983 trigger_fastpath
= B_FALSE
;
3985 if (trigger_fastpath
)
3986 nce_fastpath_trigger(nce
);
3988 if (NCE_PUBLISH(ncec
) && ncec
->ncec_state
== ND_PROBE
) {
3990 * Either the caller (by passing in ND_PROBE)
3991 * or nce_add_common() (by the internally computed state
3992 * based on ncec_addr and ill_net_type) has determined
3993 * that this unicast entry needs DAD. Trigger DAD.
3995 ndp_need_dad
= B_TRUE
;
3996 } else if (flags
& NCE_F_UNSOL_ADV
) {
3998 * We account for the transmit below by assigning one
3999 * less than the ndd variable. Subsequent decrements
4000 * are done in nce_timer.
4002 mutex_enter(&ncec
->ncec_lock
);
4003 ncec
->ncec_unsolicit_count
=
4004 ipst
->ips_ip_arp_publish_count
- 1;
4005 mutex_exit(&ncec
->ncec_lock
);
4006 dropped
= arp_announce(ncec
);
4007 mutex_enter(&ncec
->ncec_lock
);
4009 ncec
->ncec_unsolicit_count
++;
4011 ncec
->ncec_last_time_defended
= ddi_get_lbolt();
4012 if (ncec
->ncec_unsolicit_count
!= 0) {
4013 nce_start_timer(ncec
,
4014 ipst
->ips_ip_arp_publish_interval
);
4016 mutex_exit(&ncec
->ncec_lock
);
4020 * If ncec_xmit_interval is 0, user has configured us to send the first
4021 * probe right away. Do so, and set up for the subsequent probes.
4024 mutex_enter(&ncec
->ncec_lock
);
4025 if (ncec
->ncec_pcnt
== 0) {
4027 * DAD probes and announce can be
4028 * administratively disabled by setting the
4029 * probe_count to zero. Restart the timer in
4030 * this case to mark the ipif as ready.
4032 ncec
->ncec_unsolicit_count
= 0;
4033 mutex_exit(&ncec
->ncec_lock
);
4034 nce_restart_timer(ncec
, 0);
4036 mutex_exit(&ncec
->ncec_lock
);
4037 delay
= ((ncec
->ncec_flags
& NCE_F_FAST
) ?
4038 ipst
->ips_arp_probe_delay
:
4039 ipst
->ips_arp_fastprobe_delay
);
4040 nce_dad(ncec
, NULL
, (delay
== 0 ? B_TRUE
: B_FALSE
));
4047 * ncec_walk routine to update all entries that have a given destination or
4048 * gateway address and cached link layer (MAC) address. This is used when ARP
4049 * informs us that a network-to-link-layer mapping may have changed.
4052 nce_update_hw_changed(ncec_t
*ncec
, void *arg
)
4054 nce_hw_map_t
*hwm
= arg
;
4057 if (ncec
->ncec_state
!= ND_REACHABLE
)
4060 IN6_V4MAPPED_TO_IPADDR(&ncec
->ncec_addr
, ncec_addr
);
4061 if (ncec_addr
!= hwm
->hwm_addr
)
4064 mutex_enter(&ncec
->ncec_lock
);
4065 if (hwm
->hwm_flags
!= 0)
4066 ncec
->ncec_flags
= hwm
->hwm_flags
;
4067 nce_update(ncec
, ND_STALE
, hwm
->hwm_hwaddr
);
4068 mutex_exit(&ncec
->ncec_lock
);
4072 ncec_refhold(ncec_t
*ncec
)
4074 mutex_enter(&(ncec
)->ncec_lock
);
4075 (ncec
)->ncec_refcnt
++;
4076 ASSERT((ncec
)->ncec_refcnt
!= 0);
4078 ncec_trace_ref(ncec
);
4080 mutex_exit(&(ncec
)->ncec_lock
);
4084 ncec_refhold_notr(ncec_t
*ncec
)
4086 mutex_enter(&(ncec
)->ncec_lock
);
4087 (ncec
)->ncec_refcnt
++;
4088 ASSERT((ncec
)->ncec_refcnt
!= 0);
4089 mutex_exit(&(ncec
)->ncec_lock
);
4093 ncec_refhold_locked(ncec_t
*ncec
)
4095 ASSERT(MUTEX_HELD(&(ncec
)->ncec_lock
));
4096 (ncec
)->ncec_refcnt
++;
4098 ncec_trace_ref(ncec
);
4102 /* ncec_inactive destroys the mutex thus no mutex_exit is needed */
4104 ncec_refrele(ncec_t
*ncec
)
4106 mutex_enter(&(ncec
)->ncec_lock
);
4108 ncec_untrace_ref(ncec
);
4110 ASSERT((ncec
)->ncec_refcnt
!= 0);
4111 if (--(ncec
)->ncec_refcnt
== 0) {
4112 ncec_inactive(ncec
);
4114 mutex_exit(&(ncec
)->ncec_lock
);
4119 ncec_refrele_notr(ncec_t
*ncec
)
4121 mutex_enter(&(ncec
)->ncec_lock
);
4122 ASSERT((ncec
)->ncec_refcnt
!= 0);
4123 if (--(ncec
)->ncec_refcnt
== 0) {
4124 ncec_inactive(ncec
);
4126 mutex_exit(&(ncec
)->ncec_lock
);
4131 * Common to IPv4 and IPv6.
4134 nce_restart_timer(ncec_t
*ncec
, uint_t ms
)
4138 ASSERT(!MUTEX_HELD(&(ncec
)->ncec_lock
));
4140 /* First cancel any running timer */
4141 mutex_enter(&ncec
->ncec_lock
);
4142 tid
= ncec
->ncec_timeout_id
;
4143 ncec
->ncec_timeout_id
= 0;
4145 mutex_exit(&ncec
->ncec_lock
);
4146 (void) untimeout(tid
);
4147 mutex_enter(&ncec
->ncec_lock
);
4151 nce_start_timer(ncec
, ms
);
4152 mutex_exit(&ncec
->ncec_lock
);
4156 nce_start_timer(ncec_t
*ncec
, uint_t ms
)
4158 ASSERT(MUTEX_HELD(&ncec
->ncec_lock
));
4160 * Don't start the timer if the ncec has been deleted, or if the timer
4161 * is already running
4163 if (!NCE_ISCONDEMNED(ncec
) && ncec
->ncec_timeout_id
== 0) {
4164 ncec
->ncec_timeout_id
= timeout(nce_timer
, ncec
,
4165 MSEC_TO_TICK(ms
) == 0 ? 1 : MSEC_TO_TICK(ms
));
4170 nce_set_multicast_v4(ill_t
*ill
, const in_addr_t
*dst
,
4171 uint16_t flags
, nce_t
**newnce
)
4175 ip_stack_t
*ipst
= ill
->ill_ipst
;
4179 ASSERT(!ill
->ill_isv6
);
4181 IN6_IPADDR_TO_V4MAPPED(*dst
, &dst6
);
4182 mutex_enter(&ipst
->ips_ndp4
->ndp_g_lock
);
4183 if ((nce
= nce_lookup_addr(ill
, &dst6
)) != NULL
) {
4184 mutex_exit(&ipst
->ips_ndp4
->ndp_g_lock
);
4187 if (ill
->ill_net_type
== IRE_IF_RESOLVER
) {
4189 * For IRE_IF_RESOLVER a hardware mapping can be
4190 * generated, for IRE_IF_NORESOLVER, resolution cookie
4191 * in the ill is copied in nce_add_v4().
4193 hw_addr
= kmem_alloc(ill
->ill_phys_addr_length
, KM_NOSLEEP
);
4194 if (hw_addr
== NULL
) {
4195 mutex_exit(&ipst
->ips_ndp4
->ndp_g_lock
);
4198 ip_mcast_mapping(ill
, (uchar_t
*)dst
, hw_addr
);
4201 * IRE_IF_NORESOLVER type simply copies the resolution
4202 * cookie passed in. So no hw_addr is needed.
4206 ASSERT(flags
& NCE_F_MCAST
);
4207 ASSERT(flags
& NCE_F_NONUD
);
4208 /* nce_state will be computed by nce_add_common() */
4209 err
= nce_add_v4(ill
, hw_addr
, ill
->ill_phys_addr_length
, dst
, flags
,
4210 ND_UNCHANGED
, &nce
);
4211 mutex_exit(&ipst
->ips_ndp4
->ndp_g_lock
);
4213 err
= (nce
!= NULL
) ? nce_add_v4_postprocess(nce
) : ENOMEM
;
4214 if (hw_addr
!= NULL
)
4215 kmem_free(hw_addr
, ill
->ill_phys_addr_length
);
4217 ip1dbg(("nce_set_multicast_v4: create failed" "%d\n", err
));
4229 * This is used when scanning for "old" (least recently broadcast) NCEs. We
4230 * don't want to have to walk the list for every single one, so we gather up
4231 * batches at a time.
4233 #define NCE_RESCHED_LIST_LEN 8
4238 ncec_t
*ncert_nces
[NCE_RESCHED_LIST_LEN
];
4242 * Pick the longest waiting NCEs for defense.
4246 ncec_reschedule(ill_t
*ill
, nce_t
*nce
, void *arg
)
4248 nce_resched_t
*ncert
= arg
;
4252 ncec_t
*ncec
= nce
->nce_common
;
4254 ASSERT(ncec
->ncec_ill
== ncert
->ncert_ill
);
4256 * Only reachable entries that are ready for announcement are eligible.
4258 if (!NCE_MYADDR(ncec
) || ncec
->ncec_state
!= ND_REACHABLE
)
4260 if (ncert
->ncert_num
< NCE_RESCHED_LIST_LEN
) {
4262 ncert
->ncert_nces
[ncert
->ncert_num
++] = ncec
;
4264 ncecs
= ncert
->ncert_nces
;
4265 ncec_max
= ncecs
+ NCE_RESCHED_LIST_LEN
;
4267 for (; ncecs
< ncec_max
; ncecs
++) {
4268 ASSERT(ncec
!= NULL
);
4269 if ((*ncecs
)->ncec_last_time_defended
>
4270 ncec
->ncec_last_time_defended
) {
4282 * Reschedule the ARP defense of any long-waiting NCEs. It's assumed that this
4283 * doesn't happen very often (if at all), and thus it needn't be highly
4284 * optimized. (Note, though, that it's actually O(N) complexity, because the
4285 * outer loop is bounded by a constant rather than by the length of the list.)
4288 nce_ill_reschedule(ill_t
*ill
, nce_resched_t
*ncert
)
4291 ip_stack_t
*ipst
= ill
->ill_ipst
;
4292 uint_t i
, defend_rate
;
4294 i
= ill
->ill_defend_count
;
4295 ill
->ill_defend_count
= 0;
4297 defend_rate
= ipst
->ips_ndp_defend_rate
;
4299 defend_rate
= ipst
->ips_arp_defend_rate
;
4300 /* If none could be sitting around, then don't reschedule */
4301 if (i
< defend_rate
) {
4302 DTRACE_PROBE1(reschedule_none
, ill_t
*, ill
);
4305 ncert
->ncert_ill
= ill
;
4306 while (ill
->ill_defend_count
< defend_rate
) {
4307 nce_walk_common(ill
, ncec_reschedule
, ncert
);
4308 for (i
= 0; i
< ncert
->ncert_num
; i
++) {
4310 ncec
= ncert
->ncert_nces
[i
];
4311 mutex_enter(&ncec
->ncec_lock
);
4312 ncec
->ncec_flags
|= NCE_F_DELAYED
;
4313 mutex_exit(&ncec
->ncec_lock
);
4315 * we plan to schedule this ncec, so incr the
4316 * defend_count in anticipation.
4318 if (++ill
->ill_defend_count
>= defend_rate
)
4321 if (ncert
->ncert_num
< NCE_RESCHED_LIST_LEN
)
4327 * Check if the current rate-limiting parameters permit the sending
4328 * of another address defense announcement for both IPv4 and IPv6.
4329 * Returns B_TRUE if rate-limiting is in effect (i.e., send is not
4330 * permitted), and B_FALSE otherwise. The `defend_rate' parameter
4331 * determines how many address defense announcements are permitted
4332 * in any `defend_period' interval.
4335 ill_defend_rate_limit(ill_t
*ill
, ncec_t
*ncec
)
4337 clock_t now
= ddi_get_lbolt();
4338 ip_stack_t
*ipst
= ill
->ill_ipst
;
4339 clock_t start
= ill
->ill_defend_start
;
4340 uint32_t elapsed
, defend_period
, defend_rate
;
4341 nce_resched_t ncert
;
4345 if (ill
->ill_isv6
) {
4346 defend_period
= ipst
->ips_ndp_defend_period
;
4347 defend_rate
= ipst
->ips_ndp_defend_rate
;
4349 defend_period
= ipst
->ips_arp_defend_period
;
4350 defend_rate
= ipst
->ips_arp_defend_rate
;
4352 if (defend_rate
== 0)
4354 bzero(&ncert
, sizeof (ncert
));
4355 mutex_enter(&ill
->ill_lock
);
4357 elapsed
= now
- start
;
4358 if (elapsed
> SEC_TO_TICK(defend_period
)) {
4359 ill
->ill_defend_start
= now
;
4361 * nce_ill_reschedule will attempt to
4362 * prevent starvation by rescheduling the
4363 * oldest entries, which are marked with
4364 * the NCE_F_DELAYED flag.
4366 nce_ill_reschedule(ill
, &ncert
);
4369 ill
->ill_defend_start
= now
;
4371 ASSERT(ill
->ill_defend_count
<= defend_rate
);
4372 mutex_enter(&ncec
->ncec_lock
);
4373 if (ncec
->ncec_flags
& NCE_F_DELAYED
) {
4375 * This ncec was rescheduled as one of the really old
4376 * entries needing on-going defense. The
4377 * ill_defend_count was already incremented in
4378 * nce_ill_reschedule. Go ahead and send the announce.
4380 ncec
->ncec_flags
&= ~NCE_F_DELAYED
;
4381 mutex_exit(&ncec
->ncec_lock
);
4385 mutex_exit(&ncec
->ncec_lock
);
4386 if (ill
->ill_defend_count
< defend_rate
)
4387 ill
->ill_defend_count
++;
4388 if (ill
->ill_defend_count
== defend_rate
) {
4390 * we are no longer allowed to send unbidden defense
4391 * messages. Wait for rescheduling.
4398 mutex_exit(&ill
->ill_lock
);
4400 * After all the locks have been dropped we can restart nce timer,
4401 * and refrele the delayed ncecs
4403 for (i
= 0; i
< ncert
.ncert_num
; i
++) {
4404 clock_t xmit_interval
;
4407 tmp
= ncert
.ncert_nces
[i
];
4408 xmit_interval
= nce_fuzz_interval(tmp
->ncec_xmit_interval
,
4410 nce_restart_timer(tmp
, xmit_interval
);
4417 ndp_announce(ncec_t
*ncec
)
4419 return (ndp_xmit(ncec
->ncec_ill
, ND_NEIGHBOR_ADVERT
, ncec
->ncec_lladdr
,
4420 ncec
->ncec_lladdr_length
, &ncec
->ncec_addr
, &ipv6_all_hosts_mcast
,
4421 nce_advert_flags(ncec
)));
4425 nce_resolve_src(ncec_t
*ncec
, in6_addr_t
*src
)
4430 ill_t
*ill
= ncec
->ncec_ill
;
4431 ill_t
*src_ill
= NULL
;
4432 ipif_t
*ipif
= NULL
;
4433 boolean_t is_myaddr
= NCE_MYADDR(ncec
);
4434 boolean_t isv6
= (ncec
->ncec_ipversion
== IPV6_VERSION
);
4436 ASSERT(src
!= NULL
);
4437 ASSERT(IN6_IS_ADDR_UNSPECIFIED(src
));
4441 src6
= ncec
->ncec_addr
;
4443 IN6_V4MAPPED_TO_IPADDR(&ncec
->ncec_addr
, src4
);
4446 * try to find one from the outgoing packet.
4448 mutex_enter(&ncec
->ncec_lock
);
4449 mp
= ncec
->ncec_qd_mp
;
4452 ip6_t
*ip6h
= (ip6_t
*)mp
->b_rptr
;
4454 src6
= ip6h
->ip6_src
;
4456 ipha_t
*ipha
= (ipha_t
*)mp
->b_rptr
;
4458 src4
= ipha
->ipha_src
;
4459 IN6_IPADDR_TO_V4MAPPED(src4
, &src6
);
4462 mutex_exit(&ncec
->ncec_lock
);
4466 * For outgoing packets, if the src of outgoing packet is one
4467 * of the assigned interface addresses use it, otherwise we
4468 * will pick the source address below.
4469 * For local addresses (is_myaddr) doing DAD, NDP announce
4470 * messages are mcast. So we use the (IPMP) cast_ill or the
4471 * (non-IPMP) ncec_ill for these message types. The only case
4472 * of unicast DAD messages are for IPv6 ND probes, for which
4473 * we find the ipif_bound_ill corresponding to the ncec_addr.
4475 if (!IN6_IS_ADDR_UNSPECIFIED(&src6
) || is_myaddr
) {
4477 ipif
= ipif_lookup_addr_nondup_v6(&src6
, ill
, ALL_ZONES
,
4480 ipif
= ipif_lookup_addr_nondup(src4
, ill
, ALL_ZONES
,
4485 * If no relevant ipif can be found, then it's not one of our
4486 * addresses. Reset to :: and try to find a src for the NS or
4487 * ARP request using ipif_select_source_v[4,6] below.
4488 * If an ipif can be found, but it's not yet done with
4489 * DAD verification, and we are not being invoked for
4490 * DAD (i.e., !is_myaddr), then just postpone this
4491 * transmission until later.
4494 src6
= ipv6_all_zeros
;
4496 } else if (!ipif
->ipif_addr_ready
&& !is_myaddr
) {
4497 DTRACE_PROBE2(nce__resolve__ipif__not__ready
,
4498 ncec_t
*, ncec
, ipif_t
*, ipif
);
4504 if (IN6_IS_ADDR_UNSPECIFIED(&src6
) && !is_myaddr
) {
4506 * Pick a source address for this solicitation, but
4507 * restrict the selection to addresses assigned to the
4508 * output interface. We do this because the destination will
4509 * create a neighbor cache entry for the source address of
4510 * this packet, so the source address had better be a valid
4514 ipif
= ipif_select_source_v6(ill
, &ncec
->ncec_addr
,
4515 B_TRUE
, IPV6_PREFER_SRC_DEFAULT
, ALL_ZONES
,
4520 IN6_V4MAPPED_TO_IPADDR(&ncec
->ncec_addr
, nce_addr
);
4521 ipif
= ipif_select_source_v4(ill
, nce_addr
, ALL_ZONES
,
4524 if (ipif
== NULL
&& IS_IPMP(ill
)) {
4525 ill_t
*send_ill
= ipmp_ill_hold_xmit_ill(ill
, B_TRUE
);
4527 if (send_ill
!= NULL
) {
4529 ipif
= ipif_select_source_v6(send_ill
,
4530 &ncec
->ncec_addr
, B_TRUE
,
4531 IPV6_PREFER_SRC_DEFAULT
, ALL_ZONES
,
4534 IN6_V4MAPPED_TO_IPADDR(&ncec
->ncec_addr
,
4536 ipif
= ipif_select_source_v4(send_ill
,
4537 src4
, ALL_ZONES
, B_TRUE
, NULL
);
4539 ill_refrele(send_ill
);
4544 char buf
[INET6_ADDRSTRLEN
];
4546 ip1dbg(("nce_resolve_src: No source ipif for dst %s\n",
4547 inet_ntop((isv6
? AF_INET6
: AF_INET
),
4548 (char *)&ncec
->ncec_addr
, buf
, sizeof (buf
))));
4549 DTRACE_PROBE1(nce__resolve__no__ipif
, ncec_t
*, ncec
);
4552 src6
= ipif
->ipif_v6lcl_addr
;
4556 src_ill
= ipif
->ipif_ill
;
4557 if (IS_IPMP(src_ill
))
4558 src_ill
= ipmp_ipif_hold_bound_ill(ipif
);
4560 ill_refhold(src_ill
);
4562 DTRACE_PROBE2(nce__resolve__src__ill
, ncec_t
*, ncec
,
4569 ip_nce_lookup_and_update(ipaddr_t
*addr
, ipif_t
*ipif
, ip_stack_t
*ipst
,
4570 uchar_t
*hwaddr
, int hwaddr_len
, int flags
)
4577 ill
= (ipif
? ipif
->ipif_ill
: NULL
);
4580 * only one ncec is possible
4582 nce
= nce_lookup_v4(ill
, addr
);
4584 ncec
= nce
->nce_common
;
4585 mutex_enter(&ncec
->ncec_lock
);
4586 if (NCE_ISREACHABLE(ncec
))
4587 new_state
= ND_UNCHANGED
;
4589 new_state
= ND_STALE
;
4590 ncec
->ncec_flags
= flags
;
4591 nce_update(ncec
, new_state
, hwaddr
);
4592 mutex_exit(&ncec
->ncec_lock
);
4598 * ill is wildcard; clean up all ncec's and ire's
4599 * that match on addr.
4603 hwm
.hwm_addr
= *addr
;
4604 hwm
.hwm_hwlen
= hwaddr_len
;
4605 hwm
.hwm_hwaddr
= hwaddr
;
4606 hwm
.hwm_flags
= flags
;
4608 ncec_walk_common(ipst
->ips_ndp4
, NULL
,
4609 nce_update_hw_changed
, &hwm
, B_TRUE
);
4614 * Common function to add ncec entries.
4615 * we always add the ncec with ncec_ill == ill, and always create
4616 * nce_t on ncec_ill. A dlpi fastpath message may be triggered if the
4617 * ncec is !reachable.
4619 * When the caller passes in an nce_state of ND_UNCHANGED,
4620 * nce_add_common() will determine the state of the created nce based
4621 * on the ill_net_type and nce_flags used. Otherwise, the nce will
4622 * be created with state set to the passed in nce_state.
4625 nce_add_common(ill_t
*ill
, uchar_t
*hw_addr
, uint_t hw_addr_len
,
4626 const in6_addr_t
*addr
, uint16_t flags
, uint16_t nce_state
, nce_t
**retnce
)
4628 static ncec_t nce_nil
;
4629 uchar_t
*template = NULL
;
4633 ip_stack_t
*ipst
= ill
->ill_ipst
;
4635 boolean_t fastprobe
= B_FALSE
;
4636 struct ndp_g_s
*ndp
;
4639 mblk_t
*dlur_mp
= NULL
;
4642 ndp
= ill
->ill_ipst
->ips_ndp6
;
4644 ndp
= ill
->ill_ipst
->ips_ndp4
;
4649 ASSERT(MUTEX_HELD(&ndp
->ndp_g_lock
));
4651 if (IN6_IS_ADDR_UNSPECIFIED(addr
)) {
4652 ip0dbg(("nce_add_common: no addr\n"));
4655 if ((flags
& ~NCE_EXTERNAL_FLAGS_MASK
)) {
4656 ip0dbg(("nce_add_common: flags = %x\n", (int)flags
));
4660 if (ill
->ill_isv6
) {
4661 ncep
= ((ncec_t
**)NCE_HASH_PTR_V6(ipst
, *addr
));
4665 IN6_V4MAPPED_TO_IPADDR(addr
, v4addr
);
4666 ncep
= ((ncec_t
**)NCE_HASH_PTR_V4(ipst
, v4addr
));
4670 * The caller has ensured that there is no nce on ill, but there could
4671 * still be an nce_common_t for the address, so that we find existing
4672 * ncec_t structures first, and atomically add a new nce_t if
4673 * one is found. The ndp_g_lock ensures that we don't cross threads
4674 * with an ncec_delete(). Unlike ncec_lookup_illgrp() we do not
4675 * compare for matches across the illgrp because this function is
4676 * called via nce_lookup_then_add_v* -> nce_add_v* -> nce_add_common,
4677 * with the nce_lookup_then_add_v* passing in the ipmp_ill where
4681 for (; ncec
!= NULL
; ncec
= ncec
->ncec_next
) {
4682 if (ncec
->ncec_ill
== ill
) {
4683 if (IN6_ARE_ADDR_EQUAL(&ncec
->ncec_addr
, addr
)) {
4685 * We should never find *retnce to be
4686 * MYADDR, since the caller may then
4687 * incorrectly restart a DAD timer that's
4688 * already running. However, if we are in
4689 * forwarding mode, and the interface is
4690 * moving in/out of groups, the data
4691 * path ire lookup (e.g., ire_revalidate_nce)
4692 * may have determined that some destination
4693 * is offlink while the control path is adding
4694 * that address as a local address.
4695 * Recover from this case by failing the
4698 if (NCE_MYADDR(ncec
))
4700 *retnce
= nce_ill_lookup_then_add(ill
, ncec
);
4701 if (*retnce
!= NULL
)
4706 if (*retnce
!= NULL
) /* caller must trigger fastpath on nce */
4709 ncec
= kmem_cache_alloc(ncec_cache
, KM_NOSLEEP
);
4713 ncec
->ncec_ill
= ill
;
4714 ncec
->ncec_ipversion
= (ill
->ill_isv6
? IPV6_VERSION
: IPV4_VERSION
);
4715 ncec
->ncec_flags
= flags
;
4716 ncec
->ncec_ipst
= ipst
; /* No netstack_hold */
4718 if (!ill
->ill_isv6
) {
4722 * DAD probe interval and probe count are set based on
4723 * fast/slow probe settings. If the underlying link doesn't
4724 * have reliably up/down notifications or if we're working
4725 * with IPv4 169.254.0.0/16 Link Local Address space, then
4726 * don't use the fast timers. Otherwise, use them.
4728 ASSERT(IN6_IS_ADDR_V4MAPPED(addr
));
4729 IN6_V4MAPPED_TO_IPADDR(addr
, addr4
);
4730 if (ill
->ill_note_link
&& !IS_IPV4_LL_SPACE(&addr4
)) {
4732 } else if (IS_IPMP(ill
) && NCE_PUBLISH(ncec
) &&
4733 !IS_IPV4_LL_SPACE(&addr4
)) {
4736 hwaddr_ill
= ipmp_illgrp_find_ill(ill
->ill_grp
, hw_addr
,
4738 if (hwaddr_ill
!= NULL
&& hwaddr_ill
->ill_note_link
)
4742 ncec
->ncec_xmit_interval
=
4743 ipst
->ips_arp_fastprobe_interval
;
4745 ipst
->ips_arp_fastprobe_count
;
4746 ncec
->ncec_flags
|= NCE_F_FAST
;
4748 ncec
->ncec_xmit_interval
=
4749 ipst
->ips_arp_probe_interval
;
4751 ipst
->ips_arp_probe_count
;
4753 if (NCE_PUBLISH(ncec
)) {
4754 ncec
->ncec_unsolicit_count
=
4755 ipst
->ips_ip_arp_publish_count
;
4759 * probe interval is constant: ILL_PROBE_INTERVAL
4760 * probe count is constant: ND_MAX_UNICAST_SOLICIT
4762 ncec
->ncec_pcnt
= ND_MAX_UNICAST_SOLICIT
;
4763 if (NCE_PUBLISH(ncec
)) {
4764 ncec
->ncec_unsolicit_count
=
4765 ipst
->ips_ip_ndp_unsolicit_count
;
4768 ncec
->ncec_rcnt
= ill
->ill_xmit_count
;
4769 ncec
->ncec_addr
= *addr
;
4770 ncec
->ncec_qd_mp
= NULL
;
4771 ncec
->ncec_refcnt
= 1; /* for ncec getting created */
4772 mutex_init(&ncec
->ncec_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
4773 ncec
->ncec_trace_disable
= B_FALSE
;
4776 * ncec_lladdr holds link layer address
4778 if (hw_addr_len
> 0) {
4779 template = kmem_alloc(hw_addr_len
, KM_NOSLEEP
);
4780 if (template == NULL
) {
4784 ncec
->ncec_lladdr
= template;
4785 ncec
->ncec_lladdr_length
= hw_addr_len
;
4786 bzero(ncec
->ncec_lladdr
, hw_addr_len
);
4788 if ((flags
& NCE_F_BCAST
) != 0) {
4789 state
= ND_REACHABLE
;
4790 ASSERT(hw_addr_len
> 0);
4791 } else if (ill
->ill_net_type
== IRE_IF_RESOLVER
) {
4793 } else if (ill
->ill_net_type
== IRE_IF_NORESOLVER
) {
4795 * NORESOLVER entries are always created in the REACHABLE
4798 state
= ND_REACHABLE
;
4799 if (ill
->ill_phys_addr_length
== IP_ADDR_LEN
&&
4800 ill
->ill_mactype
!= DL_IPV4
&&
4801 ill
->ill_mactype
!= DL_6TO4
) {
4803 * We create a nce_res_mp with the IP nexthop address
4804 * as the destination address if the physical length
4805 * is exactly 4 bytes for point-to-multipoint links
4806 * that do their own resolution from IP to link-layer
4807 * address (e.g. IP over X.25).
4809 bcopy((uchar_t
*)addr
,
4810 ncec
->ncec_lladdr
, ill
->ill_phys_addr_length
);
4812 if (ill
->ill_phys_addr_length
== IPV6_ADDR_LEN
&&
4813 ill
->ill_mactype
!= DL_IPV6
) {
4815 * We create a nce_res_mp with the IP nexthop address
4816 * as the destination address if the physical length
4817 * is exactly 16 bytes for point-to-multipoint links
4818 * that do their own resolution from IP to link-layer
4821 bcopy((uchar_t
*)addr
,
4822 ncec
->ncec_lladdr
, ill
->ill_phys_addr_length
);
4825 * Since NUD is not part of the base IPv4 protocol definition,
4826 * IPv4 neighbor entries on NORESOLVER interfaces will never
4827 * age, and are marked NCE_F_NONUD.
4830 ncec
->ncec_flags
|= NCE_F_NONUD
;
4831 } else if (ill
->ill_net_type
== IRE_LOOPBACK
) {
4832 state
= ND_REACHABLE
;
4835 if (hw_addr
!= NULL
|| ill
->ill_net_type
== IRE_IF_NORESOLVER
) {
4837 * We are adding an ncec with a deterministic hw_addr,
4838 * so the state can only be one of {REACHABLE, STALE, PROBE}.
4840 * if we are adding a unicast ncec for the local address
4841 * it would be REACHABLE; we would be adding a ND_STALE entry
4842 * for the requestor of an ARP_REQUEST/ND_SOLICIT. Our own
4843 * addresses are added in PROBE to trigger DAD.
4845 if ((flags
& (NCE_F_MCAST
|NCE_F_BCAST
)) ||
4846 ill
->ill_net_type
== IRE_IF_NORESOLVER
)
4847 state
= ND_REACHABLE
;
4848 else if (!NCE_PUBLISH(ncec
))
4852 if (hw_addr
!= NULL
)
4853 nce_set_ll(ncec
, hw_addr
);
4855 /* caller overrides internally computed state */
4856 if (nce_state
!= ND_UNCHANGED
)
4859 if (state
== ND_PROBE
)
4860 ncec
->ncec_flags
|= NCE_F_UNVERIFIED
;
4862 ncec
->ncec_state
= state
;
4864 if (state
== ND_REACHABLE
) {
4865 ncec
->ncec_last
= ncec
->ncec_init_time
=
4866 TICK_TO_MSEC(ddi_get_lbolt64());
4868 ncec
->ncec_last
= 0;
4869 if (state
== ND_INITIAL
)
4870 ncec
->ncec_init_time
= TICK_TO_MSEC(ddi_get_lbolt64());
4872 list_create(&ncec
->ncec_cb
, sizeof (ncec_cb_t
),
4873 offsetof(ncec_cb_t
, ncec_cb_node
));
4875 * have all the memory allocations out of the way before taking locks
4876 * and adding the nce.
4878 nce
= kmem_cache_alloc(nce_cache
, KM_NOSLEEP
);
4883 if (ncec
->ncec_lladdr
!= NULL
||
4884 ill
->ill_net_type
== IRE_IF_NORESOLVER
) {
4885 dlur_mp
= ill_dlur_gen(ncec
->ncec_lladdr
,
4886 ill
->ill_phys_addr_length
, ill
->ill_sap
,
4887 ill
->ill_sap_length
);
4888 if (dlur_mp
== NULL
) {
4895 * Atomically ensure that the ill is not CONDEMNED, before
4898 mutex_enter(&ill
->ill_lock
);
4899 if (ill
->ill_state_flags
& ILL_CONDEMNED
) {
4900 mutex_exit(&ill
->ill_lock
);
4904 if (!NCE_MYADDR(ncec
) &&
4905 (ill
->ill_state_flags
& ILL_DOWN_IN_PROGRESS
)) {
4906 mutex_exit(&ill
->ill_lock
);
4907 DTRACE_PROBE1(nce__add__on__down__ill
, ncec_t
*, ncec
);
4912 * Acquire the ncec_lock even before adding the ncec to the list
4913 * so that it cannot get deleted after the ncec is added, but
4914 * before we add the nce.
4916 mutex_enter(&ncec
->ncec_lock
);
4917 if ((ncec
->ncec_next
= *ncep
) != NULL
)
4918 ncec
->ncec_next
->ncec_ptpn
= &ncec
->ncec_next
;
4920 ncec
->ncec_ptpn
= ncep
;
4922 /* Bump up the number of ncec's referencing this ill */
4923 DTRACE_PROBE3(ill__incr__cnt
, (ill_t
*), ill
,
4924 (char *), "ncec", (void *), ncec
);
4925 ill
->ill_ncec_cnt
++;
4927 * Since we hold the ncec_lock at this time, the ncec cannot be
4928 * condemned, and we can safely add the nce.
4930 list_create(&graveyard
, sizeof (nce_t
), offsetof(nce_t
, nce_node
));
4931 *retnce
= nce_add_impl(ill
, ncec
, nce
, dlur_mp
, &graveyard
);
4932 mutex_exit(&ncec
->ncec_lock
);
4933 mutex_exit(&ill
->ill_lock
);
4934 nce_graveyard_free(&graveyard
);
4936 /* caller must trigger fastpath on *retnce */
4941 kmem_cache_free(ncec_cache
, ncec
);
4943 kmem_cache_free(nce_cache
, nce
);
4945 if (template != NULL
)
4946 kmem_free(template, ill
->ill_phys_addr_length
);
4951 * take a ref on the nce
4954 nce_refhold(nce_t
*nce
)
4956 mutex_enter(&nce
->nce_lock
);
4958 ASSERT((nce
)->nce_refcnt
!= 0);
4959 mutex_exit(&nce
->nce_lock
);
4963 * release a ref on the nce; In general, this
4964 * cannot be called with locks held because nce_inactive
4965 * may result in nce_inactive which will take the ill_lock,
4966 * do ipif_ill_refrele_tail etc. Thus the one exception
4967 * where this can be called with locks held is when the caller
4968 * is certain that the nce_refcnt is sufficient to prevent
4969 * the invocation of nce_inactive.
4972 nce_refrele(nce_t
*nce
)
4974 ASSERT((nce
)->nce_refcnt
!= 0);
4975 mutex_enter(&nce
->nce_lock
);
4976 if (--nce
->nce_refcnt
== 0)
4977 nce_inactive(nce
); /* destroys the mutex */
4979 mutex_exit(&nce
->nce_lock
);
4983 * free the nce after all refs have gone away.
4986 nce_inactive(nce_t
*nce
)
4988 ill_t
*ill
= nce
->nce_ill
;
4990 ASSERT(nce
->nce_refcnt
== 0);
4992 ncec_refrele_notr(nce
->nce_common
);
4993 nce
->nce_common
= NULL
;
4994 freemsg(nce
->nce_fp_mp
);
4995 freemsg(nce
->nce_dlur_mp
);
4997 mutex_enter(&ill
->ill_lock
);
4998 DTRACE_PROBE3(ill__decr__cnt
, (ill_t
*), ill
,
4999 (char *), "nce", (void *), nce
);
5001 nce
->nce_ill
= NULL
;
5003 * If the number of ncec's associated with this ill have dropped
5004 * to zero, check whether we need to restart any operation that
5005 * is waiting for this to happen.
5007 if (ILL_DOWN_OK(ill
)) {
5008 /* ipif_ill_refrele_tail drops the ill_lock */
5009 ipif_ill_refrele_tail(ill
);
5011 mutex_exit(&ill
->ill_lock
);
5014 mutex_destroy(&nce
->nce_lock
);
5015 kmem_cache_free(nce_cache
, nce
);
5019 * Add an nce to the ill_nce list.
5021 * Adding multicast NCEs is subject to a per-ill limit. This function returns
5022 * NULL if that's the case, and it may reap a number of multicast nces.
5023 * Callers (and upstack) must be able to cope with NULL returns.
5026 nce_add_impl(ill_t
*ill
, ncec_t
*ncec
, nce_t
*nce
, mblk_t
*dlur_mp
,
5029 ASSERT(MUTEX_HELD(&ill
->ill_lock
));
5031 if ((ncec
->ncec_flags
& NCE_F_MCAST
) != 0) {
5032 if (nce_too_many_mcast(ill
, graveyard
)) {
5033 kmem_cache_free(nce_cache
, nce
);
5036 ill
->ill_mcast_nces
++;
5039 bzero(nce
, sizeof (*nce
));
5040 mutex_init(&nce
->nce_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
5041 nce
->nce_common
= ncec
;
5042 nce
->nce_addr
= ncec
->ncec_addr
;
5044 DTRACE_PROBE3(ill__incr__cnt
, (ill_t
*), ill
,
5045 (char *), "nce", (void *), nce
);
5048 nce
->nce_refcnt
= 1; /* for the thread */
5049 ncec
->ncec_refcnt
++; /* want ncec_refhold_locked_notr(ncec) */
5050 nce
->nce_dlur_mp
= dlur_mp
;
5052 /* add nce to the ill's fastpath list. */
5053 nce
->nce_refcnt
++; /* for the list */
5054 list_insert_head(&ill
->ill_nce
, nce
);
5059 nce_add(ill_t
*ill
, ncec_t
*ncec
, list_t
*graveyard
)
5062 mblk_t
*dlur_mp
= NULL
;
5064 ASSERT(MUTEX_HELD(&ill
->ill_lock
));
5065 ASSERT(MUTEX_HELD(&ncec
->ncec_lock
));
5067 nce
= kmem_cache_alloc(nce_cache
, KM_NOSLEEP
);
5070 if (ncec
->ncec_lladdr
!= NULL
||
5071 ill
->ill_net_type
== IRE_IF_NORESOLVER
) {
5072 dlur_mp
= ill_dlur_gen(ncec
->ncec_lladdr
,
5073 ill
->ill_phys_addr_length
, ill
->ill_sap
,
5074 ill
->ill_sap_length
);
5075 if (dlur_mp
== NULL
) {
5076 kmem_cache_free(nce_cache
, nce
);
5081 * If nce_add_impl() returns NULL due to on multicast limiting, caller
5082 * will (correctly) assume ENOMEM.
5084 return (nce_add_impl(ill
, ncec
, nce
, dlur_mp
, graveyard
));
5088 * remove the nce from the ill_faspath list
5091 nce_delete(nce_t
*nce
)
5093 ill_t
*ill
= nce
->nce_ill
;
5095 ASSERT(MUTEX_HELD(&ill
->ill_lock
));
5097 mutex_enter(&nce
->nce_lock
);
5098 if (nce
->nce_is_condemned
) {
5100 * some other thread has removed this nce from the ill_nce list
5102 mutex_exit(&nce
->nce_lock
);
5105 nce
->nce_is_condemned
= B_TRUE
;
5106 mutex_exit(&nce
->nce_lock
);
5108 /* Update the count of multicast NCEs. */
5109 if ((nce
->nce_common
->ncec_flags
& NCE_F_MCAST
) == NCE_F_MCAST
)
5110 ill
->ill_mcast_nces
--;
5112 list_remove(&ill
->ill_nce
, nce
);
5114 * even though we are holding the ill_lock, it is ok to
5115 * call nce_refrele here because we know that we should have
5116 * at least 2 refs on the nce: one for the thread, and one
5117 * for the list. The refrele below will release the one for
5124 nce_lookup(ill_t
*ill
, const in6_addr_t
*addr
)
5128 ASSERT(ill
!= NULL
);
5129 ASSERT(MUTEX_HELD(&ill
->ill_lock
));
5131 for (nce
= list_head(&ill
->ill_nce
); nce
!= NULL
;
5132 nce
= list_next(&ill
->ill_nce
, nce
)) {
5133 if (IN6_ARE_ADDR_EQUAL(&nce
->nce_addr
, addr
))
5138 * if we found the nce on the ill_nce list while holding
5139 * the ill_lock, then it cannot be condemned yet.
5142 ASSERT(!nce
->nce_is_condemned
);
5149 * Walk the ill_nce list on ill. The callback function func() cannot perform
5150 * any destructive actions.
5153 nce_walk_common(ill_t
*ill
, pfi_t func
, void *arg
)
5155 nce_t
*nce
= NULL
, *nce_next
;
5157 ASSERT(MUTEX_HELD(&ill
->ill_lock
));
5158 for (nce
= list_head(&ill
->ill_nce
); nce
!= NULL
; ) {
5159 nce_next
= list_next(&ill
->ill_nce
, nce
);
5160 if (func(ill
, nce
, arg
) != 0)
5167 nce_walk(ill_t
*ill
, pfi_t func
, void *arg
)
5169 mutex_enter(&ill
->ill_lock
);
5170 nce_walk_common(ill
, func
, arg
);
5171 mutex_exit(&ill
->ill_lock
);
5175 nce_flush(ill_t
*ill
, boolean_t flushall
)
5177 nce_t
*nce
, *nce_next
;
5180 list_create(&dead
, sizeof (nce_t
), offsetof(nce_t
, nce_node
));
5181 mutex_enter(&ill
->ill_lock
);
5182 for (nce
= list_head(&ill
->ill_nce
); nce
!= NULL
; ) {
5183 nce_next
= list_next(&ill
->ill_nce
, nce
);
5184 if (!flushall
&& NCE_PUBLISH(nce
->nce_common
)) {
5189 * nce_delete requires that the caller should either not
5190 * be holding locks, or should hold a ref to ensure that
5191 * we wont hit ncec_inactive. So take a ref and clean up
5192 * after the list is flushed.
5196 list_insert_tail(&dead
, nce
);
5199 mutex_exit(&ill
->ill_lock
);
5200 while ((nce
= list_head(&dead
)) != NULL
) {
5201 list_remove(&dead
, nce
);
5204 ASSERT(list_is_empty(&dead
));
5205 list_destroy(&dead
);
5208 /* Return an interval that is anywhere in the [1 .. intv] range */
5210 nce_fuzz_interval(clock_t intv
, boolean_t initial_time
)
5214 (void) random_get_pseudo_bytes((uint8_t *)&rnd
, sizeof (rnd
));
5215 /* Note that clock_t is signed; must chop off bits */
5216 rnd
&= (1ul << (NBBY
* sizeof (rnd
) - 1)) - 1;
5221 intv
= (rnd
% intv
) + 1;
5223 /* Compute 'frac' as 20% of the configured interval */
5224 if ((frac
= intv
/ 5) <= 1)
5226 /* Set intv randomly in the range [intv-frac .. intv+frac] */
5227 if ((intv
= intv
- frac
+ rnd
% (2 * frac
+ 1)) <= 0)
5234 nce_resolv_ipmp_ok(ncec_t
*ncec
)
5238 iaflags_t ixaflags
= IXAF_NO_TRACE
;
5240 ill_t
*ill
= ncec
->ncec_ill
;
5241 boolean_t isv6
= (ncec
->ncec_ipversion
== IPV6_VERSION
);
5242 ipif_t
*src_ipif
= NULL
;
5243 ip_stack_t
*ipst
= ill
->ill_ipst
;
5247 ASSERT(IS_IPMP(ill
));
5249 mutex_enter(&ncec
->ncec_lock
);
5250 nprobes
= ncec
->ncec_nprobes
;
5251 mp
= ncec
->ncec_qd_mp
;
5252 ncec
->ncec_qd_mp
= NULL
;
5253 ncec
->ncec_nprobes
= 0;
5254 mutex_exit(&ncec
->ncec_lock
);
5256 while (mp
!= NULL
) {
5259 nxt_mp
= mp
->b_next
;
5262 ip6_t
*ip6h
= (ip6_t
*)mp
->b_rptr
;
5264 pkt_len
= ntohs(ip6h
->ip6_plen
) + IPV6_HDR_LEN
;
5265 src_ipif
= ipif_lookup_addr_nondup_v6(&ip6h
->ip6_src
,
5266 ill
, ALL_ZONES
, ipst
);
5268 ipha_t
*ipha
= (ipha_t
*)mp
->b_rptr
;
5270 ixaflags
|= IXAF_IS_IPV4
;
5271 pkt_len
= ntohs(ipha
->ipha_length
);
5272 src_ipif
= ipif_lookup_addr_nondup(ipha
->ipha_src
,
5273 ill
, ALL_ZONES
, ipst
);
5277 * find a new nce based on an under_ill. The first IPMP probe
5278 * packet gets queued, so we could still find a src_ipif that
5279 * matches an IPMP test address.
5281 if (src_ipif
== NULL
|| IS_IPMP(src_ipif
->ipif_ill
)) {
5283 * if src_ipif is null, this could be either a
5284 * forwarded packet or a probe whose src got deleted.
5285 * We identify the former case by looking for the
5286 * ncec_nprobes: the first ncec_nprobes packets are
5289 if (src_ipif
== NULL
&& nprobes
> 0)
5293 * For forwarded packets, we use the ipmp rotor
5296 send_ill
= ipmp_ill_hold_xmit_ill(ncec
->ncec_ill
,
5299 send_ill
= src_ipif
->ipif_ill
;
5300 ill_refhold(send_ill
);
5303 DTRACE_PROBE4(nce__resolve__ipmp
, (mblk_t
*), mp
,
5304 (ncec_t
*), ncec
, (ipif_t
*),
5305 src_ipif
, (ill_t
*), send_ill
);
5307 if (send_ill
== NULL
) {
5308 if (src_ipif
!= NULL
)
5309 ipif_refrele(src_ipif
);
5312 /* create an under_nce on send_ill */
5313 rw_enter(&ipst
->ips_ill_g_lock
, RW_READER
);
5314 if (IS_IN_SAME_ILLGRP(send_ill
, ncec
->ncec_ill
))
5315 under_nce
= nce_fastpath_create(send_ill
, ncec
);
5318 rw_exit(&ipst
->ips_ill_g_lock
);
5319 if (under_nce
!= NULL
&& NCE_ISREACHABLE(ncec
))
5320 nce_fastpath_trigger(under_nce
);
5322 ill_refrele(send_ill
);
5323 if (src_ipif
!= NULL
)
5324 ipif_refrele(src_ipif
);
5326 if (under_nce
!= NULL
) {
5327 (void) ip_xmit(mp
, under_nce
, ixaflags
, pkt_len
, 0,
5328 ALL_ZONES
, 0, NULL
);
5329 nce_refrele(under_nce
);
5337 BUMP_MIB(&ipst
->ips_ip6_mib
, ipIfStatsOutDiscards
);
5339 BUMP_MIB(&ipst
->ips_ip_mib
, ipIfStatsOutDiscards
);
5341 ip_drop_output("ipIfStatsOutDiscards - no under_ill", mp
, NULL
);
5347 ncec_cb_dispatch(ncec
); /* complete callbacks */