4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
25 #include <sys/types.h>
26 #include <sys/stream.h>
27 #include <sys/stropts.h>
28 #include <sys/strsun.h>
29 #include <sys/sysmacros.h>
30 #include <sys/errno.h>
32 #include <sys/socket.h>
34 #include <sys/sunddi.h>
35 #include <sys/cmn_err.h>
36 #include <sys/debug.h>
37 #include <sys/vtrace.h>
40 #include <sys/ethernet.h>
45 #include <net/if_types.h>
46 #include <net/if_dl.h>
47 #include <net/route.h>
48 #include <netinet/in.h>
49 #include <netinet/ip6.h>
50 #include <netinet/icmp6.h>
52 #include <inet/common.h>
54 #include <inet/mib2.h>
57 #include <inet/ip_impl.h>
58 #include <inet/ipclassifier.h>
59 #include <inet/ip_if.h>
60 #include <inet/ip_ire.h>
61 #include <inet/ip_rts.h>
63 #include <inet/ip_ndp.h>
64 #include <inet/sctp_ip.h>
65 #include <inet/ip_arp.h>
66 #include <inet/ip2mac_impl.h>
/*
 * Selects ipst->ips_ip_ndp_unsolicit_interval when isv6 is true and
 * ipst->ips_ip_arp_publish_interval otherwise.  A variable named `ipst'
 * must be in scope wherever the macro is used.  The argument is
 * parenthesized so that a conditional-expression argument is evaluated as
 * a unit.
 */
#define	ANNOUNCE_INTERVAL(isv6) \
	((isv6) ? ipst->ips_ip_ndp_unsolicit_interval : \
	ipst->ips_ip_arp_publish_interval)
/*
 * Selects ipst->ips_ndp_defend_interval when isv6 is true and
 * ipst->ips_arp_defend_interval otherwise.  A variable named `ipst' must
 * be in scope wherever the macro is used.  The argument is parenthesized
 * so that a conditional-expression argument is evaluated as a unit.
 */
#define	DEFENSE_INTERVAL(isv6) \
	((isv6) ? ipst->ips_ndp_defend_interval : \
	ipst->ips_arp_defend_interval)
/*
 * Probe interval; not a tunable.  It depends on a link capability of the
 * ill: 150 when ill_note_link is set, 1500 when it is clear.
 */
#define	ILL_PROBE_INTERVAL(ill)	(!(ill)->ill_note_link ? 1500 : 150)
/*
 * The IPv4 Link Local address space is special; we do extra duplicate checking
 * there, as the entire assignment mechanism rests on random numbers.
 *
 * IS_IPV4_LL_SPACE(ptr) is true when the first two bytes at ptr are 169 and
 * 254.  ptr is parenthesized before the cast so that an expression argument
 * (e.g. `p + 1') is converted as a whole rather than having the cast bind to
 * its first operand only.  Note that ptr is evaluated twice.
 */
#define	IS_IPV4_LL_SPACE(ptr)	(((uchar_t *)(ptr))[0] == 169 && \
	((uchar_t *)(ptr))[1] == 254)
/*
 * NCE_EXTERNAL_FLAGS_MASK is the set of ncec_flags that callers may hand
 * to the ncec*add* functions.
 *
 * With NCE_F_AUTHORITY set, incoming adverts for the mapping are ignored
 * (DAD is still performed for the mapping).  With NCE_F_PUBLISH set,
 * requests for the protocol address are answered.
 */
#define	NCE_EXTERNAL_FLAGS_MASK	( \
	NCE_F_STATIC | NCE_F_PUBLISH | NCE_F_AUTHORITY | \
	NCE_F_MCAST | NCE_F_BCAST | NCE_F_UNSOL_ADV | NCE_F_ANYCAST | \
	NCE_F_NONUD | NCE_F_ISROUTER | NCE_F_MYADDR)
102 * ndp_g_lock -> ill_lock -> ncec_lock
104 * The ndp_g_lock protects the NCE hash (nce_hash_tbl, NCE_HASH_PTR) and
105 * ncec_next. ncec_lock protects the contents of the NCE (particularly
109 static void nce_cleanup_list(ncec_t
*ncec
);
110 static void nce_set_ll(ncec_t
*ncec
, uchar_t
*ll_addr
);
111 static ncec_t
*ncec_lookup_illgrp(ill_t
*, const in6_addr_t
*,
113 static nce_t
*nce_lookup_addr(ill_t
*, const in6_addr_t
*);
114 static int nce_set_multicast_v6(ill_t
*ill
, const in6_addr_t
*addr
,
115 uint16_t ncec_flags
, nce_t
**newnce
);
116 static int nce_set_multicast_v4(ill_t
*ill
, const in_addr_t
*dst
,
117 uint16_t ncec_flags
, nce_t
**newnce
);
118 static boolean_t
ndp_xmit(ill_t
*ill
, uint32_t operation
,
119 uint8_t *hwaddr
, uint_t hwaddr_len
, const in6_addr_t
*sender
,
120 const in6_addr_t
*target
, int flag
);
121 static void ncec_refhold_locked(ncec_t
*);
122 static boolean_t
ill_defend_rate_limit(ill_t
*, ncec_t
*);
123 static void nce_queue_mp_common(ncec_t
*, mblk_t
*, boolean_t
);
124 static int nce_add_common(ill_t
*, uchar_t
*, uint_t
, const in6_addr_t
*,
125 uint16_t, uint16_t, nce_t
**);
126 static nce_t
*nce_add_impl(ill_t
*, ncec_t
*, nce_t
*, mblk_t
*);
127 static nce_t
*nce_add(ill_t
*, ncec_t
*);
128 static void nce_inactive(nce_t
*);
129 extern nce_t
*nce_lookup(ill_t
*, const in6_addr_t
*);
130 static nce_t
*nce_ill_lookup_then_add(ill_t
*, ncec_t
*);
131 static int nce_add_v6(ill_t
*, uchar_t
*, uint_t
, const in6_addr_t
*,
132 uint16_t, uint16_t, nce_t
**);
133 static int nce_add_v4(ill_t
*, uchar_t
*, uint_t
, const in_addr_t
*,
134 uint16_t, uint16_t, nce_t
**);
135 static int nce_add_v6_postprocess(nce_t
*);
136 static int nce_add_v4_postprocess(nce_t
*);
137 static ill_t
*nce_resolve_src(ncec_t
*, in6_addr_t
*);
138 static clock_t nce_fuzz_interval(clock_t, boolean_t
);
139 static void nce_resolv_ipmp_ok(ncec_t
*);
140 static void nce_walk_common(ill_t
*, pfi_t
, void *);
141 static void nce_start_timer(ncec_t
*, uint_t
);
142 static nce_t
*nce_fastpath_create(ill_t
*, ncec_t
*);
143 static void nce_fastpath_trigger(nce_t
*);
144 static nce_t
*nce_fastpath(ncec_t
*, boolean_t
, nce_t
*);
147 static void ncec_trace_cleanup(const ncec_t
*);
/*
 * Address of the slot in the IPv4 table (ipst->ips_ndp4->nce_hash_tbl)
 * selected by IRE_ADDR_HASH(addr, NCE_TABLE_SIZE).
 */
#define	NCE_HASH_PTR_V4(ipst, addr) \
	((ipst)->ips_ndp4->nce_hash_tbl + IRE_ADDR_HASH(addr, NCE_TABLE_SIZE))
153 #define NCE_HASH_PTR_V6(ipst, addr) \
154 (&((ipst)->ips_ndp6->nce_hash_tbl[NCE_ADDR_HASH_V6(addr, \
157 extern kmem_cache_t
*ncec_cache
;
158 extern kmem_cache_t
*nce_cache
;
161 * Send out an IPv6 (unicast) or IPv4 (broadcast) DAD probe.
162 * If src_ill is not null, the ncec_addr is bound to src_ill. The
163 * src_ill is ignored by nce_dad for IPv4 Neighbor Cache entries where
164 * the probe is sent on the ncec_ill (in the non-IPMP case) or the
165 * IPMP cast_ill (in the IPMP case).
167 * Note that the probe interval is based on the src_ill for IPv6, and
168 * the ncec_xmit_interval for IPv4.
171 nce_dad(ncec_t
*ncec
, ill_t
*src_ill
, boolean_t send_probe
)
174 uint32_t probe_interval
;
176 ASSERT(!(ncec
->ncec_flags
& NCE_F_MCAST
));
177 ASSERT(!(ncec
->ncec_flags
& NCE_F_BCAST
));
178 if (ncec
->ncec_ipversion
== IPV6_VERSION
) {
179 dropped
= ndp_xmit(src_ill
, ND_NEIGHBOR_SOLICIT
,
180 ncec
->ncec_lladdr
, ncec
->ncec_lladdr_length
,
181 &ipv6_all_zeros
, &ncec
->ncec_addr
, NDP_PROBE
);
182 probe_interval
= ILL_PROBE_INTERVAL(src_ill
);
184 /* IPv4 DAD delays the initial probe. */
186 dropped
= arp_probe(ncec
);
189 probe_interval
= nce_fuzz_interval(ncec
->ncec_xmit_interval
,
193 mutex_enter(&ncec
->ncec_lock
);
195 mutex_exit(&ncec
->ncec_lock
);
197 nce_restart_timer(ncec
, probe_interval
);
201 * Compute default flags to use for an advertisement of this ncec's address.
204 nce_advert_flags(const ncec_t
*ncec
)
208 if (ncec
->ncec_flags
& NCE_F_ISROUTER
)
209 flag
|= NDP_ISROUTER
;
210 if (!(ncec
->ncec_flags
& NCE_F_ANYCAST
))
217 * NDP Cache Entry creation routine.
218 * This routine must always be called with ndp6->ndp_g_lock held.
221 nce_add_v6(ill_t
*ill
, uchar_t
*hw_addr
, uint_t hw_addr_len
,
222 const in6_addr_t
*addr
, uint16_t flags
, uint16_t state
, nce_t
**newnce
)
227 ASSERT(MUTEX_HELD(&ill
->ill_ipst
->ips_ndp6
->ndp_g_lock
));
228 ASSERT(ill
!= NULL
&& ill
->ill_isv6
);
230 err
= nce_add_common(ill
, hw_addr
, hw_addr_len
, addr
, flags
, state
,
234 ASSERT(newnce
!= NULL
);
240 * Post-processing routine to be executed after nce_add_v6(). This function
241 * triggers fastpath (if appropriate) and DAD on the newly added nce entry
242 * and must be called without any locks held.
245 nce_add_v6_postprocess(nce_t
*nce
)
247 ncec_t
*ncec
= nce
->nce_common
;
248 boolean_t dropped
= B_FALSE
;
249 uchar_t
*hw_addr
= ncec
->ncec_lladdr
;
250 uint_t hw_addr_len
= ncec
->ncec_lladdr_length
;
251 ill_t
*ill
= ncec
->ncec_ill
;
253 uint16_t flags
= ncec
->ncec_flags
;
254 ip_stack_t
*ipst
= ill
->ill_ipst
;
255 boolean_t trigger_fastpath
= B_TRUE
;
258 * If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then
259 * we call nce_fastpath as soon as the ncec is resolved in nce_process.
260 * We also call nce_fastpath from nce_update if the link layer address of
261 * the peer changes.
263 if (NCE_PUBLISH(ncec
) || !NCE_ISREACHABLE(ncec
) ||
264 (hw_addr
== NULL
&& ill
->ill_net_type
!= IRE_IF_NORESOLVER
))
265 trigger_fastpath
= B_FALSE
;
267 if (trigger_fastpath
)
268 nce_fastpath_trigger(nce
);
269 if (NCE_PUBLISH(ncec
) && ncec
->ncec_state
== ND_PROBE
) {
272 * Unicast entry that needs DAD.
275 hwaddr_ill
= ipmp_illgrp_find_ill(ill
->ill_grp
,
276 hw_addr
, hw_addr_len
);
280 nce_dad(ncec
, hwaddr_ill
, B_TRUE
);
282 } else if (flags
& NCE_F_UNSOL_ADV
) {
284 * We account for the transmit below by assigning one
285 * less than the ndd variable. Subsequent decrements
286 * are done in nce_timer.
288 mutex_enter(&ncec
->ncec_lock
);
289 ncec
->ncec_unsolicit_count
=
290 ipst
->ips_ip_ndp_unsolicit_count
- 1;
291 mutex_exit(&ncec
->ncec_lock
);
292 dropped
= ndp_xmit(ill
,
296 &ncec
->ncec_addr
, /* Source and target of the adv */
297 &ipv6_all_hosts_mcast
, /* Destination of the packet */
298 nce_advert_flags(ncec
));
299 mutex_enter(&ncec
->ncec_lock
);
301 ncec
->ncec_unsolicit_count
++;
303 ncec
->ncec_last_time_defended
= ddi_get_lbolt();
304 if (ncec
->ncec_unsolicit_count
!= 0) {
305 nce_start_timer(ncec
,
306 ipst
->ips_ip_ndp_unsolicit_interval
);
308 mutex_exit(&ncec
->ncec_lock
);
314 * Atomically lookup and add (if needed) Neighbor Cache information for
317 * IPMP notes: the ncec for non-local (i.e., !NCE_MYADDR(ncec)) addresses
318 * are always added pointing at the ipmp_ill. Thus, when the ill passed
319 * to nce_add_v6 is an under_ill (i.e., IS_UNDER_IPMP(ill)) two nce_t
320 * entries will be created, both pointing at the same ncec_t. The nce_t
321 * entries will have their nce_ill set to the ipmp_ill and the under_ill
322 * respectively, with the ncec_t having its ncec_ill pointing at the ipmp_ill.
323 * Local addresses are always created on the ill passed to nce_add_v6.
326 nce_lookup_then_add_v6(ill_t
*ill
, uchar_t
*hw_addr
, uint_t hw_addr_len
,
327 const in6_addr_t
*addr
, uint16_t flags
, uint16_t state
, nce_t
**newnce
)
330 ip_stack_t
*ipst
= ill
->ill_ipst
;
331 nce_t
*nce
, *upper_nce
= NULL
;
333 boolean_t need_ill_refrele
= B_FALSE
;
335 if (flags
& NCE_F_MCAST
) {
337 * hw_addr will be figured out in nce_set_multicast_v6;
338 * caller has to select the cast_ill
340 ASSERT(hw_addr
== NULL
);
341 ASSERT(!IS_IPMP(ill
));
342 err
= nce_set_multicast_v6(ill
, addr
, flags
, newnce
);
345 ASSERT(ill
->ill_isv6
);
346 if (IS_UNDER_IPMP(ill
) && !(flags
& NCE_F_MYADDR
)) {
347 ill
= ipmp_ill_hold_ipmp_ill(ill
);
350 need_ill_refrele
= B_TRUE
;
353 mutex_enter(&ipst
->ips_ndp6
->ndp_g_lock
);
354 nce
= nce_lookup_addr(ill
, addr
);
356 err
= nce_add_v6(ill
, hw_addr
, hw_addr_len
, addr
, flags
, state
,
361 mutex_exit(&ipst
->ips_ndp6
->ndp_g_lock
);
363 err
= nce_add_v6_postprocess(nce
);
364 if (in_ill
!= ill
&& nce
!= NULL
) {
365 nce_t
*under_nce
= NULL
;
368 * in_ill was the under_ill. Try to create the under_nce.
369 * Hold the ill_g_lock to prevent changes to group membership
372 rw_enter(&ipst
->ips_ill_g_lock
, RW_READER
);
373 if (!IS_IN_SAME_ILLGRP(in_ill
, ill
)) {
374 DTRACE_PROBE2(ill__not__in__group
, nce_t
*, nce
,
376 rw_exit(&ipst
->ips_ill_g_lock
);
382 under_nce
= nce_fastpath_create(in_ill
, nce
->nce_common
);
383 if (under_nce
== NULL
) {
384 rw_exit(&ipst
->ips_ill_g_lock
);
390 rw_exit(&ipst
->ips_ill_g_lock
);
392 nce
= under_nce
; /* will be returned to caller */
393 if (NCE_ISREACHABLE(nce
->nce_common
))
394 nce_fastpath_trigger(under_nce
);
396 /* nce_refrele is deferred until the lock is dropped */
404 if (upper_nce
!= NULL
)
405 nce_refrele(upper_nce
);
406 if (need_ill_refrele
)
412 * Remove all the CONDEMNED nces from the appropriate hash table.
413 * We create a private list of NCEs, these may have ires pointing
414 * to them, so the list will be passed through to clean up dependent
415 * ires and only then we can do ncec_refrele() which can make NCE inactive.
418 nce_remove(ndp_g_t
*ndp
, ncec_t
*ncec
, ncec_t
**free_nce_list
)
423 ASSERT(MUTEX_HELD(&ndp
->ndp_g_lock
));
424 ASSERT(ndp
->ndp_g_walker
== 0);
425 for (; ncec
; ncec
= ncec1
) {
426 ncec1
= ncec
->ncec_next
;
427 mutex_enter(&ncec
->ncec_lock
);
428 if (NCE_ISCONDEMNED(ncec
)) {
429 ptpn
= ncec
->ncec_ptpn
;
430 ncec1
= ncec
->ncec_next
;
432 ncec1
->ncec_ptpn
= ptpn
;
434 ncec
->ncec_ptpn
= NULL
;
435 ncec
->ncec_next
= NULL
;
436 ncec
->ncec_next
= *free_nce_list
;
437 *free_nce_list
= ncec
;
439 mutex_exit(&ncec
->ncec_lock
);
444 * 1. Mark the entry CONDEMNED. This ensures that no new nce_lookup()
445 * will return this NCE. Also no new timeouts will
446 * be started (See nce_restart_timer).
447 * 2. Cancel any currently running timeouts.
448 * 3. If there is an ndp walker, return. The walker will do the cleanup.
449 * This ensures that walkers see a consistent list of NCEs while walking.
450 * 4. Otherwise remove the NCE from the list of NCEs
453 ncec_delete(ncec_t
*ncec
)
457 int ipversion
= ncec
->ncec_ipversion
;
459 ip_stack_t
*ipst
= ncec
->ncec_ipst
;
461 if (ipversion
== IPV4_VERSION
)
462 ndp
= ipst
->ips_ndp4
;
464 ndp
= ipst
->ips_ndp6
;
466 /* Serialize deletes */
467 mutex_enter(&ncec
->ncec_lock
);
468 if (NCE_ISCONDEMNED(ncec
)) {
469 /* Some other thread is doing the delete */
470 mutex_exit(&ncec
->ncec_lock
);
474 * Caller has a refhold. Also 1 ref for being in the list. Thus
475 * refcnt has to be >= 2
477 ASSERT(ncec
->ncec_refcnt
>= 2);
478 ncec
->ncec_flags
|= NCE_F_CONDEMNED
;
479 mutex_exit(&ncec
->ncec_lock
);
481 /* Count how many condemned nces for kmem_cache callback */
482 atomic_inc_32(&ipst
->ips_num_nce_condemned
);
483 nce_fastpath_list_delete(ncec
->ncec_ill
, ncec
, NULL
);
485 /* Complete any waiting callbacks */
486 ncec_cb_dispatch(ncec
);
489 * Cancel any running timer. Timeout can't be restarted
490 * since CONDEMNED is set. Can't hold ncec_lock across untimeout.
491 * Passing invalid timeout id is fine.
493 if (ncec
->ncec_timeout_id
!= 0) {
494 (void) untimeout(ncec
->ncec_timeout_id
);
495 ncec
->ncec_timeout_id
= 0;
498 mutex_enter(&ndp
->ndp_g_lock
);
499 if (ncec
->ncec_ptpn
== NULL
) {
501 * The last ndp walker has already removed this ncec from
502 * the list after we marked the ncec CONDEMNED and before
503 * we grabbed the global lock.
505 mutex_exit(&ndp
->ndp_g_lock
);
508 if (ndp
->ndp_g_walker
> 0) {
510 * Can't unlink. The walker will clean up
512 ndp
->ndp_g_walker_cleanup
= B_TRUE
;
513 mutex_exit(&ndp
->ndp_g_lock
);
518 * Now remove the ncec from the list. nce_restart_timer won't restart
519 * the timer since it is marked CONDEMNED.
521 ptpn
= ncec
->ncec_ptpn
;
522 ncec1
= ncec
->ncec_next
;
524 ncec1
->ncec_ptpn
= ptpn
;
526 ncec
->ncec_ptpn
= NULL
;
527 ncec
->ncec_next
= NULL
;
528 mutex_exit(&ndp
->ndp_g_lock
);
530 /* Removed from ncec_ptpn/ncec_next list */
531 ncec_refrele_notr(ncec
);
535 ncec_inactive(ncec_t
*ncec
)
538 ill_t
*ill
= ncec
->ncec_ill
;
539 ip_stack_t
*ipst
= ncec
->ncec_ipst
;
541 ASSERT(ncec
->ncec_refcnt
== 0);
542 ASSERT(MUTEX_HELD(&ncec
->ncec_lock
));
544 /* Count how many condemned nces for kmem_cache callback */
545 if (NCE_ISCONDEMNED(ncec
))
546 atomic_add_32(&ipst
->ips_num_nce_condemned
, -1);
548 /* Free all allocated messages */
549 mpp
= &ncec
->ncec_qd_mp
;
550 while (*mpp
!= NULL
) {
559 * must have been cleaned up in ncec_delete
561 ASSERT(list_is_empty(&ncec
->ncec_cb
));
562 list_destroy(&ncec
->ncec_cb
);
564 * free the ncec_lladdr if one was allocated in nce_add_common()
566 if (ncec
->ncec_lladdr_length
> 0)
567 kmem_free(ncec
->ncec_lladdr
, ncec
->ncec_lladdr_length
);
570 ncec_trace_cleanup(ncec
);
573 mutex_enter(&ill
->ill_lock
);
574 DTRACE_PROBE3(ill__decr__cnt
, (ill_t
*), ill
,
575 (char *), "ncec", (void *), ncec
);
577 ncec
->ncec_ill
= NULL
;
579 * If the number of ncec's associated with this ill have dropped
580 * to zero, check whether we need to restart any operation that
581 * is waiting for this to happen.
583 if (ILL_DOWN_OK(ill
)) {
584 /* ipif_ill_refrele_tail drops the ill_lock */
585 ipif_ill_refrele_tail(ill
);
587 mutex_exit(&ill
->ill_lock
);
590 mutex_destroy(&ncec
->ncec_lock
);
591 kmem_cache_free(ncec_cache
, ncec
);
595 * ncec_walk routine. Delete the ncec if it is associated with the ill
596 * that is going away. Always called as a writer.
599 ncec_delete_per_ill(ncec_t
*ncec
, uchar_t
*arg
)
601 if ((ncec
!= NULL
) && ncec
->ncec_ill
== (ill_t
*)arg
) {
607 * Neighbor Cache cleanup logic for a list of ncec_t entries.
610 nce_cleanup_list(ncec_t
*ncec
)
614 ASSERT(ncec
!= NULL
);
615 while (ncec
!= NULL
) {
616 ncec_next
= ncec
->ncec_next
;
617 ncec
->ncec_next
= NULL
;
620 * It is possible for the last ndp walker (this thread)
621 * to come here after ncec_delete has marked the ncec CONDEMNED
622 * and before it has removed the ncec from the fastpath list
623 * or called untimeout. So we need to do it here. It is safe
624 * for both ncec_delete and this thread to do it twice or
625 * even simultaneously since each of the threads has a
626 * reference on the ncec.
628 nce_fastpath_list_delete(ncec
->ncec_ill
, ncec
, NULL
);
630 * Cancel any running timer. Timeout can't be restarted
631 * since CONDEMNED is set. The ncec_lock can't be
632 * held across untimeout though passing invalid timeout
635 if (ncec
->ncec_timeout_id
!= 0) {
636 (void) untimeout(ncec
->ncec_timeout_id
);
637 ncec
->ncec_timeout_id
= 0;
639 /* Removed from ncec_ptpn/ncec_next list */
640 ncec_refrele_notr(ncec
);
646 * Restart DAD on given NCE. Returns B_TRUE if DAD has been restarted.
649 nce_restart_dad(ncec_t
*ncec
)
652 ill_t
*ill
, *hwaddr_ill
;
656 ill
= ncec
->ncec_ill
;
657 mutex_enter(&ncec
->ncec_lock
);
658 if (ncec
->ncec_state
== ND_PROBE
) {
659 mutex_exit(&ncec
->ncec_lock
);
661 } else if (ncec
->ncec_state
== ND_REACHABLE
) {
662 ASSERT(ncec
->ncec_lladdr
!= NULL
);
663 ncec
->ncec_state
= ND_PROBE
;
664 ncec
->ncec_pcnt
= ND_MAX_UNICAST_SOLICIT
;
666 * Slight cheat here: we don't use the initial probe delay
667 * for IPv4 in this obscure case.
669 mutex_exit(&ncec
->ncec_lock
);
671 hwaddr_ill
= ipmp_illgrp_find_ill(ill
->ill_grp
,
672 ncec
->ncec_lladdr
, ncec
->ncec_lladdr_length
);
676 nce_dad(ncec
, hwaddr_ill
, B_TRUE
);
679 mutex_exit(&ncec
->ncec_lock
);
686 * IPv6 Cache entry lookup. Try to find an ncec matching the parameters passed.
687 * If one is found, the refcnt on the ncec will be incremented.
690 ncec_lookup_illgrp_v6(ill_t
*ill
, const in6_addr_t
*addr
)
693 ip_stack_t
*ipst
= ill
->ill_ipst
;
695 rw_enter(&ipst
->ips_ill_g_lock
, RW_READER
);
696 mutex_enter(&ipst
->ips_ndp6
->ndp_g_lock
);
698 /* Get head of v6 hash table */
699 ncec
= *((ncec_t
**)NCE_HASH_PTR_V6(ipst
, *addr
));
700 ncec
= ncec_lookup_illgrp(ill
, addr
, ncec
);
701 mutex_exit(&ipst
->ips_ndp6
->ndp_g_lock
);
702 rw_exit(&ipst
->ips_ill_g_lock
);
706 * IPv4 Cache entry lookup. Try to find an ncec matching the parameters passed.
707 * If one is found, the refcnt on the ncec will be incremented.
710 ncec_lookup_illgrp_v4(ill_t
*ill
, const in_addr_t
*addr
)
714 ip_stack_t
*ipst
= ill
->ill_ipst
;
716 rw_enter(&ipst
->ips_ill_g_lock
, RW_READER
);
717 mutex_enter(&ipst
->ips_ndp4
->ndp_g_lock
);
719 /* Get head of v4 hash table */
720 ncec
= *((ncec_t
**)NCE_HASH_PTR_V4(ipst
, *addr
));
721 IN6_IPADDR_TO_V4MAPPED(*addr
, &addr6
);
722 ncec
= ncec_lookup_illgrp(ill
, &addr6
, ncec
);
723 mutex_exit(&ipst
->ips_ndp4
->ndp_g_lock
);
724 rw_exit(&ipst
->ips_ill_g_lock
);
729 * Cache entry lookup. Try to find an ncec matching the parameters passed.
730 * If an ncec is found, increment the hold count on that ncec.
731 * The caller passes in the start of the appropriate hash table, and must
732 * be holding the appropriate global lock (ndp_g_lock). In addition, since
733 * this function matches ncec_t entries across the illgrp, the ips_ill_g_lock
734 * must be held as reader.
736 * This function always matches across the ipmp group.
739 ncec_lookup_illgrp(ill_t
*ill
, const in6_addr_t
*addr
, ncec_t
*ncec
)
742 ip_stack_t
*ipst
= ill
->ill_ipst
;
745 ndp
= ipst
->ips_ndp6
;
747 ndp
= ipst
->ips_ndp4
;
750 ASSERT(MUTEX_HELD(&ndp
->ndp_g_lock
));
751 if (IN6_IS_ADDR_UNSPECIFIED(addr
))
753 for (; ncec
!= NULL
; ncec
= ncec
->ncec_next
) {
754 if (ncec
->ncec_ill
== ill
||
755 IS_IN_SAME_ILLGRP(ill
, ncec
->ncec_ill
)) {
756 if (IN6_ARE_ADDR_EQUAL(&ncec
->ncec_addr
, addr
)) {
757 mutex_enter(&ncec
->ncec_lock
);
758 if (!NCE_ISCONDEMNED(ncec
)) {
759 ncec_refhold_locked(ncec
);
760 mutex_exit(&ncec
->ncec_lock
);
763 mutex_exit(&ncec
->ncec_lock
);
771 * Find an nce_t on ill with nce_addr == addr. Lookup the nce_t
772 * entries for ill only, i.e., when ill is part of an ipmp group,
773 * nce_lookup_v4 will never try to match across the group.
776 nce_lookup_v4(ill_t
*ill
, const in_addr_t
*addr
)
780 ip_stack_t
*ipst
= ill
->ill_ipst
;
782 mutex_enter(&ipst
->ips_ndp4
->ndp_g_lock
);
783 IN6_IPADDR_TO_V4MAPPED(*addr
, &addr6
);
784 nce
= nce_lookup_addr(ill
, &addr6
);
785 mutex_exit(&ipst
->ips_ndp4
->ndp_g_lock
);
790 * Find an nce_t on ill with nce_addr == addr. Lookup the nce_t
791 * entries for ill only, i.e., when ill is part of an ipmp group,
792 * nce_lookup_v6 will never try to match across the group.
795 nce_lookup_v6(ill_t
*ill
, const in6_addr_t
*addr6
)
798 ip_stack_t
*ipst
= ill
->ill_ipst
;
800 mutex_enter(&ipst
->ips_ndp6
->ndp_g_lock
);
801 nce
= nce_lookup_addr(ill
, addr6
);
802 mutex_exit(&ipst
->ips_ndp6
->ndp_g_lock
);
807 nce_lookup_addr(ill_t
*ill
, const in6_addr_t
*addr
)
814 ASSERT(MUTEX_HELD(&ill
->ill_ipst
->ips_ndp6
->ndp_g_lock
));
816 ASSERT(MUTEX_HELD(&ill
->ill_ipst
->ips_ndp4
->ndp_g_lock
));
818 mutex_enter(&ill
->ill_lock
);
819 nce
= nce_lookup(ill
, addr
);
820 mutex_exit(&ill
->ill_lock
);
826 * Router turned to host. We need to make sure that cached copies of the ncec
827 * are not used for forwarding packets if they were derived from the default
828 * route, and that the default route itself is removed, as required by
829 * section 7.2.5 of RFC 2461.
831 * Note that the ncec itself probably has valid link-layer information for the
832 * nexthop, so that there is no reason to delete the ncec, as long as the
833 * ISROUTER flag is turned off.
836 ncec_router_to_host(ncec_t
*ncec
)
839 ip_stack_t
*ipst
= ncec
->ncec_ipst
;
841 mutex_enter(&ncec
->ncec_lock
);
842 ncec
->ncec_flags
&= ~NCE_F_ISROUTER
;
843 mutex_exit(&ncec
->ncec_lock
);
845 ire
= ire_ftable_lookup_v6(&ipv6_all_zeros
, &ipv6_all_zeros
,
846 &ncec
->ncec_addr
, IRE_DEFAULT
, ncec
->ncec_ill
, ALL_ZONES
,
847 MATCH_IRE_ILL
| MATCH_IRE_TYPE
| MATCH_IRE_GW
, 0, ipst
, NULL
);
849 ip_rts_rtmsg(RTM_DELETE
, ire
, 0, ipst
);
856 * Process passed in parameters either from an incoming packet or via
860 nce_process(ncec_t
*ncec
, uchar_t
*hw_addr
, uint32_t flag
, boolean_t is_adv
)
862 ill_t
*ill
= ncec
->ncec_ill
;
863 uint32_t hw_addr_len
= ill
->ill_phys_addr_length
;
864 boolean_t ll_updated
= B_FALSE
;
865 boolean_t ll_changed
;
868 ASSERT(ncec
->ncec_ipversion
== IPV6_VERSION
);
870 * No updates of link layer address or the neighbor state is
871 * allowed, when the cache is in NONUD state. This still
872 * allows for responding to reachability solicitation.
874 mutex_enter(&ncec
->ncec_lock
);
875 if (ncec
->ncec_state
== ND_INCOMPLETE
) {
876 if (hw_addr
== NULL
) {
877 mutex_exit(&ncec
->ncec_lock
);
880 nce_set_ll(ncec
, hw_addr
);
882 * Update ncec state and send the queued packets
883 * back to ip; this time the ire will be added.
885 if (flag
& ND_NA_FLAG_SOLICITED
) {
886 nce_update(ncec
, ND_REACHABLE
, NULL
);
888 nce_update(ncec
, ND_STALE
, NULL
);
890 mutex_exit(&ncec
->ncec_lock
);
891 nce
= nce_fastpath(ncec
, B_TRUE
, NULL
);
897 ll_changed
= nce_cmp_ll_addr(ncec
, hw_addr
, hw_addr_len
);
899 /* If this is a SOLICITATION request only */
901 nce_update(ncec
, ND_STALE
, hw_addr
);
902 mutex_exit(&ncec
->ncec_lock
);
903 ncec_cb_dispatch(ncec
);
906 if (!(flag
& ND_NA_FLAG_OVERRIDE
) && ll_changed
) {
907 /* If in any other state than REACHABLE, ignore */
908 if (ncec
->ncec_state
== ND_REACHABLE
) {
909 nce_update(ncec
, ND_STALE
, NULL
);
911 mutex_exit(&ncec
->ncec_lock
);
912 ncec_cb_dispatch(ncec
);
916 nce_update(ncec
, ND_UNCHANGED
, hw_addr
);
919 if (flag
& ND_NA_FLAG_SOLICITED
) {
920 nce_update(ncec
, ND_REACHABLE
, NULL
);
923 nce_update(ncec
, ND_STALE
, NULL
);
926 mutex_exit(&ncec
->ncec_lock
);
927 if (!(flag
& ND_NA_FLAG_ROUTER
) && (ncec
->ncec_flags
&
929 ncec_router_to_host(ncec
);
931 ncec_cb_dispatch(ncec
);
937 * Pass arg1 to the pfi supplied, along with each ncec in existence.
938 * ncec_walk() places a REFHOLD on the ncec and drops the lock when
939 * walking the hash list.
942 ncec_walk_common(ndp_g_t
*ndp
, ill_t
*ill
, pfi_t pfi
, void *arg1
,
948 ncec_t
*free_nce_list
= NULL
;
950 mutex_enter(&ndp
->ndp_g_lock
);
951 /* Prevent ncec_delete from unlink and free of NCE */
953 mutex_exit(&ndp
->ndp_g_lock
);
954 for (ncep
= ndp
->nce_hash_tbl
;
955 ncep
< A_END(ndp
->nce_hash_tbl
); ncep
++) {
956 for (ncec
= *ncep
; ncec
!= NULL
; ncec
= ncec1
) {
957 ncec1
= ncec
->ncec_next
;
958 if (ill
== NULL
|| ncec
->ncec_ill
== ill
) {
964 ncec_refhold_notr(ncec
);
966 ncec_refrele_notr(ncec
);
971 mutex_enter(&ndp
->ndp_g_lock
);
973 if (ndp
->ndp_g_walker_cleanup
&& ndp
->ndp_g_walker
== 0) {
974 /* Time to delete condemned entries */
975 for (ncep
= ndp
->nce_hash_tbl
;
976 ncep
< A_END(ndp
->nce_hash_tbl
); ncep
++) {
979 nce_remove(ndp
, ncec
, &free_nce_list
);
982 ndp
->ndp_g_walker_cleanup
= B_FALSE
;
985 mutex_exit(&ndp
->ndp_g_lock
);
987 if (free_nce_list
!= NULL
) {
988 nce_cleanup_list(free_nce_list
);
994 * Note that ill can be NULL hence can't derive the ipst from it.
997 ncec_walk(ill_t
*ill
, pfi_t pfi
, void *arg1
, ip_stack_t
*ipst
)
999 ncec_walk_common(ipst
->ips_ndp4
, ill
, pfi
, arg1
, B_TRUE
);
1000 ncec_walk_common(ipst
->ips_ndp6
, ill
, pfi
, arg1
, B_TRUE
);
1004 * For each interface an entry is added for the unspecified multicast group.
1005 * Here that mapping is used to form the multicast cache entry for a particular
1006 * multicast destination.
1009 nce_set_multicast_v6(ill_t
*ill
, const in6_addr_t
*dst
,
1010 uint16_t flags
, nce_t
**newnce
)
1014 ip_stack_t
*ipst
= ill
->ill_ipst
;
1017 ASSERT(ill
!= NULL
);
1018 ASSERT(ill
->ill_isv6
);
1019 ASSERT(!(IN6_IS_ADDR_UNSPECIFIED(dst
)));
1021 mutex_enter(&ipst
->ips_ndp6
->ndp_g_lock
);
1022 nce
= nce_lookup_addr(ill
, dst
);
1024 mutex_exit(&ipst
->ips_ndp6
->ndp_g_lock
);
1027 if (ill
->ill_net_type
== IRE_IF_RESOLVER
) {
1029 * For IRE_IF_RESOLVER a hardware mapping can be
1032 hw_addr
= kmem_alloc(ill
->ill_nd_lla_len
, KM_NOSLEEP
);
1033 if (hw_addr
== NULL
) {
1034 mutex_exit(&ipst
->ips_ndp6
->ndp_g_lock
);
1037 ip_mcast_mapping(ill
, (uchar_t
*)dst
, hw_addr
);
1039 /* No hw_addr is needed for IRE_IF_NORESOLVER. */
1042 ASSERT((flags
& NCE_F_MCAST
) != 0);
1043 ASSERT((flags
& NCE_F_NONUD
) != 0);
1044 /* nce_state will be computed by nce_add_common() */
1045 err
= nce_add_v6(ill
, hw_addr
, ill
->ill_phys_addr_length
, dst
, flags
,
1046 ND_UNCHANGED
, &nce
);
1047 mutex_exit(&ipst
->ips_ndp6
->ndp_g_lock
);
1049 err
= nce_add_v6_postprocess(nce
);
1050 if (hw_addr
!= NULL
)
1051 kmem_free(hw_addr
, ill
->ill_nd_lla_len
);
1053 ip1dbg(("nce_set_multicast_v6: create failed" "%d\n", err
));
1057 ASSERT(nce
->nce_common
->ncec_state
== ND_REACHABLE
);
1066 * Return the link layer address, and any flags of a ncec.
1069 ndp_query(ill_t
*ill
, struct lif_nd_req
*lnr
)
1075 ASSERT(ill
!= NULL
&& ill
->ill_isv6
);
1076 sin6
= (sin6_t
*)&lnr
->lnr_addr
;
1077 addr
= &sin6
->sin6_addr
;
1080 * NOTE: if the ill is an IPMP interface, then match against the whole
1081 * illgrp. This e.g. allows in.ndpd to retrieve the link layer
1082 * addresses for the data addresses on an IPMP interface even though
1083 * ipif_ndp_up() created them with an ncec_ill of ipif_bound_ill.
1085 ncec
= ncec_lookup_illgrp_v6(ill
, addr
);
1088 /* If no link layer address is available yet, return ESRCH */
1089 if (!NCE_ISREACHABLE(ncec
)) {
1093 lnr
->lnr_hdw_len
= ill
->ill_phys_addr_length
;
1094 bcopy(ncec
->ncec_lladdr
, (uchar_t
*)&lnr
->lnr_hdw_addr
,
1096 if (ncec
->ncec_flags
& NCE_F_ISROUTER
)
1097 lnr
->lnr_flags
= NDF_ISROUTER_ON
;
1098 if (ncec
->ncec_flags
& NCE_F_ANYCAST
)
1099 lnr
->lnr_flags
|= NDF_ANYCAST_ON
;
1100 if (ncec
->ncec_flags
& NCE_F_STATIC
)
1101 lnr
->lnr_flags
|= NDF_STATIC
;
1107 * Finish setting up the Enable/Disable multicast for the driver.
1110 ndp_mcastreq(ill_t
*ill
, const in6_addr_t
*v6group
, uint32_t hw_addr_len
,
1111 uint32_t hw_addr_offset
, mblk_t
*mp
)
1117 ASSERT(ill
->ill_net_type
== IRE_IF_RESOLVER
);
1118 if (IN6_IS_ADDR_V4MAPPED(v6group
)) {
1119 IN6_V4MAPPED_TO_IPADDR(v6group
, v4group
);
1121 ASSERT(CLASSD(v4group
));
1122 ASSERT(!(ill
->ill_isv6
));
1124 addr
= (uchar_t
*)&v4group
;
1126 ASSERT(IN6_IS_ADDR_MULTICAST(v6group
));
1127 ASSERT(ill
->ill_isv6
);
1129 addr
= (uchar_t
*)v6group
;
1131 hw_addr
= mi_offset_paramc(mp
, hw_addr_offset
, hw_addr_len
);
1132 if (hw_addr
== NULL
) {
1133 ip0dbg(("ndp_mcastreq NULL hw_addr\n"));
1138 ip_mcast_mapping(ill
, addr
, hw_addr
);
1143 ip_ndp_resolve(ncec_t
*ncec
)
1145 in_addr_t sender4
= INADDR_ANY
;
1146 in6_addr_t sender6
= ipv6_all_zeros
;
1150 src_ill
= nce_resolve_src(ncec
, &sender6
);
1151 if (src_ill
== NULL
) {
1152 /* Make sure we try again later */
1153 ms
= ncec
->ncec_ill
->ill_reachable_retrans_time
;
1154 nce_restart_timer(ncec
, (clock_t)ms
);
1157 if (ncec
->ncec_ipversion
== IPV4_VERSION
)
1158 IN6_V4MAPPED_TO_IPADDR(&sender6
, sender4
);
1159 mutex_enter(&ncec
->ncec_lock
);
1160 if (ncec
->ncec_ipversion
== IPV6_VERSION
)
1161 ms
= ndp_solicit(ncec
, sender6
, src_ill
);
1163 ms
= arp_request(ncec
, sender4
, src_ill
);
1164 mutex_exit(&ncec
->ncec_lock
);
1166 if (ncec
->ncec_state
!= ND_REACHABLE
) {
1167 if (ncec
->ncec_ipversion
== IPV6_VERSION
)
1168 ndp_resolv_failed(ncec
);
1170 arp_resolv_failed(ncec
);
1171 ASSERT((ncec
->ncec_flags
& NCE_F_STATIC
) == 0);
1172 nce_make_unreachable(ncec
);
1176 nce_restart_timer(ncec
, (clock_t)ms
);
1179 ill_refrele(src_ill
);
1183 * Send an IPv6 neighbor solicitation.
1184 * Returns number of milliseconds after which we should either rexmit or abort.
1185 * Return of zero means we should abort.
1186 * The caller holds the ncec_lock to protect ncec_qd_mp and ncec_rcnt.
1187 * The optional source address is used as a hint to ndp_solicit for
1188 * which source to use in the packet.
1190 * NOTE: This routine drops ncec_lock (and later reacquires it) when sending
1194 ndp_solicit(ncec_t
*ncec
, in6_addr_t src
, ill_t
*ill
)
1197 boolean_t dropped
= B_FALSE
;
1199 ASSERT(ncec
->ncec_ipversion
== IPV6_VERSION
);
1200 ASSERT(MUTEX_HELD(&ncec
->ncec_lock
));
1202 if (ncec
->ncec_rcnt
== 0)
1205 dst
= ncec
->ncec_addr
;
1207 mutex_exit(&ncec
->ncec_lock
);
1208 dropped
= ndp_xmit(ill
, ND_NEIGHBOR_SOLICIT
, ill
->ill_phys_addr
,
1209 ill
->ill_phys_addr_length
, &src
, &dst
, 0);
1210 mutex_enter(&ncec
->ncec_lock
);
1213 return (ncec
->ncec_ill
->ill_reachable_retrans_time
);
1217 * Attempt to recover an address on an interface that's been marked as a
1218 * duplicate. Because NCEs are destroyed when the interface goes down, there's
1219 * no easy way to just probe the address and have the right thing happen if
1220 * it's no longer in use. Instead, we just bring it up normally and allow the
1221 * regular interface start-up logic to probe for a remaining duplicate and take
1222 * us back down if necessary.
1223 * Neither DHCP nor temporary addresses arrive here; they're excluded by
1228 ip_addr_recover(ipsq_t
*ipsq
, queue_t
*rq
, mblk_t
*mp
, void *dummy_arg
)
1230 ill_t
*ill
= rq
->q_ptr
;
1232 in6_addr_t
*addr6
= (in6_addr_t
*)mp
->b_rptr
;
1233 in_addr_t
*addr4
= (in_addr_t
*)mp
->b_rptr
;
1234 boolean_t addr_equal
;
1236 for (ipif
= ill
->ill_ipif
; ipif
!= NULL
; ipif
= ipif
->ipif_next
) {
1238 * We do not support recovery of proxy ARP'd interfaces,
1239 * because the system lacks a complete proxy ARP mechanism.
1241 if (ill
->ill_isv6
) {
1242 addr_equal
= IN6_ARE_ADDR_EQUAL(&ipif
->ipif_v6lcl_addr
,
1245 addr_equal
= (ipif
->ipif_lcl_addr
== *addr4
);
1248 if ((ipif
->ipif_flags
& IPIF_POINTOPOINT
) || !addr_equal
)
1252 * If we have already recovered or if the interface is going
1253 * away, then ignore.
1255 mutex_enter(&ill
->ill_lock
);
1256 if (!(ipif
->ipif_flags
& IPIF_DUPLICATE
) ||
1257 (ipif
->ipif_state_flags
& IPIF_CONDEMNED
)) {
1258 mutex_exit(&ill
->ill_lock
);
1262 ipif
->ipif_flags
&= ~IPIF_DUPLICATE
;
1263 ill
->ill_ipif_dup_count
--;
1264 mutex_exit(&ill
->ill_lock
);
1265 ipif
->ipif_was_dup
= B_TRUE
;
1267 if (ill
->ill_isv6
) {
1268 VERIFY(ipif_ndp_up(ipif
, B_TRUE
) != EINPROGRESS
);
1269 (void) ipif_up_done_v6(ipif
);
1271 VERIFY(ipif_arp_up(ipif
, Res_act_initial
, B_TRUE
) !=
1273 (void) ipif_up_done(ipif
);
1280 * Attempt to recover an IPv6 interface that's been shut down as a duplicate.
1281 * As long as someone else holds the address, the interface will stay down.
1282 * When that conflict goes away, the interface is brought back up. This is
1283 * done so that accidental shutdowns of addresses aren't made permanent. Your
1284 * server will recover from a failure.
1286 * For DHCP and temporary addresses, recovery is not done in the kernel.
1287 * Instead, it's handled by user space processes (dhcpagent and in.ndpd).
1289 * This function is entered on a timer expiry; the ID is in ipif_recovery_id.
1292 ipif_dup_recovery(void *arg
)
1296 ipif
->ipif_recovery_id
= 0;
1297 if (!(ipif
->ipif_flags
& IPIF_DUPLICATE
))
1301 * No lock, because this is just an optimization.
1303 if (ipif
->ipif_state_flags
& IPIF_CONDEMNED
)
1306 /* If the link is down, we'll retry this later */
1307 if (!(ipif
->ipif_ill
->ill_phyint
->phyint_flags
& PHYI_RUNNING
))
1310 ipif_do_recovery(ipif
);
1314 * Perform interface recovery by forcing the duplicate interfaces up and
1315 * allowing the system to determine which ones should stay up.
1317 * Called both by recovery timer expiry and link-up notification.
1320 ipif_do_recovery(ipif_t
*ipif
)
1322 ill_t
*ill
= ipif
->ipif_ill
;
1324 ip_stack_t
*ipst
= ill
->ill_ipst
;
1327 if (ipif
->ipif_isv6
)
1328 mp_size
= sizeof (ipif
->ipif_v6lcl_addr
);
1330 mp_size
= sizeof (ipif
->ipif_lcl_addr
);
1331 mp
= allocb(mp_size
, BPRI_MED
);
1333 mutex_enter(&ill
->ill_lock
);
1334 if (ipst
->ips_ip_dup_recovery
> 0 &&
1335 ipif
->ipif_recovery_id
== 0 &&
1336 !(ipif
->ipif_state_flags
& IPIF_CONDEMNED
)) {
1337 ipif
->ipif_recovery_id
= timeout(ipif_dup_recovery
,
1338 ipif
, MSEC_TO_TICK(ipst
->ips_ip_dup_recovery
));
1340 mutex_exit(&ill
->ill_lock
);
1343 * A recovery timer may still be running if we got here from
1344 * ill_restart_dad(); cancel that timer.
1346 if (ipif
->ipif_recovery_id
!= 0)
1347 (void) untimeout(ipif
->ipif_recovery_id
);
1348 ipif
->ipif_recovery_id
= 0;
1350 if (ipif
->ipif_isv6
) {
1351 bcopy(&ipif
->ipif_v6lcl_addr
, mp
->b_rptr
,
1352 sizeof (ipif
->ipif_v6lcl_addr
));
1354 bcopy(&ipif
->ipif_lcl_addr
, mp
->b_rptr
,
1355 sizeof (ipif
->ipif_lcl_addr
));
1358 qwriter_ip(ill
, ill
->ill_rq
, mp
, ip_addr_recover
, NEW_OP
,
1364 * Find the MAC and IP addresses in an NA/NS message.
1367 ip_ndp_find_addresses(mblk_t
*mp
, ip_recv_attr_t
*ira
, ill_t
*ill
,
1368 in6_addr_t
*targp
, uchar_t
**haddr
, uint_t
*haddrlenp
)
1370 icmp6_t
*icmp6
= (icmp6_t
*)(mp
->b_rptr
+ IPV6_HDR_LEN
);
1371 nd_neighbor_solicit_t
*ns
= (nd_neighbor_solicit_t
*)icmp6
;
1375 /* icmp_inbound_v6 ensures this */
1376 ASSERT(ira
->ira_flags
& IRAF_L2SRC_SET
);
1378 addr
= ira
->ira_l2src
;
1379 alen
= ill
->ill_phys_addr_length
;
1388 /* nd_ns_target and nd_na_target are at the same offset, so we cheat */
1389 *targp
= ns
->nd_ns_target
;
1393 * This is for exclusive changes due to NDP duplicate address detection
1398 ip_ndp_excl(ipsq_t
*ipsq
, queue_t
*rq
, mblk_t
*mp
, void *dummy_arg
)
1400 ill_t
*ill
= rq
->q_ptr
;
1404 ip_stack_t
*ipst
= ill
->ill_ipst
;
1406 ip_recv_attr_t iras
;
1411 attrmp
->b_cont
= NULL
;
1412 if (!ip_recv_attr_from_mblk(attrmp
, &iras
)) {
1413 /* The ill or ip_stack_t disappeared on us */
1414 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInDiscards
);
1415 ip_drop_input("ip_recv_attr_from_mblk", mp
, ill
);
1417 ira_cleanup(&iras
, B_TRUE
);
1421 ASSERT(ill
== iras
.ira_rill
);
1423 ip_ndp_find_addresses(mp
, &iras
, ill
, &targ
, &haddr
, &haddrlen
);
1424 if (haddr
!= NULL
&& haddrlen
== ill
->ill_phys_addr_length
) {
1426 * Ignore conflicts generated by misbehaving switches that
1427 * just reflect our own messages back to us. For IPMP, we may
1428 * see reflections across any ill in the illgrp.
1430 * RFC2462 and revisions tried to detect both the case
1431 * when a statically configured IPv6 address is a duplicate,
1432 * and the case when the L2 address itself is a duplicate. The
1433 * latter is important because, with stateless address autoconf,
1434 * if the L2 address is a duplicate, the resulting IPv6
1435 * address(es) would also be duplicates. We rely on DAD of the
1436 * IPv6 address itself to detect the latter case.
1438 /* For an under ill, ill_grp can change; hold ill_g_lock while reading it */
1439 rw_enter(&ipst
->ips_ill_g_lock
, RW_READER
);
1440 if (bcmp(haddr
, ill
->ill_phys_addr
, haddrlen
) == 0 ||
1441 IS_UNDER_IPMP(ill
) &&
1442 ipmp_illgrp_find_ill(ill
->ill_grp
, haddr
,
1443 haddrlen
) != NULL
) {
1444 rw_exit(&ipst
->ips_ill_g_lock
);
1445 goto ignore_conflict
;
1447 rw_exit(&ipst
->ips_ill_g_lock
);
1451 * Look up the appropriate ipif.
1453 ipif
= ipif_lookup_addr_v6(&targ
, ill
, ALL_ZONES
, ipst
);
1455 goto ignore_conflict
;
1457 /* Reload the ill to match the ipif */
1458 ill
= ipif
->ipif_ill
;
1460 /* If it's already duplicate or ineligible, then don't do anything. */
1461 if (ipif
->ipif_flags
& (IPIF_POINTOPOINT
|IPIF_DUPLICATE
)) {
1463 goto ignore_conflict
;
1467 * If this is a failure during duplicate recovery, then don't
1468 * complain. It may take a long time to recover.
1470 if (!ipif
->ipif_was_dup
) {
1471 char ibuf
[LIFNAMSIZ
];
1472 char hbuf
[MAC_STR_LEN
];
1473 char sbuf
[INET6_ADDRSTRLEN
];
1475 ipif_get_name(ipif
, ibuf
, sizeof (ibuf
));
1476 cmn_err(CE_WARN
, "%s has duplicate address %s (in use by %s);"
1478 inet_ntop(AF_INET6
, &targ
, sbuf
, sizeof (sbuf
)),
1479 mac_colon_addr(haddr
, haddrlen
, hbuf
, sizeof (hbuf
)));
1481 mutex_enter(&ill
->ill_lock
);
1482 ASSERT(!(ipif
->ipif_flags
& IPIF_DUPLICATE
));
1483 ipif
->ipif_flags
|= IPIF_DUPLICATE
;
1484 ill
->ill_ipif_dup_count
++;
1485 mutex_exit(&ill
->ill_lock
);
1486 (void) ipif_down(ipif
, NULL
, NULL
);
1487 (void) ipif_down_tail(ipif
);
1488 mutex_enter(&ill
->ill_lock
);
1489 if (!(ipif
->ipif_flags
& (IPIF_DHCPRUNNING
|IPIF_TEMPORARY
)) &&
1490 ill
->ill_net_type
== IRE_IF_RESOLVER
&&
1491 !(ipif
->ipif_state_flags
& IPIF_CONDEMNED
) &&
1492 ipst
->ips_ip_dup_recovery
> 0) {
1493 ASSERT(ipif
->ipif_recovery_id
== 0);
1494 ipif
->ipif_recovery_id
= timeout(ipif_dup_recovery
,
1495 ipif
, MSEC_TO_TICK(ipst
->ips_ip_dup_recovery
));
1497 mutex_exit(&ill
->ill_lock
);
1502 ira_cleanup(&iras
, B_TRUE
);
1506 * Handle failure by tearing down the ipifs with the specified address. Note
1507 * that tearing down the ipif also means deleting the ncec through ipif_down, so
1508 * it's not possible to do recovery by just restarting the ncec timer. Instead,
1509 * we start a timer on the ipif.
1510 * Caller has to free mp.
1513 ndp_failure(mblk_t
*mp
, ip_recv_attr_t
*ira
)
1515 const uchar_t
*haddr
;
1516 ill_t
*ill
= ira
->ira_rill
;
1519 * Ignore conflicts generated by misbehaving switches that just
1520 * reflect our own messages back to us.
1523 /* icmp_inbound_v6 ensures this */
1524 ASSERT(ira
->ira_flags
& IRAF_L2SRC_SET
);
1525 haddr
= ira
->ira_l2src
;
1526 if (haddr
!= NULL
&&
1527 bcmp(haddr
, ill
->ill_phys_addr
, ill
->ill_phys_addr_length
) == 0) {
1531 if ((mp
= copymsg(mp
)) != NULL
) {
1534 attrmp
= ip_recv_attr_to_mblk(ira
);
1535 if (attrmp
== NULL
) {
1536 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInDiscards
);
1537 ip_drop_input("ipIfStatsInDiscards", mp
, ill
);
1540 ASSERT(attrmp
->b_cont
== NULL
);
1541 attrmp
->b_cont
= mp
;
1544 qwriter_ip(ill
, ill
->ill_rq
, mp
, ip_ndp_excl
, NEW_OP
,
1551 * Handle a discovered conflict: some other system is advertising that it owns
1552 * one of our IP addresses. We need to defend ourselves, or just shut down the
1555 * Handles both IPv4 and IPv6
1558 ip_nce_conflict(mblk_t
*mp
, ip_recv_attr_t
*ira
, ncec_t
*ncec
)
1564 ill_t
*ill
= ira
->ira_ill
;
1565 ip_stack_t
*ipst
= ill
->ill_ipst
;
1567 boolean_t isv6
= ill
->ill_isv6
;
1571 ipif
= ipif_lookup_addr_v6(&ncec
->ncec_addr
, ill
, ALL_ZONES
,
1574 if (arp_no_defense
) {
1576 * Yes, there is a conflict, but no, we do not
1581 IN6_V4MAPPED_TO_IPADDR(&ncec
->ncec_addr
, ncec_addr
);
1582 ipif
= ipif_lookup_addr(ncec_addr
, ill
, ALL_ZONES
,
1589 * First, figure out if this address is disposable.
1591 if (ipif
->ipif_flags
& (IPIF_DHCPRUNNING
| IPIF_TEMPORARY
))
1592 maxdefense
= ipst
->ips_ip_max_temp_defend
;
1594 maxdefense
= ipst
->ips_ip_max_defend
;
1597 * Now figure out how many times we've defended ourselves. Ignore
1598 * defenses that happened long in the past.
1600 now
= ddi_get_lbolt();
1601 elapsed
= (drv_hztousec(now
- ncec
->ncec_last_time_defended
))/1000000;
1602 mutex_enter(&ncec
->ncec_lock
);
1603 if ((defs
= ncec
->ncec_defense_count
) > 0 &&
1604 elapsed
> ipst
->ips_ip_defend_interval
) {
1606 * ip_defend_interval has elapsed.
1607 * reset the defense count.
1609 ncec
->ncec_defense_count
= defs
= 0;
1611 ncec
->ncec_defense_count
++;
1612 ncec
->ncec_last_time_defended
= now
;
1613 mutex_exit(&ncec
->ncec_lock
);
1617 * If we've defended ourselves too many times already, then give up and
1618 * tear down the interface(s) using this address.
1619 * Otherwise, caller has to defend by sending out an announce.
1621 if (defs
>= maxdefense
) {
1623 ndp_failure(mp
, ira
);
1625 arp_failure(mp
, ira
);
1627 return (B_TRUE
); /* caller must defend this address */
1633 * Handle reception of Neighbor Solicitation messages.
1636 ndp_input_solicit(mblk_t
*mp
, ip_recv_attr_t
*ira
)
1638 ill_t
*ill
= ira
->ira_ill
, *under_ill
;
1639 nd_neighbor_solicit_t
*ns
;
1640 uint32_t hlen
= ill
->ill_phys_addr_length
;
1641 uchar_t
*haddr
= NULL
;
1644 ncec_t
*our_ncec
= NULL
;
1649 nd_opt_hdr_t
*opt
= NULL
;
1650 boolean_t bad_solicit
= B_FALSE
;
1651 mib2_ipv6IfIcmpEntry_t
*mib
= ill
->ill_icmp6_mib
;
1652 boolean_t need_ill_refrele
= B_FALSE
;
1654 ip6h
= (ip6_t
*)mp
->b_rptr
;
1655 icmp_nd
= (icmp6_t
*)(mp
->b_rptr
+ IPV6_HDR_LEN
);
1656 len
= mp
->b_wptr
- mp
->b_rptr
- IPV6_HDR_LEN
;
1657 src
= ip6h
->ip6_src
;
1658 ns
= (nd_neighbor_solicit_t
*)icmp_nd
;
1659 target
= ns
->nd_ns_target
;
1660 if (IN6_IS_ADDR_MULTICAST(&target
) || IN6_IS_ADDR_V4MAPPED(&target
) ||
1661 IN6_IS_ADDR_LOOPBACK(&target
)) {
1664 pr_addr_dbg("ndp_input_solicit: Martian Target %s\n",
1667 bad_solicit
= B_TRUE
;
1670 if (len
> sizeof (nd_neighbor_solicit_t
)) {
1671 /* Options present */
1672 opt
= (nd_opt_hdr_t
*)&ns
[1];
1673 len
-= sizeof (nd_neighbor_solicit_t
);
1674 if (!ndp_verify_optlen(opt
, len
)) {
1675 ip1dbg(("ndp_input_solicit: Bad opt len\n"));
1676 bad_solicit
= B_TRUE
;
1680 if (IN6_IS_ADDR_UNSPECIFIED(&src
)) {
1681 /* Check to see if this is a valid DAD solicitation */
1682 if (!IN6_IS_ADDR_MC_SOLICITEDNODE(&ip6h
->ip6_dst
)) {
1685 pr_addr_dbg("ndp_input_solicit: IPv6 "
1686 "Destination is not solicited node "
1687 "multicast %s\n", AF_INET6
,
1690 bad_solicit
= B_TRUE
;
1696 * NOTE: with IPMP, it's possible the nominated multicast ill (which
1697 * received this packet if it's multicast) is not the ill tied to
1698 * e.g. the IPMP ill's data link-local. So we match across the illgrp
1699 * to ensure we find the associated NCE.
1701 our_ncec
= ncec_lookup_illgrp_v6(ill
, &target
);
1703 * If this is a valid Solicitation for an address we are publishing,
1704 * then a PUBLISH entry should exist in the cache
1706 if (our_ncec
== NULL
|| !NCE_PUBLISH(our_ncec
)) {
1707 ip1dbg(("ndp_input_solicit: Wrong target in NS?!"
1708 "ifname=%s ", ill
->ill_name
));
1711 pr_addr_dbg(" dst %s\n", AF_INET6
, &target
);
1713 if (our_ncec
== NULL
)
1714 bad_solicit
= B_TRUE
;
1718 /* At this point we should have a verified NS per spec */
1720 opt
= ndp_get_option(opt
, len
, ND_OPT_SOURCE_LINKADDR
);
1722 haddr
= (uchar_t
*)&opt
[1];
1723 if (hlen
> opt
->nd_opt_len
* 8 - sizeof (*opt
) ||
1725 ip1dbg(("ndp_input_advert: bad SLLA\n"));
1726 bad_solicit
= B_TRUE
;
1732 /* If sending directly to peer, set the unicast flag */
1733 if (!IN6_IS_ADDR_MULTICAST(&ip6h
->ip6_dst
))
1734 flag
|= NDP_UNICAST
;
1737 * Create/update the entry for the soliciting node on the ipmp_ill,
1738 * or respond to outstanding queries. Don't do so if
1739 * the source is the unspecified address.
1741 if (!IN6_IS_ADDR_UNSPECIFIED(&src
)) {
1745 ASSERT(ill
->ill_isv6
);
1747 * Regular solicitations *must* include the Source Link-Layer
1748 * Address option. Ignore messages that do not.
1750 if (haddr
== NULL
&& IN6_IS_ADDR_MULTICAST(&ip6h
->ip6_dst
)) {
1751 ip1dbg(("ndp_input_solicit: source link-layer address "
1752 "option missing with a specified source.\n"));
1753 bad_solicit
= B_TRUE
;
1758 * This is a regular solicitation. If we're still in the
1759 * process of verifying the address, then don't respond at all
1760 * and don't keep track of the sender.
1762 if (our_ncec
->ncec_state
== ND_PROBE
)
1766 * If the solicitation doesn't have sender hardware address
1767 * (legal for unicast solicitation), then process without
1768 * installing the return NCE. Either we already know it, or
1769 * we'll be forced to look it up when (and if) we reply to the
1776 if (IS_UNDER_IPMP(under_ill
)) {
1777 ill
= ipmp_ill_hold_ipmp_ill(under_ill
);
1781 need_ill_refrele
= B_TRUE
;
1783 err
= nce_lookup_then_add_v6(ill
,
1785 &src
, /* Soliciting nodes address */
1790 if (need_ill_refrele
) {
1793 need_ill_refrele
= B_FALSE
;
1797 /* done with this entry */
1802 * B_FALSE indicates this is not an advertisement.
1804 nce_process(nnce
->nce_common
, haddr
, 0, B_FALSE
);
1808 ip1dbg(("ndp_input_solicit: Can't create NCE %d\n",
1813 flag
|= NDP_SOLICITED
;
1816 * No source link layer address option should be present in a
1817 * valid DAD request.
1819 if (haddr
!= NULL
) {
1820 ip1dbg(("ndp_input_solicit: source link-layer address "
1821 "option present with an unspecified source.\n"));
1822 bad_solicit
= B_TRUE
;
1825 if (our_ncec
->ncec_state
== ND_PROBE
) {
1827 * Internally looped-back probes will have
1828 * IRAF_L2SRC_LOOPBACK set so we can ignore our own
1831 if (!(ira
->ira_flags
& IRAF_L2SRC_LOOPBACK
)) {
1833 * If someone else is probing our address, then
1834 * we've crossed wires. Declare failure.
1836 ndp_failure(mp
, ira
);
1841 * This is a DAD probe. Multicast the advertisement to the
1842 * all-nodes address.
1844 src
= ipv6_all_hosts_mcast
;
1846 flag
|= nce_advert_flags(our_ncec
);
1847 (void) ndp_xmit(ill
,
1849 our_ncec
->ncec_lladdr
,
1850 our_ncec
->ncec_lladdr_length
,
1851 &target
, /* Source and target of the advertisement pkt */
1852 &src
, /* IP Destination (source of original pkt) */
1856 BUMP_MIB(mib
, ipv6IfIcmpInBadNeighborSolicitations
);
1857 if (our_ncec
!= NULL
)
1858 ncec_refrele(our_ncec
);
1862 * Handle reception of Neighbor Advertisement messages
1865 ndp_input_advert(mblk_t
*mp
, ip_recv_attr_t
*ira
)
1867 ill_t
*ill
= ira
->ira_ill
;
1868 nd_neighbor_advert_t
*na
;
1869 uint32_t hlen
= ill
->ill_phys_addr_length
;
1870 uchar_t
*haddr
= NULL
;
1873 ncec_t
*dst_ncec
= NULL
;
1875 nd_opt_hdr_t
*opt
= NULL
;
1877 ip_stack_t
*ipst
= ill
->ill_ipst
;
1878 mib2_ipv6IfIcmpEntry_t
*mib
= ill
->ill_icmp6_mib
;
1880 ip6h
= (ip6_t
*)mp
->b_rptr
;
1881 icmp_nd
= (icmp6_t
*)(mp
->b_rptr
+ IPV6_HDR_LEN
);
1882 len
= mp
->b_wptr
- mp
->b_rptr
- IPV6_HDR_LEN
;
1883 na
= (nd_neighbor_advert_t
*)icmp_nd
;
1885 if (IN6_IS_ADDR_MULTICAST(&ip6h
->ip6_dst
) &&
1886 (na
->nd_na_flags_reserved
& ND_NA_FLAG_SOLICITED
)) {
1887 ip1dbg(("ndp_input_advert: Target is multicast but the "
1888 "solicited flag is not zero\n"));
1889 BUMP_MIB(mib
, ipv6IfIcmpInBadNeighborAdvertisements
);
1892 target
= na
->nd_na_target
;
1893 if (IN6_IS_ADDR_MULTICAST(&target
) || IN6_IS_ADDR_V4MAPPED(&target
) ||
1894 IN6_IS_ADDR_LOOPBACK(&target
)) {
1897 pr_addr_dbg("ndp_input_solicit: Martian Target %s\n",
1900 BUMP_MIB(mib
, ipv6IfIcmpInBadNeighborAdvertisements
);
1903 if (len
> sizeof (nd_neighbor_advert_t
)) {
1904 opt
= (nd_opt_hdr_t
*)&na
[1];
1905 if (!ndp_verify_optlen(opt
,
1906 len
- sizeof (nd_neighbor_advert_t
))) {
1907 ip1dbg(("ndp_input_advert: cannot verify SLLA\n"));
1908 BUMP_MIB(mib
, ipv6IfIcmpInBadNeighborAdvertisements
);
1911 /* At this point we have a verified NA per spec */
1912 len
-= sizeof (nd_neighbor_advert_t
);
1913 opt
= ndp_get_option(opt
, len
, ND_OPT_TARGET_LINKADDR
);
1915 haddr
= (uchar_t
*)&opt
[1];
1916 if (hlen
> opt
->nd_opt_len
* 8 - sizeof (*opt
) ||
1918 ip1dbg(("ndp_input_advert: bad SLLA\n"));
1920 ipv6IfIcmpInBadNeighborAdvertisements
);
1927 * NOTE: we match across the illgrp since we need to do DAD for all of
1928 * our local addresses, and those are spread across all the active
1929 * ills in the group.
1931 if ((dst_ncec
= ncec_lookup_illgrp_v6(ill
, &target
)) == NULL
)
1934 if (NCE_PUBLISH(dst_ncec
)) {
1936 * Someone just advertised an address that we publish. First,
1937 * check if it was us -- if so, we can safely ignore it.
1938 * We don't get the haddr from the ira_l2src because, in the
1939 * case that the packet originated from us, on an IPMP group,
1940 * the ira_l2src would be the link-layer address of the
1941 * cast_ill used to send the packet, which may not be the same
1942 * as the dst_ncec->ncec_lladdr of the address.
1944 if (haddr
!= NULL
) {
1945 if (ira
->ira_flags
& IRAF_L2SRC_LOOPBACK
)
1948 if (!nce_cmp_ll_addr(dst_ncec
, haddr
, hlen
))
1949 goto out
; /* from us -- no conflict */
1952 * If we're in an IPMP group, check if this is an echo
1953 * from another ill in the group. Use the double-
1954 * checked locking pattern to avoid grabbing
1955 * ill_g_lock in the non-IPMP case.
1957 if (IS_UNDER_IPMP(ill
)) {
1958 rw_enter(&ipst
->ips_ill_g_lock
, RW_READER
);
1959 if (IS_UNDER_IPMP(ill
) && ipmp_illgrp_find_ill(
1960 ill
->ill_grp
, haddr
, hlen
) != NULL
) {
1961 rw_exit(&ipst
->ips_ill_g_lock
);
1964 rw_exit(&ipst
->ips_ill_g_lock
);
1969 * This appears to be a real conflict. If we're trying to
1970 * configure this NCE (ND_PROBE), then shut it down.
1971 * Otherwise, handle the discovered conflict.
1973 if (dst_ncec
->ncec_state
== ND_PROBE
) {
1974 ndp_failure(mp
, ira
);
1976 if (ip_nce_conflict(mp
, ira
, dst_ncec
)) {
1977 char hbuf
[MAC_STR_LEN
];
1978 char sbuf
[INET6_ADDRSTRLEN
];
1981 "node '%s' is using %s on %s",
1982 inet_ntop(AF_INET6
, &target
, sbuf
,
1984 haddr
== NULL
? "<none>" :
1985 mac_colon_addr(haddr
, hlen
, hbuf
,
1986 sizeof (hbuf
)), ill
->ill_name
);
1988 * RFC 4862, Section 5.4.4 does not mandate
1989 * any specific behavior when an NA matches
1990 * a non-tentative address assigned to the
1991 * receiver. We make the choice of defending
1992 * our address, based on the assumption that
1993 * the sender has not detected the Duplicate.
1995 * ncec_last_time_defended has been adjusted
1996 * in ip_nce_conflict()
1998 (void) ndp_announce(dst_ncec
);
2002 if (na
->nd_na_flags_reserved
& ND_NA_FLAG_ROUTER
)
2003 dst_ncec
->ncec_flags
|= NCE_F_ISROUTER
;
2005 /* B_TRUE indicates this an advertisement */
2006 nce_process(dst_ncec
, haddr
, na
->nd_na_flags_reserved
, B_TRUE
);
2009 ncec_refrele(dst_ncec
);
2013 * Process NDP neighbor solicitation/advertisement messages.
2014 * The checksum has already been verified before reaching here.
2015 * Information about the datalink header is contained in ira_l2src, but
2016 * that should be ignored for loopback packets.
2019 ndp_input(mblk_t
*mp
, ip_recv_attr_t
*ira
)
2021 ill_t
*ill
= ira
->ira_rill
;
2025 mib2_ipv6IfIcmpEntry_t
*mib
= ill
->ill_icmp6_mib
;
2026 ill_t
*orig_ill
= NULL
;
2029 * Since ira_ill is where the IRE_LOCAL was hosted we use ira_rill
2030 * and make it be the IPMP upper so as to avoid being confused by a packet
2031 * addressed to a unicast address on a different ill.
2033 if (IS_UNDER_IPMP(ill
)) {
2035 ill
= ipmp_ill_hold_ipmp_ill(orig_ill
);
2038 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInDiscards
);
2039 ip_drop_input("ipIfStatsInDiscards - IPMP ill",
2044 ASSERT(ill
!= orig_ill
);
2045 orig_ill
= ira
->ira_ill
;
2047 mib
= ill
->ill_icmp6_mib
;
2049 if (!pullupmsg(mp
, -1)) {
2050 ip1dbg(("ndp_input: pullupmsg failed\n"));
2051 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInDiscards
);
2052 ip_drop_input("ipIfStatsInDiscards - pullupmsg", mp
, ill
);
2055 ip6h
= (ip6_t
*)mp
->b_rptr
;
2056 if (ip6h
->ip6_hops
!= IPV6_MAX_HOPS
) {
2057 ip1dbg(("ndp_input: hoplimit != IPV6_MAX_HOPS\n"));
2058 ip_drop_input("ipv6IfIcmpBadHoplimit", mp
, ill
);
2059 BUMP_MIB(mib
, ipv6IfIcmpBadHoplimit
);
2063 * NDP does not accept any extension headers between the
2064 * IP header and the ICMP header since e.g. a routing
2065 * header could be dangerous.
2066 * This assumes that any AH or ESP headers are removed
2067 * by ip prior to passing the packet to ndp_input.
2069 if (ip6h
->ip6_nxt
!= IPPROTO_ICMPV6
) {
2070 ip1dbg(("ndp_input: Wrong next header 0x%x\n",
2072 ip_drop_input("Wrong next header", mp
, ill
);
2073 BUMP_MIB(mib
, ipv6IfIcmpInErrors
);
2076 icmp_nd
= (icmp6_t
*)(mp
->b_rptr
+ IPV6_HDR_LEN
);
2077 ASSERT(icmp_nd
->icmp6_type
== ND_NEIGHBOR_SOLICIT
||
2078 icmp_nd
->icmp6_type
== ND_NEIGHBOR_ADVERT
);
2079 if (icmp_nd
->icmp6_code
!= 0) {
2080 ip1dbg(("ndp_input: icmp6 code != 0 \n"));
2081 ip_drop_input("code non-zero", mp
, ill
);
2082 BUMP_MIB(mib
, ipv6IfIcmpInErrors
);
2085 len
= mp
->b_wptr
- mp
->b_rptr
- IPV6_HDR_LEN
;
2087 * Make sure packet length is large enough for either
2088 * a NS or a NA icmp packet.
2090 if (len
< sizeof (struct icmp6_hdr
) + sizeof (struct in6_addr
)) {
2091 ip1dbg(("ndp_input: packet too short\n"));
2092 ip_drop_input("packet too short", mp
, ill
);
2093 BUMP_MIB(mib
, ipv6IfIcmpInErrors
);
2096 if (icmp_nd
->icmp6_type
== ND_NEIGHBOR_SOLICIT
) {
2097 ndp_input_solicit(mp
, ira
);
2099 ndp_input_advert(mp
, ira
);
2103 if (orig_ill
!= NULL
) {
2105 ira
->ira_ill
= orig_ill
;
2110 * ndp_xmit is called to form and transmit a ND solicitation or
2111 * advertisement ICMP packet.
2113 * If the source address is unspecified and this isn't a probe (used for
2114 * duplicate address detection), an appropriate source address and link layer
2115 * address will be chosen here. The link layer address option is included if
2116 * the source is specified (i.e., all non-probe packets), and omitted (per the
2117 * specification) otherwise.
2119 * It returns B_FALSE only if it does a successful put() to the
2120 * corresponding ill's ill_wq otherwise returns B_TRUE.
2123 ndp_xmit(ill_t
*ill
, uint32_t operation
, uint8_t *hw_addr
, uint_t hw_addr_len
,
2124 const in6_addr_t
*sender
, const in6_addr_t
*target
, int flag
)
2132 zoneid_t zoneid
= GLOBAL_ZONEID
;
2133 ill_t
*hwaddr_ill
= ill
;
2134 ip_xmit_attr_t ixas
;
2135 ip_stack_t
*ipst
= ill
->ill_ipst
;
2136 boolean_t need_refrele
= B_FALSE
;
2137 boolean_t probe
= B_FALSE
;
2139 if (IS_UNDER_IPMP(ill
)) {
2140 probe
= ipif_lookup_testaddr_v6(ill
, sender
, NULL
);
2142 * We send non-probe packets on the upper IPMP interface.
2143 * ip_output_simple() will use cast_ill for sending any
2144 * multicast packets. Note that we can't follow the same
2145 * logic for probe packets because all interfaces in the ipmp
2146 * group may have failed, so that we really want to only try
2147 * to send the ND packet on the ill corresponding to the src
2151 ill
= ipmp_ill_hold_ipmp_ill(ill
);
2153 need_refrele
= B_TRUE
;
2160 * If we have an unspecified source (sender) address, select a
2161 * proper source address for the solicitation here itself so
2162 * that we can initialize the h/w address correctly.
2164 * If the sender is specified then we use this address in order
2165 * to lookup the zoneid before calling ip_output_simple(). This is to
2166 * enable unicast ND_NEIGHBOR_ADVERT packets to be routed correctly
2167 * by IP (we cannot guarantee that the global zone has an interface
2168 * route to the destination).
2170 * Note that the NA never comes here with the unspecified source
2175 * Probes will have unspec src at this point.
2177 if (!(IN6_IS_ADDR_UNSPECIFIED(sender
))) {
2178 zoneid
= ipif_lookup_addr_zoneid_v6(sender
, ill
, ipst
);
2180 * It's possible for ipif_lookup_addr_zoneid_v6() to return
2181 * ALL_ZONES if it cannot find a matching ipif for the address
2182 * we are trying to use. In this case we err on the side of
2183 * trying to send the packet by defaulting to the GLOBAL_ZONEID.
2185 if (zoneid
== ALL_ZONES
)
2186 zoneid
= GLOBAL_ZONEID
;
2189 plen
= (sizeof (nd_opt_hdr_t
) + hw_addr_len
+ 7) / 8;
2190 len
= IPV6_HDR_LEN
+ sizeof (nd_neighbor_advert_t
) + plen
* 8;
2191 mp
= allocb(len
, BPRI_LO
);
2198 bzero((char *)mp
->b_rptr
, len
);
2199 mp
->b_wptr
= mp
->b_rptr
+ len
;
2201 bzero(&ixas
, sizeof (ixas
));
2202 ixas
.ixa_flags
= IXAF_SET_ULP_CKSUM
| IXAF_NO_HW_CKSUM
;
2204 ixas
.ixa_ifindex
= ill
->ill_phyint
->phyint_ifindex
;
2205 ixas
.ixa_ipst
= ipst
;
2206 ixas
.ixa_cred
= kcred
;
2207 ixas
.ixa_cpid
= NOPID
;
2208 ixas
.ixa_zoneid
= zoneid
;
2210 ip6h
= (ip6_t
*)mp
->b_rptr
;
2211 ip6h
->ip6_vcf
= IPV6_DEFAULT_VERS_AND_FLOW
;
2212 ip6h
->ip6_plen
= htons(len
- IPV6_HDR_LEN
);
2213 ip6h
->ip6_nxt
= IPPROTO_ICMPV6
;
2214 ip6h
->ip6_hops
= IPV6_MAX_HOPS
;
2215 ixas
.ixa_multicast_ttl
= ip6h
->ip6_hops
;
2216 ip6h
->ip6_dst
= *target
;
2217 icmp6
= (icmp6_t
*)&ip6h
[1];
2219 if (hw_addr_len
!= 0) {
2220 opt
= (nd_opt_hdr_t
*)((uint8_t *)ip6h
+ IPV6_HDR_LEN
+
2221 sizeof (nd_neighbor_advert_t
));
2225 if (operation
== ND_NEIGHBOR_SOLICIT
) {
2226 nd_neighbor_solicit_t
*ns
= (nd_neighbor_solicit_t
*)icmp6
;
2228 if (opt
!= NULL
&& !(flag
& NDP_PROBE
)) {
2230 * Note that we don't send out SLLA for ND probes
2231 * per RFC 4862, even though we do send out the src
2232 * haddr for IPv4 DAD probes, even though both IPv4
2233 * and IPv6 go out with the unspecified/INADDR_ANY
2236 opt
->nd_opt_type
= ND_OPT_SOURCE_LINKADDR
;
2238 ip6h
->ip6_src
= *sender
;
2239 ns
->nd_ns_target
= *target
;
2240 if (!(flag
& NDP_UNICAST
)) {
2241 /* Form multicast address of the target */
2242 ip6h
->ip6_dst
= ipv6_solicited_node_mcast
;
2243 ip6h
->ip6_dst
.s6_addr32
[3] |=
2244 ns
->nd_ns_target
.s6_addr32
[3];
2247 nd_neighbor_advert_t
*na
= (nd_neighbor_advert_t
*)icmp6
;
2249 ASSERT(!(flag
& NDP_PROBE
));
2251 opt
->nd_opt_type
= ND_OPT_TARGET_LINKADDR
;
2252 ip6h
->ip6_src
= *sender
;
2253 na
->nd_na_target
= *sender
;
2254 if (flag
& NDP_ISROUTER
)
2255 na
->nd_na_flags_reserved
|= ND_NA_FLAG_ROUTER
;
2256 if (flag
& NDP_SOLICITED
)
2257 na
->nd_na_flags_reserved
|= ND_NA_FLAG_SOLICITED
;
2258 if (flag
& NDP_ORIDE
)
2259 na
->nd_na_flags_reserved
|= ND_NA_FLAG_OVERRIDE
;
2262 if (!(flag
& NDP_PROBE
)) {
2263 if (hw_addr
!= NULL
&& opt
!= NULL
) {
2264 /* Fill in link layer address and option len */
2265 opt
->nd_opt_len
= (uint8_t)plen
;
2266 bcopy(hw_addr
, &opt
[1], hw_addr_len
);
2269 if (opt
!= NULL
&& opt
->nd_opt_type
== 0) {
2270 /* If there's no link layer address option, then strip it. */
2272 mp
->b_wptr
= mp
->b_rptr
+ len
;
2273 ip6h
->ip6_plen
= htons(len
- IPV6_HDR_LEN
);
2276 icmp6
->icmp6_type
= (uint8_t)operation
;
2277 icmp6
->icmp6_code
= 0;
2279 * Prepare for checksum by putting icmp length in the icmp
2280 * checksum field. The checksum is calculated in ip_output.c.
2282 icmp6
->icmp6_cksum
= ip6h
->ip6_plen
;
2284 (void) ip_output_simple(mp
, &ixas
);
2292 * Used to set ND_UNREACHABLE before ncec_delete sets it NCE_F_CONDEMNED.
2293 * The datapath uses this as an indication that there
2294 * is a problem (as opposed to an NCE that was just
2295 * reclaimed due to lack of memory).
2296 * Note that static ARP entries never become unreachable.
2299 nce_make_unreachable(ncec_t
*ncec
)
2301 mutex_enter(&ncec
->ncec_lock
);
2302 ncec
->ncec_state
= ND_UNREACHABLE
;
2303 mutex_exit(&ncec
->ncec_lock
);
2307 * NCE retransmit timer. Common to IPv4 and IPv6.
2308 * This timer goes off when:
2309 * a. It is time to retransmit a resolution for resolver.
2310 * b. It is time to send reachability probes.
2313 nce_timer(void *arg
)
2316 ill_t
*ill
= ncec
->ncec_ill
, *src_ill
;
2317 char addrbuf
[INET6_ADDRSTRLEN
];
2318 boolean_t dropped
= B_FALSE
;
2319 ip_stack_t
*ipst
= ncec
->ncec_ipst
;
2320 boolean_t isv6
= (ncec
->ncec_ipversion
== IPV6_VERSION
);
2321 in_addr_t sender4
= INADDR_ANY
;
2322 in6_addr_t sender6
= ipv6_all_zeros
;
2325 * The timer has to be cancelled by ncec_delete before doing the final
2326 * refrele. So the NCE is guaranteed to exist when the timer runs
2327 * until it clears the timeout_id. Before clearing the timeout_id
2328 * bump up the refcnt so that we can continue to use the ncec
2330 ASSERT(ncec
!= NULL
);
2331 mutex_enter(&ncec
->ncec_lock
);
2332 ncec_refhold_locked(ncec
);
2333 ncec
->ncec_timeout_id
= 0;
2334 mutex_exit(&ncec
->ncec_lock
);
2336 src_ill
= nce_resolve_src(ncec
, &sender6
);
2337 /* if we could not find a sender address, return */
2338 if (src_ill
== NULL
) {
2340 IN6_V4MAPPED_TO_IPADDR(&ncec
->ncec_addr
, sender4
);
2341 ip1dbg(("no src ill for %s\n", inet_ntop(AF_INET
,
2342 &sender4
, addrbuf
, sizeof (addrbuf
))));
2344 ip1dbg(("no src ill for %s\n", inet_ntop(AF_INET6
,
2345 &ncec
->ncec_addr
, addrbuf
, sizeof (addrbuf
))));
2347 nce_restart_timer(ncec
, ill
->ill_reachable_retrans_time
);
2352 IN6_V4MAPPED_TO_IPADDR(&sender6
, sender4
);
2354 mutex_enter(&ncec
->ncec_lock
);
2356 * Check the reachability state.
2358 switch (ncec
->ncec_state
) {
2360 ASSERT(ncec
->ncec_lladdr
!= NULL
);
2361 ncec
->ncec_state
= ND_PROBE
;
2362 ncec
->ncec_pcnt
= ND_MAX_UNICAST_SOLICIT
;
2364 mutex_exit(&ncec
->ncec_lock
);
2365 dropped
= ndp_xmit(src_ill
, ND_NEIGHBOR_SOLICIT
,
2366 src_ill
->ill_phys_addr
,
2367 src_ill
->ill_phys_addr_length
,
2368 &sender6
, &ncec
->ncec_addr
,
2371 dropped
= (arp_request(ncec
, sender4
, src_ill
) == 0);
2372 mutex_exit(&ncec
->ncec_lock
);
2375 mutex_enter(&ncec
->ncec_lock
);
2377 mutex_exit(&ncec
->ncec_lock
);
2381 pr_addr_dbg("nce_timer: state for %s changed "
2382 "to PROBE\n", AF_INET6
, &ncec
->ncec_addr
);
2384 nce_restart_timer(ncec
, ill
->ill_reachable_retrans_time
);
2387 /* must be retransmit timer */
2388 ASSERT(ncec
->ncec_pcnt
>= -1);
2389 if (ncec
->ncec_pcnt
> 0) {
2391 * As per RFC2461, the ncec gets deleted after
2392 * MAX_UNICAST_SOLICIT unsuccessful re-transmissions.
2393 * Note that the first unicast solicitation is sent
2394 * during the DELAY state.
2396 ip2dbg(("nce_timer: pcount=%x dst %s\n",
2398 inet_ntop((isv6
? AF_INET6
: AF_INET
),
2399 &ncec
->ncec_addr
, addrbuf
, sizeof (addrbuf
))));
2400 if (NCE_PUBLISH(ncec
)) {
2401 mutex_exit(&ncec
->ncec_lock
);
2403 * send out a probe; note that src_ill
2404 * is ignored by nce_dad() for all
2405 * DAD message types other than IPv6
2408 nce_dad(ncec
, src_ill
, B_TRUE
);
2410 ASSERT(src_ill
!= NULL
);
2412 mutex_exit(&ncec
->ncec_lock
);
2413 dropped
= ndp_xmit(src_ill
,
2414 ND_NEIGHBOR_SOLICIT
,
2415 src_ill
->ill_phys_addr
,
2416 src_ill
->ill_phys_addr_length
,
2417 &sender6
, &ncec
->ncec_addr
,
2421 * since the nce is REACHABLE,
2422 * the ARP request will be sent out
2423 * as a link-layer unicast.
2425 dropped
= (arp_request(ncec
, sender4
,
2427 mutex_exit(&ncec
->ncec_lock
);
2430 mutex_enter(&ncec
->ncec_lock
);
2432 mutex_exit(&ncec
->ncec_lock
);
2434 nce_restart_timer(ncec
,
2435 ill
->ill_reachable_retrans_time
);
2437 } else if (ncec
->ncec_pcnt
< 0) {
2438 /* No hope, delete the ncec */
2439 /* Tell datapath it went bad */
2440 ncec
->ncec_state
= ND_UNREACHABLE
;
2441 mutex_exit(&ncec
->ncec_lock
);
2444 pr_addr_dbg("nce_timer: Delete NCE for"
2445 " dst %s\n", (isv6
? AF_INET6
: AF_INET
),
2448 /* if static ARP can't delete. */
2449 if ((ncec
->ncec_flags
& NCE_F_STATIC
) == 0)
2452 } else if (!NCE_PUBLISH(ncec
)) {
2454 * Probe count is 0 for a dynamic entry (one that we
2455 * ourselves are not publishing). We should never get
2456 * here if NONUD was requested, hence the ASSERT below.
2458 ASSERT((ncec
->ncec_flags
& NCE_F_NONUD
) == 0);
2459 ip2dbg(("nce_timer: pcount=%x dst %s\n",
2460 ncec
->ncec_pcnt
, inet_ntop(AF_INET6
,
2461 &ncec
->ncec_addr
, addrbuf
, sizeof (addrbuf
))));
2463 mutex_exit(&ncec
->ncec_lock
);
2464 /* Wait one interval before killing */
2465 nce_restart_timer(ncec
,
2466 ill
->ill_reachable_retrans_time
);
2467 } else if (ill
->ill_phyint
->phyint_flags
& PHYI_RUNNING
) {
2472 * We're done probing, and we can now declare this
2473 * address to be usable. Let IP know that it's ok to
2476 ncec
->ncec_state
= ND_REACHABLE
;
2477 ncec
->ncec_flags
&= ~NCE_F_UNVERIFIED
;
2478 mutex_exit(&ncec
->ncec_lock
);
2480 ipif
= ipif_lookup_addr_exact_v6(
2481 &ncec
->ncec_addr
, ill
, ipst
);
2483 IN6_V4MAPPED_TO_IPADDR(&ncec
->ncec_addr
,
2485 ipif
= ipif_lookup_addr_exact(ncec_addr
, ill
,
2489 if (ipif
->ipif_was_dup
) {
2490 char ibuf
[LIFNAMSIZ
];
2491 char sbuf
[INET6_ADDRSTRLEN
];
2493 ipif
->ipif_was_dup
= B_FALSE
;
2494 (void) inet_ntop(AF_INET6
,
2495 &ipif
->ipif_v6lcl_addr
,
2496 sbuf
, sizeof (sbuf
));
2497 ipif_get_name(ipif
, ibuf
,
2499 cmn_err(CE_NOTE
, "recovered address "
2500 "%s on %s", sbuf
, ibuf
);
2502 if ((ipif
->ipif_flags
& IPIF_UP
) &&
2503 !ipif
->ipif_addr_ready
)
2504 ipif_up_notify(ipif
);
2505 ipif
->ipif_addr_ready
= 1;
2508 if (!isv6
&& arp_no_defense
)
2510 /* Begin defending our new address */
2511 if (ncec
->ncec_unsolicit_count
> 0) {
2512 ncec
->ncec_unsolicit_count
--;
2514 dropped
= ndp_announce(ncec
);
2516 dropped
= arp_announce(ncec
);
2520 ncec
->ncec_unsolicit_count
++;
2522 ncec
->ncec_last_time_defended
=
2525 if (ncec
->ncec_unsolicit_count
> 0) {
2526 nce_restart_timer(ncec
,
2527 ANNOUNCE_INTERVAL(isv6
));
2528 } else if (DEFENSE_INTERVAL(isv6
) != 0) {
2529 nce_restart_timer(ncec
, DEFENSE_INTERVAL(isv6
));
2533 * This is an address we're probing to be our own, but
2534 * the ill is down. Wait until it comes back before
2535 * doing anything, but switch to reachable state so
2536 * that the restart will work.
2538 ncec
->ncec_state
= ND_REACHABLE
;
2539 mutex_exit(&ncec
->ncec_lock
);
2542 case ND_INCOMPLETE
: {
2543 mblk_t
*mp
, *nextmp
;
2547 * Per case (2) in the nce_queue_mp() comments, scan ncec_qd_mp
2548 * for any IPMP probe packets, and toss them. IPMP probe
2549 * packets will always be at the head of ncec_qd_mp, so that
2550 * we can stop at the first queued ND packet that is
2551 * not a probe packet.
2553 prevmpp
= &ncec
->ncec_qd_mp
;
2554 for (mp
= ncec
->ncec_qd_mp
; mp
!= NULL
; mp
= nextmp
) {
2555 nextmp
= mp
->b_next
;
2557 if (IS_UNDER_IPMP(ill
) && ncec
->ncec_nprobes
> 0) {
2559 ncec
->ncec_nprobes
--;
2562 prevmpp
= &mp
->b_next
;
2567 * Must be resolver's retransmit timer.
2569 mutex_exit(&ncec
->ncec_lock
);
2570 ip_ndp_resolve(ncec
);
2574 if (((ncec
->ncec_flags
& NCE_F_UNSOL_ADV
) &&
2575 ncec
->ncec_unsolicit_count
!= 0) ||
2576 (NCE_PUBLISH(ncec
) && DEFENSE_INTERVAL(isv6
) != 0)) {
2577 if (ncec
->ncec_unsolicit_count
> 0) {
2578 ncec
->ncec_unsolicit_count
--;
2579 mutex_exit(&ncec
->ncec_lock
);
2581 * When we get to zero announcements left,
2582 * switch to address defense
2585 boolean_t rate_limit
;
2587 mutex_exit(&ncec
->ncec_lock
);
2588 rate_limit
= ill_defend_rate_limit(ill
, ncec
);
2590 nce_restart_timer(ncec
,
2591 DEFENSE_INTERVAL(isv6
));
2596 dropped
= ndp_announce(ncec
);
2598 dropped
= arp_announce(ncec
);
2600 mutex_enter(&ncec
->ncec_lock
);
2602 ncec
->ncec_unsolicit_count
++;
2604 ncec
->ncec_last_time_defended
=
2607 mutex_exit(&ncec
->ncec_lock
);
2608 if (ncec
->ncec_unsolicit_count
!= 0) {
2609 nce_restart_timer(ncec
,
2610 ANNOUNCE_INTERVAL(isv6
));
2612 nce_restart_timer(ncec
, DEFENSE_INTERVAL(isv6
));
2615 mutex_exit(&ncec
->ncec_lock
);
2619 mutex_exit(&ncec
->ncec_lock
);
2624 ill_refrele(src_ill
);
2628 * Set a link layer address from the ll_addr passed in.
2629 * Copy SAP from ill.
2632 nce_set_ll(ncec_t
*ncec
, uchar_t
*ll_addr
)
2634 ill_t
*ill
= ncec
->ncec_ill
;
2636 ASSERT(ll_addr
!= NULL
);
2637 if (ill
->ill_phys_addr_length
> 0) {
2639 * The bcopy() below used to be called for the physical address
2640 * length rather than the link layer address length. For
2641 * ethernet and many other media, the phys_addr and lla are
2644 * The phys_addr and lla may not be the same for devices that
2645 * support DL_IPV6_LINK_LAYER_ADDR, though there are currently
2646 * no known instances of these.
2648 * For PPP or other interfaces with a zero length
2649 * physical address, don't do anything here.
2650 * The bcopy() with a zero phys_addr length was previously
2651 * a no-op for interfaces with a zero-length physical address.
2652 * Using the lla for them would change the way they operate.
2653 * Doing nothing in such cases preserves expected behavior.
2655 bcopy(ll_addr
, ncec
->ncec_lladdr
, ill
->ill_nd_lla_len
);
2660 nce_cmp_ll_addr(const ncec_t
*ncec
, const uchar_t
*ll_addr
,
2661 uint32_t ll_addr_len
)
2663 ASSERT(ncec
->ncec_lladdr
!= NULL
);
2664 if (ll_addr
== NULL
)
2666 if (bcmp(ll_addr
, ncec
->ncec_lladdr
, ll_addr_len
) != 0)
2672 * Updates the link layer address or the reachability state of
2673 * a cache entry. Reset probe counter if needed.
2676 nce_update(ncec_t
*ncec
, uint16_t new_state
, uchar_t
*new_ll_addr
)
2678 ill_t
*ill
= ncec
->ncec_ill
;
2679 boolean_t need_stop_timer
= B_FALSE
;
2680 boolean_t need_fastpath_update
= B_FALSE
;
2684 ASSERT(MUTEX_HELD(&ncec
->ncec_lock
));
2686 * If this interface does not do NUD, there is no point
2687 * in allowing an update to the cache entry. Although
2688 * we will respond to NS.
2689 * The only time we accept an update for a resolver when
2690 * NUD is turned off is when it has just been created.
2691 * Non-Resolvers will always be created as REACHABLE.
2693 if (new_state
!= ND_UNCHANGED
) {
2694 if ((ncec
->ncec_flags
& NCE_F_NONUD
) &&
2695 (ncec
->ncec_state
!= ND_INCOMPLETE
))
2697 ASSERT((int16_t)new_state
>= ND_STATE_VALID_MIN
);
2698 ASSERT((int16_t)new_state
<= ND_STATE_VALID_MAX
);
2699 need_stop_timer
= B_TRUE
;
2700 if (new_state
== ND_REACHABLE
)
2701 ncec
->ncec_last
= TICK_TO_MSEC(ddi_get_lbolt64());
2703 /* We force NUD in this case */
2704 ncec
->ncec_last
= 0;
2706 ncec
->ncec_state
= new_state
;
2707 ncec
->ncec_pcnt
= ND_MAX_UNICAST_SOLICIT
;
2708 ASSERT(ncec
->ncec_lladdr
!= NULL
|| new_state
== ND_INITIAL
||
2709 new_state
== ND_INCOMPLETE
);
2711 if (need_stop_timer
|| (ncec
->ncec_flags
& NCE_F_STATIC
)) {
2712 tid
= ncec
->ncec_timeout_id
;
2713 ncec
->ncec_timeout_id
= 0;
2716 * Re-trigger fastpath probe and
2717 * overwrite the DL_UNITDATA_REQ data, noting we'll lose
2718 * whatever packets that happens to be transmitting at the time.
2720 if (new_ll_addr
!= NULL
) {
2721 bcopy(new_ll_addr
, ncec
->ncec_lladdr
,
2722 ill
->ill_phys_addr_length
);
2723 need_fastpath_update
= B_TRUE
;
2725 mutex_exit(&ncec
->ncec_lock
);
2726 if (need_stop_timer
|| (ncec
->ncec_flags
& NCE_F_STATIC
)) {
2728 (void) untimeout(tid
);
2730 if (need_fastpath_update
) {
2732 * Delete any existing existing dlur_mp and fp_mp information.
2733 * For IPMP interfaces, all underlying ill's must be checked
2736 nce_fastpath_list_delete(ncec
->ncec_ill
, ncec
, NULL
);
2738 * add the new dlur_mp and fp_mp
2740 nce
= nce_fastpath(ncec
, B_TRUE
, NULL
);
2744 mutex_enter(&ncec
->ncec_lock
);
2748 nce_queue_mp_common(ncec_t
*ncec
, mblk_t
*mp
, boolean_t head_insert
)
2753 ASSERT(MUTEX_HELD(&ncec
->ncec_lock
));
2755 for (mpp
= &ncec
->ncec_qd_mp
; *mpp
!= NULL
; mpp
= &(*mpp
)->b_next
) {
2756 if (++count
> ncec
->ncec_ill
->ill_max_buf
) {
2757 tmp
= ncec
->ncec_qd_mp
->b_next
;
2758 ncec
->ncec_qd_mp
->b_next
= NULL
;
2760 * if we never create data addrs on the under_ill
2763 BUMP_MIB(ncec
->ncec_ill
->ill_ip_mib
,
2764 ipIfStatsOutDiscards
);
2765 ip_drop_output("ipIfStatsOutDiscards", ncec
->ncec_qd_mp
,
2767 freemsg(ncec
->ncec_qd_mp
);
2768 ncec
->ncec_qd_mp
= tmp
;
2773 ncec
->ncec_nprobes
++;
2774 mp
->b_next
= ncec
->ncec_qd_mp
;
2775 ncec
->ncec_qd_mp
= mp
;
2782 * nce_queue_mp will queue the packet into the ncec_qd_mp. The packet will be
2783 * queued at the head or tail of the queue based on the input argument
2784 * 'head_insert'. The caller should specify this argument as B_TRUE if this
2785 * packet is an IPMP probe packet, in which case the following happens:
2787 * 1. Insert it at the head of the ncec_qd_mp list. Consider the normal
2788 * (non-ipmp_probe) load-speading case where the source address of the ND
2789 * packet is not tied to ncec_ill. If the ill bound to the source address
2790 * cannot receive, the response to the ND packet will not be received.
2791 * However, if ND packets for ncec_ill's probes are queued behind that ND
2792 * packet, those probes will also fail to be sent, and thus in.mpathd will
2793 * erroneously conclude that ncec_ill has also failed.
2795 * 2. Drop the ipmp_probe packet in ndp_timer() if the ND did not succeed on
2796 * the first attempt. This ensures that ND problems do not manifest as
2799 * We achieve this by inserting ipmp_probe() packets at the head of the
2802 * The ncec for the probe target is created with ncec_ill set to the ipmp_ill,
2803 * but the caller needs to set head_insert to B_TRUE if this is a probe packet.
2806 nce_queue_mp(ncec_t
*ncec
, mblk_t
*mp
, boolean_t head_insert
)
2808 ASSERT(MUTEX_HELD(&ncec
->ncec_lock
));
2809 nce_queue_mp_common(ncec
, mp
, head_insert
);
2813 * Called when address resolution failed due to a timeout.
2814 * Send an ICMP unreachable in response to all queued packets.
2817 ndp_resolv_failed(ncec_t
*ncec
)
2819 mblk_t
*mp
, *nxt_mp
;
2820 char buf
[INET6_ADDRSTRLEN
];
2821 ill_t
*ill
= ncec
->ncec_ill
;
2822 ip_recv_attr_t iras
;
2824 bzero(&iras
, sizeof (iras
));
2827 * we are setting the ira_rill to the ipmp_ill (instead of
2828 * the actual ill on which the packet was received), but this
2829 * is ok because we don't actually need the real ira_rill.
2830 * to send the icmp unreachable to the sender.
2832 iras
.ira_ill
= iras
.ira_rill
= ill
;
2833 iras
.ira_ruifindex
= ill
->ill_phyint
->phyint_ifindex
;
2834 iras
.ira_rifindex
= iras
.ira_ruifindex
;
2836 ip1dbg(("ndp_resolv_failed: dst %s\n",
2837 inet_ntop(AF_INET6
, (char *)&ncec
->ncec_addr
, buf
, sizeof (buf
))));
2838 mutex_enter(&ncec
->ncec_lock
);
2839 mp
= ncec
->ncec_qd_mp
;
2840 ncec
->ncec_qd_mp
= NULL
;
2841 ncec
->ncec_nprobes
= 0;
2842 mutex_exit(&ncec
->ncec_lock
);
2843 while (mp
!= NULL
) {
2844 nxt_mp
= mp
->b_next
;
2847 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsOutDiscards
);
2848 ip_drop_output("ipIfStatsOutDiscards - address unreachable",
2850 icmp_unreachable_v6(mp
,
2851 ICMP6_DST_UNREACH_ADDR
, B_FALSE
, &iras
);
2852 ASSERT(!(iras
.ira_flags
& IRAF_IPSEC_SECURE
));
2855 ncec_cb_dispatch(ncec
); /* finish off waiting callbacks */
2859 * Handle the completion of NDP and ARP resolution.
2862 nce_resolv_ok(ncec_t
*ncec
)
2866 iaflags_t ixaflags
= IXAF_NO_TRACE
;
2868 ill_t
*ill
= ncec
->ncec_ill
;
2869 boolean_t isv6
= (ncec
->ncec_ipversion
== IPV6_VERSION
);
2870 ip_stack_t
*ipst
= ill
->ill_ipst
;
2872 if (IS_IPMP(ncec
->ncec_ill
)) {
2873 nce_resolv_ipmp_ok(ncec
);
2878 mutex_enter(&ncec
->ncec_lock
);
2879 ASSERT(ncec
->ncec_nprobes
== 0);
2880 mp
= ncec
->ncec_qd_mp
;
2881 ncec
->ncec_qd_mp
= NULL
;
2882 mutex_exit(&ncec
->ncec_lock
);
2884 while (mp
!= NULL
) {
2887 if (ill
->ill_isv6
) {
2888 ip6_t
*ip6h
= (ip6_t
*)mp
->b_rptr
;
2890 pkt_len
= ntohs(ip6h
->ip6_plen
) + IPV6_HDR_LEN
;
2892 ipha_t
*ipha
= (ipha_t
*)mp
->b_rptr
;
2894 ixaflags
|= IXAF_IS_IPV4
;
2895 pkt_len
= ntohs(ipha
->ipha_length
);
2897 nxt_mp
= mp
->b_next
;
2900 * IXAF_NO_DEV_FLOW_CTL information for TCP packets is no
2901 * longer available, but it's ok to drop this flag because TCP
2902 * has its own flow-control in effect, so TCP packets
2903 * are not likely to get here when flow-control is in effect.
2905 mutex_enter(&ill
->ill_lock
);
2906 nce
= nce_lookup(ill
, &ncec
->ncec_addr
);
2907 mutex_exit(&ill
->ill_lock
);
2911 BUMP_MIB(&ipst
->ips_ip6_mib
,
2912 ipIfStatsOutDiscards
);
2914 BUMP_MIB(&ipst
->ips_ip_mib
,
2915 ipIfStatsOutDiscards
);
2917 ip_drop_output("ipIfStatsOutDiscards - no nce",
2922 * We don't know the zoneid, but
2923 * ip_xmit does not care since IXAF_NO_TRACE
2924 * is set. (We traced the packet the first
2925 * time through ip_xmit.)
2927 (void) ip_xmit(mp
, nce
, ixaflags
, pkt_len
, 0,
2928 ALL_ZONES
, 0, NULL
);
2934 ncec_cb_dispatch(ncec
); /* complete callbacks */
2938 * Called by SIOCSNDP* ioctl to add/change an ncec entry
2939 * and the corresponding attributes.
2940 * Disallow states other than ND_REACHABLE or ND_STALE.
2943 ndp_sioc_update(ill_t
*ill
, lif_nd_req_t
*lnr
)
2950 uint16_t new_flags
= 0;
2951 uint16_t old_flags
= 0;
2952 int inflags
= lnr
->lnr_flags
;
2953 ip_stack_t
*ipst
= ill
->ill_ipst
;
2954 boolean_t do_postprocess
= B_FALSE
;
2956 ASSERT(ill
->ill_isv6
);
2957 if ((lnr
->lnr_state_create
!= ND_REACHABLE
) &&
2958 (lnr
->lnr_state_create
!= ND_STALE
))
2961 sin6
= (sin6_t
*)&lnr
->lnr_addr
;
2962 addr
= &sin6
->sin6_addr
;
2964 mutex_enter(&ipst
->ips_ndp6
->ndp_g_lock
);
2965 ASSERT(!IS_UNDER_IPMP(ill
));
2966 nce
= nce_lookup_addr(ill
, addr
);
2968 new_flags
= nce
->nce_common
->ncec_flags
;
2970 switch (inflags
& (NDF_ISROUTER_ON
|NDF_ISROUTER_OFF
)) {
2971 case NDF_ISROUTER_ON
:
2972 new_flags
|= NCE_F_ISROUTER
;
2974 case NDF_ISROUTER_OFF
:
2975 new_flags
&= ~NCE_F_ISROUTER
;
2977 case (NDF_ISROUTER_OFF
|NDF_ISROUTER_ON
):
2978 mutex_exit(&ipst
->ips_ndp6
->ndp_g_lock
);
2983 if (inflags
& NDF_STATIC
)
2984 new_flags
|= NCE_F_STATIC
;
2986 switch (inflags
& (NDF_ANYCAST_ON
|NDF_ANYCAST_OFF
)) {
2987 case NDF_ANYCAST_ON
:
2988 new_flags
|= NCE_F_ANYCAST
;
2990 case NDF_ANYCAST_OFF
:
2991 new_flags
&= ~NCE_F_ANYCAST
;
2993 case (NDF_ANYCAST_OFF
|NDF_ANYCAST_ON
):
2994 mutex_exit(&ipst
->ips_ndp6
->ndp_g_lock
);
3001 err
= nce_add_v6(ill
,
3002 (uchar_t
*)lnr
->lnr_hdw_addr
,
3003 ill
->ill_phys_addr_length
,
3006 lnr
->lnr_state_create
,
3009 mutex_exit(&ipst
->ips_ndp6
->ndp_g_lock
);
3010 ip1dbg(("ndp_sioc_update: Can't create NCE %d\n", err
));
3013 do_postprocess
= B_TRUE
;
3016 ncec
= nce
->nce_common
;
3017 old_flags
= ncec
->ncec_flags
;
3018 if (old_flags
& NCE_F_ISROUTER
&& !(new_flags
& NCE_F_ISROUTER
)) {
3019 ncec_router_to_host(ncec
);
3020 mutex_exit(&ipst
->ips_ndp6
->ndp_g_lock
);
3022 err
= nce_add_v6_postprocess(nce
);
3026 mutex_exit(&ipst
->ips_ndp6
->ndp_g_lock
);
3029 err
= nce_add_v6_postprocess(nce
);
3031 * err cannot be anything other than 0 because we don't support
3032 * proxy arp of static addresses.
3036 mutex_enter(&ncec
->ncec_lock
);
3037 ncec
->ncec_flags
= new_flags
;
3038 mutex_exit(&ncec
->ncec_lock
);
3040 * Note that we ignore the state at this point, which
3041 * should be either STALE or REACHABLE. Instead we let
3042 * the link layer address passed in to determine the state
3043 * much like incoming packets.
3045 nce_process(ncec
, (uchar_t
*)lnr
->lnr_hdw_addr
, 0, B_FALSE
);
3051 * Create an nce_t structure for ill using the ncec->ncec_lladdr to set up
3052 * the nce_dlur_mp. If ill != ncec->ncec_ill, then the ips_ill_g_lock must
3053 * be held to ensure that they are in the same group.
3056 nce_fastpath_create(ill_t
*ill
, ncec_t
*ncec
)
3061 nce
= nce_ill_lookup_then_add(ill
, ncec
);
3063 if (nce
== NULL
|| IS_LOOPBACK(nce
->nce_ill
) || IS_VNI(nce
->nce_ill
))
3067 * hold the ncec_lock to synchronize with nce_update() so that,
3068 * at the end of this function, the contents of nce_dlur_mp are
3069 * consistent with ncec->ncec_lladdr, even though some intermediate
3070 * packet may have been sent out with a mangled address, which would
3071 * only be a transient condition.
3073 mutex_enter(&ncec
->ncec_lock
);
3074 if (ncec
->ncec_lladdr
!= NULL
) {
3075 bcopy(ncec
->ncec_lladdr
, nce
->nce_dlur_mp
->b_rptr
+
3076 NCE_LL_ADDR_OFFSET(ill
), ill
->ill_phys_addr_length
);
3078 nce
->nce_dlur_mp
= ill_dlur_gen(NULL
, 0, ill
->ill_sap
,
3079 ill
->ill_sap_length
);
3081 mutex_exit(&ncec
->ncec_lock
);
3086 * we make nce_fp_mp to have an M_DATA prepend.
3087 * The caller ensures there is hold on ncec for this function.
3088 * Note that since ill_fastpath_probe() copies the mblk there is
3089 * no need to hold the nce or ncec beyond this function.
3091 * If the caller has passed in a non-null ncec_nce to nce_fastpath() that
3092 * ncec_nce must correspond to the nce for ncec with nce_ill == ncec->ncec_ill
3093 * and will be returned back by this function, so that no extra nce_refrele
3094 * is required for the caller. The calls from nce_add_common() use this
3095 * method. All other callers (that pass in NULL ncec_nce) will have to do a
3096 * nce_refrele of the returned nce (when it is non-null).
3099 nce_fastpath(ncec_t
*ncec
, boolean_t trigger_fp_req
, nce_t
*ncec_nce
)
3102 ill_t
*ill
= ncec
->ncec_ill
;
3104 ASSERT(ill
!= NULL
);
3106 if (IS_IPMP(ill
) && trigger_fp_req
) {
3107 trigger_fp_req
= B_FALSE
;
3108 ipmp_ncec_refresh_nce(ncec
);
3112 * If the caller already has the nce corresponding to the ill, use
3113 * that one. Otherwise we have to lookup/add the nce. Calls from
3114 * nce_add_common() fall in the former category, and have just done
3115 * the nce lookup/add that can be reused.
3117 if (ncec_nce
== NULL
)
3118 nce
= nce_fastpath_create(ill
, ncec
);
3122 if (nce
== NULL
|| IS_LOOPBACK(nce
->nce_ill
) || IS_VNI(nce
->nce_ill
))
3126 nce_fastpath_trigger(nce
);
3131 * Trigger fastpath on nce. No locks may be held.
3134 nce_fastpath_trigger(nce_t
*nce
)
3137 ill_t
*ill
= nce
->nce_ill
;
3138 ncec_t
*ncec
= nce
->nce_common
;
3140 res
= ill_fastpath_probe(ill
, nce
->nce_dlur_mp
);
3142 * EAGAIN is an indication of a transient error
3143 * i.e. allocation failure etc. leave the ncec in the list it
3144 * will be updated when another probe happens for another ire
3145 * if not it will be taken out of the list when the ire is
3148 if (res
!= 0 && res
!= EAGAIN
&& res
!= ENOTSUP
)
3149 nce_fastpath_list_delete(ill
, ncec
, NULL
);
3153 * Add ncec to the nce fastpath list on ill.
3156 nce_ill_lookup_then_add_locked(ill_t
*ill
, ncec_t
*ncec
)
3160 ASSERT(MUTEX_HELD(&ill
->ill_lock
));
3162 * Atomically ensure that the ill is not CONDEMNED and is not going
3163 * down, before adding the NCE.
3165 if (ill
->ill_state_flags
& ILL_CONDEMNED
)
3167 mutex_enter(&ncec
->ncec_lock
);
3169 * if ncec has not been deleted and
3170 * is not already in the list add it.
3172 if (!NCE_ISCONDEMNED(ncec
)) {
3173 nce
= nce_lookup(ill
, &ncec
->ncec_addr
);
3176 nce
= nce_add(ill
, ncec
);
3179 mutex_exit(&ncec
->ncec_lock
);
3184 nce_ill_lookup_then_add(ill_t
*ill
, ncec_t
*ncec
)
3188 mutex_enter(&ill
->ill_lock
);
3189 nce
= nce_ill_lookup_then_add_locked(ill
, ncec
);
3190 mutex_exit(&ill
->ill_lock
);
3196 * remove ncec from the ill_nce list. If 'dead' is non-null, the deleted
3197 * nce is added to the 'dead' list, and the caller must nce_refrele() the
3198 * entry after all locks have been dropped.
3201 nce_fastpath_list_delete(ill_t
*ill
, ncec_t
*ncec
, list_t
*dead
)
3205 ASSERT(ill
!= NULL
);
3207 /* delete any nces referencing the ncec from underlying ills */
3209 ipmp_ncec_delete_nce(ncec
);
3211 /* now the ill itself */
3212 mutex_enter(&ill
->ill_lock
);
3213 for (nce
= list_head(&ill
->ill_nce
); nce
!= NULL
;
3214 nce
= list_next(&ill
->ill_nce
, nce
)) {
3215 if (nce
->nce_common
== ncec
) {
3221 mutex_exit(&ill
->ill_lock
);
3226 list_insert_tail(dead
, nce
);
3231 * when the fastpath response does not fit in the datab
3232 * associated with the existing nce_fp_mp, we delete and
3233 * add the nce to retrigger fastpath based on the information
3237 nce_delete_then_add(nce_t
*nce
)
3239 ill_t
*ill
= nce
->nce_ill
;
3240 nce_t
*newnce
= NULL
;
3242 ip0dbg(("nce_delete_then_add nce %p ill %s\n",
3243 (void *)nce
, ill
->ill_name
));
3244 mutex_enter(&ill
->ill_lock
);
3245 mutex_enter(&nce
->nce_common
->ncec_lock
);
3248 * Make sure that ncec is not condemned before adding. We hold the
3249 * ill_lock and ncec_lock to synchronize with ncec_delete() and
3250 * ipmp_ncec_delete_nce()
3252 if (!NCE_ISCONDEMNED(nce
->nce_common
))
3253 newnce
= nce_add(ill
, nce
->nce_common
);
3254 mutex_exit(&nce
->nce_common
->ncec_lock
);
3255 mutex_exit(&ill
->ill_lock
);
3257 return (newnce
); /* could be null if nomem */
3260 typedef struct nce_fp_match_s
{
3261 nce_t
*nce_fp_match_res
;
3262 mblk_t
*nce_fp_match_ack_mp
;
3267 nce_fastpath_match_dlur(ill_t
*ill
, nce_t
*nce
, void *arg
)
3269 nce_fp_match_t
*nce_fp_marg
= arg
;
3270 ncec_t
*ncec
= nce
->nce_common
;
3271 mblk_t
*mp
= nce_fp_marg
->nce_fp_match_ack_mp
;
3272 uchar_t
*mp_rptr
, *ud_mp_rptr
;
3273 mblk_t
*ud_mp
= nce
->nce_dlur_mp
;
3277 * mp is the mp associated with the fastpath ack.
3278 * ud_mp is the outstanding DL_UNITDATA_REQ on the nce_t
3279 * under consideration. If the contents match, then the
3280 * fastpath ack is used to update the nce.
3284 mp_rptr
= mp
->b_rptr
;
3285 cmplen
= mp
->b_wptr
- mp_rptr
;
3286 ASSERT(cmplen
>= 0);
3288 ud_mp_rptr
= ud_mp
->b_rptr
;
3290 * The ncec is locked here to prevent any other threads from accessing
3291 * and changing nce_dlur_mp when the address becomes resolved to an
3292 * lla while we're in the middle of looking at and comparing the
3293 * hardware address (lla). It is also locked to prevent multiple
3294 * threads in nce_fastpath() from examining nce_dlur_mp at the same
3297 mutex_enter(&ncec
->ncec_lock
);
3298 if (ud_mp
->b_wptr
- ud_mp_rptr
!= cmplen
||
3299 bcmp((char *)mp_rptr
, (char *)ud_mp_rptr
, cmplen
) == 0) {
3300 nce_fp_marg
->nce_fp_match_res
= nce
;
3301 mutex_exit(&ncec
->ncec_lock
);
3305 mutex_exit(&ncec
->ncec_lock
);
3310 * Update all NCE's that are not in fastpath mode and
3311 * have an nce_fp_mp that matches mp. mp->b_cont contains
3312 * the fastpath header.
3314 * Returns TRUE if entry should be dequeued, or FALSE otherwise.
3317 nce_fastpath_update(ill_t
*ill
, mblk_t
*mp
)
3319 nce_fp_match_t nce_fp_marg
;
3321 mblk_t
*nce_fp_mp
, *fp_mp
;
3323 nce_fp_marg
.nce_fp_match_res
= NULL
;
3324 nce_fp_marg
.nce_fp_match_ack_mp
= mp
;
3326 nce_walk(ill
, nce_fastpath_match_dlur
, &nce_fp_marg
);
3328 if ((nce
= nce_fp_marg
.nce_fp_match_res
) == NULL
)
3331 mutex_enter(&nce
->nce_lock
);
3332 nce_fp_mp
= nce
->nce_fp_mp
;
3334 if (nce_fp_mp
!= NULL
) {
3336 if (nce_fp_mp
->b_rptr
+ MBLKL(fp_mp
) >
3337 nce_fp_mp
->b_datap
->db_lim
) {
3338 mutex_exit(&nce
->nce_lock
);
3339 nce
= nce_delete_then_add(nce
);
3343 mutex_enter(&nce
->nce_lock
);
3344 nce_fp_mp
= nce
->nce_fp_mp
;
3348 /* Matched - install mp as the fastpath mp */
3349 if (nce_fp_mp
== NULL
) {
3350 fp_mp
= dupb(mp
->b_cont
);
3351 nce
->nce_fp_mp
= fp_mp
;
3354 bcopy(fp_mp
->b_rptr
, nce_fp_mp
->b_rptr
, MBLKL(fp_mp
));
3355 nce
->nce_fp_mp
->b_wptr
= nce
->nce_fp_mp
->b_rptr
3358 mutex_exit(&nce
->nce_lock
);
3363 * Return a pointer to a given option in the packet.
3364 * Assumes that option part of the packet have already been validated.
3367 ndp_get_option(nd_opt_hdr_t
*opt
, int optlen
, int opt_type
)
3369 while (optlen
> 0) {
3370 if (opt
->nd_opt_type
== opt_type
)
3372 optlen
-= 8 * opt
->nd_opt_len
;
3373 opt
= (struct nd_opt_hdr
*)((char *)opt
+ 8 * opt
->nd_opt_len
);
3379 * Verify all option lengths present are > 0, also check to see
3380 * if the option lengths and packet length are consistent.
3383 ndp_verify_optlen(nd_opt_hdr_t
*opt
, int optlen
)
3385 ASSERT(opt
!= NULL
);
3386 while (optlen
> 0) {
3387 if (opt
->nd_opt_len
== 0)
3389 optlen
-= 8 * opt
->nd_opt_len
;
3392 opt
= (struct nd_opt_hdr
*)((char *)opt
+ 8 * opt
->nd_opt_len
);
3398 * ncec_walk function.
3399 * Free a fraction of the NCE cache entries.
3401 * A possible optimization here would be to use ncec_last where possible, and
3402 * delete the least-frequently used entry, which would require more complex
3403 * computation as we walk through the ncec's (e.g., track ncec entries by
3404 * order of ncec_last and/or maintain state)
3407 ncec_cache_reclaim(ncec_t
*ncec
, char *arg
)
3409 ip_stack_t
*ipst
= ncec
->ncec_ipst
;
3410 uint_t fraction
= *(uint_t
*)arg
;
3413 if ((ncec
->ncec_flags
&
3414 (NCE_F_MYADDR
| NCE_F_STATIC
| NCE_F_BCAST
)) != 0) {
3418 rand
= (uint_t
)ddi_get_lbolt() +
3419 NCE_ADDR_HASH_V6(ncec
->ncec_addr
, NCE_TABLE_SIZE
);
3420 if ((rand
/fraction
)*fraction
== rand
) {
3421 IP_STAT(ipst
, ip_nce_reclaim_deleted
);
3427 * kmem_cache callback to free up memory.
3429 * For now we just delete a fixed fraction.
3432 ip_nce_reclaim_stack(ip_stack_t
*ipst
)
3434 uint_t fraction
= ipst
->ips_ip_nce_reclaim_fraction
;
3436 IP_STAT(ipst
, ip_nce_reclaim_calls
);
3438 ncec_walk(NULL
, (pfi_t
)ncec_cache_reclaim
, (uchar_t
*)&fraction
, ipst
);
3441 * Walk all CONNs that can have a reference on an ire, ncec or dce.
3442 * Get them to update any stale references to drop any refholds they
3445 ipcl_walk(conn_ixa_cleanup
, (void *)B_FALSE
, ipst
);
3449 * Called by the memory allocator subsystem directly, when the system
3450 * is running low on memory.
3454 ip_nce_reclaim(void *args
)
3456 netstack_handle_t nh
;
3460 netstack_next_init(&nh
);
3461 while ((ns
= netstack_next(&nh
)) != NULL
) {
3463 * netstack_next() can return a netstack_t with a NULL
3464 * netstack_ip at boot time.
3466 if ((ipst
= ns
->netstack_ip
) == NULL
) {
3470 ip_nce_reclaim_stack(ipst
);
3473 netstack_next_fini(&nh
);
#ifdef DEBUG
/*
 * Record a reference on ncec in the th_trace facility.  ncec_lock must be
 * held.  If th_trace_ref() fails, tracing is switched off for this entry
 * and its trace state is cleaned up.
 */
void
ncec_trace_ref(ncec_t *ncec)
{
	ASSERT(MUTEX_HELD(&ncec->ncec_lock));

	if (ncec->ncec_trace_disable)
		return;

	if (th_trace_ref(ncec, ncec->ncec_ipst))
		return;

	ncec->ncec_trace_disable = B_TRUE;
	ncec_trace_cleanup(ncec);
}

/*
 * Record the release of a reference on ncec, unless tracing has been
 * disabled for the entry.  ncec_lock must be held.
 */
void
ncec_untrace_ref(ncec_t *ncec)
{
	ASSERT(MUTEX_HELD(&ncec->ncec_lock));

	if (ncec->ncec_trace_disable)
		return;

	th_trace_unref(ncec);
}

/*
 * Hand the entry and its trace-disable flag to th_trace_cleanup().
 */
static void
ncec_trace_cleanup(const ncec_t *ncec)
{
	th_trace_cleanup(ncec, ncec->ncec_trace_disable);
}
#endif
3508 * Called when address resolution fails due to a timeout.
3509 * Send an ICMP unreachable in response to all queued packets.
3512 arp_resolv_failed(ncec_t
*ncec
)
3514 mblk_t
*mp
, *nxt_mp
;
3515 char buf
[INET6_ADDRSTRLEN
];
3516 struct in_addr ipv4addr
;
3517 ill_t
*ill
= ncec
->ncec_ill
;
3518 ip_stack_t
*ipst
= ncec
->ncec_ipst
;
3519 ip_recv_attr_t iras
;
3521 bzero(&iras
, sizeof (iras
));
3522 iras
.ira_flags
= IRAF_IS_IPV4
;
3524 * we are setting the ira_rill to the ipmp_ill (instead of
3525 * the actual ill on which the packet was received), but this
3526 * is ok because we don't actually need the real ira_rill.
3527 * to send the icmp unreachable to the sender.
3529 iras
.ira_ill
= iras
.ira_rill
= ill
;
3530 iras
.ira_ruifindex
= ill
->ill_phyint
->phyint_ifindex
;
3531 iras
.ira_rifindex
= iras
.ira_ruifindex
;
3533 IN6_V4MAPPED_TO_INADDR(&ncec
->ncec_addr
, &ipv4addr
);
3534 ip3dbg(("arp_resolv_failed: dst %s\n",
3535 inet_ntop(AF_INET
, &ipv4addr
, buf
, sizeof (buf
))));
3536 mutex_enter(&ncec
->ncec_lock
);
3537 mp
= ncec
->ncec_qd_mp
;
3538 ncec
->ncec_qd_mp
= NULL
;
3539 ncec
->ncec_nprobes
= 0;
3540 mutex_exit(&ncec
->ncec_lock
);
3541 while (mp
!= NULL
) {
3542 nxt_mp
= mp
->b_next
;
3545 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsOutDiscards
);
3546 ip_drop_output("ipIfStatsOutDiscards - address unreachable",
3548 if (ipst
->ips_ip_arp_icmp_error
) {
3549 ip3dbg(("arp_resolv_failed: "
3550 "Calling icmp_unreachable\n"));
3551 icmp_unreachable(mp
, ICMP_HOST_UNREACHABLE
, &iras
);
3555 ASSERT(!(iras
.ira_flags
& IRAF_IPSEC_SECURE
));
3558 ncec_cb_dispatch(ncec
); /* finish off waiting callbacks */
3562 * if ill is an under_ill, translate it to the ipmp_ill and add the
3563 * nce on the ipmp_ill. Two nce_t entries (one on the ipmp_ill, and
3564 * one on the underlying in_ill) will be created for the
3565 * ncec_t in this case. The ncec_t itself will be created on the ipmp_ill.
3568 nce_lookup_then_add_v4(ill_t
*ill
, uchar_t
*hw_addr
, uint_t hw_addr_len
,
3569 const in_addr_t
*addr
, uint16_t flags
, uint16_t state
, nce_t
**newnce
)
3573 ip_stack_t
*ipst
= ill
->ill_ipst
;
3574 nce_t
*nce
, *upper_nce
= NULL
;
3575 ill_t
*in_ill
= ill
, *under
= NULL
;
3576 boolean_t need_ill_refrele
= B_FALSE
;
3578 if (flags
& NCE_F_MCAST
) {
3580 * hw_addr will be figured out in nce_set_multicast_v4;
3581 * caller needs to pass in the cast_ill for ipmp
3583 ASSERT(hw_addr
== NULL
);
3584 ASSERT(!IS_IPMP(ill
));
3585 err
= nce_set_multicast_v4(ill
, addr
, flags
, newnce
);
3589 if (IS_UNDER_IPMP(ill
) && !(flags
& NCE_F_MYADDR
)) {
3590 ill
= ipmp_ill_hold_ipmp_ill(ill
);
3593 need_ill_refrele
= B_TRUE
;
3595 if ((flags
& NCE_F_BCAST
) != 0) {
3597 * IPv4 broadcast ncec: compute the hwaddr.
3600 under
= ipmp_ill_hold_xmit_ill(ill
, B_FALSE
);
3601 if (under
== NULL
) {
3602 if (need_ill_refrele
)
3606 hw_addr
= under
->ill_bcast_mp
->b_rptr
+
3607 NCE_LL_ADDR_OFFSET(under
);
3608 hw_addr_len
= under
->ill_phys_addr_length
;
3610 hw_addr
= ill
->ill_bcast_mp
->b_rptr
+
3611 NCE_LL_ADDR_OFFSET(ill
),
3612 hw_addr_len
= ill
->ill_phys_addr_length
;
3616 mutex_enter(&ipst
->ips_ndp4
->ndp_g_lock
);
3617 IN6_IPADDR_TO_V4MAPPED(*addr
, &addr6
);
3618 nce
= nce_lookup_addr(ill
, &addr6
);
3620 err
= nce_add_v4(ill
, hw_addr
, hw_addr_len
, addr
, flags
,
3625 mutex_exit(&ipst
->ips_ndp4
->ndp_g_lock
);
3627 err
= nce_add_v4_postprocess(nce
);
3629 if (in_ill
!= ill
&& nce
!= NULL
) {
3630 nce_t
*under_nce
= NULL
;
3633 * in_ill was the under_ill. Try to create the under_nce.
3634 * Hold the ill_g_lock to prevent changes to group membership
3635 * until we are done.
3637 rw_enter(&ipst
->ips_ill_g_lock
, RW_READER
);
3638 if (!IS_IN_SAME_ILLGRP(in_ill
, ill
)) {
3639 DTRACE_PROBE2(ill__not__in__group
, nce_t
*, nce
,
3641 rw_exit(&ipst
->ips_ill_g_lock
);
3647 under_nce
= nce_fastpath_create(in_ill
, nce
->nce_common
);
3648 if (under_nce
== NULL
) {
3649 rw_exit(&ipst
->ips_ill_g_lock
);
3655 rw_exit(&ipst
->ips_ill_g_lock
);
3657 nce
= under_nce
; /* will be returned to caller */
3658 if (NCE_ISREACHABLE(nce
->nce_common
))
3659 nce_fastpath_trigger(under_nce
);
3670 if (upper_nce
!= NULL
)
3671 nce_refrele(upper_nce
);
3672 if (need_ill_refrele
)
3679 * NDP Cache Entry creation routine for IPv4.
3680 * This routine must always be called with ndp4->ndp_g_lock held.
3681 * Prior to return, ncec_refcnt is incremented.
3683 * IPMP notes: the ncec for non-local (i.e., !NCE_MYADDR(ncec) addresses
3684 * are always added pointing at the ipmp_ill. Thus, when the ill passed
3685 * to nce_add_v4 is an under_ill (i.e., IS_UNDER_IPMP(ill)) two nce_t
3686 * entries will be created, both pointing at the same ncec_t. The nce_t
3687 * entries will have their nce_ill set to the ipmp_ill and the under_ill
3688 * respectively, with the ncec_t having its ncec_ill pointing at the ipmp_ill.
3689 * Local addresses are always created on the ill passed to nce_add_v4.
3692 nce_add_v4(ill_t
*ill
, uchar_t
*hw_addr
, uint_t hw_addr_len
,
3693 const in_addr_t
*addr
, uint16_t flags
, uint16_t state
, nce_t
**newnce
)
3696 boolean_t is_multicast
= (flags
& NCE_F_MCAST
);
3697 struct in6_addr addr6
;
3700 ASSERT(MUTEX_HELD(&ill
->ill_ipst
->ips_ndp4
->ndp_g_lock
));
3701 ASSERT(!ill
->ill_isv6
);
3702 ASSERT(!IN_MULTICAST(htonl(*addr
)) || is_multicast
);
3704 IN6_IPADDR_TO_V4MAPPED(*addr
, &addr6
);
3705 err
= nce_add_common(ill
, hw_addr
, hw_addr_len
, &addr6
, flags
, state
,
3707 ASSERT(newnce
!= NULL
);
3713 * Post-processing routine to be executed after nce_add_v4(). This function
3714 * triggers fastpath (if appropriate) and DAD on the newly added nce entry
3715 * and must be called without any locks held.
3717 * Always returns 0, but we return an int to keep this symmetric with the
3718 * IPv6 counterpart.
3721 nce_add_v4_postprocess(nce_t
*nce
)
3723 ncec_t
*ncec
= nce
->nce_common
;
3724 uint16_t flags
= ncec
->ncec_flags
;
3725 boolean_t ndp_need_dad
= B_FALSE
;
3728 ip_stack_t
*ipst
= ncec
->ncec_ill
->ill_ipst
;
3729 uchar_t
*hw_addr
= ncec
->ncec_lladdr
;
3730 boolean_t trigger_fastpath
= B_TRUE
;
3733 * If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then
3734 * we call nce_fastpath as soon as the ncec is resolved in nce_process.
3735 * We call nce_fastpath from nce_update if the link layer address of
3736 * the peer changes.
3738 if (NCE_PUBLISH(ncec
) || !NCE_ISREACHABLE(ncec
) || (hw_addr
== NULL
&&
3739 ncec
->ncec_ill
->ill_net_type
!= IRE_IF_NORESOLVER
))
3740 trigger_fastpath
= B_FALSE
;
3742 if (trigger_fastpath
)
3743 nce_fastpath_trigger(nce
);
3745 if (NCE_PUBLISH(ncec
) && ncec
->ncec_state
== ND_PROBE
) {
3747 * Either the caller (by passing in ND_PROBE)
3748 * or nce_add_common() (by the internally computed state
3749 * based on ncec_addr and ill_net_type) has determined
3750 * that this unicast entry needs DAD. Trigger DAD.
3752 ndp_need_dad
= B_TRUE
;
3753 } else if (flags
& NCE_F_UNSOL_ADV
) {
3755 * We account for the transmit below by assigning one
3756 * less than the ndd variable. Subsequent decrements
3757 * are done in nce_timer.
3759 mutex_enter(&ncec
->ncec_lock
);
3760 ncec
->ncec_unsolicit_count
=
3761 ipst
->ips_ip_arp_publish_count
- 1;
3762 mutex_exit(&ncec
->ncec_lock
);
3763 dropped
= arp_announce(ncec
);
3764 mutex_enter(&ncec
->ncec_lock
);
3766 ncec
->ncec_unsolicit_count
++;
3768 ncec
->ncec_last_time_defended
= ddi_get_lbolt();
3769 if (ncec
->ncec_unsolicit_count
!= 0) {
3770 nce_start_timer(ncec
,
3771 ipst
->ips_ip_arp_publish_interval
);
3773 mutex_exit(&ncec
->ncec_lock
);
3777 * If the ARP probe delay is 0, user has configured us to send the first
3778 * probe right away. Do so, and set up for the subsequent probes.
3781 mutex_enter(&ncec
->ncec_lock
);
3782 if (ncec
->ncec_pcnt
== 0) {
3784 * DAD probes and announce can be
3785 * administratively disabled by setting the
3786 * probe_count to zero. Restart the timer in
3787 * this case to mark the ipif as ready.
3789 ncec
->ncec_unsolicit_count
= 0;
3790 mutex_exit(&ncec
->ncec_lock
);
3791 nce_restart_timer(ncec
, 0);
3793 mutex_exit(&ncec
->ncec_lock
);
3794 delay
= ((ncec
->ncec_flags
& NCE_F_FAST
) ?
3795 ipst
->ips_arp_probe_delay
:
3796 ipst
->ips_arp_fastprobe_delay
);
3797 nce_dad(ncec
, NULL
, (delay
== 0 ? B_TRUE
: B_FALSE
));
3804 * ncec_walk routine to update all entries that have a given destination or
3805 * gateway address and cached link layer (MAC) address. This is used when ARP
3806 * informs us that a network-to-link-layer mapping may have changed.
3809 nce_update_hw_changed(ncec_t
*ncec
, void *arg
)
3811 nce_hw_map_t
*hwm
= arg
;
3814 if (ncec
->ncec_state
!= ND_REACHABLE
)
3817 IN6_V4MAPPED_TO_IPADDR(&ncec
->ncec_addr
, ncec_addr
);
3818 if (ncec_addr
!= hwm
->hwm_addr
)
3821 mutex_enter(&ncec
->ncec_lock
);
3822 if (hwm
->hwm_flags
!= 0)
3823 ncec
->ncec_flags
= hwm
->hwm_flags
;
3824 nce_update(ncec
, ND_STALE
, hwm
->hwm_hwaddr
);
3825 mutex_exit(&ncec
->ncec_lock
);
3829 ncec_refhold(ncec_t
*ncec
)
3831 mutex_enter(&(ncec
)->ncec_lock
);
3832 (ncec
)->ncec_refcnt
++;
3833 ASSERT((ncec
)->ncec_refcnt
!= 0);
3835 ncec_trace_ref(ncec
);
3837 mutex_exit(&(ncec
)->ncec_lock
);
3841 ncec_refhold_notr(ncec_t
*ncec
)
3843 mutex_enter(&(ncec
)->ncec_lock
);
3844 (ncec
)->ncec_refcnt
++;
3845 ASSERT((ncec
)->ncec_refcnt
!= 0);
3846 mutex_exit(&(ncec
)->ncec_lock
);
3850 ncec_refhold_locked(ncec_t
*ncec
)
3852 ASSERT(MUTEX_HELD(&(ncec
)->ncec_lock
));
3853 (ncec
)->ncec_refcnt
++;
3855 ncec_trace_ref(ncec
);
3859 /* ncec_inactive destroys the mutex thus no mutex_exit is needed */
3861 ncec_refrele(ncec_t
*ncec
)
3863 mutex_enter(&(ncec
)->ncec_lock
);
3865 ncec_untrace_ref(ncec
);
3867 ASSERT((ncec
)->ncec_refcnt
!= 0);
3868 if (--(ncec
)->ncec_refcnt
== 0) {
3869 ncec_inactive(ncec
);
3871 mutex_exit(&(ncec
)->ncec_lock
);
3876 ncec_refrele_notr(ncec_t
*ncec
)
3878 mutex_enter(&(ncec
)->ncec_lock
);
3879 ASSERT((ncec
)->ncec_refcnt
!= 0);
3880 if (--(ncec
)->ncec_refcnt
== 0) {
3881 ncec_inactive(ncec
);
3883 mutex_exit(&(ncec
)->ncec_lock
);
3888 * Common to IPv4 and IPv6.
3891 nce_restart_timer(ncec_t
*ncec
, uint_t ms
)
3895 ASSERT(!MUTEX_HELD(&(ncec
)->ncec_lock
));
3897 /* First cancel any running timer */
3898 mutex_enter(&ncec
->ncec_lock
);
3899 tid
= ncec
->ncec_timeout_id
;
3900 ncec
->ncec_timeout_id
= 0;
3902 mutex_exit(&ncec
->ncec_lock
);
3903 (void) untimeout(tid
);
3904 mutex_enter(&ncec
->ncec_lock
);
3908 nce_start_timer(ncec
, ms
);
3909 mutex_exit(&ncec
->ncec_lock
);
3913 nce_start_timer(ncec_t
*ncec
, uint_t ms
)
3915 ASSERT(MUTEX_HELD(&ncec
->ncec_lock
));
3917 * Don't start the timer if the ncec has been deleted, or if the timer
3918 * is already running
3920 if (!NCE_ISCONDEMNED(ncec
) && ncec
->ncec_timeout_id
== 0) {
3921 ncec
->ncec_timeout_id
= timeout(nce_timer
, ncec
,
3922 MSEC_TO_TICK(ms
) == 0 ? 1 : MSEC_TO_TICK(ms
));
3927 nce_set_multicast_v4(ill_t
*ill
, const in_addr_t
*dst
,
3928 uint16_t flags
, nce_t
**newnce
)
3932 ip_stack_t
*ipst
= ill
->ill_ipst
;
3936 ASSERT(!ill
->ill_isv6
);
3938 IN6_IPADDR_TO_V4MAPPED(*dst
, &dst6
);
3939 mutex_enter(&ipst
->ips_ndp4
->ndp_g_lock
);
3940 if ((nce
= nce_lookup_addr(ill
, &dst6
)) != NULL
) {
3941 mutex_exit(&ipst
->ips_ndp4
->ndp_g_lock
);
3944 if (ill
->ill_net_type
== IRE_IF_RESOLVER
) {
3946 * For IRE_IF_RESOLVER a hardware mapping can be
3947 * generated, for IRE_IF_NORESOLVER, resolution cookie
3948 * in the ill is copied in nce_add_v4().
3950 hw_addr
= kmem_alloc(ill
->ill_phys_addr_length
, KM_NOSLEEP
);
3951 if (hw_addr
== NULL
) {
3952 mutex_exit(&ipst
->ips_ndp4
->ndp_g_lock
);
3955 ip_mcast_mapping(ill
, (uchar_t
*)dst
, hw_addr
);
3958 * IRE_IF_NORESOLVER type simply copies the resolution
3959 * cookie passed in. So no hw_addr is needed.
3963 ASSERT(flags
& NCE_F_MCAST
);
3964 ASSERT(flags
& NCE_F_NONUD
);
3965 /* nce_state will be computed by nce_add_common() */
3966 err
= nce_add_v4(ill
, hw_addr
, ill
->ill_phys_addr_length
, dst
, flags
,
3967 ND_UNCHANGED
, &nce
);
3968 mutex_exit(&ipst
->ips_ndp4
->ndp_g_lock
);
3970 err
= nce_add_v4_postprocess(nce
);
3971 if (hw_addr
!= NULL
)
3972 kmem_free(hw_addr
, ill
->ill_phys_addr_length
);
3974 ip1dbg(("nce_set_multicast_v4: create failed" "%d\n", err
));
3986 * This is used when scanning for "old" (least recently broadcast) NCEs. We
3987 * don't want to have to walk the list for every single one, so we gather up
3988 * batches at a time.
3990 #define NCE_RESCHED_LIST_LEN 8
3995 ncec_t
*ncert_nces
[NCE_RESCHED_LIST_LEN
];
3999 * Pick the longest waiting NCEs for defense.
4003 ncec_reschedule(ill_t
*ill
, nce_t
*nce
, void *arg
)
4005 nce_resched_t
*ncert
= arg
;
4009 ncec_t
*ncec
= nce
->nce_common
;
4011 ASSERT(ncec
->ncec_ill
== ncert
->ncert_ill
);
4013 * Only reachable entries that are ready for announcement are eligible.
4015 if (!NCE_MYADDR(ncec
) || ncec
->ncec_state
!= ND_REACHABLE
)
4017 if (ncert
->ncert_num
< NCE_RESCHED_LIST_LEN
) {
4019 ncert
->ncert_nces
[ncert
->ncert_num
++] = ncec
;
4021 ncecs
= ncert
->ncert_nces
;
4022 ncec_max
= ncecs
+ NCE_RESCHED_LIST_LEN
;
4024 for (; ncecs
< ncec_max
; ncecs
++) {
4025 ASSERT(ncec
!= NULL
);
4026 if ((*ncecs
)->ncec_last_time_defended
>
4027 ncec
->ncec_last_time_defended
) {
4039 * Reschedule the ARP defense of any long-waiting NCEs. It's assumed that this
4040 * doesn't happen very often (if at all), and thus it needn't be highly
4041 * optimized. (Note, though, that it's actually O(N) complexity, because the
4042 * outer loop is bounded by a constant rather than by the length of the list.)
4045 nce_ill_reschedule(ill_t
*ill
, nce_resched_t
*ncert
)
4048 ip_stack_t
*ipst
= ill
->ill_ipst
;
4049 uint_t i
, defend_rate
;
4051 i
= ill
->ill_defend_count
;
4052 ill
->ill_defend_count
= 0;
4054 defend_rate
= ipst
->ips_ndp_defend_rate
;
4056 defend_rate
= ipst
->ips_arp_defend_rate
;
4057 /* If none could be sitting around, then don't reschedule */
4058 if (i
< defend_rate
) {
4059 DTRACE_PROBE1(reschedule_none
, ill_t
*, ill
);
4062 ncert
->ncert_ill
= ill
;
4063 while (ill
->ill_defend_count
< defend_rate
) {
4064 nce_walk_common(ill
, ncec_reschedule
, ncert
);
4065 for (i
= 0; i
< ncert
->ncert_num
; i
++) {
4067 ncec
= ncert
->ncert_nces
[i
];
4068 mutex_enter(&ncec
->ncec_lock
);
4069 ncec
->ncec_flags
|= NCE_F_DELAYED
;
4070 mutex_exit(&ncec
->ncec_lock
);
4072 * we plan to schedule this ncec, so incr the
4073 * defend_count in anticipation.
4075 if (++ill
->ill_defend_count
>= defend_rate
)
4078 if (ncert
->ncert_num
< NCE_RESCHED_LIST_LEN
)
4084 * Check if the current rate-limiting parameters permit the sending
4085 * of another address defense announcement for both IPv4 and IPv6.
4086 * Returns B_TRUE if rate-limiting is in effect (i.e., send is not
4087 * permitted), and B_FALSE otherwise. The `defend_rate' parameter
4088 * determines how many address defense announcements are permitted
4089 * in any `defend_period' interval.
4092 ill_defend_rate_limit(ill_t
*ill
, ncec_t
*ncec
)
4094 clock_t now
= ddi_get_lbolt();
4095 ip_stack_t
*ipst
= ill
->ill_ipst
;
4096 clock_t start
= ill
->ill_defend_start
;
4097 uint32_t elapsed
, defend_period
, defend_rate
;
4098 nce_resched_t ncert
;
4102 if (ill
->ill_isv6
) {
4103 defend_period
= ipst
->ips_ndp_defend_period
;
4104 defend_rate
= ipst
->ips_ndp_defend_rate
;
4106 defend_period
= ipst
->ips_arp_defend_period
;
4107 defend_rate
= ipst
->ips_arp_defend_rate
;
4109 if (defend_rate
== 0)
4111 bzero(&ncert
, sizeof (ncert
));
4112 mutex_enter(&ill
->ill_lock
);
4114 elapsed
= now
- start
;
4115 if (elapsed
> SEC_TO_TICK(defend_period
)) {
4116 ill
->ill_defend_start
= now
;
4118 * nce_ill_reschedule will attempt to
4119 * prevent starvation by rescheduling the
4120 * oldest entries, which are marked with
4121 * the NCE_F_DELAYED flag.
4123 nce_ill_reschedule(ill
, &ncert
);
4126 ill
->ill_defend_start
= now
;
4128 ASSERT(ill
->ill_defend_count
<= defend_rate
);
4129 mutex_enter(&ncec
->ncec_lock
);
4130 if (ncec
->ncec_flags
& NCE_F_DELAYED
) {
4132 * This ncec was rescheduled as one of the really old
4133 * entries needing on-going defense. The
4134 * ill_defend_count was already incremented in
4135 * nce_ill_reschedule. Go ahead and send the announce.
4137 ncec
->ncec_flags
&= ~NCE_F_DELAYED
;
4138 mutex_exit(&ncec
->ncec_lock
);
4142 mutex_exit(&ncec
->ncec_lock
);
4143 if (ill
->ill_defend_count
< defend_rate
)
4144 ill
->ill_defend_count
++;
4145 if (ill
->ill_defend_count
== defend_rate
) {
4147 * we are no longer allowed to send unbidden defense
4148 * messages. Wait for rescheduling.
4155 mutex_exit(&ill
->ill_lock
);
4157 * After all the locks have been dropped we can restart nce timer,
4158 * and refrele the delayed ncecs
4160 for (i
= 0; i
< ncert
.ncert_num
; i
++) {
4161 clock_t xmit_interval
;
4164 tmp
= ncert
.ncert_nces
[i
];
4165 xmit_interval
= nce_fuzz_interval(tmp
->ncec_xmit_interval
,
4167 nce_restart_timer(tmp
, xmit_interval
);
4174 ndp_announce(ncec_t
*ncec
)
4176 return (ndp_xmit(ncec
->ncec_ill
, ND_NEIGHBOR_ADVERT
, ncec
->ncec_lladdr
,
4177 ncec
->ncec_lladdr_length
, &ncec
->ncec_addr
, &ipv6_all_hosts_mcast
,
4178 nce_advert_flags(ncec
)));
4182 nce_resolve_src(ncec_t
*ncec
, in6_addr_t
*src
)
4187 ill_t
*ill
= ncec
->ncec_ill
;
4188 ill_t
*src_ill
= NULL
;
4189 ipif_t
*ipif
= NULL
;
4190 boolean_t is_myaddr
= NCE_MYADDR(ncec
);
4191 boolean_t isv6
= (ncec
->ncec_ipversion
== IPV6_VERSION
);
4193 ASSERT(src
!= NULL
);
4194 ASSERT(IN6_IS_ADDR_UNSPECIFIED(src
));
4197 src6
= ncec
->ncec_addr
;
4199 IN6_V4MAPPED_TO_IPADDR(&ncec
->ncec_addr
, src4
);
4202 * try to find one from the outgoing packet.
4204 mutex_enter(&ncec
->ncec_lock
);
4205 mp
= ncec
->ncec_qd_mp
;
4208 ip6_t
*ip6h
= (ip6_t
*)mp
->b_rptr
;
4210 src6
= ip6h
->ip6_src
;
4212 ipha_t
*ipha
= (ipha_t
*)mp
->b_rptr
;
4214 src4
= ipha
->ipha_src
;
4215 IN6_IPADDR_TO_V4MAPPED(src4
, &src6
);
4218 mutex_exit(&ncec
->ncec_lock
);
4222 * For outgoing packets, if the src of outgoing packet is one
4223 * of the assigned interface addresses use it, otherwise we
4224 * will pick the source address below.
4225 * For local addresses (is_myaddr) doing DAD, NDP announce
4226 * messages are mcast. So we use the (IPMP) cast_ill or the
4227 * (non-IPMP) ncec_ill for these message types. The only case
4228 * of unicast DAD messages are for IPv6 ND probes, for which
4229 * we find the ipif_bound_ill corresponding to the ncec_addr.
4231 if (!IN6_IS_ADDR_UNSPECIFIED(&src6
) || is_myaddr
) {
4233 ipif
= ipif_lookup_addr_nondup_v6(&src6
, ill
, ALL_ZONES
,
4236 ipif
= ipif_lookup_addr_nondup(src4
, ill
, ALL_ZONES
,
4241 * If no relevant ipif can be found, then it's not one of our
4242 * addresses. Reset to :: and try to find a src for the NS or
4243 * ARP request using ipif_select_source_v[4,6] below.
4244 * If an ipif can be found, but it's not yet done with
4245 * DAD verification, and we are not being invoked for
4246 * DAD (i.e., !is_myaddr), then just postpone this
4247 * transmission until later.
4250 src6
= ipv6_all_zeros
;
4252 } else if (!ipif
->ipif_addr_ready
&& !is_myaddr
) {
4253 DTRACE_PROBE2(nce__resolve__ipif__not__ready
,
4254 ncec_t
*, ncec
, ipif_t
*, ipif
);
4260 if (IN6_IS_ADDR_UNSPECIFIED(&src6
) && !is_myaddr
) {
4262 * Pick a source address for this solicitation, but
4263 * restrict the selection to addresses assigned to the
4264 * output interface. We do this because the destination will
4265 * create a neighbor cache entry for the source address of
4266 * this packet, so the source address had better be a valid
4270 ipif
= ipif_select_source_v6(ill
, &ncec
->ncec_addr
,
4271 B_TRUE
, IPV6_PREFER_SRC_DEFAULT
, ALL_ZONES
,
4276 IN6_V4MAPPED_TO_IPADDR(&ncec
->ncec_addr
, nce_addr
);
4277 ipif
= ipif_select_source_v4(ill
, nce_addr
, ALL_ZONES
,
4280 if (ipif
== NULL
&& IS_IPMP(ill
)) {
4281 ill_t
*send_ill
= ipmp_ill_hold_xmit_ill(ill
, B_TRUE
);
4283 if (send_ill
!= NULL
) {
4285 ipif
= ipif_select_source_v6(send_ill
,
4286 &ncec
->ncec_addr
, B_TRUE
,
4287 IPV6_PREFER_SRC_DEFAULT
, ALL_ZONES
,
4290 IN6_V4MAPPED_TO_IPADDR(&ncec
->ncec_addr
,
4292 ipif
= ipif_select_source_v4(send_ill
,
4293 src4
, ALL_ZONES
, B_TRUE
, NULL
);
4295 ill_refrele(send_ill
);
4300 char buf
[INET6_ADDRSTRLEN
];
4302 ip1dbg(("nce_resolve_src: No source ipif for dst %s\n",
4303 inet_ntop((isv6
? AF_INET6
: AF_INET
),
4304 (char *)&ncec
->ncec_addr
, buf
, sizeof (buf
))));
4305 DTRACE_PROBE1(nce__resolve__no__ipif
, ncec_t
*, ncec
);
4308 src6
= ipif
->ipif_v6lcl_addr
;
4312 src_ill
= ipif
->ipif_ill
;
4313 if (IS_IPMP(src_ill
))
4314 src_ill
= ipmp_ipif_hold_bound_ill(ipif
);
4316 ill_refhold(src_ill
);
4318 DTRACE_PROBE2(nce__resolve__src__ill
, ncec_t
*, ncec
,
4325 ip_nce_lookup_and_update(ipaddr_t
*addr
, ipif_t
*ipif
, ip_stack_t
*ipst
,
4326 uchar_t
*hwaddr
, int hwaddr_len
, int flags
)
4333 ill
= (ipif
? ipif
->ipif_ill
: NULL
);
4336 * only one ncec is possible
4338 nce
= nce_lookup_v4(ill
, addr
);
4340 ncec
= nce
->nce_common
;
4341 mutex_enter(&ncec
->ncec_lock
);
4342 if (NCE_ISREACHABLE(ncec
))
4343 new_state
= ND_UNCHANGED
;
4345 new_state
= ND_STALE
;
4346 ncec
->ncec_flags
= flags
;
4347 nce_update(ncec
, new_state
, hwaddr
);
4348 mutex_exit(&ncec
->ncec_lock
);
4354 * ill is wildcard; clean up all ncec's and ire's
4355 * that match on addr.
4359 hwm
.hwm_addr
= *addr
;
4360 hwm
.hwm_hwlen
= hwaddr_len
;
4361 hwm
.hwm_hwaddr
= hwaddr
;
4362 hwm
.hwm_flags
= flags
;
4364 ncec_walk_common(ipst
->ips_ndp4
, NULL
,
4365 (pfi_t
)nce_update_hw_changed
, (uchar_t
*)&hwm
, B_TRUE
);
4370 * Common function to add ncec entries.
4371 * we always add the ncec with ncec_ill == ill, and always create
4372 * nce_t on ncec_ill. A dlpi fastpath message may be triggered if the
4373 * ncec is !reachable.
4375 * When the caller passes in an nce_state of ND_UNCHANGED,
4376 * nce_add_common() will determine the state of the created nce based
4377 * on the ill_net_type and nce_flags used. Otherwise, the nce will
4378 * be created with state set to the passed in nce_state.
4381 nce_add_common(ill_t
*ill
, uchar_t
*hw_addr
, uint_t hw_addr_len
,
4382 const in6_addr_t
*addr
, uint16_t flags
, uint16_t nce_state
, nce_t
**retnce
)
4384 static ncec_t nce_nil
;
4385 uchar_t
*template = NULL
;
4389 ip_stack_t
*ipst
= ill
->ill_ipst
;
4391 boolean_t fastprobe
= B_FALSE
;
4392 struct ndp_g_s
*ndp
;
4394 mblk_t
*dlur_mp
= NULL
;
4397 ndp
= ill
->ill_ipst
->ips_ndp6
;
4399 ndp
= ill
->ill_ipst
->ips_ndp4
;
4403 ASSERT(MUTEX_HELD(&ndp
->ndp_g_lock
));
4405 if (IN6_IS_ADDR_UNSPECIFIED(addr
)) {
4406 ip0dbg(("nce_add_common: no addr\n"));
4409 if ((flags
& ~NCE_EXTERNAL_FLAGS_MASK
)) {
4410 ip0dbg(("nce_add_common: flags = %x\n", (int)flags
));
4414 if (ill
->ill_isv6
) {
4415 ncep
= ((ncec_t
**)NCE_HASH_PTR_V6(ipst
, *addr
));
4419 IN6_V4MAPPED_TO_IPADDR(addr
, v4addr
);
4420 ncep
= ((ncec_t
**)NCE_HASH_PTR_V4(ipst
, v4addr
));
4424 * The caller has ensured that there is no nce on ill, but there could
4425 * still be an nce_common_t for the address, so that we find existing
4426 * ncec_t structures first, and atomically add a new nce_t if
4427 * one is found. The ndp_g_lock ensures that we don't cross threads
4428 * with an ncec_delete(). Unlike ncec_lookup_illgrp() we do not
4429 * compare for matches across the illgrp because this function is
4430 * called via nce_lookup_then_add_v* -> nce_add_v* -> nce_add_common,
4431 * with the nce_lookup_then_add_v* passing in the ipmp_ill where
4435 for (; ncec
!= NULL
; ncec
= ncec
->ncec_next
) {
4436 if (ncec
->ncec_ill
== ill
) {
4437 if (IN6_ARE_ADDR_EQUAL(&ncec
->ncec_addr
, addr
)) {
4439 * We should never find *retnce to be
4440 * MYADDR, since the caller may then
4441 * incorrectly restart a DAD timer that's
4442 * already running. However, if we are in
4443 * forwarding mode, and the interface is
4444 * moving in/out of groups, the data
4445 * path ire lookup (e.g., ire_revalidate_nce)
4446 * may have determined that some destination
4447 * is offlink while the control path is adding
4448 * that address as a local address.
4449 * Recover from this case by failing the
4452 if (NCE_MYADDR(ncec
))
4454 *retnce
= nce_ill_lookup_then_add(ill
, ncec
);
4455 if (*retnce
!= NULL
)
4460 if (*retnce
!= NULL
) /* caller must trigger fastpath on nce */
4463 ncec
= kmem_cache_alloc(ncec_cache
, KM_NOSLEEP
);
4467 ncec
->ncec_ill
= ill
;
4468 ncec
->ncec_ipversion
= (ill
->ill_isv6
? IPV6_VERSION
: IPV4_VERSION
);
4469 ncec
->ncec_flags
= flags
;
4470 ncec
->ncec_ipst
= ipst
; /* No netstack_hold */
4472 if (!ill
->ill_isv6
) {
4476 * DAD probe interval and probe count are set based on
4477 * fast/slow probe settings. If the underlying link doesn't
4478 * have reliably up/down notifications or if we're working
4479 * with IPv4 169.254.0.0/16 Link Local Address space, then
4480 * don't use the fast timers. Otherwise, use them.
4482 ASSERT(IN6_IS_ADDR_V4MAPPED(addr
));
4483 IN6_V4MAPPED_TO_IPADDR(addr
, addr4
);
4484 if (ill
->ill_note_link
&& !IS_IPV4_LL_SPACE(&addr4
)) {
4486 } else if (IS_IPMP(ill
) && NCE_PUBLISH(ncec
) &&
4487 !IS_IPV4_LL_SPACE(&addr4
)) {
4490 hwaddr_ill
= ipmp_illgrp_find_ill(ill
->ill_grp
, hw_addr
,
4492 if (hwaddr_ill
!= NULL
&& hwaddr_ill
->ill_note_link
)
4496 ncec
->ncec_xmit_interval
=
4497 ipst
->ips_arp_fastprobe_interval
;
4499 ipst
->ips_arp_fastprobe_count
;
4500 ncec
->ncec_flags
|= NCE_F_FAST
;
4502 ncec
->ncec_xmit_interval
=
4503 ipst
->ips_arp_probe_interval
;
4505 ipst
->ips_arp_probe_count
;
4507 if (NCE_PUBLISH(ncec
)) {
4508 ncec
->ncec_unsolicit_count
=
4509 ipst
->ips_ip_arp_publish_count
;
4513 * probe interval is constant: ILL_PROBE_INTERVAL
4514 * probe count is constant: ND_MAX_UNICAST_SOLICIT
4516 ncec
->ncec_pcnt
= ND_MAX_UNICAST_SOLICIT
;
4517 if (NCE_PUBLISH(ncec
)) {
4518 ncec
->ncec_unsolicit_count
=
4519 ipst
->ips_ip_ndp_unsolicit_count
;
4522 ncec
->ncec_rcnt
= ill
->ill_xmit_count
;
4523 ncec
->ncec_addr
= *addr
;
4524 ncec
->ncec_qd_mp
= NULL
;
4525 ncec
->ncec_refcnt
= 1; /* for ncec getting created */
4526 mutex_init(&ncec
->ncec_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
4527 ncec
->ncec_trace_disable
= B_FALSE
;
4530 * ncec_lladdr holds link layer address
4532 if (hw_addr_len
> 0) {
4533 template = kmem_alloc(hw_addr_len
, KM_NOSLEEP
);
4534 if (template == NULL
) {
4538 ncec
->ncec_lladdr
= template;
4539 ncec
->ncec_lladdr_length
= hw_addr_len
;
4540 bzero(ncec
->ncec_lladdr
, hw_addr_len
);
4542 if ((flags
& NCE_F_BCAST
) != 0) {
4543 state
= ND_REACHABLE
;
4544 ASSERT(hw_addr_len
> 0);
4545 } else if (ill
->ill_net_type
== IRE_IF_RESOLVER
) {
4547 } else if (ill
->ill_net_type
== IRE_IF_NORESOLVER
) {
4549 * NORESOLVER entries are always created in the REACHABLE
4552 state
= ND_REACHABLE
;
4553 if (ill
->ill_phys_addr_length
== IP_ADDR_LEN
&&
4554 ill
->ill_mactype
!= DL_IPV4
&&
4555 ill
->ill_mactype
!= DL_6TO4
) {
4557 * We create a nce_res_mp with the IP nexthop address
4558 * as the destination address if the physical length
4559 * is exactly 4 bytes for point-to-multipoint links
4560 * that do their own resolution from IP to link-layer
4561 * address (e.g. IP over X.25).
4563 bcopy((uchar_t
*)addr
,
4564 ncec
->ncec_lladdr
, ill
->ill_phys_addr_length
);
4566 if (ill
->ill_phys_addr_length
== IPV6_ADDR_LEN
&&
4567 ill
->ill_mactype
!= DL_IPV6
) {
4569 * We create a nce_res_mp with the IP nexthop address
4570 * as the destination address if the physical length
4571 * is exactly 16 bytes for point-to-multipoint links
4572 * that do their own resolution from IP to link-layer
4575 bcopy((uchar_t
*)addr
,
4576 ncec
->ncec_lladdr
, ill
->ill_phys_addr_length
);
4579 * Since NUD is not part of the base IPv4 protocol definition,
4580 * IPv4 neighbor entries on NORESOLVER interfaces will never
4581 * age, and are marked NCE_F_NONUD.
4584 ncec
->ncec_flags
|= NCE_F_NONUD
;
4585 } else if (ill
->ill_net_type
== IRE_LOOPBACK
) {
4586 state
= ND_REACHABLE
;
4589 if (hw_addr
!= NULL
|| ill
->ill_net_type
== IRE_IF_NORESOLVER
) {
4591 * We are adding an ncec with a deterministic hw_addr,
4592 * so the state can only be one of {REACHABLE, STALE, PROBE}.
4594 * if we are adding a unicast ncec for the local address
4595 * it would be REACHABLE; we would be adding a ND_STALE entry
4596 * for the requestor of an ARP_REQUEST/ND_SOLICIT. Our own
4597 * addresses are added in PROBE to trigger DAD.
4599 if ((flags
& (NCE_F_MCAST
|NCE_F_BCAST
)) ||
4600 ill
->ill_net_type
== IRE_IF_NORESOLVER
)
4601 state
= ND_REACHABLE
;
4602 else if (!NCE_PUBLISH(ncec
))
4606 if (hw_addr
!= NULL
)
4607 nce_set_ll(ncec
, hw_addr
);
4609 /* caller overrides internally computed state */
4610 if (nce_state
!= ND_UNCHANGED
)
4613 if (state
== ND_PROBE
)
4614 ncec
->ncec_flags
|= NCE_F_UNVERIFIED
;
4616 ncec
->ncec_state
= state
;
4618 if (state
== ND_REACHABLE
) {
4619 ncec
->ncec_last
= ncec
->ncec_init_time
=
4620 TICK_TO_MSEC(ddi_get_lbolt64());
4622 ncec
->ncec_last
= 0;
4623 if (state
== ND_INITIAL
)
4624 ncec
->ncec_init_time
= TICK_TO_MSEC(ddi_get_lbolt64());
4626 list_create(&ncec
->ncec_cb
, sizeof (ncec_cb_t
),
4627 offsetof(ncec_cb_t
, ncec_cb_node
));
4629 * have all the memory allocations out of the way before taking locks
4630 * and adding the nce.
4632 nce
= kmem_cache_alloc(nce_cache
, KM_NOSLEEP
);
4637 if (ncec
->ncec_lladdr
!= NULL
||
4638 ill
->ill_net_type
== IRE_IF_NORESOLVER
) {
4639 dlur_mp
= ill_dlur_gen(ncec
->ncec_lladdr
,
4640 ill
->ill_phys_addr_length
, ill
->ill_sap
,
4641 ill
->ill_sap_length
);
4642 if (dlur_mp
== NULL
) {
4649 * Atomically ensure that the ill is not CONDEMNED, before
4652 mutex_enter(&ill
->ill_lock
);
4653 if (ill
->ill_state_flags
& ILL_CONDEMNED
) {
4654 mutex_exit(&ill
->ill_lock
);
4658 if (!NCE_MYADDR(ncec
) &&
4659 (ill
->ill_state_flags
& ILL_DOWN_IN_PROGRESS
)) {
4660 mutex_exit(&ill
->ill_lock
);
4661 DTRACE_PROBE1(nce__add__on__down__ill
, ncec_t
*, ncec
);
4666 * Acquire the ncec_lock even before adding the ncec to the list
4667 * so that it cannot get deleted after the ncec is added, but
4668 * before we add the nce.
4670 mutex_enter(&ncec
->ncec_lock
);
4671 if ((ncec
->ncec_next
= *ncep
) != NULL
)
4672 ncec
->ncec_next
->ncec_ptpn
= &ncec
->ncec_next
;
4674 ncec
->ncec_ptpn
= ncep
;
4676 /* Bump up the number of ncec's referencing this ill */
4677 DTRACE_PROBE3(ill__incr__cnt
, (ill_t
*), ill
,
4678 (char *), "ncec", (void *), ncec
);
4679 ill
->ill_ncec_cnt
++;
4681 * Since we hold the ncec_lock at this time, the ncec cannot be
4682 * condemned, and we can safely add the nce.
4684 *retnce
= nce_add_impl(ill
, ncec
, nce
, dlur_mp
);
4685 mutex_exit(&ncec
->ncec_lock
);
4686 mutex_exit(&ill
->ill_lock
);
4688 /* caller must trigger fastpath on *retnce */
4693 kmem_cache_free(ncec_cache
, ncec
);
4695 kmem_cache_free(nce_cache
, nce
);
4697 if (template != NULL
)
4698 kmem_free(template, ill
->ill_phys_addr_length
);
4703 * take a ref on the nce
4706 nce_refhold(nce_t
*nce
)
4708 mutex_enter(&nce
->nce_lock
);
4710 ASSERT((nce
)->nce_refcnt
!= 0);
4711 mutex_exit(&nce
->nce_lock
);
4715 * release a ref on the nce; In general, this
4716 * cannot be called with locks held because nce_refrele
4717 * may result in nce_inactive which will take the ill_lock,
4718 * do ipif_ill_refrele_tail etc. Thus the one exception
4719 * where this can be called with locks held is when the caller
4720 * is certain that the nce_refcnt is sufficient to prevent
4721 * the invocation of nce_inactive.
4724 nce_refrele(nce_t
*nce
)
4726 ASSERT((nce
)->nce_refcnt
!= 0);
4727 mutex_enter(&nce
->nce_lock
);
4728 if (--nce
->nce_refcnt
== 0)
4729 nce_inactive(nce
); /* destroys the mutex */
4731 mutex_exit(&nce
->nce_lock
);
4735 * free the nce after all refs have gone away.
4738 nce_inactive(nce_t
*nce
)
4740 ill_t
*ill
= nce
->nce_ill
;
4742 ASSERT(nce
->nce_refcnt
== 0);
4744 ncec_refrele_notr(nce
->nce_common
);
4745 nce
->nce_common
= NULL
;
4746 freemsg(nce
->nce_fp_mp
);
4747 freemsg(nce
->nce_dlur_mp
);
4749 mutex_enter(&ill
->ill_lock
);
4750 DTRACE_PROBE3(ill__decr__cnt
, (ill_t
*), ill
,
4751 (char *), "nce", (void *), nce
);
4753 nce
->nce_ill
= NULL
;
4755 * If the number of ncec's associated with this ill has dropped
4756 * to zero, check whether we need to restart any operation that
4757 * is waiting for this to happen.
4759 if (ILL_DOWN_OK(ill
)) {
4760 /* ipif_ill_refrele_tail drops the ill_lock */
4761 ipif_ill_refrele_tail(ill
);
4763 mutex_exit(&ill
->ill_lock
);
4766 mutex_destroy(&nce
->nce_lock
);
4767 kmem_cache_free(nce_cache
, nce
);
4771 * Add an nce to the ill_nce list.
4774 nce_add_impl(ill_t
*ill
, ncec_t
*ncec
, nce_t
*nce
, mblk_t
*dlur_mp
)
4776 bzero(nce
, sizeof (*nce
));
4777 mutex_init(&nce
->nce_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
4778 nce
->nce_common
= ncec
;
4779 nce
->nce_addr
= ncec
->ncec_addr
;
4781 DTRACE_PROBE3(ill__incr__cnt
, (ill_t
*), ill
,
4782 (char *), "nce", (void *), nce
);
4785 nce
->nce_refcnt
= 1; /* for the thread */
4786 ncec
->ncec_refcnt
++; /* want ncec_refhold_locked_notr(ncec) */
4787 nce
->nce_dlur_mp
= dlur_mp
;
4789 /* add nce to the ill's fastpath list. */
4790 nce
->nce_refcnt
++; /* for the list */
4791 list_insert_head(&ill
->ill_nce
, nce
);
4796 nce_add(ill_t
*ill
, ncec_t
*ncec
)
4799 mblk_t
*dlur_mp
= NULL
;
4801 ASSERT(MUTEX_HELD(&ill
->ill_lock
));
4802 ASSERT(MUTEX_HELD(&ncec
->ncec_lock
));
4804 nce
= kmem_cache_alloc(nce_cache
, KM_NOSLEEP
);
4807 if (ncec
->ncec_lladdr
!= NULL
||
4808 ill
->ill_net_type
== IRE_IF_NORESOLVER
) {
4809 dlur_mp
= ill_dlur_gen(ncec
->ncec_lladdr
,
4810 ill
->ill_phys_addr_length
, ill
->ill_sap
,
4811 ill
->ill_sap_length
);
4812 if (dlur_mp
== NULL
) {
4813 kmem_cache_free(nce_cache
, nce
);
4817 return (nce_add_impl(ill
, ncec
, nce
, dlur_mp
));
4821 * remove the nce from the ill's fastpath (ill_nce) list
4824 nce_delete(nce_t
*nce
)
4826 ill_t
*ill
= nce
->nce_ill
;
4828 ASSERT(MUTEX_HELD(&ill
->ill_lock
));
4830 mutex_enter(&nce
->nce_lock
);
4831 if (nce
->nce_is_condemned
) {
4833 * some other thread has removed this nce from the ill_nce list
4835 mutex_exit(&nce
->nce_lock
);
4838 nce
->nce_is_condemned
= B_TRUE
;
4839 mutex_exit(&nce
->nce_lock
);
4841 list_remove(&ill
->ill_nce
, nce
);
4843 * even though we are holding the ill_lock, it is ok to
4844 * call nce_refrele here because we know that we should have
4845 * at least 2 refs on the nce: one for the thread, and one
4846 * for the list. The refrele below will release the one for
4853 nce_lookup(ill_t
*ill
, const in6_addr_t
*addr
)
4857 ASSERT(ill
!= NULL
);
4858 ASSERT(MUTEX_HELD(&ill
->ill_lock
));
4860 for (nce
= list_head(&ill
->ill_nce
); nce
!= NULL
;
4861 nce
= list_next(&ill
->ill_nce
, nce
)) {
4862 if (IN6_ARE_ADDR_EQUAL(&nce
->nce_addr
, addr
))
4867 * if we found the nce on the ill_nce list while holding
4868 * the ill_lock, then it cannot be condemned yet.
4871 ASSERT(!nce
->nce_is_condemned
);
4878 * Walk the ill_nce list on ill. The callback function func() cannot perform
4879 * any destructive actions.
4882 nce_walk_common(ill_t
*ill
, pfi_t func
, void *arg
)
4884 nce_t
*nce
= NULL
, *nce_next
;
4886 ASSERT(MUTEX_HELD(&ill
->ill_lock
));
4887 for (nce
= list_head(&ill
->ill_nce
); nce
!= NULL
; ) {
4888 nce_next
= list_next(&ill
->ill_nce
, nce
);
4889 if (func(ill
, nce
, arg
) != 0)
4896 nce_walk(ill_t
*ill
, pfi_t func
, void *arg
)
4898 mutex_enter(&ill
->ill_lock
);
4899 nce_walk_common(ill
, func
, arg
);
4900 mutex_exit(&ill
->ill_lock
);
4904 nce_flush(ill_t
*ill
, boolean_t flushall
)
4906 nce_t
*nce
, *nce_next
;
4909 list_create(&dead
, sizeof (nce_t
), offsetof(nce_t
, nce_node
));
4910 mutex_enter(&ill
->ill_lock
);
4911 for (nce
= list_head(&ill
->ill_nce
); nce
!= NULL
; ) {
4912 nce_next
= list_next(&ill
->ill_nce
, nce
);
4913 if (!flushall
&& NCE_PUBLISH(nce
->nce_common
)) {
4918 * nce_delete requires that the caller should either not
4919 * be holding locks, or should hold a ref to ensure that
4920 * we won't hit ncec_inactive. So take a ref and clean up
4921 * after the list is flushed.
4925 list_insert_tail(&dead
, nce
);
4928 mutex_exit(&ill
->ill_lock
);
4929 while ((nce
= list_head(&dead
)) != NULL
) {
4930 list_remove(&dead
, nce
);
4933 ASSERT(list_is_empty(&dead
));
4934 list_destroy(&dead
);
4937 /* Return an interval that is anywhere in the [1 .. intv] range */
4939 nce_fuzz_interval(clock_t intv
, boolean_t initial_time
)
4943 (void) random_get_pseudo_bytes((uint8_t *)&rnd
, sizeof (rnd
));
4944 /* Note that clock_t is signed; must chop off bits */
4945 rnd
&= (1ul << (NBBY
* sizeof (rnd
) - 1)) - 1;
4950 intv
= (rnd
% intv
) + 1;
4952 /* Compute 'frac' as 20% of the configured interval */
4953 if ((frac
= intv
/ 5) <= 1)
4955 /* Set intv randomly in the range [intv-frac .. intv+frac] */
4956 if ((intv
= intv
- frac
+ rnd
% (2 * frac
+ 1)) <= 0)
4963 nce_resolv_ipmp_ok(ncec_t
*ncec
)
4967 iaflags_t ixaflags
= IXAF_NO_TRACE
;
4969 ill_t
*ill
= ncec
->ncec_ill
;
4970 boolean_t isv6
= (ncec
->ncec_ipversion
== IPV6_VERSION
);
4971 ipif_t
*src_ipif
= NULL
;
4972 ip_stack_t
*ipst
= ill
->ill_ipst
;
4976 ASSERT(IS_IPMP(ill
));
4978 mutex_enter(&ncec
->ncec_lock
);
4979 nprobes
= ncec
->ncec_nprobes
;
4980 mp
= ncec
->ncec_qd_mp
;
4981 ncec
->ncec_qd_mp
= NULL
;
4982 ncec
->ncec_nprobes
= 0;
4983 mutex_exit(&ncec
->ncec_lock
);
4985 while (mp
!= NULL
) {
4988 nxt_mp
= mp
->b_next
;
4991 ip6_t
*ip6h
= (ip6_t
*)mp
->b_rptr
;
4993 pkt_len
= ntohs(ip6h
->ip6_plen
) + IPV6_HDR_LEN
;
4994 src_ipif
= ipif_lookup_addr_nondup_v6(&ip6h
->ip6_src
,
4995 ill
, ALL_ZONES
, ipst
);
4997 ipha_t
*ipha
= (ipha_t
*)mp
->b_rptr
;
4999 ixaflags
|= IXAF_IS_IPV4
;
5000 pkt_len
= ntohs(ipha
->ipha_length
);
5001 src_ipif
= ipif_lookup_addr_nondup(ipha
->ipha_src
,
5002 ill
, ALL_ZONES
, ipst
);
5006 * find a new nce based on an under_ill. The first IPMP probe
5007 * packet gets queued, so we could still find a src_ipif that
5008 * matches an IPMP test address.
5010 if (src_ipif
== NULL
|| IS_IPMP(src_ipif
->ipif_ill
)) {
5012 * if src_ipif is null, this could be either a
5013 * forwarded packet or a probe whose src got deleted.
5014 * We identify the former case by looking for the
5015 * ncec_nprobes: the first ncec_nprobes packets are
5018 if (src_ipif
== NULL
&& nprobes
> 0)
5022 * For forwarded packets, we use the ipmp rotor
5025 send_ill
= ipmp_ill_hold_xmit_ill(ncec
->ncec_ill
,
5028 send_ill
= src_ipif
->ipif_ill
;
5029 ill_refhold(send_ill
);
5032 DTRACE_PROBE4(nce__resolve__ipmp
, (mblk_t
*), mp
,
5033 (ncec_t
*), ncec
, (ipif_t
*),
5034 src_ipif
, (ill_t
*), send_ill
);
5036 if (send_ill
== NULL
) {
5037 if (src_ipif
!= NULL
)
5038 ipif_refrele(src_ipif
);
5041 /* create an under_nce on send_ill */
5042 rw_enter(&ipst
->ips_ill_g_lock
, RW_READER
);
5043 if (IS_IN_SAME_ILLGRP(send_ill
, ncec
->ncec_ill
))
5044 under_nce
= nce_fastpath_create(send_ill
, ncec
);
5047 rw_exit(&ipst
->ips_ill_g_lock
);
5048 if (under_nce
!= NULL
&& NCE_ISREACHABLE(ncec
))
5049 nce_fastpath_trigger(under_nce
);
5051 ill_refrele(send_ill
);
5052 if (src_ipif
!= NULL
)
5053 ipif_refrele(src_ipif
);
5055 if (under_nce
!= NULL
) {
5056 (void) ip_xmit(mp
, under_nce
, ixaflags
, pkt_len
, 0,
5057 ALL_ZONES
, 0, NULL
);
5058 nce_refrele(under_nce
);
5066 BUMP_MIB(&ipst
->ips_ip6_mib
, ipIfStatsOutDiscards
);
5068 BUMP_MIB(&ipst
->ips_ip_mib
, ipIfStatsOutDiscards
);
5070 ip_drop_output("ipIfStatsOutDiscards - no under_ill", mp
, NULL
);
5076 ncec_cb_dispatch(ncec
); /* complete callbacks */