/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
 */
25 #include <sys/types.h>
26 #include <sys/stream.h>
27 #include <sys/stropts.h>
28 #include <sys/strsun.h>
29 #include <sys/sysmacros.h>
30 #include <sys/errno.h>
32 #include <sys/socket.h>
34 #include <sys/sunddi.h>
35 #include <sys/cmn_err.h>
36 #include <sys/debug.h>
37 #include <sys/vtrace.h>
40 #include <sys/ethernet.h>
45 #include <net/if_types.h>
46 #include <net/if_dl.h>
47 #include <net/route.h>
48 #include <netinet/in.h>
49 #include <netinet/ip6.h>
50 #include <netinet/icmp6.h>
52 #include <inet/common.h>
54 #include <inet/mib2.h>
57 #include <inet/ip_impl.h>
58 #include <inet/ipclassifier.h>
59 #include <inet/ip_if.h>
60 #include <inet/ip_ire.h>
61 #include <inet/ip_rts.h>
63 #include <inet/ip_ndp.h>
64 #include <inet/sctp_ip.h>
65 #include <inet/ip_arp.h>
66 #include <inet/ip2mac_impl.h>
/*
 * Interval (ms) between unsolicited periodic announcements of a local
 * address: the ND unsolicited-advertisement interval for IPv6, the ARP
 * publish interval for IPv4.  Expects an `ipst' (ip_stack_t *) to be in
 * scope at the point of use.
 */
#define	ANNOUNCE_INTERVAL(isv6) \
	(isv6 ? ipst->ips_ip_ndp_unsolicit_interval : \
	ipst->ips_ip_arp_publish_interval)
/*
 * Interval (ms) between defensive transmissions that reassert ownership
 * of a local address after a conflict is observed: the NDP defend
 * interval for IPv6, the ARP defend interval for IPv4.  Expects an
 * `ipst' (ip_stack_t *) to be in scope at the point of use.
 */
#define	DEFENSE_INTERVAL(isv6) \
	(isv6 ? ipst->ips_ndp_defend_interval : \
	ipst->ips_arp_defend_interval)
/*
 * Non-tunable probe interval (ms), based on link capabilities: links
 * that report link-state change notifications (ill_note_link) probe
 * quickly; others fall back to a conservative interval.
 */
#define	ILL_PROBE_INTERVAL(ill)	((ill)->ill_note_link ? 150 : 1500)
/*
 * The IPv4 Link Local address space (169.254/16) is special; we do extra
 * duplicate checking there, as the entire assignment mechanism rests on
 * random numbers.  True when the first two octets at `ptr' are 169.254.
 */
#define	IS_IPV4_LL_SPACE(ptr)	(((uchar_t *)ptr)[0] == 169 && \
	((uchar_t *)ptr)[1] == 254)
/*
 * NCE_EXTERNAL_FLAGS_MASK defines the set of ncec_flags that may be passed
 * in to the ncec*add* functions.
 *
 * NCE_F_AUTHORITY means that we ignore any incoming adverts for that
 * mapping (though DAD is performed for the mapping). NCE_F_PUBLISH means
 * that we will respond to requests for the protocol address.
 */
#define	NCE_EXTERNAL_FLAGS_MASK \
	(NCE_F_MYADDR | NCE_F_ISROUTER | NCE_F_NONUD | \
	NCE_F_ANYCAST | NCE_F_UNSOL_ADV | NCE_F_BCAST | NCE_F_MCAST | \
	NCE_F_AUTHORITY | NCE_F_PUBLISH | NCE_F_STATIC)
102 * ndp_g_lock -> ill_lock -> ncec_lock
104 * The ndp_g_lock protects the NCE hash (nce_hash_tbl, NCE_HASH_PTR) and
105 * ncec_next. ncec_lock protects the contents of the NCE (particularly
109 static void nce_cleanup_list(ncec_t
*ncec
);
110 static void nce_set_ll(ncec_t
*ncec
, uchar_t
*ll_addr
);
111 static ncec_t
*ncec_lookup_illgrp(ill_t
*, const in6_addr_t
*,
113 static nce_t
*nce_lookup_addr(ill_t
*, const in6_addr_t
*);
114 static int nce_set_multicast_v6(ill_t
*ill
, const in6_addr_t
*addr
,
115 uint16_t ncec_flags
, nce_t
**newnce
);
116 static int nce_set_multicast_v4(ill_t
*ill
, const in_addr_t
*dst
,
117 uint16_t ncec_flags
, nce_t
**newnce
);
118 static boolean_t
ndp_xmit(ill_t
*ill
, uint32_t operation
,
119 uint8_t *hwaddr
, uint_t hwaddr_len
, const in6_addr_t
*sender
,
120 const in6_addr_t
*target
, int flag
);
121 static void ncec_refhold_locked(ncec_t
*);
122 static boolean_t
ill_defend_rate_limit(ill_t
*, ncec_t
*);
123 static void nce_queue_mp_common(ncec_t
*, mblk_t
*, boolean_t
);
124 static int nce_add_common(ill_t
*, uchar_t
*, uint_t
, const in6_addr_t
*,
125 uint16_t, uint16_t, nce_t
**);
126 static nce_t
*nce_add_impl(ill_t
*, ncec_t
*, nce_t
*, mblk_t
*);
127 static nce_t
*nce_add(ill_t
*, ncec_t
*);
128 static void nce_inactive(nce_t
*);
129 extern nce_t
*nce_lookup(ill_t
*, const in6_addr_t
*);
130 static nce_t
*nce_ill_lookup_then_add(ill_t
*, ncec_t
*);
131 static int nce_add_v6(ill_t
*, uchar_t
*, uint_t
, const in6_addr_t
*,
132 uint16_t, uint16_t, nce_t
**);
133 static int nce_add_v4(ill_t
*, uchar_t
*, uint_t
, const in_addr_t
*,
134 uint16_t, uint16_t, nce_t
**);
135 static int nce_add_v6_postprocess(nce_t
*);
136 static int nce_add_v4_postprocess(nce_t
*);
137 static ill_t
*nce_resolve_src(ncec_t
*, in6_addr_t
*);
138 static clock_t nce_fuzz_interval(clock_t, boolean_t
);
139 static void nce_resolv_ipmp_ok(ncec_t
*);
140 static void nce_walk_common(ill_t
*, pfi_t
, void *);
141 static void nce_start_timer(ncec_t
*, uint_t
);
142 static nce_t
*nce_fastpath_create(ill_t
*, ncec_t
*);
143 static void nce_fastpath_trigger(nce_t
*);
144 static nce_t
*nce_fastpath(ncec_t
*, boolean_t
, nce_t
*);
147 static void ncec_trace_cleanup(const ncec_t
*);
/*
 * Hash bucket (ncec_t **) in the IPv4 neighbor cache for `addr'.
 */
#define	NCE_HASH_PTR_V4(ipst, addr)					\
	(&((ipst)->ips_ndp4->nce_hash_tbl[IRE_ADDR_HASH(addr, NCE_TABLE_SIZE)]))
/*
 * Hash bucket (ncec_t **) in the IPv6 neighbor cache for `addr'.
 *
 * NOTE(review): the trailing continuation line of this macro was missing
 * from the garbled extraction; it has been reconstructed by symmetry with
 * NCE_HASH_PTR_V4 above — confirm against the authoritative source.
 */
#define	NCE_HASH_PTR_V6(ipst, addr)				 \
	(&((ipst)->ips_ndp6->nce_hash_tbl[NCE_ADDR_HASH_V6(addr, \
	    NCE_TABLE_SIZE)]))
157 extern kmem_cache_t
*ncec_cache
;
158 extern kmem_cache_t
*nce_cache
;
161 * Send out a IPv6 (unicast) or IPv4 (broadcast) DAD probe
162 * If src_ill is not null, the ncec_addr is bound to src_ill. The
163 * src_ill is ignored by nce_dad for IPv4 Neighbor Cache entries where
164 * the probe is sent on the ncec_ill (in the non-IPMP case) or the
165 * IPMP cast_ill (in the IPMP case).
167 * Note that the probe interval is based on the src_ill for IPv6, and
168 * the ncec_xmit_interval for IPv4.
171 nce_dad(ncec_t
*ncec
, ill_t
*src_ill
, boolean_t send_probe
)
174 uint32_t probe_interval
;
176 ASSERT(!(ncec
->ncec_flags
& NCE_F_MCAST
));
177 ASSERT(!(ncec
->ncec_flags
& NCE_F_BCAST
));
178 if (ncec
->ncec_ipversion
== IPV6_VERSION
) {
179 dropped
= ndp_xmit(src_ill
, ND_NEIGHBOR_SOLICIT
,
180 ncec
->ncec_lladdr
, ncec
->ncec_lladdr_length
,
181 &ipv6_all_zeros
, &ncec
->ncec_addr
, NDP_PROBE
);
182 probe_interval
= ILL_PROBE_INTERVAL(src_ill
);
184 /* IPv4 DAD delay the initial probe. */
186 dropped
= arp_probe(ncec
);
189 probe_interval
= nce_fuzz_interval(ncec
->ncec_xmit_interval
,
193 mutex_enter(&ncec
->ncec_lock
);
195 mutex_exit(&ncec
->ncec_lock
);
197 nce_restart_timer(ncec
, probe_interval
);
201 * Compute default flags to use for an advertisement of this ncec's address.
204 nce_advert_flags(const ncec_t
*ncec
)
208 if (ncec
->ncec_flags
& NCE_F_ISROUTER
)
209 flag
|= NDP_ISROUTER
;
210 if (!(ncec
->ncec_flags
& NCE_F_ANYCAST
))
217 * NDP Cache Entry creation routine.
218 * This routine must always be called with ndp6->ndp_g_lock held.
221 nce_add_v6(ill_t
*ill
, uchar_t
*hw_addr
, uint_t hw_addr_len
,
222 const in6_addr_t
*addr
, uint16_t flags
, uint16_t state
, nce_t
**newnce
)
227 ASSERT(MUTEX_HELD(&ill
->ill_ipst
->ips_ndp6
->ndp_g_lock
));
228 ASSERT(ill
!= NULL
&& ill
->ill_isv6
);
230 err
= nce_add_common(ill
, hw_addr
, hw_addr_len
, addr
, flags
, state
,
234 ASSERT(newnce
!= NULL
);
240 * Post-processing routine to be executed after nce_add_v6(). This function
241 * triggers fastpath (if appropriate) and DAD on the newly added nce entry
242 * and must be called without any locks held.
245 nce_add_v6_postprocess(nce_t
*nce
)
247 ncec_t
*ncec
= nce
->nce_common
;
248 boolean_t dropped
= B_FALSE
;
249 uchar_t
*hw_addr
= ncec
->ncec_lladdr
;
250 uint_t hw_addr_len
= ncec
->ncec_lladdr_length
;
251 ill_t
*ill
= ncec
->ncec_ill
;
253 uint16_t flags
= ncec
->ncec_flags
;
254 ip_stack_t
*ipst
= ill
->ill_ipst
;
255 boolean_t trigger_fastpath
= B_TRUE
;
258 * If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then
259 * we call nce_fastpath as soon as the ncec is resolved in nce_process.
260 * We call nce_fastpath from nce_update if the link layer address of
261 * the peer changes from nce_update
263 if (NCE_PUBLISH(ncec
) || !NCE_ISREACHABLE(ncec
) ||
264 (hw_addr
== NULL
&& ill
->ill_net_type
!= IRE_IF_NORESOLVER
))
265 trigger_fastpath
= B_FALSE
;
267 if (trigger_fastpath
)
268 nce_fastpath_trigger(nce
);
269 if (NCE_PUBLISH(ncec
) && ncec
->ncec_state
== ND_PROBE
) {
272 * Unicast entry that needs DAD.
275 hwaddr_ill
= ipmp_illgrp_find_ill(ill
->ill_grp
,
276 hw_addr
, hw_addr_len
);
280 nce_dad(ncec
, hwaddr_ill
, B_TRUE
);
282 } else if (flags
& NCE_F_UNSOL_ADV
) {
284 * We account for the transmit below by assigning one
285 * less than the ndd variable. Subsequent decrements
286 * are done in nce_timer.
288 mutex_enter(&ncec
->ncec_lock
);
289 ncec
->ncec_unsolicit_count
=
290 ipst
->ips_ip_ndp_unsolicit_count
- 1;
291 mutex_exit(&ncec
->ncec_lock
);
292 dropped
= ndp_xmit(ill
,
296 &ncec
->ncec_addr
, /* Source and target of the adv */
297 &ipv6_all_hosts_mcast
, /* Destination of the packet */
298 nce_advert_flags(ncec
));
299 mutex_enter(&ncec
->ncec_lock
);
301 ncec
->ncec_unsolicit_count
++;
303 ncec
->ncec_last_time_defended
= ddi_get_lbolt();
304 if (ncec
->ncec_unsolicit_count
!= 0) {
305 nce_start_timer(ncec
,
306 ipst
->ips_ip_ndp_unsolicit_interval
);
308 mutex_exit(&ncec
->ncec_lock
);
314 * Atomically lookup and add (if needed) Neighbor Cache information for
317 * IPMP notes: the ncec for non-local (i.e., !NCE_MYADDR(ncec) addresses
318 * are always added pointing at the ipmp_ill. Thus, when the ill passed
319 * to nce_add_v6 is an under_ill (i.e., IS_UNDER_IPMP(ill)) two nce_t
320 * entries will be created, both pointing at the same ncec_t. The nce_t
321 * entries will have their nce_ill set to the ipmp_ill and the under_ill
322 * respectively, with the ncec_t having its ncec_ill pointing at the ipmp_ill.
323 * Local addresses are always created on the ill passed to nce_add_v6.
326 nce_lookup_then_add_v6(ill_t
*ill
, uchar_t
*hw_addr
, uint_t hw_addr_len
,
327 const in6_addr_t
*addr
, uint16_t flags
, uint16_t state
, nce_t
**newnce
)
330 ip_stack_t
*ipst
= ill
->ill_ipst
;
331 nce_t
*nce
, *upper_nce
= NULL
;
333 boolean_t need_ill_refrele
= B_FALSE
;
335 if (flags
& NCE_F_MCAST
) {
337 * hw_addr will be figured out in nce_set_multicast_v6;
338 * caller has to select the cast_ill
340 ASSERT(hw_addr
== NULL
);
341 ASSERT(!IS_IPMP(ill
));
342 err
= nce_set_multicast_v6(ill
, addr
, flags
, newnce
);
345 ASSERT(ill
->ill_isv6
);
346 if (IS_UNDER_IPMP(ill
) && !(flags
& NCE_F_MYADDR
)) {
347 ill
= ipmp_ill_hold_ipmp_ill(ill
);
350 need_ill_refrele
= B_TRUE
;
353 mutex_enter(&ipst
->ips_ndp6
->ndp_g_lock
);
354 nce
= nce_lookup_addr(ill
, addr
);
356 err
= nce_add_v6(ill
, hw_addr
, hw_addr_len
, addr
, flags
, state
,
361 mutex_exit(&ipst
->ips_ndp6
->ndp_g_lock
);
363 err
= nce_add_v6_postprocess(nce
);
364 if (in_ill
!= ill
&& nce
!= NULL
) {
365 nce_t
*under_nce
= NULL
;
368 * in_ill was the under_ill. Try to create the under_nce.
369 * Hold the ill_g_lock to prevent changes to group membership
372 rw_enter(&ipst
->ips_ill_g_lock
, RW_READER
);
373 if (!IS_IN_SAME_ILLGRP(in_ill
, ill
)) {
374 DTRACE_PROBE2(ill__not__in__group
, nce_t
*, nce
,
376 rw_exit(&ipst
->ips_ill_g_lock
);
382 under_nce
= nce_fastpath_create(in_ill
, nce
->nce_common
);
383 if (under_nce
== NULL
) {
384 rw_exit(&ipst
->ips_ill_g_lock
);
390 rw_exit(&ipst
->ips_ill_g_lock
);
392 nce
= under_nce
; /* will be returned to caller */
393 if (NCE_ISREACHABLE(nce
->nce_common
))
394 nce_fastpath_trigger(under_nce
);
396 /* nce_refrele is deferred until the lock is dropped */
404 if (upper_nce
!= NULL
)
405 nce_refrele(upper_nce
);
406 if (need_ill_refrele
)
412 * Remove all the CONDEMNED nces from the appropriate hash table.
413 * We create a private list of NCEs, these may have ires pointing
414 * to them, so the list will be passed through to clean up dependent
415 * ires and only then we can do ncec_refrele() which can make NCE inactive.
418 nce_remove(ndp_g_t
*ndp
, ncec_t
*ncec
, ncec_t
**free_nce_list
)
423 ASSERT(MUTEX_HELD(&ndp
->ndp_g_lock
));
424 ASSERT(ndp
->ndp_g_walker
== 0);
425 for (; ncec
; ncec
= ncec1
) {
426 ncec1
= ncec
->ncec_next
;
427 mutex_enter(&ncec
->ncec_lock
);
428 if (NCE_ISCONDEMNED(ncec
)) {
429 ptpn
= ncec
->ncec_ptpn
;
430 ncec1
= ncec
->ncec_next
;
432 ncec1
->ncec_ptpn
= ptpn
;
434 ncec
->ncec_ptpn
= NULL
;
435 ncec
->ncec_next
= NULL
;
436 ncec
->ncec_next
= *free_nce_list
;
437 *free_nce_list
= ncec
;
439 mutex_exit(&ncec
->ncec_lock
);
444 * 1. Mark the entry CONDEMNED. This ensures that no new nce_lookup()
445 * will return this NCE. Also no new timeouts will
446 * be started (See nce_restart_timer).
447 * 2. Cancel any currently running timeouts.
448 * 3. If there is an ndp walker, return. The walker will do the cleanup.
449 * This ensures that walkers see a consistent list of NCEs while walking.
450 * 4. Otherwise remove the NCE from the list of NCEs
453 ncec_delete(ncec_t
*ncec
)
457 int ipversion
= ncec
->ncec_ipversion
;
459 ip_stack_t
*ipst
= ncec
->ncec_ipst
;
461 if (ipversion
== IPV4_VERSION
)
462 ndp
= ipst
->ips_ndp4
;
464 ndp
= ipst
->ips_ndp6
;
466 /* Serialize deletes */
467 mutex_enter(&ncec
->ncec_lock
);
468 if (NCE_ISCONDEMNED(ncec
)) {
469 /* Some other thread is doing the delete */
470 mutex_exit(&ncec
->ncec_lock
);
474 * Caller has a refhold. Also 1 ref for being in the list. Thus
475 * refcnt has to be >= 2
477 ASSERT(ncec
->ncec_refcnt
>= 2);
478 ncec
->ncec_flags
|= NCE_F_CONDEMNED
;
479 mutex_exit(&ncec
->ncec_lock
);
481 /* Count how many condemned ires for kmem_cache callback */
482 atomic_inc_32(&ipst
->ips_num_nce_condemned
);
483 nce_fastpath_list_delete(ncec
->ncec_ill
, ncec
, NULL
);
485 /* Complete any waiting callbacks */
486 ncec_cb_dispatch(ncec
);
489 * Cancel any running timer. Timeout can't be restarted
490 * since CONDEMNED is set. Can't hold ncec_lock across untimeout.
491 * Passing invalid timeout id is fine.
493 if (ncec
->ncec_timeout_id
!= 0) {
494 (void) untimeout(ncec
->ncec_timeout_id
);
495 ncec
->ncec_timeout_id
= 0;
498 mutex_enter(&ndp
->ndp_g_lock
);
499 if (ncec
->ncec_ptpn
== NULL
) {
501 * The last ndp walker has already removed this ncec from
502 * the list after we marked the ncec CONDEMNED and before
503 * we grabbed the global lock.
505 mutex_exit(&ndp
->ndp_g_lock
);
508 if (ndp
->ndp_g_walker
> 0) {
510 * Can't unlink. The walker will clean up
512 ndp
->ndp_g_walker_cleanup
= B_TRUE
;
513 mutex_exit(&ndp
->ndp_g_lock
);
518 * Now remove the ncec from the list. nce_restart_timer won't restart
519 * the timer since it is marked CONDEMNED.
521 ptpn
= ncec
->ncec_ptpn
;
522 ncec1
= ncec
->ncec_next
;
524 ncec1
->ncec_ptpn
= ptpn
;
526 ncec
->ncec_ptpn
= NULL
;
527 ncec
->ncec_next
= NULL
;
528 mutex_exit(&ndp
->ndp_g_lock
);
530 /* Removed from ncec_ptpn/ncec_next list */
531 ncec_refrele_notr(ncec
);
535 ncec_inactive(ncec_t
*ncec
)
538 ill_t
*ill
= ncec
->ncec_ill
;
539 ip_stack_t
*ipst
= ncec
->ncec_ipst
;
541 ASSERT(ncec
->ncec_refcnt
== 0);
542 ASSERT(MUTEX_HELD(&ncec
->ncec_lock
));
544 /* Count how many condemned nces for kmem_cache callback */
545 if (NCE_ISCONDEMNED(ncec
))
546 atomic_add_32(&ipst
->ips_num_nce_condemned
, -1);
548 /* Free all allocated messages */
549 mpp
= &ncec
->ncec_qd_mp
;
550 while (*mpp
!= NULL
) {
559 * must have been cleaned up in ncec_delete
561 ASSERT(list_is_empty(&ncec
->ncec_cb
));
562 list_destroy(&ncec
->ncec_cb
);
564 * free the ncec_lladdr if one was allocated in nce_add_common()
566 if (ncec
->ncec_lladdr_length
> 0)
567 kmem_free(ncec
->ncec_lladdr
, ncec
->ncec_lladdr_length
);
570 ncec_trace_cleanup(ncec
);
573 mutex_enter(&ill
->ill_lock
);
574 DTRACE_PROBE3(ill__decr__cnt
, (ill_t
*), ill
,
575 (char *), "ncec", (void *), ncec
);
577 ncec
->ncec_ill
= NULL
;
579 * If the number of ncec's associated with this ill have dropped
580 * to zero, check whether we need to restart any operation that
581 * is waiting for this to happen.
583 if (ILL_DOWN_OK(ill
)) {
584 /* ipif_ill_refrele_tail drops the ill_lock */
585 ipif_ill_refrele_tail(ill
);
587 mutex_exit(&ill
->ill_lock
);
590 mutex_destroy(&ncec
->ncec_lock
);
591 kmem_cache_free(ncec_cache
, ncec
);
595 * ncec_walk routine. Delete the ncec if it is associated with the ill
596 * that is going away. Always called as a writer.
599 ncec_delete_per_ill(ncec_t
*ncec
, uchar_t
*arg
)
601 if ((ncec
!= NULL
) && ncec
->ncec_ill
== (ill_t
*)arg
) {
607 * Neighbor Cache cleanup logic for a list of ncec_t entries.
610 nce_cleanup_list(ncec_t
*ncec
)
614 ASSERT(ncec
!= NULL
);
615 while (ncec
!= NULL
) {
616 ncec_next
= ncec
->ncec_next
;
617 ncec
->ncec_next
= NULL
;
620 * It is possible for the last ndp walker (this thread)
621 * to come here after ncec_delete has marked the ncec CONDEMNED
622 * and before it has removed the ncec from the fastpath list
623 * or called untimeout. So we need to do it here. It is safe
624 * for both ncec_delete and this thread to do it twice or
625 * even simultaneously since each of the threads has a
626 * reference on the ncec.
628 nce_fastpath_list_delete(ncec
->ncec_ill
, ncec
, NULL
);
630 * Cancel any running timer. Timeout can't be restarted
631 * since CONDEMNED is set. The ncec_lock can't be
632 * held across untimeout though passing invalid timeout
635 if (ncec
->ncec_timeout_id
!= 0) {
636 (void) untimeout(ncec
->ncec_timeout_id
);
637 ncec
->ncec_timeout_id
= 0;
639 /* Removed from ncec_ptpn/ncec_next list */
640 ncec_refrele_notr(ncec
);
646 * Restart DAD on given NCE. Returns B_TRUE if DAD has been restarted.
649 nce_restart_dad(ncec_t
*ncec
)
652 ill_t
*ill
, *hwaddr_ill
;
656 ill
= ncec
->ncec_ill
;
657 mutex_enter(&ncec
->ncec_lock
);
658 if (ncec
->ncec_state
== ND_PROBE
) {
659 mutex_exit(&ncec
->ncec_lock
);
661 } else if (ncec
->ncec_state
== ND_REACHABLE
) {
662 ASSERT(ncec
->ncec_lladdr
!= NULL
);
663 ncec
->ncec_state
= ND_PROBE
;
664 ncec
->ncec_pcnt
= ND_MAX_UNICAST_SOLICIT
;
666 * Slight cheat here: we don't use the initial probe delay
667 * for IPv4 in this obscure case.
669 mutex_exit(&ncec
->ncec_lock
);
671 hwaddr_ill
= ipmp_illgrp_find_ill(ill
->ill_grp
,
672 ncec
->ncec_lladdr
, ncec
->ncec_lladdr_length
);
676 nce_dad(ncec
, hwaddr_ill
, B_TRUE
);
679 mutex_exit(&ncec
->ncec_lock
);
686 * IPv6 Cache entry lookup. Try to find an ncec matching the parameters passed.
687 * If one is found, the refcnt on the ncec will be incremented.
690 ncec_lookup_illgrp_v6(ill_t
*ill
, const in6_addr_t
*addr
)
693 ip_stack_t
*ipst
= ill
->ill_ipst
;
695 rw_enter(&ipst
->ips_ill_g_lock
, RW_READER
);
696 mutex_enter(&ipst
->ips_ndp6
->ndp_g_lock
);
698 /* Get head of v6 hash table */
699 ncec
= *((ncec_t
**)NCE_HASH_PTR_V6(ipst
, *addr
));
700 ncec
= ncec_lookup_illgrp(ill
, addr
, ncec
);
701 mutex_exit(&ipst
->ips_ndp6
->ndp_g_lock
);
702 rw_exit(&ipst
->ips_ill_g_lock
);
706 * IPv4 Cache entry lookup. Try to find an ncec matching the parameters passed.
707 * If one is found, the refcnt on the ncec will be incremented.
710 ncec_lookup_illgrp_v4(ill_t
*ill
, const in_addr_t
*addr
)
714 ip_stack_t
*ipst
= ill
->ill_ipst
;
716 rw_enter(&ipst
->ips_ill_g_lock
, RW_READER
);
717 mutex_enter(&ipst
->ips_ndp4
->ndp_g_lock
);
719 /* Get head of v4 hash table */
720 ncec
= *((ncec_t
**)NCE_HASH_PTR_V4(ipst
, *addr
));
721 IN6_IPADDR_TO_V4MAPPED(*addr
, &addr6
);
722 ncec
= ncec_lookup_illgrp(ill
, &addr6
, ncec
);
723 mutex_exit(&ipst
->ips_ndp4
->ndp_g_lock
);
724 rw_exit(&ipst
->ips_ill_g_lock
);
729 * Cache entry lookup. Try to find an ncec matching the parameters passed.
730 * If an ncec is found, increment the hold count on that ncec.
731 * The caller passes in the start of the appropriate hash table, and must
732 * be holding the appropriate global lock (ndp_g_lock). In addition, since
733 * this function matches ncec_t entries across the illgrp, the ips_ill_g_lock
734 * must be held as reader.
736 * This function always matches across the ipmp group.
739 ncec_lookup_illgrp(ill_t
*ill
, const in6_addr_t
*addr
, ncec_t
*ncec
)
742 ip_stack_t
*ipst
= ill
->ill_ipst
;
745 ndp
= ipst
->ips_ndp6
;
747 ndp
= ipst
->ips_ndp4
;
750 ASSERT(MUTEX_HELD(&ndp
->ndp_g_lock
));
751 if (IN6_IS_ADDR_UNSPECIFIED(addr
))
753 for (; ncec
!= NULL
; ncec
= ncec
->ncec_next
) {
754 if (ncec
->ncec_ill
== ill
||
755 IS_IN_SAME_ILLGRP(ill
, ncec
->ncec_ill
)) {
756 if (IN6_ARE_ADDR_EQUAL(&ncec
->ncec_addr
, addr
)) {
757 mutex_enter(&ncec
->ncec_lock
);
758 if (!NCE_ISCONDEMNED(ncec
)) {
759 ncec_refhold_locked(ncec
);
760 mutex_exit(&ncec
->ncec_lock
);
763 mutex_exit(&ncec
->ncec_lock
);
771 * Find an nce_t on ill with nce_addr == addr. Lookup the nce_t
772 * entries for ill only, i.e., when ill is part of an ipmp group,
773 * nce_lookup_v4 will never try to match across the group.
776 nce_lookup_v4(ill_t
*ill
, const in_addr_t
*addr
)
780 ip_stack_t
*ipst
= ill
->ill_ipst
;
782 mutex_enter(&ipst
->ips_ndp4
->ndp_g_lock
);
783 IN6_IPADDR_TO_V4MAPPED(*addr
, &addr6
);
784 nce
= nce_lookup_addr(ill
, &addr6
);
785 mutex_exit(&ipst
->ips_ndp4
->ndp_g_lock
);
790 * Find an nce_t on ill with nce_addr == addr. Lookup the nce_t
791 * entries for ill only, i.e., when ill is part of an ipmp group,
792 * nce_lookup_v6 will never try to match across the group.
795 nce_lookup_v6(ill_t
*ill
, const in6_addr_t
*addr6
)
798 ip_stack_t
*ipst
= ill
->ill_ipst
;
800 mutex_enter(&ipst
->ips_ndp6
->ndp_g_lock
);
801 nce
= nce_lookup_addr(ill
, addr6
);
802 mutex_exit(&ipst
->ips_ndp6
->ndp_g_lock
);
807 nce_lookup_addr(ill_t
*ill
, const in6_addr_t
*addr
)
814 ASSERT(MUTEX_HELD(&ill
->ill_ipst
->ips_ndp6
->ndp_g_lock
));
816 ASSERT(MUTEX_HELD(&ill
->ill_ipst
->ips_ndp4
->ndp_g_lock
));
818 mutex_enter(&ill
->ill_lock
);
819 nce
= nce_lookup(ill
, addr
);
820 mutex_exit(&ill
->ill_lock
);
826 * Router turned to host. We need to make sure that cached copies of the ncec
827 * are not used for forwarding packets if they were derived from the default
828 * route, and that the default route itself is removed, as required by
829 * section 7.2.5 of RFC 2461.
831 * Note that the ncec itself probably has valid link-layer information for the
832 * nexthop, so that there is no reason to delete the ncec, as long as the
833 * ISROUTER flag is turned off.
836 ncec_router_to_host(ncec_t
*ncec
)
839 ip_stack_t
*ipst
= ncec
->ncec_ipst
;
841 mutex_enter(&ncec
->ncec_lock
);
842 ncec
->ncec_flags
&= ~NCE_F_ISROUTER
;
843 mutex_exit(&ncec
->ncec_lock
);
845 ire
= ire_ftable_lookup_v6(&ipv6_all_zeros
, &ipv6_all_zeros
,
846 &ncec
->ncec_addr
, IRE_DEFAULT
, ncec
->ncec_ill
, ALL_ZONES
, NULL
,
847 MATCH_IRE_ILL
| MATCH_IRE_TYPE
| MATCH_IRE_GW
, 0, ipst
, NULL
);
849 ip_rts_rtmsg(RTM_DELETE
, ire
, 0, ipst
);
856 * Process passed in parameters either from an incoming packet or via
860 nce_process(ncec_t
*ncec
, uchar_t
*hw_addr
, uint32_t flag
, boolean_t is_adv
)
862 ill_t
*ill
= ncec
->ncec_ill
;
863 uint32_t hw_addr_len
= ill
->ill_phys_addr_length
;
864 boolean_t ll_updated
= B_FALSE
;
865 boolean_t ll_changed
;
868 ASSERT(ncec
->ncec_ipversion
== IPV6_VERSION
);
870 * No updates of link layer address or the neighbor state is
871 * allowed, when the cache is in NONUD state. This still
872 * allows for responding to reachability solicitation.
874 mutex_enter(&ncec
->ncec_lock
);
875 if (ncec
->ncec_state
== ND_INCOMPLETE
) {
876 if (hw_addr
== NULL
) {
877 mutex_exit(&ncec
->ncec_lock
);
880 nce_set_ll(ncec
, hw_addr
);
882 * Update ncec state and send the queued packets
883 * back to ip this time ire will be added.
885 if (flag
& ND_NA_FLAG_SOLICITED
) {
886 nce_update(ncec
, ND_REACHABLE
, NULL
);
888 nce_update(ncec
, ND_STALE
, NULL
);
890 mutex_exit(&ncec
->ncec_lock
);
891 nce
= nce_fastpath(ncec
, B_TRUE
, NULL
);
897 ll_changed
= nce_cmp_ll_addr(ncec
, hw_addr
, hw_addr_len
);
899 /* If this is a SOLICITATION request only */
901 nce_update(ncec
, ND_STALE
, hw_addr
);
902 mutex_exit(&ncec
->ncec_lock
);
903 ncec_cb_dispatch(ncec
);
906 if (!(flag
& ND_NA_FLAG_OVERRIDE
) && ll_changed
) {
907 /* If in any other state than REACHABLE, ignore */
908 if (ncec
->ncec_state
== ND_REACHABLE
) {
909 nce_update(ncec
, ND_STALE
, NULL
);
911 mutex_exit(&ncec
->ncec_lock
);
912 ncec_cb_dispatch(ncec
);
916 nce_update(ncec
, ND_UNCHANGED
, hw_addr
);
919 if (flag
& ND_NA_FLAG_SOLICITED
) {
920 nce_update(ncec
, ND_REACHABLE
, NULL
);
923 nce_update(ncec
, ND_STALE
, NULL
);
926 mutex_exit(&ncec
->ncec_lock
);
927 if (!(flag
& ND_NA_FLAG_ROUTER
) && (ncec
->ncec_flags
&
929 ncec_router_to_host(ncec
);
931 ncec_cb_dispatch(ncec
);
937 * Pass arg1 to the pfi supplied, along with each ncec in existence.
938 * ncec_walk() places a REFHOLD on the ncec and drops the lock when
939 * walking the hash list.
942 ncec_walk_common(ndp_g_t
*ndp
, ill_t
*ill
, pfi_t pfi
, void *arg1
,
948 ncec_t
*free_nce_list
= NULL
;
950 mutex_enter(&ndp
->ndp_g_lock
);
951 /* Prevent ncec_delete from unlink and free of NCE */
953 mutex_exit(&ndp
->ndp_g_lock
);
954 for (ncep
= ndp
->nce_hash_tbl
;
955 ncep
< A_END(ndp
->nce_hash_tbl
); ncep
++) {
956 for (ncec
= *ncep
; ncec
!= NULL
; ncec
= ncec1
) {
957 ncec1
= ncec
->ncec_next
;
958 if (ill
== NULL
|| ncec
->ncec_ill
== ill
) {
964 ncec_refhold_notr(ncec
);
966 ncec_refrele_notr(ncec
);
971 mutex_enter(&ndp
->ndp_g_lock
);
973 if (ndp
->ndp_g_walker_cleanup
&& ndp
->ndp_g_walker
== 0) {
974 /* Time to delete condemned entries */
975 for (ncep
= ndp
->nce_hash_tbl
;
976 ncep
< A_END(ndp
->nce_hash_tbl
); ncep
++) {
979 nce_remove(ndp
, ncec
, &free_nce_list
);
982 ndp
->ndp_g_walker_cleanup
= B_FALSE
;
985 mutex_exit(&ndp
->ndp_g_lock
);
987 if (free_nce_list
!= NULL
) {
988 nce_cleanup_list(free_nce_list
);
994 * Note that ill can be NULL hence can't derive the ipst from it.
997 ncec_walk(ill_t
*ill
, pfi_t pfi
, void *arg1
, ip_stack_t
*ipst
)
999 ncec_walk_common(ipst
->ips_ndp4
, ill
, pfi
, arg1
, B_TRUE
);
1000 ncec_walk_common(ipst
->ips_ndp6
, ill
, pfi
, arg1
, B_TRUE
);
1004 * For each interface an entry is added for the unspecified multicast group.
1005 * Here that mapping is used to form the multicast cache entry for a particular
1006 * multicast destination.
1009 nce_set_multicast_v6(ill_t
*ill
, const in6_addr_t
*dst
,
1010 uint16_t flags
, nce_t
**newnce
)
1014 ip_stack_t
*ipst
= ill
->ill_ipst
;
1017 ASSERT(ill
!= NULL
);
1018 ASSERT(ill
->ill_isv6
);
1019 ASSERT(!(IN6_IS_ADDR_UNSPECIFIED(dst
)));
1021 mutex_enter(&ipst
->ips_ndp6
->ndp_g_lock
);
1022 nce
= nce_lookup_addr(ill
, dst
);
1024 mutex_exit(&ipst
->ips_ndp6
->ndp_g_lock
);
1027 if (ill
->ill_net_type
== IRE_IF_RESOLVER
) {
1029 * For IRE_IF_RESOLVER a hardware mapping can be
1032 hw_addr
= kmem_alloc(ill
->ill_nd_lla_len
, KM_NOSLEEP
);
1033 if (hw_addr
== NULL
) {
1034 mutex_exit(&ipst
->ips_ndp6
->ndp_g_lock
);
1037 ip_mcast_mapping(ill
, (uchar_t
*)dst
, hw_addr
);
1039 /* No hw_addr is needed for IRE_IF_NORESOLVER. */
1042 ASSERT((flags
& NCE_F_MCAST
) != 0);
1043 ASSERT((flags
& NCE_F_NONUD
) != 0);
1044 /* nce_state will be computed by nce_add_common() */
1045 err
= nce_add_v6(ill
, hw_addr
, ill
->ill_phys_addr_length
, dst
, flags
,
1046 ND_UNCHANGED
, &nce
);
1047 mutex_exit(&ipst
->ips_ndp6
->ndp_g_lock
);
1049 err
= nce_add_v6_postprocess(nce
);
1050 if (hw_addr
!= NULL
)
1051 kmem_free(hw_addr
, ill
->ill_nd_lla_len
);
1053 ip1dbg(("nce_set_multicast_v6: create failed" "%d\n", err
));
1057 ASSERT(nce
->nce_common
->ncec_state
== ND_REACHABLE
);
1066 * Return the link layer address, and any flags of a ncec.
1069 ndp_query(ill_t
*ill
, struct lif_nd_req
*lnr
)
1075 ASSERT(ill
!= NULL
&& ill
->ill_isv6
);
1076 sin6
= (sin6_t
*)&lnr
->lnr_addr
;
1077 addr
= &sin6
->sin6_addr
;
1080 * NOTE: if the ill is an IPMP interface, then match against the whole
1081 * illgrp. This e.g. allows in.ndpd to retrieve the link layer
1082 * addresses for the data addresses on an IPMP interface even though
1083 * ipif_ndp_up() created them with an ncec_ill of ipif_bound_ill.
1085 ncec
= ncec_lookup_illgrp_v6(ill
, addr
);
1088 /* If no link layer address is available yet, return ESRCH */
1089 if (!NCE_ISREACHABLE(ncec
)) {
1093 lnr
->lnr_hdw_len
= ill
->ill_phys_addr_length
;
1094 bcopy(ncec
->ncec_lladdr
, (uchar_t
*)&lnr
->lnr_hdw_addr
,
1096 if (ncec
->ncec_flags
& NCE_F_ISROUTER
)
1097 lnr
->lnr_flags
= NDF_ISROUTER_ON
;
1098 if (ncec
->ncec_flags
& NCE_F_ANYCAST
)
1099 lnr
->lnr_flags
|= NDF_ANYCAST_ON
;
1100 if (ncec
->ncec_flags
& NCE_F_STATIC
)
1101 lnr
->lnr_flags
|= NDF_STATIC
;
1107 * Finish setting up the Enable/Disable multicast for the driver.
1110 ndp_mcastreq(ill_t
*ill
, const in6_addr_t
*v6group
, uint32_t hw_addr_len
,
1111 uint32_t hw_addr_offset
, mblk_t
*mp
)
1117 ASSERT(ill
->ill_net_type
== IRE_IF_RESOLVER
);
1118 if (IN6_IS_ADDR_V4MAPPED(v6group
)) {
1119 IN6_V4MAPPED_TO_IPADDR(v6group
, v4group
);
1121 ASSERT(CLASSD(v4group
));
1122 ASSERT(!(ill
->ill_isv6
));
1124 addr
= (uchar_t
*)&v4group
;
1126 ASSERT(IN6_IS_ADDR_MULTICAST(v6group
));
1127 ASSERT(ill
->ill_isv6
);
1129 addr
= (uchar_t
*)v6group
;
1131 hw_addr
= mi_offset_paramc(mp
, hw_addr_offset
, hw_addr_len
);
1132 if (hw_addr
== NULL
) {
1133 ip0dbg(("ndp_mcastreq NULL hw_addr\n"));
1138 ip_mcast_mapping(ill
, addr
, hw_addr
);
1143 ip_ndp_resolve(ncec_t
*ncec
)
1145 in_addr_t sender4
= INADDR_ANY
;
1146 in6_addr_t sender6
= ipv6_all_zeros
;
1150 src_ill
= nce_resolve_src(ncec
, &sender6
);
1151 if (src_ill
== NULL
) {
1152 /* Make sure we try again later */
1153 ms
= ncec
->ncec_ill
->ill_reachable_retrans_time
;
1154 nce_restart_timer(ncec
, (clock_t)ms
);
1157 if (ncec
->ncec_ipversion
== IPV4_VERSION
)
1158 IN6_V4MAPPED_TO_IPADDR(&sender6
, sender4
);
1159 mutex_enter(&ncec
->ncec_lock
);
1160 if (ncec
->ncec_ipversion
== IPV6_VERSION
)
1161 ms
= ndp_solicit(ncec
, sender6
, src_ill
);
1163 ms
= arp_request(ncec
, sender4
, src_ill
);
1164 mutex_exit(&ncec
->ncec_lock
);
1166 if (ncec
->ncec_state
!= ND_REACHABLE
) {
1167 if (ncec
->ncec_ipversion
== IPV6_VERSION
)
1168 ndp_resolv_failed(ncec
);
1170 arp_resolv_failed(ncec
);
1171 ASSERT((ncec
->ncec_flags
& NCE_F_STATIC
) == 0);
1172 nce_make_unreachable(ncec
);
1176 nce_restart_timer(ncec
, (clock_t)ms
);
1179 ill_refrele(src_ill
);
1183 * Send an IPv6 neighbor solicitation.
1184 * Returns number of milliseconds after which we should either rexmit or abort.
1185 * Return of zero means we should abort.
1186 * The caller holds the ncec_lock to protect ncec_qd_mp and ncec_rcnt.
1187 * The optional source address is used as a hint to ndp_solicit for
1188 * which source to use in the packet.
1190 * NOTE: This routine drops ncec_lock (and later reacquires it) when sending
1194 ndp_solicit(ncec_t
*ncec
, in6_addr_t src
, ill_t
*ill
)
1197 boolean_t dropped
= B_FALSE
;
1199 ASSERT(ncec
->ncec_ipversion
== IPV6_VERSION
);
1200 ASSERT(MUTEX_HELD(&ncec
->ncec_lock
));
1202 if (ncec
->ncec_rcnt
== 0)
1205 dst
= ncec
->ncec_addr
;
1207 mutex_exit(&ncec
->ncec_lock
);
1208 dropped
= ndp_xmit(ill
, ND_NEIGHBOR_SOLICIT
, ill
->ill_phys_addr
,
1209 ill
->ill_phys_addr_length
, &src
, &dst
, 0);
1210 mutex_enter(&ncec
->ncec_lock
);
1213 return (ncec
->ncec_ill
->ill_reachable_retrans_time
);
1217 * Attempt to recover an address on an interface that's been marked as a
1218 * duplicate. Because NCEs are destroyed when the interface goes down, there's
1219 * no easy way to just probe the address and have the right thing happen if
1220 * it's no longer in use. Instead, we just bring it up normally and allow the
1221 * regular interface start-up logic to probe for a remaining duplicate and take
1222 * us back down if necessary.
1223 * Neither DHCP nor temporary addresses arrive here; they're excluded by
1228 ip_addr_recover(ipsq_t
*ipsq
, queue_t
*rq
, mblk_t
*mp
, void *dummy_arg
)
1230 ill_t
*ill
= rq
->q_ptr
;
1232 in6_addr_t
*addr6
= (in6_addr_t
*)mp
->b_rptr
;
1233 in_addr_t
*addr4
= (in_addr_t
*)mp
->b_rptr
;
1234 boolean_t addr_equal
;
1236 for (ipif
= ill
->ill_ipif
; ipif
!= NULL
; ipif
= ipif
->ipif_next
) {
1238 * We do not support recovery of proxy ARP'd interfaces,
1239 * because the system lacks a complete proxy ARP mechanism.
1241 if (ill
->ill_isv6
) {
1242 addr_equal
= IN6_ARE_ADDR_EQUAL(&ipif
->ipif_v6lcl_addr
,
1245 addr_equal
= (ipif
->ipif_lcl_addr
== *addr4
);
1248 if ((ipif
->ipif_flags
& IPIF_POINTOPOINT
) || !addr_equal
)
1252 * If we have already recovered or if the interface is going
1253 * away, then ignore.
1255 mutex_enter(&ill
->ill_lock
);
1256 if (!(ipif
->ipif_flags
& IPIF_DUPLICATE
) ||
1257 (ipif
->ipif_state_flags
& IPIF_CONDEMNED
)) {
1258 mutex_exit(&ill
->ill_lock
);
1262 ipif
->ipif_flags
&= ~IPIF_DUPLICATE
;
1263 ill
->ill_ipif_dup_count
--;
1264 mutex_exit(&ill
->ill_lock
);
1265 ipif
->ipif_was_dup
= B_TRUE
;
1267 if (ill
->ill_isv6
) {
1268 VERIFY(ipif_ndp_up(ipif
, B_TRUE
) != EINPROGRESS
);
1269 (void) ipif_up_done_v6(ipif
);
1271 VERIFY(ipif_arp_up(ipif
, Res_act_initial
, B_TRUE
) !=
1273 (void) ipif_up_done(ipif
);
1280 * Attempt to recover an IPv6 interface that's been shut down as a duplicate.
1281 * As long as someone else holds the address, the interface will stay down.
1282 * When that conflict goes away, the interface is brought back up. This is
1283 * done so that accidental shutdowns of addresses aren't made permanent. Your
1284 * server will recover from a failure.
1286 * For DHCP and temporary addresses, recovery is not done in the kernel.
1287 * Instead, it's handled by user space processes (dhcpagent and in.ndpd).
1289 * This function is entered on a timer expiry; the ID is in ipif_recovery_id.
1292 ipif_dup_recovery(void *arg
)
1296 ipif
->ipif_recovery_id
= 0;
1297 if (!(ipif
->ipif_flags
& IPIF_DUPLICATE
))
1301 * No lock, because this is just an optimization.
1303 if (ipif
->ipif_state_flags
& IPIF_CONDEMNED
)
1306 /* If the link is down, we'll retry this later */
1307 if (!(ipif
->ipif_ill
->ill_phyint
->phyint_flags
& PHYI_RUNNING
))
1310 ipif_do_recovery(ipif
);
1314 * Perform interface recovery by forcing the duplicate interfaces up and
1315 * allowing the system to determine which ones should stay up.
1317 * Called both by recovery timer expiry and link-up notification.
1320 ipif_do_recovery(ipif_t
*ipif
)
1322 ill_t
*ill
= ipif
->ipif_ill
;
1324 ip_stack_t
*ipst
= ill
->ill_ipst
;
1327 if (ipif
->ipif_isv6
)
1328 mp_size
= sizeof (ipif
->ipif_v6lcl_addr
);
1330 mp_size
= sizeof (ipif
->ipif_lcl_addr
);
1331 mp
= allocb(mp_size
, BPRI_MED
);
1333 mutex_enter(&ill
->ill_lock
);
1334 if (ipst
->ips_ip_dup_recovery
> 0 &&
1335 ipif
->ipif_recovery_id
== 0 &&
1336 !(ipif
->ipif_state_flags
& IPIF_CONDEMNED
)) {
1337 ipif
->ipif_recovery_id
= timeout(ipif_dup_recovery
,
1338 ipif
, MSEC_TO_TICK(ipst
->ips_ip_dup_recovery
));
1340 mutex_exit(&ill
->ill_lock
);
1343 * A recovery timer may still be running if we got here from
1344 * ill_restart_dad(); cancel that timer.
1346 if (ipif
->ipif_recovery_id
!= 0)
1347 (void) untimeout(ipif
->ipif_recovery_id
);
1348 ipif
->ipif_recovery_id
= 0;
1350 if (ipif
->ipif_isv6
) {
1351 bcopy(&ipif
->ipif_v6lcl_addr
, mp
->b_rptr
,
1352 sizeof (ipif
->ipif_v6lcl_addr
));
1354 bcopy(&ipif
->ipif_lcl_addr
, mp
->b_rptr
,
1355 sizeof (ipif
->ipif_lcl_addr
));
1358 qwriter_ip(ill
, ill
->ill_rq
, mp
, ip_addr_recover
, NEW_OP
,
1364 * Find the MAC and IP addresses in an NA/NS message.
1367 ip_ndp_find_addresses(mblk_t
*mp
, ip_recv_attr_t
*ira
, ill_t
*ill
,
1368 in6_addr_t
*targp
, uchar_t
**haddr
, uint_t
*haddrlenp
)
1370 icmp6_t
*icmp6
= (icmp6_t
*)(mp
->b_rptr
+ IPV6_HDR_LEN
);
1371 nd_neighbor_solicit_t
*ns
= (nd_neighbor_solicit_t
*)icmp6
;
1375 /* icmp_inbound_v6 ensures this */
1376 ASSERT(ira
->ira_flags
& IRAF_L2SRC_SET
);
1378 addr
= ira
->ira_l2src
;
1379 alen
= ill
->ill_phys_addr_length
;
1388 /* nd_ns_target and nd_na_target are at the same offset, so we cheat */
1389 *targp
= ns
->nd_ns_target
;
1393 * This is for exclusive changes due to NDP duplicate address detection
1398 ip_ndp_excl(ipsq_t
*ipsq
, queue_t
*rq
, mblk_t
*mp
, void *dummy_arg
)
1400 ill_t
*ill
= rq
->q_ptr
;
1404 ip_stack_t
*ipst
= ill
->ill_ipst
;
1406 ip_recv_attr_t iras
;
1411 attrmp
->b_cont
= NULL
;
1412 if (!ip_recv_attr_from_mblk(attrmp
, &iras
)) {
1413 /* The ill or ip_stack_t disappeared on us */
1414 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInDiscards
);
1415 ip_drop_input("ip_recv_attr_from_mblk", mp
, ill
);
1417 ira_cleanup(&iras
, B_TRUE
);
1421 ASSERT(ill
== iras
.ira_rill
);
1423 ip_ndp_find_addresses(mp
, &iras
, ill
, &targ
, &haddr
, &haddrlen
);
1424 if (haddr
!= NULL
&& haddrlen
== ill
->ill_phys_addr_length
) {
1426 * Ignore conflicts generated by misbehaving switches that
1427 * just reflect our own messages back to us. For IPMP, we may
1428 * see reflections across any ill in the illgrp.
1430 * RFC2462 and revisions tried to detect both the case
1431 * when a statically configured IPv6 address is a duplicate,
1432 * and the case when the L2 address itself is a duplicate. The
1433 * later is important because, with stateles address autoconf,
1434 * if the L2 address is a duplicate, the resulting IPv6
1435 * address(es) would also be duplicates. We rely on DAD of the
1436 * IPv6 address itself to detect the latter case.
1438 /* For an under ill_grp can change under lock */
1439 rw_enter(&ipst
->ips_ill_g_lock
, RW_READER
);
1440 if (bcmp(haddr
, ill
->ill_phys_addr
, haddrlen
) == 0 ||
1441 IS_UNDER_IPMP(ill
) &&
1442 ipmp_illgrp_find_ill(ill
->ill_grp
, haddr
,
1443 haddrlen
) != NULL
) {
1444 rw_exit(&ipst
->ips_ill_g_lock
);
1445 goto ignore_conflict
;
1447 rw_exit(&ipst
->ips_ill_g_lock
);
1451 * Look up the appropriate ipif.
1453 ipif
= ipif_lookup_addr_v6(&targ
, ill
, ALL_ZONES
, ipst
);
1455 goto ignore_conflict
;
1457 /* Reload the ill to match the ipif */
1458 ill
= ipif
->ipif_ill
;
1460 /* If it's already duplicate or ineligible, then don't do anything. */
1461 if (ipif
->ipif_flags
& (IPIF_POINTOPOINT
|IPIF_DUPLICATE
)) {
1463 goto ignore_conflict
;
1467 * If this is a failure during duplicate recovery, then don't
1468 * complain. It may take a long time to recover.
1470 if (!ipif
->ipif_was_dup
) {
1471 char ibuf
[LIFNAMSIZ
];
1472 char hbuf
[MAC_STR_LEN
];
1473 char sbuf
[INET6_ADDRSTRLEN
];
1475 ipif_get_name(ipif
, ibuf
, sizeof (ibuf
));
1476 cmn_err(CE_WARN
, "%s has duplicate address %s (in use by %s);"
1478 inet_ntop(AF_INET6
, &targ
, sbuf
, sizeof (sbuf
)),
1479 mac_colon_addr(haddr
, haddrlen
, hbuf
, sizeof (hbuf
)));
1481 mutex_enter(&ill
->ill_lock
);
1482 ASSERT(!(ipif
->ipif_flags
& IPIF_DUPLICATE
));
1483 ipif
->ipif_flags
|= IPIF_DUPLICATE
;
1484 ill
->ill_ipif_dup_count
++;
1485 mutex_exit(&ill
->ill_lock
);
1486 (void) ipif_down(ipif
, NULL
, NULL
);
1487 (void) ipif_down_tail(ipif
);
1488 mutex_enter(&ill
->ill_lock
);
1489 if (!(ipif
->ipif_flags
& (IPIF_DHCPRUNNING
|IPIF_TEMPORARY
)) &&
1490 ill
->ill_net_type
== IRE_IF_RESOLVER
&&
1491 !(ipif
->ipif_state_flags
& IPIF_CONDEMNED
) &&
1492 ipst
->ips_ip_dup_recovery
> 0) {
1493 ASSERT(ipif
->ipif_recovery_id
== 0);
1494 ipif
->ipif_recovery_id
= timeout(ipif_dup_recovery
,
1495 ipif
, MSEC_TO_TICK(ipst
->ips_ip_dup_recovery
));
1497 mutex_exit(&ill
->ill_lock
);
1502 ira_cleanup(&iras
, B_TRUE
);
1506 * Handle failure by tearing down the ipifs with the specified address. Note
1507 * that tearing down the ipif also means deleting the ncec through ipif_down, so
1508 * it's not possible to do recovery by just restarting the ncec timer. Instead,
1509 * we start a timer on the ipif.
1510 * Caller has to free mp;
1513 ndp_failure(mblk_t
*mp
, ip_recv_attr_t
*ira
)
1515 const uchar_t
*haddr
;
1516 ill_t
*ill
= ira
->ira_rill
;
1519 * Ignore conflicts generated by misbehaving switches that just
1520 * reflect our own messages back to us.
1523 /* icmp_inbound_v6 ensures this */
1524 ASSERT(ira
->ira_flags
& IRAF_L2SRC_SET
);
1525 haddr
= ira
->ira_l2src
;
1526 if (haddr
!= NULL
&&
1527 bcmp(haddr
, ill
->ill_phys_addr
, ill
->ill_phys_addr_length
) == 0) {
1531 if ((mp
= copymsg(mp
)) != NULL
) {
1534 attrmp
= ip_recv_attr_to_mblk(ira
);
1535 if (attrmp
== NULL
) {
1536 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInDiscards
);
1537 ip_drop_input("ipIfStatsInDiscards", mp
, ill
);
1540 ASSERT(attrmp
->b_cont
== NULL
);
1541 attrmp
->b_cont
= mp
;
1544 qwriter_ip(ill
, ill
->ill_rq
, mp
, ip_ndp_excl
, NEW_OP
,
1551 * Handle a discovered conflict: some other system is advertising that it owns
1552 * one of our IP addresses. We need to defend ourselves, or just shut down the
1555 * Handles both IPv4 and IPv6
1558 ip_nce_conflict(mblk_t
*mp
, ip_recv_attr_t
*ira
, ncec_t
*ncec
)
1564 ill_t
*ill
= ira
->ira_ill
;
1565 ip_stack_t
*ipst
= ill
->ill_ipst
;
1567 boolean_t isv6
= ill
->ill_isv6
;
1571 ipif
= ipif_lookup_addr_v6(&ncec
->ncec_addr
, ill
, ALL_ZONES
,
1574 if (arp_no_defense
) {
1576 * Yes, there is a conflict, but no, we do not
1581 IN6_V4MAPPED_TO_IPADDR(&ncec
->ncec_addr
, ncec_addr
);
1582 ipif
= ipif_lookup_addr(ncec_addr
, ill
, ALL_ZONES
,
1589 * First, figure out if this address is disposable.
1591 if (ipif
->ipif_flags
& (IPIF_DHCPRUNNING
| IPIF_TEMPORARY
))
1592 maxdefense
= ipst
->ips_ip_max_temp_defend
;
1594 maxdefense
= ipst
->ips_ip_max_defend
;
1597 * Now figure out how many times we've defended ourselves. Ignore
1598 * defenses that happened long in the past.
1600 now
= ddi_get_lbolt();
1601 elapsed
= (drv_hztousec(now
- ncec
->ncec_last_time_defended
))/1000000;
1602 mutex_enter(&ncec
->ncec_lock
);
1603 if ((defs
= ncec
->ncec_defense_count
) > 0 &&
1604 elapsed
> ipst
->ips_ip_defend_interval
) {
1606 * ip_defend_interval has elapsed.
1607 * reset the defense count.
1609 ncec
->ncec_defense_count
= defs
= 0;
1611 ncec
->ncec_defense_count
++;
1612 ncec
->ncec_last_time_defended
= now
;
1613 mutex_exit(&ncec
->ncec_lock
);
1617 * If we've defended ourselves too many times already, then give up and
1618 * tear down the interface(s) using this address.
1619 * Otherwise, caller has to defend by sending out an announce.
1621 if (defs
>= maxdefense
) {
1623 ndp_failure(mp
, ira
);
1625 arp_failure(mp
, ira
);
1627 return (B_TRUE
); /* caller must defend this address */
1633 * Handle reception of Neighbor Solicitation messages.
1636 ndp_input_solicit(mblk_t
*mp
, ip_recv_attr_t
*ira
)
1638 ill_t
*ill
= ira
->ira_ill
, *under_ill
;
1639 nd_neighbor_solicit_t
*ns
;
1640 uint32_t hlen
= ill
->ill_phys_addr_length
;
1641 uchar_t
*haddr
= NULL
;
1644 ncec_t
*our_ncec
= NULL
;
1649 nd_opt_hdr_t
*opt
= NULL
;
1650 boolean_t bad_solicit
= B_FALSE
;
1651 mib2_ipv6IfIcmpEntry_t
*mib
= ill
->ill_icmp6_mib
;
1652 boolean_t need_ill_refrele
= B_FALSE
;
1654 ip6h
= (ip6_t
*)mp
->b_rptr
;
1655 icmp_nd
= (icmp6_t
*)(mp
->b_rptr
+ IPV6_HDR_LEN
);
1656 len
= mp
->b_wptr
- mp
->b_rptr
- IPV6_HDR_LEN
;
1657 src
= ip6h
->ip6_src
;
1658 ns
= (nd_neighbor_solicit_t
*)icmp_nd
;
1659 target
= ns
->nd_ns_target
;
1660 if (IN6_IS_ADDR_MULTICAST(&target
) || IN6_IS_ADDR_V4MAPPED(&target
) ||
1661 IN6_IS_ADDR_LOOPBACK(&target
)) {
1664 pr_addr_dbg("ndp_input_solicit: Martian Target %s\n",
1667 bad_solicit
= B_TRUE
;
1670 if (len
> sizeof (nd_neighbor_solicit_t
)) {
1671 /* Options present */
1672 opt
= (nd_opt_hdr_t
*)&ns
[1];
1673 len
-= sizeof (nd_neighbor_solicit_t
);
1674 if (!ndp_verify_optlen(opt
, len
)) {
1675 ip1dbg(("ndp_input_solicit: Bad opt len\n"));
1676 bad_solicit
= B_TRUE
;
1680 if (IN6_IS_ADDR_UNSPECIFIED(&src
)) {
1681 /* Check to see if this is a valid DAD solicitation */
1682 if (!IN6_IS_ADDR_MC_SOLICITEDNODE(&ip6h
->ip6_dst
)) {
1685 pr_addr_dbg("ndp_input_solicit: IPv6 "
1686 "Destination is not solicited node "
1687 "multicast %s\n", AF_INET6
,
1690 bad_solicit
= B_TRUE
;
1696 * NOTE: with IPMP, it's possible the nominated multicast ill (which
1697 * received this packet if it's multicast) is not the ill tied to
1698 * e.g. the IPMP ill's data link-local. So we match across the illgrp
1699 * to ensure we find the associated NCE.
1701 our_ncec
= ncec_lookup_illgrp_v6(ill
, &target
);
1703 * If this is a valid Solicitation for an address we are publishing,
1704 * then a PUBLISH entry should exist in the cache
1706 if (our_ncec
== NULL
|| !NCE_PUBLISH(our_ncec
)) {
1707 ip1dbg(("ndp_input_solicit: Wrong target in NS?!"
1708 "ifname=%s ", ill
->ill_name
));
1711 pr_addr_dbg(" dst %s\n", AF_INET6
, &target
);
1713 if (our_ncec
== NULL
)
1714 bad_solicit
= B_TRUE
;
1718 /* At this point we should have a verified NS per spec */
1720 opt
= ndp_get_option(opt
, len
, ND_OPT_SOURCE_LINKADDR
);
1722 haddr
= (uchar_t
*)&opt
[1];
1723 if (hlen
> opt
->nd_opt_len
* 8 - sizeof (*opt
) ||
1725 ip1dbg(("ndp_input_advert: bad SLLA\n"));
1726 bad_solicit
= B_TRUE
;
1732 /* If sending directly to peer, set the unicast flag */
1733 if (!IN6_IS_ADDR_MULTICAST(&ip6h
->ip6_dst
))
1734 flag
|= NDP_UNICAST
;
1737 * Create/update the entry for the soliciting node on the ipmp_ill.
1738 * or respond to outstanding queries, don't if
1739 * the source is unspecified address.
1741 if (!IN6_IS_ADDR_UNSPECIFIED(&src
)) {
1745 ASSERT(ill
->ill_isv6
);
1747 * Regular solicitations *must* include the Source Link-Layer
1748 * Address option. Ignore messages that do not.
1750 if (haddr
== NULL
&& IN6_IS_ADDR_MULTICAST(&ip6h
->ip6_dst
)) {
1751 ip1dbg(("ndp_input_solicit: source link-layer address "
1752 "option missing with a specified source.\n"));
1753 bad_solicit
= B_TRUE
;
1758 * This is a regular solicitation. If we're still in the
1759 * process of verifying the address, then don't respond at all
1760 * and don't keep track of the sender.
1762 if (our_ncec
->ncec_state
== ND_PROBE
)
1766 * If the solicitation doesn't have sender hardware address
1767 * (legal for unicast solicitation), then process without
1768 * installing the return NCE. Either we already know it, or
1769 * we'll be forced to look it up when (and if) we reply to the
1776 if (IS_UNDER_IPMP(under_ill
)) {
1777 ill
= ipmp_ill_hold_ipmp_ill(under_ill
);
1781 need_ill_refrele
= B_TRUE
;
1783 err
= nce_lookup_then_add_v6(ill
,
1785 &src
, /* Soliciting nodes address */
1790 if (need_ill_refrele
) {
1793 need_ill_refrele
= B_FALSE
;
1797 /* done with this entry */
1802 * B_FALSE indicates this is not an an advertisement.
1804 nce_process(nnce
->nce_common
, haddr
, 0, B_FALSE
);
1808 ip1dbg(("ndp_input_solicit: Can't create NCE %d\n",
1813 flag
|= NDP_SOLICITED
;
1816 * No source link layer address option should be present in a
1817 * valid DAD request.
1819 if (haddr
!= NULL
) {
1820 ip1dbg(("ndp_input_solicit: source link-layer address "
1821 "option present with an unspecified source.\n"));
1822 bad_solicit
= B_TRUE
;
1825 if (our_ncec
->ncec_state
== ND_PROBE
) {
1827 * Internally looped-back probes will have
1828 * IRAF_L2SRC_LOOPBACK set so we can ignore our own
1831 if (!(ira
->ira_flags
& IRAF_L2SRC_LOOPBACK
)) {
1833 * If someone else is probing our address, then
1834 * we've crossed wires. Declare failure.
1836 ndp_failure(mp
, ira
);
1841 * This is a DAD probe. Multicast the advertisement to the
1842 * all-nodes address.
1844 src
= ipv6_all_hosts_mcast
;
1846 flag
|= nce_advert_flags(our_ncec
);
1847 (void) ndp_xmit(ill
,
1849 our_ncec
->ncec_lladdr
,
1850 our_ncec
->ncec_lladdr_length
,
1851 &target
, /* Source and target of the advertisement pkt */
1852 &src
, /* IP Destination (source of original pkt) */
1856 BUMP_MIB(mib
, ipv6IfIcmpInBadNeighborSolicitations
);
1857 if (our_ncec
!= NULL
)
1858 ncec_refrele(our_ncec
);
1862 * Handle reception of Neighbor Solicitation messages
1865 ndp_input_advert(mblk_t
*mp
, ip_recv_attr_t
*ira
)
1867 ill_t
*ill
= ira
->ira_ill
;
1868 nd_neighbor_advert_t
*na
;
1869 uint32_t hlen
= ill
->ill_phys_addr_length
;
1870 uchar_t
*haddr
= NULL
;
1873 ncec_t
*dst_ncec
= NULL
;
1875 nd_opt_hdr_t
*opt
= NULL
;
1877 ip_stack_t
*ipst
= ill
->ill_ipst
;
1878 mib2_ipv6IfIcmpEntry_t
*mib
= ill
->ill_icmp6_mib
;
1880 ip6h
= (ip6_t
*)mp
->b_rptr
;
1881 icmp_nd
= (icmp6_t
*)(mp
->b_rptr
+ IPV6_HDR_LEN
);
1882 len
= mp
->b_wptr
- mp
->b_rptr
- IPV6_HDR_LEN
;
1883 na
= (nd_neighbor_advert_t
*)icmp_nd
;
1885 if (IN6_IS_ADDR_MULTICAST(&ip6h
->ip6_dst
) &&
1886 (na
->nd_na_flags_reserved
& ND_NA_FLAG_SOLICITED
)) {
1887 ip1dbg(("ndp_input_advert: Target is multicast but the "
1888 "solicited flag is not zero\n"));
1889 BUMP_MIB(mib
, ipv6IfIcmpInBadNeighborAdvertisements
);
1892 target
= na
->nd_na_target
;
1893 if (IN6_IS_ADDR_MULTICAST(&target
) || IN6_IS_ADDR_V4MAPPED(&target
) ||
1894 IN6_IS_ADDR_LOOPBACK(&target
)) {
1897 pr_addr_dbg("ndp_input_solicit: Martian Target %s\n",
1900 BUMP_MIB(mib
, ipv6IfIcmpInBadNeighborAdvertisements
);
1903 if (len
> sizeof (nd_neighbor_advert_t
)) {
1904 opt
= (nd_opt_hdr_t
*)&na
[1];
1905 if (!ndp_verify_optlen(opt
,
1906 len
- sizeof (nd_neighbor_advert_t
))) {
1907 ip1dbg(("ndp_input_advert: cannot verify SLLA\n"));
1908 BUMP_MIB(mib
, ipv6IfIcmpInBadNeighborAdvertisements
);
1911 /* At this point we have a verified NA per spec */
1912 len
-= sizeof (nd_neighbor_advert_t
);
1913 opt
= ndp_get_option(opt
, len
, ND_OPT_TARGET_LINKADDR
);
1915 haddr
= (uchar_t
*)&opt
[1];
1916 if (hlen
> opt
->nd_opt_len
* 8 - sizeof (*opt
) ||
1918 ip1dbg(("ndp_input_advert: bad SLLA\n"));
1920 ipv6IfIcmpInBadNeighborAdvertisements
);
1927 * NOTE: we match across the illgrp since we need to do DAD for all of
1928 * our local addresses, and those are spread across all the active
1929 * ills in the group.
1931 if ((dst_ncec
= ncec_lookup_illgrp_v6(ill
, &target
)) == NULL
)
1934 if (NCE_PUBLISH(dst_ncec
)) {
1936 * Someone just advertised an addresses that we publish. First,
1937 * check it it was us -- if so, we can safely ignore it.
1938 * We don't get the haddr from the ira_l2src because, in the
1939 * case that the packet originated from us, on an IPMP group,
1940 * the ira_l2src may would be the link-layer address of the
1941 * cast_ill used to send the packet, which may not be the same
1942 * as the dst_ncec->ncec_lladdr of the address.
1944 if (haddr
!= NULL
) {
1945 if (ira
->ira_flags
& IRAF_L2SRC_LOOPBACK
)
1948 if (!nce_cmp_ll_addr(dst_ncec
, haddr
, hlen
))
1949 goto out
; /* from us -- no conflict */
1952 * If we're in an IPMP group, check if this is an echo
1953 * from another ill in the group. Use the double-
1954 * checked locking pattern to avoid grabbing
1955 * ill_g_lock in the non-IPMP case.
1957 if (IS_UNDER_IPMP(ill
)) {
1958 rw_enter(&ipst
->ips_ill_g_lock
, RW_READER
);
1959 if (IS_UNDER_IPMP(ill
) && ipmp_illgrp_find_ill(
1960 ill
->ill_grp
, haddr
, hlen
) != NULL
) {
1961 rw_exit(&ipst
->ips_ill_g_lock
);
1964 rw_exit(&ipst
->ips_ill_g_lock
);
1969 * This appears to be a real conflict. If we're trying to
1970 * configure this NCE (ND_PROBE), then shut it down.
1971 * Otherwise, handle the discovered conflict.
1973 if (dst_ncec
->ncec_state
== ND_PROBE
) {
1974 ndp_failure(mp
, ira
);
1976 if (ip_nce_conflict(mp
, ira
, dst_ncec
)) {
1977 char hbuf
[MAC_STR_LEN
];
1978 char sbuf
[INET6_ADDRSTRLEN
];
1981 "node '%s' is using %s on %s",
1982 inet_ntop(AF_INET6
, &target
, sbuf
,
1984 haddr
== NULL
? "<none>" :
1985 mac_colon_addr(haddr
, hlen
, hbuf
,
1986 sizeof (hbuf
)), ill
->ill_name
);
1988 * RFC 4862, Section 5.4.4 does not mandate
1989 * any specific behavior when an NA matches
1990 * a non-tentative address assigned to the
1991 * receiver. We make the choice of defending
1992 * our address, based on the assumption that
1993 * the sender has not detected the Duplicate.
1995 * ncec_last_time_defended has been adjusted
1996 * in ip_nce_conflict()
1998 (void) ndp_announce(dst_ncec
);
2002 if (na
->nd_na_flags_reserved
& ND_NA_FLAG_ROUTER
)
2003 dst_ncec
->ncec_flags
|= NCE_F_ISROUTER
;
2005 /* B_TRUE indicates this an advertisement */
2006 nce_process(dst_ncec
, haddr
, na
->nd_na_flags_reserved
, B_TRUE
);
2009 ncec_refrele(dst_ncec
);
2013 * Process NDP neighbor solicitation/advertisement messages.
2014 * The checksum has already checked o.k before reaching here.
2015 * Information about the datalink header is contained in ira_l2src, but
2016 * that should be ignored for loopback packets.
2019 ndp_input(mblk_t
*mp
, ip_recv_attr_t
*ira
)
2021 ill_t
*ill
= ira
->ira_rill
;
2025 mib2_ipv6IfIcmpEntry_t
*mib
= ill
->ill_icmp6_mib
;
2026 ill_t
*orig_ill
= NULL
;
2029 * Since ira_ill is where the IRE_LOCAL was hosted we use ira_rill
2030 * and make it be the IPMP upper so avoid being confused by a packet
2031 * addressed to a unicast address on a different ill.
2033 if (IS_UNDER_IPMP(ill
)) {
2035 ill
= ipmp_ill_hold_ipmp_ill(orig_ill
);
2038 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInDiscards
);
2039 ip_drop_input("ipIfStatsInDiscards - IPMP ill",
2044 ASSERT(ill
!= orig_ill
);
2045 orig_ill
= ira
->ira_ill
;
2047 mib
= ill
->ill_icmp6_mib
;
2049 if (!pullupmsg(mp
, -1)) {
2050 ip1dbg(("ndp_input: pullupmsg failed\n"));
2051 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInDiscards
);
2052 ip_drop_input("ipIfStatsInDiscards - pullupmsg", mp
, ill
);
2055 ip6h
= (ip6_t
*)mp
->b_rptr
;
2056 if (ip6h
->ip6_hops
!= IPV6_MAX_HOPS
) {
2057 ip1dbg(("ndp_input: hoplimit != IPV6_MAX_HOPS\n"));
2058 ip_drop_input("ipv6IfIcmpBadHoplimit", mp
, ill
);
2059 BUMP_MIB(mib
, ipv6IfIcmpBadHoplimit
);
2063 * NDP does not accept any extension headers between the
2064 * IP header and the ICMP header since e.g. a routing
2065 * header could be dangerous.
2066 * This assumes that any AH or ESP headers are removed
2067 * by ip prior to passing the packet to ndp_input.
2069 if (ip6h
->ip6_nxt
!= IPPROTO_ICMPV6
) {
2070 ip1dbg(("ndp_input: Wrong next header 0x%x\n",
2072 ip_drop_input("Wrong next header", mp
, ill
);
2073 BUMP_MIB(mib
, ipv6IfIcmpInErrors
);
2076 icmp_nd
= (icmp6_t
*)(mp
->b_rptr
+ IPV6_HDR_LEN
);
2077 ASSERT(icmp_nd
->icmp6_type
== ND_NEIGHBOR_SOLICIT
||
2078 icmp_nd
->icmp6_type
== ND_NEIGHBOR_ADVERT
);
2079 if (icmp_nd
->icmp6_code
!= 0) {
2080 ip1dbg(("ndp_input: icmp6 code != 0 \n"));
2081 ip_drop_input("code non-zero", mp
, ill
);
2082 BUMP_MIB(mib
, ipv6IfIcmpInErrors
);
2085 len
= mp
->b_wptr
- mp
->b_rptr
- IPV6_HDR_LEN
;
2087 * Make sure packet length is large enough for either
2088 * a NS or a NA icmp packet.
2090 if (len
< sizeof (struct icmp6_hdr
) + sizeof (struct in6_addr
)) {
2091 ip1dbg(("ndp_input: packet too short\n"));
2092 ip_drop_input("packet too short", mp
, ill
);
2093 BUMP_MIB(mib
, ipv6IfIcmpInErrors
);
2096 if (icmp_nd
->icmp6_type
== ND_NEIGHBOR_SOLICIT
) {
2097 ndp_input_solicit(mp
, ira
);
2099 ndp_input_advert(mp
, ira
);
2103 if (orig_ill
!= NULL
) {
2105 ira
->ira_ill
= orig_ill
;
2110 * ndp_xmit is called to form and transmit a ND solicitation or
2111 * advertisement ICMP packet.
2113 * If the source address is unspecified and this isn't a probe (used for
2114 * duplicate address detection), an appropriate source address and link layer
2115 * address will be chosen here. The link layer address option is included if
2116 * the source is specified (i.e., all non-probe packets), and omitted (per the
2117 * specification) otherwise.
2119 * It returns B_FALSE only if it does a successful put() to the
2120 * corresponding ill's ill_wq otherwise returns B_TRUE.
2123 ndp_xmit(ill_t
*ill
, uint32_t operation
, uint8_t *hw_addr
, uint_t hw_addr_len
,
2124 const in6_addr_t
*sender
, const in6_addr_t
*target
, int flag
)
2132 zoneid_t zoneid
= GLOBAL_ZONEID
;
2133 ill_t
*hwaddr_ill
= ill
;
2134 ip_xmit_attr_t ixas
;
2135 ip_stack_t
*ipst
= ill
->ill_ipst
;
2136 boolean_t need_refrele
= B_FALSE
;
2137 boolean_t probe
= B_FALSE
;
2139 if (IS_UNDER_IPMP(ill
)) {
2140 probe
= ipif_lookup_testaddr_v6(ill
, sender
, NULL
);
2142 * We send non-probe packets on the upper IPMP interface.
2143 * ip_output_simple() will use cast_ill for sending any
2144 * multicast packets. Note that we can't follow the same
2145 * logic for probe packets because all interfaces in the ipmp
2146 * group may have failed, so that we really want to only try
2147 * to send the ND packet on the ill corresponding to the src
2151 ill
= ipmp_ill_hold_ipmp_ill(ill
);
2153 need_refrele
= B_TRUE
;
2160 * If we have a unspecified source(sender) address, select a
2161 * proper source address for the solicitation here itself so
2162 * that we can initialize the h/w address correctly.
2164 * If the sender is specified then we use this address in order
2165 * to lookup the zoneid before calling ip_output_v6(). This is to
2166 * enable unicast ND_NEIGHBOR_ADVERT packets to be routed correctly
2167 * by IP (we cannot guarantee that the global zone has an interface
2168 * route to the destination).
2170 * Note that the NA never comes here with the unspecified source
2175 * Probes will have unspec src at this point.
2177 if (!(IN6_IS_ADDR_UNSPECIFIED(sender
))) {
2178 zoneid
= ipif_lookup_addr_zoneid_v6(sender
, ill
, ipst
);
2180 * It's possible for ipif_lookup_addr_zoneid_v6() to return
2181 * ALL_ZONES if it cannot find a matching ipif for the address
2182 * we are trying to use. In this case we err on the side of
2183 * trying to send the packet by defaulting to the GLOBAL_ZONEID.
2185 if (zoneid
== ALL_ZONES
)
2186 zoneid
= GLOBAL_ZONEID
;
2189 plen
= (sizeof (nd_opt_hdr_t
) + hw_addr_len
+ 7) / 8;
2190 len
= IPV6_HDR_LEN
+ sizeof (nd_neighbor_advert_t
) + plen
* 8;
2191 mp
= allocb(len
, BPRI_LO
);
2198 bzero((char *)mp
->b_rptr
, len
);
2199 mp
->b_wptr
= mp
->b_rptr
+ len
;
2201 bzero(&ixas
, sizeof (ixas
));
2202 ixas
.ixa_flags
= IXAF_SET_ULP_CKSUM
| IXAF_NO_HW_CKSUM
;
2204 ixas
.ixa_ifindex
= ill
->ill_phyint
->phyint_ifindex
;
2205 ixas
.ixa_ipst
= ipst
;
2206 ixas
.ixa_cred
= kcred
;
2207 ixas
.ixa_cpid
= NOPID
;
2208 ixas
.ixa_tsl
= NULL
;
2209 ixas
.ixa_zoneid
= zoneid
;
2211 ip6h
= (ip6_t
*)mp
->b_rptr
;
2212 ip6h
->ip6_vcf
= IPV6_DEFAULT_VERS_AND_FLOW
;
2213 ip6h
->ip6_plen
= htons(len
- IPV6_HDR_LEN
);
2214 ip6h
->ip6_nxt
= IPPROTO_ICMPV6
;
2215 ip6h
->ip6_hops
= IPV6_MAX_HOPS
;
2216 ixas
.ixa_multicast_ttl
= ip6h
->ip6_hops
;
2217 ip6h
->ip6_dst
= *target
;
2218 icmp6
= (icmp6_t
*)&ip6h
[1];
2220 if (hw_addr_len
!= 0) {
2221 opt
= (nd_opt_hdr_t
*)((uint8_t *)ip6h
+ IPV6_HDR_LEN
+
2222 sizeof (nd_neighbor_advert_t
));
2226 if (operation
== ND_NEIGHBOR_SOLICIT
) {
2227 nd_neighbor_solicit_t
*ns
= (nd_neighbor_solicit_t
*)icmp6
;
2229 if (opt
!= NULL
&& !(flag
& NDP_PROBE
)) {
2231 * Note that we don't send out SLLA for ND probes
2232 * per RFC 4862, even though we do send out the src
2233 * haddr for IPv4 DAD probes, even though both IPv4
2234 * and IPv6 go out with the unspecified/INADDR_ANY
2237 opt
->nd_opt_type
= ND_OPT_SOURCE_LINKADDR
;
2239 ip6h
->ip6_src
= *sender
;
2240 ns
->nd_ns_target
= *target
;
2241 if (!(flag
& NDP_UNICAST
)) {
2242 /* Form multicast address of the target */
2243 ip6h
->ip6_dst
= ipv6_solicited_node_mcast
;
2244 ip6h
->ip6_dst
.s6_addr32
[3] |=
2245 ns
->nd_ns_target
.s6_addr32
[3];
2248 nd_neighbor_advert_t
*na
= (nd_neighbor_advert_t
*)icmp6
;
2250 ASSERT(!(flag
& NDP_PROBE
));
2252 opt
->nd_opt_type
= ND_OPT_TARGET_LINKADDR
;
2253 ip6h
->ip6_src
= *sender
;
2254 na
->nd_na_target
= *sender
;
2255 if (flag
& NDP_ISROUTER
)
2256 na
->nd_na_flags_reserved
|= ND_NA_FLAG_ROUTER
;
2257 if (flag
& NDP_SOLICITED
)
2258 na
->nd_na_flags_reserved
|= ND_NA_FLAG_SOLICITED
;
2259 if (flag
& NDP_ORIDE
)
2260 na
->nd_na_flags_reserved
|= ND_NA_FLAG_OVERRIDE
;
2263 if (!(flag
& NDP_PROBE
)) {
2264 if (hw_addr
!= NULL
&& opt
!= NULL
) {
2265 /* Fill in link layer address and option len */
2266 opt
->nd_opt_len
= (uint8_t)plen
;
2267 bcopy(hw_addr
, &opt
[1], hw_addr_len
);
2270 if (opt
!= NULL
&& opt
->nd_opt_type
== 0) {
2271 /* If there's no link layer address option, then strip it. */
2273 mp
->b_wptr
= mp
->b_rptr
+ len
;
2274 ip6h
->ip6_plen
= htons(len
- IPV6_HDR_LEN
);
2277 icmp6
->icmp6_type
= (uint8_t)operation
;
2278 icmp6
->icmp6_code
= 0;
2280 * Prepare for checksum by putting icmp length in the icmp
2281 * checksum field. The checksum is calculated in ip_output.c.
2283 icmp6
->icmp6_cksum
= ip6h
->ip6_plen
;
2285 (void) ip_output_simple(mp
, &ixas
);
2293 * Used to set ND_UNREACHBLE before ncec_delete sets it NCE_F_CONDEMNED.
2294 * The datapath uses this as an indication that there
2295 * is a problem (as opposed to a NCE that was just
2296 * reclaimed due to lack of memory.
2297 * Note that static ARP entries never become unreachable.
2300 nce_make_unreachable(ncec_t
*ncec
)
2302 mutex_enter(&ncec
->ncec_lock
);
2303 ncec
->ncec_state
= ND_UNREACHABLE
;
2304 mutex_exit(&ncec
->ncec_lock
);
2308 * NCE retransmit timer. Common to IPv4 and IPv6.
2309 * This timer goes off when:
2310 * a. It is time to retransmit a resolution for resolver.
2311 * b. It is time to send reachability probes.
2314 nce_timer(void *arg
)
2317 ill_t
*ill
= ncec
->ncec_ill
, *src_ill
;
2318 char addrbuf
[INET6_ADDRSTRLEN
];
2319 boolean_t dropped
= B_FALSE
;
2320 ip_stack_t
*ipst
= ncec
->ncec_ipst
;
2321 boolean_t isv6
= (ncec
->ncec_ipversion
== IPV6_VERSION
);
2322 in_addr_t sender4
= INADDR_ANY
;
2323 in6_addr_t sender6
= ipv6_all_zeros
;
2326 * The timer has to be cancelled by ncec_delete before doing the final
2327 * refrele. So the NCE is guaranteed to exist when the timer runs
2328 * until it clears the timeout_id. Before clearing the timeout_id
2329 * bump up the refcnt so that we can continue to use the ncec
2331 ASSERT(ncec
!= NULL
);
2332 mutex_enter(&ncec
->ncec_lock
);
2333 ncec_refhold_locked(ncec
);
2334 ncec
->ncec_timeout_id
= 0;
2335 mutex_exit(&ncec
->ncec_lock
);
2337 src_ill
= nce_resolve_src(ncec
, &sender6
);
2338 /* if we could not find a sender address, return */
2339 if (src_ill
== NULL
) {
2341 IN6_V4MAPPED_TO_IPADDR(&ncec
->ncec_addr
, sender4
);
2342 ip1dbg(("no src ill for %s\n", inet_ntop(AF_INET
,
2343 &sender4
, addrbuf
, sizeof (addrbuf
))));
2345 ip1dbg(("no src ill for %s\n", inet_ntop(AF_INET6
,
2346 &ncec
->ncec_addr
, addrbuf
, sizeof (addrbuf
))));
2348 nce_restart_timer(ncec
, ill
->ill_reachable_retrans_time
);
2353 IN6_V4MAPPED_TO_IPADDR(&sender6
, sender4
);
2355 mutex_enter(&ncec
->ncec_lock
);
2357 * Check the reachability state.
2359 switch (ncec
->ncec_state
) {
2361 ASSERT(ncec
->ncec_lladdr
!= NULL
);
2362 ncec
->ncec_state
= ND_PROBE
;
2363 ncec
->ncec_pcnt
= ND_MAX_UNICAST_SOLICIT
;
2365 mutex_exit(&ncec
->ncec_lock
);
2366 dropped
= ndp_xmit(src_ill
, ND_NEIGHBOR_SOLICIT
,
2367 src_ill
->ill_phys_addr
,
2368 src_ill
->ill_phys_addr_length
,
2369 &sender6
, &ncec
->ncec_addr
,
2372 dropped
= (arp_request(ncec
, sender4
, src_ill
) == 0);
2373 mutex_exit(&ncec
->ncec_lock
);
2376 mutex_enter(&ncec
->ncec_lock
);
2378 mutex_exit(&ncec
->ncec_lock
);
2382 pr_addr_dbg("nce_timer: state for %s changed "
2383 "to PROBE\n", AF_INET6
, &ncec
->ncec_addr
);
2385 nce_restart_timer(ncec
, ill
->ill_reachable_retrans_time
);
2388 /* must be retransmit timer */
2389 ASSERT(ncec
->ncec_pcnt
>= -1);
2390 if (ncec
->ncec_pcnt
> 0) {
2392 * As per RFC2461, the ncec gets deleted after
2393 * MAX_UNICAST_SOLICIT unsuccessful re-transmissions.
2394 * Note that the first unicast solicitation is sent
2395 * during the DELAY state.
2397 ip2dbg(("nce_timer: pcount=%x dst %s\n",
2399 inet_ntop((isv6
? AF_INET6
: AF_INET
),
2400 &ncec
->ncec_addr
, addrbuf
, sizeof (addrbuf
))));
2401 if (NCE_PUBLISH(ncec
)) {
2402 mutex_exit(&ncec
->ncec_lock
);
2404 * send out a probe; note that src_ill
2405 * is ignored by nce_dad() for all
2406 * DAD message types other than IPv6
2409 nce_dad(ncec
, src_ill
, B_TRUE
);
2411 ASSERT(src_ill
!= NULL
);
2413 mutex_exit(&ncec
->ncec_lock
);
2414 dropped
= ndp_xmit(src_ill
,
2415 ND_NEIGHBOR_SOLICIT
,
2416 src_ill
->ill_phys_addr
,
2417 src_ill
->ill_phys_addr_length
,
2418 &sender6
, &ncec
->ncec_addr
,
2422 * since the nce is REACHABLE,
2423 * the ARP request will be sent out
2424 * as a link-layer unicast.
2426 dropped
= (arp_request(ncec
, sender4
,
2428 mutex_exit(&ncec
->ncec_lock
);
2431 mutex_enter(&ncec
->ncec_lock
);
2433 mutex_exit(&ncec
->ncec_lock
);
2435 nce_restart_timer(ncec
,
2436 ill
->ill_reachable_retrans_time
);
2438 } else if (ncec
->ncec_pcnt
< 0) {
2439 /* No hope, delete the ncec */
2440 /* Tell datapath it went bad */
2441 ncec
->ncec_state
= ND_UNREACHABLE
;
2442 mutex_exit(&ncec
->ncec_lock
);
2445 pr_addr_dbg("nce_timer: Delete NCE for"
2446 " dst %s\n", (isv6
? AF_INET6
: AF_INET
),
2449 /* if static ARP can't delete. */
2450 if ((ncec
->ncec_flags
& NCE_F_STATIC
) == 0)
2453 } else if (!NCE_PUBLISH(ncec
)) {
2455 * Probe count is 0 for a dynamic entry (one that we
2456 * ourselves are not publishing). We should never get
2457 * here if NONUD was requested, hence the ASSERT below.
2459 ASSERT((ncec
->ncec_flags
& NCE_F_NONUD
) == 0);
2460 ip2dbg(("nce_timer: pcount=%x dst %s\n",
2461 ncec
->ncec_pcnt
, inet_ntop(AF_INET6
,
2462 &ncec
->ncec_addr
, addrbuf
, sizeof (addrbuf
))));
2464 mutex_exit(&ncec
->ncec_lock
);
2465 /* Wait one interval before killing */
2466 nce_restart_timer(ncec
,
2467 ill
->ill_reachable_retrans_time
);
2468 } else if (ill
->ill_phyint
->phyint_flags
& PHYI_RUNNING
) {
2473 * We're done probing, and we can now declare this
2474 * address to be usable. Let IP know that it's ok to
2477 ncec
->ncec_state
= ND_REACHABLE
;
2478 ncec
->ncec_flags
&= ~NCE_F_UNVERIFIED
;
2479 mutex_exit(&ncec
->ncec_lock
);
2481 ipif
= ipif_lookup_addr_exact_v6(
2482 &ncec
->ncec_addr
, ill
, ipst
);
2484 IN6_V4MAPPED_TO_IPADDR(&ncec
->ncec_addr
,
2486 ipif
= ipif_lookup_addr_exact(ncec_addr
, ill
,
2490 if (ipif
->ipif_was_dup
) {
2491 char ibuf
[LIFNAMSIZ
];
2492 char sbuf
[INET6_ADDRSTRLEN
];
2494 ipif
->ipif_was_dup
= B_FALSE
;
2495 (void) inet_ntop(AF_INET6
,
2496 &ipif
->ipif_v6lcl_addr
,
2497 sbuf
, sizeof (sbuf
));
2498 ipif_get_name(ipif
, ibuf
,
2500 cmn_err(CE_NOTE
, "recovered address "
2501 "%s on %s", sbuf
, ibuf
);
2503 if ((ipif
->ipif_flags
& IPIF_UP
) &&
2504 !ipif
->ipif_addr_ready
)
2505 ipif_up_notify(ipif
);
2506 ipif
->ipif_addr_ready
= 1;
2509 if (!isv6
&& arp_no_defense
)
2511 /* Begin defending our new address */
2512 if (ncec
->ncec_unsolicit_count
> 0) {
2513 ncec
->ncec_unsolicit_count
--;
2515 dropped
= ndp_announce(ncec
);
2517 dropped
= arp_announce(ncec
);
2521 ncec
->ncec_unsolicit_count
++;
2523 ncec
->ncec_last_time_defended
=
2526 if (ncec
->ncec_unsolicit_count
> 0) {
2527 nce_restart_timer(ncec
,
2528 ANNOUNCE_INTERVAL(isv6
));
2529 } else if (DEFENSE_INTERVAL(isv6
) != 0) {
2530 nce_restart_timer(ncec
, DEFENSE_INTERVAL(isv6
));
2534 * This is an address we're probing to be our own, but
2535 * the ill is down. Wait until it comes back before
2536 * doing anything, but switch to reachable state so
2537 * that the restart will work.
2539 ncec
->ncec_state
= ND_REACHABLE
;
2540 mutex_exit(&ncec
->ncec_lock
);
2543 case ND_INCOMPLETE
: {
2544 mblk_t
*mp
, *nextmp
;
2548 * Per case (2) in the nce_queue_mp() comments, scan ncec_qd_mp
2549 * for any IPMP probe packets, and toss them. IPMP probe
2550 * packets will always be at the head of ncec_qd_mp, so that
2551 * we can stop at the first queued ND packet that is
2552 * not a probe packet.
2554 prevmpp
= &ncec
->ncec_qd_mp
;
2555 for (mp
= ncec
->ncec_qd_mp
; mp
!= NULL
; mp
= nextmp
) {
2556 nextmp
= mp
->b_next
;
2558 if (IS_UNDER_IPMP(ill
) && ncec
->ncec_nprobes
> 0) {
2560 ncec
->ncec_nprobes
--;
2563 prevmpp
= &mp
->b_next
;
2568 * Must be resolver's retransmit timer.
2570 mutex_exit(&ncec
->ncec_lock
);
2571 ip_ndp_resolve(ncec
);
2575 if (((ncec
->ncec_flags
& NCE_F_UNSOL_ADV
) &&
2576 ncec
->ncec_unsolicit_count
!= 0) ||
2577 (NCE_PUBLISH(ncec
) && DEFENSE_INTERVAL(isv6
) != 0)) {
2578 if (ncec
->ncec_unsolicit_count
> 0) {
2579 ncec
->ncec_unsolicit_count
--;
2580 mutex_exit(&ncec
->ncec_lock
);
2582 * When we get to zero announcements left,
2583 * switch to address defense
2586 boolean_t rate_limit
;
2588 mutex_exit(&ncec
->ncec_lock
);
2589 rate_limit
= ill_defend_rate_limit(ill
, ncec
);
2591 nce_restart_timer(ncec
,
2592 DEFENSE_INTERVAL(isv6
));
2597 dropped
= ndp_announce(ncec
);
2599 dropped
= arp_announce(ncec
);
2601 mutex_enter(&ncec
->ncec_lock
);
2603 ncec
->ncec_unsolicit_count
++;
2605 ncec
->ncec_last_time_defended
=
2608 mutex_exit(&ncec
->ncec_lock
);
2609 if (ncec
->ncec_unsolicit_count
!= 0) {
2610 nce_restart_timer(ncec
,
2611 ANNOUNCE_INTERVAL(isv6
));
2613 nce_restart_timer(ncec
, DEFENSE_INTERVAL(isv6
));
2616 mutex_exit(&ncec
->ncec_lock
);
2620 mutex_exit(&ncec
->ncec_lock
);
2625 ill_refrele(src_ill
);
2629 * Set a link layer address from the ll_addr passed in.
2630 * Copy SAP from ill.
2633 nce_set_ll(ncec_t
*ncec
, uchar_t
*ll_addr
)
2635 ill_t
*ill
= ncec
->ncec_ill
;
2637 ASSERT(ll_addr
!= NULL
);
2638 if (ill
->ill_phys_addr_length
> 0) {
2640 * The bcopy() below used to be called for the physical address
2641 * length rather than the link layer address length. For
2642 * ethernet and many other media, the phys_addr and lla are
2645 * The phys_addr and lla may not be the same for devices that
2646 * support DL_IPV6_LINK_LAYER_ADDR, though there are currently
2647 * no known instances of these.
2649 * For PPP or other interfaces with a zero length
2650 * physical address, don't do anything here.
2651 * The bcopy() with a zero phys_addr length was previously
2652 * a no-op for interfaces with a zero-length physical address.
2653 * Using the lla for them would change the way they operate.
2654 * Doing nothing in such cases preserves expected behavior.
2656 bcopy(ll_addr
, ncec
->ncec_lladdr
, ill
->ill_nd_lla_len
);
2661 nce_cmp_ll_addr(const ncec_t
*ncec
, const uchar_t
*ll_addr
,
2662 uint32_t ll_addr_len
)
2664 ASSERT(ncec
->ncec_lladdr
!= NULL
);
2665 if (ll_addr
== NULL
)
2667 if (bcmp(ll_addr
, ncec
->ncec_lladdr
, ll_addr_len
) != 0)
2673 * Updates the link layer address or the reachability state of
2674 * a cache entry. Reset probe counter if needed.
2677 nce_update(ncec_t
*ncec
, uint16_t new_state
, uchar_t
*new_ll_addr
)
2679 ill_t
*ill
= ncec
->ncec_ill
;
2680 boolean_t need_stop_timer
= B_FALSE
;
2681 boolean_t need_fastpath_update
= B_FALSE
;
2685 ASSERT(MUTEX_HELD(&ncec
->ncec_lock
));
2687 * If this interface does not do NUD, there is no point
2688 * in allowing an update to the cache entry. Although
2689 * we will respond to NS.
2690 * The only time we accept an update for a resolver when
2691 * NUD is turned off is when it has just been created.
2692 * Non-Resolvers will always be created as REACHABLE.
2694 if (new_state
!= ND_UNCHANGED
) {
2695 if ((ncec
->ncec_flags
& NCE_F_NONUD
) &&
2696 (ncec
->ncec_state
!= ND_INCOMPLETE
))
2698 ASSERT((int16_t)new_state
>= ND_STATE_VALID_MIN
);
2699 ASSERT((int16_t)new_state
<= ND_STATE_VALID_MAX
);
2700 need_stop_timer
= B_TRUE
;
2701 if (new_state
== ND_REACHABLE
)
2702 ncec
->ncec_last
= TICK_TO_MSEC(ddi_get_lbolt64());
2704 /* We force NUD in this case */
2705 ncec
->ncec_last
= 0;
2707 ncec
->ncec_state
= new_state
;
2708 ncec
->ncec_pcnt
= ND_MAX_UNICAST_SOLICIT
;
2709 ASSERT(ncec
->ncec_lladdr
!= NULL
|| new_state
== ND_INITIAL
||
2710 new_state
== ND_INCOMPLETE
);
2712 if (need_stop_timer
|| (ncec
->ncec_flags
& NCE_F_STATIC
)) {
2713 tid
= ncec
->ncec_timeout_id
;
2714 ncec
->ncec_timeout_id
= 0;
2717 * Re-trigger fastpath probe and
2718 * overwrite the DL_UNITDATA_REQ data, noting we'll lose
2719 * whatever packets that happens to be transmitting at the time.
2721 if (new_ll_addr
!= NULL
) {
2722 bcopy(new_ll_addr
, ncec
->ncec_lladdr
,
2723 ill
->ill_phys_addr_length
);
2724 need_fastpath_update
= B_TRUE
;
2726 mutex_exit(&ncec
->ncec_lock
);
2727 if (need_stop_timer
|| (ncec
->ncec_flags
& NCE_F_STATIC
)) {
2729 (void) untimeout(tid
);
2731 if (need_fastpath_update
) {
2733 * Delete any existing existing dlur_mp and fp_mp information.
2734 * For IPMP interfaces, all underlying ill's must be checked
2737 nce_fastpath_list_delete(ncec
->ncec_ill
, ncec
, NULL
);
2739 * add the new dlur_mp and fp_mp
2741 nce
= nce_fastpath(ncec
, B_TRUE
, NULL
);
2745 mutex_enter(&ncec
->ncec_lock
);
2749 nce_queue_mp_common(ncec_t
*ncec
, mblk_t
*mp
, boolean_t head_insert
)
2754 ASSERT(MUTEX_HELD(&ncec
->ncec_lock
));
2756 for (mpp
= &ncec
->ncec_qd_mp
; *mpp
!= NULL
; mpp
= &(*mpp
)->b_next
) {
2757 if (++count
> ncec
->ncec_ill
->ill_max_buf
) {
2758 tmp
= ncec
->ncec_qd_mp
->b_next
;
2759 ncec
->ncec_qd_mp
->b_next
= NULL
;
2761 * if we never create data addrs on the under_ill
2764 BUMP_MIB(ncec
->ncec_ill
->ill_ip_mib
,
2765 ipIfStatsOutDiscards
);
2766 ip_drop_output("ipIfStatsOutDiscards", ncec
->ncec_qd_mp
,
2768 freemsg(ncec
->ncec_qd_mp
);
2769 ncec
->ncec_qd_mp
= tmp
;
2774 ncec
->ncec_nprobes
++;
2775 mp
->b_next
= ncec
->ncec_qd_mp
;
2776 ncec
->ncec_qd_mp
= mp
;
2783 * nce_queue_mp will queue the packet into the ncec_qd_mp. The packet will be
2784 * queued at the head or tail of the queue based on the input argument
2785 * 'head_insert'. The caller should specify this argument as B_TRUE if this
2786 * packet is an IPMP probe packet, in which case the following happens:
2788 * 1. Insert it at the head of the ncec_qd_mp list. Consider the normal
2789 * (non-ipmp_probe) load-speading case where the source address of the ND
2790 * packet is not tied to ncec_ill. If the ill bound to the source address
2791 * cannot receive, the response to the ND packet will not be received.
2792 * However, if ND packets for ncec_ill's probes are queued behind that ND
2793 * packet, those probes will also fail to be sent, and thus in.mpathd will
2794 * erroneously conclude that ncec_ill has also failed.
2796 * 2. Drop the ipmp_probe packet in ndp_timer() if the ND did not succeed on
2797 * the first attempt. This ensures that ND problems do not manifest as
2800 * We achieve this by inserting ipmp_probe() packets at the head of the
2803 * The ncec for the probe target is created with ncec_ill set to the ipmp_ill,
2804 * but the caller needs to set head_insert to B_TRUE if this is a probe packet.
2807 nce_queue_mp(ncec_t
*ncec
, mblk_t
*mp
, boolean_t head_insert
)
2809 ASSERT(MUTEX_HELD(&ncec
->ncec_lock
));
2810 nce_queue_mp_common(ncec
, mp
, head_insert
);
2814 * Called when address resolution failed due to a timeout.
2815 * Send an ICMP unreachable in response to all queued packets.
2818 ndp_resolv_failed(ncec_t
*ncec
)
2820 mblk_t
*mp
, *nxt_mp
;
2821 char buf
[INET6_ADDRSTRLEN
];
2822 ill_t
*ill
= ncec
->ncec_ill
;
2823 ip_recv_attr_t iras
;
2825 bzero(&iras
, sizeof (iras
));
2828 * we are setting the ira_rill to the ipmp_ill (instead of
2829 * the actual ill on which the packet was received), but this
2830 * is ok because we don't actually need the real ira_rill.
2831 * to send the icmp unreachable to the sender.
2833 iras
.ira_ill
= iras
.ira_rill
= ill
;
2834 iras
.ira_ruifindex
= ill
->ill_phyint
->phyint_ifindex
;
2835 iras
.ira_rifindex
= iras
.ira_ruifindex
;
2837 ip1dbg(("ndp_resolv_failed: dst %s\n",
2838 inet_ntop(AF_INET6
, (char *)&ncec
->ncec_addr
, buf
, sizeof (buf
))));
2839 mutex_enter(&ncec
->ncec_lock
);
2840 mp
= ncec
->ncec_qd_mp
;
2841 ncec
->ncec_qd_mp
= NULL
;
2842 ncec
->ncec_nprobes
= 0;
2843 mutex_exit(&ncec
->ncec_lock
);
2844 while (mp
!= NULL
) {
2845 nxt_mp
= mp
->b_next
;
2848 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsOutDiscards
);
2849 ip_drop_output("ipIfStatsOutDiscards - address unreachable",
2851 icmp_unreachable_v6(mp
,
2852 ICMP6_DST_UNREACH_ADDR
, B_FALSE
, &iras
);
2853 ASSERT(!(iras
.ira_flags
& IRAF_IPSEC_SECURE
));
2856 ncec_cb_dispatch(ncec
); /* finish off waiting callbacks */
2860 * Handle the completion of NDP and ARP resolution.
2863 nce_resolv_ok(ncec_t
*ncec
)
2867 iaflags_t ixaflags
= IXAF_NO_TRACE
;
2869 ill_t
*ill
= ncec
->ncec_ill
;
2870 boolean_t isv6
= (ncec
->ncec_ipversion
== IPV6_VERSION
);
2871 ip_stack_t
*ipst
= ill
->ill_ipst
;
2873 if (IS_IPMP(ncec
->ncec_ill
)) {
2874 nce_resolv_ipmp_ok(ncec
);
2879 mutex_enter(&ncec
->ncec_lock
);
2880 ASSERT(ncec
->ncec_nprobes
== 0);
2881 mp
= ncec
->ncec_qd_mp
;
2882 ncec
->ncec_qd_mp
= NULL
;
2883 mutex_exit(&ncec
->ncec_lock
);
2885 while (mp
!= NULL
) {
2888 if (ill
->ill_isv6
) {
2889 ip6_t
*ip6h
= (ip6_t
*)mp
->b_rptr
;
2891 pkt_len
= ntohs(ip6h
->ip6_plen
) + IPV6_HDR_LEN
;
2893 ipha_t
*ipha
= (ipha_t
*)mp
->b_rptr
;
2895 ixaflags
|= IXAF_IS_IPV4
;
2896 pkt_len
= ntohs(ipha
->ipha_length
);
2898 nxt_mp
= mp
->b_next
;
2901 * IXAF_NO_DEV_FLOW_CTL information for TCP packets is no
2902 * longer available, but it's ok to drop this flag because TCP
2903 * has its own flow-control in effect, so TCP packets
2904 * are not likely to get here when flow-control is in effect.
2906 mutex_enter(&ill
->ill_lock
);
2907 nce
= nce_lookup(ill
, &ncec
->ncec_addr
);
2908 mutex_exit(&ill
->ill_lock
);
2912 BUMP_MIB(&ipst
->ips_ip6_mib
,
2913 ipIfStatsOutDiscards
);
2915 BUMP_MIB(&ipst
->ips_ip_mib
,
2916 ipIfStatsOutDiscards
);
2918 ip_drop_output("ipIfStatsOutDiscards - no nce",
2923 * We don't know the zoneid, but
2924 * ip_xmit does not care since IXAF_NO_TRACE
2925 * is set. (We traced the packet the first
2926 * time through ip_xmit.)
2928 (void) ip_xmit(mp
, nce
, ixaflags
, pkt_len
, 0,
2929 ALL_ZONES
, 0, NULL
);
2935 ncec_cb_dispatch(ncec
); /* complete callbacks */
2939 * Called by SIOCSNDP* ioctl to add/change an ncec entry
2940 * and the corresponding attributes.
2941 * Disallow states other than ND_REACHABLE or ND_STALE.
2944 ndp_sioc_update(ill_t
*ill
, lif_nd_req_t
*lnr
)
2951 uint16_t new_flags
= 0;
2952 uint16_t old_flags
= 0;
2953 int inflags
= lnr
->lnr_flags
;
2954 ip_stack_t
*ipst
= ill
->ill_ipst
;
2955 boolean_t do_postprocess
= B_FALSE
;
2957 ASSERT(ill
->ill_isv6
);
2958 if ((lnr
->lnr_state_create
!= ND_REACHABLE
) &&
2959 (lnr
->lnr_state_create
!= ND_STALE
))
2962 sin6
= (sin6_t
*)&lnr
->lnr_addr
;
2963 addr
= &sin6
->sin6_addr
;
2965 mutex_enter(&ipst
->ips_ndp6
->ndp_g_lock
);
2966 ASSERT(!IS_UNDER_IPMP(ill
));
2967 nce
= nce_lookup_addr(ill
, addr
);
2969 new_flags
= nce
->nce_common
->ncec_flags
;
2971 switch (inflags
& (NDF_ISROUTER_ON
|NDF_ISROUTER_OFF
)) {
2972 case NDF_ISROUTER_ON
:
2973 new_flags
|= NCE_F_ISROUTER
;
2975 case NDF_ISROUTER_OFF
:
2976 new_flags
&= ~NCE_F_ISROUTER
;
2978 case (NDF_ISROUTER_OFF
|NDF_ISROUTER_ON
):
2979 mutex_exit(&ipst
->ips_ndp6
->ndp_g_lock
);
2984 if (inflags
& NDF_STATIC
)
2985 new_flags
|= NCE_F_STATIC
;
2987 switch (inflags
& (NDF_ANYCAST_ON
|NDF_ANYCAST_OFF
)) {
2988 case NDF_ANYCAST_ON
:
2989 new_flags
|= NCE_F_ANYCAST
;
2991 case NDF_ANYCAST_OFF
:
2992 new_flags
&= ~NCE_F_ANYCAST
;
2994 case (NDF_ANYCAST_OFF
|NDF_ANYCAST_ON
):
2995 mutex_exit(&ipst
->ips_ndp6
->ndp_g_lock
);
3002 err
= nce_add_v6(ill
,
3003 (uchar_t
*)lnr
->lnr_hdw_addr
,
3004 ill
->ill_phys_addr_length
,
3007 lnr
->lnr_state_create
,
3010 mutex_exit(&ipst
->ips_ndp6
->ndp_g_lock
);
3011 ip1dbg(("ndp_sioc_update: Can't create NCE %d\n", err
));
3014 do_postprocess
= B_TRUE
;
3017 ncec
= nce
->nce_common
;
3018 old_flags
= ncec
->ncec_flags
;
3019 if (old_flags
& NCE_F_ISROUTER
&& !(new_flags
& NCE_F_ISROUTER
)) {
3020 ncec_router_to_host(ncec
);
3021 mutex_exit(&ipst
->ips_ndp6
->ndp_g_lock
);
3023 err
= nce_add_v6_postprocess(nce
);
3027 mutex_exit(&ipst
->ips_ndp6
->ndp_g_lock
);
3030 err
= nce_add_v6_postprocess(nce
);
3032 * err cannot be anything other than 0 because we don't support
3033 * proxy arp of static addresses.
3037 mutex_enter(&ncec
->ncec_lock
);
3038 ncec
->ncec_flags
= new_flags
;
3039 mutex_exit(&ncec
->ncec_lock
);
3041 * Note that we ignore the state at this point, which
3042 * should be either STALE or REACHABLE. Instead we let
3043 * the link layer address passed in to determine the state
3044 * much like incoming packets.
3046 nce_process(ncec
, (uchar_t
*)lnr
->lnr_hdw_addr
, 0, B_FALSE
);
3052 * Create an nce_t structure for ill using the ncec->ncec_lladdr to set up
3053 * the nce_dlur_mp. If ill != ncec->ncec_ill, then the ips_ill_g_lock must
3054 * be held to ensure that they are in the same group.
3057 nce_fastpath_create(ill_t
*ill
, ncec_t
*ncec
)
3062 nce
= nce_ill_lookup_then_add(ill
, ncec
);
3064 if (nce
== NULL
|| IS_LOOPBACK(nce
->nce_ill
) || IS_VNI(nce
->nce_ill
))
3068 * hold the ncec_lock to synchronize with nce_update() so that,
3069 * at the end of this function, the contents of nce_dlur_mp are
3070 * consistent with ncec->ncec_lladdr, even though some intermediate
3071 * packet may have been sent out with a mangled address, which would
3072 * only be a transient condition.
3074 mutex_enter(&ncec
->ncec_lock
);
3075 if (ncec
->ncec_lladdr
!= NULL
) {
3076 bcopy(ncec
->ncec_lladdr
, nce
->nce_dlur_mp
->b_rptr
+
3077 NCE_LL_ADDR_OFFSET(ill
), ill
->ill_phys_addr_length
);
3079 nce
->nce_dlur_mp
= ill_dlur_gen(NULL
, 0, ill
->ill_sap
,
3080 ill
->ill_sap_length
);
3082 mutex_exit(&ncec
->ncec_lock
);
3087 * we make nce_fp_mp to have an M_DATA prepend.
3088 * The caller ensures there is hold on ncec for this function.
3089 * Note that since ill_fastpath_probe() copies the mblk there is
3090 * no need to hold the nce or ncec beyond this function.
3092 * If the caller has passed in a non-null ncec_nce to nce_fastpath() that
3093 * ncec_nce must correspond to the nce for ncec with nce_ill == ncec->ncec_ill
3094 * and will be returned back by this function, so that no extra nce_refrele
3095 * is required for the caller. The calls from nce_add_common() use this
3096 * method. All other callers (that pass in NULL ncec_nce) will have to do a
3097 * nce_refrele of the returned nce (when it is non-null).
3100 nce_fastpath(ncec_t
*ncec
, boolean_t trigger_fp_req
, nce_t
*ncec_nce
)
3103 ill_t
*ill
= ncec
->ncec_ill
;
3105 ASSERT(ill
!= NULL
);
3107 if (IS_IPMP(ill
) && trigger_fp_req
) {
3108 trigger_fp_req
= B_FALSE
;
3109 ipmp_ncec_refresh_nce(ncec
);
3113 * If the caller already has the nce corresponding to the ill, use
3114 * that one. Otherwise we have to lookup/add the nce. Calls from
3115 * nce_add_common() fall in the former category, and have just done
3116 * the nce lookup/add that can be reused.
3118 if (ncec_nce
== NULL
)
3119 nce
= nce_fastpath_create(ill
, ncec
);
3123 if (nce
== NULL
|| IS_LOOPBACK(nce
->nce_ill
) || IS_VNI(nce
->nce_ill
))
3127 nce_fastpath_trigger(nce
);
3132 * Trigger fastpath on nce. No locks may be held.
3135 nce_fastpath_trigger(nce_t
*nce
)
3138 ill_t
*ill
= nce
->nce_ill
;
3139 ncec_t
*ncec
= nce
->nce_common
;
3141 res
= ill_fastpath_probe(ill
, nce
->nce_dlur_mp
);
3143 * EAGAIN is an indication of a transient error
3144 * i.e. allocation failure etc. leave the ncec in the list it
3145 * will be updated when another probe happens for another ire
3146 * if not it will be taken out of the list when the ire is
3149 if (res
!= 0 && res
!= EAGAIN
&& res
!= ENOTSUP
)
3150 nce_fastpath_list_delete(ill
, ncec
, NULL
);
3154 * Add ncec to the nce fastpath list on ill.
3157 nce_ill_lookup_then_add_locked(ill_t
*ill
, ncec_t
*ncec
)
3161 ASSERT(MUTEX_HELD(&ill
->ill_lock
));
3163 * Atomically ensure that the ill is not CONDEMNED and is not going
3164 * down, before adding the NCE.
3166 if (ill
->ill_state_flags
& ILL_CONDEMNED
)
3168 mutex_enter(&ncec
->ncec_lock
);
3170 * if ncec has not been deleted and
3171 * is not already in the list add it.
3173 if (!NCE_ISCONDEMNED(ncec
)) {
3174 nce
= nce_lookup(ill
, &ncec
->ncec_addr
);
3177 nce
= nce_add(ill
, ncec
);
3180 mutex_exit(&ncec
->ncec_lock
);
3185 nce_ill_lookup_then_add(ill_t
*ill
, ncec_t
*ncec
)
3189 mutex_enter(&ill
->ill_lock
);
3190 nce
= nce_ill_lookup_then_add_locked(ill
, ncec
);
3191 mutex_exit(&ill
->ill_lock
);
3197 * remove ncec from the ill_nce list. If 'dead' is non-null, the deleted
3198 * nce is added to the 'dead' list, and the caller must nce_refrele() the
3199 * entry after all locks have been dropped.
3202 nce_fastpath_list_delete(ill_t
*ill
, ncec_t
*ncec
, list_t
*dead
)
3206 ASSERT(ill
!= NULL
);
3208 /* delete any nces referencing the ncec from underlying ills */
3210 ipmp_ncec_delete_nce(ncec
);
3212 /* now the ill itself */
3213 mutex_enter(&ill
->ill_lock
);
3214 for (nce
= list_head(&ill
->ill_nce
); nce
!= NULL
;
3215 nce
= list_next(&ill
->ill_nce
, nce
)) {
3216 if (nce
->nce_common
== ncec
) {
3222 mutex_exit(&ill
->ill_lock
);
3227 list_insert_tail(dead
, nce
);
3232 * when the fastpath response does not fit in the datab
3233 * associated with the existing nce_fp_mp, we delete and
3234 * add the nce to retrigger fastpath based on the information
3238 nce_delete_then_add(nce_t
*nce
)
3240 ill_t
*ill
= nce
->nce_ill
;
3241 nce_t
*newnce
= NULL
;
3243 ip0dbg(("nce_delete_then_add nce %p ill %s\n",
3244 (void *)nce
, ill
->ill_name
));
3245 mutex_enter(&ill
->ill_lock
);
3246 mutex_enter(&nce
->nce_common
->ncec_lock
);
3249 * Make sure that ncec is not condemned before adding. We hold the
3250 * ill_lock and ncec_lock to synchronize with ncec_delete() and
3251 * ipmp_ncec_delete_nce()
3253 if (!NCE_ISCONDEMNED(nce
->nce_common
))
3254 newnce
= nce_add(ill
, nce
->nce_common
);
3255 mutex_exit(&nce
->nce_common
->ncec_lock
);
3256 mutex_exit(&ill
->ill_lock
);
3258 return (newnce
); /* could be null if nomem */
3261 typedef struct nce_fp_match_s
{
3262 nce_t
*nce_fp_match_res
;
3263 mblk_t
*nce_fp_match_ack_mp
;
3268 nce_fastpath_match_dlur(ill_t
*ill
, nce_t
*nce
, void *arg
)
3270 nce_fp_match_t
*nce_fp_marg
= arg
;
3271 ncec_t
*ncec
= nce
->nce_common
;
3272 mblk_t
*mp
= nce_fp_marg
->nce_fp_match_ack_mp
;
3273 uchar_t
*mp_rptr
, *ud_mp_rptr
;
3274 mblk_t
*ud_mp
= nce
->nce_dlur_mp
;
3278 * mp is the mp associated with the fastpath ack.
3279 * ud_mp is the outstanding DL_UNITDATA_REQ on the nce_t
3280 * under consideration. If the contents match, then the
3281 * fastpath ack is used to update the nce.
3285 mp_rptr
= mp
->b_rptr
;
3286 cmplen
= mp
->b_wptr
- mp_rptr
;
3287 ASSERT(cmplen
>= 0);
3289 ud_mp_rptr
= ud_mp
->b_rptr
;
3291 * The ncec is locked here to prevent any other threads from accessing
3292 * and changing nce_dlur_mp when the address becomes resolved to an
3293 * lla while we're in the middle of looking at and comparing the
3294 * hardware address (lla). It is also locked to prevent multiple
3295 * threads in nce_fastpath() from examining nce_dlur_mp at the same
3298 mutex_enter(&ncec
->ncec_lock
);
3299 if (ud_mp
->b_wptr
- ud_mp_rptr
!= cmplen
||
3300 bcmp((char *)mp_rptr
, (char *)ud_mp_rptr
, cmplen
) == 0) {
3301 nce_fp_marg
->nce_fp_match_res
= nce
;
3302 mutex_exit(&ncec
->ncec_lock
);
3306 mutex_exit(&ncec
->ncec_lock
);
3311 * Update all NCE's that are not in fastpath mode and
3312 * have an nce_fp_mp that matches mp. mp->b_cont contains
3313 * the fastpath header.
3315 * Returns TRUE if entry should be dequeued, or FALSE otherwise.
3318 nce_fastpath_update(ill_t
*ill
, mblk_t
*mp
)
3320 nce_fp_match_t nce_fp_marg
;
3322 mblk_t
*nce_fp_mp
, *fp_mp
;
3324 nce_fp_marg
.nce_fp_match_res
= NULL
;
3325 nce_fp_marg
.nce_fp_match_ack_mp
= mp
;
3327 nce_walk(ill
, nce_fastpath_match_dlur
, &nce_fp_marg
);
3329 if ((nce
= nce_fp_marg
.nce_fp_match_res
) == NULL
)
3332 mutex_enter(&nce
->nce_lock
);
3333 nce_fp_mp
= nce
->nce_fp_mp
;
3335 if (nce_fp_mp
!= NULL
) {
3337 if (nce_fp_mp
->b_rptr
+ MBLKL(fp_mp
) >
3338 nce_fp_mp
->b_datap
->db_lim
) {
3339 mutex_exit(&nce
->nce_lock
);
3340 nce
= nce_delete_then_add(nce
);
3344 mutex_enter(&nce
->nce_lock
);
3345 nce_fp_mp
= nce
->nce_fp_mp
;
3349 /* Matched - install mp as the fastpath mp */
3350 if (nce_fp_mp
== NULL
) {
3351 fp_mp
= dupb(mp
->b_cont
);
3352 nce
->nce_fp_mp
= fp_mp
;
3355 bcopy(fp_mp
->b_rptr
, nce_fp_mp
->b_rptr
, MBLKL(fp_mp
));
3356 nce
->nce_fp_mp
->b_wptr
= nce
->nce_fp_mp
->b_rptr
3359 mutex_exit(&nce
->nce_lock
);
3364 * Return a pointer to a given option in the packet.
3365 * Assumes that option part of the packet have already been validated.
3368 ndp_get_option(nd_opt_hdr_t
*opt
, int optlen
, int opt_type
)
3370 while (optlen
> 0) {
3371 if (opt
->nd_opt_type
== opt_type
)
3373 optlen
-= 8 * opt
->nd_opt_len
;
3374 opt
= (struct nd_opt_hdr
*)((char *)opt
+ 8 * opt
->nd_opt_len
);
3380 * Verify all option lengths present are > 0, also check to see
3381 * if the option lengths and packet length are consistent.
3384 ndp_verify_optlen(nd_opt_hdr_t
*opt
, int optlen
)
3386 ASSERT(opt
!= NULL
);
3387 while (optlen
> 0) {
3388 if (opt
->nd_opt_len
== 0)
3390 optlen
-= 8 * opt
->nd_opt_len
;
3393 opt
= (struct nd_opt_hdr
*)((char *)opt
+ 8 * opt
->nd_opt_len
);
3399 * ncec_walk function.
3400 * Free a fraction of the NCE cache entries.
3402 * A possible optimization here would be to use ncec_last where possible, and
3403 * delete the least-frequently used entry, which would require more complex
3404 * computation as we walk through the ncec's (e.g., track ncec entries by
3405 * order of ncec_last and/or maintain state)
3408 ncec_cache_reclaim(ncec_t
*ncec
, char *arg
)
3410 ip_stack_t
*ipst
= ncec
->ncec_ipst
;
3411 uint_t fraction
= *(uint_t
*)arg
;
3414 if ((ncec
->ncec_flags
&
3415 (NCE_F_MYADDR
| NCE_F_STATIC
| NCE_F_BCAST
)) != 0) {
3419 rand
= (uint_t
)ddi_get_lbolt() +
3420 NCE_ADDR_HASH_V6(ncec
->ncec_addr
, NCE_TABLE_SIZE
);
3421 if ((rand
/fraction
)*fraction
== rand
) {
3422 IP_STAT(ipst
, ip_nce_reclaim_deleted
);
3428 * kmem_cache callback to free up memory.
3430 * For now we just delete a fixed fraction.
3433 ip_nce_reclaim_stack(ip_stack_t
*ipst
)
3435 uint_t fraction
= ipst
->ips_ip_nce_reclaim_fraction
;
3437 IP_STAT(ipst
, ip_nce_reclaim_calls
);
3439 ncec_walk(NULL
, (pfi_t
)ncec_cache_reclaim
, (uchar_t
*)&fraction
, ipst
);
3442 * Walk all CONNs that can have a reference on an ire, ncec or dce.
3443 * Get them to update any stale references to drop any refholds they
3446 ipcl_walk(conn_ixa_cleanup
, (void *)B_FALSE
, ipst
);
3450 * Called by the memory allocator subsystem directly, when the system
3451 * is running low on memory.
3455 ip_nce_reclaim(void *args
)
3457 netstack_handle_t nh
;
3461 netstack_next_init(&nh
);
3462 while ((ns
= netstack_next(&nh
)) != NULL
) {
3464 * netstack_next() can return a netstack_t with a NULL
3465 * netstack_ip at boot time.
3467 if ((ipst
= ns
->netstack_ip
) == NULL
) {
3471 ip_nce_reclaim_stack(ipst
);
3474 netstack_next_fini(&nh
);
3479 ncec_trace_ref(ncec_t
*ncec
)
3481 ASSERT(MUTEX_HELD(&ncec
->ncec_lock
));
3483 if (ncec
->ncec_trace_disable
)
3486 if (!th_trace_ref(ncec
, ncec
->ncec_ipst
)) {
3487 ncec
->ncec_trace_disable
= B_TRUE
;
3488 ncec_trace_cleanup(ncec
);
3493 ncec_untrace_ref(ncec_t
*ncec
)
3495 ASSERT(MUTEX_HELD(&ncec
->ncec_lock
));
3497 if (!ncec
->ncec_trace_disable
)
3498 th_trace_unref(ncec
);
3502 ncec_trace_cleanup(const ncec_t
*ncec
)
3504 th_trace_cleanup(ncec
, ncec
->ncec_trace_disable
);
3509 * Called when address resolution fails due to a timeout.
3510 * Send an ICMP unreachable in response to all queued packets.
3513 arp_resolv_failed(ncec_t
*ncec
)
3515 mblk_t
*mp
, *nxt_mp
;
3516 char buf
[INET6_ADDRSTRLEN
];
3517 struct in_addr ipv4addr
;
3518 ill_t
*ill
= ncec
->ncec_ill
;
3519 ip_stack_t
*ipst
= ncec
->ncec_ipst
;
3520 ip_recv_attr_t iras
;
3522 bzero(&iras
, sizeof (iras
));
3523 iras
.ira_flags
= IRAF_IS_IPV4
;
3525 * we are setting the ira_rill to the ipmp_ill (instead of
3526 * the actual ill on which the packet was received), but this
3527 * is ok because we don't actually need the real ira_rill.
3528 * to send the icmp unreachable to the sender.
3530 iras
.ira_ill
= iras
.ira_rill
= ill
;
3531 iras
.ira_ruifindex
= ill
->ill_phyint
->phyint_ifindex
;
3532 iras
.ira_rifindex
= iras
.ira_ruifindex
;
3534 IN6_V4MAPPED_TO_INADDR(&ncec
->ncec_addr
, &ipv4addr
);
3535 ip3dbg(("arp_resolv_failed: dst %s\n",
3536 inet_ntop(AF_INET
, &ipv4addr
, buf
, sizeof (buf
))));
3537 mutex_enter(&ncec
->ncec_lock
);
3538 mp
= ncec
->ncec_qd_mp
;
3539 ncec
->ncec_qd_mp
= NULL
;
3540 ncec
->ncec_nprobes
= 0;
3541 mutex_exit(&ncec
->ncec_lock
);
3542 while (mp
!= NULL
) {
3543 nxt_mp
= mp
->b_next
;
3546 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsOutDiscards
);
3547 ip_drop_output("ipIfStatsOutDiscards - address unreachable",
3549 if (ipst
->ips_ip_arp_icmp_error
) {
3550 ip3dbg(("arp_resolv_failed: "
3551 "Calling icmp_unreachable\n"));
3552 icmp_unreachable(mp
, ICMP_HOST_UNREACHABLE
, &iras
);
3556 ASSERT(!(iras
.ira_flags
& IRAF_IPSEC_SECURE
));
3559 ncec_cb_dispatch(ncec
); /* finish off waiting callbacks */
3563 * if ill is an under_ill, translate it to the ipmp_ill and add the
3564 * nce on the ipmp_ill. Two nce_t entries (one on the ipmp_ill, and
3565 * one on the underlying in_ill) will be created for the
3566 * ncec_t in this case. The ncec_t itself will be created on the ipmp_ill.
3569 nce_lookup_then_add_v4(ill_t
*ill
, uchar_t
*hw_addr
, uint_t hw_addr_len
,
3570 const in_addr_t
*addr
, uint16_t flags
, uint16_t state
, nce_t
**newnce
)
3574 ip_stack_t
*ipst
= ill
->ill_ipst
;
3575 nce_t
*nce
, *upper_nce
= NULL
;
3576 ill_t
*in_ill
= ill
, *under
= NULL
;
3577 boolean_t need_ill_refrele
= B_FALSE
;
3579 if (flags
& NCE_F_MCAST
) {
3581 * hw_addr will be figured out in nce_set_multicast_v4;
3582 * caller needs to pass in the cast_ill for ipmp
3584 ASSERT(hw_addr
== NULL
);
3585 ASSERT(!IS_IPMP(ill
));
3586 err
= nce_set_multicast_v4(ill
, addr
, flags
, newnce
);
3590 if (IS_UNDER_IPMP(ill
) && !(flags
& NCE_F_MYADDR
)) {
3591 ill
= ipmp_ill_hold_ipmp_ill(ill
);
3594 need_ill_refrele
= B_TRUE
;
3596 if ((flags
& NCE_F_BCAST
) != 0) {
3598 * IPv4 broadcast ncec: compute the hwaddr.
3601 under
= ipmp_ill_hold_xmit_ill(ill
, B_FALSE
);
3602 if (under
== NULL
) {
3603 if (need_ill_refrele
)
3607 hw_addr
= under
->ill_bcast_mp
->b_rptr
+
3608 NCE_LL_ADDR_OFFSET(under
);
3609 hw_addr_len
= under
->ill_phys_addr_length
;
3611 hw_addr
= ill
->ill_bcast_mp
->b_rptr
+
3612 NCE_LL_ADDR_OFFSET(ill
),
3613 hw_addr_len
= ill
->ill_phys_addr_length
;
3617 mutex_enter(&ipst
->ips_ndp4
->ndp_g_lock
);
3618 IN6_IPADDR_TO_V4MAPPED(*addr
, &addr6
);
3619 nce
= nce_lookup_addr(ill
, &addr6
);
3621 err
= nce_add_v4(ill
, hw_addr
, hw_addr_len
, addr
, flags
,
3626 mutex_exit(&ipst
->ips_ndp4
->ndp_g_lock
);
3628 err
= nce_add_v4_postprocess(nce
);
3630 if (in_ill
!= ill
&& nce
!= NULL
) {
3631 nce_t
*under_nce
= NULL
;
3634 * in_ill was the under_ill. Try to create the under_nce.
3635 * Hold the ill_g_lock to prevent changes to group membership
3636 * until we are done.
3638 rw_enter(&ipst
->ips_ill_g_lock
, RW_READER
);
3639 if (!IS_IN_SAME_ILLGRP(in_ill
, ill
)) {
3640 DTRACE_PROBE2(ill__not__in__group
, nce_t
*, nce
,
3642 rw_exit(&ipst
->ips_ill_g_lock
);
3648 under_nce
= nce_fastpath_create(in_ill
, nce
->nce_common
);
3649 if (under_nce
== NULL
) {
3650 rw_exit(&ipst
->ips_ill_g_lock
);
3656 rw_exit(&ipst
->ips_ill_g_lock
);
3658 nce
= under_nce
; /* will be returned to caller */
3659 if (NCE_ISREACHABLE(nce
->nce_common
))
3660 nce_fastpath_trigger(under_nce
);
3671 if (upper_nce
!= NULL
)
3672 nce_refrele(upper_nce
);
3673 if (need_ill_refrele
)
3680 * NDP Cache Entry creation routine for IPv4.
3681 * This routine must always be called with ndp4->ndp_g_lock held.
3682 * Prior to return, ncec_refcnt is incremented.
3684 * IPMP notes: the ncec for non-local (i.e., !NCE_MYADDR(ncec) addresses
3685 * are always added pointing at the ipmp_ill. Thus, when the ill passed
3686 * to nce_add_v4 is an under_ill (i.e., IS_UNDER_IPMP(ill)) two nce_t
3687 * entries will be created, both pointing at the same ncec_t. The nce_t
3688 * entries will have their nce_ill set to the ipmp_ill and the under_ill
3689 * respectively, with the ncec_t having its ncec_ill pointing at the ipmp_ill.
3690 * Local addresses are always created on the ill passed to nce_add_v4.
3693 nce_add_v4(ill_t
*ill
, uchar_t
*hw_addr
, uint_t hw_addr_len
,
3694 const in_addr_t
*addr
, uint16_t flags
, uint16_t state
, nce_t
**newnce
)
3697 boolean_t is_multicast
= (flags
& NCE_F_MCAST
);
3698 struct in6_addr addr6
;
3701 ASSERT(MUTEX_HELD(&ill
->ill_ipst
->ips_ndp4
->ndp_g_lock
));
3702 ASSERT(!ill
->ill_isv6
);
3703 ASSERT(!IN_MULTICAST(htonl(*addr
)) || is_multicast
);
3705 IN6_IPADDR_TO_V4MAPPED(*addr
, &addr6
);
3706 err
= nce_add_common(ill
, hw_addr
, hw_addr_len
, &addr6
, flags
, state
,
3708 ASSERT(newnce
!= NULL
);
3714 * Post-processing routine to be executed after nce_add_v4(). This function
3715 * triggers fastpath (if appropriate) and DAD on the newly added nce entry
3716 * and must be called without any locks held.
3718 * Always returns 0, but we return an int to keep this symmetric with the
3719 * IPv6 counter-part.
3722 nce_add_v4_postprocess(nce_t
*nce
)
3724 ncec_t
*ncec
= nce
->nce_common
;
3725 uint16_t flags
= ncec
->ncec_flags
;
3726 boolean_t ndp_need_dad
= B_FALSE
;
3729 ip_stack_t
*ipst
= ncec
->ncec_ill
->ill_ipst
;
3730 uchar_t
*hw_addr
= ncec
->ncec_lladdr
;
3731 boolean_t trigger_fastpath
= B_TRUE
;
3734 * If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then
3735 * we call nce_fastpath as soon as the ncec is resolved in nce_process.
3736 * We call nce_fastpath from nce_update if the link layer address of
3737 * the peer changes from nce_update
3739 if (NCE_PUBLISH(ncec
) || !NCE_ISREACHABLE(ncec
) || (hw_addr
== NULL
&&
3740 ncec
->ncec_ill
->ill_net_type
!= IRE_IF_NORESOLVER
))
3741 trigger_fastpath
= B_FALSE
;
3743 if (trigger_fastpath
)
3744 nce_fastpath_trigger(nce
);
3746 if (NCE_PUBLISH(ncec
) && ncec
->ncec_state
== ND_PROBE
) {
3748 * Either the caller (by passing in ND_PROBE)
3749 * or nce_add_common() (by the internally computed state
3750 * based on ncec_addr and ill_net_type) has determined
3751 * that this unicast entry needs DAD. Trigger DAD.
3753 ndp_need_dad
= B_TRUE
;
3754 } else if (flags
& NCE_F_UNSOL_ADV
) {
3756 * We account for the transmit below by assigning one
3757 * less than the ndd variable. Subsequent decrements
3758 * are done in nce_timer.
3760 mutex_enter(&ncec
->ncec_lock
);
3761 ncec
->ncec_unsolicit_count
=
3762 ipst
->ips_ip_arp_publish_count
- 1;
3763 mutex_exit(&ncec
->ncec_lock
);
3764 dropped
= arp_announce(ncec
);
3765 mutex_enter(&ncec
->ncec_lock
);
3767 ncec
->ncec_unsolicit_count
++;
3769 ncec
->ncec_last_time_defended
= ddi_get_lbolt();
3770 if (ncec
->ncec_unsolicit_count
!= 0) {
3771 nce_start_timer(ncec
,
3772 ipst
->ips_ip_arp_publish_interval
);
3774 mutex_exit(&ncec
->ncec_lock
);
3778 * If ncec_xmit_interval is 0, user has configured us to send the first
3779 * probe right away. Do so, and set up for the subsequent probes.
3782 mutex_enter(&ncec
->ncec_lock
);
3783 if (ncec
->ncec_pcnt
== 0) {
3785 * DAD probes and announce can be
3786 * administratively disabled by setting the
3787 * probe_count to zero. Restart the timer in
3788 * this case to mark the ipif as ready.
3790 ncec
->ncec_unsolicit_count
= 0;
3791 mutex_exit(&ncec
->ncec_lock
);
3792 nce_restart_timer(ncec
, 0);
3794 mutex_exit(&ncec
->ncec_lock
);
3795 delay
= ((ncec
->ncec_flags
& NCE_F_FAST
) ?
3796 ipst
->ips_arp_probe_delay
:
3797 ipst
->ips_arp_fastprobe_delay
);
3798 nce_dad(ncec
, NULL
, (delay
== 0 ? B_TRUE
: B_FALSE
));
3805 * ncec_walk routine to update all entries that have a given destination or
3806 * gateway address and cached link layer (MAC) address. This is used when ARP
3807 * informs us that a network-to-link-layer mapping may have changed.
3810 nce_update_hw_changed(ncec_t
*ncec
, void *arg
)
3812 nce_hw_map_t
*hwm
= arg
;
3815 if (ncec
->ncec_state
!= ND_REACHABLE
)
3818 IN6_V4MAPPED_TO_IPADDR(&ncec
->ncec_addr
, ncec_addr
);
3819 if (ncec_addr
!= hwm
->hwm_addr
)
3822 mutex_enter(&ncec
->ncec_lock
);
3823 if (hwm
->hwm_flags
!= 0)
3824 ncec
->ncec_flags
= hwm
->hwm_flags
;
3825 nce_update(ncec
, ND_STALE
, hwm
->hwm_hwaddr
);
3826 mutex_exit(&ncec
->ncec_lock
);
3830 ncec_refhold(ncec_t
*ncec
)
3832 mutex_enter(&(ncec
)->ncec_lock
);
3833 (ncec
)->ncec_refcnt
++;
3834 ASSERT((ncec
)->ncec_refcnt
!= 0);
3836 ncec_trace_ref(ncec
);
3838 mutex_exit(&(ncec
)->ncec_lock
);
3842 ncec_refhold_notr(ncec_t
*ncec
)
3844 mutex_enter(&(ncec
)->ncec_lock
);
3845 (ncec
)->ncec_refcnt
++;
3846 ASSERT((ncec
)->ncec_refcnt
!= 0);
3847 mutex_exit(&(ncec
)->ncec_lock
);
3851 ncec_refhold_locked(ncec_t
*ncec
)
3853 ASSERT(MUTEX_HELD(&(ncec
)->ncec_lock
));
3854 (ncec
)->ncec_refcnt
++;
3856 ncec_trace_ref(ncec
);
3860 /* ncec_inactive destroys the mutex thus no mutex_exit is needed */
3862 ncec_refrele(ncec_t
*ncec
)
3864 mutex_enter(&(ncec
)->ncec_lock
);
3866 ncec_untrace_ref(ncec
);
3868 ASSERT((ncec
)->ncec_refcnt
!= 0);
3869 if (--(ncec
)->ncec_refcnt
== 0) {
3870 ncec_inactive(ncec
);
3872 mutex_exit(&(ncec
)->ncec_lock
);
3877 ncec_refrele_notr(ncec_t
*ncec
)
3879 mutex_enter(&(ncec
)->ncec_lock
);
3880 ASSERT((ncec
)->ncec_refcnt
!= 0);
3881 if (--(ncec
)->ncec_refcnt
== 0) {
3882 ncec_inactive(ncec
);
3884 mutex_exit(&(ncec
)->ncec_lock
);
3889 * Common to IPv4 and IPv6.
3892 nce_restart_timer(ncec_t
*ncec
, uint_t ms
)
3896 ASSERT(!MUTEX_HELD(&(ncec
)->ncec_lock
));
3898 /* First cancel any running timer */
3899 mutex_enter(&ncec
->ncec_lock
);
3900 tid
= ncec
->ncec_timeout_id
;
3901 ncec
->ncec_timeout_id
= 0;
3903 mutex_exit(&ncec
->ncec_lock
);
3904 (void) untimeout(tid
);
3905 mutex_enter(&ncec
->ncec_lock
);
3909 nce_start_timer(ncec
, ms
);
3910 mutex_exit(&ncec
->ncec_lock
);
3914 nce_start_timer(ncec_t
*ncec
, uint_t ms
)
3916 ASSERT(MUTEX_HELD(&ncec
->ncec_lock
));
3918 * Don't start the timer if the ncec has been deleted, or if the timer
3919 * is already running
3921 if (!NCE_ISCONDEMNED(ncec
) && ncec
->ncec_timeout_id
== 0) {
3922 ncec
->ncec_timeout_id
= timeout(nce_timer
, ncec
,
3923 MSEC_TO_TICK(ms
) == 0 ? 1 : MSEC_TO_TICK(ms
));
3928 nce_set_multicast_v4(ill_t
*ill
, const in_addr_t
*dst
,
3929 uint16_t flags
, nce_t
**newnce
)
3933 ip_stack_t
*ipst
= ill
->ill_ipst
;
3937 ASSERT(!ill
->ill_isv6
);
3939 IN6_IPADDR_TO_V4MAPPED(*dst
, &dst6
);
3940 mutex_enter(&ipst
->ips_ndp4
->ndp_g_lock
);
3941 if ((nce
= nce_lookup_addr(ill
, &dst6
)) != NULL
) {
3942 mutex_exit(&ipst
->ips_ndp4
->ndp_g_lock
);
3945 if (ill
->ill_net_type
== IRE_IF_RESOLVER
) {
3947 * For IRE_IF_RESOLVER a hardware mapping can be
3948 * generated, for IRE_IF_NORESOLVER, resolution cookie
3949 * in the ill is copied in nce_add_v4().
3951 hw_addr
= kmem_alloc(ill
->ill_phys_addr_length
, KM_NOSLEEP
);
3952 if (hw_addr
== NULL
) {
3953 mutex_exit(&ipst
->ips_ndp4
->ndp_g_lock
);
3956 ip_mcast_mapping(ill
, (uchar_t
*)dst
, hw_addr
);
3959 * IRE_IF_NORESOLVER type simply copies the resolution
3960 * cookie passed in. So no hw_addr is needed.
3964 ASSERT(flags
& NCE_F_MCAST
);
3965 ASSERT(flags
& NCE_F_NONUD
);
3966 /* nce_state will be computed by nce_add_common() */
3967 err
= nce_add_v4(ill
, hw_addr
, ill
->ill_phys_addr_length
, dst
, flags
,
3968 ND_UNCHANGED
, &nce
);
3969 mutex_exit(&ipst
->ips_ndp4
->ndp_g_lock
);
3971 err
= nce_add_v4_postprocess(nce
);
3972 if (hw_addr
!= NULL
)
3973 kmem_free(hw_addr
, ill
->ill_phys_addr_length
);
3975 ip1dbg(("nce_set_multicast_v4: create failed" "%d\n", err
));
3987 * This is used when scanning for "old" (least recently broadcast) NCEs. We
3988 * don't want to have to walk the list for every single one, so we gather up
3989 * batches at a time.
3991 #define NCE_RESCHED_LIST_LEN 8
3996 ncec_t
*ncert_nces
[NCE_RESCHED_LIST_LEN
];
4000 * Pick the longest waiting NCEs for defense.
4004 ncec_reschedule(ill_t
*ill
, nce_t
*nce
, void *arg
)
4006 nce_resched_t
*ncert
= arg
;
4010 ncec_t
*ncec
= nce
->nce_common
;
4012 ASSERT(ncec
->ncec_ill
== ncert
->ncert_ill
);
4014 * Only reachable entries that are ready for announcement are eligible.
4016 if (!NCE_MYADDR(ncec
) || ncec
->ncec_state
!= ND_REACHABLE
)
4018 if (ncert
->ncert_num
< NCE_RESCHED_LIST_LEN
) {
4020 ncert
->ncert_nces
[ncert
->ncert_num
++] = ncec
;
4022 ncecs
= ncert
->ncert_nces
;
4023 ncec_max
= ncecs
+ NCE_RESCHED_LIST_LEN
;
4025 for (; ncecs
< ncec_max
; ncecs
++) {
4026 ASSERT(ncec
!= NULL
);
4027 if ((*ncecs
)->ncec_last_time_defended
>
4028 ncec
->ncec_last_time_defended
) {
4040 * Reschedule the ARP defense of any long-waiting NCEs. It's assumed that this
4041 * doesn't happen very often (if at all), and thus it needn't be highly
4042 * optimized. (Note, though, that it's actually O(N) complexity, because the
4043 * outer loop is bounded by a constant rather than by the length of the list.)
4046 nce_ill_reschedule(ill_t
*ill
, nce_resched_t
*ncert
)
4049 ip_stack_t
*ipst
= ill
->ill_ipst
;
4050 uint_t i
, defend_rate
;
4052 i
= ill
->ill_defend_count
;
4053 ill
->ill_defend_count
= 0;
4055 defend_rate
= ipst
->ips_ndp_defend_rate
;
4057 defend_rate
= ipst
->ips_arp_defend_rate
;
4058 /* If none could be sitting around, then don't reschedule */
4059 if (i
< defend_rate
) {
4060 DTRACE_PROBE1(reschedule_none
, ill_t
*, ill
);
4063 ncert
->ncert_ill
= ill
;
4064 while (ill
->ill_defend_count
< defend_rate
) {
4065 nce_walk_common(ill
, ncec_reschedule
, ncert
);
4066 for (i
= 0; i
< ncert
->ncert_num
; i
++) {
4068 ncec
= ncert
->ncert_nces
[i
];
4069 mutex_enter(&ncec
->ncec_lock
);
4070 ncec
->ncec_flags
|= NCE_F_DELAYED
;
4071 mutex_exit(&ncec
->ncec_lock
);
4073 * we plan to schedule this ncec, so incr the
4074 * defend_count in anticipation.
4076 if (++ill
->ill_defend_count
>= defend_rate
)
4079 if (ncert
->ncert_num
< NCE_RESCHED_LIST_LEN
)
4085 * Check if the current rate-limiting parameters permit the sending
4086 * of another address defense announcement for both IPv4 and IPv6.
4087 * Returns B_TRUE if rate-limiting is in effect (i.e., send is not
4088 * permitted), and B_FALSE otherwise. The `defend_rate' parameter
4089 * determines how many address defense announcements are permitted
4090 * in any `defense_perio' interval.
4093 ill_defend_rate_limit(ill_t
*ill
, ncec_t
*ncec
)
4095 clock_t now
= ddi_get_lbolt();
4096 ip_stack_t
*ipst
= ill
->ill_ipst
;
4097 clock_t start
= ill
->ill_defend_start
;
4098 uint32_t elapsed
, defend_period
, defend_rate
;
4099 nce_resched_t ncert
;
4103 if (ill
->ill_isv6
) {
4104 defend_period
= ipst
->ips_ndp_defend_period
;
4105 defend_rate
= ipst
->ips_ndp_defend_rate
;
4107 defend_period
= ipst
->ips_arp_defend_period
;
4108 defend_rate
= ipst
->ips_arp_defend_rate
;
4110 if (defend_rate
== 0)
4112 bzero(&ncert
, sizeof (ncert
));
4113 mutex_enter(&ill
->ill_lock
);
4115 elapsed
= now
- start
;
4116 if (elapsed
> SEC_TO_TICK(defend_period
)) {
4117 ill
->ill_defend_start
= now
;
4119 * nce_ill_reschedule will attempt to
4120 * prevent starvation by reschduling the
4121 * oldest entries, which are marked with
4122 * the NCE_F_DELAYED flag.
4124 nce_ill_reschedule(ill
, &ncert
);
4127 ill
->ill_defend_start
= now
;
4129 ASSERT(ill
->ill_defend_count
<= defend_rate
);
4130 mutex_enter(&ncec
->ncec_lock
);
4131 if (ncec
->ncec_flags
& NCE_F_DELAYED
) {
4133 * This ncec was rescheduled as one of the really old
4134 * entries needing on-going defense. The
4135 * ill_defend_count was already incremented in
4136 * nce_ill_reschedule. Go ahead and send the announce.
4138 ncec
->ncec_flags
&= ~NCE_F_DELAYED
;
4139 mutex_exit(&ncec
->ncec_lock
);
4143 mutex_exit(&ncec
->ncec_lock
);
4144 if (ill
->ill_defend_count
< defend_rate
)
4145 ill
->ill_defend_count
++;
4146 if (ill
->ill_defend_count
== defend_rate
) {
4148 * we are no longer allowed to send unbidden defense
4149 * messages. Wait for rescheduling.
4156 mutex_exit(&ill
->ill_lock
);
4158 * After all the locks have been dropped we can restart nce timer,
4159 * and refrele the delayed ncecs
4161 for (i
= 0; i
< ncert
.ncert_num
; i
++) {
4162 clock_t xmit_interval
;
4165 tmp
= ncert
.ncert_nces
[i
];
4166 xmit_interval
= nce_fuzz_interval(tmp
->ncec_xmit_interval
,
4168 nce_restart_timer(tmp
, xmit_interval
);
4175 ndp_announce(ncec_t
*ncec
)
4177 return (ndp_xmit(ncec
->ncec_ill
, ND_NEIGHBOR_ADVERT
, ncec
->ncec_lladdr
,
4178 ncec
->ncec_lladdr_length
, &ncec
->ncec_addr
, &ipv6_all_hosts_mcast
,
4179 nce_advert_flags(ncec
)));
4183 nce_resolve_src(ncec_t
*ncec
, in6_addr_t
*src
)
4188 ill_t
*ill
= ncec
->ncec_ill
;
4189 ill_t
*src_ill
= NULL
;
4190 ipif_t
*ipif
= NULL
;
4191 boolean_t is_myaddr
= NCE_MYADDR(ncec
);
4192 boolean_t isv6
= (ncec
->ncec_ipversion
== IPV6_VERSION
);
4194 ASSERT(src
!= NULL
);
4195 ASSERT(IN6_IS_ADDR_UNSPECIFIED(src
));
4198 src6
= ncec
->ncec_addr
;
4200 IN6_V4MAPPED_TO_IPADDR(&ncec
->ncec_addr
, src4
);
4203 * try to find one from the outgoing packet.
4205 mutex_enter(&ncec
->ncec_lock
);
4206 mp
= ncec
->ncec_qd_mp
;
4209 ip6_t
*ip6h
= (ip6_t
*)mp
->b_rptr
;
4211 src6
= ip6h
->ip6_src
;
4213 ipha_t
*ipha
= (ipha_t
*)mp
->b_rptr
;
4215 src4
= ipha
->ipha_src
;
4216 IN6_IPADDR_TO_V4MAPPED(src4
, &src6
);
4219 mutex_exit(&ncec
->ncec_lock
);
4223 * For outgoing packets, if the src of outgoing packet is one
4224 * of the assigned interface addresses use it, otherwise we
4225 * will pick the source address below.
4226 * For local addresses (is_myaddr) doing DAD, NDP announce
4227 * messages are mcast. So we use the (IPMP) cast_ill or the
4228 * (non-IPMP) ncec_ill for these message types. The only case
4229 * of unicast DAD messages are for IPv6 ND probes, for which
4230 * we find the ipif_bound_ill corresponding to the ncec_addr.
4232 if (!IN6_IS_ADDR_UNSPECIFIED(&src6
) || is_myaddr
) {
4234 ipif
= ipif_lookup_addr_nondup_v6(&src6
, ill
, ALL_ZONES
,
4237 ipif
= ipif_lookup_addr_nondup(src4
, ill
, ALL_ZONES
,
4242 * If no relevant ipif can be found, then it's not one of our
4243 * addresses. Reset to :: and try to find a src for the NS or
4244 * ARP request using ipif_select_source_v[4,6] below.
4245 * If an ipif can be found, but it's not yet done with
4246 * DAD verification, and we are not being invoked for
4247 * DAD (i.e., !is_myaddr), then just postpone this
4248 * transmission until later.
4251 src6
= ipv6_all_zeros
;
4253 } else if (!ipif
->ipif_addr_ready
&& !is_myaddr
) {
4254 DTRACE_PROBE2(nce__resolve__ipif__not__ready
,
4255 ncec_t
*, ncec
, ipif_t
*, ipif
);
4261 if (IN6_IS_ADDR_UNSPECIFIED(&src6
) && !is_myaddr
) {
4263 * Pick a source address for this solicitation, but
4264 * restrict the selection to addresses assigned to the
4265 * output interface. We do this because the destination will
4266 * create a neighbor cache entry for the source address of
4267 * this packet, so the source address had better be a valid
4271 ipif
= ipif_select_source_v6(ill
, &ncec
->ncec_addr
,
4272 B_TRUE
, IPV6_PREFER_SRC_DEFAULT
, ALL_ZONES
,
4277 IN6_V4MAPPED_TO_IPADDR(&ncec
->ncec_addr
, nce_addr
);
4278 ipif
= ipif_select_source_v4(ill
, nce_addr
, ALL_ZONES
,
4281 if (ipif
== NULL
&& IS_IPMP(ill
)) {
4282 ill_t
*send_ill
= ipmp_ill_hold_xmit_ill(ill
, B_TRUE
);
4284 if (send_ill
!= NULL
) {
4286 ipif
= ipif_select_source_v6(send_ill
,
4287 &ncec
->ncec_addr
, B_TRUE
,
4288 IPV6_PREFER_SRC_DEFAULT
, ALL_ZONES
,
4291 IN6_V4MAPPED_TO_IPADDR(&ncec
->ncec_addr
,
4293 ipif
= ipif_select_source_v4(send_ill
,
4294 src4
, ALL_ZONES
, B_TRUE
, NULL
);
4296 ill_refrele(send_ill
);
4301 char buf
[INET6_ADDRSTRLEN
];
4303 ip1dbg(("nce_resolve_src: No source ipif for dst %s\n",
4304 inet_ntop((isv6
? AF_INET6
: AF_INET
),
4305 (char *)&ncec
->ncec_addr
, buf
, sizeof (buf
))));
4306 DTRACE_PROBE1(nce__resolve__no__ipif
, ncec_t
*, ncec
);
4309 src6
= ipif
->ipif_v6lcl_addr
;
4313 src_ill
= ipif
->ipif_ill
;
4314 if (IS_IPMP(src_ill
))
4315 src_ill
= ipmp_ipif_hold_bound_ill(ipif
);
4317 ill_refhold(src_ill
);
4319 DTRACE_PROBE2(nce__resolve__src__ill
, ncec_t
*, ncec
,
4326 ip_nce_lookup_and_update(ipaddr_t
*addr
, ipif_t
*ipif
, ip_stack_t
*ipst
,
4327 uchar_t
*hwaddr
, int hwaddr_len
, int flags
)
4334 ill
= (ipif
? ipif
->ipif_ill
: NULL
);
4337 * only one ncec is possible
4339 nce
= nce_lookup_v4(ill
, addr
);
4341 ncec
= nce
->nce_common
;
4342 mutex_enter(&ncec
->ncec_lock
);
4343 if (NCE_ISREACHABLE(ncec
))
4344 new_state
= ND_UNCHANGED
;
4346 new_state
= ND_STALE
;
4347 ncec
->ncec_flags
= flags
;
4348 nce_update(ncec
, new_state
, hwaddr
);
4349 mutex_exit(&ncec
->ncec_lock
);
4355 * ill is wildcard; clean up all ncec's and ire's
4356 * that match on addr.
4360 hwm
.hwm_addr
= *addr
;
4361 hwm
.hwm_hwlen
= hwaddr_len
;
4362 hwm
.hwm_hwaddr
= hwaddr
;
4363 hwm
.hwm_flags
= flags
;
4365 ncec_walk_common(ipst
->ips_ndp4
, NULL
,
4366 (pfi_t
)nce_update_hw_changed
, (uchar_t
*)&hwm
, B_TRUE
);
4371 * Common function to add ncec entries.
4372 * we always add the ncec with ncec_ill == ill, and always create
4373 * nce_t on ncec_ill. A dlpi fastpath message may be triggered if the
4374 * ncec is !reachable.
4376 * When the caller passes in an nce_state of ND_UNCHANGED,
4377 * nce_add_common() will determine the state of the created nce based
4378 * on the ill_net_type and nce_flags used. Otherwise, the nce will
4379 * be created with state set to the passed in nce_state.
4382 nce_add_common(ill_t
*ill
, uchar_t
*hw_addr
, uint_t hw_addr_len
,
4383 const in6_addr_t
*addr
, uint16_t flags
, uint16_t nce_state
, nce_t
**retnce
)
4385 static ncec_t nce_nil
;
4386 uchar_t
*template = NULL
;
4390 ip_stack_t
*ipst
= ill
->ill_ipst
;
4392 boolean_t fastprobe
= B_FALSE
;
4393 struct ndp_g_s
*ndp
;
4395 mblk_t
*dlur_mp
= NULL
;
4398 ndp
= ill
->ill_ipst
->ips_ndp6
;
4400 ndp
= ill
->ill_ipst
->ips_ndp4
;
4404 ASSERT(MUTEX_HELD(&ndp
->ndp_g_lock
));
4406 if (IN6_IS_ADDR_UNSPECIFIED(addr
)) {
4407 ip0dbg(("nce_add_common: no addr\n"));
4410 if ((flags
& ~NCE_EXTERNAL_FLAGS_MASK
)) {
4411 ip0dbg(("nce_add_common: flags = %x\n", (int)flags
));
4415 if (ill
->ill_isv6
) {
4416 ncep
= ((ncec_t
**)NCE_HASH_PTR_V6(ipst
, *addr
));
4420 IN6_V4MAPPED_TO_IPADDR(addr
, v4addr
);
4421 ncep
= ((ncec_t
**)NCE_HASH_PTR_V4(ipst
, v4addr
));
4425 * The caller has ensured that there is no nce on ill, but there could
4426 * still be an nce_common_t for the address, so that we find exisiting
4427 * ncec_t strucutures first, and atomically add a new nce_t if
4428 * one is found. The ndp_g_lock ensures that we don't cross threads
4429 * with an ncec_delete(). Unlike ncec_lookup_illgrp() we do not
4430 * compare for matches across the illgrp because this function is
4431 * called via nce_lookup_then_add_v* -> nce_add_v* -> nce_add_common,
4432 * with the nce_lookup_then_add_v* passing in the ipmp_ill where
4436 for (; ncec
!= NULL
; ncec
= ncec
->ncec_next
) {
4437 if (ncec
->ncec_ill
== ill
) {
4438 if (IN6_ARE_ADDR_EQUAL(&ncec
->ncec_addr
, addr
)) {
4440 * We should never find *retnce to be
4441 * MYADDR, since the caller may then
4442 * incorrectly restart a DAD timer that's
4443 * already running. However, if we are in
4444 * forwarding mode, and the interface is
4445 * moving in/out of groups, the data
4446 * path ire lookup (e.g., ire_revalidate_nce)
4447 * may have determined that some destination
4448 * is offlink while the control path is adding
4449 * that address as a local address.
4450 * Recover from this case by failing the
4453 if (NCE_MYADDR(ncec
))
4455 *retnce
= nce_ill_lookup_then_add(ill
, ncec
);
4456 if (*retnce
!= NULL
)
4461 if (*retnce
!= NULL
) /* caller must trigger fastpath on nce */
4464 ncec
= kmem_cache_alloc(ncec_cache
, KM_NOSLEEP
);
4468 ncec
->ncec_ill
= ill
;
4469 ncec
->ncec_ipversion
= (ill
->ill_isv6
? IPV6_VERSION
: IPV4_VERSION
);
4470 ncec
->ncec_flags
= flags
;
4471 ncec
->ncec_ipst
= ipst
; /* No netstack_hold */
4473 if (!ill
->ill_isv6
) {
4477 * DAD probe interval and probe count are set based on
4478 * fast/slow probe settings. If the underlying link doesn't
4479 * have reliably up/down notifications or if we're working
4480 * with IPv4 169.254.0.0/16 Link Local Address space, then
4481 * don't use the fast timers. Otherwise, use them.
4483 ASSERT(IN6_IS_ADDR_V4MAPPED(addr
));
4484 IN6_V4MAPPED_TO_IPADDR(addr
, addr4
);
4485 if (ill
->ill_note_link
&& !IS_IPV4_LL_SPACE(&addr4
)) {
4487 } else if (IS_IPMP(ill
) && NCE_PUBLISH(ncec
) &&
4488 !IS_IPV4_LL_SPACE(&addr4
)) {
4491 hwaddr_ill
= ipmp_illgrp_find_ill(ill
->ill_grp
, hw_addr
,
4493 if (hwaddr_ill
!= NULL
&& hwaddr_ill
->ill_note_link
)
4497 ncec
->ncec_xmit_interval
=
4498 ipst
->ips_arp_fastprobe_interval
;
4500 ipst
->ips_arp_fastprobe_count
;
4501 ncec
->ncec_flags
|= NCE_F_FAST
;
4503 ncec
->ncec_xmit_interval
=
4504 ipst
->ips_arp_probe_interval
;
4506 ipst
->ips_arp_probe_count
;
4508 if (NCE_PUBLISH(ncec
)) {
4509 ncec
->ncec_unsolicit_count
=
4510 ipst
->ips_ip_arp_publish_count
;
4514 * probe interval is constant: ILL_PROBE_INTERVAL
4515 * probe count is constant: ND_MAX_UNICAST_SOLICIT
4517 ncec
->ncec_pcnt
= ND_MAX_UNICAST_SOLICIT
;
4518 if (NCE_PUBLISH(ncec
)) {
4519 ncec
->ncec_unsolicit_count
=
4520 ipst
->ips_ip_ndp_unsolicit_count
;
4523 ncec
->ncec_rcnt
= ill
->ill_xmit_count
;
4524 ncec
->ncec_addr
= *addr
;
4525 ncec
->ncec_qd_mp
= NULL
;
4526 ncec
->ncec_refcnt
= 1; /* for ncec getting created */
4527 mutex_init(&ncec
->ncec_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
4528 ncec
->ncec_trace_disable
= B_FALSE
;
4531 * ncec_lladdr holds link layer address
4533 if (hw_addr_len
> 0) {
4534 template = kmem_alloc(hw_addr_len
, KM_NOSLEEP
);
4535 if (template == NULL
) {
4539 ncec
->ncec_lladdr
= template;
4540 ncec
->ncec_lladdr_length
= hw_addr_len
;
4541 bzero(ncec
->ncec_lladdr
, hw_addr_len
);
4543 if ((flags
& NCE_F_BCAST
) != 0) {
4544 state
= ND_REACHABLE
;
4545 ASSERT(hw_addr_len
> 0);
4546 } else if (ill
->ill_net_type
== IRE_IF_RESOLVER
) {
4548 } else if (ill
->ill_net_type
== IRE_IF_NORESOLVER
) {
4550 * NORESOLVER entries are always created in the REACHABLE
4553 state
= ND_REACHABLE
;
4554 if (ill
->ill_phys_addr_length
== IP_ADDR_LEN
&&
4555 ill
->ill_mactype
!= DL_IPV4
&&
4556 ill
->ill_mactype
!= DL_6TO4
) {
4558 * We create a nce_res_mp with the IP nexthop address
4559 * as the destination address if the physical length
4560 * is exactly 4 bytes for point-to-multipoint links
4561 * that do their own resolution from IP to link-layer
4562 * address (e.g. IP over X.25).
4564 bcopy((uchar_t
*)addr
,
4565 ncec
->ncec_lladdr
, ill
->ill_phys_addr_length
);
4567 if (ill
->ill_phys_addr_length
== IPV6_ADDR_LEN
&&
4568 ill
->ill_mactype
!= DL_IPV6
) {
4570 * We create a nce_res_mp with the IP nexthop address
4571 * as the destination address if the physical legnth
4572 * is exactly 16 bytes for point-to-multipoint links
4573 * that do their own resolution from IP to link-layer
4576 bcopy((uchar_t
*)addr
,
4577 ncec
->ncec_lladdr
, ill
->ill_phys_addr_length
);
4580 * Since NUD is not part of the base IPv4 protocol definition,
4581 * IPv4 neighbor entries on NORESOLVER interfaces will never
4582 * age, and are marked NCE_F_NONUD.
4585 ncec
->ncec_flags
|= NCE_F_NONUD
;
4586 } else if (ill
->ill_net_type
== IRE_LOOPBACK
) {
4587 state
= ND_REACHABLE
;
4590 if (hw_addr
!= NULL
|| ill
->ill_net_type
== IRE_IF_NORESOLVER
) {
4592 * We are adding an ncec with a deterministic hw_addr,
4593 * so the state can only be one of {REACHABLE, STALE, PROBE}.
4595 * if we are adding a unicast ncec for the local address
4596 * it would be REACHABLE; we would be adding a ND_STALE entry
4597 * for the requestor of an ARP_REQUEST/ND_SOLICIT. Our own
4598 * addresses are added in PROBE to trigger DAD.
4600 if ((flags
& (NCE_F_MCAST
|NCE_F_BCAST
)) ||
4601 ill
->ill_net_type
== IRE_IF_NORESOLVER
)
4602 state
= ND_REACHABLE
;
4603 else if (!NCE_PUBLISH(ncec
))
4607 if (hw_addr
!= NULL
)
4608 nce_set_ll(ncec
, hw_addr
);
4610 /* caller overrides internally computed state */
4611 if (nce_state
!= ND_UNCHANGED
)
4614 if (state
== ND_PROBE
)
4615 ncec
->ncec_flags
|= NCE_F_UNVERIFIED
;
4617 ncec
->ncec_state
= state
;
4619 if (state
== ND_REACHABLE
) {
4620 ncec
->ncec_last
= ncec
->ncec_init_time
=
4621 TICK_TO_MSEC(ddi_get_lbolt64());
4623 ncec
->ncec_last
= 0;
4624 if (state
== ND_INITIAL
)
4625 ncec
->ncec_init_time
= TICK_TO_MSEC(ddi_get_lbolt64());
4627 list_create(&ncec
->ncec_cb
, sizeof (ncec_cb_t
),
4628 offsetof(ncec_cb_t
, ncec_cb_node
));
4630 * have all the memory allocations out of the way before taking locks
4631 * and adding the nce.
4633 nce
= kmem_cache_alloc(nce_cache
, KM_NOSLEEP
);
4638 if (ncec
->ncec_lladdr
!= NULL
||
4639 ill
->ill_net_type
== IRE_IF_NORESOLVER
) {
4640 dlur_mp
= ill_dlur_gen(ncec
->ncec_lladdr
,
4641 ill
->ill_phys_addr_length
, ill
->ill_sap
,
4642 ill
->ill_sap_length
);
4643 if (dlur_mp
== NULL
) {
4650 * Atomically ensure that the ill is not CONDEMNED, before
4653 mutex_enter(&ill
->ill_lock
);
4654 if (ill
->ill_state_flags
& ILL_CONDEMNED
) {
4655 mutex_exit(&ill
->ill_lock
);
4659 if (!NCE_MYADDR(ncec
) &&
4660 (ill
->ill_state_flags
& ILL_DOWN_IN_PROGRESS
)) {
4661 mutex_exit(&ill
->ill_lock
);
4662 DTRACE_PROBE1(nce__add__on__down__ill
, ncec_t
*, ncec
);
4667 * Acquire the ncec_lock even before adding the ncec to the list
4668 * so that it cannot get deleted after the ncec is added, but
4669 * before we add the nce.
4671 mutex_enter(&ncec
->ncec_lock
);
4672 if ((ncec
->ncec_next
= *ncep
) != NULL
)
4673 ncec
->ncec_next
->ncec_ptpn
= &ncec
->ncec_next
;
4675 ncec
->ncec_ptpn
= ncep
;
4677 /* Bump up the number of ncec's referencing this ill */
4678 DTRACE_PROBE3(ill__incr__cnt
, (ill_t
*), ill
,
4679 (char *), "ncec", (void *), ncec
);
4680 ill
->ill_ncec_cnt
++;
4682 * Since we hold the ncec_lock at this time, the ncec cannot be
4683 * condemned, and we can safely add the nce.
4685 *retnce
= nce_add_impl(ill
, ncec
, nce
, dlur_mp
);
4686 mutex_exit(&ncec
->ncec_lock
);
4687 mutex_exit(&ill
->ill_lock
);
4689 /* caller must trigger fastpath on *retnce */
4694 kmem_cache_free(ncec_cache
, ncec
);
4696 kmem_cache_free(nce_cache
, nce
);
4698 if (template != NULL
)
4699 kmem_free(template, ill
->ill_phys_addr_length
);
4704 * take a ref on the nce
4707 nce_refhold(nce_t
*nce
)
4709 mutex_enter(&nce
->nce_lock
);
4711 ASSERT((nce
)->nce_refcnt
!= 0);
4712 mutex_exit(&nce
->nce_lock
);
4716 * release a ref on the nce; In general, this
4717 * cannot be called with locks held because nce_inactive
4718 * may result in nce_inactive which will take the ill_lock,
4719 * do ipif_ill_refrele_tail etc. Thus the one exception
4720 * where this can be called with locks held is when the caller
4721 * is certain that the nce_refcnt is sufficient to prevent
4722 * the invocation of nce_inactive.
4725 nce_refrele(nce_t
*nce
)
4727 ASSERT((nce
)->nce_refcnt
!= 0);
4728 mutex_enter(&nce
->nce_lock
);
4729 if (--nce
->nce_refcnt
== 0)
4730 nce_inactive(nce
); /* destroys the mutex */
4732 mutex_exit(&nce
->nce_lock
);
4736 * free the nce after all refs have gone away.
4739 nce_inactive(nce_t
*nce
)
4741 ill_t
*ill
= nce
->nce_ill
;
4743 ASSERT(nce
->nce_refcnt
== 0);
4745 ncec_refrele_notr(nce
->nce_common
);
4746 nce
->nce_common
= NULL
;
4747 freemsg(nce
->nce_fp_mp
);
4748 freemsg(nce
->nce_dlur_mp
);
4750 mutex_enter(&ill
->ill_lock
);
4751 DTRACE_PROBE3(ill__decr__cnt
, (ill_t
*), ill
,
4752 (char *), "nce", (void *), nce
);
4754 nce
->nce_ill
= NULL
;
4756 * If the number of ncec's associated with this ill have dropped
4757 * to zero, check whether we need to restart any operation that
4758 * is waiting for this to happen.
4760 if (ILL_DOWN_OK(ill
)) {
4761 /* ipif_ill_refrele_tail drops the ill_lock */
4762 ipif_ill_refrele_tail(ill
);
4764 mutex_exit(&ill
->ill_lock
);
4767 mutex_destroy(&nce
->nce_lock
);
4768 kmem_cache_free(nce_cache
, nce
);
4772 * Add an nce to the ill_nce list.
4775 nce_add_impl(ill_t
*ill
, ncec_t
*ncec
, nce_t
*nce
, mblk_t
*dlur_mp
)
4777 bzero(nce
, sizeof (*nce
));
4778 mutex_init(&nce
->nce_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
4779 nce
->nce_common
= ncec
;
4780 nce
->nce_addr
= ncec
->ncec_addr
;
4782 DTRACE_PROBE3(ill__incr__cnt
, (ill_t
*), ill
,
4783 (char *), "nce", (void *), nce
);
4786 nce
->nce_refcnt
= 1; /* for the thread */
4787 ncec
->ncec_refcnt
++; /* want ncec_refhold_locked_notr(ncec) */
4788 nce
->nce_dlur_mp
= dlur_mp
;
4790 /* add nce to the ill's fastpath list. */
4791 nce
->nce_refcnt
++; /* for the list */
4792 list_insert_head(&ill
->ill_nce
, nce
);
4797 nce_add(ill_t
*ill
, ncec_t
*ncec
)
4800 mblk_t
*dlur_mp
= NULL
;
4802 ASSERT(MUTEX_HELD(&ill
->ill_lock
));
4803 ASSERT(MUTEX_HELD(&ncec
->ncec_lock
));
4805 nce
= kmem_cache_alloc(nce_cache
, KM_NOSLEEP
);
4808 if (ncec
->ncec_lladdr
!= NULL
||
4809 ill
->ill_net_type
== IRE_IF_NORESOLVER
) {
4810 dlur_mp
= ill_dlur_gen(ncec
->ncec_lladdr
,
4811 ill
->ill_phys_addr_length
, ill
->ill_sap
,
4812 ill
->ill_sap_length
);
4813 if (dlur_mp
== NULL
) {
4814 kmem_cache_free(nce_cache
, nce
);
4818 return (nce_add_impl(ill
, ncec
, nce
, dlur_mp
));
4822 * remove the nce from the ill_faspath list
4825 nce_delete(nce_t
*nce
)
4827 ill_t
*ill
= nce
->nce_ill
;
4829 ASSERT(MUTEX_HELD(&ill
->ill_lock
));
4831 mutex_enter(&nce
->nce_lock
);
4832 if (nce
->nce_is_condemned
) {
4834 * some other thread has removed this nce from the ill_nce list
4836 mutex_exit(&nce
->nce_lock
);
4839 nce
->nce_is_condemned
= B_TRUE
;
4840 mutex_exit(&nce
->nce_lock
);
4842 list_remove(&ill
->ill_nce
, nce
);
4844 * even though we are holding the ill_lock, it is ok to
4845 * call nce_refrele here because we know that we should have
4846 * at least 2 refs on the nce: one for the thread, and one
4847 * for the list. The refrele below will release the one for
4854 nce_lookup(ill_t
*ill
, const in6_addr_t
*addr
)
4858 ASSERT(ill
!= NULL
);
4859 ASSERT(MUTEX_HELD(&ill
->ill_lock
));
4861 for (nce
= list_head(&ill
->ill_nce
); nce
!= NULL
;
4862 nce
= list_next(&ill
->ill_nce
, nce
)) {
4863 if (IN6_ARE_ADDR_EQUAL(&nce
->nce_addr
, addr
))
4868 * if we found the nce on the ill_nce list while holding
4869 * the ill_lock, then it cannot be condemned yet.
4872 ASSERT(!nce
->nce_is_condemned
);
4879 * Walk the ill_nce list on ill. The callback function func() cannot perform
4880 * any destructive actions.
4883 nce_walk_common(ill_t
*ill
, pfi_t func
, void *arg
)
4885 nce_t
*nce
= NULL
, *nce_next
;
4887 ASSERT(MUTEX_HELD(&ill
->ill_lock
));
4888 for (nce
= list_head(&ill
->ill_nce
); nce
!= NULL
; ) {
4889 nce_next
= list_next(&ill
->ill_nce
, nce
);
4890 if (func(ill
, nce
, arg
) != 0)
4897 nce_walk(ill_t
*ill
, pfi_t func
, void *arg
)
4899 mutex_enter(&ill
->ill_lock
);
4900 nce_walk_common(ill
, func
, arg
);
4901 mutex_exit(&ill
->ill_lock
);
4905 nce_flush(ill_t
*ill
, boolean_t flushall
)
4907 nce_t
*nce
, *nce_next
;
4910 list_create(&dead
, sizeof (nce_t
), offsetof(nce_t
, nce_node
));
4911 mutex_enter(&ill
->ill_lock
);
4912 for (nce
= list_head(&ill
->ill_nce
); nce
!= NULL
; ) {
4913 nce_next
= list_next(&ill
->ill_nce
, nce
);
4914 if (!flushall
&& NCE_PUBLISH(nce
->nce_common
)) {
4919 * nce_delete requires that the caller should either not
4920 * be holding locks, or should hold a ref to ensure that
4921 * we wont hit ncec_inactive. So take a ref and clean up
4922 * after the list is flushed.
4926 list_insert_tail(&dead
, nce
);
4929 mutex_exit(&ill
->ill_lock
);
4930 while ((nce
= list_head(&dead
)) != NULL
) {
4931 list_remove(&dead
, nce
);
4934 ASSERT(list_is_empty(&dead
));
4935 list_destroy(&dead
);
4938 /* Return an interval that is anywhere in the [1 .. intv] range */
4940 nce_fuzz_interval(clock_t intv
, boolean_t initial_time
)
4944 (void) random_get_pseudo_bytes((uint8_t *)&rnd
, sizeof (rnd
));
4945 /* Note that clock_t is signed; must chop off bits */
4946 rnd
&= (1ul << (NBBY
* sizeof (rnd
) - 1)) - 1;
4951 intv
= (rnd
% intv
) + 1;
4953 /* Compute 'frac' as 20% of the configured interval */
4954 if ((frac
= intv
/ 5) <= 1)
4956 /* Set intv randomly in the range [intv-frac .. intv+frac] */
4957 if ((intv
= intv
- frac
+ rnd
% (2 * frac
+ 1)) <= 0)
4964 nce_resolv_ipmp_ok(ncec_t
*ncec
)
4968 iaflags_t ixaflags
= IXAF_NO_TRACE
;
4970 ill_t
*ill
= ncec
->ncec_ill
;
4971 boolean_t isv6
= (ncec
->ncec_ipversion
== IPV6_VERSION
);
4972 ipif_t
*src_ipif
= NULL
;
4973 ip_stack_t
*ipst
= ill
->ill_ipst
;
4977 ASSERT(IS_IPMP(ill
));
4979 mutex_enter(&ncec
->ncec_lock
);
4980 nprobes
= ncec
->ncec_nprobes
;
4981 mp
= ncec
->ncec_qd_mp
;
4982 ncec
->ncec_qd_mp
= NULL
;
4983 ncec
->ncec_nprobes
= 0;
4984 mutex_exit(&ncec
->ncec_lock
);
4986 while (mp
!= NULL
) {
4989 nxt_mp
= mp
->b_next
;
4992 ip6_t
*ip6h
= (ip6_t
*)mp
->b_rptr
;
4994 pkt_len
= ntohs(ip6h
->ip6_plen
) + IPV6_HDR_LEN
;
4995 src_ipif
= ipif_lookup_addr_nondup_v6(&ip6h
->ip6_src
,
4996 ill
, ALL_ZONES
, ipst
);
4998 ipha_t
*ipha
= (ipha_t
*)mp
->b_rptr
;
5000 ixaflags
|= IXAF_IS_IPV4
;
5001 pkt_len
= ntohs(ipha
->ipha_length
);
5002 src_ipif
= ipif_lookup_addr_nondup(ipha
->ipha_src
,
5003 ill
, ALL_ZONES
, ipst
);
5007 * find a new nce based on an under_ill. The first IPMP probe
5008 * packet gets queued, so we could still find a src_ipif that
5009 * matches an IPMP test address.
5011 if (src_ipif
== NULL
|| IS_IPMP(src_ipif
->ipif_ill
)) {
5013 * if src_ipif is null, this could be either a
5014 * forwarded packet or a probe whose src got deleted.
5015 * We identify the former case by looking for the
5016 * ncec_nprobes: the first ncec_nprobes packets are
5019 if (src_ipif
== NULL
&& nprobes
> 0)
5023 * For forwarded packets, we use the ipmp rotor
5026 send_ill
= ipmp_ill_hold_xmit_ill(ncec
->ncec_ill
,
5029 send_ill
= src_ipif
->ipif_ill
;
5030 ill_refhold(send_ill
);
5033 DTRACE_PROBE4(nce__resolve__ipmp
, (mblk_t
*), mp
,
5034 (ncec_t
*), ncec
, (ipif_t
*),
5035 src_ipif
, (ill_t
*), send_ill
);
5037 if (send_ill
== NULL
) {
5038 if (src_ipif
!= NULL
)
5039 ipif_refrele(src_ipif
);
5042 /* create an under_nce on send_ill */
5043 rw_enter(&ipst
->ips_ill_g_lock
, RW_READER
);
5044 if (IS_IN_SAME_ILLGRP(send_ill
, ncec
->ncec_ill
))
5045 under_nce
= nce_fastpath_create(send_ill
, ncec
);
5048 rw_exit(&ipst
->ips_ill_g_lock
);
5049 if (under_nce
!= NULL
&& NCE_ISREACHABLE(ncec
))
5050 nce_fastpath_trigger(under_nce
);
5052 ill_refrele(send_ill
);
5053 if (src_ipif
!= NULL
)
5054 ipif_refrele(src_ipif
);
5056 if (under_nce
!= NULL
) {
5057 (void) ip_xmit(mp
, under_nce
, ixaflags
, pkt_len
, 0,
5058 ALL_ZONES
, 0, NULL
);
5059 nce_refrele(under_nce
);
5067 BUMP_MIB(&ipst
->ips_ip6_mib
, ipIfStatsOutDiscards
);
5069 BUMP_MIB(&ipst
->ips_ip_mib
, ipIfStatsOutDiscards
);
5071 ip_drop_output("ipIfStatsOutDiscards - no under_ill", mp
, NULL
);
5077 ncec_cb_dispatch(ncec
); /* complete callbacks */