4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
28 * Functions to implement IP address -> link layer address (PSARC 2006/482)
30 #include <inet/ip2mac.h>
31 #include <inet/ip2mac_impl.h>
33 #include <inet/ip_ndp.h>
34 #include <inet/ip_if.h>
38 * dispatch pending callbacks.
/*
 * Deliver the current resolution result for `ncec' to the callbacks queued
 * on ncec->ncec_cb.
 *
 * ncec_lock is acquired here. When the callback list is empty the lock is
 * dropped again immediately. Otherwise one ip2mac_t response is built with
 * ncec_ip2mac_response(), a walker hold is taken with
 * ncec_cb_refhold_locked(), and the list is walked from head to tail.
 * Each callback is invoked as (*ncec_cb_func)(&ip2m, ncec_cb_arg) with
 * ncec_lock dropped around the call.
 */
41 ncec_cb_dispatch(ncec_t
*ncec
)
46 mutex_enter(&ncec
->ncec_lock
);
47 if (list_is_empty(&ncec
->ncec_cb
)) {
48 mutex_exit(&ncec
->ncec_lock
);
/* Build the response once; the same ip2m is handed to every callback. */
51 ncec_ip2mac_response(&ip2m
, ncec
);
/* Bump ncec_cb_walker_cnt for the duration of the walk below. */
52 ncec_cb_refhold_locked(ncec
);
54 * IP does not hold internal locks like nce_lock across calls to
55 * other subsystems for fear of recursive lock entry and lock
56 * hierarchy violation. The caller may be holding locks across
57 * the call to IP. (It would be ideal if no subsystem holds locks
58 * across calls into another subsystem, especially if calls can
59 * happen in either direction).
61 ncec_cb
= list_head(&ncec
->ncec_cb
);
62 for (; ncec_cb
!= NULL
; ncec_cb
= list_next(&ncec
->ncec_cb
, ncec_cb
)) {
/*
 * NOTE(review): the statement controlled by this test is not visible in
 * this view; presumably entries already flagged NCE_CB_DISPATCHED are
 * skipped -- confirm.
 */
63 if (ncec_cb
->ncec_cb_flags
& NCE_CB_DISPATCHED
)
/* Flag the entry before calling out, while ncec_lock is still held. */
65 ncec_cb
->ncec_cb_flags
|= NCE_CB_DISPATCHED
;
/* Call the registered function without ncec_lock, then re-take it. */
66 mutex_exit(&ncec
->ncec_lock
);
67 (*ncec_cb
->ncec_cb_func
)(&ip2m
, ncec_cb
->ncec_cb_arg
);
68 mutex_enter(&ncec
->ncec_lock
);
/* Drop the walker hold taken above, then release ncec_lock. */
70 ncec_cb_refrele(ncec
);
71 mutex_exit(&ncec
->ncec_lock
);
75 * fill up the ip2m response fields with information from the nce.
/*
 * Populate the ip2mac_t response `ip2m' from the neighbor cache entry
 * `ncec'.
 *
 * The caller must hold ncec->ncec_lock (asserted). *ip2m is zeroed first.
 * ip2mac_pa is then written either as an AF_INET6 sockaddr carrying
 * ncec_addr, or as an AF_INET sockaddr carrying the IPv4 address extracted
 * from the v4-mapped ncec_addr. When ip2mac_err is 0, ip2mac_ha is filled
 * in as an AF_LINK sockaddr_dl using the ill's type and physical address
 * length, and ncec_lladdr is copied in if it is non-NULL.
 */
78 ncec_ip2mac_response(ip2mac_t
*ip2m
, ncec_t
*ncec
)
/* Address family of the entry, derived from ncec_ipversion. */
80 boolean_t isv6
= (ncec
->ncec_ipversion
== IPV6_VERSION
);
83 struct sockaddr_dl
*sdl
;
85 ASSERT(MUTEX_HELD(&ncec
->ncec_lock
));
/* Start from an all-zero response. */
86 bzero(ip2m
, sizeof (*ip2m
));
/*
 * NOTE(review): only the ESRCH assignment is visible here; the statement
 * taken when the entry is reachable and not condemned is missing from this
 * view (ip2mac_err would remain 0 from the bzero above) -- confirm.
 */
87 if (NCE_ISREACHABLE(ncec
) && !NCE_ISCONDEMNED(ncec
))
90 ip2m
->ip2mac_err
= ESRCH
;
/*
 * Protocol address, IPv6 form.
 * NOTE(review): the test choosing between the IPv6 and IPv4 forms is not
 * visible; presumably it is on isv6 -- confirm.
 */
92 sin6
= (sin6_t
*)&ip2m
->ip2mac_pa
;
93 sin6
->sin6_family
= AF_INET6
;
94 sin6
->sin6_addr
= ncec
->ncec_addr
;
/* Protocol address, IPv4 form: ncec_addr is converted from v4-mapped. */
96 sin
= (sin_t
*)&ip2m
->ip2mac_pa
;
97 sin
->sin_family
= AF_INET
;
98 IN6_V4MAPPED_TO_INADDR(&ncec
->ncec_addr
, &sin
->sin_addr
);
/* The hardware address is only filled in for a successful response. */
100 if (ip2m
->ip2mac_err
== 0) {
101 sdl
= &ip2m
->ip2mac_ha
;
102 sdl
->sdl_family
= AF_LINK
;
103 sdl
->sdl_type
= ncec
->ncec_ill
->ill_type
;
105 * should we put ncec_ill->ill_name in there? why?
106 * likewise for the sdl_index
109 sdl
->sdl_alen
= ncec
->ncec_ill
->ill_phys_addr_length
;
/* Copy sdl_alen bytes of link-layer address when one is recorded. */
110 if (ncec
->ncec_lladdr
!= NULL
)
111 bcopy(ncec
->ncec_lladdr
, LLADDR(sdl
), sdl
->sdl_alen
);
/*
 * Register a walker of ncec's callback list by incrementing
 * ncec_cb_walker_cnt. The caller must already hold ncec->ncec_lock
 * (asserted); the count is dropped again by ncec_cb_refrele().
 */
116 ncec_cb_refhold_locked(ncec_t
*ncec
)
118 ASSERT(MUTEX_HELD(&ncec
->ncec_lock
));
119 ncec
->ncec_cb_walker_cnt
++;
/*
 * Drop one walker hold on ncec's callback list. The caller must hold
 * ncec->ncec_lock (asserted).
 *
 * When the decrement brings ncec_cb_walker_cnt to zero, the callback list
 * is scanned and entries are unlinked with list_remove() and released with
 * kmem_free().
 */
123 ncec_cb_refrele(ncec_t
*ncec
)
125 ncec_cb_t
*ncec_cb
, *ncec_cb_next
= NULL
;
127 ASSERT(MUTEX_HELD(&ncec
->ncec_lock
));
/* Only the last walker to leave performs the cleanup pass. */
128 if (--ncec
->ncec_cb_walker_cnt
== 0) {
129 for (ncec_cb
= list_head(&ncec
->ncec_cb
); ncec_cb
!= NULL
;
130 ncec_cb
= ncec_cb_next
) {
/* Fetch the successor first: ncec_cb may be freed below. */
132 ncec_cb_next
= list_next(&ncec
->ncec_cb
, ncec_cb
);
/*
 * NOTE(review): the statement controlled by this test is not visible in
 * this view; presumably entries that were never dispatched are skipped and
 * left on the list -- confirm.
 */
133 if ((ncec_cb
->ncec_cb_flags
& NCE_CB_DISPATCHED
) == 0)
135 list_remove(&ncec
->ncec_cb
, ncec_cb
);
136 kmem_free(ncec_cb
, sizeof (*ncec_cb
));
142 * add a callback to the nce, so that the callback can be invoked
143 * after address resolution succeeds/fails.
/*
 * Queue a callback record on ncec->ncec_cb. The caller must hold
 * ncec->ncec_lock (asserted).
 *
 * ncec - neighbor cache entry to attach the callback to
 * cb    - function stored in ncec_cb_func
 * cbarg - opaque argument stored in ncec_cb_arg
 *
 * The record is allocated zeroed with KM_NOSLEEP, so the allocation can
 * fail; ip2mid starts out as NULL for that case.
 *
 * NOTE(review): the return statement is not visible in this view. The
 * visible code sets ip2mid to `ncec', whereas ip2mac_cancel_callback()
 * treats the cancel id as an ncec_cb_t * and reads ncec_cb_id through it.
 * Confirm that the value actually returned is the nce_cb address.
 */
146 ncec_add_cb(ncec_t
*ncec
, ip2mac_callback_t
*cb
, void *cbarg
)
149 ip2mac_id_t ip2mid
= NULL
;
151 ASSERT(MUTEX_HELD(&ncec
->ncec_lock
));
/*
 * Non-sleeping allocation.
 * NOTE(review): the action taken on failure is not visible in this view.
 */
152 if ((nce_cb
= kmem_zalloc(sizeof (*nce_cb
), KM_NOSLEEP
)) == NULL
)
154 nce_cb
->ncec_cb_func
= cb
;
155 nce_cb
->ncec_cb_arg
= cbarg
;
157 * We identify the ncec_cb_t during cancellation by the address
158 * of the nce_cb_t itself, and, as a short-cut for eliminating
159 * clear mismatches, only look in the callback list of ncec's
160 * whose address is equal to the nce_cb_id.
162 nce_cb
->ncec_cb_id
= ncec
; /* no refs! just an address */
/* New callbacks go to the tail of the list. */
163 list_insert_tail(&ncec
->ncec_cb
, nce_cb
);
164 ip2mid
= ncec
; /* this is the id to be used in ip2mac_cancel */
170 * Resolve an IP address to a link-layer address using the data-structures
171 * defined in PSARC 2006/482. If the current link-layer address for the
172 * IP address is not known, the state machine for resolving the address
173 * will be triggered, and the callback function (*cb) will be invoked after
174 * the resolution completes.
/*
 * Look up, or start resolving, the link-layer address for the protocol
 * address in ip2m->ip2mac_pa on the interface ip2m->ip2mac_ifindex.
 *
 * op    - IP2MAC_LOOKUP only consults the neighbor cache; any other value
 *         takes the lookup-then-add path, which may trigger resolution
 * ip2m  - request/response structure; the result is reported through
 *         ip2m->ip2mac_err
 * cb    - callback queued via ncec_add_cb() while resolution is pending
 * cbarg - opaque argument for cb
 *
 * NOTE(review): the parameter list continues past what is visible here;
 * `zoneid' is used below and is presumably a trailing parameter -- confirm.
 *
 * Values stored in ip2m->ip2mac_err by the visible code:
 *   0           - ncec_ip2mac_response() filled in the answer
 *   EINVAL      - set after the netstack lookup
 *   ENXIO       - set after the ill lookup
 *   ESRCH       - no usable entry, entry condemned, or no retransmits left
 *   EINPROGRESS - resolution pending and the callback was queued
 *   err         - error from nce_lookup_then_add_v4/v6 other than EEXIST
 *
 * NOTE(review): the return statement is not visible; presumably ip2mid (the
 * value from ncec_add_cb(), NULL otherwise) is returned -- confirm.
 */
177 ip2mac(uint_t op
, ip2mac_t
*ip2m
, ip2mac_callback_t
*cb
, void *cbarg
,
186 ip2mac_id_t ip2mid
= NULL
;
191 boolean_t need_resolve
= B_FALSE
;
/* The address family of the request selects the IPv6 or IPv4 path. */
193 isv6
= (ip2m
->ip2mac_pa
.ss_family
== AF_INET6
);
195 ns
= netstack_find_by_zoneid(zoneid
);
/*
 * NOTE(review): the guard for this assignment is not visible; presumably
 * it applies when the netstack lookup fails -- confirm.
 */
197 ip2m
->ip2mac_err
= EINVAL
;
201 * For exclusive stacks we reset the zoneid to zero
202 * since IP uses the global zoneid in the exclusive stacks.
204 if (ns
->netstack_stackid
!= GLOBAL_NETSTACKID
)
205 zoneid
= GLOBAL_ZONEID
;
206 ipst
= ns
->netstack_ip
;
208 * find the ill from the ip2m->ip2mac_ifindex
210 ill
= ill_lookup_on_ifindex(ip2m
->ip2mac_ifindex
, isv6
, ipst
);
/*
 * NOTE(review): the guard for this assignment is not visible; presumably
 * it applies when no ill matches the ifindex -- confirm.
 */
212 ip2m
->ip2mac_err
= ENXIO
;
/* IPv6: pure lookup for IP2MAC_LOOKUP, lookup-then-add otherwise. */
217 sin6
= (sin6_t
*)&ip2m
->ip2mac_pa
;
218 if (op
== IP2MAC_LOOKUP
) {
219 nce
= nce_lookup_v6(ill
, &sin6
->sin6_addr
);
221 err
= nce_lookup_then_add_v6(ill
, NULL
,
222 ill
->ill_phys_addr_length
,
223 &sin6
->sin6_addr
, 0, ND_UNCHANGED
, &nce
);
/* IPv4: same split between pure lookup and lookup-then-add. */
226 sin
= (sin_t
*)&ip2m
->ip2mac_pa
;
227 if (op
== IP2MAC_LOOKUP
) {
228 nce
= nce_lookup_v4(ill
, &sin
->sin_addr
.s_addr
);
230 err
= nce_lookup_then_add_v4(ill
, NULL
,
231 ill
->ill_phys_addr_length
,
232 &sin
->sin_addr
.s_addr
, 0, ND_UNCHANGED
, &nce
);
/*
 * IP2MAC_LOOKUP: answer only from an entry that is reachable and whose
 * age (delta, in milliseconds) is below ill_reachable_time; everything
 * else is reported as ESRCH.
 */
235 if (op
== IP2MAC_LOOKUP
) {
237 ip2m
->ip2mac_err
= ESRCH
;
240 ncec
= nce
->nce_common
;
241 delta
= TICK_TO_MSEC(ddi_get_lbolt64()) - ncec
->ncec_last
;
242 mutex_enter(&ncec
->ncec_lock
);
243 if (NCE_ISREACHABLE(ncec
) &&
244 delta
< (uint64_t)ill
->ill_reachable_time
) {
245 ncec_ip2mac_response(ip2m
, ncec
);
246 ip2m
->ip2mac_err
= 0;
248 ip2m
->ip2mac_err
= ESRCH
;
250 mutex_exit(&ncec
->ncec_lock
);
/* Lookup-then-add path: EEXIST is not treated as a failure. */
253 if (err
!= 0 && err
!= EEXIST
) {
254 ip2m
->ip2mac_err
= err
;
258 ncec
= nce
->nce_common
;
259 delta
= TICK_TO_MSEC(ddi_get_lbolt64()) - ncec
->ncec_last
;
260 mutex_enter(&ncec
->ncec_lock
);
261 if (NCE_ISCONDEMNED(ncec
)) {
262 ip2m
->ip2mac_err
= ESRCH
;
264 if (NCE_ISREACHABLE(ncec
)) {
/* Our own address, or a recently confirmed entry: answer right away. */
265 if (NCE_MYADDR(ncec
) ||
266 delta
< (uint64_t)ill
->ill_reachable_time
) {
267 ncec_ip2mac_response(ip2m
, ncec
);
268 ip2m
->ip2mac_err
= 0;
269 mutex_exit(&ncec
->ncec_lock
);
273 * Since we do not control the packet output
274 * path for ip2mac() callers, we need to verify
275 * if the existing information in the nce is
276 * very old, and retrigger resolution if necessary.
277 * We will not return the existing stale
278 * information until it is verified through a
279 * resolver request/response exchange.
281 * In the future, we may want to support extensions
282 * that do additional callbacks on link-layer updates,
283 * so that we can return the stale information but
284 * also update the caller if the lladdr changes.
/* Reset the retransmit budget and move the stale entry to ND_PROBE. */
286 ncec
->ncec_rcnt
= ill
->ill_xmit_count
;
287 ncec
->ncec_state
= ND_PROBE
;
288 need_resolve
= B_TRUE
; /* reachable but very old nce */
289 } else if (ncec
->ncec_state
== ND_INITIAL
) {
290 need_resolve
= B_TRUE
; /* ND_INITIAL nce */
291 ncec
->ncec_state
= ND_INCOMPLETE
;
294 * NCE not known to be reachable in the recent past. We must
295 * reconfirm the information before returning it to the caller
297 if (ncec
->ncec_rcnt
> 0) {
299 * Still resolving this ncec, so we can queue the
300 * callback information in ncec->ncec_cb
302 ip2mid
= ncec_add_cb(ncec
, cb
, cbarg
);
303 ip2m
->ip2mac_err
= EINPROGRESS
;
306 * No more retransmits allowed -- resolution failed.
308 ip2m
->ip2mac_err
= ESRCH
;
311 mutex_exit(&ncec
->ncec_lock
);
314 * if NCE_ISREACHABLE(ncec) but very old, or if it is ND_INITIAL,
/*
 * NOTE(review): the guard for this call is not visible; presumably it is
 * made only when need_resolve is B_TRUE -- confirm.
 */
318 ip_ndp_resolve(ncec
);
327 * data passed to ncec_walk for canceling outstanding callbacks.
/*
 * Walker argument shared between ip2mac_cancel() and
 * ip2mac_cancel_callback().
 *
 * NOTE(review): an ip2m_cancel_err member is read and written through this
 * type elsewhere in the file, but its declaration is not visible in this
 * view -- confirm.
 */
329 typedef struct ip2mac_cancel_data_s
{
/* id of the callback to cancel, as supplied to ip2mac_cancel() */
330 ip2mac_id_t ip2m_cancel_id
;
332 } ip2mac_cancel_data_t
;
335 * callback invoked for each active ncec. If the ip2mac_id_t corresponds
336 * to an active nce_cb_t in the ncec's callback list, we want to remove
337 * the callback (if there are no walkers) or return EBUSY to the caller
/*
 * Per-ncec worker handed to ncec_walk() by ip2mac_cancel().
 *
 * ncec - the entry currently being visited
 * arg  - pointer to the ip2mac_cancel_data_t set up by ip2mac_cancel()
 *
 * The cancel id is interpreted as an ncec_cb_t pointer. Its ncec_cb_id is
 * compared against `ncec' before the list is searched. Under ncec_lock the
 * callback list is then searched for that record; when no walker is active
 * (ncec_cb_walker_cnt == 0) the record is unlinked with list_remove(). The
 * visible code also stores EBUSY in ip2m_cancel_err.
 *
 * NOTE(review): no kmem_free() of the unlinked record is visible in this
 * view; confirm whether the record is released elsewhere or is leaked.
 */
340 ip2mac_cancel_callback(ncec_t
*ncec
, void *arg
)
342 ip2mac_cancel_data_t
*ip2m_wdata
= arg
;
343 ncec_cb_t
*ip2m_nce_cb
= ip2m_wdata
->ip2m_cancel_id
;
/*
 * Cheap filter on the recorded ncec address, done before taking the lock.
 * NOTE(review): the statement controlled by this test is not visible;
 * presumably a mismatch returns immediately -- confirm.
 */
346 if (ip2m_nce_cb
->ncec_cb_id
!= ncec
)
349 mutex_enter(&ncec
->ncec_lock
);
350 if (list_is_empty(&ncec
->ncec_cb
)) {
351 mutex_exit(&ncec
->ncec_lock
);
355 * IP does not hold internal locks like nce_lock across calls to
356 * other subsystems for fear of recursive lock entry and lock
357 * hierarchy violation. The caller may be holding locks across
358 * the call to IP. (It would be ideal if no subsystem holds locks
359 * across calls into another subsystem, especially if calls can
360 * happen in either direction).
362 ncec_cb
= list_head(&ncec
->ncec_cb
);
363 for (; ncec_cb
!= NULL
; ncec_cb
= list_next(&ncec
->ncec_cb
, ncec_cb
)) {
/*
 * NOTE(review): the statement controlled by this test is not visible;
 * presumably non-matching entries are skipped -- confirm.
 */
364 if (ncec_cb
!= ip2m_nce_cb
)
367 * If there are no walkers we can remove the nce_cb.
368 * Otherwise the exiting walker will clean up.
370 if (ncec
->ncec_cb_walker_cnt
== 0) {
371 list_remove(&ncec
->ncec_cb
, ncec_cb
);
/*
 * NOTE(review): presumably the alternative (walker active) branch; the
 * branch keyword is not visible in this view -- confirm.
 */
373 ip2m_wdata
->ip2m_cancel_err
= EBUSY
;
377 mutex_exit(&ncec
->ncec_lock
);
382 * cancel an outstanding callback registered via ip2mac
/*
 * Cancel a callback previously registered through ip2mac().
 *
 * ip2mid - id identifying the callback to cancel
 * zoneid - zone used to find the netstack
 *
 * Returns the value left in ip2m_wdata.ip2m_cancel_err: EINVAL on the early
 * return after the netstack lookup, otherwise 0 unless
 * ip2mac_cancel_callback() changed it during the walk (it stores EBUSY).
 */
385 ip2mac_cancel(ip2mac_id_t ip2mid
, zoneid_t zoneid
)
389 ip2mac_cancel_data_t ip2m_wdata
;
391 ns
= netstack_find_by_zoneid(zoneid
);
/*
 * NOTE(review): the guard for this early return is not visible; presumably
 * it applies when the netstack lookup fails -- confirm.
 */
393 ip2m_wdata
.ip2m_cancel_err
= EINVAL
;
394 return (ip2m_wdata
.ip2m_cancel_err
);
397 * For exclusive stacks we reset the zoneid to zero
398 * since IP uses the global zoneid in the exclusive stacks.
400 if (ns
->netstack_stackid
!= GLOBAL_NETSTACKID
)
401 zoneid
= GLOBAL_ZONEID
;
402 ipst
= ns
->netstack_ip
;
/* Seed the walker argument: target id, and "no error" until told otherwise. */
404 ip2m_wdata
.ip2m_cancel_id
= ip2mid
;
405 ip2m_wdata
.ip2m_cancel_err
= 0;
/* Run ip2mac_cancel_callback() via ncec_walk(), with NULL as first argument. */
406 ncec_walk(NULL
, ip2mac_cancel_callback
, &ip2m_wdata
, ipst
);
408 * We may return EBUSY if a walk to dispatch callbacks is
409 * in progress, in which case the caller needs to synchronize
410 * with the registered callback function to make sure the
411 * module does not exit when there is a callback pending.
414 return (ip2m_wdata
.ip2m_cancel_err
);