Merge commit '281819e5f8b19cd8627541a22d261906fd190276' into merges
[unleashed.git] / kernel / net / ip / ip_arp.c
blob683fcf577e9aefd844b0bb50134b562ac3aad23f
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 */
26 #include <inet/ip_arp.h>
27 #include <inet/ip_ndp.h>
28 #include <net/if_arp.h>
29 #include <netinet/if_ether.h>
30 #include <sys/strsubr.h>
31 #include <inet/ip6.h>
32 #include <inet/ip.h>
33 #include <inet/ip_ire.h>
34 #include <inet/ip_if.h>
35 #include <sys/dlpi.h>
36 #include <sys/sunddi.h>
37 #include <sys/strsun.h>
38 #include <sys/sdt.h>
39 #include <inet/mi.h>
40 #include <inet/arp.h>
41 #include <inet/ipdrop.h>
42 #include <sys/sockio.h>
43 #include <inet/ip_impl.h>
44 #include <sys/policy.h>
/*
 * Evaluates to sizeof (dl_unitdata_req_t) when arl_sap_length is negative,
 * and to sizeof (dl_unitdata_req_t) plus arl_sap_length otherwise.
 * NOTE(review): presumably this is the offset of the link-layer address
 * inside a DL_UNITDATA_REQ, with the sign of the SAP length telling whether
 * the SAP precedes or follows the address -- confirm against the users.
 */
#define	ARL_LL_ADDR_OFFSET(arl)						\
	(sizeof (dl_unitdata_req_t) +					\
	(((arl)->arl_sap_length) < 0 ? 0 : ABS((arl)->arl_sap_length)))
51 * MAC-specific intelligence. Shouldn't be needed, but the DL_INFO_ACK
52 * doesn't quite do it for us.
54 typedef struct arp_m_s {
55 t_uscalar_t arp_mac_type;
56 uint32_t arp_mac_arp_hw_type;
57 t_scalar_t arp_mac_sap_length;
58 uint32_t arp_mac_hw_addr_length;
59 } arp_m_t;
61 static int arp_close(queue_t *, int);
62 static void arp_rput(queue_t *, mblk_t *);
63 static void arp_wput(queue_t *, mblk_t *);
64 static arp_m_t *arp_m_lookup(t_uscalar_t mac_type);
65 static void arp_notify(ipaddr_t, mblk_t *, uint32_t, ip_recv_attr_t *,
66 ncec_t *);
67 static int arp_output(ill_t *, uint32_t, const uchar_t *, const uchar_t *,
68 const uchar_t *, const uchar_t *, uchar_t *);
69 static int arp_modclose(arl_t *);
70 static void arp_mod_close_tail(arl_t *);
71 static mblk_t *arl_unbind(arl_t *);
72 static void arp_process_packet(ill_t *, mblk_t *);
73 static void arp_excl(ipsq_t *, queue_t *, mblk_t *, void *);
74 static void arp_drop_packet(const char *str, mblk_t *, ill_t *);
75 static int arp_open(queue_t *, dev_t *, int, int, cred_t *);
76 static int ip_sioctl_ifunitsel_arp(queue_t *, int *);
77 static int ip_sioctl_slifname_arp(queue_t *, void *);
78 static void arp_dlpi_send(arl_t *, mblk_t *);
79 static void arl_defaults_common(arl_t *, mblk_t *);
80 static int arp_modopen(queue_t *, dev_t *, int, int, cred_t *);
81 static void arp_ifname_notify(arl_t *);
82 static void arp_rput_dlpi_writer(ipsq_t *, queue_t *, mblk_t *, void *);
83 static arl_t *ill_to_arl(ill_t *);
/* DLPI primitive stored at the read pointer of message block mp. */
#define	DL_PRIM(mp)	(((union DL_primitives *)(mp)->b_rptr)->dl_primitive)

/*
 * True when mp is an M_PROTO block at least sizeof (dl_unitdata_ind_t)
 * long whose primitive is DL_UNITDATA_IND.
 */
#define	IS_DLPI_DATA(mp)						\
	(DB_TYPE(mp) == M_PROTO &&					\
	MBLKL(mp) >= sizeof (dl_unitdata_ind_t) &&			\
	DL_PRIM(mp) == DL_UNITDATA_IND)
/*
 * Result codes returned by ip_nce_resolve_all() and acted on by the
 * packet input path.
 */
#define	AR_NOTFOUND	1	/* No matching ace found in cache */
#define	AR_MERGED	2	/* Matching ace updated (RFC 826 Merge_flag) */
#define	AR_LOOPBACK	3	/* Our own arp packet was received */
#define	AR_BOGON	4	/* Another host has our IP addr. */
#define	AR_FAILED	5	/* Duplicate Address Detection has failed */
#define	AR_CHANGED	6	/* Address has changed; tell IP (and merged) */
98 boolean_t arp_no_defense;
100 struct module_info arp_mod_info = {
101 IP_MOD_ID, "arp", 1, INFPSZ, 65536, 1024
103 static struct qinit rinit_arp = {
104 (pfi_t)arp_rput, NULL, arp_open, arp_close, NULL, &arp_mod_info
106 static struct qinit winit_arp = {
107 (pfi_t)arp_wput, NULL, arp_open, arp_close, NULL,
108 &arp_mod_info
110 struct streamtab arpinfo = {
111 &rinit_arp, &winit_arp
/*
 * Length in bytes of the fixed part of an ARP header, i.e. everything that
 * precedes the sender hardware address.
 */
#define	ARH_FIXED_LEN	8

/*
 * Not used in this part of the file.
 * NOTE(review): presumably headroom reserved for a link-layer header when
 * building outgoing ARP packets -- confirm in arp_output().
 */
#define	AR_LL_HDR_SLACK	32
/*
 * pfhooks for ARP.
 *
 * ARP_HOOK_IN / ARP_HOOK_OUT run the packet hooks registered for _event
 * when (_hook).he_interested is set; otherwise they do nothing.
 *
 *	_ilp/_olp	interface index stored in hpe_ifp (IN) or hpe_ofp
 *			(OUT); the other field is set to 0.
 *	_hdr		lvalue: pointer to the ARP header.
 *	_fm		lvalue: first mblk of the message; its address is
 *			handed to the hook as hpe_mp.
 *	_m		lvalue: mblk holding the header.
 *	ipst		IP stack instance supplying ips_arp_net_data.
 *
 * If hook_run() returns non-zero the message is freed (when _fm is still
 * non-NULL) and _fm, _hdr and _m are all set to NULL, so callers must test
 * _fm afterwards.  Otherwise _hdr and _m are reloaded from the event in
 * case a hook replaced them.
 *
 * The bodies are wrapped in do { } while (0) so that each macro behaves as
 * a single statement; callers must terminate the invocation with ';'.
 */
#define	ARP_HOOK_IN(_hook, _event, _ilp, _hdr, _fm, _m, ipst)		\
	do {								\
		if ((_hook).he_interested) {				\
			hook_pkt_event_t info;				\
									\
			info.hpe_protocol = (ipst)->ips_arp_net_data;	\
			info.hpe_ifp = (_ilp);				\
			info.hpe_ofp = 0;				\
			info.hpe_hdr = (_hdr);				\
			info.hpe_mp = &(_fm);				\
			info.hpe_mb = (_m);				\
			if (hook_run(					\
			    (ipst)->ips_arp_net_data->netd_hooks,	\
			    (_event), (hook_data_t)&info) != 0) {	\
				if ((_fm) != NULL) {			\
					freemsg(_fm);			\
					_fm = NULL;			\
				}					\
				_hdr = NULL;				\
				_m = NULL;				\
			} else {					\
				_hdr = info.hpe_hdr;			\
				_m = info.hpe_mb;			\
			}						\
		}							\
	} while (0)

#define	ARP_HOOK_OUT(_hook, _event, _olp, _hdr, _fm, _m, ipst)		\
	do {								\
		if ((_hook).he_interested) {				\
			hook_pkt_event_t info;				\
									\
			info.hpe_protocol = (ipst)->ips_arp_net_data;	\
			info.hpe_ifp = 0;				\
			info.hpe_ofp = (_olp);				\
			info.hpe_hdr = (_hdr);				\
			info.hpe_mp = &(_fm);				\
			info.hpe_mb = (_m);				\
			if (hook_run(					\
			    (ipst)->ips_arp_net_data->netd_hooks,	\
			    (_event), (hook_data_t)&info) != 0) {	\
				if ((_fm) != NULL) {			\
					freemsg(_fm);			\
					_fm = NULL;			\
				}					\
				_hdr = NULL;				\
				_m = NULL;				\
			} else {					\
				_hdr = info.hpe_hdr;			\
				_m = info.hpe_mb;			\
			}						\
		}							\
	} while (0)
169 static arp_m_t arp_m_tbl[] = {
170 { DL_CSMACD, ARPHRD_ETHER, -2, 6}, /* 802.3 */
171 { DL_TPB, ARPHRD_IEEE802, -2, 6}, /* 802.4 */
172 { DL_TPR, ARPHRD_IEEE802, -2, 6}, /* 802.5 */
173 { DL_METRO, ARPHRD_IEEE802, -2, 6}, /* 802.6 */
174 { DL_ETHER, ARPHRD_ETHER, -2, 6}, /* Ethernet */
175 { DL_FDDI, ARPHRD_ETHER, -2, 6}, /* FDDI */
176 { DL_IB, ARPHRD_IB, -2, 20}, /* Infiniband */
177 { DL_OTHER, ARPHRD_ETHER, -2, 6} /* unknown */
180 static void
181 arl_refhold_locked(arl_t *arl)
183 ASSERT(MUTEX_HELD(&arl->arl_lock));
184 arl->arl_refcnt++;
185 ASSERT(arl->arl_refcnt != 0);
188 static void
189 arl_refrele(arl_t *arl)
191 mutex_enter(&arl->arl_lock);
192 ASSERT(arl->arl_refcnt != 0);
193 arl->arl_refcnt--;
194 if (arl->arl_refcnt > 1) {
195 mutex_exit(&arl->arl_lock);
196 return;
199 /* ill_close or arp_unbind_complete may be waiting */
200 cv_broadcast(&arl->arl_cv);
201 mutex_exit(&arl->arl_lock);
205 * wake up any pending ip ioctls.
207 static void
208 arp_cmd_done(ill_t *ill, int err, t_uscalar_t lastprim)
210 if (lastprim == DL_UNBIND_REQ && ill->ill_replumbing)
211 arp_replumb_done(ill, 0);
212 else
213 arp_bringup_done(ill, err);
216 static int
217 ip_nce_resolve_all(ill_t *ill, uchar_t *src_haddr, uint32_t hlen,
218 const in_addr_t *src_paddr, ncec_t **sncec, int op)
220 int retv;
221 ncec_t *ncec;
222 boolean_t ll_changed;
223 uchar_t *lladdr = NULL;
224 int new_state;
226 ASSERT(ill != NULL);
228 ncec = ncec_lookup_illgrp_v4(ill, src_paddr);
229 *sncec = ncec;
231 if (ncec == NULL) {
232 retv = AR_NOTFOUND;
233 goto done;
236 mutex_enter(&ncec->ncec_lock);
238 * IP addr and hardware address match what we already
239 * have, then this is a broadcast packet emitted by one of our
240 * interfaces, reflected by the switch and received on another
241 * interface. We return AR_LOOPBACK.
243 lladdr = ncec->ncec_lladdr;
244 if (NCE_MYADDR(ncec) && hlen == ncec->ncec_ill->ill_phys_addr_length &&
245 bcmp(lladdr, src_haddr, hlen) == 0) {
246 mutex_exit(&ncec->ncec_lock);
247 retv = AR_LOOPBACK;
248 goto done;
251 * If the entry is unverified, then we've just verified that
252 * someone else already owns this address, because this is a
253 * message with the same protocol address but different
254 * hardware address.
256 if (ncec->ncec_flags & NCE_F_UNVERIFIED) {
257 mutex_exit(&ncec->ncec_lock);
258 ncec_delete(ncec);
259 ncec_refrele(ncec);
260 *sncec = NULL;
261 retv = AR_FAILED;
262 goto done;
266 * If the IP address matches ours and we're authoritative for
267 * this entry, then some other node is using our IP addr, so
268 * return AR_BOGON. Also reset the transmit count to zero so
269 * that, if we're currently in initial announcement mode, we
270 * switch back to the lazier defense mode. Knowing that
271 * there's at least one duplicate out there, we ought not
272 * blindly announce.
274 * NCE_F_AUTHORITY is set in one of two ways:
275 * 1. /sbin/arp told us so, via the "permanent" flag.
276 * 2. This is one of my addresses.
278 if (ncec->ncec_flags & NCE_F_AUTHORITY) {
279 ncec->ncec_unsolicit_count = 0;
280 mutex_exit(&ncec->ncec_lock);
281 retv = AR_BOGON;
282 goto done;
286 * No address conflict was detected, and we are getting
287 * ready to update the ncec's hwaddr. The nce MUST NOT be on an
288 * under interface, because all dynamic nce's are created on the
289 * native interface (in the non-IPMP case) or on the IPMP
290 * meta-interface (in the IPMP case)
292 ASSERT(!IS_UNDER_IPMP(ncec->ncec_ill));
295 * update ncec with src_haddr, hlen.
297 * We are trying to resolve this ncec_addr/src_paddr and we
298 * got a REQUEST/RESPONSE from the ncec_addr/src_paddr.
299 * So the new_state is at least "STALE". If, in addition,
300 * this a solicited, unicast ARP_RESPONSE, we can transition
301 * to REACHABLE.
303 new_state = ND_STALE;
304 ip1dbg(("got info for ncec %p from addr %x\n",
305 (void *)ncec, *src_paddr));
306 retv = AR_MERGED;
307 if (ncec->ncec_state == ND_INCOMPLETE ||
308 ncec->ncec_state == ND_INITIAL) {
309 ll_changed = B_TRUE;
310 } else {
311 ll_changed = nce_cmp_ll_addr(ncec, src_haddr, hlen);
312 if (!ll_changed)
313 new_state = ND_UNCHANGED;
314 else
315 retv = AR_CHANGED;
318 * We don't have the equivalent of the IPv6 'S' flag indicating
319 * a solicited response, so we assume that if we are in
320 * INCOMPLETE, or got back an unchanged lladdr in PROBE state,
321 * and this is an ARP_RESPONSE, it must be a
322 * solicited response allowing us to transtion to REACHABLE.
324 if (op == ARP_RESPONSE) {
325 switch (ncec->ncec_state) {
326 case ND_PROBE:
327 new_state = (ll_changed ? ND_STALE : ND_REACHABLE);
328 break;
329 case ND_INCOMPLETE:
330 new_state = ND_REACHABLE;
331 break;
335 * Call nce_update() to refresh fastpath information on any
336 * dependent nce_t entries.
338 nce_update(ncec, new_state, (ll_changed ? src_haddr : NULL));
339 mutex_exit(&ncec->ncec_lock);
340 nce_resolv_ok(ncec);
341 done:
342 return (retv);
345 /* Find an entry for a particular MAC type in the arp_m_tbl. */
346 static arp_m_t *
347 arp_m_lookup(t_uscalar_t mac_type)
349 arp_m_t *arm;
351 for (arm = arp_m_tbl; arm < A_END(arp_m_tbl); arm++) {
352 if (arm->arp_mac_type == mac_type)
353 return (arm);
355 return (NULL);
358 uint32_t
359 arp_hw_type(t_uscalar_t mactype)
361 arp_m_t *arm;
363 if ((arm = arp_m_lookup(mactype)) == NULL)
364 arm = arp_m_lookup(DL_OTHER);
365 return (arm->arp_mac_arp_hw_type);
369 * Called when an DLPI control message has been acked; send down the next
370 * queued message (if any).
371 * The DLPI messages of interest being bind, attach and unbind since
372 * these are the only ones sent by ARP via arp_dlpi_send.
374 static void
375 arp_dlpi_done(arl_t *arl, ill_t *ill)
377 mblk_t *mp;
378 int err;
379 t_uscalar_t prim;
381 mutex_enter(&arl->arl_lock);
382 prim = arl->arl_dlpi_pending;
384 if ((mp = arl->arl_dlpi_deferred) == NULL) {
385 arl->arl_dlpi_pending = DL_PRIM_INVAL;
386 if (arl->arl_state_flags & ARL_LL_DOWN)
387 err = ENETDOWN;
388 else
389 err = 0;
390 mutex_exit(&arl->arl_lock);
392 mutex_enter(&ill->ill_lock);
393 ill->ill_arl_dlpi_pending = 0;
394 mutex_exit(&ill->ill_lock);
395 arp_cmd_done(ill, err, prim);
396 return;
399 arl->arl_dlpi_deferred = mp->b_next;
400 mp->b_next = NULL;
402 ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO);
404 arl->arl_dlpi_pending = DL_PRIM(mp);
405 mutex_exit(&arl->arl_lock);
407 mutex_enter(&ill->ill_lock);
408 ill->ill_arl_dlpi_pending = 1;
409 mutex_exit(&ill->ill_lock);
411 putnext(arl->arl_wq, mp);
415 * This routine is called during module initialization when the DL_INFO_ACK
416 * comes back from the device. We set up defaults for all the device dependent
417 * doo-dads we are going to need. This will leave us ready to roll if we are
418 * attempting auto-configuration. Alternatively, these defaults can be
419 * overridden by initialization procedures possessing higher intelligence.
421 * Caller will free the mp.
423 static void
424 arp_ll_set_defaults(arl_t *arl, mblk_t *mp)
426 arp_m_t *arm;
427 dl_info_ack_t *dlia = (dl_info_ack_t *)mp->b_rptr;
429 if ((arm = arp_m_lookup(dlia->dl_mac_type)) == NULL)
430 arm = arp_m_lookup(DL_OTHER);
431 ASSERT(arm != NULL);
434 * We initialize based on parameters in the (currently) not too
435 * exhaustive arp_m_tbl.
437 if (dlia->dl_version == DL_VERSION_2) {
438 arl->arl_sap_length = dlia->dl_sap_length;
439 arl->arl_phys_addr_length = dlia->dl_brdcst_addr_length;
440 if (dlia->dl_provider_style == DL_STYLE2)
441 arl->arl_needs_attach = 1;
442 } else {
443 arl->arl_sap_length = arm->arp_mac_sap_length;
444 arl->arl_phys_addr_length = arm->arp_mac_hw_addr_length;
447 * Note: the arp_hw_type in the arp header may be derived from
448 * the ill_mac_type and arp_m_lookup().
450 arl->arl_sap = ETHERTYPE_ARP;
451 arl_defaults_common(arl, mp);
454 static void
455 arp_wput(queue_t *q, mblk_t *mp)
457 int err = EINVAL;
458 struct iocblk *ioc;
459 mblk_t *mp1;
461 switch (DB_TYPE(mp)) {
462 case M_IOCTL:
463 ASSERT(q->q_next != NULL);
464 ioc = (struct iocblk *)mp->b_rptr;
465 if (ioc->ioc_cmd != SIOCSLIFNAME &&
466 ioc->ioc_cmd != IF_UNITSEL) {
467 DTRACE_PROBE4(arl__dlpi, char *, "arp_wput",
468 char *, "<some ioctl>", char *, "-",
469 arl_t *, (arl_t *)q->q_ptr);
470 putnext(q, mp);
471 return;
473 if ((mp1 = mp->b_cont) == 0)
474 err = EINVAL;
475 else if (ioc->ioc_cmd == SIOCSLIFNAME)
476 err = ip_sioctl_slifname_arp(q, mp1->b_rptr);
477 else if (ioc->ioc_cmd == IF_UNITSEL)
478 err = ip_sioctl_ifunitsel_arp(q, (int *)mp1->b_rptr);
479 if (err == 0)
480 miocack(q, mp, 0, 0);
481 else
482 miocnak(q, mp, 0, err);
483 return;
484 default:
485 DTRACE_PROBE4(arl__dlpi, char *, "arp_wput default",
486 char *, "default mblk", char *, "-",
487 arl_t *, (arl_t *)q->q_ptr);
488 putnext(q, mp);
489 return;
494 * similar to ill_dlpi_pending(): verify that the received DLPI response
495 * matches the one that is pending for the arl.
497 static boolean_t
498 arl_dlpi_pending(arl_t *arl, t_uscalar_t prim)
500 t_uscalar_t pending;
502 mutex_enter(&arl->arl_lock);
503 if (arl->arl_dlpi_pending == prim) {
504 mutex_exit(&arl->arl_lock);
505 return (B_TRUE);
508 if (arl->arl_state_flags & ARL_CONDEMNED) {
509 mutex_exit(&arl->arl_lock);
510 return (B_FALSE);
512 pending = arl->arl_dlpi_pending;
513 mutex_exit(&arl->arl_lock);
515 if (pending == DL_PRIM_INVAL) {
516 ip0dbg(("arl_dlpi_pending unsolicited ack for %s on %s",
517 dl_primstr(prim), arl->arl_name));
518 } else {
519 ip0dbg(("arl_dlpi_pending ack for %s on %s expect %s",
520 dl_primstr(prim), arl->arl_name, dl_primstr(pending)));
522 return (B_FALSE);
525 /* DLPI messages, other than DL_UNITDATA_IND are handled here. */
526 static void
527 arp_rput_dlpi(queue_t *q, mblk_t *mp)
529 arl_t *arl = (arl_t *)q->q_ptr;
530 union DL_primitives *dlp;
531 t_uscalar_t prim;
532 t_uscalar_t reqprim = DL_PRIM_INVAL;
533 ill_t *ill;
535 if ((mp->b_wptr - mp->b_rptr) < sizeof (dlp->dl_primitive)) {
536 putnext(q, mp);
537 return;
539 dlp = (union DL_primitives *)mp->b_rptr;
540 prim = dlp->dl_primitive;
543 * If we received an ACK but didn't send a request for it, then it
544 * can't be part of any pending operation; discard up-front.
546 switch (prim) {
547 case DL_ERROR_ACK:
549 * ce is confused about how DLPI works, so we have to interpret
550 * an "error" on DL_NOTIFY_ACK (which we never could have sent)
551 * as really meaning an error on DL_NOTIFY_REQ.
553 * Note that supporting DL_NOTIFY_REQ is optional, so printing
554 * out an error message on the console isn't warranted except
555 * for debug.
557 if (dlp->error_ack.dl_error_primitive == DL_NOTIFY_ACK ||
558 dlp->error_ack.dl_error_primitive == DL_NOTIFY_REQ) {
559 reqprim = DL_NOTIFY_REQ;
560 } else {
561 reqprim = dlp->error_ack.dl_error_primitive;
563 break;
564 case DL_INFO_ACK:
565 reqprim = DL_INFO_REQ;
566 break;
567 case DL_OK_ACK:
568 reqprim = dlp->ok_ack.dl_correct_primitive;
569 break;
570 case DL_BIND_ACK:
571 reqprim = DL_BIND_REQ;
572 break;
573 default:
574 DTRACE_PROBE2(rput_dl_badprim, arl_t *, arl,
575 union DL_primitives *, dlp);
576 putnext(q, mp);
577 return;
579 if (reqprim == DL_PRIM_INVAL || !arl_dlpi_pending(arl, reqprim)) {
580 freemsg(mp);
581 return;
583 DTRACE_PROBE4(arl__dlpi, char *, "arp_rput_dlpi received",
584 char *, dl_primstr(prim), char *, dl_primstr(reqprim),
585 arl_t *, arl);
587 ASSERT(prim != DL_NOTIFY_IND);
589 ill = arl_to_ill(arl);
591 switch (reqprim) {
592 case DL_INFO_REQ:
594 * ill has not been set up yet for this case. This is the
595 * DL_INFO_ACK for the first DL_INFO_REQ sent from
596 * arp_modopen(). There should be no other arl_dlpi_deferred
597 * messages pending. We initialize the arl here.
599 ASSERT(!arl->arl_dlpi_style_set);
600 ASSERT(arl->arl_dlpi_pending == DL_INFO_REQ);
601 ASSERT(arl->arl_dlpi_deferred == NULL);
602 arl->arl_dlpi_pending = DL_PRIM_INVAL;
603 arp_ll_set_defaults(arl, mp);
604 freemsg(mp);
605 return;
606 case DL_UNBIND_REQ:
607 mutex_enter(&arl->arl_lock);
608 arl->arl_state_flags &= ~ARL_DL_UNBIND_IN_PROGRESS;
610 * This is not an error, so we don't set ARL_LL_DOWN
612 arl->arl_state_flags &= ~ARL_LL_UP;
613 arl->arl_state_flags |= ARL_LL_UNBOUND;
614 if (arl->arl_state_flags & ARL_CONDEMNED) {
616 * if this is part of the unplumb the arl may
617 * vaporize any moment after we cv_signal the
618 * arl_cv so we reset arl_dlpi_pending here.
619 * All other cases (including replumb) will
620 * have the arl_dlpi_pending reset in
621 * arp_dlpi_done.
623 arl->arl_dlpi_pending = DL_PRIM_INVAL;
625 cv_signal(&arl->arl_cv);
626 mutex_exit(&arl->arl_lock);
627 break;
629 if (ill != NULL) {
631 * ill ref obtained by arl_to_ill() will be released
632 * by qwriter_ip()
634 qwriter_ip(ill, ill->ill_wq, mp, arp_rput_dlpi_writer,
635 CUR_OP, B_TRUE);
636 return;
638 freemsg(mp);
642 * Handling of DLPI messages that require exclusive access to the ipsq.
644 /* ARGSUSED */
645 static void
646 arp_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg)
648 union DL_primitives *dlp = (union DL_primitives *)mp->b_rptr;
649 ill_t *ill = (ill_t *)q->q_ptr;
650 arl_t *arl = ill_to_arl(ill);
652 if (arl == NULL) {
654 * happens as a result arp_modclose triggering unbind.
655 * arp_rput_dlpi will cv_signal the arl_cv and the modclose
656 * will complete, but when it does ipsq_exit, the waiting
657 * qwriter_ip gets into the ipsq but will find the arl null.
658 * There should be no deferred messages in this case, so
659 * just complete and exit.
661 arp_cmd_done(ill, 0, DL_UNBIND_REQ);
662 freemsg(mp);
663 return;
665 switch (dlp->dl_primitive) {
666 case DL_ERROR_ACK:
667 switch (dlp->error_ack.dl_error_primitive) {
668 case DL_UNBIND_REQ:
669 mutex_enter(&arl->arl_lock);
670 arl->arl_state_flags &= ~ARL_DL_UNBIND_IN_PROGRESS;
671 arl->arl_state_flags &= ~ARL_LL_UP;
672 arl->arl_state_flags |= ARL_LL_UNBOUND;
673 arl->arl_state_flags |= ARL_LL_DOWN;
674 cv_signal(&arl->arl_cv);
675 mutex_exit(&arl->arl_lock);
676 break;
677 case DL_BIND_REQ:
678 mutex_enter(&arl->arl_lock);
679 arl->arl_state_flags &= ~ARL_LL_UP;
680 arl->arl_state_flags |= ARL_LL_DOWN;
681 arl->arl_state_flags |= ARL_LL_UNBOUND;
682 cv_signal(&arl->arl_cv);
683 mutex_exit(&arl->arl_lock);
684 break;
685 case DL_ATTACH_REQ:
686 break;
687 default:
688 /* If it's anything else, we didn't send it. */
689 arl_refrele(arl);
690 putnext(q, mp);
691 return;
693 break;
694 case DL_OK_ACK:
695 DTRACE_PROBE4(arl__dlpi, char *, "arp_rput_dlpi_writer ok",
696 char *, dl_primstr(dlp->ok_ack.dl_correct_primitive),
697 char *, dl_primstr(dlp->ok_ack.dl_correct_primitive),
698 arl_t *, arl);
699 mutex_enter(&arl->arl_lock);
700 switch (dlp->ok_ack.dl_correct_primitive) {
701 case DL_UNBIND_REQ:
702 case DL_ATTACH_REQ:
703 break;
704 default:
705 ip0dbg(("Dropping unrecognized DL_OK_ACK for %s",
706 dl_primstr(dlp->ok_ack.dl_correct_primitive)));
707 mutex_exit(&arl->arl_lock);
708 arl_refrele(arl);
709 freemsg(mp);
710 return;
712 mutex_exit(&arl->arl_lock);
713 break;
714 case DL_BIND_ACK:
715 DTRACE_PROBE2(rput_dl_bind, arl_t *, arl,
716 dl_bind_ack_t *, &dlp->bind_ack);
718 mutex_enter(&arl->arl_lock);
719 ASSERT(arl->arl_state_flags & ARL_LL_BIND_PENDING);
720 arl->arl_state_flags &=
721 ~(ARL_LL_BIND_PENDING|ARL_LL_DOWN|ARL_LL_UNBOUND);
722 arl->arl_state_flags |= ARL_LL_UP;
723 mutex_exit(&arl->arl_lock);
724 break;
725 case DL_UDERROR_IND:
726 DTRACE_PROBE2(rput_dl_uderror, arl_t *, arl,
727 dl_uderror_ind_t *, &dlp->uderror_ind);
728 arl_refrele(arl);
729 putnext(q, mp);
730 return;
731 default:
732 DTRACE_PROBE2(rput_dl_badprim, arl_t *, arl,
733 union DL_primitives *, dlp);
734 arl_refrele(arl);
735 putnext(q, mp);
736 return;
738 arp_dlpi_done(arl, ill);
739 arl_refrele(arl);
740 freemsg(mp);
743 void
744 arp_rput(queue_t *q, mblk_t *mp)
746 arl_t *arl = q->q_ptr;
747 boolean_t need_refrele = B_FALSE;
749 mutex_enter(&arl->arl_lock);
750 if (((arl->arl_state_flags &
751 (ARL_CONDEMNED | ARL_LL_REPLUMBING)) != 0)) {
753 * Only allow high priority DLPI messages during unplumb or
754 * replumb, and we don't take an arl_refcnt for that case.
756 if (DB_TYPE(mp) != M_PCPROTO) {
757 mutex_exit(&arl->arl_lock);
758 freemsg(mp);
759 return;
761 } else {
762 arl_refhold_locked(arl);
763 need_refrele = B_TRUE;
765 mutex_exit(&arl->arl_lock);
767 switch (DB_TYPE(mp)) {
768 case M_PCPROTO:
769 case M_PROTO: {
770 ill_t *ill;
773 * could be one of
774 * (i) real message from the wire, (DLPI_DATA)
775 * (ii) DLPI message
776 * Take a ref on the ill associated with this arl to
777 * prevent the ill from being unplumbed until this thread
778 * is done.
780 if (IS_DLPI_DATA(mp)) {
781 ill = arl_to_ill(arl);
782 if (ill == NULL) {
783 arp_drop_packet("No ill", mp, ill);
784 break;
786 arp_process_packet(ill, mp);
787 ill_refrele(ill);
788 break;
790 /* Miscellaneous DLPI messages get shuffled off. */
791 arp_rput_dlpi(q, mp);
792 break;
794 case M_ERROR:
795 case M_HANGUP:
796 if (mp->b_rptr < mp->b_wptr)
797 arl->arl_error = (int)(*mp->b_rptr & 0xFF);
798 if (arl->arl_error == 0)
799 arl->arl_error = ENXIO;
800 freemsg(mp);
801 break;
802 default:
803 ip1dbg(("arp_rput other db type %x\n", DB_TYPE(mp)));
804 putnext(q, mp);
805 break;
807 if (need_refrele)
808 arl_refrele(arl);
811 static void
812 arp_process_packet(ill_t *ill, mblk_t *mp)
814 mblk_t *mp1;
815 arh_t *arh;
816 in_addr_t src_paddr, dst_paddr;
817 uint32_t hlen, plen;
818 boolean_t is_probe;
819 int op;
820 ncec_t *dst_ncec, *src_ncec = NULL;
821 uchar_t *src_haddr, *arhp, *dst_haddr, *dp, *sp;
822 int err;
823 ip_stack_t *ipst;
824 boolean_t need_ill_refrele = B_FALSE;
825 nce_t *nce;
826 uchar_t *src_lladdr;
827 dl_unitdata_ind_t *dlui;
828 ip_recv_attr_t iras;
830 ASSERT(ill != NULL);
831 if (ill->ill_flags & ILLF_NOARP) {
832 arp_drop_packet("Interface does not support ARP", mp, ill);
833 return;
835 ipst = ill->ill_ipst;
837 * What we should have at this point is a DL_UNITDATA_IND message
838 * followed by an ARP packet. We do some initial checks and then
839 * get to work.
841 dlui = (dl_unitdata_ind_t *)mp->b_rptr;
842 if (dlui->dl_group_address == 1) {
844 * multicast or broadcast packet. Only accept on the ipmp
845 * nominated interface for multicasts ('cast_ill').
846 * If we have no cast_ill we are liberal and accept everything.
848 if (IS_UNDER_IPMP(ill)) {
849 /* For an under ill_grp can change under lock */
850 rw_enter(&ipst->ips_ill_g_lock, RW_READER);
851 if (!ill->ill_nom_cast && ill->ill_grp != NULL &&
852 ill->ill_grp->ig_cast_ill != NULL) {
853 rw_exit(&ipst->ips_ill_g_lock);
854 arp_drop_packet("Interface is not nominated "
855 "for multicast sends and receives",
856 mp, ill);
857 return;
859 rw_exit(&ipst->ips_ill_g_lock);
862 mp1 = mp->b_cont;
863 if (mp1 == NULL) {
864 arp_drop_packet("Missing ARP packet", mp, ill);
865 return;
867 if (mp1->b_cont != NULL) {
868 /* No fooling around with funny messages. */
869 if (!pullupmsg(mp1, -1)) {
870 arp_drop_packet("Funny message: pullup failed",
871 mp, ill);
872 return;
875 arh = (arh_t *)mp1->b_rptr;
876 hlen = arh->arh_hlen;
877 plen = arh->arh_plen;
878 if (MBLKL(mp1) < ARH_FIXED_LEN + 2 * hlen + 2 * plen) {
879 arp_drop_packet("mblk len too small", mp, ill);
880 return;
883 * hlen 0 is used for RFC 1868 UnARP.
885 * Note that the rest of the code checks that hlen is what we expect
886 * for this hardware address type, so might as well discard packets
887 * here that don't match.
889 if ((hlen > 0 && hlen != ill->ill_phys_addr_length) || plen == 0) {
890 DTRACE_PROBE2(rput_bogus, ill_t *, ill, mblk_t *, mp1);
891 arp_drop_packet("Bogus hlen or plen", mp, ill);
892 return;
895 * Historically, Solaris has been lenient about hardware type numbers.
896 * We should check here, but don't.
898 DTRACE_PROBE3(arp__physical__in__start, ill_t *, ill, arh_t *, arh,
899 mblk_t *, mp);
901 * If ill is in an ipmp group, it will be the under ill. If we want
902 * to report the packet as coming up the IPMP interface, we should
903 * convert it to the ipmp ill.
905 ARP_HOOK_IN(ipst->ips_arp_physical_in_event, ipst->ips_arp_physical_in,
906 ill->ill_phyint->phyint_ifindex, arh, mp, mp1, ipst);
907 DTRACE_PROBE1(arp__physical__in__end, mblk_t *, mp);
908 if (mp == NULL)
909 return;
910 arhp = (uchar_t *)arh + ARH_FIXED_LEN;
911 src_haddr = arhp; /* ar$sha */
912 arhp += hlen;
913 bcopy(arhp, &src_paddr, IP_ADDR_LEN); /* ar$spa */
914 sp = arhp;
915 arhp += IP_ADDR_LEN;
916 dst_haddr = arhp; /* ar$dha */
917 arhp += hlen;
918 bcopy(arhp, &dst_paddr, IP_ADDR_LEN); /* ar$tpa */
919 dp = arhp;
920 op = BE16_TO_U16(arh->arh_operation);
922 DTRACE_PROBE2(ip__arp__input, (in_addr_t), src_paddr,
923 (in_addr_t), dst_paddr);
925 /* Determine if this is just a probe */
926 is_probe = (src_paddr == INADDR_ANY);
929 * The following test for loopback is faster than
930 * IP_LOOPBACK_ADDR(), because it avoids any bitwise
931 * operations.
932 * Note that these addresses are always in network byte order
934 if ((*(uint8_t *)&src_paddr) == IN_LOOPBACKNET ||
935 (*(uint8_t *)&dst_paddr) == IN_LOOPBACKNET ||
936 CLASSD(src_paddr) || CLASSD(dst_paddr)) {
937 arp_drop_packet("Martian IP addr", mp, ill);
938 return;
942 * ira_ill is the only field used down the arp_notify path.
944 bzero(&iras, sizeof (iras));
945 iras.ira_ill = iras.ira_rill = ill;
947 * RFC 826: first check if the <protocol, sender protocol address> is
948 * in the cache, if there is a sender protocol address. Note that this
949 * step also handles resolutions based on source.
951 /* Note: after here we need to freeb(mp) and freemsg(mp1) separately */
952 mp->b_cont = NULL;
953 if (is_probe) {
954 err = AR_NOTFOUND;
955 } else {
956 if (plen != 4) {
957 arp_drop_packet("bad protocol len", mp, ill);
958 return;
960 err = ip_nce_resolve_all(ill, src_haddr, hlen, &src_paddr,
961 &src_ncec, op);
962 switch (err) {
963 case AR_BOGON:
964 ASSERT(src_ncec != NULL);
965 arp_notify(src_paddr, mp1, AR_CN_BOGON,
966 &iras, src_ncec);
967 break;
968 case AR_FAILED:
969 arp_notify(src_paddr, mp1, AR_CN_FAILED, &iras,
970 src_ncec);
971 break;
972 case AR_LOOPBACK:
973 DTRACE_PROBE2(rput_loopback, ill_t *, ill, arh_t *,
974 arh);
975 freemsg(mp1);
976 break;
977 default:
978 goto update;
980 freemsg(mp);
981 if (src_ncec != NULL)
982 ncec_refrele(src_ncec);
983 return;
985 update:
987 * Now look up the destination address. By RFC 826, we ignore the
988 * packet at this step if the target isn't one of our addresses (i.e.,
989 * one we have been asked to PUBLISH). This is true even if the
990 * target is something we're trying to resolve and the packet
991 * is a response.
993 dst_ncec = ncec_lookup_illgrp_v4(ill, &dst_paddr);
994 if (dst_ncec == NULL || !NCE_PUBLISH(dst_ncec)) {
996 * Let the client know if the source mapping has changed, even
997 * if the destination provides no useful information for the
998 * client.
1000 if (err == AR_CHANGED) {
1001 arp_notify(src_paddr, mp1, AR_CN_ANNOUNCE, &iras,
1002 NULL);
1003 freemsg(mp);
1004 } else {
1005 freemsg(mp);
1006 arp_drop_packet("Target is not interesting", mp1, ill);
1008 if (dst_ncec != NULL)
1009 ncec_refrele(dst_ncec);
1010 if (src_ncec != NULL)
1011 ncec_refrele(src_ncec);
1012 return;
1015 if (dst_ncec->ncec_flags & NCE_F_UNVERIFIED) {
1017 * Check for a reflection. Some misbehaving bridges will
1018 * reflect our own transmitted packets back to us.
1020 ASSERT(NCE_PUBLISH(dst_ncec));
1021 if (hlen != dst_ncec->ncec_ill->ill_phys_addr_length) {
1022 ncec_refrele(dst_ncec);
1023 if (src_ncec != NULL)
1024 ncec_refrele(src_ncec);
1025 freemsg(mp);
1026 arp_drop_packet("bad arh_len", mp1, ill);
1027 return;
1029 if (!nce_cmp_ll_addr(dst_ncec, src_haddr, hlen)) {
1030 DTRACE_PROBE3(rput_probe_reflected, ill_t *, ill,
1031 arh_t *, arh, ncec_t *, dst_ncec);
1032 ncec_refrele(dst_ncec);
1033 if (src_ncec != NULL)
1034 ncec_refrele(src_ncec);
1035 freemsg(mp);
1036 arp_drop_packet("Reflected probe", mp1, ill);
1037 return;
1040 * Responses targeting our HW address that are not responses to
1041 * our DAD probe must be ignored as they are related to requests
1042 * sent before DAD was restarted.
1044 if (op == ARP_RESPONSE &&
1045 (nce_cmp_ll_addr(dst_ncec, dst_haddr, hlen) == 0)) {
1046 ncec_refrele(dst_ncec);
1047 if (src_ncec != NULL)
1048 ncec_refrele(src_ncec);
1049 freemsg(mp);
1050 arp_drop_packet(
1051 "Response to request that was sent before DAD",
1052 mp1, ill);
1053 return;
1056 * Responses targeted to HW addresses which are not ours but
1057 * sent to our unverified proto address are also conflicts.
1058 * These may be reported by a proxy rather than the interface
1059 * with the conflicting address, dst_paddr is in conflict
1060 * rather than src_paddr. To ensure IP can locate the correct
1061 * ipif to take down, it is necessary to copy dst_paddr to
1062 * the src_paddr field before sending it to IP. The same is
1063 * required for probes, where src_paddr will be INADDR_ANY.
1065 if (is_probe || op == ARP_RESPONSE) {
1066 bcopy(dp, sp, plen);
1067 arp_notify(src_paddr, mp1, AR_CN_FAILED, &iras,
1068 NULL);
1069 ncec_delete(dst_ncec);
1070 } else if (err == AR_CHANGED) {
1071 arp_notify(src_paddr, mp1, AR_CN_ANNOUNCE, &iras,
1072 NULL);
1073 } else {
1074 DTRACE_PROBE3(rput_request_unverified,
1075 ill_t *, ill, arh_t *, arh, ncec_t *, dst_ncec);
1076 arp_drop_packet("Unverified request", mp1, ill);
1078 freemsg(mp);
1079 ncec_refrele(dst_ncec);
1080 if (src_ncec != NULL)
1081 ncec_refrele(src_ncec);
1082 return;
1085 * If it's a request, then we reply to this, and if we think the
1086 * sender's unknown, then we create an entry to avoid unnecessary ARPs.
1087 * The design assumption is that someone ARPing us is likely to send us
1088 * a packet soon, and that we'll want to reply to it.
1090 if (op == ARP_REQUEST) {
1091 const uchar_t *nce_hwaddr;
1092 struct in_addr nce_paddr;
1093 clock_t now;
1094 ill_t *under_ill = ill;
1095 boolean_t send_unicast = B_TRUE;
1097 ASSERT(NCE_PUBLISH(dst_ncec));
1099 if ((dst_ncec->ncec_flags & (NCE_F_BCAST|NCE_F_MCAST)) != 0) {
1101 * Ignore senders who are deliberately or accidentally
1102 * confused.
1104 goto bail;
1107 if (!is_probe && err == AR_NOTFOUND) {
1108 ASSERT(src_ncec == NULL);
1110 if (IS_UNDER_IPMP(under_ill)) {
1112 * create the ncec for the sender on ipmp_ill.
1113 * We pass in the ipmp_ill itself to avoid
1114 * creating an nce_t on the under_ill.
1116 ill = ipmp_ill_hold_ipmp_ill(under_ill);
1117 if (ill == NULL)
1118 ill = under_ill;
1119 else
1120 need_ill_refrele = B_TRUE;
1123 err = nce_lookup_then_add_v4(ill, src_haddr, hlen,
1124 &src_paddr, 0, ND_STALE, &nce);
1126 switch (err) {
1127 case 0:
1128 case EEXIST:
1129 ip1dbg(("added ncec %p in state %d ill %s\n",
1130 (void *)src_ncec, src_ncec->ncec_state,
1131 ill->ill_name));
1132 src_ncec = nce->nce_common;
1133 break;
1134 default:
1136 * Either no memory, or the outgoing interface
1137 * is in the process of down/unplumb. In the
1138 * latter case, we will fail the send anyway,
1139 * and in the former case, we should try to send
1140 * the ARP response.
1142 src_lladdr = src_haddr;
1143 goto send_response;
1145 ncec_refhold(src_ncec);
1146 nce_refrele(nce);
1147 /* set up cleanup interval on ncec */
1151 * This implements periodic address defense based on a modified
1152 * version of the RFC 3927 requirements. Instead of sending a
1153 * broadcasted reply every time, as demanded by the RFC, we
1154 * send at most one broadcast reply per arp_broadcast_interval.
1156 now = ddi_get_lbolt();
1157 if ((now - dst_ncec->ncec_last_time_defended) >
1158 MSEC_TO_TICK(ipst->ips_ipv4_dad_announce_interval)) {
1159 dst_ncec->ncec_last_time_defended = now;
1161 * If this is one of the long-suffering entries,
1162 * pull it out now. It no longer needs separate
1163 * defense, because we're now doing that with this
1164 * broadcasted reply.
1166 dst_ncec->ncec_flags &= ~NCE_F_DELAYED;
1167 send_unicast = B_FALSE;
1169 if (src_ncec != NULL && send_unicast) {
1170 src_lladdr = src_ncec->ncec_lladdr;
1171 } else {
1172 src_lladdr = under_ill->ill_bcast_mp->b_rptr +
1173 NCE_LL_ADDR_OFFSET(under_ill);
1175 send_response:
1176 nce_hwaddr = dst_ncec->ncec_lladdr;
1177 IN6_V4MAPPED_TO_INADDR(&dst_ncec->ncec_addr, &nce_paddr);
1179 (void) arp_output(under_ill, ARP_RESPONSE,
1180 nce_hwaddr, (uchar_t *)&nce_paddr, src_haddr,
1181 (uchar_t *)&src_paddr, src_lladdr);
1183 bail:
1184 if (dst_ncec != NULL) {
1185 ncec_refrele(dst_ncec);
1187 if (src_ncec != NULL) {
1188 ncec_refrele(src_ncec);
1190 if (err == AR_CHANGED) {
1191 mp->b_cont = NULL;
1192 arp_notify(src_paddr, mp1, AR_CN_ANNOUNCE, &iras, NULL);
1193 mp1 = NULL;
1195 if (need_ill_refrele)
1196 ill_refrele(ill);
1197 done:
1198 freemsg(mp);
1199 freemsg(mp1);
1203 * Basic initialization of the arl_t and the arl_common structure shared with
1204 * the ill_t that is done after SLIFNAME/IF_UNITSEL.
1206 static int
1207 arl_ill_init(arl_t *arl, char *ill_name)
1209 ill_t *ill;
1210 arl_ill_common_t *ai;
1212 ill = ill_lookup_on_name(ill_name, B_FALSE, B_FALSE, B_FALSE,
1213 arl->arl_ipst);
1215 if (ill == NULL)
1216 return (ENXIO);
1219 * By the time we set up the arl, we expect the ETHERTYPE_IP
1220 * stream to be fully bound and attached. So we copy/verify
1221 * relevant information as possible from/against the ill.
1223 * The following should have been set up in arp_ll_set_defaults()
1224 * after the first DL_INFO_ACK was received.
1226 ASSERT(arl->arl_phys_addr_length == ill->ill_phys_addr_length);
1227 ASSERT(arl->arl_sap == ETHERTYPE_ARP);
1228 ASSERT(arl->arl_mactype == ill->ill_mactype);
1229 ASSERT(arl->arl_sap_length == ill->ill_sap_length);
1231 ai = kmem_zalloc(sizeof (*ai), KM_SLEEP);
1232 mutex_enter(&ill->ill_lock);
1233 /* First ensure that the ill is not CONDEMNED. */
1234 if (ill->ill_state_flags & ILL_CONDEMNED) {
1235 mutex_exit(&ill->ill_lock);
1236 ill_refrele(ill);
1237 kmem_free(ai, sizeof (*ai));
1238 return (ENXIO);
1240 if (ill->ill_common != NULL || arl->arl_common != NULL) {
1241 mutex_exit(&ill->ill_lock);
1242 ip0dbg(("%s: PPA already exists", ill->ill_name));
1243 ill_refrele(ill);
1244 kmem_free(ai, sizeof (*ai));
1245 return (EEXIST);
1247 mutex_init(&ai->ai_lock, NULL, MUTEX_DEFAULT, NULL);
1248 ai->ai_arl = arl;
1249 ai->ai_ill = ill;
1250 ill->ill_common = ai;
1251 arl->arl_common = ai;
1252 mutex_exit(&ill->ill_lock);
1253 (void) strlcpy(arl->arl_name, ill->ill_name, LIFNAMSIZ);
1254 arl->arl_name_length = ill->ill_name_length;
1255 ill_refrele(ill);
1256 arp_ifname_notify(arl);
1257 return (0);
1260 /* Allocate and do common initializations for DLPI messages. */
1261 static mblk_t *
1262 ip_ar_dlpi_comm(t_uscalar_t prim, size_t size)
1264 mblk_t *mp;
1266 if ((mp = allocb(size, BPRI_HI)) == NULL)
1267 return (NULL);
1270 * DLPIv2 says that DL_INFO_REQ and DL_TOKEN_REQ (the latter
1271 * of which we don't seem to use) are sent with M_PCPROTO, and
1272 * that other DLPI are M_PROTO.
1274 DB_TYPE(mp) = (prim == DL_INFO_REQ) ? M_PCPROTO : M_PROTO;
1276 mp->b_wptr = mp->b_rptr + size;
1277 bzero(mp->b_rptr, size);
1278 DL_PRIM(mp) = prim;
1279 return (mp);
1284 ip_sioctl_ifunitsel_arp(queue_t *q, int *ppa)
1286 arl_t *arl;
1287 char *cp, ill_name[LIFNAMSIZ];
1289 if (q->q_next == NULL)
1290 return (EINVAL);
1292 do {
1293 q = q->q_next;
1294 } while (q->q_next != NULL);
1295 cp = q->q_qinfo->qi_minfo->mi_idname;
1297 arl = (arl_t *)q->q_ptr;
1298 (void) snprintf(ill_name, sizeof (ill_name), "%s%d", cp, *ppa);
1299 arl->arl_ppa = *ppa;
1300 return (arl_ill_init(arl, ill_name));
1304 ip_sioctl_slifname_arp(queue_t *q, void *lifreq)
1306 arl_t *arl;
1307 struct lifreq *lifr = lifreq;
1309 /* ioctl not valid when IP opened as a device */
1310 if (q->q_next == NULL)
1311 return (EINVAL);
1313 arl = (arl_t *)q->q_ptr;
1314 arl->arl_ppa = lifr->lifr_ppa;
1315 return (arl_ill_init(arl, lifr->lifr_name));
1318 arl_t *
1319 ill_to_arl(ill_t *ill)
1321 arl_ill_common_t *ai = ill->ill_common;
1322 arl_t *arl = NULL;
1324 if (ai == NULL)
1325 return (NULL);
1327 * Find the arl_t that corresponds to this ill_t from the shared
1328 * ill_common structure. We can safely access the ai here as it
1329 * will only be freed in arp_modclose() after we have become
1330 * single-threaded.
1332 mutex_enter(&ai->ai_lock);
1333 if ((arl = ai->ai_arl) != NULL) {
1334 mutex_enter(&arl->arl_lock);
1335 if (!(arl->arl_state_flags & ARL_CONDEMNED)) {
1336 arl_refhold_locked(arl);
1337 mutex_exit(&arl->arl_lock);
1338 } else {
1339 mutex_exit(&arl->arl_lock);
1340 arl = NULL;
1343 mutex_exit(&ai->ai_lock);
1344 return (arl);
1347 ill_t *
1348 arl_to_ill(arl_t *arl)
1350 arl_ill_common_t *ai = arl->arl_common;
1351 ill_t *ill = NULL;
1353 if (ai == NULL) {
1355 * happens when the arp stream is just being opened, and
1356 * arl_ill_init has not been executed yet.
1358 return (NULL);
1361 * Find the ill_t that corresponds to this arl_t from the shared
1362 * arl_common structure. We can safely access the ai here as it
1363 * will only be freed in arp_modclose() after we have become
1364 * single-threaded.
1366 mutex_enter(&ai->ai_lock);
1367 if ((ill = ai->ai_ill) != NULL) {
1368 mutex_enter(&ill->ill_lock);
1369 if (!ILL_IS_CONDEMNED(ill)) {
1370 ill_refhold_locked(ill);
1371 mutex_exit(&ill->ill_lock);
1372 } else {
1373 mutex_exit(&ill->ill_lock);
1374 ill = NULL;
1377 mutex_exit(&ai->ai_lock);
1378 return (ill);
1382 arp_ll_up(ill_t *ill)
1384 mblk_t *attach_mp = NULL;
1385 mblk_t *bind_mp = NULL;
1386 mblk_t *unbind_mp = NULL;
1387 arl_t *arl;
1389 ASSERT(IAM_WRITER_ILL(ill));
1390 arl = ill_to_arl(ill);
1392 DTRACE_PROBE2(ill__downup, char *, "arp_ll_up", ill_t *, ill);
1393 if (arl == NULL)
1394 return (ENXIO);
1395 DTRACE_PROBE2(arl__downup, char *, "arp_ll_up", arl_t *, arl);
1396 if ((arl->arl_state_flags & ARL_LL_UP) != 0) {
1397 arl_refrele(arl);
1398 return (0);
1400 if (arl->arl_needs_attach) { /* DL_STYLE2 */
1401 attach_mp =
1402 ip_ar_dlpi_comm(DL_ATTACH_REQ, sizeof (dl_attach_req_t));
1403 if (attach_mp == NULL)
1404 goto bad;
1405 ((dl_attach_req_t *)attach_mp->b_rptr)->dl_ppa = arl->arl_ppa;
1408 /* Allocate and initialize a bind message. */
1409 bind_mp = ip_ar_dlpi_comm(DL_BIND_REQ, sizeof (dl_bind_req_t));
1410 if (bind_mp == NULL)
1411 goto bad;
1412 ((dl_bind_req_t *)bind_mp->b_rptr)->dl_sap = ETHERTYPE_ARP;
1413 ((dl_bind_req_t *)bind_mp->b_rptr)->dl_service_mode = DL_CLDLS;
1415 unbind_mp = ip_ar_dlpi_comm(DL_UNBIND_REQ, sizeof (dl_unbind_req_t));
1416 if (unbind_mp == NULL)
1417 goto bad;
1418 if (arl->arl_needs_attach) {
1419 arp_dlpi_send(arl, attach_mp);
1421 arl->arl_unbind_mp = unbind_mp;
1423 arl->arl_state_flags |= ARL_LL_BIND_PENDING;
1424 arp_dlpi_send(arl, bind_mp);
1425 arl_refrele(arl);
1426 return (EINPROGRESS);
1428 bad:
1429 freemsg(attach_mp);
1430 freemsg(bind_mp);
1431 freemsg(unbind_mp);
1432 arl_refrele(arl);
1433 return (ENOMEM);
1437 * consumes/frees mp
1439 static void
1440 arp_notify(in_addr_t src, mblk_t *mp, uint32_t arcn_code,
1441 ip_recv_attr_t *ira, ncec_t *ncec)
1443 char hbuf[MAC_STR_LEN];
1444 char sbuf[INET_ADDRSTRLEN];
1445 ill_t *ill = ira->ira_ill;
1446 ip_stack_t *ipst = ill->ill_ipst;
1447 arh_t *arh = (arh_t *)mp->b_rptr;
1449 switch (arcn_code) {
1450 case AR_CN_BOGON:
1452 * Someone is sending ARP packets with a source protocol
1453 * address that we have published and for which we believe our
1454 * entry is authoritative and verified to be unique on
1455 * the network.
1457 * arp_process_packet() sends AR_CN_FAILED for the case when
1458 * a DAD probe is received and the hardware address of a
1459 * non-authoritative entry has changed. Thus, AR_CN_BOGON
1460 * indicates a real conflict, and we have to do resolution.
1462 * We back away quickly from the address if it's from DHCP or
1463 * otherwise temporary and hasn't been used recently (or at
1464 * all). We'd like to include "deprecated" addresses here as
1465 * well (as there's no real reason to defend something we're
1466 * discarding), but IPMP "reuses" this flag to mean something
1467 * other than the standard meaning.
1469 if (ip_nce_conflict(mp, ira, ncec)) {
1470 (void) mac_colon_addr((uint8_t *)(arh + 1),
1471 arh->arh_hlen, hbuf, sizeof (hbuf));
1472 (void) ip_dot_addr(src, sbuf);
1473 cmn_err(CE_WARN,
1474 "proxy ARP problem? Node '%s' is using %s on %s",
1475 hbuf, sbuf, ill->ill_name);
1476 if (!arp_no_defense)
1477 (void) arp_announce(ncec);
1479 * ncec_last_time_defended has been adjusted in
1480 * ip_nce_conflict.
1482 } else {
1483 ncec_delete(ncec);
1485 freemsg(mp);
1486 break;
1487 case AR_CN_ANNOUNCE: {
1488 nce_hw_map_t hwm;
1490 * ARP gives us a copy of any packet where it thinks
1491 * the address has changed, so that we can update our
1492 * caches. We're responsible for caching known answers
1493 * in the current design. We check whether the
1494 * hardware address really has changed in all of our
1495 * entries that have cached this mapping, and if so, we
1496 * blow them away. This way we will immediately pick
1497 * up the rare case of a host changing hardware
1498 * address.
1500 if (src == 0) {
1501 freemsg(mp);
1502 break;
1504 hwm.hwm_addr = src;
1505 hwm.hwm_hwlen = arh->arh_hlen;
1506 hwm.hwm_hwaddr = (uchar_t *)(arh + 1);
1507 hwm.hwm_flags = 0;
1508 ncec_walk_common(ipst->ips_ndp4, NULL,
1509 (pfi_t)nce_update_hw_changed, &hwm, B_TRUE);
1510 freemsg(mp);
1511 break;
1513 case AR_CN_FAILED:
1514 if (arp_no_defense) {
1515 (void) mac_colon_addr((uint8_t *)(arh + 1),
1516 arh->arh_hlen, hbuf, sizeof (hbuf));
1517 (void) ip_dot_addr(src, sbuf);
1519 cmn_err(CE_WARN,
1520 "node %s is using our IP address %s on %s",
1521 hbuf, sbuf, ill->ill_name);
1522 freemsg(mp);
1523 break;
1526 * mp will be freed by arp_excl.
1528 ill_refhold(ill);
1529 qwriter_ip(ill, ill->ill_rq, mp, arp_excl, NEW_OP, B_FALSE);
1530 return;
1531 default:
1532 ASSERT(0);
1533 freemsg(mp);
1534 break;
1539 * arp_output is called to transmit an ARP Request or Response. The mapping
1540 * to RFC 826 variables is:
1541 * haddr1 == ar$sha
1542 * paddr1 == ar$spa
1543 * haddr2 == ar$tha
1544 * paddr2 == ar$tpa
1545 * The ARP frame is sent to the ether_dst in dst_lladdr.
1547 static int
1548 arp_output(ill_t *ill, uint32_t operation,
1549 const uchar_t *haddr1, const uchar_t *paddr1, const uchar_t *haddr2,
1550 const uchar_t *paddr2, uchar_t *dst_lladdr)
1552 arh_t *arh;
1553 uint8_t *cp;
1554 uint_t hlen;
1555 uint32_t plen = IPV4_ADDR_LEN; /* ar$pln from RFC 826 */
1556 uint32_t proto = IP_ARP_PROTO_TYPE;
1557 mblk_t *mp;
1558 arl_t *arl;
1560 ASSERT(dst_lladdr != NULL);
1561 hlen = ill->ill_phys_addr_length; /* ar$hln from RFC 826 */
1562 mp = ill_dlur_gen(dst_lladdr, hlen, ETHERTYPE_ARP, ill->ill_sap_length);
1564 if (mp == NULL)
1565 return (ENOMEM);
1567 /* IFF_NOARP flag is set or link down: do not send arp messages */
1568 if ((ill->ill_flags & ILLF_NOARP) || !ill->ill_dl_up) {
1569 freemsg(mp);
1570 return (ENXIO);
1573 mp->b_cont = allocb(AR_LL_HDR_SLACK + ARH_FIXED_LEN + (hlen * 4) +
1574 plen + plen, BPRI_MED);
1575 if (mp->b_cont == NULL) {
1576 freeb(mp);
1577 return (ENOMEM);
1580 /* Fill in the ARP header. */
1581 cp = mp->b_cont->b_rptr + (AR_LL_HDR_SLACK + hlen + hlen);
1582 mp->b_cont->b_rptr = cp;
1583 arh = (arh_t *)cp;
1584 U16_TO_BE16(arp_hw_type(ill->ill_mactype), arh->arh_hardware);
1585 U16_TO_BE16(proto, arh->arh_proto);
1586 arh->arh_hlen = (uint8_t)hlen;
1587 arh->arh_plen = (uint8_t)plen;
1588 U16_TO_BE16(operation, arh->arh_operation);
1589 cp += ARH_FIXED_LEN;
1590 bcopy(haddr1, cp, hlen);
1591 cp += hlen;
1592 if (paddr1 == NULL)
1593 bzero(cp, plen);
1594 else
1595 bcopy(paddr1, cp, plen);
1596 cp += plen;
1597 if (haddr2 == NULL)
1598 bzero(cp, hlen);
1599 else
1600 bcopy(haddr2, cp, hlen);
1601 cp += hlen;
1602 bcopy(paddr2, cp, plen);
1603 cp += plen;
1604 mp->b_cont->b_wptr = cp;
1606 DTRACE_PROBE3(arp__physical__out__start,
1607 ill_t *, ill, arh_t *, arh, mblk_t *, mp);
1608 ARP_HOOK_OUT(ill->ill_ipst->ips_arp_physical_out_event,
1609 ill->ill_ipst->ips_arp_physical_out,
1610 ill->ill_phyint->phyint_ifindex, arh, mp, mp->b_cont,
1611 ill->ill_ipst);
1612 DTRACE_PROBE1(arp__physical__out__end, mblk_t *, mp);
1613 if (mp == NULL)
1614 return (0);
1616 /* Ship it out. */
1617 arl = ill_to_arl(ill);
1618 if (arl == NULL) {
1619 freemsg(mp);
1620 return (0);
1622 if (canputnext(arl->arl_wq))
1623 putnext(arl->arl_wq, mp);
1624 else
1625 freemsg(mp);
1626 arl_refrele(arl);
1627 return (0);
1631 * Process resolve requests.
1632 * If we are not yet reachable then we check and decrease ncec_rcnt; otherwise
1633 * we leave it alone (the caller will check and manage ncec_pcnt in those
1634 * cases.)
1637 arp_request(ncec_t *ncec, in_addr_t sender, ill_t *ill)
1639 int err;
1640 const uchar_t *target_hwaddr;
1641 struct in_addr nce_paddr;
1642 uchar_t *dst_lladdr;
1643 boolean_t use_rcnt = !NCE_ISREACHABLE(ncec);
1645 ASSERT(MUTEX_HELD(&ncec->ncec_lock));
1646 ASSERT(!IS_IPMP(ill));
1648 if (use_rcnt && ncec->ncec_rcnt == 0) {
1649 /* not allowed any more retransmits. */
1650 return (0);
1653 if ((ill->ill_flags & ILLF_NOARP) != 0)
1654 return (0);
1656 IN6_V4MAPPED_TO_INADDR(&ncec->ncec_addr, &nce_paddr);
1658 target_hwaddr =
1659 ill->ill_bcast_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill);
1661 if (NCE_ISREACHABLE(ncec)) {
1662 dst_lladdr = ncec->ncec_lladdr;
1663 } else {
1664 dst_lladdr = ill->ill_bcast_mp->b_rptr +
1665 NCE_LL_ADDR_OFFSET(ill);
1668 mutex_exit(&ncec->ncec_lock);
1669 err = arp_output(ill, ARP_REQUEST,
1670 ill->ill_phys_addr, (uchar_t *)&sender, target_hwaddr,
1671 (uchar_t *)&nce_paddr, dst_lladdr);
1672 mutex_enter(&ncec->ncec_lock);
1674 if (err != 0) {
1676 * Some transient error such as ENOMEM or a down link was
1677 * encountered. If the link has been taken down permanently,
1678 * the ncec will eventually be cleaned up (ipif_down_tail()
1679 * will call ipif_nce_down() and flush the ncec), to terminate
1680 * recurring attempts to send ARP requests. In all other cases,
1681 * allow the caller another chance at success next time.
1683 return (ncec->ncec_ill->ill_reachable_retrans_time);
1686 if (use_rcnt)
1687 ncec->ncec_rcnt--;
1689 return (ncec->ncec_ill->ill_reachable_retrans_time);
1692 /* return B_TRUE if dropped */
1693 boolean_t
1694 arp_announce(ncec_t *ncec)
1696 ill_t *ill;
1697 int err;
1698 uchar_t *sphys_addr, *bcast_addr;
1699 struct in_addr ncec_addr;
1700 boolean_t need_refrele = B_FALSE;
1702 ASSERT((ncec->ncec_flags & NCE_F_BCAST) == 0);
1703 ASSERT((ncec->ncec_flags & NCE_F_MCAST) == 0);
1705 if (IS_IPMP(ncec->ncec_ill)) {
1706 /* sent on the cast_ill */
1707 ill = ipmp_ill_hold_xmit_ill(ncec->ncec_ill, B_FALSE);
1708 if (ill == NULL)
1709 return (B_TRUE);
1710 need_refrele = B_TRUE;
1711 } else {
1712 ill = ncec->ncec_ill;
1716 * broadcast an announce to ill_bcast address.
1718 IN6_V4MAPPED_TO_INADDR(&ncec->ncec_addr, &ncec_addr);
1720 sphys_addr = ncec->ncec_lladdr;
1721 bcast_addr = ill->ill_bcast_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill);
1723 err = arp_output(ill, ARP_REQUEST,
1724 sphys_addr, (uchar_t *)&ncec_addr, bcast_addr,
1725 (uchar_t *)&ncec_addr, bcast_addr);
1727 if (need_refrele)
1728 ill_refrele(ill);
1729 return (err != 0);
1732 /* return B_TRUE if dropped */
1733 boolean_t
1734 arp_probe(ncec_t *ncec)
1736 ill_t *ill;
1737 int err;
1738 struct in_addr ncec_addr;
1739 uchar_t *sphys_addr, *dst_lladdr;
1741 if (IS_IPMP(ncec->ncec_ill)) {
1742 ill = ipmp_ill_hold_xmit_ill(ncec->ncec_ill, B_FALSE);
1743 if (ill == NULL)
1744 return (B_TRUE);
1745 } else {
1746 ill = ncec->ncec_ill;
1749 IN6_V4MAPPED_TO_INADDR(&ncec->ncec_addr, &ncec_addr);
1751 sphys_addr = ncec->ncec_lladdr;
1752 dst_lladdr = ill->ill_bcast_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill);
1753 err = arp_output(ill, ARP_REQUEST,
1754 sphys_addr, NULL, NULL, (uchar_t *)&ncec_addr, dst_lladdr);
1756 if (IS_IPMP(ncec->ncec_ill))
1757 ill_refrele(ill);
1758 return (err != 0);
1761 static mblk_t *
1762 arl_unbind(arl_t *arl)
1764 mblk_t *mp;
1766 if ((mp = arl->arl_unbind_mp) != NULL) {
1767 arl->arl_unbind_mp = NULL;
1768 arl->arl_state_flags |= ARL_DL_UNBIND_IN_PROGRESS;
1770 return (mp);
1774 arp_ll_down(ill_t *ill)
1776 arl_t *arl;
1777 mblk_t *unbind_mp;
1778 int err = 0;
1779 boolean_t replumb = (ill->ill_replumbing == 1);
1781 DTRACE_PROBE2(ill__downup, char *, "arp_ll_down", ill_t *, ill);
1782 if ((arl = ill_to_arl(ill)) == NULL)
1783 return (ENXIO);
1784 DTRACE_PROBE2(arl__downup, char *, "arp_ll_down", arl_t *, arl);
1785 mutex_enter(&arl->arl_lock);
1786 unbind_mp = arl_unbind(arl);
1787 if (unbind_mp != NULL) {
1788 ASSERT(arl->arl_state_flags & ARL_DL_UNBIND_IN_PROGRESS);
1789 DTRACE_PROBE2(arp__unbinding, mblk_t *, unbind_mp,
1790 arl_t *, arl);
1791 err = EINPROGRESS;
1792 if (replumb)
1793 arl->arl_state_flags |= ARL_LL_REPLUMBING;
1795 mutex_exit(&arl->arl_lock);
1796 if (unbind_mp != NULL)
1797 arp_dlpi_send(arl, unbind_mp);
1798 arl_refrele(arl);
1799 return (err);
1802 /* ARGSUSED */
1804 arp_close(queue_t *q, int flags)
1806 if (WR(q)->q_next != NULL) {
1807 /* This is a module close */
1808 return (arp_modclose(q->q_ptr));
1810 qprocsoff(q);
1811 q->q_ptr = WR(q)->q_ptr = NULL;
1812 return (0);
1815 static int
1816 arp_modclose(arl_t *arl)
1818 arl_ill_common_t *ai = arl->arl_common;
1819 ill_t *ill;
1820 queue_t *q = arl->arl_rq;
1821 mblk_t *mp, *nextmp;
1822 ipsq_t *ipsq = NULL;
1824 ill = arl_to_ill(arl);
1825 if (ill != NULL) {
1826 if (!ill_waiter_inc(ill)) {
1827 ill_refrele(ill);
1828 } else {
1829 ill_refrele(ill);
1830 if (ipsq_enter(ill, B_FALSE, NEW_OP))
1831 ipsq = ill->ill_phyint->phyint_ipsq;
1832 ill_waiter_dcr(ill);
1834 if (ipsq == NULL) {
1836 * could not enter the ipsq because ill is already
1837 * marked CONDEMNED.
1839 ill = NULL;
1842 if (ai != NULL && ipsq == NULL) {
1844 * Either we did not get an ill because it was marked CONDEMNED
1845 * or we could not enter the ipsq because it was unplumbing.
1846 * In both cases, wait for the ill to complete ip_modclose().
1848 * If the arp_modclose happened even before SLIFNAME, the ai
1849 * itself would be NULL, in which case we can complete the close
1850 * without waiting.
1852 mutex_enter(&ai->ai_lock);
1853 while (ai->ai_ill != NULL)
1854 cv_wait(&ai->ai_ill_unplumb_done, &ai->ai_lock);
1855 mutex_exit(&ai->ai_lock);
1857 ASSERT(ill == NULL || IAM_WRITER_ILL(ill));
1859 mutex_enter(&arl->arl_lock);
1861 * If the ill had completed unplumbing before arp_modclose(), there
1862 * would be no ill (and therefore, no ipsq) to serialize arp_modclose()
1863 * so that we need to explicitly check for ARL_CONDEMNED and back off
1864 * if it is set.
1866 if ((arl->arl_state_flags & ARL_CONDEMNED) != 0) {
1867 mutex_exit(&arl->arl_lock);
1868 ASSERT(ipsq == NULL);
1869 return (0);
1871 arl->arl_state_flags |= ARL_CONDEMNED;
1874 * send out all pending dlpi messages, don't wait for the ack (which
1875 * will be ignored in arp_rput when CONDEMNED is set)
1877 * We have to check for pending DL_UNBIND_REQ because, in the case
1878 * that ip_modclose() executed before arp_modclose(), the call to
1879 * ill_delete_tail->ipif_arp_down() would have triggered a
1880 * DL_UNBIND_REQ. When arp_modclose() executes ipsq_enter() will fail
1881 * (since ip_modclose() is in the ipsq) but the DL_UNBIND_ACK may not
1882 * have been processed yet. In this scenario, we cannot reset
1883 * arl_dlpi_pending, because the setting/clearing of arl_state_flags
1884 * related to unbind, and the associated cv_waits must be allowed to
1885 * continue.
1887 if (arl->arl_dlpi_pending != DL_UNBIND_REQ)
1888 arl->arl_dlpi_pending = DL_PRIM_INVAL;
1889 mp = arl->arl_dlpi_deferred;
1890 arl->arl_dlpi_deferred = NULL;
1891 mutex_exit(&arl->arl_lock);
1893 for (; mp != NULL; mp = nextmp) {
1894 nextmp = mp->b_next;
1895 mp->b_next = NULL;
1896 putnext(arl->arl_wq, mp);
1899 /* Wait for data paths to quiesce */
1900 mutex_enter(&arl->arl_lock);
1901 while (arl->arl_refcnt != 0)
1902 cv_wait(&arl->arl_cv, &arl->arl_lock);
1905 * unbind, so that nothing else can come up from driver.
1907 mp = arl_unbind(arl);
1908 mutex_exit(&arl->arl_lock);
1909 if (mp != NULL)
1910 arp_dlpi_send(arl, mp);
1911 mutex_enter(&arl->arl_lock);
1913 /* wait for unbind ack */
1914 while (arl->arl_state_flags & ARL_DL_UNBIND_IN_PROGRESS)
1915 cv_wait(&arl->arl_cv, &arl->arl_lock);
1916 mutex_exit(&arl->arl_lock);
1918 qprocsoff(q);
1920 if (ill != NULL) {
1921 mutex_enter(&ill->ill_lock);
1922 ill->ill_arl_dlpi_pending = 0;
1923 mutex_exit(&ill->ill_lock);
1926 if (ai != NULL) {
1927 mutex_enter(&ai->ai_lock);
1928 ai->ai_arl = NULL;
1929 if (ai->ai_ill == NULL) {
1930 mutex_destroy(&ai->ai_lock);
1931 kmem_free(ai, sizeof (*ai));
1932 } else {
1933 mutex_exit(&ai->ai_lock);
1937 /* free up the rest */
1938 arp_mod_close_tail(arl);
1940 q->q_ptr = WR(q)->q_ptr = NULL;
1942 if (ipsq != NULL)
1943 ipsq_exit(ipsq);
1945 return (0);
1948 static void
1949 arp_mod_close_tail(arl_t *arl)
1951 ip_stack_t *ipst = arl->arl_ipst;
1952 mblk_t **mpp;
1954 mutex_enter(&ipst->ips_ip_mi_lock);
1955 mi_close_unlink(&ipst->ips_arp_g_head, (IDP)arl);
1956 mutex_exit(&ipst->ips_ip_mi_lock);
1959 * credp could be null if the open didn't succeed and ip_modopen
1960 * itself calls ip_close.
1962 if (arl->arl_credp != NULL)
1963 crfree(arl->arl_credp);
1965 /* Free all retained control messages. */
1966 mpp = &arl->arl_first_mp_to_free;
1967 do {
1968 while (mpp[0]) {
1969 mblk_t *mp;
1970 mblk_t *mp1;
1972 mp = mpp[0];
1973 mpp[0] = mp->b_next;
1974 for (mp1 = mp; mp1 != NULL; mp1 = mp1->b_cont) {
1975 mp1->b_next = NULL;
1976 mp1->b_prev = NULL;
1978 freemsg(mp);
1980 } while (mpp++ != &arl->arl_last_mp_to_free);
1982 netstack_rele(ipst->ips_netstack);
1983 mi_free(arl->arl_name);
1984 mi_close_free((IDP)arl);
1988 * DAD failed. Tear down ipifs with the specified srce address. Note that
1989 * tearing down the ipif also meas deleting the ncec through ipif_down,
1990 * so it is not possible to use nce_timer for recovery. Instead we start
1991 * a timer on the ipif. Caller has to free the mp.
1993 void
1994 arp_failure(mblk_t *mp, ip_recv_attr_t *ira)
1996 ill_t *ill = ira->ira_ill;
1998 if ((mp = copymsg(mp)) != NULL) {
1999 ill_refhold(ill);
2000 qwriter_ip(ill, ill->ill_rq, mp, arp_excl, NEW_OP, B_FALSE);
2005 * This is for exclusive changes due to ARP. Tear down an interface due
2006 * to AR_CN_FAILED and AR_CN_BOGON.
2008 /* ARGSUSED */
2009 static void
2010 arp_excl(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg)
2012 ill_t *ill = rq->q_ptr;
2013 arh_t *arh;
2014 ipaddr_t src;
2015 ipif_t *ipif;
2016 ip_stack_t *ipst = ill->ill_ipst;
2017 uchar_t *haddr;
2018 uint_t haddrlen;
2020 /* first try src = ar$spa */
2021 arh = (arh_t *)mp->b_rptr;
2022 bcopy((char *)&arh[1] + arh->arh_hlen, &src, IP_ADDR_LEN);
2024 haddrlen = arh->arh_hlen;
2025 haddr = (uint8_t *)(arh + 1);
2027 if (haddrlen == ill->ill_phys_addr_length) {
2029 * Ignore conflicts generated by misbehaving switches that
2030 * just reflect our own messages back to us. For IPMP, we may
2031 * see reflections across any ill in the illgrp.
2033 /* For an under ill_grp can change under lock */
2034 rw_enter(&ipst->ips_ill_g_lock, RW_READER);
2035 if (bcmp(haddr, ill->ill_phys_addr, haddrlen) == 0 ||
2036 IS_UNDER_IPMP(ill) && ill->ill_grp != NULL &&
2037 ipmp_illgrp_find_ill(ill->ill_grp, haddr,
2038 haddrlen) != NULL) {
2039 rw_exit(&ipst->ips_ill_g_lock);
2040 goto ignore_conflict;
2042 rw_exit(&ipst->ips_ill_g_lock);
2046 * Look up the appropriate ipif.
2048 ipif = ipif_lookup_addr(src, ill, ALL_ZONES, ipst);
2049 if (ipif == NULL)
2050 goto ignore_conflict;
2052 /* Reload the ill to match the ipif */
2053 ill = ipif->ipif_ill;
2055 /* If it's already duplicate or ineligible, then don't do anything. */
2056 if (ipif->ipif_flags & (IPIF_POINTOPOINT|IPIF_DUPLICATE)) {
2057 ipif_refrele(ipif);
2058 goto ignore_conflict;
2062 * If we failed on a recovery probe, then restart the timer to
2063 * try again later.
2065 if (!ipif->ipif_was_dup) {
2066 char hbuf[MAC_STR_LEN];
2067 char sbuf[INET_ADDRSTRLEN];
2068 char ibuf[LIFNAMSIZ];
2070 (void) mac_colon_addr(haddr, haddrlen, hbuf, sizeof (hbuf));
2071 (void) ip_dot_addr(src, sbuf);
2072 ipif_get_name(ipif, ibuf, sizeof (ibuf));
2074 cmn_err(CE_WARN, "%s has duplicate address %s (in use by %s);"
2075 " disabled", ibuf, sbuf, hbuf);
2077 mutex_enter(&ill->ill_lock);
2078 ASSERT(!(ipif->ipif_flags & IPIF_DUPLICATE));
2079 ipif->ipif_flags |= IPIF_DUPLICATE;
2080 ill->ill_ipif_dup_count++;
2081 mutex_exit(&ill->ill_lock);
2082 (void) ipif_down(ipif, NULL, NULL);
2083 (void) ipif_down_tail(ipif);
2084 mutex_enter(&ill->ill_lock);
2085 if (!(ipif->ipif_flags & (IPIF_DHCPRUNNING|IPIF_TEMPORARY)) &&
2086 ill->ill_net_type == IRE_IF_RESOLVER &&
2087 !(ipif->ipif_state_flags & IPIF_CONDEMNED) &&
2088 ipst->ips_ip_dup_recovery > 0) {
2089 ASSERT(ipif->ipif_recovery_id == 0);
2090 ipif->ipif_recovery_id = timeout(ipif_dup_recovery,
2091 ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery));
2093 mutex_exit(&ill->ill_lock);
2094 ipif_refrele(ipif);
2096 ignore_conflict:
2097 freemsg(mp);
2101 * This is a place for a dtrace hook.
2102 * Note that mp can be either the DL_UNITDATA_IND with a b_cont payload,
2103 * or just the ARP packet payload as an M_DATA.
2105 /* ARGSUSED */
2106 static void
2107 arp_drop_packet(const char *str, mblk_t *mp, ill_t *ill)
2109 freemsg(mp);
2112 static boolean_t
2113 arp_over_driver(queue_t *q)
2115 queue_t *qnext = STREAM(q)->sd_wrq->q_next;
2118 * check if first module below stream head is IP or UDP.
2120 ASSERT(qnext != NULL);
2121 if (strcmp(Q2NAME(qnext), "ip") != 0 &&
2122 strcmp(Q2NAME(qnext), "udp") != 0) {
2124 * module below is not ip or udp, so arp has been pushed
2125 * on the driver.
2127 return (B_TRUE);
2129 return (B_FALSE);
2132 static int
2133 arp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
2135 int err;
2137 ASSERT(sflag & MODOPEN);
2138 if (!arp_over_driver(q)) {
2139 q->q_qinfo = dummymodinfo.st_rdinit;
2140 WR(q)->q_qinfo = dummymodinfo.st_wrinit;
2141 return ((*dummymodinfo.st_rdinit->qi_qopen)(q, devp, flag,
2142 sflag, credp));
2144 err = arp_modopen(q, devp, flag, sflag, credp);
2145 return (err);
2149 * In most cases we must be a writer on the IP stream before coming to
2150 * arp_dlpi_send(), to serialize DLPI sends to the driver. The exceptions
2151 * when we are not a writer are very early duing initialization (in
2152 * arl_init, before the arl has done a SLIFNAME, so that we don't yet know
2153 * the associated ill) or during arp_mod_close, when we could not enter the
2154 * ipsq because the ill has already unplumbed.
2156 static void
2157 arp_dlpi_send(arl_t *arl, mblk_t *mp)
2159 mblk_t **mpp;
2160 t_uscalar_t prim;
2161 arl_ill_common_t *ai;
2163 ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO);
2165 #ifdef DEBUG
2166 ai = arl->arl_common;
2167 if (ai != NULL) {
2168 mutex_enter(&ai->ai_lock);
2169 if (ai->ai_ill != NULL)
2170 ASSERT(IAM_WRITER_ILL(ai->ai_ill));
2171 mutex_exit(&ai->ai_lock);
2173 #endif /* DEBUG */
2175 mutex_enter(&arl->arl_lock);
2176 if (arl->arl_dlpi_pending != DL_PRIM_INVAL) {
2177 /* Must queue message. Tail insertion */
2178 mpp = &arl->arl_dlpi_deferred;
2179 while (*mpp != NULL)
2180 mpp = &((*mpp)->b_next);
2182 *mpp = mp;
2183 mutex_exit(&arl->arl_lock);
2184 return;
2186 mutex_exit(&arl->arl_lock);
2187 if ((prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive)
2188 == DL_BIND_REQ) {
2189 ASSERT((arl->arl_state_flags & ARL_DL_UNBIND_IN_PROGRESS) == 0);
2192 * No need to take the arl_lock to examine ARL_CONDEMNED at this point
2193 * because the only thread that can see ARL_CONDEMNED here is the
2194 * closing arp_modclose() thread which sets the flag after becoming a
2195 * writer on the ipsq. Threads from IP must have finished and
2196 * cannot be active now.
2198 if (!(arl->arl_state_flags & ARL_CONDEMNED) ||
2199 (prim == DL_UNBIND_REQ)) {
2200 if (prim != DL_NOTIFY_CONF) {
2201 ill_t *ill = arl_to_ill(arl);
2203 arl->arl_dlpi_pending = prim;
2204 if (ill != NULL) {
2205 mutex_enter(&ill->ill_lock);
2206 ill->ill_arl_dlpi_pending = 1;
2207 mutex_exit(&ill->ill_lock);
2208 ill_refrele(ill);
2212 DTRACE_PROBE4(arl__dlpi, char *, "arp_dlpi_send",
2213 char *, dl_primstr(prim), char *, "-", arl_t *, arl);
2214 putnext(arl->arl_wq, mp);
2217 static void
2218 arl_defaults_common(arl_t *arl, mblk_t *mp)
2220 dl_info_ack_t *dlia = (dl_info_ack_t *)mp->b_rptr;
2222 * Till the ill is fully up the ill is not globally visible.
2223 * So no need for a lock.
2225 arl->arl_mactype = dlia->dl_mac_type;
2226 arl->arl_sap_length = dlia->dl_sap_length;
2228 if (!arl->arl_dlpi_style_set) {
2229 if (dlia->dl_provider_style == DL_STYLE2)
2230 arl->arl_needs_attach = 1;
2231 mutex_enter(&arl->arl_lock);
2232 ASSERT(arl->arl_dlpi_style_set == 0);
2233 arl->arl_dlpi_style_set = 1;
2234 arl->arl_state_flags &= ~ARL_LL_SUBNET_PENDING;
2235 cv_broadcast(&arl->arl_cv);
2236 mutex_exit(&arl->arl_lock);
2241 arl_init(queue_t *q, arl_t *arl)
2243 mblk_t *info_mp;
2244 dl_info_req_t *dlir;
2246 /* subset of ill_init */
2247 mutex_init(&arl->arl_lock, NULL, MUTEX_DEFAULT, 0);
2249 arl->arl_rq = q;
2250 arl->arl_wq = WR(q);
2252 info_mp = allocb(MAX(sizeof (dl_info_req_t), sizeof (dl_info_ack_t)),
2253 BPRI_HI);
2254 if (info_mp == NULL)
2255 return (ENOMEM);
2257 * allocate sufficient space to contain device name.
2259 arl->arl_name = (char *)(mi_zalloc(2 * LIFNAMSIZ));
2260 arl->arl_ppa = UINT_MAX;
2261 arl->arl_state_flags |= (ARL_LL_SUBNET_PENDING | ARL_LL_UNBOUND);
2263 /* Send down the Info Request to the driver. */
2264 info_mp->b_datap->db_type = M_PCPROTO;
2265 dlir = (dl_info_req_t *)info_mp->b_rptr;
2266 info_mp->b_wptr = (uchar_t *)&dlir[1];
2267 dlir->dl_primitive = DL_INFO_REQ;
2268 arl->arl_dlpi_pending = DL_PRIM_INVAL;
2269 qprocson(q);
2271 arp_dlpi_send(arl, info_mp);
2272 return (0);
2276 arl_wait_for_info_ack(arl_t *arl)
2278 int err;
2280 mutex_enter(&arl->arl_lock);
2281 while (arl->arl_state_flags & ARL_LL_SUBNET_PENDING) {
2283 * Return value of 0 indicates a pending signal.
2285 err = cv_wait_sig(&arl->arl_cv, &arl->arl_lock);
2286 if (err == 0) {
2287 mutex_exit(&arl->arl_lock);
2288 return (EINTR);
2291 mutex_exit(&arl->arl_lock);
2293 * ip_rput_other could have set an error in ill_error on
2294 * receipt of M_ERROR.
2296 return (arl->arl_error);
2299 void
2300 arl_set_muxid(ill_t *ill, int muxid)
2302 arl_t *arl;
2304 arl = ill_to_arl(ill);
2305 if (arl != NULL) {
2306 arl->arl_muxid = muxid;
2307 arl_refrele(arl);
2312 arl_get_muxid(ill_t *ill)
2314 arl_t *arl;
2315 int muxid = 0;
2317 arl = ill_to_arl(ill);
2318 if (arl != NULL) {
2319 muxid = arl->arl_muxid;
2320 arl_refrele(arl);
2322 return (muxid);
2325 static int
2326 arp_modopen(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
2328 int err;
2329 zoneid_t zoneid;
2330 netstack_t *ns;
2331 ip_stack_t *ipst;
2332 arl_t *arl = NULL;
2335 * Prevent unprivileged processes from pushing IP so that
2336 * they can't send raw IP.
2338 if (secpolicy_net_rawaccess(credp) != 0)
2339 return (EPERM);
2341 ns = netstack_find_by_cred(credp);
2342 ASSERT(ns != NULL);
2343 ipst = ns->netstack_ip;
2344 ASSERT(ipst != NULL);
2347 * For exclusive stacks we set the zoneid to zero
2348 * to make IP operate as if in the global zone.
2350 if (ipst->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID)
2351 zoneid = GLOBAL_ZONEID;
2352 else
2353 zoneid = crgetzoneid(credp);
2355 arl = (arl_t *)mi_open_alloc_sleep(sizeof (arl_t));
2356 q->q_ptr = WR(q)->q_ptr = arl;
2357 arl->arl_ipst = ipst;
2358 arl->arl_zoneid = zoneid;
2359 err = arl_init(q, arl);
2361 if (err != 0) {
2362 mi_free(arl->arl_name);
2363 mi_free(arl);
2364 netstack_rele(ipst->ips_netstack);
2365 q->q_ptr = NULL;
2366 WR(q)->q_ptr = NULL;
2367 return (err);
2371 * Wait for the DL_INFO_ACK if a DL_INFO_REQ was sent.
2373 err = arl_wait_for_info_ack(arl);
2374 if (err == 0)
2375 arl->arl_credp = credp;
2376 else
2377 goto fail;
2379 crhold(credp);
2381 mutex_enter(&ipst->ips_ip_mi_lock);
2382 err = mi_open_link(&ipst->ips_arp_g_head, (IDP)q->q_ptr, devp, flag,
2383 sflag, credp);
2384 mutex_exit(&ipst->ips_ip_mi_lock);
2385 fail:
2386 if (err) {
2387 (void) arp_close(q, 0);
2388 return (err);
2390 return (0);
2394 * Notify any downstream modules (esp softmac and hitbox) of the name
2395 * of this interface using an M_CTL.
2397 static void
2398 arp_ifname_notify(arl_t *arl)
2400 mblk_t *mp1, *mp2;
2401 struct iocblk *iocp;
2402 struct lifreq *lifr;
2404 if ((mp1 = mkiocb(SIOCSLIFNAME)) == NULL)
2405 return;
2406 if ((mp2 = allocb(sizeof (struct lifreq), BPRI_HI)) == NULL) {
2407 freemsg(mp1);
2408 return;
2411 lifr = (struct lifreq *)mp2->b_rptr;
2412 mp2->b_wptr += sizeof (struct lifreq);
2413 bzero(lifr, sizeof (struct lifreq));
2415 (void) strncpy(lifr->lifr_name, arl->arl_name, LIFNAMSIZ);
2416 lifr->lifr_ppa = arl->arl_ppa;
2417 lifr->lifr_flags = ILLF_IPV4;
2419 /* Use M_CTL to avoid confusing anyone else who might be listening. */
2420 DB_TYPE(mp1) = M_CTL;
2421 mp1->b_cont = mp2;
2422 iocp = (struct iocblk *)mp1->b_rptr;
2423 iocp->ioc_count = msgsize(mp1->b_cont);
2424 DTRACE_PROBE4(arl__dlpi, char *, "arp_ifname_notify",
2425 char *, "SIOCSLIFNAME", char *, "-", arl_t *, arl);
2426 putnext(arl->arl_wq, mp1);
2429 void
2430 arp_send_replumb_conf(ill_t *ill)
2432 mblk_t *mp;
2433 arl_t *arl = ill_to_arl(ill);
2435 if (arl == NULL)
2436 return;
2438 * arl_got_replumb and arl_got_unbind to be cleared after we complete
2439 * arp_cmd_done.
2441 mp = mexchange(NULL, NULL, sizeof (dl_notify_conf_t), M_PROTO,
2442 DL_NOTIFY_CONF);
2443 ((dl_notify_conf_t *)(mp->b_rptr))->dl_notification =
2444 DL_NOTE_REPLUMB_DONE;
2445 arp_dlpi_send(arl, mp);
2446 mutex_enter(&arl->arl_lock);
2447 arl->arl_state_flags &= ~ARL_LL_REPLUMBING;
2448 mutex_exit(&arl->arl_lock);
2449 arl_refrele(arl);
2453 * The unplumb code paths call arp_unbind_complete() to make sure that it is
2454 * safe to tear down the ill. We wait for DL_UNBIND_ACK to complete, and also
2455 * for the arl_refcnt to fall to one so that, when we return from
2456 * arp_unbind_complete(), we know for certain that there are no threads in
2457 * arp_rput() that might access the arl_ill.
2459 void
2460 arp_unbind_complete(ill_t *ill)
2462 arl_t *arl = ill_to_arl(ill);
2464 if (arl == NULL)
2465 return;
2466 mutex_enter(&arl->arl_lock);
2468 * wait for unbind ack and arl_refcnt to drop to 1. Note that the
2469 * quiescent arl_refcnt for this function is 1 (and not 0) because
2470 * ill_to_arl() will itself return after taking a ref on the arl_t.
2472 while (arl->arl_state_flags & ARL_DL_UNBIND_IN_PROGRESS)
2473 cv_wait(&arl->arl_cv, &arl->arl_lock);
2474 while (arl->arl_refcnt != 1)
2475 cv_wait(&arl->arl_cv, &arl->arl_lock);
2476 mutex_exit(&arl->arl_lock);
2477 arl_refrele(arl);