2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
13 * This file is part of the Chelsio T4 support code.
15 * Copyright (C) 2010-2013 Chelsio Communications. All rights reserved.
17 * This program is distributed in the hope that it will be useful, but WITHOUT
18 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19 * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this
20 * release for licensing terms and conditions.
24 #include <sys/sunddi.h>
25 #include <sys/sunndi.h>
26 #include <sys/atomic.h>
28 #include <sys/pattr.h>
29 #include <sys/strsubr.h>
30 #include <sys/stream.h>
31 #include <sys/strsun.h>
32 #include <sys/ethernet.h>
34 #include <inet/ipclassifier.h>
37 #include "common/common.h"
38 #include "common/t4_msg.h"
39 #include "common/t4_regs.h"
40 #include "common/t4_regs_values.h"
/* identifies sync vs async L2T_WRITE_REQs */
#define	S_SYNC_WR	12	/* bit position of the sync flag in the tid */
#define	V_SYNC_WR(x)	((x) << S_SYNC_WR)
#define	F_SYNC_WR	V_SYNC_WR(1)
#define	VLAN_NONE	0xfff
50 * jhash.h: Jenkins hash support.
52 * Copyright (C) 1996 Bob Jenkins (bob_jenkins@burtleburtle.net)
54 * http://burtleburtle.net/bob/hash/
56 * These are the credits from Bob's sources:
58 * lookup2.c, by Bob Jenkins, December 1996, Public Domain.
59 * hash(), hash2(), hash3, and mix() are externally useful functions.
60 * Routines to test the hash are included if SELF_TEST is defined.
61 * You can use this free for any purpose. It has no warranty.
/*
 * Mix three 32-bit values reversibly (Bob Jenkins' lookup2 mix step).
 * NOTE: Arguments are modified.
 */
#define	__jhash_mix(a, b, c) \
{ \
	a -= b; a -= c; a ^= (c>>13); \
	b -= c; b -= a; b ^= (a<<8); \
	c -= a; c -= b; c ^= (b>>13); \
	a -= b; a -= c; a ^= (c>>12); \
	b -= c; b -= a; b ^= (a<<16); \
	c -= a; c -= b; c ^= (b>>5); \
	a -= b; a -= c; a ^= (c>>3); \
	b -= c; b -= a; b ^= (a<<10); \
	c -= a; c -= b; c ^= (b>>15); \
}
/* The golden ratio: an arbitrary value */
79 #define JHASH_GOLDEN_RATIO 0x9e3779b9
 * A special ultra-optimized version that knows it is hashing exactly
 * 3 words.
 *
 * NOTE: In particular the "c += length; __jhash_mix(a,b,c);" normally
 * done at the end is not done here.
89 jhash_3words(u32 a
, u32 b
, u32 c
, u32 initval
)
91 a
+= JHASH_GOLDEN_RATIO
;
92 b
+= JHASH_GOLDEN_RATIO
;
101 jhash_2words(u32 a
, u32 b
, u32 initval
)
103 return (jhash_3words(a
, b
, 0, initval
));
/* Recover a pointer to the enclosing struct 's' from a pointer 'p' to member 'f'. */
#define container_of(p, s, f) ((s *)(((uint8_t *)(p)) - offsetof(s, f)))
/*
 * Branch-prediction hints: use the GCC builtin where available, otherwise
 * degrade to plain expressions.  The #else keeps the two pairs of
 * definitions from colliding on GCC-compatible compilers.
 */
#if defined(__GNUC__)
#define	likely(x)	__builtin_expect((x), 1)
#define	unlikely(x)	__builtin_expect((x), 0)
#else
#define	likely(x)	(x)
#define	unlikely(x)	(x)
#endif /* defined(__GNUC__) */
/* L2T entry lifecycle states. */
enum {
	L2T_STATE_VALID,	/* entry is up to date */
	L2T_STATE_STALE,	/* entry may be used but needs revalidation */
	L2T_STATE_RESOLVING,	/* entry needs address resolution */
	L2T_STATE_SYNC_WRITE,	/* synchronous write of entry underway */

	/* when state is one of the below the entry is not hashed */
	L2T_STATE_SWITCHING,	/* entry is being used by a switching filter */
	L2T_STATE_UNUSED	/* entry not in use */
};
131 volatile uint_t nfree
; /* number of free entries */
132 struct l2t_entry
*rover
; /* starting point for next allocation */
133 struct l2t_entry l2tab
[L2T_SIZE
];
/*
 * Convenience wrappers.  NOTE(review): VLAN_NONE is also #defined earlier in
 * this file with the same value; an identical redefinition is benign in C.
 */
#define VLAN_NONE 0xfff
#define SA(x) ((struct sockaddr *)(x))
#define SIN(x) ((struct sockaddr_in *)(x))
#define SINADDR(x) (SIN(x)->sin_addr.s_addr)
/* Read the current value atomically: illumos atomic_add_int_nv(x, 0). */
#define atomic_read(x) atomic_add_int_nv(x, 0)
 * Allocate a free L2T entry.
 * Must be called with l2t_data.lock held.
145 static struct l2t_entry
*
146 alloc_l2e(struct l2t_data
*d
)
148 struct l2t_entry
*end
, *e
, **p
;
150 ASSERT(rw_write_held(&d
->lock
));
152 if (!atomic_read(&d
->nfree
))
155 /* there's definitely a free entry */
156 for (e
= d
->rover
, end
= &d
->l2tab
[L2T_SIZE
]; e
!= end
; ++e
)
157 if (atomic_read(&e
->refcnt
) == 0)
160 for (e
= d
->l2tab
; atomic_read(&e
->refcnt
); ++e
)
164 atomic_dec_uint(&d
->nfree
);
167 * The entry we found may be an inactive entry that is
168 * presently in the hash table. We need to remove it.
170 if (e
->state
< L2T_STATE_SWITCHING
) {
171 for (p
= &d
->l2tab
[e
->hash
].first
; *p
; p
= &(*p
)->next
) {
180 e
->state
= L2T_STATE_UNUSED
;
185 * Write an L2T entry. Must be called with the entry locked.
186 * The write may be synchronous or asynchronous.
189 write_l2e(adapter_t
*sc
, struct l2t_entry
*e
, int sync
)
192 struct cpl_l2t_write_req
*req
;
194 ASSERT(MUTEX_HELD(&e
->lock
));
196 if ((m
= allocb(sizeof (*req
), BPRI_HI
)) == NULL
)
199 /* LINTED: E_BAD_PTR_CAST_ALIGN */
200 req
= (struct cpl_l2t_write_req
*)m
->b_wptr
;
202 /* LINTED: E_CONSTANT_CONDITION */
204 OPCODE_TID(req
) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ
, e
->idx
|
205 V_SYNC_WR(sync
) | V_TID_QID(sc
->sge
.fwq
.abs_id
)));
206 req
->params
= htons(V_L2T_W_PORT(e
->lport
) | V_L2T_W_NOREPLY(!sync
));
207 req
->l2t_idx
= htons(e
->idx
);
208 req
->vlan
= htons(e
->vlan
);
209 (void) memcpy(req
->dst_mac
, e
->dmac
, sizeof (req
->dst_mac
));
211 m
->b_wptr
+= sizeof (*req
);
213 (void) t4_mgmt_tx(sc
, m
);
215 if (sync
&& e
->state
!= L2T_STATE_SWITCHING
)
216 e
->state
= L2T_STATE_SYNC_WRITE
;
222 t4_init_l2t(struct adapter
*sc
)
227 d
= kmem_zalloc(sizeof (*d
), KM_SLEEP
);
230 (void) atomic_swap_uint(&d
->nfree
, L2T_SIZE
);
231 rw_init(&d
->lock
, NULL
, RW_DRIVER
, NULL
);
233 for (i
= 0; i
< L2T_SIZE
; i
++) {
234 /* LINTED: E_ASSIGN_NARROW_CONV */
236 d
->l2tab
[i
].state
= L2T_STATE_UNUSED
;
237 mutex_init(&d
->l2tab
[i
].lock
, NULL
, MUTEX_DRIVER
, NULL
);
238 (void) atomic_swap_uint(&d
->l2tab
[i
].refcnt
, 0);
241 (void) t4_register_cpl_handler(sc
, CPL_L2T_WRITE_RPL
, do_l2t_write_rpl
);
247 t4_free_l2t(struct l2t_data
*d
)
251 for (i
= 0; i
< L2T_SIZE
; i
++)
252 mutex_destroy(&d
->l2tab
[i
].lock
);
253 rw_destroy(&d
->lock
);
254 kmem_free(d
, sizeof (*d
));
259 #ifndef TCP_OFFLOAD_DISABLE
261 l2t_hold(struct l2t_data
*d
, struct l2t_entry
*e
)
263 if (atomic_inc_uint_nv(&e
->refcnt
) == 1) /* 0 -> 1 transition */
264 atomic_dec_uint(&d
->nfree
);
268 * To avoid having to check address families we do not allow v4 and v6
269 * neighbors to be on the same hash chain. We keep v4 entries in the first
270 * half of available hash buckets and v6 in the second.
273 L2T_SZ_HALF
= L2T_SIZE
/ 2,
274 L2T_HASH_MASK
= L2T_SZ_HALF
- 1
277 static inline unsigned int
278 arp_hash(const uint32_t *key
, int ifindex
)
280 return (jhash_2words(*key
, ifindex
, 0) & L2T_HASH_MASK
);
283 static inline unsigned int
284 ipv6_hash(const uint32_t *key
, int ifindex
)
286 uint32_t xor = key
[0] ^ key
[1] ^ key
[2] ^ key
[3];
288 return (L2T_SZ_HALF
+ (jhash_2words(xor, ifindex
, 0) & L2T_HASH_MASK
));
291 static inline unsigned int
292 addr_hash(const uint32_t *addr
, int addr_len
, int ifindex
)
294 return (addr_len
== 4 ? arp_hash(addr
, ifindex
) :
295 ipv6_hash(addr
, ifindex
));
299 * Checks if an L2T entry is for the given IP/IPv6 address. It does not check
300 * whether the L2T entry and the address are of the same address family.
301 * Callers ensure an address is only checked against L2T entries of the same
302 * family, something made trivial by the separation of IP and IPv6 hash chains
 * mentioned above.  Returns 0 if there's a match, non-zero otherwise.
306 addreq(const struct l2t_entry
*e
, const uint32_t *addr
)
309 return ((e
->addr
[0] ^ addr
[0]) | (e
->addr
[1] ^ addr
[1]) |
310 (e
->addr
[2] ^ addr
[2]) | (e
->addr
[3] ^ addr
[3]));
311 return (e
->addr
[0] ^ addr
[0]);
315 * Add a packet to an L2T entry's queue of packets awaiting resolution.
316 * Must be called with the entry's lock held.
319 arpq_enqueue(struct l2t_entry
*e
, mblk_t
*m
)
321 ASSERT(MUTEX_HELD(&e
->lock
));
323 ASSERT(m
->b_next
== NULL
);
324 if (e
->arpq_head
!= NULL
)
325 e
->arpq_tail
->b_next
= m
;
332 send_pending(struct adapter
*sc
, struct l2t_entry
*e
)
336 ASSERT(MUTEX_HELD(&e
->lock
));
338 for (m
= e
->arpq_head
; m
; m
= next
) {
341 (void) t4_wrq_tx(sc
, MBUF_EQ(m
), m
);
343 e
->arpq_head
= e
->arpq_tail
= NULL
;
347 t4_l2t_send(struct adapter
*sc
, mblk_t
*m
, struct l2t_entry
*e
)
356 case L2T_STATE_STALE
: /* entry is stale, kick off revalidation */
359 case L2T_STATE_VALID
: /* fast-path, send the packet on */
360 (void) t4_wrq_tx(sc
, MBUF_EQ(m
), m
);
363 case L2T_STATE_RESOLVING
:
364 case L2T_STATE_SYNC_WRITE
:
365 mutex_enter(&e
->lock
);
366 if (e
->state
!= L2T_STATE_SYNC_WRITE
&&
367 e
->state
!= L2T_STATE_RESOLVING
) {
368 /* state changed by the time we got here */
369 mutex_exit(&e
->lock
);
373 mutex_exit(&e
->lock
);
375 bzero(&ip2m
, sizeof (ip2m
));
376 sin
= (sin_t
*)&ip2m
.ip2mac_pa
;
377 sin
->sin_family
= AF_INET
;
378 sin
->sin_addr
.s_addr
= e
->in_addr
;
379 ip2m
.ip2mac_ifindex
= e
->ifindex
;
381 if (e
->state
== L2T_STATE_RESOLVING
) {
382 (void) ip2mac(IP2MAC_RESOLVE
, &ip2m
, t4_l2t_update
, e
,
384 if (ip2m
.ip2mac_err
== EINPROGRESS
)
386 else if (ip2m
.ip2mac_err
== 0)
387 t4_l2t_update(&ip2m
, e
);
397 * Called when an L2T entry has no more users. The entry is left in the hash
398 * table since it is likely to be reused but we also bump nfree to indicate
399 * that the entry can be reallocated for a different neighbor. We also drop
400 * the existing neighbor reference in case the neighbor is going away and is
401 * waiting on our reference.
403 * Because entries can be reallocated to other neighbors once their ref count
404 * drops to 0 we need to take the entry's lock to avoid races with a new
408 t4_l2e_free(struct l2t_entry
*e
)
412 mutex_enter(&e
->lock
);
413 /* LINTED: E_NOP_IF_STMT */
414 if (atomic_read(&e
->refcnt
) == 0) { /* hasn't been recycled */
416 * Don't need to worry about the arpq, an L2T entry can't be
417 * released if any packets are waiting for resolution as we
418 * need to be able to communicate with the device to close a
422 mutex_exit(&e
->lock
);
424 d
= container_of(e
, struct l2t_data
, l2tab
[e
->idx
]);
425 atomic_inc_uint(&d
->nfree
);
430 t4_l2t_release(struct l2t_entry
*e
)
432 if (atomic_dec_uint_nv(&e
->refcnt
) == 0)
438 do_l2t_write_rpl(struct sge_iq
*iq
, const struct rss_header
*rss
, mblk_t
*m
)
440 struct adapter
*sc
= iq
->adapter
;
441 const struct cpl_l2t_write_rpl
*rpl
= (const void *)(rss
+ 1);
442 unsigned int tid
= GET_TID(rpl
);
443 unsigned int idx
= tid
& (L2T_SIZE
- 1);
445 if (likely(rpl
->status
!= CPL_ERR_NONE
)) {
446 cxgb_printf(sc
->dip
, CE_WARN
,
447 "Unexpected L2T_WRITE_RPL status %u for entry %u",
452 if (tid
& F_SYNC_WR
) {
453 struct l2t_entry
*e
= &sc
->l2t
->l2tab
[idx
];
455 mutex_enter(&e
->lock
);
456 if (e
->state
!= L2T_STATE_SWITCHING
) {
458 e
->state
= L2T_STATE_VALID
;
460 mutex_exit(&e
->lock
);
467 * The TOE wants an L2 table entry that it can use to reach the next hop over
468 * the specified port. Produce such an entry - create one if needed.
470 * Note that the ifnet could be a pseudo-device like if_vlan, if_lagg, etc. on
471 * top of the real cxgbe interface.
474 t4_l2t_get(struct port_info
*pi
, conn_t
*connp
)
477 struct l2t_data
*d
= pi
->adapter
->l2t
;
482 connp
->conn_ixa
->ixa_ire
->ire_ill
->ill_phyint
->phyint_ifindex
;
483 unsigned int smt_idx
= pi
->port_id
;
484 addr
= (uint32_t *)&connp
->conn_faddr_v4
;
485 addr_len
= sizeof (connp
->conn_faddr_v4
);
487 hash
= addr_hash(addr
, addr_len
, index
);
489 rw_enter(&d
->lock
, RW_WRITER
);
490 for (e
= d
->l2tab
[hash
].first
; e
; e
= e
->next
) {
491 if (!addreq(e
, addr
) && e
->smt_idx
== smt_idx
) {
497 /* Need to allocate a new entry */
500 mutex_enter(&e
->lock
); /* avoid race with t4_l2t_free */
501 e
->state
= L2T_STATE_RESOLVING
;
502 (void) memcpy(e
->addr
, addr
, addr_len
);
503 e
->in_addr
= connp
->conn_faddr_v4
;
505 /* LINTED: E_ASSIGN_NARROW_CONV */
506 e
->smt_idx
= smt_idx
;
507 /* LINTED: E_ASSIGN_NARROW_CONV */
509 e
->lport
= pi
->lport
;
510 e
->arpq_head
= e
->arpq_tail
= NULL
;
511 e
->v6
= (addr_len
== 16);
513 (void) atomic_swap_uint(&e
->refcnt
, 1);
515 e
->next
= d
->l2tab
[hash
].first
;
516 d
->l2tab
[hash
].first
= e
;
517 mutex_exit(&e
->lock
);
528 * Called when the host's neighbor layer makes a change to some entry that is
529 * loaded into the HW L2 table.
/*
 * ip2mac() callback: the host's neighbor layer resolved or changed an
 * address that is loaded in the HW L2 table; refresh the hardware copy.
 *
 * NOTE(review): this function is truncated in this copy -- the return type,
 * the declaration of 'cp', several statements (including the branch taken
 * when the entry is unreferenced) and the closing brace are missing.  Only
 * comments were added; every remaining token is unchanged.
 */
t4_l2t_update(ip2mac_t *ip2macp, void *arg)
	struct l2t_entry *e = (struct l2t_entry *)arg;	/* arg passed to ip2mac() */
	struct adapter *sc = e->sc;

	if (ip2macp->ip2mac_err != 0) {
		ASSERT(0); /* Don't know what to do. Needs to be investigated */

	mutex_enter(&e->lock);
	/* NOTE(review): the statement guarded by this test is missing */
	if (atomic_read(&e->refcnt) != 0)
	e->state = L2T_STATE_STALE;
	mutex_exit(&e->lock);

	/* The TOE has no interest in this LLE */

	if (atomic_read(&e->refcnt) != 0) {

		/* Entry is referenced by at least 1 offloaded connection. */

		/* copy the resolved link-layer address into the entry */
		cp = (uchar_t *)LLADDR(&ip2macp->ip2mac_ha);
		bcopy(cp, e->dmac, 6);
		/* push the updated MAC to the hardware (synchronous write) */
		(void) write_l2e(sc, e, 1);
		e->state = L2T_STATE_VALID;
	}
	mutex_exit(&e->lock);