/*
 * This file is part of the Chelsio T4 Ethernet driver for Linux.
 *
 * Copyright (c) 2003-2010 Chelsio Communications, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
35 #include <linux/skbuff.h>
36 #include <linux/netdevice.h>
38 #include <linux/if_vlan.h>
39 #include <linux/jhash.h>
40 #include <net/neighbour.h>
46 #define VLAN_NONE 0xfff
48 /* identifies sync vs async L2T_WRITE_REQs */
49 #define F_SYNC_WR (1 << 12)
/*
 * Software states of an L2T entry.  The order is significant: the entry
 * allocator treats every state that compares below L2T_STATE_SWITCHING as
 * "possibly present in the hash table".
 */
enum {
	L2T_STATE_VALID,      /* entry is up to date */
	L2T_STATE_STALE,      /* entry may be used but needs revalidation */
	L2T_STATE_RESOLVING,  /* entry needs address resolution */
	L2T_STATE_SYNC_WRITE, /* synchronous write of entry underway */

	/* when state is one of the below the entry is not hashed */
	L2T_STATE_SWITCHING,  /* entry is being used by a switching filter */
	L2T_STATE_UNUSED      /* entry not in use */
};
64 atomic_t nfree
; /* number of free entries */
65 struct l2t_entry
*rover
; /* starting point for next allocation */
66 struct l2t_entry l2tab
[L2T_SIZE
];
69 static inline unsigned int vlan_prio(const struct l2t_entry
*e
)
74 static inline void l2t_hold(struct l2t_data
*d
, struct l2t_entry
*e
)
76 if (atomic_add_return(1, &e
->refcnt
) == 1) /* 0 -> 1 transition */
77 atomic_dec(&d
->nfree
);
81 * To avoid having to check address families we do not allow v4 and v6
82 * neighbors to be on the same hash chain. We keep v4 entries in the first
83 * half of available hash buckets and v6 in the second.
86 L2T_SZ_HALF
= L2T_SIZE
/ 2,
87 L2T_HASH_MASK
= L2T_SZ_HALF
- 1
90 static inline unsigned int arp_hash(const u32
*key
, int ifindex
)
92 return jhash_2words(*key
, ifindex
, 0) & L2T_HASH_MASK
;
95 static inline unsigned int ipv6_hash(const u32
*key
, int ifindex
)
97 u32
xor = key
[0] ^ key
[1] ^ key
[2] ^ key
[3];
99 return L2T_SZ_HALF
+ (jhash_2words(xor, ifindex
, 0) & L2T_HASH_MASK
);
102 static unsigned int addr_hash(const u32
*addr
, int addr_len
, int ifindex
)
104 return addr_len
== 4 ? arp_hash(addr
, ifindex
) :
105 ipv6_hash(addr
, ifindex
);
109 * Checks if an L2T entry is for the given IP/IPv6 address. It does not check
110 * whether the L2T entry and the address are of the same address family.
111 * Callers ensure an address is only checked against L2T entries of the same
112 * family, something made trivial by the separation of IP and IPv6 hash chains
113 * mentioned above. Returns 0 if there's a match,
115 static int addreq(const struct l2t_entry
*e
, const u32
*addr
)
118 return (e
->addr
[0] ^ addr
[0]) | (e
->addr
[1] ^ addr
[1]) |
119 (e
->addr
[2] ^ addr
[2]) | (e
->addr
[3] ^ addr
[3]);
120 return e
->addr
[0] ^ addr
[0];
123 static void neigh_replace(struct l2t_entry
*e
, struct neighbour
*n
)
127 neigh_release(e
->neigh
);
132 * Write an L2T entry. Must be called with the entry locked.
133 * The write may be synchronous or asynchronous.
135 static int write_l2e(struct adapter
*adap
, struct l2t_entry
*e
, int sync
)
138 struct cpl_l2t_write_req
*req
;
140 skb
= alloc_skb(sizeof(*req
), GFP_ATOMIC
);
144 req
= (struct cpl_l2t_write_req
*)__skb_put(skb
, sizeof(*req
));
147 OPCODE_TID(req
) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ
,
148 e
->idx
| (sync
? F_SYNC_WR
: 0) |
149 TID_QID(adap
->sge
.fw_evtq
.abs_id
)));
150 req
->params
= htons(L2T_W_PORT(e
->lport
) | L2T_W_NOREPLY(!sync
));
151 req
->l2t_idx
= htons(e
->idx
);
152 req
->vlan
= htons(e
->vlan
);
154 memcpy(e
->dmac
, e
->neigh
->ha
, sizeof(e
->dmac
));
155 memcpy(req
->dst_mac
, e
->dmac
, sizeof(req
->dst_mac
));
157 set_wr_txq(skb
, CPL_PRIORITY_CONTROL
, 0);
158 t4_ofld_send(adap
, skb
);
160 if (sync
&& e
->state
!= L2T_STATE_SWITCHING
)
161 e
->state
= L2T_STATE_SYNC_WRITE
;
/*
 * Send packets waiting in an L2T entry's ARP queue.  Must be called with the
 * entry locked.
 */
169 static void send_pending(struct adapter
*adap
, struct l2t_entry
*e
)
171 while (e
->arpq_head
) {
172 struct sk_buff
*skb
= e
->arpq_head
;
174 e
->arpq_head
= skb
->next
;
176 t4_ofld_send(adap
, skb
);
182 * Process a CPL_L2T_WRITE_RPL. Wake up the ARP queue if it completes a
183 * synchronous L2T_WRITE. Note that the TID in the reply is really the L2T
184 * index it refers to.
186 void do_l2t_write_rpl(struct adapter
*adap
, const struct cpl_l2t_write_rpl
*rpl
)
188 unsigned int tid
= GET_TID(rpl
);
189 unsigned int idx
= tid
& (L2T_SIZE
- 1);
191 if (unlikely(rpl
->status
!= CPL_ERR_NONE
)) {
192 dev_err(adap
->pdev_dev
,
193 "Unexpected L2T_WRITE_RPL status %u for entry %u\n",
198 if (tid
& F_SYNC_WR
) {
199 struct l2t_entry
*e
= &adap
->l2t
->l2tab
[idx
];
202 if (e
->state
!= L2T_STATE_SWITCHING
) {
203 send_pending(adap
, e
);
204 e
->state
= (e
->neigh
->nud_state
& NUD_STALE
) ?
205 L2T_STATE_STALE
: L2T_STATE_VALID
;
207 spin_unlock(&e
->lock
);
212 * Add a packet to an L2T entry's queue of packets awaiting resolution.
213 * Must be called with the entry's lock held.
215 static inline void arpq_enqueue(struct l2t_entry
*e
, struct sk_buff
*skb
)
219 e
->arpq_tail
->next
= skb
;
225 int cxgb4_l2t_send(struct net_device
*dev
, struct sk_buff
*skb
,
228 struct adapter
*adap
= netdev2adap(dev
);
232 case L2T_STATE_STALE
: /* entry is stale, kick off revalidation */
233 neigh_event_send(e
->neigh
, NULL
);
234 spin_lock_bh(&e
->lock
);
235 if (e
->state
== L2T_STATE_STALE
)
236 e
->state
= L2T_STATE_VALID
;
237 spin_unlock_bh(&e
->lock
);
238 case L2T_STATE_VALID
: /* fast-path, send the packet on */
239 return t4_ofld_send(adap
, skb
);
240 case L2T_STATE_RESOLVING
:
241 case L2T_STATE_SYNC_WRITE
:
242 spin_lock_bh(&e
->lock
);
243 if (e
->state
!= L2T_STATE_SYNC_WRITE
&&
244 e
->state
!= L2T_STATE_RESOLVING
) {
245 spin_unlock_bh(&e
->lock
);
248 arpq_enqueue(e
, skb
);
249 spin_unlock_bh(&e
->lock
);
251 if (e
->state
== L2T_STATE_RESOLVING
&&
252 !neigh_event_send(e
->neigh
, NULL
)) {
253 spin_lock_bh(&e
->lock
);
254 if (e
->state
== L2T_STATE_RESOLVING
&& e
->arpq_head
)
255 write_l2e(adap
, e
, 1);
256 spin_unlock_bh(&e
->lock
);
261 EXPORT_SYMBOL(cxgb4_l2t_send
);
264 * Allocate a free L2T entry. Must be called with l2t_data.lock held.
266 static struct l2t_entry
*alloc_l2e(struct l2t_data
*d
)
268 struct l2t_entry
*end
, *e
, **p
;
270 if (!atomic_read(&d
->nfree
))
273 /* there's definitely a free entry */
274 for (e
= d
->rover
, end
= &d
->l2tab
[L2T_SIZE
]; e
!= end
; ++e
)
275 if (atomic_read(&e
->refcnt
) == 0)
278 for (e
= d
->l2tab
; atomic_read(&e
->refcnt
); ++e
)
282 atomic_dec(&d
->nfree
);
285 * The entry we found may be an inactive entry that is
286 * presently in the hash table. We need to remove it.
288 if (e
->state
< L2T_STATE_SWITCHING
)
289 for (p
= &d
->l2tab
[e
->hash
].first
; *p
; p
= &(*p
)->next
)
296 e
->state
= L2T_STATE_UNUSED
;
301 * Called when an L2T entry has no more users.
303 static void t4_l2e_free(struct l2t_entry
*e
)
307 spin_lock_bh(&e
->lock
);
308 if (atomic_read(&e
->refcnt
) == 0) { /* hasn't been recycled */
310 neigh_release(e
->neigh
);
313 while (e
->arpq_head
) {
314 struct sk_buff
*skb
= e
->arpq_head
;
316 e
->arpq_head
= skb
->next
;
321 spin_unlock_bh(&e
->lock
);
323 d
= container_of(e
, struct l2t_data
, l2tab
[e
->idx
]);
324 atomic_inc(&d
->nfree
);
327 void cxgb4_l2t_release(struct l2t_entry
*e
)
329 if (atomic_dec_and_test(&e
->refcnt
))
332 EXPORT_SYMBOL(cxgb4_l2t_release
);
335 * Update an L2T entry that was previously used for the same next hop as neigh.
336 * Must be called with softirqs disabled.
338 static void reuse_entry(struct l2t_entry
*e
, struct neighbour
*neigh
)
340 unsigned int nud_state
;
342 spin_lock(&e
->lock
); /* avoid race with t4_l2t_free */
343 if (neigh
!= e
->neigh
)
344 neigh_replace(e
, neigh
);
345 nud_state
= neigh
->nud_state
;
346 if (memcmp(e
->dmac
, neigh
->ha
, sizeof(e
->dmac
)) ||
347 !(nud_state
& NUD_VALID
))
348 e
->state
= L2T_STATE_RESOLVING
;
349 else if (nud_state
& NUD_CONNECTED
)
350 e
->state
= L2T_STATE_VALID
;
352 e
->state
= L2T_STATE_STALE
;
353 spin_unlock(&e
->lock
);
356 struct l2t_entry
*cxgb4_l2t_get(struct l2t_data
*d
, struct neighbour
*neigh
,
357 const struct net_device
*physdev
,
358 unsigned int priority
)
363 int addr_len
= neigh
->tbl
->key_len
;
364 u32
*addr
= (u32
*)neigh
->primary_key
;
365 int ifidx
= neigh
->dev
->ifindex
;
366 int hash
= addr_hash(addr
, addr_len
, ifidx
);
368 if (neigh
->dev
->flags
& IFF_LOOPBACK
)
369 lport
= netdev2pinfo(physdev
)->tx_chan
+ 4;
371 lport
= netdev2pinfo(physdev
)->lport
;
373 if (neigh
->dev
->priv_flags
& IFF_802_1Q_VLAN
)
374 vlan
= vlan_dev_vlan_id(neigh
->dev
);
378 write_lock_bh(&d
->lock
);
379 for (e
= d
->l2tab
[hash
].first
; e
; e
= e
->next
)
380 if (!addreq(e
, addr
) && e
->ifindex
== ifidx
&&
381 e
->vlan
== vlan
&& e
->lport
== lport
) {
383 if (atomic_read(&e
->refcnt
) == 1)
384 reuse_entry(e
, neigh
);
388 /* Need to allocate a new entry */
391 spin_lock(&e
->lock
); /* avoid race with t4_l2e_free */
392 e
->state
= L2T_STATE_RESOLVING
;
393 memcpy(e
->addr
, addr
, addr_len
);
397 e
->v6
= addr_len
== 16;
398 atomic_set(&e
->refcnt
, 1);
399 neigh_replace(e
, neigh
);
401 e
->next
= d
->l2tab
[hash
].first
;
402 d
->l2tab
[hash
].first
= e
;
403 spin_unlock(&e
->lock
);
406 write_unlock_bh(&d
->lock
);
409 EXPORT_SYMBOL(cxgb4_l2t_get
);
412 * Called when address resolution fails for an L2T entry to handle packets
413 * on the arpq head. If a packet specifies a failure handler it is invoked,
414 * otherwise the packet is sent to the device.
416 static void handle_failed_resolution(struct adapter
*adap
, struct sk_buff
*arpq
)
419 struct sk_buff
*skb
= arpq
;
420 const struct l2t_skb_cb
*cb
= L2T_SKB_CB(skb
);
424 if (cb
->arp_err_handler
)
425 cb
->arp_err_handler(cb
->handle
, skb
);
427 t4_ofld_send(adap
, skb
);
432 * Called when the host's neighbor layer makes a change to some entry that is
433 * loaded into the HW L2 table.
435 void t4_l2t_update(struct adapter
*adap
, struct neighbour
*neigh
)
438 struct sk_buff
*arpq
= NULL
;
439 struct l2t_data
*d
= adap
->l2t
;
440 int addr_len
= neigh
->tbl
->key_len
;
441 u32
*addr
= (u32
*) neigh
->primary_key
;
442 int ifidx
= neigh
->dev
->ifindex
;
443 int hash
= addr_hash(addr
, addr_len
, ifidx
);
445 read_lock_bh(&d
->lock
);
446 for (e
= d
->l2tab
[hash
].first
; e
; e
= e
->next
)
447 if (!addreq(e
, addr
) && e
->ifindex
== ifidx
) {
449 if (atomic_read(&e
->refcnt
))
451 spin_unlock(&e
->lock
);
454 read_unlock_bh(&d
->lock
);
458 read_unlock(&d
->lock
);
460 if (neigh
!= e
->neigh
)
461 neigh_replace(e
, neigh
);
463 if (e
->state
== L2T_STATE_RESOLVING
) {
464 if (neigh
->nud_state
& NUD_FAILED
) {
466 e
->arpq_head
= e
->arpq_tail
= NULL
;
467 } else if ((neigh
->nud_state
& (NUD_CONNECTED
| NUD_STALE
)) &&
469 write_l2e(adap
, e
, 1);
472 e
->state
= neigh
->nud_state
& NUD_CONNECTED
?
473 L2T_STATE_VALID
: L2T_STATE_STALE
;
474 if (memcmp(e
->dmac
, neigh
->ha
, sizeof(e
->dmac
)))
475 write_l2e(adap
, e
, 0);
478 spin_unlock_bh(&e
->lock
);
481 handle_failed_resolution(adap
, arpq
);
484 struct l2t_data
*t4_init_l2t(void)
489 d
= t4_alloc_mem(sizeof(*d
));
494 atomic_set(&d
->nfree
, L2T_SIZE
);
495 rwlock_init(&d
->lock
);
497 for (i
= 0; i
< L2T_SIZE
; ++i
) {
499 d
->l2tab
[i
].state
= L2T_STATE_UNUSED
;
500 spin_lock_init(&d
->l2tab
[i
].lock
);
501 atomic_set(&d
->l2tab
[i
].refcnt
, 0);
506 #include <linux/module.h>
507 #include <linux/debugfs.h>
508 #include <linux/seq_file.h>
510 static inline void *l2t_get_idx(struct seq_file
*seq
, loff_t pos
)
512 struct l2t_entry
*l2tab
= seq
->private;
514 return pos
>= L2T_SIZE
? NULL
: &l2tab
[pos
];
517 static void *l2t_seq_start(struct seq_file
*seq
, loff_t
*pos
)
519 return *pos
? l2t_get_idx(seq
, *pos
- 1) : SEQ_START_TOKEN
;
522 static void *l2t_seq_next(struct seq_file
*seq
, void *v
, loff_t
*pos
)
524 v
= l2t_get_idx(seq
, *pos
);
/*
 * seq_file ->stop handler.  Intentionally empty: the start/next handlers
 * shown in this file only compute pointers into the table, so there is
 * nothing to undo here.
 */
static void l2t_seq_stop(struct seq_file *seq, void *v)
{
}
534 static char l2e_state(const struct l2t_entry
*e
)
537 case L2T_STATE_VALID
: return 'V';
538 case L2T_STATE_STALE
: return 'S';
539 case L2T_STATE_SYNC_WRITE
: return 'W';
540 case L2T_STATE_RESOLVING
: return e
->arpq_head
? 'A' : 'R';
541 case L2T_STATE_SWITCHING
: return 'X';
547 static int l2t_seq_show(struct seq_file
*seq
, void *v
)
549 if (v
== SEQ_START_TOKEN
)
550 seq_puts(seq
, " Idx IP address "
551 "Ethernet address VLAN/P LP State Users Port\n");
554 struct l2t_entry
*e
= v
;
556 spin_lock_bh(&e
->lock
);
557 if (e
->state
== L2T_STATE_SWITCHING
)
560 sprintf(ip
, e
->v6
? "%pI6c" : "%pI4", e
->addr
);
561 seq_printf(seq
, "%4u %-25s %17pM %4d %u %2u %c %5u %s\n",
563 e
->vlan
& VLAN_VID_MASK
, vlan_prio(e
), e
->lport
,
564 l2e_state(e
), atomic_read(&e
->refcnt
),
565 e
->neigh
? e
->neigh
->dev
->name
: "");
566 spin_unlock_bh(&e
->lock
);
571 static const struct seq_operations l2t_seq_ops
= {
572 .start
= l2t_seq_start
,
573 .next
= l2t_seq_next
,
574 .stop
= l2t_seq_stop
,
578 static int l2t_seq_open(struct inode
*inode
, struct file
*file
)
580 int rc
= seq_open(file
, &l2t_seq_ops
);
583 struct adapter
*adap
= inode
->i_private
;
584 struct seq_file
*seq
= file
->private_data
;
586 seq
->private = adap
->l2t
->l2tab
;
591 const struct file_operations t4_l2t_fops
= {
592 .owner
= THIS_MODULE
,
593 .open
= l2t_seq_open
,
596 .release
= seq_release
,