2 * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 #include <linux/spinlock.h>
35 #include <linux/pci.h>
37 #include <linux/delay.h>
38 #include <linux/netdevice.h>
39 #include <linux/vmalloc.h>
44 * The size has to be longer than this string, so we can append
45 * board/chip information to it in the init code.
47 const char ib_qib_version
[] = QIB_IDSTR
"\n";
49 DEFINE_SPINLOCK(qib_devs_lock
);
50 LIST_HEAD(qib_dev_list
);
51 DEFINE_MUTEX(qib_mutex
); /* general driver use */
54 module_param_named(ibmtu
, qib_ibmtu
, uint
, S_IRUGO
);
55 MODULE_PARM_DESC(ibmtu
, "Set max IB MTU (0=2KB, 1=256, 2=512, ... 5=4096");
57 unsigned qib_compat_ddr_negotiate
= 1;
58 module_param_named(compat_ddr_negotiate
, qib_compat_ddr_negotiate
, uint
,
60 MODULE_PARM_DESC(compat_ddr_negotiate
,
61 "Attempt pre-IBTA 1.2 DDR speed negotiation");
63 MODULE_LICENSE("Dual BSD/GPL");
64 MODULE_AUTHOR("QLogic <support@qlogic.com>");
65 MODULE_DESCRIPTION("QLogic IB driver");
/*
 * Largest IB header size we allow for in our PIO send buffers.
 * Deliberately generous: this is well beyond anything currently
 * defined in the InfiniBand spec.
 */
#define QIB_PIO_MAXIBHDR	128
74 struct qlogic_ib_stats qib_stats
;
/**
 * qib_get_unit_name - format the "infinipath<N>" name for a unit
 * @unit: unit number
 *
 * Returns a pointer to a static buffer that is overwritten by every call,
 * so the result must be used (or copied) before the next call, and the
 * function is not safe against concurrent callers.  A name that does not
 * fit in the 16-byte buffer is truncated (snprintf() still terminates it).
 */
const char *qib_get_unit_name(int unit)
{
	static char iname[16];

	/* %u is kept so the output is unchanged; cast so the types agree. */
	snprintf(iname, sizeof(iname), "infinipath%u", (unsigned int)unit);
	return iname;
}
85 * Return count of units with at least one port ACTIVE.
87 int qib_count_active_units(void)
89 struct qib_devdata
*dd
;
90 struct qib_pportdata
*ppd
;
92 int pidx
, nunits_active
= 0;
94 spin_lock_irqsave(&qib_devs_lock
, flags
);
95 list_for_each_entry(dd
, &qib_dev_list
, list
) {
96 if (!(dd
->flags
& QIB_PRESENT
) || !dd
->kregbase
)
98 for (pidx
= 0; pidx
< dd
->num_pports
; ++pidx
) {
99 ppd
= dd
->pport
+ pidx
;
100 if (ppd
->lid
&& (ppd
->lflags
& (QIBL_LINKINIT
|
101 QIBL_LINKARMED
| QIBL_LINKACTIVE
))) {
107 spin_unlock_irqrestore(&qib_devs_lock
, flags
);
108 return nunits_active
;
112 * Return count of all units, optionally return in arguments
113 * the number of usable (present) units, and the number of
116 int qib_count_units(int *npresentp
, int *nupp
)
118 int nunits
= 0, npresent
= 0, nup
= 0;
119 struct qib_devdata
*dd
;
122 struct qib_pportdata
*ppd
;
124 spin_lock_irqsave(&qib_devs_lock
, flags
);
126 list_for_each_entry(dd
, &qib_dev_list
, list
) {
128 if ((dd
->flags
& QIB_PRESENT
) && dd
->kregbase
)
130 for (pidx
= 0; pidx
< dd
->num_pports
; ++pidx
) {
131 ppd
= dd
->pport
+ pidx
;
132 if (ppd
->lid
&& (ppd
->lflags
& (QIBL_LINKINIT
|
133 QIBL_LINKARMED
| QIBL_LINKACTIVE
)))
138 spin_unlock_irqrestore(&qib_devs_lock
, flags
);
141 *npresentp
= npresent
;
149 * qib_wait_linkstate - wait for an IB link state change to occur
150 * @dd: the qlogic_ib device
151 * @state: the state to wait for
152 * @msecs: the number of milliseconds to wait
154 * wait up to msecs milliseconds for IB link state change to occur for
155 * now, take the easy polling route. Currently used only by
156 * qib_set_linkstate. Returns 0 if state reached, otherwise
157 * -ETIMEDOUT state can have multiple states set, for any of several
160 int qib_wait_linkstate(struct qib_pportdata
*ppd
, u32 state
, int msecs
)
165 spin_lock_irqsave(&ppd
->lflags_lock
, flags
);
166 if (ppd
->state_wanted
) {
167 spin_unlock_irqrestore(&ppd
->lflags_lock
, flags
);
171 ppd
->state_wanted
= state
;
172 spin_unlock_irqrestore(&ppd
->lflags_lock
, flags
);
173 wait_event_interruptible_timeout(ppd
->state_wait
,
174 (ppd
->lflags
& state
),
175 msecs_to_jiffies(msecs
));
176 spin_lock_irqsave(&ppd
->lflags_lock
, flags
);
177 ppd
->state_wanted
= 0;
178 spin_unlock_irqrestore(&ppd
->lflags_lock
, flags
);
180 if (!(ppd
->lflags
& state
))
188 int qib_set_linkstate(struct qib_pportdata
*ppd
, u8 newstate
)
192 struct qib_devdata
*dd
= ppd
->dd
;
196 case QIB_IB_LINKDOWN_ONLY
:
197 dd
->f_set_ib_cfg(ppd
, QIB_IB_CFG_LSTATE
,
198 IB_LINKCMD_DOWN
| IB_LINKINITCMD_NOP
);
203 case QIB_IB_LINKDOWN
:
204 dd
->f_set_ib_cfg(ppd
, QIB_IB_CFG_LSTATE
,
205 IB_LINKCMD_DOWN
| IB_LINKINITCMD_POLL
);
210 case QIB_IB_LINKDOWN_SLEEP
:
211 dd
->f_set_ib_cfg(ppd
, QIB_IB_CFG_LSTATE
,
212 IB_LINKCMD_DOWN
| IB_LINKINITCMD_SLEEP
);
217 case QIB_IB_LINKDOWN_DISABLE
:
218 dd
->f_set_ib_cfg(ppd
, QIB_IB_CFG_LSTATE
,
219 IB_LINKCMD_DOWN
| IB_LINKINITCMD_DISABLE
);
225 if (ppd
->lflags
& QIBL_LINKARMED
) {
229 if (!(ppd
->lflags
& (QIBL_LINKINIT
| QIBL_LINKACTIVE
))) {
234 * Since the port can be ACTIVE when we ask for ARMED,
235 * clear QIBL_LINKV so we can wait for a transition.
236 * If the link isn't ARMED, then something else happened
237 * and there is no point waiting for ARMED.
239 spin_lock_irqsave(&ppd
->lflags_lock
, flags
);
240 ppd
->lflags
&= ~QIBL_LINKV
;
241 spin_unlock_irqrestore(&ppd
->lflags_lock
, flags
);
242 dd
->f_set_ib_cfg(ppd
, QIB_IB_CFG_LSTATE
,
243 IB_LINKCMD_ARMED
| IB_LINKINITCMD_NOP
);
247 case QIB_IB_LINKACTIVE
:
248 if (ppd
->lflags
& QIBL_LINKACTIVE
) {
252 if (!(ppd
->lflags
& QIBL_LINKARMED
)) {
256 dd
->f_set_ib_cfg(ppd
, QIB_IB_CFG_LSTATE
,
257 IB_LINKCMD_ACTIVE
| IB_LINKINITCMD_NOP
);
258 lstate
= QIBL_LINKACTIVE
;
265 ret
= qib_wait_linkstate(ppd
, lstate
, 10);
272 * Get address of eager buffer from it's index (allocated in chunks, not
275 static inline void *qib_get_egrbuf(const struct qib_ctxtdata
*rcd
, u32 etail
)
277 const u32 chunk
= etail
/ rcd
->rcvegrbufs_perchunk
;
278 const u32 idx
= etail
% rcd
->rcvegrbufs_perchunk
;
280 return rcd
->rcvegrbuf
[chunk
] + idx
* rcd
->dd
->rcvegrbufsize
;
284 * Returns 1 if error was a CRC, else 0.
285 * Needed for some chip's synthesized error counters.
287 static u32
qib_rcv_hdrerr(struct qib_pportdata
*ppd
, u32 ctxt
,
288 u32 eflags
, u32 l
, u32 etail
, __le32
*rhf_addr
,
289 struct qib_message_header
*hdr
)
293 if (eflags
& (QLOGIC_IB_RHF_H_ICRCERR
| QLOGIC_IB_RHF_H_VCRCERR
))
299 * qib_kreceive - receive a packet
300 * @rcd: the qlogic_ib context
301 * @llic: gets count of good packets needed to clear lli,
302 * (used with chips that need to track crcs for lli)
304 * called from interrupt handler for errors or receive interrupt
305 * Returns number of CRC error packets, needed by some chips for
306 * local link integrity tracking. crcs are adjusted down by following
307 * good packets, if any, and count of good packets is also tracked.
309 u32
qib_kreceive(struct qib_ctxtdata
*rcd
, u32
*llic
, u32
*npkts
)
311 struct qib_devdata
*dd
= rcd
->dd
;
312 struct qib_pportdata
*ppd
= rcd
->ppd
;
315 const u32 rsize
= dd
->rcvhdrentsize
; /* words */
316 const u32 maxcnt
= dd
->rcvhdrcnt
* rsize
; /* words */
317 u32 etail
= -1, l
, hdrqtail
;
318 struct qib_message_header
*hdr
;
319 u32 eflags
, etype
, tlen
, i
= 0, updegr
= 0, crcs
= 0;
322 struct qib_qp
*qp
, *nqp
;
325 rhf_addr
= (__le32
*) rcd
->rcvhdrq
+ l
+ dd
->rhf_offset
;
326 if (dd
->flags
& QIB_NODMA_RTAIL
) {
327 u32 seq
= qib_hdrget_seq(rhf_addr
);
328 if (seq
!= rcd
->seq_cnt
)
332 hdrqtail
= qib_get_rcvhdrtail(rcd
);
335 smp_rmb(); /* prevent speculative reads of dma'ed hdrq */
338 for (last
= 0, i
= 1; !last
; i
+= !last
) {
339 hdr
= dd
->f_get_msgheader(dd
, rhf_addr
);
340 eflags
= qib_hdrget_err_flags(rhf_addr
);
341 etype
= qib_hdrget_rcv_type(rhf_addr
);
343 tlen
= qib_hdrget_length_in_bytes(rhf_addr
);
345 if ((dd
->flags
& QIB_NODMA_RTAIL
) ?
346 qib_hdrget_use_egr_buf(rhf_addr
) :
347 (etype
!= RCVHQ_RCV_TYPE_EXPECTED
)) {
348 etail
= qib_hdrget_index(rhf_addr
);
350 if (tlen
> sizeof(*hdr
) ||
351 etype
>= RCVHQ_RCV_TYPE_NON_KD
)
352 ebuf
= qib_get_egrbuf(rcd
, etail
);
355 u16 lrh_len
= be16_to_cpu(hdr
->lrh
[2]) << 2;
357 if (lrh_len
!= tlen
) {
358 qib_stats
.sps_lenerrs
++;
362 if (etype
== RCVHQ_RCV_TYPE_NON_KD
&& !eflags
&&
364 tlen
> (dd
->rcvhdrentsize
- 2 + 1 -
365 qib_hdrget_offset(rhf_addr
)) << 2) {
370 * Both tiderr and qibhdrerr are set for all plain IB
371 * packets; only qibhdrerr should be set.
373 if (unlikely(eflags
))
374 crcs
+= qib_rcv_hdrerr(ppd
, rcd
->ctxt
, eflags
, l
,
375 etail
, rhf_addr
, hdr
);
376 else if (etype
== RCVHQ_RCV_TYPE_NON_KD
) {
377 qib_ib_rcv(rcd
, hdr
, ebuf
, tlen
);
380 else if (llic
&& *llic
)
387 rhf_addr
= (__le32
*) rcd
->rcvhdrq
+ l
+ dd
->rhf_offset
;
388 if (dd
->flags
& QIB_NODMA_RTAIL
) {
389 u32 seq
= qib_hdrget_seq(rhf_addr
);
391 if (++rcd
->seq_cnt
> 13)
393 if (seq
!= rcd
->seq_cnt
)
395 } else if (l
== hdrqtail
)
398 * Update head regs etc., every 16 packets, if not last pkt,
399 * to help prevent rcvhdrq overflows, when many packets
400 * are processed and queue is nearly full.
401 * Don't request an interrupt for intermediate updates.
404 if (!last
&& !(i
& 0xf)) {
405 dd
->f_update_usrhead(rcd
, lval
, updegr
, etail
);
414 * Iterate over all QPs waiting to respond.
415 * The list won't change since the IRQ is only run on one CPU.
417 list_for_each_entry_safe(qp
, nqp
, &rcd
->qp_wait_list
, rspwait
) {
418 list_del_init(&qp
->rspwait
);
419 if (qp
->r_flags
& QIB_R_RSP_NAK
) {
420 qp
->r_flags
&= ~QIB_R_RSP_NAK
;
423 if (qp
->r_flags
& QIB_R_RSP_SEND
) {
426 qp
->r_flags
&= ~QIB_R_RSP_SEND
;
427 spin_lock_irqsave(&qp
->s_lock
, flags
);
428 if (ib_qib_state_ops
[qp
->state
] &
429 QIB_PROCESS_OR_FLUSH_SEND
)
430 qib_schedule_send(qp
);
431 spin_unlock_irqrestore(&qp
->s_lock
, flags
);
433 if (atomic_dec_and_test(&qp
->refcount
))
438 /* Report number of packets consumed */
443 * Always write head at end, and setup rcv interrupt, even
444 * if no packets were processed.
446 lval
= (u64
)rcd
->head
| dd
->rhdrhead_intr_off
;
447 dd
->f_update_usrhead(rcd
, lval
, updegr
, etail
);
452 * qib_set_mtu - set the MTU
453 * @ppd: the perport data
456 * We can handle "any" incoming size, the issue here is whether we
457 * need to restrict our outgoing size. For now, we don't do any
458 * sanity checking on this, and we don't deal with what happens to
459 * programs that are already running when the size changes.
460 * NOTE: changing the MTU will usually cause the IBC to go back to
463 int qib_set_mtu(struct qib_pportdata
*ppd
, u16 arg
)
468 if (arg
!= 256 && arg
!= 512 && arg
!= 1024 && arg
!= 2048 &&
473 chk
= ib_mtu_enum_to_int(qib_ibmtu
);
474 if (chk
> 0 && arg
> chk
) {
479 piosize
= ppd
->ibmaxlen
;
482 if (arg
>= (piosize
- QIB_PIO_MAXIBHDR
)) {
483 /* Only if it's not the initial value (or reset to it) */
484 if (piosize
!= ppd
->init_ibmaxlen
) {
485 if (arg
> piosize
&& arg
<= ppd
->init_ibmaxlen
)
486 piosize
= ppd
->init_ibmaxlen
- 2 * sizeof(u32
);
487 ppd
->ibmaxlen
= piosize
;
489 } else if ((arg
+ QIB_PIO_MAXIBHDR
) != ppd
->ibmaxlen
) {
490 piosize
= arg
+ QIB_PIO_MAXIBHDR
- 2 * sizeof(u32
);
491 ppd
->ibmaxlen
= piosize
;
494 ppd
->dd
->f_set_ib_cfg(ppd
, QIB_IB_CFG_MTU
, 0);
502 int qib_set_lid(struct qib_pportdata
*ppd
, u32 lid
, u8 lmc
)
504 struct qib_devdata
*dd
= ppd
->dd
;
508 dd
->f_set_ib_cfg(ppd
, QIB_IB_CFG_LIDLMC
,
509 lid
| (~((1U << lmc
) - 1)) << 16);
511 qib_devinfo(dd
->pcidev
, "IB%u:%u got a lid: 0x%x\n",
512 dd
->unit
, ppd
->port
, lid
);
/*
 * The following deals with the "obviously simple" task of overriding the
 * state of the LEDs, which normally indicate link physical and logical
 * status.  The complications arise in dealing with different hardware
 * mappings and the board-dependent routine being called from interrupts,
 * and then there's the requirement to _flash_ them.
 */
#define LED_OVER_FREQ_SHIFT 8
#define LED_OVER_FREQ_MASK (0xFF << LED_OVER_FREQ_SHIFT)
/* Below is "non-zero" to force override, but both actual LEDs are off */
#define LED_OVER_BOTH_OFF (8)
529 static void qib_run_led_override(unsigned long opaque
)
531 struct qib_pportdata
*ppd
= (struct qib_pportdata
*)opaque
;
532 struct qib_devdata
*dd
= ppd
->dd
;
536 if (!(dd
->flags
& QIB_INITTED
))
539 ph_idx
= ppd
->led_override_phase
++ & 1;
540 ppd
->led_override
= ppd
->led_override_vals
[ph_idx
];
541 timeoff
= ppd
->led_override_timeoff
;
543 dd
->f_setextled(ppd
, 1);
545 * don't re-fire the timer if user asked for it to be off; we let
546 * it fire one more time after they turn it off to simplify
548 if (ppd
->led_override_vals
[0] || ppd
->led_override_vals
[1])
549 mod_timer(&ppd
->led_override_timer
, jiffies
+ timeoff
);
552 void qib_set_led_override(struct qib_pportdata
*ppd
, unsigned int val
)
554 struct qib_devdata
*dd
= ppd
->dd
;
557 if (!(dd
->flags
& QIB_INITTED
))
560 /* First check if we are blinking. If not, use 1HZ polling */
562 freq
= (val
& LED_OVER_FREQ_MASK
) >> LED_OVER_FREQ_SHIFT
;
565 /* For blink, set each phase from one nybble of val */
566 ppd
->led_override_vals
[0] = val
& 0xF;
567 ppd
->led_override_vals
[1] = (val
>> 4) & 0xF;
568 timeoff
= (HZ
<< 4)/freq
;
570 /* Non-blink set both phases the same. */
571 ppd
->led_override_vals
[0] = val
& 0xF;
572 ppd
->led_override_vals
[1] = val
& 0xF;
574 ppd
->led_override_timeoff
= timeoff
;
577 * If the timer has not already been started, do so. Use a "quick"
578 * timeout so the function will be called soon, to look at our request.
580 if (atomic_inc_return(&ppd
->led_override_timer_active
) == 1) {
581 /* Need to start timer */
582 init_timer(&ppd
->led_override_timer
);
583 ppd
->led_override_timer
.function
= qib_run_led_override
;
584 ppd
->led_override_timer
.data
= (unsigned long) ppd
;
585 ppd
->led_override_timer
.expires
= jiffies
+ 1;
586 add_timer(&ppd
->led_override_timer
);
588 if (ppd
->led_override_vals
[0] || ppd
->led_override_vals
[1])
589 mod_timer(&ppd
->led_override_timer
, jiffies
+ 1);
590 atomic_dec(&ppd
->led_override_timer_active
);
595 * qib_reset_device - reset the chip if possible
596 * @unit: the device to reset
598 * Whether or not reset is successful, we attempt to re-initialize the chip
599 * (that is, much like a driver unload/reload). We clear the INITTED flag
600 * so that the various entry points will fail until we reinitialize. For
601 * now, we only allow this if no user contexts are open that use chip resources
603 int qib_reset_device(int unit
)
606 struct qib_devdata
*dd
= qib_lookup(unit
);
607 struct qib_pportdata
*ppd
;
616 qib_devinfo(dd
->pcidev
, "Reset on unit %u requested\n", unit
);
618 if (!dd
->kregbase
|| !(dd
->flags
& QIB_PRESENT
)) {
619 qib_devinfo(dd
->pcidev
, "Invalid unit number %u or "
620 "not initialized or not present\n", unit
);
625 spin_lock_irqsave(&dd
->uctxt_lock
, flags
);
627 for (i
= dd
->first_user_ctxt
; i
< dd
->cfgctxts
; i
++) {
628 if (!dd
->rcd
[i
] || !dd
->rcd
[i
]->cnt
)
630 spin_unlock_irqrestore(&dd
->uctxt_lock
, flags
);
634 spin_unlock_irqrestore(&dd
->uctxt_lock
, flags
);
636 for (pidx
= 0; pidx
< dd
->num_pports
; ++pidx
) {
637 ppd
= dd
->pport
+ pidx
;
638 if (atomic_read(&ppd
->led_override_timer_active
)) {
639 /* Need to stop LED timer, _then_ shut off LEDs */
640 del_timer_sync(&ppd
->led_override_timer
);
641 atomic_set(&ppd
->led_override_timer_active
, 0);
644 /* Shut off LEDs after we are sure timer is not running */
645 ppd
->led_override
= LED_OVER_BOTH_OFF
;
646 dd
->f_setextled(ppd
, 0);
647 if (dd
->flags
& QIB_HAS_SEND_DMA
)
648 qib_teardown_sdma(ppd
);
651 ret
= dd
->f_reset(dd
);
653 ret
= qib_init(dd
, 1);
657 qib_dev_err(dd
, "Reinitialize unit %u after "
658 "reset failed with %d\n", unit
, ret
);
660 qib_devinfo(dd
->pcidev
, "Reinitialized unit %u after "
661 "resetting\n", unit
);