2 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
3 * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved.
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 #include <linux/init.h>
35 #include <linux/interrupt.h>
36 #include <linux/dma-mapping.h>
38 #include <linux/mlx4/cmd.h>
44 MLX4_NUM_ASYNC_EQE
= 0x100,
45 MLX4_NUM_SPARE_EQE
= 0x80,
46 MLX4_EQ_ENTRY_SIZE
= 0x20
50 * Must be packed because start is 64 bits but only aligned to 32 bits.
/*
 * EQ context record.  mlx4_create_eq() builds one of these in a command
 * mailbox buffer (zeroing it, then filling flags, log_eq_size, intr,
 * log_page_size and the MTT base address) before issuing SW2HW_EQ, and
 * mlx4_free_eq() dumps sizeof(struct mlx4_eq_context) bytes of mailbox
 * data as 32-bit big-endian words.  Only some members are visible in
 * this view of the file.
 */
52 struct mlx4_eq_context
{
66 __be32 mtt_base_addr_l
;
68 __be32 consumer_index
;
69 __be32 producer_index
;
/*
 * Bit fields of the 32-bit EQ context "flags" word.  mlx4_create_eq()
 * ORs a STATUS value with a STATE value and stores the result with
 * cpu_to_be32().
 *
 * The constants are unsigned on purpose: MLX4_EQ_STATUS_WRITE_FAIL
 * reaches bit 31, and left-shifting a signed int so that the result is
 * not representable is undefined behaviour in C.  The bit patterns are
 * unchanged.
 */
#define MLX4_EQ_STATUS_OK	   ( 0u << 28)
#define MLX4_EQ_STATUS_WRITE_FAIL  (10u << 28)
#define MLX4_EQ_OWNER_SW	   ( 0u << 24)
#define MLX4_EQ_OWNER_HW	   ( 1u << 24)
#define MLX4_EQ_FLAG_EC		   ( 1u << 18)
#define MLX4_EQ_FLAG_OI		   ( 1u << 17)
#define MLX4_EQ_STATE_ARMED	   ( 9u <<  8)
#define MLX4_EQ_STATE_FIRED	   (10u <<  8)
#define MLX4_EQ_STATE_ALWAYS_ARMED (11u <<  8)
/*
 * 64-bit mask with one bit set for each asynchronous event type; the
 * bit position is the MLX4_EVENT_TYPE_* value.  mlx4_init_eq_table()
 * passes it to mlx4_MAP_EQ() for the async EQ, and
 * mlx4_cleanup_eq_table() passes the same mask with the unmap flag set.
 */
#define MLX4_ASYNC_EVENT_MASK					\
	((1ull << MLX4_EVENT_TYPE_PATH_MIG)			\
	 | (1ull << MLX4_EVENT_TYPE_COMM_EST)			\
	 | (1ull << MLX4_EVENT_TYPE_SQ_DRAINED)			\
	 | (1ull << MLX4_EVENT_TYPE_CQ_ERROR)			\
	 | (1ull << MLX4_EVENT_TYPE_WQ_CATAS_ERROR)		\
	 | (1ull << MLX4_EVENT_TYPE_EEC_CATAS_ERROR)		\
	 | (1ull << MLX4_EVENT_TYPE_PATH_MIG_FAILED)		\
	 | (1ull << MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR)		\
	 | (1ull << MLX4_EVENT_TYPE_WQ_ACCESS_ERROR)		\
	 | (1ull << MLX4_EVENT_TYPE_PORT_CHANGE)		\
	 | (1ull << MLX4_EVENT_TYPE_ECC_DETECT)			\
	 | (1ull << MLX4_EVENT_TYPE_SRQ_CATAS_ERROR)		\
	 | (1ull << MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE)		\
	 | (1ull << MLX4_EVENT_TYPE_SRQ_LIMIT)			\
	 | (1ull << MLX4_EVENT_TYPE_CMD))
108 } __attribute__((packed
)) comp
;
116 } __attribute__((packed
)) cmd
;
119 } __attribute__((packed
)) qp
;
122 } __attribute__((packed
)) srq
;
128 } __attribute__((packed
)) cq_err
;
132 } __attribute__((packed
)) port_change
;
136 } __attribute__((packed
));
/*
 * Publish the consumer index of @eq to the device.  The low 24 bits of
 * eq->cons_index are combined with a further term, converted with
 * cpu_to_be32() and stored using __raw_writel(), i.e. without the
 * byte swapping a plain writel() would add; per the comment below a
 * barrier is then issued to keep the ordering.
 *
 * NOTE(review): how @req_not enters the written word and which register
 * is written are on lines not visible in this view; mlx4_init_eq_table()
 * calls this with req_not == 1 for every EQ once setup is done.
 */
138 static void eq_set_ci(struct mlx4_eq
*eq
, int req_not
)
140 __raw_writel((__force u32
) cpu_to_be32((eq
->cons_index
& 0xffffff) |
143 /* We still want ordering, just not swabbing, so add a barrier */
147 static struct mlx4_eqe
*get_eqe(struct mlx4_eq
*eq
, u32 entry
)
149 unsigned long off
= (entry
& (eq
->nent
- 1)) * MLX4_EQ_ENTRY_SIZE
;
150 return eq
->page_list
[off
/ PAGE_SIZE
].buf
+ off
% PAGE_SIZE
;
153 static struct mlx4_eqe
*next_eqe_sw(struct mlx4_eq
*eq
)
155 struct mlx4_eqe
*eqe
= get_eqe(eq
, eq
->cons_index
);
156 return !!(eqe
->owner
& 0x80) ^ !!(eq
->cons_index
& eq
->nent
) ? NULL
: eqe
;
/*
 * Process the pending entries of @eq.
 *
 * Entries are fetched with next_eqe_sw() until it returns NULL and are
 * dispatched by event type:
 *   - COMP:         mlx4_cq_completion() with the 24-bit CQ number.
 *   - QP events:    mlx4_qp_event() with the 24-bit QP number.
 *   - SRQ events:   mlx4_srq_event() with the 24-bit SRQ number.
 *   - CMD:          token, status and out_param of the entry are handed
 *                   on (the callee is on a line not visible here).
 *   - PORT_CHANGE:  mlx4_dispatch_event() with the top four bits of the
 *                   port word as the port argument.
 *   - CQ_ERROR:     warning, then mlx4_cq_event().
 *   - EQ_OVERFLOW:  warning only.
 *   - EEC_CATAS_ERROR / ECC_DETECT: reported as "Unhandled event".
 *
 * Once the running count set_ci reaches MLX4_NUM_SPARE_EQE a special
 * branch is taken; per the in-body comment this is where the consumer
 * index is reported to the HCA so that it does not consider the queue
 * overflowed.
 *
 * The int result is ORed into "work" by mlx4_interrupt() and turned
 * into the handler return value with IRQ_RETVAL().
 *
 * NOTE(review): the CQ number passed to mlx4_cq_event() in the CQ_ERROR
 * case is not masked with 0xffffff, unlike the value printed just
 * before it and the CQ number used in the COMP case.  Confirm whether
 * the top byte can be non-zero.
 */
159 static int mlx4_eq_int(struct mlx4_dev
*dev
, struct mlx4_eq
*eq
)
161 struct mlx4_eqe
*eqe
;
166 while ((eqe
= next_eqe_sw(eq
))) {
168 * Make sure we read EQ entry contents after we've
169 * checked the ownership bit.
174 case MLX4_EVENT_TYPE_COMP
:
175 cqn
= be32_to_cpu(eqe
->event
.comp
.cqn
) & 0xffffff;
176 mlx4_cq_completion(dev
, cqn
);
179 case MLX4_EVENT_TYPE_PATH_MIG
:
180 case MLX4_EVENT_TYPE_COMM_EST
:
181 case MLX4_EVENT_TYPE_SQ_DRAINED
:
182 case MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE
:
183 case MLX4_EVENT_TYPE_WQ_CATAS_ERROR
:
184 case MLX4_EVENT_TYPE_PATH_MIG_FAILED
:
185 case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR
:
186 case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR
:
187 mlx4_qp_event(dev
, be32_to_cpu(eqe
->event
.qp
.qpn
) & 0xffffff,
191 case MLX4_EVENT_TYPE_SRQ_LIMIT
:
192 case MLX4_EVENT_TYPE_SRQ_CATAS_ERROR
:
193 mlx4_srq_event(dev
, be32_to_cpu(eqe
->event
.srq
.srqn
) & 0xffffff,
197 case MLX4_EVENT_TYPE_CMD
:
199 be16_to_cpu(eqe
->event
.cmd
.token
),
200 eqe
->event
.cmd
.status
,
201 be64_to_cpu(eqe
->event
.cmd
.out_param
));
204 case MLX4_EVENT_TYPE_PORT_CHANGE
:
205 mlx4_dispatch_event(dev
, eqe
->type
, eqe
->subtype
,
206 be32_to_cpu(eqe
->event
.port_change
.port
) >> 28);
209 case MLX4_EVENT_TYPE_CQ_ERROR
:
210 mlx4_warn(dev
, "CQ %s on CQN %06x\n",
211 eqe
->event
.cq_err
.syndrome
== 1 ?
212 "overrun" : "access violation",
213 be32_to_cpu(eqe
->event
.cq_err
.cqn
) & 0xffffff);
214 mlx4_cq_event(dev
, be32_to_cpu(eqe
->event
.cq_err
.cqn
),
218 case MLX4_EVENT_TYPE_EQ_OVERFLOW
:
219 mlx4_warn(dev
, "EQ overrun on EQN %d\n", eq
->eqn
);
222 case MLX4_EVENT_TYPE_EEC_CATAS_ERROR
:
223 case MLX4_EVENT_TYPE_ECC_DETECT
:
225 mlx4_warn(dev
, "Unhandled event %02x(%02x) on EQ %d at index %u\n",
226 eqe
->type
, eqe
->subtype
, eq
->eqn
, eq
->cons_index
);
235 * The HCA will think the queue has overflowed if we
236 * don't tell it we've been processing events. We
237 * create our EQs with MLX4_NUM_SPARE_EQE extra
238 * entries, so we must update our consumer index at
241 if (unlikely(set_ci
>= MLX4_NUM_SPARE_EQE
)) {
243 * Conditional on hca_type is OK here because
244 * this is a rare case, not the fast path.
/*
 * Handler for the shared (non MSI-X) interrupt line; mlx4_init_eq_table()
 * registers it with IRQF_SHARED and the struct mlx4_dev as @dev_ptr.
 *
 * It first writes eq_table.clr_mask to the eq_table.clr_int register,
 * then runs mlx4_eq_int() on each of the MLX4_NUM_EQ event queues and
 * ORs the results into "work".  The return value is IRQ_RETVAL(work),
 * so the interrupt is reported as handled only if some EQ returned
 * non-zero.
 */
256 static irqreturn_t
mlx4_interrupt(int irq
, void *dev_ptr
)
258 struct mlx4_dev
*dev
= dev_ptr
;
259 struct mlx4_priv
*priv
= mlx4_priv(dev
);
263 writel(priv
->eq_table
.clr_mask
, priv
->eq_table
.clr_int
);
265 for (i
= 0; i
< MLX4_NUM_EQ
; ++i
)
266 work
|= mlx4_eq_int(dev
, &priv
->eq_table
.eq
[i
]);
268 return IRQ_RETVAL(work
);
/*
 * Per-EQ interrupt handler used in MSI-X mode; mlx4_init_eq_table()
 * registers it once per EQ with a pointer to that struct mlx4_eq as
 * @eq_ptr.  Only that one EQ is processed, and the result of
 * mlx4_eq_int() is ignored because, as the comment below says, MSI-X
 * vectors are never shared.  The return statement itself is on a line
 * not visible in this view.
 */
271 static irqreturn_t
mlx4_msi_x_interrupt(int irq
, void *eq_ptr
)
273 struct mlx4_eq
*eq
= eq_ptr
;
274 struct mlx4_dev
*dev
= eq
->dev
;
276 mlx4_eq_int(dev
, eq
);
278 /* MSI-X vectors always belong to us */
/*
 * Issue the MLX4_CMD_MAP_EQ firmware command through mlx4_cmd() with
 * time class B.  @event_mask is passed as the second argument and
 * (unmap << 31) | eq_num as the third; the result of mlx4_cmd() is
 * returned unchanged.
 *
 * Callers in this file pass unmap == 0 from mlx4_init_eq_table() and
 * unmap == 1 from mlx4_cleanup_eq_table().
 *
 * NOTE(review): @unmap is an int, so "unmap << 31" with unmap == 1
 * shifts a signed value into the sign bit; consider an unsigned
 * operand.
 */
282 static int mlx4_MAP_EQ(struct mlx4_dev
*dev
, u64 event_mask
, int unmap
,
285 return mlx4_cmd(dev
, event_mask
, (unmap
<< 31) | eq_num
,
286 0, MLX4_CMD_MAP_EQ
, MLX4_CMD_TIME_CLASS_B
);
/*
 * Issue the MLX4_CMD_SW2HW_EQ firmware command (time class A) for EQ
 * number eq_num, passing the DMA address of @mailbox as the command
 * input.  mlx4_create_eq() fills the mailbox with the EQ context before
 * calling this.  Returns the result of mlx4_cmd().
 */
289 static int mlx4_SW2HW_EQ(struct mlx4_dev
*dev
, struct mlx4_cmd_mailbox
*mailbox
,
292 return mlx4_cmd(dev
, mailbox
->dma
, eq_num
, 0, MLX4_CMD_SW2HW_EQ
,
293 MLX4_CMD_TIME_CLASS_A
);
/*
 * Issue the MLX4_CMD_HW2SW_EQ firmware command (time class A) for EQ
 * number eq_num through mlx4_cmd_box(), with 0 as the input parameter
 * and the DMA address of @mailbox as the second one.  mlx4_free_eq()
 * afterwards dumps the mailbox contents as the EQ context.  Returns the
 * result of mlx4_cmd_box().
 */
296 static int mlx4_HW2SW_EQ(struct mlx4_dev
*dev
, struct mlx4_cmd_mailbox
*mailbox
,
299 return mlx4_cmd_box(dev
, 0, mailbox
->dma
, eq_num
, 0, MLX4_CMD_HW2SW_EQ
,
300 MLX4_CMD_TIME_CLASS_A
);
/*
 * Return the doorbell address for @eq, creating the I/O mapping on
 * first use.
 *
 * Four consecutive EQ numbers share one entry of eq_table.uar_map[]:
 * the slot index is eqn / 4 - reserved_eqs / 4.  An empty slot is
 * filled by ioremap() of PCI resource 2 at offset
 * (eqn / 4) << PAGE_SHIFT (the mapping length is on a line not visible
 * here); a failed mapping is logged with mlx4_err().  The returned
 * address is the slot base + 0x800 + 8 * (eqn % 4).
 *
 * The mappings are released by mlx4_cleanup_eq_table(), which calls
 * iounmap() on every non-NULL uar_map[] entry.
 */
303 static void __devinit __iomem
*mlx4_get_eq_uar(struct mlx4_dev
*dev
,
306 struct mlx4_priv
*priv
= mlx4_priv(dev
);
309 index
= eq
->eqn
/ 4 - dev
->caps
.reserved_eqs
/ 4;
311 if (!priv
->eq_table
.uar_map
[index
]) {
312 priv
->eq_table
.uar_map
[index
] =
313 ioremap(pci_resource_start(dev
->pdev
, 2) +
314 ((eq
->eqn
/ 4) << PAGE_SHIFT
),
316 if (!priv
->eq_table
.uar_map
[index
]) {
317 mlx4_err(dev
, "Couldn't map EQ doorbell for EQN 0x%06x\n",
323 return priv
->eq_table
.uar_map
[index
] + 0x800 + 8 * (eq
->eqn
% 4);
/*
 * Allocate and hand over to hardware one event queue.
 *
 * Visible steps, in order:
 *   1. eq->nent is set to @nent rounded up to a power of two (at least
 *      2) and npages to the number of pages needed for nent entries of
 *      MLX4_EQ_ENTRY_SIZE bytes.
 *   2. eq->page_list (entries initialised to buf == NULL) and a
 *      temporary dma_list array are allocated with kmalloc(), and a
 *      command mailbox is allocated; its buffer is used as the EQ
 *      context.
 *   3. Each page is obtained with dma_alloc_coherent(), recorded in
 *      eq->page_list[i] and cleared.
 *   4. An EQ number is taken from eq_table.bitmap and the doorbell
 *      address is obtained from mlx4_get_eq_uar().
 *   5. An MTT is initialised for npages pages and written from
 *      dma_list.
 *   6. The context is zeroed and filled: flags = STATUS_OK |
 *      STATE_ARMED, log_eq_size = ilog2(nent), intr = @intr,
 *      log_page_size = PAGE_SHIFT - MLX4_ICM_PAGE_SHIFT, plus the MTT
 *      base address split into high and low parts.
 *   7. mlx4_SW2HW_EQ() is issued; a failure is logged with mlx4_warn().
 *
 * On success the mailbox is freed.  The error labels unwind in reverse
 * order: MTT cleanup, EQ number release, freeing of any pages already
 * allocated, mailbox release, kfree of eq->page_list.
 *
 * NOTE(review): the conditions guarding the gotos, the label lines, the
 * handling of dma_list on exit and the return statements are on lines
 * not visible in this view.
 */
326 static int __devinit
mlx4_create_eq(struct mlx4_dev
*dev
, int nent
,
327 u8 intr
, struct mlx4_eq
*eq
)
329 struct mlx4_priv
*priv
= mlx4_priv(dev
);
330 struct mlx4_cmd_mailbox
*mailbox
;
331 struct mlx4_eq_context
*eq_context
;
333 u64
*dma_list
= NULL
;
340 eq
->nent
= roundup_pow_of_two(max(nent
, 2));
341 npages
= PAGE_ALIGN(eq
->nent
* MLX4_EQ_ENTRY_SIZE
) / PAGE_SIZE
;
343 eq
->page_list
= kmalloc(npages
* sizeof *eq
->page_list
,
348 for (i
= 0; i
< npages
; ++i
)
349 eq
->page_list
[i
].buf
= NULL
;
351 dma_list
= kmalloc(npages
* sizeof *dma_list
, GFP_KERNEL
);
355 mailbox
= mlx4_alloc_cmd_mailbox(dev
);
358 eq_context
= mailbox
->buf
;
360 for (i
= 0; i
< npages
; ++i
) {
361 eq
->page_list
[i
].buf
= dma_alloc_coherent(&dev
->pdev
->dev
,
362 PAGE_SIZE
, &t
, GFP_KERNEL
);
363 if (!eq
->page_list
[i
].buf
)
364 goto err_out_free_pages
;
367 eq
->page_list
[i
].map
= t
;
369 memset(eq
->page_list
[i
].buf
, 0, PAGE_SIZE
);
372 eq
->eqn
= mlx4_bitmap_alloc(&priv
->eq_table
.bitmap
);
374 goto err_out_free_pages
;
376 eq
->doorbell
= mlx4_get_eq_uar(dev
, eq
);
379 goto err_out_free_eq
;
382 err
= mlx4_mtt_init(dev
, npages
, PAGE_SHIFT
, &eq
->mtt
);
384 goto err_out_free_eq
;
386 err
= mlx4_write_mtt(dev
, &eq
->mtt
, 0, npages
, dma_list
);
388 goto err_out_free_mtt
;
390 memset(eq_context
, 0, sizeof *eq_context
);
391 eq_context
->flags
= cpu_to_be32(MLX4_EQ_STATUS_OK
|
392 MLX4_EQ_STATE_ARMED
);
393 eq_context
->log_eq_size
= ilog2(eq
->nent
);
394 eq_context
->intr
= intr
;
395 eq_context
->log_page_size
= PAGE_SHIFT
- MLX4_ICM_PAGE_SHIFT
;
397 mtt_addr
= mlx4_mtt_addr(dev
, &eq
->mtt
);
398 eq_context
->mtt_base_addr_h
= mtt_addr
>> 32;
399 eq_context
->mtt_base_addr_l
= cpu_to_be32(mtt_addr
& 0xffffffff);
401 err
= mlx4_SW2HW_EQ(dev
, mailbox
, eq
->eqn
);
403 mlx4_warn(dev
, "SW2HW_EQ failed (%d)\n", err
);
404 goto err_out_free_mtt
;
408 mlx4_free_cmd_mailbox(dev
, mailbox
);
415 mlx4_mtt_cleanup(dev
, &eq
->mtt
);
418 mlx4_bitmap_free(&priv
->eq_table
.bitmap
, eq
->eqn
);
421 for (i
= 0; i
< npages
; ++i
)
422 if (eq
->page_list
[i
].buf
)
423 dma_free_coherent(&dev
->pdev
->dev
, PAGE_SIZE
,
424 eq
->page_list
[i
].buf
,
425 eq
->page_list
[i
].map
);
427 mlx4_free_cmd_mailbox(dev
, mailbox
);
430 kfree(eq
->page_list
);
/*
 * Take an event queue back from hardware and release its resources.
 *
 * A command mailbox is allocated and mlx4_HW2SW_EQ() is issued; a
 * failure is only logged with mlx4_warn().  The mailbox contents are
 * then dumped at debug level as sizeof(struct mlx4_eq_context) / 4
 * big-endian 32-bit words, four per row.  After that the MTT is cleaned
 * up, every page of the queue buffer is freed, eq->page_list is
 * kfree()d, the EQ number is returned to eq_table.bitmap and the
 * mailbox is freed.
 *
 * npages is recomputed from eq->nent with the same formula as in
 * mlx4_create_eq().
 *
 * NOTE(review): the pages are released with pci_free_consistent()
 * although mlx4_create_eq() obtains them with dma_alloc_coherent() and
 * frees them with dma_free_coherent() on its error path; confirm the
 * two are equivalent here or use one API family throughout.
 */
437 static void mlx4_free_eq(struct mlx4_dev
*dev
,
440 struct mlx4_priv
*priv
= mlx4_priv(dev
);
441 struct mlx4_cmd_mailbox
*mailbox
;
443 int npages
= PAGE_ALIGN(MLX4_EQ_ENTRY_SIZE
* eq
->nent
) / PAGE_SIZE
;
446 mailbox
= mlx4_alloc_cmd_mailbox(dev
);
450 err
= mlx4_HW2SW_EQ(dev
, mailbox
, eq
->eqn
);
452 mlx4_warn(dev
, "HW2SW_EQ failed (%d)\n", err
);
455 mlx4_dbg(dev
, "Dumping EQ context %02x:\n", eq
->eqn
);
456 for (i
= 0; i
< sizeof (struct mlx4_eq_context
) / 4; ++i
) {
458 printk("[%02x] ", i
* 4);
459 printk(" %08x", be32_to_cpup(mailbox
->buf
+ i
* 4));
460 if ((i
+ 1) % 4 == 0)
465 mlx4_mtt_cleanup(dev
, &eq
->mtt
);
466 for (i
= 0; i
< npages
; ++i
)
467 pci_free_consistent(dev
->pdev
, PAGE_SIZE
,
468 eq
->page_list
[i
].buf
,
469 eq
->page_list
[i
].map
);
471 kfree(eq
->page_list
);
472 mlx4_bitmap_free(&priv
->eq_table
.bitmap
, eq
->eqn
);
473 mlx4_free_cmd_mailbox(dev
, mailbox
);
/*
 * Release every interrupt this file requested.  The shared device IRQ
 * is freed if eq_table->have_irq is set (dev_id is the struct mlx4_dev),
 * and for each of the MLX4_NUM_EQ queues the per-EQ IRQ is freed if that
 * EQ has have_irq set (dev_id is the EQ itself).  The dev_id values
 * match the ones passed to request_irq() in mlx4_init_eq_table().
 */
476 static void mlx4_free_irqs(struct mlx4_dev
*dev
)
478 struct mlx4_eq_table
*eq_table
= &mlx4_priv(dev
)->eq_table
;
481 if (eq_table
->have_irq
)
482 free_irq(dev
->pdev
->irq
, dev
);
483 for (i
= 0; i
< MLX4_NUM_EQ
; ++i
)
484 if (eq_table
->eq
[i
].have_irq
)
485 free_irq(eq_table
->eq
[i
].irq
, eq_table
->eq
+ i
);
/*
 * Map the interrupt clear register.  MLX4_CLR_INT_SIZE bytes starting
 * at offset fw.clr_int_base of PCI resource fw.clr_int_bar are mapped
 * with ioremap() and the result is stored in priv->clr_base; a failed
 * mapping is logged with mlx4_err().  mlx4_unmap_clr_int() undoes the
 * mapping.
 *
 * The return statements are on lines not visible in this view;
 * mlx4_init_eq_table() stores the result in its err variable.
 */
488 static int __devinit
mlx4_map_clr_int(struct mlx4_dev
*dev
)
490 struct mlx4_priv
*priv
= mlx4_priv(dev
);
492 priv
->clr_base
= ioremap(pci_resource_start(dev
->pdev
, priv
->fw
.clr_int_bar
) +
493 priv
->fw
.clr_int_base
, MLX4_CLR_INT_SIZE
);
494 if (!priv
->clr_base
) {
495 mlx4_err(dev
, "Couldn't map interrupt clear register, aborting.\n");
502 static void mlx4_unmap_clr_int(struct mlx4_dev
*dev
)
504 struct mlx4_priv
*priv
= mlx4_priv(dev
);
506 iounmap(priv
->clr_base
);
/*
 * Back the EQ context table with one page of host memory.
 *
 * @icm_virt is remembered in eq_table.icm_virt.  A single page is
 * allocated with alloc_page(GFP_HIGHUSER), DMA-mapped bidirectionally
 * with pci_map_page() and handed to mlx4_MAP_ICM_page() together with
 * @icm_virt.  If the DMA mapping reports an error the page is freed;
 * the later cleanup sequence unmaps and frees the page as well.
 * mlx4_unmap_eq_icm() is the matching teardown.
 *
 * NOTE(review): the in-body comment gives 32 EQs at 64 bytes each as
 * "1 KB total", but 32 * 64 is 2048 bytes (2 KB).  The conclusion that
 * one page is enough presumably still holds; confirm and correct the
 * figure.  The conditions and return statements are on lines not
 * visible in this view.
 */
509 int __devinit
mlx4_map_eq_icm(struct mlx4_dev
*dev
, u64 icm_virt
)
511 struct mlx4_priv
*priv
= mlx4_priv(dev
);
515 * We assume that mapping one page is enough for the whole EQ
516 * context table. This is fine with all current HCAs, because
517 * we only use 32 EQs and each EQ uses 64 bytes of context
518 * memory, or 1 KB total.
520 priv
->eq_table
.icm_virt
= icm_virt
;
521 priv
->eq_table
.icm_page
= alloc_page(GFP_HIGHUSER
);
522 if (!priv
->eq_table
.icm_page
)
524 priv
->eq_table
.icm_dma
= pci_map_page(dev
->pdev
, priv
->eq_table
.icm_page
, 0,
525 PAGE_SIZE
, PCI_DMA_BIDIRECTIONAL
);
526 if (pci_dma_mapping_error(priv
->eq_table
.icm_dma
)) {
527 __free_page(priv
->eq_table
.icm_page
);
531 ret
= mlx4_MAP_ICM_page(dev
, priv
->eq_table
.icm_dma
, icm_virt
);
533 pci_unmap_page(dev
->pdev
, priv
->eq_table
.icm_dma
, PAGE_SIZE
,
534 PCI_DMA_BIDIRECTIONAL
);
535 __free_page(priv
->eq_table
.icm_page
);
541 void mlx4_unmap_eq_icm(struct mlx4_dev
*dev
)
543 struct mlx4_priv
*priv
= mlx4_priv(dev
);
545 mlx4_UNMAP_ICM(dev
, priv
->eq_table
.icm_virt
, 1);
546 pci_unmap_page(dev
->pdev
, priv
->eq_table
.icm_dma
, PAGE_SIZE
,
547 PCI_DMA_BIDIRECTIONAL
);
548 __free_page(priv
->eq_table
.icm_page
);
/*
 * Set up the event queue table of @dev.
 *
 * Visible steps, in order:
 *   1. The EQ number bitmap is initialised for caps.num_eqs entries
 *      with caps.reserved_eqs reserved, and every uar_map[] slot is set
 *      to NULL.
 *   2. The interrupt clear register is mapped; clr_mask becomes the
 *      byte-swapped bit for inta_pin modulo 32 and clr_int points at
 *      clr_base + 4 when inta_pin < 32, else at clr_base.
 *   3. The completion EQ is created with room for caps.num_cqs +
 *      MLX4_NUM_SPARE_EQE entries and the async EQ with
 *      MLX4_NUM_ASYNC_EQE + MLX4_NUM_SPARE_EQE.  Their interrupt vector
 *      argument is the EQ index under MSI-X and 0 otherwise.
 *   4. Under MSI-X one IRQ per EQ is requested with
 *      mlx4_msi_x_interrupt and the EQ as dev_id; otherwise the device
 *      IRQ is requested as shared with mlx4_interrupt.  The have_irq
 *      flags record what mlx4_free_irqs() must release.
 *   5. The async event mask is mapped to the async EQ with
 *      mlx4_MAP_EQ(); a failure is logged with mlx4_warn().
 *   6. eq_set_ci() is called with req_not == 1 for every EQ.
 *
 * The error path frees the async EQ, the completion EQ, the clear
 * register mapping and the bitmap, in that order.
 *
 * NOTE(review): the error checks, label lines and return statements are
 * on lines not visible in this view.
 */
551 int __devinit
mlx4_init_eq_table(struct mlx4_dev
*dev
)
553 struct mlx4_priv
*priv
= mlx4_priv(dev
);
557 err
= mlx4_bitmap_init(&priv
->eq_table
.bitmap
, dev
->caps
.num_eqs
,
558 dev
->caps
.num_eqs
- 1, dev
->caps
.reserved_eqs
);
562 for (i
= 0; i
< ARRAY_SIZE(priv
->eq_table
.uar_map
); ++i
)
563 priv
->eq_table
.uar_map
[i
] = NULL
;
565 err
= mlx4_map_clr_int(dev
);
569 priv
->eq_table
.clr_mask
=
570 swab32(1 << (priv
->eq_table
.inta_pin
& 31));
571 priv
->eq_table
.clr_int
= priv
->clr_base
+
572 (priv
->eq_table
.inta_pin
< 32 ? 4 : 0);
574 err
= mlx4_create_eq(dev
, dev
->caps
.num_cqs
+ MLX4_NUM_SPARE_EQE
,
575 (dev
->flags
& MLX4_FLAG_MSI_X
) ? MLX4_EQ_COMP
: 0,
576 &priv
->eq_table
.eq
[MLX4_EQ_COMP
]);
580 err
= mlx4_create_eq(dev
, MLX4_NUM_ASYNC_EQE
+ MLX4_NUM_SPARE_EQE
,
581 (dev
->flags
& MLX4_FLAG_MSI_X
) ? MLX4_EQ_ASYNC
: 0,
582 &priv
->eq_table
.eq
[MLX4_EQ_ASYNC
]);
586 if (dev
->flags
& MLX4_FLAG_MSI_X
) {
587 static const char *eq_name
[] = {
588 [MLX4_EQ_COMP
] = DRV_NAME
" (comp)",
589 [MLX4_EQ_ASYNC
] = DRV_NAME
" (async)"
592 for (i
= 0; i
< MLX4_NUM_EQ
; ++i
) {
593 err
= request_irq(priv
->eq_table
.eq
[i
].irq
,
594 mlx4_msi_x_interrupt
,
595 0, eq_name
[i
], priv
->eq_table
.eq
+ i
);
599 priv
->eq_table
.eq
[i
].have_irq
= 1;
603 err
= request_irq(dev
->pdev
->irq
, mlx4_interrupt
,
604 IRQF_SHARED
, DRV_NAME
, dev
);
608 priv
->eq_table
.have_irq
= 1;
611 err
= mlx4_MAP_EQ(dev
, MLX4_ASYNC_EVENT_MASK
, 0,
612 priv
->eq_table
.eq
[MLX4_EQ_ASYNC
].eqn
);
614 mlx4_warn(dev
, "MAP_EQ for async EQ %d failed (%d)\n",
615 priv
->eq_table
.eq
[MLX4_EQ_ASYNC
].eqn
, err
);
617 for (i
= 0; i
< MLX4_NUM_EQ
; ++i
)
618 eq_set_ci(&priv
->eq_table
.eq
[i
], 1);
623 mlx4_free_eq(dev
, &priv
->eq_table
.eq
[MLX4_EQ_ASYNC
]);
626 mlx4_free_eq(dev
, &priv
->eq_table
.eq
[MLX4_EQ_COMP
]);
629 mlx4_unmap_clr_int(dev
);
633 mlx4_bitmap_cleanup(&priv
->eq_table
.bitmap
);
/*
 * Undo mlx4_init_eq_table().  The async event mask is unmapped from the
 * async EQ (mlx4_MAP_EQ() with unmap == 1, result ignored), every one of
 * the MLX4_NUM_EQ queues is released with mlx4_free_eq(), the interrupt
 * clear register is unmapped, each non-NULL doorbell mapping in
 * uar_map[] is released with iounmap() and the EQ number bitmap is
 * destroyed.
 *
 * NOTE(review): some lines of this function are not visible in this
 * view, so further steps (for example releasing the IRQs) may exist
 * between the calls shown.
 */
637 void mlx4_cleanup_eq_table(struct mlx4_dev
*dev
)
639 struct mlx4_priv
*priv
= mlx4_priv(dev
);
642 mlx4_MAP_EQ(dev
, MLX4_ASYNC_EVENT_MASK
, 1,
643 priv
->eq_table
.eq
[MLX4_EQ_ASYNC
].eqn
);
647 for (i
= 0; i
< MLX4_NUM_EQ
; ++i
)
648 mlx4_free_eq(dev
, &priv
->eq_table
.eq
[i
]);
650 mlx4_unmap_clr_int(dev
);
652 for (i
= 0; i
< ARRAY_SIZE(priv
->eq_table
.uar_map
); ++i
)
653 if (priv
->eq_table
.uar_map
[i
])
654 iounmap(priv
->eq_table
.uar_map
[i
]);
656 mlx4_bitmap_cleanup(&priv
->eq_table
.bitmap
);