/*
 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
34 #include <linux/init.h>
35 #include <linux/interrupt.h>
36 #include <linux/dma-mapping.h>
38 #include <linux/mlx4/cmd.h>
/*
 * Event queue sizing constants.
 *
 * NOTE(review): the opening "enum {" and closing "};" lines were not
 * present in the extracted text; an anonymous enum is assumed.
 */
enum {
	/* Entry count requested for the asynchronous event EQ. */
	MLX4_NUM_ASYNC_EQE	= 0x100,
	/* Extra entries added to every EQ; also the consumer-index update interval. */
	MLX4_NUM_SPARE_EQE	= 0x80,
	/* Size in bytes of one event queue entry. */
	MLX4_EQ_ENTRY_SIZE	= 0x20
};
/* Must be packed because start is 64 bits but only aligned to 32 bits. */
52 struct mlx4_eq_context
{
66 __be32 mtt_base_addr_l
;
68 __be32 consumer_index
;
69 __be32 producer_index
;
/*
 * Pre-shifted field values for the 32-bit EQ context flags word
 * (mlx4_create_eq() ORs MLX4_EQ_STATUS_OK and MLX4_EQ_STATE_ARMED
 * together and stores the result with cpu_to_be32()).
 *
 * MLX4_EQ_STATUS_WRITE_FAIL uses an unsigned constant because
 * 10 << 28 does not fit in a signed 32-bit int.
 */
#define MLX4_EQ_STATUS_OK	   ( 0 << 28)
#define MLX4_EQ_STATUS_WRITE_FAIL  (10U << 28)
#define MLX4_EQ_OWNER_SW	   ( 0 << 24)
#define MLX4_EQ_OWNER_HW	   ( 1 << 24)
#define MLX4_EQ_FLAG_EC		   ( 1 << 18)
#define MLX4_EQ_FLAG_OI		   ( 1 << 17)
#define MLX4_EQ_STATE_ARMED	   ( 9 <<  8)
#define MLX4_EQ_STATE_FIRED	   (10 <<  8)
#define MLX4_EQ_STATE_ALWAYS_ARMED (11 <<  8)
/*
 * 64-bit mask with one bit set per event type (bit N for event type N).
 * mlx4_init_eq_table() passes it to mlx4_MAP_EQ() together with the
 * number of the MLX4_EQ_ASYNC event queue.
 */
#define MLX4_ASYNC_EVENT_MASK					\
	((1ull << MLX4_EVENT_TYPE_PATH_MIG)		|	\
	 (1ull << MLX4_EVENT_TYPE_COMM_EST)		|	\
	 (1ull << MLX4_EVENT_TYPE_SQ_DRAINED)		|	\
	 (1ull << MLX4_EVENT_TYPE_CQ_ERROR)		|	\
	 (1ull << MLX4_EVENT_TYPE_WQ_CATAS_ERROR)	|	\
	 (1ull << MLX4_EVENT_TYPE_EEC_CATAS_ERROR)	|	\
	 (1ull << MLX4_EVENT_TYPE_PATH_MIG_FAILED)	|	\
	 (1ull << MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR)	|	\
	 (1ull << MLX4_EVENT_TYPE_WQ_ACCESS_ERROR)	|	\
	 (1ull << MLX4_EVENT_TYPE_PORT_CHANGE)		|	\
	 (1ull << MLX4_EVENT_TYPE_ECC_DETECT)		|	\
	 (1ull << MLX4_EVENT_TYPE_SRQ_CATAS_ERROR)	|	\
	 (1ull << MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE)	|	\
	 (1ull << MLX4_EVENT_TYPE_SRQ_LIMIT)		|	\
	 (1ull << MLX4_EVENT_TYPE_CMD))
108 } __attribute__((packed
)) comp
;
116 } __attribute__((packed
)) cmd
;
119 } __attribute__((packed
)) qp
;
122 } __attribute__((packed
)) srq
;
128 } __attribute__((packed
)) cq_err
;
132 } __attribute__((packed
)) port_change
;
136 } __attribute__((packed
));
138 static void eq_set_ci(struct mlx4_eq
*eq
, int req_not
)
140 __raw_writel((__force u32
) cpu_to_be32((eq
->cons_index
& 0xffffff) |
/* We still want ordering, just not swabbing, so add a barrier */
147 static struct mlx4_eqe
*get_eqe(struct mlx4_eq
*eq
, u32 entry
)
149 unsigned long off
= (entry
& (eq
->nent
- 1)) * MLX4_EQ_ENTRY_SIZE
;
150 return eq
->page_list
[off
/ PAGE_SIZE
].buf
+ off
% PAGE_SIZE
;
153 static struct mlx4_eqe
*next_eqe_sw(struct mlx4_eq
*eq
)
155 struct mlx4_eqe
*eqe
= get_eqe(eq
, eq
->cons_index
);
156 return !!(eqe
->owner
& 0x80) ^ !!(eq
->cons_index
& eq
->nent
) ? NULL
: eqe
;
159 static int mlx4_eq_int(struct mlx4_dev
*dev
, struct mlx4_eq
*eq
)
161 struct mlx4_eqe
*eqe
;
166 while ((eqe
= next_eqe_sw(eq
))) {
/*
 * Make sure we read EQ entry contents after we've
 * checked the ownership bit.
 */
174 case MLX4_EVENT_TYPE_COMP
:
175 cqn
= be32_to_cpu(eqe
->event
.comp
.cqn
) & 0xffffff;
176 mlx4_cq_completion(dev
, cqn
);
179 case MLX4_EVENT_TYPE_PATH_MIG
:
180 case MLX4_EVENT_TYPE_COMM_EST
:
181 case MLX4_EVENT_TYPE_SQ_DRAINED
:
182 case MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE
:
183 case MLX4_EVENT_TYPE_WQ_CATAS_ERROR
:
184 case MLX4_EVENT_TYPE_PATH_MIG_FAILED
:
185 case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR
:
186 case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR
:
187 mlx4_qp_event(dev
, be32_to_cpu(eqe
->event
.qp
.qpn
) & 0xffffff,
191 case MLX4_EVENT_TYPE_SRQ_LIMIT
:
192 case MLX4_EVENT_TYPE_SRQ_CATAS_ERROR
:
193 mlx4_srq_event(dev
, be32_to_cpu(eqe
->event
.srq
.srqn
) & 0xffffff,
197 case MLX4_EVENT_TYPE_CMD
:
199 be16_to_cpu(eqe
->event
.cmd
.token
),
200 eqe
->event
.cmd
.status
,
201 be64_to_cpu(eqe
->event
.cmd
.out_param
));
204 case MLX4_EVENT_TYPE_PORT_CHANGE
:
205 mlx4_dispatch_event(dev
, eqe
->type
, eqe
->subtype
,
206 be32_to_cpu(eqe
->event
.port_change
.port
) >> 28);
209 case MLX4_EVENT_TYPE_CQ_ERROR
:
210 mlx4_warn(dev
, "CQ %s on CQN %06x\n",
211 eqe
->event
.cq_err
.syndrome
== 1 ?
212 "overrun" : "access violation",
213 be32_to_cpu(eqe
->event
.cq_err
.cqn
) & 0xffffff);
214 mlx4_cq_event(dev
, be32_to_cpu(eqe
->event
.cq_err
.cqn
),
218 case MLX4_EVENT_TYPE_EQ_OVERFLOW
:
219 mlx4_warn(dev
, "EQ overrun on EQN %d\n", eq
->eqn
);
222 case MLX4_EVENT_TYPE_EEC_CATAS_ERROR
:
223 case MLX4_EVENT_TYPE_ECC_DETECT
:
225 mlx4_warn(dev
, "Unhandled event %02x(%02x) on EQ %d at index %u\n",
226 eqe
->type
, eqe
->subtype
, eq
->eqn
, eq
->cons_index
);
/*
 * The HCA will think the queue has overflowed if we
 * don't tell it we've been processing events.  We
 * create our EQs with MLX4_NUM_SPARE_EQE extra
 * entries, so we must update our consumer index at
 * least that often.
 */
241 if (unlikely(set_ci
>= MLX4_NUM_SPARE_EQE
)) {
/*
 * Conditional on hca_type is OK here because
 * this is a rare case, not the fast path.
 */
256 static irqreturn_t
mlx4_interrupt(int irq
, void *dev_ptr
)
258 struct mlx4_dev
*dev
= dev_ptr
;
259 struct mlx4_priv
*priv
= mlx4_priv(dev
);
263 writel(priv
->eq_table
.clr_mask
, priv
->eq_table
.clr_int
);
265 for (i
= 0; i
< MLX4_NUM_EQ
; ++i
)
266 work
|= mlx4_eq_int(dev
, &priv
->eq_table
.eq
[i
]);
268 return IRQ_RETVAL(work
);
271 static irqreturn_t
mlx4_msi_x_interrupt(int irq
, void *eq_ptr
)
273 struct mlx4_eq
*eq
= eq_ptr
;
274 struct mlx4_dev
*dev
= eq
->dev
;
276 mlx4_eq_int(dev
, eq
);
278 /* MSI-X vectors always belong to us */
282 static int mlx4_MAP_EQ(struct mlx4_dev
*dev
, u64 event_mask
, int unmap
,
285 return mlx4_cmd(dev
, event_mask
, (unmap
<< 31) | eq_num
,
286 0, MLX4_CMD_MAP_EQ
, MLX4_CMD_TIME_CLASS_B
);
289 static int mlx4_SW2HW_EQ(struct mlx4_dev
*dev
, struct mlx4_cmd_mailbox
*mailbox
,
292 return mlx4_cmd(dev
, mailbox
->dma
, eq_num
, 0, MLX4_CMD_SW2HW_EQ
,
293 MLX4_CMD_TIME_CLASS_A
);
296 static int mlx4_HW2SW_EQ(struct mlx4_dev
*dev
, struct mlx4_cmd_mailbox
*mailbox
,
299 return mlx4_cmd_box(dev
, 0, mailbox
->dma
, eq_num
, 0, MLX4_CMD_HW2SW_EQ
,
300 MLX4_CMD_TIME_CLASS_A
);
303 static void __iomem
*mlx4_get_eq_uar(struct mlx4_dev
*dev
, struct mlx4_eq
*eq
)
305 struct mlx4_priv
*priv
= mlx4_priv(dev
);
308 index
= eq
->eqn
/ 4 - dev
->caps
.reserved_eqs
/ 4;
310 if (!priv
->eq_table
.uar_map
[index
]) {
311 priv
->eq_table
.uar_map
[index
] =
312 ioremap(pci_resource_start(dev
->pdev
, 2) +
313 ((eq
->eqn
/ 4) << PAGE_SHIFT
),
315 if (!priv
->eq_table
.uar_map
[index
]) {
316 mlx4_err(dev
, "Couldn't map EQ doorbell for EQN 0x%06x\n",
322 return priv
->eq_table
.uar_map
[index
] + 0x800 + 8 * (eq
->eqn
% 4);
325 static int mlx4_create_eq(struct mlx4_dev
*dev
, int nent
,
326 u8 intr
, struct mlx4_eq
*eq
)
328 struct mlx4_priv
*priv
= mlx4_priv(dev
);
329 struct mlx4_cmd_mailbox
*mailbox
;
330 struct mlx4_eq_context
*eq_context
;
332 u64
*dma_list
= NULL
;
339 eq
->nent
= roundup_pow_of_two(max(nent
, 2));
340 npages
= PAGE_ALIGN(eq
->nent
* MLX4_EQ_ENTRY_SIZE
) / PAGE_SIZE
;
342 eq
->page_list
= kmalloc(npages
* sizeof *eq
->page_list
,
347 for (i
= 0; i
< npages
; ++i
)
348 eq
->page_list
[i
].buf
= NULL
;
350 dma_list
= kmalloc(npages
* sizeof *dma_list
, GFP_KERNEL
);
354 mailbox
= mlx4_alloc_cmd_mailbox(dev
);
357 eq_context
= mailbox
->buf
;
359 for (i
= 0; i
< npages
; ++i
) {
360 eq
->page_list
[i
].buf
= dma_alloc_coherent(&dev
->pdev
->dev
,
361 PAGE_SIZE
, &t
, GFP_KERNEL
);
362 if (!eq
->page_list
[i
].buf
)
363 goto err_out_free_pages
;
366 eq
->page_list
[i
].map
= t
;
368 memset(eq
->page_list
[i
].buf
, 0, PAGE_SIZE
);
371 eq
->eqn
= mlx4_bitmap_alloc(&priv
->eq_table
.bitmap
);
373 goto err_out_free_pages
;
375 eq
->doorbell
= mlx4_get_eq_uar(dev
, eq
);
378 goto err_out_free_eq
;
381 err
= mlx4_mtt_init(dev
, npages
, PAGE_SHIFT
, &eq
->mtt
);
383 goto err_out_free_eq
;
385 err
= mlx4_write_mtt(dev
, &eq
->mtt
, 0, npages
, dma_list
);
387 goto err_out_free_mtt
;
389 memset(eq_context
, 0, sizeof *eq_context
);
390 eq_context
->flags
= cpu_to_be32(MLX4_EQ_STATUS_OK
|
391 MLX4_EQ_STATE_ARMED
);
392 eq_context
->log_eq_size
= ilog2(eq
->nent
);
393 eq_context
->intr
= intr
;
394 eq_context
->log_page_size
= PAGE_SHIFT
- MLX4_ICM_PAGE_SHIFT
;
396 mtt_addr
= mlx4_mtt_addr(dev
, &eq
->mtt
);
397 eq_context
->mtt_base_addr_h
= mtt_addr
>> 32;
398 eq_context
->mtt_base_addr_l
= cpu_to_be32(mtt_addr
& 0xffffffff);
400 err
= mlx4_SW2HW_EQ(dev
, mailbox
, eq
->eqn
);
402 mlx4_warn(dev
, "SW2HW_EQ failed (%d)\n", err
);
403 goto err_out_free_mtt
;
407 mlx4_free_cmd_mailbox(dev
, mailbox
);
414 mlx4_mtt_cleanup(dev
, &eq
->mtt
);
417 mlx4_bitmap_free(&priv
->eq_table
.bitmap
, eq
->eqn
);
420 for (i
= 0; i
< npages
; ++i
)
421 if (eq
->page_list
[i
].buf
)
422 dma_free_coherent(&dev
->pdev
->dev
, PAGE_SIZE
,
423 eq
->page_list
[i
].buf
,
424 eq
->page_list
[i
].map
);
426 mlx4_free_cmd_mailbox(dev
, mailbox
);
429 kfree(eq
->page_list
);
436 static void mlx4_free_eq(struct mlx4_dev
*dev
,
439 struct mlx4_priv
*priv
= mlx4_priv(dev
);
440 struct mlx4_cmd_mailbox
*mailbox
;
442 int npages
= PAGE_ALIGN(MLX4_EQ_ENTRY_SIZE
* eq
->nent
) / PAGE_SIZE
;
445 mailbox
= mlx4_alloc_cmd_mailbox(dev
);
449 err
= mlx4_HW2SW_EQ(dev
, mailbox
, eq
->eqn
);
451 mlx4_warn(dev
, "HW2SW_EQ failed (%d)\n", err
);
454 mlx4_dbg(dev
, "Dumping EQ context %02x:\n", eq
->eqn
);
455 for (i
= 0; i
< sizeof (struct mlx4_eq_context
) / 4; ++i
) {
457 printk("[%02x] ", i
* 4);
458 printk(" %08x", be32_to_cpup(mailbox
->buf
+ i
* 4));
459 if ((i
+ 1) % 4 == 0)
464 mlx4_mtt_cleanup(dev
, &eq
->mtt
);
465 for (i
= 0; i
< npages
; ++i
)
466 pci_free_consistent(dev
->pdev
, PAGE_SIZE
,
467 eq
->page_list
[i
].buf
,
468 eq
->page_list
[i
].map
);
470 kfree(eq
->page_list
);
471 mlx4_bitmap_free(&priv
->eq_table
.bitmap
, eq
->eqn
);
472 mlx4_free_cmd_mailbox(dev
, mailbox
);
475 static void mlx4_free_irqs(struct mlx4_dev
*dev
)
477 struct mlx4_eq_table
*eq_table
= &mlx4_priv(dev
)->eq_table
;
480 if (eq_table
->have_irq
)
481 free_irq(dev
->pdev
->irq
, dev
);
482 for (i
= 0; i
< MLX4_NUM_EQ
; ++i
)
483 if (eq_table
->eq
[i
].have_irq
)
484 free_irq(eq_table
->eq
[i
].irq
, eq_table
->eq
+ i
);
487 static int mlx4_map_clr_int(struct mlx4_dev
*dev
)
489 struct mlx4_priv
*priv
= mlx4_priv(dev
);
491 priv
->clr_base
= ioremap(pci_resource_start(dev
->pdev
, priv
->fw
.clr_int_bar
) +
492 priv
->fw
.clr_int_base
, MLX4_CLR_INT_SIZE
);
493 if (!priv
->clr_base
) {
494 mlx4_err(dev
, "Couldn't map interrupt clear register, aborting.\n");
501 static void mlx4_unmap_clr_int(struct mlx4_dev
*dev
)
503 struct mlx4_priv
*priv
= mlx4_priv(dev
);
505 iounmap(priv
->clr_base
);
508 int mlx4_map_eq_icm(struct mlx4_dev
*dev
, u64 icm_virt
)
510 struct mlx4_priv
*priv
= mlx4_priv(dev
);
/*
 * We assume that mapping one page is enough for the whole EQ
 * context table.  This is fine with all current HCAs, because
 * we only use 32 EQs and each EQ uses 64 bytes of context
 * memory, or 2 KB total.
 */
519 priv
->eq_table
.icm_virt
= icm_virt
;
520 priv
->eq_table
.icm_page
= alloc_page(GFP_HIGHUSER
);
521 if (!priv
->eq_table
.icm_page
)
523 priv
->eq_table
.icm_dma
= pci_map_page(dev
->pdev
, priv
->eq_table
.icm_page
, 0,
524 PAGE_SIZE
, PCI_DMA_BIDIRECTIONAL
);
525 if (pci_dma_mapping_error(priv
->eq_table
.icm_dma
)) {
526 __free_page(priv
->eq_table
.icm_page
);
530 ret
= mlx4_MAP_ICM_page(dev
, priv
->eq_table
.icm_dma
, icm_virt
);
532 pci_unmap_page(dev
->pdev
, priv
->eq_table
.icm_dma
, PAGE_SIZE
,
533 PCI_DMA_BIDIRECTIONAL
);
534 __free_page(priv
->eq_table
.icm_page
);
540 void mlx4_unmap_eq_icm(struct mlx4_dev
*dev
)
542 struct mlx4_priv
*priv
= mlx4_priv(dev
);
544 mlx4_UNMAP_ICM(dev
, priv
->eq_table
.icm_virt
, 1);
545 pci_unmap_page(dev
->pdev
, priv
->eq_table
.icm_dma
, PAGE_SIZE
,
546 PCI_DMA_BIDIRECTIONAL
);
547 __free_page(priv
->eq_table
.icm_page
);
550 int mlx4_init_eq_table(struct mlx4_dev
*dev
)
552 struct mlx4_priv
*priv
= mlx4_priv(dev
);
556 err
= mlx4_bitmap_init(&priv
->eq_table
.bitmap
, dev
->caps
.num_eqs
,
557 dev
->caps
.num_eqs
- 1, dev
->caps
.reserved_eqs
);
561 for (i
= 0; i
< ARRAY_SIZE(priv
->eq_table
.uar_map
); ++i
)
562 priv
->eq_table
.uar_map
[i
] = NULL
;
564 err
= mlx4_map_clr_int(dev
);
568 priv
->eq_table
.clr_mask
=
569 swab32(1 << (priv
->eq_table
.inta_pin
& 31));
570 priv
->eq_table
.clr_int
= priv
->clr_base
+
571 (priv
->eq_table
.inta_pin
< 32 ? 4 : 0);
573 err
= mlx4_create_eq(dev
, dev
->caps
.num_cqs
+ MLX4_NUM_SPARE_EQE
,
574 (dev
->flags
& MLX4_FLAG_MSI_X
) ? MLX4_EQ_COMP
: 0,
575 &priv
->eq_table
.eq
[MLX4_EQ_COMP
]);
579 err
= mlx4_create_eq(dev
, MLX4_NUM_ASYNC_EQE
+ MLX4_NUM_SPARE_EQE
,
580 (dev
->flags
& MLX4_FLAG_MSI_X
) ? MLX4_EQ_ASYNC
: 0,
581 &priv
->eq_table
.eq
[MLX4_EQ_ASYNC
]);
585 if (dev
->flags
& MLX4_FLAG_MSI_X
) {
586 static const char *eq_name
[] = {
587 [MLX4_EQ_COMP
] = DRV_NAME
" (comp)",
588 [MLX4_EQ_ASYNC
] = DRV_NAME
" (async)"
591 for (i
= 0; i
< MLX4_NUM_EQ
; ++i
) {
592 err
= request_irq(priv
->eq_table
.eq
[i
].irq
,
593 mlx4_msi_x_interrupt
,
594 0, eq_name
[i
], priv
->eq_table
.eq
+ i
);
598 priv
->eq_table
.eq
[i
].have_irq
= 1;
602 err
= request_irq(dev
->pdev
->irq
, mlx4_interrupt
,
603 IRQF_SHARED
, DRV_NAME
, dev
);
607 priv
->eq_table
.have_irq
= 1;
610 err
= mlx4_MAP_EQ(dev
, MLX4_ASYNC_EVENT_MASK
, 0,
611 priv
->eq_table
.eq
[MLX4_EQ_ASYNC
].eqn
);
613 mlx4_warn(dev
, "MAP_EQ for async EQ %d failed (%d)\n",
614 priv
->eq_table
.eq
[MLX4_EQ_ASYNC
].eqn
, err
);
616 for (i
= 0; i
< MLX4_NUM_EQ
; ++i
)
617 eq_set_ci(&priv
->eq_table
.eq
[i
], 1);
622 mlx4_free_eq(dev
, &priv
->eq_table
.eq
[MLX4_EQ_ASYNC
]);
625 mlx4_free_eq(dev
, &priv
->eq_table
.eq
[MLX4_EQ_COMP
]);
628 mlx4_unmap_clr_int(dev
);
632 mlx4_bitmap_cleanup(&priv
->eq_table
.bitmap
);
636 void mlx4_cleanup_eq_table(struct mlx4_dev
*dev
)
638 struct mlx4_priv
*priv
= mlx4_priv(dev
);
641 mlx4_MAP_EQ(dev
, MLX4_ASYNC_EVENT_MASK
, 1,
642 priv
->eq_table
.eq
[MLX4_EQ_ASYNC
].eqn
);
646 for (i
= 0; i
< MLX4_NUM_EQ
; ++i
)
647 mlx4_free_eq(dev
, &priv
->eq_table
.eq
[i
]);
649 mlx4_unmap_clr_int(dev
);
651 for (i
= 0; i
< ARRAY_SIZE(priv
->eq_table
.uar_map
); ++i
)
652 if (priv
->eq_table
.uar_map
[i
])
653 iounmap(priv
->eq_table
.uar_map
[i
]);
655 mlx4_bitmap_cleanup(&priv
->eq_table
.bitmap
);