/*
 * QEMU paravirtual RDMA - Resource Manager Implementation
 *
 * Copyright (C) 2018 Oracle
 * Copyright (C) 2018 Red Hat Inc
 *
 * Authors:
 *     Yuval Shaia <yuval.shaia@oracle.com>
 *     Marcel Apfelbaum <marcel@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
#include "qemu/osdep.h"
#include "qapi/error.h"

#include "rdma_utils.h"
#include "rdma_backend.h"
/* Page directory and page tables */
/*
 * Parenthesize, don't brace: a brace-enclosed macro body is a compound
 * statement / initializer, not an expression, so `{ ... }` could never be
 * used in arithmetic or as an array bound without a compile error.
 */
#define PG_DIR_SZ (TARGET_PAGE_SIZE / sizeof(__u64))
#define PG_TBL_SZ (TARGET_PAGE_SIZE / sizeof(__u64))
28 static inline void res_tbl_init(const char *name
, RdmaRmResTbl
*tbl
,
29 uint32_t tbl_sz
, uint32_t res_sz
)
31 tbl
->tbl
= g_malloc(tbl_sz
* res_sz
);
33 strncpy(tbl
->name
, name
, MAX_RM_TBL_NAME
);
34 tbl
->name
[MAX_RM_TBL_NAME
- 1] = 0;
36 tbl
->bitmap
= bitmap_new(tbl_sz
);
39 qemu_mutex_init(&tbl
->lock
);
42 static inline void res_tbl_free(RdmaRmResTbl
*tbl
)
47 qemu_mutex_destroy(&tbl
->lock
);
52 static inline void *res_tbl_get(RdmaRmResTbl
*tbl
, uint32_t handle
)
54 pr_dbg("%s, handle=%d\n", tbl
->name
, handle
);
56 if ((handle
< tbl
->tbl_sz
) && (test_bit(handle
, tbl
->bitmap
))) {
57 return tbl
->tbl
+ handle
* tbl
->res_sz
;
59 pr_dbg("Invalid handle %d\n", handle
);
64 static inline void *res_tbl_alloc(RdmaRmResTbl
*tbl
, uint32_t *handle
)
66 qemu_mutex_lock(&tbl
->lock
);
68 *handle
= find_first_zero_bit(tbl
->bitmap
, tbl
->tbl_sz
);
69 if (*handle
> tbl
->tbl_sz
) {
70 pr_dbg("Failed to alloc, bitmap is full\n");
71 qemu_mutex_unlock(&tbl
->lock
);
75 set_bit(*handle
, tbl
->bitmap
);
77 qemu_mutex_unlock(&tbl
->lock
);
79 memset(tbl
->tbl
+ *handle
* tbl
->res_sz
, 0, tbl
->res_sz
);
81 pr_dbg("%s, handle=%d\n", tbl
->name
, *handle
);
83 return tbl
->tbl
+ *handle
* tbl
->res_sz
;
86 static inline void res_tbl_dealloc(RdmaRmResTbl
*tbl
, uint32_t handle
)
88 pr_dbg("%s, handle=%d\n", tbl
->name
, handle
);
90 qemu_mutex_lock(&tbl
->lock
);
92 if (handle
< tbl
->tbl_sz
) {
93 clear_bit(handle
, tbl
->bitmap
);
96 qemu_mutex_unlock(&tbl
->lock
);
99 int rdma_rm_alloc_pd(RdmaDeviceResources
*dev_res
, RdmaBackendDev
*backend_dev
,
100 uint32_t *pd_handle
, uint32_t ctx_handle
)
105 pd
= res_tbl_alloc(&dev_res
->pd_tbl
, pd_handle
);
110 ret
= rdma_backend_create_pd(backend_dev
, &pd
->backend_pd
);
113 goto out_tbl_dealloc
;
116 pd
->ctx_handle
= ctx_handle
;
121 res_tbl_dealloc(&dev_res
->pd_tbl
, *pd_handle
);
127 RdmaRmPD
*rdma_rm_get_pd(RdmaDeviceResources
*dev_res
, uint32_t pd_handle
)
129 return res_tbl_get(&dev_res
->pd_tbl
, pd_handle
);
132 void rdma_rm_dealloc_pd(RdmaDeviceResources
*dev_res
, uint32_t pd_handle
)
134 RdmaRmPD
*pd
= rdma_rm_get_pd(dev_res
, pd_handle
);
137 rdma_backend_destroy_pd(&pd
->backend_pd
);
138 res_tbl_dealloc(&dev_res
->pd_tbl
, pd_handle
);
142 int rdma_rm_alloc_mr(RdmaDeviceResources
*dev_res
, uint32_t pd_handle
,
143 uint64_t guest_start
, size_t guest_length
, void *host_virt
,
144 int access_flags
, uint32_t *mr_handle
, uint32_t *lkey
,
151 pd
= rdma_rm_get_pd(dev_res
, pd_handle
);
153 pr_dbg("Invalid PD\n");
157 mr
= res_tbl_alloc(&dev_res
->mr_tbl
, mr_handle
);
159 pr_dbg("Failed to allocate obj in table\n");
162 pr_dbg("mr_handle=%d\n", *mr_handle
);
164 pr_dbg("host_virt=0x%p\n", host_virt
);
165 pr_dbg("guest_start=0x%" PRIx64
"\n", guest_start
);
166 pr_dbg("length=%zu\n", guest_length
);
169 mr
->virt
= host_virt
;
170 mr
->start
= guest_start
;
171 mr
->length
= guest_length
;
172 mr
->virt
+= (mr
->start
& (TARGET_PAGE_SIZE
- 1));
174 ret
= rdma_backend_create_mr(&mr
->backend_mr
, &pd
->backend_pd
, mr
->virt
,
175 mr
->length
, access_flags
);
177 pr_dbg("Fail in rdma_backend_create_mr, err=%d\n", ret
);
183 /* We keep mr_handle in lkey so send and recv get get mr ptr */
187 mr
->pd_handle
= pd_handle
;
192 res_tbl_dealloc(&dev_res
->mr_tbl
, *mr_handle
);
197 RdmaRmMR
*rdma_rm_get_mr(RdmaDeviceResources
*dev_res
, uint32_t mr_handle
)
199 return res_tbl_get(&dev_res
->mr_tbl
, mr_handle
);
202 void rdma_rm_dealloc_mr(RdmaDeviceResources
*dev_res
, uint32_t mr_handle
)
204 RdmaRmMR
*mr
= rdma_rm_get_mr(dev_res
, mr_handle
);
207 rdma_backend_destroy_mr(&mr
->backend_mr
);
208 pr_dbg("start=0x%" PRIx64
"\n", mr
->start
);
210 mr
->virt
-= (mr
->start
& (TARGET_PAGE_SIZE
- 1));
211 munmap(mr
->virt
, mr
->length
);
213 res_tbl_dealloc(&dev_res
->mr_tbl
, mr_handle
);
217 int rdma_rm_alloc_uc(RdmaDeviceResources
*dev_res
, uint32_t pfn
,
222 /* TODO: Need to make sure pfn is between bar start address and
223 * bsd+RDMA_BAR2_UAR_SIZE
224 if (pfn > RDMA_BAR2_UAR_SIZE) {
225 pr_err("pfn out of range (%d > %d)\n", pfn, RDMA_BAR2_UAR_SIZE);
230 uc
= res_tbl_alloc(&dev_res
->uc_tbl
, uc_handle
);
238 RdmaRmUC
*rdma_rm_get_uc(RdmaDeviceResources
*dev_res
, uint32_t uc_handle
)
240 return res_tbl_get(&dev_res
->uc_tbl
, uc_handle
);
243 void rdma_rm_dealloc_uc(RdmaDeviceResources
*dev_res
, uint32_t uc_handle
)
245 RdmaRmUC
*uc
= rdma_rm_get_uc(dev_res
, uc_handle
);
248 res_tbl_dealloc(&dev_res
->uc_tbl
, uc_handle
);
252 RdmaRmCQ
*rdma_rm_get_cq(RdmaDeviceResources
*dev_res
, uint32_t cq_handle
)
254 return res_tbl_get(&dev_res
->cq_tbl
, cq_handle
);
257 int rdma_rm_alloc_cq(RdmaDeviceResources
*dev_res
, RdmaBackendDev
*backend_dev
,
258 uint32_t cqe
, uint32_t *cq_handle
, void *opaque
)
263 cq
= res_tbl_alloc(&dev_res
->cq_tbl
, cq_handle
);
269 cq
->notify
= CNT_CLEAR
;
271 rc
= rdma_backend_create_cq(backend_dev
, &cq
->backend_cq
, cqe
);
280 rdma_rm_dealloc_cq(dev_res
, *cq_handle
);
285 void rdma_rm_req_notify_cq(RdmaDeviceResources
*dev_res
, uint32_t cq_handle
,
290 pr_dbg("cq_handle=%d, notify=0x%x\n", cq_handle
, notify
);
292 cq
= rdma_rm_get_cq(dev_res
, cq_handle
);
297 if (cq
->notify
!= CNT_SET
) {
298 cq
->notify
= notify
? CNT_ARM
: CNT_CLEAR
;
301 pr_dbg("notify=%d\n", cq
->notify
);
304 void rdma_rm_dealloc_cq(RdmaDeviceResources
*dev_res
, uint32_t cq_handle
)
308 cq
= rdma_rm_get_cq(dev_res
, cq_handle
);
313 rdma_backend_destroy_cq(&cq
->backend_cq
);
315 res_tbl_dealloc(&dev_res
->cq_tbl
, cq_handle
);
318 RdmaRmQP
*rdma_rm_get_qp(RdmaDeviceResources
*dev_res
, uint32_t qpn
)
320 GBytes
*key
= g_bytes_new(&qpn
, sizeof(qpn
));
322 RdmaRmQP
*qp
= g_hash_table_lookup(dev_res
->qp_hash
, key
);
329 int rdma_rm_alloc_qp(RdmaDeviceResources
*dev_res
, uint32_t pd_handle
,
330 uint8_t qp_type
, uint32_t max_send_wr
,
331 uint32_t max_send_sge
, uint32_t send_cq_handle
,
332 uint32_t max_recv_wr
, uint32_t max_recv_sge
,
333 uint32_t recv_cq_handle
, void *opaque
, uint32_t *qpn
)
341 pr_dbg("qp_type=%d\n", qp_type
);
343 pd
= rdma_rm_get_pd(dev_res
, pd_handle
);
345 pr_err("Invalid pd handle (%d)\n", pd_handle
);
349 scq
= rdma_rm_get_cq(dev_res
, send_cq_handle
);
350 rcq
= rdma_rm_get_cq(dev_res
, recv_cq_handle
);
353 pr_err("Invalid send_cqn or recv_cqn (%d, %d)\n",
354 send_cq_handle
, recv_cq_handle
);
358 if (qp_type
== IBV_QPT_GSI
) {
359 scq
->notify
= CNT_SET
;
360 rcq
->notify
= CNT_SET
;
363 qp
= res_tbl_alloc(&dev_res
->qp_tbl
, &rm_qpn
);
367 pr_dbg("rm_qpn=%d\n", rm_qpn
);
370 qp
->qp_state
= IBV_QPS_RESET
;
371 qp
->qp_type
= qp_type
;
372 qp
->send_cq_handle
= send_cq_handle
;
373 qp
->recv_cq_handle
= recv_cq_handle
;
376 rc
= rdma_backend_create_qp(&qp
->backend_qp
, qp_type
, &pd
->backend_pd
,
377 &scq
->backend_cq
, &rcq
->backend_cq
, max_send_wr
,
378 max_recv_wr
, max_send_sge
, max_recv_sge
);
384 *qpn
= rdma_backend_qpn(&qp
->backend_qp
);
385 pr_dbg("rm_qpn=%d, backend_qpn=0x%x\n", rm_qpn
, *qpn
);
386 g_hash_table_insert(dev_res
->qp_hash
, g_bytes_new(qpn
, sizeof(*qpn
)), qp
);
391 res_tbl_dealloc(&dev_res
->qp_tbl
, qp
->qpn
);
396 int rdma_rm_modify_qp(RdmaDeviceResources
*dev_res
, RdmaBackendDev
*backend_dev
,
397 uint32_t qp_handle
, uint32_t attr_mask
, uint8_t sgid_idx
,
398 union ibv_gid
*dgid
, uint32_t dqpn
,
399 enum ibv_qp_state qp_state
, uint32_t qkey
,
400 uint32_t rq_psn
, uint32_t sq_psn
)
405 pr_dbg("qpn=0x%x\n", qp_handle
);
406 pr_dbg("qkey=0x%x\n", qkey
);
408 qp
= rdma_rm_get_qp(dev_res
, qp_handle
);
413 pr_dbg("qp_type=%d\n", qp
->qp_type
);
414 pr_dbg("attr_mask=0x%x\n", attr_mask
);
416 if (qp
->qp_type
== IBV_QPT_SMI
) {
417 pr_dbg("QP0 unsupported\n");
419 } else if (qp
->qp_type
== IBV_QPT_GSI
) {
424 if (attr_mask
& IBV_QP_STATE
) {
425 qp
->qp_state
= qp_state
;
426 pr_dbg("qp_state=%d\n", qp
->qp_state
);
428 if (qp
->qp_state
== IBV_QPS_INIT
) {
429 ret
= rdma_backend_qp_state_init(backend_dev
, &qp
->backend_qp
,
436 if (qp
->qp_state
== IBV_QPS_RTR
) {
437 /* Get backend gid index */
438 pr_dbg("Guest sgid_idx=%d\n", sgid_idx
);
439 sgid_idx
= rdma_rm_get_backend_gid_index(dev_res
, backend_dev
,
441 if (sgid_idx
<= 0) { /* TODO check also less than bk.max_sgid */
442 pr_dbg("Fail to get bk sgid_idx for sgid_idx %d\n", sgid_idx
);
446 ret
= rdma_backend_qp_state_rtr(backend_dev
, &qp
->backend_qp
,
447 qp
->qp_type
, sgid_idx
, dgid
, dqpn
,
449 attr_mask
& IBV_QP_QKEY
);
455 if (qp
->qp_state
== IBV_QPS_RTS
) {
456 ret
= rdma_backend_qp_state_rts(&qp
->backend_qp
, qp
->qp_type
,
458 attr_mask
& IBV_QP_QKEY
);
468 int rdma_rm_query_qp(RdmaDeviceResources
*dev_res
, RdmaBackendDev
*backend_dev
,
469 uint32_t qp_handle
, struct ibv_qp_attr
*attr
,
470 int attr_mask
, struct ibv_qp_init_attr
*init_attr
)
474 pr_dbg("qpn=0x%x\n", qp_handle
);
476 qp
= rdma_rm_get_qp(dev_res
, qp_handle
);
481 pr_dbg("qp_type=%d\n", qp
->qp_type
);
483 return rdma_backend_query_qp(&qp
->backend_qp
, attr
, attr_mask
, init_attr
);
486 void rdma_rm_dealloc_qp(RdmaDeviceResources
*dev_res
, uint32_t qp_handle
)
491 key
= g_bytes_new(&qp_handle
, sizeof(qp_handle
));
492 qp
= g_hash_table_lookup(dev_res
->qp_hash
, key
);
493 g_hash_table_remove(dev_res
->qp_hash
, key
);
500 rdma_backend_destroy_qp(&qp
->backend_qp
);
502 res_tbl_dealloc(&dev_res
->qp_tbl
, qp
->qpn
);
505 void *rdma_rm_get_cqe_ctx(RdmaDeviceResources
*dev_res
, uint32_t cqe_ctx_id
)
509 cqe_ctx
= res_tbl_get(&dev_res
->cqe_ctx_tbl
, cqe_ctx_id
);
514 pr_dbg("ctx=%p\n", *cqe_ctx
);
519 int rdma_rm_alloc_cqe_ctx(RdmaDeviceResources
*dev_res
, uint32_t *cqe_ctx_id
,
524 cqe_ctx
= res_tbl_alloc(&dev_res
->cqe_ctx_tbl
, cqe_ctx_id
);
529 pr_dbg("ctx=%p\n", ctx
);
535 void rdma_rm_dealloc_cqe_ctx(RdmaDeviceResources
*dev_res
, uint32_t cqe_ctx_id
)
537 res_tbl_dealloc(&dev_res
->cqe_ctx_tbl
, cqe_ctx_id
);
540 int rdma_rm_add_gid(RdmaDeviceResources
*dev_res
, RdmaBackendDev
*backend_dev
,
541 const char *ifname
, union ibv_gid
*gid
, int gid_idx
)
545 rc
= rdma_backend_add_gid(backend_dev
, ifname
, gid
);
547 pr_dbg("Fail to add gid\n");
551 memcpy(&dev_res
->port
.gid_tbl
[gid_idx
].gid
, gid
, sizeof(*gid
));
556 int rdma_rm_del_gid(RdmaDeviceResources
*dev_res
, RdmaBackendDev
*backend_dev
,
557 const char *ifname
, int gid_idx
)
561 if (!dev_res
->port
.gid_tbl
[gid_idx
].gid
.global
.interface_id
) {
565 rc
= rdma_backend_del_gid(backend_dev
, ifname
,
566 &dev_res
->port
.gid_tbl
[gid_idx
].gid
);
568 pr_dbg("Fail to delete gid\n");
572 memset(dev_res
->port
.gid_tbl
[gid_idx
].gid
.raw
, 0,
573 sizeof(dev_res
->port
.gid_tbl
[gid_idx
].gid
));
574 dev_res
->port
.gid_tbl
[gid_idx
].backend_gid_index
= -1;
579 int rdma_rm_get_backend_gid_index(RdmaDeviceResources
*dev_res
,
580 RdmaBackendDev
*backend_dev
, int sgid_idx
)
582 if (unlikely(sgid_idx
< 0 || sgid_idx
>= MAX_PORT_GIDS
)) {
583 pr_dbg("Got invalid sgid_idx %d\n", sgid_idx
);
587 if (unlikely(dev_res
->port
.gid_tbl
[sgid_idx
].backend_gid_index
== -1)) {
588 dev_res
->port
.gid_tbl
[sgid_idx
].backend_gid_index
=
589 rdma_backend_get_gid_index(backend_dev
,
590 &dev_res
->port
.gid_tbl
[sgid_idx
].gid
);
593 pr_dbg("backend_gid_index=%d\n",
594 dev_res
->port
.gid_tbl
[sgid_idx
].backend_gid_index
);
596 return dev_res
->port
.gid_tbl
[sgid_idx
].backend_gid_index
;
599 static void destroy_qp_hash_key(gpointer data
)
604 static void init_ports(RdmaDeviceResources
*dev_res
)
608 memset(&dev_res
->port
, 0, sizeof(dev_res
->port
));
610 dev_res
->port
.state
= IBV_PORT_DOWN
;
611 for (i
= 0; i
< MAX_PORT_GIDS
; i
++) {
612 dev_res
->port
.gid_tbl
[i
].backend_gid_index
= -1;
616 static void fini_ports(RdmaDeviceResources
*dev_res
,
617 RdmaBackendDev
*backend_dev
, const char *ifname
)
621 dev_res
->port
.state
= IBV_PORT_DOWN
;
622 for (i
= 0; i
< MAX_PORT_GIDS
; i
++) {
623 rdma_rm_del_gid(dev_res
, backend_dev
, ifname
, i
);
627 int rdma_rm_init(RdmaDeviceResources
*dev_res
, struct ibv_device_attr
*dev_attr
,
630 dev_res
->qp_hash
= g_hash_table_new_full(g_bytes_hash
, g_bytes_equal
,
631 destroy_qp_hash_key
, NULL
);
632 if (!dev_res
->qp_hash
) {
636 res_tbl_init("PD", &dev_res
->pd_tbl
, dev_attr
->max_pd
, sizeof(RdmaRmPD
));
637 res_tbl_init("CQ", &dev_res
->cq_tbl
, dev_attr
->max_cq
, sizeof(RdmaRmCQ
));
638 res_tbl_init("MR", &dev_res
->mr_tbl
, dev_attr
->max_mr
, sizeof(RdmaRmMR
));
639 res_tbl_init("QP", &dev_res
->qp_tbl
, dev_attr
->max_qp
, sizeof(RdmaRmQP
));
640 res_tbl_init("CQE_CTX", &dev_res
->cqe_ctx_tbl
, dev_attr
->max_qp
*
641 dev_attr
->max_qp_wr
, sizeof(void *));
642 res_tbl_init("UC", &dev_res
->uc_tbl
, MAX_UCS
, sizeof(RdmaRmUC
));
649 void rdma_rm_fini(RdmaDeviceResources
*dev_res
, RdmaBackendDev
*backend_dev
,
652 fini_ports(dev_res
, backend_dev
, ifname
);
654 res_tbl_free(&dev_res
->uc_tbl
);
655 res_tbl_free(&dev_res
->cqe_ctx_tbl
);
656 res_tbl_free(&dev_res
->qp_tbl
);
657 res_tbl_free(&dev_res
->mr_tbl
);
658 res_tbl_free(&dev_res
->cq_tbl
);
659 res_tbl_free(&dev_res
->pd_tbl
);
661 if (dev_res
->qp_hash
) {
662 g_hash_table_destroy(dev_res
->qp_hash
);