hw: rdma: fix an off-by-one issue
[qemu/ar7.git] / hw / rdma / rdma_rm.c
blob268ff633a48e1b67df0414761cbe67648b7573db
/*
 * QEMU paravirtual RDMA - Resource Manager Implementation
 *
 * Copyright (C) 2018 Oracle
 * Copyright (C) 2018 Red Hat Inc
 *
 * Authors:
 *     Yuval Shaia <yuval.shaia@oracle.com>
 *     Marcel Apfelbaum <marcel@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */
16 #include "qemu/osdep.h"
17 #include "qapi/error.h"
18 #include "cpu.h"
20 #include "rdma_utils.h"
21 #include "rdma_backend.h"
22 #include "rdma_rm.h"
/*
 * Page directory and page tables: number of 64-bit entries that fit in one
 * target page.  The expansions are parenthesized (rather than wrapped in
 * braces) so the macros can be used inside ordinary expressions.
 */
#define PG_DIR_SZ (TARGET_PAGE_SIZE / sizeof(__u64))
#define PG_TBL_SZ (TARGET_PAGE_SIZE / sizeof(__u64))
28 static inline void res_tbl_init(const char *name, RdmaRmResTbl *tbl,
29 uint32_t tbl_sz, uint32_t res_sz)
31 tbl->tbl = g_malloc(tbl_sz * res_sz);
33 strncpy(tbl->name, name, MAX_RM_TBL_NAME);
34 tbl->name[MAX_RM_TBL_NAME - 1] = 0;
36 tbl->bitmap = bitmap_new(tbl_sz);
37 tbl->tbl_sz = tbl_sz;
38 tbl->res_sz = res_sz;
39 qemu_mutex_init(&tbl->lock);
42 static inline void res_tbl_free(RdmaRmResTbl *tbl)
44 if (!tbl->bitmap) {
45 return;
47 qemu_mutex_destroy(&tbl->lock);
48 g_free(tbl->tbl);
49 g_free(tbl->bitmap);
52 static inline void *res_tbl_get(RdmaRmResTbl *tbl, uint32_t handle)
54 pr_dbg("%s, handle=%d\n", tbl->name, handle);
56 if ((handle < tbl->tbl_sz) && (test_bit(handle, tbl->bitmap))) {
57 return tbl->tbl + handle * tbl->res_sz;
58 } else {
59 pr_dbg("Invalid handle %d\n", handle);
60 return NULL;
64 static inline void *res_tbl_alloc(RdmaRmResTbl *tbl, uint32_t *handle)
66 qemu_mutex_lock(&tbl->lock);
68 *handle = find_first_zero_bit(tbl->bitmap, tbl->tbl_sz);
69 if (*handle > tbl->tbl_sz) {
70 pr_dbg("Failed to alloc, bitmap is full\n");
71 qemu_mutex_unlock(&tbl->lock);
72 return NULL;
75 set_bit(*handle, tbl->bitmap);
77 qemu_mutex_unlock(&tbl->lock);
79 memset(tbl->tbl + *handle * tbl->res_sz, 0, tbl->res_sz);
81 pr_dbg("%s, handle=%d\n", tbl->name, *handle);
83 return tbl->tbl + *handle * tbl->res_sz;
86 static inline void res_tbl_dealloc(RdmaRmResTbl *tbl, uint32_t handle)
88 pr_dbg("%s, handle=%d\n", tbl->name, handle);
90 qemu_mutex_lock(&tbl->lock);
92 if (handle < tbl->tbl_sz) {
93 clear_bit(handle, tbl->bitmap);
96 qemu_mutex_unlock(&tbl->lock);
99 int rdma_rm_alloc_pd(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
100 uint32_t *pd_handle, uint32_t ctx_handle)
102 RdmaRmPD *pd;
103 int ret = -ENOMEM;
105 pd = res_tbl_alloc(&dev_res->pd_tbl, pd_handle);
106 if (!pd) {
107 goto out;
110 ret = rdma_backend_create_pd(backend_dev, &pd->backend_pd);
111 if (ret) {
112 ret = -EIO;
113 goto out_tbl_dealloc;
116 pd->ctx_handle = ctx_handle;
118 return 0;
120 out_tbl_dealloc:
121 res_tbl_dealloc(&dev_res->pd_tbl, *pd_handle);
123 out:
124 return ret;
127 RdmaRmPD *rdma_rm_get_pd(RdmaDeviceResources *dev_res, uint32_t pd_handle)
129 return res_tbl_get(&dev_res->pd_tbl, pd_handle);
132 void rdma_rm_dealloc_pd(RdmaDeviceResources *dev_res, uint32_t pd_handle)
134 RdmaRmPD *pd = rdma_rm_get_pd(dev_res, pd_handle);
136 if (pd) {
137 rdma_backend_destroy_pd(&pd->backend_pd);
138 res_tbl_dealloc(&dev_res->pd_tbl, pd_handle);
142 int rdma_rm_alloc_mr(RdmaDeviceResources *dev_res, uint32_t pd_handle,
143 uint64_t guest_start, size_t guest_length, void *host_virt,
144 int access_flags, uint32_t *mr_handle, uint32_t *lkey,
145 uint32_t *rkey)
147 RdmaRmMR *mr;
148 int ret = 0;
149 RdmaRmPD *pd;
151 pd = rdma_rm_get_pd(dev_res, pd_handle);
152 if (!pd) {
153 pr_dbg("Invalid PD\n");
154 return -EINVAL;
157 mr = res_tbl_alloc(&dev_res->mr_tbl, mr_handle);
158 if (!mr) {
159 pr_dbg("Failed to allocate obj in table\n");
160 return -ENOMEM;
162 pr_dbg("mr_handle=%d\n", *mr_handle);
164 pr_dbg("host_virt=0x%p\n", host_virt);
165 pr_dbg("guest_start=0x%" PRIx64 "\n", guest_start);
166 pr_dbg("length=%zu\n", guest_length);
168 if (host_virt) {
169 mr->virt = host_virt;
170 mr->start = guest_start;
171 mr->length = guest_length;
172 mr->virt += (mr->start & (TARGET_PAGE_SIZE - 1));
174 ret = rdma_backend_create_mr(&mr->backend_mr, &pd->backend_pd, mr->virt,
175 mr->length, access_flags);
176 if (ret) {
177 pr_dbg("Fail in rdma_backend_create_mr, err=%d\n", ret);
178 ret = -EIO;
179 goto out_dealloc_mr;
183 /* We keep mr_handle in lkey so send and recv get get mr ptr */
184 *lkey = *mr_handle;
185 *rkey = -1;
187 mr->pd_handle = pd_handle;
189 return 0;
191 out_dealloc_mr:
192 res_tbl_dealloc(&dev_res->mr_tbl, *mr_handle);
194 return ret;
197 RdmaRmMR *rdma_rm_get_mr(RdmaDeviceResources *dev_res, uint32_t mr_handle)
199 return res_tbl_get(&dev_res->mr_tbl, mr_handle);
202 void rdma_rm_dealloc_mr(RdmaDeviceResources *dev_res, uint32_t mr_handle)
204 RdmaRmMR *mr = rdma_rm_get_mr(dev_res, mr_handle);
206 if (mr) {
207 rdma_backend_destroy_mr(&mr->backend_mr);
208 pr_dbg("start=0x%" PRIx64 "\n", mr->start);
209 if (mr->start) {
210 mr->virt -= (mr->start & (TARGET_PAGE_SIZE - 1));
211 munmap(mr->virt, mr->length);
213 res_tbl_dealloc(&dev_res->mr_tbl, mr_handle);
217 int rdma_rm_alloc_uc(RdmaDeviceResources *dev_res, uint32_t pfn,
218 uint32_t *uc_handle)
220 RdmaRmUC *uc;
222 /* TODO: Need to make sure pfn is between bar start address and
223 * bsd+RDMA_BAR2_UAR_SIZE
224 if (pfn > RDMA_BAR2_UAR_SIZE) {
225 pr_err("pfn out of range (%d > %d)\n", pfn, RDMA_BAR2_UAR_SIZE);
226 return -ENOMEM;
230 uc = res_tbl_alloc(&dev_res->uc_tbl, uc_handle);
231 if (!uc) {
232 return -ENOMEM;
235 return 0;
238 RdmaRmUC *rdma_rm_get_uc(RdmaDeviceResources *dev_res, uint32_t uc_handle)
240 return res_tbl_get(&dev_res->uc_tbl, uc_handle);
243 void rdma_rm_dealloc_uc(RdmaDeviceResources *dev_res, uint32_t uc_handle)
245 RdmaRmUC *uc = rdma_rm_get_uc(dev_res, uc_handle);
247 if (uc) {
248 res_tbl_dealloc(&dev_res->uc_tbl, uc_handle);
252 RdmaRmCQ *rdma_rm_get_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle)
254 return res_tbl_get(&dev_res->cq_tbl, cq_handle);
257 int rdma_rm_alloc_cq(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
258 uint32_t cqe, uint32_t *cq_handle, void *opaque)
260 int rc;
261 RdmaRmCQ *cq;
263 cq = res_tbl_alloc(&dev_res->cq_tbl, cq_handle);
264 if (!cq) {
265 return -ENOMEM;
268 cq->opaque = opaque;
269 cq->notify = CNT_CLEAR;
271 rc = rdma_backend_create_cq(backend_dev, &cq->backend_cq, cqe);
272 if (rc) {
273 rc = -EIO;
274 goto out_dealloc_cq;
277 return 0;
279 out_dealloc_cq:
280 rdma_rm_dealloc_cq(dev_res, *cq_handle);
282 return rc;
285 void rdma_rm_req_notify_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle,
286 bool notify)
288 RdmaRmCQ *cq;
290 pr_dbg("cq_handle=%d, notify=0x%x\n", cq_handle, notify);
292 cq = rdma_rm_get_cq(dev_res, cq_handle);
293 if (!cq) {
294 return;
297 if (cq->notify != CNT_SET) {
298 cq->notify = notify ? CNT_ARM : CNT_CLEAR;
301 pr_dbg("notify=%d\n", cq->notify);
304 void rdma_rm_dealloc_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle)
306 RdmaRmCQ *cq;
308 cq = rdma_rm_get_cq(dev_res, cq_handle);
309 if (!cq) {
310 return;
313 rdma_backend_destroy_cq(&cq->backend_cq);
315 res_tbl_dealloc(&dev_res->cq_tbl, cq_handle);
318 RdmaRmQP *rdma_rm_get_qp(RdmaDeviceResources *dev_res, uint32_t qpn)
320 GBytes *key = g_bytes_new(&qpn, sizeof(qpn));
322 RdmaRmQP *qp = g_hash_table_lookup(dev_res->qp_hash, key);
324 g_bytes_unref(key);
326 return qp;
329 int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle,
330 uint8_t qp_type, uint32_t max_send_wr,
331 uint32_t max_send_sge, uint32_t send_cq_handle,
332 uint32_t max_recv_wr, uint32_t max_recv_sge,
333 uint32_t recv_cq_handle, void *opaque, uint32_t *qpn)
335 int rc;
336 RdmaRmQP *qp;
337 RdmaRmCQ *scq, *rcq;
338 RdmaRmPD *pd;
339 uint32_t rm_qpn;
341 pr_dbg("qp_type=%d\n", qp_type);
343 pd = rdma_rm_get_pd(dev_res, pd_handle);
344 if (!pd) {
345 pr_err("Invalid pd handle (%d)\n", pd_handle);
346 return -EINVAL;
349 scq = rdma_rm_get_cq(dev_res, send_cq_handle);
350 rcq = rdma_rm_get_cq(dev_res, recv_cq_handle);
352 if (!scq || !rcq) {
353 pr_err("Invalid send_cqn or recv_cqn (%d, %d)\n",
354 send_cq_handle, recv_cq_handle);
355 return -EINVAL;
358 if (qp_type == IBV_QPT_GSI) {
359 scq->notify = CNT_SET;
360 rcq->notify = CNT_SET;
363 qp = res_tbl_alloc(&dev_res->qp_tbl, &rm_qpn);
364 if (!qp) {
365 return -ENOMEM;
367 pr_dbg("rm_qpn=%d\n", rm_qpn);
369 qp->qpn = rm_qpn;
370 qp->qp_state = IBV_QPS_RESET;
371 qp->qp_type = qp_type;
372 qp->send_cq_handle = send_cq_handle;
373 qp->recv_cq_handle = recv_cq_handle;
374 qp->opaque = opaque;
376 rc = rdma_backend_create_qp(&qp->backend_qp, qp_type, &pd->backend_pd,
377 &scq->backend_cq, &rcq->backend_cq, max_send_wr,
378 max_recv_wr, max_send_sge, max_recv_sge);
379 if (rc) {
380 rc = -EIO;
381 goto out_dealloc_qp;
384 *qpn = rdma_backend_qpn(&qp->backend_qp);
385 pr_dbg("rm_qpn=%d, backend_qpn=0x%x\n", rm_qpn, *qpn);
386 g_hash_table_insert(dev_res->qp_hash, g_bytes_new(qpn, sizeof(*qpn)), qp);
388 return 0;
390 out_dealloc_qp:
391 res_tbl_dealloc(&dev_res->qp_tbl, qp->qpn);
393 return rc;
396 int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
397 uint32_t qp_handle, uint32_t attr_mask, uint8_t sgid_idx,
398 union ibv_gid *dgid, uint32_t dqpn,
399 enum ibv_qp_state qp_state, uint32_t qkey,
400 uint32_t rq_psn, uint32_t sq_psn)
402 RdmaRmQP *qp;
403 int ret;
405 pr_dbg("qpn=0x%x\n", qp_handle);
406 pr_dbg("qkey=0x%x\n", qkey);
408 qp = rdma_rm_get_qp(dev_res, qp_handle);
409 if (!qp) {
410 return -EINVAL;
413 pr_dbg("qp_type=%d\n", qp->qp_type);
414 pr_dbg("attr_mask=0x%x\n", attr_mask);
416 if (qp->qp_type == IBV_QPT_SMI) {
417 pr_dbg("QP0 unsupported\n");
418 return -EPERM;
419 } else if (qp->qp_type == IBV_QPT_GSI) {
420 pr_dbg("QP1\n");
421 return 0;
424 if (attr_mask & IBV_QP_STATE) {
425 qp->qp_state = qp_state;
426 pr_dbg("qp_state=%d\n", qp->qp_state);
428 if (qp->qp_state == IBV_QPS_INIT) {
429 ret = rdma_backend_qp_state_init(backend_dev, &qp->backend_qp,
430 qp->qp_type, qkey);
431 if (ret) {
432 return -EIO;
436 if (qp->qp_state == IBV_QPS_RTR) {
437 /* Get backend gid index */
438 pr_dbg("Guest sgid_idx=%d\n", sgid_idx);
439 sgid_idx = rdma_rm_get_backend_gid_index(dev_res, backend_dev,
440 sgid_idx);
441 if (sgid_idx <= 0) { /* TODO check also less than bk.max_sgid */
442 pr_dbg("Fail to get bk sgid_idx for sgid_idx %d\n", sgid_idx);
443 return -EIO;
446 ret = rdma_backend_qp_state_rtr(backend_dev, &qp->backend_qp,
447 qp->qp_type, sgid_idx, dgid, dqpn,
448 rq_psn, qkey,
449 attr_mask & IBV_QP_QKEY);
450 if (ret) {
451 return -EIO;
455 if (qp->qp_state == IBV_QPS_RTS) {
456 ret = rdma_backend_qp_state_rts(&qp->backend_qp, qp->qp_type,
457 sq_psn, qkey,
458 attr_mask & IBV_QP_QKEY);
459 if (ret) {
460 return -EIO;
465 return 0;
468 int rdma_rm_query_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
469 uint32_t qp_handle, struct ibv_qp_attr *attr,
470 int attr_mask, struct ibv_qp_init_attr *init_attr)
472 RdmaRmQP *qp;
474 pr_dbg("qpn=0x%x\n", qp_handle);
476 qp = rdma_rm_get_qp(dev_res, qp_handle);
477 if (!qp) {
478 return -EINVAL;
481 pr_dbg("qp_type=%d\n", qp->qp_type);
483 return rdma_backend_query_qp(&qp->backend_qp, attr, attr_mask, init_attr);
486 void rdma_rm_dealloc_qp(RdmaDeviceResources *dev_res, uint32_t qp_handle)
488 RdmaRmQP *qp;
489 GBytes *key;
491 key = g_bytes_new(&qp_handle, sizeof(qp_handle));
492 qp = g_hash_table_lookup(dev_res->qp_hash, key);
493 g_hash_table_remove(dev_res->qp_hash, key);
494 g_bytes_unref(key);
496 if (!qp) {
497 return;
500 rdma_backend_destroy_qp(&qp->backend_qp);
502 res_tbl_dealloc(&dev_res->qp_tbl, qp->qpn);
505 void *rdma_rm_get_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id)
507 void **cqe_ctx;
509 cqe_ctx = res_tbl_get(&dev_res->cqe_ctx_tbl, cqe_ctx_id);
510 if (!cqe_ctx) {
511 return NULL;
514 pr_dbg("ctx=%p\n", *cqe_ctx);
516 return *cqe_ctx;
519 int rdma_rm_alloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t *cqe_ctx_id,
520 void *ctx)
522 void **cqe_ctx;
524 cqe_ctx = res_tbl_alloc(&dev_res->cqe_ctx_tbl, cqe_ctx_id);
525 if (!cqe_ctx) {
526 return -ENOMEM;
529 pr_dbg("ctx=%p\n", ctx);
530 *cqe_ctx = ctx;
532 return 0;
535 void rdma_rm_dealloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id)
537 res_tbl_dealloc(&dev_res->cqe_ctx_tbl, cqe_ctx_id);
540 int rdma_rm_add_gid(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
541 const char *ifname, union ibv_gid *gid, int gid_idx)
543 int rc;
545 rc = rdma_backend_add_gid(backend_dev, ifname, gid);
546 if (rc) {
547 pr_dbg("Fail to add gid\n");
548 return -EINVAL;
551 memcpy(&dev_res->port.gid_tbl[gid_idx].gid, gid, sizeof(*gid));
553 return 0;
556 int rdma_rm_del_gid(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
557 const char *ifname, int gid_idx)
559 int rc;
561 if (!dev_res->port.gid_tbl[gid_idx].gid.global.interface_id) {
562 return 0;
565 rc = rdma_backend_del_gid(backend_dev, ifname,
566 &dev_res->port.gid_tbl[gid_idx].gid);
567 if (rc) {
568 pr_dbg("Fail to delete gid\n");
569 return -EINVAL;
572 memset(dev_res->port.gid_tbl[gid_idx].gid.raw, 0,
573 sizeof(dev_res->port.gid_tbl[gid_idx].gid));
574 dev_res->port.gid_tbl[gid_idx].backend_gid_index = -1;
576 return 0;
579 int rdma_rm_get_backend_gid_index(RdmaDeviceResources *dev_res,
580 RdmaBackendDev *backend_dev, int sgid_idx)
582 if (unlikely(sgid_idx < 0 || sgid_idx >= MAX_PORT_GIDS)) {
583 pr_dbg("Got invalid sgid_idx %d\n", sgid_idx);
584 return -EINVAL;
587 if (unlikely(dev_res->port.gid_tbl[sgid_idx].backend_gid_index == -1)) {
588 dev_res->port.gid_tbl[sgid_idx].backend_gid_index =
589 rdma_backend_get_gid_index(backend_dev,
590 &dev_res->port.gid_tbl[sgid_idx].gid);
593 pr_dbg("backend_gid_index=%d\n",
594 dev_res->port.gid_tbl[sgid_idx].backend_gid_index);
596 return dev_res->port.gid_tbl[sgid_idx].backend_gid_index;
599 static void destroy_qp_hash_key(gpointer data)
601 g_bytes_unref(data);
604 static void init_ports(RdmaDeviceResources *dev_res)
606 int i;
608 memset(&dev_res->port, 0, sizeof(dev_res->port));
610 dev_res->port.state = IBV_PORT_DOWN;
611 for (i = 0; i < MAX_PORT_GIDS; i++) {
612 dev_res->port.gid_tbl[i].backend_gid_index = -1;
616 static void fini_ports(RdmaDeviceResources *dev_res,
617 RdmaBackendDev *backend_dev, const char *ifname)
619 int i;
621 dev_res->port.state = IBV_PORT_DOWN;
622 for (i = 0; i < MAX_PORT_GIDS; i++) {
623 rdma_rm_del_gid(dev_res, backend_dev, ifname, i);
627 int rdma_rm_init(RdmaDeviceResources *dev_res, struct ibv_device_attr *dev_attr,
628 Error **errp)
630 dev_res->qp_hash = g_hash_table_new_full(g_bytes_hash, g_bytes_equal,
631 destroy_qp_hash_key, NULL);
632 if (!dev_res->qp_hash) {
633 return -ENOMEM;
636 res_tbl_init("PD", &dev_res->pd_tbl, dev_attr->max_pd, sizeof(RdmaRmPD));
637 res_tbl_init("CQ", &dev_res->cq_tbl, dev_attr->max_cq, sizeof(RdmaRmCQ));
638 res_tbl_init("MR", &dev_res->mr_tbl, dev_attr->max_mr, sizeof(RdmaRmMR));
639 res_tbl_init("QP", &dev_res->qp_tbl, dev_attr->max_qp, sizeof(RdmaRmQP));
640 res_tbl_init("CQE_CTX", &dev_res->cqe_ctx_tbl, dev_attr->max_qp *
641 dev_attr->max_qp_wr, sizeof(void *));
642 res_tbl_init("UC", &dev_res->uc_tbl, MAX_UCS, sizeof(RdmaRmUC));
644 init_ports(dev_res);
646 return 0;
649 void rdma_rm_fini(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
650 const char *ifname)
652 fini_ports(dev_res, backend_dev, ifname);
654 res_tbl_free(&dev_res->uc_tbl);
655 res_tbl_free(&dev_res->cqe_ctx_tbl);
656 res_tbl_free(&dev_res->qp_tbl);
657 res_tbl_free(&dev_res->mr_tbl);
658 res_tbl_free(&dev_res->cq_tbl);
659 res_tbl_free(&dev_res->pd_tbl);
661 if (dev_res->qp_hash) {
662 g_hash_table_destroy(dev_res->qp_hash);