IB/srp: Document srp_map_data() return value
[linux-2.6/btrfs-unstable.git] / drivers / infiniband / ulp / srp / ib_srp.c
blob269f5ebe82de253bbf355d5ad92cecc8b66c1f55
1 /*
2 * Copyright (c) 2005 Cisco Systems. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
33 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
35 #include <linux/module.h>
36 #include <linux/init.h>
37 #include <linux/slab.h>
38 #include <linux/err.h>
39 #include <linux/string.h>
40 #include <linux/parser.h>
41 #include <linux/random.h>
42 #include <linux/jiffies.h>
43 #include <rdma/ib_cache.h>
45 #include <linux/atomic.h>
47 #include <scsi/scsi.h>
48 #include <scsi/scsi_device.h>
49 #include <scsi/scsi_dbg.h>
50 #include <scsi/scsi_tcq.h>
51 #include <scsi/srp.h>
52 #include <scsi/scsi_transport_srp.h>
54 #include "ib_srp.h"
56 #define DRV_NAME "ib_srp"
57 #define PFX DRV_NAME ": "
58 #define DRV_VERSION "2.0"
59 #define DRV_RELDATE "July 26, 2015"
61 MODULE_AUTHOR("Roland Dreier");
62 MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator");
63 MODULE_LICENSE("Dual BSD/GPL");
64 MODULE_VERSION(DRV_VERSION);
65 MODULE_INFO(release_date, DRV_RELDATE);
67 static unsigned int srp_sg_tablesize;
68 static unsigned int cmd_sg_entries;
69 static unsigned int indirect_sg_entries;
70 static bool allow_ext_sg;
71 static bool prefer_fr = true;
72 static bool register_always = true;
73 static int topspin_workarounds = 1;
75 module_param(srp_sg_tablesize, uint, 0444);
76 MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries");
78 module_param(cmd_sg_entries, uint, 0444);
79 MODULE_PARM_DESC(cmd_sg_entries,
80 "Default number of gather/scatter entries in the SRP command (default is 12, max 255)");
82 module_param(indirect_sg_entries, uint, 0444);
83 MODULE_PARM_DESC(indirect_sg_entries,
84 "Default max number of gather/scatter entries (default is 12, max is " __stringify(SCSI_MAX_SG_CHAIN_SEGMENTS) ")");
86 module_param(allow_ext_sg, bool, 0444);
87 MODULE_PARM_DESC(allow_ext_sg,
88 "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)");
90 module_param(topspin_workarounds, int, 0444);
91 MODULE_PARM_DESC(topspin_workarounds,
92 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");
94 module_param(prefer_fr, bool, 0444);
95 MODULE_PARM_DESC(prefer_fr,
96 "Whether to use fast registration if both FMR and fast registration are supported");
98 module_param(register_always, bool, 0444);
99 MODULE_PARM_DESC(register_always,
100 "Use memory registration even for contiguous memory regions");
102 static const struct kernel_param_ops srp_tmo_ops;
104 static int srp_reconnect_delay = 10;
105 module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
106 S_IRUGO | S_IWUSR);
107 MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");
109 static int srp_fast_io_fail_tmo = 15;
110 module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
111 S_IRUGO | S_IWUSR);
112 MODULE_PARM_DESC(fast_io_fail_tmo,
113 "Number of seconds between the observation of a transport"
114 " layer error and failing all I/O. \"off\" means that this"
115 " functionality is disabled.");
117 static int srp_dev_loss_tmo = 600;
118 module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
119 S_IRUGO | S_IWUSR);
120 MODULE_PARM_DESC(dev_loss_tmo,
121 "Maximum number of seconds that the SRP transport should"
122 " insulate transport layer errors. After this time has been"
123 " exceeded the SCSI host is removed. Should be"
124 " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
125 " if fast_io_fail_tmo has not been set. \"off\" means that"
126 " this functionality is disabled.");
128 static unsigned ch_count;
129 module_param(ch_count, uint, 0444);
130 MODULE_PARM_DESC(ch_count,
131 "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA.");
133 static void srp_add_one(struct ib_device *device);
134 static void srp_remove_one(struct ib_device *device, void *client_data);
135 static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc);
136 static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
137 const char *opname);
138 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
140 static struct scsi_transport_template *ib_srp_transport_template;
141 static struct workqueue_struct *srp_remove_wq;
143 static struct ib_client srp_client = {
144 .name = "srp",
145 .add = srp_add_one,
146 .remove = srp_remove_one
149 static struct ib_sa_client srp_sa_client;
151 static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
153 int tmo = *(int *)kp->arg;
155 if (tmo >= 0)
156 return sprintf(buffer, "%d", tmo);
157 else
158 return sprintf(buffer, "off");
161 static int srp_tmo_set(const char *val, const struct kernel_param *kp)
163 int tmo, res;
165 res = srp_parse_tmo(&tmo, val);
166 if (res)
167 goto out;
169 if (kp->arg == &srp_reconnect_delay)
170 res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
171 srp_dev_loss_tmo);
172 else if (kp->arg == &srp_fast_io_fail_tmo)
173 res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
174 else
175 res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
176 tmo);
177 if (res)
178 goto out;
179 *(int *)kp->arg = tmo;
181 out:
182 return res;
185 static const struct kernel_param_ops srp_tmo_ops = {
186 .get = srp_tmo_get,
187 .set = srp_tmo_set,
190 static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
192 return (struct srp_target_port *) host->hostdata;
195 static const char *srp_target_info(struct Scsi_Host *host)
197 return host_to_target(host)->target_name;
200 static int srp_target_is_topspin(struct srp_target_port *target)
202 static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad };
203 static const u8 cisco_oui[3] = { 0x00, 0x1b, 0x0d };
205 return topspin_workarounds &&
206 (!memcmp(&target->ioc_guid, topspin_oui, sizeof topspin_oui) ||
207 !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui));
210 static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size,
211 gfp_t gfp_mask,
212 enum dma_data_direction direction)
214 struct srp_iu *iu;
216 iu = kmalloc(sizeof *iu, gfp_mask);
217 if (!iu)
218 goto out;
220 iu->buf = kzalloc(size, gfp_mask);
221 if (!iu->buf)
222 goto out_free_iu;
224 iu->dma = ib_dma_map_single(host->srp_dev->dev, iu->buf, size,
225 direction);
226 if (ib_dma_mapping_error(host->srp_dev->dev, iu->dma))
227 goto out_free_buf;
229 iu->size = size;
230 iu->direction = direction;
232 return iu;
234 out_free_buf:
235 kfree(iu->buf);
236 out_free_iu:
237 kfree(iu);
238 out:
239 return NULL;
242 static void srp_free_iu(struct srp_host *host, struct srp_iu *iu)
244 if (!iu)
245 return;
247 ib_dma_unmap_single(host->srp_dev->dev, iu->dma, iu->size,
248 iu->direction);
249 kfree(iu->buf);
250 kfree(iu);
253 static void srp_qp_event(struct ib_event *event, void *context)
255 pr_debug("QP event %s (%d)\n",
256 ib_event_msg(event->event), event->event);
259 static int srp_init_qp(struct srp_target_port *target,
260 struct ib_qp *qp)
262 struct ib_qp_attr *attr;
263 int ret;
265 attr = kmalloc(sizeof *attr, GFP_KERNEL);
266 if (!attr)
267 return -ENOMEM;
269 ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev,
270 target->srp_host->port,
271 be16_to_cpu(target->pkey),
272 &attr->pkey_index);
273 if (ret)
274 goto out;
276 attr->qp_state = IB_QPS_INIT;
277 attr->qp_access_flags = (IB_ACCESS_REMOTE_READ |
278 IB_ACCESS_REMOTE_WRITE);
279 attr->port_num = target->srp_host->port;
281 ret = ib_modify_qp(qp, attr,
282 IB_QP_STATE |
283 IB_QP_PKEY_INDEX |
284 IB_QP_ACCESS_FLAGS |
285 IB_QP_PORT);
287 out:
288 kfree(attr);
289 return ret;
292 static int srp_new_cm_id(struct srp_rdma_ch *ch)
294 struct srp_target_port *target = ch->target;
295 struct ib_cm_id *new_cm_id;
297 new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev,
298 srp_cm_handler, ch);
299 if (IS_ERR(new_cm_id))
300 return PTR_ERR(new_cm_id);
302 if (ch->cm_id)
303 ib_destroy_cm_id(ch->cm_id);
304 ch->cm_id = new_cm_id;
305 ch->path.sgid = target->sgid;
306 ch->path.dgid = target->orig_dgid;
307 ch->path.pkey = target->pkey;
308 ch->path.service_id = target->service_id;
310 return 0;
313 static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
315 struct srp_device *dev = target->srp_host->srp_dev;
316 struct ib_fmr_pool_param fmr_param;
318 memset(&fmr_param, 0, sizeof(fmr_param));
319 fmr_param.pool_size = target->scsi_host->can_queue;
320 fmr_param.dirty_watermark = fmr_param.pool_size / 4;
321 fmr_param.cache = 1;
322 fmr_param.max_pages_per_fmr = dev->max_pages_per_mr;
323 fmr_param.page_shift = ilog2(dev->mr_page_size);
324 fmr_param.access = (IB_ACCESS_LOCAL_WRITE |
325 IB_ACCESS_REMOTE_WRITE |
326 IB_ACCESS_REMOTE_READ);
328 return ib_create_fmr_pool(dev->pd, &fmr_param);
332 * srp_destroy_fr_pool() - free the resources owned by a pool
333 * @pool: Fast registration pool to be destroyed.
335 static void srp_destroy_fr_pool(struct srp_fr_pool *pool)
337 int i;
338 struct srp_fr_desc *d;
340 if (!pool)
341 return;
343 for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
344 if (d->mr)
345 ib_dereg_mr(d->mr);
347 kfree(pool);
351 * srp_create_fr_pool() - allocate and initialize a pool for fast registration
352 * @device: IB device to allocate fast registration descriptors for.
353 * @pd: Protection domain associated with the FR descriptors.
354 * @pool_size: Number of descriptors to allocate.
355 * @max_page_list_len: Maximum fast registration work request page list length.
357 static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
358 struct ib_pd *pd, int pool_size,
359 int max_page_list_len)
361 struct srp_fr_pool *pool;
362 struct srp_fr_desc *d;
363 struct ib_mr *mr;
364 int i, ret = -EINVAL;
366 if (pool_size <= 0)
367 goto err;
368 ret = -ENOMEM;
369 pool = kzalloc(sizeof(struct srp_fr_pool) +
370 pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL);
371 if (!pool)
372 goto err;
373 pool->size = pool_size;
374 pool->max_page_list_len = max_page_list_len;
375 spin_lock_init(&pool->lock);
376 INIT_LIST_HEAD(&pool->free_list);
378 for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
379 mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
380 max_page_list_len);
381 if (IS_ERR(mr)) {
382 ret = PTR_ERR(mr);
383 goto destroy_pool;
385 d->mr = mr;
386 list_add_tail(&d->entry, &pool->free_list);
389 out:
390 return pool;
392 destroy_pool:
393 srp_destroy_fr_pool(pool);
395 err:
396 pool = ERR_PTR(ret);
397 goto out;
401 * srp_fr_pool_get() - obtain a descriptor suitable for fast registration
402 * @pool: Pool to obtain descriptor from.
404 static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool)
406 struct srp_fr_desc *d = NULL;
407 unsigned long flags;
409 spin_lock_irqsave(&pool->lock, flags);
410 if (!list_empty(&pool->free_list)) {
411 d = list_first_entry(&pool->free_list, typeof(*d), entry);
412 list_del(&d->entry);
414 spin_unlock_irqrestore(&pool->lock, flags);
416 return d;
420 * srp_fr_pool_put() - put an FR descriptor back in the free list
421 * @pool: Pool the descriptor was allocated from.
422 * @desc: Pointer to an array of fast registration descriptor pointers.
423 * @n: Number of descriptors to put back.
425 * Note: The caller must already have queued an invalidation request for
426 * desc->mr->rkey before calling this function.
428 static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc,
429 int n)
431 unsigned long flags;
432 int i;
434 spin_lock_irqsave(&pool->lock, flags);
435 for (i = 0; i < n; i++)
436 list_add(&desc[i]->entry, &pool->free_list);
437 spin_unlock_irqrestore(&pool->lock, flags);
440 static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
442 struct srp_device *dev = target->srp_host->srp_dev;
444 return srp_create_fr_pool(dev->dev, dev->pd,
445 target->scsi_host->can_queue,
446 dev->max_pages_per_mr);
450 * srp_destroy_qp() - destroy an RDMA queue pair
451 * @ch: SRP RDMA channel.
453 * Drain the qp before destroying it. This avoids that the receive
454 * completion handler can access the queue pair while it is
455 * being destroyed.
457 static void srp_destroy_qp(struct srp_rdma_ch *ch)
459 ib_drain_rq(ch->qp);
460 ib_destroy_qp(ch->qp);
463 static int srp_create_ch_ib(struct srp_rdma_ch *ch)
465 struct srp_target_port *target = ch->target;
466 struct srp_device *dev = target->srp_host->srp_dev;
467 struct ib_qp_init_attr *init_attr;
468 struct ib_cq *recv_cq, *send_cq;
469 struct ib_qp *qp;
470 struct ib_fmr_pool *fmr_pool = NULL;
471 struct srp_fr_pool *fr_pool = NULL;
472 const int m = dev->use_fast_reg ? 3 : 1;
473 int ret;
475 init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
476 if (!init_attr)
477 return -ENOMEM;
479 /* queue_size + 1 for ib_drain_rq() */
480 recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1,
481 ch->comp_vector, IB_POLL_SOFTIRQ);
482 if (IS_ERR(recv_cq)) {
483 ret = PTR_ERR(recv_cq);
484 goto err;
487 send_cq = ib_alloc_cq(dev->dev, ch, m * target->queue_size,
488 ch->comp_vector, IB_POLL_DIRECT);
489 if (IS_ERR(send_cq)) {
490 ret = PTR_ERR(send_cq);
491 goto err_recv_cq;
494 init_attr->event_handler = srp_qp_event;
495 init_attr->cap.max_send_wr = m * target->queue_size;
496 init_attr->cap.max_recv_wr = target->queue_size + 1;
497 init_attr->cap.max_recv_sge = 1;
498 init_attr->cap.max_send_sge = 1;
499 init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
500 init_attr->qp_type = IB_QPT_RC;
501 init_attr->send_cq = send_cq;
502 init_attr->recv_cq = recv_cq;
504 qp = ib_create_qp(dev->pd, init_attr);
505 if (IS_ERR(qp)) {
506 ret = PTR_ERR(qp);
507 goto err_send_cq;
510 ret = srp_init_qp(target, qp);
511 if (ret)
512 goto err_qp;
514 if (dev->use_fast_reg) {
515 fr_pool = srp_alloc_fr_pool(target);
516 if (IS_ERR(fr_pool)) {
517 ret = PTR_ERR(fr_pool);
518 shost_printk(KERN_WARNING, target->scsi_host, PFX
519 "FR pool allocation failed (%d)\n", ret);
520 goto err_qp;
522 } else if (dev->use_fmr) {
523 fmr_pool = srp_alloc_fmr_pool(target);
524 if (IS_ERR(fmr_pool)) {
525 ret = PTR_ERR(fmr_pool);
526 shost_printk(KERN_WARNING, target->scsi_host, PFX
527 "FMR pool allocation failed (%d)\n", ret);
528 goto err_qp;
532 if (ch->qp)
533 srp_destroy_qp(ch);
534 if (ch->recv_cq)
535 ib_free_cq(ch->recv_cq);
536 if (ch->send_cq)
537 ib_free_cq(ch->send_cq);
539 ch->qp = qp;
540 ch->recv_cq = recv_cq;
541 ch->send_cq = send_cq;
543 if (dev->use_fast_reg) {
544 if (ch->fr_pool)
545 srp_destroy_fr_pool(ch->fr_pool);
546 ch->fr_pool = fr_pool;
547 } else if (dev->use_fmr) {
548 if (ch->fmr_pool)
549 ib_destroy_fmr_pool(ch->fmr_pool);
550 ch->fmr_pool = fmr_pool;
553 kfree(init_attr);
554 return 0;
556 err_qp:
557 srp_destroy_qp(ch);
559 err_send_cq:
560 ib_free_cq(send_cq);
562 err_recv_cq:
563 ib_free_cq(recv_cq);
565 err:
566 kfree(init_attr);
567 return ret;
571 * Note: this function may be called without srp_alloc_iu_bufs() having been
572 * invoked. Hence the ch->[rt]x_ring checks.
574 static void srp_free_ch_ib(struct srp_target_port *target,
575 struct srp_rdma_ch *ch)
577 struct srp_device *dev = target->srp_host->srp_dev;
578 int i;
580 if (!ch->target)
581 return;
583 if (ch->cm_id) {
584 ib_destroy_cm_id(ch->cm_id);
585 ch->cm_id = NULL;
588 /* If srp_new_cm_id() succeeded but srp_create_ch_ib() not, return. */
589 if (!ch->qp)
590 return;
592 if (dev->use_fast_reg) {
593 if (ch->fr_pool)
594 srp_destroy_fr_pool(ch->fr_pool);
595 } else if (dev->use_fmr) {
596 if (ch->fmr_pool)
597 ib_destroy_fmr_pool(ch->fmr_pool);
600 srp_destroy_qp(ch);
601 ib_free_cq(ch->send_cq);
602 ib_free_cq(ch->recv_cq);
605 * Avoid that the SCSI error handler tries to use this channel after
606 * it has been freed. The SCSI error handler can namely continue
607 * trying to perform recovery actions after scsi_remove_host()
608 * returned.
610 ch->target = NULL;
612 ch->qp = NULL;
613 ch->send_cq = ch->recv_cq = NULL;
615 if (ch->rx_ring) {
616 for (i = 0; i < target->queue_size; ++i)
617 srp_free_iu(target->srp_host, ch->rx_ring[i]);
618 kfree(ch->rx_ring);
619 ch->rx_ring = NULL;
621 if (ch->tx_ring) {
622 for (i = 0; i < target->queue_size; ++i)
623 srp_free_iu(target->srp_host, ch->tx_ring[i]);
624 kfree(ch->tx_ring);
625 ch->tx_ring = NULL;
629 static void srp_path_rec_completion(int status,
630 struct ib_sa_path_rec *pathrec,
631 void *ch_ptr)
633 struct srp_rdma_ch *ch = ch_ptr;
634 struct srp_target_port *target = ch->target;
636 ch->status = status;
637 if (status)
638 shost_printk(KERN_ERR, target->scsi_host,
639 PFX "Got failed path rec status %d\n", status);
640 else
641 ch->path = *pathrec;
642 complete(&ch->done);
645 static int srp_lookup_path(struct srp_rdma_ch *ch)
647 struct srp_target_port *target = ch->target;
648 int ret;
650 ch->path.numb_path = 1;
652 init_completion(&ch->done);
654 ch->path_query_id = ib_sa_path_rec_get(&srp_sa_client,
655 target->srp_host->srp_dev->dev,
656 target->srp_host->port,
657 &ch->path,
658 IB_SA_PATH_REC_SERVICE_ID |
659 IB_SA_PATH_REC_DGID |
660 IB_SA_PATH_REC_SGID |
661 IB_SA_PATH_REC_NUMB_PATH |
662 IB_SA_PATH_REC_PKEY,
663 SRP_PATH_REC_TIMEOUT_MS,
664 GFP_KERNEL,
665 srp_path_rec_completion,
666 ch, &ch->path_query);
667 if (ch->path_query_id < 0)
668 return ch->path_query_id;
670 ret = wait_for_completion_interruptible(&ch->done);
671 if (ret < 0)
672 return ret;
674 if (ch->status < 0)
675 shost_printk(KERN_WARNING, target->scsi_host,
676 PFX "Path record query failed\n");
678 return ch->status;
681 static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
683 struct srp_target_port *target = ch->target;
684 struct {
685 struct ib_cm_req_param param;
686 struct srp_login_req priv;
687 } *req = NULL;
688 int status;
690 req = kzalloc(sizeof *req, GFP_KERNEL);
691 if (!req)
692 return -ENOMEM;
694 req->param.primary_path = &ch->path;
695 req->param.alternate_path = NULL;
696 req->param.service_id = target->service_id;
697 req->param.qp_num = ch->qp->qp_num;
698 req->param.qp_type = ch->qp->qp_type;
699 req->param.private_data = &req->priv;
700 req->param.private_data_len = sizeof req->priv;
701 req->param.flow_control = 1;
703 get_random_bytes(&req->param.starting_psn, 4);
704 req->param.starting_psn &= 0xffffff;
707 * Pick some arbitrary defaults here; we could make these
708 * module parameters if anyone cared about setting them.
710 req->param.responder_resources = 4;
711 req->param.remote_cm_response_timeout = 20;
712 req->param.local_cm_response_timeout = 20;
713 req->param.retry_count = target->tl_retry_count;
714 req->param.rnr_retry_count = 7;
715 req->param.max_cm_retries = 15;
717 req->priv.opcode = SRP_LOGIN_REQ;
718 req->priv.tag = 0;
719 req->priv.req_it_iu_len = cpu_to_be32(target->max_iu_len);
720 req->priv.req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
721 SRP_BUF_FORMAT_INDIRECT);
722 req->priv.req_flags = (multich ? SRP_MULTICHAN_MULTI :
723 SRP_MULTICHAN_SINGLE);
725 * In the published SRP specification (draft rev. 16a), the
726 * port identifier format is 8 bytes of ID extension followed
727 * by 8 bytes of GUID. Older drafts put the two halves in the
728 * opposite order, so that the GUID comes first.
730 * Targets conforming to these obsolete drafts can be
731 * recognized by the I/O Class they report.
733 if (target->io_class == SRP_REV10_IB_IO_CLASS) {
734 memcpy(req->priv.initiator_port_id,
735 &target->sgid.global.interface_id, 8);
736 memcpy(req->priv.initiator_port_id + 8,
737 &target->initiator_ext, 8);
738 memcpy(req->priv.target_port_id, &target->ioc_guid, 8);
739 memcpy(req->priv.target_port_id + 8, &target->id_ext, 8);
740 } else {
741 memcpy(req->priv.initiator_port_id,
742 &target->initiator_ext, 8);
743 memcpy(req->priv.initiator_port_id + 8,
744 &target->sgid.global.interface_id, 8);
745 memcpy(req->priv.target_port_id, &target->id_ext, 8);
746 memcpy(req->priv.target_port_id + 8, &target->ioc_guid, 8);
750 * Topspin/Cisco SRP targets will reject our login unless we
751 * zero out the first 8 bytes of our initiator port ID and set
752 * the second 8 bytes to the local node GUID.
754 if (srp_target_is_topspin(target)) {
755 shost_printk(KERN_DEBUG, target->scsi_host,
756 PFX "Topspin/Cisco initiator port ID workaround "
757 "activated for target GUID %016llx\n",
758 be64_to_cpu(target->ioc_guid));
759 memset(req->priv.initiator_port_id, 0, 8);
760 memcpy(req->priv.initiator_port_id + 8,
761 &target->srp_host->srp_dev->dev->node_guid, 8);
764 status = ib_send_cm_req(ch->cm_id, &req->param);
766 kfree(req);
768 return status;
771 static bool srp_queue_remove_work(struct srp_target_port *target)
773 bool changed = false;
775 spin_lock_irq(&target->lock);
776 if (target->state != SRP_TARGET_REMOVED) {
777 target->state = SRP_TARGET_REMOVED;
778 changed = true;
780 spin_unlock_irq(&target->lock);
782 if (changed)
783 queue_work(srp_remove_wq, &target->remove_work);
785 return changed;
788 static void srp_disconnect_target(struct srp_target_port *target)
790 struct srp_rdma_ch *ch;
791 int i;
793 /* XXX should send SRP_I_LOGOUT request */
795 for (i = 0; i < target->ch_count; i++) {
796 ch = &target->ch[i];
797 ch->connected = false;
798 if (ch->cm_id && ib_send_cm_dreq(ch->cm_id, NULL, 0)) {
799 shost_printk(KERN_DEBUG, target->scsi_host,
800 PFX "Sending CM DREQ failed\n");
805 static void srp_free_req_data(struct srp_target_port *target,
806 struct srp_rdma_ch *ch)
808 struct srp_device *dev = target->srp_host->srp_dev;
809 struct ib_device *ibdev = dev->dev;
810 struct srp_request *req;
811 int i;
813 if (!ch->req_ring)
814 return;
816 for (i = 0; i < target->req_ring_size; ++i) {
817 req = &ch->req_ring[i];
818 if (dev->use_fast_reg) {
819 kfree(req->fr_list);
820 } else {
821 kfree(req->fmr_list);
822 kfree(req->map_page);
824 if (req->indirect_dma_addr) {
825 ib_dma_unmap_single(ibdev, req->indirect_dma_addr,
826 target->indirect_size,
827 DMA_TO_DEVICE);
829 kfree(req->indirect_desc);
832 kfree(ch->req_ring);
833 ch->req_ring = NULL;
836 static int srp_alloc_req_data(struct srp_rdma_ch *ch)
838 struct srp_target_port *target = ch->target;
839 struct srp_device *srp_dev = target->srp_host->srp_dev;
840 struct ib_device *ibdev = srp_dev->dev;
841 struct srp_request *req;
842 void *mr_list;
843 dma_addr_t dma_addr;
844 int i, ret = -ENOMEM;
846 ch->req_ring = kcalloc(target->req_ring_size, sizeof(*ch->req_ring),
847 GFP_KERNEL);
848 if (!ch->req_ring)
849 goto out;
851 for (i = 0; i < target->req_ring_size; ++i) {
852 req = &ch->req_ring[i];
853 mr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *),
854 GFP_KERNEL);
855 if (!mr_list)
856 goto out;
857 if (srp_dev->use_fast_reg) {
858 req->fr_list = mr_list;
859 } else {
860 req->fmr_list = mr_list;
861 req->map_page = kmalloc(srp_dev->max_pages_per_mr *
862 sizeof(void *), GFP_KERNEL);
863 if (!req->map_page)
864 goto out;
866 req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
867 if (!req->indirect_desc)
868 goto out;
870 dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
871 target->indirect_size,
872 DMA_TO_DEVICE);
873 if (ib_dma_mapping_error(ibdev, dma_addr))
874 goto out;
876 req->indirect_dma_addr = dma_addr;
878 ret = 0;
880 out:
881 return ret;
885 * srp_del_scsi_host_attr() - Remove attributes defined in the host template.
886 * @shost: SCSI host whose attributes to remove from sysfs.
888 * Note: Any attributes defined in the host template and that did not exist
889 * before invocation of this function will be ignored.
891 static void srp_del_scsi_host_attr(struct Scsi_Host *shost)
893 struct device_attribute **attr;
895 for (attr = shost->hostt->shost_attrs; attr && *attr; ++attr)
896 device_remove_file(&shost->shost_dev, *attr);
899 static void srp_remove_target(struct srp_target_port *target)
901 struct srp_rdma_ch *ch;
902 int i;
904 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
906 srp_del_scsi_host_attr(target->scsi_host);
907 srp_rport_get(target->rport);
908 srp_remove_host(target->scsi_host);
909 scsi_remove_host(target->scsi_host);
910 srp_stop_rport_timers(target->rport);
911 srp_disconnect_target(target);
912 for (i = 0; i < target->ch_count; i++) {
913 ch = &target->ch[i];
914 srp_free_ch_ib(target, ch);
916 cancel_work_sync(&target->tl_err_work);
917 srp_rport_put(target->rport);
918 for (i = 0; i < target->ch_count; i++) {
919 ch = &target->ch[i];
920 srp_free_req_data(target, ch);
922 kfree(target->ch);
923 target->ch = NULL;
925 spin_lock(&target->srp_host->target_lock);
926 list_del(&target->list);
927 spin_unlock(&target->srp_host->target_lock);
929 scsi_host_put(target->scsi_host);
932 static void srp_remove_work(struct work_struct *work)
934 struct srp_target_port *target =
935 container_of(work, struct srp_target_port, remove_work);
937 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
939 srp_remove_target(target);
942 static void srp_rport_delete(struct srp_rport *rport)
944 struct srp_target_port *target = rport->lld_data;
946 srp_queue_remove_work(target);
950 * srp_connected_ch() - number of connected channels
951 * @target: SRP target port.
953 static int srp_connected_ch(struct srp_target_port *target)
955 int i, c = 0;
957 for (i = 0; i < target->ch_count; i++)
958 c += target->ch[i].connected;
960 return c;
963 static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
965 struct srp_target_port *target = ch->target;
966 int ret;
968 WARN_ON_ONCE(!multich && srp_connected_ch(target) > 0);
970 ret = srp_lookup_path(ch);
971 if (ret)
972 goto out;
974 while (1) {
975 init_completion(&ch->done);
976 ret = srp_send_req(ch, multich);
977 if (ret)
978 goto out;
979 ret = wait_for_completion_interruptible(&ch->done);
980 if (ret < 0)
981 goto out;
984 * The CM event handling code will set status to
985 * SRP_PORT_REDIRECT if we get a port redirect REJ
986 * back, or SRP_DLID_REDIRECT if we get a lid/qp
987 * redirect REJ back.
989 ret = ch->status;
990 switch (ret) {
991 case 0:
992 ch->connected = true;
993 goto out;
995 case SRP_PORT_REDIRECT:
996 ret = srp_lookup_path(ch);
997 if (ret)
998 goto out;
999 break;
1001 case SRP_DLID_REDIRECT:
1002 break;
1004 case SRP_STALE_CONN:
1005 shost_printk(KERN_ERR, target->scsi_host, PFX
1006 "giving up on stale connection\n");
1007 ret = -ECONNRESET;
1008 goto out;
1010 default:
1011 goto out;
1015 out:
1016 return ret <= 0 ? ret : -ENODEV;
/* Completion handler for local invalidate work requests. */
static void srp_inv_rkey_err_done(struct ib_cq *cq, struct ib_wc *wc)
{
	srp_handle_qp_err(cq, wc, "INV RKEY");
}
1024 static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch,
1025 u32 rkey)
1027 struct ib_send_wr *bad_wr;
1028 struct ib_send_wr wr = {
1029 .opcode = IB_WR_LOCAL_INV,
1030 .next = NULL,
1031 .num_sge = 0,
1032 .send_flags = 0,
1033 .ex.invalidate_rkey = rkey,
1036 wr.wr_cqe = &req->reg_cqe;
1037 req->reg_cqe.done = srp_inv_rkey_err_done;
1038 return ib_post_send(ch->qp, &wr, &bad_wr);
1041 static void srp_unmap_data(struct scsi_cmnd *scmnd,
1042 struct srp_rdma_ch *ch,
1043 struct srp_request *req)
1045 struct srp_target_port *target = ch->target;
1046 struct srp_device *dev = target->srp_host->srp_dev;
1047 struct ib_device *ibdev = dev->dev;
1048 int i, res;
1050 if (!scsi_sglist(scmnd) ||
1051 (scmnd->sc_data_direction != DMA_TO_DEVICE &&
1052 scmnd->sc_data_direction != DMA_FROM_DEVICE))
1053 return;
1055 if (dev->use_fast_reg) {
1056 struct srp_fr_desc **pfr;
1058 for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
1059 res = srp_inv_rkey(req, ch, (*pfr)->mr->rkey);
1060 if (res < 0) {
1061 shost_printk(KERN_ERR, target->scsi_host, PFX
1062 "Queueing INV WR for rkey %#x failed (%d)\n",
1063 (*pfr)->mr->rkey, res);
1064 queue_work(system_long_wq,
1065 &target->tl_err_work);
1068 if (req->nmdesc)
1069 srp_fr_pool_put(ch->fr_pool, req->fr_list,
1070 req->nmdesc);
1071 } else if (dev->use_fmr) {
1072 struct ib_pool_fmr **pfmr;
1074 for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++)
1075 ib_fmr_pool_unmap(*pfmr);
1078 ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd),
1079 scmnd->sc_data_direction);
1083 * srp_claim_req - Take ownership of the scmnd associated with a request.
1084 * @ch: SRP RDMA channel.
1085 * @req: SRP request.
1086 * @sdev: If not NULL, only take ownership for this SCSI device.
1087 * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
1088 * ownership of @req->scmnd if it equals @scmnd.
1090 * Return value:
1091 * Either NULL or a pointer to the SCSI command the caller became owner of.
1093 static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch,
1094 struct srp_request *req,
1095 struct scsi_device *sdev,
1096 struct scsi_cmnd *scmnd)
1098 unsigned long flags;
1100 spin_lock_irqsave(&ch->lock, flags);
1101 if (req->scmnd &&
1102 (!sdev || req->scmnd->device == sdev) &&
1103 (!scmnd || req->scmnd == scmnd)) {
1104 scmnd = req->scmnd;
1105 req->scmnd = NULL;
1106 } else {
1107 scmnd = NULL;
1109 spin_unlock_irqrestore(&ch->lock, flags);
1111 return scmnd;
1115 * srp_free_req() - Unmap data and adjust ch->req_lim.
1116 * @ch: SRP RDMA channel.
1117 * @req: Request to be freed.
1118 * @scmnd: SCSI command associated with @req.
1119 * @req_lim_delta: Amount to be added to @target->req_lim.
1121 static void srp_free_req(struct srp_rdma_ch *ch, struct srp_request *req,
1122 struct scsi_cmnd *scmnd, s32 req_lim_delta)
1124 unsigned long flags;
1126 srp_unmap_data(scmnd, ch, req);
1128 spin_lock_irqsave(&ch->lock, flags);
1129 ch->req_lim += req_lim_delta;
1130 spin_unlock_irqrestore(&ch->lock, flags);
1133 static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req,
1134 struct scsi_device *sdev, int result)
1136 struct scsi_cmnd *scmnd = srp_claim_req(ch, req, sdev, NULL);
1138 if (scmnd) {
1139 srp_free_req(ch, req, scmnd, 0);
1140 scmnd->result = result;
1141 scmnd->scsi_done(scmnd);
1145 static void srp_terminate_io(struct srp_rport *rport)
1147 struct srp_target_port *target = rport->lld_data;
1148 struct srp_rdma_ch *ch;
1149 struct Scsi_Host *shost = target->scsi_host;
1150 struct scsi_device *sdev;
1151 int i, j;
1154 * Invoking srp_terminate_io() while srp_queuecommand() is running
1155 * is not safe. Hence the warning statement below.
1157 shost_for_each_device(sdev, shost)
1158 WARN_ON_ONCE(sdev->request_queue->request_fn_active);
1160 for (i = 0; i < target->ch_count; i++) {
1161 ch = &target->ch[i];
1163 for (j = 0; j < target->req_ring_size; ++j) {
1164 struct srp_request *req = &ch->req_ring[j];
1166 srp_finish_req(ch, req, NULL,
1167 DID_TRANSPORT_FAILFAST << 16);
1173 * It is up to the caller to ensure that srp_rport_reconnect() calls are
1174 * serialized and that no concurrent srp_queuecommand(), srp_abort(),
1175 * srp_reset_device() or srp_reset_host() calls will occur while this function
1176 * is in progress. One way to realize that is not to call this function
1177 * directly but to call srp_reconnect_rport() instead since that last function
1178 * serializes calls of this function via rport->mutex and also blocks
1179 * srp_queuecommand() calls before invoking this function.
1181 static int srp_rport_reconnect(struct srp_rport *rport)
1183 struct srp_target_port *target = rport->lld_data;
1184 struct srp_rdma_ch *ch;
1185 int i, j, ret = 0;
1186 bool multich = false;
1188 srp_disconnect_target(target);
1190 if (target->state == SRP_TARGET_SCANNING)
1191 return -ENODEV;
1194 * Now get a new local CM ID so that we avoid confusing the target in
1195 * case things are really fouled up. Doing so also ensures that all CM
1196 * callbacks will have finished before a new QP is allocated.
1198 for (i = 0; i < target->ch_count; i++) {
1199 ch = &target->ch[i];
1200 ret += srp_new_cm_id(ch);
1202 for (i = 0; i < target->ch_count; i++) {
1203 ch = &target->ch[i];
1204 for (j = 0; j < target->req_ring_size; ++j) {
1205 struct srp_request *req = &ch->req_ring[j];
1207 srp_finish_req(ch, req, NULL, DID_RESET << 16);
1210 for (i = 0; i < target->ch_count; i++) {
1211 ch = &target->ch[i];
1213 * Whether or not creating a new CM ID succeeded, create a new
1214 * QP. This guarantees that all completion callback function
1215 * invocations have finished before request resetting starts.
1217 ret += srp_create_ch_ib(ch);
1219 INIT_LIST_HEAD(&ch->free_tx);
1220 for (j = 0; j < target->queue_size; ++j)
1221 list_add(&ch->tx_ring[j]->list, &ch->free_tx);
1224 target->qp_in_error = false;
1226 for (i = 0; i < target->ch_count; i++) {
1227 ch = &target->ch[i];
1228 if (ret)
1229 break;
1230 ret = srp_connect_ch(ch, multich);
1231 multich = true;
1234 if (ret == 0)
1235 shost_printk(KERN_INFO, target->scsi_host,
1236 PFX "reconnect succeeded\n");
1238 return ret;
1241 static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,
1242 unsigned int dma_len, u32 rkey)
1244 struct srp_direct_buf *desc = state->desc;
1246 WARN_ON_ONCE(!dma_len);
1248 desc->va = cpu_to_be64(dma_addr);
1249 desc->key = cpu_to_be32(rkey);
1250 desc->len = cpu_to_be32(dma_len);
1252 state->total_len += dma_len;
1253 state->desc++;
1254 state->ndesc++;
1257 static int srp_map_finish_fmr(struct srp_map_state *state,
1258 struct srp_rdma_ch *ch)
1260 struct srp_target_port *target = ch->target;
1261 struct srp_device *dev = target->srp_host->srp_dev;
1262 struct ib_pool_fmr *fmr;
1263 u64 io_addr = 0;
1265 if (state->fmr.next >= state->fmr.end)
1266 return -ENOMEM;
1268 WARN_ON_ONCE(!dev->use_fmr);
1270 if (state->npages == 0)
1271 return 0;
1273 if (state->npages == 1 && target->global_mr) {
1274 srp_map_desc(state, state->base_dma_addr, state->dma_len,
1275 target->global_mr->rkey);
1276 goto reset_state;
1279 fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages,
1280 state->npages, io_addr);
1281 if (IS_ERR(fmr))
1282 return PTR_ERR(fmr);
1284 *state->fmr.next++ = fmr;
1285 state->nmdesc++;
1287 srp_map_desc(state, state->base_dma_addr & ~dev->mr_page_mask,
1288 state->dma_len, fmr->fmr->rkey);
1290 reset_state:
1291 state->npages = 0;
1292 state->dma_len = 0;
1294 return 0;
/*
 * Completion handler installed on the memory registration work requests
 * posted by srp_map_finish_fr(); forwards to the common QP error handler
 * with the operation name "FAST REG".
 */
static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc)
{
	static const char op_name[] = "FAST REG";

	srp_handle_qp_err(cq, wc, op_name);
}
1302 static int srp_map_finish_fr(struct srp_map_state *state,
1303 struct srp_request *req,
1304 struct srp_rdma_ch *ch, int sg_nents)
1306 struct srp_target_port *target = ch->target;
1307 struct srp_device *dev = target->srp_host->srp_dev;
1308 struct ib_send_wr *bad_wr;
1309 struct ib_reg_wr wr;
1310 struct srp_fr_desc *desc;
1311 u32 rkey;
1312 int n, err;
1314 if (state->fr.next >= state->fr.end)
1315 return -ENOMEM;
1317 WARN_ON_ONCE(!dev->use_fast_reg);
1319 if (sg_nents == 0)
1320 return 0;
1322 if (sg_nents == 1 && target->global_mr) {
1323 srp_map_desc(state, sg_dma_address(state->sg),
1324 sg_dma_len(state->sg),
1325 target->global_mr->rkey);
1326 return 1;
1329 desc = srp_fr_pool_get(ch->fr_pool);
1330 if (!desc)
1331 return -ENOMEM;
1333 rkey = ib_inc_rkey(desc->mr->rkey);
1334 ib_update_fast_reg_key(desc->mr, rkey);
1336 n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, dev->mr_page_size);
1337 if (unlikely(n < 0))
1338 return n;
1340 req->reg_cqe.done = srp_reg_mr_err_done;
1342 wr.wr.next = NULL;
1343 wr.wr.opcode = IB_WR_REG_MR;
1344 wr.wr.wr_cqe = &req->reg_cqe;
1345 wr.wr.num_sge = 0;
1346 wr.wr.send_flags = 0;
1347 wr.mr = desc->mr;
1348 wr.key = desc->mr->rkey;
1349 wr.access = (IB_ACCESS_LOCAL_WRITE |
1350 IB_ACCESS_REMOTE_READ |
1351 IB_ACCESS_REMOTE_WRITE);
1353 *state->fr.next++ = desc;
1354 state->nmdesc++;
1356 srp_map_desc(state, desc->mr->iova,
1357 desc->mr->length, desc->mr->rkey);
1359 err = ib_post_send(ch->qp, &wr.wr, &bad_wr);
1360 if (unlikely(err))
1361 return err;
1363 return n;
1366 static int srp_map_sg_entry(struct srp_map_state *state,
1367 struct srp_rdma_ch *ch,
1368 struct scatterlist *sg, int sg_index)
1370 struct srp_target_port *target = ch->target;
1371 struct srp_device *dev = target->srp_host->srp_dev;
1372 struct ib_device *ibdev = dev->dev;
1373 dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg);
1374 unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
1375 unsigned int len = 0;
1376 int ret;
1378 WARN_ON_ONCE(!dma_len);
1380 while (dma_len) {
1381 unsigned offset = dma_addr & ~dev->mr_page_mask;
1382 if (state->npages == dev->max_pages_per_mr || offset != 0) {
1383 ret = srp_map_finish_fmr(state, ch);
1384 if (ret)
1385 return ret;
1388 len = min_t(unsigned int, dma_len, dev->mr_page_size - offset);
1390 if (!state->npages)
1391 state->base_dma_addr = dma_addr;
1392 state->pages[state->npages++] = dma_addr & dev->mr_page_mask;
1393 state->dma_len += len;
1394 dma_addr += len;
1395 dma_len -= len;
1399 * If the last entry of the MR wasn't a full page, then we need to
1400 * close it out and start a new one -- we can only merge at page
1401 * boundaries.
1403 ret = 0;
1404 if (len != dev->mr_page_size)
1405 ret = srp_map_finish_fmr(state, ch);
1406 return ret;
1409 static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1410 struct srp_request *req, struct scatterlist *scat,
1411 int count)
1413 struct scatterlist *sg;
1414 int i, ret;
1416 state->desc = req->indirect_desc;
1417 state->pages = req->map_page;
1418 state->fmr.next = req->fmr_list;
1419 state->fmr.end = req->fmr_list + ch->target->cmd_sg_cnt;
1421 for_each_sg(scat, sg, count, i) {
1422 ret = srp_map_sg_entry(state, ch, sg, i);
1423 if (ret)
1424 return ret;
1427 ret = srp_map_finish_fmr(state, ch);
1428 if (ret)
1429 return ret;
1431 req->nmdesc = state->nmdesc;
1433 return 0;
1436 static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1437 struct srp_request *req, struct scatterlist *scat,
1438 int count)
1440 state->desc = req->indirect_desc;
1441 state->fr.next = req->fr_list;
1442 state->fr.end = req->fr_list + ch->target->cmd_sg_cnt;
1443 state->sg = scat;
1445 while (count) {
1446 int i, n;
1448 n = srp_map_finish_fr(state, req, ch, count);
1449 if (unlikely(n < 0))
1450 return n;
1452 count -= n;
1453 for (i = 0; i < n; i++)
1454 state->sg = sg_next(state->sg);
1457 req->nmdesc = state->nmdesc;
1459 return 0;
1462 static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch,
1463 struct srp_request *req, struct scatterlist *scat,
1464 int count)
1466 struct srp_target_port *target = ch->target;
1467 struct srp_device *dev = target->srp_host->srp_dev;
1468 struct scatterlist *sg;
1469 int i;
1471 state->desc = req->indirect_desc;
1472 for_each_sg(scat, sg, count, i) {
1473 srp_map_desc(state, ib_sg_dma_address(dev->dev, sg),
1474 ib_sg_dma_len(dev->dev, sg),
1475 target->global_mr->rkey);
1478 req->nmdesc = state->nmdesc;
1480 return 0;
1484 * Register the indirect data buffer descriptor with the HCA.
1486 * Note: since the indirect data buffer descriptor has been allocated with
1487 * kmalloc() it is guaranteed that this buffer is a physically contiguous
1488 * memory buffer.
1490 static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
1491 void **next_mr, void **end_mr, u32 idb_len,
1492 __be32 *idb_rkey)
1494 struct srp_target_port *target = ch->target;
1495 struct srp_device *dev = target->srp_host->srp_dev;
1496 struct srp_map_state state;
1497 struct srp_direct_buf idb_desc;
1498 u64 idb_pages[1];
1499 struct scatterlist idb_sg[1];
1500 int ret;
1502 memset(&state, 0, sizeof(state));
1503 memset(&idb_desc, 0, sizeof(idb_desc));
1504 state.gen.next = next_mr;
1505 state.gen.end = end_mr;
1506 state.desc = &idb_desc;
1507 state.base_dma_addr = req->indirect_dma_addr;
1508 state.dma_len = idb_len;
1510 if (dev->use_fast_reg) {
1511 state.sg = idb_sg;
1512 sg_set_buf(idb_sg, req->indirect_desc, idb_len);
1513 idb_sg->dma_address = req->indirect_dma_addr; /* hack! */
1514 #ifdef CONFIG_NEED_SG_DMA_LENGTH
1515 idb_sg->dma_length = idb_sg->length; /* hack^2 */
1516 #endif
1517 ret = srp_map_finish_fr(&state, req, ch, 1);
1518 if (ret < 0)
1519 return ret;
1520 } else if (dev->use_fmr) {
1521 state.pages = idb_pages;
1522 state.pages[0] = (req->indirect_dma_addr &
1523 dev->mr_page_mask);
1524 state.npages = 1;
1525 ret = srp_map_finish_fmr(&state, ch);
1526 if (ret < 0)
1527 return ret;
1528 } else {
1529 return -EINVAL;
1532 *idb_rkey = idb_desc.key;
1534 return 0;
1538 * srp_map_data() - map SCSI data buffer onto an SRP request
1539 * @scmnd: SCSI command to map
1540 * @ch: SRP RDMA channel
1541 * @req: SRP request
1543 * Returns the length in bytes of the SRP_CMD IU or a negative value if
1544 * mapping failed.
1546 static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
1547 struct srp_request *req)
1549 struct srp_target_port *target = ch->target;
1550 struct scatterlist *scat;
1551 struct srp_cmd *cmd = req->cmd->buf;
1552 int len, nents, count, ret;
1553 struct srp_device *dev;
1554 struct ib_device *ibdev;
1555 struct srp_map_state state;
1556 struct srp_indirect_buf *indirect_hdr;
1557 u32 idb_len, table_len;
1558 __be32 idb_rkey;
1559 u8 fmt;
1561 if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE)
1562 return sizeof (struct srp_cmd);
1564 if (scmnd->sc_data_direction != DMA_FROM_DEVICE &&
1565 scmnd->sc_data_direction != DMA_TO_DEVICE) {
1566 shost_printk(KERN_WARNING, target->scsi_host,
1567 PFX "Unhandled data direction %d\n",
1568 scmnd->sc_data_direction);
1569 return -EINVAL;
1572 nents = scsi_sg_count(scmnd);
1573 scat = scsi_sglist(scmnd);
1575 dev = target->srp_host->srp_dev;
1576 ibdev = dev->dev;
1578 count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction);
1579 if (unlikely(count == 0))
1580 return -EIO;
1582 fmt = SRP_DATA_DESC_DIRECT;
1583 len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf);
1585 if (count == 1 && target->global_mr) {
1587 * The midlayer only generated a single gather/scatter
1588 * entry, or DMA mapping coalesced everything to a
1589 * single entry. So a direct descriptor along with
1590 * the DMA MR suffices.
1592 struct srp_direct_buf *buf = (void *) cmd->add_data;
1594 buf->va = cpu_to_be64(ib_sg_dma_address(ibdev, scat));
1595 buf->key = cpu_to_be32(target->global_mr->rkey);
1596 buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat));
1598 req->nmdesc = 0;
1599 goto map_complete;
1603 * We have more than one scatter/gather entry, so build our indirect
1604 * descriptor table, trying to merge as many entries as we can.
1606 indirect_hdr = (void *) cmd->add_data;
1608 ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr,
1609 target->indirect_size, DMA_TO_DEVICE);
1611 memset(&state, 0, sizeof(state));
1612 if (dev->use_fast_reg)
1613 srp_map_sg_fr(&state, ch, req, scat, count);
1614 else if (dev->use_fmr)
1615 srp_map_sg_fmr(&state, ch, req, scat, count);
1616 else
1617 srp_map_sg_dma(&state, ch, req, scat, count);
1619 /* We've mapped the request, now pull as much of the indirect
1620 * descriptor table as we can into the command buffer. If this
1621 * target is not using an external indirect table, we are
1622 * guaranteed to fit into the command, as the SCSI layer won't
1623 * give us more S/G entries than we allow.
1625 if (state.ndesc == 1) {
1627 * Memory registration collapsed the sg-list into one entry,
1628 * so use a direct descriptor.
1630 struct srp_direct_buf *buf = (void *) cmd->add_data;
1632 *buf = req->indirect_desc[0];
1633 goto map_complete;
1636 if (unlikely(target->cmd_sg_cnt < state.ndesc &&
1637 !target->allow_ext_sg)) {
1638 shost_printk(KERN_ERR, target->scsi_host,
1639 "Could not fit S/G list into SRP_CMD\n");
1640 return -EIO;
1643 count = min(state.ndesc, target->cmd_sg_cnt);
1644 table_len = state.ndesc * sizeof (struct srp_direct_buf);
1645 idb_len = sizeof(struct srp_indirect_buf) + table_len;
1647 fmt = SRP_DATA_DESC_INDIRECT;
1648 len = sizeof(struct srp_cmd) + sizeof (struct srp_indirect_buf);
1649 len += count * sizeof (struct srp_direct_buf);
1651 memcpy(indirect_hdr->desc_list, req->indirect_desc,
1652 count * sizeof (struct srp_direct_buf));
1654 if (!target->global_mr) {
1655 ret = srp_map_idb(ch, req, state.gen.next, state.gen.end,
1656 idb_len, &idb_rkey);
1657 if (ret < 0)
1658 return ret;
1659 req->nmdesc++;
1660 } else {
1661 idb_rkey = cpu_to_be32(target->global_mr->rkey);
1664 indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr);
1665 indirect_hdr->table_desc.key = idb_rkey;
1666 indirect_hdr->table_desc.len = cpu_to_be32(table_len);
1667 indirect_hdr->len = cpu_to_be32(state.total_len);
1669 if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1670 cmd->data_out_desc_cnt = count;
1671 else
1672 cmd->data_in_desc_cnt = count;
1674 ib_dma_sync_single_for_device(ibdev, req->indirect_dma_addr, table_len,
1675 DMA_TO_DEVICE);
1677 map_complete:
1678 if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1679 cmd->buf_fmt = fmt << 4;
1680 else
1681 cmd->buf_fmt = fmt;
1683 return len;
1687 * Return an IU and possible credit to the free pool
1689 static void srp_put_tx_iu(struct srp_rdma_ch *ch, struct srp_iu *iu,
1690 enum srp_iu_type iu_type)
1692 unsigned long flags;
1694 spin_lock_irqsave(&ch->lock, flags);
1695 list_add(&iu->list, &ch->free_tx);
1696 if (iu_type != SRP_IU_RSP)
1697 ++ch->req_lim;
1698 spin_unlock_irqrestore(&ch->lock, flags);
1702 * Must be called with ch->lock held to protect req_lim and free_tx.
1703 * If IU is not sent, it must be returned using srp_put_tx_iu().
1705 * Note:
1706 * An upper limit for the number of allocated information units for each
1707 * request type is:
1708 * - SRP_IU_CMD: SRP_CMD_SQ_SIZE, since the SCSI mid-layer never queues
1709 * more than Scsi_Host.can_queue requests.
1710 * - SRP_IU_TSK_MGMT: SRP_TSK_MGMT_SQ_SIZE.
1711 * - SRP_IU_RSP: 1, since a conforming SRP target never sends more than
1712 * one unanswered SRP request to an initiator.
1714 static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
1715 enum srp_iu_type iu_type)
1717 struct srp_target_port *target = ch->target;
1718 s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
1719 struct srp_iu *iu;
1721 ib_process_cq_direct(ch->send_cq, -1);
1723 if (list_empty(&ch->free_tx))
1724 return NULL;
1726 /* Initiator responses to target requests do not consume credits */
1727 if (iu_type != SRP_IU_RSP) {
1728 if (ch->req_lim <= rsv) {
1729 ++target->zero_req_lim;
1730 return NULL;
1733 --ch->req_lim;
1736 iu = list_first_entry(&ch->free_tx, struct srp_iu, list);
1737 list_del(&iu->list);
1738 return iu;
1741 static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc)
1743 struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
1744 struct srp_rdma_ch *ch = cq->cq_context;
1746 if (unlikely(wc->status != IB_WC_SUCCESS)) {
1747 srp_handle_qp_err(cq, wc, "SEND");
1748 return;
1751 list_add(&iu->list, &ch->free_tx);
1754 static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
1756 struct srp_target_port *target = ch->target;
1757 struct ib_sge list;
1758 struct ib_send_wr wr, *bad_wr;
1760 list.addr = iu->dma;
1761 list.length = len;
1762 list.lkey = target->lkey;
1764 iu->cqe.done = srp_send_done;
1766 wr.next = NULL;
1767 wr.wr_cqe = &iu->cqe;
1768 wr.sg_list = &list;
1769 wr.num_sge = 1;
1770 wr.opcode = IB_WR_SEND;
1771 wr.send_flags = IB_SEND_SIGNALED;
1773 return ib_post_send(ch->qp, &wr, &bad_wr);
1776 static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu)
1778 struct srp_target_port *target = ch->target;
1779 struct ib_recv_wr wr, *bad_wr;
1780 struct ib_sge list;
1782 list.addr = iu->dma;
1783 list.length = iu->size;
1784 list.lkey = target->lkey;
1786 iu->cqe.done = srp_recv_done;
1788 wr.next = NULL;
1789 wr.wr_cqe = &iu->cqe;
1790 wr.sg_list = &list;
1791 wr.num_sge = 1;
1793 return ib_post_recv(ch->qp, &wr, &bad_wr);
1796 static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
1798 struct srp_target_port *target = ch->target;
1799 struct srp_request *req;
1800 struct scsi_cmnd *scmnd;
1801 unsigned long flags;
1803 if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
1804 spin_lock_irqsave(&ch->lock, flags);
1805 ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1806 spin_unlock_irqrestore(&ch->lock, flags);
1808 ch->tsk_mgmt_status = -1;
1809 if (be32_to_cpu(rsp->resp_data_len) >= 4)
1810 ch->tsk_mgmt_status = rsp->data[3];
1811 complete(&ch->tsk_mgmt_done);
1812 } else {
1813 scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
1814 if (scmnd) {
1815 req = (void *)scmnd->host_scribble;
1816 scmnd = srp_claim_req(ch, req, NULL, scmnd);
1818 if (!scmnd) {
1819 shost_printk(KERN_ERR, target->scsi_host,
1820 "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n",
1821 rsp->tag, ch - target->ch, ch->qp->qp_num);
1823 spin_lock_irqsave(&ch->lock, flags);
1824 ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1825 spin_unlock_irqrestore(&ch->lock, flags);
1827 return;
1829 scmnd->result = rsp->status;
1831 if (rsp->flags & SRP_RSP_FLAG_SNSVALID) {
1832 memcpy(scmnd->sense_buffer, rsp->data +
1833 be32_to_cpu(rsp->resp_data_len),
1834 min_t(int, be32_to_cpu(rsp->sense_data_len),
1835 SCSI_SENSE_BUFFERSIZE));
1838 if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER))
1839 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt));
1840 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DIOVER))
1841 scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_in_res_cnt));
1842 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER))
1843 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
1844 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER))
1845 scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_out_res_cnt));
1847 srp_free_req(ch, req, scmnd,
1848 be32_to_cpu(rsp->req_lim_delta));
1850 scmnd->host_scribble = NULL;
1851 scmnd->scsi_done(scmnd);
1855 static int srp_response_common(struct srp_rdma_ch *ch, s32 req_delta,
1856 void *rsp, int len)
1858 struct srp_target_port *target = ch->target;
1859 struct ib_device *dev = target->srp_host->srp_dev->dev;
1860 unsigned long flags;
1861 struct srp_iu *iu;
1862 int err;
1864 spin_lock_irqsave(&ch->lock, flags);
1865 ch->req_lim += req_delta;
1866 iu = __srp_get_tx_iu(ch, SRP_IU_RSP);
1867 spin_unlock_irqrestore(&ch->lock, flags);
1869 if (!iu) {
1870 shost_printk(KERN_ERR, target->scsi_host, PFX
1871 "no IU available to send response\n");
1872 return 1;
1875 ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE);
1876 memcpy(iu->buf, rsp, len);
1877 ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE);
1879 err = srp_post_send(ch, iu, len);
1880 if (err) {
1881 shost_printk(KERN_ERR, target->scsi_host, PFX
1882 "unable to post response: %d\n", err);
1883 srp_put_tx_iu(ch, iu, SRP_IU_RSP);
1886 return err;
1889 static void srp_process_cred_req(struct srp_rdma_ch *ch,
1890 struct srp_cred_req *req)
1892 struct srp_cred_rsp rsp = {
1893 .opcode = SRP_CRED_RSP,
1894 .tag = req->tag,
1896 s32 delta = be32_to_cpu(req->req_lim_delta);
1898 if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
1899 shost_printk(KERN_ERR, ch->target->scsi_host, PFX
1900 "problems processing SRP_CRED_REQ\n");
1903 static void srp_process_aer_req(struct srp_rdma_ch *ch,
1904 struct srp_aer_req *req)
1906 struct srp_target_port *target = ch->target;
1907 struct srp_aer_rsp rsp = {
1908 .opcode = SRP_AER_RSP,
1909 .tag = req->tag,
1911 s32 delta = be32_to_cpu(req->req_lim_delta);
1913 shost_printk(KERN_ERR, target->scsi_host, PFX
1914 "ignoring AER for LUN %llu\n", scsilun_to_int(&req->lun));
1916 if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
1917 shost_printk(KERN_ERR, target->scsi_host, PFX
1918 "problems processing SRP_AER_REQ\n");
1921 static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc)
1923 struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
1924 struct srp_rdma_ch *ch = cq->cq_context;
1925 struct srp_target_port *target = ch->target;
1926 struct ib_device *dev = target->srp_host->srp_dev->dev;
1927 int res;
1928 u8 opcode;
1930 if (unlikely(wc->status != IB_WC_SUCCESS)) {
1931 srp_handle_qp_err(cq, wc, "RECV");
1932 return;
1935 ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len,
1936 DMA_FROM_DEVICE);
1938 opcode = *(u8 *) iu->buf;
1940 if (0) {
1941 shost_printk(KERN_ERR, target->scsi_host,
1942 PFX "recv completion, opcode 0x%02x\n", opcode);
1943 print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 8, 1,
1944 iu->buf, wc->byte_len, true);
1947 switch (opcode) {
1948 case SRP_RSP:
1949 srp_process_rsp(ch, iu->buf);
1950 break;
1952 case SRP_CRED_REQ:
1953 srp_process_cred_req(ch, iu->buf);
1954 break;
1956 case SRP_AER_REQ:
1957 srp_process_aer_req(ch, iu->buf);
1958 break;
1960 case SRP_T_LOGOUT:
1961 /* XXX Handle target logout */
1962 shost_printk(KERN_WARNING, target->scsi_host,
1963 PFX "Got target logout request\n");
1964 break;
1966 default:
1967 shost_printk(KERN_WARNING, target->scsi_host,
1968 PFX "Unhandled SRP opcode 0x%02x\n", opcode);
1969 break;
1972 ib_dma_sync_single_for_device(dev, iu->dma, ch->max_ti_iu_len,
1973 DMA_FROM_DEVICE);
1975 res = srp_post_recv(ch, iu);
1976 if (res != 0)
1977 shost_printk(KERN_ERR, target->scsi_host,
1978 PFX "Recv failed with error code %d\n", res);
1982 * srp_tl_err_work() - handle a transport layer error
1983 * @work: Work structure embedded in an SRP target port.
1985 * Note: This function may get invoked before the rport has been created,
1986 * hence the target->rport test.
1988 static void srp_tl_err_work(struct work_struct *work)
1990 struct srp_target_port *target;
1992 target = container_of(work, struct srp_target_port, tl_err_work);
1993 if (target->rport)
1994 srp_start_tl_fail_timers(target->rport);
1997 static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
1998 const char *opname)
2000 struct srp_rdma_ch *ch = cq->cq_context;
2001 struct srp_target_port *target = ch->target;
2003 if (ch->connected && !target->qp_in_error) {
2004 shost_printk(KERN_ERR, target->scsi_host,
2005 PFX "failed %s status %s (%d) for CQE %p\n",
2006 opname, ib_wc_status_msg(wc->status), wc->status,
2007 wc->wr_cqe);
2008 queue_work(system_long_wq, &target->tl_err_work);
2010 target->qp_in_error = true;
2013 static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
2015 struct srp_target_port *target = host_to_target(shost);
2016 struct srp_rport *rport = target->rport;
2017 struct srp_rdma_ch *ch;
2018 struct srp_request *req;
2019 struct srp_iu *iu;
2020 struct srp_cmd *cmd;
2021 struct ib_device *dev;
2022 unsigned long flags;
2023 u32 tag;
2024 u16 idx;
2025 int len, ret;
2026 const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;
2029 * The SCSI EH thread is the only context from which srp_queuecommand()
2030 * can get invoked for blocked devices (SDEV_BLOCK /
2031 * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by
2032 * locking the rport mutex if invoked from inside the SCSI EH.
2034 if (in_scsi_eh)
2035 mutex_lock(&rport->mutex);
2037 scmnd->result = srp_chkready(target->rport);
2038 if (unlikely(scmnd->result))
2039 goto err;
2041 WARN_ON_ONCE(scmnd->request->tag < 0);
2042 tag = blk_mq_unique_tag(scmnd->request);
2043 ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)];
2044 idx = blk_mq_unique_tag_to_tag(tag);
2045 WARN_ONCE(idx >= target->req_ring_size, "%s: tag %#x: idx %d >= %d\n",
2046 dev_name(&shost->shost_gendev), tag, idx,
2047 target->req_ring_size);
2049 spin_lock_irqsave(&ch->lock, flags);
2050 iu = __srp_get_tx_iu(ch, SRP_IU_CMD);
2051 spin_unlock_irqrestore(&ch->lock, flags);
2053 if (!iu)
2054 goto err;
2056 req = &ch->req_ring[idx];
2057 dev = target->srp_host->srp_dev->dev;
2058 ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len,
2059 DMA_TO_DEVICE);
2061 scmnd->host_scribble = (void *) req;
2063 cmd = iu->buf;
2064 memset(cmd, 0, sizeof *cmd);
2066 cmd->opcode = SRP_CMD;
2067 int_to_scsilun(scmnd->device->lun, &cmd->lun);
2068 cmd->tag = tag;
2069 memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len);
2071 req->scmnd = scmnd;
2072 req->cmd = iu;
2074 len = srp_map_data(scmnd, ch, req);
2075 if (len < 0) {
2076 shost_printk(KERN_ERR, target->scsi_host,
2077 PFX "Failed to map data (%d)\n", len);
2079 * If we ran out of memory descriptors (-ENOMEM) because an
2080 * application is queuing many requests with more than
2081 * max_pages_per_mr sg-list elements, tell the SCSI mid-layer
2082 * to reduce queue depth temporarily.
2084 scmnd->result = len == -ENOMEM ?
2085 DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16;
2086 goto err_iu;
2089 ib_dma_sync_single_for_device(dev, iu->dma, target->max_iu_len,
2090 DMA_TO_DEVICE);
2092 if (srp_post_send(ch, iu, len)) {
2093 shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n");
2094 goto err_unmap;
2097 ret = 0;
2099 unlock_rport:
2100 if (in_scsi_eh)
2101 mutex_unlock(&rport->mutex);
2103 return ret;
2105 err_unmap:
2106 srp_unmap_data(scmnd, ch, req);
2108 err_iu:
2109 srp_put_tx_iu(ch, iu, SRP_IU_CMD);
2112 * Avoid that the loops that iterate over the request ring can
2113 * encounter a dangling SCSI command pointer.
2115 req->scmnd = NULL;
2117 err:
2118 if (scmnd->result) {
2119 scmnd->scsi_done(scmnd);
2120 ret = 0;
2121 } else {
2122 ret = SCSI_MLQUEUE_HOST_BUSY;
2125 goto unlock_rport;
2129 * Note: the resources allocated in this function are freed in
2130 * srp_free_ch_ib().
2132 static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch)
2134 struct srp_target_port *target = ch->target;
2135 int i;
2137 ch->rx_ring = kcalloc(target->queue_size, sizeof(*ch->rx_ring),
2138 GFP_KERNEL);
2139 if (!ch->rx_ring)
2140 goto err_no_ring;
2141 ch->tx_ring = kcalloc(target->queue_size, sizeof(*ch->tx_ring),
2142 GFP_KERNEL);
2143 if (!ch->tx_ring)
2144 goto err_no_ring;
2146 for (i = 0; i < target->queue_size; ++i) {
2147 ch->rx_ring[i] = srp_alloc_iu(target->srp_host,
2148 ch->max_ti_iu_len,
2149 GFP_KERNEL, DMA_FROM_DEVICE);
2150 if (!ch->rx_ring[i])
2151 goto err;
2154 for (i = 0; i < target->queue_size; ++i) {
2155 ch->tx_ring[i] = srp_alloc_iu(target->srp_host,
2156 target->max_iu_len,
2157 GFP_KERNEL, DMA_TO_DEVICE);
2158 if (!ch->tx_ring[i])
2159 goto err;
2161 list_add(&ch->tx_ring[i]->list, &ch->free_tx);
2164 return 0;
2166 err:
2167 for (i = 0; i < target->queue_size; ++i) {
2168 srp_free_iu(target->srp_host, ch->rx_ring[i]);
2169 srp_free_iu(target->srp_host, ch->tx_ring[i]);
2173 err_no_ring:
2174 kfree(ch->tx_ring);
2175 ch->tx_ring = NULL;
2176 kfree(ch->rx_ring);
2177 ch->rx_ring = NULL;
2179 return -ENOMEM;
2182 static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask)
2184 uint64_t T_tr_ns, max_compl_time_ms;
2185 uint32_t rq_tmo_jiffies;
2188 * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair,
2189 * table 91), both the QP timeout and the retry count have to be set
2190 * for RC QP's during the RTR to RTS transition.
2192 WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) !=
2193 (IB_QP_TIMEOUT | IB_QP_RETRY_CNT));
2196 * Set target->rq_tmo_jiffies to one second more than the largest time
2197 * it can take before an error completion is generated. See also
2198 * C9-140..142 in the IBTA spec for more information about how to
2199 * convert the QP Local ACK Timeout value to nanoseconds.
2201 T_tr_ns = 4096 * (1ULL << qp_attr->timeout);
2202 max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns;
2203 do_div(max_compl_time_ms, NSEC_PER_MSEC);
2204 rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000);
2206 return rq_tmo_jiffies;
2209 static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
2210 const struct srp_login_rsp *lrsp,
2211 struct srp_rdma_ch *ch)
2213 struct srp_target_port *target = ch->target;
2214 struct ib_qp_attr *qp_attr = NULL;
2215 int attr_mask = 0;
2216 int ret;
2217 int i;
2219 if (lrsp->opcode == SRP_LOGIN_RSP) {
2220 ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len);
2221 ch->req_lim = be32_to_cpu(lrsp->req_lim_delta);
2224 * Reserve credits for task management so we don't
2225 * bounce requests back to the SCSI mid-layer.
2227 target->scsi_host->can_queue
2228 = min(ch->req_lim - SRP_TSK_MGMT_SQ_SIZE,
2229 target->scsi_host->can_queue);
2230 target->scsi_host->cmd_per_lun
2231 = min_t(int, target->scsi_host->can_queue,
2232 target->scsi_host->cmd_per_lun);
2233 } else {
2234 shost_printk(KERN_WARNING, target->scsi_host,
2235 PFX "Unhandled RSP opcode %#x\n", lrsp->opcode);
2236 ret = -ECONNRESET;
2237 goto error;
2240 if (!ch->rx_ring) {
2241 ret = srp_alloc_iu_bufs(ch);
2242 if (ret)
2243 goto error;
2246 ret = -ENOMEM;
2247 qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
2248 if (!qp_attr)
2249 goto error;
2251 qp_attr->qp_state = IB_QPS_RTR;
2252 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2253 if (ret)
2254 goto error_free;
2256 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2257 if (ret)
2258 goto error_free;
2260 for (i = 0; i < target->queue_size; i++) {
2261 struct srp_iu *iu = ch->rx_ring[i];
2263 ret = srp_post_recv(ch, iu);
2264 if (ret)
2265 goto error_free;
2268 qp_attr->qp_state = IB_QPS_RTS;
2269 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2270 if (ret)
2271 goto error_free;
2273 target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask);
2275 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2276 if (ret)
2277 goto error_free;
2279 ret = ib_send_cm_rtu(cm_id, NULL, 0);
2281 error_free:
2282 kfree(qp_attr);
2284 error:
2285 ch->status = ret;
2288 static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
2289 struct ib_cm_event *event,
2290 struct srp_rdma_ch *ch)
2292 struct srp_target_port *target = ch->target;
2293 struct Scsi_Host *shost = target->scsi_host;
2294 struct ib_class_port_info *cpi;
2295 int opcode;
2297 switch (event->param.rej_rcvd.reason) {
2298 case IB_CM_REJ_PORT_CM_REDIRECT:
2299 cpi = event->param.rej_rcvd.ari;
2300 ch->path.dlid = cpi->redirect_lid;
2301 ch->path.pkey = cpi->redirect_pkey;
2302 cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff;
2303 memcpy(ch->path.dgid.raw, cpi->redirect_gid, 16);
2305 ch->status = ch->path.dlid ?
2306 SRP_DLID_REDIRECT : SRP_PORT_REDIRECT;
2307 break;
2309 case IB_CM_REJ_PORT_REDIRECT:
2310 if (srp_target_is_topspin(target)) {
2312 * Topspin/Cisco SRP gateways incorrectly send
2313 * reject reason code 25 when they mean 24
2314 * (port redirect).
2316 memcpy(ch->path.dgid.raw,
2317 event->param.rej_rcvd.ari, 16);
2319 shost_printk(KERN_DEBUG, shost,
2320 PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n",
2321 be64_to_cpu(ch->path.dgid.global.subnet_prefix),
2322 be64_to_cpu(ch->path.dgid.global.interface_id));
2324 ch->status = SRP_PORT_REDIRECT;
2325 } else {
2326 shost_printk(KERN_WARNING, shost,
2327 " REJ reason: IB_CM_REJ_PORT_REDIRECT\n");
2328 ch->status = -ECONNRESET;
2330 break;
2332 case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
2333 shost_printk(KERN_WARNING, shost,
2334 " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
2335 ch->status = -ECONNRESET;
2336 break;
2338 case IB_CM_REJ_CONSUMER_DEFINED:
2339 opcode = *(u8 *) event->private_data;
2340 if (opcode == SRP_LOGIN_REJ) {
2341 struct srp_login_rej *rej = event->private_data;
2342 u32 reason = be32_to_cpu(rej->reason);
2344 if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
2345 shost_printk(KERN_WARNING, shost,
2346 PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
2347 else
2348 shost_printk(KERN_WARNING, shost, PFX
2349 "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n",
2350 target->sgid.raw,
2351 target->orig_dgid.raw, reason);
2352 } else
2353 shost_printk(KERN_WARNING, shost,
2354 " REJ reason: IB_CM_REJ_CONSUMER_DEFINED,"
2355 " opcode 0x%02x\n", opcode);
2356 ch->status = -ECONNRESET;
2357 break;
2359 case IB_CM_REJ_STALE_CONN:
2360 shost_printk(KERN_WARNING, shost, " REJ reason: stale connection\n");
2361 ch->status = SRP_STALE_CONN;
2362 break;
2364 default:
2365 shost_printk(KERN_WARNING, shost, " REJ reason 0x%x\n",
2366 event->param.rej_rcvd.reason);
2367 ch->status = -ECONNRESET;
2371 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
2373 struct srp_rdma_ch *ch = cm_id->context;
2374 struct srp_target_port *target = ch->target;
2375 int comp = 0;
2377 switch (event->event) {
2378 case IB_CM_REQ_ERROR:
2379 shost_printk(KERN_DEBUG, target->scsi_host,
2380 PFX "Sending CM REQ failed\n");
2381 comp = 1;
2382 ch->status = -ECONNRESET;
2383 break;
2385 case IB_CM_REP_RECEIVED:
2386 comp = 1;
2387 srp_cm_rep_handler(cm_id, event->private_data, ch);
2388 break;
2390 case IB_CM_REJ_RECEIVED:
2391 shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
2392 comp = 1;
2394 srp_cm_rej_handler(cm_id, event, ch);
2395 break;
2397 case IB_CM_DREQ_RECEIVED:
2398 shost_printk(KERN_WARNING, target->scsi_host,
2399 PFX "DREQ received - connection closed\n");
2400 ch->connected = false;
2401 if (ib_send_cm_drep(cm_id, NULL, 0))
2402 shost_printk(KERN_ERR, target->scsi_host,
2403 PFX "Sending CM DREP failed\n");
2404 queue_work(system_long_wq, &target->tl_err_work);
2405 break;
2407 case IB_CM_TIMEWAIT_EXIT:
2408 shost_printk(KERN_ERR, target->scsi_host,
2409 PFX "connection closed\n");
2410 comp = 1;
2412 ch->status = 0;
2413 break;
2415 case IB_CM_MRA_RECEIVED:
2416 case IB_CM_DREQ_ERROR:
2417 case IB_CM_DREP_RECEIVED:
2418 break;
2420 default:
2421 shost_printk(KERN_WARNING, target->scsi_host,
2422 PFX "Unhandled CM event %d\n", event->event);
2423 break;
2426 if (comp)
2427 complete(&ch->done);
2429 return 0;
2433 * srp_change_queue_depth - setting device queue depth
2434 * @sdev: scsi device struct
2435 * @qdepth: requested queue depth
2437 * Returns queue depth.
2439 static int
2440 srp_change_queue_depth(struct scsi_device *sdev, int qdepth)
2442 if (!sdev->tagged_supported)
2443 qdepth = 1;
2444 return scsi_change_queue_depth(sdev, qdepth);
2447 static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
2448 u8 func)
2450 struct srp_target_port *target = ch->target;
2451 struct srp_rport *rport = target->rport;
2452 struct ib_device *dev = target->srp_host->srp_dev->dev;
2453 struct srp_iu *iu;
2454 struct srp_tsk_mgmt *tsk_mgmt;
2456 if (!ch->connected || target->qp_in_error)
2457 return -1;
2459 init_completion(&ch->tsk_mgmt_done);
2462 * Lock the rport mutex to avoid that srp_create_ch_ib() is
2463 * invoked while a task management function is being sent.
2465 mutex_lock(&rport->mutex);
2466 spin_lock_irq(&ch->lock);
2467 iu = __srp_get_tx_iu(ch, SRP_IU_TSK_MGMT);
2468 spin_unlock_irq(&ch->lock);
2470 if (!iu) {
2471 mutex_unlock(&rport->mutex);
2473 return -1;
2476 ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,
2477 DMA_TO_DEVICE);
2478 tsk_mgmt = iu->buf;
2479 memset(tsk_mgmt, 0, sizeof *tsk_mgmt);
2481 tsk_mgmt->opcode = SRP_TSK_MGMT;
2482 int_to_scsilun(lun, &tsk_mgmt->lun);
2483 tsk_mgmt->tag = req_tag | SRP_TAG_TSK_MGMT;
2484 tsk_mgmt->tsk_mgmt_func = func;
2485 tsk_mgmt->task_tag = req_tag;
2487 ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,
2488 DMA_TO_DEVICE);
2489 if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) {
2490 srp_put_tx_iu(ch, iu, SRP_IU_TSK_MGMT);
2491 mutex_unlock(&rport->mutex);
2493 return -1;
2495 mutex_unlock(&rport->mutex);
2497 if (!wait_for_completion_timeout(&ch->tsk_mgmt_done,
2498 msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS)))
2499 return -1;
2501 return 0;
2504 static int srp_abort(struct scsi_cmnd *scmnd)
2506 struct srp_target_port *target = host_to_target(scmnd->device->host);
2507 struct srp_request *req = (struct srp_request *) scmnd->host_scribble;
2508 u32 tag;
2509 u16 ch_idx;
2510 struct srp_rdma_ch *ch;
2511 int ret;
2513 shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
2515 if (!req)
2516 return SUCCESS;
2517 tag = blk_mq_unique_tag(scmnd->request);
2518 ch_idx = blk_mq_unique_tag_to_hwq(tag);
2519 if (WARN_ON_ONCE(ch_idx >= target->ch_count))
2520 return SUCCESS;
2521 ch = &target->ch[ch_idx];
2522 if (!srp_claim_req(ch, req, NULL, scmnd))
2523 return SUCCESS;
2524 shost_printk(KERN_ERR, target->scsi_host,
2525 "Sending SRP abort for tag %#x\n", tag);
2526 if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun,
2527 SRP_TSK_ABORT_TASK) == 0)
2528 ret = SUCCESS;
2529 else if (target->rport->state == SRP_RPORT_LOST)
2530 ret = FAST_IO_FAIL;
2531 else
2532 ret = FAILED;
2533 srp_free_req(ch, req, scmnd, 0);
2534 scmnd->result = DID_ABORT << 16;
2535 scmnd->scsi_done(scmnd);
2537 return ret;
2540 static int srp_reset_device(struct scsi_cmnd *scmnd)
2542 struct srp_target_port *target = host_to_target(scmnd->device->host);
2543 struct srp_rdma_ch *ch;
2544 int i;
2546 shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
2548 ch = &target->ch[0];
2549 if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun,
2550 SRP_TSK_LUN_RESET))
2551 return FAILED;
2552 if (ch->tsk_mgmt_status)
2553 return FAILED;
2555 for (i = 0; i < target->ch_count; i++) {
2556 ch = &target->ch[i];
2557 for (i = 0; i < target->req_ring_size; ++i) {
2558 struct srp_request *req = &ch->req_ring[i];
2560 srp_finish_req(ch, req, scmnd->device, DID_RESET << 16);
2564 return SUCCESS;
2567 static int srp_reset_host(struct scsi_cmnd *scmnd)
2569 struct srp_target_port *target = host_to_target(scmnd->device->host);
2571 shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n");
2573 return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
2576 static int srp_slave_configure(struct scsi_device *sdev)
2578 struct Scsi_Host *shost = sdev->host;
2579 struct srp_target_port *target = host_to_target(shost);
2580 struct request_queue *q = sdev->request_queue;
2581 unsigned long timeout;
2583 if (sdev->type == TYPE_DISK) {
2584 timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies);
2585 blk_queue_rq_timeout(q, timeout);
2588 return 0;
2591 static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr,
2592 char *buf)
2594 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2596 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->id_ext));
2599 static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr,
2600 char *buf)
2602 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2604 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid));
2607 static ssize_t show_service_id(struct device *dev,
2608 struct device_attribute *attr, char *buf)
2610 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2612 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->service_id));
2615 static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
2616 char *buf)
2618 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2620 return sprintf(buf, "0x%04x\n", be16_to_cpu(target->pkey));
2623 static ssize_t show_sgid(struct device *dev, struct device_attribute *attr,
2624 char *buf)
2626 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2628 return sprintf(buf, "%pI6\n", target->sgid.raw);
2631 static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
2632 char *buf)
2634 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2635 struct srp_rdma_ch *ch = &target->ch[0];
2637 return sprintf(buf, "%pI6\n", ch->path.dgid.raw);
2640 static ssize_t show_orig_dgid(struct device *dev,
2641 struct device_attribute *attr, char *buf)
2643 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2645 return sprintf(buf, "%pI6\n", target->orig_dgid.raw);
2648 static ssize_t show_req_lim(struct device *dev,
2649 struct device_attribute *attr, char *buf)
2651 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2652 struct srp_rdma_ch *ch;
2653 int i, req_lim = INT_MAX;
2655 for (i = 0; i < target->ch_count; i++) {
2656 ch = &target->ch[i];
2657 req_lim = min(req_lim, ch->req_lim);
2659 return sprintf(buf, "%d\n", req_lim);
2662 static ssize_t show_zero_req_lim(struct device *dev,
2663 struct device_attribute *attr, char *buf)
2665 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2667 return sprintf(buf, "%d\n", target->zero_req_lim);
2670 static ssize_t show_local_ib_port(struct device *dev,
2671 struct device_attribute *attr, char *buf)
2673 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2675 return sprintf(buf, "%d\n", target->srp_host->port);
2678 static ssize_t show_local_ib_device(struct device *dev,
2679 struct device_attribute *attr, char *buf)
2681 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2683 return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name);
2686 static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr,
2687 char *buf)
2689 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2691 return sprintf(buf, "%d\n", target->ch_count);
2694 static ssize_t show_comp_vector(struct device *dev,
2695 struct device_attribute *attr, char *buf)
2697 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2699 return sprintf(buf, "%d\n", target->comp_vector);
2702 static ssize_t show_tl_retry_count(struct device *dev,
2703 struct device_attribute *attr, char *buf)
2705 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2707 return sprintf(buf, "%d\n", target->tl_retry_count);
2710 static ssize_t show_cmd_sg_entries(struct device *dev,
2711 struct device_attribute *attr, char *buf)
2713 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2715 return sprintf(buf, "%u\n", target->cmd_sg_cnt);
2718 static ssize_t show_allow_ext_sg(struct device *dev,
2719 struct device_attribute *attr, char *buf)
2721 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2723 return sprintf(buf, "%s\n", target->allow_ext_sg ? "true" : "false");
2726 static DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL);
2727 static DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL);
2728 static DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL);
2729 static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL);
2730 static DEVICE_ATTR(sgid, S_IRUGO, show_sgid, NULL);
2731 static DEVICE_ATTR(dgid, S_IRUGO, show_dgid, NULL);
2732 static DEVICE_ATTR(orig_dgid, S_IRUGO, show_orig_dgid, NULL);
2733 static DEVICE_ATTR(req_lim, S_IRUGO, show_req_lim, NULL);
2734 static DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL);
2735 static DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL);
2736 static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
2737 static DEVICE_ATTR(ch_count, S_IRUGO, show_ch_count, NULL);
2738 static DEVICE_ATTR(comp_vector, S_IRUGO, show_comp_vector, NULL);
2739 static DEVICE_ATTR(tl_retry_count, S_IRUGO, show_tl_retry_count, NULL);
2740 static DEVICE_ATTR(cmd_sg_entries, S_IRUGO, show_cmd_sg_entries, NULL);
2741 static DEVICE_ATTR(allow_ext_sg, S_IRUGO, show_allow_ext_sg, NULL);
2743 static struct device_attribute *srp_host_attrs[] = {
2744 &dev_attr_id_ext,
2745 &dev_attr_ioc_guid,
2746 &dev_attr_service_id,
2747 &dev_attr_pkey,
2748 &dev_attr_sgid,
2749 &dev_attr_dgid,
2750 &dev_attr_orig_dgid,
2751 &dev_attr_req_lim,
2752 &dev_attr_zero_req_lim,
2753 &dev_attr_local_ib_port,
2754 &dev_attr_local_ib_device,
2755 &dev_attr_ch_count,
2756 &dev_attr_comp_vector,
2757 &dev_attr_tl_retry_count,
2758 &dev_attr_cmd_sg_entries,
2759 &dev_attr_allow_ext_sg,
2760 NULL
2763 static struct scsi_host_template srp_template = {
2764 .module = THIS_MODULE,
2765 .name = "InfiniBand SRP initiator",
2766 .proc_name = DRV_NAME,
2767 .slave_configure = srp_slave_configure,
2768 .info = srp_target_info,
2769 .queuecommand = srp_queuecommand,
2770 .change_queue_depth = srp_change_queue_depth,
2771 .eh_abort_handler = srp_abort,
2772 .eh_device_reset_handler = srp_reset_device,
2773 .eh_host_reset_handler = srp_reset_host,
2774 .skip_settle_delay = true,
2775 .sg_tablesize = SRP_DEF_SG_TABLESIZE,
2776 .can_queue = SRP_DEFAULT_CMD_SQ_SIZE,
2777 .this_id = -1,
2778 .cmd_per_lun = SRP_DEFAULT_CMD_SQ_SIZE,
2779 .use_clustering = ENABLE_CLUSTERING,
2780 .shost_attrs = srp_host_attrs,
2781 .track_queue_depth = 1,
2784 static int srp_sdev_count(struct Scsi_Host *host)
2786 struct scsi_device *sdev;
2787 int c = 0;
2789 shost_for_each_device(sdev, host)
2790 c++;
2792 return c;
2796 * Return values:
2797 * < 0 upon failure. Caller is responsible for SRP target port cleanup.
2798 * 0 and target->state == SRP_TARGET_REMOVED if asynchronous target port
2799 * removal has been scheduled.
2800 * 0 and target->state != SRP_TARGET_REMOVED upon success.
2802 static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
2804 struct srp_rport_identifiers ids;
2805 struct srp_rport *rport;
2807 target->state = SRP_TARGET_SCANNING;
2808 sprintf(target->target_name, "SRP.T10:%016llX",
2809 be64_to_cpu(target->id_ext));
2811 if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dma_device))
2812 return -ENODEV;
2814 memcpy(ids.port_id, &target->id_ext, 8);
2815 memcpy(ids.port_id + 8, &target->ioc_guid, 8);
2816 ids.roles = SRP_RPORT_ROLE_TARGET;
2817 rport = srp_rport_add(target->scsi_host, &ids);
2818 if (IS_ERR(rport)) {
2819 scsi_remove_host(target->scsi_host);
2820 return PTR_ERR(rport);
2823 rport->lld_data = target;
2824 target->rport = rport;
2826 spin_lock(&host->target_lock);
2827 list_add_tail(&target->list, &host->target_list);
2828 spin_unlock(&host->target_lock);
2830 scsi_scan_target(&target->scsi_host->shost_gendev,
2831 0, target->scsi_id, SCAN_WILD_CARD, 0);
2833 if (srp_connected_ch(target) < target->ch_count ||
2834 target->qp_in_error) {
2835 shost_printk(KERN_INFO, target->scsi_host,
2836 PFX "SCSI scan failed - removing SCSI host\n");
2837 srp_queue_remove_work(target);
2838 goto out;
2841 pr_debug(PFX "%s: SCSI scan succeeded - detected %d LUNs\n",
2842 dev_name(&target->scsi_host->shost_gendev),
2843 srp_sdev_count(target->scsi_host));
2845 spin_lock_irq(&target->lock);
2846 if (target->state == SRP_TARGET_SCANNING)
2847 target->state = SRP_TARGET_LIVE;
2848 spin_unlock_irq(&target->lock);
2850 out:
2851 return 0;
2854 static void srp_release_dev(struct device *dev)
2856 struct srp_host *host =
2857 container_of(dev, struct srp_host, dev);
2859 complete(&host->released);
2862 static struct class srp_class = {
2863 .name = "infiniband_srp",
2864 .dev_release = srp_release_dev
2868 * srp_conn_unique() - check whether the connection to a target is unique
2869 * @host: SRP host.
2870 * @target: SRP target port.
2872 static bool srp_conn_unique(struct srp_host *host,
2873 struct srp_target_port *target)
2875 struct srp_target_port *t;
2876 bool ret = false;
2878 if (target->state == SRP_TARGET_REMOVED)
2879 goto out;
2881 ret = true;
2883 spin_lock(&host->target_lock);
2884 list_for_each_entry(t, &host->target_list, list) {
2885 if (t != target &&
2886 target->id_ext == t->id_ext &&
2887 target->ioc_guid == t->ioc_guid &&
2888 target->initiator_ext == t->initiator_ext) {
2889 ret = false;
2890 break;
2893 spin_unlock(&host->target_lock);
2895 out:
2896 return ret;
/*
 * Target ports are added by writing
 *
 *     id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>,
 *     pkey=<P_Key>,service_id=<service ID>
 *
 * to the add_target sysfs attribute.
 *
 * Each option is represented by one bit so that the options seen so far
 * can be tracked in a mask. SRP_OPT_ALL is the set of mandatory options.
 */
enum {
	SRP_OPT_ERR		= 0,
	SRP_OPT_ID_EXT		= 1 << 0,
	SRP_OPT_IOC_GUID	= 1 << 1,
	SRP_OPT_DGID		= 1 << 2,
	SRP_OPT_PKEY		= 1 << 3,
	SRP_OPT_SERVICE_ID	= 1 << 4,
	SRP_OPT_MAX_SECT	= 1 << 5,
	SRP_OPT_MAX_CMD_PER_LUN	= 1 << 6,
	SRP_OPT_IO_CLASS	= 1 << 7,
	SRP_OPT_INITIATOR_EXT	= 1 << 8,
	SRP_OPT_CMD_SG_ENTRIES	= 1 << 9,
	SRP_OPT_ALLOW_EXT_SG	= 1 << 10,
	SRP_OPT_SG_TABLESIZE	= 1 << 11,
	SRP_OPT_COMP_VECTOR	= 1 << 12,
	SRP_OPT_TL_RETRY_COUNT	= 1 << 13,
	SRP_OPT_QUEUE_SIZE	= 1 << 14,
	SRP_OPT_ALL		= (SRP_OPT_ID_EXT	|
				   SRP_OPT_IOC_GUID	|
				   SRP_OPT_DGID		|
				   SRP_OPT_PKEY		|
				   SRP_OPT_SERVICE_ID),
};
2931 static const match_table_t srp_opt_tokens = {
2932 { SRP_OPT_ID_EXT, "id_ext=%s" },
2933 { SRP_OPT_IOC_GUID, "ioc_guid=%s" },
2934 { SRP_OPT_DGID, "dgid=%s" },
2935 { SRP_OPT_PKEY, "pkey=%x" },
2936 { SRP_OPT_SERVICE_ID, "service_id=%s" },
2937 { SRP_OPT_MAX_SECT, "max_sect=%d" },
2938 { SRP_OPT_MAX_CMD_PER_LUN, "max_cmd_per_lun=%d" },
2939 { SRP_OPT_IO_CLASS, "io_class=%x" },
2940 { SRP_OPT_INITIATOR_EXT, "initiator_ext=%s" },
2941 { SRP_OPT_CMD_SG_ENTRIES, "cmd_sg_entries=%u" },
2942 { SRP_OPT_ALLOW_EXT_SG, "allow_ext_sg=%u" },
2943 { SRP_OPT_SG_TABLESIZE, "sg_tablesize=%u" },
2944 { SRP_OPT_COMP_VECTOR, "comp_vector=%u" },
2945 { SRP_OPT_TL_RETRY_COUNT, "tl_retry_count=%u" },
2946 { SRP_OPT_QUEUE_SIZE, "queue_size=%d" },
2947 { SRP_OPT_ERR, NULL }
2950 static int srp_parse_options(const char *buf, struct srp_target_port *target)
2952 char *options, *sep_opt;
2953 char *p;
2954 char dgid[3];
2955 substring_t args[MAX_OPT_ARGS];
2956 int opt_mask = 0;
2957 int token;
2958 int ret = -EINVAL;
2959 int i;
2961 options = kstrdup(buf, GFP_KERNEL);
2962 if (!options)
2963 return -ENOMEM;
2965 sep_opt = options;
2966 while ((p = strsep(&sep_opt, ",\n")) != NULL) {
2967 if (!*p)
2968 continue;
2970 token = match_token(p, srp_opt_tokens, args);
2971 opt_mask |= token;
2973 switch (token) {
2974 case SRP_OPT_ID_EXT:
2975 p = match_strdup(args);
2976 if (!p) {
2977 ret = -ENOMEM;
2978 goto out;
2980 target->id_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
2981 kfree(p);
2982 break;
2984 case SRP_OPT_IOC_GUID:
2985 p = match_strdup(args);
2986 if (!p) {
2987 ret = -ENOMEM;
2988 goto out;
2990 target->ioc_guid = cpu_to_be64(simple_strtoull(p, NULL, 16));
2991 kfree(p);
2992 break;
2994 case SRP_OPT_DGID:
2995 p = match_strdup(args);
2996 if (!p) {
2997 ret = -ENOMEM;
2998 goto out;
3000 if (strlen(p) != 32) {
3001 pr_warn("bad dest GID parameter '%s'\n", p);
3002 kfree(p);
3003 goto out;
3006 for (i = 0; i < 16; ++i) {
3007 strlcpy(dgid, p + i * 2, sizeof(dgid));
3008 if (sscanf(dgid, "%hhx",
3009 &target->orig_dgid.raw[i]) < 1) {
3010 ret = -EINVAL;
3011 kfree(p);
3012 goto out;
3015 kfree(p);
3016 break;
3018 case SRP_OPT_PKEY:
3019 if (match_hex(args, &token)) {
3020 pr_warn("bad P_Key parameter '%s'\n", p);
3021 goto out;
3023 target->pkey = cpu_to_be16(token);
3024 break;
3026 case SRP_OPT_SERVICE_ID:
3027 p = match_strdup(args);
3028 if (!p) {
3029 ret = -ENOMEM;
3030 goto out;
3032 target->service_id = cpu_to_be64(simple_strtoull(p, NULL, 16));
3033 kfree(p);
3034 break;
3036 case SRP_OPT_MAX_SECT:
3037 if (match_int(args, &token)) {
3038 pr_warn("bad max sect parameter '%s'\n", p);
3039 goto out;
3041 target->scsi_host->max_sectors = token;
3042 break;
3044 case SRP_OPT_QUEUE_SIZE:
3045 if (match_int(args, &token) || token < 1) {
3046 pr_warn("bad queue_size parameter '%s'\n", p);
3047 goto out;
3049 target->scsi_host->can_queue = token;
3050 target->queue_size = token + SRP_RSP_SQ_SIZE +
3051 SRP_TSK_MGMT_SQ_SIZE;
3052 if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3053 target->scsi_host->cmd_per_lun = token;
3054 break;
3056 case SRP_OPT_MAX_CMD_PER_LUN:
3057 if (match_int(args, &token) || token < 1) {
3058 pr_warn("bad max cmd_per_lun parameter '%s'\n",
3060 goto out;
3062 target->scsi_host->cmd_per_lun = token;
3063 break;
3065 case SRP_OPT_IO_CLASS:
3066 if (match_hex(args, &token)) {
3067 pr_warn("bad IO class parameter '%s'\n", p);
3068 goto out;
3070 if (token != SRP_REV10_IB_IO_CLASS &&
3071 token != SRP_REV16A_IB_IO_CLASS) {
3072 pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n",
3073 token, SRP_REV10_IB_IO_CLASS,
3074 SRP_REV16A_IB_IO_CLASS);
3075 goto out;
3077 target->io_class = token;
3078 break;
3080 case SRP_OPT_INITIATOR_EXT:
3081 p = match_strdup(args);
3082 if (!p) {
3083 ret = -ENOMEM;
3084 goto out;
3086 target->initiator_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
3087 kfree(p);
3088 break;
3090 case SRP_OPT_CMD_SG_ENTRIES:
3091 if (match_int(args, &token) || token < 1 || token > 255) {
3092 pr_warn("bad max cmd_sg_entries parameter '%s'\n",
3094 goto out;
3096 target->cmd_sg_cnt = token;
3097 break;
3099 case SRP_OPT_ALLOW_EXT_SG:
3100 if (match_int(args, &token)) {
3101 pr_warn("bad allow_ext_sg parameter '%s'\n", p);
3102 goto out;
3104 target->allow_ext_sg = !!token;
3105 break;
3107 case SRP_OPT_SG_TABLESIZE:
3108 if (match_int(args, &token) || token < 1 ||
3109 token > SCSI_MAX_SG_CHAIN_SEGMENTS) {
3110 pr_warn("bad max sg_tablesize parameter '%s'\n",
3112 goto out;
3114 target->sg_tablesize = token;
3115 break;
3117 case SRP_OPT_COMP_VECTOR:
3118 if (match_int(args, &token) || token < 0) {
3119 pr_warn("bad comp_vector parameter '%s'\n", p);
3120 goto out;
3122 target->comp_vector = token;
3123 break;
3125 case SRP_OPT_TL_RETRY_COUNT:
3126 if (match_int(args, &token) || token < 2 || token > 7) {
3127 pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n",
3129 goto out;
3131 target->tl_retry_count = token;
3132 break;
3134 default:
3135 pr_warn("unknown parameter or missing value '%s' in target creation request\n",
3137 goto out;
3141 if ((opt_mask & SRP_OPT_ALL) == SRP_OPT_ALL)
3142 ret = 0;
3143 else
3144 for (i = 0; i < ARRAY_SIZE(srp_opt_tokens); ++i)
3145 if ((srp_opt_tokens[i].token & SRP_OPT_ALL) &&
3146 !(srp_opt_tokens[i].token & opt_mask))
3147 pr_warn("target creation request is missing parameter '%s'\n",
3148 srp_opt_tokens[i].pattern);
3150 if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue
3151 && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3152 pr_warn("cmd_per_lun = %d > queue_size = %d\n",
3153 target->scsi_host->cmd_per_lun,
3154 target->scsi_host->can_queue);
3156 out:
3157 kfree(options);
3158 return ret;
3161 static ssize_t srp_create_target(struct device *dev,
3162 struct device_attribute *attr,
3163 const char *buf, size_t count)
3165 struct srp_host *host =
3166 container_of(dev, struct srp_host, dev);
3167 struct Scsi_Host *target_host;
3168 struct srp_target_port *target;
3169 struct srp_rdma_ch *ch;
3170 struct srp_device *srp_dev = host->srp_dev;
3171 struct ib_device *ibdev = srp_dev->dev;
3172 int ret, node_idx, node, cpu, i;
3173 bool multich = false;
3175 target_host = scsi_host_alloc(&srp_template,
3176 sizeof (struct srp_target_port));
3177 if (!target_host)
3178 return -ENOMEM;
3180 target_host->transportt = ib_srp_transport_template;
3181 target_host->max_channel = 0;
3182 target_host->max_id = 1;
3183 target_host->max_lun = -1LL;
3184 target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb;
3186 target = host_to_target(target_host);
3188 target->io_class = SRP_REV16A_IB_IO_CLASS;
3189 target->scsi_host = target_host;
3190 target->srp_host = host;
3191 target->lkey = host->srp_dev->pd->local_dma_lkey;
3192 target->global_mr = host->srp_dev->global_mr;
3193 target->cmd_sg_cnt = cmd_sg_entries;
3194 target->sg_tablesize = indirect_sg_entries ? : cmd_sg_entries;
3195 target->allow_ext_sg = allow_ext_sg;
3196 target->tl_retry_count = 7;
3197 target->queue_size = SRP_DEFAULT_QUEUE_SIZE;
3200 * Avoid that the SCSI host can be removed by srp_remove_target()
3201 * before this function returns.
3203 scsi_host_get(target->scsi_host);
3205 mutex_lock(&host->add_target_mutex);
3207 ret = srp_parse_options(buf, target);
3208 if (ret)
3209 goto out;
3211 target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE;
3213 if (!srp_conn_unique(target->srp_host, target)) {
3214 shost_printk(KERN_INFO, target->scsi_host,
3215 PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
3216 be64_to_cpu(target->id_ext),
3217 be64_to_cpu(target->ioc_guid),
3218 be64_to_cpu(target->initiator_ext));
3219 ret = -EEXIST;
3220 goto out;
3223 if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg &&
3224 target->cmd_sg_cnt < target->sg_tablesize) {
3225 pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
3226 target->sg_tablesize = target->cmd_sg_cnt;
3229 target_host->sg_tablesize = target->sg_tablesize;
3230 target->indirect_size = target->sg_tablesize *
3231 sizeof (struct srp_direct_buf);
3232 target->max_iu_len = sizeof (struct srp_cmd) +
3233 sizeof (struct srp_indirect_buf) +
3234 target->cmd_sg_cnt * sizeof (struct srp_direct_buf);
3236 INIT_WORK(&target->tl_err_work, srp_tl_err_work);
3237 INIT_WORK(&target->remove_work, srp_remove_work);
3238 spin_lock_init(&target->lock);
3239 ret = ib_query_gid(ibdev, host->port, 0, &target->sgid, NULL);
3240 if (ret)
3241 goto out;
3243 ret = -ENOMEM;
3244 target->ch_count = max_t(unsigned, num_online_nodes(),
3245 min(ch_count ? :
3246 min(4 * num_online_nodes(),
3247 ibdev->num_comp_vectors),
3248 num_online_cpus()));
3249 target->ch = kcalloc(target->ch_count, sizeof(*target->ch),
3250 GFP_KERNEL);
3251 if (!target->ch)
3252 goto out;
3254 node_idx = 0;
3255 for_each_online_node(node) {
3256 const int ch_start = (node_idx * target->ch_count /
3257 num_online_nodes());
3258 const int ch_end = ((node_idx + 1) * target->ch_count /
3259 num_online_nodes());
3260 const int cv_start = (node_idx * ibdev->num_comp_vectors /
3261 num_online_nodes() + target->comp_vector)
3262 % ibdev->num_comp_vectors;
3263 const int cv_end = ((node_idx + 1) * ibdev->num_comp_vectors /
3264 num_online_nodes() + target->comp_vector)
3265 % ibdev->num_comp_vectors;
3266 int cpu_idx = 0;
3268 for_each_online_cpu(cpu) {
3269 if (cpu_to_node(cpu) != node)
3270 continue;
3271 if (ch_start + cpu_idx >= ch_end)
3272 continue;
3273 ch = &target->ch[ch_start + cpu_idx];
3274 ch->target = target;
3275 ch->comp_vector = cv_start == cv_end ? cv_start :
3276 cv_start + cpu_idx % (cv_end - cv_start);
3277 spin_lock_init(&ch->lock);
3278 INIT_LIST_HEAD(&ch->free_tx);
3279 ret = srp_new_cm_id(ch);
3280 if (ret)
3281 goto err_disconnect;
3283 ret = srp_create_ch_ib(ch);
3284 if (ret)
3285 goto err_disconnect;
3287 ret = srp_alloc_req_data(ch);
3288 if (ret)
3289 goto err_disconnect;
3291 ret = srp_connect_ch(ch, multich);
3292 if (ret) {
3293 shost_printk(KERN_ERR, target->scsi_host,
3294 PFX "Connection %d/%d failed\n",
3295 ch_start + cpu_idx,
3296 target->ch_count);
3297 if (node_idx == 0 && cpu_idx == 0) {
3298 goto err_disconnect;
3299 } else {
3300 srp_free_ch_ib(target, ch);
3301 srp_free_req_data(target, ch);
3302 target->ch_count = ch - target->ch;
3303 goto connected;
3307 multich = true;
3308 cpu_idx++;
3310 node_idx++;
3313 connected:
3314 target->scsi_host->nr_hw_queues = target->ch_count;
3316 ret = srp_add_target(host, target);
3317 if (ret)
3318 goto err_disconnect;
3320 if (target->state != SRP_TARGET_REMOVED) {
3321 shost_printk(KERN_DEBUG, target->scsi_host, PFX
3322 "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
3323 be64_to_cpu(target->id_ext),
3324 be64_to_cpu(target->ioc_guid),
3325 be16_to_cpu(target->pkey),
3326 be64_to_cpu(target->service_id),
3327 target->sgid.raw, target->orig_dgid.raw);
3330 ret = count;
3332 out:
3333 mutex_unlock(&host->add_target_mutex);
3335 scsi_host_put(target->scsi_host);
3336 if (ret < 0)
3337 scsi_host_put(target->scsi_host);
3339 return ret;
3341 err_disconnect:
3342 srp_disconnect_target(target);
3344 for (i = 0; i < target->ch_count; i++) {
3345 ch = &target->ch[i];
3346 srp_free_ch_ib(target, ch);
3347 srp_free_req_data(target, ch);
3350 kfree(target->ch);
3351 goto out;
3354 static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target);
3356 static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
3357 char *buf)
3359 struct srp_host *host = container_of(dev, struct srp_host, dev);
3361 return sprintf(buf, "%s\n", host->srp_dev->dev->name);
3364 static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
3366 static ssize_t show_port(struct device *dev, struct device_attribute *attr,
3367 char *buf)
3369 struct srp_host *host = container_of(dev, struct srp_host, dev);
3371 return sprintf(buf, "%d\n", host->port);
3374 static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
3376 static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
3378 struct srp_host *host;
3380 host = kzalloc(sizeof *host, GFP_KERNEL);
3381 if (!host)
3382 return NULL;
3384 INIT_LIST_HEAD(&host->target_list);
3385 spin_lock_init(&host->target_lock);
3386 init_completion(&host->released);
3387 mutex_init(&host->add_target_mutex);
3388 host->srp_dev = device;
3389 host->port = port;
3391 host->dev.class = &srp_class;
3392 host->dev.parent = device->dev->dma_device;
3393 dev_set_name(&host->dev, "srp-%s-%d", device->dev->name, port);
3395 if (device_register(&host->dev))
3396 goto free_host;
3397 if (device_create_file(&host->dev, &dev_attr_add_target))
3398 goto err_class;
3399 if (device_create_file(&host->dev, &dev_attr_ibdev))
3400 goto err_class;
3401 if (device_create_file(&host->dev, &dev_attr_port))
3402 goto err_class;
3404 return host;
3406 err_class:
3407 device_unregister(&host->dev);
3409 free_host:
3410 kfree(host);
3412 return NULL;
3415 static void srp_add_one(struct ib_device *device)
3417 struct srp_device *srp_dev;
3418 struct srp_host *host;
3419 int mr_page_shift, p;
3420 u64 max_pages_per_mr;
3422 srp_dev = kmalloc(sizeof *srp_dev, GFP_KERNEL);
3423 if (!srp_dev)
3424 return;
3426 srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
3427 device->map_phys_fmr && device->unmap_fmr);
3428 srp_dev->has_fr = (device->attrs.device_cap_flags &
3429 IB_DEVICE_MEM_MGT_EXTENSIONS);
3430 if (!srp_dev->has_fmr && !srp_dev->has_fr)
3431 dev_warn(&device->dev, "neither FMR nor FR is supported\n");
3433 srp_dev->use_fast_reg = (srp_dev->has_fr &&
3434 (!srp_dev->has_fmr || prefer_fr));
3435 srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
3438 * Use the smallest page size supported by the HCA, down to a
3439 * minimum of 4096 bytes. We're unlikely to build large sglists
3440 * out of smaller entries.
3442 mr_page_shift = max(12, ffs(device->attrs.page_size_cap) - 1);
3443 srp_dev->mr_page_size = 1 << mr_page_shift;
3444 srp_dev->mr_page_mask = ~((u64) srp_dev->mr_page_size - 1);
3445 max_pages_per_mr = device->attrs.max_mr_size;
3446 do_div(max_pages_per_mr, srp_dev->mr_page_size);
3447 srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
3448 max_pages_per_mr);
3449 if (srp_dev->use_fast_reg) {
3450 srp_dev->max_pages_per_mr =
3451 min_t(u32, srp_dev->max_pages_per_mr,
3452 device->attrs.max_fast_reg_page_list_len);
3454 srp_dev->mr_max_size = srp_dev->mr_page_size *
3455 srp_dev->max_pages_per_mr;
3456 pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
3457 device->name, mr_page_shift, device->attrs.max_mr_size,
3458 device->attrs.max_fast_reg_page_list_len,
3459 srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
3461 INIT_LIST_HEAD(&srp_dev->dev_list);
3463 srp_dev->dev = device;
3464 srp_dev->pd = ib_alloc_pd(device);
3465 if (IS_ERR(srp_dev->pd))
3466 goto free_dev;
3468 if (!register_always || (!srp_dev->has_fmr && !srp_dev->has_fr)) {
3469 srp_dev->global_mr = ib_get_dma_mr(srp_dev->pd,
3470 IB_ACCESS_LOCAL_WRITE |
3471 IB_ACCESS_REMOTE_READ |
3472 IB_ACCESS_REMOTE_WRITE);
3473 if (IS_ERR(srp_dev->global_mr))
3474 goto err_pd;
3475 } else {
3476 srp_dev->global_mr = NULL;
3479 for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
3480 host = srp_add_port(srp_dev, p);
3481 if (host)
3482 list_add_tail(&host->list, &srp_dev->dev_list);
3485 ib_set_client_data(device, &srp_client, srp_dev);
3486 return;
3488 err_pd:
3489 ib_dealloc_pd(srp_dev->pd);
3491 free_dev:
3492 kfree(srp_dev);
3495 static void srp_remove_one(struct ib_device *device, void *client_data)
3497 struct srp_device *srp_dev;
3498 struct srp_host *host, *tmp_host;
3499 struct srp_target_port *target;
3501 srp_dev = client_data;
3502 if (!srp_dev)
3503 return;
3505 list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
3506 device_unregister(&host->dev);
3508 * Wait for the sysfs entry to go away, so that no new
3509 * target ports can be created.
3511 wait_for_completion(&host->released);
3514 * Remove all target ports.
3516 spin_lock(&host->target_lock);
3517 list_for_each_entry(target, &host->target_list, list)
3518 srp_queue_remove_work(target);
3519 spin_unlock(&host->target_lock);
3522 * Wait for tl_err and target port removal tasks.
3524 flush_workqueue(system_long_wq);
3525 flush_workqueue(srp_remove_wq);
3527 kfree(host);
3530 if (srp_dev->global_mr)
3531 ib_dereg_mr(srp_dev->global_mr);
3532 ib_dealloc_pd(srp_dev->pd);
3534 kfree(srp_dev);
3537 static struct srp_function_template ib_srp_transport_functions = {
3538 .has_rport_state = true,
3539 .reset_timer_if_blocked = true,
3540 .reconnect_delay = &srp_reconnect_delay,
3541 .fast_io_fail_tmo = &srp_fast_io_fail_tmo,
3542 .dev_loss_tmo = &srp_dev_loss_tmo,
3543 .reconnect = srp_rport_reconnect,
3544 .rport_delete = srp_rport_delete,
3545 .terminate_rport_io = srp_terminate_io,
3548 static int __init srp_init_module(void)
3550 int ret;
3552 if (srp_sg_tablesize) {
3553 pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
3554 if (!cmd_sg_entries)
3555 cmd_sg_entries = srp_sg_tablesize;
3558 if (!cmd_sg_entries)
3559 cmd_sg_entries = SRP_DEF_SG_TABLESIZE;
3561 if (cmd_sg_entries > 255) {
3562 pr_warn("Clamping cmd_sg_entries to 255\n");
3563 cmd_sg_entries = 255;
3566 if (!indirect_sg_entries)
3567 indirect_sg_entries = cmd_sg_entries;
3568 else if (indirect_sg_entries < cmd_sg_entries) {
3569 pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n",
3570 cmd_sg_entries);
3571 indirect_sg_entries = cmd_sg_entries;
3574 srp_remove_wq = create_workqueue("srp_remove");
3575 if (!srp_remove_wq) {
3576 ret = -ENOMEM;
3577 goto out;
3580 ret = -ENOMEM;
3581 ib_srp_transport_template =
3582 srp_attach_transport(&ib_srp_transport_functions);
3583 if (!ib_srp_transport_template)
3584 goto destroy_wq;
3586 ret = class_register(&srp_class);
3587 if (ret) {
3588 pr_err("couldn't register class infiniband_srp\n");
3589 goto release_tr;
3592 ib_sa_register_client(&srp_sa_client);
3594 ret = ib_register_client(&srp_client);
3595 if (ret) {
3596 pr_err("couldn't register IB client\n");
3597 goto unreg_sa;
3600 out:
3601 return ret;
3603 unreg_sa:
3604 ib_sa_unregister_client(&srp_sa_client);
3605 class_unregister(&srp_class);
3607 release_tr:
3608 srp_release_transport(ib_srp_transport_template);
3610 destroy_wq:
3611 destroy_workqueue(srp_remove_wq);
3612 goto out;
3615 static void __exit srp_cleanup_module(void)
3617 ib_unregister_client(&srp_client);
3618 ib_sa_unregister_client(&srp_sa_client);
3619 class_unregister(&srp_class);
3620 srp_release_transport(ib_srp_transport_template);
3621 destroy_workqueue(srp_remove_wq);
/* Register srp_init_module() and srp_cleanup_module() as the module's init and exit handlers. */
3624 module_init(srp_init_module);
3625 module_exit(srp_cleanup_module);