/*
 * Copyright (c) 2006 QLogic, Inc. All rights reserved.
 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "ipath_verbs.h"

/*
 * Convert the AETH RNR timeout code into the number of milliseconds.
 */
const u32 ib_ipath_rnr_table[32] = {

/**
 * ipath_insert_rnr_queue - put QP on the RNR timeout list for the device
 * @qp: the QP
 *
 * XXX Use a simple list for now.  We might need a priority
 * queue if we have lots of QPs waiting for RNR timeouts
 * but that should be rare.
 */
void ipath_insert_rnr_queue(struct ipath_qp *qp)
{
	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
	unsigned long flags;

	spin_lock_irqsave(&dev->pending_lock, flags);
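	/*
	 * The RNR wait list is kept sorted by expiry time: each queued QP
	 * stores its s_rnr_timeout relative to the entry in front of it,
	 * so insertion subtracts the deltas of earlier entries while
	 * walking the list.
	 */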
	if (list_empty(&dev->rnrwait))
		list_add(&qp->timerwait, &dev->rnrwait);
	else {
		struct list_head *l = &dev->rnrwait;
		struct ipath_qp *nqp = list_entry(l->next, struct ipath_qp,
						  timerwait);

		while (qp->s_rnr_timeout >= nqp->s_rnr_timeout) {
			qp->s_rnr_timeout -= nqp->s_rnr_timeout;
			l = l->next;
			if (l->next == &dev->rnrwait)
				break;
			nqp = list_entry(l->next, struct ipath_qp,
					 timerwait);
		}
		list_add(&qp->timerwait, l);
	}
	spin_unlock_irqrestore(&dev->pending_lock, flags);
}

/**
 * ipath_get_rwqe - copy the next RWQE into the QP's RWQE
 * @qp: the QP
 * @wr_id_only: update wr_id only, not SGEs
 *
 * Return 0 if no RWQE is available, otherwise return 1.
 *
 * Called at interrupt level with the QP r_rq.lock held.
 */
int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
{
	struct ipath_rq *rq;
	struct ipath_srq *srq;
	struct ipath_rwqe *wqe;
	int ret;

	if (!qp->ibqp.srq) {
		rq = &qp->r_rq;
		if (unlikely(rq->tail == rq->head)) {
			ret = 0;
			goto bail;
		}
		wqe = get_rwqe_ptr(rq, rq->tail);
		qp->r_wr_id = wqe->wr_id;
		if (!wr_id_only) {
			qp->r_sge.sge = wqe->sg_list[0];
			qp->r_sge.sg_list = wqe->sg_list + 1;
			qp->r_sge.num_sge = wqe->num_sge;
			qp->r_len = wqe->length;
		}
		if (++rq->tail >= rq->size)
			rq->tail = 0;
		ret = 1;
		goto bail;
	}
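
	/*
	 * The QP is attached to a shared receive queue, so consume the
	 * next WQE from the SRQ instead.  The SRQ's ring is shared with
	 * other QPs and therefore needs its own lock.
	 */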
	srq = to_isrq(qp->ibqp.srq);
	rq = &srq->rq;
	spin_lock(&rq->lock);
	if (unlikely(rq->tail == rq->head)) {
		spin_unlock(&rq->lock);
		ret = 0;
		goto bail;
	}
	wqe = get_rwqe_ptr(rq, rq->tail);
	qp->r_wr_id = wqe->wr_id;
	if (!wr_id_only) {
		qp->r_sge.sge = wqe->sg_list[0];
		qp->r_sge.sg_list = wqe->sg_list + 1;
		qp->r_sge.num_sge = wqe->num_sge;
		qp->r_len = wqe->length;
	}
	if (++rq->tail >= rq->size)
		rq->tail = 0;
	if (srq->ibsrq.event_handler) {
		struct ib_event ev;
		u32 n;

		if (rq->head < rq->tail)
			n = rq->size + rq->head - rq->tail;
		else
			n = rq->head - rq->tail;
		if (n < srq->limit) {
			srq->limit = 0;
			spin_unlock(&rq->lock);
			ev.device = qp->ibqp.device;
			ev.element.srq = qp->ibqp.srq;
			ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
			srq->ibsrq.event_handler(&ev,
						 srq->ibsrq.srq_context);
		} else
			spin_unlock(&rq->lock);
	} else
		spin_unlock(&rq->lock);
	ret = 1;

bail:
	return ret;
}

/**
 * ipath_ruc_loopback - handle UC and RC loopback requests
 * @sqp: the loopback QP
 * @wc: the work completion entry
 *
 * This is called from ipath_do_uc_send() or ipath_do_rc_send() to
 * forward a WQE addressed to the same HCA.
 * Note that although we are single threaded due to the tasklet, we still
 * have to protect against post_send().  We don't have to worry about
 * receive interrupts since this is a connected protocol and all packets
 * will pass through here.
 */
void ipath_ruc_loopback(struct ipath_qp *sqp, struct ib_wc *wc)
{
	struct ipath_ibdev *dev = to_idev(sqp->ibqp.device);
	struct ipath_qp *qp;
	struct ipath_swqe *wqe;
	struct ipath_sge *sge;
	unsigned long flags;
	u64 sdata;

	qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn);
	if (!qp) {
		dev->n_pkt_drops++;
		return;
	}

again:
	spin_lock_irqsave(&sqp->s_lock, flags);

	if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK)) {
		spin_unlock_irqrestore(&sqp->s_lock, flags);
		goto done;
	}

	/* Get the next send request. */
	if (sqp->s_last == sqp->s_head) {
		/* Send work queue is empty. */
		spin_unlock_irqrestore(&sqp->s_lock, flags);
		goto done;
	}

	/*
	 * We can rely on the entry not changing without the s_lock
	 * being held until we update s_last.
	 */
	wqe = get_swqe_ptr(sqp, sqp->s_last);
	spin_unlock_irqrestore(&sqp->s_lock, flags);

	wc->wc_flags = 0;
	wc->imm_data = 0;
	sqp->s_sge.sge = wqe->sg_list[0];
	sqp->s_sge.sg_list = wqe->sg_list + 1;
	sqp->s_sge.num_sge = wqe->wr.num_sge;
	sqp->s_len = wqe->length;
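	/*
	 * Dispatch on the send opcode.  Since both QPs are on the same
	 * HCA, the receive-side work (RNR handling, rkey checks, atomics)
	 * is done inline here instead of being driven by incoming packets.
	 */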
	switch (wqe->wr.opcode) {
	case IB_WR_SEND_WITH_IMM:
		wc->wc_flags = IB_WC_WITH_IMM;
		wc->imm_data = wqe->wr.imm_data;
		/* FALLTHROUGH */
	case IB_WR_SEND:
		spin_lock_irqsave(&qp->r_rq.lock, flags);
		if (!ipath_get_rwqe(qp, 0)) {
		rnr_nak:
			spin_unlock_irqrestore(&qp->r_rq.lock, flags);
			/* Handle RNR NAK */
			if (qp->ibqp.qp_type == IB_QPT_UC)
				goto send_comp;
			if (sqp->s_rnr_retry == 0) {
				wc->status = IB_WC_RNR_RETRY_EXC_ERR;
				goto err;
			}
			if (sqp->s_rnr_retry_cnt < 7)
				sqp->s_rnr_retry--;
			sqp->s_rnr_timeout =
				ib_ipath_rnr_table[sqp->s_min_rnr_timer];
			ipath_insert_rnr_queue(sqp);
			goto done;
		}
		spin_unlock_irqrestore(&qp->r_rq.lock, flags);
		break;
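
	/*
	 * RDMA write with immediate consumes a receive WQE on the
	 * destination QP (wr_id only; the payload still lands at the
	 * address named by the rkey), then falls through to the plain
	 * RDMA write path below.
	 */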
	case IB_WR_RDMA_WRITE_WITH_IMM:
		wc->wc_flags = IB_WC_WITH_IMM;
		wc->imm_data = wqe->wr.imm_data;
		spin_lock_irqsave(&qp->r_rq.lock, flags);
		if (!ipath_get_rwqe(qp, 1))
			goto rnr_nak;
		spin_unlock_irqrestore(&qp->r_rq.lock, flags);
		/* FALLTHROUGH */
	case IB_WR_RDMA_WRITE:
		if (wqe->length == 0)
			break;
		if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge, wqe->length,
					    wqe->wr.wr.rdma.remote_addr,
					    wqe->wr.wr.rdma.rkey,
					    IB_ACCESS_REMOTE_WRITE))) {
		acc_err:
			wc->status = IB_WC_REM_ACCESS_ERR;
		err:
			wc->wr_id = wqe->wr.wr_id;
			wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
			wc->vendor_err = 0;
			wc->byte_len = 0;
			wc->qp_num = sqp->ibqp.qp_num;
			wc->src_qp = sqp->remote_qpn;
			wc->pkey_index = 0;
			wc->slid = sqp->remote_ah_attr.dlid;
			wc->sl = sqp->remote_ah_attr.sl;
			wc->dlid_path_bits = 0;
			wc->port_num = 0;
			ipath_sqerror_qp(sqp, wc);
			goto done;
		}
		break;

	case IB_WR_RDMA_READ:
		if (unlikely(!ipath_rkey_ok(dev, &sqp->s_sge, wqe->length,
					    wqe->wr.wr.rdma.remote_addr,
					    wqe->wr.wr.rdma.rkey,
					    IB_ACCESS_REMOTE_READ)))
			goto acc_err;
		if (unlikely(!(qp->qp_access_flags &
			       IB_ACCESS_REMOTE_READ)))
			goto acc_err;
		qp->r_sge.sge = wqe->sg_list[0];
		qp->r_sge.sg_list = wqe->sg_list + 1;
		qp->r_sge.num_sge = wqe->wr.num_sge;
		break;
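
	/*
	 * Loopback atomics: the read-modify-write of the target buffer is
	 * done under the device-wide pending_lock, so concurrent loopback
	 * atomics are serialized.
	 */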
	case IB_WR_ATOMIC_CMP_AND_SWP:
	case IB_WR_ATOMIC_FETCH_AND_ADD:
		if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge, sizeof(u64),
					    wqe->wr.wr.rdma.remote_addr,
					    wqe->wr.wr.rdma.rkey,
					    IB_ACCESS_REMOTE_ATOMIC)))
			goto acc_err;
		/* Perform atomic OP and save result. */
		sdata = wqe->wr.wr.atomic.swap;
		spin_lock_irqsave(&dev->pending_lock, flags);
		qp->r_atomic_data = *(u64 *) qp->r_sge.sge.vaddr;
		if (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
			*(u64 *) qp->r_sge.sge.vaddr =
				qp->r_atomic_data + sdata;
		else if (qp->r_atomic_data == wqe->wr.wr.atomic.compare_add)
			*(u64 *) qp->r_sge.sge.vaddr = sdata;
		spin_unlock_irqrestore(&dev->pending_lock, flags);
		*(u64 *) sqp->s_sge.sge.vaddr = qp->r_atomic_data;
		goto send_comp;

	default:
		goto done;
	}
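
	/*
	 * Copy the payload from the sender's SGE list into the receiver's
	 * r_sge, advancing through memory region segments as each SGE is
	 * consumed.
	 */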
	sge = &sqp->s_sge.sge;
	while (sqp->s_len) {
		u32 len = sqp->s_len;

		if (len > sge->length)
			len = sge->length;
		BUG_ON(len == 0);
		ipath_copy_sge(&qp->r_sge, sge->vaddr, len);
		sge->vaddr += len;
		sge->length -= len;
		sge->sge_length -= len;
		if (sge->sge_length == 0) {
			if (--sqp->s_sge.num_sge)
				*sge = *sqp->s_sge.sg_list++;
		} else if (sge->length == 0 && sge->mr != NULL) {
			if (++sge->n >= IPATH_SEGSZ) {
				if (++sge->m >= sge->mr->mapsz)
					break;
				sge->n = 0;
			}
			sge->vaddr =
				sge->mr->map[sge->m]->segs[sge->n].vaddr;
			sge->length =
				sge->mr->map[sge->m]->segs[sge->n].length;
		}
		sqp->s_len -= len;
	}

	if (wqe->wr.opcode == IB_WR_RDMA_WRITE ||
	    wqe->wr.opcode == IB_WR_RDMA_READ)
		goto send_comp;

	if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
		wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
	else
		wc->opcode = IB_WC_RECV;
	wc->wr_id = qp->r_wr_id;
	wc->status = IB_WC_SUCCESS;
	wc->vendor_err = 0;
	wc->byte_len = wqe->length;
	wc->qp_num = qp->ibqp.qp_num;
	wc->src_qp = qp->remote_qpn;
	/* XXX do we know which pkey matched? Only needed for GSI. */
	wc->pkey_index = 0;
	wc->slid = qp->remote_ah_attr.dlid;
	wc->sl = qp->remote_ah_attr.sl;
	wc->dlid_path_bits = 0;
	/* Signal completion event if the solicited bit is set. */
	ipath_cq_enter(to_icq(qp->ibqp.recv_cq), wc,
		       wqe->wr.send_flags & IB_SEND_SOLICITED);
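
	/*
	 * The send side completes here as well: reset the RNR retry count
	 * and generate a send completion unless the QP suppresses
	 * unsignaled completions and this WQE was not IB_SEND_SIGNALED.
	 */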
send_comp:
	sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;

	if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &sqp->s_flags) ||
	    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
		wc->wr_id = wqe->wr.wr_id;
		wc->status = IB_WC_SUCCESS;
		wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
		wc->vendor_err = 0;
		wc->byte_len = wqe->length;
		wc->qp_num = sqp->ibqp.qp_num;
		wc->src_qp = 0;
		wc->pkey_index = 0;
		wc->slid = 0;
		wc->sl = 0;
		wc->dlid_path_bits = 0;
		wc->port_num = 0;
		ipath_cq_enter(to_icq(sqp->ibqp.send_cq), wc, 0);
	}

	/* Update s_last now that we are finished with the SWQE */
	spin_lock_irqsave(&sqp->s_lock, flags);
	if (++sqp->s_last >= sqp->s_size)
		sqp->s_last = 0;
	spin_unlock_irqrestore(&sqp->s_lock, flags);
	goto again;

done:
	if (atomic_dec_and_test(&qp->refcount))
		wake_up(&qp->wait);
}

/**
 * ipath_no_bufs_available - tell the layer driver we need buffers
 * @qp: the QP that caused the problem
 * @dev: the device we ran out of buffers on
 *
 * Called when we run out of PIO buffers.
 */
void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev)
{
	unsigned long flags;

	spin_lock_irqsave(&dev->pending_lock, flags);
	if (list_empty(&qp->piowait))
		list_add_tail(&qp->piowait, &dev->piowait);
	spin_unlock_irqrestore(&dev->pending_lock, flags);
	/*
	 * Note that as soon as ipath_layer_want_buffer() is called and
	 * possibly before it returns, ipath_ib_piobufavail()
	 * could be called.  If we are still in the tasklet function,
	 * tasklet_hi_schedule() will not call us until the next time
	 * tasklet_hi_schedule() is called.
	 * We clear the tasklet flag now since we are committing to return
	 * from the tasklet function.
	 */
	clear_bit(IPATH_S_BUSY, &qp->s_flags);
	tasklet_unlock(&qp->s_task);
	ipath_layer_want_buffer(dev->dd);
}

/**
 * ipath_post_rc_send - post RC and UC sends
 * @qp: the QP to post on
 * @wr: the work request to send
 */
int ipath_post_rc_send(struct ipath_qp *qp, struct ib_send_wr *wr)
{
	struct ipath_swqe *wqe;
	unsigned long flags;
	u32 next;
	int i, j;
	int acc;
	int ret;

	/*
	 * Don't allow RDMA reads or atomic operations on UC or
	 * undefined operations.
	 * Make sure buffer is large enough to hold the result for atomics.
	 */
	if (qp->ibqp.qp_type == IB_QPT_UC) {
		if ((unsigned) wr->opcode >= IB_WR_RDMA_READ) {
			ret = -EINVAL;
			goto bail;
		}
	} else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD) {
		ret = -EINVAL;
		goto bail;
	} else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
		   (wr->num_sge == 0 ||
		    wr->sg_list[0].length < sizeof(u64) ||
		    wr->sg_list[0].addr & (sizeof(u64) - 1))) {
		ret = -EINVAL;
		goto bail;
	}
	/* IB spec says that num_sge == 0 is OK. */
	if (wr->num_sge > qp->s_max_sge) {
		ret = -ENOMEM;
		goto bail;
	}
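
	/*
	 * The send queue is a circular ring indexed by s_head (producer)
	 * and s_last (consumer); one slot is kept unused so that
	 * next == s_last unambiguously means the queue is full.
	 */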
	spin_lock_irqsave(&qp->s_lock, flags);
	next = qp->s_head + 1;
	if (next >= qp->s_size)
		next = 0;
	if (next == qp->s_last) {
		spin_unlock_irqrestore(&qp->s_lock, flags);
		ret = -ENOMEM;
		goto bail;
	}

	wqe = get_swqe_ptr(qp, qp->s_head);
	wqe->wr = *wr;
	wqe->ssn = qp->s_ssn++;
	wqe->sg_list[0].mr = NULL;
	wqe->sg_list[0].vaddr = NULL;
	wqe->sg_list[0].length = 0;
	wqe->sg_list[0].sge_length = 0;
	wqe->length = 0;
	acc = wr->opcode >= IB_WR_RDMA_READ ? IB_ACCESS_LOCAL_WRITE : 0;
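	/*
	 * Walk the caller's SGE list, validating each lkey and building the
	 * SWQE's own SGE list; zero-length entries are skipped and the WQE
	 * length accumulates the total payload size.
	 */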
	for (i = 0, j = 0; i < wr->num_sge; i++) {
		if (to_ipd(qp->ibqp.pd)->user && wr->sg_list[i].lkey == 0) {
			spin_unlock_irqrestore(&qp->s_lock, flags);
			ret = -EINVAL;
			goto bail;
		}
		if (wr->sg_list[i].length == 0)
			continue;
		if (!ipath_lkey_ok(&to_idev(qp->ibqp.device)->lk_table,
				   &wqe->sg_list[j], &wr->sg_list[i],
				   acc)) {
			spin_unlock_irqrestore(&qp->s_lock, flags);
			ret = -EINVAL;
			goto bail;
		}
		wqe->length += wr->sg_list[i].length;
		j++;
	}
	wqe->wr.num_sge = j;
	qp->s_head = next;
	spin_unlock_irqrestore(&qp->s_lock, flags);

	if (qp->ibqp.qp_type == IB_QPT_UC)
		ipath_do_uc_send((unsigned long) qp);
	else
		ipath_do_rc_send((unsigned long) qp);

	ret = 0;

bail:
	return ret;
}