/*
 * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials
 *   provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <rdma/uverbs_ioctl.h>

#include "iw_cxgb4.h"

static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
		      struct c4iw_dev_ucontext *uctx, struct sk_buff *skb,
		      struct c4iw_wr_wait *wr_waitp)
{
	struct fw_ri_res_wr *res_wr;
	struct fw_ri_res *res;
	int wr_len;
	int ret;

	wr_len = sizeof *res_wr + sizeof *res;
	set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);

	res_wr = __skb_put_zero(skb, wr_len);
	res_wr->op_nres = cpu_to_be32(
			FW_WR_OP_V(FW_RI_RES_WR) |
			FW_RI_RES_WR_NRES_V(1) |
			FW_WR_COMPL_F);
	res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
	res_wr->cookie = (uintptr_t)wr_waitp;
	res = res_wr->res;
	res->u.cq.restype = FW_RI_RES_TYPE_CQ;
	res->u.cq.op = FW_RI_RES_OP_RESET;
	res->u.cq.iqid = cpu_to_be32(cq->cqid);

	c4iw_init_wr_wait(wr_waitp);
	ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, 0, __func__);

	kfree(cq->sw_queue);
	dma_free_coherent(&(rdev->lldi.pdev->dev),
			  cq->memsize, cq->queue,
			  dma_unmap_addr(cq, mapping));
	c4iw_put_cqid(rdev, cq->cqid, uctx);
	return ret;
}

static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
		     struct c4iw_dev_ucontext *uctx,
		     struct c4iw_wr_wait *wr_waitp)
{
	struct fw_ri_res_wr *res_wr;
	struct fw_ri_res *res;
	int wr_len;
	int user = (uctx != &rdev->uctx);
	int ret;
	struct sk_buff *skb;
	struct c4iw_ucontext *ucontext = NULL;

	if (user)
		ucontext = container_of(uctx, struct c4iw_ucontext, uctx);

	cq->cqid = c4iw_get_cqid(rdev, uctx);
	if (!cq->cqid) {
		ret = -ENOMEM;
		goto err1;
	}

	if (!user) {
		cq->sw_queue = kzalloc(cq->memsize, GFP_KERNEL);
		if (!cq->sw_queue) {
			ret = -ENOMEM;
			goto err2;
		}
	}
	cq->queue = dma_alloc_coherent(&rdev->lldi.pdev->dev, cq->memsize,
				       &cq->dma_addr, GFP_KERNEL);
	if (!cq->queue) {
		ret = -ENOMEM;
		goto err3;
	}
	dma_unmap_addr_set(cq, mapping, cq->dma_addr);
	memset(cq->queue, 0, cq->memsize);

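	/*
	 * Locate the qp_err flag in the status page, which occupies the
	 * last queue slot; 32b-CQE user contexts use half-sized entries,
	 * so the offset differs accordingly.
	 */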
	if (user && ucontext->is_32b_cqe) {
		cq->qp_errp = &((struct t4_status_page *)
			((u8 *)cq->queue + (cq->size - 1) *
			 (sizeof(*cq->queue) / 2)))->qp_err;
	} else {
		cq->qp_errp = &((struct t4_status_page *)
			((u8 *)cq->queue + (cq->size - 1) *
			 sizeof(*cq->queue)))->qp_err;
	}

	/* build fw_ri_res_wr */
	wr_len = sizeof *res_wr + sizeof *res;

	skb = alloc_skb(wr_len, GFP_KERNEL);
	if (!skb) {
		ret = -ENOMEM;
		goto err4;
	}
	set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);

	res_wr = __skb_put_zero(skb, wr_len);
	res_wr->op_nres = cpu_to_be32(
			FW_WR_OP_V(FW_RI_RES_WR) |
			FW_RI_RES_WR_NRES_V(1) |
			FW_WR_COMPL_F);
	res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
	res_wr->cookie = (uintptr_t)wr_waitp;
	res = res_wr->res;
	res->u.cq.restype = FW_RI_RES_TYPE_CQ;
	res->u.cq.op = FW_RI_RES_OP_WRITE;
	res->u.cq.iqid = cpu_to_be32(cq->cqid);
	res->u.cq.iqandst_to_iqandstindex = cpu_to_be32(
			FW_RI_RES_WR_IQANUS_V(0) |
			FW_RI_RES_WR_IQANUD_V(1) |
			FW_RI_RES_WR_IQANDST_F |
			FW_RI_RES_WR_IQANDSTINDEX_V(
				rdev->lldi.ciq_ids[cq->vector]));
	res->u.cq.iqdroprss_to_iqesize = cpu_to_be16(
			FW_RI_RES_WR_IQDROPRSS_F |
			FW_RI_RES_WR_IQPCIECH_V(2) |
			FW_RI_RES_WR_IQINTCNTTHRESH_V(0) |
			FW_RI_RES_WR_IQO_F |
			((user && ucontext->is_32b_cqe) ?
			 FW_RI_RES_WR_IQESIZE_V(1) :
			 FW_RI_RES_WR_IQESIZE_V(2)));
	res->u.cq.iqsize = cpu_to_be16(cq->size);
	res->u.cq.iqaddr = cpu_to_be64(cq->dma_addr);

	c4iw_init_wr_wait(wr_waitp);
	ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, 0, __func__);
	if (ret)
		goto err4;

	cq->gen = 1;
	cq->gts = rdev->lldi.gts_reg;
	cq->rdev = rdev;

	cq->bar2_va = c4iw_bar2_addrs(rdev, cq->cqid, CXGB4_BAR2_QTYPE_INGRESS,
				      &cq->bar2_qid,
				      user ? &cq->bar2_pa : NULL);
	if (user && !cq->bar2_pa) {
		pr_warn("%s: cqid %u not in BAR2 range\n",
			pci_name(rdev->lldi.pdev), cq->cqid);
		ret = -EINVAL;
		goto err4;
	}
	return 0;
err4:
	dma_free_coherent(&rdev->lldi.pdev->dev, cq->memsize, cq->queue,
			  dma_unmap_addr(cq, mapping));
err3:
	kfree(cq->sw_queue);
err2:
	c4iw_put_cqid(rdev, cq->cqid, uctx);
err1:
	return ret;
}

static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq, u32 srqidx)
{
	struct t4_cqe cqe;

	pr_debug("wq %p cq %p sw_cidx %u sw_pidx %u\n",
		 wq, cq, cq->sw_cidx, cq->sw_pidx);
	memset(&cqe, 0, sizeof(cqe));
	cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) |
				 CQE_OPCODE_V(FW_RI_SEND) |
				 CQE_TYPE_V(0) |
				 CQE_SWCQE_V(1) |
				 CQE_QPID_V(wq->sq.qid));
	cqe.bits_type_ts = cpu_to_be64(CQE_GENBIT_V((u64)cq->gen));
	if (srqidx)
		cqe.u.srcqe.abs_rqe_idx = cpu_to_be32(srqidx);
	cq->sw_queue[cq->sw_pidx] = cqe;
	t4_swcq_produce(cq);
}

int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count)
{
	int flushed = 0;
	int in_use = wq->rq.in_use - count;

	pr_debug("wq %p cq %p rq.in_use %u skip count %u\n",
		 wq, cq, wq->rq.in_use, count);
	while (in_use--) {
		insert_recv_cqe(wq, cq, 0);
		flushed++;
	}
	return flushed;
}

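/*
 * Build a software CQE that marks @swcqe as flushed (T4_ERR_SWFLUSH)
 * and push it onto the software CQ.
 */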
static void insert_sq_cqe(struct t4_wq *wq, struct t4_cq *cq,
			  struct t4_swsqe *swcqe)
{
	struct t4_cqe cqe;

	pr_debug("wq %p cq %p sw_cidx %u sw_pidx %u\n",
		 wq, cq, cq->sw_cidx, cq->sw_pidx);
	memset(&cqe, 0, sizeof(cqe));
	cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) |
				 CQE_OPCODE_V(swcqe->opcode) |
				 CQE_TYPE_V(1) |
				 CQE_SWCQE_V(1) |
				 CQE_QPID_V(wq->sq.qid));
	CQE_WRID_SQ_IDX(&cqe) = swcqe->idx;
	cqe.bits_type_ts = cpu_to_be64(CQE_GENBIT_V((u64)cq->gen));
	cq->sw_queue[cq->sw_pidx] = cqe;
	t4_swcq_produce(cq);
}

static void advance_oldest_read(struct t4_wq *wq);

int c4iw_flush_sq(struct c4iw_qp *qhp)
{
	int flushed = 0;
	struct t4_wq *wq = &qhp->wq;
	struct c4iw_cq *chp = to_c4iw_cq(qhp->ibqp.send_cq);
	struct t4_cq *cq = &chp->cq;
	int idx;
	struct t4_swsqe *swsqe;

	if (wq->sq.flush_cidx == -1)
		wq->sq.flush_cidx = wq->sq.cidx;
	idx = wq->sq.flush_cidx;
	while (idx != wq->sq.pidx) {
		swsqe = &wq->sq.sw_sq[idx];
		swsqe->flushed = 1;
		insert_sq_cqe(wq, cq, swsqe);
		if (wq->sq.oldest_read == swsqe) {
			advance_oldest_read(wq);
		}
		flushed++;
		if (++idx == wq->sq.size)
			idx = 0;
	}
	wq->sq.flush_cidx += flushed;
	if (wq->sq.flush_cidx >= wq->sq.size)
		wq->sq.flush_cidx -= wq->sq.size;
	return flushed;
}

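/*
 * Move completed, in-order SQ CQEs into the software CQ.  Unsignaled
 * WRs are skipped; the walk stops at the first signaled WR that has
 * not completed yet.
 */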
static void flush_completed_wrs(struct t4_wq *wq, struct t4_cq *cq)
{
	struct t4_swsqe *swsqe;
	int cidx;

	if (wq->sq.flush_cidx == -1)
		wq->sq.flush_cidx = wq->sq.cidx;
	cidx = wq->sq.flush_cidx;

	while (cidx != wq->sq.pidx) {
		swsqe = &wq->sq.sw_sq[cidx];
		if (!swsqe->signaled) {
			if (++cidx == wq->sq.size)
				cidx = 0;
		} else if (swsqe->complete) {

			/*
			 * Insert this completed cqe into the swcq.
			 */
			pr_debug("moving cqe into swcq sq idx %u cq idx %u\n",
				 cidx, cq->sw_pidx);
			swsqe->cqe.header |= htonl(CQE_SWCQE_V(1));
			cq->sw_queue[cq->sw_pidx] = swsqe->cqe;
			t4_swcq_produce(cq);
			swsqe->flushed = 1;
			if (++cidx == wq->sq.size)
				cidx = 0;
			wq->sq.flush_cidx = cidx;
		} else
			break;
	}
}

static void create_read_req_cqe(struct t4_wq *wq, struct t4_cqe *hw_cqe,
				struct t4_cqe *read_cqe)
{
	read_cqe->u.scqe.cidx = wq->sq.oldest_read->idx;
	read_cqe->len = htonl(wq->sq.oldest_read->read_len);
	read_cqe->header = htonl(CQE_QPID_V(CQE_QPID(hw_cqe)) |
				 CQE_SWCQE_V(SW_CQE(hw_cqe)) |
				 CQE_OPCODE_V(FW_RI_READ_REQ) |
				 CQE_TYPE_V(1));
	read_cqe->bits_type_ts = hw_cqe->bits_type_ts;
}

static void advance_oldest_read(struct t4_wq *wq)
{

	u32 rptr = wq->sq.oldest_read - wq->sq.sw_sq + 1;

	if (rptr == wq->sq.size)
		rptr = 0;
	while (rptr != wq->sq.pidx) {
		wq->sq.oldest_read = &wq->sq.sw_sq[rptr];

		if (wq->sq.oldest_read->opcode == FW_RI_READ_REQ)
			return;
		if (++rptr == wq->sq.size)
			rptr = 0;
	}
	wq->sq.oldest_read = NULL;
}

/*
 * Move all CQEs from the HWCQ into the SWCQ.
 * Deal with out-of-order completions and with completions that
 * complete prior unsignaled WRs.
 */
void c4iw_flush_hw_cq(struct c4iw_cq *chp, struct c4iw_qp *flush_qhp)
{
	struct t4_cqe *hw_cqe, *swcqe, read_cqe;
	struct c4iw_qp *qhp;
	struct t4_swsqe *swsqe;
	int ret;

	pr_debug("cqid 0x%x\n", chp->cq.cqid);
	ret = t4_next_hw_cqe(&chp->cq, &hw_cqe);

	/*
	 * This logic is similar to poll_cq(), but not quite the same
	 * unfortunately.  Need to move pertinent HW CQEs to the SW CQ but
	 * also do any translation magic that poll_cq() normally does.
	 */
	while (!ret) {
		qhp = get_qhp(chp->rhp, CQE_QPID(hw_cqe));

		/*
		 * drop CQEs with no associated QP
		 */
		if (qhp == NULL)
			goto next_cqe;

		if (flush_qhp != qhp) {
			spin_lock(&qhp->lock);

			if (qhp->wq.flushed == 1)
				goto next_cqe;
		}

		if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE)
			goto next_cqe;

		if (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP) {

			/* If we have reached here because of an async
			 * event or other error, and have an egress error,
			 * then drop.
			 */
			if (CQE_TYPE(hw_cqe) == 1)
				goto next_cqe;

			/* drop peer2peer RTR reads.
			 */
			if (CQE_WRID_STAG(hw_cqe) == 1)
				goto next_cqe;

			/*
			 * Eat completions for unsignaled read WRs.
			 */
			if (!qhp->wq.sq.oldest_read->signaled) {
				advance_oldest_read(&qhp->wq);
				goto next_cqe;
			}

			/*
			 * Don't write to the HWCQ, create a new read req CQE
			 * in local memory and move it into the swcq.
			 */
			create_read_req_cqe(&qhp->wq, hw_cqe, &read_cqe);
			hw_cqe = &read_cqe;
			advance_oldest_read(&qhp->wq);
		}

		/* if it's an SQ completion, then do the magic to move all the
		 * unsignaled and now in-order completions into the swcq.
		 */
		if (SQ_TYPE(hw_cqe)) {
			swsqe = &qhp->wq.sq.sw_sq[CQE_WRID_SQ_IDX(hw_cqe)];
			swsqe->cqe = *hw_cqe;
			swsqe->complete = 1;
			flush_completed_wrs(&qhp->wq, &chp->cq);
		} else {
			swcqe = &chp->cq.sw_queue[chp->cq.sw_pidx];
			*swcqe = *hw_cqe;
			swcqe->header |= cpu_to_be32(CQE_SWCQE_V(1));
			t4_swcq_produce(&chp->cq);
		}
next_cqe:
		t4_hwcq_consume(&chp->cq);
		ret = t4_next_hw_cqe(&chp->cq, &hw_cqe);
		if (qhp && flush_qhp != qhp)
			spin_unlock(&qhp->lock);
	}
}

static int cqe_completes_wr(struct t4_cqe *cqe, struct t4_wq *wq)
{
	if (DRAIN_CQE(cqe)) {
		WARN_ONCE(1, "Unexpected DRAIN CQE qp id %u!\n", wq->sq.qid);
		return 0;
	}

	if (CQE_OPCODE(cqe) == FW_RI_TERMINATE)
		return 0;

	if ((CQE_OPCODE(cqe) == FW_RI_RDMA_WRITE) && RQ_TYPE(cqe))
		return 0;

	if ((CQE_OPCODE(cqe) == FW_RI_READ_RESP) && SQ_TYPE(cqe))
		return 0;

	if (CQE_SEND_OPCODE(cqe) && RQ_TYPE(cqe) && t4_rq_empty(wq))
		return 0;
	return 1;
}

void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
{
	struct t4_cqe *cqe;
	u32 ptr;

	*count = 0;
	pr_debug("count zero %d\n", *count);
	ptr = cq->sw_cidx;
	while (ptr != cq->sw_pidx) {
		cqe = &cq->sw_queue[ptr];
		if (RQ_TYPE(cqe) && (CQE_OPCODE(cqe) != FW_RI_READ_RESP) &&
		    (CQE_QPID(cqe) == wq->sq.qid) && cqe_completes_wr(cqe, wq))
			(*count)++;
		if (++ptr == cq->size)
			ptr = 0;
	}
	pr_debug("cq %p count %d\n", cq, *count);
}

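/*
 * Replay SRQ WRs that were queued while out-of-order completions were
 * outstanding, then ring the SRQ doorbell and update the status page's
 * host_wq_pidx.
 */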
static void post_pending_srq_wrs(struct t4_srq *srq)
{
	struct t4_srq_pending_wr *pwr;
	u16 idx = 0;

	while (srq->pending_in_use) {
		pwr = &srq->pending_wrs[srq->pending_cidx];
		srq->sw_rq[srq->pidx].wr_id = pwr->wr_id;
		srq->sw_rq[srq->pidx].valid = 1;

		pr_debug("%s posting pending cidx %u pidx %u wq_pidx %u in_use %u rq_size %u wr_id %llx\n",
			 __func__,
			 srq->cidx, srq->pidx, srq->wq_pidx,
			 srq->in_use, srq->size,
			 (unsigned long long)pwr->wr_id);

		c4iw_copy_wr_to_srq(srq, &pwr->wqe, pwr->len16);
		t4_srq_consume_pending_wr(srq);
		t4_srq_produce(srq, pwr->len16);
		idx += DIV_ROUND_UP(pwr->len16 * 16, T4_EQ_ENTRY_SIZE);
	}

	if (idx) {
		t4_ring_srq_db(srq, idx, pwr->len16, &pwr->wqe);
		srq->queue[srq->size].status.host_wq_pidx =
			srq->wq_pidx;
	}
}

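/*
 * Return the wr_id for the SRQ buffer completed by @hw_cqe.  An
 * in-order completion consumes the buffer and then skips over any
 * already-reaped out-of-order entries that follow it; an out-of-order
 * completion is only recorded for later draining.
 */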
static u64 reap_srq_cqe(struct t4_cqe *hw_cqe, struct t4_srq *srq)
{
	int rel_idx = CQE_ABS_RQE_IDX(hw_cqe) - srq->rqt_abs_idx;
	u64 wr_id;

	srq->sw_rq[rel_idx].valid = 0;
	wr_id = srq->sw_rq[rel_idx].wr_id;

	if (rel_idx == srq->cidx) {
		pr_debug("%s in order cqe rel_idx %u cidx %u pidx %u wq_pidx %u in_use %u rq_size %u wr_id %llx\n",
			 __func__, rel_idx, srq->cidx, srq->pidx,
			 srq->wq_pidx, srq->in_use, srq->size,
			 (unsigned long long)srq->sw_rq[rel_idx].wr_id);
		t4_srq_consume(srq);
		while (srq->ooo_count && !srq->sw_rq[srq->cidx].valid) {
			pr_debug("%s eat ooo cidx %u pidx %u wq_pidx %u in_use %u rq_size %u ooo_count %u wr_id %llx\n",
				 __func__, srq->cidx, srq->pidx,
				 srq->wq_pidx, srq->in_use,
				 srq->size, srq->ooo_count,
				 (unsigned long long)
				 srq->sw_rq[srq->cidx].wr_id);
			t4_srq_consume_ooo(srq);
		}
		if (srq->ooo_count == 0 && srq->pending_in_use)
			post_pending_srq_wrs(srq);
	} else {
		pr_debug("%s ooo cqe rel_idx %u cidx %u pidx %u wq_pidx %u in_use %u rq_size %u ooo_count %u wr_id %llx\n",
			 __func__, rel_idx, srq->cidx,
			 srq->pidx, srq->wq_pidx,
			 srq->in_use, srq->size,
			 srq->ooo_count,
			 (unsigned long long)srq->sw_rq[rel_idx].wr_id);
		t4_srq_produce_ooo(srq);
	}
	return wr_id;
}

/*
 * poll_cq
 *
 * Caller must:
 *     check the validity of the first CQE,
 *     supply the wq associated with the qpid.
 *
 * credit: cq credit to return to sge.
 * cqe_flushed: 1 iff the CQE is flushed.
 * cqe: copy of the polled CQE.
 *
 * return value:
 *     0		CQE returned ok.
 *     -EAGAIN		CQE skipped, try again.
 *     -EOVERFLOW	CQ overflow detected.
 */
static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
		   u8 *cqe_flushed, u64 *cookie, u32 *credit,
		   struct t4_srq *srq)
{
	int ret = 0;
	struct t4_cqe *hw_cqe, read_cqe;

	*cqe_flushed = 0;
	*credit = 0;
	ret = t4_next_cqe(cq, &hw_cqe);
	if (ret)
		return ret;

	pr_debug("CQE OVF %u qpid 0x%0x genbit %u type %u status 0x%0x opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n",
		 CQE_OVFBIT(hw_cqe), CQE_QPID(hw_cqe),
		 CQE_GENBIT(hw_cqe), CQE_TYPE(hw_cqe), CQE_STATUS(hw_cqe),
		 CQE_OPCODE(hw_cqe), CQE_LEN(hw_cqe), CQE_WRID_HI(hw_cqe),
		 CQE_WRID_LOW(hw_cqe));

	/*
	 * skip cqe's not affiliated with a QP.
	 */
	if (wq == NULL) {
		ret = -EAGAIN;
		goto skip_cqe;
	}

	/*
	 * skip hw cqe's if the wq is flushed.
	 */
	if (wq->flushed && !SW_CQE(hw_cqe)) {
		ret = -EAGAIN;
		goto skip_cqe;
	}

	/*
	 * skip TERMINATE cqes...
	 */
	if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE) {
		ret = -EAGAIN;
		goto skip_cqe;
	}

	/*
	 * Special cqe for drain WR completions...
	 */
	if (DRAIN_CQE(hw_cqe)) {
		*cookie = CQE_DRAIN_COOKIE(hw_cqe);
		*cqe = *hw_cqe;
		goto skip_cqe;
	}

	/*
	 * Gotta tweak READ completions:
	 *	1) the cqe doesn't contain the sq_wptr from the wr.
	 *	2) opcode not reflected from the wr.
	 *	3) read_len not reflected from the wr.
	 *	4) cq_type is RQ_TYPE not SQ_TYPE.
	 */
	if (RQ_TYPE(hw_cqe) && (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP)) {

		/* If we have reached here because of an async
		 * event or other error, and have an egress error,
		 * then drop.
		 */
		if (CQE_TYPE(hw_cqe) == 1) {
			if (CQE_STATUS(hw_cqe))
				t4_set_wq_in_error(wq, 0);
			ret = -EAGAIN;
			goto skip_cqe;
		}

		/* If this is an unsolicited read response, then the read
		 * was generated by the kernel driver as part of peer-2-peer
		 * connection setup. So ignore the completion.
		 */
		if (CQE_WRID_STAG(hw_cqe) == 1) {
			if (CQE_STATUS(hw_cqe))
				t4_set_wq_in_error(wq, 0);
			ret = -EAGAIN;
			goto skip_cqe;
		}

		/*
		 * Eat completions for unsignaled read WRs.
		 */
		if (!wq->sq.oldest_read->signaled) {
			advance_oldest_read(wq);
			ret = -EAGAIN;
			goto skip_cqe;
		}

		/*
		 * Don't write to the HWCQ, so create a new read req CQE
		 * in local memory.
		 */
		create_read_req_cqe(wq, hw_cqe, &read_cqe);
		hw_cqe = &read_cqe;
		advance_oldest_read(wq);
	}

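	/*
	 * An error status (or a wq already in error) moves the wq into the
	 * error state; T4_ERR_SWFLUSH specifically marks this CQE as a
	 * flushed completion.
	 */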
	if (CQE_STATUS(hw_cqe) || t4_wq_in_error(wq)) {
		*cqe_flushed = (CQE_STATUS(hw_cqe) == T4_ERR_SWFLUSH);
		t4_set_wq_in_error(wq, 0);
	}

	/*
	 * RECV completion.
	 */
	if (RQ_TYPE(hw_cqe)) {

		/*
		 * HW only validates 4 bits of MSN.  So we must validate that
		 * the MSN in the SEND is the next expected MSN.  If it's not,
		 * then we complete this with T4_ERR_MSN and mark the wq in
		 * error.
		 */
		if (unlikely(!CQE_STATUS(hw_cqe) &&
			     CQE_WRID_MSN(hw_cqe) != wq->rq.msn)) {
			t4_set_wq_in_error(wq, 0);
			hw_cqe->header |= cpu_to_be32(CQE_STATUS_V(T4_ERR_MSN));
		}
		goto proc_cqe;
	}

	/*
	 * If we get here it's a send completion.
	 *
	 * Handle out of order completion. These get stuffed
	 * in the SW SQ. Then the SW SQ is walked to move any
	 * now in-order completions into the SW CQ.  This handles
	 * 2 cases:
	 *	1) reaping unsignaled WRs when the first subsequent
	 *	   signaled WR is completed.
	 *	2) out of order read completions.
	 */
	if (!SW_CQE(hw_cqe) && (CQE_WRID_SQ_IDX(hw_cqe) != wq->sq.cidx)) {
		struct t4_swsqe *swsqe;

		pr_debug("out of order completion going in sw_sq at idx %u\n",
			 CQE_WRID_SQ_IDX(hw_cqe));
		swsqe = &wq->sq.sw_sq[CQE_WRID_SQ_IDX(hw_cqe)];
		swsqe->cqe = *hw_cqe;
		swsqe->complete = 1;
		ret = -EAGAIN;
		goto flush_wq;
	}

proc_cqe:
	*cqe = *hw_cqe;

	/*
	 * Reap the associated WR(s) that are freed up with this
	 * completion.
	 */
	if (SQ_TYPE(hw_cqe)) {
		int idx = CQE_WRID_SQ_IDX(hw_cqe);

		/*
		 * Account for any unsignaled completions completed by
		 * this signaled completion.  In this case, cidx points
		 * to the first unsignaled one, and idx points to the
		 * signaled one.  So adjust in_use based on this delta.
		 * If this is not completing any unsignaled wrs, then the
		 * delta will be 0.  Handle wrapping also!
		 */
		if (idx < wq->sq.cidx)
			wq->sq.in_use -= wq->sq.size + idx - wq->sq.cidx;
		else
			wq->sq.in_use -= idx - wq->sq.cidx;

		wq->sq.cidx = (uint16_t)idx;
		pr_debug("completing sq idx %u\n", wq->sq.cidx);
		*cookie = wq->sq.sw_sq[wq->sq.cidx].wr_id;
		if (c4iw_wr_log)
			c4iw_log_wr_stats(wq, hw_cqe);
		t4_sq_consume(wq);
	} else {
		if (!srq) {
			pr_debug("completing rq idx %u\n", wq->rq.cidx);
			*cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id;
			if (c4iw_wr_log)
				c4iw_log_wr_stats(wq, hw_cqe);
			t4_rq_consume(wq);
		} else {
			*cookie = reap_srq_cqe(hw_cqe, srq);
		}
		wq->rq.msn++;
		goto skip_cqe;
	}

flush_wq:
	/*
	 * Flush any completed cqes that are now in-order.
	 */
	flush_completed_wrs(wq, cq);

skip_cqe:
	if (SW_CQE(hw_cqe)) {
		pr_debug("cq %p cqid 0x%x skip sw cqe cidx %u\n",
			 cq, cq->cqid, cq->sw_cidx);
		t4_swcq_consume(cq);
	} else {
		pr_debug("cq %p cqid 0x%x skip hw cqe cidx %u\n",
			 cq, cq->cqid, cq->cidx);
		t4_hwcq_consume(cq);
	}
	return ret;
}

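/*
 * Translate one polled CQE into an ib_wc work completion.  Called by
 * c4iw_poll_cq_one() with the QP (and SRQ, if any) already locked.
 */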
static int __c4iw_poll_cq_one(struct c4iw_cq *chp, struct c4iw_qp *qhp,
			      struct ib_wc *wc, struct c4iw_srq *srq)
{
	struct t4_cqe uninitialized_var(cqe);
	struct t4_wq *wq = qhp ? &qhp->wq : NULL;
	u32 credit = 0;
	u8 cqe_flushed;
	u64 cookie = 0;
	int ret;

	ret = poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit,
		      srq ? &srq->wq : NULL);
	if (ret)
		goto out;

	wc->wr_id = cookie;
	wc->qp = qhp ? &qhp->ibqp : NULL;
	wc->vendor_err = CQE_STATUS(&cqe);
	wc->wc_flags = 0;

	/*
	 * Simulate a SRQ_LIMIT_REACHED HW notification if required.
	 */
	if (srq && !(srq->flags & T4_SRQ_LIMIT_SUPPORT) && srq->armed &&
	    srq->wq.in_use < srq->srq_limit)
		c4iw_dispatch_srq_limit_reached_event(srq);

	pr_debug("qpid 0x%x type %d opcode %d status 0x%x len %u wrid hi 0x%x lo 0x%x cookie 0x%llx\n",
		 CQE_QPID(&cqe),
		 CQE_TYPE(&cqe), CQE_OPCODE(&cqe),
		 CQE_STATUS(&cqe), CQE_LEN(&cqe),
		 CQE_WRID_HI(&cqe), CQE_WRID_LOW(&cqe),
		 (unsigned long long)cookie);

	if (CQE_TYPE(&cqe) == 0) {
		if (!CQE_STATUS(&cqe))
			wc->byte_len = CQE_LEN(&cqe);
		else
			wc->byte_len = 0;

		switch (CQE_OPCODE(&cqe)) {
		case FW_RI_SEND:
			wc->opcode = IB_WC_RECV;
			break;
		case FW_RI_SEND_WITH_INV:
		case FW_RI_SEND_WITH_SE_INV:
			wc->opcode = IB_WC_RECV;
			wc->ex.invalidate_rkey = CQE_WRID_STAG(&cqe);
			wc->wc_flags |= IB_WC_WITH_INVALIDATE;
			c4iw_invalidate_mr(qhp->rhp, wc->ex.invalidate_rkey);
			break;
		case FW_RI_WRITE_IMMEDIATE:
			wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
			wc->ex.imm_data = CQE_IMM_DATA(&cqe);
			wc->wc_flags |= IB_WC_WITH_IMM;
			break;
		default:
			pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n",
			       CQE_OPCODE(&cqe), CQE_QPID(&cqe));
			ret = -EINVAL;
			goto out;
		}
	} else {
		switch (CQE_OPCODE(&cqe)) {
		case FW_RI_WRITE_IMMEDIATE:
		case FW_RI_RDMA_WRITE:
			wc->opcode = IB_WC_RDMA_WRITE;
			break;
		case FW_RI_READ_REQ:
			wc->opcode = IB_WC_RDMA_READ;
			wc->byte_len = CQE_LEN(&cqe);
			break;
		case FW_RI_SEND_WITH_INV:
		case FW_RI_SEND_WITH_SE_INV:
			wc->opcode = IB_WC_SEND;
			wc->wc_flags |= IB_WC_WITH_INVALIDATE;
			break;
		case FW_RI_SEND:
		case FW_RI_SEND_WITH_SE:
			wc->opcode = IB_WC_SEND;
			break;

		case FW_RI_LOCAL_INV:
			wc->opcode = IB_WC_LOCAL_INV;
			break;
		case FW_RI_FAST_REGISTER:
			wc->opcode = IB_WC_REG_MR;

			/* Invalidate the MR if the fastreg failed */
			if (CQE_STATUS(&cqe) != T4_ERR_SUCCESS)
				c4iw_invalidate_mr(qhp->rhp,
						   CQE_WRID_FR_STAG(&cqe));
			break;
		default:
			pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n",
			       CQE_OPCODE(&cqe), CQE_QPID(&cqe));
			ret = -EINVAL;
			goto out;
		}
	}

	if (cqe_flushed)
		wc->status = IB_WC_WR_FLUSH_ERR;
	else {

		switch (CQE_STATUS(&cqe)) {
		case T4_ERR_SUCCESS:
			wc->status = IB_WC_SUCCESS;
			break;
		case T4_ERR_STAG:
			wc->status = IB_WC_LOC_ACCESS_ERR;
			break;
		case T4_ERR_PDID:
			wc->status = IB_WC_LOC_PROT_ERR;
			break;
		case T4_ERR_QPID:
		case T4_ERR_ACCESS:
			wc->status = IB_WC_LOC_ACCESS_ERR;
			break;
		case T4_ERR_WRAP:
			wc->status = IB_WC_GENERAL_ERR;
			break;
		case T4_ERR_BOUND:
			wc->status = IB_WC_LOC_LEN_ERR;
			break;
		case T4_ERR_INVALIDATE_SHARED_MR:
		case T4_ERR_INVALIDATE_MR_WITH_MW_BOUND:
			wc->status = IB_WC_MW_BIND_ERR;
			break;
		case T4_ERR_CRC:
		case T4_ERR_MARKER:
		case T4_ERR_PDU_LEN_ERR:
		case T4_ERR_OUT_OF_RQE:
		case T4_ERR_DDP_VERSION:
		case T4_ERR_RDMA_VERSION:
		case T4_ERR_DDP_QUEUE_NUM:
		case T4_ERR_MSN:
		case T4_ERR_TBIT:
		case T4_ERR_MO:
		case T4_ERR_MSN_RANGE:
		case T4_ERR_IRD_OVERFLOW:
		case T4_ERR_OPCODE:
		case T4_ERR_INTERNAL_ERR:
			wc->status = IB_WC_FATAL_ERR;
			break;
		case T4_ERR_SWFLUSH:
			wc->status = IB_WC_WR_FLUSH_ERR;
			break;
		default:
			pr_err("Unexpected cqe_status 0x%x for QPID=0x%0x\n",
			       CQE_STATUS(&cqe), CQE_QPID(&cqe));
			wc->status = IB_WC_FATAL_ERR;
		}
	}
out:
	return ret;
}

/*
 * Get one cq entry from c4iw and map it to openib.
 *
 * Returns:
 *	0			cqe returned
 *	-ENODATA		EMPTY;
 *	-EAGAIN			caller must try again
 *	any other -errno	fatal error
 */
static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
{
	struct c4iw_srq *srq = NULL;
	struct c4iw_qp *qhp = NULL;
	struct t4_cqe *rd_cqe;
	int ret;

	ret = t4_next_cqe(&chp->cq, &rd_cqe);

	if (ret)
		return ret;

	qhp = get_qhp(chp->rhp, CQE_QPID(rd_cqe));
	if (qhp) {
		spin_lock(&qhp->lock);
		srq = qhp->srq;
		if (srq)
			spin_lock(&srq->lock);
		ret = __c4iw_poll_cq_one(chp, qhp, wc, srq);
		spin_unlock(&qhp->lock);
		if (srq)
			spin_unlock(&srq->lock);
	} else {
		ret = __c4iw_poll_cq_one(chp, NULL, wc, NULL);
	}
	return ret;
}

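/*
 * Poll up to @num_entries completions into @wc.  A single-entry poll
 * returning -EAGAIN means that CQE was skipped, so retry until a real
 * completion is returned or the CQ is empty (-ENODATA).
 */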
int c4iw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
	struct c4iw_cq *chp;
	unsigned long flags;
	int npolled;
	int err = 0;

	chp = to_c4iw_cq(ibcq);

	spin_lock_irqsave(&chp->lock, flags);
	for (npolled = 0; npolled < num_entries; ++npolled) {
		do {
			err = c4iw_poll_cq_one(chp, wc + npolled);
		} while (err == -EAGAIN);
		if (err)
			break;
	}
	spin_unlock_irqrestore(&chp->lock, flags);
	return !err || err == -ENODATA ? npolled : err;
}

int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
{
	struct c4iw_cq *chp;
	struct c4iw_ucontext *ucontext;

	pr_debug("ib_cq %p\n", ib_cq);
	chp = to_c4iw_cq(ib_cq);

	xa_erase_irq(&chp->rhp->cqs, chp->cq.cqid);
	atomic_dec(&chp->refcnt);
	wait_event(chp->wait, !atomic_read(&chp->refcnt));

	ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext,
					     ibucontext);
	destroy_cq(&chp->rhp->rdev, &chp->cq,
		   ucontext ? &ucontext->uctx : &chp->cq.rdev->uctx,
		   chp->destroy_skb, chp->wr_waitp);
	c4iw_put_wr_wait(chp->wr_waitp);
	kfree(chp);
	return 0;
}

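/*
 * Allocate and initialize a CQ: size the queue to HW constraints,
 * create the HW ingress queue, and, for user CQs, return mmap keys for
 * the queue memory and the GTS/doorbell page.
 */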
struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
			     const struct ib_cq_init_attr *attr,
			     struct ib_udata *udata)
{
	int entries = attr->cqe;
	int vector = attr->comp_vector;
	struct c4iw_dev *rhp;
	struct c4iw_cq *chp;
	struct c4iw_create_cq ucmd;
	struct c4iw_create_cq_resp uresp;
	int ret, wr_len;
	size_t memsize, hwentries;
	struct c4iw_mm_entry *mm, *mm2;
	struct c4iw_ucontext *ucontext = rdma_udata_to_drv_context(
		udata, struct c4iw_ucontext, ibucontext);

	pr_debug("ib_dev %p entries %d\n", ibdev, entries);
	if (attr->flags)
		return ERR_PTR(-EINVAL);

	rhp = to_c4iw_dev(ibdev);

	if (vector >= rhp->rdev.lldi.nciq)
		return ERR_PTR(-EINVAL);

	if (udata) {
		if (udata->inlen < sizeof(ucmd))
			ucontext->is_32b_cqe = 1;
	}

	chp = kzalloc(sizeof(*chp), GFP_KERNEL);
	if (!chp)
		return ERR_PTR(-ENOMEM);

	chp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL);
	if (!chp->wr_waitp) {
		ret = -ENOMEM;
		goto err_free_chp;
	}
	c4iw_init_wr_wait(chp->wr_waitp);

	wr_len = sizeof(struct fw_ri_res_wr) + sizeof(struct fw_ri_res);
	chp->destroy_skb = alloc_skb(wr_len, GFP_KERNEL);
	if (!chp->destroy_skb) {
		ret = -ENOMEM;
		goto err_free_wr_wait;
	}

	/* account for the status page. */
	entries++;

	/* IQ needs one extra entry to differentiate full vs empty. */
	entries++;

	/*
	 * entries must be multiple of 16 for HW.
	 */
	entries = roundup(entries, 16);

	/*
	 * Make actual HW queue 2x to avoid cidx_inc overflows.
	 */
	hwentries = min(entries * 2, rhp->rdev.hw_queue.t4_max_iq_size);

	/*
	 * Make HW queue at least 64 entries so GTS updates aren't too
	 * frequent.
	 */
	if (hwentries < 64)
		hwentries = 64;

	memsize = hwentries * ((ucontext && ucontext->is_32b_cqe) ?
			       (sizeof(*chp->cq.queue) / 2) :
			       sizeof(*chp->cq.queue));

	/*
	 * memsize must be a multiple of the page size if it's a user cq.
	 */
	if (udata)
		memsize = roundup(memsize, PAGE_SIZE);

	chp->cq.size = hwentries;
	chp->cq.memsize = memsize;
	chp->cq.vector = vector;

	ret = create_cq(&rhp->rdev, &chp->cq,
			ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
			chp->wr_waitp);
	if (ret)
		goto err_free_skb;

	chp->rhp = rhp;
	chp->cq.size--;				/* status page */
	chp->ibcq.cqe = entries - 2;
	spin_lock_init(&chp->lock);
	spin_lock_init(&chp->comp_handler_lock);
	atomic_set(&chp->refcnt, 1);
	init_waitqueue_head(&chp->wait);
	ret = xa_insert_irq(&rhp->cqs, chp->cq.cqid, chp, GFP_KERNEL);
	if (ret)
		goto err_destroy_cq;

	if (ucontext) {
		ret = -ENOMEM;
		mm = kmalloc(sizeof *mm, GFP_KERNEL);
		if (!mm)
			goto err_remove_handle;
		mm2 = kmalloc(sizeof *mm2, GFP_KERNEL);
		if (!mm2)
			goto err_free_mm;

		memset(&uresp, 0, sizeof(uresp));
		uresp.qid_mask = rhp->rdev.cqmask;
		uresp.cqid = chp->cq.cqid;
		uresp.size = chp->cq.size;
		uresp.memsize = chp->cq.memsize;
		spin_lock(&ucontext->mmap_lock);
		uresp.key = ucontext->key;
		ucontext->key += PAGE_SIZE;
		uresp.gts_key = ucontext->key;
		ucontext->key += PAGE_SIZE;
		/* communicate to the userspace that
		 * kernel driver supports 64B CQE
		 */
		uresp.flags |= C4IW_64B_CQE;

		spin_unlock(&ucontext->mmap_lock);
		ret = ib_copy_to_udata(udata, &uresp,
				       ucontext->is_32b_cqe ?
				       sizeof(uresp) - sizeof(uresp.flags) :
				       sizeof(uresp));
		if (ret)
			goto err_free_mm2;

		mm->key = uresp.key;
		mm->addr = virt_to_phys(chp->cq.queue);
		mm->len = chp->cq.memsize;
		insert_mmap(ucontext, mm);

		mm2->key = uresp.gts_key;
		mm2->addr = chp->cq.bar2_pa;
		mm2->len = PAGE_SIZE;
		insert_mmap(ucontext, mm2);
	}
	pr_debug("cqid 0x%0x chp %p size %u memsize %zu, dma_addr 0x%0llx\n",
		 chp->cq.cqid, chp, chp->cq.size,
		 chp->cq.memsize, (unsigned long long)chp->cq.dma_addr);
	return &chp->ibcq;
err_free_mm2:
	kfree(mm2);
err_free_mm:
	kfree(mm);
err_remove_handle:
	xa_erase_irq(&rhp->cqs, chp->cq.cqid);
err_destroy_cq:
	destroy_cq(&chp->rhp->rdev, &chp->cq,
		   ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
		   chp->destroy_skb, chp->wr_waitp);
err_free_skb:
	kfree_skb(chp->destroy_skb);
err_free_wr_wait:
	c4iw_put_wr_wait(chp->wr_waitp);
err_free_chp:
	kfree(chp);
	return ERR_PTR(ret);
}

int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
	struct c4iw_cq *chp;
	int ret = 0;
	unsigned long flag;

	chp = to_c4iw_cq(ibcq);
	spin_lock_irqsave(&chp->lock, flag);
	t4_arm_cq(&chp->cq,
		  (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED);
	if (flags & IB_CQ_REPORT_MISSED_EVENTS)
		ret = t4_cq_notempty(&chp->cq);
	spin_unlock_irqrestore(&chp->lock, flag);
	return ret;
}

void c4iw_flush_srqidx(struct c4iw_qp *qhp, u32 srqidx)
{
	struct c4iw_cq *rchp = to_c4iw_cq(qhp->ibqp.recv_cq);
	unsigned long flag;

	/* locking hierarchy: cq lock first, then qp lock. */
	spin_lock_irqsave(&rchp->lock, flag);
	spin_lock(&qhp->lock);

	/* create a SRQ RECV CQE for srqidx */
	insert_recv_cqe(&qhp->wq, &rchp->cq, srqidx);

	spin_unlock(&qhp->lock);
	spin_unlock_irqrestore(&rchp->lock, flag);
}