/*
 * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "iw_cxgb4.h"

static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
		      struct c4iw_dev_ucontext *uctx, struct sk_buff *skb,
		      struct c4iw_wr_wait *wr_waitp)
{
	struct fw_ri_res_wr *res_wr;
	struct fw_ri_res *res;
	int wr_len;
	int ret;

	wr_len = sizeof *res_wr + sizeof *res;
	set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);

	res_wr = __skb_put_zero(skb, wr_len);
	res_wr->op_nres = cpu_to_be32(
			FW_WR_OP_V(FW_RI_RES_WR) |
			FW_RI_RES_WR_NRES_V(1) |
			FW_WR_COMPL_F);
	res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
	res_wr->cookie = (uintptr_t)wr_waitp;
	res = res_wr->res;
	res->u.cq.restype = FW_RI_RES_TYPE_CQ;
	res->u.cq.op = FW_RI_RES_OP_RESET;
	res->u.cq.iqid = cpu_to_be32(cq->cqid);

	c4iw_init_wr_wait(wr_waitp);
	ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, 0, __func__);

	kfree(cq->sw_queue);
	dma_free_coherent(&(rdev->lldi.pdev->dev),
			  cq->memsize, cq->queue,
			  dma_unmap_addr(cq, mapping));
	c4iw_put_cqid(rdev, cq->cqid, uctx);
	return ret;
}
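
/*
 * Allocate and initialize a hardware CQ: reserve a cqid, allocate the
 * software shadow queue (kernel CQs only) and the DMA-coherent queue
 * memory, then post an FW_RI_RES_WR (OP_WRITE) to firmware and wait for
 * it to complete before mapping the BAR2 doorbell address.
 */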
static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
		     struct c4iw_dev_ucontext *uctx,
		     struct c4iw_wr_wait *wr_waitp)
{
	struct fw_ri_res_wr *res_wr;
	struct fw_ri_res *res;
	int wr_len;
	int user = (uctx != &rdev->uctx);
	int ret;
	struct sk_buff *skb;

	cq->cqid = c4iw_get_cqid(rdev, uctx);
	if (!cq->cqid) {
		ret = -ENOMEM;
		goto err1;
	}

	if (!user) {
		cq->sw_queue = kzalloc(cq->memsize, GFP_KERNEL);
		if (!cq->sw_queue) {
			ret = -ENOMEM;
			goto err2;
		}
	}
	cq->queue = dma_alloc_coherent(&rdev->lldi.pdev->dev, cq->memsize,
				       &cq->dma_addr, GFP_KERNEL);
	if (!cq->queue) {
		ret = -ENOMEM;
		goto err3;
	}
	dma_unmap_addr_set(cq, mapping, cq->dma_addr);
	memset(cq->queue, 0, cq->memsize);

	/* build fw_ri_res_wr */
	wr_len = sizeof *res_wr + sizeof *res;

	skb = alloc_skb(wr_len, GFP_KERNEL);
	if (!skb) {
		ret = -ENOMEM;
		goto err4;
	}
	set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);

	res_wr = __skb_put_zero(skb, wr_len);
	res_wr->op_nres = cpu_to_be32(
			FW_WR_OP_V(FW_RI_RES_WR) |
			FW_RI_RES_WR_NRES_V(1) |
			FW_WR_COMPL_F);
	res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
	res_wr->cookie = (uintptr_t)wr_waitp;
	res = res_wr->res;
	res->u.cq.restype = FW_RI_RES_TYPE_CQ;
	res->u.cq.op = FW_RI_RES_OP_WRITE;
	res->u.cq.iqid = cpu_to_be32(cq->cqid);
	res->u.cq.iqandst_to_iqandstindex = cpu_to_be32(
			FW_RI_RES_WR_IQANUS_V(0) |
			FW_RI_RES_WR_IQANUD_V(1) |
			FW_RI_RES_WR_IQANDST_F |
			FW_RI_RES_WR_IQANDSTINDEX_V(
				rdev->lldi.ciq_ids[cq->vector]));
	res->u.cq.iqdroprss_to_iqesize = cpu_to_be16(
			FW_RI_RES_WR_IQDROPRSS_F |
			FW_RI_RES_WR_IQPCIECH_V(2) |
			FW_RI_RES_WR_IQINTCNTTHRESH_V(0) |
			FW_RI_RES_WR_IQO_F |
			FW_RI_RES_WR_IQESIZE_V(1));
	res->u.cq.iqsize = cpu_to_be16(cq->size);
	res->u.cq.iqaddr = cpu_to_be64(cq->dma_addr);

	c4iw_init_wr_wait(wr_waitp);
	ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, 0, __func__);
	if (ret)
		goto err4;

	cq->gen = 1;
	cq->gts = rdev->lldi.gts_reg;
	cq->rdev = rdev;

	cq->bar2_va = c4iw_bar2_addrs(rdev, cq->cqid, T4_BAR2_QTYPE_INGRESS,
				      &cq->bar2_qid,
				      user ? &cq->bar2_pa : NULL);
	if (user && !cq->bar2_pa) {
		pr_warn("%s: cqid %u not in BAR2 range\n",
			pci_name(rdev->lldi.pdev), cq->cqid);
		ret = -EINVAL;
		goto err4;
	}
	return 0;
err4:
	dma_free_coherent(&rdev->lldi.pdev->dev, cq->memsize, cq->queue,
			  dma_unmap_addr(cq, mapping));
err3:
	kfree(cq->sw_queue);
err2:
	c4iw_put_cqid(rdev, cq->cqid, uctx);
err1:
	return ret;
}

static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq)
{
	struct t4_cqe cqe;

	pr_debug("wq %p cq %p sw_cidx %u sw_pidx %u\n",
		 wq, cq, cq->sw_cidx, cq->sw_pidx);
	memset(&cqe, 0, sizeof(cqe));
	cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) |
				 CQE_OPCODE_V(FW_RI_SEND) |
				 CQE_TYPE_V(0) |
				 CQE_SWCQE_V(1) |
				 CQE_QPID_V(wq->sq.qid));
	cqe.bits_type_ts = cpu_to_be64(CQE_GENBIT_V((u64)cq->gen));
	cq->sw_queue[cq->sw_pidx] = cqe;
	t4_swcq_produce(cq);
}

int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count)
{
	int flushed = 0;
	int in_use = wq->rq.in_use - count;

	pr_debug("wq %p cq %p rq.in_use %u skip count %u\n",
		 wq, cq, wq->rq.in_use, count);
	while (in_use--) {
		insert_recv_cqe(wq, cq);
		flushed++;
	}
	return flushed;
}

static void insert_sq_cqe(struct t4_wq *wq, struct t4_cq *cq,
			  struct t4_swsqe *swcqe)
{
	struct t4_cqe cqe;

	pr_debug("wq %p cq %p sw_cidx %u sw_pidx %u\n",
		 wq, cq, cq->sw_cidx, cq->sw_pidx);
	memset(&cqe, 0, sizeof(cqe));
	cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) |
				 CQE_OPCODE_V(swcqe->opcode) |
				 CQE_TYPE_V(1) |
				 CQE_SWCQE_V(1) |
				 CQE_QPID_V(wq->sq.qid));
	CQE_WRID_SQ_IDX(&cqe) = swcqe->idx;
	cqe.bits_type_ts = cpu_to_be64(CQE_GENBIT_V((u64)cq->gen));
	cq->sw_queue[cq->sw_pidx] = cqe;
	t4_swcq_produce(cq);
}

static void advance_oldest_read(struct t4_wq *wq);
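
/*
 * Generate T4_ERR_SWFLUSH software CQEs for every SQ WR still pending
 * between flush_cidx and pidx, marking each software SQE as flushed.
 * Returns the number of WRs flushed.
 */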
int c4iw_flush_sq(struct c4iw_qp *qhp)
{
	int flushed = 0;
	struct t4_wq *wq = &qhp->wq;
	struct c4iw_cq *chp = to_c4iw_cq(qhp->ibqp.send_cq);
	struct t4_cq *cq = &chp->cq;
	int idx;
	struct t4_swsqe *swsqe;

	if (wq->sq.flush_cidx == -1)
		wq->sq.flush_cidx = wq->sq.cidx;
	idx = wq->sq.flush_cidx;
	while (idx != wq->sq.pidx) {
		swsqe = &wq->sq.sw_sq[idx];
		swsqe->flushed = 1;
		insert_sq_cqe(wq, cq, swsqe);
		if (wq->sq.oldest_read == swsqe)
			advance_oldest_read(wq);
		flushed++;
		if (++idx == wq->sq.size)
			idx = 0;
	}
	wq->sq.flush_cidx += flushed;
	if (wq->sq.flush_cidx >= wq->sq.size)
		wq->sq.flush_cidx -= wq->sq.size;
	return flushed;
}

static void flush_completed_wrs(struct t4_wq *wq, struct t4_cq *cq)
{
	struct t4_swsqe *swsqe;
	int cidx;

	if (wq->sq.flush_cidx == -1)
		wq->sq.flush_cidx = wq->sq.cidx;
	cidx = wq->sq.flush_cidx;

	while (cidx != wq->sq.pidx) {
		swsqe = &wq->sq.sw_sq[cidx];
		if (!swsqe->signaled) {
			if (++cidx == wq->sq.size)
				cidx = 0;
		} else if (swsqe->complete) {

			/*
			 * Insert this completed cqe into the swcq.
			 */
			pr_debug("moving cqe into swcq sq idx %u cq idx %u\n",
				 cidx, cq->sw_pidx);
			swsqe->cqe.header |= htonl(CQE_SWCQE_V(1));
			cq->sw_queue[cq->sw_pidx] = swsqe->cqe;
			t4_swcq_produce(cq);
			swsqe->flushed = 1;
			if (++cidx == wq->sq.size)
				cidx = 0;
			wq->sq.flush_cidx = cidx;
		} else
			break;
	}
}
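
/*
 * RDMA READ responses arrive as RQ-type CQEs that do not carry the SQ
 * index, opcode or length of the originating READ WR.  Build an SQ-type
 * FW_RI_READ_REQ CQE from the oldest outstanding read so it can be
 * completed like any other SQ WR; advance_oldest_read() then moves the
 * tracker to the next READ WR in the software SQ (or NULL if none).
 */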
static void create_read_req_cqe(struct t4_wq *wq, struct t4_cqe *hw_cqe,
				struct t4_cqe *read_cqe)
{
	read_cqe->u.scqe.cidx = wq->sq.oldest_read->idx;
	read_cqe->len = htonl(wq->sq.oldest_read->read_len);
	read_cqe->header = htonl(CQE_QPID_V(CQE_QPID(hw_cqe)) |
				 CQE_SWCQE_V(SW_CQE(hw_cqe)) |
				 CQE_OPCODE_V(FW_RI_READ_REQ) |
				 CQE_TYPE_V(1));
	read_cqe->bits_type_ts = hw_cqe->bits_type_ts;
}

static void advance_oldest_read(struct t4_wq *wq)
{
	u32 rptr = wq->sq.oldest_read - wq->sq.sw_sq + 1;

	if (rptr == wq->sq.size)
		rptr = 0;
	while (rptr != wq->sq.pidx) {
		wq->sq.oldest_read = &wq->sq.sw_sq[rptr];

		if (wq->sq.oldest_read->opcode == FW_RI_READ_REQ)
			return;
		if (++rptr == wq->sq.size)
			rptr = 0;
	}
	wq->sq.oldest_read = NULL;
}

/*
 * Move all CQEs from the HWCQ into the SWCQ.
 * Deal with out-of-order and/or completions that complete
 * prior unsignaled WRs.
 */
void c4iw_flush_hw_cq(struct c4iw_cq *chp)
{
	struct t4_cqe *hw_cqe, *swcqe, read_cqe;
	struct c4iw_qp *qhp;
	struct t4_swsqe *swsqe;
	int ret;

	pr_debug("cqid 0x%x\n", chp->cq.cqid);
	ret = t4_next_hw_cqe(&chp->cq, &hw_cqe);

	/*
	 * This logic is similar to poll_cq(), but not quite the same
	 * unfortunately.  Need to move pertinent HW CQEs to the SW CQ but
	 * also do any translation magic that poll_cq() normally does.
	 */
	while (!ret) {
		qhp = get_qhp(chp->rhp, CQE_QPID(hw_cqe));

		/*
		 * drop CQEs with no associated QP
		 */
		if (qhp == NULL)
			goto next_cqe;

		if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE)
			goto next_cqe;

		if (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP) {

			/* If we have reached here because of async
			 * event or other error, and have egress error
			 * then drop
			 */
			if (CQE_TYPE(hw_cqe) == 1)
				goto next_cqe;

			/* drop peer2peer RTR reads.
			 */
			if (CQE_WRID_STAG(hw_cqe) == 1)
				goto next_cqe;

			/*
			 * Eat completions for unsignaled read WRs.
			 */
			if (!qhp->wq.sq.oldest_read->signaled) {
				advance_oldest_read(&qhp->wq);
				goto next_cqe;
			}

			/*
			 * Don't write to the HWCQ, create a new read req CQE
			 * in local memory and move it into the swcq.
			 */
			create_read_req_cqe(&qhp->wq, hw_cqe, &read_cqe);
			hw_cqe = &read_cqe;
			advance_oldest_read(&qhp->wq);
		}

		/* if it's an SQ completion, then do the magic to move all the
		 * unsignaled and now in-order completions into the swcq.
		 */
		if (SQ_TYPE(hw_cqe)) {
			swsqe = &qhp->wq.sq.sw_sq[CQE_WRID_SQ_IDX(hw_cqe)];
			swsqe->cqe = *hw_cqe;
			swsqe->complete = 1;
			flush_completed_wrs(&qhp->wq, &chp->cq);
		} else {
			swcqe = &chp->cq.sw_queue[chp->cq.sw_pidx];
			*swcqe = *hw_cqe;
			swcqe->header |= cpu_to_be32(CQE_SWCQE_V(1));
			t4_swcq_produce(&chp->cq);
		}
next_cqe:
		t4_hwcq_consume(&chp->cq);
		ret = t4_next_hw_cqe(&chp->cq, &hw_cqe);
	}
}

static int cqe_completes_wr(struct t4_cqe *cqe, struct t4_wq *wq)
{
	if (DRAIN_CQE(cqe)) {
		WARN_ONCE(1, "Unexpected DRAIN CQE qp id %u!\n", wq->sq.qid);
		return 0;
	}

	if (CQE_OPCODE(cqe) == FW_RI_TERMINATE)
		return 0;

	if ((CQE_OPCODE(cqe) == FW_RI_RDMA_WRITE) && RQ_TYPE(cqe))
		return 0;

	if ((CQE_OPCODE(cqe) == FW_RI_READ_RESP) && SQ_TYPE(cqe))
		return 0;

	if (CQE_SEND_OPCODE(cqe) && RQ_TYPE(cqe) && t4_rq_empty(wq))
		return 0;
	return 1;
}
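
/*
 * Count the RQ CQEs already sitting in the software CQ that belong to
 * this WQ and that will actually complete an RQ WR (as judged by
 * cqe_completes_wr()).
 */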
void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
{
	struct t4_cqe *cqe;
	u32 ptr;

	*count = 0;
	pr_debug("count zero %d\n", *count);
	ptr = cq->sw_cidx;
	while (ptr != cq->sw_pidx) {
		cqe = &cq->sw_queue[ptr];
		if (RQ_TYPE(cqe) && (CQE_OPCODE(cqe) != FW_RI_READ_RESP) &&
		    (CQE_QPID(cqe) == wq->sq.qid) && cqe_completes_wr(cqe, wq))
			(*count)++;
		if (++ptr == cq->size)
			ptr = 0;
	}
	pr_debug("cq %p count %d\n", cq, *count);
}

/*
 * poll_cq
 *
 * Caller must:
 *	check the validity of the first CQE,
 *	supply the wq associated with the qpid.
 *
 * credit: cq credit to return to sge.
 * cqe_flushed: 1 iff the CQE is flushed.
 * cqe: copy of the polled CQE.
 *
 * return value:
 *	0	    CQE returned ok.
 *	-EAGAIN	    CQE skipped, try again.
 *	-EOVERFLOW  CQ overflow detected.
 */
static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
		   u8 *cqe_flushed, u64 *cookie, u32 *credit)
{
	int ret = 0;
	struct t4_cqe *hw_cqe, read_cqe;

	*cqe_flushed = 0;
	*credit = 0;
	ret = t4_next_cqe(cq, &hw_cqe);
	if (ret)
		return ret;

	pr_debug("CQE OVF %u qpid 0x%0x genbit %u type %u status 0x%0x opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n",
		 CQE_OVFBIT(hw_cqe), CQE_QPID(hw_cqe),
		 CQE_GENBIT(hw_cqe), CQE_TYPE(hw_cqe), CQE_STATUS(hw_cqe),
		 CQE_OPCODE(hw_cqe), CQE_LEN(hw_cqe), CQE_WRID_HI(hw_cqe),
		 CQE_WRID_LOW(hw_cqe));

	/*
	 * skip CQEs not affiliated with a QP.
	 */
	if (wq == NULL) {
		ret = -EAGAIN;
		goto skip_cqe;
	}

	/*
	 * skip HW CQEs if the wq is already flushed.
	 */
	if (wq->flushed && !SW_CQE(hw_cqe)) {
		ret = -EAGAIN;
		goto skip_cqe;
	}

	/*
	 * skip TERMINATE cqes...
	 */
	if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE) {
		ret = -EAGAIN;
		goto skip_cqe;
	}

	/*
	 * Special cqe for drain WR completions...
	 */
	if (DRAIN_CQE(hw_cqe)) {
		*cookie = CQE_DRAIN_COOKIE(hw_cqe);
		*cqe = *hw_cqe;
		goto skip_cqe;
	}

	/*
	 * Gotta tweak READ completions:
	 *	1) the cqe doesn't contain the sq_wptr from the wr.
	 *	2) opcode not reflected from the wr.
	 *	3) read_len not reflected from the wr.
	 *	4) cq_type is RQ_TYPE not SQ_TYPE.
	 */
	if (RQ_TYPE(hw_cqe) && (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP)) {

		/* If we have reached here because of async
		 * event or other error, and have egress error
		 * then drop
		 */
		if (CQE_TYPE(hw_cqe) == 1) {
			if (CQE_STATUS(hw_cqe))
				t4_set_wq_in_error(wq);
			ret = -EAGAIN;
			goto skip_cqe;
		}

		/* If this is an unsolicited read response, then the read
		 * was generated by the kernel driver as part of peer-2-peer
		 * connection setup.  So ignore the completion.
		 */
		if (CQE_WRID_STAG(hw_cqe) == 1) {
			if (CQE_STATUS(hw_cqe))
				t4_set_wq_in_error(wq);
			ret = -EAGAIN;
			goto skip_cqe;
		}

		/*
		 * Eat completions for unsignaled read WRs.
		 */
		if (!wq->sq.oldest_read->signaled) {
			advance_oldest_read(wq);
			ret = -EAGAIN;
			goto skip_cqe;
		}

		/*
		 * Don't write to the HWCQ, so create a new read req CQE
		 * in local memory.
		 */
		create_read_req_cqe(wq, hw_cqe, &read_cqe);
		hw_cqe = &read_cqe;
		advance_oldest_read(wq);
	}

	if (CQE_STATUS(hw_cqe) || t4_wq_in_error(wq)) {
		*cqe_flushed = (CQE_STATUS(hw_cqe) == T4_ERR_SWFLUSH);
		t4_set_wq_in_error(wq);
	}

	/*
	 * RECV completion.
	 */
	if (RQ_TYPE(hw_cqe)) {

		/*
		 * HW only validates 4 bits of MSN.  So we must validate that
		 * the MSN in the SEND is the next expected MSN.  If it's not,
		 * then we complete this with T4_ERR_MSN and mark the wq in
		 * error.
		 */

		if (t4_rq_empty(wq)) {
			t4_set_wq_in_error(wq);
			ret = -EAGAIN;
			goto skip_cqe;
		}
		if (unlikely(!CQE_STATUS(hw_cqe) &&
			     CQE_WRID_MSN(hw_cqe) != wq->rq.msn)) {
			t4_set_wq_in_error(wq);
			hw_cqe->header |= cpu_to_be32(CQE_STATUS_V(T4_ERR_MSN));
		}
		goto proc_cqe;
	}

	/*
	 * If we get here it's a send completion.
	 *
	 * Handle out of order completion. These get stuffed
	 * in the SW SQ. Then the SW SQ is walked to move any
	 * now in-order completions into the SW CQ.  This handles
	 * 2 cases:
	 *	1) reaping unsignaled WRs when the first subsequent
	 *	   signaled WR is completed.
	 *	2) out of order read completions.
	 */
	if (!SW_CQE(hw_cqe) && (CQE_WRID_SQ_IDX(hw_cqe) != wq->sq.cidx)) {
		struct t4_swsqe *swsqe;

		pr_debug("out of order completion going in sw_sq at idx %u\n",
			 CQE_WRID_SQ_IDX(hw_cqe));
		swsqe = &wq->sq.sw_sq[CQE_WRID_SQ_IDX(hw_cqe)];
		swsqe->cqe = *hw_cqe;
		swsqe->complete = 1;
		ret = -EAGAIN;
		goto flush_wq;
	}
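
	/*
	 * Both RQ and SQ completions are delivered from here: hand the
	 * CQE to the caller and retire the WR slot(s) it completes.
	 */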
proc_cqe:
	*cqe = *hw_cqe;

	/*
	 * Reap the associated WR(s) that are freed up with this
	 * completion.
	 */
	if (SQ_TYPE(hw_cqe)) {
		int idx = CQE_WRID_SQ_IDX(hw_cqe);

		/*
		 * Account for any unsignaled completions completed by
		 * this signaled completion.  In this case, cidx points
		 * to the first unsignaled one, and idx points to the
		 * signaled one.  So adjust in_use based on this delta.
		 * If this is not completing any unsignaled WRs, then the
		 * delta will be 0.  Handle wrapping also!
		 */
		if (idx < wq->sq.cidx)
			wq->sq.in_use -= wq->sq.size + idx - wq->sq.cidx;
		else
			wq->sq.in_use -= idx - wq->sq.cidx;

		wq->sq.cidx = (uint16_t)idx;
		pr_debug("completing sq idx %u\n", wq->sq.cidx);
		*cookie = wq->sq.sw_sq[wq->sq.cidx].wr_id;
		if (c4iw_wr_log)
			c4iw_log_wr_stats(wq, hw_cqe);
		t4_sq_consume(wq);
	} else {
		pr_debug("completing rq idx %u\n", wq->rq.cidx);
		*cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id;
		if (c4iw_wr_log)
			c4iw_log_wr_stats(wq, hw_cqe);
		t4_rq_consume(wq);
		goto skip_cqe;
	}

flush_wq:
	/*
	 * Flush any completed cqes that are now in-order.
	 */
	flush_completed_wrs(wq, cq);

skip_cqe:
	if (SW_CQE(hw_cqe)) {
		pr_debug("cq %p cqid 0x%x skip sw cqe cidx %u\n",
			 cq, cq->cqid, cq->sw_cidx);
		t4_swcq_consume(cq);
	} else {
		pr_debug("cq %p cqid 0x%x skip hw cqe cidx %u\n",
			 cq, cq->cqid, cq->cidx);
		t4_hwcq_consume(cq);
	}
	return ret;
}

/*
 * Get one cq entry from c4iw and map it to openib.
 *
 * Returns:
 *	0			cqe returned
 *	-ENODATA		EMPTY;
 *	-EAGAIN			caller must try again
 *	any other -errno	fatal error
 */
static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
{
	struct c4iw_qp *qhp = NULL;
	struct t4_cqe uninitialized_var(cqe), *rd_cqe;
	struct t4_wq *wq;
	u32 credit = 0;
	u8 cqe_flushed;
	u64 cookie = 0;
	int ret;

	ret = t4_next_cqe(&chp->cq, &rd_cqe);

	if (ret)
		return ret;

	qhp = get_qhp(chp->rhp, CQE_QPID(rd_cqe));
	if (!qhp)
		wq = NULL;
	else {
		spin_lock(&qhp->lock);
		wq = &(qhp->wq);
	}
	ret = poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit);
	if (ret)
		goto out;

	wc->wr_id = cookie;
	wc->qp = &qhp->ibqp;
	wc->vendor_err = CQE_STATUS(&cqe);
	wc->wc_flags = 0;

	pr_debug("qpid 0x%x type %d opcode %d status 0x%x len %u wrid hi 0x%x lo 0x%x cookie 0x%llx\n",
		 CQE_QPID(&cqe),
		 CQE_TYPE(&cqe), CQE_OPCODE(&cqe),
		 CQE_STATUS(&cqe), CQE_LEN(&cqe),
		 CQE_WRID_HI(&cqe), CQE_WRID_LOW(&cqe),
		 (unsigned long long)cookie);

	if (CQE_TYPE(&cqe) == 0) {
		if (!CQE_STATUS(&cqe))
			wc->byte_len = CQE_LEN(&cqe);
		else
			wc->byte_len = 0;
		wc->opcode = IB_WC_RECV;
		if (CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_INV ||
		    CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_SE_INV) {
			wc->ex.invalidate_rkey = CQE_WRID_STAG(&cqe);
			wc->wc_flags |= IB_WC_WITH_INVALIDATE;
			c4iw_invalidate_mr(qhp->rhp, wc->ex.invalidate_rkey);
		}
	} else {
		switch (CQE_OPCODE(&cqe)) {
		case FW_RI_RDMA_WRITE:
			wc->opcode = IB_WC_RDMA_WRITE;
			break;
		case FW_RI_READ_REQ:
			wc->opcode = IB_WC_RDMA_READ;
			wc->byte_len = CQE_LEN(&cqe);
			break;
		case FW_RI_SEND_WITH_INV:
		case FW_RI_SEND_WITH_SE_INV:
			wc->opcode = IB_WC_SEND;
			wc->wc_flags |= IB_WC_WITH_INVALIDATE;
			break;
		case FW_RI_SEND:
		case FW_RI_SEND_WITH_SE:
			wc->opcode = IB_WC_SEND;
			break;
		case FW_RI_LOCAL_INV:
			wc->opcode = IB_WC_LOCAL_INV;
			break;
		case FW_RI_FAST_REGISTER:
			wc->opcode = IB_WC_REG_MR;

			/* Invalidate the MR if the fastreg failed */
			if (CQE_STATUS(&cqe) != T4_ERR_SUCCESS)
				c4iw_invalidate_mr(qhp->rhp,
						   CQE_WRID_FR_STAG(&cqe));
			break;
		default:
			pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n",
			       CQE_OPCODE(&cqe), CQE_QPID(&cqe));
			ret = -EINVAL;
			goto out;
		}
	}
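
	/*
	 * Map the T4 CQE status onto an ib_wc status.  Flushed WRs always
	 * report IB_WC_WR_FLUSH_ERR; otherwise translate the hardware
	 * error code.
	 */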
	if (cqe_flushed)
		wc->status = IB_WC_WR_FLUSH_ERR;
	else {
		switch (CQE_STATUS(&cqe)) {
		case T4_ERR_SUCCESS:
			wc->status = IB_WC_SUCCESS;
			break;
		case T4_ERR_STAG:
			wc->status = IB_WC_LOC_ACCESS_ERR;
			break;
		case T4_ERR_PDID:
			wc->status = IB_WC_LOC_PROT_ERR;
			break;
		case T4_ERR_QPID:
		case T4_ERR_ACCESS:
			wc->status = IB_WC_LOC_ACCESS_ERR;
			break;
		case T4_ERR_WRAP:
			wc->status = IB_WC_GENERAL_ERR;
			break;
		case T4_ERR_BOUND:
			wc->status = IB_WC_LOC_LEN_ERR;
			break;
		case T4_ERR_INVALIDATE_SHARED_MR:
		case T4_ERR_INVALIDATE_MR_WITH_MW_BOUND:
			wc->status = IB_WC_MW_BIND_ERR;
			break;
		case T4_ERR_CRC:
		case T4_ERR_MARKER:
		case T4_ERR_PDU_LEN_ERR:
		case T4_ERR_OUT_OF_RQE:
		case T4_ERR_DDP_VERSION:
		case T4_ERR_RDMA_VERSION:
		case T4_ERR_DDP_QUEUE_NUM:
		case T4_ERR_MSN:
		case T4_ERR_TBIT:
		case T4_ERR_MO:
		case T4_ERR_MSN_RANGE:
		case T4_ERR_IRD_OVERFLOW:
		case T4_ERR_OPCODE:
		case T4_ERR_INTERNAL_ERR:
			wc->status = IB_WC_FATAL_ERR;
			break;
		case T4_ERR_SWFLUSH:
			wc->status = IB_WC_WR_FLUSH_ERR;
			break;
		default:
			pr_err("Unexpected cqe_status 0x%x for QPID=0x%0x\n",
			       CQE_STATUS(&cqe), CQE_QPID(&cqe));
			wc->status = IB_WC_FATAL_ERR;
		}
	}
out:
	if (wq)
		spin_unlock(&qhp->lock);
	return ret;
}
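
/*
 * ib_poll_cq() entry point: poll up to num_entries CQEs under the CQ
 * lock, retrying c4iw_poll_cq_one() whenever it skips a CQE with
 * -EAGAIN.  Returns the number of CQEs polled, or a negative errno on
 * a fatal error.
 */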
int c4iw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
	struct c4iw_cq *chp;
	unsigned long flags;
	int npolled;
	int err = 0;

	chp = to_c4iw_cq(ibcq);

	spin_lock_irqsave(&chp->lock, flags);
	for (npolled = 0; npolled < num_entries; ++npolled) {
		do {
			err = c4iw_poll_cq_one(chp, wc + npolled);
		} while (err == -EAGAIN);
		if (err)
			break;
	}
	spin_unlock_irqrestore(&chp->lock, flags);
	return !err || err == -ENODATA ? npolled : err;
}

int c4iw_destroy_cq(struct ib_cq *ib_cq)
{
	struct c4iw_cq *chp;
	struct c4iw_ucontext *ucontext;

	pr_debug("ib_cq %p\n", ib_cq);
	chp = to_c4iw_cq(ib_cq);

	remove_handle(chp->rhp, &chp->rhp->cqidr, chp->cq.cqid);
	atomic_dec(&chp->refcnt);
	wait_event(chp->wait, !atomic_read(&chp->refcnt));

	ucontext = ib_cq->uobject ? to_c4iw_ucontext(ib_cq->uobject->context)
				  : NULL;
	destroy_cq(&chp->rhp->rdev, &chp->cq,
		   ucontext ? &ucontext->uctx : &chp->cq.rdev->uctx,
		   chp->destroy_skb, chp->wr_waitp);
	c4iw_put_wr_wait(chp->wr_waitp);
	kfree(chp);
	return 0;
}
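
/*
 * ib_create_cq() entry point.  The requested depth is padded for the
 * status page and for full-vs-empty disambiguation, rounded up to a
 * multiple of 16, doubled (capped at the adapter maximum) and clamped
 * to at least 64 entries before the hardware queue is created.  For
 * user CQs the queue memory and GTS doorbell are exported via mmap keys.
 */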
struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
			     const struct ib_cq_init_attr *attr,
			     struct ib_ucontext *ib_context,
			     struct ib_udata *udata)
{
	int entries = attr->cqe;
	int vector = attr->comp_vector;
	struct c4iw_dev *rhp;
	struct c4iw_cq *chp;
	struct c4iw_create_cq_resp uresp;
	struct c4iw_ucontext *ucontext = NULL;
	int ret, wr_len;
	size_t memsize, hwentries;
	struct c4iw_mm_entry *mm, *mm2;

	pr_debug("ib_dev %p entries %d\n", ibdev, entries);
	if (attr->flags)
		return ERR_PTR(-EINVAL);

	rhp = to_c4iw_dev(ibdev);

	if (vector >= rhp->rdev.lldi.nciq)
		return ERR_PTR(-EINVAL);

	chp = kzalloc(sizeof(*chp), GFP_KERNEL);
	if (!chp)
		return ERR_PTR(-ENOMEM);
	chp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL);
	if (!chp->wr_waitp) {
		ret = -ENOMEM;
		goto err_free_chp;
	}
	c4iw_init_wr_wait(chp->wr_waitp);

	wr_len = sizeof(struct fw_ri_res_wr) + sizeof(struct fw_ri_res);
	chp->destroy_skb = alloc_skb(wr_len, GFP_KERNEL);
	if (!chp->destroy_skb) {
		ret = -ENOMEM;
		goto err_free_wr_wait;
	}

	if (ib_context)
		ucontext = to_c4iw_ucontext(ib_context);

	/* account for the status page. */
	entries++;

	/* IQ needs one extra entry to differentiate full vs empty. */
	entries++;

	/*
	 * entries must be multiple of 16 for HW.
	 */
	entries = roundup(entries, 16);

	/*
	 * Make actual HW queue 2x to avoid cidx_inc overflows.
	 */
	hwentries = min(entries * 2, rhp->rdev.hw_queue.t4_max_iq_size);

	/*
	 * Make HW queue at least 64 entries so GTS updates aren't too
	 * frequent.
	 */
	if (hwentries < 64)
		hwentries = 64;

	memsize = hwentries * sizeof *chp->cq.queue;

	/*
	 * memsize must be a multiple of the page size if it's a user cq.
	 */
	if (ucontext)
		memsize = roundup(memsize, PAGE_SIZE);
	chp->cq.size = hwentries;
	chp->cq.memsize = memsize;
	chp->cq.vector = vector;

	ret = create_cq(&rhp->rdev, &chp->cq,
			ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
			chp->wr_waitp);
	if (ret)
		goto err_free_skb;

	chp->rhp = rhp;
	chp->cq.size--;				/* status page */
	chp->ibcq.cqe = entries - 2;
	spin_lock_init(&chp->lock);
	spin_lock_init(&chp->comp_handler_lock);
	atomic_set(&chp->refcnt, 1);
	init_waitqueue_head(&chp->wait);
	ret = insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid);
	if (ret)
		goto err_destroy_cq;

	if (ucontext) {
		ret = -ENOMEM;
		mm = kmalloc(sizeof *mm, GFP_KERNEL);
		if (!mm)
			goto err_remove_handle;
		mm2 = kmalloc(sizeof *mm2, GFP_KERNEL);
		if (!mm2)
			goto err_free_mm;

		uresp.qid_mask = rhp->rdev.cqmask;
		uresp.cqid = chp->cq.cqid;
		uresp.size = chp->cq.size;
		uresp.memsize = chp->cq.memsize;
		spin_lock(&ucontext->mmap_lock);
		uresp.key = ucontext->key;
		ucontext->key += PAGE_SIZE;
		uresp.gts_key = ucontext->key;
		ucontext->key += PAGE_SIZE;
		spin_unlock(&ucontext->mmap_lock);
		ret = ib_copy_to_udata(udata, &uresp,
				       sizeof(uresp) - sizeof(uresp.reserved));
		if (ret)
			goto err_free_mm2;

		mm->key = uresp.key;
		mm->addr = virt_to_phys(chp->cq.queue);
		mm->len = chp->cq.memsize;
		insert_mmap(ucontext, mm);

		mm2->key = uresp.gts_key;
		mm2->addr = chp->cq.bar2_pa;
		mm2->len = PAGE_SIZE;
		insert_mmap(ucontext, mm2);
	}
	pr_debug("cqid 0x%0x chp %p size %u memsize %zu, dma_addr 0x%0llx\n",
		 chp->cq.cqid, chp, chp->cq.size,
		 chp->cq.memsize, (unsigned long long)chp->cq.dma_addr);
	return &chp->ibcq;
err_free_mm2:
	kfree(mm2);
err_free_mm:
	kfree(mm);
err_remove_handle:
	remove_handle(rhp, &rhp->cqidr, chp->cq.cqid);
err_destroy_cq:
	destroy_cq(&chp->rhp->rdev, &chp->cq,
		   ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
		   chp->destroy_skb, chp->wr_waitp);
err_free_skb:
	kfree_skb(chp->destroy_skb);
err_free_wr_wait:
	c4iw_put_wr_wait(chp->wr_waitp);
err_free_chp:
	kfree(chp);
	return ERR_PTR(ret);
}

int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
{
	return -ENOSYS;
}

int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
	struct c4iw_cq *chp;
	int ret = 0;
	unsigned long flag;

	chp = to_c4iw_cq(ibcq);
	spin_lock_irqsave(&chp->lock, flag);
	t4_arm_cq(&chp->cq,
		  (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED);
	if (flags & IB_CQ_REPORT_MISSED_EVENTS)
		ret = t4_cq_notempty(&chp->cq);
	spin_unlock_irqrestore(&chp->lock, flag);
	return ret;
}