/*
 * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "iw_cxgb4.h"

static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
		      struct c4iw_dev_ucontext *uctx, struct sk_buff *skb,
		      struct c4iw_wr_wait *wr_waitp)
{
	struct fw_ri_res_wr *res_wr;
	struct fw_ri_res *res;
	int wr_len;
	int ret;

	wr_len = sizeof *res_wr + sizeof *res;
	set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);

	res_wr = __skb_put_zero(skb, wr_len);
	res_wr->op_nres = cpu_to_be32(
			FW_WR_OP_V(FW_RI_RES_WR) |
			FW_RI_RES_WR_NRES_V(1) |
			FW_WR_COMPL_F);
	res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
	res_wr->cookie = (uintptr_t)wr_waitp;
	res = res_wr->res;
	res->u.cq.restype = FW_RI_RES_TYPE_CQ;
	res->u.cq.op = FW_RI_RES_OP_RESET;
	res->u.cq.iqid = cpu_to_be32(cq->cqid);

	c4iw_init_wr_wait(wr_waitp);
	ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, 0, __func__);

	kfree(cq->sw_queue);
	dma_free_coherent(&(rdev->lldi.pdev->dev),
			  cq->memsize, cq->queue,
			  dma_unmap_addr(cq, mapping));
	c4iw_put_cqid(rdev, cq->cqid, uctx);
	return ret;
}

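/*
 * Note: destroy_cq() above and create_cq() below both post a FW_RI_RES_WR
 * on the control queue and then sleep on wr_waitp until the firmware
 * acknowledges the request, so they must be called from a context that
 * can sleep.
 */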
static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
		     struct c4iw_dev_ucontext *uctx,
		     struct c4iw_wr_wait *wr_waitp)
{
	struct fw_ri_res_wr *res_wr;
	struct fw_ri_res *res;
	int wr_len;
	int user = (uctx != &rdev->uctx);
	int ret;
	struct sk_buff *skb;

	cq->cqid = c4iw_get_cqid(rdev, uctx);
	if (!cq->cqid) {
		ret = -ENOMEM;
		goto err1;
	}

	if (!user) {
		cq->sw_queue = kzalloc(cq->memsize, GFP_KERNEL);
		if (!cq->sw_queue) {
			ret = -ENOMEM;
			goto err2;
		}
	}
	cq->queue = dma_alloc_coherent(&rdev->lldi.pdev->dev, cq->memsize,
				       &cq->dma_addr, GFP_KERNEL);
	if (!cq->queue) {
		ret = -ENOMEM;
		goto err3;
	}
	dma_unmap_addr_set(cq, mapping, cq->dma_addr);
	memset(cq->queue, 0, cq->memsize);

	/* build fw_ri_res_wr */
	wr_len = sizeof *res_wr + sizeof *res;

	skb = alloc_skb(wr_len, GFP_KERNEL);
	if (!skb) {
		ret = -ENOMEM;
		goto err4;
	}
	set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);

	res_wr = __skb_put_zero(skb, wr_len);
	res_wr->op_nres = cpu_to_be32(
			FW_WR_OP_V(FW_RI_RES_WR) |
			FW_RI_RES_WR_NRES_V(1) |
			FW_WR_COMPL_F);
	res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
	res_wr->cookie = (uintptr_t)wr_waitp;
	res = res_wr->res;
	res->u.cq.restype = FW_RI_RES_TYPE_CQ;
	res->u.cq.op = FW_RI_RES_OP_WRITE;
	res->u.cq.iqid = cpu_to_be32(cq->cqid);
	res->u.cq.iqandst_to_iqandstindex = cpu_to_be32(
			FW_RI_RES_WR_IQANUS_V(0) |
			FW_RI_RES_WR_IQANUD_V(1) |
			FW_RI_RES_WR_IQANDST_F |
			FW_RI_RES_WR_IQANDSTINDEX_V(
				rdev->lldi.ciq_ids[cq->vector]));
	res->u.cq.iqdroprss_to_iqesize = cpu_to_be16(
			FW_RI_RES_WR_IQDROPRSS_F |
			FW_RI_RES_WR_IQPCIECH_V(2) |
			FW_RI_RES_WR_IQINTCNTTHRESH_V(0) |
			FW_RI_RES_WR_IQO_F |
			FW_RI_RES_WR_IQESIZE_V(1));
	res->u.cq.iqsize = cpu_to_be16(cq->size);
	res->u.cq.iqaddr = cpu_to_be64(cq->dma_addr);

	c4iw_init_wr_wait(wr_waitp);
	ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, 0, __func__);
	if (ret)
		goto err4;

	cq->gen = 1;
	cq->gts = rdev->lldi.gts_reg;
	cq->rdev = rdev;

	cq->bar2_va = c4iw_bar2_addrs(rdev, cq->cqid, T4_BAR2_QTYPE_INGRESS,
				      &cq->bar2_qid,
				      user ? &cq->bar2_pa : NULL);
	if (user && !cq->bar2_pa) {
		pr_warn("%s: cqid %u not in BAR2 range\n",
			pci_name(rdev->lldi.pdev), cq->cqid);
		ret = -EINVAL;
		goto err4;
	}
	return 0;
err4:
	dma_free_coherent(&rdev->lldi.pdev->dev, cq->memsize, cq->queue,
			  dma_unmap_addr(cq, mapping));
err3:
	kfree(cq->sw_queue);
err2:
	c4iw_put_cqid(rdev, cq->cqid, uctx);
err1:
	return ret;
}

static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq)
{
	struct t4_cqe cqe;

	pr_debug("wq %p cq %p sw_cidx %u sw_pidx %u\n",
		 wq, cq, cq->sw_cidx, cq->sw_pidx);
	memset(&cqe, 0, sizeof(cqe));
	cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) |
				 CQE_OPCODE_V(FW_RI_SEND) |
				 CQE_TYPE_V(0) |
				 CQE_SWCQE_V(1) |
				 CQE_QPID_V(wq->sq.qid));
	cqe.bits_type_ts = cpu_to_be64(CQE_GENBIT_V((u64)cq->gen));
	cq->sw_queue[cq->sw_pidx] = cqe;
	t4_swcq_produce(cq);
}

int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count)
{
	int flushed = 0;
	int in_use = wq->rq.in_use - count;

	pr_debug("wq %p cq %p rq.in_use %u skip count %u\n",
		 wq, cq, wq->rq.in_use, count);
	while (in_use--) {
		insert_recv_cqe(wq, cq);
		flushed++;
	}
	return flushed;
}

static void insert_sq_cqe(struct t4_wq *wq, struct t4_cq *cq,
			  struct t4_swsqe *swcqe)
{
	struct t4_cqe cqe;

	pr_debug("wq %p cq %p sw_cidx %u sw_pidx %u\n",
		 wq, cq, cq->sw_cidx, cq->sw_pidx);
	memset(&cqe, 0, sizeof(cqe));
	cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) |
				 CQE_OPCODE_V(swcqe->opcode) |
				 CQE_TYPE_V(1) |
				 CQE_SWCQE_V(1) |
				 CQE_QPID_V(wq->sq.qid));
	CQE_WRID_SQ_IDX(&cqe) = swcqe->idx;
	cqe.bits_type_ts = cpu_to_be64(CQE_GENBIT_V((u64)cq->gen));
	cq->sw_queue[cq->sw_pidx] = cqe;
	t4_swcq_produce(cq);
}

static void advance_oldest_read(struct t4_wq *wq);

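/*
 * Post a T4_ERR_SWFLUSH software CQE for every SQ WR between flush_cidx and
 * pidx, marking each one flushed, and return the number of WRs flushed.
 */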
int c4iw_flush_sq(struct c4iw_qp *qhp)
{
	int flushed = 0;
	struct t4_wq *wq = &qhp->wq;
	struct c4iw_cq *chp = to_c4iw_cq(qhp->ibqp.send_cq);
	struct t4_cq *cq = &chp->cq;
	int idx;
	struct t4_swsqe *swsqe;

	if (wq->sq.flush_cidx == -1)
		wq->sq.flush_cidx = wq->sq.cidx;
	idx = wq->sq.flush_cidx;
	while (idx != wq->sq.pidx) {
		swsqe = &wq->sq.sw_sq[idx];
		swsqe->flushed = 1;
		insert_sq_cqe(wq, cq, swsqe);
		if (wq->sq.oldest_read == swsqe) {
			advance_oldest_read(wq);
		}
		flushed++;
		if (++idx == wq->sq.size)
			idx = 0;
	}
	wq->sq.flush_cidx += flushed;
	if (wq->sq.flush_cidx >= wq->sq.size)
		wq->sq.flush_cidx -= wq->sq.size;
	return flushed;
}

static void flush_completed_wrs(struct t4_wq *wq, struct t4_cq *cq)
{
	struct t4_swsqe *swsqe;
	int cidx;

	if (wq->sq.flush_cidx == -1)
		wq->sq.flush_cidx = wq->sq.cidx;
	cidx = wq->sq.flush_cidx;

	while (cidx != wq->sq.pidx) {
		swsqe = &wq->sq.sw_sq[cidx];
		if (!swsqe->signaled) {
			if (++cidx == wq->sq.size)
				cidx = 0;
		} else if (swsqe->complete) {

			/*
			 * Insert this completed cqe into the swcq.
			 */
			pr_debug("moving cqe into swcq sq idx %u cq idx %u\n",
				 cidx, cq->sw_pidx);
			swsqe->cqe.header |= htonl(CQE_SWCQE_V(1));
			cq->sw_queue[cq->sw_pidx] = swsqe->cqe;
			t4_swcq_produce(cq);
			swsqe->flushed = 1;
			if (++cidx == wq->sq.size)
				cidx = 0;
			wq->sq.flush_cidx = cidx;
		} else
			break;
	}
}

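/*
 * Build a synthetic SQ read-request CQE from a hardware READ_RESP CQE: the
 * SQ index and length are taken from the oldest outstanding read tracked in
 * the SQ, while the QPID, SWCQE flag and genbit come from the hardware CQE.
 */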
static void create_read_req_cqe(struct t4_wq *wq, struct t4_cqe *hw_cqe,
				struct t4_cqe *read_cqe)
{
	read_cqe->u.scqe.cidx = wq->sq.oldest_read->idx;
	read_cqe->len = htonl(wq->sq.oldest_read->read_len);
	read_cqe->header = htonl(CQE_QPID_V(CQE_QPID(hw_cqe)) |
				 CQE_SWCQE_V(SW_CQE(hw_cqe)) |
				 CQE_OPCODE_V(FW_RI_READ_REQ) |
				 CQE_TYPE_V(1));
	read_cqe->bits_type_ts = hw_cqe->bits_type_ts;
}

static void advance_oldest_read(struct t4_wq *wq)
{

	u32 rptr = wq->sq.oldest_read - wq->sq.sw_sq + 1;

	if (rptr == wq->sq.size)
		rptr = 0;
	while (rptr != wq->sq.pidx) {
		wq->sq.oldest_read = &wq->sq.sw_sq[rptr];

		if (wq->sq.oldest_read->opcode == FW_RI_READ_REQ)
			return;
		if (++rptr == wq->sq.size)
			rptr = 0;
	}
	wq->sq.oldest_read = NULL;
}

/*
 * Move all CQEs from the HWCQ into the SWCQ.
 * Deal with out-of-order completions and completions that complete
 * prior unsignaled WRs.
 */
void c4iw_flush_hw_cq(struct c4iw_cq *chp)
{
	struct t4_cqe *hw_cqe, *swcqe, read_cqe;
	struct c4iw_qp *qhp;
	struct t4_swsqe *swsqe;
	int ret;

	pr_debug("cqid 0x%x\n", chp->cq.cqid);
	ret = t4_next_hw_cqe(&chp->cq, &hw_cqe);

	/*
	 * This logic is similar to poll_cq(), but not quite the same
	 * unfortunately.  Need to move pertinent HW CQEs to the SW CQ but
	 * also do any translation magic that poll_cq() normally does.
	 */
	while (!ret) {
		qhp = get_qhp(chp->rhp, CQE_QPID(hw_cqe));

		/*
		 * drop CQEs with no associated QP
		 */
		if (qhp == NULL)
			goto next_cqe;

		if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE)
			goto next_cqe;

		if (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP) {

			/* If we have reached here because of async
			 * event or other error, and have egress error
			 * then drop
			 */
			if (CQE_TYPE(hw_cqe) == 1)
				goto next_cqe;

			/* drop peer2peer RTR reads.
			 */
			if (CQE_WRID_STAG(hw_cqe) == 1)
				goto next_cqe;

			/*
			 * Eat completions for unsignaled read WRs.
			 */
			if (!qhp->wq.sq.oldest_read->signaled) {
				advance_oldest_read(&qhp->wq);
				goto next_cqe;
			}

			/*
			 * Don't write to the HWCQ, create a new read req CQE
			 * in local memory and move it into the swcq.
			 */
			create_read_req_cqe(&qhp->wq, hw_cqe, &read_cqe);
			hw_cqe = &read_cqe;
			advance_oldest_read(&qhp->wq);
		}

		/* if it's a SQ completion, then do the magic to move all the
		 * unsignaled and now in-order completions into the swcq.
		 */
		if (SQ_TYPE(hw_cqe)) {
			swsqe = &qhp->wq.sq.sw_sq[CQE_WRID_SQ_IDX(hw_cqe)];
			swsqe->cqe = *hw_cqe;
			swsqe->complete = 1;
			flush_completed_wrs(&qhp->wq, &chp->cq);
		} else {
			swcqe = &chp->cq.sw_queue[chp->cq.sw_pidx];
			*swcqe = *hw_cqe;
			swcqe->header |= cpu_to_be32(CQE_SWCQE_V(1));
			t4_swcq_produce(&chp->cq);
		}
next_cqe:
		t4_hwcq_consume(&chp->cq);
		ret = t4_next_hw_cqe(&chp->cq, &hw_cqe);
	}
}

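/*
 * Return 1 if this CQE completes a receive WR when polled, 0 for CQEs that
 * do not: TERMINATE CQEs, RQ-typed RDMA_WRITE CQEs, SQ-typed READ_RESP
 * CQEs, and RQ-typed SEND-class CQEs seen while the RQ is already empty.
 */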
static int cqe_completes_wr(struct t4_cqe *cqe, struct t4_wq *wq)
{
	if (CQE_OPCODE(cqe) == FW_RI_TERMINATE)
		return 0;

	if ((CQE_OPCODE(cqe) == FW_RI_RDMA_WRITE) && RQ_TYPE(cqe))
		return 0;

	if ((CQE_OPCODE(cqe) == FW_RI_READ_RESP) && SQ_TYPE(cqe))
		return 0;

	if (CQE_SEND_OPCODE(cqe) && RQ_TYPE(cqe) && t4_rq_empty(wq))
		return 0;
	return 1;
}

void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
{
	struct t4_cqe *cqe;
	u32 ptr;

	*count = 0;
	pr_debug("count zero %d\n", *count);
	ptr = cq->sw_cidx;
	while (ptr != cq->sw_pidx) {
		cqe = &cq->sw_queue[ptr];
		if (RQ_TYPE(cqe) && (CQE_OPCODE(cqe) != FW_RI_READ_RESP) &&
		    (CQE_QPID(cqe) == wq->sq.qid) && cqe_completes_wr(cqe, wq))
			(*count)++;
		if (++ptr == cq->size)
			ptr = 0;
	}
	pr_debug("cq %p count %d\n", cq, *count);
}

/*
 * poll_cq
 *
 * Caller must:
 *     check the validity of the first CQE,
 *     supply the wq associated with the qpid.
 *
 * credit: cq credit to return to sge.
 * cqe_flushed: 1 iff the CQE is flushed.
 * cqe: copy of the polled CQE.
 *
 * return value:
 *	0		CQE returned ok.
 *	-EAGAIN		CQE skipped, try again.
 *	-EOVERFLOW	CQ overflow detected.
 */
static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
		   u8 *cqe_flushed, u64 *cookie, u32 *credit)
{
	int ret = 0;
	struct t4_cqe *hw_cqe, read_cqe;

	*cqe_flushed = 0;
	*credit = 0;
	ret = t4_next_cqe(cq, &hw_cqe);
	if (ret)
		return ret;

	pr_debug("CQE OVF %u qpid 0x%0x genbit %u type %u status 0x%0x opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n",
		 CQE_OVFBIT(hw_cqe), CQE_QPID(hw_cqe),
		 CQE_GENBIT(hw_cqe), CQE_TYPE(hw_cqe), CQE_STATUS(hw_cqe),
		 CQE_OPCODE(hw_cqe), CQE_LEN(hw_cqe), CQE_WRID_HI(hw_cqe),
		 CQE_WRID_LOW(hw_cqe));

	/*
	 * skip cqe's not affiliated with a QP.
	 */
	if (wq == NULL) {
		ret = -EAGAIN;
		goto skip_cqe;
	}

	/*
	 * skip hw cqe's if the wq is flushed.
	 */
	if (wq->flushed && !SW_CQE(hw_cqe)) {
		ret = -EAGAIN;
		goto skip_cqe;
	}

	/*
	 * skip TERMINATE cqes...
	 */
	if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE) {
		ret = -EAGAIN;
		goto skip_cqe;
	}

	/*
	 * Special cqe for drain WR completions...
	 */
	if (CQE_OPCODE(hw_cqe) == C4IW_DRAIN_OPCODE) {
		*cookie = CQE_DRAIN_COOKIE(hw_cqe);
		*cqe = *hw_cqe;
		goto skip_cqe;
	}

	/*
	 * Gotta tweak READ completions:
	 *	1) the cqe doesn't contain the sq_wptr from the wr.
	 *	2) opcode not reflected from the wr.
	 *	3) read_len not reflected from the wr.
	 *	4) cq_type is RQ_TYPE not SQ_TYPE.
	 */
	if (RQ_TYPE(hw_cqe) && (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP)) {

		/* If we have reached here because of async
		 * event or other error, and have egress error
		 * then drop
		 */
		if (CQE_TYPE(hw_cqe) == 1) {
			if (CQE_STATUS(hw_cqe))
				t4_set_wq_in_error(wq);
			ret = -EAGAIN;
			goto skip_cqe;
		}

		/* If this is an unsolicited read response, then the read
		 * was generated by the kernel driver as part of peer-2-peer
		 * connection setup.  So ignore the completion.
		 */
		if (CQE_WRID_STAG(hw_cqe) == 1) {
			if (CQE_STATUS(hw_cqe))
				t4_set_wq_in_error(wq);
			ret = -EAGAIN;
			goto skip_cqe;
		}

		/*
		 * Eat completions for unsignaled read WRs.
		 */
		if (!wq->sq.oldest_read->signaled) {
			advance_oldest_read(wq);
			ret = -EAGAIN;
			goto skip_cqe;
		}

		/*
		 * Don't write to the HWCQ, so create a new read req CQE
		 * in local memory.
		 */
		create_read_req_cqe(wq, hw_cqe, &read_cqe);
		hw_cqe = &read_cqe;
		advance_oldest_read(wq);
	}

	if (CQE_STATUS(hw_cqe) || t4_wq_in_error(wq)) {
		*cqe_flushed = (CQE_STATUS(hw_cqe) == T4_ERR_SWFLUSH);
		t4_set_wq_in_error(wq);
	}

	/*
	 * RECV completion.
	 */
	if (RQ_TYPE(hw_cqe)) {

		/*
		 * HW only validates 4 bits of MSN.  So we must validate that
		 * the MSN in the SEND is the next expected MSN.  If it's not,
		 * then we complete this with T4_ERR_MSN and mark the wq in
		 * error.
		 */

		if (t4_rq_empty(wq)) {
			t4_set_wq_in_error(wq);
			ret = -EAGAIN;
			goto skip_cqe;
		}
		if (unlikely((CQE_WRID_MSN(hw_cqe) != (wq->rq.msn)))) {
			t4_set_wq_in_error(wq);
			hw_cqe->header |= htonl(CQE_STATUS_V(T4_ERR_MSN));
			goto proc_cqe;
		}
		goto proc_cqe;
	}

	/*
	 * If we get here it's a send completion.
	 *
	 * Handle out of order completion.  These get stuffed
	 * in the SW SQ.  Then the SW SQ is walked to move any
	 * now in-order completions into the SW CQ.  This handles
	 * 2 cases:
	 *	1) reaping unsignaled WRs when the first subsequent
	 *	   signaled WR is completed.
	 *	2) out of order read completions.
	 */
	if (!SW_CQE(hw_cqe) && (CQE_WRID_SQ_IDX(hw_cqe) != wq->sq.cidx)) {
		struct t4_swsqe *swsqe;

		pr_debug("out of order completion going in sw_sq at idx %u\n",
			 CQE_WRID_SQ_IDX(hw_cqe));
		swsqe = &wq->sq.sw_sq[CQE_WRID_SQ_IDX(hw_cqe)];
		swsqe->cqe = *hw_cqe;
		swsqe->complete = 1;
		ret = -EAGAIN;
		goto flush_wq;
	}

proc_cqe:
	*cqe = *hw_cqe;

	/*
	 * Reap the associated WR(s) that are freed up with this
	 * completion.
	 */
	if (SQ_TYPE(hw_cqe)) {
		int idx = CQE_WRID_SQ_IDX(hw_cqe);

		/*
		 * Account for any unsignaled completions completed by
		 * this signaled completion.  In this case, cidx points
		 * to the first unsignaled one, and idx points to the
		 * signaled one.  So adjust in_use based on this delta.
		 * If this is not completing any unsignaled WRs, then the
		 * delta will be 0.  Handle wrapping also!
		 */
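		/*
		 * For example, with a 16-entry SQ, cidx == 14 and idx == 2,
		 * the wrapped branch below gives in_use -= 16 + 2 - 14 = 4,
		 * retiring the unsignaled WRs at slots 14, 15, 0 and 1;
		 * t4_sq_consume() then retires the signaled WR at slot 2.
		 */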
		if (idx < wq->sq.cidx)
			wq->sq.in_use -= wq->sq.size + idx - wq->sq.cidx;
		else
			wq->sq.in_use -= idx - wq->sq.cidx;

		wq->sq.cidx = (uint16_t)idx;
		pr_debug("completing sq idx %u\n", wq->sq.cidx);
		*cookie = wq->sq.sw_sq[wq->sq.cidx].wr_id;
		if (c4iw_wr_log)
			c4iw_log_wr_stats(wq, hw_cqe);
		t4_sq_consume(wq);
	} else {
		pr_debug("completing rq idx %u\n", wq->rq.cidx);
		*cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id;
		if (c4iw_wr_log)
			c4iw_log_wr_stats(wq, hw_cqe);
		t4_rq_consume(wq);
		goto skip_cqe;
	}

flush_wq:
	/*
	 * Flush any completed cqes that are now in-order.
	 */
	flush_completed_wrs(wq, cq);

skip_cqe:
	if (SW_CQE(hw_cqe)) {
		pr_debug("cq %p cqid 0x%x skip sw cqe cidx %u\n",
			 cq, cq->cqid, cq->sw_cidx);
		t4_swcq_consume(cq);
	} else {
		pr_debug("cq %p cqid 0x%x skip hw cqe cidx %u\n",
			 cq, cq->cqid, cq->cidx);
		t4_hwcq_consume(cq);
	}
	return ret;
}

/*
 * Get one cq entry from c4iw and map it to openib.
 *
 * Returns:
 *	0			cqe returned
 *	-ENODATA		EMPTY;
 *	-EAGAIN			caller must try again
 *	any other -errno	fatal error
 */
static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
{
	struct c4iw_qp *qhp = NULL;
	struct t4_cqe uninitialized_var(cqe), *rd_cqe;
	struct t4_wq *wq;
	u32 credit = 0;
	u8 cqe_flushed;
	u64 cookie = 0;
	int ret;

	ret = t4_next_cqe(&chp->cq, &rd_cqe);

	if (ret)
		return ret;

	qhp = get_qhp(chp->rhp, CQE_QPID(rd_cqe));
	if (!qhp)
		wq = NULL;
	else {
		spin_lock(&qhp->lock);
		wq = &(qhp->wq);
	}
	ret = poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit);
	if (ret)
		goto out;

	wc->wr_id = cookie;
	wc->qp = &qhp->ibqp;
	wc->vendor_err = CQE_STATUS(&cqe);
	wc->wc_flags = 0;

	pr_debug("qpid 0x%x type %d opcode %d status 0x%x len %u wrid hi 0x%x lo 0x%x cookie 0x%llx\n",
		 CQE_QPID(&cqe),
		 CQE_TYPE(&cqe), CQE_OPCODE(&cqe),
		 CQE_STATUS(&cqe), CQE_LEN(&cqe),
		 CQE_WRID_HI(&cqe), CQE_WRID_LOW(&cqe),
		 (unsigned long long)cookie);

	if (CQE_TYPE(&cqe) == 0) {
		if (!CQE_STATUS(&cqe))
			wc->byte_len = CQE_LEN(&cqe);
		else
			wc->byte_len = 0;
		wc->opcode = IB_WC_RECV;
		if (CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_INV ||
		    CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_SE_INV) {
			wc->ex.invalidate_rkey = CQE_WRID_STAG(&cqe);
			wc->wc_flags |= IB_WC_WITH_INVALIDATE;
			c4iw_invalidate_mr(qhp->rhp, wc->ex.invalidate_rkey);
		}
	} else {
		switch (CQE_OPCODE(&cqe)) {
		case FW_RI_RDMA_WRITE:
			wc->opcode = IB_WC_RDMA_WRITE;
			break;
		case FW_RI_READ_REQ:
			wc->opcode = IB_WC_RDMA_READ;
			wc->byte_len = CQE_LEN(&cqe);
			break;
		case FW_RI_SEND_WITH_INV:
		case FW_RI_SEND_WITH_SE_INV:
			wc->opcode = IB_WC_SEND;
			wc->wc_flags |= IB_WC_WITH_INVALIDATE;
			break;
		case FW_RI_SEND:
		case FW_RI_SEND_WITH_SE:
			wc->opcode = IB_WC_SEND;
			break;

		case FW_RI_LOCAL_INV:
			wc->opcode = IB_WC_LOCAL_INV;
			break;
		case FW_RI_FAST_REGISTER:
			wc->opcode = IB_WC_REG_MR;

			/* Invalidate the MR if the fastreg failed */
			if (CQE_STATUS(&cqe) != T4_ERR_SUCCESS)
				c4iw_invalidate_mr(qhp->rhp,
						   CQE_WRID_FR_STAG(&cqe));
			break;
		case C4IW_DRAIN_OPCODE:
			wc->opcode = IB_WC_SEND;
			break;
		default:
			pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n",
			       CQE_OPCODE(&cqe), CQE_QPID(&cqe));
			ret = -EINVAL;
			goto out;
		}
	}

	if (cqe_flushed)
		wc->status = IB_WC_WR_FLUSH_ERR;
	else {

		switch (CQE_STATUS(&cqe)) {
		case T4_ERR_SUCCESS:
			wc->status = IB_WC_SUCCESS;
			break;
		case T4_ERR_STAG:
			wc->status = IB_WC_LOC_ACCESS_ERR;
			break;
		case T4_ERR_PDID:
			wc->status = IB_WC_LOC_PROT_ERR;
			break;
		case T4_ERR_QPID:
		case T4_ERR_ACCESS:
			wc->status = IB_WC_LOC_ACCESS_ERR;
			break;
		case T4_ERR_WRAP:
			wc->status = IB_WC_GENERAL_ERR;
			break;
		case T4_ERR_BOUND:
			wc->status = IB_WC_LOC_LEN_ERR;
			break;
		case T4_ERR_INVALIDATE_SHARED_MR:
		case T4_ERR_INVALIDATE_MR_WITH_MW_BOUND:
			wc->status = IB_WC_MW_BIND_ERR;
			break;
		case T4_ERR_CRC:
		case T4_ERR_MARKER:
		case T4_ERR_PDU_LEN_ERR:
		case T4_ERR_OUT_OF_RQE:
		case T4_ERR_DDP_VERSION:
		case T4_ERR_RDMA_VERSION:
		case T4_ERR_DDP_QUEUE_NUM:
		case T4_ERR_MSN:
		case T4_ERR_TBIT:
		case T4_ERR_MO:
		case T4_ERR_MSN_RANGE:
		case T4_ERR_IRD_OVERFLOW:
		case T4_ERR_OPCODE:
		case T4_ERR_INTERNAL_ERR:
			wc->status = IB_WC_FATAL_ERR;
			break;
		case T4_ERR_SWFLUSH:
			wc->status = IB_WC_WR_FLUSH_ERR;
			break;
		default:
			pr_err("Unexpected cqe_status 0x%x for QPID=0x%0x\n",
			       CQE_STATUS(&cqe), CQE_QPID(&cqe));
			wc->status = IB_WC_FATAL_ERR;
		}
	}
out:
	if (wq)
		spin_unlock(&qhp->lock);
	return ret;
}

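/*
 * c4iw_poll_cq_one() returns -EAGAIN when the underlying CQE was skipped or
 * consumed internally (e.g. unaffiliated, TERMINATE, unsignaled read, or
 * out-of-order SQ completions), so each work completion slot below is
 * retried until a reportable CQE is found or the CQ is empty (-ENODATA).
 */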
int c4iw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
	struct c4iw_cq *chp;
	unsigned long flags;
	int npolled;
	int err = 0;

	chp = to_c4iw_cq(ibcq);

	spin_lock_irqsave(&chp->lock, flags);
	for (npolled = 0; npolled < num_entries; ++npolled) {
		do {
			err = c4iw_poll_cq_one(chp, wc + npolled);
		} while (err == -EAGAIN);
		if (err)
			break;
	}
	spin_unlock_irqrestore(&chp->lock, flags);
	return !err || err == -ENODATA ? npolled : err;
}

int c4iw_destroy_cq(struct ib_cq *ib_cq)
{
	struct c4iw_cq *chp;
	struct c4iw_ucontext *ucontext;

	pr_debug("ib_cq %p\n", ib_cq);
	chp = to_c4iw_cq(ib_cq);

	remove_handle(chp->rhp, &chp->rhp->cqidr, chp->cq.cqid);
	atomic_dec(&chp->refcnt);
	wait_event(chp->wait, !atomic_read(&chp->refcnt));

	ucontext = ib_cq->uobject ? to_c4iw_ucontext(ib_cq->uobject->context)
				  : NULL;
	destroy_cq(&chp->rhp->rdev, &chp->cq,
		   ucontext ? &ucontext->uctx : &chp->cq.rdev->uctx,
		   chp->destroy_skb, chp->wr_waitp);
	c4iw_put_wr_wait(chp->wr_waitp);
	kfree(chp);
	return 0;
}

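/*
 * For user-mode CQs, the CQ queue memory and the BAR2/GTS doorbell page are
 * exported to userspace as two mmap keys returned in struct
 * c4iw_create_cq_resp (uresp.key and uresp.gts_key below).
 */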
struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
			     const struct ib_cq_init_attr *attr,
			     struct ib_ucontext *ib_context,
			     struct ib_udata *udata)
{
	int entries = attr->cqe;
	int vector = attr->comp_vector;
	struct c4iw_dev *rhp;
	struct c4iw_cq *chp;
	struct c4iw_create_cq_resp uresp;
	struct c4iw_ucontext *ucontext = NULL;
	int ret, wr_len;
	size_t memsize, hwentries;
	struct c4iw_mm_entry *mm, *mm2;

	pr_debug("ib_dev %p entries %d\n", ibdev, entries);
	if (attr->flags)
		return ERR_PTR(-EINVAL);

	rhp = to_c4iw_dev(ibdev);

	if (vector >= rhp->rdev.lldi.nciq)
		return ERR_PTR(-EINVAL);

	chp = kzalloc(sizeof(*chp), GFP_KERNEL);
	if (!chp)
		return ERR_PTR(-ENOMEM);
	chp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL);
	if (!chp->wr_waitp) {
		ret = -ENOMEM;
		goto err_free_chp;
	}
	c4iw_init_wr_wait(chp->wr_waitp);

	wr_len = sizeof(struct fw_ri_res_wr) + sizeof(struct fw_ri_res);
	chp->destroy_skb = alloc_skb(wr_len, GFP_KERNEL);
	if (!chp->destroy_skb) {
		ret = -ENOMEM;
		goto err_free_wr_wait;
	}

	if (ib_context)
		ucontext = to_c4iw_ucontext(ib_context);

	/* account for the status page. */
	entries++;

	/* IQ needs one extra entry to differentiate full vs empty. */
	entries++;

	/*
	 * entries must be multiple of 16 for HW.
	 */
	entries = roundup(entries, 16);

	/*
	 * Make actual HW queue 2x to avoid cidx_inc overflows.
	 */
	hwentries = min(entries * 2, rhp->rdev.hw_queue.t4_max_iq_size);

	/*
	 * Make HW queue at least 64 entries so GTS updates aren't too
	 * frequent.
	 */
	if (hwentries < 64)
		hwentries = 64;

	memsize = hwentries * sizeof *chp->cq.queue;

	/*
	 * memsize must be a multiple of the page size if it's a user cq.
	 */
	if (ucontext)
		memsize = roundup(memsize, PAGE_SIZE);

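	/*
	 * For example, a request for 65 CQEs becomes 67 after the status
	 * page and full/empty entry, rounds up to 80, and doubles to 160
	 * hardware entries (assuming t4_max_iq_size is larger than that).
	 */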
	chp->cq.size = hwentries;
	chp->cq.memsize = memsize;
	chp->cq.vector = vector;

	ret = create_cq(&rhp->rdev, &chp->cq,
			ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
			chp->wr_waitp);
	if (ret)
		goto err_free_skb;

	chp->rhp = rhp;
	chp->cq.size--;				/* status page */
	chp->ibcq.cqe = entries - 2;
	spin_lock_init(&chp->lock);
	spin_lock_init(&chp->comp_handler_lock);
	atomic_set(&chp->refcnt, 1);
	init_waitqueue_head(&chp->wait);
	ret = insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid);
	if (ret)
		goto err_destroy_cq;

	if (ucontext) {
		ret = -ENOMEM;
		mm = kmalloc(sizeof *mm, GFP_KERNEL);
		if (!mm)
			goto err_remove_handle;
		mm2 = kmalloc(sizeof *mm2, GFP_KERNEL);
		if (!mm2)
			goto err_free_mm;

		uresp.qid_mask = rhp->rdev.cqmask;
		uresp.cqid = chp->cq.cqid;
		uresp.size = chp->cq.size;
		uresp.memsize = chp->cq.memsize;
		spin_lock(&ucontext->mmap_lock);
		uresp.key = ucontext->key;
		ucontext->key += PAGE_SIZE;
		uresp.gts_key = ucontext->key;
		ucontext->key += PAGE_SIZE;
		spin_unlock(&ucontext->mmap_lock);
		ret = ib_copy_to_udata(udata, &uresp,
				       sizeof(uresp) - sizeof(uresp.reserved));
		if (ret)
			goto err_free_mm2;

		mm->key = uresp.key;
		mm->addr = virt_to_phys(chp->cq.queue);
		mm->len = chp->cq.memsize;
		insert_mmap(ucontext, mm);

		mm2->key = uresp.gts_key;
		mm2->addr = chp->cq.bar2_pa;
		mm2->len = PAGE_SIZE;
		insert_mmap(ucontext, mm2);
	}
	pr_debug("cqid 0x%0x chp %p size %u memsize %zu, dma_addr 0x%0llx\n",
		 chp->cq.cqid, chp, chp->cq.size,
		 chp->cq.memsize, (unsigned long long)chp->cq.dma_addr);
	return &chp->ibcq;
err_free_mm2:
	kfree(mm2);
err_free_mm:
	kfree(mm);
err_remove_handle:
	remove_handle(rhp, &rhp->cqidr, chp->cq.cqid);
err_destroy_cq:
	destroy_cq(&chp->rhp->rdev, &chp->cq,
		   ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
		   chp->destroy_skb, chp->wr_waitp);
err_free_skb:
	kfree_skb(chp->destroy_skb);
err_free_wr_wait:
	c4iw_put_wr_wait(chp->wr_waitp);
err_free_chp:
	kfree(chp);
	return ERR_PTR(ret);
}

int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
{
	return -ENOSYS;
}

int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
	struct c4iw_cq *chp;
	int ret = 0;
	unsigned long flag;

	chp = to_c4iw_cq(ibcq);
	spin_lock_irqsave(&chp->lock, flag);
	t4_arm_cq(&chp->cq,
		  (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED);
	if (flags & IB_CQ_REPORT_MISSED_EVENTS)
		ret = t4_cq_notempty(&chp->cq);
	spin_unlock_irqrestore(&chp->lock, flag);
	return ret;
}