// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause

/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
/* Copyright (c) 2008-2019, IBM Corporation */

#include <linux/errno.h>
#include <linux/types.h>
#include <linux/net.h>
#include <linux/scatterlist.h>
#include <linux/highmem.h>

#include <rdma/iw_cm.h>
#include <rdma/ib_verbs.h>

#include "siw.h"
#include "siw_verbs.h"
#include "siw_mem.h"

/*
 * siw_rx_umem()
 *
 * Receive data of @len into target referenced by @dest_addr.
 *
 * @srx:	Receive Context
 * @umem:	siw representation of target memory
 * @dest_addr:	user virtual address
 * @len:	number of bytes to place
 */
static int siw_rx_umem(struct siw_rx_stream *srx, struct siw_umem *umem,
		       u64 dest_addr, int len)
{
	int copied = 0;

	while (len) {
		struct page *p;
		int pg_off, bytes, rv;
		void *dest;

		p = siw_get_upage(umem, dest_addr);
		if (unlikely(!p)) {
			pr_warn("siw: %s: [QP %u]: bogus addr: %p, %p\n",
				__func__, qp_id(rx_qp(srx)),
				(void *)dest_addr, (void *)umem->fp_addr);
			/* siw internal error */
			srx->skb_copied += copied;
			srx->skb_new -= copied;

			return -EFAULT;
		}
		pg_off = dest_addr & ~PAGE_MASK;
		bytes = min(len, (int)PAGE_SIZE - pg_off);

		siw_dbg_qp(rx_qp(srx), "page %p, bytes=%u\n", p, bytes);

		dest = kmap_atomic(p);
		rv = skb_copy_bits(srx->skb, srx->skb_offset, dest + pg_off,
				   bytes);

		if (unlikely(rv)) {
			kunmap_atomic(dest);
			srx->skb_copied += copied;
			srx->skb_new -= copied;

			pr_warn("siw: [QP %u]: %s, len %d, page %p, rv %d\n",
				qp_id(rx_qp(srx)), __func__, len, p, rv);

			return -EFAULT;
		}
		if (srx->mpa_crc_hd) {
			if (rx_qp(srx)->kernel_verbs) {
				crypto_shash_update(srx->mpa_crc_hd,
						    (u8 *)(dest + pg_off),
						    bytes);
				kunmap_atomic(dest);
			} else {
				kunmap_atomic(dest);
				/*
				 * Do CRC on original, not target buffer.
				 * Some user land applications may
				 * concurrently write the target buffer,
				 * which would yield a broken CRC.
				 * Walking the skb twice is very inefficient.
				 * Folding the CRC into skb_copy_bits()
				 * would be much better, but is currently
				 * not supported.
				 */
				siw_crc_skb(srx, bytes);
			}
		} else {
			kunmap_atomic(dest);
		}
		srx->skb_offset += bytes;
		copied += bytes;
		len -= bytes;
		dest_addr += bytes;
		pg_off = 0;
	}
	srx->skb_copied += copied;
	srx->skb_new -= copied;

	return copied;
}

static int siw_rx_kva(struct siw_rx_stream *srx, void *kva, int len)
{
	int rv;

	siw_dbg_qp(rx_qp(srx), "kva: 0x%p, len: %u\n", kva, len);

	rv = skb_copy_bits(srx->skb, srx->skb_offset, kva, len);
	if (unlikely(rv)) {
		pr_warn("siw: [QP %u]: %s, len %d, kva 0x%p, rv %d\n",
			qp_id(rx_qp(srx)), __func__, len, kva, rv);

		return rv;
	}
	if (srx->mpa_crc_hd)
		crypto_shash_update(srx->mpa_crc_hd, (u8 *)kva, len);

	srx->skb_offset += len;
	srx->skb_copied += len;
	srx->skb_new -= len;

	return len;
}

static int siw_rx_pbl(struct siw_rx_stream *srx, int *pbl_idx,
		      struct siw_mem *mem, u64 addr, int len)
{
	struct siw_pbl *pbl = mem->pbl;
	u64 offset = addr - mem->va;
	int copied = 0;

	while (len) {
		int bytes;
		u64 buf_addr =
			siw_pbl_get_buffer(pbl, offset, &bytes, pbl_idx);
		if (!buf_addr)
			break;

		bytes = min(bytes, len);
		if (siw_rx_kva(srx, (void *)buf_addr, bytes) == bytes) {
			copied += bytes;
			offset += bytes;
			len -= bytes;
		} else {
			break;
		}
	}
	return copied;
}

/*
 * siw_rresp_check_ntoh()
 *
 * Check incoming RRESP fragment header against expected
 * header values and update expected values for potential next
 * fragment.
 *
 * NOTE: This function must be called only if a RRESP DDP segment
 *       starts but not for fragmented consecutive pieces of an
 *       already started DDP segment.
 */
static int siw_rresp_check_ntoh(struct siw_rx_stream *srx,
				struct siw_rx_fpdu *frx)
{
	struct iwarp_rdma_rresp *rresp = &srx->hdr.rresp;
	struct siw_wqe *wqe = &frx->wqe_active;
	enum ddp_ecode ecode;

	u32 sink_stag = be32_to_cpu(rresp->sink_stag);
	u64 sink_to = be64_to_cpu(rresp->sink_to);

	if (frx->first_ddp_seg) {
		srx->ddp_stag = wqe->sqe.sge[0].lkey;
		srx->ddp_to = wqe->sqe.sge[0].laddr;
		frx->pbl_idx = 0;
	}
	/* Below checks extend beyond the semantics of DDP, and
	 * into RDMAP:
	 * We check if the read response matches exactly the
	 * read request which was sent to the remote peer to
	 * trigger this read response. RFC5040/5041 do not
	 * always have a proper error code for the detected
	 * error cases. We choose 'base or bounds error' for
	 * cases where the inbound STag is valid, but offset
	 * or length do not match our response receive state.
	 */
	if (unlikely(srx->ddp_stag != sink_stag)) {
		pr_warn("siw: [QP %u]: rresp stag: %08x != %08x\n",
			qp_id(rx_qp(srx)), sink_stag, srx->ddp_stag);
		ecode = DDP_ECODE_T_INVALID_STAG;
		goto error;
	}
	if (unlikely(srx->ddp_to != sink_to)) {
		pr_warn("siw: [QP %u]: rresp off: %016llx != %016llx\n",
			qp_id(rx_qp(srx)), (unsigned long long)sink_to,
			(unsigned long long)srx->ddp_to);
		ecode = DDP_ECODE_T_BASE_BOUNDS;
		goto error;
	}
	if (unlikely(!frx->more_ddp_segs &&
		     (wqe->processed + srx->fpdu_part_rem != wqe->bytes))) {
		pr_warn("siw: [QP %u]: rresp len: %d != %d\n",
			qp_id(rx_qp(srx)),
			wqe->processed + srx->fpdu_part_rem, wqe->bytes);
		ecode = DDP_ECODE_T_BASE_BOUNDS;
		goto error;
	}
	return 0;
error:
	siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP,
			   DDP_ETYPE_TAGGED_BUF, ecode, 0);
	return -EINVAL;
}

/*
 * siw_write_check_ntoh()
 *
 * Check incoming WRITE fragment header against expected
 * header values and update expected values for potential next
 * fragment
 *
 * NOTE: This function must be called only if a WRITE DDP segment
 *       starts but not for fragmented consecutive pieces of an
 *       already started DDP segment.
 */
static int siw_write_check_ntoh(struct siw_rx_stream *srx,
				struct siw_rx_fpdu *frx)
{
	struct iwarp_rdma_write *write = &srx->hdr.rwrite;
	enum ddp_ecode ecode;

	u32 sink_stag = be32_to_cpu(write->sink_stag);
	u64 sink_to = be64_to_cpu(write->sink_to);

	if (frx->first_ddp_seg) {
		srx->ddp_stag = sink_stag;
		srx->ddp_to = sink_to;
		frx->pbl_idx = 0;
	} else {
		if (unlikely(srx->ddp_stag != sink_stag)) {
			pr_warn("siw: [QP %u]: write stag: %08x != %08x\n",
				qp_id(rx_qp(srx)), sink_stag,
				srx->ddp_stag);
			ecode = DDP_ECODE_T_INVALID_STAG;
			goto error;
		}
		if (unlikely(srx->ddp_to != sink_to)) {
			pr_warn("siw: [QP %u]: write off: %016llx != %016llx\n",
				qp_id(rx_qp(srx)),
				(unsigned long long)sink_to,
				(unsigned long long)srx->ddp_to);
			ecode = DDP_ECODE_T_BASE_BOUNDS;
			goto error;
		}
	}
	return 0;
error:
	siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP,
			   DDP_ETYPE_TAGGED_BUF, ecode, 0);
	return -EINVAL;
}

/*
 * siw_send_check_ntoh()
 *
 * Check incoming SEND fragment header against expected
 * header values and update expected MSN if no next
 * fragment expected
 *
 * NOTE: This function must be called only if a SEND DDP segment
 *       starts but not for fragmented consecutive pieces of an
 *       already started DDP segment.
 */
static int siw_send_check_ntoh(struct siw_rx_stream *srx,
			       struct siw_rx_fpdu *frx)
{
	struct iwarp_send_inv *send = &srx->hdr.send_inv;
	struct siw_wqe *wqe = &frx->wqe_active;
	enum ddp_ecode ecode;

	u32 ddp_msn = be32_to_cpu(send->ddp_msn);
	u32 ddp_mo = be32_to_cpu(send->ddp_mo);
	u32 ddp_qn = be32_to_cpu(send->ddp_qn);

	if (unlikely(ddp_qn != RDMAP_UNTAGGED_QN_SEND)) {
		pr_warn("siw: [QP %u]: invalid ddp qn %d for send\n",
			qp_id(rx_qp(srx)), ddp_qn);
		ecode = DDP_ECODE_UT_INVALID_QN;
		goto error;
	}
	if (unlikely(ddp_msn != srx->ddp_msn[RDMAP_UNTAGGED_QN_SEND])) {
		pr_warn("siw: [QP %u]: send msn: %u != %u\n",
			qp_id(rx_qp(srx)), ddp_msn,
			srx->ddp_msn[RDMAP_UNTAGGED_QN_SEND]);
		ecode = DDP_ECODE_UT_INVALID_MSN_RANGE;
		goto error;
	}
	if (unlikely(ddp_mo != wqe->processed)) {
		pr_warn("siw: [QP %u], send mo: %u != %u\n",
			qp_id(rx_qp(srx)), ddp_mo, wqe->processed);
		ecode = DDP_ECODE_UT_INVALID_MO;
		goto error;
	}
	if (frx->first_ddp_seg) {
		/* initialize user memory write position */
		frx->sge_idx = 0;
		frx->sge_off = 0;
		frx->pbl_idx = 0;

		/* only valid for SEND_INV and SEND_SE_INV operations */
		srx->inval_stag = be32_to_cpu(send->inval_stag);
	}
	if (unlikely(wqe->bytes < wqe->processed + srx->fpdu_part_rem)) {
		siw_dbg_qp(rx_qp(srx), "receive space short: %d - %d < %d\n",
			   wqe->bytes, wqe->processed, srx->fpdu_part_rem);
		wqe->wc_status = SIW_WC_LOC_LEN_ERR;
		ecode = DDP_ECODE_UT_INVALID_MSN_NOBUF;
		goto error;
	}
	return 0;
error:
	siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP,
			   DDP_ETYPE_UNTAGGED_BUF, ecode, 0);
	return -EINVAL;
}

/*
 * siw_rqe_get()
 *
 * Fetch the next valid receive queue element from the QP's receive
 * queue, or from its SRQ if one is attached, and stage it as the
 * current untagged rx WQE. The consumed RQE is marked free for reuse
 * by the application. If an armed SRQ runs below its limit, an
 * SRQ limit event is generated.
 */
static struct siw_wqe *siw_rqe_get(struct siw_qp *qp)
{
	struct siw_rqe *rqe;
	struct siw_srq *srq;
	struct siw_wqe *wqe = NULL;
	bool srq_event = false;
	unsigned long flags;

	srq = qp->srq;
	if (srq) {
		spin_lock_irqsave(&srq->lock, flags);
		if (unlikely(!srq->num_rqe))
			goto out;

		rqe = &srq->recvq[srq->rq_get % srq->num_rqe];
	} else {
		if (unlikely(!qp->recvq))
			goto out;

		rqe = &qp->recvq[qp->rq_get % qp->attrs.rq_size];
	}
	if (likely(rqe->flags == SIW_WQE_VALID)) {
		int num_sge = rqe->num_sge;

		if (likely(num_sge <= SIW_MAX_SGE)) {
			int i = 0;

			wqe = rx_wqe(&qp->rx_untagged);
			rx_type(wqe) = SIW_OP_RECEIVE;
			wqe->wr_status = SIW_WR_INPROGRESS;
			wqe->bytes = 0;
			wqe->processed = 0;

			wqe->rqe.id = rqe->id;
			wqe->rqe.num_sge = num_sge;

			while (i < num_sge) {
				wqe->rqe.sge[i].laddr = rqe->sge[i].laddr;
				wqe->rqe.sge[i].lkey = rqe->sge[i].lkey;
				wqe->rqe.sge[i].length = rqe->sge[i].length;
				wqe->bytes += wqe->rqe.sge[i].length;
				wqe->mem[i] = NULL;
				i++;
			}
			/* can be re-used by appl */
			smp_store_mb(rqe->flags, 0);
		} else {
			siw_dbg_qp(qp, "too many sge's: %d\n", rqe->num_sge);
			if (srq)
				spin_unlock_irqrestore(&srq->lock, flags);
			return NULL;
		}
		if (!srq) {
			qp->rq_get++;
		} else {
			if (srq->armed) {
				/* Test SRQ limit */
				u32 off = (srq->rq_get + srq->limit) %
					  srq->num_rqe;
				struct siw_rqe *rqe2 = &srq->recvq[off];

				if (!(rqe2->flags & SIW_WQE_VALID)) {
					srq->armed = 0;
					srq_event = true;
				}
			}
			srq->rq_get++;
		}
	}
out:
	if (srq) {
		spin_unlock_irqrestore(&srq->lock, flags);
		if (srq_event)
			siw_srq_event(srq, IB_EVENT_SRQ_LIMIT_REACHED);
	}
	return wqe;
}

/*
 * siw_proc_send:
 *
 * Process one incoming SEND and place data into memory referenced by
 * receive wqe.
 *
 * Function supports partially received sends (suspending/resuming
 * current receive wqe processing)
 *
 * return value:
 *	0:       reached the end of a DDP segment
 *	-EAGAIN: to be called again to finish the DDP segment
 */
int siw_proc_send(struct siw_qp *qp)
{
	struct siw_rx_stream *srx = &qp->rx_stream;
	struct siw_rx_fpdu *frx = &qp->rx_untagged;
	struct siw_wqe *wqe;
	u32 data_bytes; /* all data bytes available */
	u32 rcvd_bytes; /* sum of data bytes rcvd */
	int rv = 0;

	if (frx->first_ddp_seg) {
		wqe = siw_rqe_get(qp);
		if (unlikely(!wqe)) {
			siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
					   DDP_ETYPE_UNTAGGED_BUF,
					   DDP_ECODE_UT_INVALID_MSN_NOBUF, 0);
			return -ENOENT;
		}
	} else {
		wqe = rx_wqe(frx);
	}
	if (srx->state == SIW_GET_DATA_START) {
		rv = siw_send_check_ntoh(srx, frx);
		if (unlikely(rv)) {
			siw_qp_event(qp, IB_EVENT_QP_FATAL);
			return rv;
		}
		if (!srx->fpdu_part_rem) /* zero length SEND */
			return 0;
	}
	data_bytes = min(srx->fpdu_part_rem, srx->skb_new);
	rcvd_bytes = 0;

	/* A zero length SEND will skip below loop */
	while (data_bytes) {
		struct ib_pd *pd;
		struct siw_mem **mem, *mem_p;
		struct siw_sge *sge;
		u32 sge_bytes; /* data bytes avail for SGE */

		sge = &wqe->rqe.sge[frx->sge_idx];

		if (!sge->length) {
			/* just skip empty sge's */
			frx->sge_idx++;
			frx->sge_off = 0;
			frx->pbl_idx = 0;
			continue;
		}
		sge_bytes = min(data_bytes, sge->length - frx->sge_off);
		mem = &wqe->mem[frx->sge_idx];

		/*
		 * check with QP's PD if no SRQ present, SRQ's PD otherwise
		 */
		pd = qp->srq == NULL ? qp->pd : qp->srq->base_srq.pd;

		rv = siw_check_sge(pd, sge, mem, IB_ACCESS_LOCAL_WRITE,
				   frx->sge_off, sge_bytes);
		if (unlikely(rv)) {
			siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
					   DDP_ETYPE_CATASTROPHIC,
					   DDP_ECODE_CATASTROPHIC, 0);

			siw_qp_event(qp, IB_EVENT_QP_ACCESS_ERR);
			break;
		}
		mem_p = *mem;
		if (mem_p->mem_obj == NULL)
			rv = siw_rx_kva(srx,
					(void *)(sge->laddr + frx->sge_off),
					sge_bytes);
		else if (!mem_p->is_pbl)
			rv = siw_rx_umem(srx, mem_p->umem,
					 sge->laddr + frx->sge_off, sge_bytes);
		else
			rv = siw_rx_pbl(srx, &frx->pbl_idx, mem_p,
					sge->laddr + frx->sge_off, sge_bytes);

		if (unlikely(rv != sge_bytes)) {
			wqe->processed += rcvd_bytes;

			siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
					   DDP_ETYPE_CATASTROPHIC,
					   DDP_ECODE_CATASTROPHIC, 0);
			return -EINVAL;
		}
		frx->sge_off += rv;

		if (frx->sge_off == sge->length) {
			frx->sge_idx++;
			frx->sge_off = 0;
			frx->pbl_idx = 0;
		}
		data_bytes -= rv;
		rcvd_bytes += rv;

		srx->fpdu_part_rem -= rv;
		srx->fpdu_part_rcvd += rv;
	}
	wqe->processed += rcvd_bytes;

	if (!srx->fpdu_part_rem)
		return 0;

	return (rv < 0) ? rv : -EAGAIN;
}

/*
 * siw_proc_write:
 *
 * Place incoming WRITE after referencing and checking target buffer
 *
 * Function supports partially received WRITEs (suspending/resuming
 * current receive processing)
 *
 * return value:
 *	0:       reached the end of a DDP segment
 *	-EAGAIN: to be called again to finish the DDP segment
 */
int siw_proc_write(struct siw_qp *qp)
{
	struct siw_rx_stream *srx = &qp->rx_stream;
	struct siw_rx_fpdu *frx = &qp->rx_tagged;
	struct siw_mem *mem;
	int bytes, rv;

	if (srx->state == SIW_GET_DATA_START) {
		if (!srx->fpdu_part_rem) /* zero length WRITE */
			return 0;

		rv = siw_write_check_ntoh(srx, frx);
		if (unlikely(rv)) {
			siw_qp_event(qp, IB_EVENT_QP_FATAL);
			return rv;
		}
	}
	bytes = min(srx->fpdu_part_rem, srx->skb_new);

	if (frx->first_ddp_seg) {
		struct siw_wqe *wqe = rx_wqe(frx);

		rx_mem(frx) = siw_mem_id2obj(qp->sdev, srx->ddp_stag >> 8);
		if (unlikely(!rx_mem(frx))) {
			siw_dbg_qp(qp,
				   "sink stag not found/invalid, stag 0x%08x\n",
				   srx->ddp_stag);

			siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
					   DDP_ETYPE_TAGGED_BUF,
					   DDP_ECODE_T_INVALID_STAG, 0);
			return -EINVAL;
		}
		wqe->rqe.num_sge = 1;
		rx_type(wqe) = SIW_OP_WRITE;
		wqe->wr_status = SIW_WR_INPROGRESS;
	}
	mem = rx_mem(frx);

	/*
	 * Check if application re-registered memory with different
	 * key field of STag.
	 */
	if (unlikely(mem->stag != srx->ddp_stag)) {
		siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
				   DDP_ETYPE_TAGGED_BUF,
				   DDP_ECODE_T_INVALID_STAG, 0);
		return -EINVAL;
	}
	rv = siw_check_mem(qp->pd, mem, srx->ddp_to + srx->fpdu_part_rcvd,
			   IB_ACCESS_REMOTE_WRITE, bytes);
	if (unlikely(rv)) {
		siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
				   DDP_ETYPE_TAGGED_BUF, siw_tagged_error(-rv),
				   0);

		siw_qp_event(qp, IB_EVENT_QP_ACCESS_ERR);

		return -EINVAL;
	}

	if (mem->mem_obj == NULL)
		rv = siw_rx_kva(srx,
				(void *)(srx->ddp_to + srx->fpdu_part_rcvd),
				bytes);
	else if (!mem->is_pbl)
		rv = siw_rx_umem(srx, mem->umem,
				 srx->ddp_to + srx->fpdu_part_rcvd, bytes);
	else
		rv = siw_rx_pbl(srx, &frx->pbl_idx, mem,
				srx->ddp_to + srx->fpdu_part_rcvd, bytes);

	if (unlikely(rv != bytes)) {
		siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
				   DDP_ETYPE_CATASTROPHIC,
				   DDP_ECODE_CATASTROPHIC, 0);
		return -EINVAL;
	}
	srx->fpdu_part_rem -= rv;
	srx->fpdu_part_rcvd += rv;

	if (!srx->fpdu_part_rem) {
		srx->ddp_to += srx->fpdu_part_rcvd;
		return 0;
	}
	return -EAGAIN;
}

/*
 * Inbound RREQ's cannot carry user data.
 */
int siw_proc_rreq(struct siw_qp *qp)
{
	struct siw_rx_stream *srx = &qp->rx_stream;

	if (!srx->fpdu_part_rem)
		return 0;

	pr_warn("siw: [QP %u]: rreq with mpa len %d\n", qp_id(qp),
		be16_to_cpu(srx->hdr.ctrl.mpa_len));

	return -EPROTO;
}

/*
 * siw_init_rresp:
 *
 * Process inbound RDMA READ REQ. Produce a pseudo READ RESPONSE WQE.
 * Put it at the tail of the IRQ, if there is another WQE currently in
 * transmit processing. If not, make it the current WQE to be processed
 * and schedule transmit processing.
 *
 * Can be called from softirq context and from process
 * context (RREAD socket loopback case!)
 *
 * return value:
 *	0:	success,
 *	failure code otherwise
 */

static int siw_init_rresp(struct siw_qp *qp, struct siw_rx_stream *srx)
{
	struct siw_wqe *tx_work = tx_wqe(qp);
	struct siw_sqe *resp;

	uint64_t raddr = be64_to_cpu(srx->hdr.rreq.sink_to),
		 laddr = be64_to_cpu(srx->hdr.rreq.source_to);
	uint32_t length = be32_to_cpu(srx->hdr.rreq.read_size),
		 lkey = be32_to_cpu(srx->hdr.rreq.source_stag),
		 rkey = be32_to_cpu(srx->hdr.rreq.sink_stag),
		 msn = be32_to_cpu(srx->hdr.rreq.ddp_msn);

	int run_sq = 1, rv = 0;
	unsigned long flags;

	if (unlikely(msn != srx->ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ])) {
		siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
				   DDP_ETYPE_UNTAGGED_BUF,
				   DDP_ECODE_UT_INVALID_MSN_RANGE, 0);
		return -EPROTO;
	}
	spin_lock_irqsave(&qp->sq_lock, flags);

	if (tx_work->wr_status == SIW_WR_IDLE) {
		/*
		 * immediately schedule READ response w/o
		 * consuming IRQ entry: IRQ must be empty.
		 */
		tx_work->processed = 0;
		tx_work->mem[0] = NULL;
		tx_work->wr_status = SIW_WR_QUEUED;
		resp = &tx_work->sqe;
	} else {
		resp = irq_alloc_free(qp);
		run_sq = 0;
	}
	if (likely(resp)) {
		resp->opcode = SIW_OP_READ_RESPONSE;

		resp->sge[0].length = length;
		resp->sge[0].laddr = laddr;
		resp->sge[0].lkey = lkey;

		/* Keep aside message sequence number for potential
		 * error reporting during Read Response generation.
		 */
		resp->sge[1].length = msn;

		resp->raddr = raddr;
		resp->rkey = rkey;
		resp->num_sge = length ? 1 : 0;

		/* RRESP now valid as current TX wqe or placed into IRQ */
		smp_store_mb(resp->flags, SIW_WQE_VALID);
	} else {
		pr_warn("siw: [QP %u]: irq %d exceeded %d\n", qp_id(qp),
			qp->irq_put % qp->attrs.irq_size, qp->attrs.irq_size);

		siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP,
				   RDMAP_ETYPE_REMOTE_OPERATION,
				   RDMAP_ECODE_CATASTROPHIC_STREAM, 0);
		rv = -EPROTO;
	}

	spin_unlock_irqrestore(&qp->sq_lock, flags);

	if (run_sq)
		rv = siw_sq_start(qp);

	return rv;
}

/*
 * Only called at start of Read.Response processing.
 * Transfer pending Read from tip of ORQ into current rx wqe,
 * but keep ORQ entry valid until Read.Response processing done.
 * No Queue locking needed.
 */
static int siw_orqe_start_rx(struct siw_qp *qp)
{
	struct siw_sqe *orqe;
	struct siw_wqe *wqe = NULL;

	/* make sure ORQ indices are current */
	smp_mb();

	orqe = orq_get_current(qp);
	if (READ_ONCE(orqe->flags) & SIW_WQE_VALID) {
		/* RRESP is a TAGGED RDMAP operation */
		wqe = rx_wqe(&qp->rx_tagged);
		wqe->sqe.id = orqe->id;
		wqe->sqe.opcode = orqe->opcode;
		wqe->sqe.sge[0].laddr = orqe->sge[0].laddr;
		wqe->sqe.sge[0].lkey = orqe->sge[0].lkey;
		wqe->sqe.sge[0].length = orqe->sge[0].length;
		wqe->sqe.flags = orqe->flags;
		wqe->sqe.num_sge = 1;
		wqe->bytes = orqe->sge[0].length;
		wqe->processed = 0;
		wqe->mem[0] = NULL;
		/* make sure WQE is completely written before valid */
		smp_wmb();
		wqe->wr_status = SIW_WR_INPROGRESS;

		return 0;
	}
	return -EPROTO;
}

/*
 * siw_proc_rresp:
 *
 * Place incoming RRESP data into memory referenced by RREQ WQE
 * which is at the tip of the ORQ
 *
 * Function supports partially received RRESP's (suspending/resuming
 * current receive processing)
 */
int siw_proc_rresp(struct siw_qp *qp)
{
	struct siw_rx_stream *srx = &qp->rx_stream;
	struct siw_rx_fpdu *frx = &qp->rx_tagged;
	struct siw_wqe *wqe = rx_wqe(frx);
	struct siw_mem **mem, *mem_p;
	struct siw_sge *sge;
	int bytes, rv;

	if (frx->first_ddp_seg) {
		if (unlikely(wqe->wr_status != SIW_WR_IDLE)) {
			pr_warn("siw: [QP %u]: proc RRESP: status %d, op %d\n",
				qp_id(qp), wqe->wr_status, wqe->sqe.opcode);
			rv = -EPROTO;
			goto error_term;
		}
		/*
		 * fetch pending RREQ from orq
		 */
		rv = siw_orqe_start_rx(qp);
		if (rv) {
			pr_warn("siw: [QP %u]: ORQ empty at idx %d\n",
				qp_id(qp), qp->orq_get % qp->attrs.orq_size);
			goto error_term;
		}
		rv = siw_rresp_check_ntoh(srx, frx);
		if (unlikely(rv)) {
			siw_qp_event(qp, IB_EVENT_QP_FATAL);
			return rv;
		}
	} else {
		if (unlikely(wqe->wr_status != SIW_WR_INPROGRESS)) {
			pr_warn("siw: [QP %u]: resume RRESP: status %d\n",
				qp_id(qp), wqe->wr_status);
			rv = -EPROTO;
			goto error_term;
		}
	}
	if (!srx->fpdu_part_rem) /* zero length RRESPONSE */
		return 0;

	sge = wqe->sqe.sge; /* there is only one */
	mem = &wqe->mem[0];

	if (!(*mem)) {
		/*
		 * check target memory which resolves memory on first fragment
		 */
		rv = siw_check_sge(qp->pd, sge, mem, IB_ACCESS_LOCAL_WRITE, 0,
				   wqe->bytes);
		if (unlikely(rv)) {
			siw_dbg_qp(qp, "target mem check: %d\n", rv);
			wqe->wc_status = SIW_WC_LOC_PROT_ERR;

			siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
					   DDP_ETYPE_TAGGED_BUF,
					   siw_tagged_error(-rv), 0);

			siw_qp_event(qp, IB_EVENT_QP_ACCESS_ERR);

			return -EINVAL;
		}
	}
	mem_p = *mem;

	bytes = min(srx->fpdu_part_rem, srx->skb_new);

	if (mem_p->mem_obj == NULL)
		rv = siw_rx_kva(srx, (void *)(sge->laddr + wqe->processed),
				bytes);
	else if (!mem_p->is_pbl)
		rv = siw_rx_umem(srx, mem_p->umem, sge->laddr + wqe->processed,
				 bytes);
	else
		rv = siw_rx_pbl(srx, &frx->pbl_idx, mem_p,
				sge->laddr + wqe->processed, bytes);
	if (rv != bytes) {
		wqe->wc_status = SIW_WC_GENERAL_ERR;
		rv = -EINVAL;
		goto error_term;
	}
	srx->fpdu_part_rem -= rv;
	srx->fpdu_part_rcvd += rv;
	wqe->processed += rv;

	if (!srx->fpdu_part_rem) {
		srx->ddp_to += srx->fpdu_part_rcvd;
		return 0;
	}
	return -EAGAIN;

error_term:
	siw_init_terminate(qp, TERM_ERROR_LAYER_DDP, DDP_ETYPE_CATASTROPHIC,
			   DDP_ECODE_CATASTROPHIC, 0);
	return rv;
}

/*
 * siw_proc_terminate()
 *
 * Process an inbound TERMINATE message: log the reported error layer,
 * type and code, validate the untagged DDP header, and try to pick up
 * the terminated message header the peer may have attached. Always
 * results in connection reset (-ECONNRESET).
 */
int siw_proc_terminate(struct siw_qp *qp)
{
	struct siw_rx_stream *srx = &qp->rx_stream;
	struct sk_buff *skb = srx->skb;
	struct iwarp_terminate *term = &srx->hdr.terminate;
	union iwarp_hdr term_info;
	u8 *infop = (u8 *)&term_info;
	enum rdma_opcode op;
	u16 to_copy = sizeof(struct iwarp_ctrl);

	pr_warn("siw: got TERMINATE. layer %d, type %d, code %d\n",
		__rdmap_term_layer(term), __rdmap_term_etype(term),
		__rdmap_term_ecode(term));

	if (be32_to_cpu(term->ddp_qn) != RDMAP_UNTAGGED_QN_TERMINATE ||
	    be32_to_cpu(term->ddp_msn) !=
		    qp->rx_stream.ddp_msn[RDMAP_UNTAGGED_QN_TERMINATE] ||
	    be32_to_cpu(term->ddp_mo) != 0) {
		pr_warn("siw: rx bogus TERM [QN x%08x, MSN x%08x, MO x%08x]\n",
			be32_to_cpu(term->ddp_qn), be32_to_cpu(term->ddp_msn),
			be32_to_cpu(term->ddp_mo));
		return -ECONNRESET;
	}
	/*
	 * Receive remaining pieces of TERM if indicated
	 */
	if (!term->flag_m)
		return -ECONNRESET;

	/* Do not take the effort to reassemble a network fragmented
	 * TERM message
	 */
	if (srx->skb_new < sizeof(struct iwarp_ctrl_tagged))
		return -ECONNRESET;

	memset(infop, 0, sizeof(term_info));

	skb_copy_bits(skb, srx->skb_offset, infop, to_copy);

	op = __rdmap_get_opcode(&term_info.ctrl);
	if (op >= RDMAP_TERMINATE)
		goto out;

	infop += to_copy;
	srx->skb_offset += to_copy;
	srx->skb_new -= to_copy;
	srx->skb_copied += to_copy;
	srx->fpdu_part_rcvd += to_copy;
	srx->fpdu_part_rem -= to_copy;

	to_copy = iwarp_pktinfo[op].hdr_len - to_copy;

	/* Again, no network fragmented TERM's */
	if (to_copy + MPA_CRC_SIZE > srx->skb_new)
		return -ECONNRESET;

	skb_copy_bits(skb, srx->skb_offset, infop, to_copy);

	if (term->flag_r) {
		siw_dbg_qp(qp, "TERM reports RDMAP hdr type %u, len %u (%s)\n",
			   op, be16_to_cpu(term_info.ctrl.mpa_len),
			   term->flag_m ? "valid" : "invalid");
	} else if (term->flag_d) {
		siw_dbg_qp(qp, "TERM reports DDP hdr type %u, len %u (%s)\n",
			   op, be16_to_cpu(term_info.ctrl.mpa_len),
			   term->flag_m ? "valid" : "invalid");
	}
out:
	srx->skb_new -= to_copy;
	srx->skb_offset += to_copy;
	srx->skb_copied += to_copy;
	srx->fpdu_part_rcvd += to_copy;
	srx->fpdu_part_rem -= to_copy;

	return -ECONNRESET;
}

/*
 * siw_get_trailer()
 *
 * Read the FPDU trailer (pad bytes + CRC) from the socket buffer and,
 * if CRC checking is enabled for this stream, verify the received CRC
 * against the locally computed value.
 */
static int siw_get_trailer(struct siw_qp *qp, struct siw_rx_stream *srx)
{
	struct sk_buff *skb = srx->skb;
	u8 *tbuf = (u8 *)&srx->trailer.crc - srx->pad;
	__wsum crc_in, crc_own = 0;

	siw_dbg_qp(qp, "expected %d, available %d, pad %u\n",
		   srx->fpdu_part_rem, srx->skb_new, srx->pad);

	if (srx->skb_new < srx->fpdu_part_rem)
		return -EAGAIN;

	skb_copy_bits(skb, srx->skb_offset, tbuf, srx->fpdu_part_rem);

	if (srx->mpa_crc_hd && srx->pad)
		crypto_shash_update(srx->mpa_crc_hd, tbuf, srx->pad);

	srx->skb_new -= srx->fpdu_part_rem;
	srx->skb_offset += srx->fpdu_part_rem;
	srx->skb_copied += srx->fpdu_part_rem;

	if (!srx->mpa_crc_hd)
		return 0;

	/*
	 * CRC32 is computed, transmitted and received directly in NBO,
	 * so there's never a reason to convert byte order.
	 */
	crypto_shash_final(srx->mpa_crc_hd, (u8 *)&crc_own);
	crc_in = (__force __wsum)srx->trailer.crc;

	if (unlikely(crc_in != crc_own)) {
		pr_warn("siw: crc error. in: %08x, own %08x, op %u\n",
			crc_in, crc_own, qp->rx_stream.rdmap_op);

		siw_init_terminate(qp, TERM_ERROR_LAYER_LLP,
				   LLP_ETYPE_MPA,
				   LLP_ECODE_RECEIVED_CRC, 0);
		return -EINVAL;
	}
	return 0;
}

#define MIN_DDP_HDR sizeof(struct iwarp_ctrl_tagged)

/*
 * siw_get_hdr()
 *
 * Assemble the DDP/RDMAP header of the current FPDU from the socket
 * buffer, validate DDP and RDMAP versions and the RDMAP opcode, and
 * set up the matching (tagged or untagged) receive context.
 */
static int siw_get_hdr(struct siw_rx_stream *srx)
{
	struct sk_buff *skb = srx->skb;
	struct siw_qp *qp = rx_qp(srx);
	struct iwarp_ctrl *c_hdr = &srx->hdr.ctrl;
	struct siw_rx_fpdu *frx;
	u8 opcode;
	int bytes;

	if (srx->fpdu_part_rcvd < MIN_DDP_HDR) {
		/*
		 * copy a minimum sized (tagged) DDP frame control part
		 */
		bytes = min_t(int, srx->skb_new,
			      MIN_DDP_HDR - srx->fpdu_part_rcvd);

		skb_copy_bits(skb, srx->skb_offset,
			      (char *)c_hdr + srx->fpdu_part_rcvd, bytes);

		srx->fpdu_part_rcvd += bytes;

		srx->skb_new -= bytes;
		srx->skb_offset += bytes;
		srx->skb_copied += bytes;

		if (srx->fpdu_part_rcvd < MIN_DDP_HDR)
			return -EAGAIN;

		if (unlikely(__ddp_get_version(c_hdr) != DDP_VERSION)) {
			enum ddp_etype etype;
			enum ddp_ecode ecode;

			pr_warn("siw: received ddp version unsupported %d\n",
				__ddp_get_version(c_hdr));

			if (c_hdr->ddp_rdmap_ctrl & DDP_FLAG_TAGGED) {
				etype = DDP_ETYPE_TAGGED_BUF;
				ecode = DDP_ECODE_T_VERSION;
			} else {
				etype = DDP_ETYPE_UNTAGGED_BUF;
				ecode = DDP_ECODE_UT_VERSION;
			}
			siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP,
					   etype, ecode, 0);
			return -EINVAL;
		}
		if (unlikely(__rdmap_get_version(c_hdr) != RDMAP_VERSION)) {
			pr_warn("siw: received rdmap version unsupported %d\n",
				__rdmap_get_version(c_hdr));

			siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_RDMAP,
					   RDMAP_ETYPE_REMOTE_OPERATION,
					   RDMAP_ECODE_VERSION, 0);
			return -EINVAL;
		}
		opcode = __rdmap_get_opcode(c_hdr);

		if (opcode > RDMAP_TERMINATE) {
			pr_warn("siw: received unknown packet type %u\n",
				opcode);

			siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_RDMAP,
					   RDMAP_ETYPE_REMOTE_OPERATION,
					   RDMAP_ECODE_OPCODE, 0);
			return -EINVAL;
		}
		siw_dbg_qp(rx_qp(srx), "new header, opcode %u\n", opcode);
	} else {
		opcode = __rdmap_get_opcode(c_hdr);
	}
	set_rx_fpdu_context(qp, opcode);
	frx = qp->rx_fpdu;

	/*
	 * Figure out len of current hdr: variable length of
	 * iwarp hdr may force us to copy hdr information in
	 * two steps. Only tagged DDP messages are already
	 * completely received.
	 */
	if (iwarp_pktinfo[opcode].hdr_len > sizeof(struct iwarp_ctrl_tagged)) {
		bytes = iwarp_pktinfo[opcode].hdr_len - MIN_DDP_HDR;

		if (srx->skb_new < bytes)
			return -EAGAIN;

		skb_copy_bits(skb, srx->skb_offset,
			      (char *)c_hdr + srx->fpdu_part_rcvd, bytes);

		srx->fpdu_part_rcvd += bytes;

		srx->skb_new -= bytes;
		srx->skb_offset += bytes;
		srx->skb_copied += bytes;
	}

	/*
	 * DDP/RDMAP header receive completed. Check if the current
	 * DDP segment starts a new RDMAP message or continues a previously
	 * started RDMAP message.
	 *
	 * Alternating reception of DDP segments (or FPDUs) from incomplete
	 * tagged and untagged RDMAP messages is supported, as long as
	 * the current tagged or untagged message gets eventually completed
	 * w/o intersection from another message of the same type
	 * (tagged/untagged). E.g., a WRITE can get intersected by a SEND,
	 * but not by a READ RESPONSE etc.
	 */
	if (srx->mpa_crc_hd) {
		/*
		 * Restart CRC computation
		 */
		crypto_shash_init(srx->mpa_crc_hd);
		crypto_shash_update(srx->mpa_crc_hd, (u8 *)c_hdr,
				    srx->fpdu_part_rcvd);
	}
	if (frx->more_ddp_segs) {
		frx->first_ddp_seg = 0;
		if (frx->prev_rdmap_op != opcode) {
			pr_warn("siw: packet intersection: %u : %u\n",
				frx->prev_rdmap_op, opcode);
			/*
			 * The last inbound RDMA operation of same type
			 * (tagged or untagged) is left unfinished.
			 * To complete it in error, make it the current
			 * operation again, even with the header already
			 * overwritten. For error handling, only the opcode
			 * and current rx context are relevant.
			 */
			set_rx_fpdu_context(qp, frx->prev_rdmap_op);
			__rdmap_set_opcode(c_hdr, frx->prev_rdmap_op);
			return -EPROTO;
		}
	} else {
		frx->prev_rdmap_op = opcode;
		frx->first_ddp_seg = 1;
	}
	frx->more_ddp_segs = c_hdr->ddp_rdmap_ctrl & DDP_FLAG_LAST ? 0 : 1;

	return 0;
}

/*
 * siw_check_tx_fence()
 *
 * Called after a completed inbound READ RESPONSE: frees the current
 * ORQ entry and, if SQ processing was fenced on the ORQ, resumes
 * transmission.
 */
static int siw_check_tx_fence(struct siw_qp *qp)
{
	struct siw_wqe *tx_waiting = tx_wqe(qp);
	struct siw_sqe *rreq;
	int resume_tx = 0, rv = 0;
	unsigned long flags;

	spin_lock_irqsave(&qp->orq_lock, flags);

	rreq = orq_get_current(qp);

	/* free current orq entry */
	WRITE_ONCE(rreq->flags, 0);

	if (qp->tx_ctx.orq_fence) {
		if (unlikely(tx_waiting->wr_status != SIW_WR_QUEUED)) {
			pr_warn("siw: [QP %u]: fence resume: bad status %d\n",
				qp_id(qp), tx_waiting->wr_status);
			rv = -EPROTO;
			goto out;
		}
		/* resume SQ processing */
		if (tx_waiting->sqe.opcode == SIW_OP_READ ||
		    tx_waiting->sqe.opcode == SIW_OP_READ_LOCAL_INV) {
			rreq = orq_get_tail(qp);
			if (unlikely(!rreq)) {
				pr_warn("siw: [QP %u]: no ORQE\n", qp_id(qp));
				rv = -EPROTO;
				goto out;
			}
			siw_read_to_orq(rreq, &tx_waiting->sqe);

			qp->orq_put++;
			qp->tx_ctx.orq_fence = 0;
			resume_tx = 1;

		} else if (siw_orq_empty(qp)) {
			qp->tx_ctx.orq_fence = 0;
			resume_tx = 1;
		} else {
			pr_warn("siw: [QP %u]: fence resume: orq idx: %d:%d\n",
				qp_id(qp), qp->orq_get, qp->orq_put);
			rv = -EPROTO;
		}
	}
	qp->orq_get++;
out:
	spin_unlock_irqrestore(&qp->orq_lock, flags);

	if (resume_tx)
		rv = siw_sq_start(qp);

	return rv;
}

/*
 * siw_rdmap_complete()
 *
 * Complete processing of an RDMA message after receiving all
 * DDP segments, or abort processing after encountering an error case.
 *
 * o SENDs + RRESPs need completion processing,
 * o RREQs need READ RESPONSE initialization,
 * o WRITEs need memory dereferencing.
 *
 * TODO: Failed WRITEs need local error to be surfaced.
 */
static int siw_rdmap_complete(struct siw_qp *qp, int error)
{
	struct siw_rx_stream *srx = &qp->rx_stream;
	struct siw_wqe *wqe = rx_wqe(qp->rx_fpdu);
	enum siw_wc_status wc_status = wqe->wc_status;
	u8 opcode = __rdmap_get_opcode(&srx->hdr.ctrl);
	int rv = 0;

	switch (opcode) {
	case RDMAP_SEND_SE:
	case RDMAP_SEND_SE_INVAL:
		wqe->rqe.flags |= SIW_WQE_SOLICITED;
		/* Fall through */

	case RDMAP_SEND:
	case RDMAP_SEND_INVAL:
		if (wqe->wr_status == SIW_WR_IDLE)
			break;

		srx->ddp_msn[RDMAP_UNTAGGED_QN_SEND]++;

		if (error != 0 && wc_status == SIW_WC_SUCCESS)
			wc_status = SIW_WC_GENERAL_ERR;
		/*
		 * Handle STag invalidation request
		 */
		if (wc_status == SIW_WC_SUCCESS &&
		    (opcode == RDMAP_SEND_INVAL ||
		     opcode == RDMAP_SEND_SE_INVAL)) {
			rv = siw_invalidate_stag(qp->pd, srx->inval_stag);
			if (rv) {
				siw_init_terminate(
					qp, TERM_ERROR_LAYER_RDMAP,
					rv == -EACCES ?
						RDMAP_ETYPE_REMOTE_PROTECTION :
						RDMAP_ETYPE_REMOTE_OPERATION,
					RDMAP_ECODE_CANNOT_INVALIDATE, 0);

				wc_status = SIW_WC_REM_INV_REQ_ERR;
			}
			rv = siw_rqe_complete(qp, &wqe->rqe, wqe->processed,
					      rv ? 0 : srx->inval_stag,
					      wc_status);
		} else {
			rv = siw_rqe_complete(qp, &wqe->rqe, wqe->processed,
					      0, wc_status);
		}
		siw_wqe_put_mem(wqe, SIW_OP_RECEIVE);
		break;

	case RDMAP_RDMA_READ_RESP:
		if (wqe->wr_status == SIW_WR_IDLE)
			break;

		if (error != 0) {
			if ((srx->state == SIW_GET_HDR &&
			     qp->rx_fpdu->first_ddp_seg) || error == -ENODATA)
				/* possible RREQ in ORQ left untouched */
				break;

			if (wc_status == SIW_WC_SUCCESS)
				wc_status = SIW_WC_GENERAL_ERR;
		} else if (qp->kernel_verbs &&
			   rx_type(wqe) == SIW_OP_READ_LOCAL_INV) {
			/*
			 * Handle any STag invalidation request
			 */
			rv = siw_invalidate_stag(qp->pd, wqe->sqe.sge[0].lkey);
			if (rv) {
				siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP,
						   RDMAP_ETYPE_CATASTROPHIC,
						   RDMAP_ECODE_UNSPECIFIED, 0);

				if (wc_status == SIW_WC_SUCCESS) {
					wc_status = SIW_WC_GENERAL_ERR;
					error = rv;
				}
			}
		}
		/*
		 * All errors turn the wqe into signalled.
		 */
		if ((wqe->sqe.flags & SIW_WQE_SIGNALLED) || error != 0)
			rv = siw_sqe_complete(qp, &wqe->sqe, wqe->processed,
					      wc_status);
		siw_wqe_put_mem(wqe, SIW_OP_READ);

		if (!error)
			rv = siw_check_tx_fence(qp);
		else
			/* Disable current ORQ element */
			WRITE_ONCE(orq_get_current(qp)->flags, 0);
		break;

	case RDMAP_RDMA_READ_REQ:
		if (!error) {
			rv = siw_init_rresp(qp, srx);
			srx->ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ]++;
		}
		break;

	case RDMAP_RDMA_WRITE:
		if (wqe->wr_status == SIW_WR_IDLE)
			break;

		/*
		 * Free References from memory object if
		 * attached to receive context (inbound WRITE).
		 * While a zero-length WRITE is allowed,
		 * no memory reference got created.
		 */
		if (rx_mem(&qp->rx_tagged)) {
			siw_mem_put(rx_mem(&qp->rx_tagged));
			rx_mem(&qp->rx_tagged) = NULL;
		}
		break;

	default:
		break;
	}
	wqe->wr_status = SIW_WR_IDLE;

	return rv;
}

/*
 * siw_tcp_rx_data()
 *
 * Main routine to consume inbound TCP payload
 *
 * @rd_desc:	read descriptor
 * @skb:	socket buffer
 * @off:	offset in skb
 * @len:	skb->len - offset : payload in skb
 */
int siw_tcp_rx_data(read_descriptor_t *rd_desc, struct sk_buff *skb,
		    unsigned int off, size_t len)
{
	struct siw_qp *qp = rd_desc->arg.data;
	struct siw_rx_stream *srx = &qp->rx_stream;
	int rv;

	srx->skb = skb;
	srx->skb_new = skb->len - off;
	srx->skb_offset = off;
	srx->skb_copied = 0;

	siw_dbg_qp(qp, "new data, len %d\n", srx->skb_new);

	while (srx->skb_new) {
		int run_completion = 1;

		if (unlikely(srx->rx_suspend)) {
			/* Do not process any more data */
			srx->skb_copied += srx->skb_new;
			break;
		}
		switch (srx->state) {
		case SIW_GET_HDR:
			rv = siw_get_hdr(srx);
			if (!rv) {
				srx->fpdu_part_rem =
					be16_to_cpu(srx->hdr.ctrl.mpa_len) -
					srx->fpdu_part_rcvd + MPA_HDR_SIZE;

				if (srx->fpdu_part_rem)
					srx->pad = -srx->fpdu_part_rem & 0x3;
				else
					srx->pad = 0;

				srx->state = SIW_GET_DATA_START;
				srx->fpdu_part_rcvd = 0;
			}
			break;

		case SIW_GET_DATA_MORE:
			/*
			 * Another data fragment of the same DDP segment.
			 * Setting first_ddp_seg = 0 avoids repeating
			 * initializations that shall occur only once per
			 * DDP segment.
			 */
			qp->rx_fpdu->first_ddp_seg = 0;
			/* Fall through */

		case SIW_GET_DATA_START:
			/*
			 * Headers will be checked by the opcode-specific
			 * data receive function below.
			 */
			rv = iwarp_pktinfo[qp->rx_stream.rdmap_op].rx_data(qp);
			if (!rv) {
				int mpa_len =
					be16_to_cpu(srx->hdr.ctrl.mpa_len)
					+ MPA_HDR_SIZE;

				srx->fpdu_part_rem = (-mpa_len & 0x3)
						     + MPA_CRC_SIZE;
				srx->fpdu_part_rcvd = 0;
				srx->state = SIW_GET_TRAILER;
			} else {
				if (unlikely(rv == -ECONNRESET))
					run_completion = 0;
				else
					srx->state = SIW_GET_DATA_MORE;
			}
			break;

		case SIW_GET_TRAILER:
			/*
			 * read CRC + any padding
			 */
			rv = siw_get_trailer(qp, srx);
			if (likely(!rv)) {
				/*
				 * FPDU completed.
				 * complete RDMAP message if last fragment
				 */
				srx->state = SIW_GET_HDR;
				srx->fpdu_part_rcvd = 0;

				if (!(srx->hdr.ctrl.ddp_rdmap_ctrl &
				      DDP_FLAG_LAST))
					/* more frags */
					break;

				rv = siw_rdmap_complete(qp, 0);
				run_completion = 0;
			}
			break;

		default:
			pr_warn("QP[%u]: RX out of state\n", qp_id(qp));
			rv = -EPROTO;
			run_completion = 0;
		}
		if (unlikely(rv != 0 && rv != -EAGAIN)) {
			if ((srx->state > SIW_GET_HDR ||
			     qp->rx_fpdu->more_ddp_segs) && run_completion)
				siw_rdmap_complete(qp, rv);

			siw_dbg_qp(qp, "rx error %d, rx state %d\n", rv,
				   srx->state);

			siw_qp_cm_drop(qp, 1);

			break;
		}
		if (rv) {
			siw_dbg_qp(qp, "fpdu fragment, state %d, missing %d\n",
				   srx->state, srx->fpdu_part_rem);
			break;
		}
	}
	return srx->skb_copied;
}