// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

#include <linux/skbuff.h>

#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"

static char *resp_state_name[] = {
	[RESPST_NONE] = "NONE",
	[RESPST_GET_REQ] = "GET_REQ",
	[RESPST_CHK_PSN] = "CHK_PSN",
	[RESPST_CHK_OP_SEQ] = "CHK_OP_SEQ",
	[RESPST_CHK_OP_VALID] = "CHK_OP_VALID",
	[RESPST_CHK_RESOURCE] = "CHK_RESOURCE",
	[RESPST_CHK_LENGTH] = "CHK_LENGTH",
	[RESPST_CHK_RKEY] = "CHK_RKEY",
	[RESPST_EXECUTE] = "EXECUTE",
	[RESPST_READ_REPLY] = "READ_REPLY",
	[RESPST_ATOMIC_REPLY] = "ATOMIC_REPLY",
	[RESPST_ATOMIC_WRITE_REPLY] = "ATOMIC_WRITE_REPLY",
	[RESPST_PROCESS_FLUSH] = "PROCESS_FLUSH",
	[RESPST_COMPLETE] = "COMPLETE",
	[RESPST_ACKNOWLEDGE] = "ACKNOWLEDGE",
	[RESPST_CLEANUP] = "CLEANUP",
	[RESPST_DUPLICATE_REQUEST] = "DUPLICATE_REQUEST",
	[RESPST_ERR_MALFORMED_WQE] = "ERR_MALFORMED_WQE",
	[RESPST_ERR_UNSUPPORTED_OPCODE] = "ERR_UNSUPPORTED_OPCODE",
	[RESPST_ERR_MISALIGNED_ATOMIC] = "ERR_MISALIGNED_ATOMIC",
	[RESPST_ERR_PSN_OUT_OF_SEQ] = "ERR_PSN_OUT_OF_SEQ",
	[RESPST_ERR_MISSING_OPCODE_FIRST] = "ERR_MISSING_OPCODE_FIRST",
	[RESPST_ERR_MISSING_OPCODE_LAST_C] = "ERR_MISSING_OPCODE_LAST_C",
	[RESPST_ERR_MISSING_OPCODE_LAST_D1E] = "ERR_MISSING_OPCODE_LAST_D1E",
	[RESPST_ERR_TOO_MANY_RDMA_ATM_REQ] = "ERR_TOO_MANY_RDMA_ATM_REQ",
	[RESPST_ERR_RNR] = "ERR_RNR",
	[RESPST_ERR_RKEY_VIOLATION] = "ERR_RKEY_VIOLATION",
	[RESPST_ERR_INVALIDATE_RKEY] = "ERR_INVALIDATE_RKEY_VIOLATION",
	[RESPST_ERR_LENGTH] = "ERR_LENGTH",
	[RESPST_ERR_CQ_OVERFLOW] = "ERR_CQ_OVERFLOW",
	[RESPST_ERROR] = "ERROR",
	[RESPST_RESET] = "RESET",
	[RESPST_DONE] = "DONE",
	[RESPST_EXIT] = "EXIT",
};

/* rxe_recv calls here to add a request packet to the input queue */
void rxe_resp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb)
{
	int must_sched;
	struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);

	skb_queue_tail(&qp->req_pkts, skb);

	must_sched = (pkt->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST) ||
			(skb_queue_len(&qp->req_pkts) > 1);

	if (must_sched)
		rxe_sched_task(&qp->resp.task);
	else
		rxe_run_task(&qp->resp.task);
}

static inline enum resp_states get_req(struct rxe_qp *qp,
				       struct rxe_pkt_info **pkt_p)
{
	struct sk_buff *skb;

	if (qp->resp.state == QP_STATE_ERROR) {
		while ((skb = skb_dequeue(&qp->req_pkts))) {
			rxe_put(qp);
			kfree_skb(skb);
			ib_device_put(qp->ibqp.device);
		}

		/* go drain recv wr queue */
		return RESPST_CHK_RESOURCE;
	}

	skb = skb_peek(&qp->req_pkts);
	if (!skb)
		return RESPST_EXIT;

	*pkt_p = SKB_TO_PKT(skb);

	return (qp->resp.res) ? RESPST_READ_REPLY : RESPST_CHK_PSN;
}

/* Compare the packet PSN against the PSN the responder expects next
 * and route out-of-sequence (RC) or out-of-order (UC) packets.
 */
static enum resp_states check_psn(struct rxe_qp *qp,
				  struct rxe_pkt_info *pkt)
{
	int diff = psn_compare(pkt->psn, qp->resp.psn);
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);

	switch (qp_type(qp)) {
	case IB_QPT_RC:
		if (diff > 0) {
			if (qp->resp.sent_psn_nak)
				return RESPST_CLEANUP;

			qp->resp.sent_psn_nak = 1;
			rxe_counter_inc(rxe, RXE_CNT_OUT_OF_SEQ_REQ);
			return RESPST_ERR_PSN_OUT_OF_SEQ;

		} else if (diff < 0) {
			rxe_counter_inc(rxe, RXE_CNT_DUP_REQ);
			return RESPST_DUPLICATE_REQUEST;
		}

		if (qp->resp.sent_psn_nak)
			qp->resp.sent_psn_nak = 0;

		break;

	case IB_QPT_UC:
		if (qp->resp.drop_msg || diff != 0) {
			if (pkt->mask & RXE_START_MASK) {
				qp->resp.drop_msg = 0;
				return RESPST_CHK_OP_SEQ;
			}

			qp->resp.drop_msg = 1;
			return RESPST_CLEANUP;
		}
		break;
	default:
		break;
	}

	return RESPST_CHK_OP_SEQ;
}

/* Check that the packet opcode is a legal successor of the opcode of
 * the previous packet in the current message.
 */
static enum resp_states check_op_seq(struct rxe_qp *qp,
				     struct rxe_pkt_info *pkt)
{
	switch (qp_type(qp)) {
	case IB_QPT_RC:
		switch (qp->resp.opcode) {
		case IB_OPCODE_RC_SEND_FIRST:
		case IB_OPCODE_RC_SEND_MIDDLE:
			switch (pkt->opcode) {
			case IB_OPCODE_RC_SEND_MIDDLE:
			case IB_OPCODE_RC_SEND_LAST:
			case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE:
			case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE:
				return RESPST_CHK_OP_VALID;
			default:
				return RESPST_ERR_MISSING_OPCODE_LAST_C;
			}

		case IB_OPCODE_RC_RDMA_WRITE_FIRST:
		case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
			switch (pkt->opcode) {
			case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
			case IB_OPCODE_RC_RDMA_WRITE_LAST:
			case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
				return RESPST_CHK_OP_VALID;
			default:
				return RESPST_ERR_MISSING_OPCODE_LAST_C;
			}

		default:
			switch (pkt->opcode) {
			case IB_OPCODE_RC_SEND_MIDDLE:
			case IB_OPCODE_RC_SEND_LAST:
			case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE:
			case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE:
			case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
			case IB_OPCODE_RC_RDMA_WRITE_LAST:
			case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
				return RESPST_ERR_MISSING_OPCODE_FIRST;
			default:
				return RESPST_CHK_OP_VALID;
			}
		}
		break;

	case IB_QPT_UC:
		switch (qp->resp.opcode) {
		case IB_OPCODE_UC_SEND_FIRST:
		case IB_OPCODE_UC_SEND_MIDDLE:
			switch (pkt->opcode) {
			case IB_OPCODE_UC_SEND_MIDDLE:
			case IB_OPCODE_UC_SEND_LAST:
			case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE:
				return RESPST_CHK_OP_VALID;
			default:
				return RESPST_ERR_MISSING_OPCODE_LAST_D1E;
			}

		case IB_OPCODE_UC_RDMA_WRITE_FIRST:
		case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
			switch (pkt->opcode) {
			case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
			case IB_OPCODE_UC_RDMA_WRITE_LAST:
			case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
				return RESPST_CHK_OP_VALID;
			default:
				return RESPST_ERR_MISSING_OPCODE_LAST_D1E;
			}

		default:
			switch (pkt->opcode) {
			case IB_OPCODE_UC_SEND_MIDDLE:
			case IB_OPCODE_UC_SEND_LAST:
			case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE:
			case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
			case IB_OPCODE_UC_RDMA_WRITE_LAST:
			case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
				qp->resp.drop_msg = 1;
				return RESPST_CLEANUP;
			default:
				return RESPST_CHK_OP_VALID;
			}
		}
		break;

	default:
		return RESPST_CHK_OP_VALID;
	}
}

static bool check_qp_attr_access(struct rxe_qp *qp,
				 struct rxe_pkt_info *pkt)
{
	if (((pkt->mask & RXE_READ_MASK) &&
	     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) ||
	    ((pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) &&
	     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) ||
	    ((pkt->mask & RXE_ATOMIC_MASK) &&
	     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
		return false;

	if (pkt->mask & RXE_FLUSH_MASK) {
		u32 flush_type = feth_plt(pkt);

		if ((flush_type & IB_FLUSH_GLOBAL &&
		     !(qp->attr.qp_access_flags & IB_ACCESS_FLUSH_GLOBAL)) ||
		    (flush_type & IB_FLUSH_PERSISTENT &&
		     !(qp->attr.qp_access_flags & IB_ACCESS_FLUSH_PERSISTENT)))
			return false;
	}

	return true;
}

static enum resp_states check_op_valid(struct rxe_qp *qp,
				       struct rxe_pkt_info *pkt)
{
	switch (qp_type(qp)) {
	case IB_QPT_RC:
		if (!check_qp_attr_access(qp, pkt))
			return RESPST_ERR_UNSUPPORTED_OPCODE;

		break;

	case IB_QPT_UC:
		if ((pkt->mask & RXE_WRITE_MASK) &&
		    !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) {
			qp->resp.drop_msg = 1;
			return RESPST_CLEANUP;
		}

		break;

	case IB_QPT_UD:
	case IB_QPT_GSI:
		break;

	default:
		WARN_ON_ONCE(1);
		break;
	}

	return RESPST_CHK_RESOURCE;
}

static enum resp_states get_srq_wqe(struct rxe_qp *qp)
{
	struct rxe_srq *srq = qp->srq;
	struct rxe_queue *q = srq->rq.queue;
	struct rxe_recv_wqe *wqe;
	struct ib_event ev;
	unsigned int count;
	size_t size;
	unsigned long flags;

	if (srq->error)
		return RESPST_ERR_RNR;

	spin_lock_irqsave(&srq->rq.consumer_lock, flags);

	wqe = queue_head(q, QUEUE_TYPE_FROM_CLIENT);
	if (!wqe) {
		spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
		return RESPST_ERR_RNR;
	}

	/* don't trust user space data */
	if (unlikely(wqe->dma.num_sge > srq->rq.max_sge)) {
		spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
		rxe_dbg_qp(qp, "invalid num_sge in SRQ entry\n");
		return RESPST_ERR_MALFORMED_WQE;
	}
	size = sizeof(*wqe) + wqe->dma.num_sge * sizeof(struct rxe_sge);
	memcpy(&qp->resp.srq_wqe, wqe, size);

	qp->resp.wqe = &qp->resp.srq_wqe.wqe;
	queue_advance_consumer(q, QUEUE_TYPE_FROM_CLIENT);
	count = queue_count(q, QUEUE_TYPE_FROM_CLIENT);

	if (srq->limit && srq->ibsrq.event_handler && (count < srq->limit)) {
		srq->limit = 0;
		goto event;
	}

	spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
	return RESPST_CHK_LENGTH;

event:
	spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
	ev.device = qp->ibqp.device;
	ev.element.srq = qp->ibqp.srq;
	ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
	srq->ibsrq.event_handler(&ev, srq->ibsrq.srq_context);
	return RESPST_CHK_LENGTH;
}

static enum resp_states check_resource(struct rxe_qp *qp,
				       struct rxe_pkt_info *pkt)
{
	struct rxe_srq *srq = qp->srq;

	if (qp->resp.state == QP_STATE_ERROR) {
		if (qp->resp.wqe) {
			qp->resp.status = IB_WC_WR_FLUSH_ERR;
			return RESPST_COMPLETE;
		} else if (!srq) {
			qp->resp.wqe = queue_head(qp->rq.queue,
					QUEUE_TYPE_FROM_CLIENT);
			if (qp->resp.wqe) {
				qp->resp.status = IB_WC_WR_FLUSH_ERR;
				return RESPST_COMPLETE;
			} else {
				return RESPST_EXIT;
			}
		} else {
			return RESPST_EXIT;
		}
	}

	if (pkt->mask & (RXE_READ_OR_ATOMIC_MASK | RXE_ATOMIC_WRITE_MASK)) {
		/* it is the requester's job to not send
		 * too many read/atomic ops, we just
		 * recycle the responder resource queue
		 */
		if (likely(qp->attr.max_dest_rd_atomic > 0))
			return RESPST_CHK_LENGTH;
		else
			return RESPST_ERR_TOO_MANY_RDMA_ATM_REQ;
	}

	if (pkt->mask & RXE_RWR_MASK) {
		if (srq)
			return get_srq_wqe(qp);

		qp->resp.wqe = queue_head(qp->rq.queue,
				QUEUE_TYPE_FROM_CLIENT);
		return (qp->resp.wqe) ? RESPST_CHK_LENGTH : RESPST_ERR_RNR;
	}

	return RESPST_CHK_LENGTH;
}

static enum resp_states rxe_resp_check_length(struct rxe_qp *qp,
					      struct rxe_pkt_info *pkt)
{
	/*
	 * See IBA C9-92
	 * For UD QPs we only check if the packet will fit in the
	 * receive buffer later. For rdma operations additional
	 * length checks are performed in check_rkey.
	 */
	if (pkt->mask & RXE_PAYLOAD_MASK && ((qp_type(qp) == IB_QPT_RC) ||
					     (qp_type(qp) == IB_QPT_UC))) {
		unsigned int mtu = qp->mtu;
		unsigned int payload = payload_size(pkt);

		if ((pkt->mask & RXE_START_MASK) &&
		    (pkt->mask & RXE_END_MASK)) {
			if (unlikely(payload > mtu)) {
				rxe_dbg_qp(qp, "only packet too long");
				return RESPST_ERR_LENGTH;
			}
		} else if ((pkt->mask & RXE_START_MASK) ||
			   (pkt->mask & RXE_MIDDLE_MASK)) {
			if (unlikely(payload != mtu)) {
				rxe_dbg_qp(qp, "first or middle packet not mtu");
				return RESPST_ERR_LENGTH;
			}
		} else if (pkt->mask & RXE_END_MASK) {
			if (unlikely((payload == 0) || (payload > mtu))) {
				rxe_dbg_qp(qp, "last packet zero or too long");
				return RESPST_ERR_LENGTH;
			}
		}
	}

	/* See IBA C9-94 */
	if (pkt->mask & RXE_RETH_MASK) {
		if (reth_len(pkt) > (1U << 31)) {
			rxe_dbg_qp(qp, "dma length too long");
			return RESPST_ERR_LENGTH;
		}
	}

	return RESPST_CHK_RKEY;
}

/* if the reth length field is zero we can assume nothing
 * about the rkey value and should not validate or use it.
 * Instead set qp->resp.rkey to 0 which is an invalid rkey
 * value since the minimum index part is 1.
 */
static void qp_resp_from_reth(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
{
	unsigned int length = reth_len(pkt);

	qp->resp.va = reth_va(pkt);
	qp->resp.offset = 0;
	qp->resp.resid = length;
	qp->resp.length = length;
	if (pkt->mask & RXE_READ_OR_WRITE_MASK && length == 0)
		qp->resp.rkey = 0;
	else
		qp->resp.rkey = reth_rkey(pkt);
}

static void qp_resp_from_atmeth(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
{
	qp->resp.va = atmeth_va(pkt);
	qp->resp.offset = 0;
	qp->resp.rkey = atmeth_rkey(pkt);
	qp->resp.resid = sizeof(u64);
}

/* resolve the packet rkey to qp->resp.mr or set qp->resp.mr to NULL
 * if an invalid rkey is received or the rdma length is zero. For middle
 * or last packets use the stored value of mr.
 */
static enum resp_states check_rkey(struct rxe_qp *qp,
				   struct rxe_pkt_info *pkt)
{
	struct rxe_mr *mr = NULL;
	struct rxe_mw *mw = NULL;
	u64 va;
	u32 rkey;
	u32 resid;
	u32 pktlen;
	int mtu = qp->mtu;
	enum resp_states state;
	int access = 0;

	if (pkt->mask & (RXE_READ_OR_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
		if (pkt->mask & RXE_RETH_MASK)
			qp_resp_from_reth(qp, pkt);

		access = (pkt->mask & RXE_READ_MASK) ? IB_ACCESS_REMOTE_READ
						     : IB_ACCESS_REMOTE_WRITE;
	} else if (pkt->mask & RXE_FLUSH_MASK) {
		u32 flush_type = feth_plt(pkt);

		if (pkt->mask & RXE_RETH_MASK)
			qp_resp_from_reth(qp, pkt);

		if (flush_type & IB_FLUSH_GLOBAL)
			access |= IB_ACCESS_FLUSH_GLOBAL;
		if (flush_type & IB_FLUSH_PERSISTENT)
			access |= IB_ACCESS_FLUSH_PERSISTENT;
	} else if (pkt->mask & RXE_ATOMIC_MASK) {
		qp_resp_from_atmeth(qp, pkt);
		access = IB_ACCESS_REMOTE_ATOMIC;
	} else {
		return RESPST_EXECUTE;
	}

	/* A zero-byte read or write op is not required to
	 * set an addr or rkey. See C9-88
	 */
	if ((pkt->mask & RXE_READ_OR_WRITE_MASK) &&
	    (pkt->mask & RXE_RETH_MASK) && reth_len(pkt) == 0) {
		qp->resp.mr = NULL;
		return RESPST_EXECUTE;
	}

	va = qp->resp.va;
	rkey = qp->resp.rkey;
	resid = qp->resp.resid;
	pktlen = payload_size(pkt);

	if (rkey_is_mw(rkey)) {
		mw = rxe_lookup_mw(qp, access, rkey);
		if (!mw) {
			rxe_dbg_qp(qp, "no MW matches rkey %#x\n", rkey);
			state = RESPST_ERR_RKEY_VIOLATION;
			goto err;
		}

		mr = mw->mr;
		if (!mr) {
			rxe_dbg_qp(qp, "MW doesn't have an MR\n");
			state = RESPST_ERR_RKEY_VIOLATION;
			goto err;
		}

		if (mw->access & IB_ZERO_BASED)
			qp->resp.offset = mw->addr;

		rxe_put(mw);
		rxe_get(mr);
	} else {
		mr = lookup_mr(qp->pd, access, rkey, RXE_LOOKUP_REMOTE);
		if (!mr) {
			rxe_dbg_qp(qp, "no MR matches rkey %#x\n", rkey);
			state = RESPST_ERR_RKEY_VIOLATION;
			goto err;
		}
	}

	if (pkt->mask & RXE_FLUSH_MASK) {
		/* FLUSH MR may not set va or resid
		 * no need to check range since we will flush whole mr
		 */
		if (feth_sel(pkt) == IB_FLUSH_MR)
			goto skip_check_range;
	}

	if (mr_check_range(mr, va + qp->resp.offset, resid)) {
		state = RESPST_ERR_RKEY_VIOLATION;
		goto err;
	}

skip_check_range:
	if (pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
		if (resid > mtu) {
			if (pktlen != mtu || bth_pad(pkt)) {
				state = RESPST_ERR_LENGTH;
				goto err;
			}
		} else {
			if (pktlen != resid) {
				state = RESPST_ERR_LENGTH;
				goto err;
			}
			if ((bth_pad(pkt) != (0x3 & (-resid)))) {
				/* This case may not be exactly that
				 * but nothing else fits.
				 */
				state = RESPST_ERR_LENGTH;
				goto err;
			}
		}
	}

	WARN_ON_ONCE(qp->resp.mr);

	qp->resp.mr = mr;
	return RESPST_EXECUTE;

err:
	qp->resp.mr = NULL;
	if (mr)
		rxe_put(mr);
	if (mw)
		rxe_put(mw);

	return state;
}

static enum resp_states send_data_in(struct rxe_qp *qp, void *data_addr,
				     int data_len)
{
	int err;

	err = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE, &qp->resp.wqe->dma,
			data_addr, data_len, RXE_TO_MR_OBJ);
	if (unlikely(err))
		return (err == -ENOSPC) ? RESPST_ERR_LENGTH
					: RESPST_ERR_MALFORMED_WQE;

	return RESPST_NONE;
}

static enum resp_states write_data_in(struct rxe_qp *qp,
				      struct rxe_pkt_info *pkt)
{
	enum resp_states rc = RESPST_NONE;
	int err;
	int data_len = payload_size(pkt);

	err = rxe_mr_copy(qp->resp.mr, qp->resp.va + qp->resp.offset,
			  payload_addr(pkt), data_len, RXE_TO_MR_OBJ);
	if (err) {
		rc = RESPST_ERR_RKEY_VIOLATION;
		goto out;
	}

	qp->resp.va += data_len;
	qp->resp.resid -= data_len;

out:
	return rc;
}

static struct resp_res *rxe_prepare_res(struct rxe_qp *qp,
					struct rxe_pkt_info *pkt,
					int type)
{
	struct resp_res *res;
	u32 pkts;

	res = &qp->resp.resources[qp->resp.res_head];
	rxe_advance_resp_resource(qp);
	free_rd_atomic_resource(res);

	res->type = type;
	res->replay = 0;

	switch (type) {
	case RXE_READ_MASK:
		res->read.va = qp->resp.va + qp->resp.offset;
		res->read.va_org = qp->resp.va + qp->resp.offset;
		res->read.resid = qp->resp.resid;
		res->read.length = qp->resp.resid;
		res->read.rkey = qp->resp.rkey;

		pkts = max_t(u32, (reth_len(pkt) + qp->mtu - 1) / qp->mtu, 1);
		res->first_psn = pkt->psn;
		res->cur_psn = pkt->psn;
		res->last_psn = (pkt->psn + pkts - 1) & BTH_PSN_MASK;

		res->state = rdatm_res_state_new;
		break;
	case RXE_ATOMIC_MASK:
	case RXE_ATOMIC_WRITE_MASK:
		res->first_psn = pkt->psn;
		res->last_psn = pkt->psn;
		res->cur_psn = pkt->psn;
		break;
	case RXE_FLUSH_MASK:
		res->flush.va = qp->resp.va + qp->resp.offset;
		res->flush.length = qp->resp.length;
		res->flush.type = feth_plt(pkt);
		res->flush.level = feth_sel(pkt);
	}

	return res;
}

static enum resp_states process_flush(struct rxe_qp *qp,
				      struct rxe_pkt_info *pkt)
{
	u64 length, start;
	struct rxe_mr *mr = qp->resp.mr;
	struct resp_res *res = qp->resp.res;

	/* oA19-14, oA19-15 */
	if (res && res->replay)
		return RESPST_ACKNOWLEDGE;
	else if (!res) {
		res = rxe_prepare_res(qp, pkt, RXE_FLUSH_MASK);
		qp->resp.res = res;
	}

	if (res->flush.level == IB_FLUSH_RANGE) {
		start = res->flush.va;
		length = res->flush.length;
	} else { /* level == IB_FLUSH_MR */
		start = mr->ibmr.iova;
		length = mr->ibmr.length;
	}

	if (res->flush.type & IB_FLUSH_PERSISTENT) {
		if (rxe_flush_pmem_iova(mr, start, length))
			return RESPST_ERR_RKEY_VIOLATION;
		/* Make data persistent. */
		wmb();
	} else if (res->flush.type & IB_FLUSH_GLOBAL) {
		/* Make data globally visible. */
		wmb();
	}

	qp->resp.msn++;

	/* next expected psn, read handles this separately */
	qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
	qp->resp.ack_psn = qp->resp.psn;

	qp->resp.opcode = pkt->opcode;
	qp->resp.status = IB_WC_SUCCESS;

	return RESPST_ACKNOWLEDGE;
}

static enum resp_states atomic_reply(struct rxe_qp *qp,
				     struct rxe_pkt_info *pkt)
{
	struct rxe_mr *mr = qp->resp.mr;
	struct resp_res *res = qp->resp.res;
	int err;

	if (!res) {
		res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_MASK);
		qp->resp.res = res;
	}

	if (!res->replay) {
		u64 iova = qp->resp.va + qp->resp.offset;

		err = rxe_mr_do_atomic_op(mr, iova, pkt->opcode,
					  atmeth_comp(pkt),
					  atmeth_swap_add(pkt),
					  &res->atomic.orig_val);
		if (err)
			return err;

		qp->resp.msn++;

		/* next expected psn, read handles this separately */
		qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
		qp->resp.ack_psn = qp->resp.psn;

		qp->resp.opcode = pkt->opcode;
		qp->resp.status = IB_WC_SUCCESS;
	}

	return RESPST_ACKNOWLEDGE;
}

static enum resp_states atomic_write_reply(struct rxe_qp *qp,
					   struct rxe_pkt_info *pkt)
{
	struct resp_res *res = qp->resp.res;
	struct rxe_mr *mr;
	u64 value;
	u64 iova;
	int err;

	if (!res) {
		res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_WRITE_MASK);
		qp->resp.res = res;
	}

	if (res->replay)
		return RESPST_ACKNOWLEDGE;

	mr = qp->resp.mr;
	value = *(u64 *)payload_addr(pkt);
	iova = qp->resp.va + qp->resp.offset;

	err = rxe_mr_do_atomic_write(mr, iova, value);
	if (err)
		return err;

	qp->resp.resid = 0;
	qp->resp.msn++;

	/* next expected psn, read handles this separately */
	qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
	qp->resp.ack_psn = qp->resp.psn;

	qp->resp.opcode = pkt->opcode;
	qp->resp.status = IB_WC_SUCCESS;

	return RESPST_ACKNOWLEDGE;
}

static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
					  struct rxe_pkt_info *ack,
					  int opcode,
					  int payload,
					  u32 psn,
					  u8 syndrome)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	struct sk_buff *skb;
	int paylen;
	int pad;
	int err;

	/*
	 * allocate packet
	 */
	pad = (-payload) & 0x3;
	paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE;

	skb = rxe_init_packet(rxe, &qp->pri_av, paylen, ack);
	if (!skb)
		return NULL;

	ack->qp = qp;
	ack->opcode = opcode;
	ack->mask = rxe_opcode[opcode].mask;
	ack->paylen = paylen;
	ack->psn = psn;

	bth_init(ack, opcode, 0, 0, pad, IB_DEFAULT_PKEY_FULL,
		 qp->attr.dest_qp_num, 0, psn);

	if (ack->mask & RXE_AETH_MASK) {
		aeth_set_syn(ack, syndrome);
		aeth_set_msn(ack, qp->resp.msn);
	}

	if (ack->mask & RXE_ATMACK_MASK)
		atmack_set_orig(ack, qp->resp.res->atomic.orig_val);

	err = rxe_prepare(&qp->pri_av, ack, skb);
	if (err) {
		kfree_skb(skb);
		return NULL;
	}

	return skb;
}

/**
 * rxe_recheck_mr - revalidate MR from rkey and get a reference
 * @qp: the qp
 * @rkey: the rkey
 *
 * This code allows the MR to be invalidated or deregistered, or
 * the MW, if one was used, to be invalidated or deallocated.
 * It is assumed that the access permissions, if originally good,
 * are still OK and that the mappings are unchanged.
 *
 * TODO: If someone reregisters an MR to change its size or
 * access permissions during the processing of an RDMA read
 * we should kill the responder resource and complete the
 * operation with an error.
 *
 * Return: mr on success else NULL
 */
static struct rxe_mr *rxe_recheck_mr(struct rxe_qp *qp, u32 rkey)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	struct rxe_mr *mr;
	struct rxe_mw *mw;

	if (rkey_is_mw(rkey)) {
		mw = rxe_pool_get_index(&rxe->mw_pool, rkey >> 8);
		if (!mw)
			return NULL;

		mr = mw->mr;
		if (mw->rkey != rkey || mw->state != RXE_MW_STATE_VALID ||
		    !mr || mr->state != RXE_MR_STATE_VALID) {
			rxe_put(mw);
			return NULL;
		}

		rxe_get(mr);
		rxe_put(mw);

		return mr;
	}

	mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8);
	if (!mr)
		return NULL;

	if (mr->rkey != rkey || mr->state != RXE_MR_STATE_VALID) {
		rxe_put(mr);
		return NULL;
	}

	return mr;
}

/* RDMA read response. If res is not NULL, then we have a current RDMA request
 * being processed or replayed.
 */
static enum resp_states read_reply(struct rxe_qp *qp,
				   struct rxe_pkt_info *req_pkt)
{
	struct rxe_pkt_info ack_pkt;
	struct sk_buff *skb;
	int mtu = qp->mtu;
	enum resp_states state;
	int payload;
	int opcode;
	int err;
	struct resp_res *res = qp->resp.res;
	struct rxe_mr *mr;

	if (!res) {
		res = rxe_prepare_res(qp, req_pkt, RXE_READ_MASK);
		qp->resp.res = res;
	}

	if (res->state == rdatm_res_state_new) {
		if (!res->replay || qp->resp.length == 0) {
			/* if length == 0 mr will be NULL (is ok)
			 * otherwise qp->resp.mr holds a ref on mr
			 * which we transfer to the local mr and drop below.
			 */
			mr = qp->resp.mr;
			qp->resp.mr = NULL;
		} else {
			mr = rxe_recheck_mr(qp, res->read.rkey);
			if (!mr)
				return RESPST_ERR_RKEY_VIOLATION;
		}

		if (res->read.resid <= mtu)
			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY;
		else
			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST;
	} else {
		/* re-lookup mr from rkey on all later packets.
		 * length will be non-zero. This can fail if someone
		 * modifies or destroys the mr since the first packet.
		 */
		mr = rxe_recheck_mr(qp, res->read.rkey);
		if (!mr)
			return RESPST_ERR_RKEY_VIOLATION;

		if (res->read.resid > mtu)
			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE;
		else
			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST;
	}

	res->state = rdatm_res_state_next;

	payload = min_t(int, res->read.resid, mtu);

	skb = prepare_ack_packet(qp, &ack_pkt, opcode, payload,
				 res->cur_psn, AETH_ACK_UNLIMITED);
	if (!skb) {
		state = RESPST_ERR_RNR;
		goto err_out;
	}

	err = rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt),
			  payload, RXE_FROM_MR_OBJ);
	if (err) {
		kfree_skb(skb);
		state = RESPST_ERR_RKEY_VIOLATION;
		goto err_out;
	}

	if (bth_pad(&ack_pkt)) {
		u8 *pad = payload_addr(&ack_pkt) + payload;

		memset(pad, 0, bth_pad(&ack_pkt));
	}

	/* rxe_xmit_packet always consumes the skb */
	err = rxe_xmit_packet(qp, &ack_pkt, skb);
	if (err) {
		state = RESPST_ERR_RNR;
		goto err_out;
	}

	res->read.va += payload;
	res->read.resid -= payload;
	res->cur_psn = (res->cur_psn + 1) & BTH_PSN_MASK;

	if (res->read.resid > 0) {
		state = RESPST_DONE;
	} else {
		qp->resp.res = NULL;
		if (!res->replay)
			qp->resp.opcode = -1;
		if (psn_compare(res->cur_psn, qp->resp.psn) >= 0)
			qp->resp.psn = res->cur_psn;
		state = RESPST_CLEANUP;
	}

err_out:
	if (mr)
		rxe_put(mr);
	return state;
}

static int invalidate_rkey(struct rxe_qp *qp, u32 rkey)
{
	if (rkey_is_mw(rkey))
		return rxe_invalidate_mw(qp, rkey);
	else
		return rxe_invalidate_mr(qp, rkey);
}

/* Executes a new request. A retried request never reaches this function
 * (sends and writes are discarded; reads and atomics are retried elsewhere).
 */
static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
{
	enum resp_states err;
	struct sk_buff *skb = PKT_TO_SKB(pkt);
	union rdma_network_hdr hdr;

	if (pkt->mask & RXE_SEND_MASK) {
		if (qp_type(qp) == IB_QPT_UD ||
		    qp_type(qp) == IB_QPT_GSI) {
			if (skb->protocol == htons(ETH_P_IP)) {
				memset(&hdr.reserved, 0,
				       sizeof(hdr.reserved));
				memcpy(&hdr.roce4grh, ip_hdr(skb),
				       sizeof(hdr.roce4grh));
				err = send_data_in(qp, &hdr, sizeof(hdr));
			} else {
				err = send_data_in(qp, ipv6_hdr(skb),
						   sizeof(hdr));
			}
			if (err)
				return err;
		}
		err = send_data_in(qp, payload_addr(pkt), payload_size(pkt));
		if (err)
			return err;
	} else if (pkt->mask & RXE_WRITE_MASK) {
		err = write_data_in(qp, pkt);
		if (err)
			return err;
	} else if (pkt->mask & RXE_READ_MASK) {
		/* For RDMA Read we can increment the msn now. See C9-148. */
		qp->resp.msn++;
		return RESPST_READ_REPLY;
	} else if (pkt->mask & RXE_ATOMIC_MASK) {
		return RESPST_ATOMIC_REPLY;
	} else if (pkt->mask & RXE_ATOMIC_WRITE_MASK) {
		return RESPST_ATOMIC_WRITE_REPLY;
	} else if (pkt->mask & RXE_FLUSH_MASK) {
		return RESPST_PROCESS_FLUSH;
	} else {
		/* Unreachable */
		WARN_ON_ONCE(1);
	}

	if (pkt->mask & RXE_IETH_MASK) {
		u32 rkey = ieth_rkey(pkt);

		err = invalidate_rkey(qp, rkey);
		if (err)
			return RESPST_ERR_INVALIDATE_RKEY;
	}

	if (pkt->mask & RXE_END_MASK)
		/* We successfully processed this new request. */
		qp->resp.msn++;

	/* next expected psn, read handles this separately */
	qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
	qp->resp.ack_psn = qp->resp.psn;

	qp->resp.opcode = pkt->opcode;
	qp->resp.status = IB_WC_SUCCESS;

	if (pkt->mask & RXE_COMP_MASK)
		return RESPST_COMPLETE;
	else if (qp_type(qp) == IB_QPT_RC)
		return RESPST_ACKNOWLEDGE;
	else
		return RESPST_CLEANUP;
}

static enum resp_states do_complete(struct rxe_qp *qp,
				    struct rxe_pkt_info *pkt)
{
	struct rxe_cqe cqe;
	struct ib_wc *wc = &cqe.ibwc;
	struct ib_uverbs_wc *uwc = &cqe.uibwc;
	struct rxe_recv_wqe *wqe = qp->resp.wqe;
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);

	if (!wqe)
		goto finish;

	memset(&cqe, 0, sizeof(cqe));

	if (qp->rcq->is_user) {
		uwc->status = qp->resp.status;
		uwc->qp_num = qp->ibqp.qp_num;
		uwc->wr_id = wqe->wr_id;
	} else {
		wc->status = qp->resp.status;
		wc->qp = &qp->ibqp;
		wc->wr_id = wqe->wr_id;
	}

	if (wc->status == IB_WC_SUCCESS) {
		rxe_counter_inc(rxe, RXE_CNT_RDMA_RECV);
		wc->opcode = (pkt->mask & RXE_IMMDT_MASK &&
				pkt->mask & RXE_WRITE_MASK) ?
					IB_WC_RECV_RDMA_WITH_IMM : IB_WC_RECV;
		wc->byte_len = (pkt->mask & RXE_IMMDT_MASK &&
				pkt->mask & RXE_WRITE_MASK) ?
					qp->resp.length :
					wqe->dma.length - wqe->dma.resid;

		/* fields after byte_len are different between kernel and user
		 * space
		 */
		if (qp->rcq->is_user) {
			uwc->wc_flags = IB_WC_GRH;

			if (pkt->mask & RXE_IMMDT_MASK) {
				uwc->wc_flags |= IB_WC_WITH_IMM;
				uwc->ex.imm_data = immdt_imm(pkt);
			}

			if (pkt->mask & RXE_IETH_MASK) {
				uwc->wc_flags |= IB_WC_WITH_INVALIDATE;
				uwc->ex.invalidate_rkey = ieth_rkey(pkt);
			}

			if (pkt->mask & RXE_DETH_MASK)
				uwc->src_qp = deth_sqp(pkt);

			uwc->port_num = qp->attr.port_num;
		} else {
			struct sk_buff *skb = PKT_TO_SKB(pkt);

			wc->wc_flags = IB_WC_GRH | IB_WC_WITH_NETWORK_HDR_TYPE;
			if (skb->protocol == htons(ETH_P_IP))
				wc->network_hdr_type = RDMA_NETWORK_IPV4;
			else
				wc->network_hdr_type = RDMA_NETWORK_IPV6;

			if (is_vlan_dev(skb->dev)) {
				wc->wc_flags |= IB_WC_WITH_VLAN;
				wc->vlan_id = vlan_dev_vlan_id(skb->dev);
			}

			if (pkt->mask & RXE_IMMDT_MASK) {
				wc->wc_flags |= IB_WC_WITH_IMM;
				wc->ex.imm_data = immdt_imm(pkt);
			}

			if (pkt->mask & RXE_IETH_MASK) {
				wc->wc_flags |= IB_WC_WITH_INVALIDATE;
				wc->ex.invalidate_rkey = ieth_rkey(pkt);
			}

			if (pkt->mask & RXE_DETH_MASK)
				wc->src_qp = deth_sqp(pkt);

			wc->port_num = qp->attr.port_num;
		}
	}

	/* have copy for srq and reference for !srq */
	if (!qp->srq)
		queue_advance_consumer(qp->rq.queue, QUEUE_TYPE_FROM_CLIENT);

	qp->resp.wqe = NULL;

	if (rxe_cq_post(qp->rcq, &cqe, pkt ? bth_se(pkt) : 1))
		return RESPST_ERR_CQ_OVERFLOW;

finish:
	if (unlikely(qp->resp.state == QP_STATE_ERROR))
		return RESPST_CHK_RESOURCE;
	if (unlikely(!pkt))
		return RESPST_DONE;
	if (qp_type(qp) == IB_QPT_RC)
		return RESPST_ACKNOWLEDGE;
	else
		return RESPST_CLEANUP;
}

static int send_common_ack(struct rxe_qp *qp, u8 syndrome, u32 psn,
			   int opcode, const char *msg)
{
	int err;
	struct rxe_pkt_info ack_pkt;
	struct sk_buff *skb;

	skb = prepare_ack_packet(qp, &ack_pkt, opcode, 0, psn, syndrome);
	if (!skb)
		return -ENOMEM;

	err = rxe_xmit_packet(qp, &ack_pkt, skb);
	if (err)
		rxe_dbg_qp(qp, "Failed sending %s\n", msg);

	return err;
}

static int send_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
{
	return send_common_ack(qp, syndrome, psn,
			       IB_OPCODE_RC_ACKNOWLEDGE, "ACK");
}

static int send_atomic_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
{
	int ret = send_common_ack(qp, syndrome, psn,
				  IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE,
				  "ATOMIC ACK");

	/* have to clear this since it is used to trigger
	 * long read replies
	 */
	qp->resp.res = NULL;
	return ret;
}

static int send_read_response_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
{
	int ret = send_common_ack(qp, syndrome, psn,
				  IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY,
				  "RDMA READ response of length zero ACK");

	/* have to clear this since it is used to trigger
	 * long read replies
	 */
	qp->resp.res = NULL;
	return ret;
}

static enum resp_states acknowledge(struct rxe_qp *qp,
				    struct rxe_pkt_info *pkt)
{
	if (qp_type(qp) != IB_QPT_RC)
		return RESPST_CLEANUP;

	if (qp->resp.aeth_syndrome != AETH_ACK_UNLIMITED)
		send_ack(qp, qp->resp.aeth_syndrome, pkt->psn);
	else if (pkt->mask & RXE_ATOMIC_MASK)
		send_atomic_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
	else if (pkt->mask & (RXE_FLUSH_MASK | RXE_ATOMIC_WRITE_MASK))
		send_read_response_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
	else if (bth_ack(pkt))
		send_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);

	return RESPST_CLEANUP;
}

static enum resp_states cleanup(struct rxe_qp *qp,
				struct rxe_pkt_info *pkt)
{
	struct sk_buff *skb;

	if (pkt) {
		skb = skb_dequeue(&qp->req_pkts);
		rxe_put(qp);
		kfree_skb(skb);
		ib_device_put(qp->ibqp.device);
	}

	if (qp->resp.mr) {
		rxe_put(qp->resp.mr);
		qp->resp.mr = NULL;
	}

	return RESPST_DONE;
}

static struct resp_res *find_resource(struct rxe_qp *qp, u32 psn)
{
	int i;

	for (i = 0; i < qp->attr.max_dest_rd_atomic; i++) {
		struct resp_res *res = &qp->resp.resources[i];

		if (res->type == 0)
			continue;

		if (psn_compare(psn, res->first_psn) >= 0 &&
		    psn_compare(psn, res->last_psn) <= 0) {
			return res;
		}
	}

	return NULL;
}

static enum resp_states duplicate_request(struct rxe_qp *qp,
					  struct rxe_pkt_info *pkt)
{
	enum resp_states rc;
	u32 prev_psn = (qp->resp.ack_psn - 1) & BTH_PSN_MASK;

	if (pkt->mask & RXE_SEND_MASK ||
	    pkt->mask & RXE_WRITE_MASK) {
		/* SEND. Ack again and cleanup. C9-105. */
		send_ack(qp, AETH_ACK_UNLIMITED, prev_psn);
		return RESPST_CLEANUP;
	} else if (pkt->mask & RXE_FLUSH_MASK) {
		struct resp_res *res;

		/* Find the operation in our list of responder resources. */
		res = find_resource(qp, pkt->psn);
		if (res) {
			res->replay = 1;
			res->cur_psn = pkt->psn;
			qp->resp.res = res;
			rc = RESPST_PROCESS_FLUSH;
			goto out;
		}

		/* Resource not found. Class D error. Drop the request. */
		rc = RESPST_CLEANUP;
		goto out;
	} else if (pkt->mask & RXE_READ_MASK) {
		struct resp_res *res;

		res = find_resource(qp, pkt->psn);
		if (!res) {
			/* Resource not found. Class D error. Drop the
			 * request.
			 */
			rc = RESPST_CLEANUP;
			goto out;
		} else {
			/* Ensure this new request is the same as the previous
			 * one or a subset of it.
			 */
			u64 iova = reth_va(pkt);
			u32 resid = reth_len(pkt);

			if (iova < res->read.va_org ||
			    resid > res->read.length ||
			    (iova + resid) > (res->read.va_org +
					      res->read.length)) {
				rc = RESPST_CLEANUP;
				goto out;
			}

			if (reth_rkey(pkt) != res->read.rkey) {
				rc = RESPST_CLEANUP;
				goto out;
			}

			res->cur_psn = pkt->psn;
			res->state = (pkt->psn == res->first_psn) ?
					rdatm_res_state_new :
					rdatm_res_state_replay;
			res->replay = 1;

			/* Reset the resource, except length. */
			res->read.va_org = iova;
			res->read.va = iova;
			res->read.resid = resid;

			/* Replay the RDMA read reply. */
			qp->resp.res = res;
			rc = RESPST_READ_REPLY;
			goto out;
		}
	} else {
		struct resp_res *res;

		/* Find the operation in our list of responder resources. */
		res = find_resource(qp, pkt->psn);
		if (res) {
			res->replay = 1;
			res->cur_psn = pkt->psn;
			qp->resp.res = res;
			rc = pkt->mask & RXE_ATOMIC_MASK ?
					RESPST_ATOMIC_REPLY :
					RESPST_ATOMIC_WRITE_REPLY;
			goto out;
		}

		/* Resource not found. Class D error. Drop the request. */
		rc = RESPST_CLEANUP;
		goto out;
	}
out:
	return rc;
}

/* Process a class A or C. Both are treated the same in this implementation. */
static void do_class_ac_error(struct rxe_qp *qp, u8 syndrome,
			      enum ib_wc_status status)
{
	qp->resp.aeth_syndrome = syndrome;
	qp->resp.status = status;

	/* indicate that we should go through the ERROR state */
	qp->resp.goto_error = 1;
}

static enum resp_states do_class_d1e_error(struct rxe_qp *qp)
{
	/* UC */
	if (qp->srq) {
		/* Class E */
		qp->resp.drop_msg = 1;
		if (qp->resp.wqe) {
			qp->resp.status = IB_WC_REM_INV_REQ_ERR;
			return RESPST_COMPLETE;
		} else {
			return RESPST_CLEANUP;
		}
	} else {
		/* Class D1. This packet may be the start of a
		 * new message and could be valid. The previous
		 * message is invalid and ignored. Reset the
		 * recv wr to its original state.
		 */
		if (qp->resp.wqe) {
			qp->resp.wqe->dma.resid = qp->resp.wqe->dma.length;
			qp->resp.wqe->dma.cur_sge = 0;
			qp->resp.wqe->dma.sge_offset = 0;
			qp->resp.opcode = -1;
		}

		if (qp->resp.mr) {
			rxe_put(qp->resp.mr);
			qp->resp.mr = NULL;
		}

		return RESPST_CLEANUP;
	}
}

static void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify)
{
	struct sk_buff *skb;
	struct rxe_queue *q = qp->rq.queue;

	while ((skb = skb_dequeue(&qp->req_pkts))) {
		rxe_put(qp);
		kfree_skb(skb);
		ib_device_put(qp->ibqp.device);
	}

	if (notify)
		return;

	while (!qp->srq && q && queue_head(q, q->type))
		queue_advance_consumer(q, q->type);
}

int rxe_responder(void *arg)
{
	struct rxe_qp *qp = (struct rxe_qp *)arg;
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	enum resp_states state;
	struct rxe_pkt_info *pkt = NULL;
	int ret;

	if (!rxe_get(qp))
		return -EAGAIN;

	qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED;

	if (!qp->valid)
		goto exit;

	switch (qp->resp.state) {
	case QP_STATE_RESET:
		state = RESPST_RESET;
		break;

	default:
		state = RESPST_GET_REQ;
		break;
	}

	while (1) {
		rxe_dbg_qp(qp, "state = %s\n", resp_state_name[state]);
		switch (state) {
		case RESPST_GET_REQ:
			state = get_req(qp, &pkt);
			break;
		case RESPST_CHK_PSN:
			state = check_psn(qp, pkt);
			break;
		case RESPST_CHK_OP_SEQ:
			state = check_op_seq(qp, pkt);
			break;
		case RESPST_CHK_OP_VALID:
			state = check_op_valid(qp, pkt);
			break;
		case RESPST_CHK_RESOURCE:
			state = check_resource(qp, pkt);
			break;
		case RESPST_CHK_LENGTH:
			state = rxe_resp_check_length(qp, pkt);
			break;
		case RESPST_CHK_RKEY:
			state = check_rkey(qp, pkt);
			break;
		case RESPST_EXECUTE:
			state = execute(qp, pkt);
			break;
		case RESPST_COMPLETE:
			state = do_complete(qp, pkt);
			break;
		case RESPST_READ_REPLY:
			state = read_reply(qp, pkt);
			break;
		case RESPST_ATOMIC_REPLY:
			state = atomic_reply(qp, pkt);
			break;
		case RESPST_ATOMIC_WRITE_REPLY:
			state = atomic_write_reply(qp, pkt);
			break;
		case RESPST_PROCESS_FLUSH:
			state = process_flush(qp, pkt);
			break;
		case RESPST_ACKNOWLEDGE:
			state = acknowledge(qp, pkt);
			break;
		case RESPST_CLEANUP:
			state = cleanup(qp, pkt);
			break;
		case RESPST_DUPLICATE_REQUEST:
			state = duplicate_request(qp, pkt);
			break;
		case RESPST_ERR_PSN_OUT_OF_SEQ:
			/* RC only - Class B. Drop packet. */
			send_ack(qp, AETH_NAK_PSN_SEQ_ERROR, qp->resp.psn);
			state = RESPST_CLEANUP;
			break;

		case RESPST_ERR_TOO_MANY_RDMA_ATM_REQ:
		case RESPST_ERR_MISSING_OPCODE_FIRST:
		case RESPST_ERR_MISSING_OPCODE_LAST_C:
		case RESPST_ERR_UNSUPPORTED_OPCODE:
		case RESPST_ERR_MISALIGNED_ATOMIC:
			/* RC Only - Class C. */
			do_class_ac_error(qp, AETH_NAK_INVALID_REQ,
					  IB_WC_REM_INV_REQ_ERR);
			state = RESPST_COMPLETE;
			break;

		case RESPST_ERR_MISSING_OPCODE_LAST_D1E:
			state = do_class_d1e_error(qp);
			break;
		case RESPST_ERR_RNR:
			if (qp_type(qp) == IB_QPT_RC) {
				rxe_counter_inc(rxe, RXE_CNT_SND_RNR);
				/* RC - class B */
				send_ack(qp, AETH_RNR_NAK |
					 (~AETH_TYPE_MASK &
					 qp->attr.min_rnr_timer),
					 pkt->psn);
			} else {
				/* UD/UC - class D */
				qp->resp.drop_msg = 1;
			}
			state = RESPST_CLEANUP;
			break;

		case RESPST_ERR_RKEY_VIOLATION:
			if (qp_type(qp) == IB_QPT_RC) {
				/* Class C */
				do_class_ac_error(qp, AETH_NAK_REM_ACC_ERR,
						  IB_WC_REM_ACCESS_ERR);
				state = RESPST_COMPLETE;
			} else {
				qp->resp.drop_msg = 1;
				if (qp->srq) {
					/* UC/SRQ Class D */
					qp->resp.status = IB_WC_REM_ACCESS_ERR;
					state = RESPST_COMPLETE;
				} else {
					/* UC/non-SRQ Class E. */
					state = RESPST_CLEANUP;
				}
			}
			break;

		case RESPST_ERR_INVALIDATE_RKEY:
			/* RC - Class J. */
			qp->resp.goto_error = 1;
			qp->resp.status = IB_WC_REM_INV_REQ_ERR;
			state = RESPST_COMPLETE;
			break;

		case RESPST_ERR_LENGTH:
			if (qp_type(qp) == IB_QPT_RC) {
				/* Class C */
				do_class_ac_error(qp, AETH_NAK_INVALID_REQ,
						  IB_WC_REM_INV_REQ_ERR);
				state = RESPST_COMPLETE;
			} else if (qp->srq) {
				/* UC/UD - class E */
				qp->resp.status = IB_WC_REM_INV_REQ_ERR;
				state = RESPST_COMPLETE;
			} else {
				/* UC/UD - class D */
				qp->resp.drop_msg = 1;
				state = RESPST_CLEANUP;
			}
			break;

		case RESPST_ERR_MALFORMED_WQE:
			/* All, Class A. */
			do_class_ac_error(qp, AETH_NAK_REM_OP_ERR,
					  IB_WC_LOC_QP_OP_ERR);
			state = RESPST_COMPLETE;
			break;

		case RESPST_ERR_CQ_OVERFLOW:
			/* All - Class G */
			state = RESPST_ERROR;
			break;

		case RESPST_DONE:
			if (qp->resp.goto_error) {
				state = RESPST_ERROR;
				break;
			}

			goto done;

		case RESPST_EXIT:
			if (qp->resp.goto_error) {
				state = RESPST_ERROR;
				break;
			}

			goto exit;

		case RESPST_RESET:
			rxe_drain_req_pkts(qp, false);
			qp->resp.wqe = NULL;
			goto exit;

		case RESPST_ERROR:
			qp->resp.goto_error = 0;
			rxe_dbg_qp(qp, "moved to error state\n");
			rxe_qp_error(qp);
			goto exit;

		default:
			WARN_ON_ONCE(1);
		}
	}

	/* A non-zero return value will cause rxe_do_task to
	 * exit its loop and end the tasklet. A zero return
	 * will continue looping and return to rxe_responder.
	 */
done:
	ret = 0;
	goto out;
exit:
	ret = -EAGAIN;
out:
	rxe_put(qp);
	return ret;
}