// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

#include <linux/skbuff.h>

#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"

static char *resp_state_name[] = {
	[RESPST_NONE] = "NONE",
	[RESPST_GET_REQ] = "GET_REQ",
	[RESPST_CHK_PSN] = "CHK_PSN",
	[RESPST_CHK_OP_SEQ] = "CHK_OP_SEQ",
	[RESPST_CHK_OP_VALID] = "CHK_OP_VALID",
	[RESPST_CHK_RESOURCE] = "CHK_RESOURCE",
	[RESPST_CHK_LENGTH] = "CHK_LENGTH",
	[RESPST_CHK_RKEY] = "CHK_RKEY",
	[RESPST_EXECUTE] = "EXECUTE",
	[RESPST_READ_REPLY] = "READ_REPLY",
	[RESPST_ATOMIC_REPLY] = "ATOMIC_REPLY",
	[RESPST_ATOMIC_WRITE_REPLY] = "ATOMIC_WRITE_REPLY",
	[RESPST_PROCESS_FLUSH] = "PROCESS_FLUSH",
	[RESPST_COMPLETE] = "COMPLETE",
	[RESPST_ACKNOWLEDGE] = "ACKNOWLEDGE",
	[RESPST_CLEANUP] = "CLEANUP",
	[RESPST_DUPLICATE_REQUEST] = "DUPLICATE_REQUEST",
	[RESPST_ERR_MALFORMED_WQE] = "ERR_MALFORMED_WQE",
	[RESPST_ERR_UNSUPPORTED_OPCODE] = "ERR_UNSUPPORTED_OPCODE",
	[RESPST_ERR_MISALIGNED_ATOMIC] = "ERR_MISALIGNED_ATOMIC",
	[RESPST_ERR_PSN_OUT_OF_SEQ] = "ERR_PSN_OUT_OF_SEQ",
	[RESPST_ERR_MISSING_OPCODE_FIRST] = "ERR_MISSING_OPCODE_FIRST",
	[RESPST_ERR_MISSING_OPCODE_LAST_C] = "ERR_MISSING_OPCODE_LAST_C",
	[RESPST_ERR_MISSING_OPCODE_LAST_D1E] = "ERR_MISSING_OPCODE_LAST_D1E",
	[RESPST_ERR_TOO_MANY_RDMA_ATM_REQ] = "ERR_TOO_MANY_RDMA_ATM_REQ",
	[RESPST_ERR_RNR] = "ERR_RNR",
	[RESPST_ERR_RKEY_VIOLATION] = "ERR_RKEY_VIOLATION",
	[RESPST_ERR_INVALIDATE_RKEY] = "ERR_INVALIDATE_RKEY_VIOLATION",
	[RESPST_ERR_LENGTH] = "ERR_LENGTH",
	[RESPST_ERR_CQ_OVERFLOW] = "ERR_CQ_OVERFLOW",
	[RESPST_ERROR] = "ERROR",
	[RESPST_DONE] = "DONE",
	[RESPST_EXIT] = "EXIT",
};

/* rxe_recv calls here to add a request packet to the input queue */
void rxe_resp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb)
{
	int must_sched;
	struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);

	skb_queue_tail(&qp->req_pkts, skb);

	must_sched = (pkt->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST) ||
			(skb_queue_len(&qp->req_pkts) > 1);

	if (must_sched)
		rxe_sched_task(&qp->resp.task);
	else
		rxe_run_task(&qp->resp.task);
}

static inline enum resp_states get_req(struct rxe_qp *qp,
				       struct rxe_pkt_info **pkt_p)
{
	struct sk_buff *skb;

	skb = skb_peek(&qp->req_pkts);
	if (!skb)
		return RESPST_EXIT;

	*pkt_p = SKB_TO_PKT(skb);

	return (qp->resp.res) ? RESPST_READ_REPLY : RESPST_CHK_PSN;
}
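
/* Note: psn_compare() compares two PSNs modulo the 24-bit PSN space.
 * In check_psn() below a positive result means the packet PSN is ahead
 * of the PSN the responder expects (one or more requests were lost),
 * a negative result means it is behind (a duplicate or retried
 * request), and zero means it is the expected packet.
 */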
static enum resp_states check_psn(struct rxe_qp *qp,
				  struct rxe_pkt_info *pkt)
{
	int diff = psn_compare(pkt->psn, qp->resp.psn);
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);

	switch (qp_type(qp)) {
	case IB_QPT_RC:
		if (diff > 0) {
			if (qp->resp.sent_psn_nak)
				return RESPST_CLEANUP;

			qp->resp.sent_psn_nak = 1;
			rxe_counter_inc(rxe, RXE_CNT_OUT_OF_SEQ_REQ);
			return RESPST_ERR_PSN_OUT_OF_SEQ;

		} else if (diff < 0) {
			rxe_counter_inc(rxe, RXE_CNT_DUP_REQ);
			return RESPST_DUPLICATE_REQUEST;
		}

		if (qp->resp.sent_psn_nak)
			qp->resp.sent_psn_nak = 0;

		break;

	case IB_QPT_UC:
		if (qp->resp.drop_msg || diff != 0) {
			if (pkt->mask & RXE_START_MASK) {
				qp->resp.drop_msg = 0;
				return RESPST_CHK_OP_SEQ;
			}

			qp->resp.drop_msg = 1;
			return RESPST_CLEANUP;
		}
		break;
	default:
		break;
	}

	return RESPST_CHK_OP_SEQ;
}

static enum resp_states check_op_seq(struct rxe_qp *qp,
				     struct rxe_pkt_info *pkt)
{
	switch (qp_type(qp)) {
	case IB_QPT_RC:
		switch (qp->resp.opcode) {
		case IB_OPCODE_RC_SEND_FIRST:
		case IB_OPCODE_RC_SEND_MIDDLE:
			switch (pkt->opcode) {
			case IB_OPCODE_RC_SEND_MIDDLE:
			case IB_OPCODE_RC_SEND_LAST:
			case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE:
			case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE:
				return RESPST_CHK_OP_VALID;
			default:
				return RESPST_ERR_MISSING_OPCODE_LAST_C;
			}

		case IB_OPCODE_RC_RDMA_WRITE_FIRST:
		case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
			switch (pkt->opcode) {
			case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
			case IB_OPCODE_RC_RDMA_WRITE_LAST:
			case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
				return RESPST_CHK_OP_VALID;
			default:
				return RESPST_ERR_MISSING_OPCODE_LAST_C;
			}

		default:
			switch (pkt->opcode) {
			case IB_OPCODE_RC_SEND_MIDDLE:
			case IB_OPCODE_RC_SEND_LAST:
			case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE:
			case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE:
			case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
			case IB_OPCODE_RC_RDMA_WRITE_LAST:
			case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
				return RESPST_ERR_MISSING_OPCODE_FIRST;
			default:
				return RESPST_CHK_OP_VALID;
			}
		}
		break;

	case IB_QPT_UC:
		switch (qp->resp.opcode) {
		case IB_OPCODE_UC_SEND_FIRST:
		case IB_OPCODE_UC_SEND_MIDDLE:
			switch (pkt->opcode) {
			case IB_OPCODE_UC_SEND_MIDDLE:
			case IB_OPCODE_UC_SEND_LAST:
			case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE:
				return RESPST_CHK_OP_VALID;
			default:
				return RESPST_ERR_MISSING_OPCODE_LAST_D1E;
			}

		case IB_OPCODE_UC_RDMA_WRITE_FIRST:
		case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
			switch (pkt->opcode) {
			case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
			case IB_OPCODE_UC_RDMA_WRITE_LAST:
			case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
				return RESPST_CHK_OP_VALID;
			default:
				return RESPST_ERR_MISSING_OPCODE_LAST_D1E;
			}

		default:
			switch (pkt->opcode) {
			case IB_OPCODE_UC_SEND_MIDDLE:
			case IB_OPCODE_UC_SEND_LAST:
			case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE:
			case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
			case IB_OPCODE_UC_RDMA_WRITE_LAST:
			case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
				qp->resp.drop_msg = 1;
				return RESPST_CLEANUP;
			default:
				return RESPST_CHK_OP_VALID;
			}
		}
		break;

	default:
		return RESPST_CHK_OP_VALID;
	}
}
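
/* The sequence checks above enforce the IBA first/middle/last opcode
 * ordering rules within a message: on RC a violation is NAKed as a
 * Class C error, while on UC the rest of the message is silently
 * dropped (Class D1/E).  The checks below verify that the QP actually
 * allows the requested operation.
 */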
static bool check_qp_attr_access(struct rxe_qp *qp,
				 struct rxe_pkt_info *pkt)
{
	if (((pkt->mask & RXE_READ_MASK) &&
	     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) ||
	    ((pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) &&
	     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) ||
	    ((pkt->mask & RXE_ATOMIC_MASK) &&
	     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
		return false;

	if (pkt->mask & RXE_FLUSH_MASK) {
		u32 flush_type = feth_plt(pkt);

		if ((flush_type & IB_FLUSH_GLOBAL &&
		     !(qp->attr.qp_access_flags & IB_ACCESS_FLUSH_GLOBAL)) ||
		    (flush_type & IB_FLUSH_PERSISTENT &&
		     !(qp->attr.qp_access_flags & IB_ACCESS_FLUSH_PERSISTENT)))
			return false;
	}

	return true;
}

static enum resp_states check_op_valid(struct rxe_qp *qp,
				       struct rxe_pkt_info *pkt)
{
	switch (qp_type(qp)) {
	case IB_QPT_RC:
		if (!check_qp_attr_access(qp, pkt))
			return RESPST_ERR_UNSUPPORTED_OPCODE;

		break;

	case IB_QPT_UC:
		if ((pkt->mask & RXE_WRITE_MASK) &&
		    !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) {
			qp->resp.drop_msg = 1;
			return RESPST_CLEANUP;
		}

		break;

	case IB_QPT_UD:
	case IB_QPT_GSI:
		break;

	default:
		WARN_ON_ONCE(1);
		break;
	}

	return RESPST_CHK_RESOURCE;
}

static enum resp_states get_srq_wqe(struct rxe_qp *qp)
{
	struct rxe_srq *srq = qp->srq;
	struct rxe_queue *q = srq->rq.queue;
	struct rxe_recv_wqe *wqe;
	struct ib_event ev;
	unsigned int count;
	size_t size;
	unsigned long flags;

	if (srq->error)
		return RESPST_ERR_RNR;

	spin_lock_irqsave(&srq->rq.consumer_lock, flags);

	wqe = queue_head(q, QUEUE_TYPE_FROM_CLIENT);
	if (!wqe) {
		spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
		return RESPST_ERR_RNR;
	}

	/* don't trust user space data */
	if (unlikely(wqe->dma.num_sge > srq->rq.max_sge)) {
		spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
		rxe_dbg_qp(qp, "invalid num_sge in SRQ entry\n");
		return RESPST_ERR_MALFORMED_WQE;
	}
	size = sizeof(*wqe) + wqe->dma.num_sge*sizeof(struct rxe_sge);
	memcpy(&qp->resp.srq_wqe, wqe, size);

	qp->resp.wqe = &qp->resp.srq_wqe.wqe;
	queue_advance_consumer(q, QUEUE_TYPE_FROM_CLIENT);
	count = queue_count(q, QUEUE_TYPE_FROM_CLIENT);

	if (srq->limit && srq->ibsrq.event_handler && (count < srq->limit)) {
		srq->limit = 0;
		goto event;
	}

	spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
	return RESPST_CHK_LENGTH;

event:
	spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
	ev.device = qp->ibqp.device;
	ev.element.srq = qp->ibqp.srq;
	ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
	srq->ibsrq.event_handler(&ev, srq->ibsrq.srq_context);
	return RESPST_CHK_LENGTH;
}

static enum resp_states check_resource(struct rxe_qp *qp,
				       struct rxe_pkt_info *pkt)
{
	struct rxe_srq *srq = qp->srq;

	if (pkt->mask & (RXE_READ_OR_ATOMIC_MASK | RXE_ATOMIC_WRITE_MASK)) {
		/* it is the requester's job not to send too many
		 * read/atomic ops; we just recycle the responder
		 * resource queue
		 */
		if (likely(qp->attr.max_dest_rd_atomic > 0))
			return RESPST_CHK_LENGTH;
		else
			return RESPST_ERR_TOO_MANY_RDMA_ATM_REQ;
	}

	if (pkt->mask & RXE_RWR_MASK) {
		if (srq)
			return get_srq_wqe(qp);

		qp->resp.wqe = queue_head(qp->rq.queue,
					  QUEUE_TYPE_FROM_CLIENT);
		return (qp->resp.wqe) ? RESPST_CHK_LENGTH : RESPST_ERR_RNR;
	}

	return RESPST_CHK_LENGTH;
}
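
/* Note: the extra 40 bytes added to the payload in the UD check below
 * make room for the GRH (sizeof(struct ib_grh) == 40, or the IPv4
 * header padded to the same size) that is delivered into the first
 * 40 bytes of a UD receive buffer, see execute() further down.
 */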
static enum resp_states rxe_resp_check_length(struct rxe_qp *qp,
					      struct rxe_pkt_info *pkt)
{
	/*
	 * See IBA C9-92
	 * For UD QPs we only check whether the packet will fit in the
	 * receive buffer. For RDMA operations additional length checks
	 * are performed later in check_rkey().
	 */
	if ((qp_type(qp) == IB_QPT_GSI) || (qp_type(qp) == IB_QPT_UD)) {
		unsigned int payload = payload_size(pkt);
		unsigned int recv_buffer_len = 0;
		int i;

		for (i = 0; i < qp->resp.wqe->dma.num_sge; i++)
			recv_buffer_len += qp->resp.wqe->dma.sge[i].length;
		if (payload + 40 > recv_buffer_len) {
			rxe_dbg_qp(qp, "The receive buffer is too small for this UD packet.\n");
			return RESPST_ERR_LENGTH;
		}
	}

	if (pkt->mask & RXE_PAYLOAD_MASK && ((qp_type(qp) == IB_QPT_RC) ||
					     (qp_type(qp) == IB_QPT_UC))) {
		unsigned int mtu = qp->mtu;
		unsigned int payload = payload_size(pkt);

		if ((pkt->mask & RXE_START_MASK) &&
		    (pkt->mask & RXE_END_MASK)) {
			if (unlikely(payload > mtu)) {
				rxe_dbg_qp(qp, "only packet too long");
				return RESPST_ERR_LENGTH;
			}
		} else if ((pkt->mask & RXE_START_MASK) ||
			   (pkt->mask & RXE_MIDDLE_MASK)) {
			if (unlikely(payload != mtu)) {
				rxe_dbg_qp(qp, "first or middle packet not mtu");
				return RESPST_ERR_LENGTH;
			}
		} else if (pkt->mask & RXE_END_MASK) {
			if (unlikely((payload == 0) || (payload > mtu))) {
				rxe_dbg_qp(qp, "last packet zero or too long");
				return RESPST_ERR_LENGTH;
			}
		}
	}

	/* See IBA C9-94 */
	if (pkt->mask & RXE_RETH_MASK) {
		if (reth_len(pkt) > (1U << 31)) {
			rxe_dbg_qp(qp, "dma length too long");
			return RESPST_ERR_LENGTH;
		}
	}

	if (pkt->mask & RXE_RDMA_OP_MASK)
		return RESPST_CHK_RKEY;
	else
		return RESPST_EXECUTE;
}

/* if the reth length field is zero we can assume nothing
 * about the rkey value and should not validate or use it.
 * Instead set qp->resp.rkey to 0 which is an invalid rkey
 * value since the minimum index part is 1.
 */
static void qp_resp_from_reth(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
{
	unsigned int length = reth_len(pkt);

	qp->resp.va = reth_va(pkt);
	qp->resp.offset = 0;
	qp->resp.resid = length;
	qp->resp.length = length;
	if (pkt->mask & RXE_READ_OR_WRITE_MASK && length == 0)
		qp->resp.rkey = 0;
	else
		qp->resp.rkey = reth_rkey(pkt);
}

static void qp_resp_from_atmeth(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
{
	qp->resp.va = atmeth_va(pkt);
	qp->resp.offset = 0;
	qp->resp.rkey = atmeth_rkey(pkt);
	qp->resp.resid = sizeof(u64);
}
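
/* An rkey may name either an MR or an MW bound to an MR; rkey_is_mw()
 * distinguishes the two by the index part of the key.  When the rkey
 * names an MW, check_rkey() below moves the reference to the
 * underlying MR before doing any range or length checks.
 */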
/* resolve the packet rkey to qp->resp.mr or set qp->resp.mr to NULL
 * if an invalid rkey is received or the rdma length is zero. For middle
 * or last packets use the stored value of mr.
 */
static enum resp_states check_rkey(struct rxe_qp *qp,
				   struct rxe_pkt_info *pkt)
{
	struct rxe_mr *mr = NULL;
	struct rxe_mw *mw = NULL;
	u64 va;
	u32 rkey;
	u32 resid;
	u32 pktlen;
	int mtu = qp->mtu;
	enum resp_states state;
	int access = 0;

	/* parse RETH or ATMETH header for first/only packets
	 * for va, length, rkey, etc. or use current value for
	 * middle/last packets.
	 */
	if (pkt->mask & (RXE_READ_OR_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
		if (pkt->mask & RXE_RETH_MASK)
			qp_resp_from_reth(qp, pkt);

		access = (pkt->mask & RXE_READ_MASK) ? IB_ACCESS_REMOTE_READ
						     : IB_ACCESS_REMOTE_WRITE;
	} else if (pkt->mask & RXE_FLUSH_MASK) {
		u32 flush_type = feth_plt(pkt);

		if (pkt->mask & RXE_RETH_MASK)
			qp_resp_from_reth(qp, pkt);

		if (flush_type & IB_FLUSH_GLOBAL)
			access |= IB_ACCESS_FLUSH_GLOBAL;
		if (flush_type & IB_FLUSH_PERSISTENT)
			access |= IB_ACCESS_FLUSH_PERSISTENT;
	} else if (pkt->mask & RXE_ATOMIC_MASK) {
		qp_resp_from_atmeth(qp, pkt);
		access = IB_ACCESS_REMOTE_ATOMIC;
	} else {
		/* shouldn't happen */
		WARN_ON(1);
	}

	/* A zero-byte read or write op is not required to
	 * set an addr or rkey. See C9-88
	 */
	if ((pkt->mask & RXE_READ_OR_WRITE_MASK) &&
	    (pkt->mask & RXE_RETH_MASK) && reth_len(pkt) == 0) {
		qp->resp.mr = NULL;
		return RESPST_EXECUTE;
	}

	va = qp->resp.va;
	rkey = qp->resp.rkey;
	resid = qp->resp.resid;
	pktlen = payload_size(pkt);

	if (rkey_is_mw(rkey)) {
		mw = rxe_lookup_mw(qp, access, rkey);
		if (!mw) {
			rxe_dbg_qp(qp, "no MW matches rkey %#x\n", rkey);
			state = RESPST_ERR_RKEY_VIOLATION;
			goto err;
		}

		mr = mw->mr;
		if (!mr) {
			rxe_dbg_qp(qp, "MW doesn't have an MR\n");
			state = RESPST_ERR_RKEY_VIOLATION;
			goto err;
		}

		if (mw->access & IB_ZERO_BASED)
			qp->resp.offset = mw->addr;

		rxe_get(mr);
		rxe_put(mw);
		mw = NULL;
	} else {
		mr = lookup_mr(qp->pd, access, rkey, RXE_LOOKUP_REMOTE);
		if (!mr) {
			rxe_dbg_qp(qp, "no MR matches rkey %#x\n", rkey);
			state = RESPST_ERR_RKEY_VIOLATION;
			goto err;
		}
	}

	if (pkt->mask & RXE_FLUSH_MASK) {
		/* FLUSH MR may not set va or resid
		 * no need to check range since we will flush whole mr
		 */
		if (feth_sel(pkt) == IB_FLUSH_MR)
			goto skip_check_range;
	}

	if (mr_check_range(mr, va + qp->resp.offset, resid)) {
		state = RESPST_ERR_RKEY_VIOLATION;
		goto err;
	}

skip_check_range:
	if (pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
		if (resid > mtu) {
			if (pktlen != mtu || bth_pad(pkt)) {
				state = RESPST_ERR_LENGTH;
				goto err;
			}
		} else {
			if (pktlen != resid) {
				state = RESPST_ERR_LENGTH;
				goto err;
			}
			if ((bth_pad(pkt) != (0x3 & (-resid)))) {
				/* This case may not be exactly that
				 * but nothing else fits.
				 */
				state = RESPST_ERR_LENGTH;
				goto err;
			}
		}
	}

	WARN_ON_ONCE(qp->resp.mr);

	qp->resp.mr = mr;
	return RESPST_EXECUTE;

err:
	qp->resp.mr = NULL;
	if (mr)
		rxe_put(mr);
	if (mw)
		rxe_put(mw);

	return state;
}

static enum resp_states send_data_in(struct rxe_qp *qp, void *data_addr,
				     int data_len)
{
	int err;

	err = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE, &qp->resp.wqe->dma,
			data_addr, data_len, RXE_TO_MR_OBJ);
	if (unlikely(err))
		return (err == -ENOSPC) ? RESPST_ERR_LENGTH
					: RESPST_ERR_MALFORMED_WQE;

	return RESPST_NONE;
}
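
/* send_data_in() above scatters inbound SEND payloads into the posted
 * receive WQE through copy_data(), while write_data_in() below copies
 * RDMA WRITE payloads directly into the target MR at the VA taken from
 * the RETH header; no receive WQE is consumed for an RDMA WRITE without
 * immediate data.
 */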
static enum resp_states write_data_in(struct rxe_qp *qp,
				      struct rxe_pkt_info *pkt)
{
	enum resp_states rc = RESPST_NONE;
	int err;
	int data_len = payload_size(pkt);

	err = rxe_mr_copy(qp->resp.mr, qp->resp.va + qp->resp.offset,
			  payload_addr(pkt), data_len, RXE_TO_MR_OBJ);
	if (err) {
		rc = RESPST_ERR_RKEY_VIOLATION;
		goto out;
	}

	qp->resp.va += data_len;
	qp->resp.resid -= data_len;

out:
	return rc;
}

static struct resp_res *rxe_prepare_res(struct rxe_qp *qp,
					struct rxe_pkt_info *pkt,
					int type)
{
	struct resp_res *res;
	u32 pkts;

	res = &qp->resp.resources[qp->resp.res_head];
	rxe_advance_resp_resource(qp);
	free_rd_atomic_resource(res);

	res->type = type;
	res->replay = 0;

	switch (type) {
	case RXE_READ_MASK:
		res->read.va = qp->resp.va + qp->resp.offset;
		res->read.va_org = qp->resp.va + qp->resp.offset;
		res->read.resid = qp->resp.resid;
		res->read.length = qp->resp.resid;
		res->read.rkey = qp->resp.rkey;

		pkts = max_t(u32, (reth_len(pkt) + qp->mtu - 1)/qp->mtu, 1);
		res->first_psn = pkt->psn;
		res->cur_psn = pkt->psn;
		res->last_psn = (pkt->psn + pkts - 1) & BTH_PSN_MASK;

		res->state = rdatm_res_state_new;
		break;
	case RXE_ATOMIC_MASK:
	case RXE_ATOMIC_WRITE_MASK:
		res->first_psn = pkt->psn;
		res->last_psn = pkt->psn;
		res->cur_psn = pkt->psn;
		break;
	case RXE_FLUSH_MASK:
		res->flush.va = qp->resp.va + qp->resp.offset;
		res->flush.length = qp->resp.length;
		res->flush.type = feth_plt(pkt);
		res->flush.level = feth_sel(pkt);
	}

	return res;
}

static enum resp_states process_flush(struct rxe_qp *qp,
				      struct rxe_pkt_info *pkt)
{
	u64 length, start;
	struct rxe_mr *mr = qp->resp.mr;
	struct resp_res *res = qp->resp.res;

	/* oA19-14, oA19-15 */
	if (res && res->replay)
		return RESPST_ACKNOWLEDGE;
	else if (!res) {
		res = rxe_prepare_res(qp, pkt, RXE_FLUSH_MASK);
		qp->resp.res = res;
	}

	if (res->flush.level == IB_FLUSH_RANGE) {
		start = res->flush.va;
		length = res->flush.length;
	} else { /* level == IB_FLUSH_MR */
		start = mr->ibmr.iova;
		length = mr->ibmr.length;
	}

	if (res->flush.type & IB_FLUSH_PERSISTENT) {
		if (rxe_flush_pmem_iova(mr, start, length))
			return RESPST_ERR_RKEY_VIOLATION;
		/* Make data persistent. */
		wmb();
	} else if (res->flush.type & IB_FLUSH_GLOBAL) {
		/* Make data globally visible. */
		wmb();
	}

	qp->resp.msn++;

	/* next expected psn, read handles this separately */
	qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
	qp->resp.ack_psn = qp->resp.psn;

	qp->resp.opcode = pkt->opcode;
	qp->resp.status = IB_WC_SUCCESS;

	return RESPST_ACKNOWLEDGE;
}
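
/* rxe_prepare_res() above recycles one of the qp->attr.max_dest_rd_atomic
 * responder resources for each read, atomic, atomic write or flush
 * request.  The saved state is what allows a duplicate of one of these
 * requests to be replayed (see duplicate_request()) without re-executing
 * it: for atomics the original result is returned from
 * res->atomic.orig_val rather than performing the operation again.
 */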
static enum resp_states atomic_reply(struct rxe_qp *qp,
				     struct rxe_pkt_info *pkt)
{
	struct rxe_mr *mr = qp->resp.mr;
	struct resp_res *res = qp->resp.res;
	int err;

	if (!res) {
		res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_MASK);
		qp->resp.res = res;
	}

	if (!res->replay) {
		u64 iova = qp->resp.va + qp->resp.offset;

		err = rxe_mr_do_atomic_op(mr, iova, pkt->opcode,
					  atmeth_comp(pkt),
					  atmeth_swap_add(pkt),
					  &res->atomic.orig_val);
		if (err)
			return err;

		qp->resp.msn++;

		/* next expected psn, read handles this separately */
		qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
		qp->resp.ack_psn = qp->resp.psn;

		qp->resp.opcode = pkt->opcode;
		qp->resp.status = IB_WC_SUCCESS;
	}

	return RESPST_ACKNOWLEDGE;
}

static enum resp_states atomic_write_reply(struct rxe_qp *qp,
					   struct rxe_pkt_info *pkt)
{
	struct resp_res *res = qp->resp.res;
	struct rxe_mr *mr;
	u64 value;
	u64 iova;
	int err;

	if (!res) {
		res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_WRITE_MASK);
		qp->resp.res = res;
	}

	if (res->replay)
		return RESPST_ACKNOWLEDGE;

	mr = qp->resp.mr;
	value = *(u64 *)payload_addr(pkt);
	iova = qp->resp.va + qp->resp.offset;

	err = rxe_mr_do_atomic_write(mr, iova, value);
	if (err)
		return err;

	qp->resp.resid = 0;
	qp->resp.msn++;

	/* next expected psn, read handles this separately */
	qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
	qp->resp.ack_psn = qp->resp.psn;

	qp->resp.opcode = pkt->opcode;
	qp->resp.status = IB_WC_SUCCESS;

	return RESPST_ACKNOWLEDGE;
}

static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
					  struct rxe_pkt_info *ack,
					  int opcode,
					  int payload,
					  u32 psn,
					  u8 syndrome)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	struct sk_buff *skb;
	int paylen;
	int pad;
	int err;

	/*
	 * allocate packet
	 */
	pad = (-payload) & 0x3;
	paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE;

	skb = rxe_init_packet(rxe, &qp->pri_av, paylen, ack);
	if (!skb)
		return NULL;

	ack->qp = qp;
	ack->opcode = opcode;
	ack->mask = rxe_opcode[opcode].mask;
	ack->paylen = paylen;
	ack->psn = psn;

	bth_init(ack, opcode, 0, 0, pad, IB_DEFAULT_PKEY_FULL,
		 qp->attr.dest_qp_num, 0, psn);

	if (ack->mask & RXE_AETH_MASK) {
		aeth_set_syn(ack, syndrome);
		aeth_set_msn(ack, qp->resp.msn);
	}

	if (ack->mask & RXE_ATMACK_MASK)
		atmack_set_orig(ack, qp->resp.res->atomic.orig_val);

	err = rxe_prepare(&qp->pri_av, ack, skb);
	if (err) {
		kfree_skb(skb);
		return NULL;
	}

	return skb;
}
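
/* Note on the length arithmetic in prepare_ack_packet() above:
 * pad = (-payload) & 0x3 rounds the payload up to the next 4-byte
 * boundary (e.g. payload 5 -> pad 3, payload 8 -> pad 0), and paylen
 * adds the opcode-specific header length plus the 4-byte ICRC that
 * terminates the packet.
 */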
/**
 * rxe_recheck_mr - revalidate MR from rkey and get a reference
 * @qp: the qp
 * @rkey: the rkey
 *
 * This code allows the MR to be invalidated or deregistered, or the MW,
 * if one was used, to be invalidated or deallocated. It is assumed that
 * the access permissions, if originally good, are still OK and that the
 * mappings are unchanged.
 *
 * TODO: If someone reregisters an MR to change its size or
 * access permissions during the processing of an RDMA read
 * we should kill the responder resource and complete the
 * operation with an error.
 *
 * Return: mr on success else NULL
 */
static struct rxe_mr *rxe_recheck_mr(struct rxe_qp *qp, u32 rkey)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	struct rxe_mr *mr;
	struct rxe_mw *mw;

	if (rkey_is_mw(rkey)) {
		mw = rxe_pool_get_index(&rxe->mw_pool, rkey >> 8);
		if (!mw)
			return NULL;

		mr = mw->mr;
		if (mw->rkey != rkey || mw->state != RXE_MW_STATE_VALID ||
		    !mr || mr->state != RXE_MR_STATE_VALID) {
			rxe_put(mw);
			return NULL;
		}

		rxe_get(mr);
		rxe_put(mw);

		return mr;
	}

	mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8);
	if (!mr)
		return NULL;

	if (mr->rkey != rkey || mr->state != RXE_MR_STATE_VALID) {
		rxe_put(mr);
		return NULL;
	}

	return mr;
}
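
/* read_reply() below generates at most one response packet per
 * invocation.  The first call picks the READ_RESPONSE_FIRST or _ONLY
 * opcode depending on whether the remaining length fits in one MTU;
 * later calls (reached because RESPST_DONE returns control to the task
 * while qp->resp.res is still set, see get_req()) send _MIDDLE and
 * _LAST packets until res->read.resid reaches zero.
 */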
/* RDMA read response. If res is not NULL, then we have a current RDMA request
 * being processed or replayed.
 */
static enum resp_states read_reply(struct rxe_qp *qp,
				   struct rxe_pkt_info *req_pkt)
{
	struct rxe_pkt_info ack_pkt;
	struct sk_buff *skb;
	int mtu = qp->mtu;
	enum resp_states state;
	int payload;
	int opcode;
	int err;
	struct resp_res *res = qp->resp.res;
	struct rxe_mr *mr;

	if (!res) {
		res = rxe_prepare_res(qp, req_pkt, RXE_READ_MASK);
		qp->resp.res = res;
	}

	if (res->state == rdatm_res_state_new) {
		if (!res->replay || qp->resp.length == 0) {
			/* if length == 0 mr will be NULL (is ok)
			 * otherwise qp->resp.mr holds a ref on mr
			 * which we transfer to mr and drop below.
			 */
			mr = qp->resp.mr;
			qp->resp.mr = NULL;
		} else {
			mr = rxe_recheck_mr(qp, res->read.rkey);
			if (!mr)
				return RESPST_ERR_RKEY_VIOLATION;
		}

		if (res->read.resid <= mtu)
			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY;
		else
			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST;
	} else {
		/* re-lookup mr from rkey on all later packets.
		 * length will be non-zero. This can fail if someone
		 * modifies or destroys the mr since the first packet.
		 */
		mr = rxe_recheck_mr(qp, res->read.rkey);
		if (!mr)
			return RESPST_ERR_RKEY_VIOLATION;

		if (res->read.resid > mtu)
			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE;
		else
			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST;
	}

	res->state = rdatm_res_state_next;

	payload = min_t(int, res->read.resid, mtu);

	skb = prepare_ack_packet(qp, &ack_pkt, opcode, payload,
				 res->cur_psn, AETH_ACK_UNLIMITED);
	if (!skb) {
		state = RESPST_ERR_RNR;
		goto err_out;
	}

	err = rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt),
			  payload, RXE_FROM_MR_OBJ);
	if (err) {
		kfree_skb(skb);
		state = RESPST_ERR_RKEY_VIOLATION;
		goto err_out;
	}

	if (bth_pad(&ack_pkt)) {
		u8 *pad = payload_addr(&ack_pkt) + payload;

		memset(pad, 0, bth_pad(&ack_pkt));
	}

	/* rxe_xmit_packet always consumes the skb */
	err = rxe_xmit_packet(qp, &ack_pkt, skb);
	if (err) {
		state = RESPST_ERR_RNR;
		goto err_out;
	}

	res->read.va += payload;
	res->read.resid -= payload;
	res->cur_psn = (res->cur_psn + 1) & BTH_PSN_MASK;

	if (res->read.resid > 0) {
		state = RESPST_DONE;
	} else {
		qp->resp.res = NULL;
		if (!res->replay)
			qp->resp.opcode = -1;
		if (psn_compare(res->cur_psn, qp->resp.psn) >= 0)
			qp->resp.psn = res->cur_psn;
		state = RESPST_CLEANUP;
	}

err_out:
	if (mr)
		rxe_put(mr);
	return state;
}

static int invalidate_rkey(struct rxe_qp *qp, u32 rkey)
{
	if (rkey_is_mw(rkey))
		return rxe_invalidate_mw(qp, rkey);
	else
		return rxe_invalidate_mr(qp, rkey);
}

/* Executes a new request. A retried request never reaches this function
 * (sends and writes are discarded, and reads and atomics are retried
 * elsewhere).
 */
static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
{
	enum resp_states err;
	struct sk_buff *skb = PKT_TO_SKB(pkt);
	union rdma_network_hdr hdr;

	if (pkt->mask & RXE_SEND_MASK) {
		if (qp_type(qp) == IB_QPT_UD ||
		    qp_type(qp) == IB_QPT_GSI) {
			if (skb->protocol == htons(ETH_P_IP)) {
				memset(&hdr.reserved, 0,
				       sizeof(hdr.reserved));
				memcpy(&hdr.roce4grh, ip_hdr(skb),
				       sizeof(hdr.roce4grh));
				err = send_data_in(qp, &hdr, sizeof(hdr));
			} else {
				err = send_data_in(qp, ipv6_hdr(skb),
						   sizeof(hdr));
			}
			if (err)
				return err;
		}
		err = send_data_in(qp, payload_addr(pkt), payload_size(pkt));
		if (err)
			return err;
	} else if (pkt->mask & RXE_WRITE_MASK) {
		err = write_data_in(qp, pkt);
		if (err)
			return err;
	} else if (pkt->mask & RXE_READ_MASK) {
		/* For RDMA Read we can increment the msn now. See C9-148. */
		qp->resp.msn++;
		return RESPST_READ_REPLY;
	} else if (pkt->mask & RXE_ATOMIC_MASK) {
		return RESPST_ATOMIC_REPLY;
	} else if (pkt->mask & RXE_ATOMIC_WRITE_MASK) {
		return RESPST_ATOMIC_WRITE_REPLY;
	} else if (pkt->mask & RXE_FLUSH_MASK) {
		return RESPST_PROCESS_FLUSH;
	} else {
		/* Unreachable */
		WARN_ON_ONCE(1);
	}

	if (pkt->mask & RXE_IETH_MASK) {
		u32 rkey = ieth_rkey(pkt);

		err = invalidate_rkey(qp, rkey);
		if (err)
			return RESPST_ERR_INVALIDATE_RKEY;
	}

	if (pkt->mask & RXE_END_MASK)
		/* We successfully processed this new request. */
		qp->resp.msn++;

	/* next expected psn, read handles this separately */
	qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
	qp->resp.ack_psn = qp->resp.psn;

	qp->resp.opcode = pkt->opcode;
	qp->resp.status = IB_WC_SUCCESS;

	if (pkt->mask & RXE_COMP_MASK)
		return RESPST_COMPLETE;
	else if (qp_type(qp) == IB_QPT_RC)
		return RESPST_ACKNOWLEDGE;
	else
		return RESPST_CLEANUP;
}
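
/* do_complete() builds the receive completion.  The CQE layout differs
 * between kernel consumers (struct ib_wc) and user space consumers
 * (struct ib_uverbs_wc), so the fields are filled in through whichever
 * view of the rxe_cqe union the completion queue expects.
 */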
static enum resp_states do_complete(struct rxe_qp *qp,
				    struct rxe_pkt_info *pkt)
{
	struct rxe_cqe cqe;
	struct ib_wc *wc = &cqe.ibwc;
	struct ib_uverbs_wc *uwc = &cqe.uibwc;
	struct rxe_recv_wqe *wqe = qp->resp.wqe;
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	unsigned long flags;

	if (!wqe)
		goto finish;

	memset(&cqe, 0, sizeof(cqe));

	if (qp->rcq->is_user) {
		uwc->status = qp->resp.status;
		uwc->qp_num = qp->ibqp.qp_num;
		uwc->wr_id = wqe->wr_id;
	} else {
		wc->status = qp->resp.status;
		wc->qp = &qp->ibqp;
		wc->wr_id = wqe->wr_id;
	}

	if (wc->status == IB_WC_SUCCESS) {
		rxe_counter_inc(rxe, RXE_CNT_RDMA_RECV);
		wc->opcode = (pkt->mask & RXE_IMMDT_MASK &&
				pkt->mask & RXE_WRITE_MASK) ?
					IB_WC_RECV_RDMA_WITH_IMM : IB_WC_RECV;
		wc->byte_len = (pkt->mask & RXE_IMMDT_MASK &&
				pkt->mask & RXE_WRITE_MASK) ?
					qp->resp.length : wqe->dma.length - wqe->dma.resid;

		/* fields after byte_len are different between kernel and user
		 * space
		 */
		if (qp->rcq->is_user) {
			uwc->wc_flags = IB_WC_GRH;

			if (pkt->mask & RXE_IMMDT_MASK) {
				uwc->wc_flags |= IB_WC_WITH_IMM;
				uwc->ex.imm_data = immdt_imm(pkt);
			}

			if (pkt->mask & RXE_IETH_MASK) {
				uwc->wc_flags |= IB_WC_WITH_INVALIDATE;
				uwc->ex.invalidate_rkey = ieth_rkey(pkt);
			}

			if (pkt->mask & RXE_DETH_MASK)
				uwc->src_qp = deth_sqp(pkt);

			uwc->port_num = qp->attr.port_num;
		} else {
			struct sk_buff *skb = PKT_TO_SKB(pkt);

			wc->wc_flags = IB_WC_GRH | IB_WC_WITH_NETWORK_HDR_TYPE;
			if (skb->protocol == htons(ETH_P_IP))
				wc->network_hdr_type = RDMA_NETWORK_IPV4;
			else
				wc->network_hdr_type = RDMA_NETWORK_IPV6;

			if (is_vlan_dev(skb->dev)) {
				wc->wc_flags |= IB_WC_WITH_VLAN;
				wc->vlan_id = vlan_dev_vlan_id(skb->dev);
			}

			if (pkt->mask & RXE_IMMDT_MASK) {
				wc->wc_flags |= IB_WC_WITH_IMM;
				wc->ex.imm_data = immdt_imm(pkt);
			}

			if (pkt->mask & RXE_IETH_MASK) {
				wc->wc_flags |= IB_WC_WITH_INVALIDATE;
				wc->ex.invalidate_rkey = ieth_rkey(pkt);
			}

			if (pkt->mask & RXE_DETH_MASK)
				wc->src_qp = deth_sqp(pkt);

			wc->port_num = qp->attr.port_num;
		}
	} else {
		if (wc->status != IB_WC_WR_FLUSH_ERR)
			rxe_err_qp(qp, "non-flush error status = %d",
				   wc->status);
	}

	/* have copy for srq and reference for !srq */
	if (!qp->srq)
		queue_advance_consumer(qp->rq.queue, QUEUE_TYPE_FROM_CLIENT);

	qp->resp.wqe = NULL;

	if (rxe_cq_post(qp->rcq, &cqe, pkt ? bth_se(pkt) : 1))
		return RESPST_ERR_CQ_OVERFLOW;

finish:
	spin_lock_irqsave(&qp->state_lock, flags);
	if (unlikely(qp_state(qp) == IB_QPS_ERR)) {
		spin_unlock_irqrestore(&qp->state_lock, flags);
		return RESPST_CHK_RESOURCE;
	}
	spin_unlock_irqrestore(&qp->state_lock, flags);

	if (unlikely(!pkt))
		return RESPST_DONE;
	if (qp_type(qp) == IB_QPT_RC)
		return RESPST_ACKNOWLEDGE;
	else
		return RESPST_CLEANUP;
}
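
/* Three flavours of acknowledgement are sent below: a plain ACK (also
 * used for NAKs by passing the appropriate AETH syndrome), an atomic
 * ACK carrying the original value from the responder resource, and a
 * zero-length RDMA READ response used to acknowledge flush and atomic
 * write requests.  The latter two clear qp->resp.res, which would
 * otherwise make get_req() treat the next packet as part of a
 * multi-packet read reply.
 */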
static int send_common_ack(struct rxe_qp *qp, u8 syndrome, u32 psn,
			   int opcode, const char *msg)
{
	int err;
	struct rxe_pkt_info ack_pkt;
	struct sk_buff *skb;

	skb = prepare_ack_packet(qp, &ack_pkt, opcode, 0, psn, syndrome);
	if (!skb)
		return -ENOMEM;

	err = rxe_xmit_packet(qp, &ack_pkt, skb);
	if (err)
		rxe_dbg_qp(qp, "Failed sending %s\n", msg);

	return err;
}

static int send_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
{
	return send_common_ack(qp, syndrome, psn,
			       IB_OPCODE_RC_ACKNOWLEDGE, "ACK");
}

static int send_atomic_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
{
	int ret = send_common_ack(qp, syndrome, psn,
				  IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE,
				  "ATOMIC ACK");

	/* have to clear this since it is used to trigger
	 * long read replies
	 */
	qp->resp.res = NULL;
	return ret;
}

static int send_read_response_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
{
	int ret = send_common_ack(qp, syndrome, psn,
				  IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY,
				  "RDMA READ response of length zero ACK");

	/* have to clear this since it is used to trigger
	 * long read replies
	 */
	qp->resp.res = NULL;
	return ret;
}

static enum resp_states acknowledge(struct rxe_qp *qp,
				    struct rxe_pkt_info *pkt)
{
	if (qp_type(qp) != IB_QPT_RC)
		return RESPST_CLEANUP;

	if (qp->resp.aeth_syndrome != AETH_ACK_UNLIMITED)
		send_ack(qp, qp->resp.aeth_syndrome, pkt->psn);
	else if (pkt->mask & RXE_ATOMIC_MASK)
		send_atomic_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
	else if (pkt->mask & (RXE_FLUSH_MASK | RXE_ATOMIC_WRITE_MASK))
		send_read_response_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
	else if (bth_ack(pkt))
		send_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);

	return RESPST_CLEANUP;
}

static enum resp_states cleanup(struct rxe_qp *qp,
				struct rxe_pkt_info *pkt)
{
	struct sk_buff *skb;

	if (pkt) {
		skb = skb_dequeue(&qp->req_pkts);
		rxe_put(qp);
		kfree_skb(skb);
		ib_device_put(qp->ibqp.device);
	}

	if (qp->resp.mr) {
		rxe_put(qp->resp.mr);
		qp->resp.mr = NULL;
	}

	return RESPST_DONE;
}

static struct resp_res *find_resource(struct rxe_qp *qp, u32 psn)
{
	int i;

	for (i = 0; i < qp->attr.max_dest_rd_atomic; i++) {
		struct resp_res *res = &qp->resp.resources[i];

		if (res->type == 0)
			continue;

		if (psn_compare(psn, res->first_psn) >= 0 &&
		    psn_compare(psn, res->last_psn) <= 0) {
			return res;
		}
	}

	return NULL;
}
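
/* Duplicate requests are handled per IBA class: duplicate sends and
 * writes are simply acknowledged again at the previous PSN, duplicate
 * reads are replayed from the saved responder resource (and may ask for
 * a subset of the original range), and duplicate atomics, atomic writes
 * and flushes return the saved result without re-executing.  If no
 * matching resource is found the duplicate is silently dropped
 * (Class D).
 */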
static enum resp_states duplicate_request(struct rxe_qp *qp,
					  struct rxe_pkt_info *pkt)
{
	enum resp_states rc;
	u32 prev_psn = (qp->resp.ack_psn - 1) & BTH_PSN_MASK;

	if (pkt->mask & RXE_SEND_MASK ||
	    pkt->mask & RXE_WRITE_MASK) {
		/* SEND. Ack again and cleanup. C9-105. */
		send_ack(qp, AETH_ACK_UNLIMITED, prev_psn);
		return RESPST_CLEANUP;
	} else if (pkt->mask & RXE_FLUSH_MASK) {
		struct resp_res *res;

		/* Find the operation in our list of responder resources. */
		res = find_resource(qp, pkt->psn);
		if (res) {
			res->replay = 1;
			res->cur_psn = pkt->psn;
			qp->resp.res = res;
			rc = RESPST_PROCESS_FLUSH;
			goto out;
		}

		/* Resource not found. Class D error. Drop the request. */
		rc = RESPST_CLEANUP;
		goto out;
	} else if (pkt->mask & RXE_READ_MASK) {
		struct resp_res *res;

		res = find_resource(qp, pkt->psn);
		if (!res) {
			/* Resource not found. Class D error. Drop the
			 * request.
			 */
			rc = RESPST_CLEANUP;
			goto out;
		} else {
			/* Ensure this new request is the same as the previous
			 * one or a subset of it.
			 */
			u64 iova = reth_va(pkt);
			u32 resid = reth_len(pkt);

			if (iova < res->read.va_org ||
			    resid > res->read.length ||
			    (iova + resid) > (res->read.va_org +
					      res->read.length)) {
				rc = RESPST_CLEANUP;
				goto out;
			}

			if (reth_rkey(pkt) != res->read.rkey) {
				rc = RESPST_CLEANUP;
				goto out;
			}

			res->cur_psn = pkt->psn;
			res->state = (pkt->psn == res->first_psn) ?
					rdatm_res_state_new :
					rdatm_res_state_replay;
			res->replay = 1;

			/* Reset the resource, except length. */
			res->read.va_org = iova;
			res->read.va = iova;
			res->read.resid = resid;

			/* Replay the RDMA read reply. */
			qp->resp.res = res;
			rc = RESPST_READ_REPLY;
			goto out;
		}
	} else {
		struct resp_res *res;

		/* Find the operation in our list of responder resources. */
		res = find_resource(qp, pkt->psn);
		if (res) {
			res->replay = 1;
			res->cur_psn = pkt->psn;
			qp->resp.res = res;
			rc = pkt->mask & RXE_ATOMIC_MASK ?
					RESPST_ATOMIC_REPLY :
					RESPST_ATOMIC_WRITE_REPLY;
			goto out;
		}

		/* Resource not found. Class D error. Drop the request. */
		rc = RESPST_CLEANUP;
		goto out;
	}
out:
	return rc;
}

/* Process a class A or C. Both are treated the same in this implementation. */
static void do_class_ac_error(struct rxe_qp *qp, u8 syndrome,
			      enum ib_wc_status status)
{
	qp->resp.aeth_syndrome = syndrome;
	qp->resp.status = status;

	/* indicate that we should go through the ERROR state */
	qp->resp.goto_error = 1;
}

static enum resp_states do_class_d1e_error(struct rxe_qp *qp)
{
	/* UC */
	if (qp->srq) {
		/* Class E */
		qp->resp.drop_msg = 1;
		if (qp->resp.wqe) {
			qp->resp.status = IB_WC_REM_INV_REQ_ERR;
			return RESPST_COMPLETE;
		} else {
			return RESPST_CLEANUP;
		}
	} else {
		/* Class D1. This packet may be the start of a
		 * new message and could be valid. The previous
		 * message is invalid and ignored. Reset the
		 * recv wr to its original state.
		 */
		if (qp->resp.wqe) {
			qp->resp.wqe->dma.resid = qp->resp.wqe->dma.length;
			qp->resp.wqe->dma.cur_sge = 0;
			qp->resp.wqe->dma.sge_offset = 0;
			qp->resp.opcode = -1;
		}

		if (qp->resp.mr) {
			rxe_put(qp->resp.mr);
			qp->resp.mr = NULL;
		}

		return RESPST_CLEANUP;
	}
}
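
/* The two helpers below are used when the QP is reset or moves to the
 * error state: pending request packets are dropped and, if requested,
 * the outstanding receive WQEs are completed in error so the consumer
 * can reclaim them.
 */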
/* drain incoming request packet queue */
static void drain_req_pkts(struct rxe_qp *qp)
{
	struct sk_buff *skb;

	while ((skb = skb_dequeue(&qp->req_pkts))) {
		rxe_put(qp);
		kfree_skb(skb);
		ib_device_put(qp->ibqp.device);
	}
}

/* complete receive wqe with flush error */
static int flush_recv_wqe(struct rxe_qp *qp, struct rxe_recv_wqe *wqe)
{
	struct rxe_cqe cqe = {};
	struct ib_wc *wc = &cqe.ibwc;
	struct ib_uverbs_wc *uwc = &cqe.uibwc;
	int err;

	if (qp->rcq->is_user) {
		uwc->wr_id = wqe->wr_id;
		uwc->status = IB_WC_WR_FLUSH_ERR;
		uwc->qp_num = qp_num(qp);
	} else {
		wc->wr_id = wqe->wr_id;
		wc->status = IB_WC_WR_FLUSH_ERR;
		wc->qp = &qp->ibqp;
	}

	err = rxe_cq_post(qp->rcq, &cqe, 0);
	if (err)
		rxe_dbg_cq(qp->rcq, "post cq failed err = %d", err);

	return err;
}

/* drain and optionally complete the receive queue.
 * If unable to complete a wqe, stop completing and
 * just flush the remaining wqes.
 */
static void flush_recv_queue(struct rxe_qp *qp, bool notify)
{
	struct rxe_queue *q = qp->rq.queue;
	struct rxe_recv_wqe *wqe;
	int err;

	if (qp->srq) {
		if (notify && qp->ibqp.event_handler) {
			struct ib_event ev;

			ev.device = qp->ibqp.device;
			ev.element.qp = &qp->ibqp;
			ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
			qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
		}
		return;
	}

	/* recv queue not created. nothing to do. */
	if (!qp->rq.queue)
		return;

	while ((wqe = queue_head(q, q->type))) {
		if (notify) {
			err = flush_recv_wqe(qp, wqe);
			if (err)
				notify = 0;
		}
		queue_advance_consumer(q, q->type);
	}

	qp->resp.wqe = NULL;
}
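
/* rxe_responder() is the top of the responder state machine.  It runs
 * from the QP's responder task, starts in RESPST_GET_REQ and walks the
 * RESPST_* states for each request packet until it reaches RESPST_DONE
 * (return 0, keep the task looping) or RESPST_EXIT / RESPST_ERROR
 * (return -EAGAIN, stop until new work arrives), matching the comment
 * at the bottom of the function.
 */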
int rxe_responder(struct rxe_qp *qp)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	enum resp_states state;
	struct rxe_pkt_info *pkt = NULL;
	int ret;
	unsigned long flags;

	spin_lock_irqsave(&qp->state_lock, flags);
	if (!qp->valid || qp_state(qp) == IB_QPS_ERR ||
	    qp_state(qp) == IB_QPS_RESET) {
		bool notify = qp->valid && (qp_state(qp) == IB_QPS_ERR);

		drain_req_pkts(qp);
		flush_recv_queue(qp, notify);
		spin_unlock_irqrestore(&qp->state_lock, flags);
		goto exit;
	}
	spin_unlock_irqrestore(&qp->state_lock, flags);

	qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED;

	state = RESPST_GET_REQ;

	while (1) {
		rxe_dbg_qp(qp, "state = %s\n", resp_state_name[state]);
		switch (state) {
		case RESPST_GET_REQ:
			state = get_req(qp, &pkt);
			break;
		case RESPST_CHK_PSN:
			state = check_psn(qp, pkt);
			break;
		case RESPST_CHK_OP_SEQ:
			state = check_op_seq(qp, pkt);
			break;
		case RESPST_CHK_OP_VALID:
			state = check_op_valid(qp, pkt);
			break;
		case RESPST_CHK_RESOURCE:
			state = check_resource(qp, pkt);
			break;
		case RESPST_CHK_LENGTH:
			state = rxe_resp_check_length(qp, pkt);
			break;
		case RESPST_CHK_RKEY:
			state = check_rkey(qp, pkt);
			break;
		case RESPST_EXECUTE:
			state = execute(qp, pkt);
			break;
		case RESPST_COMPLETE:
			state = do_complete(qp, pkt);
			break;
		case RESPST_READ_REPLY:
			state = read_reply(qp, pkt);
			break;
		case RESPST_ATOMIC_REPLY:
			state = atomic_reply(qp, pkt);
			break;
		case RESPST_ATOMIC_WRITE_REPLY:
			state = atomic_write_reply(qp, pkt);
			break;
		case RESPST_PROCESS_FLUSH:
			state = process_flush(qp, pkt);
			break;
		case RESPST_ACKNOWLEDGE:
			state = acknowledge(qp, pkt);
			break;
		case RESPST_CLEANUP:
			state = cleanup(qp, pkt);
			break;
		case RESPST_DUPLICATE_REQUEST:
			state = duplicate_request(qp, pkt);
			break;
		case RESPST_ERR_PSN_OUT_OF_SEQ:
			/* RC only - Class B. Drop packet. */
			send_ack(qp, AETH_NAK_PSN_SEQ_ERROR, qp->resp.psn);
			state = RESPST_CLEANUP;
			break;

		case RESPST_ERR_TOO_MANY_RDMA_ATM_REQ:
		case RESPST_ERR_MISSING_OPCODE_FIRST:
		case RESPST_ERR_MISSING_OPCODE_LAST_C:
		case RESPST_ERR_UNSUPPORTED_OPCODE:
		case RESPST_ERR_MISALIGNED_ATOMIC:
			/* RC Only - Class C. */
			do_class_ac_error(qp, AETH_NAK_INVALID_REQ,
					  IB_WC_REM_INV_REQ_ERR);
			state = RESPST_COMPLETE;
			break;

		case RESPST_ERR_MISSING_OPCODE_LAST_D1E:
			state = do_class_d1e_error(qp);
			break;
		case RESPST_ERR_RNR:
			if (qp_type(qp) == IB_QPT_RC) {
				rxe_counter_inc(rxe, RXE_CNT_SND_RNR);
				/* RC - class B */
				send_ack(qp, AETH_RNR_NAK |
					 (~AETH_TYPE_MASK &
					  qp->attr.min_rnr_timer),
					 pkt->psn);
			} else {
				/* UD/UC - class D */
				qp->resp.drop_msg = 1;
			}
			state = RESPST_CLEANUP;
			break;

		case RESPST_ERR_RKEY_VIOLATION:
			if (qp_type(qp) == IB_QPT_RC) {
				/* Class C */
				do_class_ac_error(qp, AETH_NAK_REM_ACC_ERR,
						  IB_WC_REM_ACCESS_ERR);
				state = RESPST_COMPLETE;
			} else {
				qp->resp.drop_msg = 1;
				if (qp->srq) {
					/* UC/SRQ Class D */
					qp->resp.status = IB_WC_REM_ACCESS_ERR;
					state = RESPST_COMPLETE;
				} else {
					/* UC/non-SRQ Class E. */
					state = RESPST_CLEANUP;
				}
			}
			break;

		case RESPST_ERR_INVALIDATE_RKEY:
			/* RC - Class J. */
			qp->resp.goto_error = 1;
			qp->resp.status = IB_WC_REM_INV_REQ_ERR;
			state = RESPST_COMPLETE;
			break;

		case RESPST_ERR_LENGTH:
			if (qp_type(qp) == IB_QPT_RC) {
				/* Class C */
				do_class_ac_error(qp, AETH_NAK_INVALID_REQ,
						  IB_WC_REM_INV_REQ_ERR);
				state = RESPST_COMPLETE;
			} else if (qp->srq) {
				/* UC/UD - class E */
				qp->resp.status = IB_WC_REM_INV_REQ_ERR;
				state = RESPST_COMPLETE;
			} else {
				/* UC/UD - class D */
				qp->resp.drop_msg = 1;
				state = RESPST_CLEANUP;
			}
			break;

		case RESPST_ERR_MALFORMED_WQE:
			/* All, Class A. */
			do_class_ac_error(qp, AETH_NAK_REM_OP_ERR,
					  IB_WC_LOC_QP_OP_ERR);
			state = RESPST_COMPLETE;
			break;

		case RESPST_ERR_CQ_OVERFLOW:
			/* All - Class G */
			state = RESPST_ERROR;
			break;

		case RESPST_DONE:
			if (qp->resp.goto_error) {
				state = RESPST_ERROR;
				break;
			}

			goto done;

		case RESPST_EXIT:
			if (qp->resp.goto_error) {
				state = RESPST_ERROR;
				break;
			}

			goto exit;

		case RESPST_ERROR:
			qp->resp.goto_error = 0;
			rxe_dbg_qp(qp, "moved to error state\n");
			rxe_qp_error(qp);
			goto exit;

		default:
			WARN_ON_ONCE(1);
		}
	}

	/* A non-zero return value will cause rxe_do_task to
	 * exit its loop and end the work item. A zero return
	 * will continue looping and return to rxe_responder
	 */
done:
	ret = 0;
	goto out;
exit:
	ret = -EAGAIN;
out:
	return ret;
}