1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 /* 3 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. 4 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 5 */ 6 7 #include <linux/skbuff.h> 8 9 #include "rxe.h" 10 #include "rxe_loc.h" 11 #include "rxe_queue.h" 12 13 static char *resp_state_name[] = { 14 [RESPST_NONE] = "NONE", 15 [RESPST_GET_REQ] = "GET_REQ", 16 [RESPST_CHK_PSN] = "CHK_PSN", 17 [RESPST_CHK_OP_SEQ] = "CHK_OP_SEQ", 18 [RESPST_CHK_OP_VALID] = "CHK_OP_VALID", 19 [RESPST_CHK_RESOURCE] = "CHK_RESOURCE", 20 [RESPST_CHK_LENGTH] = "CHK_LENGTH", 21 [RESPST_CHK_RKEY] = "CHK_RKEY", 22 [RESPST_EXECUTE] = "EXECUTE", 23 [RESPST_READ_REPLY] = "READ_REPLY", 24 [RESPST_ATOMIC_REPLY] = "ATOMIC_REPLY", 25 [RESPST_ATOMIC_WRITE_REPLY] = "ATOMIC_WRITE_REPLY", 26 [RESPST_PROCESS_FLUSH] = "PROCESS_FLUSH", 27 [RESPST_COMPLETE] = "COMPLETE", 28 [RESPST_ACKNOWLEDGE] = "ACKNOWLEDGE", 29 [RESPST_CLEANUP] = "CLEANUP", 30 [RESPST_DUPLICATE_REQUEST] = "DUPLICATE_REQUEST", 31 [RESPST_ERR_MALFORMED_WQE] = "ERR_MALFORMED_WQE", 32 [RESPST_ERR_UNSUPPORTED_OPCODE] = "ERR_UNSUPPORTED_OPCODE", 33 [RESPST_ERR_MISALIGNED_ATOMIC] = "ERR_MISALIGNED_ATOMIC", 34 [RESPST_ERR_PSN_OUT_OF_SEQ] = "ERR_PSN_OUT_OF_SEQ", 35 [RESPST_ERR_MISSING_OPCODE_FIRST] = "ERR_MISSING_OPCODE_FIRST", 36 [RESPST_ERR_MISSING_OPCODE_LAST_C] = "ERR_MISSING_OPCODE_LAST_C", 37 [RESPST_ERR_MISSING_OPCODE_LAST_D1E] = "ERR_MISSING_OPCODE_LAST_D1E", 38 [RESPST_ERR_TOO_MANY_RDMA_ATM_REQ] = "ERR_TOO_MANY_RDMA_ATM_REQ", 39 [RESPST_ERR_RNR] = "ERR_RNR", 40 [RESPST_ERR_RKEY_VIOLATION] = "ERR_RKEY_VIOLATION", 41 [RESPST_ERR_INVALIDATE_RKEY] = "ERR_INVALIDATE_RKEY_VIOLATION", 42 [RESPST_ERR_LENGTH] = "ERR_LENGTH", 43 [RESPST_ERR_CQ_OVERFLOW] = "ERR_CQ_OVERFLOW", 44 [RESPST_ERROR] = "ERROR", 45 [RESPST_DONE] = "DONE", 46 [RESPST_EXIT] = "EXIT", 47 }; 48 49 /* rxe_recv calls here to add a request packet to the input queue */ 50 void rxe_resp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb) 51 { 52 int must_sched; 53 struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); 54 55 skb_queue_tail(&qp->req_pkts, skb); 56 57 must_sched = (pkt->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST) || 58 (skb_queue_len(&qp->req_pkts) > 1); 59 60 if (must_sched) 61 rxe_sched_task(&qp->resp.task); 62 else 63 rxe_run_task(&qp->resp.task); 64 } 65 66 static inline enum resp_states get_req(struct rxe_qp *qp, 67 struct rxe_pkt_info **pkt_p) 68 { 69 struct sk_buff *skb; 70 71 skb = skb_peek(&qp->req_pkts); 72 if (!skb) 73 return RESPST_EXIT; 74 75 *pkt_p = SKB_TO_PKT(skb); 76 77 return (qp->resp.res) ? RESPST_READ_REPLY : RESPST_CHK_PSN; 78 } 79 80 static enum resp_states check_psn(struct rxe_qp *qp, 81 struct rxe_pkt_info *pkt) 82 { 83 int diff = psn_compare(pkt->psn, qp->resp.psn); 84 struct rxe_dev *rxe = to_rdev(qp->ibqp.device); 85 86 switch (qp_type(qp)) { 87 case IB_QPT_RC: 88 if (diff > 0) { 89 if (qp->resp.sent_psn_nak) 90 return RESPST_CLEANUP; 91 92 qp->resp.sent_psn_nak = 1; 93 rxe_counter_inc(rxe, RXE_CNT_OUT_OF_SEQ_REQ); 94 return RESPST_ERR_PSN_OUT_OF_SEQ; 95 96 } else if (diff < 0) { 97 rxe_counter_inc(rxe, RXE_CNT_DUP_REQ); 98 return RESPST_DUPLICATE_REQUEST; 99 } 100 101 if (qp->resp.sent_psn_nak) 102 qp->resp.sent_psn_nak = 0; 103 104 break; 105 106 case IB_QPT_UC: 107 if (qp->resp.drop_msg || diff != 0) { 108 if (pkt->mask & RXE_START_MASK) { 109 qp->resp.drop_msg = 0; 110 return RESPST_CHK_OP_SEQ; 111 } 112 113 qp->resp.drop_msg = 1; 114 return RESPST_CLEANUP; 115 } 116 break; 117 default: 118 break; 119 } 120 121 return RESPST_CHK_OP_SEQ; 122 } 123 124 static enum resp_states check_op_seq(struct rxe_qp *qp, 125 struct rxe_pkt_info *pkt) 126 { 127 switch (qp_type(qp)) { 128 case IB_QPT_RC: 129 switch (qp->resp.opcode) { 130 case IB_OPCODE_RC_SEND_FIRST: 131 case IB_OPCODE_RC_SEND_MIDDLE: 132 switch (pkt->opcode) { 133 case IB_OPCODE_RC_SEND_MIDDLE: 134 case IB_OPCODE_RC_SEND_LAST: 135 case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE: 136 case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE: 137 return RESPST_CHK_OP_VALID; 138 default: 139 return RESPST_ERR_MISSING_OPCODE_LAST_C; 140 } 141 142 case IB_OPCODE_RC_RDMA_WRITE_FIRST: 143 case IB_OPCODE_RC_RDMA_WRITE_MIDDLE: 144 switch (pkt->opcode) { 145 case IB_OPCODE_RC_RDMA_WRITE_MIDDLE: 146 case IB_OPCODE_RC_RDMA_WRITE_LAST: 147 case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE: 148 return RESPST_CHK_OP_VALID; 149 default: 150 return RESPST_ERR_MISSING_OPCODE_LAST_C; 151 } 152 153 default: 154 switch (pkt->opcode) { 155 case IB_OPCODE_RC_SEND_MIDDLE: 156 case IB_OPCODE_RC_SEND_LAST: 157 case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE: 158 case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE: 159 case IB_OPCODE_RC_RDMA_WRITE_MIDDLE: 160 case IB_OPCODE_RC_RDMA_WRITE_LAST: 161 case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE: 162 return RESPST_ERR_MISSING_OPCODE_FIRST; 163 default: 164 return RESPST_CHK_OP_VALID; 165 } 166 } 167 break; 168 169 case IB_QPT_UC: 170 switch (qp->resp.opcode) { 171 case IB_OPCODE_UC_SEND_FIRST: 172 case IB_OPCODE_UC_SEND_MIDDLE: 173 switch (pkt->opcode) { 174 case IB_OPCODE_UC_SEND_MIDDLE: 175 case IB_OPCODE_UC_SEND_LAST: 176 case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE: 177 return RESPST_CHK_OP_VALID; 178 default: 179 return RESPST_ERR_MISSING_OPCODE_LAST_D1E; 180 } 181 182 case IB_OPCODE_UC_RDMA_WRITE_FIRST: 183 case IB_OPCODE_UC_RDMA_WRITE_MIDDLE: 184 switch (pkt->opcode) { 185 case IB_OPCODE_UC_RDMA_WRITE_MIDDLE: 186 case IB_OPCODE_UC_RDMA_WRITE_LAST: 187 case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE: 188 return RESPST_CHK_OP_VALID; 189 default: 190 return RESPST_ERR_MISSING_OPCODE_LAST_D1E; 191 } 192 193 default: 194 switch (pkt->opcode) { 195 case IB_OPCODE_UC_SEND_MIDDLE: 196 case IB_OPCODE_UC_SEND_LAST: 197 case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE: 198 case IB_OPCODE_UC_RDMA_WRITE_MIDDLE: 199 case IB_OPCODE_UC_RDMA_WRITE_LAST: 200 case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE: 201 qp->resp.drop_msg = 1; 202 return RESPST_CLEANUP; 203 default: 204 return RESPST_CHK_OP_VALID; 205 } 206 } 207 break; 208 209 default: 210 return RESPST_CHK_OP_VALID; 211 } 212 } 213 214 static bool check_qp_attr_access(struct rxe_qp *qp, 215 struct rxe_pkt_info *pkt) 216 { 217 if (((pkt->mask & RXE_READ_MASK) && 218 !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) || 219 ((pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) && 220 !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) || 221 ((pkt->mask & RXE_ATOMIC_MASK) && 222 !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) 223 return false; 224 225 if (pkt->mask & RXE_FLUSH_MASK) { 226 u32 flush_type = feth_plt(pkt); 227 228 if ((flush_type & IB_FLUSH_GLOBAL && 229 !(qp->attr.qp_access_flags & IB_ACCESS_FLUSH_GLOBAL)) || 230 (flush_type & IB_FLUSH_PERSISTENT && 231 !(qp->attr.qp_access_flags & IB_ACCESS_FLUSH_PERSISTENT))) 232 return false; 233 } 234 235 return true; 236 } 237 238 static enum resp_states check_op_valid(struct rxe_qp *qp, 239 struct rxe_pkt_info *pkt) 240 { 241 switch (qp_type(qp)) { 242 case IB_QPT_RC: 243 if (!check_qp_attr_access(qp, pkt)) 244 return RESPST_ERR_UNSUPPORTED_OPCODE; 245 246 break; 247 248 case IB_QPT_UC: 249 if ((pkt->mask & RXE_WRITE_MASK) && 250 !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) { 251 qp->resp.drop_msg = 1; 252 return RESPST_CLEANUP; 253 } 254 255 break; 256 257 case IB_QPT_UD: 258 case IB_QPT_GSI: 259 break; 260 261 default: 262 WARN_ON_ONCE(1); 263 break; 264 } 265 266 return RESPST_CHK_RESOURCE; 267 } 268 269 static enum resp_states get_srq_wqe(struct rxe_qp *qp) 270 { 271 struct rxe_srq *srq = qp->srq; 272 struct rxe_queue *q = srq->rq.queue; 273 struct rxe_recv_wqe *wqe; 274 struct ib_event ev; 275 unsigned int count; 276 size_t size; 277 unsigned long flags; 278 279 if (srq->error) 280 return RESPST_ERR_RNR; 281 282 spin_lock_irqsave(&srq->rq.consumer_lock, flags); 283 284 wqe = queue_head(q, QUEUE_TYPE_FROM_CLIENT); 285 if (!wqe) { 286 spin_unlock_irqrestore(&srq->rq.consumer_lock, flags); 287 return RESPST_ERR_RNR; 288 } 289 290 /* don't trust user space data */ 291 if (unlikely(wqe->dma.num_sge > srq->rq.max_sge)) { 292 spin_unlock_irqrestore(&srq->rq.consumer_lock, flags); 293 rxe_dbg_qp(qp, "invalid num_sge in SRQ entry\n"); 294 return RESPST_ERR_MALFORMED_WQE; 295 } 296 size = sizeof(*wqe) + wqe->dma.num_sge*sizeof(struct rxe_sge); 297 memcpy(&qp->resp.srq_wqe, wqe, size); 298 299 qp->resp.wqe = &qp->resp.srq_wqe.wqe; 300 queue_advance_consumer(q, QUEUE_TYPE_FROM_CLIENT); 301 count = queue_count(q, QUEUE_TYPE_FROM_CLIENT); 302 303 if (srq->limit && srq->ibsrq.event_handler && (count < srq->limit)) { 304 srq->limit = 0; 305 goto event; 306 } 307 308 spin_unlock_irqrestore(&srq->rq.consumer_lock, flags); 309 return RESPST_CHK_LENGTH; 310 311 event: 312 spin_unlock_irqrestore(&srq->rq.consumer_lock, flags); 313 ev.device = qp->ibqp.device; 314 ev.element.srq = qp->ibqp.srq; 315 ev.event = IB_EVENT_SRQ_LIMIT_REACHED; 316 srq->ibsrq.event_handler(&ev, srq->ibsrq.srq_context); 317 return RESPST_CHK_LENGTH; 318 } 319 320 static enum resp_states check_resource(struct rxe_qp *qp, 321 struct rxe_pkt_info *pkt) 322 { 323 struct rxe_srq *srq = qp->srq; 324 325 if (pkt->mask & (RXE_READ_OR_ATOMIC_MASK | RXE_ATOMIC_WRITE_MASK)) { 326 /* it is the requesters job to not send 327 * too many read/atomic ops, we just 328 * recycle the responder resource queue 329 */ 330 if (likely(qp->attr.max_dest_rd_atomic > 0)) 331 return RESPST_CHK_LENGTH; 332 else 333 return RESPST_ERR_TOO_MANY_RDMA_ATM_REQ; 334 } 335 336 if (pkt->mask & RXE_RWR_MASK) { 337 if (srq) 338 return get_srq_wqe(qp); 339 340 qp->resp.wqe = queue_head(qp->rq.queue, 341 QUEUE_TYPE_FROM_CLIENT); 342 return (qp->resp.wqe) ? RESPST_CHK_LENGTH : RESPST_ERR_RNR; 343 } 344 345 return RESPST_CHK_LENGTH; 346 } 347 348 static enum resp_states rxe_resp_check_length(struct rxe_qp *qp, 349 struct rxe_pkt_info *pkt) 350 { 351 /* 352 * See IBA C9-92 353 * For UD QPs we only check if the packet will fit in the 354 * receive buffer later. For rmda operations additional 355 * length checks are performed in check_rkey. 356 */ 357 if (pkt->mask & RXE_PAYLOAD_MASK && ((qp_type(qp) == IB_QPT_RC) || 358 (qp_type(qp) == IB_QPT_UC))) { 359 unsigned int mtu = qp->mtu; 360 unsigned int payload = payload_size(pkt); 361 362 if ((pkt->mask & RXE_START_MASK) && 363 (pkt->mask & RXE_END_MASK)) { 364 if (unlikely(payload > mtu)) { 365 rxe_dbg_qp(qp, "only packet too long"); 366 return RESPST_ERR_LENGTH; 367 } 368 } else if ((pkt->mask & RXE_START_MASK) || 369 (pkt->mask & RXE_MIDDLE_MASK)) { 370 if (unlikely(payload != mtu)) { 371 rxe_dbg_qp(qp, "first or middle packet not mtu"); 372 return RESPST_ERR_LENGTH; 373 } 374 } else if (pkt->mask & RXE_END_MASK) { 375 if (unlikely((payload == 0) || (payload > mtu))) { 376 rxe_dbg_qp(qp, "last packet zero or too long"); 377 return RESPST_ERR_LENGTH; 378 } 379 } 380 } 381 382 /* See IBA C9-94 */ 383 if (pkt->mask & RXE_RETH_MASK) { 384 if (reth_len(pkt) > (1U << 31)) { 385 rxe_dbg_qp(qp, "dma length too long"); 386 return RESPST_ERR_LENGTH; 387 } 388 } 389 390 return RESPST_CHK_RKEY; 391 } 392 393 /* if the reth length field is zero we can assume nothing 394 * about the rkey value and should not validate or use it. 395 * Instead set qp->resp.rkey to 0 which is an invalid rkey 396 * value since the minimum index part is 1. 397 */ 398 static void qp_resp_from_reth(struct rxe_qp *qp, struct rxe_pkt_info *pkt) 399 { 400 unsigned int length = reth_len(pkt); 401 402 qp->resp.va = reth_va(pkt); 403 qp->resp.offset = 0; 404 qp->resp.resid = length; 405 qp->resp.length = length; 406 if (pkt->mask & RXE_READ_OR_WRITE_MASK && length == 0) 407 qp->resp.rkey = 0; 408 else 409 qp->resp.rkey = reth_rkey(pkt); 410 } 411 412 static void qp_resp_from_atmeth(struct rxe_qp *qp, struct rxe_pkt_info *pkt) 413 { 414 qp->resp.va = atmeth_va(pkt); 415 qp->resp.offset = 0; 416 qp->resp.rkey = atmeth_rkey(pkt); 417 qp->resp.resid = sizeof(u64); 418 } 419 420 /* resolve the packet rkey to qp->resp.mr or set qp->resp.mr to NULL 421 * if an invalid rkey is received or the rdma length is zero. For middle 422 * or last packets use the stored value of mr. 423 */ 424 static enum resp_states check_rkey(struct rxe_qp *qp, 425 struct rxe_pkt_info *pkt) 426 { 427 struct rxe_mr *mr = NULL; 428 struct rxe_mw *mw = NULL; 429 u64 va; 430 u32 rkey; 431 u32 resid; 432 u32 pktlen; 433 int mtu = qp->mtu; 434 enum resp_states state; 435 int access = 0; 436 437 if (pkt->mask & (RXE_READ_OR_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) { 438 if (pkt->mask & RXE_RETH_MASK) 439 qp_resp_from_reth(qp, pkt); 440 441 access = (pkt->mask & RXE_READ_MASK) ? IB_ACCESS_REMOTE_READ 442 : IB_ACCESS_REMOTE_WRITE; 443 } else if (pkt->mask & RXE_FLUSH_MASK) { 444 u32 flush_type = feth_plt(pkt); 445 446 if (pkt->mask & RXE_RETH_MASK) 447 qp_resp_from_reth(qp, pkt); 448 449 if (flush_type & IB_FLUSH_GLOBAL) 450 access |= IB_ACCESS_FLUSH_GLOBAL; 451 if (flush_type & IB_FLUSH_PERSISTENT) 452 access |= IB_ACCESS_FLUSH_PERSISTENT; 453 } else if (pkt->mask & RXE_ATOMIC_MASK) { 454 qp_resp_from_atmeth(qp, pkt); 455 access = IB_ACCESS_REMOTE_ATOMIC; 456 } else { 457 return RESPST_EXECUTE; 458 } 459 460 /* A zero-byte read or write op is not required to 461 * set an addr or rkey. See C9-88 462 */ 463 if ((pkt->mask & RXE_READ_OR_WRITE_MASK) && 464 (pkt->mask & RXE_RETH_MASK) && reth_len(pkt) == 0) { 465 qp->resp.mr = NULL; 466 return RESPST_EXECUTE; 467 } 468 469 va = qp->resp.va; 470 rkey = qp->resp.rkey; 471 resid = qp->resp.resid; 472 pktlen = payload_size(pkt); 473 474 if (rkey_is_mw(rkey)) { 475 mw = rxe_lookup_mw(qp, access, rkey); 476 if (!mw) { 477 rxe_dbg_qp(qp, "no MW matches rkey %#x\n", rkey); 478 state = RESPST_ERR_RKEY_VIOLATION; 479 goto err; 480 } 481 482 mr = mw->mr; 483 if (!mr) { 484 rxe_dbg_qp(qp, "MW doesn't have an MR\n"); 485 state = RESPST_ERR_RKEY_VIOLATION; 486 goto err; 487 } 488 489 if (mw->access & IB_ZERO_BASED) 490 qp->resp.offset = mw->addr; 491 492 rxe_get(mr); 493 rxe_put(mw); 494 mw = NULL; 495 } else { 496 mr = lookup_mr(qp->pd, access, rkey, RXE_LOOKUP_REMOTE); 497 if (!mr) { 498 rxe_dbg_qp(qp, "no MR matches rkey %#x\n", rkey); 499 state = RESPST_ERR_RKEY_VIOLATION; 500 goto err; 501 } 502 } 503 504 if (pkt->mask & RXE_FLUSH_MASK) { 505 /* FLUSH MR may not set va or resid 506 * no need to check range since we will flush whole mr 507 */ 508 if (feth_sel(pkt) == IB_FLUSH_MR) 509 goto skip_check_range; 510 } 511 512 if (mr_check_range(mr, va + qp->resp.offset, resid)) { 513 state = RESPST_ERR_RKEY_VIOLATION; 514 goto err; 515 } 516 517 skip_check_range: 518 if (pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) { 519 if (resid > mtu) { 520 if (pktlen != mtu || bth_pad(pkt)) { 521 state = RESPST_ERR_LENGTH; 522 goto err; 523 } 524 } else { 525 if (pktlen != resid) { 526 state = RESPST_ERR_LENGTH; 527 goto err; 528 } 529 if ((bth_pad(pkt) != (0x3 & (-resid)))) { 530 /* This case may not be exactly that 531 * but nothing else fits. 532 */ 533 state = RESPST_ERR_LENGTH; 534 goto err; 535 } 536 } 537 } 538 539 WARN_ON_ONCE(qp->resp.mr); 540 541 qp->resp.mr = mr; 542 return RESPST_EXECUTE; 543 544 err: 545 qp->resp.mr = NULL; 546 if (mr) 547 rxe_put(mr); 548 if (mw) 549 rxe_put(mw); 550 551 return state; 552 } 553 554 static enum resp_states send_data_in(struct rxe_qp *qp, void *data_addr, 555 int data_len) 556 { 557 int err; 558 559 err = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE, &qp->resp.wqe->dma, 560 data_addr, data_len, RXE_TO_MR_OBJ); 561 if (unlikely(err)) 562 return (err == -ENOSPC) ? RESPST_ERR_LENGTH 563 : RESPST_ERR_MALFORMED_WQE; 564 565 return RESPST_NONE; 566 } 567 568 static enum resp_states write_data_in(struct rxe_qp *qp, 569 struct rxe_pkt_info *pkt) 570 { 571 enum resp_states rc = RESPST_NONE; 572 int err; 573 int data_len = payload_size(pkt); 574 575 err = rxe_mr_copy(qp->resp.mr, qp->resp.va + qp->resp.offset, 576 payload_addr(pkt), data_len, RXE_TO_MR_OBJ); 577 if (err) { 578 rc = RESPST_ERR_RKEY_VIOLATION; 579 goto out; 580 } 581 582 qp->resp.va += data_len; 583 qp->resp.resid -= data_len; 584 585 out: 586 return rc; 587 } 588 589 static struct resp_res *rxe_prepare_res(struct rxe_qp *qp, 590 struct rxe_pkt_info *pkt, 591 int type) 592 { 593 struct resp_res *res; 594 u32 pkts; 595 596 res = &qp->resp.resources[qp->resp.res_head]; 597 rxe_advance_resp_resource(qp); 598 free_rd_atomic_resource(res); 599 600 res->type = type; 601 res->replay = 0; 602 603 switch (type) { 604 case RXE_READ_MASK: 605 res->read.va = qp->resp.va + qp->resp.offset; 606 res->read.va_org = qp->resp.va + qp->resp.offset; 607 res->read.resid = qp->resp.resid; 608 res->read.length = qp->resp.resid; 609 res->read.rkey = qp->resp.rkey; 610 611 pkts = max_t(u32, (reth_len(pkt) + qp->mtu - 1)/qp->mtu, 1); 612 res->first_psn = pkt->psn; 613 res->cur_psn = pkt->psn; 614 res->last_psn = (pkt->psn + pkts - 1) & BTH_PSN_MASK; 615 616 res->state = rdatm_res_state_new; 617 break; 618 case RXE_ATOMIC_MASK: 619 case RXE_ATOMIC_WRITE_MASK: 620 res->first_psn = pkt->psn; 621 res->last_psn = pkt->psn; 622 res->cur_psn = pkt->psn; 623 break; 624 case RXE_FLUSH_MASK: 625 res->flush.va = qp->resp.va + qp->resp.offset; 626 res->flush.length = qp->resp.length; 627 res->flush.type = feth_plt(pkt); 628 res->flush.level = feth_sel(pkt); 629 } 630 631 return res; 632 } 633 634 static enum resp_states process_flush(struct rxe_qp *qp, 635 struct rxe_pkt_info *pkt) 636 { 637 u64 length, start; 638 struct rxe_mr *mr = qp->resp.mr; 639 struct resp_res *res = qp->resp.res; 640 641 /* oA19-14, oA19-15 */ 642 if (res && res->replay) 643 return RESPST_ACKNOWLEDGE; 644 else if (!res) { 645 res = rxe_prepare_res(qp, pkt, RXE_FLUSH_MASK); 646 qp->resp.res = res; 647 } 648 649 if (res->flush.level == IB_FLUSH_RANGE) { 650 start = res->flush.va; 651 length = res->flush.length; 652 } else { /* level == IB_FLUSH_MR */ 653 start = mr->ibmr.iova; 654 length = mr->ibmr.length; 655 } 656 657 if (res->flush.type & IB_FLUSH_PERSISTENT) { 658 if (rxe_flush_pmem_iova(mr, start, length)) 659 return RESPST_ERR_RKEY_VIOLATION; 660 /* Make data persistent. */ 661 wmb(); 662 } else if (res->flush.type & IB_FLUSH_GLOBAL) { 663 /* Make data global visibility. */ 664 wmb(); 665 } 666 667 qp->resp.msn++; 668 669 /* next expected psn, read handles this separately */ 670 qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK; 671 qp->resp.ack_psn = qp->resp.psn; 672 673 qp->resp.opcode = pkt->opcode; 674 qp->resp.status = IB_WC_SUCCESS; 675 676 return RESPST_ACKNOWLEDGE; 677 } 678 679 static enum resp_states atomic_reply(struct rxe_qp *qp, 680 struct rxe_pkt_info *pkt) 681 { 682 struct rxe_mr *mr = qp->resp.mr; 683 struct resp_res *res = qp->resp.res; 684 int err; 685 686 if (!res) { 687 res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_MASK); 688 qp->resp.res = res; 689 } 690 691 if (!res->replay) { 692 u64 iova = qp->resp.va + qp->resp.offset; 693 694 err = rxe_mr_do_atomic_op(mr, iova, pkt->opcode, 695 atmeth_comp(pkt), 696 atmeth_swap_add(pkt), 697 &res->atomic.orig_val); 698 if (err) 699 return err; 700 701 qp->resp.msn++; 702 703 /* next expected psn, read handles this separately */ 704 qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK; 705 qp->resp.ack_psn = qp->resp.psn; 706 707 qp->resp.opcode = pkt->opcode; 708 qp->resp.status = IB_WC_SUCCESS; 709 } 710 711 return RESPST_ACKNOWLEDGE; 712 } 713 714 static enum resp_states atomic_write_reply(struct rxe_qp *qp, 715 struct rxe_pkt_info *pkt) 716 { 717 struct resp_res *res = qp->resp.res; 718 struct rxe_mr *mr; 719 u64 value; 720 u64 iova; 721 int err; 722 723 if (!res) { 724 res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_WRITE_MASK); 725 qp->resp.res = res; 726 } 727 728 if (res->replay) 729 return RESPST_ACKNOWLEDGE; 730 731 mr = qp->resp.mr; 732 value = *(u64 *)payload_addr(pkt); 733 iova = qp->resp.va + qp->resp.offset; 734 735 err = rxe_mr_do_atomic_write(mr, iova, value); 736 if (err) 737 return err; 738 739 qp->resp.resid = 0; 740 qp->resp.msn++; 741 742 /* next expected psn, read handles this separately */ 743 qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK; 744 qp->resp.ack_psn = qp->resp.psn; 745 746 qp->resp.opcode = pkt->opcode; 747 qp->resp.status = IB_WC_SUCCESS; 748 749 return RESPST_ACKNOWLEDGE; 750 } 751 752 static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp, 753 struct rxe_pkt_info *ack, 754 int opcode, 755 int payload, 756 u32 psn, 757 u8 syndrome) 758 { 759 struct rxe_dev *rxe = to_rdev(qp->ibqp.device); 760 struct sk_buff *skb; 761 int paylen; 762 int pad; 763 int err; 764 765 /* 766 * allocate packet 767 */ 768 pad = (-payload) & 0x3; 769 paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE; 770 771 skb = rxe_init_packet(rxe, &qp->pri_av, paylen, ack); 772 if (!skb) 773 return NULL; 774 775 ack->qp = qp; 776 ack->opcode = opcode; 777 ack->mask = rxe_opcode[opcode].mask; 778 ack->paylen = paylen; 779 ack->psn = psn; 780 781 bth_init(ack, opcode, 0, 0, pad, IB_DEFAULT_PKEY_FULL, 782 qp->attr.dest_qp_num, 0, psn); 783 784 if (ack->mask & RXE_AETH_MASK) { 785 aeth_set_syn(ack, syndrome); 786 aeth_set_msn(ack, qp->resp.msn); 787 } 788 789 if (ack->mask & RXE_ATMACK_MASK) 790 atmack_set_orig(ack, qp->resp.res->atomic.orig_val); 791 792 err = rxe_prepare(&qp->pri_av, ack, skb); 793 if (err) { 794 kfree_skb(skb); 795 return NULL; 796 } 797 798 return skb; 799 } 800 801 /** 802 * rxe_recheck_mr - revalidate MR from rkey and get a reference 803 * @qp: the qp 804 * @rkey: the rkey 805 * 806 * This code allows the MR to be invalidated or deregistered or 807 * the MW if one was used to be invalidated or deallocated. 808 * It is assumed that the access permissions if originally good 809 * are OK and the mappings to be unchanged. 810 * 811 * TODO: If someone reregisters an MR to change its size or 812 * access permissions during the processing of an RDMA read 813 * we should kill the responder resource and complete the 814 * operation with an error. 815 * 816 * Return: mr on success else NULL 817 */ 818 static struct rxe_mr *rxe_recheck_mr(struct rxe_qp *qp, u32 rkey) 819 { 820 struct rxe_dev *rxe = to_rdev(qp->ibqp.device); 821 struct rxe_mr *mr; 822 struct rxe_mw *mw; 823 824 if (rkey_is_mw(rkey)) { 825 mw = rxe_pool_get_index(&rxe->mw_pool, rkey >> 8); 826 if (!mw) 827 return NULL; 828 829 mr = mw->mr; 830 if (mw->rkey != rkey || mw->state != RXE_MW_STATE_VALID || 831 !mr || mr->state != RXE_MR_STATE_VALID) { 832 rxe_put(mw); 833 return NULL; 834 } 835 836 rxe_get(mr); 837 rxe_put(mw); 838 839 return mr; 840 } 841 842 mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8); 843 if (!mr) 844 return NULL; 845 846 if (mr->rkey != rkey || mr->state != RXE_MR_STATE_VALID) { 847 rxe_put(mr); 848 return NULL; 849 } 850 851 return mr; 852 } 853 854 /* RDMA read response. If res is not NULL, then we have a current RDMA request 855 * being processed or replayed. 856 */ 857 static enum resp_states read_reply(struct rxe_qp *qp, 858 struct rxe_pkt_info *req_pkt) 859 { 860 struct rxe_pkt_info ack_pkt; 861 struct sk_buff *skb; 862 int mtu = qp->mtu; 863 enum resp_states state; 864 int payload; 865 int opcode; 866 int err; 867 struct resp_res *res = qp->resp.res; 868 struct rxe_mr *mr; 869 870 if (!res) { 871 res = rxe_prepare_res(qp, req_pkt, RXE_READ_MASK); 872 qp->resp.res = res; 873 } 874 875 if (res->state == rdatm_res_state_new) { 876 if (!res->replay || qp->resp.length == 0) { 877 /* if length == 0 mr will be NULL (is ok) 878 * otherwise qp->resp.mr holds a ref on mr 879 * which we transfer to mr and drop below. 880 */ 881 mr = qp->resp.mr; 882 qp->resp.mr = NULL; 883 } else { 884 mr = rxe_recheck_mr(qp, res->read.rkey); 885 if (!mr) 886 return RESPST_ERR_RKEY_VIOLATION; 887 } 888 889 if (res->read.resid <= mtu) 890 opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY; 891 else 892 opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST; 893 } else { 894 /* re-lookup mr from rkey on all later packets. 895 * length will be non-zero. This can fail if someone 896 * modifies or destroys the mr since the first packet. 897 */ 898 mr = rxe_recheck_mr(qp, res->read.rkey); 899 if (!mr) 900 return RESPST_ERR_RKEY_VIOLATION; 901 902 if (res->read.resid > mtu) 903 opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE; 904 else 905 opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST; 906 } 907 908 res->state = rdatm_res_state_next; 909 910 payload = min_t(int, res->read.resid, mtu); 911 912 skb = prepare_ack_packet(qp, &ack_pkt, opcode, payload, 913 res->cur_psn, AETH_ACK_UNLIMITED); 914 if (!skb) { 915 state = RESPST_ERR_RNR; 916 goto err_out; 917 } 918 919 err = rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt), 920 payload, RXE_FROM_MR_OBJ); 921 if (err) { 922 kfree_skb(skb); 923 state = RESPST_ERR_RKEY_VIOLATION; 924 goto err_out; 925 } 926 927 if (bth_pad(&ack_pkt)) { 928 u8 *pad = payload_addr(&ack_pkt) + payload; 929 930 memset(pad, 0, bth_pad(&ack_pkt)); 931 } 932 933 /* rxe_xmit_packet always consumes the skb */ 934 err = rxe_xmit_packet(qp, &ack_pkt, skb); 935 if (err) { 936 state = RESPST_ERR_RNR; 937 goto err_out; 938 } 939 940 res->read.va += payload; 941 res->read.resid -= payload; 942 res->cur_psn = (res->cur_psn + 1) & BTH_PSN_MASK; 943 944 if (res->read.resid > 0) { 945 state = RESPST_DONE; 946 } else { 947 qp->resp.res = NULL; 948 if (!res->replay) 949 qp->resp.opcode = -1; 950 if (psn_compare(res->cur_psn, qp->resp.psn) >= 0) 951 qp->resp.psn = res->cur_psn; 952 state = RESPST_CLEANUP; 953 } 954 955 err_out: 956 if (mr) 957 rxe_put(mr); 958 return state; 959 } 960 961 static int invalidate_rkey(struct rxe_qp *qp, u32 rkey) 962 { 963 if (rkey_is_mw(rkey)) 964 return rxe_invalidate_mw(qp, rkey); 965 else 966 return rxe_invalidate_mr(qp, rkey); 967 } 968 969 /* Executes a new request. A retried request never reach that function (send 970 * and writes are discarded, and reads and atomics are retried elsewhere. 971 */ 972 static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt) 973 { 974 enum resp_states err; 975 struct sk_buff *skb = PKT_TO_SKB(pkt); 976 union rdma_network_hdr hdr; 977 978 if (pkt->mask & RXE_SEND_MASK) { 979 if (qp_type(qp) == IB_QPT_UD || 980 qp_type(qp) == IB_QPT_GSI) { 981 if (skb->protocol == htons(ETH_P_IP)) { 982 memset(&hdr.reserved, 0, 983 sizeof(hdr.reserved)); 984 memcpy(&hdr.roce4grh, ip_hdr(skb), 985 sizeof(hdr.roce4grh)); 986 err = send_data_in(qp, &hdr, sizeof(hdr)); 987 } else { 988 err = send_data_in(qp, ipv6_hdr(skb), 989 sizeof(hdr)); 990 } 991 if (err) 992 return err; 993 } 994 err = send_data_in(qp, payload_addr(pkt), payload_size(pkt)); 995 if (err) 996 return err; 997 } else if (pkt->mask & RXE_WRITE_MASK) { 998 err = write_data_in(qp, pkt); 999 if (err) 1000 return err; 1001 } else if (pkt->mask & RXE_READ_MASK) { 1002 /* For RDMA Read we can increment the msn now. See C9-148. */ 1003 qp->resp.msn++; 1004 return RESPST_READ_REPLY; 1005 } else if (pkt->mask & RXE_ATOMIC_MASK) { 1006 return RESPST_ATOMIC_REPLY; 1007 } else if (pkt->mask & RXE_ATOMIC_WRITE_MASK) { 1008 return RESPST_ATOMIC_WRITE_REPLY; 1009 } else if (pkt->mask & RXE_FLUSH_MASK) { 1010 return RESPST_PROCESS_FLUSH; 1011 } else { 1012 /* Unreachable */ 1013 WARN_ON_ONCE(1); 1014 } 1015 1016 if (pkt->mask & RXE_IETH_MASK) { 1017 u32 rkey = ieth_rkey(pkt); 1018 1019 err = invalidate_rkey(qp, rkey); 1020 if (err) 1021 return RESPST_ERR_INVALIDATE_RKEY; 1022 } 1023 1024 if (pkt->mask & RXE_END_MASK) 1025 /* We successfully processed this new request. */ 1026 qp->resp.msn++; 1027 1028 /* next expected psn, read handles this separately */ 1029 qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK; 1030 qp->resp.ack_psn = qp->resp.psn; 1031 1032 qp->resp.opcode = pkt->opcode; 1033 qp->resp.status = IB_WC_SUCCESS; 1034 1035 if (pkt->mask & RXE_COMP_MASK) 1036 return RESPST_COMPLETE; 1037 else if (qp_type(qp) == IB_QPT_RC) 1038 return RESPST_ACKNOWLEDGE; 1039 else 1040 return RESPST_CLEANUP; 1041 } 1042 1043 static enum resp_states do_complete(struct rxe_qp *qp, 1044 struct rxe_pkt_info *pkt) 1045 { 1046 struct rxe_cqe cqe; 1047 struct ib_wc *wc = &cqe.ibwc; 1048 struct ib_uverbs_wc *uwc = &cqe.uibwc; 1049 struct rxe_recv_wqe *wqe = qp->resp.wqe; 1050 struct rxe_dev *rxe = to_rdev(qp->ibqp.device); 1051 unsigned long flags; 1052 1053 if (!wqe) 1054 goto finish; 1055 1056 memset(&cqe, 0, sizeof(cqe)); 1057 1058 if (qp->rcq->is_user) { 1059 uwc->status = qp->resp.status; 1060 uwc->qp_num = qp->ibqp.qp_num; 1061 uwc->wr_id = wqe->wr_id; 1062 } else { 1063 wc->status = qp->resp.status; 1064 wc->qp = &qp->ibqp; 1065 wc->wr_id = wqe->wr_id; 1066 } 1067 1068 if (wc->status == IB_WC_SUCCESS) { 1069 rxe_counter_inc(rxe, RXE_CNT_RDMA_RECV); 1070 wc->opcode = (pkt->mask & RXE_IMMDT_MASK && 1071 pkt->mask & RXE_WRITE_MASK) ? 1072 IB_WC_RECV_RDMA_WITH_IMM : IB_WC_RECV; 1073 wc->byte_len = (pkt->mask & RXE_IMMDT_MASK && 1074 pkt->mask & RXE_WRITE_MASK) ? 1075 qp->resp.length : wqe->dma.length - wqe->dma.resid; 1076 1077 /* fields after byte_len are different between kernel and user 1078 * space 1079 */ 1080 if (qp->rcq->is_user) { 1081 uwc->wc_flags = IB_WC_GRH; 1082 1083 if (pkt->mask & RXE_IMMDT_MASK) { 1084 uwc->wc_flags |= IB_WC_WITH_IMM; 1085 uwc->ex.imm_data = immdt_imm(pkt); 1086 } 1087 1088 if (pkt->mask & RXE_IETH_MASK) { 1089 uwc->wc_flags |= IB_WC_WITH_INVALIDATE; 1090 uwc->ex.invalidate_rkey = ieth_rkey(pkt); 1091 } 1092 1093 if (pkt->mask & RXE_DETH_MASK) 1094 uwc->src_qp = deth_sqp(pkt); 1095 1096 uwc->port_num = qp->attr.port_num; 1097 } else { 1098 struct sk_buff *skb = PKT_TO_SKB(pkt); 1099 1100 wc->wc_flags = IB_WC_GRH | IB_WC_WITH_NETWORK_HDR_TYPE; 1101 if (skb->protocol == htons(ETH_P_IP)) 1102 wc->network_hdr_type = RDMA_NETWORK_IPV4; 1103 else 1104 wc->network_hdr_type = RDMA_NETWORK_IPV6; 1105 1106 if (is_vlan_dev(skb->dev)) { 1107 wc->wc_flags |= IB_WC_WITH_VLAN; 1108 wc->vlan_id = vlan_dev_vlan_id(skb->dev); 1109 } 1110 1111 if (pkt->mask & RXE_IMMDT_MASK) { 1112 wc->wc_flags |= IB_WC_WITH_IMM; 1113 wc->ex.imm_data = immdt_imm(pkt); 1114 } 1115 1116 if (pkt->mask & RXE_IETH_MASK) { 1117 wc->wc_flags |= IB_WC_WITH_INVALIDATE; 1118 wc->ex.invalidate_rkey = ieth_rkey(pkt); 1119 } 1120 1121 if (pkt->mask & RXE_DETH_MASK) 1122 wc->src_qp = deth_sqp(pkt); 1123 1124 wc->port_num = qp->attr.port_num; 1125 } 1126 } else { 1127 if (wc->status != IB_WC_WR_FLUSH_ERR) 1128 rxe_err_qp(qp, "non-flush error status = %d", 1129 wc->status); 1130 } 1131 1132 /* have copy for srq and reference for !srq */ 1133 if (!qp->srq) 1134 queue_advance_consumer(qp->rq.queue, QUEUE_TYPE_FROM_CLIENT); 1135 1136 qp->resp.wqe = NULL; 1137 1138 if (rxe_cq_post(qp->rcq, &cqe, pkt ? bth_se(pkt) : 1)) 1139 return RESPST_ERR_CQ_OVERFLOW; 1140 1141 finish: 1142 spin_lock_irqsave(&qp->state_lock, flags); 1143 if (unlikely(qp_state(qp) == IB_QPS_ERR)) { 1144 spin_unlock_irqrestore(&qp->state_lock, flags); 1145 return RESPST_CHK_RESOURCE; 1146 } 1147 spin_unlock_irqrestore(&qp->state_lock, flags); 1148 1149 if (unlikely(!pkt)) 1150 return RESPST_DONE; 1151 if (qp_type(qp) == IB_QPT_RC) 1152 return RESPST_ACKNOWLEDGE; 1153 else 1154 return RESPST_CLEANUP; 1155 } 1156 1157 1158 static int send_common_ack(struct rxe_qp *qp, u8 syndrome, u32 psn, 1159 int opcode, const char *msg) 1160 { 1161 int err; 1162 struct rxe_pkt_info ack_pkt; 1163 struct sk_buff *skb; 1164 1165 skb = prepare_ack_packet(qp, &ack_pkt, opcode, 0, psn, syndrome); 1166 if (!skb) 1167 return -ENOMEM; 1168 1169 err = rxe_xmit_packet(qp, &ack_pkt, skb); 1170 if (err) 1171 rxe_dbg_qp(qp, "Failed sending %s\n", msg); 1172 1173 return err; 1174 } 1175 1176 static int send_ack(struct rxe_qp *qp, u8 syndrome, u32 psn) 1177 { 1178 return send_common_ack(qp, syndrome, psn, 1179 IB_OPCODE_RC_ACKNOWLEDGE, "ACK"); 1180 } 1181 1182 static int send_atomic_ack(struct rxe_qp *qp, u8 syndrome, u32 psn) 1183 { 1184 int ret = send_common_ack(qp, syndrome, psn, 1185 IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE, "ATOMIC ACK"); 1186 1187 /* have to clear this since it is used to trigger 1188 * long read replies 1189 */ 1190 qp->resp.res = NULL; 1191 return ret; 1192 } 1193 1194 static int send_read_response_ack(struct rxe_qp *qp, u8 syndrome, u32 psn) 1195 { 1196 int ret = send_common_ack(qp, syndrome, psn, 1197 IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY, 1198 "RDMA READ response of length zero ACK"); 1199 1200 /* have to clear this since it is used to trigger 1201 * long read replies 1202 */ 1203 qp->resp.res = NULL; 1204 return ret; 1205 } 1206 1207 static enum resp_states acknowledge(struct rxe_qp *qp, 1208 struct rxe_pkt_info *pkt) 1209 { 1210 if (qp_type(qp) != IB_QPT_RC) 1211 return RESPST_CLEANUP; 1212 1213 if (qp->resp.aeth_syndrome != AETH_ACK_UNLIMITED) 1214 send_ack(qp, qp->resp.aeth_syndrome, pkt->psn); 1215 else if (pkt->mask & RXE_ATOMIC_MASK) 1216 send_atomic_ack(qp, AETH_ACK_UNLIMITED, pkt->psn); 1217 else if (pkt->mask & (RXE_FLUSH_MASK | RXE_ATOMIC_WRITE_MASK)) 1218 send_read_response_ack(qp, AETH_ACK_UNLIMITED, pkt->psn); 1219 else if (bth_ack(pkt)) 1220 send_ack(qp, AETH_ACK_UNLIMITED, pkt->psn); 1221 1222 return RESPST_CLEANUP; 1223 } 1224 1225 static enum resp_states cleanup(struct rxe_qp *qp, 1226 struct rxe_pkt_info *pkt) 1227 { 1228 struct sk_buff *skb; 1229 1230 if (pkt) { 1231 skb = skb_dequeue(&qp->req_pkts); 1232 rxe_put(qp); 1233 kfree_skb(skb); 1234 ib_device_put(qp->ibqp.device); 1235 } 1236 1237 if (qp->resp.mr) { 1238 rxe_put(qp->resp.mr); 1239 qp->resp.mr = NULL; 1240 } 1241 1242 return RESPST_DONE; 1243 } 1244 1245 static struct resp_res *find_resource(struct rxe_qp *qp, u32 psn) 1246 { 1247 int i; 1248 1249 for (i = 0; i < qp->attr.max_dest_rd_atomic; i++) { 1250 struct resp_res *res = &qp->resp.resources[i]; 1251 1252 if (res->type == 0) 1253 continue; 1254 1255 if (psn_compare(psn, res->first_psn) >= 0 && 1256 psn_compare(psn, res->last_psn) <= 0) { 1257 return res; 1258 } 1259 } 1260 1261 return NULL; 1262 } 1263 1264 static enum resp_states duplicate_request(struct rxe_qp *qp, 1265 struct rxe_pkt_info *pkt) 1266 { 1267 enum resp_states rc; 1268 u32 prev_psn = (qp->resp.ack_psn - 1) & BTH_PSN_MASK; 1269 1270 if (pkt->mask & RXE_SEND_MASK || 1271 pkt->mask & RXE_WRITE_MASK) { 1272 /* SEND. Ack again and cleanup. C9-105. */ 1273 send_ack(qp, AETH_ACK_UNLIMITED, prev_psn); 1274 return RESPST_CLEANUP; 1275 } else if (pkt->mask & RXE_FLUSH_MASK) { 1276 struct resp_res *res; 1277 1278 /* Find the operation in our list of responder resources. */ 1279 res = find_resource(qp, pkt->psn); 1280 if (res) { 1281 res->replay = 1; 1282 res->cur_psn = pkt->psn; 1283 qp->resp.res = res; 1284 rc = RESPST_PROCESS_FLUSH; 1285 goto out; 1286 } 1287 1288 /* Resource not found. Class D error. Drop the request. */ 1289 rc = RESPST_CLEANUP; 1290 goto out; 1291 } else if (pkt->mask & RXE_READ_MASK) { 1292 struct resp_res *res; 1293 1294 res = find_resource(qp, pkt->psn); 1295 if (!res) { 1296 /* Resource not found. Class D error. Drop the 1297 * request. 1298 */ 1299 rc = RESPST_CLEANUP; 1300 goto out; 1301 } else { 1302 /* Ensure this new request is the same as the previous 1303 * one or a subset of it. 1304 */ 1305 u64 iova = reth_va(pkt); 1306 u32 resid = reth_len(pkt); 1307 1308 if (iova < res->read.va_org || 1309 resid > res->read.length || 1310 (iova + resid) > (res->read.va_org + 1311 res->read.length)) { 1312 rc = RESPST_CLEANUP; 1313 goto out; 1314 } 1315 1316 if (reth_rkey(pkt) != res->read.rkey) { 1317 rc = RESPST_CLEANUP; 1318 goto out; 1319 } 1320 1321 res->cur_psn = pkt->psn; 1322 res->state = (pkt->psn == res->first_psn) ? 1323 rdatm_res_state_new : 1324 rdatm_res_state_replay; 1325 res->replay = 1; 1326 1327 /* Reset the resource, except length. */ 1328 res->read.va_org = iova; 1329 res->read.va = iova; 1330 res->read.resid = resid; 1331 1332 /* Replay the RDMA read reply. */ 1333 qp->resp.res = res; 1334 rc = RESPST_READ_REPLY; 1335 goto out; 1336 } 1337 } else { 1338 struct resp_res *res; 1339 1340 /* Find the operation in our list of responder resources. */ 1341 res = find_resource(qp, pkt->psn); 1342 if (res) { 1343 res->replay = 1; 1344 res->cur_psn = pkt->psn; 1345 qp->resp.res = res; 1346 rc = pkt->mask & RXE_ATOMIC_MASK ? 1347 RESPST_ATOMIC_REPLY : 1348 RESPST_ATOMIC_WRITE_REPLY; 1349 goto out; 1350 } 1351 1352 /* Resource not found. Class D error. Drop the request. */ 1353 rc = RESPST_CLEANUP; 1354 goto out; 1355 } 1356 out: 1357 return rc; 1358 } 1359 1360 /* Process a class A or C. Both are treated the same in this implementation. */ 1361 static void do_class_ac_error(struct rxe_qp *qp, u8 syndrome, 1362 enum ib_wc_status status) 1363 { 1364 qp->resp.aeth_syndrome = syndrome; 1365 qp->resp.status = status; 1366 1367 /* indicate that we should go through the ERROR state */ 1368 qp->resp.goto_error = 1; 1369 } 1370 1371 static enum resp_states do_class_d1e_error(struct rxe_qp *qp) 1372 { 1373 /* UC */ 1374 if (qp->srq) { 1375 /* Class E */ 1376 qp->resp.drop_msg = 1; 1377 if (qp->resp.wqe) { 1378 qp->resp.status = IB_WC_REM_INV_REQ_ERR; 1379 return RESPST_COMPLETE; 1380 } else { 1381 return RESPST_CLEANUP; 1382 } 1383 } else { 1384 /* Class D1. This packet may be the start of a 1385 * new message and could be valid. The previous 1386 * message is invalid and ignored. reset the 1387 * recv wr to its original state 1388 */ 1389 if (qp->resp.wqe) { 1390 qp->resp.wqe->dma.resid = qp->resp.wqe->dma.length; 1391 qp->resp.wqe->dma.cur_sge = 0; 1392 qp->resp.wqe->dma.sge_offset = 0; 1393 qp->resp.opcode = -1; 1394 } 1395 1396 if (qp->resp.mr) { 1397 rxe_put(qp->resp.mr); 1398 qp->resp.mr = NULL; 1399 } 1400 1401 return RESPST_CLEANUP; 1402 } 1403 } 1404 1405 /* drain incoming request packet queue */ 1406 static void drain_req_pkts(struct rxe_qp *qp) 1407 { 1408 struct sk_buff *skb; 1409 1410 while ((skb = skb_dequeue(&qp->req_pkts))) { 1411 rxe_put(qp); 1412 kfree_skb(skb); 1413 ib_device_put(qp->ibqp.device); 1414 } 1415 } 1416 1417 /* complete receive wqe with flush error */ 1418 static int flush_recv_wqe(struct rxe_qp *qp, struct rxe_recv_wqe *wqe) 1419 { 1420 struct rxe_cqe cqe = {}; 1421 struct ib_wc *wc = &cqe.ibwc; 1422 struct ib_uverbs_wc *uwc = &cqe.uibwc; 1423 int err; 1424 1425 if (qp->rcq->is_user) { 1426 uwc->wr_id = wqe->wr_id; 1427 uwc->status = IB_WC_WR_FLUSH_ERR; 1428 uwc->qp_num = qp_num(qp); 1429 } else { 1430 wc->wr_id = wqe->wr_id; 1431 wc->status = IB_WC_WR_FLUSH_ERR; 1432 wc->qp = &qp->ibqp; 1433 } 1434 1435 err = rxe_cq_post(qp->rcq, &cqe, 0); 1436 if (err) 1437 rxe_dbg_cq(qp->rcq, "post cq failed err = %d", err); 1438 1439 return err; 1440 } 1441 1442 /* drain and optionally complete the recive queue 1443 * if unable to complete a wqe stop completing and 1444 * just flush the remaining wqes 1445 */ 1446 static void flush_recv_queue(struct rxe_qp *qp, bool notify) 1447 { 1448 struct rxe_queue *q = qp->rq.queue; 1449 struct rxe_recv_wqe *wqe; 1450 int err; 1451 1452 if (qp->srq) 1453 return; 1454 1455 while ((wqe = queue_head(q, q->type))) { 1456 if (notify) { 1457 err = flush_recv_wqe(qp, wqe); 1458 if (err) 1459 notify = 0; 1460 } 1461 queue_advance_consumer(q, q->type); 1462 } 1463 1464 qp->resp.wqe = NULL; 1465 } 1466 1467 int rxe_responder(struct rxe_qp *qp) 1468 { 1469 struct rxe_dev *rxe = to_rdev(qp->ibqp.device); 1470 enum resp_states state; 1471 struct rxe_pkt_info *pkt = NULL; 1472 int ret; 1473 unsigned long flags; 1474 1475 spin_lock_irqsave(&qp->state_lock, flags); 1476 if (!qp->valid || qp_state(qp) == IB_QPS_ERR || 1477 qp_state(qp) == IB_QPS_RESET) { 1478 bool notify = qp->valid && (qp_state(qp) == IB_QPS_ERR); 1479 1480 drain_req_pkts(qp); 1481 flush_recv_queue(qp, notify); 1482 spin_unlock_irqrestore(&qp->state_lock, flags); 1483 goto exit; 1484 } 1485 spin_unlock_irqrestore(&qp->state_lock, flags); 1486 1487 qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED; 1488 1489 state = RESPST_GET_REQ; 1490 1491 while (1) { 1492 rxe_dbg_qp(qp, "state = %s\n", resp_state_name[state]); 1493 switch (state) { 1494 case RESPST_GET_REQ: 1495 state = get_req(qp, &pkt); 1496 break; 1497 case RESPST_CHK_PSN: 1498 state = check_psn(qp, pkt); 1499 break; 1500 case RESPST_CHK_OP_SEQ: 1501 state = check_op_seq(qp, pkt); 1502 break; 1503 case RESPST_CHK_OP_VALID: 1504 state = check_op_valid(qp, pkt); 1505 break; 1506 case RESPST_CHK_RESOURCE: 1507 state = check_resource(qp, pkt); 1508 break; 1509 case RESPST_CHK_LENGTH: 1510 state = rxe_resp_check_length(qp, pkt); 1511 break; 1512 case RESPST_CHK_RKEY: 1513 state = check_rkey(qp, pkt); 1514 break; 1515 case RESPST_EXECUTE: 1516 state = execute(qp, pkt); 1517 break; 1518 case RESPST_COMPLETE: 1519 state = do_complete(qp, pkt); 1520 break; 1521 case RESPST_READ_REPLY: 1522 state = read_reply(qp, pkt); 1523 break; 1524 case RESPST_ATOMIC_REPLY: 1525 state = atomic_reply(qp, pkt); 1526 break; 1527 case RESPST_ATOMIC_WRITE_REPLY: 1528 state = atomic_write_reply(qp, pkt); 1529 break; 1530 case RESPST_PROCESS_FLUSH: 1531 state = process_flush(qp, pkt); 1532 break; 1533 case RESPST_ACKNOWLEDGE: 1534 state = acknowledge(qp, pkt); 1535 break; 1536 case RESPST_CLEANUP: 1537 state = cleanup(qp, pkt); 1538 break; 1539 case RESPST_DUPLICATE_REQUEST: 1540 state = duplicate_request(qp, pkt); 1541 break; 1542 case RESPST_ERR_PSN_OUT_OF_SEQ: 1543 /* RC only - Class B. Drop packet. */ 1544 send_ack(qp, AETH_NAK_PSN_SEQ_ERROR, qp->resp.psn); 1545 state = RESPST_CLEANUP; 1546 break; 1547 1548 case RESPST_ERR_TOO_MANY_RDMA_ATM_REQ: 1549 case RESPST_ERR_MISSING_OPCODE_FIRST: 1550 case RESPST_ERR_MISSING_OPCODE_LAST_C: 1551 case RESPST_ERR_UNSUPPORTED_OPCODE: 1552 case RESPST_ERR_MISALIGNED_ATOMIC: 1553 /* RC Only - Class C. */ 1554 do_class_ac_error(qp, AETH_NAK_INVALID_REQ, 1555 IB_WC_REM_INV_REQ_ERR); 1556 state = RESPST_COMPLETE; 1557 break; 1558 1559 case RESPST_ERR_MISSING_OPCODE_LAST_D1E: 1560 state = do_class_d1e_error(qp); 1561 break; 1562 case RESPST_ERR_RNR: 1563 if (qp_type(qp) == IB_QPT_RC) { 1564 rxe_counter_inc(rxe, RXE_CNT_SND_RNR); 1565 /* RC - class B */ 1566 send_ack(qp, AETH_RNR_NAK | 1567 (~AETH_TYPE_MASK & 1568 qp->attr.min_rnr_timer), 1569 pkt->psn); 1570 } else { 1571 /* UD/UC - class D */ 1572 qp->resp.drop_msg = 1; 1573 } 1574 state = RESPST_CLEANUP; 1575 break; 1576 1577 case RESPST_ERR_RKEY_VIOLATION: 1578 if (qp_type(qp) == IB_QPT_RC) { 1579 /* Class C */ 1580 do_class_ac_error(qp, AETH_NAK_REM_ACC_ERR, 1581 IB_WC_REM_ACCESS_ERR); 1582 state = RESPST_COMPLETE; 1583 } else { 1584 qp->resp.drop_msg = 1; 1585 if (qp->srq) { 1586 /* UC/SRQ Class D */ 1587 qp->resp.status = IB_WC_REM_ACCESS_ERR; 1588 state = RESPST_COMPLETE; 1589 } else { 1590 /* UC/non-SRQ Class E. */ 1591 state = RESPST_CLEANUP; 1592 } 1593 } 1594 break; 1595 1596 case RESPST_ERR_INVALIDATE_RKEY: 1597 /* RC - Class J. */ 1598 qp->resp.goto_error = 1; 1599 qp->resp.status = IB_WC_REM_INV_REQ_ERR; 1600 state = RESPST_COMPLETE; 1601 break; 1602 1603 case RESPST_ERR_LENGTH: 1604 if (qp_type(qp) == IB_QPT_RC) { 1605 /* Class C */ 1606 do_class_ac_error(qp, AETH_NAK_INVALID_REQ, 1607 IB_WC_REM_INV_REQ_ERR); 1608 state = RESPST_COMPLETE; 1609 } else if (qp->srq) { 1610 /* UC/UD - class E */ 1611 qp->resp.status = IB_WC_REM_INV_REQ_ERR; 1612 state = RESPST_COMPLETE; 1613 } else { 1614 /* UC/UD - class D */ 1615 qp->resp.drop_msg = 1; 1616 state = RESPST_CLEANUP; 1617 } 1618 break; 1619 1620 case RESPST_ERR_MALFORMED_WQE: 1621 /* All, Class A. */ 1622 do_class_ac_error(qp, AETH_NAK_REM_OP_ERR, 1623 IB_WC_LOC_QP_OP_ERR); 1624 state = RESPST_COMPLETE; 1625 break; 1626 1627 case RESPST_ERR_CQ_OVERFLOW: 1628 /* All - Class G */ 1629 state = RESPST_ERROR; 1630 break; 1631 1632 case RESPST_DONE: 1633 if (qp->resp.goto_error) { 1634 state = RESPST_ERROR; 1635 break; 1636 } 1637 1638 goto done; 1639 1640 case RESPST_EXIT: 1641 if (qp->resp.goto_error) { 1642 state = RESPST_ERROR; 1643 break; 1644 } 1645 1646 goto exit; 1647 1648 case RESPST_ERROR: 1649 qp->resp.goto_error = 0; 1650 rxe_dbg_qp(qp, "moved to error state\n"); 1651 rxe_qp_error(qp); 1652 goto exit; 1653 1654 default: 1655 WARN_ON_ONCE(1); 1656 } 1657 } 1658 1659 /* A non-zero return value will cause rxe_do_task to 1660 * exit its loop and end the tasklet. A zero return 1661 * will continue looping and return to rxe_responder 1662 */ 1663 done: 1664 ret = 0; 1665 goto out; 1666 exit: 1667 ret = -EAGAIN; 1668 out: 1669 return ret; 1670 } 1671