/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/dma-mapping.h>
#include <net/addrconf.h>
#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"
#include "rxe_hw_counters.h"

static int rxe_query_device(struct ib_device *dev,
			    struct ib_device_attr *attr,
			    struct ib_udata *uhw)
{
	struct rxe_dev *rxe = to_rdev(dev);

	if (uhw->inlen || uhw->outlen)
		return -EINVAL;

	*attr = rxe->attr;
	return 0;
}

static int rxe_query_port(struct ib_device *dev,
			  u8 port_num, struct ib_port_attr *attr)
{
	struct rxe_dev *rxe = to_rdev(dev);
	struct rxe_port *port;
	int rc;

	port = &rxe->port;

	/* *attr is zeroed by the caller; avoid zeroing it again here */
	*attr = port->attr;

	mutex_lock(&rxe->usdev_lock);
	rc = ib_get_eth_speed(dev, port_num, &attr->active_speed,
			      &attr->active_width);

	if (attr->state == IB_PORT_ACTIVE)
		attr->phys_state = RDMA_LINK_PHYS_STATE_LINK_UP;
	else if (dev_get_flags(rxe->ndev) & IFF_UP)
		attr->phys_state = RDMA_LINK_PHYS_STATE_POLLING;
	else
		attr->phys_state = RDMA_LINK_PHYS_STATE_DISABLED;

	mutex_unlock(&rxe->usdev_lock);

	return rc;
}

static struct net_device *rxe_get_netdev(struct ib_device *device,
					 u8 port_num)
{
	struct rxe_dev *rxe = to_rdev(device);

	if (rxe->ndev) {
		dev_hold(rxe->ndev);
		return rxe->ndev;
	}

	return NULL;
}

static int rxe_query_pkey(struct ib_device *device,
			  u8 port_num, u16 index, u16 *pkey)
{
	struct rxe_dev *rxe = to_rdev(device);
	struct rxe_port *port;

	port = &rxe->port;

	if (unlikely(index >= port->attr.pkey_tbl_len)) {
		dev_warn(device->dev.parent, "invalid index = %d\n",
			 index);
		goto err1;
	}

	*pkey = port->pkey_tbl[index];
	return 0;

err1:
	return -EINVAL;
}
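/*
 * The attribute queries above report locally cached or netdev-derived
 * state; nothing is negotiated on the wire. A minimal userspace sketch of
 * how these verbs are typically reached through libibverbs (device index,
 * error handling and cleanup elided, purely illustrative):
 *
 *	struct ibv_device **list = ibv_get_device_list(NULL);
 *	struct ibv_context *ctx = ibv_open_device(list[0]);
 *	struct ibv_device_attr dev_attr;
 *	struct ibv_port_attr port_attr;
 *
 *	ibv_query_device(ctx, &dev_attr);	  reaches rxe_query_device()
 *	ibv_query_port(ctx, 1, &port_attr);	  reaches rxe_query_port()
 */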
static int rxe_modify_device(struct ib_device *dev,
			     int mask, struct ib_device_modify *attr)
{
	struct rxe_dev *rxe = to_rdev(dev);

	if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)
		rxe->attr.sys_image_guid = cpu_to_be64(attr->sys_image_guid);

	if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
		memcpy(rxe->ib_dev.node_desc,
		       attr->node_desc, sizeof(rxe->ib_dev.node_desc));
	}

	return 0;
}

static int rxe_modify_port(struct ib_device *dev,
			   u8 port_num, int mask, struct ib_port_modify *attr)
{
	struct rxe_dev *rxe = to_rdev(dev);
	struct rxe_port *port;

	port = &rxe->port;

	port->attr.port_cap_flags |= attr->set_port_cap_mask;
	port->attr.port_cap_flags &= ~attr->clr_port_cap_mask;

	if (mask & IB_PORT_RESET_QKEY_CNTR)
		port->attr.qkey_viol_cntr = 0;

	return 0;
}

static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev,
					       u8 port_num)
{
	struct rxe_dev *rxe = to_rdev(dev);

	return rxe_link_layer(rxe, port_num);
}

static struct ib_ucontext *rxe_alloc_ucontext(struct ib_device *dev,
					      struct ib_udata *udata)
{
	struct rxe_dev *rxe = to_rdev(dev);
	struct rxe_ucontext *uc;

	uc = rxe_alloc(&rxe->uc_pool);
	return uc ? &uc->ibuc : ERR_PTR(-ENOMEM);
}

static int rxe_dealloc_ucontext(struct ib_ucontext *ibuc)
{
	struct rxe_ucontext *uc = to_ruc(ibuc);

	rxe_drop_ref(uc);
	return 0;
}

static int rxe_port_immutable(struct ib_device *dev, u8 port_num,
			      struct ib_port_immutable *immutable)
{
	int err;
	struct ib_port_attr attr;

	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;

	err = ib_query_port(dev, port_num, &attr);
	if (err)
		return err;

	immutable->pkey_tbl_len = attr.pkey_tbl_len;
	immutable->gid_tbl_len = attr.gid_tbl_len;
	immutable->max_mad_size = IB_MGMT_MAD_SIZE;

	return 0;
}
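/*
 * Every rxe object handed out in this file (ucontext, pd, ah, srq, qp, cq,
 * mr, mc_grp) comes out of a per-type rxe_pool and is reference counted:
 * rxe_alloc() returns an object holding one reference, rxe_add_ref() and
 * rxe_drop_ref() adjust the count, and the pool's cleanup callback runs
 * when it reaches zero. A rough sketch of the ownership rule used
 * throughout (illustrative pseudo-fields, not the real pool internals):
 *
 *	obj = rxe_alloc(pool);		  refcount = 1, caller owns it
 *	rxe_add_ref(dep);		  pin any object we point at
 *	obj->dep = dep;
 *	...
 *	rxe_drop_ref(obj->dep);		  destroy path: unpin dependency
 *	rxe_drop_ref(obj);		  then release the object itself
 */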
static struct ib_pd *rxe_alloc_pd(struct ib_device *dev,
				  struct ib_ucontext *context,
				  struct ib_udata *udata)
{
	struct rxe_dev *rxe = to_rdev(dev);
	struct rxe_pd *pd;

	pd = rxe_alloc(&rxe->pd_pool);
	return pd ? &pd->ibpd : ERR_PTR(-ENOMEM);
}

static int rxe_dealloc_pd(struct ib_pd *ibpd)
{
	struct rxe_pd *pd = to_rpd(ibpd);

	rxe_drop_ref(pd);
	return 0;
}

static void rxe_init_av(struct rxe_dev *rxe, struct rdma_ah_attr *attr,
			struct rxe_av *av)
{
	rxe_av_from_attr(rdma_ah_get_port_num(attr), av, attr);
	rxe_av_fill_ip_info(av, attr);
}

static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd,
				   struct rdma_ah_attr *attr,
				   u32 flags,
				   struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_ah *ah;

	err = rxe_av_chk_attr(rxe, attr);
	if (err)
		return ERR_PTR(err);

	ah = rxe_alloc(&rxe->ah_pool);
	if (!ah)
		return ERR_PTR(-ENOMEM);

	rxe_add_ref(pd);
	ah->pd = pd;

	rxe_init_av(rxe, attr, &ah->av);
	return &ah->ibah;
}

static int rxe_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibah->device);
	struct rxe_ah *ah = to_rah(ibah);

	err = rxe_av_chk_attr(rxe, attr);
	if (err)
		return err;

	rxe_init_av(rxe, attr, &ah->av);
	return 0;
}

static int rxe_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
{
	struct rxe_ah *ah = to_rah(ibah);

	memset(attr, 0, sizeof(*attr));
	attr->type = ibah->type;
	rxe_av_to_attr(&ah->av, attr);
	return 0;
}

static int rxe_destroy_ah(struct ib_ah *ibah, u32 flags)
{
	struct rxe_ah *ah = to_rah(ibah);

	rxe_drop_ref(ah->pd);
	rxe_drop_ref(ah);
	return 0;
}

static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
{
	int err;
	int i;
	u32 length;
	struct rxe_recv_wqe *recv_wqe;
	int num_sge = ibwr->num_sge;

	if (unlikely(queue_full(rq->queue))) {
		err = -ENOMEM;
		goto err1;
	}

	if (unlikely(num_sge > rq->max_sge)) {
		err = -EINVAL;
		goto err1;
	}

	length = 0;
	for (i = 0; i < num_sge; i++)
		length += ibwr->sg_list[i].length;

	recv_wqe = producer_addr(rq->queue);
	recv_wqe->wr_id = ibwr->wr_id;
	recv_wqe->num_sge = num_sge;

	memcpy(recv_wqe->dma.sge, ibwr->sg_list,
	       num_sge * sizeof(struct ib_sge));

	recv_wqe->dma.length = length;
	recv_wqe->dma.resid = length;
	recv_wqe->dma.num_sge = num_sge;
	recv_wqe->dma.cur_sge = 0;
	recv_wqe->dma.sge_offset = 0;

	/* make sure all changes to the work queue are written before we
	 * update the producer pointer
	 */
	smp_wmb();

	advance_producer(rq->queue);
	return 0;

err1:
	return err;
}
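/*
 * The work and completion queues are single-producer/single-consumer rings
 * that may be shared with userspace via mmap, so ordering relies on barrier
 * pairing: the smp_wmb() above publishes the WQE contents before the
 * producer index moves, and the consumer must issue the matching read
 * barrier before dereferencing the entry. A minimal sketch of the pairing
 * (fill()/use() are illustrative placeholders, not the rxe_queue API):
 *
 *	producer:
 *		fill(producer_addr(q));
 *		smp_wmb();
 *		advance_producer(q);
 *
 *	consumer:
 *		if (!queue_empty(q)) {
 *			smp_rmb();
 *			use(consumer_addr(q));
 *			advance_consumer(q);
 *		}
 */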
static struct ib_srq *rxe_create_srq(struct ib_pd *ibpd,
				     struct ib_srq_init_attr *init,
				     struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_srq *srq;
	struct ib_ucontext *context = udata ? ibpd->uobject->context : NULL;
	struct rxe_create_srq_resp __user *uresp = NULL;

	if (udata) {
		if (udata->outlen < sizeof(*uresp))
			return ERR_PTR(-EINVAL);
		uresp = udata->outbuf;
	}

	err = rxe_srq_chk_attr(rxe, NULL, &init->attr, IB_SRQ_INIT_MASK);
	if (err)
		goto err1;

	srq = rxe_alloc(&rxe->srq_pool);
	if (!srq) {
		err = -ENOMEM;
		goto err1;
	}

	rxe_add_index(srq);
	rxe_add_ref(pd);
	srq->pd = pd;

	err = rxe_srq_from_init(rxe, srq, init, context, uresp);
	if (err)
		goto err2;

	return &srq->ibsrq;

err2:
	rxe_drop_ref(pd);
	rxe_drop_index(srq);
	rxe_drop_ref(srq);
err1:
	return ERR_PTR(err);
}

static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
			  enum ib_srq_attr_mask mask,
			  struct ib_udata *udata)
{
	int err;
	struct rxe_srq *srq = to_rsrq(ibsrq);
	struct rxe_dev *rxe = to_rdev(ibsrq->device);
	struct rxe_modify_srq_cmd ucmd = {};

	if (udata) {
		if (udata->inlen < sizeof(ucmd))
			return -EINVAL;

		err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
		if (err)
			return err;
	}

	err = rxe_srq_chk_attr(rxe, srq, attr, mask);
	if (err)
		goto err1;

	err = rxe_srq_from_attr(rxe, srq, attr, mask, &ucmd);
	if (err)
		goto err1;

	return 0;

err1:
	return err;
}

static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
{
	struct rxe_srq *srq = to_rsrq(ibsrq);

	if (srq->error)
		return -EINVAL;

	attr->max_wr = srq->rq.queue->buf->index_mask;
	attr->max_sge = srq->rq.max_sge;
	attr->srq_limit = srq->limit;
	return 0;
}

static int rxe_destroy_srq(struct ib_srq *ibsrq)
{
	struct rxe_srq *srq = to_rsrq(ibsrq);

	if (srq->rq.queue)
		rxe_queue_cleanup(srq->rq.queue);

	rxe_drop_ref(srq->pd);
	rxe_drop_index(srq);
	rxe_drop_ref(srq);

	return 0;
}

static int rxe_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
			     const struct ib_recv_wr **bad_wr)
{
	int err = 0;
	unsigned long flags;
	struct rxe_srq *srq = to_rsrq(ibsrq);

	spin_lock_irqsave(&srq->rq.producer_lock, flags);

	while (wr) {
		err = post_one_recv(&srq->rq, wr);
		if (unlikely(err))
			break;
		wr = wr->next;
	}

	spin_unlock_irqrestore(&srq->rq.producer_lock, flags);

	if (err)
		*bad_wr = wr;

	return err;
}

static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd,
				   struct ib_qp_init_attr *init,
				   struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_qp *qp;
	struct rxe_create_qp_resp __user *uresp = NULL;

	if (udata) {
		if (udata->outlen < sizeof(*uresp))
			return ERR_PTR(-EINVAL);
		uresp = udata->outbuf;
	}

	err = rxe_qp_chk_init(rxe, init);
	if (err)
		goto err1;

	qp = rxe_alloc(&rxe->qp_pool);
	if (!qp) {
		err = -ENOMEM;
		goto err1;
	}

	if (udata) {
		if (udata->inlen) {
			err = -EINVAL;
			goto err2;
		}
		qp->is_user = 1;
	}

	rxe_add_index(qp);

	err = rxe_qp_from_init(rxe, qp, pd, init, uresp, ibpd, udata);
	if (err)
		goto err3;

	return &qp->ibqp;

err3:
	rxe_drop_index(qp);
err2:
	rxe_drop_ref(qp);
err1:
	return ERR_PTR(err);
}
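/*
 * A queue pair created above starts in the RESET state; the consumer walks
 * it through INIT -> RTR -> RTS with modify_qp before traffic can flow.
 * A condensed userspace sketch for an RC QP, assuming libibverbs, with most
 * attribute fields (dest_qp_num, rq_psn, address vector, timeouts) elided:
 *
 *	struct ibv_qp_attr a = { .qp_state = IBV_QPS_INIT,
 *				 .port_num = 1, .pkey_index = 0,
 *				 .qp_access_flags = 0 };
 *	ibv_modify_qp(qp, &a, IBV_QP_STATE | IBV_QP_PKEY_INDEX |
 *			      IBV_QP_PORT | IBV_QP_ACCESS_FLAGS);
 *	a.qp_state = IBV_QPS_RTR;	then fill in remote info ...
 *	ibv_modify_qp(qp, &a, IBV_QP_STATE | ...);
 *	a.qp_state = IBV_QPS_RTS;	then fill in send params ...
 *	ibv_modify_qp(qp, &a, IBV_QP_STATE | ...);
 */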
static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
			 int mask, struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibqp->device);
	struct rxe_qp *qp = to_rqp(ibqp);

	err = rxe_qp_chk_attr(rxe, qp, attr, mask);
	if (err)
		goto err1;

	err = rxe_qp_from_attr(qp, attr, mask, udata);
	if (err)
		goto err1;

	return 0;

err1:
	return err;
}

static int rxe_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
			int mask, struct ib_qp_init_attr *init)
{
	struct rxe_qp *qp = to_rqp(ibqp);

	rxe_qp_to_init(qp, init);
	rxe_qp_to_attr(qp, attr, mask);

	return 0;
}

static int rxe_destroy_qp(struct ib_qp *ibqp)
{
	struct rxe_qp *qp = to_rqp(ibqp);

	rxe_qp_destroy(qp);
	rxe_drop_index(qp);
	rxe_drop_ref(qp);
	return 0;
}

static int validate_send_wr(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
			    unsigned int mask, unsigned int length)
{
	int num_sge = ibwr->num_sge;
	struct rxe_sq *sq = &qp->sq;

	if (unlikely(num_sge > sq->max_sge))
		goto err1;

	if (unlikely(mask & WR_ATOMIC_MASK)) {
		if (length < 8)
			goto err1;

		if (atomic_wr(ibwr)->remote_addr & 0x7)
			goto err1;
	}

	if (unlikely((ibwr->send_flags & IB_SEND_INLINE) &&
		     (length > sq->max_inline)))
		goto err1;

	return 0;

err1:
	return -EINVAL;
}

static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,
			 const struct ib_send_wr *ibwr)
{
	wr->wr_id = ibwr->wr_id;
	wr->num_sge = ibwr->num_sge;
	wr->opcode = ibwr->opcode;
	wr->send_flags = ibwr->send_flags;

	if (qp_type(qp) == IB_QPT_UD ||
	    qp_type(qp) == IB_QPT_SMI ||
	    qp_type(qp) == IB_QPT_GSI) {
		wr->wr.ud.remote_qpn = ud_wr(ibwr)->remote_qpn;
		wr->wr.ud.remote_qkey = ud_wr(ibwr)->remote_qkey;
		if (qp_type(qp) == IB_QPT_GSI)
			wr->wr.ud.pkey_index = ud_wr(ibwr)->pkey_index;
		if (wr->opcode == IB_WR_SEND_WITH_IMM)
			wr->ex.imm_data = ibwr->ex.imm_data;
	} else {
		switch (wr->opcode) {
		case IB_WR_RDMA_WRITE_WITH_IMM:
			wr->ex.imm_data = ibwr->ex.imm_data;
			/* fall through */
		case IB_WR_RDMA_READ:
		case IB_WR_RDMA_WRITE:
			wr->wr.rdma.remote_addr = rdma_wr(ibwr)->remote_addr;
			wr->wr.rdma.rkey = rdma_wr(ibwr)->rkey;
			break;
		case IB_WR_SEND_WITH_IMM:
			wr->ex.imm_data = ibwr->ex.imm_data;
			break;
		case IB_WR_SEND_WITH_INV:
			wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
			break;
		case IB_WR_ATOMIC_CMP_AND_SWP:
		case IB_WR_ATOMIC_FETCH_AND_ADD:
			wr->wr.atomic.remote_addr =
				atomic_wr(ibwr)->remote_addr;
			wr->wr.atomic.compare_add =
				atomic_wr(ibwr)->compare_add;
			wr->wr.atomic.swap = atomic_wr(ibwr)->swap;
			wr->wr.atomic.rkey = atomic_wr(ibwr)->rkey;
			break;
		case IB_WR_LOCAL_INV:
			wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
			break;
		case IB_WR_REG_MR:
			wr->wr.reg.mr = reg_wr(ibwr)->mr;
			wr->wr.reg.key = reg_wr(ibwr)->key;
			wr->wr.reg.access = reg_wr(ibwr)->access;
			break;
		default:
			break;
		}
	}
}
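/*
 * ud_wr(), rdma_wr(), atomic_wr() and reg_wr() above are the standard
 * container_of() accessors from <rdma/ib_verbs.h>: the opcode-specific
 * work request structs all embed a plain struct ib_send_wr, so the core
 * passes one pointer type and each driver downcasts according to the
 * opcode it just read. Roughly (simplified):
 *
 *	static inline struct ib_rdma_wr *rdma_wr(const struct ib_send_wr *wr)
 *	{
 *		return container_of(wr, struct ib_rdma_wr, wr);
 *	}
 */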
static int init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
			 unsigned int mask, unsigned int length,
			 struct rxe_send_wqe *wqe)
{
	int num_sge = ibwr->num_sge;
	struct ib_sge *sge;
	int i;
	u8 *p;

	init_send_wr(qp, &wqe->wr, ibwr);

	if (qp_type(qp) == IB_QPT_UD ||
	    qp_type(qp) == IB_QPT_SMI ||
	    qp_type(qp) == IB_QPT_GSI)
		memcpy(&wqe->av, &to_rah(ud_wr(ibwr)->ah)->av, sizeof(wqe->av));

	if (unlikely(ibwr->send_flags & IB_SEND_INLINE)) {
		p = wqe->dma.inline_data;

		sge = ibwr->sg_list;
		for (i = 0; i < num_sge; i++, sge++) {
			memcpy(p, (void *)(uintptr_t)sge->addr,
			       sge->length);

			p += sge->length;
		}
	} else if (mask & WR_REG_MASK) {
		wqe->mask = mask;
		wqe->state = wqe_state_posted;
		return 0;
	} else
		memcpy(wqe->dma.sge, ibwr->sg_list,
		       num_sge * sizeof(struct ib_sge));

	wqe->iova = mask & WR_ATOMIC_MASK ? atomic_wr(ibwr)->remote_addr :
		mask & WR_READ_OR_WRITE_MASK ? rdma_wr(ibwr)->remote_addr : 0;
	wqe->mask = mask;
	wqe->dma.length = length;
	wqe->dma.resid = length;
	wqe->dma.num_sge = num_sge;
	wqe->dma.cur_sge = 0;
	wqe->dma.sge_offset = 0;
	wqe->state = wqe_state_posted;
	wqe->ssn = atomic_add_return(1, &qp->ssn);

	return 0;
}

static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
			 unsigned int mask, u32 length)
{
	int err;
	struct rxe_sq *sq = &qp->sq;
	struct rxe_send_wqe *send_wqe;
	unsigned long flags;

	err = validate_send_wr(qp, ibwr, mask, length);
	if (err)
		return err;

	spin_lock_irqsave(&qp->sq.sq_lock, flags);

	if (unlikely(queue_full(sq->queue))) {
		err = -ENOMEM;
		goto err1;
	}

	send_wqe = producer_addr(sq->queue);

	err = init_send_wqe(qp, ibwr, mask, length, send_wqe);
	if (unlikely(err))
		goto err1;

	/*
	 * make sure all changes to the work queue are
	 * written before we update the producer pointer
	 */
	smp_wmb();

	advance_producer(sq->queue);
	spin_unlock_irqrestore(&qp->sq.sq_lock, flags);

	return 0;

err1:
	spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
	return err;
}

static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr,
				const struct ib_send_wr **bad_wr)
{
	int err = 0;
	unsigned int mask;
	unsigned int length = 0;
	int i;

	while (wr) {
		mask = wr_opcode_mask(wr->opcode, qp);
		if (unlikely(!mask)) {
			err = -EINVAL;
			*bad_wr = wr;
			break;
		}

		if (unlikely((wr->send_flags & IB_SEND_INLINE) &&
			     !(mask & WR_INLINE_MASK))) {
			err = -EINVAL;
			*bad_wr = wr;
			break;
		}

		length = 0;
		for (i = 0; i < wr->num_sge; i++)
			length += wr->sg_list[i].length;

		err = post_one_send(qp, wr, mask, length);

		if (err) {
			*bad_wr = wr;
			break;
		}
		wr = wr->next;
	}

	rxe_run_task(&qp->req.task, 1);
	if (unlikely(qp->req.state == QP_STATE_ERROR))
		rxe_run_task(&qp->comp.task, 1);

	return err;
}

static int rxe_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
			 const struct ib_send_wr **bad_wr)
{
	struct rxe_qp *qp = to_rqp(ibqp);

	if (unlikely(!qp->valid)) {
		*bad_wr = wr;
		return -EINVAL;
	}

	if (unlikely(qp->req.state < QP_STATE_READY)) {
		*bad_wr = wr;
		return -EINVAL;
	}

	if (qp->is_user) {
		/* Utilize process context to do protocol processing */
		rxe_run_task(&qp->req.task, 0);
		return 0;
	} else
		return rxe_post_send_kernel(qp, wr, bad_wr);
}
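/*
 * For a user QP the WQE has already been written into the shared ring by
 * the provider library, so rxe_post_send() above only has to kick the
 * requester task; kernel QPs go through rxe_post_send_kernel(). What the
 * kernel path consumes corresponds to the familiar libibverbs sequence
 * (a minimal sketch, memory registration elided):
 *
 *	struct ibv_sge sge = { .addr = (uintptr_t)buf,
 *			       .length = len, .lkey = mr->lkey };
 *	struct ibv_send_wr wr = { .wr_id = 1, .sg_list = &sge,
 *				  .num_sge = 1, .opcode = IBV_WR_SEND,
 *				  .send_flags = IBV_SEND_SIGNALED };
 *	struct ibv_send_wr *bad;
 *	ibv_post_send(qp, &wr, &bad);
 */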
static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
			 const struct ib_recv_wr **bad_wr)
{
	int err = 0;
	struct rxe_qp *qp = to_rqp(ibqp);
	struct rxe_rq *rq = &qp->rq;
	unsigned long flags;

	if (unlikely((qp_state(qp) < IB_QPS_INIT) || !qp->valid)) {
		*bad_wr = wr;
		err = -EINVAL;
		goto err1;
	}

	if (unlikely(qp->srq)) {
		*bad_wr = wr;
		err = -EINVAL;
		goto err1;
	}

	spin_lock_irqsave(&rq->producer_lock, flags);

	while (wr) {
		err = post_one_recv(rq, wr);
		if (unlikely(err)) {
			*bad_wr = wr;
			break;
		}
		wr = wr->next;
	}

	spin_unlock_irqrestore(&rq->producer_lock, flags);

	if (qp->resp.state == QP_STATE_ERROR)
		rxe_run_task(&qp->resp.task, 1);

err1:
	return err;
}

static struct ib_cq *rxe_create_cq(struct ib_device *dev,
				   const struct ib_cq_init_attr *attr,
				   struct ib_ucontext *context,
				   struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(dev);
	struct rxe_cq *cq;
	struct rxe_create_cq_resp __user *uresp = NULL;

	if (udata) {
		if (udata->outlen < sizeof(*uresp))
			return ERR_PTR(-EINVAL);
		uresp = udata->outbuf;
	}

	if (attr->flags)
		return ERR_PTR(-EINVAL);

	err = rxe_cq_chk_attr(rxe, NULL, attr->cqe, attr->comp_vector);
	if (err)
		goto err1;

	cq = rxe_alloc(&rxe->cq_pool);
	if (!cq) {
		err = -ENOMEM;
		goto err1;
	}

	err = rxe_cq_from_init(rxe, cq, attr->cqe, attr->comp_vector,
			       context, uresp);
	if (err)
		goto err2;

	return &cq->ibcq;

err2:
	rxe_drop_ref(cq);
err1:
	return ERR_PTR(err);
}

static int rxe_destroy_cq(struct ib_cq *ibcq)
{
	struct rxe_cq *cq = to_rcq(ibcq);

	rxe_cq_disable(cq);

	rxe_drop_ref(cq);
	return 0;
}

static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
{
	int err;
	struct rxe_cq *cq = to_rcq(ibcq);
	struct rxe_dev *rxe = to_rdev(ibcq->device);
	struct rxe_resize_cq_resp __user *uresp = NULL;

	if (udata) {
		if (udata->outlen < sizeof(*uresp))
			return -EINVAL;
		uresp = udata->outbuf;
	}

	err = rxe_cq_chk_attr(rxe, cq, cqe, 0);
	if (err)
		goto err1;

	err = rxe_cq_resize_queue(cq, cqe, uresp);
	if (err)
		goto err1;

	return 0;

err1:
	return err;
}

static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
	int i;
	struct rxe_cq *cq = to_rcq(ibcq);
	struct rxe_cqe *cqe;
	unsigned long flags;

	spin_lock_irqsave(&cq->cq_lock, flags);
	for (i = 0; i < num_entries; i++) {
		cqe = queue_head(cq->queue);
		if (!cqe)
			break;

		memcpy(wc++, &cqe->ibwc, sizeof(*wc));
		advance_consumer(cq->queue);
	}
	spin_unlock_irqrestore(&cq->cq_lock, flags);

	return i;
}
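/*
 * rxe_poll_cq() drains up to num_entries completions and returns how many
 * it copied, while rxe_req_notify_cq() below arms the CQ for the next
 * event; consumers typically combine the two in the usual event loop
 * (sketch, assuming libibverbs and a completion channel; handle() is a
 * hypothetical application callback):
 *
 *	ibv_req_notify_cq(cq, 0);
 *	ibv_get_cq_event(channel, &ev_cq, &ev_ctx);
 *	ibv_ack_cq_events(ev_cq, 1);
 *	ibv_req_notify_cq(ev_cq, 0);		re-arm before draining
 *	while (ibv_poll_cq(ev_cq, 1, &wc) > 0)
 *		handle(&wc);
 */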
static int rxe_peek_cq(struct ib_cq *ibcq, int wc_cnt)
{
	struct rxe_cq *cq = to_rcq(ibcq);
	int count = queue_count(cq->queue);

	return (count > wc_cnt) ? wc_cnt : count;
}

static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
	struct rxe_cq *cq = to_rcq(ibcq);
	unsigned long irq_flags;
	int ret = 0;

	spin_lock_irqsave(&cq->cq_lock, irq_flags);
	if (cq->notify != IB_CQ_NEXT_COMP)
		cq->notify = flags & IB_CQ_SOLICITED_MASK;

	if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !queue_empty(cq->queue))
		ret = 1;

	spin_unlock_irqrestore(&cq->cq_lock, irq_flags);

	return ret;
}

static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access)
{
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_mem *mr;
	int err;

	mr = rxe_alloc(&rxe->mr_pool);
	if (!mr) {
		err = -ENOMEM;
		goto err1;
	}

	rxe_add_index(mr);

	rxe_add_ref(pd);

	err = rxe_mem_init_dma(pd, access, mr);
	if (err)
		goto err2;

	return &mr->ibmr;

err2:
	rxe_drop_ref(pd);
	rxe_drop_index(mr);
	rxe_drop_ref(mr);
err1:
	return ERR_PTR(err);
}

static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
				     u64 start,
				     u64 length,
				     u64 iova,
				     int access, struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_mem *mr;

	mr = rxe_alloc(&rxe->mr_pool);
	if (!mr) {
		err = -ENOMEM;
		goto err2;
	}

	rxe_add_index(mr);

	rxe_add_ref(pd);

	err = rxe_mem_init_user(pd, start, length, iova,
				access, udata, mr);
	if (err)
		goto err3;

	return &mr->ibmr;

err3:
	rxe_drop_ref(pd);
	rxe_drop_index(mr);
	rxe_drop_ref(mr);
err2:
	return ERR_PTR(err);
}

static int rxe_dereg_mr(struct ib_mr *ibmr)
{
	struct rxe_mem *mr = to_rmr(ibmr);

	mr->state = RXE_MEM_STATE_ZOMBIE;
	rxe_drop_ref(mr->pd);
	rxe_drop_index(mr);
	rxe_drop_ref(mr);
	return 0;
}

static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd,
				  enum ib_mr_type mr_type,
				  u32 max_num_sg)
{
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_mem *mr;
	int err;

	if (mr_type != IB_MR_TYPE_MEM_REG)
		return ERR_PTR(-EINVAL);

	mr = rxe_alloc(&rxe->mr_pool);
	if (!mr) {
		err = -ENOMEM;
		goto err1;
	}

	rxe_add_index(mr);

	rxe_add_ref(pd);

	err = rxe_mem_init_fast(pd, max_num_sg, mr);
	if (err)
		goto err2;

	return &mr->ibmr;

err2:
	rxe_drop_ref(pd);
	rxe_drop_index(mr);
	rxe_drop_ref(mr);
err1:
	return ERR_PTR(err);
}

static int rxe_set_page(struct ib_mr *ibmr, u64 addr)
{
	struct rxe_mem *mr = to_rmr(ibmr);
	struct rxe_map *map;
	struct rxe_phys_buf *buf;

	if (unlikely(mr->nbuf == mr->num_buf))
		return -ENOMEM;

	map = mr->map[mr->nbuf / RXE_BUF_PER_MAP];
	buf = &map->buf[mr->nbuf % RXE_BUF_PER_MAP];

	buf->addr = addr;
	buf->size = ibmr->page_size;
	mr->nbuf++;

	return 0;
}
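/*
 * rxe_set_page() is the per-page callback handed to ib_sg_to_pages() in
 * rxe_map_mr_sg() below: the core walks the scatterlist, splits it into
 * page_size chunks and invokes the callback once per page, which files
 * each address into a two-level table (an array of rxe_map blocks, each
 * holding RXE_BUF_PER_MAP buffers). Locating page n is therefore just:
 *
 *	map = mr->map[n / RXE_BUF_PER_MAP];
 *	buf = &map->buf[n % RXE_BUF_PER_MAP];
 */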
static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
			 int sg_nents, unsigned int *sg_offset)
{
	struct rxe_mem *mr = to_rmr(ibmr);
	int n;

	mr->nbuf = 0;

	n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rxe_set_page);

	mr->va = ibmr->iova;
	mr->iova = ibmr->iova;
	mr->length = ibmr->length;
	mr->page_shift = ilog2(ibmr->page_size);
	mr->page_mask = ibmr->page_size - 1;
	mr->offset = mr->iova & mr->page_mask;

	return n;
}

static int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibqp->device);
	struct rxe_qp *qp = to_rqp(ibqp);
	struct rxe_mc_grp *grp;

	/* takes a ref on grp if successful */
	err = rxe_mcast_get_grp(rxe, mgid, &grp);
	if (err)
		return err;

	err = rxe_mcast_add_grp_elem(rxe, qp, grp);

	rxe_drop_ref(grp);
	return err;
}

static int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
{
	struct rxe_dev *rxe = to_rdev(ibqp->device);
	struct rxe_qp *qp = to_rqp(ibqp);

	return rxe_mcast_drop_grp_elem(rxe, qp, mgid);
}

static ssize_t parent_show(struct device *device,
			   struct device_attribute *attr, char *buf)
{
	struct rxe_dev *rxe = container_of(device, struct rxe_dev,
					   ib_dev.dev);

	return snprintf(buf, 16, "%s\n", rxe_parent_name(rxe, 1));
}

static DEVICE_ATTR_RO(parent);

static struct attribute *rxe_dev_attributes[] = {
	&dev_attr_parent.attr,
	NULL
};

static const struct attribute_group rxe_attr_group = {
	.attrs = rxe_dev_attributes,
};

static const struct ib_device_ops rxe_dev_ops = {
	.alloc_hw_stats = rxe_ib_alloc_hw_stats,
	.alloc_mr = rxe_alloc_mr,
	.alloc_pd = rxe_alloc_pd,
	.alloc_ucontext = rxe_alloc_ucontext,
	.attach_mcast = rxe_attach_mcast,
	.create_ah = rxe_create_ah,
	.create_cq = rxe_create_cq,
	.create_qp = rxe_create_qp,
	.create_srq = rxe_create_srq,
	.dealloc_pd = rxe_dealloc_pd,
	.dealloc_ucontext = rxe_dealloc_ucontext,
	.dereg_mr = rxe_dereg_mr,
	.destroy_ah = rxe_destroy_ah,
	.destroy_cq = rxe_destroy_cq,
	.destroy_qp = rxe_destroy_qp,
	.destroy_srq = rxe_destroy_srq,
	.detach_mcast = rxe_detach_mcast,
	.get_dma_mr = rxe_get_dma_mr,
	.get_hw_stats = rxe_ib_get_hw_stats,
	.get_link_layer = rxe_get_link_layer,
	.get_netdev = rxe_get_netdev,
	.get_port_immutable = rxe_port_immutable,
	.map_mr_sg = rxe_map_mr_sg,
	.mmap = rxe_mmap,
	.modify_ah = rxe_modify_ah,
	.modify_device = rxe_modify_device,
	.modify_port = rxe_modify_port,
	.modify_qp = rxe_modify_qp,
	.modify_srq = rxe_modify_srq,
	.peek_cq = rxe_peek_cq,
	.poll_cq = rxe_poll_cq,
	.post_recv = rxe_post_recv,
	.post_send = rxe_post_send,
	.post_srq_recv = rxe_post_srq_recv,
	.query_ah = rxe_query_ah,
	.query_device = rxe_query_device,
	.query_pkey = rxe_query_pkey,
	.query_port = rxe_query_port,
	.query_qp = rxe_query_qp,
	.query_srq = rxe_query_srq,
	.reg_user_mr = rxe_reg_user_mr,
	.req_notify_cq = rxe_req_notify_cq,
	.resize_cq = rxe_resize_cq,
};
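/*
 * rxe_register_device() below derives the 64-bit node GUID from the
 * attached netdev's 48-bit MAC with addrconf_addr_eui48(), the standard
 * EUI-48 to EUI-64 expansion: insert 0xFF,0xFE between the OUI and the
 * NIC-specific bytes and flip the universal/local bit. For example, a
 * hypothetical MAC 02:00:00:11:22:33 yields the GUID
 * 00:00:00:ff:fe:11:22:33.
 */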
int rxe_register_device(struct rxe_dev *rxe)
{
	int err;
	struct ib_device *dev = &rxe->ib_dev;
	struct crypto_shash *tfm;

	strlcpy(dev->node_desc, "rxe", sizeof(dev->node_desc));

	dev->owner = THIS_MODULE;
	dev->node_type = RDMA_NODE_IB_CA;
	dev->phys_port_cnt = 1;
	dev->num_comp_vectors = num_possible_cpus();
	dev->dev.parent = rxe_dma_device(rxe);
	dev->local_dma_lkey = 0;
	addrconf_addr_eui48((unsigned char *)&dev->node_guid,
			    rxe->ndev->dev_addr);
	dev->dev.dma_ops = &dma_virt_ops;
	dma_coerce_mask_and_coherent(&dev->dev,
				     dma_get_required_mask(&dev->dev));

	dev->uverbs_abi_ver = RXE_UVERBS_ABI_VERSION;
	dev->uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT)
	    | BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)
	    | BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE)
	    | BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT)
	    | BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD)
	    | BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD)
	    | BIT_ULL(IB_USER_VERBS_CMD_CREATE_SRQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_SRQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_QUERY_SRQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_SRQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_POST_SRQ_RECV)
	    | BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP)
	    | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP)
	    | BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP)
	    | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP)
	    | BIT_ULL(IB_USER_VERBS_CMD_POST_SEND)
	    | BIT_ULL(IB_USER_VERBS_CMD_POST_RECV)
	    | BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_POLL_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_PEEK_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_REG_MR)
	    | BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR)
	    | BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH)
	    | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_AH)
	    | BIT_ULL(IB_USER_VERBS_CMD_QUERY_AH)
	    | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH)
	    | BIT_ULL(IB_USER_VERBS_CMD_ATTACH_MCAST)
	    | BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST)
	    ;

	ib_set_device_ops(dev, &rxe_dev_ops);

	tfm = crypto_alloc_shash("crc32", 0, 0);
	if (IS_ERR(tfm)) {
		pr_err("failed to allocate crc algorithm err:%ld\n",
		       PTR_ERR(tfm));
		return PTR_ERR(tfm);
	}
	rxe->tfm = tfm;

	rdma_set_device_sysfs_group(dev, &rxe_attr_group);
	dev->driver_id = RDMA_DRIVER_RXE;
	err = ib_register_device(dev, "rxe%d", NULL);
	if (err) {
		pr_warn("%s failed with error %d\n", __func__, err);
		goto err1;
	}

	return 0;

err1:
	crypto_free_shash(rxe->tfm);

	return err;
}

void rxe_unregister_device(struct rxe_dev *rxe)
{
	struct ib_device *dev = &rxe->ib_dev;

	ib_unregister_device(dev);
}
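/*
 * Instantiating a device registered here is driven from userspace by
 * binding the driver to an Ethernet interface; depending on kernel and
 * iproute2 versions this is either the legacy module-parameter interface
 * or the rdma netlink command (interface name "eth0" is illustrative):
 *
 *	echo eth0 > /sys/module/rdma_rxe/parameters/add
 * or
 *	rdma link add rxe0 type rxe netdev eth0
 */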