1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * linux/fs/9p/trans_rdma.c 4 * 5 * RDMA transport layer based on the trans_fd.c implementation. 6 * 7 * Copyright (C) 2008 by Tom Tucker <tom@opengridcomputing.com> 8 * Copyright (C) 2006 by Russ Cox <rsc@swtch.com> 9 * Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net> 10 * Copyright (C) 2004-2008 by Eric Van Hensbergen <ericvh@gmail.com> 11 * Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com> 12 */ 13 14 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 15 16 #include <linux/in.h> 17 #include <linux/module.h> 18 #include <linux/net.h> 19 #include <linux/ipv6.h> 20 #include <linux/kthread.h> 21 #include <linux/errno.h> 22 #include <linux/kernel.h> 23 #include <linux/un.h> 24 #include <linux/uaccess.h> 25 #include <linux/inet.h> 26 #include <linux/idr.h> 27 #include <linux/file.h> 28 #include <linux/parser.h> 29 #include <linux/semaphore.h> 30 #include <linux/slab.h> 31 #include <linux/seq_file.h> 32 #include <net/9p/9p.h> 33 #include <net/9p/client.h> 34 #include <net/9p/transport.h> 35 #include <rdma/ib_verbs.h> 36 #include <rdma/rdma_cm.h> 37 38 #define P9_PORT 5640 39 #define P9_RDMA_SQ_DEPTH 32 40 #define P9_RDMA_RQ_DEPTH 32 41 #define P9_RDMA_SEND_SGE 4 42 #define P9_RDMA_RECV_SGE 4 43 #define P9_RDMA_IRD 0 44 #define P9_RDMA_ORD 0 45 #define P9_RDMA_TIMEOUT 30000 /* 30 seconds */ 46 #define P9_RDMA_MAXSIZE (1024*1024) /* 1MB */ 47 48 /** 49 * struct p9_trans_rdma - RDMA transport instance 50 * 51 * @state: tracks the transport state machine for connection setup and tear down 52 * @cm_id: The RDMA CM ID 53 * @pd: Protection Domain pointer 54 * @qp: Queue Pair pointer 55 * @cq: Completion Queue pointer 56 * @timeout: Number of uSecs to wait for connection management events 57 * @privport: Whether a privileged port may be used 58 * @port: The port to use 59 * @sq_depth: The depth of the Send Queue 60 * @sq_sem: Semaphore for the SQ 61 * @rq_depth: The depth of the Receive Queue. 62 * @rq_sem: Semaphore for the RQ 63 * @excess_rc : Amount of posted Receive Contexts without a pending request. 64 * See rdma_request() 65 * @addr: The remote peer's address 66 * @req_lock: Protects the active request list 67 * @cm_done: Completion event for connection management tracking 68 */ 69 struct p9_trans_rdma { 70 enum { 71 P9_RDMA_INIT, 72 P9_RDMA_ADDR_RESOLVED, 73 P9_RDMA_ROUTE_RESOLVED, 74 P9_RDMA_CONNECTED, 75 P9_RDMA_FLUSHING, 76 P9_RDMA_CLOSING, 77 P9_RDMA_CLOSED, 78 } state; 79 struct rdma_cm_id *cm_id; 80 struct ib_pd *pd; 81 struct ib_qp *qp; 82 struct ib_cq *cq; 83 long timeout; 84 bool privport; 85 u16 port; 86 int sq_depth; 87 struct semaphore sq_sem; 88 int rq_depth; 89 struct semaphore rq_sem; 90 atomic_t excess_rc; 91 struct sockaddr_in addr; 92 spinlock_t req_lock; 93 94 struct completion cm_done; 95 }; 96 97 struct p9_rdma_req; 98 99 /** 100 * struct p9_rdma_context - Keeps track of in-process WR 101 * 102 * @cqe: completion queue entry 103 * @busa: Bus address to unmap when the WR completes 104 * @req: Keeps track of requests (send) 105 * @rc: Keepts track of replies (receive) 106 */ 107 struct p9_rdma_context { 108 struct ib_cqe cqe; 109 dma_addr_t busa; 110 union { 111 struct p9_req_t *req; 112 struct p9_fcall rc; 113 }; 114 }; 115 116 /** 117 * struct p9_rdma_opts - Collection of mount options 118 * @port: port of connection 119 * @privport: Whether a privileged port may be used 120 * @sq_depth: The requested depth of the SQ. This really doesn't need 121 * to be any deeper than the number of threads used in the client 122 * @rq_depth: The depth of the RQ. Should be greater than or equal to SQ depth 123 * @timeout: Time to wait in msecs for CM events 124 */ 125 struct p9_rdma_opts { 126 short port; 127 bool privport; 128 int sq_depth; 129 int rq_depth; 130 long timeout; 131 }; 132 133 /* 134 * Option Parsing (code inspired by NFS code) 135 */ 136 enum { 137 /* Options that take integer arguments */ 138 Opt_port, Opt_rq_depth, Opt_sq_depth, Opt_timeout, 139 /* Options that take no argument */ 140 Opt_privport, 141 Opt_err, 142 }; 143 144 static match_table_t tokens = { 145 {Opt_port, "port=%u"}, 146 {Opt_sq_depth, "sq=%u"}, 147 {Opt_rq_depth, "rq=%u"}, 148 {Opt_timeout, "timeout=%u"}, 149 {Opt_privport, "privport"}, 150 {Opt_err, NULL}, 151 }; 152 153 static int p9_rdma_show_options(struct seq_file *m, struct p9_client *clnt) 154 { 155 struct p9_trans_rdma *rdma = clnt->trans; 156 157 if (rdma->port != P9_PORT) 158 seq_printf(m, ",port=%u", rdma->port); 159 if (rdma->sq_depth != P9_RDMA_SQ_DEPTH) 160 seq_printf(m, ",sq=%u", rdma->sq_depth); 161 if (rdma->rq_depth != P9_RDMA_RQ_DEPTH) 162 seq_printf(m, ",rq=%u", rdma->rq_depth); 163 if (rdma->timeout != P9_RDMA_TIMEOUT) 164 seq_printf(m, ",timeout=%lu", rdma->timeout); 165 if (rdma->privport) 166 seq_puts(m, ",privport"); 167 return 0; 168 } 169 170 /** 171 * parse_opts - parse mount options into rdma options structure 172 * @params: options string passed from mount 173 * @opts: rdma transport-specific structure to parse options into 174 * 175 * Returns 0 upon success, -ERRNO upon failure 176 */ 177 static int parse_opts(char *params, struct p9_rdma_opts *opts) 178 { 179 char *p; 180 substring_t args[MAX_OPT_ARGS]; 181 int option; 182 char *options, *tmp_options; 183 184 opts->port = P9_PORT; 185 opts->sq_depth = P9_RDMA_SQ_DEPTH; 186 opts->rq_depth = P9_RDMA_RQ_DEPTH; 187 opts->timeout = P9_RDMA_TIMEOUT; 188 opts->privport = false; 189 190 if (!params) 191 return 0; 192 193 tmp_options = kstrdup(params, GFP_KERNEL); 194 if (!tmp_options) { 195 p9_debug(P9_DEBUG_ERROR, 196 "failed to allocate copy of option string\n"); 197 return -ENOMEM; 198 } 199 options = tmp_options; 200 201 while ((p = strsep(&options, ",")) != NULL) { 202 int token; 203 int r; 204 if (!*p) 205 continue; 206 token = match_token(p, tokens, args); 207 if ((token != Opt_err) && (token != Opt_privport)) { 208 r = match_int(&args[0], &option); 209 if (r < 0) { 210 p9_debug(P9_DEBUG_ERROR, 211 "integer field, but no integer?\n"); 212 continue; 213 } 214 } 215 switch (token) { 216 case Opt_port: 217 opts->port = option; 218 break; 219 case Opt_sq_depth: 220 opts->sq_depth = option; 221 break; 222 case Opt_rq_depth: 223 opts->rq_depth = option; 224 break; 225 case Opt_timeout: 226 opts->timeout = option; 227 break; 228 case Opt_privport: 229 opts->privport = true; 230 break; 231 default: 232 continue; 233 } 234 } 235 /* RQ must be at least as large as the SQ */ 236 opts->rq_depth = max(opts->rq_depth, opts->sq_depth); 237 kfree(tmp_options); 238 return 0; 239 } 240 241 static int 242 p9_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) 243 { 244 struct p9_client *c = id->context; 245 struct p9_trans_rdma *rdma = c->trans; 246 switch (event->event) { 247 case RDMA_CM_EVENT_ADDR_RESOLVED: 248 BUG_ON(rdma->state != P9_RDMA_INIT); 249 rdma->state = P9_RDMA_ADDR_RESOLVED; 250 break; 251 252 case RDMA_CM_EVENT_ROUTE_RESOLVED: 253 BUG_ON(rdma->state != P9_RDMA_ADDR_RESOLVED); 254 rdma->state = P9_RDMA_ROUTE_RESOLVED; 255 break; 256 257 case RDMA_CM_EVENT_ESTABLISHED: 258 BUG_ON(rdma->state != P9_RDMA_ROUTE_RESOLVED); 259 rdma->state = P9_RDMA_CONNECTED; 260 break; 261 262 case RDMA_CM_EVENT_DISCONNECTED: 263 if (rdma) 264 rdma->state = P9_RDMA_CLOSED; 265 c->status = Disconnected; 266 break; 267 268 case RDMA_CM_EVENT_TIMEWAIT_EXIT: 269 break; 270 271 case RDMA_CM_EVENT_ADDR_CHANGE: 272 case RDMA_CM_EVENT_ROUTE_ERROR: 273 case RDMA_CM_EVENT_DEVICE_REMOVAL: 274 case RDMA_CM_EVENT_MULTICAST_JOIN: 275 case RDMA_CM_EVENT_MULTICAST_ERROR: 276 case RDMA_CM_EVENT_REJECTED: 277 case RDMA_CM_EVENT_CONNECT_REQUEST: 278 case RDMA_CM_EVENT_CONNECT_RESPONSE: 279 case RDMA_CM_EVENT_CONNECT_ERROR: 280 case RDMA_CM_EVENT_ADDR_ERROR: 281 case RDMA_CM_EVENT_UNREACHABLE: 282 c->status = Disconnected; 283 rdma_disconnect(rdma->cm_id); 284 break; 285 default: 286 BUG(); 287 } 288 complete(&rdma->cm_done); 289 return 0; 290 } 291 292 static void 293 recv_done(struct ib_cq *cq, struct ib_wc *wc) 294 { 295 struct p9_client *client = cq->cq_context; 296 struct p9_trans_rdma *rdma = client->trans; 297 struct p9_rdma_context *c = 298 container_of(wc->wr_cqe, struct p9_rdma_context, cqe); 299 struct p9_req_t *req; 300 int err = 0; 301 int16_t tag; 302 303 req = NULL; 304 ib_dma_unmap_single(rdma->cm_id->device, c->busa, client->msize, 305 DMA_FROM_DEVICE); 306 307 if (wc->status != IB_WC_SUCCESS) 308 goto err_out; 309 310 c->rc.size = wc->byte_len; 311 err = p9_parse_header(&c->rc, NULL, NULL, &tag, 1); 312 if (err) 313 goto err_out; 314 315 req = p9_tag_lookup(client, tag); 316 if (!req) 317 goto err_out; 318 319 /* Check that we have not yet received a reply for this request. 320 */ 321 if (unlikely(req->rc.sdata)) { 322 pr_err("Duplicate reply for request %d", tag); 323 goto err_out; 324 } 325 326 req->rc.size = c->rc.size; 327 req->rc.sdata = c->rc.sdata; 328 p9_client_cb(client, req, REQ_STATUS_RCVD); 329 330 out: 331 up(&rdma->rq_sem); 332 kfree(c); 333 return; 334 335 err_out: 336 p9_debug(P9_DEBUG_ERROR, "req %p err %d status %d\n", 337 req, err, wc->status); 338 rdma->state = P9_RDMA_FLUSHING; 339 client->status = Disconnected; 340 goto out; 341 } 342 343 static void 344 send_done(struct ib_cq *cq, struct ib_wc *wc) 345 { 346 struct p9_client *client = cq->cq_context; 347 struct p9_trans_rdma *rdma = client->trans; 348 struct p9_rdma_context *c = 349 container_of(wc->wr_cqe, struct p9_rdma_context, cqe); 350 351 ib_dma_unmap_single(rdma->cm_id->device, 352 c->busa, c->req->tc.size, 353 DMA_TO_DEVICE); 354 up(&rdma->sq_sem); 355 p9_req_put(c->req); 356 kfree(c); 357 } 358 359 static void qp_event_handler(struct ib_event *event, void *context) 360 { 361 p9_debug(P9_DEBUG_ERROR, "QP event %d context %p\n", 362 event->event, context); 363 } 364 365 static void rdma_destroy_trans(struct p9_trans_rdma *rdma) 366 { 367 if (!rdma) 368 return; 369 370 if (rdma->qp && !IS_ERR(rdma->qp)) 371 ib_destroy_qp(rdma->qp); 372 373 if (rdma->pd && !IS_ERR(rdma->pd)) 374 ib_dealloc_pd(rdma->pd); 375 376 if (rdma->cq && !IS_ERR(rdma->cq)) 377 ib_free_cq(rdma->cq); 378 379 if (rdma->cm_id && !IS_ERR(rdma->cm_id)) 380 rdma_destroy_id(rdma->cm_id); 381 382 kfree(rdma); 383 } 384 385 static int 386 post_recv(struct p9_client *client, struct p9_rdma_context *c) 387 { 388 struct p9_trans_rdma *rdma = client->trans; 389 struct ib_recv_wr wr; 390 struct ib_sge sge; 391 392 c->busa = ib_dma_map_single(rdma->cm_id->device, 393 c->rc.sdata, client->msize, 394 DMA_FROM_DEVICE); 395 if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) 396 goto error; 397 398 c->cqe.done = recv_done; 399 400 sge.addr = c->busa; 401 sge.length = client->msize; 402 sge.lkey = rdma->pd->local_dma_lkey; 403 404 wr.next = NULL; 405 wr.wr_cqe = &c->cqe; 406 wr.sg_list = &sge; 407 wr.num_sge = 1; 408 return ib_post_recv(rdma->qp, &wr, NULL); 409 410 error: 411 p9_debug(P9_DEBUG_ERROR, "EIO\n"); 412 return -EIO; 413 } 414 415 static int rdma_request(struct p9_client *client, struct p9_req_t *req) 416 { 417 struct p9_trans_rdma *rdma = client->trans; 418 struct ib_send_wr wr; 419 struct ib_sge sge; 420 int err = 0; 421 unsigned long flags; 422 struct p9_rdma_context *c = NULL; 423 struct p9_rdma_context *rpl_context = NULL; 424 425 /* When an error occurs between posting the recv and the send, 426 * there will be a receive context posted without a pending request. 427 * Since there is no way to "un-post" it, we remember it and skip 428 * post_recv() for the next request. 429 * So here, 430 * see if we are this `next request' and need to absorb an excess rc. 431 * If yes, then drop and free our own, and do not recv_post(). 432 **/ 433 if (unlikely(atomic_read(&rdma->excess_rc) > 0)) { 434 if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) { 435 /* Got one! */ 436 p9_fcall_fini(&req->rc); 437 req->rc.sdata = NULL; 438 goto dont_need_post_recv; 439 } else { 440 /* We raced and lost. */ 441 atomic_inc(&rdma->excess_rc); 442 } 443 } 444 445 /* Allocate an fcall for the reply */ 446 rpl_context = kmalloc(sizeof *rpl_context, GFP_NOFS); 447 if (!rpl_context) { 448 err = -ENOMEM; 449 goto recv_error; 450 } 451 rpl_context->rc.sdata = req->rc.sdata; 452 453 /* 454 * Post a receive buffer for this request. We need to ensure 455 * there is a reply buffer available for every outstanding 456 * request. A flushed request can result in no reply for an 457 * outstanding request, so we must keep a count to avoid 458 * overflowing the RQ. 459 */ 460 if (down_interruptible(&rdma->rq_sem)) { 461 err = -EINTR; 462 goto recv_error; 463 } 464 465 err = post_recv(client, rpl_context); 466 if (err) { 467 p9_debug(P9_DEBUG_ERROR, "POST RECV failed: %d\n", err); 468 goto recv_error; 469 } 470 /* remove posted receive buffer from request structure */ 471 req->rc.sdata = NULL; 472 473 dont_need_post_recv: 474 /* Post the request */ 475 c = kmalloc(sizeof *c, GFP_NOFS); 476 if (!c) { 477 err = -ENOMEM; 478 goto send_error; 479 } 480 c->req = req; 481 482 c->busa = ib_dma_map_single(rdma->cm_id->device, 483 c->req->tc.sdata, c->req->tc.size, 484 DMA_TO_DEVICE); 485 if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) { 486 err = -EIO; 487 goto send_error; 488 } 489 490 c->cqe.done = send_done; 491 492 sge.addr = c->busa; 493 sge.length = c->req->tc.size; 494 sge.lkey = rdma->pd->local_dma_lkey; 495 496 wr.next = NULL; 497 wr.wr_cqe = &c->cqe; 498 wr.opcode = IB_WR_SEND; 499 wr.send_flags = IB_SEND_SIGNALED; 500 wr.sg_list = &sge; 501 wr.num_sge = 1; 502 503 if (down_interruptible(&rdma->sq_sem)) { 504 err = -EINTR; 505 goto send_error; 506 } 507 508 /* Mark request as `sent' *before* we actually send it, 509 * because doing if after could erase the REQ_STATUS_RCVD 510 * status in case of a very fast reply. 511 */ 512 req->status = REQ_STATUS_SENT; 513 err = ib_post_send(rdma->qp, &wr, NULL); 514 if (err) 515 goto send_error; 516 517 /* Success */ 518 return 0; 519 520 /* Handle errors that happened during or while preparing the send: */ 521 send_error: 522 req->status = REQ_STATUS_ERROR; 523 kfree(c); 524 p9_debug(P9_DEBUG_ERROR, "Error %d in rdma_request()\n", err); 525 526 /* Ach. 527 * We did recv_post(), but not send. We have one recv_post in excess. 528 */ 529 atomic_inc(&rdma->excess_rc); 530 return err; 531 532 /* Handle errors that happened during or while preparing post_recv(): */ 533 recv_error: 534 kfree(rpl_context); 535 spin_lock_irqsave(&rdma->req_lock, flags); 536 if (err != -EINTR && rdma->state < P9_RDMA_CLOSING) { 537 rdma->state = P9_RDMA_CLOSING; 538 spin_unlock_irqrestore(&rdma->req_lock, flags); 539 rdma_disconnect(rdma->cm_id); 540 } else 541 spin_unlock_irqrestore(&rdma->req_lock, flags); 542 return err; 543 } 544 545 static void rdma_close(struct p9_client *client) 546 { 547 struct p9_trans_rdma *rdma; 548 549 if (!client) 550 return; 551 552 rdma = client->trans; 553 if (!rdma) 554 return; 555 556 client->status = Disconnected; 557 rdma_disconnect(rdma->cm_id); 558 rdma_destroy_trans(rdma); 559 } 560 561 /** 562 * alloc_rdma - Allocate and initialize the rdma transport structure 563 * @opts: Mount options structure 564 */ 565 static struct p9_trans_rdma *alloc_rdma(struct p9_rdma_opts *opts) 566 { 567 struct p9_trans_rdma *rdma; 568 569 rdma = kzalloc(sizeof(struct p9_trans_rdma), GFP_KERNEL); 570 if (!rdma) 571 return NULL; 572 573 rdma->port = opts->port; 574 rdma->privport = opts->privport; 575 rdma->sq_depth = opts->sq_depth; 576 rdma->rq_depth = opts->rq_depth; 577 rdma->timeout = opts->timeout; 578 spin_lock_init(&rdma->req_lock); 579 init_completion(&rdma->cm_done); 580 sema_init(&rdma->sq_sem, rdma->sq_depth); 581 sema_init(&rdma->rq_sem, rdma->rq_depth); 582 atomic_set(&rdma->excess_rc, 0); 583 584 return rdma; 585 } 586 587 static int rdma_cancel(struct p9_client *client, struct p9_req_t *req) 588 { 589 /* Nothing to do here. 590 * We will take care of it (if we have to) in rdma_cancelled() 591 */ 592 return 1; 593 } 594 595 /* A request has been fully flushed without a reply. 596 * That means we have posted one buffer in excess. 597 */ 598 static int rdma_cancelled(struct p9_client *client, struct p9_req_t *req) 599 { 600 struct p9_trans_rdma *rdma = client->trans; 601 atomic_inc(&rdma->excess_rc); 602 return 0; 603 } 604 605 static int p9_rdma_bind_privport(struct p9_trans_rdma *rdma) 606 { 607 struct sockaddr_in cl = { 608 .sin_family = AF_INET, 609 .sin_addr.s_addr = htonl(INADDR_ANY), 610 }; 611 int port, err = -EINVAL; 612 613 for (port = P9_DEF_MAX_RESVPORT; port >= P9_DEF_MIN_RESVPORT; port--) { 614 cl.sin_port = htons((ushort)port); 615 err = rdma_bind_addr(rdma->cm_id, (struct sockaddr *)&cl); 616 if (err != -EADDRINUSE) 617 break; 618 } 619 return err; 620 } 621 622 /** 623 * rdma_create_trans - Transport method for creating a transport instance 624 * @client: client instance 625 * @addr: IP address string 626 * @args: Mount options string 627 */ 628 static int 629 rdma_create_trans(struct p9_client *client, const char *addr, char *args) 630 { 631 int err; 632 struct p9_rdma_opts opts; 633 struct p9_trans_rdma *rdma; 634 struct rdma_conn_param conn_param; 635 struct ib_qp_init_attr qp_attr; 636 637 if (addr == NULL) 638 return -EINVAL; 639 640 /* Parse the transport specific mount options */ 641 err = parse_opts(args, &opts); 642 if (err < 0) 643 return err; 644 645 /* Create and initialize the RDMA transport structure */ 646 rdma = alloc_rdma(&opts); 647 if (!rdma) 648 return -ENOMEM; 649 650 /* Create the RDMA CM ID */ 651 rdma->cm_id = rdma_create_id(&init_net, p9_cm_event_handler, client, 652 RDMA_PS_TCP, IB_QPT_RC); 653 if (IS_ERR(rdma->cm_id)) 654 goto error; 655 656 /* Associate the client with the transport */ 657 client->trans = rdma; 658 659 /* Bind to a privileged port if we need to */ 660 if (opts.privport) { 661 err = p9_rdma_bind_privport(rdma); 662 if (err < 0) { 663 pr_err("%s (%d): problem binding to privport: %d\n", 664 __func__, task_pid_nr(current), -err); 665 goto error; 666 } 667 } 668 669 /* Resolve the server's address */ 670 rdma->addr.sin_family = AF_INET; 671 rdma->addr.sin_addr.s_addr = in_aton(addr); 672 rdma->addr.sin_port = htons(opts.port); 673 err = rdma_resolve_addr(rdma->cm_id, NULL, 674 (struct sockaddr *)&rdma->addr, 675 rdma->timeout); 676 if (err) 677 goto error; 678 err = wait_for_completion_interruptible(&rdma->cm_done); 679 if (err || (rdma->state != P9_RDMA_ADDR_RESOLVED)) 680 goto error; 681 682 /* Resolve the route to the server */ 683 err = rdma_resolve_route(rdma->cm_id, rdma->timeout); 684 if (err) 685 goto error; 686 err = wait_for_completion_interruptible(&rdma->cm_done); 687 if (err || (rdma->state != P9_RDMA_ROUTE_RESOLVED)) 688 goto error; 689 690 /* Create the Completion Queue */ 691 rdma->cq = ib_alloc_cq_any(rdma->cm_id->device, client, 692 opts.sq_depth + opts.rq_depth + 1, 693 IB_POLL_SOFTIRQ); 694 if (IS_ERR(rdma->cq)) 695 goto error; 696 697 /* Create the Protection Domain */ 698 rdma->pd = ib_alloc_pd(rdma->cm_id->device, 0); 699 if (IS_ERR(rdma->pd)) 700 goto error; 701 702 /* Create the Queue Pair */ 703 memset(&qp_attr, 0, sizeof qp_attr); 704 qp_attr.event_handler = qp_event_handler; 705 qp_attr.qp_context = client; 706 qp_attr.cap.max_send_wr = opts.sq_depth; 707 qp_attr.cap.max_recv_wr = opts.rq_depth; 708 qp_attr.cap.max_send_sge = P9_RDMA_SEND_SGE; 709 qp_attr.cap.max_recv_sge = P9_RDMA_RECV_SGE; 710 qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; 711 qp_attr.qp_type = IB_QPT_RC; 712 qp_attr.send_cq = rdma->cq; 713 qp_attr.recv_cq = rdma->cq; 714 err = rdma_create_qp(rdma->cm_id, rdma->pd, &qp_attr); 715 if (err) 716 goto error; 717 rdma->qp = rdma->cm_id->qp; 718 719 /* Request a connection */ 720 memset(&conn_param, 0, sizeof(conn_param)); 721 conn_param.private_data = NULL; 722 conn_param.private_data_len = 0; 723 conn_param.responder_resources = P9_RDMA_IRD; 724 conn_param.initiator_depth = P9_RDMA_ORD; 725 err = rdma_connect(rdma->cm_id, &conn_param); 726 if (err) 727 goto error; 728 err = wait_for_completion_interruptible(&rdma->cm_done); 729 if (err || (rdma->state != P9_RDMA_CONNECTED)) 730 goto error; 731 732 client->status = Connected; 733 734 return 0; 735 736 error: 737 rdma_destroy_trans(rdma); 738 return -ENOTCONN; 739 } 740 741 static struct p9_trans_module p9_rdma_trans = { 742 .name = "rdma", 743 .maxsize = P9_RDMA_MAXSIZE, 744 .def = 0, 745 .owner = THIS_MODULE, 746 .create = rdma_create_trans, 747 .close = rdma_close, 748 .request = rdma_request, 749 .cancel = rdma_cancel, 750 .cancelled = rdma_cancelled, 751 .show_options = p9_rdma_show_options, 752 }; 753 754 /** 755 * p9_trans_rdma_init - Register the 9P RDMA transport driver 756 */ 757 static int __init p9_trans_rdma_init(void) 758 { 759 v9fs_register_trans(&p9_rdma_trans); 760 return 0; 761 } 762 763 static void __exit p9_trans_rdma_exit(void) 764 { 765 v9fs_unregister_trans(&p9_rdma_trans); 766 } 767 768 module_init(p9_trans_rdma_init); 769 module_exit(p9_trans_rdma_exit); 770 771 MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>"); 772 MODULE_DESCRIPTION("RDMA Transport for 9P"); 773 MODULE_LICENSE("Dual BSD/GPL"); 774