1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * linux/fs/9p/trans_rdma.c 4 * 5 * RDMA transport layer based on the trans_fd.c implementation. 6 * 7 * Copyright (C) 2008 by Tom Tucker <tom@opengridcomputing.com> 8 * Copyright (C) 2006 by Russ Cox <rsc@swtch.com> 9 * Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net> 10 * Copyright (C) 2004-2008 by Eric Van Hensbergen <ericvh@gmail.com> 11 * Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com> 12 */ 13 14 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 15 16 #include <linux/in.h> 17 #include <linux/module.h> 18 #include <linux/net.h> 19 #include <linux/ipv6.h> 20 #include <linux/kthread.h> 21 #include <linux/errno.h> 22 #include <linux/kernel.h> 23 #include <linux/un.h> 24 #include <linux/uaccess.h> 25 #include <linux/inet.h> 26 #include <linux/idr.h> 27 #include <linux/file.h> 28 #include <linux/parser.h> 29 #include <linux/semaphore.h> 30 #include <linux/slab.h> 31 #include <linux/seq_file.h> 32 #include <net/9p/9p.h> 33 #include <net/9p/client.h> 34 #include <net/9p/transport.h> 35 #include <rdma/ib_verbs.h> 36 #include <rdma/rdma_cm.h> 37 38 #define P9_PORT 5640 39 #define P9_RDMA_SQ_DEPTH 32 40 #define P9_RDMA_RQ_DEPTH 32 41 #define P9_RDMA_SEND_SGE 4 42 #define P9_RDMA_RECV_SGE 4 43 #define P9_RDMA_IRD 0 44 #define P9_RDMA_ORD 0 45 #define P9_RDMA_TIMEOUT 30000 /* 30 seconds */ 46 #define P9_RDMA_MAXSIZE (1024*1024) /* 1MB */ 47 48 /** 49 * struct p9_trans_rdma - RDMA transport instance 50 * 51 * @state: tracks the transport state machine for connection setup and tear down 52 * @cm_id: The RDMA CM ID 53 * @pd: Protection Domain pointer 54 * @qp: Queue Pair pointer 55 * @cq: Completion Queue pointer 56 * @timeout: Number of uSecs to wait for connection management events 57 * @privport: Whether a privileged port may be used 58 * @port: The port to use 59 * @sq_depth: The depth of the Send Queue 60 * @sq_sem: Semaphore for the SQ 61 * @rq_depth: The depth of the Receive Queue. 62 * @rq_sem: Semaphore for the RQ 63 * @excess_rc : Amount of posted Receive Contexts without a pending request. 64 * See rdma_request() 65 * @addr: The remote peer's address 66 * @req_lock: Protects the active request list 67 * @cm_done: Completion event for connection management tracking 68 */ 69 struct p9_trans_rdma { 70 enum { 71 P9_RDMA_INIT, 72 P9_RDMA_ADDR_RESOLVED, 73 P9_RDMA_ROUTE_RESOLVED, 74 P9_RDMA_CONNECTED, 75 P9_RDMA_FLUSHING, 76 P9_RDMA_CLOSING, 77 P9_RDMA_CLOSED, 78 } state; 79 struct rdma_cm_id *cm_id; 80 struct ib_pd *pd; 81 struct ib_qp *qp; 82 struct ib_cq *cq; 83 long timeout; 84 bool privport; 85 u16 port; 86 int sq_depth; 87 struct semaphore sq_sem; 88 int rq_depth; 89 struct semaphore rq_sem; 90 atomic_t excess_rc; 91 struct sockaddr_in addr; 92 spinlock_t req_lock; 93 94 struct completion cm_done; 95 }; 96 97 /** 98 * p9_rdma_context - Keeps track of in-process WR 99 * 100 * @busa: Bus address to unmap when the WR completes 101 * @req: Keeps track of requests (send) 102 * @rc: Keepts track of replies (receive) 103 */ 104 struct p9_rdma_req; 105 struct p9_rdma_context { 106 struct ib_cqe cqe; 107 dma_addr_t busa; 108 union { 109 struct p9_req_t *req; 110 struct p9_fcall rc; 111 }; 112 }; 113 114 /** 115 * p9_rdma_opts - Collection of mount options 116 * @port: port of connection 117 * @sq_depth: The requested depth of the SQ. This really doesn't need 118 * to be any deeper than the number of threads used in the client 119 * @rq_depth: The depth of the RQ. Should be greater than or equal to SQ depth 120 * @timeout: Time to wait in msecs for CM events 121 */ 122 struct p9_rdma_opts { 123 short port; 124 bool privport; 125 int sq_depth; 126 int rq_depth; 127 long timeout; 128 }; 129 130 /* 131 * Option Parsing (code inspired by NFS code) 132 */ 133 enum { 134 /* Options that take integer arguments */ 135 Opt_port, Opt_rq_depth, Opt_sq_depth, Opt_timeout, 136 /* Options that take no argument */ 137 Opt_privport, 138 Opt_err, 139 }; 140 141 static match_table_t tokens = { 142 {Opt_port, "port=%u"}, 143 {Opt_sq_depth, "sq=%u"}, 144 {Opt_rq_depth, "rq=%u"}, 145 {Opt_timeout, "timeout=%u"}, 146 {Opt_privport, "privport"}, 147 {Opt_err, NULL}, 148 }; 149 150 static int p9_rdma_show_options(struct seq_file *m, struct p9_client *clnt) 151 { 152 struct p9_trans_rdma *rdma = clnt->trans; 153 154 if (rdma->port != P9_PORT) 155 seq_printf(m, ",port=%u", rdma->port); 156 if (rdma->sq_depth != P9_RDMA_SQ_DEPTH) 157 seq_printf(m, ",sq=%u", rdma->sq_depth); 158 if (rdma->rq_depth != P9_RDMA_RQ_DEPTH) 159 seq_printf(m, ",rq=%u", rdma->rq_depth); 160 if (rdma->timeout != P9_RDMA_TIMEOUT) 161 seq_printf(m, ",timeout=%lu", rdma->timeout); 162 if (rdma->privport) 163 seq_puts(m, ",privport"); 164 return 0; 165 } 166 167 /** 168 * parse_opts - parse mount options into rdma options structure 169 * @params: options string passed from mount 170 * @opts: rdma transport-specific structure to parse options into 171 * 172 * Returns 0 upon success, -ERRNO upon failure 173 */ 174 static int parse_opts(char *params, struct p9_rdma_opts *opts) 175 { 176 char *p; 177 substring_t args[MAX_OPT_ARGS]; 178 int option; 179 char *options, *tmp_options; 180 181 opts->port = P9_PORT; 182 opts->sq_depth = P9_RDMA_SQ_DEPTH; 183 opts->rq_depth = P9_RDMA_RQ_DEPTH; 184 opts->timeout = P9_RDMA_TIMEOUT; 185 opts->privport = false; 186 187 if (!params) 188 return 0; 189 190 tmp_options = kstrdup(params, GFP_KERNEL); 191 if (!tmp_options) { 192 p9_debug(P9_DEBUG_ERROR, 193 "failed to allocate copy of option string\n"); 194 return -ENOMEM; 195 } 196 options = tmp_options; 197 198 while ((p = strsep(&options, ",")) != NULL) { 199 int token; 200 int r; 201 if (!*p) 202 continue; 203 token = match_token(p, tokens, args); 204 if ((token != Opt_err) && (token != Opt_privport)) { 205 r = match_int(&args[0], &option); 206 if (r < 0) { 207 p9_debug(P9_DEBUG_ERROR, 208 "integer field, but no integer?\n"); 209 continue; 210 } 211 } 212 switch (token) { 213 case Opt_port: 214 opts->port = option; 215 break; 216 case Opt_sq_depth: 217 opts->sq_depth = option; 218 break; 219 case Opt_rq_depth: 220 opts->rq_depth = option; 221 break; 222 case Opt_timeout: 223 opts->timeout = option; 224 break; 225 case Opt_privport: 226 opts->privport = true; 227 break; 228 default: 229 continue; 230 } 231 } 232 /* RQ must be at least as large as the SQ */ 233 opts->rq_depth = max(opts->rq_depth, opts->sq_depth); 234 kfree(tmp_options); 235 return 0; 236 } 237 238 static int 239 p9_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) 240 { 241 struct p9_client *c = id->context; 242 struct p9_trans_rdma *rdma = c->trans; 243 switch (event->event) { 244 case RDMA_CM_EVENT_ADDR_RESOLVED: 245 BUG_ON(rdma->state != P9_RDMA_INIT); 246 rdma->state = P9_RDMA_ADDR_RESOLVED; 247 break; 248 249 case RDMA_CM_EVENT_ROUTE_RESOLVED: 250 BUG_ON(rdma->state != P9_RDMA_ADDR_RESOLVED); 251 rdma->state = P9_RDMA_ROUTE_RESOLVED; 252 break; 253 254 case RDMA_CM_EVENT_ESTABLISHED: 255 BUG_ON(rdma->state != P9_RDMA_ROUTE_RESOLVED); 256 rdma->state = P9_RDMA_CONNECTED; 257 break; 258 259 case RDMA_CM_EVENT_DISCONNECTED: 260 if (rdma) 261 rdma->state = P9_RDMA_CLOSED; 262 c->status = Disconnected; 263 break; 264 265 case RDMA_CM_EVENT_TIMEWAIT_EXIT: 266 break; 267 268 case RDMA_CM_EVENT_ADDR_CHANGE: 269 case RDMA_CM_EVENT_ROUTE_ERROR: 270 case RDMA_CM_EVENT_DEVICE_REMOVAL: 271 case RDMA_CM_EVENT_MULTICAST_JOIN: 272 case RDMA_CM_EVENT_MULTICAST_ERROR: 273 case RDMA_CM_EVENT_REJECTED: 274 case RDMA_CM_EVENT_CONNECT_REQUEST: 275 case RDMA_CM_EVENT_CONNECT_RESPONSE: 276 case RDMA_CM_EVENT_CONNECT_ERROR: 277 case RDMA_CM_EVENT_ADDR_ERROR: 278 case RDMA_CM_EVENT_UNREACHABLE: 279 c->status = Disconnected; 280 rdma_disconnect(rdma->cm_id); 281 break; 282 default: 283 BUG(); 284 } 285 complete(&rdma->cm_done); 286 return 0; 287 } 288 289 static void 290 recv_done(struct ib_cq *cq, struct ib_wc *wc) 291 { 292 struct p9_client *client = cq->cq_context; 293 struct p9_trans_rdma *rdma = client->trans; 294 struct p9_rdma_context *c = 295 container_of(wc->wr_cqe, struct p9_rdma_context, cqe); 296 struct p9_req_t *req; 297 int err = 0; 298 int16_t tag; 299 300 req = NULL; 301 ib_dma_unmap_single(rdma->cm_id->device, c->busa, client->msize, 302 DMA_FROM_DEVICE); 303 304 if (wc->status != IB_WC_SUCCESS) 305 goto err_out; 306 307 c->rc.size = wc->byte_len; 308 err = p9_parse_header(&c->rc, NULL, NULL, &tag, 1); 309 if (err) 310 goto err_out; 311 312 req = p9_tag_lookup(client, tag); 313 if (!req) 314 goto err_out; 315 316 /* Check that we have not yet received a reply for this request. 317 */ 318 if (unlikely(req->rc.sdata)) { 319 pr_err("Duplicate reply for request %d", tag); 320 goto err_out; 321 } 322 323 req->rc.size = c->rc.size; 324 req->rc.sdata = c->rc.sdata; 325 p9_client_cb(client, req, REQ_STATUS_RCVD); 326 327 out: 328 up(&rdma->rq_sem); 329 kfree(c); 330 return; 331 332 err_out: 333 p9_debug(P9_DEBUG_ERROR, "req %p err %d status %d\n", 334 req, err, wc->status); 335 rdma->state = P9_RDMA_FLUSHING; 336 client->status = Disconnected; 337 goto out; 338 } 339 340 static void 341 send_done(struct ib_cq *cq, struct ib_wc *wc) 342 { 343 struct p9_client *client = cq->cq_context; 344 struct p9_trans_rdma *rdma = client->trans; 345 struct p9_rdma_context *c = 346 container_of(wc->wr_cqe, struct p9_rdma_context, cqe); 347 348 ib_dma_unmap_single(rdma->cm_id->device, 349 c->busa, c->req->tc.size, 350 DMA_TO_DEVICE); 351 up(&rdma->sq_sem); 352 p9_req_put(c->req); 353 kfree(c); 354 } 355 356 static void qp_event_handler(struct ib_event *event, void *context) 357 { 358 p9_debug(P9_DEBUG_ERROR, "QP event %d context %p\n", 359 event->event, context); 360 } 361 362 static void rdma_destroy_trans(struct p9_trans_rdma *rdma) 363 { 364 if (!rdma) 365 return; 366 367 if (rdma->qp && !IS_ERR(rdma->qp)) 368 ib_destroy_qp(rdma->qp); 369 370 if (rdma->pd && !IS_ERR(rdma->pd)) 371 ib_dealloc_pd(rdma->pd); 372 373 if (rdma->cq && !IS_ERR(rdma->cq)) 374 ib_free_cq(rdma->cq); 375 376 if (rdma->cm_id && !IS_ERR(rdma->cm_id)) 377 rdma_destroy_id(rdma->cm_id); 378 379 kfree(rdma); 380 } 381 382 static int 383 post_recv(struct p9_client *client, struct p9_rdma_context *c) 384 { 385 struct p9_trans_rdma *rdma = client->trans; 386 struct ib_recv_wr wr; 387 struct ib_sge sge; 388 389 c->busa = ib_dma_map_single(rdma->cm_id->device, 390 c->rc.sdata, client->msize, 391 DMA_FROM_DEVICE); 392 if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) 393 goto error; 394 395 c->cqe.done = recv_done; 396 397 sge.addr = c->busa; 398 sge.length = client->msize; 399 sge.lkey = rdma->pd->local_dma_lkey; 400 401 wr.next = NULL; 402 wr.wr_cqe = &c->cqe; 403 wr.sg_list = &sge; 404 wr.num_sge = 1; 405 return ib_post_recv(rdma->qp, &wr, NULL); 406 407 error: 408 p9_debug(P9_DEBUG_ERROR, "EIO\n"); 409 return -EIO; 410 } 411 412 static int rdma_request(struct p9_client *client, struct p9_req_t *req) 413 { 414 struct p9_trans_rdma *rdma = client->trans; 415 struct ib_send_wr wr; 416 struct ib_sge sge; 417 int err = 0; 418 unsigned long flags; 419 struct p9_rdma_context *c = NULL; 420 struct p9_rdma_context *rpl_context = NULL; 421 422 /* When an error occurs between posting the recv and the send, 423 * there will be a receive context posted without a pending request. 424 * Since there is no way to "un-post" it, we remember it and skip 425 * post_recv() for the next request. 426 * So here, 427 * see if we are this `next request' and need to absorb an excess rc. 428 * If yes, then drop and free our own, and do not recv_post(). 429 **/ 430 if (unlikely(atomic_read(&rdma->excess_rc) > 0)) { 431 if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) { 432 /* Got one! */ 433 p9_fcall_fini(&req->rc); 434 req->rc.sdata = NULL; 435 goto dont_need_post_recv; 436 } else { 437 /* We raced and lost. */ 438 atomic_inc(&rdma->excess_rc); 439 } 440 } 441 442 /* Allocate an fcall for the reply */ 443 rpl_context = kmalloc(sizeof *rpl_context, GFP_NOFS); 444 if (!rpl_context) { 445 err = -ENOMEM; 446 goto recv_error; 447 } 448 rpl_context->rc.sdata = req->rc.sdata; 449 450 /* 451 * Post a receive buffer for this request. We need to ensure 452 * there is a reply buffer available for every outstanding 453 * request. A flushed request can result in no reply for an 454 * outstanding request, so we must keep a count to avoid 455 * overflowing the RQ. 456 */ 457 if (down_interruptible(&rdma->rq_sem)) { 458 err = -EINTR; 459 goto recv_error; 460 } 461 462 err = post_recv(client, rpl_context); 463 if (err) { 464 p9_debug(P9_DEBUG_ERROR, "POST RECV failed: %d\n", err); 465 goto recv_error; 466 } 467 /* remove posted receive buffer from request structure */ 468 req->rc.sdata = NULL; 469 470 dont_need_post_recv: 471 /* Post the request */ 472 c = kmalloc(sizeof *c, GFP_NOFS); 473 if (!c) { 474 err = -ENOMEM; 475 goto send_error; 476 } 477 c->req = req; 478 479 c->busa = ib_dma_map_single(rdma->cm_id->device, 480 c->req->tc.sdata, c->req->tc.size, 481 DMA_TO_DEVICE); 482 if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) { 483 err = -EIO; 484 goto send_error; 485 } 486 487 c->cqe.done = send_done; 488 489 sge.addr = c->busa; 490 sge.length = c->req->tc.size; 491 sge.lkey = rdma->pd->local_dma_lkey; 492 493 wr.next = NULL; 494 wr.wr_cqe = &c->cqe; 495 wr.opcode = IB_WR_SEND; 496 wr.send_flags = IB_SEND_SIGNALED; 497 wr.sg_list = &sge; 498 wr.num_sge = 1; 499 500 if (down_interruptible(&rdma->sq_sem)) { 501 err = -EINTR; 502 goto send_error; 503 } 504 505 /* Mark request as `sent' *before* we actually send it, 506 * because doing if after could erase the REQ_STATUS_RCVD 507 * status in case of a very fast reply. 508 */ 509 req->status = REQ_STATUS_SENT; 510 err = ib_post_send(rdma->qp, &wr, NULL); 511 if (err) 512 goto send_error; 513 514 /* Success */ 515 return 0; 516 517 /* Handle errors that happened during or while preparing the send: */ 518 send_error: 519 req->status = REQ_STATUS_ERROR; 520 kfree(c); 521 p9_debug(P9_DEBUG_ERROR, "Error %d in rdma_request()\n", err); 522 523 /* Ach. 524 * We did recv_post(), but not send. We have one recv_post in excess. 525 */ 526 atomic_inc(&rdma->excess_rc); 527 return err; 528 529 /* Handle errors that happened during or while preparing post_recv(): */ 530 recv_error: 531 kfree(rpl_context); 532 spin_lock_irqsave(&rdma->req_lock, flags); 533 if (err != -EINTR && rdma->state < P9_RDMA_CLOSING) { 534 rdma->state = P9_RDMA_CLOSING; 535 spin_unlock_irqrestore(&rdma->req_lock, flags); 536 rdma_disconnect(rdma->cm_id); 537 } else 538 spin_unlock_irqrestore(&rdma->req_lock, flags); 539 return err; 540 } 541 542 static void rdma_close(struct p9_client *client) 543 { 544 struct p9_trans_rdma *rdma; 545 546 if (!client) 547 return; 548 549 rdma = client->trans; 550 if (!rdma) 551 return; 552 553 client->status = Disconnected; 554 rdma_disconnect(rdma->cm_id); 555 rdma_destroy_trans(rdma); 556 } 557 558 /** 559 * alloc_rdma - Allocate and initialize the rdma transport structure 560 * @opts: Mount options structure 561 */ 562 static struct p9_trans_rdma *alloc_rdma(struct p9_rdma_opts *opts) 563 { 564 struct p9_trans_rdma *rdma; 565 566 rdma = kzalloc(sizeof(struct p9_trans_rdma), GFP_KERNEL); 567 if (!rdma) 568 return NULL; 569 570 rdma->port = opts->port; 571 rdma->privport = opts->privport; 572 rdma->sq_depth = opts->sq_depth; 573 rdma->rq_depth = opts->rq_depth; 574 rdma->timeout = opts->timeout; 575 spin_lock_init(&rdma->req_lock); 576 init_completion(&rdma->cm_done); 577 sema_init(&rdma->sq_sem, rdma->sq_depth); 578 sema_init(&rdma->rq_sem, rdma->rq_depth); 579 atomic_set(&rdma->excess_rc, 0); 580 581 return rdma; 582 } 583 584 static int rdma_cancel(struct p9_client *client, struct p9_req_t *req) 585 { 586 /* Nothing to do here. 587 * We will take care of it (if we have to) in rdma_cancelled() 588 */ 589 return 1; 590 } 591 592 /* A request has been fully flushed without a reply. 593 * That means we have posted one buffer in excess. 594 */ 595 static int rdma_cancelled(struct p9_client *client, struct p9_req_t *req) 596 { 597 struct p9_trans_rdma *rdma = client->trans; 598 atomic_inc(&rdma->excess_rc); 599 return 0; 600 } 601 602 static int p9_rdma_bind_privport(struct p9_trans_rdma *rdma) 603 { 604 struct sockaddr_in cl = { 605 .sin_family = AF_INET, 606 .sin_addr.s_addr = htonl(INADDR_ANY), 607 }; 608 int port, err = -EINVAL; 609 610 for (port = P9_DEF_MAX_RESVPORT; port >= P9_DEF_MIN_RESVPORT; port--) { 611 cl.sin_port = htons((ushort)port); 612 err = rdma_bind_addr(rdma->cm_id, (struct sockaddr *)&cl); 613 if (err != -EADDRINUSE) 614 break; 615 } 616 return err; 617 } 618 619 /** 620 * rdma_create_trans - Transport method for creating a transport instance 621 * @client: client instance 622 * @addr: IP address string 623 * @args: Mount options string 624 */ 625 static int 626 rdma_create_trans(struct p9_client *client, const char *addr, char *args) 627 { 628 int err; 629 struct p9_rdma_opts opts; 630 struct p9_trans_rdma *rdma; 631 struct rdma_conn_param conn_param; 632 struct ib_qp_init_attr qp_attr; 633 634 if (addr == NULL) 635 return -EINVAL; 636 637 /* Parse the transport specific mount options */ 638 err = parse_opts(args, &opts); 639 if (err < 0) 640 return err; 641 642 /* Create and initialize the RDMA transport structure */ 643 rdma = alloc_rdma(&opts); 644 if (!rdma) 645 return -ENOMEM; 646 647 /* Create the RDMA CM ID */ 648 rdma->cm_id = rdma_create_id(&init_net, p9_cm_event_handler, client, 649 RDMA_PS_TCP, IB_QPT_RC); 650 if (IS_ERR(rdma->cm_id)) 651 goto error; 652 653 /* Associate the client with the transport */ 654 client->trans = rdma; 655 656 /* Bind to a privileged port if we need to */ 657 if (opts.privport) { 658 err = p9_rdma_bind_privport(rdma); 659 if (err < 0) { 660 pr_err("%s (%d): problem binding to privport: %d\n", 661 __func__, task_pid_nr(current), -err); 662 goto error; 663 } 664 } 665 666 /* Resolve the server's address */ 667 rdma->addr.sin_family = AF_INET; 668 rdma->addr.sin_addr.s_addr = in_aton(addr); 669 rdma->addr.sin_port = htons(opts.port); 670 err = rdma_resolve_addr(rdma->cm_id, NULL, 671 (struct sockaddr *)&rdma->addr, 672 rdma->timeout); 673 if (err) 674 goto error; 675 err = wait_for_completion_interruptible(&rdma->cm_done); 676 if (err || (rdma->state != P9_RDMA_ADDR_RESOLVED)) 677 goto error; 678 679 /* Resolve the route to the server */ 680 err = rdma_resolve_route(rdma->cm_id, rdma->timeout); 681 if (err) 682 goto error; 683 err = wait_for_completion_interruptible(&rdma->cm_done); 684 if (err || (rdma->state != P9_RDMA_ROUTE_RESOLVED)) 685 goto error; 686 687 /* Create the Completion Queue */ 688 rdma->cq = ib_alloc_cq(rdma->cm_id->device, client, 689 opts.sq_depth + opts.rq_depth + 1, 690 0, IB_POLL_SOFTIRQ); 691 if (IS_ERR(rdma->cq)) 692 goto error; 693 694 /* Create the Protection Domain */ 695 rdma->pd = ib_alloc_pd(rdma->cm_id->device, 0); 696 if (IS_ERR(rdma->pd)) 697 goto error; 698 699 /* Create the Queue Pair */ 700 memset(&qp_attr, 0, sizeof qp_attr); 701 qp_attr.event_handler = qp_event_handler; 702 qp_attr.qp_context = client; 703 qp_attr.cap.max_send_wr = opts.sq_depth; 704 qp_attr.cap.max_recv_wr = opts.rq_depth; 705 qp_attr.cap.max_send_sge = P9_RDMA_SEND_SGE; 706 qp_attr.cap.max_recv_sge = P9_RDMA_RECV_SGE; 707 qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; 708 qp_attr.qp_type = IB_QPT_RC; 709 qp_attr.send_cq = rdma->cq; 710 qp_attr.recv_cq = rdma->cq; 711 err = rdma_create_qp(rdma->cm_id, rdma->pd, &qp_attr); 712 if (err) 713 goto error; 714 rdma->qp = rdma->cm_id->qp; 715 716 /* Request a connection */ 717 memset(&conn_param, 0, sizeof(conn_param)); 718 conn_param.private_data = NULL; 719 conn_param.private_data_len = 0; 720 conn_param.responder_resources = P9_RDMA_IRD; 721 conn_param.initiator_depth = P9_RDMA_ORD; 722 err = rdma_connect(rdma->cm_id, &conn_param); 723 if (err) 724 goto error; 725 err = wait_for_completion_interruptible(&rdma->cm_done); 726 if (err || (rdma->state != P9_RDMA_CONNECTED)) 727 goto error; 728 729 client->status = Connected; 730 731 return 0; 732 733 error: 734 rdma_destroy_trans(rdma); 735 return -ENOTCONN; 736 } 737 738 static struct p9_trans_module p9_rdma_trans = { 739 .name = "rdma", 740 .maxsize = P9_RDMA_MAXSIZE, 741 .def = 0, 742 .owner = THIS_MODULE, 743 .create = rdma_create_trans, 744 .close = rdma_close, 745 .request = rdma_request, 746 .cancel = rdma_cancel, 747 .cancelled = rdma_cancelled, 748 .show_options = p9_rdma_show_options, 749 }; 750 751 /** 752 * p9_trans_rdma_init - Register the 9P RDMA transport driver 753 */ 754 static int __init p9_trans_rdma_init(void) 755 { 756 v9fs_register_trans(&p9_rdma_trans); 757 return 0; 758 } 759 760 static void __exit p9_trans_rdma_exit(void) 761 { 762 v9fs_unregister_trans(&p9_rdma_trans); 763 } 764 765 module_init(p9_trans_rdma_init); 766 module_exit(p9_trans_rdma_exit); 767 768 MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>"); 769 MODULE_DESCRIPTION("RDMA Transport for 9P"); 770 MODULE_LICENSE("Dual BSD/GPL"); 771