/*
 * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/completion.h>
#include <linux/file.h>
#include <linux/mutex.h>
#include <linux/poll.h>
#include <linux/sched.h>
#include <linux/idr.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/miscdevice.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
#include <linux/module.h>
#include <linux/nsproxy.h>

#include <linux/nospec.h>

#include <rdma/rdma_user_cm.h>
#include <rdma/ib_marshall.h>
#include <rdma/rdma_cm.h>
#include <rdma/rdma_cm_ib.h>
#include <rdma/ib_addr.h>
#include <rdma/ib.h>
#include <rdma/rdma_netlink.h>
#include "core_priv.h"

MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access");
MODULE_LICENSE("Dual BSD/GPL");

static unsigned int max_backlog = 1024;

static struct ctl_table_header *ucma_ctl_table_hdr;
static struct ctl_table ucma_ctl_table[] = {
	{
		.procname	= "max_backlog",
		.data		= &max_backlog,
		.maxlen		= sizeof max_backlog,
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{ }
};

struct ucma_file {
	struct mutex		mut;
	struct file		*filp;
	struct list_head	ctx_list;
	struct list_head	event_list;
	wait_queue_head_t	poll_wait;
	struct workqueue_struct	*close_wq;
};

struct ucma_context {
	u32			id;
	struct completion	comp;
	refcount_t		ref;
	int			events_reported;
	int			backlog;

	struct ucma_file	*file;
	struct rdma_cm_id	*cm_id;
	struct mutex		mutex;
	u64			uid;

	struct list_head	list;
	struct list_head	mc_list;
	/* mark that device is in process of destroying the internal HW
	 * resources, protected by the ctx_table lock
	 */
	int			closing;
	/* sync between removal event and id destroy, protected by file mut */
	int			destroying;
	struct work_struct	close_work;
};

struct ucma_multicast {
	struct ucma_context	*ctx;
	u32			id;
	int			events_reported;

	u64			uid;
	u8			join_state;
	struct list_head	list;
	struct sockaddr_storage	addr;
};

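/*
 * One ucma_event is queued on the owning file's event_list for each rdma_cm
 * callback that userspace has not yet consumed via RDMA_USER_CM_CMD_GET_EVENT;
 * mc is only set for multicast join/error events.
 */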
struct ucma_event {
	struct ucma_context	*ctx;
	struct ucma_multicast	*mc;
	struct list_head	list;
	struct rdma_cm_id	*cm_id;
	struct rdma_ucm_event_resp resp;
	struct work_struct	close_work;
};

static DEFINE_XARRAY_ALLOC(ctx_table);
static DEFINE_XARRAY_ALLOC(multicast_table);

static const struct file_operations ucma_fops;

static inline struct ucma_context *_ucma_find_context(int id,
						       struct ucma_file *file)
{
	struct ucma_context *ctx;

	ctx = xa_load(&ctx_table, id);
	if (!ctx)
		ctx = ERR_PTR(-ENOENT);
	else if (ctx->file != file || !ctx->cm_id)
		ctx = ERR_PTR(-EINVAL);
	return ctx;
}

static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id)
{
	struct ucma_context *ctx;

	xa_lock(&ctx_table);
	ctx = _ucma_find_context(id, file);
	if (!IS_ERR(ctx)) {
		if (ctx->closing)
			ctx = ERR_PTR(-EIO);
		else
			refcount_inc(&ctx->ref);
	}
	xa_unlock(&ctx_table);
	return ctx;
}

static void ucma_put_ctx(struct ucma_context *ctx)
{
	if (refcount_dec_and_test(&ctx->ref))
		complete(&ctx->comp);
}

/*
 * Same as ucma_get_ctx but requires that ->cm_id->device is valid, i.e. that
 * the CM_ID is bound.
 */
static struct ucma_context *ucma_get_ctx_dev(struct ucma_file *file, int id)
{
	struct ucma_context *ctx = ucma_get_ctx(file, id);

	if (IS_ERR(ctx))
		return ctx;
	if (!ctx->cm_id->device) {
		ucma_put_ctx(ctx);
		return ERR_PTR(-EINVAL);
	}
	return ctx;
}

static void ucma_close_event_id(struct work_struct *work)
{
	struct ucma_event *uevent_close = container_of(work, struct ucma_event, close_work);

	rdma_destroy_id(uevent_close->cm_id);
	kfree(uevent_close);
}

static void ucma_close_id(struct work_struct *work)
{
	struct ucma_context *ctx = container_of(work, struct ucma_context, close_work);

	/* once all inflight tasks are finished, we close all underlying
	 * resources. The context is still alive until its explicit destroying
	 * by its creator.
	 */
	ucma_put_ctx(ctx);
	wait_for_completion(&ctx->comp);
	/* No new events will be generated after destroying the id. */
	rdma_destroy_id(ctx->cm_id);
}

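/*
 * Lifetime summary of the above: every command path pairs ucma_get_ctx() (or
 * ucma_get_ctx_dev()) with ucma_put_ctx(); the final put completes ctx->comp,
 * so the teardown paths drop their own reference and then wait on the
 * completion before calling rdma_destroy_id().
 */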
static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
{
	struct ucma_context *ctx;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return NULL;

	INIT_WORK(&ctx->close_work, ucma_close_id);
	refcount_set(&ctx->ref, 1);
	init_completion(&ctx->comp);
	INIT_LIST_HEAD(&ctx->mc_list);
	ctx->file = file;
	mutex_init(&ctx->mutex);

	if (xa_alloc(&ctx_table, &ctx->id, ctx, xa_limit_32b, GFP_KERNEL))
		goto error;

	list_add_tail(&ctx->list, &file->ctx_list);
	return ctx;

error:
	kfree(ctx);
	return NULL;
}

static struct ucma_multicast* ucma_alloc_multicast(struct ucma_context *ctx)
{
	struct ucma_multicast *mc;

	mc = kzalloc(sizeof(*mc), GFP_KERNEL);
	if (!mc)
		return NULL;

	mc->ctx = ctx;
	if (xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b, GFP_KERNEL))
		goto error;

	list_add_tail(&mc->list, &ctx->mc_list);
	return mc;

error:
	kfree(mc);
	return NULL;
}

static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst,
				 struct rdma_conn_param *src)
{
	if (src->private_data_len)
		memcpy(dst->private_data, src->private_data,
		       src->private_data_len);
	dst->private_data_len = src->private_data_len;
	dst->responder_resources = src->responder_resources;
	dst->initiator_depth = src->initiator_depth;
	dst->flow_control = src->flow_control;
	dst->retry_count = src->retry_count;
	dst->rnr_retry_count = src->rnr_retry_count;
	dst->srq = src->srq;
	dst->qp_num = src->qp_num;
}

static void ucma_copy_ud_event(struct ib_device *device,
			       struct rdma_ucm_ud_param *dst,
			       struct rdma_ud_param *src)
{
	if (src->private_data_len)
		memcpy(dst->private_data, src->private_data,
		       src->private_data_len);
	dst->private_data_len = src->private_data_len;
	ib_copy_ah_attr_to_user(device, &dst->ah_attr, &src->ah_attr);
	dst->qp_num = src->qp_num;
	dst->qkey = src->qkey;
}

static void ucma_set_event_context(struct ucma_context *ctx,
				   struct rdma_cm_event *event,
				   struct ucma_event *uevent)
{
	uevent->ctx = ctx;
	switch (event->event) {
	case RDMA_CM_EVENT_MULTICAST_JOIN:
	case RDMA_CM_EVENT_MULTICAST_ERROR:
		uevent->mc = (struct ucma_multicast *)
			     event->param.ud.private_data;
		uevent->resp.uid = uevent->mc->uid;
		uevent->resp.id = uevent->mc->id;
		break;
	default:
		uevent->resp.uid = ctx->uid;
		uevent->resp.id = ctx->id;
		break;
	}
}

/* Called with file->mut locked for the relevant context. */
static void ucma_removal_event_handler(struct rdma_cm_id *cm_id)
{
	struct ucma_context *ctx = cm_id->context;
	struct ucma_event *con_req_eve;
	int event_found = 0;

	if (ctx->destroying)
		return;

	/* Only if the context still points to this cm_id does it own it and
	 * can it be queued to be closed. Otherwise the cm_id is an inflight
	 * one that sits on the context's event list, pending to be detached
	 * and reattached to a new context in ucma_get_event; that case is
	 * handled separately below.
	 */
	if (ctx->cm_id == cm_id) {
		xa_lock(&ctx_table);
		ctx->closing = 1;
		xa_unlock(&ctx_table);
		queue_work(ctx->file->close_wq, &ctx->close_work);
		return;
	}

	list_for_each_entry(con_req_eve, &ctx->file->event_list, list) {
		if (con_req_eve->cm_id == cm_id &&
		    con_req_eve->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) {
			list_del(&con_req_eve->list);
			INIT_WORK(&con_req_eve->close_work, ucma_close_event_id);
			queue_work(ctx->file->close_wq, &con_req_eve->close_work);
			event_found = 1;
			break;
		}
	}
	if (!event_found)
		pr_err("ucma_removal_event_handler: warning: connect request event wasn't found\n");
}

static int ucma_event_handler(struct rdma_cm_id *cm_id,
			      struct rdma_cm_event *event)
{
	struct ucma_event *uevent;
	struct ucma_context *ctx = cm_id->context;
	int ret = 0;

	uevent = kzalloc(sizeof(*uevent), GFP_KERNEL);
	if (!uevent)
		return event->event == RDMA_CM_EVENT_CONNECT_REQUEST;

	mutex_lock(&ctx->file->mut);
	uevent->cm_id = cm_id;
	ucma_set_event_context(ctx, event, uevent);
	uevent->resp.event = event->event;
	uevent->resp.status = event->status;
	if (cm_id->qp_type == IB_QPT_UD)
		ucma_copy_ud_event(cm_id->device, &uevent->resp.param.ud,
				   &event->param.ud);
	else
		ucma_copy_conn_event(&uevent->resp.param.conn,
				     &event->param.conn);

	if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) {
		if (!ctx->backlog) {
			ret = -ENOMEM;
			kfree(uevent);
			goto out;
		}
		ctx->backlog--;
	} else if (!ctx->uid || ctx->cm_id != cm_id) {
		/*
		 * We ignore events for new connections until userspace has set
		 * their context.  This can only happen if an error occurs on a
		 * new connection before the user accepts it.  This is okay,
		 * since the accept will just fail later. However, we do need
		 * to release the underlying HW resources in case of a device
		 * removal event.
		 */
		if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL)
			ucma_removal_event_handler(cm_id);

		kfree(uevent);
		goto out;
	}

	list_add_tail(&uevent->list, &ctx->file->event_list);
	wake_up_interruptible(&ctx->file->poll_wait);
	if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL)
		ucma_removal_event_handler(cm_id);
out:
	mutex_unlock(&ctx->file->mut);
	return ret;
}

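/*
 * In ucma_event_handler() above, each queued connect request consumes one
 * ctx->backlog slot; ucma_get_event() below returns the slot once userspace
 * has picked the request up and a new context has been allocated for it.
 */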
static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
			      int in_len, int out_len)
{
	struct ucma_context *ctx;
	struct rdma_ucm_get_event cmd;
	struct ucma_event *uevent;
	int ret = 0;

	/*
	 * Old 32 bit user space does not send the 4 byte padding in the
	 * reserved field. We don't care, allow it to keep working.
	 */
	if (out_len < sizeof(uevent->resp) - sizeof(uevent->resp.reserved))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	mutex_lock(&file->mut);
	while (list_empty(&file->event_list)) {
		mutex_unlock(&file->mut);

		if (file->filp->f_flags & O_NONBLOCK)
			return -EAGAIN;

		if (wait_event_interruptible(file->poll_wait,
					     !list_empty(&file->event_list)))
			return -ERESTARTSYS;

		mutex_lock(&file->mut);
	}

	uevent = list_entry(file->event_list.next, struct ucma_event, list);

	if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) {
		ctx = ucma_alloc_ctx(file);
		if (!ctx) {
			ret = -ENOMEM;
			goto done;
		}
		uevent->ctx->backlog++;
		ctx->cm_id = uevent->cm_id;
		ctx->cm_id->context = ctx;
		uevent->resp.id = ctx->id;
	}

	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &uevent->resp,
			 min_t(size_t, out_len, sizeof(uevent->resp)))) {
		ret = -EFAULT;
		goto done;
	}

	list_del(&uevent->list);
	uevent->ctx->events_reported++;
	if (uevent->mc)
		uevent->mc->events_reported++;
	kfree(uevent);
done:
	mutex_unlock(&file->mut);
	return ret;
}

static int ucma_get_qp_type(struct rdma_ucm_create_id *cmd, enum ib_qp_type *qp_type)
{
	switch (cmd->ps) {
	case RDMA_PS_TCP:
		*qp_type = IB_QPT_RC;
		return 0;
	case RDMA_PS_UDP:
	case RDMA_PS_IPOIB:
		*qp_type = IB_QPT_UD;
		return 0;
	case RDMA_PS_IB:
		*qp_type = cmd->qp_type;
		return 0;
	default:
		return -EINVAL;
	}
}

static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
			      int in_len, int out_len)
{
	struct rdma_ucm_create_id cmd;
	struct rdma_ucm_create_id_resp resp;
	struct ucma_context *ctx;
	struct rdma_cm_id *cm_id;
	enum ib_qp_type qp_type;
	int ret;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ret = ucma_get_qp_type(&cmd, &qp_type);
	if (ret)
		return ret;

	mutex_lock(&file->mut);
	ctx = ucma_alloc_ctx(file);
	mutex_unlock(&file->mut);
	if (!ctx)
		return -ENOMEM;

	ctx->uid = cmd.uid;
	cm_id = __rdma_create_id(current->nsproxy->net_ns,
				 ucma_event_handler, ctx, cmd.ps, qp_type, NULL);
	if (IS_ERR(cm_id)) {
		ret = PTR_ERR(cm_id);
		goto err1;
	}

	resp.id = ctx->id;
	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &resp, sizeof(resp))) {
		ret = -EFAULT;
		goto err2;
	}

	ctx->cm_id = cm_id;
	return 0;

err2:
	rdma_destroy_id(cm_id);
err1:
	xa_erase(&ctx_table, ctx->id);
	mutex_lock(&file->mut);
	list_del(&ctx->list);
	mutex_unlock(&file->mut);
	kfree(ctx);
	return ret;
}

static void ucma_cleanup_multicast(struct ucma_context *ctx)
{
	struct ucma_multicast *mc, *tmp;

	mutex_lock(&ctx->file->mut);
	list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) {
		list_del(&mc->list);
		xa_erase(&multicast_table, mc->id);
		kfree(mc);
	}
	mutex_unlock(&ctx->file->mut);
}

static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
{
	struct ucma_event *uevent, *tmp;

	list_for_each_entry_safe(uevent, tmp, &mc->ctx->file->event_list, list) {
		if (uevent->mc != mc)
			continue;

		list_del(&uevent->list);
		kfree(uevent);
	}
}

/*
 * ucma_free_ctx is called after the underlying rdma CM-ID is destroyed. At
 * this point, no new events will be reported from the hardware. However, we
 * still need to cleanup the UCMA context for this ID. Specifically, there
 * might be events that have not yet been consumed by the user space software.
 * These might include pending connect requests which we have not completed
 * processing.  We cannot call rdma_destroy_id while holding the lock of the
 * context (file->mut), as it might cause a deadlock. We therefore extract all
 * relevant events from the context pending events list while holding the
 * mutex. After that we release them as needed.
 */
static int ucma_free_ctx(struct ucma_context *ctx)
{
	int events_reported;
	struct ucma_event *uevent, *tmp;
	LIST_HEAD(list);

	ucma_cleanup_multicast(ctx);

	/* Cleanup events not yet reported to the user. */
	mutex_lock(&ctx->file->mut);
	list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) {
		if (uevent->ctx == ctx)
			list_move_tail(&uevent->list, &list);
	}
	list_del(&ctx->list);
	mutex_unlock(&ctx->file->mut);

	list_for_each_entry_safe(uevent, tmp, &list, list) {
		list_del(&uevent->list);
		if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST)
			rdma_destroy_id(uevent->cm_id);
		kfree(uevent);
	}

	events_reported = ctx->events_reported;
	mutex_destroy(&ctx->mutex);
	kfree(ctx);
	return events_reported;
}

static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
			       int in_len, int out_len)
{
	struct rdma_ucm_destroy_id cmd;
	struct rdma_ucm_destroy_id_resp resp;
	struct ucma_context *ctx;
	int ret = 0;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	xa_lock(&ctx_table);
	ctx = _ucma_find_context(cmd.id, file);
	if (!IS_ERR(ctx))
		__xa_erase(&ctx_table, ctx->id);
	xa_unlock(&ctx_table);

	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->file->mut);
	ctx->destroying = 1;
	mutex_unlock(&ctx->file->mut);

	flush_workqueue(ctx->file->close_wq);
	/* At this point it's guaranteed that there is no inflight
	 * closing task */
	xa_lock(&ctx_table);
	if (!ctx->closing) {
		xa_unlock(&ctx_table);
		ucma_put_ctx(ctx);
		wait_for_completion(&ctx->comp);
		rdma_destroy_id(ctx->cm_id);
	} else {
		xa_unlock(&ctx_table);
	}

	resp.events_reported = ucma_free_ctx(ctx);
	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &resp, sizeof(resp)))
		ret = -EFAULT;

	return ret;
}

static ssize_t ucma_bind_ip(struct ucma_file *file, const char __user *inbuf,
			    int in_len, int out_len)
{
	struct rdma_ucm_bind_ip cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (!rdma_addr_size_in6(&cmd.addr))
		return -EINVAL;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr);
	mutex_unlock(&ctx->mutex);

	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_bind(struct ucma_file *file, const char __user *inbuf,
			 int in_len, int out_len)
{
	struct rdma_ucm_bind cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (cmd.reserved || !cmd.addr_size ||
	    cmd.addr_size != rdma_addr_size_kss(&cmd.addr))
		return -EINVAL;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr);
	mutex_unlock(&ctx->mutex);
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_resolve_ip(struct ucma_file *file,
			       const char __user *inbuf,
			       int in_len, int out_len)
{
	struct rdma_ucm_resolve_ip cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if ((cmd.src_addr.sin6_family && !rdma_addr_size_in6(&cmd.src_addr)) ||
	    !rdma_addr_size_in6(&cmd.dst_addr))
		return -EINVAL;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr,
				(struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms);
	mutex_unlock(&ctx->mutex);
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_resolve_addr(struct ucma_file *file,
				 const char __user *inbuf,
				 int in_len, int out_len)
{
	struct rdma_ucm_resolve_addr cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (cmd.reserved ||
	    (cmd.src_size && (cmd.src_size != rdma_addr_size_kss(&cmd.src_addr))) ||
	    !cmd.dst_size || (cmd.dst_size != rdma_addr_size_kss(&cmd.dst_addr)))
		return -EINVAL;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr,
				(struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms);
	mutex_unlock(&ctx->mutex);
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_resolve_route(struct ucma_file *file,
				  const char __user *inbuf,
				  int in_len, int out_len)
{
	struct rdma_ucm_resolve_route cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx_dev(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	ret = rdma_resolve_route(ctx->cm_id, cmd.timeout_ms);
	mutex_unlock(&ctx->mutex);
	ucma_put_ctx(ctx);
	return ret;
}

static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp,
			       struct rdma_route *route)
{
	struct rdma_dev_addr *dev_addr;

	resp->num_paths = route->num_paths;
	switch (route->num_paths) {
	case 0:
		dev_addr = &route->addr.dev_addr;
		rdma_addr_get_dgid(dev_addr,
				   (union ib_gid *) &resp->ib_route[0].dgid);
		rdma_addr_get_sgid(dev_addr,
				   (union ib_gid *) &resp->ib_route[0].sgid);
		resp->ib_route[0].pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
		break;
	case 2:
		ib_copy_path_rec_to_user(&resp->ib_route[1],
					 &route->path_rec[1]);
		/* fall through */
	case 1:
		ib_copy_path_rec_to_user(&resp->ib_route[0],
					 &route->path_rec[0]);
		break;
	default:
		break;
	}
}

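/*
 * For both route-copy helpers (above and below): num_paths == 0 means route
 * resolution has not produced path records yet, so only the GIDs/pkey cached
 * in the device address are reported; 1 or 2 means the primary (and optional
 * alternate) path records are copied out to userspace.
 */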
static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp,
				 struct rdma_route *route)
{

	resp->num_paths = route->num_paths;
	switch (route->num_paths) {
	case 0:
		rdma_ip2gid((struct sockaddr *)&route->addr.dst_addr,
			    (union ib_gid *)&resp->ib_route[0].dgid);
		rdma_ip2gid((struct sockaddr *)&route->addr.src_addr,
			    (union ib_gid *)&resp->ib_route[0].sgid);
		resp->ib_route[0].pkey = cpu_to_be16(0xffff);
		break;
	case 2:
		ib_copy_path_rec_to_user(&resp->ib_route[1],
					 &route->path_rec[1]);
		/* fall through */
	case 1:
		ib_copy_path_rec_to_user(&resp->ib_route[0],
					 &route->path_rec[0]);
		break;
	default:
		break;
	}
}

static void ucma_copy_iw_route(struct rdma_ucm_query_route_resp *resp,
			       struct rdma_route *route)
{
	struct rdma_dev_addr *dev_addr;

	dev_addr = &route->addr.dev_addr;
	rdma_addr_get_dgid(dev_addr, (union ib_gid *) &resp->ib_route[0].dgid);
	rdma_addr_get_sgid(dev_addr, (union ib_gid *) &resp->ib_route[0].sgid);
}

static ssize_t ucma_query_route(struct ucma_file *file,
				const char __user *inbuf,
				int in_len, int out_len)
{
	struct rdma_ucm_query cmd;
	struct rdma_ucm_query_route_resp resp;
	struct ucma_context *ctx;
	struct sockaddr *addr;
	int ret = 0;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	memset(&resp, 0, sizeof resp);
	addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr;
	memcpy(&resp.src_addr, addr, addr->sa_family == AF_INET ?
				     sizeof(struct sockaddr_in) :
				     sizeof(struct sockaddr_in6));
	addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr;
	memcpy(&resp.dst_addr, addr, addr->sa_family == AF_INET ?
				     sizeof(struct sockaddr_in) :
				     sizeof(struct sockaddr_in6));
	if (!ctx->cm_id->device)
		goto out;

	resp.node_guid = (__force __u64) ctx->cm_id->device->node_guid;
	resp.port_num = ctx->cm_id->port_num;

	if (rdma_cap_ib_sa(ctx->cm_id->device, ctx->cm_id->port_num))
		ucma_copy_ib_route(&resp, &ctx->cm_id->route);
	else if (rdma_protocol_roce(ctx->cm_id->device, ctx->cm_id->port_num))
		ucma_copy_iboe_route(&resp, &ctx->cm_id->route);
	else if (rdma_protocol_iwarp(ctx->cm_id->device, ctx->cm_id->port_num))
		ucma_copy_iw_route(&resp, &ctx->cm_id->route);

out:
	mutex_unlock(&ctx->mutex);
	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &resp, sizeof(resp)))
		ret = -EFAULT;

	ucma_put_ctx(ctx);
	return ret;
}

static void ucma_query_device_addr(struct rdma_cm_id *cm_id,
				   struct rdma_ucm_query_addr_resp *resp)
{
	if (!cm_id->device)
		return;

	resp->node_guid = (__force __u64) cm_id->device->node_guid;
	resp->port_num = cm_id->port_num;
	resp->pkey = (__force __u16) cpu_to_be16(
		     ib_addr_get_pkey(&cm_id->route.addr.dev_addr));
}

static ssize_t ucma_query_addr(struct ucma_context *ctx,
			       void __user *response, int out_len)
{
	struct rdma_ucm_query_addr_resp resp;
	struct sockaddr *addr;
	int ret = 0;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	memset(&resp, 0, sizeof resp);

	addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr;
	resp.src_size = rdma_addr_size(addr);
	memcpy(&resp.src_addr, addr, resp.src_size);

	addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr;
	resp.dst_size = rdma_addr_size(addr);
	memcpy(&resp.dst_addr, addr, resp.dst_size);

	ucma_query_device_addr(ctx->cm_id, &resp);

	if (copy_to_user(response, &resp, sizeof(resp)))
		ret = -EFAULT;

	return ret;
}

static ssize_t ucma_query_path(struct ucma_context *ctx,
			       void __user *response, int out_len)
{
	struct rdma_ucm_query_path_resp *resp;
	int i, ret = 0;

	if (out_len < sizeof(*resp))
		return -ENOSPC;

	resp = kzalloc(out_len, GFP_KERNEL);
	if (!resp)
		return -ENOMEM;

	resp->num_paths = ctx->cm_id->route.num_paths;
	for (i = 0, out_len -= sizeof(*resp);
	     i < resp->num_paths && out_len > sizeof(struct ib_path_rec_data);
	     i++, out_len -= sizeof(struct ib_path_rec_data)) {
		struct sa_path_rec *rec = &ctx->cm_id->route.path_rec[i];

		resp->path_data[i].flags = IB_PATH_GMP | IB_PATH_PRIMARY |
					   IB_PATH_BIDIRECTIONAL;
		if (rec->rec_type == SA_PATH_REC_TYPE_OPA) {
			struct sa_path_rec ib;

			sa_convert_path_opa_to_ib(&ib, rec);
			ib_sa_pack_path(&ib, &resp->path_data[i].path_rec);

		} else {
			ib_sa_pack_path(rec, &resp->path_data[i].path_rec);
		}
	}

	if (copy_to_user(response, resp, struct_size(resp, path_data, i)))
		ret = -EFAULT;

	kfree(resp);
	return ret;
}

static ssize_t ucma_query_gid(struct ucma_context *ctx,
			      void __user *response, int out_len)
{
	struct rdma_ucm_query_addr_resp resp;
	struct sockaddr_ib *addr;
	int ret = 0;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	memset(&resp, 0, sizeof resp);

	ucma_query_device_addr(ctx->cm_id, &resp);

	addr = (struct sockaddr_ib *) &resp.src_addr;
	resp.src_size = sizeof(*addr);
	if (ctx->cm_id->route.addr.src_addr.ss_family == AF_IB) {
		memcpy(addr, &ctx->cm_id->route.addr.src_addr, resp.src_size);
	} else {
		addr->sib_family = AF_IB;
		addr->sib_pkey = (__force __be16) resp.pkey;
		rdma_read_gids(ctx->cm_id, (union ib_gid *)&addr->sib_addr,
			       NULL);
		addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
						    &ctx->cm_id->route.addr.src_addr);
	}

	addr = (struct sockaddr_ib *) &resp.dst_addr;
	resp.dst_size = sizeof(*addr);
	if (ctx->cm_id->route.addr.dst_addr.ss_family == AF_IB) {
		memcpy(addr, &ctx->cm_id->route.addr.dst_addr, resp.dst_size);
	} else {
		addr->sib_family = AF_IB;
		addr->sib_pkey = (__force __be16) resp.pkey;
		rdma_read_gids(ctx->cm_id, NULL,
			       (union ib_gid *)&addr->sib_addr);
		addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
						    &ctx->cm_id->route.addr.dst_addr);
	}

	if (copy_to_user(response, &resp, sizeof(resp)))
		ret = -EFAULT;

	return ret;
}

static ssize_t ucma_query(struct ucma_file *file,
			  const char __user *inbuf,
			  int in_len, int out_len)
{
	struct rdma_ucm_query cmd;
	struct ucma_context *ctx;
	void __user *response;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	response = u64_to_user_ptr(cmd.response);
	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	switch (cmd.option) {
	case RDMA_USER_CM_QUERY_ADDR:
		ret = ucma_query_addr(ctx, response, out_len);
		break;
	case RDMA_USER_CM_QUERY_PATH:
		ret = ucma_query_path(ctx, response, out_len);
		break;
	case RDMA_USER_CM_QUERY_GID:
		ret = ucma_query_gid(ctx, response, out_len);
		break;
	default:
		ret = -ENOSYS;
		break;
	}
	mutex_unlock(&ctx->mutex);

	ucma_put_ctx(ctx);
	return ret;
}

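/*
 * Translate user-supplied connection parameters into the kernel
 * representation. QP numbers are 24 bits on the wire, hence the mask below;
 * the user-provided qkey is only honoured for AF_IB addresses and is cleared
 * otherwise.
 */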
static void ucma_copy_conn_param(struct rdma_cm_id *id,
				 struct rdma_conn_param *dst,
				 struct rdma_ucm_conn_param *src)
{
	dst->private_data = src->private_data;
	dst->private_data_len = src->private_data_len;
	dst->responder_resources = src->responder_resources;
	dst->initiator_depth = src->initiator_depth;
	dst->flow_control = src->flow_control;
	dst->retry_count = src->retry_count;
	dst->rnr_retry_count = src->rnr_retry_count;
	dst->srq = src->srq;
	dst->qp_num = src->qp_num & 0xFFFFFF;
	dst->qkey = (id->route.addr.src_addr.ss_family == AF_IB) ? src->qkey : 0;
}

static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf,
			    int in_len, int out_len)
{
	struct rdma_ucm_connect cmd;
	struct rdma_conn_param conn_param;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (!cmd.conn_param.valid)
		return -EINVAL;

	ctx = ucma_get_ctx_dev(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
	mutex_lock(&ctx->mutex);
	ret = rdma_connect(ctx->cm_id, &conn_param);
	mutex_unlock(&ctx->mutex);
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_listen(struct ucma_file *file, const char __user *inbuf,
			   int in_len, int out_len)
{
	struct rdma_ucm_listen cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	ctx->backlog = cmd.backlog > 0 && cmd.backlog < max_backlog ?
		       cmd.backlog : max_backlog;
	mutex_lock(&ctx->mutex);
	ret = rdma_listen(ctx->cm_id, ctx->backlog);
	mutex_unlock(&ctx->mutex);
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf,
			   int in_len, int out_len)
{
	struct rdma_ucm_accept cmd;
	struct rdma_conn_param conn_param;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx_dev(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	if (cmd.conn_param.valid) {
		ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
		mutex_lock(&file->mut);
		mutex_lock(&ctx->mutex);
		ret = __rdma_accept(ctx->cm_id, &conn_param, NULL);
		mutex_unlock(&ctx->mutex);
		if (!ret)
			ctx->uid = cmd.uid;
		mutex_unlock(&file->mut);
	} else {
		mutex_lock(&ctx->mutex);
		ret = __rdma_accept(ctx->cm_id, NULL, NULL);
		mutex_unlock(&ctx->mutex);
	}
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_reject(struct ucma_file *file, const char __user *inbuf,
			   int in_len, int out_len)
{
	struct rdma_ucm_reject cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx_dev(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	ret = rdma_reject(ctx->cm_id, cmd.private_data, cmd.private_data_len);
	mutex_unlock(&ctx->mutex);
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_disconnect(struct ucma_file *file, const char __user *inbuf,
			       int in_len, int out_len)
{
	struct rdma_ucm_disconnect cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx_dev(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	ret = rdma_disconnect(ctx->cm_id);
	mutex_unlock(&ctx->mutex);
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_init_qp_attr(struct ucma_file *file,
				 const char __user *inbuf,
				 int in_len, int out_len)
{
	struct rdma_ucm_init_qp_attr cmd;
	struct ib_uverbs_qp_attr resp;
	struct ucma_context *ctx;
	struct ib_qp_attr qp_attr;
	int ret;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (cmd.qp_state > IB_QPS_ERR)
		return -EINVAL;

	ctx = ucma_get_ctx_dev(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	resp.qp_attr_mask = 0;
	memset(&qp_attr, 0, sizeof qp_attr);
	qp_attr.qp_state = cmd.qp_state;
	mutex_lock(&ctx->mutex);
	ret = rdma_init_qp_attr(ctx->cm_id, &qp_attr, &resp.qp_attr_mask);
	mutex_unlock(&ctx->mutex);
	if (ret)
		goto out;

	ib_copy_qp_attr_to_user(ctx->cm_id->device, &resp, &qp_attr);
	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &resp, sizeof(resp)))
		ret = -EFAULT;

out:
	ucma_put_ctx(ctx);
	return ret;
}

static int ucma_set_option_id(struct ucma_context *ctx, int optname,
			      void *optval, size_t optlen)
{
	int ret = 0;

	switch (optname) {
	case RDMA_OPTION_ID_TOS:
		if (optlen != sizeof(u8)) {
			ret = -EINVAL;
			break;
		}
		rdma_set_service_type(ctx->cm_id, *((u8 *) optval));
		break;
	case RDMA_OPTION_ID_REUSEADDR:
		if (optlen != sizeof(int)) {
			ret = -EINVAL;
			break;
		}
		ret = rdma_set_reuseaddr(ctx->cm_id, *((int *) optval) ? 1 : 0);
		break;
	case RDMA_OPTION_ID_AFONLY:
		if (optlen != sizeof(int)) {
			ret = -EINVAL;
			break;
		}
		ret = rdma_set_afonly(ctx->cm_id, *((int *) optval) ? 1 : 0);
		break;
	case RDMA_OPTION_ID_ACK_TIMEOUT:
		if (optlen != sizeof(u8)) {
			ret = -EINVAL;
			break;
		}
		ret = rdma_set_ack_timeout(ctx->cm_id, *((u8 *)optval));
		break;
	default:
		ret = -ENOSYS;
	}

	return ret;
}

static int ucma_set_ib_path(struct ucma_context *ctx,
			    struct ib_path_rec_data *path_data, size_t optlen)
{
	struct sa_path_rec sa_path;
	struct rdma_cm_event event;
	int ret;

	if (optlen % sizeof(*path_data))
		return -EINVAL;

	for (; optlen; optlen -= sizeof(*path_data), path_data++) {
		if (path_data->flags == (IB_PATH_GMP | IB_PATH_PRIMARY |
					 IB_PATH_BIDIRECTIONAL))
			break;
	}

	if (!optlen)
		return -EINVAL;

	if (!ctx->cm_id->device)
		return -EINVAL;

	memset(&sa_path, 0, sizeof(sa_path));

	sa_path.rec_type = SA_PATH_REC_TYPE_IB;
	ib_sa_unpack_path(path_data->path_rec, &sa_path);

	if (rdma_cap_opa_ah(ctx->cm_id->device, ctx->cm_id->port_num)) {
		struct sa_path_rec opa;

		sa_convert_path_ib_to_opa(&opa, &sa_path);
		mutex_lock(&ctx->mutex);
		ret = rdma_set_ib_path(ctx->cm_id, &opa);
		mutex_unlock(&ctx->mutex);
	} else {
		mutex_lock(&ctx->mutex);
		ret = rdma_set_ib_path(ctx->cm_id, &sa_path);
		mutex_unlock(&ctx->mutex);
	}
	if (ret)
		return ret;

	memset(&event, 0, sizeof event);
	event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
	return ucma_event_handler(ctx->cm_id, &event);
}

static int ucma_set_option_ib(struct ucma_context *ctx, int optname,
			      void *optval, size_t optlen)
{
	int ret;

	switch (optname) {
	case RDMA_OPTION_IB_PATH:
		ret = ucma_set_ib_path(ctx, optval, optlen);
		break;
	default:
		ret = -ENOSYS;
	}

	return ret;
}

static int ucma_set_option_level(struct ucma_context *ctx, int level,
				 int optname, void *optval, size_t optlen)
{
	int ret;

	switch (level) {
	case RDMA_OPTION_ID:
		mutex_lock(&ctx->mutex);
		ret = ucma_set_option_id(ctx, optname, optval, optlen);
		mutex_unlock(&ctx->mutex);
		break;
	case RDMA_OPTION_IB:
		ret = ucma_set_option_ib(ctx, optname, optval, optlen);
		break;
	default:
		ret = -ENOSYS;
	}

	return ret;
}

static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf,
			       int in_len, int out_len)
{
	struct rdma_ucm_set_option cmd;
	struct ucma_context *ctx;
	void *optval;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (unlikely(cmd.optlen > KMALLOC_MAX_SIZE))
		return -EINVAL;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	optval = memdup_user(u64_to_user_ptr(cmd.optval),
			     cmd.optlen);
	if (IS_ERR(optval)) {
		ret = PTR_ERR(optval);
		goto out;
	}

	ret = ucma_set_option_level(ctx, cmd.level, cmd.optname, optval,
				    cmd.optlen);
	kfree(optval);

out:
	ucma_put_ctx(ctx);
	return ret;
}

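/*
 * Illustrative sketch only, not part of this file's ABI documentation: a
 * userspace caller sets per-ID options by writing RDMA_USER_CM_CMD_SET_OPTION
 * with level RDMA_OPTION_ID and, for example, optname RDMA_OPTION_ID_TOS plus
 * a single u8 payload; librdmacm's rdma_set_option() is the usual wrapper
 * around this path.
 */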
static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf,
			   int in_len, int out_len)
{
	struct rdma_ucm_notify cmd;
	struct ucma_context *ctx;
	int ret = -EINVAL;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	if (ctx->cm_id->device)
		ret = rdma_notify(ctx->cm_id, (enum ib_event_type)cmd.event);
	mutex_unlock(&ctx->mutex);

	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_process_join(struct ucma_file *file,
				 struct rdma_ucm_join_mcast *cmd, int out_len)
{
	struct rdma_ucm_create_id_resp resp;
	struct ucma_context *ctx;
	struct ucma_multicast *mc;
	struct sockaddr *addr;
	int ret;
	u8 join_state;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	addr = (struct sockaddr *) &cmd->addr;
	if (cmd->addr_size != rdma_addr_size(addr))
		return -EINVAL;

	if (cmd->join_flags == RDMA_MC_JOIN_FLAG_FULLMEMBER)
		join_state = BIT(FULLMEMBER_JOIN);
	else if (cmd->join_flags == RDMA_MC_JOIN_FLAG_SENDONLY_FULLMEMBER)
		join_state = BIT(SENDONLY_FULLMEMBER_JOIN);
	else
		return -EINVAL;

	ctx = ucma_get_ctx_dev(file, cmd->id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&file->mut);
	mc = ucma_alloc_multicast(ctx);
	if (!mc) {
		ret = -ENOMEM;
		goto err1;
	}
	mc->join_state = join_state;
	mc->uid = cmd->uid;
	memcpy(&mc->addr, addr, cmd->addr_size);
	mutex_lock(&ctx->mutex);
	ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr,
				  join_state, mc);
	mutex_unlock(&ctx->mutex);
	if (ret)
		goto err2;

	resp.id = mc->id;
	if (copy_to_user(u64_to_user_ptr(cmd->response),
			 &resp, sizeof(resp))) {
		ret = -EFAULT;
		goto err3;
	}

	xa_store(&multicast_table, mc->id, mc, 0);

	mutex_unlock(&file->mut);
	ucma_put_ctx(ctx);
	return 0;

err3:
	rdma_leave_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr);
	ucma_cleanup_mc_events(mc);
err2:
	xa_erase(&multicast_table, mc->id);
	list_del(&mc->list);
	kfree(mc);
err1:
	mutex_unlock(&file->mut);
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_join_ip_multicast(struct ucma_file *file,
				      const char __user *inbuf,
				      int in_len, int out_len)
{
	struct rdma_ucm_join_ip_mcast cmd;
	struct rdma_ucm_join_mcast join_cmd;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	join_cmd.response = cmd.response;
	join_cmd.uid = cmd.uid;
	join_cmd.id = cmd.id;
	join_cmd.addr_size = rdma_addr_size_in6(&cmd.addr);
	if (!join_cmd.addr_size)
		return -EINVAL;

	join_cmd.join_flags = RDMA_MC_JOIN_FLAG_FULLMEMBER;
	memcpy(&join_cmd.addr, &cmd.addr, join_cmd.addr_size);

	return ucma_process_join(file, &join_cmd, out_len);
}

static ssize_t ucma_join_multicast(struct ucma_file *file,
				   const char __user *inbuf,
				   int in_len, int out_len)
{
	struct rdma_ucm_join_mcast cmd;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (!rdma_addr_size_kss(&cmd.addr))
		return -EINVAL;

	return ucma_process_join(file, &cmd, out_len);
}

static ssize_t ucma_leave_multicast(struct ucma_file *file,
				    const char __user *inbuf,
				    int in_len, int out_len)
{
	struct rdma_ucm_destroy_id cmd;
	struct rdma_ucm_destroy_id_resp resp;
	struct ucma_multicast *mc;
	int ret = 0;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	xa_lock(&multicast_table);
	mc = xa_load(&multicast_table, cmd.id);
	if (!mc)
		mc = ERR_PTR(-ENOENT);
	else if (mc->ctx->file != file)
		mc = ERR_PTR(-EINVAL);
	else if (!refcount_inc_not_zero(&mc->ctx->ref))
		mc = ERR_PTR(-ENXIO);
	else
		__xa_erase(&multicast_table, mc->id);
	xa_unlock(&multicast_table);

	if (IS_ERR(mc)) {
		ret = PTR_ERR(mc);
		goto out;
	}

	mutex_lock(&mc->ctx->mutex);
	rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr);
	mutex_unlock(&mc->ctx->mutex);

	mutex_lock(&mc->ctx->file->mut);
	ucma_cleanup_mc_events(mc);
	list_del(&mc->list);
	mutex_unlock(&mc->ctx->file->mut);

	ucma_put_ctx(mc->ctx);
	resp.events_reported = mc->events_reported;
	kfree(mc);

	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &resp, sizeof(resp)))
		ret = -EFAULT;
out:
	return ret;
}

static void ucma_lock_files(struct ucma_file *file1, struct ucma_file *file2)
{
	/* Acquire mutexes based on pointer comparison to prevent deadlock. */
	if (file1 < file2) {
		mutex_lock(&file1->mut);
		mutex_lock_nested(&file2->mut, SINGLE_DEPTH_NESTING);
	} else {
		mutex_lock(&file2->mut);
		mutex_lock_nested(&file1->mut, SINGLE_DEPTH_NESTING);
	}
}

static void ucma_unlock_files(struct ucma_file *file1, struct ucma_file *file2)
{
	if (file1 < file2) {
		mutex_unlock(&file2->mut);
		mutex_unlock(&file1->mut);
	} else {
		mutex_unlock(&file1->mut);
		mutex_unlock(&file2->mut);
	}
}

static void ucma_move_events(struct ucma_context *ctx, struct ucma_file *file)
{
	struct ucma_event *uevent, *tmp;

	list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list)
		if (uevent->ctx == ctx)
			list_move_tail(&uevent->list, &file->event_list);
}

static ssize_t ucma_migrate_id(struct ucma_file *new_file,
			       const char __user *inbuf,
			       int in_len, int out_len)
{
	struct rdma_ucm_migrate_id cmd;
	struct rdma_ucm_migrate_resp resp;
	struct ucma_context *ctx;
	struct fd f;
	struct ucma_file *cur_file;
	int ret = 0;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	/* Get current fd to protect against it being closed */
	f = fdget(cmd.fd);
	if (!f.file)
		return -ENOENT;
	if (f.file->f_op != &ucma_fops) {
		ret = -EINVAL;
		goto file_put;
	}

	/* Validate current fd and prevent destruction of id. */
	ctx = ucma_get_ctx(f.file->private_data, cmd.id);
	if (IS_ERR(ctx)) {
		ret = PTR_ERR(ctx);
		goto file_put;
	}

	cur_file = ctx->file;
	if (cur_file == new_file) {
		resp.events_reported = ctx->events_reported;
		goto response;
	}

	/*
	 * Migrate events between fd's, maintaining order, and avoiding new
	 * events being added before existing events.
	 */
	ucma_lock_files(cur_file, new_file);
	xa_lock(&ctx_table);

	list_move_tail(&ctx->list, &new_file->ctx_list);
	ucma_move_events(ctx, new_file);
	ctx->file = new_file;
	resp.events_reported = ctx->events_reported;

	xa_unlock(&ctx_table);
	ucma_unlock_files(cur_file, new_file);

response:
	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &resp, sizeof(resp)))
		ret = -EFAULT;

	ucma_put_ctx(ctx);
file_put:
	fdput(f);
	return ret;
}

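/*
 * Dispatch table for the write() ABI. Userspace sends a struct
 * rdma_ucm_cmd_hdr (command index plus in/out payload sizes) immediately
 * followed by the command-specific structure; ucma_write() below validates
 * the header and calls the matching handler. A minimal caller sketch,
 * assuming the uapi definitions from <rdma/rdma_user_cm.h> ("my_cookie" and
 * "resp" are placeholder variables, not part of this file):
 *
 *	struct {
 *		struct rdma_ucm_cmd_hdr hdr;
 *		struct rdma_ucm_create_id cmd;
 *	} msg = {
 *		.hdr = { .cmd = RDMA_USER_CM_CMD_CREATE_ID,
 *			 .in = sizeof(msg.cmd),
 *			 .out = sizeof(struct rdma_ucm_create_id_resp) },
 *		.cmd = { .uid = my_cookie, .response = (uintptr_t)&resp,
 *			 .ps = RDMA_PS_TCP },
 *	};
 *	write(fd, &msg, sizeof(msg));	// fd opened on /dev/infiniband/rdma_cm
 */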
static ssize_t (*ucma_cmd_table[])(struct ucma_file *file,
				   const char __user *inbuf,
				   int in_len, int out_len) = {
	[RDMA_USER_CM_CMD_CREATE_ID] = ucma_create_id,
	[RDMA_USER_CM_CMD_DESTROY_ID] = ucma_destroy_id,
	[RDMA_USER_CM_CMD_BIND_IP] = ucma_bind_ip,
	[RDMA_USER_CM_CMD_RESOLVE_IP] = ucma_resolve_ip,
	[RDMA_USER_CM_CMD_RESOLVE_ROUTE] = ucma_resolve_route,
	[RDMA_USER_CM_CMD_QUERY_ROUTE] = ucma_query_route,
	[RDMA_USER_CM_CMD_CONNECT] = ucma_connect,
	[RDMA_USER_CM_CMD_LISTEN] = ucma_listen,
	[RDMA_USER_CM_CMD_ACCEPT] = ucma_accept,
	[RDMA_USER_CM_CMD_REJECT] = ucma_reject,
	[RDMA_USER_CM_CMD_DISCONNECT] = ucma_disconnect,
	[RDMA_USER_CM_CMD_INIT_QP_ATTR] = ucma_init_qp_attr,
	[RDMA_USER_CM_CMD_GET_EVENT] = ucma_get_event,
	[RDMA_USER_CM_CMD_GET_OPTION] = NULL,
	[RDMA_USER_CM_CMD_SET_OPTION] = ucma_set_option,
	[RDMA_USER_CM_CMD_NOTIFY] = ucma_notify,
	[RDMA_USER_CM_CMD_JOIN_IP_MCAST] = ucma_join_ip_multicast,
	[RDMA_USER_CM_CMD_LEAVE_MCAST] = ucma_leave_multicast,
	[RDMA_USER_CM_CMD_MIGRATE_ID] = ucma_migrate_id,
	[RDMA_USER_CM_CMD_QUERY] = ucma_query,
	[RDMA_USER_CM_CMD_BIND] = ucma_bind,
	[RDMA_USER_CM_CMD_RESOLVE_ADDR] = ucma_resolve_addr,
	[RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast
};

static ssize_t ucma_write(struct file *filp, const char __user *buf,
			  size_t len, loff_t *pos)
{
	struct ucma_file *file = filp->private_data;
	struct rdma_ucm_cmd_hdr hdr;
	ssize_t ret;

	if (!ib_safe_file_access(filp)) {
		pr_err_once("ucma_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
			    task_tgid_vnr(current), current->comm);
		return -EACCES;
	}

	if (len < sizeof(hdr))
		return -EINVAL;

	if (copy_from_user(&hdr, buf, sizeof(hdr)))
		return -EFAULT;

	if (hdr.cmd >= ARRAY_SIZE(ucma_cmd_table))
		return -EINVAL;
	hdr.cmd = array_index_nospec(hdr.cmd, ARRAY_SIZE(ucma_cmd_table));

	if (hdr.in + sizeof(hdr) > len)
		return -EINVAL;

	if (!ucma_cmd_table[hdr.cmd])
		return -ENOSYS;

	ret = ucma_cmd_table[hdr.cmd](file, buf + sizeof(hdr), hdr.in, hdr.out);
	if (!ret)
		ret = len;

	return ret;
}

static __poll_t ucma_poll(struct file *filp, struct poll_table_struct *wait)
{
	struct ucma_file *file = filp->private_data;
	__poll_t mask = 0;

	poll_wait(filp, &file->poll_wait, wait);

	if (!list_empty(&file->event_list))
		mask = EPOLLIN | EPOLLRDNORM;

	return mask;
}

/*
 * ucma_open() does not need the BKL:
 *
 *  - no global state is referred to;
 *  - there is no ioctl method to race against;
 *  - no further module initialization is required for open to work
 *    after the device is registered.
 */
static int ucma_open(struct inode *inode, struct file *filp)
{
	struct ucma_file *file;

	file = kmalloc(sizeof *file, GFP_KERNEL);
	if (!file)
		return -ENOMEM;

	file->close_wq = alloc_ordered_workqueue("ucma_close_id",
						 WQ_MEM_RECLAIM);
	if (!file->close_wq) {
		kfree(file);
		return -ENOMEM;
	}

	INIT_LIST_HEAD(&file->event_list);
	INIT_LIST_HEAD(&file->ctx_list);
	init_waitqueue_head(&file->poll_wait);
	mutex_init(&file->mut);

	filp->private_data = file;
	file->filp = filp;

	return stream_open(inode, filp);
}

static int ucma_close(struct inode *inode, struct file *filp)
{
	struct ucma_file *file = filp->private_data;
	struct ucma_context *ctx, *tmp;

	mutex_lock(&file->mut);
	list_for_each_entry_safe(ctx, tmp, &file->ctx_list, list) {
		ctx->destroying = 1;
		mutex_unlock(&file->mut);

		xa_erase(&ctx_table, ctx->id);
		flush_workqueue(file->close_wq);
		/* Once the ctx is marked as destroying and the workqueue has
		 * been flushed, we are safe from any inflight handlers that
		 * might queue another closing task.
		 */
		xa_lock(&ctx_table);
		if (!ctx->closing) {
			xa_unlock(&ctx_table);
			ucma_put_ctx(ctx);
			wait_for_completion(&ctx->comp);
			/* rdma_destroy_id ensures that no event handlers are
			 * inflight for that id before releasing it.
			 */
			rdma_destroy_id(ctx->cm_id);
		} else {
			xa_unlock(&ctx_table);
		}

		ucma_free_ctx(ctx);
		mutex_lock(&file->mut);
	}
	mutex_unlock(&file->mut);
	destroy_workqueue(file->close_wq);
	kfree(file);
	return 0;
}

static const struct file_operations ucma_fops = {
	.owner		= THIS_MODULE,
	.open		= ucma_open,
	.release	= ucma_close,
	.write		= ucma_write,
	.poll		= ucma_poll,
	.llseek		= no_llseek,
};

static struct miscdevice ucma_misc = {
	.minor		= MISC_DYNAMIC_MINOR,
	.name		= "rdma_cm",
	.nodename	= "infiniband/rdma_cm",
	.mode		= 0666,
	.fops		= &ucma_fops,
};

static int ucma_get_global_nl_info(struct ib_client_nl_info *res)
{
	res->abi = RDMA_USER_CM_ABI_VERSION;
	res->cdev = ucma_misc.this_device;
	return 0;
}

static struct ib_client rdma_cma_client = {
	.name = "rdma_cm",
	.get_global_nl_info = ucma_get_global_nl_info,
};
MODULE_ALIAS_RDMA_CLIENT("rdma_cm");

static ssize_t show_abi_version(struct device *dev,
				struct device_attribute *attr,
				char *buf)
{
	return sprintf(buf, "%d\n", RDMA_USER_CM_ABI_VERSION);
}
static DEVICE_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);

static int __init ucma_init(void)
{
	int ret;

	ret = misc_register(&ucma_misc);
	if (ret)
		return ret;

	ret = device_create_file(ucma_misc.this_device, &dev_attr_abi_version);
	if (ret) {
		pr_err("rdma_ucm: couldn't create abi_version attr\n");
		goto err1;
	}

	ucma_ctl_table_hdr = register_net_sysctl(&init_net, "net/rdma_ucm", ucma_ctl_table);
	if (!ucma_ctl_table_hdr) {
		pr_err("rdma_ucm: couldn't register sysctl paths\n");
		ret = -ENOMEM;
		goto err2;
	}

	ret = ib_register_client(&rdma_cma_client);
	if (ret)
		goto err3;

	return 0;
err3:
	unregister_net_sysctl_table(ucma_ctl_table_hdr);
err2:
	device_remove_file(ucma_misc.this_device, &dev_attr_abi_version);
err1:
	misc_deregister(&ucma_misc);
	return ret;
}

static void __exit ucma_cleanup(void)
{
	ib_unregister_client(&rdma_cma_client);
	unregister_net_sysctl_table(ucma_ctl_table_hdr);
	device_remove_file(ucma_misc.this_device, &dev_attr_abi_version);
	misc_deregister(&ucma_misc);
}

module_init(ucma_init);
module_exit(ucma_cleanup);