/*
 * Copyright (c) 2005-2006 Intel Corporation. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/completion.h>
#include <linux/file.h>
#include <linux/mutex.h>
#include <linux/poll.h>
#include <linux/sched.h>
#include <linux/idr.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/miscdevice.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
#include <linux/module.h>
#include <linux/nsproxy.h>

#include <linux/nospec.h>

#include <rdma/rdma_user_cm.h>
#include <rdma/ib_marshall.h>
#include <rdma/rdma_cm.h>
#include <rdma/rdma_cm_ib.h>
#include <rdma/ib_addr.h>
#include <rdma/ib.h>
#include <rdma/ib_cm.h>
#include <rdma/rdma_netlink.h>
#include "core_priv.h"

MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access");
MODULE_LICENSE("Dual BSD/GPL");

static unsigned int max_backlog = 1024;

static struct ctl_table_header *ucma_ctl_table_hdr;
static struct ctl_table ucma_ctl_table[] = {
	{
		.procname = "max_backlog",
		.data = &max_backlog,
		.maxlen = sizeof max_backlog,
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{ }
};

struct ucma_file {
	struct mutex mut;
	struct file *filp;
	struct list_head ctx_list;
	struct list_head event_list;
	wait_queue_head_t poll_wait;
};

struct ucma_context {
	u32 id;
	struct completion comp;
	refcount_t ref;
	int events_reported;
	atomic_t backlog;

	struct ucma_file *file;
	struct rdma_cm_id *cm_id;
	struct mutex mutex;
	u64 uid;

	struct list_head list;
	struct work_struct close_work;
};

struct ucma_multicast {
	struct ucma_context *ctx;
	u32 id;
	int events_reported;

	u64 uid;
	u8 join_state;
	struct sockaddr_storage addr;
};

struct ucma_event {
	struct ucma_context *ctx;
	struct ucma_context *conn_req_ctx;
	struct ucma_multicast *mc;
	struct list_head list;
	struct rdma_ucm_event_resp resp;
};

static DEFINE_XARRAY_ALLOC(ctx_table);
static DEFINE_XARRAY_ALLOC(multicast_table);

static const struct file_operations ucma_fops;
static int ucma_destroy_private_ctx(struct ucma_context *ctx);

static inline struct ucma_context *_ucma_find_context(int id,
						      struct ucma_file *file)
{
	struct ucma_context *ctx;

	ctx = xa_load(&ctx_table, id);
	if (!ctx)
		ctx = ERR_PTR(-ENOENT);
	else if (ctx->file != file)
		ctx = ERR_PTR(-EINVAL);
	return ctx;
}

static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id)
{
	struct ucma_context *ctx;

	xa_lock(&ctx_table);
	ctx = _ucma_find_context(id, file);
	if (!IS_ERR(ctx))
		if (!refcount_inc_not_zero(&ctx->ref))
			ctx = ERR_PTR(-ENXIO);
	xa_unlock(&ctx_table);
	return ctx;
}

static void ucma_put_ctx(struct ucma_context *ctx)
{
	if (refcount_dec_and_test(&ctx->ref))
		complete(&ctx->comp);
}

/*
 * Same as ucma_get_ctx() but requires that ->cm_id->device is valid, i.e. that
 * the CM_ID is bound.
 */
static struct ucma_context *ucma_get_ctx_dev(struct ucma_file *file, int id)
{
	struct ucma_context *ctx = ucma_get_ctx(file, id);

	if (IS_ERR(ctx))
		return ctx;
	if (!ctx->cm_id->device) {
		ucma_put_ctx(ctx);
		return ERR_PTR(-EINVAL);
	}
	return ctx;
}

static void ucma_close_id(struct work_struct *work)
{
	struct ucma_context *ctx = container_of(work, struct ucma_context, close_work);

	/* Once all inflight tasks are finished, we close all underlying
	 * resources. The context stays alive until it is explicitly destroyed
	 * by its creator. This puts back the xarray's reference.
	 */
	ucma_put_ctx(ctx);
	wait_for_completion(&ctx->comp);
	/* No new events will be generated after destroying the id. */
	rdma_destroy_id(ctx->cm_id);

	/* Reading the cm_id without holding a positive ref is not allowed */
	ctx->cm_id = NULL;
}

static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
{
	struct ucma_context *ctx;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return NULL;

	INIT_WORK(&ctx->close_work, ucma_close_id);
	init_completion(&ctx->comp);
	/* So list_del() will work if we don't do ucma_finish_ctx() */
	INIT_LIST_HEAD(&ctx->list);
	ctx->file = file;
	mutex_init(&ctx->mutex);

	if (xa_alloc(&ctx_table, &ctx->id, NULL, xa_limit_32b, GFP_KERNEL)) {
		kfree(ctx);
		return NULL;
	}
	return ctx;
}

static void ucma_set_ctx_cm_id(struct ucma_context *ctx,
			       struct rdma_cm_id *cm_id)
{
	refcount_set(&ctx->ref, 1);
	ctx->cm_id = cm_id;
}

static void ucma_finish_ctx(struct ucma_context *ctx)
{
	lockdep_assert_held(&ctx->file->mut);
	list_add_tail(&ctx->list, &ctx->file->ctx_list);
	xa_store(&ctx_table, ctx->id, ctx, GFP_KERNEL);
}

static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst,
				 struct rdma_conn_param *src)
{
	if (src->private_data_len)
		memcpy(dst->private_data, src->private_data,
		       src->private_data_len);
	dst->private_data_len = src->private_data_len;
	dst->responder_resources = src->responder_resources;
	dst->initiator_depth = src->initiator_depth;
	dst->flow_control = src->flow_control;
	dst->retry_count = src->retry_count;
	dst->rnr_retry_count = src->rnr_retry_count;
	dst->srq = src->srq;
	dst->qp_num = src->qp_num;
}

static void ucma_copy_ud_event(struct ib_device *device,
			       struct rdma_ucm_ud_param *dst,
			       struct rdma_ud_param *src)
{
	if (src->private_data_len)
		memcpy(dst->private_data, src->private_data,
		       src->private_data_len);
	dst->private_data_len = src->private_data_len;
	ib_copy_ah_attr_to_user(device, &dst->ah_attr, &src->ah_attr);
	dst->qp_num = src->qp_num;
	dst->qkey = src->qkey;
}

static struct ucma_event *ucma_create_uevent(struct ucma_context *ctx,
					     struct rdma_cm_event *event)
{
	struct ucma_event *uevent;

	uevent = kzalloc(sizeof(*uevent), GFP_KERNEL);
	if (!uevent)
		return NULL;

	uevent->ctx = ctx;
	switch (event->event) {
	case RDMA_CM_EVENT_MULTICAST_JOIN:
	case RDMA_CM_EVENT_MULTICAST_ERROR:
		uevent->mc = (struct ucma_multicast *)
			     event->param.ud.private_data;
		uevent->resp.uid = uevent->mc->uid;
		uevent->resp.id = uevent->mc->id;
		break;
	default:
		uevent->resp.uid = ctx->uid;
		uevent->resp.id = ctx->id;
		break;
	}
	uevent->resp.event = event->event;
	uevent->resp.status = event->status;
	if (ctx->cm_id->qp_type == IB_QPT_UD)
		ucma_copy_ud_event(ctx->cm_id->device, &uevent->resp.param.ud,
				   &event->param.ud);
	else
		ucma_copy_conn_event(&uevent->resp.param.conn,
				     &event->param.conn);

	uevent->resp.ece.vendor_id = event->ece.vendor_id;
	uevent->resp.ece.attr_mod = event->ece.attr_mod;
	return uevent;
}

static int ucma_connect_event_handler(struct rdma_cm_id *cm_id,
				      struct rdma_cm_event *event)
{
	struct ucma_context *listen_ctx = cm_id->context;
	struct ucma_context *ctx;
	struct ucma_event *uevent;

	if (!atomic_add_unless(&listen_ctx->backlog, -1, 0))
		return -ENOMEM;
	ctx = ucma_alloc_ctx(listen_ctx->file);
	if (!ctx)
		goto err_backlog;
	ucma_set_ctx_cm_id(ctx, cm_id);

	uevent = ucma_create_uevent(listen_ctx, event);
	if (!uevent)
		goto err_alloc;
	uevent->conn_req_ctx = ctx;
	uevent->resp.id = ctx->id;

	ctx->cm_id->context = ctx;

	mutex_lock(&ctx->file->mut);
	ucma_finish_ctx(ctx);
	list_add_tail(&uevent->list, &ctx->file->event_list);
	mutex_unlock(&ctx->file->mut);
	wake_up_interruptible(&ctx->file->poll_wait);
	return 0;

err_alloc:
	ucma_destroy_private_ctx(ctx);
err_backlog:
	atomic_inc(&listen_ctx->backlog);
	/* Returning error causes the new ID to be destroyed */
	return -ENOMEM;
}

static int ucma_event_handler(struct rdma_cm_id *cm_id,
			      struct rdma_cm_event *event)
{
	struct ucma_event *uevent;
	struct ucma_context *ctx = cm_id->context;

	if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST)
		return ucma_connect_event_handler(cm_id, event);

	/*
	 * We ignore events for new connections until userspace has set their
	 * context. This can only happen if an error occurs on a new connection
	 * before the user accepts it. This is okay, since the accept will just
	 * fail later. However, we do need to release the underlying HW
	 * resources in case of a device removal event.
	 */
	if (ctx->uid) {
		uevent = ucma_create_uevent(ctx, event);
		if (!uevent)
			return 0;

		mutex_lock(&ctx->file->mut);
		list_add_tail(&uevent->list, &ctx->file->event_list);
		mutex_unlock(&ctx->file->mut);
		wake_up_interruptible(&ctx->file->poll_wait);
	}

	if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) {
		xa_lock(&ctx_table);
		if (xa_load(&ctx_table, ctx->id) == ctx)
			queue_work(system_unbound_wq, &ctx->close_work);
		xa_unlock(&ctx_table);
	}
	return 0;
}

static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
			      int in_len, int out_len)
{
	struct rdma_ucm_get_event cmd;
	struct ucma_event *uevent;

	/*
	 * Old 32 bit user space does not send the 4 byte padding in the
	 * reserved field. We don't care, allow it to keep working.
	 */
	if (out_len < sizeof(uevent->resp) - sizeof(uevent->resp.reserved) -
			      sizeof(uevent->resp.ece))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	mutex_lock(&file->mut);
	while (list_empty(&file->event_list)) {
		mutex_unlock(&file->mut);

		if (file->filp->f_flags & O_NONBLOCK)
			return -EAGAIN;

		if (wait_event_interruptible(file->poll_wait,
					     !list_empty(&file->event_list)))
			return -ERESTARTSYS;

		mutex_lock(&file->mut);
	}

	uevent = list_first_entry(&file->event_list, struct ucma_event, list);

	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &uevent->resp,
			 min_t(size_t, out_len, sizeof(uevent->resp)))) {
		mutex_unlock(&file->mut);
		return -EFAULT;
	}

	list_del(&uevent->list);
	uevent->ctx->events_reported++;
	if (uevent->mc)
		uevent->mc->events_reported++;
	if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST)
		atomic_inc(&uevent->ctx->backlog);
	mutex_unlock(&file->mut);

	kfree(uevent);
	return 0;
}

static int ucma_get_qp_type(struct rdma_ucm_create_id *cmd, enum ib_qp_type *qp_type)
{
	switch (cmd->ps) {
	case RDMA_PS_TCP:
		*qp_type = IB_QPT_RC;
		return 0;
	case RDMA_PS_UDP:
	case RDMA_PS_IPOIB:
		*qp_type = IB_QPT_UD;
		return 0;
	case RDMA_PS_IB:
		*qp_type = cmd->qp_type;
		return 0;
	default:
		return -EINVAL;
	}
}

static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
			      int in_len, int out_len)
{
	struct rdma_ucm_create_id cmd;
	struct rdma_ucm_create_id_resp resp;
	struct ucma_context *ctx;
	struct rdma_cm_id *cm_id;
	enum ib_qp_type qp_type;
	int ret;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ret = ucma_get_qp_type(&cmd, &qp_type);
	if (ret)
		return ret;

	ctx = ucma_alloc_ctx(file);
	if (!ctx)
		return -ENOMEM;

	ctx->uid = cmd.uid;
	cm_id = rdma_create_user_id(ucma_event_handler, ctx, cmd.ps, qp_type);
	if (IS_ERR(cm_id)) {
		ret = PTR_ERR(cm_id);
		goto err1;
	}
	ucma_set_ctx_cm_id(ctx, cm_id);

	resp.id = ctx->id;
	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &resp, sizeof(resp))) {
		ret = -EFAULT;
		goto err1;
	}

	mutex_lock(&file->mut);
	ucma_finish_ctx(ctx);
	mutex_unlock(&file->mut);
	return 0;

err1:
	ucma_destroy_private_ctx(ctx);
	return ret;
}

static void ucma_cleanup_multicast(struct ucma_context *ctx)
{
	struct ucma_multicast *mc;
	unsigned long index;

	xa_for_each(&multicast_table, index, mc) {
		if (mc->ctx != ctx)
			continue;
		/*
		 * At this point mc->ctx->ref is 0, so nothing else can be
		 * using the mc; the xarray's own locking is enough
		 * serialization against concurrent readers.
		 */
		xa_erase(&multicast_table, index);
		kfree(mc);
	}
}

static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
{
	struct ucma_event *uevent, *tmp;

	rdma_lock_handler(mc->ctx->cm_id);
	mutex_lock(&mc->ctx->file->mut);
	list_for_each_entry_safe(uevent, tmp, &mc->ctx->file->event_list, list) {
		if (uevent->mc != mc)
			continue;

		list_del(&uevent->list);
		kfree(uevent);
	}
	mutex_unlock(&mc->ctx->file->mut);
	rdma_unlock_handler(mc->ctx->cm_id);
}

static int ucma_cleanup_ctx_events(struct ucma_context *ctx)
{
	int events_reported;
	struct ucma_event *uevent, *tmp;
	LIST_HEAD(list);

	/* Cleanup events not yet reported to the user. */
	mutex_lock(&ctx->file->mut);
	list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) {
		if (uevent->ctx != ctx)
			continue;

		if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST &&
		    xa_cmpxchg(&ctx_table, uevent->conn_req_ctx->id,
			       uevent->conn_req_ctx, XA_ZERO_ENTRY,
			       GFP_KERNEL) == uevent->conn_req_ctx) {
			list_move_tail(&uevent->list, &list);
			continue;
		}
		list_del(&uevent->list);
		kfree(uevent);
	}
	list_del(&ctx->list);
	events_reported = ctx->events_reported;
	mutex_unlock(&ctx->file->mut);

	/*
	 * If this was a listening ID then any connections spawned from it that
	 * have not been delivered to userspace are cleaned up too. Must be done
	 * outside any locks.
	 */
	list_for_each_entry_safe(uevent, tmp, &list, list) {
		ucma_destroy_private_ctx(uevent->conn_req_ctx);
		kfree(uevent);
	}
	return events_reported;
}

/*
 * When this is called the xarray must have a XA_ZERO_ENTRY in the ctx->id
 * (i.e. the ctx is not public to the user). This is either because:
 * - ucma_finish_ctx() hasn't been called
 * - xa_cmpxchg() succeeded in removing the entry (only one thread can succeed)
 */
static int ucma_destroy_private_ctx(struct ucma_context *ctx)
{
	int events_reported;

	/*
	 * Destroy the underlying cm_id. New work queuing is prevented now by
	 * the removal from the xarray. Once the work is canceled, ref will
	 * either be 0 because the work ran to completion and consumed the ref
	 * from the xarray, or it will be positive because we still have the
	 * ref from the xarray. It can also be 0 in cases where cm_id was
	 * never set.
	 */
	cancel_work_sync(&ctx->close_work);
	if (refcount_read(&ctx->ref))
		ucma_close_id(&ctx->close_work);

	events_reported = ucma_cleanup_ctx_events(ctx);
	ucma_cleanup_multicast(ctx);

	WARN_ON(xa_cmpxchg(&ctx_table, ctx->id, XA_ZERO_ENTRY, NULL,
			   GFP_KERNEL) != NULL);
	mutex_destroy(&ctx->mutex);
	kfree(ctx);
	return events_reported;
}

static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
			       int in_len, int out_len)
{
	struct rdma_ucm_destroy_id cmd;
	struct rdma_ucm_destroy_id_resp resp;
	struct ucma_context *ctx;
	int ret = 0;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	xa_lock(&ctx_table);
	ctx = _ucma_find_context(cmd.id, file);
	if (!IS_ERR(ctx)) {
		if (__xa_cmpxchg(&ctx_table, ctx->id, ctx, XA_ZERO_ENTRY,
				 GFP_KERNEL) != ctx)
			ctx = ERR_PTR(-ENOENT);
	}
	xa_unlock(&ctx_table);

	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	resp.events_reported = ucma_destroy_private_ctx(ctx);
	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &resp, sizeof(resp)))
		ret = -EFAULT;

	return ret;
}

static ssize_t ucma_bind_ip(struct ucma_file *file, const char __user *inbuf,
			    int in_len, int out_len)
{
	struct rdma_ucm_bind_ip cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (!rdma_addr_size_in6(&cmd.addr))
		return -EINVAL;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr);
	mutex_unlock(&ctx->mutex);

	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_bind(struct ucma_file *file, const char __user *inbuf,
			 int in_len, int out_len)
{
	struct rdma_ucm_bind cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (cmd.reserved || !cmd.addr_size ||
	    cmd.addr_size != rdma_addr_size_kss(&cmd.addr))
		return -EINVAL;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr);
	mutex_unlock(&ctx->mutex);
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_resolve_ip(struct ucma_file *file,
			       const char __user *inbuf,
			       int in_len, int out_len)
{
	struct rdma_ucm_resolve_ip cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if ((cmd.src_addr.sin6_family && !rdma_addr_size_in6(&cmd.src_addr)) ||
	    !rdma_addr_size_in6(&cmd.dst_addr))
		return -EINVAL;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr,
				(struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms);
	mutex_unlock(&ctx->mutex);
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_resolve_addr(struct ucma_file *file,
				 const char __user *inbuf,
				 int in_len, int out_len)
{
	struct rdma_ucm_resolve_addr cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (cmd.reserved ||
	    (cmd.src_size && (cmd.src_size != rdma_addr_size_kss(&cmd.src_addr))) ||
	    !cmd.dst_size || (cmd.dst_size != rdma_addr_size_kss(&cmd.dst_addr)))
		return -EINVAL;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr,
				(struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms);
	mutex_unlock(&ctx->mutex);
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_resolve_route(struct ucma_file *file,
				  const char __user *inbuf,
				  int in_len, int out_len)
{
	struct rdma_ucm_resolve_route cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx_dev(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	ret = rdma_resolve_route(ctx->cm_id, cmd.timeout_ms);
	mutex_unlock(&ctx->mutex);
	ucma_put_ctx(ctx);
	return ret;
}

static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp,
			       struct rdma_route *route)
{
	struct rdma_dev_addr *dev_addr;

	resp->num_paths = route->num_paths;
	switch (route->num_paths) {
	case 0:
		dev_addr = &route->addr.dev_addr;
		rdma_addr_get_dgid(dev_addr,
				   (union ib_gid *) &resp->ib_route[0].dgid);
		rdma_addr_get_sgid(dev_addr,
				   (union ib_gid *) &resp->ib_route[0].sgid);
		resp->ib_route[0].pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
		break;
	case 2:
		ib_copy_path_rec_to_user(&resp->ib_route[1],
					 &route->path_rec[1]);
		fallthrough;
	case 1:
		ib_copy_path_rec_to_user(&resp->ib_route[0],
					 &route->path_rec[0]);
		break;
	default:
		break;
	}
}

static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp,
				 struct rdma_route *route)
{

	resp->num_paths = route->num_paths;
	switch (route->num_paths) {
	case 0:
		rdma_ip2gid((struct sockaddr *)&route->addr.dst_addr,
			    (union ib_gid *)&resp->ib_route[0].dgid);
		rdma_ip2gid((struct sockaddr *)&route->addr.src_addr,
			    (union ib_gid *)&resp->ib_route[0].sgid);
		resp->ib_route[0].pkey = cpu_to_be16(0xffff);
		break;
	case 2:
		ib_copy_path_rec_to_user(&resp->ib_route[1],
					 &route->path_rec[1]);
		fallthrough;
	case 1:
		ib_copy_path_rec_to_user(&resp->ib_route[0],
					 &route->path_rec[0]);
		break;
	default:
		break;
	}
}

static void ucma_copy_iw_route(struct rdma_ucm_query_route_resp *resp,
			       struct rdma_route *route)
{
	struct rdma_dev_addr *dev_addr;

	dev_addr = &route->addr.dev_addr;
	rdma_addr_get_dgid(dev_addr, (union ib_gid *) &resp->ib_route[0].dgid);
	rdma_addr_get_sgid(dev_addr, (union ib_gid *) &resp->ib_route[0].sgid);
}

static ssize_t ucma_query_route(struct ucma_file *file,
				const char __user *inbuf,
				int in_len, int out_len)
{
	struct rdma_ucm_query cmd;
	struct rdma_ucm_query_route_resp resp;
	struct ucma_context *ctx;
	struct sockaddr *addr;
	int ret = 0;

	if (out_len < offsetof(struct rdma_ucm_query_route_resp, ibdev_index))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	memset(&resp, 0, sizeof resp);
	addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr;
	memcpy(&resp.src_addr, addr, addr->sa_family == AF_INET ?
				     sizeof(struct sockaddr_in) :
				     sizeof(struct sockaddr_in6));
	addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr;
	memcpy(&resp.dst_addr, addr, addr->sa_family == AF_INET ?
				     sizeof(struct sockaddr_in) :
				     sizeof(struct sockaddr_in6));
	if (!ctx->cm_id->device)
		goto out;

	resp.node_guid = (__force __u64) ctx->cm_id->device->node_guid;
	resp.ibdev_index = ctx->cm_id->device->index;
	resp.port_num = ctx->cm_id->port_num;

	if (rdma_cap_ib_sa(ctx->cm_id->device, ctx->cm_id->port_num))
		ucma_copy_ib_route(&resp, &ctx->cm_id->route);
	else if (rdma_protocol_roce(ctx->cm_id->device, ctx->cm_id->port_num))
		ucma_copy_iboe_route(&resp, &ctx->cm_id->route);
	else if (rdma_protocol_iwarp(ctx->cm_id->device, ctx->cm_id->port_num))
		ucma_copy_iw_route(&resp, &ctx->cm_id->route);

out:
	mutex_unlock(&ctx->mutex);
	if (copy_to_user(u64_to_user_ptr(cmd.response), &resp,
			 min_t(size_t, out_len, sizeof(resp))))
		ret = -EFAULT;

	ucma_put_ctx(ctx);
	return ret;
}

static void ucma_query_device_addr(struct rdma_cm_id *cm_id,
				   struct rdma_ucm_query_addr_resp *resp)
{
	if (!cm_id->device)
		return;

	resp->node_guid = (__force __u64) cm_id->device->node_guid;
	resp->ibdev_index = cm_id->device->index;
	resp->port_num = cm_id->port_num;
	resp->pkey = (__force __u16) cpu_to_be16(
		     ib_addr_get_pkey(&cm_id->route.addr.dev_addr));
}

static ssize_t ucma_query_addr(struct ucma_context *ctx,
			       void __user *response, int out_len)
{
	struct rdma_ucm_query_addr_resp resp;
	struct sockaddr *addr;
	int ret = 0;

	if (out_len < offsetof(struct rdma_ucm_query_addr_resp, ibdev_index))
		return -ENOSPC;

	memset(&resp, 0, sizeof resp);

	addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr;
	resp.src_size = rdma_addr_size(addr);
	memcpy(&resp.src_addr, addr, resp.src_size);

	addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr;
	resp.dst_size = rdma_addr_size(addr);
	memcpy(&resp.dst_addr, addr, resp.dst_size);

	ucma_query_device_addr(ctx->cm_id, &resp);

	if (copy_to_user(response, &resp, min_t(size_t, out_len, sizeof(resp))))
		ret = -EFAULT;

	return ret;
}

static ssize_t ucma_query_path(struct ucma_context *ctx,
			       void __user *response, int out_len)
{
	struct rdma_ucm_query_path_resp *resp;
	int i, ret = 0;

	if (out_len < sizeof(*resp))
		return -ENOSPC;

	resp = kzalloc(out_len, GFP_KERNEL);
	if (!resp)
		return -ENOMEM;

	resp->num_paths = ctx->cm_id->route.num_paths;
	for (i = 0, out_len -= sizeof(*resp);
	     i < resp->num_paths && out_len > sizeof(struct ib_path_rec_data);
	     i++, out_len -= sizeof(struct ib_path_rec_data)) {
		struct sa_path_rec *rec = &ctx->cm_id->route.path_rec[i];

		resp->path_data[i].flags = IB_PATH_GMP | IB_PATH_PRIMARY |
					   IB_PATH_BIDIRECTIONAL;
		if (rec->rec_type == SA_PATH_REC_TYPE_OPA) {
			struct sa_path_rec ib;

			sa_convert_path_opa_to_ib(&ib, rec);
			ib_sa_pack_path(&ib, &resp->path_data[i].path_rec);

		} else {
			ib_sa_pack_path(rec, &resp->path_data[i].path_rec);
		}
	}

	if (copy_to_user(response, resp, struct_size(resp, path_data, i)))
		ret = -EFAULT;

	kfree(resp);
	return ret;
}

static ssize_t ucma_query_gid(struct ucma_context *ctx,
			      void __user *response, int out_len)
{
	struct rdma_ucm_query_addr_resp resp;
	struct sockaddr_ib *addr;
	int ret = 0;

	if (out_len < offsetof(struct rdma_ucm_query_addr_resp, ibdev_index))
		return -ENOSPC;

	memset(&resp, 0, sizeof resp);

	ucma_query_device_addr(ctx->cm_id, &resp);

	addr = (struct sockaddr_ib *) &resp.src_addr;
	resp.src_size = sizeof(*addr);
	if (ctx->cm_id->route.addr.src_addr.ss_family == AF_IB) {
		memcpy(addr, &ctx->cm_id->route.addr.src_addr, resp.src_size);
	} else {
		addr->sib_family = AF_IB;
		addr->sib_pkey = (__force __be16) resp.pkey;
		rdma_read_gids(ctx->cm_id, (union ib_gid *)&addr->sib_addr,
			       NULL);
		addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
						    &ctx->cm_id->route.addr.src_addr);
	}

	addr = (struct sockaddr_ib *) &resp.dst_addr;
	resp.dst_size = sizeof(*addr);
	if (ctx->cm_id->route.addr.dst_addr.ss_family == AF_IB) {
		memcpy(addr, &ctx->cm_id->route.addr.dst_addr, resp.dst_size);
	} else {
		addr->sib_family = AF_IB;
		addr->sib_pkey = (__force __be16) resp.pkey;
		rdma_read_gids(ctx->cm_id, NULL,
			       (union ib_gid *)&addr->sib_addr);
		addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
						    &ctx->cm_id->route.addr.dst_addr);
	}

	if (copy_to_user(response, &resp, min_t(size_t, out_len, sizeof(resp))))
		ret = -EFAULT;

	return ret;
}

static ssize_t ucma_query(struct ucma_file *file,
			  const char __user *inbuf,
			  int in_len, int out_len)
{
	struct rdma_ucm_query cmd;
	struct ucma_context *ctx;
	void __user *response;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	response = u64_to_user_ptr(cmd.response);
	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	switch (cmd.option) {
	case RDMA_USER_CM_QUERY_ADDR:
		ret = ucma_query_addr(ctx, response, out_len);
		break;
	case RDMA_USER_CM_QUERY_PATH:
		ret = ucma_query_path(ctx, response, out_len);
		break;
	case RDMA_USER_CM_QUERY_GID:
		ret = ucma_query_gid(ctx, response, out_len);
		break;
	default:
		ret = -ENOSYS;
		break;
	}
	mutex_unlock(&ctx->mutex);

	ucma_put_ctx(ctx);
	return ret;
}

static void ucma_copy_conn_param(struct rdma_cm_id *id,
				 struct rdma_conn_param *dst,
				 struct rdma_ucm_conn_param *src)
{
	dst->private_data = src->private_data;
	dst->private_data_len = src->private_data_len;
	dst->responder_resources = src->responder_resources;
	dst->initiator_depth = src->initiator_depth;
	dst->flow_control = src->flow_control;
	dst->retry_count = src->retry_count;
	dst->rnr_retry_count = src->rnr_retry_count;
	dst->srq = src->srq;
	dst->qp_num = src->qp_num & 0xFFFFFF;
	dst->qkey = (id->route.addr.src_addr.ss_family == AF_IB) ? src->qkey : 0;
}

static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf,
			    int in_len, int out_len)
{
	struct rdma_conn_param conn_param;
	struct rdma_ucm_ece ece = {};
	struct rdma_ucm_connect cmd;
	struct ucma_context *ctx;
	size_t in_size;
	int ret;

	if (in_len < offsetofend(typeof(cmd), reserved))
		return -EINVAL;
	in_size = min_t(size_t, in_len, sizeof(cmd));
	if (copy_from_user(&cmd, inbuf, in_size))
		return -EFAULT;

	if (!cmd.conn_param.valid)
		return -EINVAL;

	ctx = ucma_get_ctx_dev(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
	if (offsetofend(typeof(cmd), ece) <= in_size) {
		ece.vendor_id = cmd.ece.vendor_id;
		ece.attr_mod = cmd.ece.attr_mod;
	}

	mutex_lock(&ctx->mutex);
	ret = rdma_connect_ece(ctx->cm_id, &conn_param, &ece);
	mutex_unlock(&ctx->mutex);
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_listen(struct ucma_file *file, const char __user *inbuf,
			   int in_len, int out_len)
{
	struct rdma_ucm_listen cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	if (cmd.backlog <= 0 || cmd.backlog > max_backlog)
		cmd.backlog = max_backlog;
	atomic_set(&ctx->backlog, cmd.backlog);

	mutex_lock(&ctx->mutex);
	ret = rdma_listen(ctx->cm_id, cmd.backlog);
	mutex_unlock(&ctx->mutex);
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf,
			   int in_len, int out_len)
{
	struct rdma_ucm_accept cmd;
	struct rdma_conn_param conn_param;
	struct rdma_ucm_ece ece = {};
	struct ucma_context *ctx;
	size_t in_size;
	int ret;

	if (in_len < offsetofend(typeof(cmd), reserved))
		return -EINVAL;
	in_size = min_t(size_t, in_len, sizeof(cmd));
	if (copy_from_user(&cmd, inbuf, in_size))
		return -EFAULT;

	ctx = ucma_get_ctx_dev(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	if (offsetofend(typeof(cmd), ece) <= in_size) {
		ece.vendor_id = cmd.ece.vendor_id;
		ece.attr_mod = cmd.ece.attr_mod;
	}

	if (cmd.conn_param.valid) {
		ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
		mutex_lock(&ctx->mutex);
		rdma_lock_handler(ctx->cm_id);
		ret = rdma_accept_ece(ctx->cm_id, &conn_param, &ece);
		if (!ret) {
			/* The uid must be set atomically with the handler */
			ctx->uid = cmd.uid;
		}
		rdma_unlock_handler(ctx->cm_id);
		mutex_unlock(&ctx->mutex);
	} else {
		mutex_lock(&ctx->mutex);
		rdma_lock_handler(ctx->cm_id);
		ret = rdma_accept_ece(ctx->cm_id, NULL, &ece);
		rdma_unlock_handler(ctx->cm_id);
		mutex_unlock(&ctx->mutex);
	}
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_reject(struct ucma_file *file, const char __user *inbuf,
			   int in_len, int out_len)
{
	struct rdma_ucm_reject cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (!cmd.reason)
		cmd.reason = IB_CM_REJ_CONSUMER_DEFINED;

	switch (cmd.reason) {
	case IB_CM_REJ_CONSUMER_DEFINED:
	case IB_CM_REJ_VENDOR_OPTION_NOT_SUPPORTED:
		break;
	default:
		return -EINVAL;
	}

	ctx = ucma_get_ctx_dev(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	ret = rdma_reject(ctx->cm_id, cmd.private_data, cmd.private_data_len,
			  cmd.reason);
	mutex_unlock(&ctx->mutex);
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_disconnect(struct ucma_file *file, const char __user *inbuf,
			       int in_len, int out_len)
{
	struct rdma_ucm_disconnect cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx_dev(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	ret = rdma_disconnect(ctx->cm_id);
	mutex_unlock(&ctx->mutex);
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_init_qp_attr(struct ucma_file *file,
				 const char __user *inbuf,
				 int in_len, int out_len)
{
	struct rdma_ucm_init_qp_attr cmd;
	struct ib_uverbs_qp_attr resp;
	struct ucma_context *ctx;
	struct ib_qp_attr qp_attr;
	int ret;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (cmd.qp_state > IB_QPS_ERR)
		return -EINVAL;

	ctx = ucma_get_ctx_dev(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	resp.qp_attr_mask = 0;
	memset(&qp_attr, 0, sizeof qp_attr);
	qp_attr.qp_state = cmd.qp_state;
	mutex_lock(&ctx->mutex);
	ret = rdma_init_qp_attr(ctx->cm_id, &qp_attr, &resp.qp_attr_mask);
	mutex_unlock(&ctx->mutex);
	if (ret)
		goto out;

	ib_copy_qp_attr_to_user(ctx->cm_id->device, &resp, &qp_attr);
	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &resp, sizeof(resp)))
		ret = -EFAULT;

out:
	ucma_put_ctx(ctx);
	return ret;
}

static int ucma_set_option_id(struct ucma_context *ctx, int optname,
			      void *optval, size_t optlen)
{
	int ret = 0;

	switch (optname) {
	case RDMA_OPTION_ID_TOS:
		if (optlen != sizeof(u8)) {
			ret = -EINVAL;
			break;
		}
		rdma_set_service_type(ctx->cm_id, *((u8 *) optval));
		break;
	case RDMA_OPTION_ID_REUSEADDR:
		if (optlen != sizeof(int)) {
			ret = -EINVAL;
			break;
		}
		ret = rdma_set_reuseaddr(ctx->cm_id, *((int *) optval) ? 1 : 0);
		break;
	case RDMA_OPTION_ID_AFONLY:
		if (optlen != sizeof(int)) {
			ret = -EINVAL;
			break;
		}
		ret = rdma_set_afonly(ctx->cm_id, *((int *) optval) ? 1 : 0);
		break;
	case RDMA_OPTION_ID_ACK_TIMEOUT:
		if (optlen != sizeof(u8)) {
			ret = -EINVAL;
			break;
		}
		ret = rdma_set_ack_timeout(ctx->cm_id, *((u8 *)optval));
		break;
	default:
		ret = -ENOSYS;
	}

	return ret;
}

static int ucma_set_ib_path(struct ucma_context *ctx,
			    struct ib_path_rec_data *path_data, size_t optlen)
{
	struct sa_path_rec sa_path;
	struct rdma_cm_event event;
	int ret;

	if (optlen % sizeof(*path_data))
		return -EINVAL;

	for (; optlen; optlen -= sizeof(*path_data), path_data++) {
		if (path_data->flags == (IB_PATH_GMP | IB_PATH_PRIMARY |
					 IB_PATH_BIDIRECTIONAL))
			break;
	}

	if (!optlen)
		return -EINVAL;

	if (!ctx->cm_id->device)
		return -EINVAL;

	memset(&sa_path, 0, sizeof(sa_path));

	sa_path.rec_type = SA_PATH_REC_TYPE_IB;
	ib_sa_unpack_path(path_data->path_rec, &sa_path);

	if (rdma_cap_opa_ah(ctx->cm_id->device, ctx->cm_id->port_num)) {
		struct sa_path_rec opa;

		sa_convert_path_ib_to_opa(&opa, &sa_path);
		mutex_lock(&ctx->mutex);
		ret = rdma_set_ib_path(ctx->cm_id, &opa);
		mutex_unlock(&ctx->mutex);
	} else {
		mutex_lock(&ctx->mutex);
		ret = rdma_set_ib_path(ctx->cm_id, &sa_path);
		mutex_unlock(&ctx->mutex);
	}
	if (ret)
		return ret;

	memset(&event, 0, sizeof event);
	event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
	return ucma_event_handler(ctx->cm_id, &event);
}

static int ucma_set_option_ib(struct ucma_context *ctx, int optname,
			      void *optval, size_t optlen)
{
	int ret;

	switch (optname) {
	case RDMA_OPTION_IB_PATH:
		ret = ucma_set_ib_path(ctx, optval, optlen);
		break;
	default:
		ret = -ENOSYS;
	}

	return ret;
}

static int ucma_set_option_level(struct ucma_context *ctx, int level,
				 int optname, void *optval, size_t optlen)
{
	int ret;

	switch (level) {
	case RDMA_OPTION_ID:
		mutex_lock(&ctx->mutex);
		ret = ucma_set_option_id(ctx, optname, optval, optlen);
		mutex_unlock(&ctx->mutex);
		break;
	case RDMA_OPTION_IB:
		ret = ucma_set_option_ib(ctx, optname, optval, optlen);
		break;
	default:
		ret = -ENOSYS;
	}

	return ret;
}

static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf,
			       int in_len, int out_len)
{
	struct rdma_ucm_set_option cmd;
	struct ucma_context *ctx;
	void *optval;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (unlikely(cmd.optlen > KMALLOC_MAX_SIZE))
		return -EINVAL;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	optval = memdup_user(u64_to_user_ptr(cmd.optval),
			     cmd.optlen);
	if (IS_ERR(optval)) {
		ret = PTR_ERR(optval);
		goto out;
	}

	ret = ucma_set_option_level(ctx, cmd.level, cmd.optname, optval,
				    cmd.optlen);
	kfree(optval);

out:
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf,
			   int in_len, int out_len)
{
	struct rdma_ucm_notify cmd;
	struct ucma_context *ctx;
	int ret = -EINVAL;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	if (ctx->cm_id->device)
		ret = rdma_notify(ctx->cm_id, (enum ib_event_type)cmd.event);
	mutex_unlock(&ctx->mutex);

	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_process_join(struct ucma_file *file,
				 struct rdma_ucm_join_mcast *cmd, int out_len)
{
	struct rdma_ucm_create_id_resp resp;
	struct ucma_context *ctx;
	struct ucma_multicast *mc;
	struct sockaddr *addr;
	int ret;
	u8 join_state;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	addr = (struct sockaddr *) &cmd->addr;
	if (cmd->addr_size != rdma_addr_size(addr))
		return -EINVAL;

	if (cmd->join_flags == RDMA_MC_JOIN_FLAG_FULLMEMBER)
		join_state = BIT(FULLMEMBER_JOIN);
	else if (cmd->join_flags == RDMA_MC_JOIN_FLAG_SENDONLY_FULLMEMBER)
		join_state = BIT(SENDONLY_FULLMEMBER_JOIN);
	else
		return -EINVAL;

	ctx = ucma_get_ctx_dev(file, cmd->id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mc = kzalloc(sizeof(*mc), GFP_KERNEL);
	if (!mc) {
		ret = -ENOMEM;
		goto err_put_ctx;
	}

	mc->ctx = ctx;
	mc->join_state = join_state;
	mc->uid = cmd->uid;
	memcpy(&mc->addr, addr, cmd->addr_size);

	if (xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b,
		     GFP_KERNEL)) {
		ret = -ENOMEM;
		goto err_free_mc;
	}

	mutex_lock(&ctx->mutex);
	ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr,
				  join_state, mc);
	mutex_unlock(&ctx->mutex);
	if (ret)
		goto err_xa_erase;

	resp.id = mc->id;
	if (copy_to_user(u64_to_user_ptr(cmd->response),
			 &resp, sizeof(resp))) {
		ret = -EFAULT;
		goto err_leave_multicast;
	}

	xa_store(&multicast_table, mc->id, mc, 0);

	ucma_put_ctx(ctx);
	return 0;

err_leave_multicast:
	mutex_lock(&ctx->mutex);
	rdma_leave_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr);
	mutex_unlock(&ctx->mutex);
	ucma_cleanup_mc_events(mc);
err_xa_erase:
	xa_erase(&multicast_table, mc->id);
err_free_mc:
	kfree(mc);
err_put_ctx:
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_join_ip_multicast(struct ucma_file *file,
				      const char __user *inbuf,
				      int in_len, int out_len)
{
	struct rdma_ucm_join_ip_mcast cmd;
	struct rdma_ucm_join_mcast join_cmd;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	join_cmd.response = cmd.response;
	join_cmd.uid = cmd.uid;
	join_cmd.id = cmd.id;
	join_cmd.addr_size = rdma_addr_size_in6(&cmd.addr);
	if (!join_cmd.addr_size)
		return -EINVAL;

	join_cmd.join_flags = RDMA_MC_JOIN_FLAG_FULLMEMBER;
	memcpy(&join_cmd.addr, &cmd.addr, join_cmd.addr_size);

	return ucma_process_join(file, &join_cmd, out_len);
}

static ssize_t ucma_join_multicast(struct ucma_file *file,
				   const char __user *inbuf,
				   int in_len, int out_len)
{
	struct rdma_ucm_join_mcast cmd;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (!rdma_addr_size_kss(&cmd.addr))
		return -EINVAL;

	return ucma_process_join(file, &cmd, out_len);
}

static ssize_t ucma_leave_multicast(struct ucma_file *file,
				    const char __user *inbuf,
				    int in_len, int out_len)
{
	struct rdma_ucm_destroy_id cmd;
	struct rdma_ucm_destroy_id_resp resp;
	struct ucma_multicast *mc;
	int ret = 0;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	xa_lock(&multicast_table);
	mc = xa_load(&multicast_table, cmd.id);
	if (!mc)
		mc = ERR_PTR(-ENOENT);
	else if (READ_ONCE(mc->ctx->file) != file)
		mc = ERR_PTR(-EINVAL);
	else if (!refcount_inc_not_zero(&mc->ctx->ref))
		mc = ERR_PTR(-ENXIO);
	else
		__xa_erase(&multicast_table, mc->id);
	xa_unlock(&multicast_table);

	if (IS_ERR(mc)) {
		ret = PTR_ERR(mc);
		goto out;
	}

	mutex_lock(&mc->ctx->mutex);
	rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr);
	mutex_unlock(&mc->ctx->mutex);

	ucma_cleanup_mc_events(mc);

	ucma_put_ctx(mc->ctx);
	resp.events_reported = mc->events_reported;
	kfree(mc);

	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &resp, sizeof(resp)))
		ret = -EFAULT;
out:
	return ret;
}

static ssize_t ucma_migrate_id(struct ucma_file *new_file,
			       const char __user *inbuf,
			       int in_len, int out_len)
{
	struct rdma_ucm_migrate_id cmd;
	struct rdma_ucm_migrate_resp resp;
	struct ucma_event *uevent, *tmp;
	struct ucma_context *ctx;
	LIST_HEAD(event_list);
	struct fd f;
	struct ucma_file *cur_file;
	int ret = 0;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	/* Get current fd to protect against it being closed */
	f = fdget(cmd.fd);
	if (!f.file)
		return -ENOENT;
	if (f.file->f_op != &ucma_fops) {
		ret = -EINVAL;
		goto file_put;
	}
	cur_file = f.file->private_data;

	/* Validate current fd and prevent destruction of id. */
	ctx = ucma_get_ctx(cur_file, cmd.id);
	if (IS_ERR(ctx)) {
		ret = PTR_ERR(ctx);
		goto file_put;
	}

	rdma_lock_handler(ctx->cm_id);
	/*
	 * ctx->file can only be changed under the handler & xa_lock. xa_load()
	 * must be checked again to ensure the ctx hasn't begun destruction
	 * since the ucma_get_ctx().
	 */
	xa_lock(&ctx_table);
	if (_ucma_find_context(cmd.id, cur_file) != ctx) {
		xa_unlock(&ctx_table);
		ret = -ENOENT;
		goto err_unlock;
	}
	ctx->file = new_file;
	xa_unlock(&ctx_table);

	mutex_lock(&cur_file->mut);
	list_del(&ctx->list);
	/*
	 * At this point lock_handler() prevents addition of new uevents for
	 * this ctx.
	 */
	list_for_each_entry_safe(uevent, tmp, &cur_file->event_list, list)
		if (uevent->ctx == ctx)
			list_move_tail(&uevent->list, &event_list);
	resp.events_reported = ctx->events_reported;
	mutex_unlock(&cur_file->mut);

	mutex_lock(&new_file->mut);
	list_add_tail(&ctx->list, &new_file->ctx_list);
	list_splice_tail(&event_list, &new_file->event_list);
	mutex_unlock(&new_file->mut);

	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &resp, sizeof(resp)))
		ret = -EFAULT;

err_unlock:
	rdma_unlock_handler(ctx->cm_id);
	ucma_put_ctx(ctx);
file_put:
	fdput(f);
	return ret;
}

static ssize_t (*ucma_cmd_table[])(struct ucma_file *file,
				   const char __user *inbuf,
				   int in_len, int out_len) = {
	[RDMA_USER_CM_CMD_CREATE_ID] = ucma_create_id,
	[RDMA_USER_CM_CMD_DESTROY_ID] = ucma_destroy_id,
	[RDMA_USER_CM_CMD_BIND_IP] = ucma_bind_ip,
	[RDMA_USER_CM_CMD_RESOLVE_IP] = ucma_resolve_ip,
	[RDMA_USER_CM_CMD_RESOLVE_ROUTE] = ucma_resolve_route,
	[RDMA_USER_CM_CMD_QUERY_ROUTE] = ucma_query_route,
	[RDMA_USER_CM_CMD_CONNECT] = ucma_connect,
	[RDMA_USER_CM_CMD_LISTEN] = ucma_listen,
	[RDMA_USER_CM_CMD_ACCEPT] = ucma_accept,
	[RDMA_USER_CM_CMD_REJECT] = ucma_reject,
	[RDMA_USER_CM_CMD_DISCONNECT] = ucma_disconnect,
	[RDMA_USER_CM_CMD_INIT_QP_ATTR] = ucma_init_qp_attr,
	[RDMA_USER_CM_CMD_GET_EVENT] = ucma_get_event,
	[RDMA_USER_CM_CMD_GET_OPTION] = NULL,
	[RDMA_USER_CM_CMD_SET_OPTION] = ucma_set_option,
	[RDMA_USER_CM_CMD_NOTIFY] = ucma_notify,
	[RDMA_USER_CM_CMD_JOIN_IP_MCAST] = ucma_join_ip_multicast,
	[RDMA_USER_CM_CMD_LEAVE_MCAST] = ucma_leave_multicast,
	[RDMA_USER_CM_CMD_MIGRATE_ID] = ucma_migrate_id,
	[RDMA_USER_CM_CMD_QUERY] = ucma_query,
	[RDMA_USER_CM_CMD_BIND] = ucma_bind,
	[RDMA_USER_CM_CMD_RESOLVE_ADDR] = ucma_resolve_addr,
	[RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast
};

static ssize_t ucma_write(struct file *filp, const char __user *buf,
			  size_t len, loff_t *pos)
{
	struct ucma_file *file = filp->private_data;
	struct rdma_ucm_cmd_hdr hdr;
	ssize_t ret;

	if (!ib_safe_file_access(filp)) {
		pr_err_once("%s: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
			    __func__, task_tgid_vnr(current), current->comm);
		return -EACCES;
	}

	if (len < sizeof(hdr))
		return -EINVAL;

	if (copy_from_user(&hdr, buf, sizeof(hdr)))
		return -EFAULT;

	if (hdr.cmd >= ARRAY_SIZE(ucma_cmd_table))
		return -EINVAL;
	hdr.cmd = array_index_nospec(hdr.cmd, ARRAY_SIZE(ucma_cmd_table));

	if (hdr.in + sizeof(hdr) > len)
		return -EINVAL;

	if (!ucma_cmd_table[hdr.cmd])
		return -ENOSYS;

	ret = ucma_cmd_table[hdr.cmd](file, buf + sizeof(hdr), hdr.in, hdr.out);
	if (!ret)
		ret = len;

	return ret;
}

static __poll_t ucma_poll(struct file *filp, struct poll_table_struct *wait)
{
	struct ucma_file *file = filp->private_data;
	__poll_t mask = 0;

	poll_wait(filp, &file->poll_wait, wait);

	if (!list_empty(&file->event_list))
		mask = EPOLLIN | EPOLLRDNORM;

	return mask;
}

/*
 * ucma_open() does not need the BKL:
 *
 *  - no global state is referred to;
 *  - there is no ioctl method to race against;
 *  - no further module initialization is required for open to work
 *    after the device is registered.
 */
static int ucma_open(struct inode *inode, struct file *filp)
{
	struct ucma_file *file;

	file = kmalloc(sizeof *file, GFP_KERNEL);
	if (!file)
		return -ENOMEM;

	INIT_LIST_HEAD(&file->event_list);
	INIT_LIST_HEAD(&file->ctx_list);
	init_waitqueue_head(&file->poll_wait);
	mutex_init(&file->mut);

	filp->private_data = file;
	file->filp = filp;

	return stream_open(inode, filp);
}

static int ucma_close(struct inode *inode, struct file *filp)
{
	struct ucma_file *file = filp->private_data;

	/*
	 * All paths that touch ctx_list or event_list starting from write()
	 * are prevented by this being a FD release function. The
	 * list_add_tail() in ucma_connect_event_handler() can run
	 * concurrently, however it only adds to the list *after* a listening
	 * ID. By only reading the first of the list, and relying on
	 * ucma_destroy_private_ctx() to block ucma_connect_event_handler(),
	 * no additional locking is needed.
	 */
	while (!list_empty(&file->ctx_list)) {
		struct ucma_context *ctx = list_first_entry(
			&file->ctx_list, struct ucma_context, list);

		WARN_ON(xa_cmpxchg(&ctx_table, ctx->id, ctx, XA_ZERO_ENTRY,
				   GFP_KERNEL) != ctx);
		ucma_destroy_private_ctx(ctx);
	}
	kfree(file);
	return 0;
}

static const struct file_operations ucma_fops = {
	.owner = THIS_MODULE,
	.open = ucma_open,
	.release = ucma_close,
	.write = ucma_write,
	.poll = ucma_poll,
	.llseek = no_llseek,
};

static struct miscdevice ucma_misc = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = "rdma_cm",
	.nodename = "infiniband/rdma_cm",
	.mode = 0666,
	.fops = &ucma_fops,
};

static int ucma_get_global_nl_info(struct ib_client_nl_info *res)
{
	res->abi = RDMA_USER_CM_ABI_VERSION;
	res->cdev = ucma_misc.this_device;
	return 0;
}

static struct ib_client rdma_cma_client = {
	.name = "rdma_cm",
	.get_global_nl_info = ucma_get_global_nl_info,
};
MODULE_ALIAS_RDMA_CLIENT("rdma_cm");

static ssize_t abi_version_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%d\n", RDMA_USER_CM_ABI_VERSION);
}
static DEVICE_ATTR_RO(abi_version);

static int __init ucma_init(void)
{
	int ret;

	ret = misc_register(&ucma_misc);
	if (ret)
		return ret;

	ret = device_create_file(ucma_misc.this_device, &dev_attr_abi_version);
	if (ret) {
		pr_err("rdma_ucm: couldn't create abi_version attr\n");
		goto err1;
	}

	ucma_ctl_table_hdr = register_net_sysctl(&init_net, "net/rdma_ucm", ucma_ctl_table);
	if (!ucma_ctl_table_hdr) {
		pr_err("rdma_ucm: couldn't register sysctl paths\n");
		ret = -ENOMEM;
		goto err2;
	}

	ret = ib_register_client(&rdma_cma_client);
	if (ret)
		goto err3;

	return 0;
err3:
	unregister_net_sysctl_table(ucma_ctl_table_hdr);
err2:
	device_remove_file(ucma_misc.this_device, &dev_attr_abi_version);
err1:
	misc_deregister(&ucma_misc);
	return ret;
}

static void __exit ucma_cleanup(void)
{
	ib_unregister_client(&rdma_cma_client);
	unregister_net_sysctl_table(ucma_ctl_table_hdr);
	device_remove_file(ucma_misc.this_device, &dev_attr_abi_version);
	misc_deregister(&ucma_misc);
}

module_init(ucma_init);
module_exit(ucma_cleanup);