1 /* 2 * Copyright (c) 2005-2006 Intel Corporation. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 31 */ 32 33 #include <linux/completion.h> 34 #include <linux/file.h> 35 #include <linux/mutex.h> 36 #include <linux/poll.h> 37 #include <linux/sched.h> 38 #include <linux/idr.h> 39 #include <linux/in.h> 40 #include <linux/in6.h> 41 #include <linux/miscdevice.h> 42 #include <linux/slab.h> 43 #include <linux/sysctl.h> 44 #include <linux/module.h> 45 #include <linux/nsproxy.h> 46 47 #include <linux/nospec.h> 48 49 #include <rdma/rdma_user_cm.h> 50 #include <rdma/ib_marshall.h> 51 #include <rdma/rdma_cm.h> 52 #include <rdma/rdma_cm_ib.h> 53 #include <rdma/ib_addr.h> 54 #include <rdma/ib.h> 55 #include <rdma/ib_cm.h> 56 #include <rdma/rdma_netlink.h> 57 #include "core_priv.h" 58 59 MODULE_AUTHOR("Sean Hefty"); 60 MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access"); 61 MODULE_LICENSE("Dual BSD/GPL"); 62 63 static unsigned int max_backlog = 1024; 64 65 static struct ctl_table_header *ucma_ctl_table_hdr; 66 static struct ctl_table ucma_ctl_table[] = { 67 { 68 .procname = "max_backlog", 69 .data = &max_backlog, 70 .maxlen = sizeof max_backlog, 71 .mode = 0644, 72 .proc_handler = proc_dointvec, 73 }, 74 { } 75 }; 76 77 struct ucma_file { 78 struct mutex mut; 79 struct file *filp; 80 struct list_head ctx_list; 81 struct list_head event_list; 82 wait_queue_head_t poll_wait; 83 }; 84 85 struct ucma_context { 86 u32 id; 87 struct completion comp; 88 refcount_t ref; 89 int events_reported; 90 atomic_t backlog; 91 92 struct ucma_file *file; 93 struct rdma_cm_id *cm_id; 94 struct mutex mutex; 95 u64 uid; 96 97 struct list_head list; 98 /* sync between removal event and id destroy, protected by file mut */ 99 int destroying; 100 struct work_struct close_work; 101 }; 102 103 struct ucma_multicast { 104 struct ucma_context *ctx; 105 u32 id; 106 int events_reported; 107 108 u64 uid; 109 u8 join_state; 110 struct sockaddr_storage addr; 111 }; 112 113 struct ucma_event { 114 struct ucma_context *ctx; 115 struct ucma_context *conn_req_ctx; 116 struct ucma_multicast *mc; 117 struct list_head list; 118 struct rdma_ucm_event_resp resp; 119 }; 120 121 static DEFINE_XARRAY_ALLOC(ctx_table); 122 static 
DEFINE_XARRAY_ALLOC(multicast_table);

static const struct file_operations ucma_fops;
static int __destroy_id(struct ucma_context *ctx);

static inline struct ucma_context *_ucma_find_context(int id,
						       struct ucma_file *file)
{
	struct ucma_context *ctx;

	ctx = xa_load(&ctx_table, id);
	if (!ctx)
		ctx = ERR_PTR(-ENOENT);
	else if (ctx->file != file)
		ctx = ERR_PTR(-EINVAL);
	return ctx;
}

static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id)
{
	struct ucma_context *ctx;

	xa_lock(&ctx_table);
	ctx = _ucma_find_context(id, file);
	if (!IS_ERR(ctx))
		if (!refcount_inc_not_zero(&ctx->ref))
			ctx = ERR_PTR(-ENXIO);
	xa_unlock(&ctx_table);
	return ctx;
}

static void ucma_put_ctx(struct ucma_context *ctx)
{
	if (refcount_dec_and_test(&ctx->ref))
		complete(&ctx->comp);
}

/*
 * Same as ucma_get_ctx() but requires that ->cm_id->device is valid, i.e.
 * that the CM_ID is bound.
 */
static struct ucma_context *ucma_get_ctx_dev(struct ucma_file *file, int id)
{
	struct ucma_context *ctx = ucma_get_ctx(file, id);

	if (IS_ERR(ctx))
		return ctx;
	if (!ctx->cm_id->device) {
		ucma_put_ctx(ctx);
		return ERR_PTR(-EINVAL);
	}
	return ctx;
}

static void ucma_close_id(struct work_struct *work)
{
	struct ucma_context *ctx = container_of(work, struct ucma_context, close_work);

	/* once all inflight tasks are finished, we close all underlying
	 * resources. The context is still alive until it is explicitly
	 * destroyed by its creator.
	 */
	ucma_put_ctx(ctx);
	wait_for_completion(&ctx->comp);
	/* No new events will be generated after destroying the id. */
	rdma_destroy_id(ctx->cm_id);

	/*
	 * At this point ctx->ref is zero so the only place the ctx can be is in
	 * a uevent or in __destroy_id(). Since the former doesn't touch
	 * ctx->cm_id and the latter sync cancels this, there are no races with
	 * this store.
194 */ 195 ctx->cm_id = NULL; 196 } 197 198 static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file) 199 { 200 struct ucma_context *ctx; 201 202 ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); 203 if (!ctx) 204 return NULL; 205 206 INIT_WORK(&ctx->close_work, ucma_close_id); 207 refcount_set(&ctx->ref, 1); 208 init_completion(&ctx->comp); 209 /* So list_del() will work if we don't do ucma_finish_ctx() */ 210 INIT_LIST_HEAD(&ctx->list); 211 ctx->file = file; 212 mutex_init(&ctx->mutex); 213 214 if (xa_alloc(&ctx_table, &ctx->id, NULL, xa_limit_32b, GFP_KERNEL)) { 215 kfree(ctx); 216 return NULL; 217 } 218 return ctx; 219 } 220 221 static void ucma_finish_ctx(struct ucma_context *ctx) 222 { 223 lockdep_assert_held(&ctx->file->mut); 224 list_add_tail(&ctx->list, &ctx->file->ctx_list); 225 xa_store(&ctx_table, ctx->id, ctx, GFP_KERNEL); 226 } 227 228 static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst, 229 struct rdma_conn_param *src) 230 { 231 if (src->private_data_len) 232 memcpy(dst->private_data, src->private_data, 233 src->private_data_len); 234 dst->private_data_len = src->private_data_len; 235 dst->responder_resources =src->responder_resources; 236 dst->initiator_depth = src->initiator_depth; 237 dst->flow_control = src->flow_control; 238 dst->retry_count = src->retry_count; 239 dst->rnr_retry_count = src->rnr_retry_count; 240 dst->srq = src->srq; 241 dst->qp_num = src->qp_num; 242 } 243 244 static void ucma_copy_ud_event(struct ib_device *device, 245 struct rdma_ucm_ud_param *dst, 246 struct rdma_ud_param *src) 247 { 248 if (src->private_data_len) 249 memcpy(dst->private_data, src->private_data, 250 src->private_data_len); 251 dst->private_data_len = src->private_data_len; 252 ib_copy_ah_attr_to_user(device, &dst->ah_attr, &src->ah_attr); 253 dst->qp_num = src->qp_num; 254 dst->qkey = src->qkey; 255 } 256 257 static struct ucma_event *ucma_create_uevent(struct ucma_context *ctx, 258 struct rdma_cm_event *event) 259 { 260 struct ucma_event *uevent; 261 262 uevent = kzalloc(sizeof(*uevent), GFP_KERNEL); 263 if (!uevent) 264 return NULL; 265 266 uevent->ctx = ctx; 267 switch (event->event) { 268 case RDMA_CM_EVENT_MULTICAST_JOIN: 269 case RDMA_CM_EVENT_MULTICAST_ERROR: 270 uevent->mc = (struct ucma_multicast *) 271 event->param.ud.private_data; 272 uevent->resp.uid = uevent->mc->uid; 273 uevent->resp.id = uevent->mc->id; 274 break; 275 default: 276 uevent->resp.uid = ctx->uid; 277 uevent->resp.id = ctx->id; 278 break; 279 } 280 uevent->resp.event = event->event; 281 uevent->resp.status = event->status; 282 if (ctx->cm_id->qp_type == IB_QPT_UD) 283 ucma_copy_ud_event(ctx->cm_id->device, &uevent->resp.param.ud, 284 &event->param.ud); 285 else 286 ucma_copy_conn_event(&uevent->resp.param.conn, 287 &event->param.conn); 288 289 uevent->resp.ece.vendor_id = event->ece.vendor_id; 290 uevent->resp.ece.attr_mod = event->ece.attr_mod; 291 return uevent; 292 } 293 294 static int ucma_connect_event_handler(struct rdma_cm_id *cm_id, 295 struct rdma_cm_event *event) 296 { 297 struct ucma_context *listen_ctx = cm_id->context; 298 struct ucma_context *ctx; 299 struct ucma_event *uevent; 300 301 if (!atomic_add_unless(&listen_ctx->backlog, -1, 0)) 302 return -ENOMEM; 303 ctx = ucma_alloc_ctx(listen_ctx->file); 304 if (!ctx) 305 goto err_backlog; 306 ctx->cm_id = cm_id; 307 308 uevent = ucma_create_uevent(listen_ctx, event); 309 if (!uevent) 310 goto err_alloc; 311 uevent->conn_req_ctx = ctx; 312 uevent->resp.id = ctx->id; 313 314 ctx->cm_id->context = ctx; 315 316 
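	/*
	 * Publish the new ctx: ucma_finish_ctx() below makes it visible in
	 * ctx_table and on the file's ctx_list, and the CONNECT_REQUEST
	 * uevent is queued under the same file mutex so ucma_get_event()
	 * only ever sees a fully initialized context.
	 */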
mutex_lock(&ctx->file->mut); 317 ucma_finish_ctx(ctx); 318 list_add_tail(&uevent->list, &ctx->file->event_list); 319 mutex_unlock(&ctx->file->mut); 320 wake_up_interruptible(&ctx->file->poll_wait); 321 return 0; 322 323 err_alloc: 324 xa_erase(&ctx_table, ctx->id); 325 kfree(ctx); 326 err_backlog: 327 atomic_inc(&listen_ctx->backlog); 328 /* Returning error causes the new ID to be destroyed */ 329 return -ENOMEM; 330 } 331 332 static int ucma_event_handler(struct rdma_cm_id *cm_id, 333 struct rdma_cm_event *event) 334 { 335 struct ucma_event *uevent; 336 struct ucma_context *ctx = cm_id->context; 337 338 if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) 339 return ucma_connect_event_handler(cm_id, event); 340 341 /* 342 * We ignore events for new connections until userspace has set their 343 * context. This can only happen if an error occurs on a new connection 344 * before the user accepts it. This is okay, since the accept will just 345 * fail later. However, we do need to release the underlying HW 346 * resources in case of a device removal event. 347 */ 348 if (ctx->uid) { 349 uevent = ucma_create_uevent(ctx, event); 350 if (!uevent) 351 return 0; 352 353 mutex_lock(&ctx->file->mut); 354 list_add_tail(&uevent->list, &ctx->file->event_list); 355 mutex_unlock(&ctx->file->mut); 356 wake_up_interruptible(&ctx->file->poll_wait); 357 } 358 359 if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL && !ctx->destroying) 360 queue_work(system_unbound_wq, &ctx->close_work); 361 return 0; 362 } 363 364 static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf, 365 int in_len, int out_len) 366 { 367 struct rdma_ucm_get_event cmd; 368 struct ucma_event *uevent; 369 370 /* 371 * Old 32 bit user space does not send the 4 byte padding in the 372 * reserved field. We don't care, allow it to keep working. 
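	 * Such a client passes an out_len equal to sizeof(resp) minus the
	 * trailing reserved padding (and, for clients that predate ECE
	 * reporting, minus the ece field as well); the min_t() on the
	 * copy_to_user() below then simply truncates the response to the
	 * length the caller asked for.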
373 */ 374 if (out_len < sizeof(uevent->resp) - sizeof(uevent->resp.reserved) - 375 sizeof(uevent->resp.ece)) 376 return -ENOSPC; 377 378 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 379 return -EFAULT; 380 381 mutex_lock(&file->mut); 382 while (list_empty(&file->event_list)) { 383 mutex_unlock(&file->mut); 384 385 if (file->filp->f_flags & O_NONBLOCK) 386 return -EAGAIN; 387 388 if (wait_event_interruptible(file->poll_wait, 389 !list_empty(&file->event_list))) 390 return -ERESTARTSYS; 391 392 mutex_lock(&file->mut); 393 } 394 395 uevent = list_first_entry(&file->event_list, struct ucma_event, list); 396 397 if (copy_to_user(u64_to_user_ptr(cmd.response), 398 &uevent->resp, 399 min_t(size_t, out_len, sizeof(uevent->resp)))) { 400 mutex_unlock(&file->mut); 401 return -EFAULT; 402 } 403 404 list_del(&uevent->list); 405 uevent->ctx->events_reported++; 406 if (uevent->mc) 407 uevent->mc->events_reported++; 408 if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) 409 atomic_inc(&uevent->ctx->backlog); 410 mutex_unlock(&file->mut); 411 412 kfree(uevent); 413 return 0; 414 } 415 416 static int ucma_get_qp_type(struct rdma_ucm_create_id *cmd, enum ib_qp_type *qp_type) 417 { 418 switch (cmd->ps) { 419 case RDMA_PS_TCP: 420 *qp_type = IB_QPT_RC; 421 return 0; 422 case RDMA_PS_UDP: 423 case RDMA_PS_IPOIB: 424 *qp_type = IB_QPT_UD; 425 return 0; 426 case RDMA_PS_IB: 427 *qp_type = cmd->qp_type; 428 return 0; 429 default: 430 return -EINVAL; 431 } 432 } 433 434 static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf, 435 int in_len, int out_len) 436 { 437 struct rdma_ucm_create_id cmd; 438 struct rdma_ucm_create_id_resp resp; 439 struct ucma_context *ctx; 440 struct rdma_cm_id *cm_id; 441 enum ib_qp_type qp_type; 442 int ret; 443 444 if (out_len < sizeof(resp)) 445 return -ENOSPC; 446 447 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 448 return -EFAULT; 449 450 ret = ucma_get_qp_type(&cmd, &qp_type); 451 if (ret) 452 return ret; 453 454 ctx = ucma_alloc_ctx(file); 455 if (!ctx) 456 return -ENOMEM; 457 458 ctx->uid = cmd.uid; 459 cm_id = rdma_create_user_id(ucma_event_handler, ctx, cmd.ps, qp_type); 460 if (IS_ERR(cm_id)) { 461 ret = PTR_ERR(cm_id); 462 goto err1; 463 } 464 ctx->cm_id = cm_id; 465 466 resp.id = ctx->id; 467 if (copy_to_user(u64_to_user_ptr(cmd.response), 468 &resp, sizeof(resp))) { 469 xa_erase(&ctx_table, ctx->id); 470 __destroy_id(ctx); 471 return -EFAULT; 472 } 473 474 mutex_lock(&file->mut); 475 ucma_finish_ctx(ctx); 476 mutex_unlock(&file->mut); 477 return 0; 478 479 err1: 480 xa_erase(&ctx_table, ctx->id); 481 kfree(ctx); 482 return ret; 483 } 484 485 static void ucma_cleanup_multicast(struct ucma_context *ctx) 486 { 487 struct ucma_multicast *mc; 488 unsigned long index; 489 490 xa_for_each(&multicast_table, index, mc) { 491 if (mc->ctx != ctx) 492 continue; 493 /* 494 * At this point mc->ctx->ref is 0 so the mc cannot leave the 495 * lock on the reader and this is enough serialization 496 */ 497 xa_erase(&multicast_table, index); 498 kfree(mc); 499 } 500 } 501 502 static void ucma_cleanup_mc_events(struct ucma_multicast *mc) 503 { 504 struct ucma_event *uevent, *tmp; 505 506 rdma_lock_handler(mc->ctx->cm_id); 507 mutex_lock(&mc->ctx->file->mut); 508 list_for_each_entry_safe(uevent, tmp, &mc->ctx->file->event_list, list) { 509 if (uevent->mc != mc) 510 continue; 511 512 list_del(&uevent->list); 513 kfree(uevent); 514 } 515 mutex_unlock(&mc->ctx->file->mut); 516 rdma_unlock_handler(mc->ctx->cm_id); 517 } 518 519 /* 520 * ucma_free_ctx is called after 
 * the underlying rdma CM-ID is destroyed. At this point, no new events will
 * be reported from the hardware. However, we still need to clean up the UCMA
 * context for this ID. Specifically, there might be events that have not yet
 * been consumed by the user space software. These are moved off the file's
 * event list under the file mutex. After that we release them as needed.
 */
static int ucma_free_ctx(struct ucma_context *ctx)
{
	int events_reported;
	struct ucma_event *uevent, *tmp;
	LIST_HEAD(list);

	ucma_cleanup_multicast(ctx);

	/* Cleanup events not yet reported to the user. */
	mutex_lock(&ctx->file->mut);
	list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) {
		if (uevent->ctx == ctx || uevent->conn_req_ctx == ctx)
			list_move_tail(&uevent->list, &list);
	}
	list_del(&ctx->list);
	events_reported = ctx->events_reported;
	mutex_unlock(&ctx->file->mut);

	/*
	 * If this was a listening ID then any connections spawned from it
	 * that have not been delivered to userspace are cleaned up too.
	 * Must be done outside any locks.
	 */
	list_for_each_entry_safe(uevent, tmp, &list, list) {
		list_del(&uevent->list);
		if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST &&
		    uevent->conn_req_ctx != ctx)
			__destroy_id(uevent->conn_req_ctx);
		kfree(uevent);
	}

	mutex_destroy(&ctx->mutex);
	kfree(ctx);
	return events_reported;
}

static int __destroy_id(struct ucma_context *ctx)
{
	/*
	 * If the refcount is already 0 then ucma_close_id() has already
	 * destroyed the cm_id, otherwise holding the refcount keeps cm_id
	 * valid. Prevent queue_work() from being called.
	 */
	if (refcount_inc_not_zero(&ctx->ref)) {
		rdma_lock_handler(ctx->cm_id);
		ctx->destroying = 1;
		rdma_unlock_handler(ctx->cm_id);
		ucma_put_ctx(ctx);
	}

	cancel_work_sync(&ctx->close_work);
	/* At this point it's guaranteed that there is no inflight closing task */
	if (ctx->cm_id)
		ucma_close_id(&ctx->close_work);
	return ucma_free_ctx(ctx);
}

static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
			       int in_len, int out_len)
{
	struct rdma_ucm_destroy_id cmd;
	struct rdma_ucm_destroy_id_resp resp;
	struct ucma_context *ctx;
	int ret = 0;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	xa_lock(&ctx_table);
	ctx = _ucma_find_context(cmd.id, file);
	if (!IS_ERR(ctx))
		__xa_erase(&ctx_table, ctx->id);
	xa_unlock(&ctx_table);

	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	resp.events_reported = __destroy_id(ctx);
	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &resp, sizeof(resp)))
		ret = -EFAULT;

	return ret;
}

static ssize_t ucma_bind_ip(struct ucma_file *file, const char __user *inbuf,
			    int in_len, int out_len)
{
	struct rdma_ucm_bind_ip cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (!rdma_addr_size_in6(&cmd.addr))
		return -EINVAL;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr);
	mutex_unlock(&ctx->mutex);

	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_bind(struct ucma_file *file, const char __user *inbuf,
			 int in_len, int
out_len) 641 { 642 struct rdma_ucm_bind cmd; 643 struct ucma_context *ctx; 644 int ret; 645 646 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 647 return -EFAULT; 648 649 if (cmd.reserved || !cmd.addr_size || 650 cmd.addr_size != rdma_addr_size_kss(&cmd.addr)) 651 return -EINVAL; 652 653 ctx = ucma_get_ctx(file, cmd.id); 654 if (IS_ERR(ctx)) 655 return PTR_ERR(ctx); 656 657 mutex_lock(&ctx->mutex); 658 ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr); 659 mutex_unlock(&ctx->mutex); 660 ucma_put_ctx(ctx); 661 return ret; 662 } 663 664 static ssize_t ucma_resolve_ip(struct ucma_file *file, 665 const char __user *inbuf, 666 int in_len, int out_len) 667 { 668 struct rdma_ucm_resolve_ip cmd; 669 struct ucma_context *ctx; 670 int ret; 671 672 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 673 return -EFAULT; 674 675 if ((cmd.src_addr.sin6_family && !rdma_addr_size_in6(&cmd.src_addr)) || 676 !rdma_addr_size_in6(&cmd.dst_addr)) 677 return -EINVAL; 678 679 ctx = ucma_get_ctx(file, cmd.id); 680 if (IS_ERR(ctx)) 681 return PTR_ERR(ctx); 682 683 mutex_lock(&ctx->mutex); 684 ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr, 685 (struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms); 686 mutex_unlock(&ctx->mutex); 687 ucma_put_ctx(ctx); 688 return ret; 689 } 690 691 static ssize_t ucma_resolve_addr(struct ucma_file *file, 692 const char __user *inbuf, 693 int in_len, int out_len) 694 { 695 struct rdma_ucm_resolve_addr cmd; 696 struct ucma_context *ctx; 697 int ret; 698 699 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 700 return -EFAULT; 701 702 if (cmd.reserved || 703 (cmd.src_size && (cmd.src_size != rdma_addr_size_kss(&cmd.src_addr))) || 704 !cmd.dst_size || (cmd.dst_size != rdma_addr_size_kss(&cmd.dst_addr))) 705 return -EINVAL; 706 707 ctx = ucma_get_ctx(file, cmd.id); 708 if (IS_ERR(ctx)) 709 return PTR_ERR(ctx); 710 711 mutex_lock(&ctx->mutex); 712 ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr, 713 (struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms); 714 mutex_unlock(&ctx->mutex); 715 ucma_put_ctx(ctx); 716 return ret; 717 } 718 719 static ssize_t ucma_resolve_route(struct ucma_file *file, 720 const char __user *inbuf, 721 int in_len, int out_len) 722 { 723 struct rdma_ucm_resolve_route cmd; 724 struct ucma_context *ctx; 725 int ret; 726 727 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 728 return -EFAULT; 729 730 ctx = ucma_get_ctx_dev(file, cmd.id); 731 if (IS_ERR(ctx)) 732 return PTR_ERR(ctx); 733 734 mutex_lock(&ctx->mutex); 735 ret = rdma_resolve_route(ctx->cm_id, cmd.timeout_ms); 736 mutex_unlock(&ctx->mutex); 737 ucma_put_ctx(ctx); 738 return ret; 739 } 740 741 static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp, 742 struct rdma_route *route) 743 { 744 struct rdma_dev_addr *dev_addr; 745 746 resp->num_paths = route->num_paths; 747 switch (route->num_paths) { 748 case 0: 749 dev_addr = &route->addr.dev_addr; 750 rdma_addr_get_dgid(dev_addr, 751 (union ib_gid *) &resp->ib_route[0].dgid); 752 rdma_addr_get_sgid(dev_addr, 753 (union ib_gid *) &resp->ib_route[0].sgid); 754 resp->ib_route[0].pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); 755 break; 756 case 2: 757 ib_copy_path_rec_to_user(&resp->ib_route[1], 758 &route->path_rec[1]); 759 fallthrough; 760 case 1: 761 ib_copy_path_rec_to_user(&resp->ib_route[0], 762 &route->path_rec[0]); 763 break; 764 default: 765 break; 766 } 767 } 768 769 static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp, 770 struct rdma_route *route) 771 { 772 773 resp->num_paths = 
route->num_paths; 774 switch (route->num_paths) { 775 case 0: 776 rdma_ip2gid((struct sockaddr *)&route->addr.dst_addr, 777 (union ib_gid *)&resp->ib_route[0].dgid); 778 rdma_ip2gid((struct sockaddr *)&route->addr.src_addr, 779 (union ib_gid *)&resp->ib_route[0].sgid); 780 resp->ib_route[0].pkey = cpu_to_be16(0xffff); 781 break; 782 case 2: 783 ib_copy_path_rec_to_user(&resp->ib_route[1], 784 &route->path_rec[1]); 785 fallthrough; 786 case 1: 787 ib_copy_path_rec_to_user(&resp->ib_route[0], 788 &route->path_rec[0]); 789 break; 790 default: 791 break; 792 } 793 } 794 795 static void ucma_copy_iw_route(struct rdma_ucm_query_route_resp *resp, 796 struct rdma_route *route) 797 { 798 struct rdma_dev_addr *dev_addr; 799 800 dev_addr = &route->addr.dev_addr; 801 rdma_addr_get_dgid(dev_addr, (union ib_gid *) &resp->ib_route[0].dgid); 802 rdma_addr_get_sgid(dev_addr, (union ib_gid *) &resp->ib_route[0].sgid); 803 } 804 805 static ssize_t ucma_query_route(struct ucma_file *file, 806 const char __user *inbuf, 807 int in_len, int out_len) 808 { 809 struct rdma_ucm_query cmd; 810 struct rdma_ucm_query_route_resp resp; 811 struct ucma_context *ctx; 812 struct sockaddr *addr; 813 int ret = 0; 814 815 if (out_len < offsetof(struct rdma_ucm_query_route_resp, ibdev_index)) 816 return -ENOSPC; 817 818 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 819 return -EFAULT; 820 821 ctx = ucma_get_ctx(file, cmd.id); 822 if (IS_ERR(ctx)) 823 return PTR_ERR(ctx); 824 825 mutex_lock(&ctx->mutex); 826 memset(&resp, 0, sizeof resp); 827 addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr; 828 memcpy(&resp.src_addr, addr, addr->sa_family == AF_INET ? 829 sizeof(struct sockaddr_in) : 830 sizeof(struct sockaddr_in6)); 831 addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr; 832 memcpy(&resp.dst_addr, addr, addr->sa_family == AF_INET ? 
833 sizeof(struct sockaddr_in) : 834 sizeof(struct sockaddr_in6)); 835 if (!ctx->cm_id->device) 836 goto out; 837 838 resp.node_guid = (__force __u64) ctx->cm_id->device->node_guid; 839 resp.ibdev_index = ctx->cm_id->device->index; 840 resp.port_num = ctx->cm_id->port_num; 841 842 if (rdma_cap_ib_sa(ctx->cm_id->device, ctx->cm_id->port_num)) 843 ucma_copy_ib_route(&resp, &ctx->cm_id->route); 844 else if (rdma_protocol_roce(ctx->cm_id->device, ctx->cm_id->port_num)) 845 ucma_copy_iboe_route(&resp, &ctx->cm_id->route); 846 else if (rdma_protocol_iwarp(ctx->cm_id->device, ctx->cm_id->port_num)) 847 ucma_copy_iw_route(&resp, &ctx->cm_id->route); 848 849 out: 850 mutex_unlock(&ctx->mutex); 851 if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, 852 min_t(size_t, out_len, sizeof(resp)))) 853 ret = -EFAULT; 854 855 ucma_put_ctx(ctx); 856 return ret; 857 } 858 859 static void ucma_query_device_addr(struct rdma_cm_id *cm_id, 860 struct rdma_ucm_query_addr_resp *resp) 861 { 862 if (!cm_id->device) 863 return; 864 865 resp->node_guid = (__force __u64) cm_id->device->node_guid; 866 resp->ibdev_index = cm_id->device->index; 867 resp->port_num = cm_id->port_num; 868 resp->pkey = (__force __u16) cpu_to_be16( 869 ib_addr_get_pkey(&cm_id->route.addr.dev_addr)); 870 } 871 872 static ssize_t ucma_query_addr(struct ucma_context *ctx, 873 void __user *response, int out_len) 874 { 875 struct rdma_ucm_query_addr_resp resp; 876 struct sockaddr *addr; 877 int ret = 0; 878 879 if (out_len < offsetof(struct rdma_ucm_query_addr_resp, ibdev_index)) 880 return -ENOSPC; 881 882 memset(&resp, 0, sizeof resp); 883 884 addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr; 885 resp.src_size = rdma_addr_size(addr); 886 memcpy(&resp.src_addr, addr, resp.src_size); 887 888 addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr; 889 resp.dst_size = rdma_addr_size(addr); 890 memcpy(&resp.dst_addr, addr, resp.dst_size); 891 892 ucma_query_device_addr(ctx->cm_id, &resp); 893 894 if (copy_to_user(response, &resp, min_t(size_t, out_len, sizeof(resp)))) 895 ret = -EFAULT; 896 897 return ret; 898 } 899 900 static ssize_t ucma_query_path(struct ucma_context *ctx, 901 void __user *response, int out_len) 902 { 903 struct rdma_ucm_query_path_resp *resp; 904 int i, ret = 0; 905 906 if (out_len < sizeof(*resp)) 907 return -ENOSPC; 908 909 resp = kzalloc(out_len, GFP_KERNEL); 910 if (!resp) 911 return -ENOMEM; 912 913 resp->num_paths = ctx->cm_id->route.num_paths; 914 for (i = 0, out_len -= sizeof(*resp); 915 i < resp->num_paths && out_len > sizeof(struct ib_path_rec_data); 916 i++, out_len -= sizeof(struct ib_path_rec_data)) { 917 struct sa_path_rec *rec = &ctx->cm_id->route.path_rec[i]; 918 919 resp->path_data[i].flags = IB_PATH_GMP | IB_PATH_PRIMARY | 920 IB_PATH_BIDIRECTIONAL; 921 if (rec->rec_type == SA_PATH_REC_TYPE_OPA) { 922 struct sa_path_rec ib; 923 924 sa_convert_path_opa_to_ib(&ib, rec); 925 ib_sa_pack_path(&ib, &resp->path_data[i].path_rec); 926 927 } else { 928 ib_sa_pack_path(rec, &resp->path_data[i].path_rec); 929 } 930 } 931 932 if (copy_to_user(response, resp, struct_size(resp, path_data, i))) 933 ret = -EFAULT; 934 935 kfree(resp); 936 return ret; 937 } 938 939 static ssize_t ucma_query_gid(struct ucma_context *ctx, 940 void __user *response, int out_len) 941 { 942 struct rdma_ucm_query_addr_resp resp; 943 struct sockaddr_ib *addr; 944 int ret = 0; 945 946 if (out_len < offsetof(struct rdma_ucm_query_addr_resp, ibdev_index)) 947 return -ENOSPC; 948 949 memset(&resp, 0, sizeof resp); 950 951 
ucma_query_device_addr(ctx->cm_id, &resp); 952 953 addr = (struct sockaddr_ib *) &resp.src_addr; 954 resp.src_size = sizeof(*addr); 955 if (ctx->cm_id->route.addr.src_addr.ss_family == AF_IB) { 956 memcpy(addr, &ctx->cm_id->route.addr.src_addr, resp.src_size); 957 } else { 958 addr->sib_family = AF_IB; 959 addr->sib_pkey = (__force __be16) resp.pkey; 960 rdma_read_gids(ctx->cm_id, (union ib_gid *)&addr->sib_addr, 961 NULL); 962 addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *) 963 &ctx->cm_id->route.addr.src_addr); 964 } 965 966 addr = (struct sockaddr_ib *) &resp.dst_addr; 967 resp.dst_size = sizeof(*addr); 968 if (ctx->cm_id->route.addr.dst_addr.ss_family == AF_IB) { 969 memcpy(addr, &ctx->cm_id->route.addr.dst_addr, resp.dst_size); 970 } else { 971 addr->sib_family = AF_IB; 972 addr->sib_pkey = (__force __be16) resp.pkey; 973 rdma_read_gids(ctx->cm_id, NULL, 974 (union ib_gid *)&addr->sib_addr); 975 addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *) 976 &ctx->cm_id->route.addr.dst_addr); 977 } 978 979 if (copy_to_user(response, &resp, min_t(size_t, out_len, sizeof(resp)))) 980 ret = -EFAULT; 981 982 return ret; 983 } 984 985 static ssize_t ucma_query(struct ucma_file *file, 986 const char __user *inbuf, 987 int in_len, int out_len) 988 { 989 struct rdma_ucm_query cmd; 990 struct ucma_context *ctx; 991 void __user *response; 992 int ret; 993 994 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 995 return -EFAULT; 996 997 response = u64_to_user_ptr(cmd.response); 998 ctx = ucma_get_ctx(file, cmd.id); 999 if (IS_ERR(ctx)) 1000 return PTR_ERR(ctx); 1001 1002 mutex_lock(&ctx->mutex); 1003 switch (cmd.option) { 1004 case RDMA_USER_CM_QUERY_ADDR: 1005 ret = ucma_query_addr(ctx, response, out_len); 1006 break; 1007 case RDMA_USER_CM_QUERY_PATH: 1008 ret = ucma_query_path(ctx, response, out_len); 1009 break; 1010 case RDMA_USER_CM_QUERY_GID: 1011 ret = ucma_query_gid(ctx, response, out_len); 1012 break; 1013 default: 1014 ret = -ENOSYS; 1015 break; 1016 } 1017 mutex_unlock(&ctx->mutex); 1018 1019 ucma_put_ctx(ctx); 1020 return ret; 1021 } 1022 1023 static void ucma_copy_conn_param(struct rdma_cm_id *id, 1024 struct rdma_conn_param *dst, 1025 struct rdma_ucm_conn_param *src) 1026 { 1027 dst->private_data = src->private_data; 1028 dst->private_data_len = src->private_data_len; 1029 dst->responder_resources =src->responder_resources; 1030 dst->initiator_depth = src->initiator_depth; 1031 dst->flow_control = src->flow_control; 1032 dst->retry_count = src->retry_count; 1033 dst->rnr_retry_count = src->rnr_retry_count; 1034 dst->srq = src->srq; 1035 dst->qp_num = src->qp_num & 0xFFFFFF; 1036 dst->qkey = (id->route.addr.src_addr.ss_family == AF_IB) ? 
src->qkey : 0; 1037 } 1038 1039 static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf, 1040 int in_len, int out_len) 1041 { 1042 struct rdma_conn_param conn_param; 1043 struct rdma_ucm_ece ece = {}; 1044 struct rdma_ucm_connect cmd; 1045 struct ucma_context *ctx; 1046 size_t in_size; 1047 int ret; 1048 1049 if (in_len < offsetofend(typeof(cmd), reserved)) 1050 return -EINVAL; 1051 in_size = min_t(size_t, in_len, sizeof(cmd)); 1052 if (copy_from_user(&cmd, inbuf, in_size)) 1053 return -EFAULT; 1054 1055 if (!cmd.conn_param.valid) 1056 return -EINVAL; 1057 1058 ctx = ucma_get_ctx_dev(file, cmd.id); 1059 if (IS_ERR(ctx)) 1060 return PTR_ERR(ctx); 1061 1062 ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param); 1063 if (offsetofend(typeof(cmd), ece) <= in_size) { 1064 ece.vendor_id = cmd.ece.vendor_id; 1065 ece.attr_mod = cmd.ece.attr_mod; 1066 } 1067 1068 mutex_lock(&ctx->mutex); 1069 ret = rdma_connect_ece(ctx->cm_id, &conn_param, &ece); 1070 mutex_unlock(&ctx->mutex); 1071 ucma_put_ctx(ctx); 1072 return ret; 1073 } 1074 1075 static ssize_t ucma_listen(struct ucma_file *file, const char __user *inbuf, 1076 int in_len, int out_len) 1077 { 1078 struct rdma_ucm_listen cmd; 1079 struct ucma_context *ctx; 1080 int ret; 1081 1082 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 1083 return -EFAULT; 1084 1085 ctx = ucma_get_ctx(file, cmd.id); 1086 if (IS_ERR(ctx)) 1087 return PTR_ERR(ctx); 1088 1089 if (cmd.backlog <= 0 || cmd.backlog > max_backlog) 1090 cmd.backlog = max_backlog; 1091 atomic_set(&ctx->backlog, cmd.backlog); 1092 1093 mutex_lock(&ctx->mutex); 1094 ret = rdma_listen(ctx->cm_id, cmd.backlog); 1095 mutex_unlock(&ctx->mutex); 1096 ucma_put_ctx(ctx); 1097 return ret; 1098 } 1099 1100 static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf, 1101 int in_len, int out_len) 1102 { 1103 struct rdma_ucm_accept cmd; 1104 struct rdma_conn_param conn_param; 1105 struct rdma_ucm_ece ece = {}; 1106 struct ucma_context *ctx; 1107 size_t in_size; 1108 int ret; 1109 1110 if (in_len < offsetofend(typeof(cmd), reserved)) 1111 return -EINVAL; 1112 in_size = min_t(size_t, in_len, sizeof(cmd)); 1113 if (copy_from_user(&cmd, inbuf, in_size)) 1114 return -EFAULT; 1115 1116 ctx = ucma_get_ctx_dev(file, cmd.id); 1117 if (IS_ERR(ctx)) 1118 return PTR_ERR(ctx); 1119 1120 if (offsetofend(typeof(cmd), ece) <= in_size) { 1121 ece.vendor_id = cmd.ece.vendor_id; 1122 ece.attr_mod = cmd.ece.attr_mod; 1123 } 1124 1125 if (cmd.conn_param.valid) { 1126 ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param); 1127 mutex_lock(&ctx->mutex); 1128 rdma_lock_handler(ctx->cm_id); 1129 ret = rdma_accept_ece(ctx->cm_id, &conn_param, &ece); 1130 if (!ret) { 1131 /* The uid must be set atomically with the handler */ 1132 ctx->uid = cmd.uid; 1133 } 1134 rdma_unlock_handler(ctx->cm_id); 1135 mutex_unlock(&ctx->mutex); 1136 } else { 1137 mutex_lock(&ctx->mutex); 1138 rdma_lock_handler(ctx->cm_id); 1139 ret = rdma_accept_ece(ctx->cm_id, NULL, &ece); 1140 rdma_unlock_handler(ctx->cm_id); 1141 mutex_unlock(&ctx->mutex); 1142 } 1143 ucma_put_ctx(ctx); 1144 return ret; 1145 } 1146 1147 static ssize_t ucma_reject(struct ucma_file *file, const char __user *inbuf, 1148 int in_len, int out_len) 1149 { 1150 struct rdma_ucm_reject cmd; 1151 struct ucma_context *ctx; 1152 int ret; 1153 1154 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 1155 return -EFAULT; 1156 1157 if (!cmd.reason) 1158 cmd.reason = IB_CM_REJ_CONSUMER_DEFINED; 1159 1160 switch (cmd.reason) { 1161 case 
IB_CM_REJ_CONSUMER_DEFINED: 1162 case IB_CM_REJ_VENDOR_OPTION_NOT_SUPPORTED: 1163 break; 1164 default: 1165 return -EINVAL; 1166 } 1167 1168 ctx = ucma_get_ctx_dev(file, cmd.id); 1169 if (IS_ERR(ctx)) 1170 return PTR_ERR(ctx); 1171 1172 mutex_lock(&ctx->mutex); 1173 ret = rdma_reject(ctx->cm_id, cmd.private_data, cmd.private_data_len, 1174 cmd.reason); 1175 mutex_unlock(&ctx->mutex); 1176 ucma_put_ctx(ctx); 1177 return ret; 1178 } 1179 1180 static ssize_t ucma_disconnect(struct ucma_file *file, const char __user *inbuf, 1181 int in_len, int out_len) 1182 { 1183 struct rdma_ucm_disconnect cmd; 1184 struct ucma_context *ctx; 1185 int ret; 1186 1187 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 1188 return -EFAULT; 1189 1190 ctx = ucma_get_ctx_dev(file, cmd.id); 1191 if (IS_ERR(ctx)) 1192 return PTR_ERR(ctx); 1193 1194 mutex_lock(&ctx->mutex); 1195 ret = rdma_disconnect(ctx->cm_id); 1196 mutex_unlock(&ctx->mutex); 1197 ucma_put_ctx(ctx); 1198 return ret; 1199 } 1200 1201 static ssize_t ucma_init_qp_attr(struct ucma_file *file, 1202 const char __user *inbuf, 1203 int in_len, int out_len) 1204 { 1205 struct rdma_ucm_init_qp_attr cmd; 1206 struct ib_uverbs_qp_attr resp; 1207 struct ucma_context *ctx; 1208 struct ib_qp_attr qp_attr; 1209 int ret; 1210 1211 if (out_len < sizeof(resp)) 1212 return -ENOSPC; 1213 1214 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 1215 return -EFAULT; 1216 1217 if (cmd.qp_state > IB_QPS_ERR) 1218 return -EINVAL; 1219 1220 ctx = ucma_get_ctx_dev(file, cmd.id); 1221 if (IS_ERR(ctx)) 1222 return PTR_ERR(ctx); 1223 1224 resp.qp_attr_mask = 0; 1225 memset(&qp_attr, 0, sizeof qp_attr); 1226 qp_attr.qp_state = cmd.qp_state; 1227 mutex_lock(&ctx->mutex); 1228 ret = rdma_init_qp_attr(ctx->cm_id, &qp_attr, &resp.qp_attr_mask); 1229 mutex_unlock(&ctx->mutex); 1230 if (ret) 1231 goto out; 1232 1233 ib_copy_qp_attr_to_user(ctx->cm_id->device, &resp, &qp_attr); 1234 if (copy_to_user(u64_to_user_ptr(cmd.response), 1235 &resp, sizeof(resp))) 1236 ret = -EFAULT; 1237 1238 out: 1239 ucma_put_ctx(ctx); 1240 return ret; 1241 } 1242 1243 static int ucma_set_option_id(struct ucma_context *ctx, int optname, 1244 void *optval, size_t optlen) 1245 { 1246 int ret = 0; 1247 1248 switch (optname) { 1249 case RDMA_OPTION_ID_TOS: 1250 if (optlen != sizeof(u8)) { 1251 ret = -EINVAL; 1252 break; 1253 } 1254 rdma_set_service_type(ctx->cm_id, *((u8 *) optval)); 1255 break; 1256 case RDMA_OPTION_ID_REUSEADDR: 1257 if (optlen != sizeof(int)) { 1258 ret = -EINVAL; 1259 break; 1260 } 1261 ret = rdma_set_reuseaddr(ctx->cm_id, *((int *) optval) ? 1 : 0); 1262 break; 1263 case RDMA_OPTION_ID_AFONLY: 1264 if (optlen != sizeof(int)) { 1265 ret = -EINVAL; 1266 break; 1267 } 1268 ret = rdma_set_afonly(ctx->cm_id, *((int *) optval) ? 
1 : 0); 1269 break; 1270 case RDMA_OPTION_ID_ACK_TIMEOUT: 1271 if (optlen != sizeof(u8)) { 1272 ret = -EINVAL; 1273 break; 1274 } 1275 ret = rdma_set_ack_timeout(ctx->cm_id, *((u8 *)optval)); 1276 break; 1277 default: 1278 ret = -ENOSYS; 1279 } 1280 1281 return ret; 1282 } 1283 1284 static int ucma_set_ib_path(struct ucma_context *ctx, 1285 struct ib_path_rec_data *path_data, size_t optlen) 1286 { 1287 struct sa_path_rec sa_path; 1288 struct rdma_cm_event event; 1289 int ret; 1290 1291 if (optlen % sizeof(*path_data)) 1292 return -EINVAL; 1293 1294 for (; optlen; optlen -= sizeof(*path_data), path_data++) { 1295 if (path_data->flags == (IB_PATH_GMP | IB_PATH_PRIMARY | 1296 IB_PATH_BIDIRECTIONAL)) 1297 break; 1298 } 1299 1300 if (!optlen) 1301 return -EINVAL; 1302 1303 if (!ctx->cm_id->device) 1304 return -EINVAL; 1305 1306 memset(&sa_path, 0, sizeof(sa_path)); 1307 1308 sa_path.rec_type = SA_PATH_REC_TYPE_IB; 1309 ib_sa_unpack_path(path_data->path_rec, &sa_path); 1310 1311 if (rdma_cap_opa_ah(ctx->cm_id->device, ctx->cm_id->port_num)) { 1312 struct sa_path_rec opa; 1313 1314 sa_convert_path_ib_to_opa(&opa, &sa_path); 1315 mutex_lock(&ctx->mutex); 1316 ret = rdma_set_ib_path(ctx->cm_id, &opa); 1317 mutex_unlock(&ctx->mutex); 1318 } else { 1319 mutex_lock(&ctx->mutex); 1320 ret = rdma_set_ib_path(ctx->cm_id, &sa_path); 1321 mutex_unlock(&ctx->mutex); 1322 } 1323 if (ret) 1324 return ret; 1325 1326 memset(&event, 0, sizeof event); 1327 event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; 1328 return ucma_event_handler(ctx->cm_id, &event); 1329 } 1330 1331 static int ucma_set_option_ib(struct ucma_context *ctx, int optname, 1332 void *optval, size_t optlen) 1333 { 1334 int ret; 1335 1336 switch (optname) { 1337 case RDMA_OPTION_IB_PATH: 1338 ret = ucma_set_ib_path(ctx, optval, optlen); 1339 break; 1340 default: 1341 ret = -ENOSYS; 1342 } 1343 1344 return ret; 1345 } 1346 1347 static int ucma_set_option_level(struct ucma_context *ctx, int level, 1348 int optname, void *optval, size_t optlen) 1349 { 1350 int ret; 1351 1352 switch (level) { 1353 case RDMA_OPTION_ID: 1354 mutex_lock(&ctx->mutex); 1355 ret = ucma_set_option_id(ctx, optname, optval, optlen); 1356 mutex_unlock(&ctx->mutex); 1357 break; 1358 case RDMA_OPTION_IB: 1359 ret = ucma_set_option_ib(ctx, optname, optval, optlen); 1360 break; 1361 default: 1362 ret = -ENOSYS; 1363 } 1364 1365 return ret; 1366 } 1367 1368 static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf, 1369 int in_len, int out_len) 1370 { 1371 struct rdma_ucm_set_option cmd; 1372 struct ucma_context *ctx; 1373 void *optval; 1374 int ret; 1375 1376 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 1377 return -EFAULT; 1378 1379 if (unlikely(cmd.optlen > KMALLOC_MAX_SIZE)) 1380 return -EINVAL; 1381 1382 ctx = ucma_get_ctx(file, cmd.id); 1383 if (IS_ERR(ctx)) 1384 return PTR_ERR(ctx); 1385 1386 optval = memdup_user(u64_to_user_ptr(cmd.optval), 1387 cmd.optlen); 1388 if (IS_ERR(optval)) { 1389 ret = PTR_ERR(optval); 1390 goto out; 1391 } 1392 1393 ret = ucma_set_option_level(ctx, cmd.level, cmd.optname, optval, 1394 cmd.optlen); 1395 kfree(optval); 1396 1397 out: 1398 ucma_put_ctx(ctx); 1399 return ret; 1400 } 1401 1402 static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf, 1403 int in_len, int out_len) 1404 { 1405 struct rdma_ucm_notify cmd; 1406 struct ucma_context *ctx; 1407 int ret = -EINVAL; 1408 1409 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 1410 return -EFAULT; 1411 1412 ctx = ucma_get_ctx(file, cmd.id); 1413 if (IS_ERR(ctx)) 
1414 return PTR_ERR(ctx); 1415 1416 mutex_lock(&ctx->mutex); 1417 if (ctx->cm_id->device) 1418 ret = rdma_notify(ctx->cm_id, (enum ib_event_type)cmd.event); 1419 mutex_unlock(&ctx->mutex); 1420 1421 ucma_put_ctx(ctx); 1422 return ret; 1423 } 1424 1425 static ssize_t ucma_process_join(struct ucma_file *file, 1426 struct rdma_ucm_join_mcast *cmd, int out_len) 1427 { 1428 struct rdma_ucm_create_id_resp resp; 1429 struct ucma_context *ctx; 1430 struct ucma_multicast *mc; 1431 struct sockaddr *addr; 1432 int ret; 1433 u8 join_state; 1434 1435 if (out_len < sizeof(resp)) 1436 return -ENOSPC; 1437 1438 addr = (struct sockaddr *) &cmd->addr; 1439 if (cmd->addr_size != rdma_addr_size(addr)) 1440 return -EINVAL; 1441 1442 if (cmd->join_flags == RDMA_MC_JOIN_FLAG_FULLMEMBER) 1443 join_state = BIT(FULLMEMBER_JOIN); 1444 else if (cmd->join_flags == RDMA_MC_JOIN_FLAG_SENDONLY_FULLMEMBER) 1445 join_state = BIT(SENDONLY_FULLMEMBER_JOIN); 1446 else 1447 return -EINVAL; 1448 1449 ctx = ucma_get_ctx_dev(file, cmd->id); 1450 if (IS_ERR(ctx)) 1451 return PTR_ERR(ctx); 1452 1453 mc = kzalloc(sizeof(*mc), GFP_KERNEL); 1454 if (!mc) { 1455 ret = -ENOMEM; 1456 goto err_put_ctx; 1457 } 1458 1459 mc->ctx = ctx; 1460 mc->join_state = join_state; 1461 mc->uid = cmd->uid; 1462 memcpy(&mc->addr, addr, cmd->addr_size); 1463 1464 if (xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b, 1465 GFP_KERNEL)) { 1466 ret = -ENOMEM; 1467 goto err_free_mc; 1468 } 1469 1470 mutex_lock(&ctx->mutex); 1471 ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr, 1472 join_state, mc); 1473 mutex_unlock(&ctx->mutex); 1474 if (ret) 1475 goto err_xa_erase; 1476 1477 resp.id = mc->id; 1478 if (copy_to_user(u64_to_user_ptr(cmd->response), 1479 &resp, sizeof(resp))) { 1480 ret = -EFAULT; 1481 goto err_leave_multicast; 1482 } 1483 1484 xa_store(&multicast_table, mc->id, mc, 0); 1485 1486 ucma_put_ctx(ctx); 1487 return 0; 1488 1489 err_leave_multicast: 1490 mutex_lock(&ctx->mutex); 1491 rdma_leave_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr); 1492 mutex_unlock(&ctx->mutex); 1493 ucma_cleanup_mc_events(mc); 1494 err_xa_erase: 1495 xa_erase(&multicast_table, mc->id); 1496 err_free_mc: 1497 kfree(mc); 1498 err_put_ctx: 1499 ucma_put_ctx(ctx); 1500 return ret; 1501 } 1502 1503 static ssize_t ucma_join_ip_multicast(struct ucma_file *file, 1504 const char __user *inbuf, 1505 int in_len, int out_len) 1506 { 1507 struct rdma_ucm_join_ip_mcast cmd; 1508 struct rdma_ucm_join_mcast join_cmd; 1509 1510 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 1511 return -EFAULT; 1512 1513 join_cmd.response = cmd.response; 1514 join_cmd.uid = cmd.uid; 1515 join_cmd.id = cmd.id; 1516 join_cmd.addr_size = rdma_addr_size_in6(&cmd.addr); 1517 if (!join_cmd.addr_size) 1518 return -EINVAL; 1519 1520 join_cmd.join_flags = RDMA_MC_JOIN_FLAG_FULLMEMBER; 1521 memcpy(&join_cmd.addr, &cmd.addr, join_cmd.addr_size); 1522 1523 return ucma_process_join(file, &join_cmd, out_len); 1524 } 1525 1526 static ssize_t ucma_join_multicast(struct ucma_file *file, 1527 const char __user *inbuf, 1528 int in_len, int out_len) 1529 { 1530 struct rdma_ucm_join_mcast cmd; 1531 1532 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 1533 return -EFAULT; 1534 1535 if (!rdma_addr_size_kss(&cmd.addr)) 1536 return -EINVAL; 1537 1538 return ucma_process_join(file, &cmd, out_len); 1539 } 1540 1541 static ssize_t ucma_leave_multicast(struct ucma_file *file, 1542 const char __user *inbuf, 1543 int in_len, int out_len) 1544 { 1545 struct rdma_ucm_destroy_id cmd; 1546 struct 
rdma_ucm_destroy_id_resp resp; 1547 struct ucma_multicast *mc; 1548 int ret = 0; 1549 1550 if (out_len < sizeof(resp)) 1551 return -ENOSPC; 1552 1553 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 1554 return -EFAULT; 1555 1556 xa_lock(&multicast_table); 1557 mc = xa_load(&multicast_table, cmd.id); 1558 if (!mc) 1559 mc = ERR_PTR(-ENOENT); 1560 else if (READ_ONCE(mc->ctx->file) != file) 1561 mc = ERR_PTR(-EINVAL); 1562 else if (!refcount_inc_not_zero(&mc->ctx->ref)) 1563 mc = ERR_PTR(-ENXIO); 1564 else 1565 __xa_erase(&multicast_table, mc->id); 1566 xa_unlock(&multicast_table); 1567 1568 if (IS_ERR(mc)) { 1569 ret = PTR_ERR(mc); 1570 goto out; 1571 } 1572 1573 mutex_lock(&mc->ctx->mutex); 1574 rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr); 1575 mutex_unlock(&mc->ctx->mutex); 1576 1577 ucma_cleanup_mc_events(mc); 1578 1579 ucma_put_ctx(mc->ctx); 1580 resp.events_reported = mc->events_reported; 1581 kfree(mc); 1582 1583 if (copy_to_user(u64_to_user_ptr(cmd.response), 1584 &resp, sizeof(resp))) 1585 ret = -EFAULT; 1586 out: 1587 return ret; 1588 } 1589 1590 static ssize_t ucma_migrate_id(struct ucma_file *new_file, 1591 const char __user *inbuf, 1592 int in_len, int out_len) 1593 { 1594 struct rdma_ucm_migrate_id cmd; 1595 struct rdma_ucm_migrate_resp resp; 1596 struct ucma_event *uevent, *tmp; 1597 struct ucma_context *ctx; 1598 LIST_HEAD(event_list); 1599 struct fd f; 1600 struct ucma_file *cur_file; 1601 int ret = 0; 1602 1603 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 1604 return -EFAULT; 1605 1606 /* Get current fd to protect against it being closed */ 1607 f = fdget(cmd.fd); 1608 if (!f.file) 1609 return -ENOENT; 1610 if (f.file->f_op != &ucma_fops) { 1611 ret = -EINVAL; 1612 goto file_put; 1613 } 1614 cur_file = f.file->private_data; 1615 1616 /* Validate current fd and prevent destruction of id. */ 1617 ctx = ucma_get_ctx(cur_file, cmd.id); 1618 if (IS_ERR(ctx)) { 1619 ret = PTR_ERR(ctx); 1620 goto file_put; 1621 } 1622 1623 rdma_lock_handler(ctx->cm_id); 1624 /* 1625 * ctx->file can only be changed under the handler & xa_lock. xa_load() 1626 * must be checked again to ensure the ctx hasn't begun destruction 1627 * since the ucma_get_ctx(). 1628 */ 1629 xa_lock(&ctx_table); 1630 if (_ucma_find_context(cmd.id, cur_file) != ctx) { 1631 xa_unlock(&ctx_table); 1632 ret = -ENOENT; 1633 goto err_unlock; 1634 } 1635 ctx->file = new_file; 1636 xa_unlock(&ctx_table); 1637 1638 mutex_lock(&cur_file->mut); 1639 list_del(&ctx->list); 1640 /* 1641 * At this point lock_handler() prevents addition of new uevents for 1642 * this ctx. 
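	 * Events already queued on the old file are spliced onto the new
	 * file's event_list below, so nothing reported against this id is
	 * lost across the migration.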
1643 */ 1644 list_for_each_entry_safe(uevent, tmp, &cur_file->event_list, list) 1645 if (uevent->ctx == ctx) 1646 list_move_tail(&uevent->list, &event_list); 1647 resp.events_reported = ctx->events_reported; 1648 mutex_unlock(&cur_file->mut); 1649 1650 mutex_lock(&new_file->mut); 1651 list_add_tail(&ctx->list, &new_file->ctx_list); 1652 list_splice_tail(&event_list, &new_file->event_list); 1653 mutex_unlock(&new_file->mut); 1654 1655 if (copy_to_user(u64_to_user_ptr(cmd.response), 1656 &resp, sizeof(resp))) 1657 ret = -EFAULT; 1658 1659 err_unlock: 1660 rdma_unlock_handler(ctx->cm_id); 1661 ucma_put_ctx(ctx); 1662 file_put: 1663 fdput(f); 1664 return ret; 1665 } 1666 1667 static ssize_t (*ucma_cmd_table[])(struct ucma_file *file, 1668 const char __user *inbuf, 1669 int in_len, int out_len) = { 1670 [RDMA_USER_CM_CMD_CREATE_ID] = ucma_create_id, 1671 [RDMA_USER_CM_CMD_DESTROY_ID] = ucma_destroy_id, 1672 [RDMA_USER_CM_CMD_BIND_IP] = ucma_bind_ip, 1673 [RDMA_USER_CM_CMD_RESOLVE_IP] = ucma_resolve_ip, 1674 [RDMA_USER_CM_CMD_RESOLVE_ROUTE] = ucma_resolve_route, 1675 [RDMA_USER_CM_CMD_QUERY_ROUTE] = ucma_query_route, 1676 [RDMA_USER_CM_CMD_CONNECT] = ucma_connect, 1677 [RDMA_USER_CM_CMD_LISTEN] = ucma_listen, 1678 [RDMA_USER_CM_CMD_ACCEPT] = ucma_accept, 1679 [RDMA_USER_CM_CMD_REJECT] = ucma_reject, 1680 [RDMA_USER_CM_CMD_DISCONNECT] = ucma_disconnect, 1681 [RDMA_USER_CM_CMD_INIT_QP_ATTR] = ucma_init_qp_attr, 1682 [RDMA_USER_CM_CMD_GET_EVENT] = ucma_get_event, 1683 [RDMA_USER_CM_CMD_GET_OPTION] = NULL, 1684 [RDMA_USER_CM_CMD_SET_OPTION] = ucma_set_option, 1685 [RDMA_USER_CM_CMD_NOTIFY] = ucma_notify, 1686 [RDMA_USER_CM_CMD_JOIN_IP_MCAST] = ucma_join_ip_multicast, 1687 [RDMA_USER_CM_CMD_LEAVE_MCAST] = ucma_leave_multicast, 1688 [RDMA_USER_CM_CMD_MIGRATE_ID] = ucma_migrate_id, 1689 [RDMA_USER_CM_CMD_QUERY] = ucma_query, 1690 [RDMA_USER_CM_CMD_BIND] = ucma_bind, 1691 [RDMA_USER_CM_CMD_RESOLVE_ADDR] = ucma_resolve_addr, 1692 [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast 1693 }; 1694 1695 static ssize_t ucma_write(struct file *filp, const char __user *buf, 1696 size_t len, loff_t *pos) 1697 { 1698 struct ucma_file *file = filp->private_data; 1699 struct rdma_ucm_cmd_hdr hdr; 1700 ssize_t ret; 1701 1702 if (!ib_safe_file_access(filp)) { 1703 pr_err_once("ucma_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n", 1704 task_tgid_vnr(current), current->comm); 1705 return -EACCES; 1706 } 1707 1708 if (len < sizeof(hdr)) 1709 return -EINVAL; 1710 1711 if (copy_from_user(&hdr, buf, sizeof(hdr))) 1712 return -EFAULT; 1713 1714 if (hdr.cmd >= ARRAY_SIZE(ucma_cmd_table)) 1715 return -EINVAL; 1716 hdr.cmd = array_index_nospec(hdr.cmd, ARRAY_SIZE(ucma_cmd_table)); 1717 1718 if (hdr.in + sizeof(hdr) > len) 1719 return -EINVAL; 1720 1721 if (!ucma_cmd_table[hdr.cmd]) 1722 return -ENOSYS; 1723 1724 ret = ucma_cmd_table[hdr.cmd](file, buf + sizeof(hdr), hdr.in, hdr.out); 1725 if (!ret) 1726 ret = len; 1727 1728 return ret; 1729 } 1730 1731 static __poll_t ucma_poll(struct file *filp, struct poll_table_struct *wait) 1732 { 1733 struct ucma_file *file = filp->private_data; 1734 __poll_t mask = 0; 1735 1736 poll_wait(filp, &file->poll_wait, wait); 1737 1738 if (!list_empty(&file->event_list)) 1739 mask = EPOLLIN | EPOLLRDNORM; 1740 1741 return mask; 1742 } 1743 1744 /* 1745 * ucma_open() does not need the BKL: 1746 * 1747 * - no global state is referred to; 1748 * - there is no ioctl method to race against; 1749 * - no further module initialization is required 
for open to work 1750 * after the device is registered. 1751 */ 1752 static int ucma_open(struct inode *inode, struct file *filp) 1753 { 1754 struct ucma_file *file; 1755 1756 file = kmalloc(sizeof *file, GFP_KERNEL); 1757 if (!file) 1758 return -ENOMEM; 1759 1760 INIT_LIST_HEAD(&file->event_list); 1761 INIT_LIST_HEAD(&file->ctx_list); 1762 init_waitqueue_head(&file->poll_wait); 1763 mutex_init(&file->mut); 1764 1765 filp->private_data = file; 1766 file->filp = filp; 1767 1768 return stream_open(inode, filp); 1769 } 1770 1771 static int ucma_close(struct inode *inode, struct file *filp) 1772 { 1773 struct ucma_file *file = filp->private_data; 1774 1775 /* 1776 * All paths that touch ctx_list or ctx_list starting from write() are 1777 * prevented by this being a FD release function. The list_add_tail() in 1778 * ucma_connect_event_handler() can run concurrently, however it only 1779 * adds to the list *after* a listening ID. By only reading the first of 1780 * the list, and relying on __destroy_id() to block 1781 * ucma_connect_event_handler(), no additional locking is needed. 1782 */ 1783 while (!list_empty(&file->ctx_list)) { 1784 struct ucma_context *ctx = list_first_entry( 1785 &file->ctx_list, struct ucma_context, list); 1786 1787 xa_erase(&ctx_table, ctx->id); 1788 __destroy_id(ctx); 1789 } 1790 kfree(file); 1791 return 0; 1792 } 1793 1794 static const struct file_operations ucma_fops = { 1795 .owner = THIS_MODULE, 1796 .open = ucma_open, 1797 .release = ucma_close, 1798 .write = ucma_write, 1799 .poll = ucma_poll, 1800 .llseek = no_llseek, 1801 }; 1802 1803 static struct miscdevice ucma_misc = { 1804 .minor = MISC_DYNAMIC_MINOR, 1805 .name = "rdma_cm", 1806 .nodename = "infiniband/rdma_cm", 1807 .mode = 0666, 1808 .fops = &ucma_fops, 1809 }; 1810 1811 static int ucma_get_global_nl_info(struct ib_client_nl_info *res) 1812 { 1813 res->abi = RDMA_USER_CM_ABI_VERSION; 1814 res->cdev = ucma_misc.this_device; 1815 return 0; 1816 } 1817 1818 static struct ib_client rdma_cma_client = { 1819 .name = "rdma_cm", 1820 .get_global_nl_info = ucma_get_global_nl_info, 1821 }; 1822 MODULE_ALIAS_RDMA_CLIENT("rdma_cm"); 1823 1824 static ssize_t show_abi_version(struct device *dev, 1825 struct device_attribute *attr, 1826 char *buf) 1827 { 1828 return sprintf(buf, "%d\n", RDMA_USER_CM_ABI_VERSION); 1829 } 1830 static DEVICE_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); 1831 1832 static int __init ucma_init(void) 1833 { 1834 int ret; 1835 1836 ret = misc_register(&ucma_misc); 1837 if (ret) 1838 return ret; 1839 1840 ret = device_create_file(ucma_misc.this_device, &dev_attr_abi_version); 1841 if (ret) { 1842 pr_err("rdma_ucm: couldn't create abi_version attr\n"); 1843 goto err1; 1844 } 1845 1846 ucma_ctl_table_hdr = register_net_sysctl(&init_net, "net/rdma_ucm", ucma_ctl_table); 1847 if (!ucma_ctl_table_hdr) { 1848 pr_err("rdma_ucm: couldn't register sysctl paths\n"); 1849 ret = -ENOMEM; 1850 goto err2; 1851 } 1852 1853 ret = ib_register_client(&rdma_cma_client); 1854 if (ret) 1855 goto err3; 1856 1857 return 0; 1858 err3: 1859 unregister_net_sysctl_table(ucma_ctl_table_hdr); 1860 err2: 1861 device_remove_file(ucma_misc.this_device, &dev_attr_abi_version); 1862 err1: 1863 misc_deregister(&ucma_misc); 1864 return ret; 1865 } 1866 1867 static void __exit ucma_cleanup(void) 1868 { 1869 ib_unregister_client(&rdma_cma_client); 1870 unregister_net_sysctl_table(ucma_ctl_table_hdr); 1871 device_remove_file(ucma_misc.this_device, &dev_attr_abi_version); 1872 misc_deregister(&ucma_misc); 1873 } 1874 1875 
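/*
 * Userspace-visible interfaces set up by this module (paths assume the
 * usual udev/sysctl defaults):
 *
 *   /dev/infiniband/rdma_cm              write()/poll() command channel
 *                                        (see ucma_write() and ucma_cmd_table)
 *   /proc/sys/net/rdma_ucm/max_backlog   clamp applied to listen backlogs in
 *                                        ucma_listen()
 *   abi_version                          sysfs attribute on the rdma_cm misc
 *                                        device, reporting RDMA_USER_CM_ABI_VERSION
 */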
module_init(ucma_init);
module_exit(ucma_cleanup);
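/*
 * A minimal userspace sketch (not part of this module) of how the write()
 * ABI above can be exercised directly: open the char device, send a
 * rdma_ucm_cmd_hdr followed by the command payload in a single write(),
 * and receive the response through the user pointer in cmd.response.  The
 * structure layouts and command constants are assumed to match the UAPI
 * header <rdma/rdma_user_cm.h>; real applications should use librdmacm
 * rather than this raw interface.
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <rdma/rdma_user_cm.h>

int main(void)
{
	struct {
		struct rdma_ucm_cmd_hdr hdr;
		struct rdma_ucm_create_id cmd;
	} msg;
	struct rdma_ucm_create_id_resp resp;
	int fd;

	fd = open("/dev/infiniband/rdma_cm", O_RDWR | O_CLOEXEC);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	memset(&msg, 0, sizeof(msg));
	memset(&resp, 0, sizeof(resp));
	msg.hdr.cmd = RDMA_USER_CM_CMD_CREATE_ID;
	msg.hdr.in = sizeof(msg.cmd);		/* payload length checked by ucma_write() */
	msg.hdr.out = sizeof(resp);
	msg.cmd.uid = (uintptr_t)&resp;		/* opaque cookie echoed back in events */
	msg.cmd.response = (uintptr_t)&resp;	/* kernel writes rdma_ucm_create_id_resp here */
	msg.cmd.ps = RDMA_PS_TCP;		/* qp_type is ignored for RDMA_PS_TCP */

	if (write(fd, &msg, sizeof(msg)) != (ssize_t)sizeof(msg)) {
		perror("RDMA_USER_CM_CMD_CREATE_ID");
		close(fd);
		return 1;
	}
	printf("created cm_id %u\n", resp.id);

	close(fd);
	return 0;
}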