/*
 * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/completion.h>
#include <linux/file.h>
#include <linux/mutex.h>
#include <linux/poll.h>
#include <linux/sched.h>
#include <linux/idr.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/miscdevice.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
#include <linux/module.h>
#include <linux/nsproxy.h>

#include <linux/nospec.h>

#include <rdma/rdma_user_cm.h>
#include <rdma/ib_marshall.h>
#include <rdma/rdma_cm.h>
#include <rdma/rdma_cm_ib.h>
#include <rdma/ib_addr.h>
#include <rdma/ib.h>

MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access");
MODULE_LICENSE("Dual BSD/GPL");

static unsigned int max_backlog = 1024;

static struct ctl_table_header *ucma_ctl_table_hdr;
static struct ctl_table ucma_ctl_table[] = {
	{
		.procname	= "max_backlog",
		.data		= &max_backlog,
		.maxlen		= sizeof max_backlog,
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{ }
};

struct ucma_file {
	struct mutex		mut;
	struct file		*filp;
	struct list_head	ctx_list;
	struct list_head	event_list;
	wait_queue_head_t	poll_wait;
	struct workqueue_struct	*close_wq;
};

struct ucma_context {
	int			id;
	struct completion	comp;
	atomic_t		ref;
	int			events_reported;
	int			backlog;

	struct ucma_file	*file;
	struct rdma_cm_id	*cm_id;
	u64			uid;

	struct list_head	list;
	struct list_head	mc_list;
	/* mark that device is in process of destroying the internal HW
	 * resources, protected by the global mut
	 */
	int			closing;
	/* sync between removal event and id destroy, protected by file mut */
	int			destroying;
	struct work_struct	close_work;
};

struct ucma_multicast {
	struct ucma_context	*ctx;
	int			id;
	int			events_reported;

	u64			uid;
	u8			join_state;
	struct list_head	list;
	struct sockaddr_storage	addr;
};

struct ucma_event {
	struct ucma_context	*ctx;
	struct ucma_multicast	*mc;
	struct list_head	list;
	struct rdma_cm_id	*cm_id;
	struct rdma_ucm_event_resp resp;
	struct work_struct	close_work;
};

static DEFINE_MUTEX(mut);
static DEFINE_IDR(ctx_idr);
static DEFINE_IDR(multicast_idr);

static const struct file_operations ucma_fops;

static inline struct ucma_context *_ucma_find_context(int id,
						       struct ucma_file *file)
{
	struct ucma_context *ctx;

	ctx = idr_find(&ctx_idr, id);
	if (!ctx)
		ctx = ERR_PTR(-ENOENT);
	else if (ctx->file != file || !ctx->cm_id)
		ctx = ERR_PTR(-EINVAL);
	return ctx;
}

static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id)
{
	struct ucma_context *ctx;

	mutex_lock(&mut);
	ctx = _ucma_find_context(id, file);
	if (!IS_ERR(ctx)) {
		if (ctx->closing)
			ctx = ERR_PTR(-EIO);
		else
			atomic_inc(&ctx->ref);
	}
	mutex_unlock(&mut);
	return ctx;
}

static void ucma_put_ctx(struct ucma_context *ctx)
{
	if (atomic_dec_and_test(&ctx->ref))
		complete(&ctx->comp);
}

/*
 * Same as ucma_get_ctx but requires that ->cm_id->device is valid, e.g. that
 * the CM_ID is bound.
 */
static struct ucma_context *ucma_get_ctx_dev(struct ucma_file *file, int id)
{
	struct ucma_context *ctx = ucma_get_ctx(file, id);

	if (IS_ERR(ctx))
		return ctx;
	if (!ctx->cm_id->device) {
		ucma_put_ctx(ctx);
		return ERR_PTR(-EINVAL);
	}
	return ctx;
}

static void ucma_close_event_id(struct work_struct *work)
{
	struct ucma_event *uevent_close = container_of(work, struct ucma_event, close_work);

	rdma_destroy_id(uevent_close->cm_id);
	kfree(uevent_close);
}

static void ucma_close_id(struct work_struct *work)
{
	struct ucma_context *ctx = container_of(work, struct ucma_context, close_work);

	/* Once all in-flight tasks are finished, we close all underlying
	 * resources. The context stays alive until its creator explicitly
	 * destroys it.
	 */
	ucma_put_ctx(ctx);
	wait_for_completion(&ctx->comp);
	/* No new events will be generated after destroying the id. */
	rdma_destroy_id(ctx->cm_id);
}

static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
{
	struct ucma_context *ctx;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return NULL;

	INIT_WORK(&ctx->close_work, ucma_close_id);
	atomic_set(&ctx->ref, 1);
	init_completion(&ctx->comp);
	INIT_LIST_HEAD(&ctx->mc_list);
	ctx->file = file;

	mutex_lock(&mut);
	ctx->id = idr_alloc(&ctx_idr, ctx, 0, 0, GFP_KERNEL);
	mutex_unlock(&mut);
	if (ctx->id < 0)
		goto error;

	list_add_tail(&ctx->list, &file->ctx_list);
	return ctx;

error:
	kfree(ctx);
	return NULL;
}

static struct ucma_multicast* ucma_alloc_multicast(struct ucma_context *ctx)
{
	struct ucma_multicast *mc;

	mc = kzalloc(sizeof(*mc), GFP_KERNEL);
	if (!mc)
		return NULL;

	mutex_lock(&mut);
	mc->id = idr_alloc(&multicast_idr, NULL, 0, 0, GFP_KERNEL);
	mutex_unlock(&mut);
	if (mc->id < 0)
		goto error;

	mc->ctx = ctx;
	list_add_tail(&mc->list, &ctx->mc_list);
	return mc;

error:
	kfree(mc);
	return NULL;
}

static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst,
				 struct rdma_conn_param *src)
{
	if (src->private_data_len)
		memcpy(dst->private_data, src->private_data,
		       src->private_data_len);
	dst->private_data_len = src->private_data_len;
	dst->responder_resources = src->responder_resources;
	dst->initiator_depth = src->initiator_depth;
	dst->flow_control = src->flow_control;
	dst->retry_count = src->retry_count;
	dst->rnr_retry_count = src->rnr_retry_count;
	dst->srq = src->srq;
	dst->qp_num = src->qp_num;
}

static void ucma_copy_ud_event(struct ib_device *device,
			       struct rdma_ucm_ud_param *dst,
			       struct rdma_ud_param *src)
{
	if (src->private_data_len)
		memcpy(dst->private_data, src->private_data,
		       src->private_data_len);
	dst->private_data_len = src->private_data_len;
	ib_copy_ah_attr_to_user(device, &dst->ah_attr, &src->ah_attr);
	dst->qp_num = src->qp_num;
	dst->qkey = src->qkey;
}

static void ucma_set_event_context(struct ucma_context *ctx,
				   struct rdma_cm_event *event,
				   struct ucma_event *uevent)
{
	uevent->ctx = ctx;
	switch (event->event) {
	case RDMA_CM_EVENT_MULTICAST_JOIN:
	case RDMA_CM_EVENT_MULTICAST_ERROR:
		uevent->mc = (struct ucma_multicast *)
			     event->param.ud.private_data;
		uevent->resp.uid = uevent->mc->uid;
		uevent->resp.id = uevent->mc->id;
		break;
	default:
		uevent->resp.uid = ctx->uid;
		uevent->resp.id = ctx->id;
		break;
	}
}

/* Called with file->mut locked for the relevant context. */
static void ucma_removal_event_handler(struct rdma_cm_id *cm_id)
{
	struct ucma_context *ctx = cm_id->context;
	struct ucma_event *con_req_eve;
	int event_found = 0;

	if (ctx->destroying)
		return;

	/* Only when the context owns this cm_id can it be queued to be
	 * closed. Otherwise the cm_id is an in-flight one sitting on the
	 * context's event list, waiting to be detached and reattached to its
	 * new context by ucma_get_event; that case is handled separately below.
	 */
	if (ctx->cm_id == cm_id) {
		mutex_lock(&mut);
		ctx->closing = 1;
		mutex_unlock(&mut);
		queue_work(ctx->file->close_wq, &ctx->close_work);
		return;
	}

	list_for_each_entry(con_req_eve, &ctx->file->event_list, list) {
		if (con_req_eve->cm_id == cm_id &&
		    con_req_eve->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) {
			list_del(&con_req_eve->list);
			INIT_WORK(&con_req_eve->close_work, ucma_close_event_id);
			queue_work(ctx->file->close_wq, &con_req_eve->close_work);
			event_found = 1;
			break;
		}
	}
	if (!event_found)
		pr_err("ucma_removal_event_handler: warning: connect request event wasn't found\n");
}

static int ucma_event_handler(struct rdma_cm_id *cm_id,
			      struct rdma_cm_event *event)
{
	struct ucma_event *uevent;
	struct ucma_context *ctx = cm_id->context;
	int ret = 0;

	uevent = kzalloc(sizeof(*uevent), GFP_KERNEL);
	if (!uevent)
		return event->event == RDMA_CM_EVENT_CONNECT_REQUEST;

	mutex_lock(&ctx->file->mut);
	uevent->cm_id = cm_id;
	ucma_set_event_context(ctx, event, uevent);
	uevent->resp.event = event->event;
	uevent->resp.status = event->status;
	if (cm_id->qp_type == IB_QPT_UD)
		ucma_copy_ud_event(cm_id->device, &uevent->resp.param.ud,
				   &event->param.ud);
	else
		ucma_copy_conn_event(&uevent->resp.param.conn,
				     &event->param.conn);

	if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) {
		if (!ctx->backlog) {
			ret = -ENOMEM;
			kfree(uevent);
			goto out;
		}
		ctx->backlog--;
	} else if (!ctx->uid || ctx->cm_id != cm_id) {
		/*
		 * We ignore events for new connections until userspace has set
		 * their context. This can only happen if an error occurs on a
		 * new connection before the user accepts it. This is okay,
		 * since the accept will just fail later. However, we do need
		 * to release the underlying HW resources in case of a device
		 * removal event.
		 */
		if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL)
			ucma_removal_event_handler(cm_id);

		kfree(uevent);
		goto out;
	}

	list_add_tail(&uevent->list, &ctx->file->event_list);
	wake_up_interruptible(&ctx->file->poll_wait);
	if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL)
		ucma_removal_event_handler(cm_id);
out:
	mutex_unlock(&ctx->file->mut);
	return ret;
}

static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
			      int in_len, int out_len)
{
	struct ucma_context *ctx;
	struct rdma_ucm_get_event cmd;
	struct ucma_event *uevent;
	int ret = 0;

	/*
	 * Old 32 bit user space does not send the 4 byte padding in the
	 * reserved field. We don't care, allow it to keep working.
	 */
	if (out_len < sizeof(uevent->resp) - sizeof(uevent->resp.reserved))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	mutex_lock(&file->mut);
	while (list_empty(&file->event_list)) {
		mutex_unlock(&file->mut);

		if (file->filp->f_flags & O_NONBLOCK)
			return -EAGAIN;

		if (wait_event_interruptible(file->poll_wait,
					     !list_empty(&file->event_list)))
			return -ERESTARTSYS;

		mutex_lock(&file->mut);
	}

	uevent = list_entry(file->event_list.next, struct ucma_event, list);

	if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) {
		ctx = ucma_alloc_ctx(file);
		if (!ctx) {
			ret = -ENOMEM;
			goto done;
		}
		uevent->ctx->backlog++;
		ctx->cm_id = uevent->cm_id;
		ctx->cm_id->context = ctx;
		uevent->resp.id = ctx->id;
	}

	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &uevent->resp,
			 min_t(size_t, out_len, sizeof(uevent->resp)))) {
		ret = -EFAULT;
		goto done;
	}

	list_del(&uevent->list);
	uevent->ctx->events_reported++;
	if (uevent->mc)
		uevent->mc->events_reported++;
	kfree(uevent);
done:
	mutex_unlock(&file->mut);
	return ret;
}

static int ucma_get_qp_type(struct rdma_ucm_create_id *cmd, enum ib_qp_type *qp_type)
{
	switch (cmd->ps) {
	case RDMA_PS_TCP:
		*qp_type = IB_QPT_RC;
		return 0;
	case RDMA_PS_UDP:
	case RDMA_PS_IPOIB:
		*qp_type = IB_QPT_UD;
		return 0;
	case RDMA_PS_IB:
		*qp_type = cmd->qp_type;
		return 0;
	default:
		return -EINVAL;
	}
}

static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
			      int in_len, int out_len)
{
	struct rdma_ucm_create_id cmd;
	struct rdma_ucm_create_id_resp resp;
	struct ucma_context *ctx;
	struct rdma_cm_id *cm_id;
	enum ib_qp_type qp_type;
	int ret;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ret = ucma_get_qp_type(&cmd, &qp_type);
	if (ret)
		return ret;

	mutex_lock(&file->mut);
	ctx = ucma_alloc_ctx(file);
	mutex_unlock(&file->mut);
	if (!ctx)
		return -ENOMEM;

	ctx->uid = cmd.uid;
	cm_id = __rdma_create_id(current->nsproxy->net_ns,
				 ucma_event_handler, ctx, cmd.ps, qp_type, NULL);
	if (IS_ERR(cm_id)) {
		ret = PTR_ERR(cm_id);
		goto err1;
	}

	resp.id = ctx->id;
	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &resp, sizeof(resp))) {
		ret = -EFAULT;
		goto err2;
	}

	ctx->cm_id = cm_id;
	return 0;

err2:
	rdma_destroy_id(cm_id);
err1:
	mutex_lock(&mut);
	idr_remove(&ctx_idr, ctx->id);
	mutex_unlock(&mut);
	mutex_lock(&file->mut);
	list_del(&ctx->list);
	mutex_unlock(&file->mut);
	kfree(ctx);
	return ret;
}

static void ucma_cleanup_multicast(struct ucma_context *ctx)
{
	struct ucma_multicast *mc, *tmp;

	mutex_lock(&mut);
	list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) {
		list_del(&mc->list);
		idr_remove(&multicast_idr, mc->id);
		kfree(mc);
	}
	mutex_unlock(&mut);
}

static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
{
	struct ucma_event *uevent, *tmp;

	list_for_each_entry_safe(uevent, tmp, &mc->ctx->file->event_list, list) {
		if (uevent->mc != mc)
			continue;

		list_del(&uevent->list);
		kfree(uevent);
	}
}

/*
 * ucma_free_ctx is called after the underlying rdma CM-ID is destroyed. At
 * this point, no new events will be reported from the hardware. However, we
 * still need to cleanup the UCMA context for this ID. Specifically, there
 * might be events that have not yet been consumed by the user space software.
 * These might include pending connect requests which we have not completed
 * processing. We cannot call rdma_destroy_id while holding the lock of the
 * context (file->mut), as it might cause a deadlock. We therefore extract all
 * relevant events from the context pending events list while holding the
 * mutex. After that we release them as needed.
 */
static int ucma_free_ctx(struct ucma_context *ctx)
{
	int events_reported;
	struct ucma_event *uevent, *tmp;
	LIST_HEAD(list);

	ucma_cleanup_multicast(ctx);

	/* Cleanup events not yet reported to the user. */
	mutex_lock(&ctx->file->mut);
	list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) {
		if (uevent->ctx == ctx)
			list_move_tail(&uevent->list, &list);
	}
	list_del(&ctx->list);
	mutex_unlock(&ctx->file->mut);

	list_for_each_entry_safe(uevent, tmp, &list, list) {
		list_del(&uevent->list);
		if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST)
			rdma_destroy_id(uevent->cm_id);
		kfree(uevent);
	}

	events_reported = ctx->events_reported;
	kfree(ctx);
	return events_reported;
}

static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
			       int in_len, int out_len)
{
	struct rdma_ucm_destroy_id cmd;
	struct rdma_ucm_destroy_id_resp resp;
	struct ucma_context *ctx;
	int ret = 0;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	mutex_lock(&mut);
	ctx = _ucma_find_context(cmd.id, file);
	if (!IS_ERR(ctx))
		idr_remove(&ctx_idr, ctx->id);
	mutex_unlock(&mut);

	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->file->mut);
	ctx->destroying = 1;
	mutex_unlock(&ctx->file->mut);

	flush_workqueue(ctx->file->close_wq);
	/* At this point it's guaranteed that there is no inflight
	 * closing task */
	mutex_lock(&mut);
	if (!ctx->closing) {
		mutex_unlock(&mut);
		ucma_put_ctx(ctx);
		wait_for_completion(&ctx->comp);
		rdma_destroy_id(ctx->cm_id);
	} else {
		mutex_unlock(&mut);
	}

	resp.events_reported = ucma_free_ctx(ctx);
	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &resp, sizeof(resp)))
		ret = -EFAULT;

	return ret;
}

static ssize_t ucma_bind_ip(struct ucma_file *file, const char __user *inbuf,
			    int in_len, int out_len)
{
	struct rdma_ucm_bind_ip cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (!rdma_addr_size_in6(&cmd.addr))
		return -EINVAL;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr);
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_bind(struct ucma_file *file, const char __user *inbuf,
			 int in_len, int out_len)
{
	struct rdma_ucm_bind cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (cmd.reserved || !cmd.addr_size ||
	    cmd.addr_size != rdma_addr_size_kss(&cmd.addr))
		return -EINVAL;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr);
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_resolve_ip(struct ucma_file *file,
			       const char __user *inbuf,
			       int in_len, int out_len)
{
	struct rdma_ucm_resolve_ip cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if ((cmd.src_addr.sin6_family && !rdma_addr_size_in6(&cmd.src_addr)) ||
	    !rdma_addr_size_in6(&cmd.dst_addr))
		return -EINVAL;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr,
				(struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms);
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_resolve_addr(struct ucma_file *file,
				 const char __user *inbuf,
				 int in_len, int out_len)
{
	struct rdma_ucm_resolve_addr cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (cmd.reserved ||
	    (cmd.src_size && (cmd.src_size != rdma_addr_size_kss(&cmd.src_addr))) ||
	    !cmd.dst_size || (cmd.dst_size != rdma_addr_size_kss(&cmd.dst_addr)))
		return -EINVAL;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr,
				(struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms);
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_resolve_route(struct ucma_file *file,
				  const char __user *inbuf,
				  int in_len, int out_len)
{
	struct rdma_ucm_resolve_route cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx_dev(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	ret = rdma_resolve_route(ctx->cm_id, cmd.timeout_ms);
	ucma_put_ctx(ctx);
	return ret;
}

static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp,
			       struct rdma_route *route)
{
	struct rdma_dev_addr *dev_addr;

	resp->num_paths = route->num_paths;
	switch (route->num_paths) {
	case 0:
		dev_addr = &route->addr.dev_addr;
		rdma_addr_get_dgid(dev_addr,
				   (union ib_gid *) &resp->ib_route[0].dgid);
		rdma_addr_get_sgid(dev_addr,
				   (union ib_gid *) &resp->ib_route[0].sgid);
		resp->ib_route[0].pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
		break;
	case 2:
		ib_copy_path_rec_to_user(&resp->ib_route[1],
					 &route->path_rec[1]);
		/* fall through */
	case 1:
		ib_copy_path_rec_to_user(&resp->ib_route[0],
					 &route->path_rec[0]);
		break;
	default:
		break;
	}
}

static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp,
				 struct rdma_route *route)
{

	resp->num_paths = route->num_paths;
	switch (route->num_paths) {
	case 0:
		rdma_ip2gid((struct sockaddr *)&route->addr.dst_addr,
			    (union ib_gid *)&resp->ib_route[0].dgid);
		rdma_ip2gid((struct sockaddr *)&route->addr.src_addr,
			    (union ib_gid *)&resp->ib_route[0].sgid);
		resp->ib_route[0].pkey = cpu_to_be16(0xffff);
		break;
	case 2:
		ib_copy_path_rec_to_user(&resp->ib_route[1],
					 &route->path_rec[1]);
		/* fall through */
	case 1:
		ib_copy_path_rec_to_user(&resp->ib_route[0],
					 &route->path_rec[0]);
		break;
	default:
		break;
	}
}

static void ucma_copy_iw_route(struct rdma_ucm_query_route_resp *resp,
			       struct rdma_route *route)
{
	struct rdma_dev_addr *dev_addr;

	dev_addr = &route->addr.dev_addr;
	rdma_addr_get_dgid(dev_addr, (union ib_gid *) &resp->ib_route[0].dgid);
	rdma_addr_get_sgid(dev_addr, (union ib_gid *) &resp->ib_route[0].sgid);
}

static ssize_t ucma_query_route(struct ucma_file *file,
				const char __user *inbuf,
				int in_len, int out_len)
{
	struct rdma_ucm_query cmd;
	struct rdma_ucm_query_route_resp resp;
	struct ucma_context *ctx;
	struct sockaddr *addr;
	int ret = 0;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	memset(&resp, 0, sizeof resp);
	addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr;
	memcpy(&resp.src_addr, addr, addr->sa_family == AF_INET ?
				     sizeof(struct sockaddr_in) :
				     sizeof(struct sockaddr_in6));
	addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr;
	memcpy(&resp.dst_addr, addr, addr->sa_family == AF_INET ?
				     sizeof(struct sockaddr_in) :
				     sizeof(struct sockaddr_in6));
	if (!ctx->cm_id->device)
		goto out;

	resp.node_guid = (__force __u64) ctx->cm_id->device->node_guid;
	resp.port_num = ctx->cm_id->port_num;

	if (rdma_cap_ib_sa(ctx->cm_id->device, ctx->cm_id->port_num))
		ucma_copy_ib_route(&resp, &ctx->cm_id->route);
	else if (rdma_protocol_roce(ctx->cm_id->device, ctx->cm_id->port_num))
		ucma_copy_iboe_route(&resp, &ctx->cm_id->route);
	else if (rdma_protocol_iwarp(ctx->cm_id->device, ctx->cm_id->port_num))
		ucma_copy_iw_route(&resp, &ctx->cm_id->route);

out:
	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &resp, sizeof(resp)))
		ret = -EFAULT;

	ucma_put_ctx(ctx);
	return ret;
}

static void ucma_query_device_addr(struct rdma_cm_id *cm_id,
				   struct rdma_ucm_query_addr_resp *resp)
{
	if (!cm_id->device)
		return;

	resp->node_guid = (__force __u64) cm_id->device->node_guid;
	resp->port_num = cm_id->port_num;
	resp->pkey = (__force __u16) cpu_to_be16(
		     ib_addr_get_pkey(&cm_id->route.addr.dev_addr));
}

static ssize_t ucma_query_addr(struct ucma_context *ctx,
			       void __user *response, int out_len)
{
	struct rdma_ucm_query_addr_resp resp;
	struct sockaddr *addr;
	int ret = 0;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	memset(&resp, 0, sizeof resp);

	addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr;
	resp.src_size = rdma_addr_size(addr);
	memcpy(&resp.src_addr, addr, resp.src_size);

	addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr;
	resp.dst_size = rdma_addr_size(addr);
	memcpy(&resp.dst_addr, addr, resp.dst_size);

	ucma_query_device_addr(ctx->cm_id, &resp);

	if (copy_to_user(response, &resp, sizeof(resp)))
		ret = -EFAULT;

	return ret;
}

static ssize_t ucma_query_path(struct ucma_context *ctx,
			       void __user *response, int out_len)
{
	struct rdma_ucm_query_path_resp *resp;
	int i, ret = 0;

	if (out_len < sizeof(*resp))
		return -ENOSPC;

	resp = kzalloc(out_len, GFP_KERNEL);
	if (!resp)
		return -ENOMEM;

	resp->num_paths = ctx->cm_id->route.num_paths;
	for (i = 0, out_len -= sizeof(*resp);
	     i < resp->num_paths && out_len > sizeof(struct ib_path_rec_data);
	     i++, out_len -= sizeof(struct ib_path_rec_data)) {
		struct sa_path_rec *rec = &ctx->cm_id->route.path_rec[i];

		resp->path_data[i].flags = IB_PATH_GMP | IB_PATH_PRIMARY |
					   IB_PATH_BIDIRECTIONAL;
		if (rec->rec_type == SA_PATH_REC_TYPE_OPA) {
			struct sa_path_rec ib;

			sa_convert_path_opa_to_ib(&ib, rec);
			ib_sa_pack_path(&ib, &resp->path_data[i].path_rec);

		} else {
			ib_sa_pack_path(rec, &resp->path_data[i].path_rec);
		}
	}

	if (copy_to_user(response, resp,
			 sizeof(*resp) + (i * sizeof(struct ib_path_rec_data))))
		ret = -EFAULT;

	kfree(resp);
	return ret;
}

static ssize_t ucma_query_gid(struct ucma_context *ctx,
			      void __user *response, int out_len)
{
	struct rdma_ucm_query_addr_resp resp;
	struct sockaddr_ib *addr;
	int ret = 0;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	memset(&resp, 0, sizeof resp);

	ucma_query_device_addr(ctx->cm_id, &resp);

	addr = (struct sockaddr_ib *) &resp.src_addr;
	resp.src_size = sizeof(*addr);
	if (ctx->cm_id->route.addr.src_addr.ss_family == AF_IB) {
		memcpy(addr, &ctx->cm_id->route.addr.src_addr, resp.src_size);
	} else {
		addr->sib_family = AF_IB;
		addr->sib_pkey = (__force __be16) resp.pkey;
		rdma_read_gids(ctx->cm_id, (union ib_gid *)&addr->sib_addr,
			       NULL);
		addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
						    &ctx->cm_id->route.addr.src_addr);
	}

	addr = (struct sockaddr_ib *) &resp.dst_addr;
	resp.dst_size = sizeof(*addr);
	if (ctx->cm_id->route.addr.dst_addr.ss_family == AF_IB) {
		memcpy(addr, &ctx->cm_id->route.addr.dst_addr, resp.dst_size);
	} else {
		addr->sib_family = AF_IB;
		addr->sib_pkey = (__force __be16) resp.pkey;
		rdma_read_gids(ctx->cm_id, NULL,
			       (union ib_gid *)&addr->sib_addr);
		addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
						    &ctx->cm_id->route.addr.dst_addr);
	}

	if (copy_to_user(response, &resp, sizeof(resp)))
		ret = -EFAULT;

	return ret;
}

static ssize_t ucma_query(struct ucma_file *file,
			  const char __user *inbuf,
			  int in_len, int out_len)
{
	struct rdma_ucm_query cmd;
	struct ucma_context *ctx;
	void __user *response;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	response = u64_to_user_ptr(cmd.response);
	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	switch (cmd.option) {
	case RDMA_USER_CM_QUERY_ADDR:
		ret = ucma_query_addr(ctx, response, out_len);
		break;
	case RDMA_USER_CM_QUERY_PATH:
		ret = ucma_query_path(ctx, response, out_len);
		break;
	case RDMA_USER_CM_QUERY_GID:
		ret = ucma_query_gid(ctx, response, out_len);
		break;
	default:
		ret = -ENOSYS;
		break;
	}

	ucma_put_ctx(ctx);
	return ret;
}

static void ucma_copy_conn_param(struct rdma_cm_id *id,
				 struct rdma_conn_param *dst,
				 struct rdma_ucm_conn_param *src)
{
	dst->private_data = src->private_data;
	dst->private_data_len = src->private_data_len;
	dst->responder_resources = src->responder_resources;
	dst->initiator_depth = src->initiator_depth;
	dst->flow_control = src->flow_control;
	dst->retry_count = src->retry_count;
	dst->rnr_retry_count = src->rnr_retry_count;
	dst->srq = src->srq;
	dst->qp_num = src->qp_num;
	dst->qkey = (id->route.addr.src_addr.ss_family == AF_IB) ? src->qkey : 0;
}

static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf,
			    int in_len, int out_len)
{
	struct rdma_ucm_connect cmd;
	struct rdma_conn_param conn_param;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (!cmd.conn_param.valid)
		return -EINVAL;

	ctx = ucma_get_ctx_dev(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
	ret = rdma_connect(ctx->cm_id, &conn_param);
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_listen(struct ucma_file *file, const char __user *inbuf,
			   int in_len, int out_len)
{
	struct rdma_ucm_listen cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	ctx->backlog = cmd.backlog > 0 && cmd.backlog < max_backlog ?
		       cmd.backlog : max_backlog;
	ret = rdma_listen(ctx->cm_id, ctx->backlog);
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf,
			   int in_len, int out_len)
{
	struct rdma_ucm_accept cmd;
	struct rdma_conn_param conn_param;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx_dev(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	if (cmd.conn_param.valid) {
		ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
		mutex_lock(&file->mut);
		ret = __rdma_accept(ctx->cm_id, &conn_param, NULL);
		if (!ret)
			ctx->uid = cmd.uid;
		mutex_unlock(&file->mut);
	} else
		ret = __rdma_accept(ctx->cm_id, NULL, NULL);

	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_reject(struct ucma_file *file, const char __user *inbuf,
			   int in_len, int out_len)
{
	struct rdma_ucm_reject cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx_dev(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	ret = rdma_reject(ctx->cm_id, cmd.private_data, cmd.private_data_len);
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_disconnect(struct ucma_file *file, const char __user *inbuf,
			       int in_len, int out_len)
{
	struct rdma_ucm_disconnect cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx_dev(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	ret = rdma_disconnect(ctx->cm_id);
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_init_qp_attr(struct ucma_file *file,
				 const char __user *inbuf,
				 int in_len, int out_len)
{
	struct rdma_ucm_init_qp_attr cmd;
	struct ib_uverbs_qp_attr resp;
	struct ucma_context *ctx;
	struct ib_qp_attr qp_attr;
	int ret;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (cmd.qp_state > IB_QPS_ERR)
		return -EINVAL;

	ctx = ucma_get_ctx_dev(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	resp.qp_attr_mask = 0;
	memset(&qp_attr, 0, sizeof qp_attr);
	qp_attr.qp_state = cmd.qp_state;
	ret = rdma_init_qp_attr(ctx->cm_id, &qp_attr, &resp.qp_attr_mask);
	if (ret)
		goto out;

	ib_copy_qp_attr_to_user(ctx->cm_id->device, &resp, &qp_attr);
	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &resp, sizeof(resp)))
		ret = -EFAULT;

out:
	ucma_put_ctx(ctx);
	return ret;
}

static int ucma_set_option_id(struct ucma_context *ctx, int optname,
			      void *optval, size_t optlen)
{
	int ret = 0;

	switch (optname) {
	case RDMA_OPTION_ID_TOS:
		if (optlen != sizeof(u8)) {
			ret = -EINVAL;
			break;
		}
		rdma_set_service_type(ctx->cm_id, *((u8 *) optval));
		break;
	case RDMA_OPTION_ID_REUSEADDR:
		if (optlen != sizeof(int)) {
			ret = -EINVAL;
			break;
		}
		ret = rdma_set_reuseaddr(ctx->cm_id, *((int *) optval) ? 1 : 0);
		break;
	case RDMA_OPTION_ID_AFONLY:
		if (optlen != sizeof(int)) {
			ret = -EINVAL;
			break;
		}
		ret = rdma_set_afonly(ctx->cm_id, *((int *) optval) ? 1 : 0);
		break;
	case RDMA_OPTION_ID_ACK_TIMEOUT:
		if (optlen != sizeof(u8)) {
			ret = -EINVAL;
			break;
		}
		ret = rdma_set_ack_timeout(ctx->cm_id, *((u8 *)optval));
		break;
	default:
		ret = -ENOSYS;
	}

	return ret;
}

static int ucma_set_ib_path(struct ucma_context *ctx,
			    struct ib_path_rec_data *path_data, size_t optlen)
{
	struct sa_path_rec sa_path;
	struct rdma_cm_event event;
	int ret;

	if (optlen % sizeof(*path_data))
		return -EINVAL;

	for (; optlen; optlen -= sizeof(*path_data), path_data++) {
		if (path_data->flags == (IB_PATH_GMP | IB_PATH_PRIMARY |
					 IB_PATH_BIDIRECTIONAL))
			break;
	}

	if (!optlen)
		return -EINVAL;

	if (!ctx->cm_id->device)
		return -EINVAL;

	memset(&sa_path, 0, sizeof(sa_path));

	sa_path.rec_type = SA_PATH_REC_TYPE_IB;
	ib_sa_unpack_path(path_data->path_rec, &sa_path);

	if (rdma_cap_opa_ah(ctx->cm_id->device, ctx->cm_id->port_num)) {
		struct sa_path_rec opa;

		sa_convert_path_ib_to_opa(&opa, &sa_path);
		ret = rdma_set_ib_path(ctx->cm_id, &opa);
	} else {
		ret = rdma_set_ib_path(ctx->cm_id, &sa_path);
	}
	if (ret)
		return ret;

	memset(&event, 0, sizeof event);
	event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
	return ucma_event_handler(ctx->cm_id, &event);
}

static int ucma_set_option_ib(struct ucma_context *ctx, int optname,
			      void *optval, size_t optlen)
{
	int ret;

	switch (optname) {
	case RDMA_OPTION_IB_PATH:
		ret = ucma_set_ib_path(ctx, optval, optlen);
		break;
	default:
		ret = -ENOSYS;
	}

	return ret;
}

static int ucma_set_option_level(struct ucma_context *ctx, int level,
				 int optname, void *optval, size_t optlen)
{
	int ret;

	switch (level) {
	case RDMA_OPTION_ID:
		ret = ucma_set_option_id(ctx, optname, optval, optlen);
		break;
	case RDMA_OPTION_IB:
		ret = ucma_set_option_ib(ctx, optname, optval, optlen);
		break;
	default:
		ret = -ENOSYS;
	}

	return ret;
}

static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf,
			       int in_len, int out_len)
{
	struct rdma_ucm_set_option cmd;
	struct ucma_context *ctx;
	void *optval;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (unlikely(cmd.optlen > KMALLOC_MAX_SIZE))
		return -EINVAL;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	optval = memdup_user(u64_to_user_ptr(cmd.optval),
			     cmd.optlen);
	if (IS_ERR(optval)) {
		ret = PTR_ERR(optval);
		goto out;
	}

	ret = ucma_set_option_level(ctx, cmd.level, cmd.optname, optval,
				    cmd.optlen);
	kfree(optval);

out:
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf,
			   int in_len, int out_len)
{
	struct rdma_ucm_notify cmd;
	struct ucma_context *ctx;
	int ret = -EINVAL;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	if (ctx->cm_id->device)
		ret = rdma_notify(ctx->cm_id, (enum ib_event_type)cmd.event);

	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_process_join(struct ucma_file *file,
				 struct rdma_ucm_join_mcast *cmd, int out_len)
{
	struct rdma_ucm_create_id_resp resp;
	struct ucma_context *ctx;
	struct ucma_multicast *mc;
	struct sockaddr *addr;
	int ret;
	u8 join_state;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	addr = (struct sockaddr *) &cmd->addr;
	if (cmd->addr_size != rdma_addr_size(addr))
		return -EINVAL;

	if (cmd->join_flags == RDMA_MC_JOIN_FLAG_FULLMEMBER)
		join_state = BIT(FULLMEMBER_JOIN);
	else if (cmd->join_flags == RDMA_MC_JOIN_FLAG_SENDONLY_FULLMEMBER)
		join_state = BIT(SENDONLY_FULLMEMBER_JOIN);
	else
		return -EINVAL;

	ctx = ucma_get_ctx_dev(file, cmd->id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&file->mut);
	mc = ucma_alloc_multicast(ctx);
	if (!mc) {
		ret = -ENOMEM;
		goto err1;
	}
	mc->join_state = join_state;
	mc->uid = cmd->uid;
	memcpy(&mc->addr, addr, cmd->addr_size);
	ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr,
				  join_state, mc);
	if (ret)
		goto err2;

	resp.id = mc->id;
	if (copy_to_user(u64_to_user_ptr(cmd->response),
			 &resp, sizeof(resp))) {
		ret = -EFAULT;
		goto err3;
	}

	mutex_lock(&mut);
	idr_replace(&multicast_idr, mc, mc->id);
	mutex_unlock(&mut);

	mutex_unlock(&file->mut);
	ucma_put_ctx(ctx);
	return 0;

err3:
	rdma_leave_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr);
	ucma_cleanup_mc_events(mc);
err2:
	mutex_lock(&mut);
	idr_remove(&multicast_idr, mc->id);
	mutex_unlock(&mut);
	list_del(&mc->list);
	kfree(mc);
err1:
	mutex_unlock(&file->mut);
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_join_ip_multicast(struct ucma_file *file,
				      const char __user *inbuf,
				      int in_len, int out_len)
{
	struct rdma_ucm_join_ip_mcast cmd;
	struct rdma_ucm_join_mcast join_cmd;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	join_cmd.response = cmd.response;
	join_cmd.uid = cmd.uid;
	join_cmd.id = cmd.id;
	join_cmd.addr_size = rdma_addr_size_in6(&cmd.addr);
	if (!join_cmd.addr_size)
		return -EINVAL;

	join_cmd.join_flags = RDMA_MC_JOIN_FLAG_FULLMEMBER;
	memcpy(&join_cmd.addr, &cmd.addr, join_cmd.addr_size);

	return ucma_process_join(file, &join_cmd, out_len);
}

static ssize_t ucma_join_multicast(struct ucma_file *file,
				   const char __user *inbuf,
				   int in_len, int out_len)
{
	struct rdma_ucm_join_mcast cmd;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (!rdma_addr_size_kss(&cmd.addr))
		return -EINVAL;

	return ucma_process_join(file, &cmd, out_len);
}

static ssize_t ucma_leave_multicast(struct ucma_file *file,
				    const char __user *inbuf,
				    int in_len, int out_len)
{
	struct rdma_ucm_destroy_id cmd;
	struct rdma_ucm_destroy_id_resp resp;
	struct ucma_multicast *mc;
	int ret = 0;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	mutex_lock(&mut);
	mc = idr_find(&multicast_idr, cmd.id);
	if (!mc)
		mc = ERR_PTR(-ENOENT);
	else if (mc->ctx->file != file)
		mc = ERR_PTR(-EINVAL);
	else if (!atomic_inc_not_zero(&mc->ctx->ref))
		mc = ERR_PTR(-ENXIO);
	else
		idr_remove(&multicast_idr, mc->id);
	mutex_unlock(&mut);

	if (IS_ERR(mc)) {
		ret = PTR_ERR(mc);
		goto out;
	}

	rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr);
	mutex_lock(&mc->ctx->file->mut);
	ucma_cleanup_mc_events(mc);
	list_del(&mc->list);
	mutex_unlock(&mc->ctx->file->mut);

	ucma_put_ctx(mc->ctx);
	resp.events_reported = mc->events_reported;
	kfree(mc);

	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &resp, sizeof(resp)))
		ret = -EFAULT;
out:
	return ret;
}

static void ucma_lock_files(struct ucma_file *file1, struct ucma_file *file2)
{
	/* Acquire mutexes based on pointer comparison to prevent deadlock. */
	if (file1 < file2) {
		mutex_lock(&file1->mut);
		mutex_lock_nested(&file2->mut, SINGLE_DEPTH_NESTING);
	} else {
		mutex_lock(&file2->mut);
		mutex_lock_nested(&file1->mut, SINGLE_DEPTH_NESTING);
	}
}

static void ucma_unlock_files(struct ucma_file *file1, struct ucma_file *file2)
{
	if (file1 < file2) {
		mutex_unlock(&file2->mut);
		mutex_unlock(&file1->mut);
	} else {
		mutex_unlock(&file1->mut);
		mutex_unlock(&file2->mut);
	}
}

static void ucma_move_events(struct ucma_context *ctx, struct ucma_file *file)
{
	struct ucma_event *uevent, *tmp;

	list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list)
		if (uevent->ctx == ctx)
			list_move_tail(&uevent->list, &file->event_list);
}

static ssize_t ucma_migrate_id(struct ucma_file *new_file,
			       const char __user *inbuf,
			       int in_len, int out_len)
{
	struct rdma_ucm_migrate_id cmd;
	struct rdma_ucm_migrate_resp resp;
	struct ucma_context *ctx;
	struct fd f;
	struct ucma_file *cur_file;
	int ret = 0;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	/* Get current fd to protect against it being closed */
	f = fdget(cmd.fd);
	if (!f.file)
		return -ENOENT;
	if (f.file->f_op != &ucma_fops) {
		ret = -EINVAL;
		goto file_put;
	}

	/* Validate current fd and prevent destruction of id. */
	ctx = ucma_get_ctx(f.file->private_data, cmd.id);
	if (IS_ERR(ctx)) {
		ret = PTR_ERR(ctx);
		goto file_put;
	}

	cur_file = ctx->file;
	if (cur_file == new_file) {
		resp.events_reported = ctx->events_reported;
		goto response;
	}

	/*
	 * Migrate events between fd's, maintaining order, and avoiding new
	 * events being added before existing events.
1616 */ 1617 ucma_lock_files(cur_file, new_file); 1618 mutex_lock(&mut); 1619 1620 list_move_tail(&ctx->list, &new_file->ctx_list); 1621 ucma_move_events(ctx, new_file); 1622 ctx->file = new_file; 1623 resp.events_reported = ctx->events_reported; 1624 1625 mutex_unlock(&mut); 1626 ucma_unlock_files(cur_file, new_file); 1627 1628 response: 1629 if (copy_to_user(u64_to_user_ptr(cmd.response), 1630 &resp, sizeof(resp))) 1631 ret = -EFAULT; 1632 1633 ucma_put_ctx(ctx); 1634 file_put: 1635 fdput(f); 1636 return ret; 1637 } 1638 1639 static ssize_t (*ucma_cmd_table[])(struct ucma_file *file, 1640 const char __user *inbuf, 1641 int in_len, int out_len) = { 1642 [RDMA_USER_CM_CMD_CREATE_ID] = ucma_create_id, 1643 [RDMA_USER_CM_CMD_DESTROY_ID] = ucma_destroy_id, 1644 [RDMA_USER_CM_CMD_BIND_IP] = ucma_bind_ip, 1645 [RDMA_USER_CM_CMD_RESOLVE_IP] = ucma_resolve_ip, 1646 [RDMA_USER_CM_CMD_RESOLVE_ROUTE] = ucma_resolve_route, 1647 [RDMA_USER_CM_CMD_QUERY_ROUTE] = ucma_query_route, 1648 [RDMA_USER_CM_CMD_CONNECT] = ucma_connect, 1649 [RDMA_USER_CM_CMD_LISTEN] = ucma_listen, 1650 [RDMA_USER_CM_CMD_ACCEPT] = ucma_accept, 1651 [RDMA_USER_CM_CMD_REJECT] = ucma_reject, 1652 [RDMA_USER_CM_CMD_DISCONNECT] = ucma_disconnect, 1653 [RDMA_USER_CM_CMD_INIT_QP_ATTR] = ucma_init_qp_attr, 1654 [RDMA_USER_CM_CMD_GET_EVENT] = ucma_get_event, 1655 [RDMA_USER_CM_CMD_GET_OPTION] = NULL, 1656 [RDMA_USER_CM_CMD_SET_OPTION] = ucma_set_option, 1657 [RDMA_USER_CM_CMD_NOTIFY] = ucma_notify, 1658 [RDMA_USER_CM_CMD_JOIN_IP_MCAST] = ucma_join_ip_multicast, 1659 [RDMA_USER_CM_CMD_LEAVE_MCAST] = ucma_leave_multicast, 1660 [RDMA_USER_CM_CMD_MIGRATE_ID] = ucma_migrate_id, 1661 [RDMA_USER_CM_CMD_QUERY] = ucma_query, 1662 [RDMA_USER_CM_CMD_BIND] = ucma_bind, 1663 [RDMA_USER_CM_CMD_RESOLVE_ADDR] = ucma_resolve_addr, 1664 [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast 1665 }; 1666 1667 static ssize_t ucma_write(struct file *filp, const char __user *buf, 1668 size_t len, loff_t *pos) 1669 { 1670 struct ucma_file *file = filp->private_data; 1671 struct rdma_ucm_cmd_hdr hdr; 1672 ssize_t ret; 1673 1674 if (!ib_safe_file_access(filp)) { 1675 pr_err_once("ucma_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n", 1676 task_tgid_vnr(current), current->comm); 1677 return -EACCES; 1678 } 1679 1680 if (len < sizeof(hdr)) 1681 return -EINVAL; 1682 1683 if (copy_from_user(&hdr, buf, sizeof(hdr))) 1684 return -EFAULT; 1685 1686 if (hdr.cmd >= ARRAY_SIZE(ucma_cmd_table)) 1687 return -EINVAL; 1688 hdr.cmd = array_index_nospec(hdr.cmd, ARRAY_SIZE(ucma_cmd_table)); 1689 1690 if (hdr.in + sizeof(hdr) > len) 1691 return -EINVAL; 1692 1693 if (!ucma_cmd_table[hdr.cmd]) 1694 return -ENOSYS; 1695 1696 ret = ucma_cmd_table[hdr.cmd](file, buf + sizeof(hdr), hdr.in, hdr.out); 1697 if (!ret) 1698 ret = len; 1699 1700 return ret; 1701 } 1702 1703 static __poll_t ucma_poll(struct file *filp, struct poll_table_struct *wait) 1704 { 1705 struct ucma_file *file = filp->private_data; 1706 __poll_t mask = 0; 1707 1708 poll_wait(filp, &file->poll_wait, wait); 1709 1710 if (!list_empty(&file->event_list)) 1711 mask = EPOLLIN | EPOLLRDNORM; 1712 1713 return mask; 1714 } 1715 1716 /* 1717 * ucma_open() does not need the BKL: 1718 * 1719 * - no global state is referred to; 1720 * - there is no ioctl method to race against; 1721 * - no further module initialization is required for open to work 1722 * after the device is registered. 
 */
static int ucma_open(struct inode *inode, struct file *filp)
{
	struct ucma_file *file;

	file = kmalloc(sizeof *file, GFP_KERNEL);
	if (!file)
		return -ENOMEM;

	file->close_wq = alloc_ordered_workqueue("ucma_close_id",
						 WQ_MEM_RECLAIM);
	if (!file->close_wq) {
		kfree(file);
		return -ENOMEM;
	}

	INIT_LIST_HEAD(&file->event_list);
	INIT_LIST_HEAD(&file->ctx_list);
	init_waitqueue_head(&file->poll_wait);
	mutex_init(&file->mut);

	filp->private_data = file;
	file->filp = filp;

	return stream_open(inode, filp);
}

static int ucma_close(struct inode *inode, struct file *filp)
{
	struct ucma_file *file = filp->private_data;
	struct ucma_context *ctx, *tmp;

	mutex_lock(&file->mut);
	list_for_each_entry_safe(ctx, tmp, &file->ctx_list, list) {
		ctx->destroying = 1;
		mutex_unlock(&file->mut);

		mutex_lock(&mut);
		idr_remove(&ctx_idr, ctx->id);
		mutex_unlock(&mut);

		flush_workqueue(file->close_wq);
		/* Once ctx has been marked as destroying and the workqueue
		 * has been flushed, we are safe from any in-flight handlers
		 * that might queue another closing task.
		 */
		mutex_lock(&mut);
		if (!ctx->closing) {
			mutex_unlock(&mut);
			ucma_put_ctx(ctx);
			wait_for_completion(&ctx->comp);
			/* rdma_destroy_id ensures that no event handlers are
			 * inflight for that id before releasing it.
			 */
			rdma_destroy_id(ctx->cm_id);
		} else {
			mutex_unlock(&mut);
		}

		ucma_free_ctx(ctx);
		mutex_lock(&file->mut);
	}
	mutex_unlock(&file->mut);
	destroy_workqueue(file->close_wq);
	kfree(file);
	return 0;
}

static const struct file_operations ucma_fops = {
	.owner	 = THIS_MODULE,
	.open	 = ucma_open,
	.release = ucma_close,
	.write	 = ucma_write,
	.poll	 = ucma_poll,
	.llseek	 = no_llseek,
};

static struct miscdevice ucma_misc = {
	.minor		= MISC_DYNAMIC_MINOR,
	.name		= "rdma_cm",
	.nodename	= "infiniband/rdma_cm",
	.mode		= 0666,
	.fops		= &ucma_fops,
};

static ssize_t show_abi_version(struct device *dev,
				struct device_attribute *attr,
				char *buf)
{
	return sprintf(buf, "%d\n", RDMA_USER_CM_ABI_VERSION);
}
static DEVICE_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);

static int __init ucma_init(void)
{
	int ret;

	ret = misc_register(&ucma_misc);
	if (ret)
		return ret;

	ret = device_create_file(ucma_misc.this_device, &dev_attr_abi_version);
	if (ret) {
		pr_err("rdma_ucm: couldn't create abi_version attr\n");
		goto err1;
	}

	ucma_ctl_table_hdr = register_net_sysctl(&init_net, "net/rdma_ucm", ucma_ctl_table);
	if (!ucma_ctl_table_hdr) {
		pr_err("rdma_ucm: couldn't register sysctl paths\n");
		ret = -ENOMEM;
		goto err2;
	}
	return 0;
err2:
	device_remove_file(ucma_misc.this_device, &dev_attr_abi_version);
err1:
	misc_deregister(&ucma_misc);
	return ret;
}

static void __exit ucma_cleanup(void)
{
	unregister_net_sysctl_table(ucma_ctl_table_hdr);
	device_remove_file(ucma_misc.this_device, &dev_attr_abi_version);
	misc_deregister(&ucma_misc);
	idr_destroy(&ctx_idr);
	idr_destroy(&multicast_idr);
}

module_init(ucma_init);
module_exit(ucma_cleanup);
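
/*
 * Illustrative sketch only (not part of this driver and not compiled): how a
 * userspace client might drive the write() interface implemented by
 * ucma_write() above. Each request is a struct rdma_ucm_cmd_hdr immediately
 * followed by the command payload; any response is copied to the user buffer
 * named by the payload's 'response' field. The exact uapi layouts live in
 * <rdma/rdma_user_cm.h>; 'my_cookie' below is a hypothetical application
 * value, and the snippet is a hedged example rather than a supported API
 * description.
 *
 *	struct {
 *		struct rdma_ucm_cmd_hdr hdr;
 *		struct rdma_ucm_create_id cmd;
 *	} req = {};
 *	struct rdma_ucm_create_id_resp resp;
 *	int fd = open("/dev/infiniband/rdma_cm", O_RDWR | O_CLOEXEC);
 *
 *	req.hdr.cmd = RDMA_USER_CM_CMD_CREATE_ID;
 *	req.hdr.in  = sizeof(req.cmd);          // payload length after the header
 *	req.hdr.out = sizeof(resp);             // space available at cmd.response
 *	req.cmd.uid = (uintptr_t)my_cookie;     // reported back in events (resp.uid)
 *	req.cmd.response = (uintptr_t)&resp;    // kernel writes the new context id here
 *	req.cmd.ps = RDMA_PS_TCP;               // mapped to IB_QPT_RC by ucma_get_qp_type()
 *	if (write(fd, &req, sizeof(req)) != sizeof(req))
 *		handle_error();                 // on success, resp.id names the context
 */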