/*
 * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/completion.h>
#include <linux/file.h>
#include <linux/mutex.h>
#include <linux/poll.h>
#include <linux/sched.h>
#include <linux/idr.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/miscdevice.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
#include <linux/module.h>
#include <linux/nsproxy.h>

#include <linux/nospec.h>

#include <rdma/rdma_user_cm.h>
#include <rdma/ib_marshall.h>
#include <rdma/rdma_cm.h>
#include <rdma/rdma_cm_ib.h>
#include <rdma/ib_addr.h>
#include <rdma/ib.h>

MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access");
MODULE_LICENSE("Dual BSD/GPL");

static unsigned int max_backlog = 1024;

static struct ctl_table_header *ucma_ctl_table_hdr;
static struct ctl_table ucma_ctl_table[] = {
	{
		.procname	= "max_backlog",
		.data		= &max_backlog,
		.maxlen		= sizeof max_backlog,
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{ }
};

struct ucma_file {
	struct mutex		mut;
	struct file		*filp;
	struct list_head	ctx_list;
	struct list_head	event_list;
	wait_queue_head_t	poll_wait;
	struct workqueue_struct	*close_wq;
};

struct ucma_context {
	int			id;
	struct completion	comp;
	atomic_t		ref;
	int			events_reported;
	int			backlog;

	struct ucma_file	*file;
	struct rdma_cm_id	*cm_id;
	u64			uid;

	struct list_head	list;
	struct list_head	mc_list;
	/* mark that the device is in the process of destroying the internal
	 * HW resources, protected by the global mut
	 */
	int			closing;
	/* sync between removal event and id destroy, protected by file mut */
	int			destroying;
	struct work_struct	close_work;
};

struct ucma_multicast {
	struct ucma_context	*ctx;
	int			id;
	int			events_reported;

	u64			uid;
	u8			join_state;
	struct list_head	list;
	struct sockaddr_storage	addr;
};
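/*
 * Object model: a ucma_file represents one open instance of the rdma_cm
 * misc device and owns its contexts; each ucma_context wraps a single
 * rdma_cm_id, and each ucma_multicast records one multicast join made on a
 * context.  A ucma_event holds one rdma_cm event, already marshalled into
 * the response layout, until userspace collects it via ucma_get_event();
 * events for all of a file's contexts are queued on file->event_list.
 */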
struct ucma_event {
	struct ucma_context	*ctx;
	struct ucma_multicast	*mc;
	struct list_head	list;
	struct rdma_cm_id	*cm_id;
	struct rdma_ucm_event_resp resp;
	struct work_struct	close_work;
};

static DEFINE_MUTEX(mut);
static DEFINE_IDR(ctx_idr);
static DEFINE_IDR(multicast_idr);

static const struct file_operations ucma_fops;

static inline struct ucma_context *_ucma_find_context(int id,
						       struct ucma_file *file)
{
	struct ucma_context *ctx;

	ctx = idr_find(&ctx_idr, id);
	if (!ctx)
		ctx = ERR_PTR(-ENOENT);
	else if (ctx->file != file || !ctx->cm_id)
		ctx = ERR_PTR(-EINVAL);
	return ctx;
}

static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id)
{
	struct ucma_context *ctx;

	mutex_lock(&mut);
	ctx = _ucma_find_context(id, file);
	if (!IS_ERR(ctx)) {
		if (ctx->closing)
			ctx = ERR_PTR(-EIO);
		else
			atomic_inc(&ctx->ref);
	}
	mutex_unlock(&mut);
	return ctx;
}

static void ucma_put_ctx(struct ucma_context *ctx)
{
	if (atomic_dec_and_test(&ctx->ref))
		complete(&ctx->comp);
}

/*
 * Same as ucma_get_ctx but requires that ->cm_id->device is valid, e.g. that
 * the CM_ID is bound.
 */
static struct ucma_context *ucma_get_ctx_dev(struct ucma_file *file, int id)
{
	struct ucma_context *ctx = ucma_get_ctx(file, id);

	if (IS_ERR(ctx))
		return ctx;
	if (!ctx->cm_id->device) {
		ucma_put_ctx(ctx);
		return ERR_PTR(-EINVAL);
	}
	return ctx;
}

static void ucma_close_event_id(struct work_struct *work)
{
	struct ucma_event *uevent_close = container_of(work, struct ucma_event, close_work);

	rdma_destroy_id(uevent_close->cm_id);
	kfree(uevent_close);
}

static void ucma_close_id(struct work_struct *work)
{
	struct ucma_context *ctx = container_of(work, struct ucma_context, close_work);

	/* once all inflight tasks are finished, we close all underlying
	 * resources. The context is still alive until it is explicitly
	 * destroyed by its creator.
	 */
	ucma_put_ctx(ctx);
	wait_for_completion(&ctx->comp);
	/* No new events will be generated after destroying the id. */
	rdma_destroy_id(ctx->cm_id);
}
static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
{
	struct ucma_context *ctx;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return NULL;

	INIT_WORK(&ctx->close_work, ucma_close_id);
	atomic_set(&ctx->ref, 1);
	init_completion(&ctx->comp);
	INIT_LIST_HEAD(&ctx->mc_list);
	ctx->file = file;

	mutex_lock(&mut);
	ctx->id = idr_alloc(&ctx_idr, ctx, 0, 0, GFP_KERNEL);
	mutex_unlock(&mut);
	if (ctx->id < 0)
		goto error;

	list_add_tail(&ctx->list, &file->ctx_list);
	return ctx;

error:
	kfree(ctx);
	return NULL;
}

static struct ucma_multicast* ucma_alloc_multicast(struct ucma_context *ctx)
{
	struct ucma_multicast *mc;

	mc = kzalloc(sizeof(*mc), GFP_KERNEL);
	if (!mc)
		return NULL;

	mutex_lock(&mut);
	mc->id = idr_alloc(&multicast_idr, NULL, 0, 0, GFP_KERNEL);
	mutex_unlock(&mut);
	if (mc->id < 0)
		goto error;

	mc->ctx = ctx;
	list_add_tail(&mc->list, &ctx->mc_list);
	return mc;

error:
	kfree(mc);
	return NULL;
}

static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst,
				 struct rdma_conn_param *src)
{
	if (src->private_data_len)
		memcpy(dst->private_data, src->private_data,
		       src->private_data_len);
	dst->private_data_len = src->private_data_len;
	dst->responder_resources = src->responder_resources;
	dst->initiator_depth = src->initiator_depth;
	dst->flow_control = src->flow_control;
	dst->retry_count = src->retry_count;
	dst->rnr_retry_count = src->rnr_retry_count;
	dst->srq = src->srq;
	dst->qp_num = src->qp_num;
}

static void ucma_copy_ud_event(struct ib_device *device,
			       struct rdma_ucm_ud_param *dst,
			       struct rdma_ud_param *src)
{
	if (src->private_data_len)
		memcpy(dst->private_data, src->private_data,
		       src->private_data_len);
	dst->private_data_len = src->private_data_len;
	ib_copy_ah_attr_to_user(device, &dst->ah_attr, &src->ah_attr);
	dst->qp_num = src->qp_num;
	dst->qkey = src->qkey;
}

static void ucma_set_event_context(struct ucma_context *ctx,
				   struct rdma_cm_event *event,
				   struct ucma_event *uevent)
{
	uevent->ctx = ctx;
	switch (event->event) {
	case RDMA_CM_EVENT_MULTICAST_JOIN:
	case RDMA_CM_EVENT_MULTICAST_ERROR:
		uevent->mc = (struct ucma_multicast *)
			     event->param.ud.private_data;
		uevent->resp.uid = uevent->mc->uid;
		uevent->resp.id = uevent->mc->id;
		break;
	default:
		uevent->resp.uid = ctx->uid;
		uevent->resp.id = ctx->id;
		break;
	}
}

/* Called with file->mut locked for the relevant context. */
static void ucma_removal_event_handler(struct rdma_cm_id *cm_id)
{
	struct ucma_context *ctx = cm_id->context;
	struct ucma_event *con_req_eve;
	int event_found = 0;

	if (ctx->destroying)
		return;

	/* Only if the context still points to this cm_id does it own it and
	 * can it be queued to be closed; otherwise the cm_id belongs to an
	 * inflight connect request sitting on the context's event list,
	 * waiting to be detached and reattached to a new context as part of
	 * ucma_get_event, and is handled separately below.
	 */
	if (ctx->cm_id == cm_id) {
		mutex_lock(&mut);
		ctx->closing = 1;
		mutex_unlock(&mut);
		queue_work(ctx->file->close_wq, &ctx->close_work);
		return;
	}

	list_for_each_entry(con_req_eve, &ctx->file->event_list, list) {
		if (con_req_eve->cm_id == cm_id &&
		    con_req_eve->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) {
			list_del(&con_req_eve->list);
			INIT_WORK(&con_req_eve->close_work, ucma_close_event_id);
			queue_work(ctx->file->close_wq, &con_req_eve->close_work);
			event_found = 1;
			break;
		}
	}
	if (!event_found)
		pr_err("ucma_removal_event_handler: warning: connect request event wasn't found\n");
}
static int ucma_event_handler(struct rdma_cm_id *cm_id,
			      struct rdma_cm_event *event)
{
	struct ucma_event *uevent;
	struct ucma_context *ctx = cm_id->context;
	int ret = 0;

	uevent = kzalloc(sizeof(*uevent), GFP_KERNEL);
	if (!uevent)
		return event->event == RDMA_CM_EVENT_CONNECT_REQUEST;

	mutex_lock(&ctx->file->mut);
	uevent->cm_id = cm_id;
	ucma_set_event_context(ctx, event, uevent);
	uevent->resp.event = event->event;
	uevent->resp.status = event->status;
	if (cm_id->qp_type == IB_QPT_UD)
		ucma_copy_ud_event(cm_id->device, &uevent->resp.param.ud,
				   &event->param.ud);
	else
		ucma_copy_conn_event(&uevent->resp.param.conn,
				     &event->param.conn);

	if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) {
		if (!ctx->backlog) {
			ret = -ENOMEM;
			kfree(uevent);
			goto out;
		}
		ctx->backlog--;
	} else if (!ctx->uid || ctx->cm_id != cm_id) {
		/*
		 * We ignore events for new connections until userspace has set
		 * their context.  This can only happen if an error occurs on a
		 * new connection before the user accepts it.  This is okay,
		 * since the accept will just fail later. However, we do need
		 * to release the underlying HW resources in case of a device
		 * removal event.
		 */
		if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL)
			ucma_removal_event_handler(cm_id);

		kfree(uevent);
		goto out;
	}

	list_add_tail(&uevent->list, &ctx->file->event_list);
	wake_up_interruptible(&ctx->file->poll_wait);
	if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL)
		ucma_removal_event_handler(cm_id);
out:
	mutex_unlock(&ctx->file->mut);
	return ret;
}
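/*
 * Hand the oldest queued event to userspace.  A connect request arrives on
 * a cm_id that has no userspace context yet, so a fresh ucma_context is
 * allocated here, the listener's backlog slot is returned, and the new
 * context's id is reported in resp.id for the subsequent accept or reject.
 */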
static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
			      int in_len, int out_len)
{
	struct ucma_context *ctx;
	struct rdma_ucm_get_event cmd;
	struct ucma_event *uevent;
	int ret = 0;

	/*
	 * Old 32 bit user space does not send the 4 byte padding in the
	 * reserved field. We don't care, allow it to keep working.
	 */
	if (out_len < sizeof(uevent->resp) - sizeof(uevent->resp.reserved))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	mutex_lock(&file->mut);
	while (list_empty(&file->event_list)) {
		mutex_unlock(&file->mut);

		if (file->filp->f_flags & O_NONBLOCK)
			return -EAGAIN;

		if (wait_event_interruptible(file->poll_wait,
					     !list_empty(&file->event_list)))
			return -ERESTARTSYS;

		mutex_lock(&file->mut);
	}

	uevent = list_entry(file->event_list.next, struct ucma_event, list);

	if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) {
		ctx = ucma_alloc_ctx(file);
		if (!ctx) {
			ret = -ENOMEM;
			goto done;
		}
		uevent->ctx->backlog++;
		ctx->cm_id = uevent->cm_id;
		ctx->cm_id->context = ctx;
		uevent->resp.id = ctx->id;
	}

	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &uevent->resp,
			 min_t(size_t, out_len, sizeof(uevent->resp)))) {
		ret = -EFAULT;
		goto done;
	}

	list_del(&uevent->list);
	uevent->ctx->events_reported++;
	if (uevent->mc)
		uevent->mc->events_reported++;
	kfree(uevent);
done:
	mutex_unlock(&file->mut);
	return ret;
}

static int ucma_get_qp_type(struct rdma_ucm_create_id *cmd, enum ib_qp_type *qp_type)
{
	switch (cmd->ps) {
	case RDMA_PS_TCP:
		*qp_type = IB_QPT_RC;
		return 0;
	case RDMA_PS_UDP:
	case RDMA_PS_IPOIB:
		*qp_type = IB_QPT_UD;
		return 0;
	case RDMA_PS_IB:
		*qp_type = cmd->qp_type;
		return 0;
	default:
		return -EINVAL;
	}
}

static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
			      int in_len, int out_len)
{
	struct rdma_ucm_create_id cmd;
	struct rdma_ucm_create_id_resp resp;
	struct ucma_context *ctx;
	struct rdma_cm_id *cm_id;
	enum ib_qp_type qp_type;
	int ret;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ret = ucma_get_qp_type(&cmd, &qp_type);
	if (ret)
		return ret;

	mutex_lock(&file->mut);
	ctx = ucma_alloc_ctx(file);
	mutex_unlock(&file->mut);
	if (!ctx)
		return -ENOMEM;

	ctx->uid = cmd.uid;
	cm_id = __rdma_create_id(current->nsproxy->net_ns,
				 ucma_event_handler, ctx, cmd.ps, qp_type, NULL);
	if (IS_ERR(cm_id)) {
		ret = PTR_ERR(cm_id);
		goto err1;
	}

	resp.id = ctx->id;
	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &resp, sizeof(resp))) {
		ret = -EFAULT;
		goto err2;
	}

	ctx->cm_id = cm_id;
	return 0;

err2:
	rdma_destroy_id(cm_id);
err1:
	mutex_lock(&mut);
	idr_remove(&ctx_idr, ctx->id);
	mutex_unlock(&mut);
	mutex_lock(&file->mut);
	list_del(&ctx->list);
	mutex_unlock(&file->mut);
	kfree(ctx);
	return ret;
}

static void ucma_cleanup_multicast(struct ucma_context *ctx)
{
	struct ucma_multicast *mc, *tmp;

	mutex_lock(&mut);
	list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) {
		list_del(&mc->list);
		idr_remove(&multicast_idr, mc->id);
		kfree(mc);
	}
	mutex_unlock(&mut);
}

static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
{
	struct ucma_event *uevent, *tmp;

	list_for_each_entry_safe(uevent, tmp, &mc->ctx->file->event_list, list) {
		if (uevent->mc != mc)
			continue;

		list_del(&uevent->list);
		kfree(uevent);
	}
}
/*
 * ucma_free_ctx is called after the underlying rdma CM-ID is destroyed. At
 * this point, no new events will be reported from the hardware. However, we
 * still need to clean up the UCMA context for this ID. Specifically, there
 * might be events that have not yet been consumed by the user space software.
 * These might include pending connect requests which we have not completed
 * processing.  We cannot call rdma_destroy_id while holding the lock of the
 * context (file->mut), as it might cause a deadlock. We therefore extract all
 * relevant events from the context pending events list while holding the
 * mutex. After that we release them as needed.
 */
static int ucma_free_ctx(struct ucma_context *ctx)
{
	int events_reported;
	struct ucma_event *uevent, *tmp;
	LIST_HEAD(list);

	ucma_cleanup_multicast(ctx);

	/* Cleanup events not yet reported to the user. */
	mutex_lock(&ctx->file->mut);
	list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) {
		if (uevent->ctx == ctx)
			list_move_tail(&uevent->list, &list);
	}
	list_del(&ctx->list);
	mutex_unlock(&ctx->file->mut);

	list_for_each_entry_safe(uevent, tmp, &list, list) {
		list_del(&uevent->list);
		if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST)
			rdma_destroy_id(uevent->cm_id);
		kfree(uevent);
	}

	events_reported = ctx->events_reported;
	kfree(ctx);
	return events_reported;
}

static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
			       int in_len, int out_len)
{
	struct rdma_ucm_destroy_id cmd;
	struct rdma_ucm_destroy_id_resp resp;
	struct ucma_context *ctx;
	int ret = 0;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	mutex_lock(&mut);
	ctx = _ucma_find_context(cmd.id, file);
	if (!IS_ERR(ctx))
		idr_remove(&ctx_idr, ctx->id);
	mutex_unlock(&mut);

	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->file->mut);
	ctx->destroying = 1;
	mutex_unlock(&ctx->file->mut);

	flush_workqueue(ctx->file->close_wq);
	/* At this point it's guaranteed that there is no inflight
	 * closing task */
	mutex_lock(&mut);
	if (!ctx->closing) {
		mutex_unlock(&mut);
		ucma_put_ctx(ctx);
		wait_for_completion(&ctx->comp);
		rdma_destroy_id(ctx->cm_id);
	} else {
		mutex_unlock(&mut);
	}

	resp.events_reported = ucma_free_ctx(ctx);
	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &resp, sizeof(resp)))
		ret = -EFAULT;

	return ret;
}
static ssize_t ucma_bind_ip(struct ucma_file *file, const char __user *inbuf,
			    int in_len, int out_len)
{
	struct rdma_ucm_bind_ip cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (!rdma_addr_size_in6(&cmd.addr))
		return -EINVAL;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr);
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_bind(struct ucma_file *file, const char __user *inbuf,
			 int in_len, int out_len)
{
	struct rdma_ucm_bind cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (cmd.reserved || !cmd.addr_size ||
	    cmd.addr_size != rdma_addr_size_kss(&cmd.addr))
		return -EINVAL;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr);
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_resolve_ip(struct ucma_file *file,
			       const char __user *inbuf,
			       int in_len, int out_len)
{
	struct rdma_ucm_resolve_ip cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if ((cmd.src_addr.sin6_family && !rdma_addr_size_in6(&cmd.src_addr)) ||
	    !rdma_addr_size_in6(&cmd.dst_addr))
		return -EINVAL;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr,
				(struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms);
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_resolve_addr(struct ucma_file *file,
				 const char __user *inbuf,
				 int in_len, int out_len)
{
	struct rdma_ucm_resolve_addr cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (cmd.reserved ||
	    (cmd.src_size && (cmd.src_size != rdma_addr_size_kss(&cmd.src_addr))) ||
	    !cmd.dst_size || (cmd.dst_size != rdma_addr_size_kss(&cmd.dst_addr)))
		return -EINVAL;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr,
				(struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms);
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_resolve_route(struct ucma_file *file,
				  const char __user *inbuf,
				  int in_len, int out_len)
{
	struct rdma_ucm_resolve_route cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx_dev(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	ret = rdma_resolve_route(ctx->cm_id, cmd.timeout_ms);
	ucma_put_ctx(ctx);
	return ret;
}
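/*
 * ucma_copy_ib_route() and ucma_copy_iboe_route() below share the num_paths
 * convention reported to userspace: 0 means only addressing information
 * (GIDs and pkey) is available, 1 means a primary path record has been
 * resolved, and 2 adds an alternate path record, which is why case 2 falls
 * through to case 1.
 */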
static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp,
			       struct rdma_route *route)
{
	struct rdma_dev_addr *dev_addr;

	resp->num_paths = route->num_paths;
	switch (route->num_paths) {
	case 0:
		dev_addr = &route->addr.dev_addr;
		rdma_addr_get_dgid(dev_addr,
				   (union ib_gid *) &resp->ib_route[0].dgid);
		rdma_addr_get_sgid(dev_addr,
				   (union ib_gid *) &resp->ib_route[0].sgid);
		resp->ib_route[0].pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
		break;
	case 2:
		ib_copy_path_rec_to_user(&resp->ib_route[1],
					 &route->path_rec[1]);
		/* fall through */
	case 1:
		ib_copy_path_rec_to_user(&resp->ib_route[0],
					 &route->path_rec[0]);
		break;
	default:
		break;
	}
}

static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp,
				 struct rdma_route *route)
{

	resp->num_paths = route->num_paths;
	switch (route->num_paths) {
	case 0:
		rdma_ip2gid((struct sockaddr *)&route->addr.dst_addr,
			    (union ib_gid *)&resp->ib_route[0].dgid);
		rdma_ip2gid((struct sockaddr *)&route->addr.src_addr,
			    (union ib_gid *)&resp->ib_route[0].sgid);
		resp->ib_route[0].pkey = cpu_to_be16(0xffff);
		break;
	case 2:
		ib_copy_path_rec_to_user(&resp->ib_route[1],
					 &route->path_rec[1]);
		/* fall through */
	case 1:
		ib_copy_path_rec_to_user(&resp->ib_route[0],
					 &route->path_rec[0]);
		break;
	default:
		break;
	}
}

static void ucma_copy_iw_route(struct rdma_ucm_query_route_resp *resp,
			       struct rdma_route *route)
{
	struct rdma_dev_addr *dev_addr;

	dev_addr = &route->addr.dev_addr;
	rdma_addr_get_dgid(dev_addr, (union ib_gid *) &resp->ib_route[0].dgid);
	rdma_addr_get_sgid(dev_addr, (union ib_gid *) &resp->ib_route[0].sgid);
}

static ssize_t ucma_query_route(struct ucma_file *file,
				const char __user *inbuf,
				int in_len, int out_len)
{
	struct rdma_ucm_query cmd;
	struct rdma_ucm_query_route_resp resp;
	struct ucma_context *ctx;
	struct sockaddr *addr;
	int ret = 0;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	memset(&resp, 0, sizeof resp);
	addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr;
	memcpy(&resp.src_addr, addr, addr->sa_family == AF_INET ?
				     sizeof(struct sockaddr_in) :
				     sizeof(struct sockaddr_in6));
	addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr;
	memcpy(&resp.dst_addr, addr, addr->sa_family == AF_INET ?
				     sizeof(struct sockaddr_in) :
				     sizeof(struct sockaddr_in6));
	if (!ctx->cm_id->device)
		goto out;

	resp.node_guid = (__force __u64) ctx->cm_id->device->node_guid;
	resp.port_num = ctx->cm_id->port_num;

	if (rdma_cap_ib_sa(ctx->cm_id->device, ctx->cm_id->port_num))
		ucma_copy_ib_route(&resp, &ctx->cm_id->route);
	else if (rdma_protocol_roce(ctx->cm_id->device, ctx->cm_id->port_num))
		ucma_copy_iboe_route(&resp, &ctx->cm_id->route);
	else if (rdma_protocol_iwarp(ctx->cm_id->device, ctx->cm_id->port_num))
		ucma_copy_iw_route(&resp, &ctx->cm_id->route);

out:
	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &resp, sizeof(resp)))
		ret = -EFAULT;

	ucma_put_ctx(ctx);
	return ret;
}

static void ucma_query_device_addr(struct rdma_cm_id *cm_id,
				   struct rdma_ucm_query_addr_resp *resp)
{
	if (!cm_id->device)
		return;

	resp->node_guid = (__force __u64) cm_id->device->node_guid;
	resp->port_num = cm_id->port_num;
	resp->pkey = (__force __u16) cpu_to_be16(
		     ib_addr_get_pkey(&cm_id->route.addr.dev_addr));
}

static ssize_t ucma_query_addr(struct ucma_context *ctx,
			       void __user *response, int out_len)
{
	struct rdma_ucm_query_addr_resp resp;
	struct sockaddr *addr;
	int ret = 0;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	memset(&resp, 0, sizeof resp);

	addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr;
	resp.src_size = rdma_addr_size(addr);
	memcpy(&resp.src_addr, addr, resp.src_size);

	addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr;
	resp.dst_size = rdma_addr_size(addr);
	memcpy(&resp.dst_addr, addr, resp.dst_size);

	ucma_query_device_addr(ctx->cm_id, &resp);

	if (copy_to_user(response, &resp, sizeof(resp)))
		ret = -EFAULT;

	return ret;
}
static ssize_t ucma_query_path(struct ucma_context *ctx,
			       void __user *response, int out_len)
{
	struct rdma_ucm_query_path_resp *resp;
	int i, ret = 0;

	if (out_len < sizeof(*resp))
		return -ENOSPC;

	resp = kzalloc(out_len, GFP_KERNEL);
	if (!resp)
		return -ENOMEM;

	resp->num_paths = ctx->cm_id->route.num_paths;
	for (i = 0, out_len -= sizeof(*resp);
	     i < resp->num_paths && out_len > sizeof(struct ib_path_rec_data);
	     i++, out_len -= sizeof(struct ib_path_rec_data)) {
		struct sa_path_rec *rec = &ctx->cm_id->route.path_rec[i];

		resp->path_data[i].flags = IB_PATH_GMP | IB_PATH_PRIMARY |
					   IB_PATH_BIDIRECTIONAL;
		if (rec->rec_type == SA_PATH_REC_TYPE_OPA) {
			struct sa_path_rec ib;

			sa_convert_path_opa_to_ib(&ib, rec);
			ib_sa_pack_path(&ib, &resp->path_data[i].path_rec);

		} else {
			ib_sa_pack_path(rec, &resp->path_data[i].path_rec);
		}
	}

	if (copy_to_user(response, resp,
			 sizeof(*resp) + (i * sizeof(struct ib_path_rec_data))))
		ret = -EFAULT;

	kfree(resp);
	return ret;
}

static ssize_t ucma_query_gid(struct ucma_context *ctx,
			      void __user *response, int out_len)
{
	struct rdma_ucm_query_addr_resp resp;
	struct sockaddr_ib *addr;
	int ret = 0;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	memset(&resp, 0, sizeof resp);

	ucma_query_device_addr(ctx->cm_id, &resp);

	addr = (struct sockaddr_ib *) &resp.src_addr;
	resp.src_size = sizeof(*addr);
	if (ctx->cm_id->route.addr.src_addr.ss_family == AF_IB) {
		memcpy(addr, &ctx->cm_id->route.addr.src_addr, resp.src_size);
	} else {
		addr->sib_family = AF_IB;
		addr->sib_pkey = (__force __be16) resp.pkey;
		rdma_read_gids(ctx->cm_id, (union ib_gid *)&addr->sib_addr,
			       NULL);
		addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
						    &ctx->cm_id->route.addr.src_addr);
	}

	addr = (struct sockaddr_ib *) &resp.dst_addr;
	resp.dst_size = sizeof(*addr);
	if (ctx->cm_id->route.addr.dst_addr.ss_family == AF_IB) {
		memcpy(addr, &ctx->cm_id->route.addr.dst_addr, resp.dst_size);
	} else {
		addr->sib_family = AF_IB;
		addr->sib_pkey = (__force __be16) resp.pkey;
		rdma_read_gids(ctx->cm_id, NULL,
			       (union ib_gid *)&addr->sib_addr);
		addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
						    &ctx->cm_id->route.addr.dst_addr);
	}

	if (copy_to_user(response, &resp, sizeof(resp)))
		ret = -EFAULT;

	return ret;
}

static ssize_t ucma_query(struct ucma_file *file,
			  const char __user *inbuf,
			  int in_len, int out_len)
{
	struct rdma_ucm_query cmd;
	struct ucma_context *ctx;
	void __user *response;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	response = u64_to_user_ptr(cmd.response);
	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	switch (cmd.option) {
	case RDMA_USER_CM_QUERY_ADDR:
		ret = ucma_query_addr(ctx, response, out_len);
		break;
	case RDMA_USER_CM_QUERY_PATH:
		ret = ucma_query_path(ctx, response, out_len);
		break;
	case RDMA_USER_CM_QUERY_GID:
		ret = ucma_query_gid(ctx, response, out_len);
		break;
	default:
		ret = -ENOSYS;
		break;
	}

	ucma_put_ctx(ctx);
	return ret;
}
static void ucma_copy_conn_param(struct rdma_cm_id *id,
				 struct rdma_conn_param *dst,
				 struct rdma_ucm_conn_param *src)
{
	dst->private_data = src->private_data;
	dst->private_data_len = src->private_data_len;
	dst->responder_resources = src->responder_resources;
	dst->initiator_depth = src->initiator_depth;
	dst->flow_control = src->flow_control;
	dst->retry_count = src->retry_count;
	dst->rnr_retry_count = src->rnr_retry_count;
	dst->srq = src->srq;
	dst->qp_num = src->qp_num;
	dst->qkey = (id->route.addr.src_addr.ss_family == AF_IB) ? src->qkey : 0;
}

static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf,
			    int in_len, int out_len)
{
	struct rdma_ucm_connect cmd;
	struct rdma_conn_param conn_param;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (!cmd.conn_param.valid)
		return -EINVAL;

	ctx = ucma_get_ctx_dev(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
	ret = rdma_connect(ctx->cm_id, &conn_param);
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_listen(struct ucma_file *file, const char __user *inbuf,
			   int in_len, int out_len)
{
	struct rdma_ucm_listen cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	ctx->backlog = cmd.backlog > 0 && cmd.backlog < max_backlog ?
		       cmd.backlog : max_backlog;
	ret = rdma_listen(ctx->cm_id, ctx->backlog);
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf,
			   int in_len, int out_len)
{
	struct rdma_ucm_accept cmd;
	struct rdma_conn_param conn_param;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx_dev(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	if (cmd.conn_param.valid) {
		ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
		mutex_lock(&file->mut);
		ret = __rdma_accept(ctx->cm_id, &conn_param, NULL);
		if (!ret)
			ctx->uid = cmd.uid;
		mutex_unlock(&file->mut);
	} else
		ret = __rdma_accept(ctx->cm_id, NULL, NULL);

	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_reject(struct ucma_file *file, const char __user *inbuf,
			   int in_len, int out_len)
{
	struct rdma_ucm_reject cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx_dev(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	ret = rdma_reject(ctx->cm_id, cmd.private_data, cmd.private_data_len);
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_disconnect(struct ucma_file *file, const char __user *inbuf,
			       int in_len, int out_len)
{
	struct rdma_ucm_disconnect cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx_dev(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	ret = rdma_disconnect(ctx->cm_id);
	ucma_put_ctx(ctx);
	return ret;
}
static ssize_t ucma_init_qp_attr(struct ucma_file *file,
				 const char __user *inbuf,
				 int in_len, int out_len)
{
	struct rdma_ucm_init_qp_attr cmd;
	struct ib_uverbs_qp_attr resp;
	struct ucma_context *ctx;
	struct ib_qp_attr qp_attr;
	int ret;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (cmd.qp_state > IB_QPS_ERR)
		return -EINVAL;

	ctx = ucma_get_ctx_dev(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	resp.qp_attr_mask = 0;
	memset(&qp_attr, 0, sizeof qp_attr);
	qp_attr.qp_state = cmd.qp_state;
	ret = rdma_init_qp_attr(ctx->cm_id, &qp_attr, &resp.qp_attr_mask);
	if (ret)
		goto out;

	ib_copy_qp_attr_to_user(ctx->cm_id->device, &resp, &qp_attr);
	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &resp, sizeof(resp)))
		ret = -EFAULT;

out:
	ucma_put_ctx(ctx);
	return ret;
}

static int ucma_set_option_id(struct ucma_context *ctx, int optname,
			      void *optval, size_t optlen)
{
	int ret = 0;

	switch (optname) {
	case RDMA_OPTION_ID_TOS:
		if (optlen != sizeof(u8)) {
			ret = -EINVAL;
			break;
		}
		rdma_set_service_type(ctx->cm_id, *((u8 *) optval));
		break;
	case RDMA_OPTION_ID_REUSEADDR:
		if (optlen != sizeof(int)) {
			ret = -EINVAL;
			break;
		}
		ret = rdma_set_reuseaddr(ctx->cm_id, *((int *) optval) ? 1 : 0);
		break;
	case RDMA_OPTION_ID_AFONLY:
		if (optlen != sizeof(int)) {
			ret = -EINVAL;
			break;
		}
		ret = rdma_set_afonly(ctx->cm_id, *((int *) optval) ? 1 : 0);
		break;
	default:
		ret = -ENOSYS;
	}

	return ret;
}

static int ucma_set_ib_path(struct ucma_context *ctx,
			    struct ib_path_rec_data *path_data, size_t optlen)
{
	struct sa_path_rec sa_path;
	struct rdma_cm_event event;
	int ret;

	if (optlen % sizeof(*path_data))
		return -EINVAL;

	for (; optlen; optlen -= sizeof(*path_data), path_data++) {
		if (path_data->flags == (IB_PATH_GMP | IB_PATH_PRIMARY |
					 IB_PATH_BIDIRECTIONAL))
			break;
	}

	if (!optlen)
		return -EINVAL;

	if (!ctx->cm_id->device)
		return -EINVAL;

	memset(&sa_path, 0, sizeof(sa_path));

	sa_path.rec_type = SA_PATH_REC_TYPE_IB;
	ib_sa_unpack_path(path_data->path_rec, &sa_path);

	if (rdma_cap_opa_ah(ctx->cm_id->device, ctx->cm_id->port_num)) {
		struct sa_path_rec opa;

		sa_convert_path_ib_to_opa(&opa, &sa_path);
		ret = rdma_set_ib_path(ctx->cm_id, &opa);
	} else {
		ret = rdma_set_ib_path(ctx->cm_id, &sa_path);
	}
	if (ret)
		return ret;

	memset(&event, 0, sizeof event);
	event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
	return ucma_event_handler(ctx->cm_id, &event);
}

static int ucma_set_option_ib(struct ucma_context *ctx, int optname,
			      void *optval, size_t optlen)
{
	int ret;

	switch (optname) {
	case RDMA_OPTION_IB_PATH:
		ret = ucma_set_ib_path(ctx, optval, optlen);
		break;
	default:
		ret = -ENOSYS;
	}

	return ret;
}

static int ucma_set_option_level(struct ucma_context *ctx, int level,
				 int optname, void *optval, size_t optlen)
{
	int ret;

	switch (level) {
	case RDMA_OPTION_ID:
		ret = ucma_set_option_id(ctx, optname, optval, optlen);
		break;
	case RDMA_OPTION_IB:
		ret = ucma_set_option_ib(ctx, optname, optval, optlen);
		break;
	default:
		ret = -ENOSYS;
	}

	return ret;
}
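/*
 * Illustrative note (an assumption about the librdmacm wrapper, not derived
 * from this file): userspace normally reaches the handler below through
 * rdma_set_option(), e.g.
 *
 *	uint8_t tos = 0x10;
 *	rdma_set_option(id, RDMA_OPTION_ID, RDMA_OPTION_ID_TOS,
 *			&tos, sizeof(tos));
 *
 * which the library marshals into an RDMA_USER_CM_CMD_SET_OPTION write.
 * ucma_set_option() then copies the option value with memdup_user() and
 * dispatches on (level, optname) via ucma_set_option_level().
 */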
static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf,
			       int in_len, int out_len)
{
	struct rdma_ucm_set_option cmd;
	struct ucma_context *ctx;
	void *optval;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (unlikely(cmd.optlen > KMALLOC_MAX_SIZE))
		return -EINVAL;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	optval = memdup_user(u64_to_user_ptr(cmd.optval),
			     cmd.optlen);
	if (IS_ERR(optval)) {
		ret = PTR_ERR(optval);
		goto out;
	}

	ret = ucma_set_option_level(ctx, cmd.level, cmd.optname, optval,
				    cmd.optlen);
	kfree(optval);

out:
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf,
			   int in_len, int out_len)
{
	struct rdma_ucm_notify cmd;
	struct ucma_context *ctx;
	int ret = -EINVAL;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	if (ctx->cm_id->device)
		ret = rdma_notify(ctx->cm_id, (enum ib_event_type)cmd.event);

	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_process_join(struct ucma_file *file,
				 struct rdma_ucm_join_mcast *cmd, int out_len)
{
	struct rdma_ucm_create_id_resp resp;
	struct ucma_context *ctx;
	struct ucma_multicast *mc;
	struct sockaddr *addr;
	int ret;
	u8 join_state;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	addr = (struct sockaddr *) &cmd->addr;
	if (cmd->addr_size != rdma_addr_size(addr))
		return -EINVAL;

	if (cmd->join_flags == RDMA_MC_JOIN_FLAG_FULLMEMBER)
		join_state = BIT(FULLMEMBER_JOIN);
	else if (cmd->join_flags == RDMA_MC_JOIN_FLAG_SENDONLY_FULLMEMBER)
		join_state = BIT(SENDONLY_FULLMEMBER_JOIN);
	else
		return -EINVAL;

	ctx = ucma_get_ctx_dev(file, cmd->id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&file->mut);
	mc = ucma_alloc_multicast(ctx);
	if (!mc) {
		ret = -ENOMEM;
		goto err1;
	}
	mc->join_state = join_state;
	mc->uid = cmd->uid;
	memcpy(&mc->addr, addr, cmd->addr_size);
	ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr,
				  join_state, mc);
	if (ret)
		goto err2;

	resp.id = mc->id;
	if (copy_to_user(u64_to_user_ptr(cmd->response),
			 &resp, sizeof(resp))) {
		ret = -EFAULT;
		goto err3;
	}

	mutex_lock(&mut);
	idr_replace(&multicast_idr, mc, mc->id);
	mutex_unlock(&mut);

	mutex_unlock(&file->mut);
	ucma_put_ctx(ctx);
	return 0;

err3:
	rdma_leave_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr);
	ucma_cleanup_mc_events(mc);
err2:
	mutex_lock(&mut);
	idr_remove(&multicast_idr, mc->id);
	mutex_unlock(&mut);
	list_del(&mc->list);
	kfree(mc);
err1:
	mutex_unlock(&file->mut);
	ucma_put_ctx(ctx);
	return ret;
}
static ssize_t ucma_join_ip_multicast(struct ucma_file *file,
				      const char __user *inbuf,
				      int in_len, int out_len)
{
	struct rdma_ucm_join_ip_mcast cmd;
	struct rdma_ucm_join_mcast join_cmd;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	join_cmd.response = cmd.response;
	join_cmd.uid = cmd.uid;
	join_cmd.id = cmd.id;
	join_cmd.addr_size = rdma_addr_size_in6(&cmd.addr);
	if (!join_cmd.addr_size)
		return -EINVAL;

	join_cmd.join_flags = RDMA_MC_JOIN_FLAG_FULLMEMBER;
	memcpy(&join_cmd.addr, &cmd.addr, join_cmd.addr_size);

	return ucma_process_join(file, &join_cmd, out_len);
}

static ssize_t ucma_join_multicast(struct ucma_file *file,
				   const char __user *inbuf,
				   int in_len, int out_len)
{
	struct rdma_ucm_join_mcast cmd;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (!rdma_addr_size_kss(&cmd.addr))
		return -EINVAL;

	return ucma_process_join(file, &cmd, out_len);
}

static ssize_t ucma_leave_multicast(struct ucma_file *file,
				    const char __user *inbuf,
				    int in_len, int out_len)
{
	struct rdma_ucm_destroy_id cmd;
	struct rdma_ucm_destroy_id_resp resp;
	struct ucma_multicast *mc;
	int ret = 0;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	mutex_lock(&mut);
	mc = idr_find(&multicast_idr, cmd.id);
	if (!mc)
		mc = ERR_PTR(-ENOENT);
	else if (mc->ctx->file != file)
		mc = ERR_PTR(-EINVAL);
	else if (!atomic_inc_not_zero(&mc->ctx->ref))
		mc = ERR_PTR(-ENXIO);
	else
		idr_remove(&multicast_idr, mc->id);
	mutex_unlock(&mut);

	if (IS_ERR(mc)) {
		ret = PTR_ERR(mc);
		goto out;
	}

	rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr);
	mutex_lock(&mc->ctx->file->mut);
	ucma_cleanup_mc_events(mc);
	list_del(&mc->list);
	mutex_unlock(&mc->ctx->file->mut);

	ucma_put_ctx(mc->ctx);
	resp.events_reported = mc->events_reported;
	kfree(mc);

	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &resp, sizeof(resp)))
		ret = -EFAULT;
out:
	return ret;
}

static void ucma_lock_files(struct ucma_file *file1, struct ucma_file *file2)
{
	/* Acquire mutexes based on pointer comparison to prevent deadlock. */
	if (file1 < file2) {
		mutex_lock(&file1->mut);
		mutex_lock_nested(&file2->mut, SINGLE_DEPTH_NESTING);
	} else {
		mutex_lock(&file2->mut);
		mutex_lock_nested(&file1->mut, SINGLE_DEPTH_NESTING);
	}
}

static void ucma_unlock_files(struct ucma_file *file1, struct ucma_file *file2)
{
	if (file1 < file2) {
		mutex_unlock(&file2->mut);
		mutex_unlock(&file1->mut);
	} else {
		mutex_unlock(&file1->mut);
		mutex_unlock(&file2->mut);
	}
}

static void ucma_move_events(struct ucma_context *ctx, struct ucma_file *file)
{
	struct ucma_event *uevent, *tmp;

	list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list)
		if (uevent->ctx == ctx)
			list_move_tail(&uevent->list, &file->event_list);
}

static ssize_t ucma_migrate_id(struct ucma_file *new_file,
			       const char __user *inbuf,
			       int in_len, int out_len)
{
	struct rdma_ucm_migrate_id cmd;
	struct rdma_ucm_migrate_resp resp;
	struct ucma_context *ctx;
	struct fd f;
	struct ucma_file *cur_file;
	int ret = 0;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	/* Get current fd to protect against it being closed */
	f = fdget(cmd.fd);
	if (!f.file)
		return -ENOENT;
	if (f.file->f_op != &ucma_fops) {
		ret = -EINVAL;
		goto file_put;
	}

	/* Validate current fd and prevent destruction of id. */
	ctx = ucma_get_ctx(f.file->private_data, cmd.id);
	if (IS_ERR(ctx)) {
		ret = PTR_ERR(ctx);
		goto file_put;
	}

	cur_file = ctx->file;
	if (cur_file == new_file) {
		resp.events_reported = ctx->events_reported;
		goto response;
	}

	/*
	 * Migrate events between fd's, maintaining order, and avoiding new
	 * events being added before existing events.
	 */
	ucma_lock_files(cur_file, new_file);
	mutex_lock(&mut);

	list_move_tail(&ctx->list, &new_file->ctx_list);
	ucma_move_events(ctx, new_file);
	ctx->file = new_file;
	resp.events_reported = ctx->events_reported;

	mutex_unlock(&mut);
	ucma_unlock_files(cur_file, new_file);

response:
	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &resp, sizeof(resp)))
		ret = -EFAULT;

	ucma_put_ctx(ctx);
file_put:
	fdput(f);
	return ret;
}

static ssize_t (*ucma_cmd_table[])(struct ucma_file *file,
				   const char __user *inbuf,
				   int in_len, int out_len) = {
	[RDMA_USER_CM_CMD_CREATE_ID]	 = ucma_create_id,
	[RDMA_USER_CM_CMD_DESTROY_ID]	 = ucma_destroy_id,
	[RDMA_USER_CM_CMD_BIND_IP]	 = ucma_bind_ip,
	[RDMA_USER_CM_CMD_RESOLVE_IP]	 = ucma_resolve_ip,
	[RDMA_USER_CM_CMD_RESOLVE_ROUTE] = ucma_resolve_route,
	[RDMA_USER_CM_CMD_QUERY_ROUTE]	 = ucma_query_route,
	[RDMA_USER_CM_CMD_CONNECT]	 = ucma_connect,
	[RDMA_USER_CM_CMD_LISTEN]	 = ucma_listen,
	[RDMA_USER_CM_CMD_ACCEPT]	 = ucma_accept,
	[RDMA_USER_CM_CMD_REJECT]	 = ucma_reject,
	[RDMA_USER_CM_CMD_DISCONNECT]	 = ucma_disconnect,
	[RDMA_USER_CM_CMD_INIT_QP_ATTR]	 = ucma_init_qp_attr,
	[RDMA_USER_CM_CMD_GET_EVENT]	 = ucma_get_event,
	[RDMA_USER_CM_CMD_GET_OPTION]	 = NULL,
	[RDMA_USER_CM_CMD_SET_OPTION]	 = ucma_set_option,
	[RDMA_USER_CM_CMD_NOTIFY]	 = ucma_notify,
	[RDMA_USER_CM_CMD_JOIN_IP_MCAST] = ucma_join_ip_multicast,
	[RDMA_USER_CM_CMD_LEAVE_MCAST]	 = ucma_leave_multicast,
	[RDMA_USER_CM_CMD_MIGRATE_ID]	 = ucma_migrate_id,
	[RDMA_USER_CM_CMD_QUERY]	 = ucma_query,
	[RDMA_USER_CM_CMD_BIND]		 = ucma_bind,
	[RDMA_USER_CM_CMD_RESOLVE_ADDR]	 = ucma_resolve_addr,
	[RDMA_USER_CM_CMD_JOIN_MCAST]	 = ucma_join_multicast
};

static ssize_t ucma_write(struct file *filp, const char __user *buf,
			  size_t len, loff_t *pos)
{
	struct ucma_file *file = filp->private_data;
	struct rdma_ucm_cmd_hdr hdr;
	ssize_t ret;

	if (!ib_safe_file_access(filp)) {
		pr_err_once("ucma_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
			    task_tgid_vnr(current), current->comm);
		return -EACCES;
	}

	if (len < sizeof(hdr))
		return -EINVAL;

	if (copy_from_user(&hdr, buf, sizeof(hdr)))
		return -EFAULT;

	if (hdr.cmd >= ARRAY_SIZE(ucma_cmd_table))
		return -EINVAL;
	hdr.cmd = array_index_nospec(hdr.cmd, ARRAY_SIZE(ucma_cmd_table));

	if (hdr.in + sizeof(hdr) > len)
		return -EINVAL;

	if (!ucma_cmd_table[hdr.cmd])
		return -ENOSYS;

	ret = ucma_cmd_table[hdr.cmd](file, buf + sizeof(hdr), hdr.in, hdr.out);
	if (!ret)
		ret = len;

	return ret;
}
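/*
 * Illustrative sketch of the write() ABI handled above, as a userspace
 * caller (e.g. librdmacm) might frame it under the rdma_user_cm.h layout:
 * each request is a struct rdma_ucm_cmd_hdr followed immediately by the
 * command payload; hdr.in is the payload size, hdr.out the size of the
 * caller's response buffer, and the response is copied back through the
 * user pointer carried in the payload (cmd.response) rather than via the
 * write() return value.  Roughly:
 *
 *	struct rdma_ucm_create_id_resp resp;
 *	struct {
 *		struct rdma_ucm_cmd_hdr hdr;
 *		struct rdma_ucm_create_id cmd;
 *	} req = {
 *		.hdr = { .cmd = RDMA_USER_CM_CMD_CREATE_ID,
 *			 .in  = sizeof(req.cmd),
 *			 .out = sizeof(resp) },
 *		.cmd = { .uid = my_cookie,
 *			 .response = (uintptr_t) &resp,
 *			 .ps = RDMA_PS_TCP },
 *	};
 *	write(fd, &req, sizeof(req));
 *
 * On success resp.id names the new context; my_cookie is a placeholder for
 * whatever value the caller wants echoed back in events for this id.
 */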
static __poll_t ucma_poll(struct file *filp, struct poll_table_struct *wait)
{
	struct ucma_file *file = filp->private_data;
	__poll_t mask = 0;

	poll_wait(filp, &file->poll_wait, wait);

	if (!list_empty(&file->event_list))
		mask = EPOLLIN | EPOLLRDNORM;

	return mask;
}

/*
 * ucma_open() does not need the BKL:
 *
 *  - no global state is referred to;
 *  - there is no ioctl method to race against;
 *  - no further module initialization is required for open to work
 *    after the device is registered.
 */
static int ucma_open(struct inode *inode, struct file *filp)
{
	struct ucma_file *file;

	file = kmalloc(sizeof *file, GFP_KERNEL);
	if (!file)
		return -ENOMEM;

	file->close_wq = alloc_ordered_workqueue("ucma_close_id",
						 WQ_MEM_RECLAIM);
	if (!file->close_wq) {
		kfree(file);
		return -ENOMEM;
	}

	INIT_LIST_HEAD(&file->event_list);
	INIT_LIST_HEAD(&file->ctx_list);
	init_waitqueue_head(&file->poll_wait);
	mutex_init(&file->mut);

	filp->private_data = file;
	file->filp = filp;

	return nonseekable_open(inode, filp);
}

static int ucma_close(struct inode *inode, struct file *filp)
{
	struct ucma_file *file = filp->private_data;
	struct ucma_context *ctx, *tmp;

	mutex_lock(&file->mut);
	list_for_each_entry_safe(ctx, tmp, &file->ctx_list, list) {
		ctx->destroying = 1;
		mutex_unlock(&file->mut);

		mutex_lock(&mut);
		idr_remove(&ctx_idr, ctx->id);
		mutex_unlock(&mut);

		flush_workqueue(file->close_wq);
		/* Once the ctx is marked as destroying and the workqueue has
		 * been flushed, we are safe from any inflight handlers that
		 * might queue another closing task.
		 */
		mutex_lock(&mut);
		if (!ctx->closing) {
			mutex_unlock(&mut);
			ucma_put_ctx(ctx);
			wait_for_completion(&ctx->comp);
			/* rdma_destroy_id ensures that no event handlers are
			 * inflight for that id before releasing it.
			 */
			rdma_destroy_id(ctx->cm_id);
		} else {
			mutex_unlock(&mut);
		}

		ucma_free_ctx(ctx);
		mutex_lock(&file->mut);
	}
	mutex_unlock(&file->mut);
	destroy_workqueue(file->close_wq);
	kfree(file);
	return 0;
}

static const struct file_operations ucma_fops = {
	.owner	 = THIS_MODULE,
	.open	 = ucma_open,
	.release = ucma_close,
	.write	 = ucma_write,
	.poll	 = ucma_poll,
	.llseek	 = no_llseek,
};

static struct miscdevice ucma_misc = {
	.minor		= MISC_DYNAMIC_MINOR,
	.name		= "rdma_cm",
	.nodename	= "infiniband/rdma_cm",
	.mode		= 0666,
	.fops		= &ucma_fops,
};

static ssize_t show_abi_version(struct device *dev,
				struct device_attribute *attr,
				char *buf)
{
	return sprintf(buf, "%d\n", RDMA_USER_CM_ABI_VERSION);
}
static DEVICE_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);

static int __init ucma_init(void)
{
	int ret;

	ret = misc_register(&ucma_misc);
	if (ret)
		return ret;

	ret = device_create_file(ucma_misc.this_device, &dev_attr_abi_version);
	if (ret) {
		pr_err("rdma_ucm: couldn't create abi_version attr\n");
		goto err1;
	}

	ucma_ctl_table_hdr = register_net_sysctl(&init_net, "net/rdma_ucm", ucma_ctl_table);
	if (!ucma_ctl_table_hdr) {
		pr_err("rdma_ucm: couldn't register sysctl paths\n");
		ret = -ENOMEM;
		goto err2;
	}
	return 0;
err2:
	device_remove_file(ucma_misc.this_device, &dev_attr_abi_version);
err1:
	misc_deregister(&ucma_misc);
	return ret;
}

static void __exit ucma_cleanup(void)
{
	unregister_net_sysctl_table(ucma_ctl_table_hdr);
	device_remove_file(ucma_misc.this_device, &dev_attr_abi_version);
	misc_deregister(&ucma_misc);
	idr_destroy(&ctx_idr);
	idr_destroy(&multicast_idr);
}

module_init(ucma_init);
module_exit(ucma_cleanup);