/*
 * Copyright (c) 2005 Topspin Communications. All rights reserved.
 * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
 * Copyright (c) 2005 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/device.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/sched.h>
#include <linux/file.h>
#include <linux/cdev.h>
#include <linux/anon_inodes.h>
#include <linux/slab.h>
#include <linux/sched/mm.h>

#include <linux/uaccess.h>

#include <rdma/ib.h>
#include <rdma/uverbs_std_types.h>
#include <rdma/rdma_netlink.h>

#include "uverbs.h"
#include "core_priv.h"
#include "rdma_core.h"

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand userspace verbs access");
MODULE_LICENSE("Dual BSD/GPL");

enum {
	IB_UVERBS_MAJOR       = 231,
	IB_UVERBS_BASE_MINOR  = 192,
	IB_UVERBS_MAX_DEVICES = RDMA_MAX_PORTS,
	IB_UVERBS_NUM_FIXED_MINOR = 32,
	IB_UVERBS_NUM_DYNAMIC_MINOR = IB_UVERBS_MAX_DEVICES - IB_UVERBS_NUM_FIXED_MINOR,
};

#define IB_UVERBS_BASE_DEV	MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR)

static dev_t dynamic_uverbs_dev;
static struct class *uverbs_class;

static DEFINE_IDA(uverbs_ida);
static void ib_uverbs_add_one(struct ib_device *device);
static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);

/*
 * Must be called with the ufile->device->disassociate_srcu held, and the lock
 * must be held until use of the ucontext is finished.
 */
struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile)
{
	/*
	 * We do not hold the hw_destroy_rwsem lock for this flow, instead
	 * srcu is used. It does not matter if someone races this with
	 * get_context, we get NULL or valid ucontext.
	 */
	struct ib_ucontext *ucontext = smp_load_acquire(&ufile->ucontext);

	if (!srcu_dereference(ufile->device->ib_dev,
			      &ufile->device->disassociate_srcu))
		return ERR_PTR(-EIO);

	if (!ucontext)
		return ERR_PTR(-EINVAL);

	return ucontext;
}
EXPORT_SYMBOL(ib_uverbs_get_ucontext_file);

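/*
 * Destroy a memory window created through uverbs and, on success, drop the
 * reference it held on its protection domain.
 */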
int uverbs_dealloc_mw(struct ib_mw *mw)
{
	struct ib_pd *pd = mw->pd;
	int ret;

	ret = mw->device->ops.dealloc_mw(mw);
	if (!ret)
		atomic_dec(&pd->usecnt);
	return ret;
}

static void ib_uverbs_release_dev(struct device *device)
{
	struct ib_uverbs_device *dev =
			container_of(device, struct ib_uverbs_device, dev);

	uverbs_destroy_api(dev->uapi);
	cleanup_srcu_struct(&dev->disassociate_srcu);
	mutex_destroy(&dev->lists_mutex);
	mutex_destroy(&dev->xrcd_tree_mutex);
	kfree(dev);
}

static void ib_uverbs_release_async_event_file(struct kref *ref)
{
	struct ib_uverbs_async_event_file *file =
		container_of(ref, struct ib_uverbs_async_event_file, ref);

	kfree(file);
}

void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
			   struct ib_uverbs_completion_event_file *ev_file,
			   struct ib_ucq_object *uobj)
{
	struct ib_uverbs_event *evt, *tmp;

	if (ev_file) {
		spin_lock_irq(&ev_file->ev_queue.lock);
		list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) {
			list_del(&evt->list);
			kfree(evt);
		}
		spin_unlock_irq(&ev_file->ev_queue.lock);

		uverbs_uobject_put(&ev_file->uobj);
	}

	spin_lock_irq(&file->async_file->ev_queue.lock);
	list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) {
		list_del(&evt->list);
		kfree(evt);
	}
	spin_unlock_irq(&file->async_file->ev_queue.lock);
}

void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
			      struct ib_uevent_object *uobj)
{
	struct ib_uverbs_event *evt, *tmp;

	spin_lock_irq(&file->async_file->ev_queue.lock);
	list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
		list_del(&evt->list);
		kfree(evt);
	}
	spin_unlock_irq(&file->async_file->ev_queue.lock);
}

void ib_uverbs_detach_umcast(struct ib_qp *qp,
			     struct ib_uqp_object *uobj)
{
	struct ib_uverbs_mcast_entry *mcast, *tmp;

	list_for_each_entry_safe(mcast, tmp, &uobj->mcast_list, list) {
		ib_detach_mcast(qp, &mcast->gid, mcast->lid);
		list_del(&mcast->list);
		kfree(mcast);
	}
}

static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev)
{
	complete(&dev->comp);
}

void ib_uverbs_release_file(struct kref *ref)
{
	struct ib_uverbs_file *file =
		container_of(ref, struct ib_uverbs_file, ref);
	struct ib_device *ib_dev;
	int srcu_key;

	release_ufile_idr_uobject(file);

	srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
	ib_dev = srcu_dereference(file->device->ib_dev,
				  &file->device->disassociate_srcu);
	if (ib_dev && !ib_dev->ops.disassociate_ucontext)
		module_put(ib_dev->ops.owner);
	srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);

	if (atomic_dec_and_test(&file->device->refcount))
		ib_uverbs_comp_dev(file->device);

	if (file->async_file)
		kref_put(&file->async_file->ref,
			 ib_uverbs_release_async_event_file);
	put_device(&file->device->dev);

	if (file->disassociate_page)
		__free_pages(file->disassociate_page, 0);
	mutex_destroy(&file->umap_lock);
	mutex_destroy(&file->ucontext_lock);
	kfree(file);
}

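/*
 * Copy the next queued event to userspace, blocking until one arrives unless
 * the file was opened with O_NONBLOCK. Returns -EIO if the device was
 * disassociated and no event is pending.
 */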
static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue,
				    struct ib_uverbs_file *uverbs_file,
				    struct file *filp, char __user *buf,
				    size_t count, loff_t *pos,
				    size_t eventsz)
{
	struct ib_uverbs_event *event;
	int ret = 0;

	spin_lock_irq(&ev_queue->lock);

	while (list_empty(&ev_queue->event_list)) {
		spin_unlock_irq(&ev_queue->lock);

		if (filp->f_flags & O_NONBLOCK)
			return -EAGAIN;

		if (wait_event_interruptible(ev_queue->poll_wait,
					     (!list_empty(&ev_queue->event_list) ||
					      /* The barriers built into wait_event_interruptible()
					       * and wake_up() guarantee this will see ib_dev set
					       * to NULL without using RCU
					       */
					      !uverbs_file->device->ib_dev)))
			return -ERESTARTSYS;

		/* If the device was disassociated and no event exists, return an error */
		if (list_empty(&ev_queue->event_list) &&
		    !uverbs_file->device->ib_dev)
			return -EIO;

		spin_lock_irq(&ev_queue->lock);
	}

	event = list_entry(ev_queue->event_list.next, struct ib_uverbs_event, list);

	if (eventsz > count) {
		ret   = -EINVAL;
		event = NULL;
	} else {
		list_del(ev_queue->event_list.next);
		if (event->counter) {
			++(*event->counter);
			list_del(&event->obj_list);
		}
	}

	spin_unlock_irq(&ev_queue->lock);

	if (event) {
		if (copy_to_user(buf, event, eventsz))
			ret = -EFAULT;
		else
			ret = eventsz;
	}

	kfree(event);

	return ret;
}

static ssize_t ib_uverbs_async_event_read(struct file *filp, char __user *buf,
					  size_t count, loff_t *pos)
{
	struct ib_uverbs_async_event_file *file = filp->private_data;

	return ib_uverbs_event_read(&file->ev_queue, file->uverbs_file, filp,
				    buf, count, pos,
				    sizeof(struct ib_uverbs_async_event_desc));
}

static ssize_t ib_uverbs_comp_event_read(struct file *filp, char __user *buf,
					 size_t count, loff_t *pos)
{
	struct ib_uverbs_completion_event_file *comp_ev_file =
		filp->private_data;

	return ib_uverbs_event_read(&comp_ev_file->ev_queue,
				    comp_ev_file->uobj.ufile, filp,
				    buf, count, pos,
				    sizeof(struct ib_uverbs_comp_event_desc));
}

static __poll_t ib_uverbs_event_poll(struct ib_uverbs_event_queue *ev_queue,
				     struct file *filp,
				     struct poll_table_struct *wait)
{
	__poll_t pollflags = 0;

	poll_wait(filp, &ev_queue->poll_wait, wait);

	spin_lock_irq(&ev_queue->lock);
	if (!list_empty(&ev_queue->event_list))
		pollflags = EPOLLIN | EPOLLRDNORM;
	spin_unlock_irq(&ev_queue->lock);

	return pollflags;
}

static __poll_t ib_uverbs_async_event_poll(struct file *filp,
					   struct poll_table_struct *wait)
{
	return ib_uverbs_event_poll(filp->private_data, filp, wait);
}

static __poll_t ib_uverbs_comp_event_poll(struct file *filp,
					  struct poll_table_struct *wait)
{
	struct ib_uverbs_completion_event_file *comp_ev_file =
		filp->private_data;

	return ib_uverbs_event_poll(&comp_ev_file->ev_queue, filp, wait);
}

static int ib_uverbs_async_event_fasync(int fd, struct file *filp, int on)
{
	struct ib_uverbs_event_queue *ev_queue = filp->private_data;

	return fasync_helper(fd, filp, on, &ev_queue->async_queue);
}

static int ib_uverbs_comp_event_fasync(int fd, struct file *filp, int on)
{
	struct ib_uverbs_completion_event_file *comp_ev_file =
		filp->private_data;

	return fasync_helper(fd, filp, on, &comp_ev_file->ev_queue.async_queue);
}

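/*
 * Release an async event file descriptor: free any events still queued,
 * unlink the file from the device's list, unregister the IB event handler
 * and drop the references taken when the file was created.
 */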
static int ib_uverbs_async_event_close(struct inode *inode, struct file *filp)
{
	struct ib_uverbs_async_event_file *file = filp->private_data;
	struct ib_uverbs_file *uverbs_file = file->uverbs_file;
	struct ib_uverbs_event *entry, *tmp;
	int closed_already = 0;

	mutex_lock(&uverbs_file->device->lists_mutex);
	spin_lock_irq(&file->ev_queue.lock);
	closed_already = file->ev_queue.is_closed;
	file->ev_queue.is_closed = 1;
	list_for_each_entry_safe(entry, tmp, &file->ev_queue.event_list, list) {
		if (entry->counter)
			list_del(&entry->obj_list);
		kfree(entry);
	}
	spin_unlock_irq(&file->ev_queue.lock);
	if (!closed_already) {
		list_del(&file->list);
		ib_unregister_event_handler(&uverbs_file->event_handler);
	}
	mutex_unlock(&uverbs_file->device->lists_mutex);

	kref_put(&uverbs_file->ref, ib_uverbs_release_file);
	kref_put(&file->ref, ib_uverbs_release_async_event_file);

	return 0;
}

static int ib_uverbs_comp_event_close(struct inode *inode, struct file *filp)
{
	struct ib_uobject *uobj = filp->private_data;
	struct ib_uverbs_completion_event_file *file = container_of(
		uobj, struct ib_uverbs_completion_event_file, uobj);
	struct ib_uverbs_event *entry, *tmp;

	spin_lock_irq(&file->ev_queue.lock);
	list_for_each_entry_safe(entry, tmp, &file->ev_queue.event_list, list) {
		if (entry->counter)
			list_del(&entry->obj_list);
		kfree(entry);
	}
	file->ev_queue.is_closed = 1;
	spin_unlock_irq(&file->ev_queue.lock);

	uverbs_close_fd(filp);

	return 0;
}

const struct file_operations uverbs_event_fops = {
	.owner	 = THIS_MODULE,
	.read	 = ib_uverbs_comp_event_read,
	.poll    = ib_uverbs_comp_event_poll,
	.release = ib_uverbs_comp_event_close,
	.fasync  = ib_uverbs_comp_event_fasync,
	.llseek	 = no_llseek,
};

static const struct file_operations uverbs_async_event_fops = {
	.owner	 = THIS_MODULE,
	.read	 = ib_uverbs_async_event_read,
	.poll    = ib_uverbs_async_event_poll,
	.release = ib_uverbs_async_event_close,
	.fasync  = ib_uverbs_async_event_fasync,
	.llseek	 = no_llseek,
};

void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
{
	struct ib_uverbs_event_queue *ev_queue = cq_context;
	struct ib_ucq_object *uobj;
	struct ib_uverbs_event *entry;
	unsigned long flags;

	if (!ev_queue)
		return;

	spin_lock_irqsave(&ev_queue->lock, flags);
	if (ev_queue->is_closed) {
		spin_unlock_irqrestore(&ev_queue->lock, flags);
		return;
	}

	entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
	if (!entry) {
		spin_unlock_irqrestore(&ev_queue->lock, flags);
		return;
	}

	uobj = container_of(cq->uobject, struct ib_ucq_object, uobject);

	entry->desc.comp.cq_handle = cq->uobject->user_handle;
	entry->counter		   = &uobj->comp_events_reported;

	list_add_tail(&entry->list, &ev_queue->event_list);
	list_add_tail(&entry->obj_list, &uobj->comp_list);
	spin_unlock_irqrestore(&ev_queue->lock, flags);

	wake_up_interruptible(&ev_queue->poll_wait);
	kill_fasync(&ev_queue->async_queue, SIGIO, POLL_IN);
}

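/*
 * Queue an asynchronous event on the file's async event queue and wake up
 * any readers. This runs from IB event callbacks, potentially in atomic
 * context, hence the GFP_ATOMIC allocation.
 */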
static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
				    __u64 element, __u64 event,
				    struct list_head *obj_list,
				    u32 *counter)
{
	struct ib_uverbs_event *entry;
	unsigned long flags;

	spin_lock_irqsave(&file->async_file->ev_queue.lock, flags);
	if (file->async_file->ev_queue.is_closed) {
		spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);
		return;
	}

	entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
	if (!entry) {
		spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);
		return;
	}

	entry->desc.async.element    = element;
	entry->desc.async.event_type = event;
	entry->desc.async.reserved   = 0;
	entry->counter               = counter;

	list_add_tail(&entry->list, &file->async_file->ev_queue.event_list);
	if (obj_list)
		list_add_tail(&entry->obj_list, obj_list);
	spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);

	wake_up_interruptible(&file->async_file->ev_queue.poll_wait);
	kill_fasync(&file->async_file->ev_queue.async_queue, SIGIO, POLL_IN);
}

void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
{
	struct ib_ucq_object *uobj = container_of(event->element.cq->uobject,
						  struct ib_ucq_object, uobject);

	ib_uverbs_async_handler(uobj->uobject.ufile, uobj->uobject.user_handle,
				event->event, &uobj->async_list,
				&uobj->async_events_reported);
}

void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
{
	struct ib_uevent_object *uobj;

	/* for XRC target qp's, check that qp is live */
	if (!event->element.qp->uobject)
		return;

	uobj = container_of(event->element.qp->uobject,
			    struct ib_uevent_object, uobject);

	ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
				event->event, &uobj->event_list,
				&uobj->events_reported);
}

void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr)
{
	struct ib_uevent_object *uobj = container_of(event->element.wq->uobject,
						     struct ib_uevent_object, uobject);

	ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
				event->event, &uobj->event_list,
				&uobj->events_reported);
}

void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr)
{
	struct ib_uevent_object *uobj;

	uobj = container_of(event->element.srq->uobject,
			    struct ib_uevent_object, uobject);

	ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
				event->event, &uobj->event_list,
				&uobj->events_reported);
}

void ib_uverbs_event_handler(struct ib_event_handler *handler,
			     struct ib_event *event)
{
	struct ib_uverbs_file *file =
		container_of(handler, struct ib_uverbs_file, event_handler);

	ib_uverbs_async_handler(file, event->element.port_num, event->event,
				NULL, NULL);
}

void ib_uverbs_free_async_event_file(struct ib_uverbs_file *file)
{
	kref_put(&file->async_file->ref, ib_uverbs_release_async_event_file);
	file->async_file = NULL;
}

void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue)
{
	spin_lock_init(&ev_queue->lock);
	INIT_LIST_HEAD(&ev_queue->event_list);
	init_waitqueue_head(&ev_queue->poll_wait);
	ev_queue->is_closed   = 0;
	ev_queue->async_queue = NULL;
}

struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file,
					      struct ib_device *ib_dev)
{
	struct ib_uverbs_async_event_file *ev_file;
	struct file *filp;

	ev_file = kzalloc(sizeof(*ev_file), GFP_KERNEL);
	if (!ev_file)
		return ERR_PTR(-ENOMEM);

	ib_uverbs_init_event_queue(&ev_file->ev_queue);
	ev_file->uverbs_file = uverbs_file;
	kref_get(&ev_file->uverbs_file->ref);
	kref_init(&ev_file->ref);
	filp = anon_inode_getfile("[infinibandevent]", &uverbs_async_event_fops,
				  ev_file, O_RDONLY);
	if (IS_ERR(filp))
		goto err_put_refs;

	mutex_lock(&uverbs_file->device->lists_mutex);
	list_add_tail(&ev_file->list,
		      &uverbs_file->device->uverbs_events_file_list);
	mutex_unlock(&uverbs_file->device->lists_mutex);

	WARN_ON(uverbs_file->async_file);
	uverbs_file->async_file = ev_file;
	kref_get(&uverbs_file->async_file->ref);
	INIT_IB_EVENT_HANDLER(&uverbs_file->event_handler,
			      ib_dev,
			      ib_uverbs_event_handler);
	ib_register_event_handler(&uverbs_file->event_handler);
	/* At this point the async file is fully set up */

	return filp;

err_put_refs:
	kref_put(&ev_file->uverbs_file->ref, ib_uverbs_release_file);
	kref_put(&ev_file->ref, ib_uverbs_release_async_event_file);
	return filp;
}

static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr,
			  struct ib_uverbs_ex_cmd_hdr *ex_hdr, size_t count,
			  const struct uverbs_api_write_method *method_elm)
{
	if (method_elm->is_ex) {
		count -= sizeof(*hdr) + sizeof(*ex_hdr);

		if ((hdr->in_words + ex_hdr->provider_in_words) * 8 != count)
			return -EINVAL;

		if (hdr->in_words * 8 < method_elm->req_size)
			return -ENOSPC;

		if (ex_hdr->cmd_hdr_reserved)
			return -EINVAL;

		if (ex_hdr->response) {
			if (!hdr->out_words && !ex_hdr->provider_out_words)
				return -EINVAL;

			if (hdr->out_words * 8 < method_elm->resp_size)
				return -ENOSPC;

			if (!access_ok(u64_to_user_ptr(ex_hdr->response),
				       (hdr->out_words + ex_hdr->provider_out_words) * 8))
				return -EFAULT;
		} else {
			if (hdr->out_words || ex_hdr->provider_out_words)
				return -EINVAL;
		}

		return 0;
	}

	/* not extended command */
	if (hdr->in_words * 4 != count)
		return -EINVAL;

	if (count < method_elm->req_size + sizeof(hdr)) {
		/*
		 * rdma-core v18 and v19 have a bug where they send DESTROY_CQ
		 * with a 16 byte write instead of 24. Old kernels didn't
		 * check the size so they allowed this. Now that the size is
		 * checked provide a compatibility workaround to not break
		 * those userspaces.
		 */
		if (hdr->command == IB_USER_VERBS_CMD_DESTROY_CQ &&
		    count == 16) {
			hdr->in_words = 6;
			return 0;
		}
		return -ENOSPC;
	}
	if (hdr->out_words * 4 < method_elm->resp_size)
		return -ENOSPC;

	return 0;
}

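/*
 * Legacy write() command path: validate the command header, set up the
 * attribute bundle describing the request and response buffers, then
 * dispatch to the method handler under the disassociate SRCU read lock.
 */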
static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
			       size_t count, loff_t *pos)
{
	struct ib_uverbs_file *file = filp->private_data;
	const struct uverbs_api_write_method *method_elm;
	struct uverbs_api *uapi = file->device->uapi;
	struct ib_uverbs_ex_cmd_hdr ex_hdr;
	struct ib_uverbs_cmd_hdr hdr;
	struct uverbs_attr_bundle bundle;
	int srcu_key;
	ssize_t ret;

	if (!ib_safe_file_access(filp)) {
		pr_err_once("uverbs_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
			    task_tgid_vnr(current), current->comm);
		return -EACCES;
	}

	if (count < sizeof(hdr))
		return -EINVAL;

	if (copy_from_user(&hdr, buf, sizeof(hdr)))
		return -EFAULT;

	method_elm = uapi_get_method(uapi, hdr.command);
	if (IS_ERR(method_elm))
		return PTR_ERR(method_elm);

	if (method_elm->is_ex) {
		if (count < (sizeof(hdr) + sizeof(ex_hdr)))
			return -EINVAL;
		if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr)))
			return -EFAULT;
	}

	ret = verify_hdr(&hdr, &ex_hdr, count, method_elm);
	if (ret)
		return ret;

	srcu_key = srcu_read_lock(&file->device->disassociate_srcu);

	buf += sizeof(hdr);

	memset(bundle.attr_present, 0, sizeof(bundle.attr_present));
	bundle.ufile = file;
	bundle.context = NULL; /* only valid if bundle has uobject */
	if (!method_elm->is_ex) {
		size_t in_len = hdr.in_words * 4 - sizeof(hdr);
		size_t out_len = hdr.out_words * 4;
		u64 response = 0;

		if (method_elm->has_udata) {
			bundle.driver_udata.inlen =
				in_len - method_elm->req_size;
			in_len = method_elm->req_size;
			if (bundle.driver_udata.inlen)
				bundle.driver_udata.inbuf = buf + in_len;
			else
				bundle.driver_udata.inbuf = NULL;
		} else {
			memset(&bundle.driver_udata, 0,
			       sizeof(bundle.driver_udata));
		}

		if (method_elm->has_resp) {
			/*
			 * The macros check that if has_resp is set
			 * then the command request structure starts
			 * with a '__aligned u64 response' member.
			 */
			ret = get_user(response, (const u64 __user *)buf);
			if (ret)
				goto out_unlock;

			if (method_elm->has_udata) {
				bundle.driver_udata.outlen =
					out_len - method_elm->resp_size;
				out_len = method_elm->resp_size;
				if (bundle.driver_udata.outlen)
					bundle.driver_udata.outbuf =
						u64_to_user_ptr(response +
								out_len);
				else
					bundle.driver_udata.outbuf = NULL;
			}
		} else {
			bundle.driver_udata.outlen = 0;
			bundle.driver_udata.outbuf = NULL;
		}

		ib_uverbs_init_udata_buf_or_null(
			&bundle.ucore, buf, u64_to_user_ptr(response),
			in_len, out_len);
	} else {
		buf += sizeof(ex_hdr);

		ib_uverbs_init_udata_buf_or_null(&bundle.ucore, buf,
					u64_to_user_ptr(ex_hdr.response),
					hdr.in_words * 8, hdr.out_words * 8);

		ib_uverbs_init_udata_buf_or_null(
			&bundle.driver_udata, buf + bundle.ucore.inlen,
			u64_to_user_ptr(ex_hdr.response) + bundle.ucore.outlen,
			ex_hdr.provider_in_words * 8,
			ex_hdr.provider_out_words * 8);

	}

	ret = method_elm->handler(&bundle);
out_unlock:
	srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
	return (ret) ? : count;
}

static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct ib_uverbs_file *file = filp->private_data;
	struct ib_ucontext *ucontext;
	int ret = 0;
	int srcu_key;

	srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
	ucontext = ib_uverbs_get_ucontext_file(file);
	if (IS_ERR(ucontext)) {
		ret = PTR_ERR(ucontext);
		goto out;
	}

	ret = ucontext->device->ops.mmap(ucontext, vma);
out:
	srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
	return ret;
}

/*
 * Each time we map IO memory into user space this keeps track of the mapping.
 * When the device is hot-unplugged we 'zap' the mmaps in user space to point
 * to the zero page and allow the hot unplug to proceed.
 *
 * This is necessary for cases like PCI physical hot unplug as the actual BAR
 * memory may vanish after this and access to it from userspace could MCE.
 *
 * RDMA drivers supporting disassociation must have their user space designed
 * to cope in some way with their IO pages going to the zero page.
 */
struct rdma_umap_priv {
	struct vm_area_struct *vma;
	struct list_head list;
};

static const struct vm_operations_struct rdma_umap_ops;

static void rdma_umap_priv_init(struct rdma_umap_priv *priv,
				struct vm_area_struct *vma)
{
	struct ib_uverbs_file *ufile = vma->vm_file->private_data;

	priv->vma = vma;
	vma->vm_private_data = priv;
	vma->vm_ops = &rdma_umap_ops;

	mutex_lock(&ufile->umap_lock);
	list_add(&priv->list, &ufile->umaps);
	mutex_unlock(&ufile->umap_lock);
}

/*
 * The VMA has been dup'd, initialize the vm_private_data with a new tracking
 * struct
 */
static void rdma_umap_open(struct vm_area_struct *vma)
{
	struct ib_uverbs_file *ufile = vma->vm_file->private_data;
	struct rdma_umap_priv *opriv = vma->vm_private_data;
	struct rdma_umap_priv *priv;

	if (!opriv)
		return;

	/* We are racing with disassociation */
	if (!down_read_trylock(&ufile->hw_destroy_rwsem))
		goto out_zap;
	/*
	 * Disassociation already completed, the VMA should already be zapped.
	 */
	if (!ufile->ucontext)
		goto out_unlock;

	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
	if (!priv)
		goto out_unlock;
	rdma_umap_priv_init(priv, vma);

	up_read(&ufile->hw_destroy_rwsem);
	return;

out_unlock:
	up_read(&ufile->hw_destroy_rwsem);
out_zap:
	/*
	 * We can't allow the VMA to be created with the actual IO pages, that
	 * would break our API contract, and it can't be stopped at this
	 * point, so zap it.
	 */
	vma->vm_private_data = NULL;
	zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
}

static void rdma_umap_close(struct vm_area_struct *vma)
{
	struct ib_uverbs_file *ufile = vma->vm_file->private_data;
	struct rdma_umap_priv *priv = vma->vm_private_data;

	if (!priv)
		return;

	/*
	 * The vma holds a reference on the struct file that created it, which
	 * in turn means that the ib_uverbs_file is guaranteed to exist at
	 * this point.
	 */
	mutex_lock(&ufile->umap_lock);
	list_del(&priv->list);
	mutex_unlock(&ufile->umap_lock);
	kfree(priv);
}

/*
 * Once zap_vma_ptes() has been called, any touch to the VMA comes here and
 * we return a dummy writable zero page for all the pfns.
 */
static vm_fault_t rdma_umap_fault(struct vm_fault *vmf)
{
	struct ib_uverbs_file *ufile = vmf->vma->vm_file->private_data;
	struct rdma_umap_priv *priv = vmf->vma->vm_private_data;
	vm_fault_t ret = 0;

	if (!priv)
		return VM_FAULT_SIGBUS;

	/* Read only pages can just use the system zero page. */
	if (!(vmf->vma->vm_flags & (VM_WRITE | VM_MAYWRITE))) {
		vmf->page = ZERO_PAGE(vmf->address);
		get_page(vmf->page);
		return 0;
	}

	mutex_lock(&ufile->umap_lock);
	if (!ufile->disassociate_page)
		ufile->disassociate_page =
			alloc_pages(vmf->gfp_mask | __GFP_ZERO, 0);

	if (ufile->disassociate_page) {
		/*
		 * This VMA is forced to always be shared so this doesn't have
		 * to worry about COW.
		 */
		vmf->page = ufile->disassociate_page;
		get_page(vmf->page);
	} else {
		ret = VM_FAULT_SIGBUS;
	}
	mutex_unlock(&ufile->umap_lock);

	return ret;
}

static const struct vm_operations_struct rdma_umap_ops = {
	.open = rdma_umap_open,
	.close = rdma_umap_close,
	.fault = rdma_umap_fault,
};

/*
 * Map IO memory into a process. This is to be called by drivers as part of
 * their mmap() functions if they wish to send something like PCI-E BAR memory
 * to userspace.
 */
int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
		      unsigned long pfn, unsigned long size, pgprot_t prot)
{
	struct ib_uverbs_file *ufile = ucontext->ufile;
	struct rdma_umap_priv *priv;

	if (!(vma->vm_flags & VM_SHARED))
		return -EINVAL;

	if (vma->vm_end - vma->vm_start != size)
		return -EINVAL;

	/* Driver is using this wrong, must be called by ib_uverbs_mmap */
	if (WARN_ON(!vma->vm_file ||
		    vma->vm_file->private_data != ufile))
		return -EINVAL;
	lockdep_assert_held(&ufile->device->disassociate_srcu);

	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;

	vma->vm_page_prot = prot;
	if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) {
		kfree(priv);
		return -EAGAIN;
	}

	rdma_umap_priv_init(priv, vma);
	return 0;
}
EXPORT_SYMBOL(rdma_user_mmap_io);

void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
{
	struct rdma_umap_priv *priv, *next_priv;

	lockdep_assert_held(&ufile->hw_destroy_rwsem);

	while (1) {
		struct mm_struct *mm = NULL;

		/* Get an arbitrary mm pointer that hasn't been cleaned yet */
		mutex_lock(&ufile->umap_lock);
		while (!list_empty(&ufile->umaps)) {
			int ret;

			priv = list_first_entry(&ufile->umaps,
						struct rdma_umap_priv, list);
			mm = priv->vma->vm_mm;
			ret = mmget_not_zero(mm);
			if (!ret) {
				list_del_init(&priv->list);
				mm = NULL;
				continue;
			}
			break;
		}
		mutex_unlock(&ufile->umap_lock);
		if (!mm)
			return;

		/*
		 * The umap_lock is nested under mmap_sem since it is used
		 * within the vma_ops callbacks, so we have to clean the list
		 * one mm at a time to get the lock ordering right. Typically
		 * there will only be one mm, so no big deal.
		 */
		down_read(&mm->mmap_sem);
		if (!mmget_still_valid(mm))
			goto skip_mm;
		mutex_lock(&ufile->umap_lock);
		list_for_each_entry_safe (priv, next_priv, &ufile->umaps,
					  list) {
			struct vm_area_struct *vma = priv->vma;

			if (vma->vm_mm != mm)
				continue;
			list_del_init(&priv->list);

			zap_vma_ptes(vma, vma->vm_start,
				     vma->vm_end - vma->vm_start);
		}
		mutex_unlock(&ufile->umap_lock);
	skip_mm:
		up_read(&mm->mmap_sem);
		mmput(mm);
	}
}

/*
 * ib_uverbs_open() does not need the BKL:
 *
 *  - the ib_uverbs_device structures are properly reference counted and
 *    everything else is purely local to the file being created, so
 *    races against other open calls are not a problem;
 *  - there is no ioctl method to race against;
 *  - the open method will either immediately fail with -ENXIO, or all
 *    required initialization will be done.
 */
static int ib_uverbs_open(struct inode *inode, struct file *filp)
{
	struct ib_uverbs_device *dev;
	struct ib_uverbs_file *file;
	struct ib_device *ib_dev;
	int ret;
	int module_dependent;
	int srcu_key;

	dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev);
	if (!atomic_inc_not_zero(&dev->refcount))
		return -ENXIO;

	get_device(&dev->dev);
	srcu_key = srcu_read_lock(&dev->disassociate_srcu);
	mutex_lock(&dev->lists_mutex);
	ib_dev = srcu_dereference(dev->ib_dev,
				  &dev->disassociate_srcu);
	if (!ib_dev) {
		ret = -EIO;
		goto err;
	}

	if (!rdma_dev_access_netns(ib_dev, current->nsproxy->net_ns)) {
		ret = -EPERM;
		goto err;
	}

	/* In case IB device supports disassociate ucontext, there is no hard
	 * dependency between uverbs device and its low level device.
	 */
	module_dependent = !(ib_dev->ops.disassociate_ucontext);

	if (module_dependent) {
		if (!try_module_get(ib_dev->ops.owner)) {
			ret = -ENODEV;
			goto err;
		}
	}

	file = kzalloc(sizeof(*file), GFP_KERNEL);
	if (!file) {
		ret = -ENOMEM;
		if (module_dependent)
			goto err_module;

		goto err;
	}

	file->device	 = dev;
	kref_init(&file->ref);
	mutex_init(&file->ucontext_lock);

	spin_lock_init(&file->uobjects_lock);
	INIT_LIST_HEAD(&file->uobjects);
	init_rwsem(&file->hw_destroy_rwsem);
	mutex_init(&file->umap_lock);
	INIT_LIST_HEAD(&file->umaps);

	filp->private_data = file;
	list_add_tail(&file->list, &dev->uverbs_file_list);
	mutex_unlock(&dev->lists_mutex);
	srcu_read_unlock(&dev->disassociate_srcu, srcu_key);

	setup_ufile_idr_uobject(file);

	return stream_open(inode, filp);

err_module:
	module_put(ib_dev->ops.owner);

err:
	mutex_unlock(&dev->lists_mutex);
	srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
	if (atomic_dec_and_test(&dev->refcount))
		ib_uverbs_comp_dev(dev);

	put_device(&dev->dev);
	return ret;
}

static int ib_uverbs_close(struct inode *inode, struct file *filp)
{
	struct ib_uverbs_file *file = filp->private_data;

	uverbs_destroy_ufile_hw(file, RDMA_REMOVE_CLOSE);

	mutex_lock(&file->device->lists_mutex);
	list_del_init(&file->list);
	mutex_unlock(&file->device->lists_mutex);

	kref_put(&file->ref, ib_uverbs_release_file);

	return 0;
}

static const struct file_operations uverbs_fops = {
	.owner	 = THIS_MODULE,
	.write	 = ib_uverbs_write,
	.open	 = ib_uverbs_open,
	.release = ib_uverbs_close,
	.llseek	 = no_llseek,
	.unlocked_ioctl = ib_uverbs_ioctl,
	.compat_ioctl = ib_uverbs_ioctl,
};

static const struct file_operations uverbs_mmap_fops = {
	.owner	 = THIS_MODULE,
	.write	 = ib_uverbs_write,
	.mmap    = ib_uverbs_mmap,
	.open	 = ib_uverbs_open,
	.release = ib_uverbs_close,
	.llseek	 = no_llseek,
	.unlocked_ioctl = ib_uverbs_ioctl,
	.compat_ioctl = ib_uverbs_ioctl,
};

static int ib_uverbs_get_nl_info(struct ib_device *ibdev, void *client_data,
				 struct ib_client_nl_info *res)
{
	struct ib_uverbs_device *uverbs_dev = client_data;
	int ret;

	if (res->port != -1)
		return -EINVAL;

	res->abi = ibdev->ops.uverbs_abi_ver;
	res->cdev = &uverbs_dev->dev;

	/*
	 * To support DRIVER_ID binding in userspace some of the drivers need
	 * upgrading to expose their PCI dependent revision information
	 * through get_context instead of relying on modalias matching. When
	 * the drivers are fixed they can drop this flag.
	 */
	if (!ibdev->ops.uverbs_no_driver_id_binding) {
		ret = nla_put_u32(res->nl_msg, RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID,
				  ibdev->ops.driver_id);
		if (ret)
			return ret;
	}
	return 0;
}

static struct ib_client uverbs_client = {
	.name   = "uverbs",
	.no_kverbs_req = true,
	.add    = ib_uverbs_add_one,
	.remove = ib_uverbs_remove_one,
	.get_nl_info = ib_uverbs_get_nl_info,
};
MODULE_ALIAS_RDMA_CLIENT("uverbs");

static ssize_t ibdev_show(struct device *device, struct device_attribute *attr,
			  char *buf)
{
	struct ib_uverbs_device *dev =
			container_of(device, struct ib_uverbs_device, dev);
	int ret = -ENODEV;
	int srcu_key;
	struct ib_device *ib_dev;

	srcu_key = srcu_read_lock(&dev->disassociate_srcu);
	ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
	if (ib_dev)
		ret = sprintf(buf, "%s\n", dev_name(&ib_dev->dev));
	srcu_read_unlock(&dev->disassociate_srcu, srcu_key);

	return ret;
}
static DEVICE_ATTR_RO(ibdev);

static ssize_t abi_version_show(struct device *device,
				struct device_attribute *attr, char *buf)
{
	struct ib_uverbs_device *dev =
			container_of(device, struct ib_uverbs_device, dev);
	int ret = -ENODEV;
	int srcu_key;
	struct ib_device *ib_dev;

	srcu_key = srcu_read_lock(&dev->disassociate_srcu);
	ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
	if (ib_dev)
		ret = sprintf(buf, "%u\n", ib_dev->ops.uverbs_abi_ver);
	srcu_read_unlock(&dev->disassociate_srcu, srcu_key);

	return ret;
}
static DEVICE_ATTR_RO(abi_version);

static struct attribute *ib_dev_attrs[] = {
	&dev_attr_abi_version.attr,
	&dev_attr_ibdev.attr,
	NULL,
};

static const struct attribute_group dev_attr_group = {
	.attrs = ib_dev_attrs,
};

static CLASS_ATTR_STRING(abi_version, S_IRUGO,
			 __stringify(IB_USER_VERBS_ABI_VERSION));

static int ib_uverbs_create_uapi(struct ib_device *device,
				 struct ib_uverbs_device *uverbs_dev)
{
	struct uverbs_api *uapi;

	uapi = uverbs_alloc_api(device);
	if (IS_ERR(uapi))
		return PTR_ERR(uapi);

	uverbs_dev->uapi = uapi;
	return 0;
}

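/*
 * Client ->add callback: allocate an ib_uverbs_device for the new IB device,
 * assign a minor number and register the character device so userspace can
 * open it.
 */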
static void ib_uverbs_add_one(struct ib_device *device)
{
	int devnum;
	dev_t base;
	struct ib_uverbs_device *uverbs_dev;
	int ret;

	if (!device->ops.alloc_ucontext)
		return;

	uverbs_dev = kzalloc(sizeof(*uverbs_dev), GFP_KERNEL);
	if (!uverbs_dev)
		return;

	ret = init_srcu_struct(&uverbs_dev->disassociate_srcu);
	if (ret) {
		kfree(uverbs_dev);
		return;
	}

	device_initialize(&uverbs_dev->dev);
	uverbs_dev->dev.class = uverbs_class;
	uverbs_dev->dev.parent = device->dev.parent;
	uverbs_dev->dev.release = ib_uverbs_release_dev;
	uverbs_dev->groups[0] = &dev_attr_group;
	uverbs_dev->dev.groups = uverbs_dev->groups;
	atomic_set(&uverbs_dev->refcount, 1);
	init_completion(&uverbs_dev->comp);
	uverbs_dev->xrcd_tree = RB_ROOT;
	mutex_init(&uverbs_dev->xrcd_tree_mutex);
	mutex_init(&uverbs_dev->lists_mutex);
	INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list);
	INIT_LIST_HEAD(&uverbs_dev->uverbs_events_file_list);
	rcu_assign_pointer(uverbs_dev->ib_dev, device);
	uverbs_dev->num_comp_vectors = device->num_comp_vectors;

	devnum = ida_alloc_max(&uverbs_ida, IB_UVERBS_MAX_DEVICES - 1,
			       GFP_KERNEL);
	if (devnum < 0)
		goto err;
	uverbs_dev->devnum = devnum;
	if (devnum >= IB_UVERBS_NUM_FIXED_MINOR)
		base = dynamic_uverbs_dev + devnum - IB_UVERBS_NUM_FIXED_MINOR;
	else
		base = IB_UVERBS_BASE_DEV + devnum;

	if (ib_uverbs_create_uapi(device, uverbs_dev))
		goto err_uapi;

	uverbs_dev->dev.devt = base;
	dev_set_name(&uverbs_dev->dev, "uverbs%d", uverbs_dev->devnum);

	cdev_init(&uverbs_dev->cdev,
		  device->ops.mmap ? &uverbs_mmap_fops : &uverbs_fops);
	uverbs_dev->cdev.owner = THIS_MODULE;

	ret = cdev_device_add(&uverbs_dev->cdev, &uverbs_dev->dev);
	if (ret)
		goto err_uapi;

	ib_set_client_data(device, &uverbs_client, uverbs_dev);
	return;

err_uapi:
	ida_free(&uverbs_ida, devnum);
err:
	if (atomic_dec_and_test(&uverbs_dev->refcount))
		ib_uverbs_comp_dev(uverbs_dev);
	wait_for_completion(&uverbs_dev->comp);
	put_device(&uverbs_dev->dev);
	return;
}

static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
					struct ib_device *ib_dev)
{
	struct ib_uverbs_file *file;
	struct ib_uverbs_async_event_file *event_file;
	struct ib_event event;

	/* Pending and running commands must terminate */
	uverbs_disassociate_api_pre(uverbs_dev);
	event.event = IB_EVENT_DEVICE_FATAL;
	event.element.port_num = 0;
	event.device = ib_dev;

	mutex_lock(&uverbs_dev->lists_mutex);
	while (!list_empty(&uverbs_dev->uverbs_file_list)) {
		file = list_first_entry(&uverbs_dev->uverbs_file_list,
					struct ib_uverbs_file, list);
		list_del_init(&file->list);
		kref_get(&file->ref);

		/* We must release the mutex before going ahead and calling
		 * uverbs_cleanup_ufile, as it might end up indirectly calling
		 * uverbs_close, for example due to freeing the resources (e.g.
		 * mmput).
		 */
		mutex_unlock(&uverbs_dev->lists_mutex);

		ib_uverbs_event_handler(&file->event_handler, &event);
		uverbs_destroy_ufile_hw(file, RDMA_REMOVE_DRIVER_REMOVE);
		kref_put(&file->ref, ib_uverbs_release_file);

		mutex_lock(&uverbs_dev->lists_mutex);
	}

	while (!list_empty(&uverbs_dev->uverbs_events_file_list)) {
		event_file = list_first_entry(&uverbs_dev->
					      uverbs_events_file_list,
					      struct ib_uverbs_async_event_file,
					      list);
		spin_lock_irq(&event_file->ev_queue.lock);
		event_file->ev_queue.is_closed = 1;
		spin_unlock_irq(&event_file->ev_queue.lock);

		list_del(&event_file->list);
		ib_unregister_event_handler(
			&event_file->uverbs_file->event_handler);
		event_file->uverbs_file->event_handler.device =
			NULL;

		wake_up_interruptible(&event_file->ev_queue.poll_wait);
		kill_fasync(&event_file->ev_queue.async_queue, SIGIO, POLL_IN);
	}
	mutex_unlock(&uverbs_dev->lists_mutex);

	uverbs_disassociate_api(uverbs_dev->uapi);
}

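/*
 * Client ->remove callback: delete the character device, then either
 * disassociate the HW resources of active users (if the driver supports it)
 * or wait for all open files to be closed.
 */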
static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
{
	struct ib_uverbs_device *uverbs_dev = client_data;
	int wait_clients = 1;

	if (!uverbs_dev)
		return;

	cdev_device_del(&uverbs_dev->cdev, &uverbs_dev->dev);
	ida_free(&uverbs_ida, uverbs_dev->devnum);

	if (device->ops.disassociate_ucontext) {
		/* We disassociate HW resources and immediately return.
		 * Userspace will see an EIO errno for all future access.
		 * Upon returning, ib_device may be freed internally and is not
		 * valid any more.
		 * uverbs_device is still available until all clients close
		 * their files, then the uverbs device ref count will be zero
		 * and its resources will be freed.
		 * Note: At this point no more files can be opened since the
		 * cdev was deleted, however active clients can still issue
		 * commands and close their open files.
		 */
		ib_uverbs_free_hw_resources(uverbs_dev, device);
		wait_clients = 0;
	}

	if (atomic_dec_and_test(&uverbs_dev->refcount))
		ib_uverbs_comp_dev(uverbs_dev);
	if (wait_clients)
		wait_for_completion(&uverbs_dev->comp);

	put_device(&uverbs_dev->dev);
}

static char *uverbs_devnode(struct device *dev, umode_t *mode)
{
	if (mode)
		*mode = 0666;
	return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
}

static int __init ib_uverbs_init(void)
{
	int ret;

	ret = register_chrdev_region(IB_UVERBS_BASE_DEV,
				     IB_UVERBS_NUM_FIXED_MINOR,
				     "infiniband_verbs");
	if (ret) {
		pr_err("user_verbs: couldn't register device number\n");
		goto out;
	}

	ret = alloc_chrdev_region(&dynamic_uverbs_dev, 0,
				  IB_UVERBS_NUM_DYNAMIC_MINOR,
				  "infiniband_verbs");
	if (ret) {
		pr_err("couldn't register dynamic device number\n");
		goto out_alloc;
	}

	uverbs_class = class_create(THIS_MODULE, "infiniband_verbs");
	if (IS_ERR(uverbs_class)) {
		ret = PTR_ERR(uverbs_class);
		pr_err("user_verbs: couldn't create class infiniband_verbs\n");
		goto out_chrdev;
	}

	uverbs_class->devnode = uverbs_devnode;

	ret = class_create_file(uverbs_class, &class_attr_abi_version.attr);
	if (ret) {
		pr_err("user_verbs: couldn't create abi_version attribute\n");
		goto out_class;
	}

	ret = ib_register_client(&uverbs_client);
	if (ret) {
		pr_err("user_verbs: couldn't register client\n");
		goto out_class;
	}

	return 0;

out_class:
	class_destroy(uverbs_class);

out_chrdev:
	unregister_chrdev_region(dynamic_uverbs_dev,
				 IB_UVERBS_NUM_DYNAMIC_MINOR);

out_alloc:
	unregister_chrdev_region(IB_UVERBS_BASE_DEV,
				 IB_UVERBS_NUM_FIXED_MINOR);

out:
	return ret;
}

static void __exit ib_uverbs_cleanup(void)
{
	ib_unregister_client(&uverbs_client);
	class_destroy(uverbs_class);
	unregister_chrdev_region(IB_UVERBS_BASE_DEV,
				 IB_UVERBS_NUM_FIXED_MINOR);
	unregister_chrdev_region(dynamic_uverbs_dev,
				 IB_UVERBS_NUM_DYNAMIC_MINOR);
	mmu_notifier_synchronize();
}

module_init(ib_uverbs_init);
module_exit(ib_uverbs_cleanup);