/*
 * Copyright (c) 2005 Topspin Communications. All rights reserved.
 * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
 * Copyright (c) 2005 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/device.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/sched.h>
#include <linux/file.h>
#include <linux/cdev.h>
#include <linux/anon_inodes.h>
#include <linux/slab.h>
#include <linux/sched/mm.h>

#include <linux/uaccess.h>

#include <rdma/ib.h>
#include <rdma/uverbs_std_types.h>

#include "uverbs.h"
#include "core_priv.h"
#include "rdma_core.h"

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand userspace verbs access");
MODULE_LICENSE("Dual BSD/GPL");

enum {
	IB_UVERBS_MAJOR       = 231,
	IB_UVERBS_BASE_MINOR  = 192,
	IB_UVERBS_MAX_DEVICES = RDMA_MAX_PORTS,
	IB_UVERBS_NUM_FIXED_MINOR = 32,
	IB_UVERBS_NUM_DYNAMIC_MINOR = IB_UVERBS_MAX_DEVICES - IB_UVERBS_NUM_FIXED_MINOR,
};

#define IB_UVERBS_BASE_DEV	MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR)

static dev_t dynamic_uverbs_dev;
static struct class *uverbs_class;

static DEFINE_IDA(uverbs_ida);
static void ib_uverbs_add_one(struct ib_device *device);
static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);

/*
 * Must be called with the ufile->device->disassociate_srcu held, and the lock
 * must be held until use of the ucontext is finished.
 */
struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile)
{
	/*
	 * We do not hold the hw_destroy_rwsem lock for this flow, instead
	 * srcu is used. It does not matter if someone races this with
	 * get_context, we get NULL or valid ucontext.
	 */
	struct ib_ucontext *ucontext = smp_load_acquire(&ufile->ucontext);

	if (!srcu_dereference(ufile->device->ib_dev,
			      &ufile->device->disassociate_srcu))
		return ERR_PTR(-EIO);

	if (!ucontext)
		return ERR_PTR(-EINVAL);

	return ucontext;
}
EXPORT_SYMBOL(ib_uverbs_get_ucontext_file);

int uverbs_dealloc_mw(struct ib_mw *mw)
{
	struct ib_pd *pd = mw->pd;
	int ret;

	ret = mw->device->ops.dealloc_mw(mw);
	if (!ret)
		atomic_dec(&pd->usecnt);
	return ret;
}

static void ib_uverbs_release_dev(struct device *device)
{
	struct ib_uverbs_device *dev =
			container_of(device, struct ib_uverbs_device, dev);

	uverbs_destroy_api(dev->uapi);
	cleanup_srcu_struct(&dev->disassociate_srcu);
	kfree(dev);
}

static void ib_uverbs_release_async_event_file(struct kref *ref)
{
	struct ib_uverbs_async_event_file *file =
		container_of(ref, struct ib_uverbs_async_event_file, ref);

	kfree(file);
}

void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
			   struct ib_uverbs_completion_event_file *ev_file,
			   struct ib_ucq_object *uobj)
{
	struct ib_uverbs_event *evt, *tmp;

	if (ev_file) {
		spin_lock_irq(&ev_file->ev_queue.lock);
		list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) {
			list_del(&evt->list);
			kfree(evt);
		}
		spin_unlock_irq(&ev_file->ev_queue.lock);

		uverbs_uobject_put(&ev_file->uobj);
	}

	spin_lock_irq(&file->async_file->ev_queue.lock);
	list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) {
		list_del(&evt->list);
		kfree(evt);
	}
	spin_unlock_irq(&file->async_file->ev_queue.lock);
}

void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
			      struct ib_uevent_object *uobj)
{
	struct ib_uverbs_event *evt, *tmp;

	spin_lock_irq(&file->async_file->ev_queue.lock);
	list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
		list_del(&evt->list);
		kfree(evt);
	}
	spin_unlock_irq(&file->async_file->ev_queue.lock);
}

void ib_uverbs_detach_umcast(struct ib_qp *qp,
			     struct ib_uqp_object *uobj)
{
	struct ib_uverbs_mcast_entry *mcast, *tmp;

	list_for_each_entry_safe(mcast, tmp, &uobj->mcast_list, list) {
		ib_detach_mcast(qp, &mcast->gid, mcast->lid);
		list_del(&mcast->list);
		kfree(mcast);
	}
}

static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev)
{
	complete(&dev->comp);
}

void ib_uverbs_release_file(struct kref *ref)
{
	struct ib_uverbs_file *file =
		container_of(ref, struct ib_uverbs_file, ref);
	struct ib_device *ib_dev;
	int srcu_key;

	release_ufile_idr_uobject(file);

	srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
	ib_dev = srcu_dereference(file->device->ib_dev,
				  &file->device->disassociate_srcu);
	if (ib_dev && !ib_dev->ops.disassociate_ucontext)
		module_put(ib_dev->owner);
	srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);

	if (atomic_dec_and_test(&file->device->refcount))
		ib_uverbs_comp_dev(file->device);

	put_device(&file->device->dev);
	kfree(file);
}

static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue,
				    struct ib_uverbs_file *uverbs_file,
				    struct file *filp, char __user *buf,
				    size_t count, loff_t *pos,
				    size_t eventsz)
{
	struct ib_uverbs_event *event;
	int ret = 0;

	spin_lock_irq(&ev_queue->lock);

	while (list_empty(&ev_queue->event_list)) {
		spin_unlock_irq(&ev_queue->lock);

		if (filp->f_flags & O_NONBLOCK)
			return -EAGAIN;

		if (wait_event_interruptible(ev_queue->poll_wait,
					     (!list_empty(&ev_queue->event_list) ||
			/* The barriers built into wait_event_interruptible()
			 * and wake_up() guarantee this will see the null set
			 * without using RCU
			 */
					     !uverbs_file->device->ib_dev)))
			return -ERESTARTSYS;

		/* If device was disassociated and no event exists set an error */
		if (list_empty(&ev_queue->event_list) &&
		    !uverbs_file->device->ib_dev)
			return -EIO;

		spin_lock_irq(&ev_queue->lock);
	}

	event = list_entry(ev_queue->event_list.next, struct ib_uverbs_event, list);

	if (eventsz > count) {
		ret   = -EINVAL;
		event = NULL;
	} else {
		list_del(ev_queue->event_list.next);
		if (event->counter) {
			++(*event->counter);
			list_del(&event->obj_list);
		}
	}

	spin_unlock_irq(&ev_queue->lock);

	if (event) {
		if (copy_to_user(buf, event, eventsz))
			ret = -EFAULT;
		else
			ret = eventsz;
	}

	kfree(event);

	return ret;
}

static ssize_t ib_uverbs_async_event_read(struct file *filp, char __user *buf,
					  size_t count, loff_t *pos)
{
	struct ib_uverbs_async_event_file *file = filp->private_data;

	return ib_uverbs_event_read(&file->ev_queue, file->uverbs_file, filp,
				    buf, count, pos,
				    sizeof(struct ib_uverbs_async_event_desc));
}

static ssize_t ib_uverbs_comp_event_read(struct file *filp, char __user *buf,
					 size_t count, loff_t *pos)
{
	struct ib_uverbs_completion_event_file *comp_ev_file =
		filp->private_data;

	return ib_uverbs_event_read(&comp_ev_file->ev_queue,
				    comp_ev_file->uobj.ufile, filp,
				    buf, count, pos,
				    sizeof(struct ib_uverbs_comp_event_desc));
}

static __poll_t ib_uverbs_event_poll(struct ib_uverbs_event_queue *ev_queue,
				     struct file *filp,
				     struct poll_table_struct *wait)
{
	__poll_t pollflags = 0;

	poll_wait(filp, &ev_queue->poll_wait, wait);

	spin_lock_irq(&ev_queue->lock);
	if (!list_empty(&ev_queue->event_list))
		pollflags = EPOLLIN | EPOLLRDNORM;
	spin_unlock_irq(&ev_queue->lock);

	return pollflags;
}

static __poll_t ib_uverbs_async_event_poll(struct file *filp,
					   struct poll_table_struct *wait)
{
	return ib_uverbs_event_poll(filp->private_data, filp, wait);
}

static __poll_t ib_uverbs_comp_event_poll(struct file *filp,
					  struct poll_table_struct *wait)
{
	struct ib_uverbs_completion_event_file *comp_ev_file =
		filp->private_data;

	return ib_uverbs_event_poll(&comp_ev_file->ev_queue, filp, wait);
}

static int ib_uverbs_async_event_fasync(int fd, struct file *filp, int on)
{
	struct ib_uverbs_event_queue *ev_queue = filp->private_data;

	return fasync_helper(fd, filp, on, &ev_queue->async_queue);
}

static int ib_uverbs_comp_event_fasync(int fd, struct file *filp, int on)
{
	struct ib_uverbs_completion_event_file *comp_ev_file =
		filp->private_data;

	return fasync_helper(fd, filp, on, &comp_ev_file->ev_queue.async_queue);
}

static int ib_uverbs_async_event_close(struct inode *inode, struct file *filp)
{
	struct ib_uverbs_async_event_file *file = filp->private_data;
	struct ib_uverbs_file *uverbs_file = file->uverbs_file;
	struct ib_uverbs_event *entry, *tmp;
	int closed_already = 0;

	mutex_lock(&uverbs_file->device->lists_mutex);
	spin_lock_irq(&file->ev_queue.lock);
	closed_already = file->ev_queue.is_closed;
	file->ev_queue.is_closed = 1;
	list_for_each_entry_safe(entry, tmp, &file->ev_queue.event_list, list) {
		if (entry->counter)
			list_del(&entry->obj_list);
		kfree(entry);
	}
	spin_unlock_irq(&file->ev_queue.lock);
	if (!closed_already) {
		list_del(&file->list);
		ib_unregister_event_handler(&uverbs_file->event_handler);
	}
	mutex_unlock(&uverbs_file->device->lists_mutex);

	kref_put(&uverbs_file->ref, ib_uverbs_release_file);
	kref_put(&file->ref, ib_uverbs_release_async_event_file);

	return 0;
}

static int ib_uverbs_comp_event_close(struct inode *inode, struct file *filp)
{
	struct ib_uobject *uobj = filp->private_data;
	struct ib_uverbs_completion_event_file *file = container_of(
		uobj, struct ib_uverbs_completion_event_file, uobj);
	struct ib_uverbs_event *entry, *tmp;

	spin_lock_irq(&file->ev_queue.lock);
	list_for_each_entry_safe(entry, tmp, &file->ev_queue.event_list, list) {
		if (entry->counter)
			list_del(&entry->obj_list);
		kfree(entry);
	}
	file->ev_queue.is_closed = 1;
	spin_unlock_irq(&file->ev_queue.lock);

	uverbs_close_fd(filp);

	return 0;
}

const struct file_operations uverbs_event_fops = {
	.owner	 = THIS_MODULE,
	.read	 = ib_uverbs_comp_event_read,
	.poll    = ib_uverbs_comp_event_poll,
	.release = ib_uverbs_comp_event_close,
	.fasync  = ib_uverbs_comp_event_fasync,
	.llseek	 = no_llseek,
};

static const struct file_operations uverbs_async_event_fops = {
	.owner	 = THIS_MODULE,
	.read	 = ib_uverbs_async_event_read,
	.poll    = ib_uverbs_async_event_poll,
	.release = ib_uverbs_async_event_close,
	.fasync  = ib_uverbs_async_event_fasync,
	.llseek	 = no_llseek,
};

void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
{
	struct ib_uverbs_event_queue *ev_queue = cq_context;
	struct ib_ucq_object *uobj;
	struct ib_uverbs_event *entry;
	unsigned long flags;

	if (!ev_queue)
		return;

	spin_lock_irqsave(&ev_queue->lock, flags);
	if (ev_queue->is_closed) {
		spin_unlock_irqrestore(&ev_queue->lock, flags);
		return;
	}

	entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
	if (!entry) {
		spin_unlock_irqrestore(&ev_queue->lock, flags);
		return;
	}

	uobj = container_of(cq->uobject, struct ib_ucq_object, uobject);

	entry->desc.comp.cq_handle = cq->uobject->user_handle;
	entry->counter		   = &uobj->comp_events_reported;

	list_add_tail(&entry->list, &ev_queue->event_list);
	list_add_tail(&entry->obj_list, &uobj->comp_list);
	spin_unlock_irqrestore(&ev_queue->lock, flags);

	wake_up_interruptible(&ev_queue->poll_wait);
	kill_fasync(&ev_queue->async_queue, SIGIO, POLL_IN);
}

static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
				    __u64 element, __u64 event,
				    struct list_head *obj_list,
				    u32 *counter)
{
	struct ib_uverbs_event *entry;
	unsigned long flags;

	spin_lock_irqsave(&file->async_file->ev_queue.lock, flags);
	if (file->async_file->ev_queue.is_closed) {
		spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);
		return;
	}

	entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
	if (!entry) {
		spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);
		return;
	}

	entry->desc.async.element    = element;
	entry->desc.async.event_type = event;
	entry->desc.async.reserved   = 0;
	entry->counter               = counter;

	list_add_tail(&entry->list, &file->async_file->ev_queue.event_list);
	if (obj_list)
		list_add_tail(&entry->obj_list, obj_list);
	spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);

	wake_up_interruptible(&file->async_file->ev_queue.poll_wait);
	kill_fasync(&file->async_file->ev_queue.async_queue, SIGIO, POLL_IN);
}

void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
{
	struct ib_ucq_object *uobj = container_of(event->element.cq->uobject,
						  struct ib_ucq_object, uobject);

	ib_uverbs_async_handler(uobj->uobject.ufile, uobj->uobject.user_handle,
				event->event, &uobj->async_list,
				&uobj->async_events_reported);
}

void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
{
	struct ib_uevent_object *uobj;

	/* for XRC target qp's, check that qp is live */
	if (!event->element.qp->uobject)
		return;

	uobj = container_of(event->element.qp->uobject,
			    struct ib_uevent_object, uobject);

	ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
				event->event, &uobj->event_list,
				&uobj->events_reported);
}

void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr)
{
	struct ib_uevent_object *uobj = container_of(event->element.wq->uobject,
						     struct ib_uevent_object, uobject);

	ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
				event->event, &uobj->event_list,
				&uobj->events_reported);
}

void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr)
{
	struct ib_uevent_object *uobj;

	uobj = container_of(event->element.srq->uobject,
			    struct ib_uevent_object, uobject);

	ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
				event->event, &uobj->event_list,
				&uobj->events_reported);
}

void ib_uverbs_event_handler(struct ib_event_handler *handler,
			     struct ib_event *event)
{
	struct ib_uverbs_file *file =
		container_of(handler, struct ib_uverbs_file, event_handler);

	ib_uverbs_async_handler(file, event->element.port_num, event->event,
				NULL, NULL);
}

void ib_uverbs_free_async_event_file(struct ib_uverbs_file *file)
{
	kref_put(&file->async_file->ref, ib_uverbs_release_async_event_file);
	file->async_file = NULL;
}

void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue)
{
	spin_lock_init(&ev_queue->lock);
	INIT_LIST_HEAD(&ev_queue->event_list);
	init_waitqueue_head(&ev_queue->poll_wait);
	ev_queue->is_closed   = 0;
	ev_queue->async_queue = NULL;
}

struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file,
					      struct ib_device *ib_dev)
{
	struct ib_uverbs_async_event_file *ev_file;
	struct file *filp;

	ev_file = kzalloc(sizeof(*ev_file), GFP_KERNEL);
	if (!ev_file)
		return ERR_PTR(-ENOMEM);

	ib_uverbs_init_event_queue(&ev_file->ev_queue);
	ev_file->uverbs_file = uverbs_file;
	kref_get(&ev_file->uverbs_file->ref);
	kref_init(&ev_file->ref);
	filp = anon_inode_getfile("[infinibandevent]", &uverbs_async_event_fops,
				  ev_file, O_RDONLY);
	if (IS_ERR(filp))
		goto err_put_refs;

	mutex_lock(&uverbs_file->device->lists_mutex);
	list_add_tail(&ev_file->list,
		      &uverbs_file->device->uverbs_events_file_list);
	mutex_unlock(&uverbs_file->device->lists_mutex);

	WARN_ON(uverbs_file->async_file);
	uverbs_file->async_file = ev_file;
	kref_get(&uverbs_file->async_file->ref);
	INIT_IB_EVENT_HANDLER(&uverbs_file->event_handler,
			      ib_dev,
			      ib_uverbs_event_handler);
	ib_register_event_handler(&uverbs_file->event_handler);
	/* At this point the async event file is fully set up */

	return filp;

err_put_refs:
	kref_put(&ev_file->uverbs_file->ref, ib_uverbs_release_file);
	kref_put(&ev_file->ref, ib_uverbs_release_async_event_file);
	return filp;
}

static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr,
			  struct ib_uverbs_ex_cmd_hdr *ex_hdr, size_t count,
			  const struct uverbs_api_write_method *method_elm)
{
	if (method_elm->is_ex) {
		count -= sizeof(*hdr) + sizeof(*ex_hdr);

		if ((hdr->in_words + ex_hdr->provider_in_words) * 8 != count)
			return -EINVAL;

		if (hdr->in_words * 8 < method_elm->req_size)
			return -ENOSPC;

		if (ex_hdr->cmd_hdr_reserved)
			return -EINVAL;

		if (ex_hdr->response) {
			if (!hdr->out_words && !ex_hdr->provider_out_words)
				return -EINVAL;

			if (hdr->out_words * 8 < method_elm->resp_size)
				return -ENOSPC;

			if (!access_ok(u64_to_user_ptr(ex_hdr->response),
				       (hdr->out_words + ex_hdr->provider_out_words) * 8))
				return -EFAULT;
		} else {
			if (hdr->out_words || ex_hdr->provider_out_words)
				return -EINVAL;
		}

		return 0;
	}

	/* not extended command */
	if (hdr->in_words * 4 != count)
		return -EINVAL;

	if (count < method_elm->req_size + sizeof(hdr)) {
		/*
		 * rdma-core v18 and v19 have a bug where they send DESTROY_CQ
		 * with a 16 byte write instead of 24. Old kernels didn't
		 * check the size so they allowed this. Now that the size is
		 * checked provide a compatibility work around to not break
		 * those userspaces.
		 */
		if (hdr->command == IB_USER_VERBS_CMD_DESTROY_CQ &&
		    count == 16) {
			hdr->in_words = 6;
			return 0;
		}
		return -ENOSPC;
	}
	if (hdr->out_words * 4 < method_elm->resp_size)
		return -ENOSPC;

	return 0;
}

static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
			       size_t count, loff_t *pos)
{
	struct ib_uverbs_file *file = filp->private_data;
	const struct uverbs_api_write_method *method_elm;
	struct uverbs_api *uapi = file->device->uapi;
	struct ib_uverbs_ex_cmd_hdr ex_hdr;
	struct ib_uverbs_cmd_hdr hdr;
	struct uverbs_attr_bundle bundle;
	int srcu_key;
	ssize_t ret;

	if (!ib_safe_file_access(filp)) {
		pr_err_once("uverbs_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
			    task_tgid_vnr(current), current->comm);
		return -EACCES;
	}

	if (count < sizeof(hdr))
		return -EINVAL;

	if (copy_from_user(&hdr, buf, sizeof(hdr)))
		return -EFAULT;

	method_elm = uapi_get_method(uapi, hdr.command);
	if (IS_ERR(method_elm))
		return PTR_ERR(method_elm);

	if (method_elm->is_ex) {
		if (count < (sizeof(hdr) + sizeof(ex_hdr)))
			return -EINVAL;
		if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr)))
			return -EFAULT;
	}

	ret = verify_hdr(&hdr, &ex_hdr, count, method_elm);
	if (ret)
		return ret;

	srcu_key = srcu_read_lock(&file->device->disassociate_srcu);

	buf += sizeof(hdr);

	memset(bundle.attr_present, 0, sizeof(bundle.attr_present));
	bundle.ufile = file;
	if (!method_elm->is_ex) {
		size_t in_len = hdr.in_words * 4 - sizeof(hdr);
		size_t out_len = hdr.out_words * 4;
		u64 response = 0;

		if (method_elm->has_udata) {
			bundle.driver_udata.inlen =
				in_len - method_elm->req_size;
			in_len = method_elm->req_size;
			if (bundle.driver_udata.inlen)
				bundle.driver_udata.inbuf = buf + in_len;
			else
				bundle.driver_udata.inbuf = NULL;
		} else {
			memset(&bundle.driver_udata, 0,
			       sizeof(bundle.driver_udata));
		}

		if (method_elm->has_resp) {
			/*
			 * The macros check that if has_resp is set
			 * then the command request structure starts
			 * with a '__aligned u64 response' member.
			 */
			ret = get_user(response, (const u64 __user *)buf);
			if (ret)
				goto out_unlock;

			if (method_elm->has_udata) {
				bundle.driver_udata.outlen =
					out_len - method_elm->resp_size;
				out_len = method_elm->resp_size;
				if (bundle.driver_udata.outlen)
					bundle.driver_udata.outbuf =
						u64_to_user_ptr(response +
								out_len);
				else
					bundle.driver_udata.outbuf = NULL;
			}
		} else {
			bundle.driver_udata.outlen = 0;
			bundle.driver_udata.outbuf = NULL;
		}

		ib_uverbs_init_udata_buf_or_null(
			&bundle.ucore, buf, u64_to_user_ptr(response),
			in_len, out_len);
	} else {
		buf += sizeof(ex_hdr);

		ib_uverbs_init_udata_buf_or_null(&bundle.ucore, buf,
					u64_to_user_ptr(ex_hdr.response),
					hdr.in_words * 8, hdr.out_words * 8);

		ib_uverbs_init_udata_buf_or_null(
			&bundle.driver_udata, buf + bundle.ucore.inlen,
			u64_to_user_ptr(ex_hdr.response) + bundle.ucore.outlen,
			ex_hdr.provider_in_words * 8,
			ex_hdr.provider_out_words * 8);

	}

	ret = method_elm->handler(&bundle);
out_unlock:
	srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
	return (ret) ? : count;
}

static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct ib_uverbs_file *file = filp->private_data;
	struct ib_ucontext *ucontext;
	int ret = 0;
	int srcu_key;

	srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
	ucontext = ib_uverbs_get_ucontext_file(file);
	if (IS_ERR(ucontext)) {
		ret = PTR_ERR(ucontext);
		goto out;
	}

	ret = ucontext->device->ops.mmap(ucontext, vma);
out:
	srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
	return ret;
}

/*
 * Each time we map IO memory into user space this keeps track of the mapping.
 * When the device is hot-unplugged we 'zap' the mmaps in user space to point
 * to the zero page and allow the hot unplug to proceed.
 *
 * This is necessary for cases like PCI physical hot unplug as the actual BAR
 * memory may vanish after this and access to it from userspace could MCE.
 *
 * RDMA drivers supporting disassociation must have their user space designed
 * to cope in some way with their IO pages going to the zero page.
 */
struct rdma_umap_priv {
	struct vm_area_struct *vma;
	struct list_head list;
};

static const struct vm_operations_struct rdma_umap_ops;

static void rdma_umap_priv_init(struct rdma_umap_priv *priv,
				struct vm_area_struct *vma)
{
	struct ib_uverbs_file *ufile = vma->vm_file->private_data;

	priv->vma = vma;
	vma->vm_private_data = priv;
	vma->vm_ops = &rdma_umap_ops;

	mutex_lock(&ufile->umap_lock);
	list_add(&priv->list, &ufile->umaps);
	mutex_unlock(&ufile->umap_lock);
}

/*
 * The VMA has been dup'd, initialize the vm_private_data with a new tracking
 * struct
 */
static void rdma_umap_open(struct vm_area_struct *vma)
{
	struct ib_uverbs_file *ufile = vma->vm_file->private_data;
	struct rdma_umap_priv *opriv = vma->vm_private_data;
	struct rdma_umap_priv *priv;

	if (!opriv)
		return;

	/* We are racing with disassociation */
	if (!down_read_trylock(&ufile->hw_destroy_rwsem))
		goto out_zap;
	/*
	 * Disassociation already completed, the VMA should already be zapped.
	 */
	if (!ufile->ucontext)
		goto out_unlock;

	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
	if (!priv)
		goto out_unlock;
	rdma_umap_priv_init(priv, vma);

	up_read(&ufile->hw_destroy_rwsem);
	return;

out_unlock:
	up_read(&ufile->hw_destroy_rwsem);
out_zap:
	/*
	 * We can't allow the VMA to be created with the actual IO pages, that
	 * would break our API contract, and it can't be stopped at this
	 * point, so zap it.
	 */
	vma->vm_private_data = NULL;
	zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
}

static void rdma_umap_close(struct vm_area_struct *vma)
{
	struct ib_uverbs_file *ufile = vma->vm_file->private_data;
	struct rdma_umap_priv *priv = vma->vm_private_data;

	if (!priv)
		return;

	/*
	 * The vma holds a reference on the struct file that created it, which
	 * in turn means that the ib_uverbs_file is guaranteed to exist at
	 * this point.
	 */
	mutex_lock(&ufile->umap_lock);
	list_del(&priv->list);
	mutex_unlock(&ufile->umap_lock);
	kfree(priv);
}

static const struct vm_operations_struct rdma_umap_ops = {
	.open = rdma_umap_open,
	.close = rdma_umap_close,
};

static struct rdma_umap_priv *rdma_user_mmap_pre(struct ib_ucontext *ucontext,
						 struct vm_area_struct *vma,
						 unsigned long size)
{
	struct ib_uverbs_file *ufile = ucontext->ufile;
	struct rdma_umap_priv *priv;

	if (vma->vm_end - vma->vm_start != size)
		return ERR_PTR(-EINVAL);

	/* Driver is using this wrong, must be called by ib_uverbs_mmap */
	if (WARN_ON(!vma->vm_file ||
		    vma->vm_file->private_data != ufile))
		return ERR_PTR(-EINVAL);
	lockdep_assert_held(&ufile->device->disassociate_srcu);

	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
	if (!priv)
		return ERR_PTR(-ENOMEM);
	return priv;
}

/*
 * Map IO memory into a process. This is to be called by drivers as part of
 * their mmap() functions if they wish to send something like PCI-E BAR memory
 * to userspace.
 */
int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
		      unsigned long pfn, unsigned long size, pgprot_t prot)
{
	struct rdma_umap_priv *priv = rdma_user_mmap_pre(ucontext, vma, size);

	if (IS_ERR(priv))
		return PTR_ERR(priv);

	vma->vm_page_prot = prot;
	if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) {
		kfree(priv);
		return -EAGAIN;
	}

	rdma_umap_priv_init(priv, vma);
	return 0;
}
EXPORT_SYMBOL(rdma_user_mmap_io);

/*
 * The page case is here for a slightly different reason, the driver expects
 * to be able to free the page it is sharing to user space when it destroys
 * its ucontext, which means we need to zap the user space references.
 *
 * We could handle this differently by providing an API to allocate a shared
 * page and then only freeing the shared page when the last ufile is
 * destroyed.
 */
int rdma_user_mmap_page(struct ib_ucontext *ucontext,
			struct vm_area_struct *vma, struct page *page,
			unsigned long size)
{
	struct rdma_umap_priv *priv = rdma_user_mmap_pre(ucontext, vma, size);

	if (IS_ERR(priv))
		return PTR_ERR(priv);

	if (remap_pfn_range(vma, vma->vm_start, page_to_pfn(page), size,
			    vma->vm_page_prot)) {
		kfree(priv);
		return -EAGAIN;
	}

	rdma_umap_priv_init(priv, vma);
	return 0;
}
EXPORT_SYMBOL(rdma_user_mmap_page);

void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
{
	struct rdma_umap_priv *priv, *next_priv;

	lockdep_assert_held(&ufile->hw_destroy_rwsem);

	while (1) {
		struct mm_struct *mm = NULL;

		/* Get an arbitrary mm pointer that hasn't been cleaned yet */
		mutex_lock(&ufile->umap_lock);
		if (!list_empty(&ufile->umaps)) {
			mm = list_first_entry(&ufile->umaps,
					      struct rdma_umap_priv, list)
					     ->vma->vm_mm;
			mmget(mm);
		}
		mutex_unlock(&ufile->umap_lock);
		if (!mm)
			return;

		/*
		 * The umap_lock is nested under mmap_sem since it is used
		 * within the vma_ops callbacks, so we have to clean the list
		 * one mm at a time to get the lock ordering right. Typically
		 * there will only be one mm, so no big deal.
		 */
		down_write(&mm->mmap_sem);
		mutex_lock(&ufile->umap_lock);
		list_for_each_entry_safe (priv, next_priv, &ufile->umaps,
					  list) {
			struct vm_area_struct *vma = priv->vma;

			if (vma->vm_mm != mm)
				continue;
			list_del_init(&priv->list);

			zap_vma_ptes(vma, vma->vm_start,
				     vma->vm_end - vma->vm_start);
			vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
		}
		mutex_unlock(&ufile->umap_lock);
		up_write(&mm->mmap_sem);
		mmput(mm);
	}
}

/*
 * ib_uverbs_open() does not need the BKL:
 *
 *  - the ib_uverbs_device structures are properly reference counted and
 *    everything else is purely local to the file being created, so
 *    races against other open calls are not a problem;
 *  - there is no ioctl method to race against;
 *  - the open method will either immediately fail with -ENXIO, or all
 *    required initialization will be done.
 */
static int ib_uverbs_open(struct inode *inode, struct file *filp)
{
	struct ib_uverbs_device *dev;
	struct ib_uverbs_file *file;
	struct ib_device *ib_dev;
	int ret;
	int module_dependent;
	int srcu_key;

	dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev);
	if (!atomic_inc_not_zero(&dev->refcount))
		return -ENXIO;

	get_device(&dev->dev);
	srcu_key = srcu_read_lock(&dev->disassociate_srcu);
	mutex_lock(&dev->lists_mutex);
	ib_dev = srcu_dereference(dev->ib_dev,
				  &dev->disassociate_srcu);
	if (!ib_dev) {
		ret = -EIO;
		goto err;
	}

	/* In case IB device supports disassociate ucontext, there is no hard
	 * dependency between uverbs device and its low level device.
	 */
	module_dependent = !(ib_dev->ops.disassociate_ucontext);

	if (module_dependent) {
		if (!try_module_get(ib_dev->owner)) {
			ret = -ENODEV;
			goto err;
		}
	}

	file = kzalloc(sizeof(*file), GFP_KERNEL);
	if (!file) {
		ret = -ENOMEM;
		if (module_dependent)
			goto err_module;

		goto err;
	}

	file->device	 = dev;
	kref_init(&file->ref);
	mutex_init(&file->ucontext_lock);

	spin_lock_init(&file->uobjects_lock);
	INIT_LIST_HEAD(&file->uobjects);
	init_rwsem(&file->hw_destroy_rwsem);
	mutex_init(&file->umap_lock);
	INIT_LIST_HEAD(&file->umaps);

	filp->private_data = file;
	list_add_tail(&file->list, &dev->uverbs_file_list);
	mutex_unlock(&dev->lists_mutex);
	srcu_read_unlock(&dev->disassociate_srcu, srcu_key);

	setup_ufile_idr_uobject(file);

	return nonseekable_open(inode, filp);

err_module:
	module_put(ib_dev->owner);

err:
	mutex_unlock(&dev->lists_mutex);
	srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
	if (atomic_dec_and_test(&dev->refcount))
		ib_uverbs_comp_dev(dev);

	put_device(&dev->dev);
	return ret;
}

static int ib_uverbs_close(struct inode *inode, struct file *filp)
{
	struct ib_uverbs_file *file = filp->private_data;

	uverbs_destroy_ufile_hw(file, RDMA_REMOVE_CLOSE);

	mutex_lock(&file->device->lists_mutex);
	list_del_init(&file->list);
	mutex_unlock(&file->device->lists_mutex);

	if (file->async_file)
		kref_put(&file->async_file->ref,
			 ib_uverbs_release_async_event_file);

	kref_put(&file->ref, ib_uverbs_release_file);

	return 0;
}

static const struct file_operations uverbs_fops = {
	.owner	 = THIS_MODULE,
	.write	 = ib_uverbs_write,
	.open	 = ib_uverbs_open,
	.release = ib_uverbs_close,
	.llseek	 = no_llseek,
	.unlocked_ioctl = ib_uverbs_ioctl,
	.compat_ioctl = ib_uverbs_ioctl,
};

static const struct file_operations uverbs_mmap_fops = {
	.owner	 = THIS_MODULE,
	.write	 = ib_uverbs_write,
	.mmap    = ib_uverbs_mmap,
	.open	 = ib_uverbs_open,
	.release = ib_uverbs_close,
	.llseek	 = no_llseek,
	.unlocked_ioctl = ib_uverbs_ioctl,
	.compat_ioctl = ib_uverbs_ioctl,
};

static struct ib_client uverbs_client = {
	.name   = "uverbs",
	.add    = ib_uverbs_add_one,
	.remove = ib_uverbs_remove_one
};

static ssize_t ibdev_show(struct device *device, struct device_attribute *attr,
			  char *buf)
{
	struct ib_uverbs_device *dev =
			container_of(device, struct ib_uverbs_device, dev);
	int ret = -ENODEV;
	int srcu_key;
	struct ib_device *ib_dev;

	srcu_key = srcu_read_lock(&dev->disassociate_srcu);
	ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
	if (ib_dev)
		ret = sprintf(buf, "%s\n", dev_name(&ib_dev->dev));
	srcu_read_unlock(&dev->disassociate_srcu, srcu_key);

	return ret;
}
static DEVICE_ATTR_RO(ibdev);

static ssize_t abi_version_show(struct device *device,
				struct device_attribute *attr, char *buf)
{
	struct ib_uverbs_device *dev =
			container_of(device, struct ib_uverbs_device, dev);
	int ret = -ENODEV;
	int srcu_key;
	struct ib_device *ib_dev;

	srcu_key = srcu_read_lock(&dev->disassociate_srcu);
	ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
	if (ib_dev)
		ret = sprintf(buf, "%d\n", ib_dev->uverbs_abi_ver);
	srcu_read_unlock(&dev->disassociate_srcu, srcu_key);

	return ret;
}
static DEVICE_ATTR_RO(abi_version);

static struct attribute *ib_dev_attrs[] = {
	&dev_attr_abi_version.attr,
	&dev_attr_ibdev.attr,
	NULL,
};

static const struct attribute_group dev_attr_group = {
	.attrs = ib_dev_attrs,
};

static CLASS_ATTR_STRING(abi_version, S_IRUGO,
			 __stringify(IB_USER_VERBS_ABI_VERSION));

static int ib_uverbs_create_uapi(struct ib_device *device,
				 struct ib_uverbs_device *uverbs_dev)
{
	struct uverbs_api *uapi;

	uapi = uverbs_alloc_api(device);
	if (IS_ERR(uapi))
		return PTR_ERR(uapi);

	uverbs_dev->uapi = uapi;
	return 0;
}

static void ib_uverbs_add_one(struct ib_device *device)
{
	int devnum;
	dev_t base;
	struct ib_uverbs_device *uverbs_dev;
	int ret;

	if (!device->ops.alloc_ucontext)
		return;

	uverbs_dev = kzalloc(sizeof(*uverbs_dev), GFP_KERNEL);
	if (!uverbs_dev)
		return;

	ret = init_srcu_struct(&uverbs_dev->disassociate_srcu);
	if (ret) {
		kfree(uverbs_dev);
		return;
	}

	device_initialize(&uverbs_dev->dev);
	uverbs_dev->dev.class = uverbs_class;
	uverbs_dev->dev.parent = device->dev.parent;
	uverbs_dev->dev.release = ib_uverbs_release_dev;
	uverbs_dev->groups[0] = &dev_attr_group;
	uverbs_dev->dev.groups = uverbs_dev->groups;
	atomic_set(&uverbs_dev->refcount, 1);
	init_completion(&uverbs_dev->comp);
	uverbs_dev->xrcd_tree = RB_ROOT;
	mutex_init(&uverbs_dev->xrcd_tree_mutex);
	mutex_init(&uverbs_dev->lists_mutex);
	INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list);
	INIT_LIST_HEAD(&uverbs_dev->uverbs_events_file_list);
	rcu_assign_pointer(uverbs_dev->ib_dev, device);
	uverbs_dev->num_comp_vectors = device->num_comp_vectors;

	devnum = ida_alloc_max(&uverbs_ida, IB_UVERBS_MAX_DEVICES - 1,
			       GFP_KERNEL);
	if (devnum < 0)
		goto err;
	uverbs_dev->devnum = devnum;
	if (devnum >= IB_UVERBS_NUM_FIXED_MINOR)
		base = dynamic_uverbs_dev + devnum - IB_UVERBS_NUM_FIXED_MINOR;
	else
		base = IB_UVERBS_BASE_DEV + devnum;

	if (ib_uverbs_create_uapi(device, uverbs_dev))
		goto err_uapi;

	uverbs_dev->dev.devt = base;
	dev_set_name(&uverbs_dev->dev, "uverbs%d", uverbs_dev->devnum);

	cdev_init(&uverbs_dev->cdev,
		  device->ops.mmap ? &uverbs_mmap_fops : &uverbs_fops);
	uverbs_dev->cdev.owner = THIS_MODULE;

	ret = cdev_device_add(&uverbs_dev->cdev, &uverbs_dev->dev);
	if (ret)
		goto err_uapi;

	ib_set_client_data(device, &uverbs_client, uverbs_dev);
	return;

err_uapi:
	ida_free(&uverbs_ida, devnum);
err:
	if (atomic_dec_and_test(&uverbs_dev->refcount))
		ib_uverbs_comp_dev(uverbs_dev);
	wait_for_completion(&uverbs_dev->comp);
	put_device(&uverbs_dev->dev);
	return;
}

static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
					struct ib_device *ib_dev)
{
	struct ib_uverbs_file *file;
	struct ib_uverbs_async_event_file *event_file;
	struct ib_event event;

	/* Pending running commands to terminate */
	uverbs_disassociate_api_pre(uverbs_dev);
	event.event = IB_EVENT_DEVICE_FATAL;
	event.element.port_num = 0;
	event.device = ib_dev;

	mutex_lock(&uverbs_dev->lists_mutex);
	while (!list_empty(&uverbs_dev->uverbs_file_list)) {
		file = list_first_entry(&uverbs_dev->uverbs_file_list,
					struct ib_uverbs_file, list);
		list_del_init(&file->list);
		kref_get(&file->ref);

		/* We must release the mutex before going ahead and calling
		 * uverbs_cleanup_ufile, as it might end up indirectly calling
		 * uverbs_close, for example due to freeing the resources (e.g.
		 * mmput).
		 */
		mutex_unlock(&uverbs_dev->lists_mutex);

		ib_uverbs_event_handler(&file->event_handler, &event);
		uverbs_destroy_ufile_hw(file, RDMA_REMOVE_DRIVER_REMOVE);
		kref_put(&file->ref, ib_uverbs_release_file);

		mutex_lock(&uverbs_dev->lists_mutex);
	}

	while (!list_empty(&uverbs_dev->uverbs_events_file_list)) {
		event_file = list_first_entry(&uverbs_dev->
					      uverbs_events_file_list,
					      struct ib_uverbs_async_event_file,
					      list);
		spin_lock_irq(&event_file->ev_queue.lock);
		event_file->ev_queue.is_closed = 1;
		spin_unlock_irq(&event_file->ev_queue.lock);

		list_del(&event_file->list);
		ib_unregister_event_handler(
			&event_file->uverbs_file->event_handler);
		event_file->uverbs_file->event_handler.device = NULL;

		wake_up_interruptible(&event_file->ev_queue.poll_wait);
		kill_fasync(&event_file->ev_queue.async_queue, SIGIO, POLL_IN);
	}
	mutex_unlock(&uverbs_dev->lists_mutex);

	uverbs_disassociate_api(uverbs_dev->uapi);
}

static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
{
	struct ib_uverbs_device *uverbs_dev = client_data;
	int wait_clients = 1;

	if (!uverbs_dev)
		return;

	cdev_device_del(&uverbs_dev->cdev, &uverbs_dev->dev);
	ida_free(&uverbs_ida, uverbs_dev->devnum);

	if (device->ops.disassociate_ucontext) {
		/* We disassociate HW resources and immediately return.
		 * Userspace will see an EIO errno for all future access.
		 * Upon returning, ib_device may be freed internally and is not
		 * valid any more.
		 * uverbs_device is still available until all clients close
		 * their files, then the uverbs device ref count will be zero
		 * and its resources will be freed.
		 * Note: At this point no more files can be opened since the
		 * cdev was deleted, however active clients can still issue
		 * commands and close their open files.
		 */
		ib_uverbs_free_hw_resources(uverbs_dev, device);
		wait_clients = 0;
	}

	if (atomic_dec_and_test(&uverbs_dev->refcount))
		ib_uverbs_comp_dev(uverbs_dev);
	if (wait_clients)
		wait_for_completion(&uverbs_dev->comp);

	put_device(&uverbs_dev->dev);
}

static char *uverbs_devnode(struct device *dev, umode_t *mode)
{
	if (mode)
		*mode = 0666;
	return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
}

static int __init ib_uverbs_init(void)
{
	int ret;

	ret = register_chrdev_region(IB_UVERBS_BASE_DEV,
				     IB_UVERBS_NUM_FIXED_MINOR,
				     "infiniband_verbs");
	if (ret) {
		pr_err("user_verbs: couldn't register device number\n");
		goto out;
	}

	ret = alloc_chrdev_region(&dynamic_uverbs_dev, 0,
				  IB_UVERBS_NUM_DYNAMIC_MINOR,
				  "infiniband_verbs");
	if (ret) {
		pr_err("couldn't register dynamic device number\n");
		goto out_alloc;
	}

	uverbs_class = class_create(THIS_MODULE, "infiniband_verbs");
	if (IS_ERR(uverbs_class)) {
		ret = PTR_ERR(uverbs_class);
		pr_err("user_verbs: couldn't create class infiniband_verbs\n");
		goto out_chrdev;
	}

	uverbs_class->devnode = uverbs_devnode;

	ret = class_create_file(uverbs_class, &class_attr_abi_version.attr);
	if (ret) {
		pr_err("user_verbs: couldn't create abi_version attribute\n");
		goto out_class;
	}

	ret = ib_register_client(&uverbs_client);
	if (ret) {
		pr_err("user_verbs: couldn't register client\n");
		goto out_class;
	}

	return 0;

out_class:
	class_destroy(uverbs_class);

out_chrdev:
	unregister_chrdev_region(dynamic_uverbs_dev,
				 IB_UVERBS_NUM_DYNAMIC_MINOR);

out_alloc:
	unregister_chrdev_region(IB_UVERBS_BASE_DEV,
				 IB_UVERBS_NUM_FIXED_MINOR);

out:
	return ret;
}

static void __exit ib_uverbs_cleanup(void)
{
	ib_unregister_client(&uverbs_client);
	class_destroy(uverbs_class);
	unregister_chrdev_region(IB_UVERBS_BASE_DEV,
				 IB_UVERBS_NUM_FIXED_MINOR);
	unregister_chrdev_region(dynamic_uverbs_dev,
				 IB_UVERBS_NUM_DYNAMIC_MINOR);
}

module_init(ib_uverbs_init);
module_exit(ib_uverbs_cleanup);