1 /* 2 * Copyright (c) 2005 Topspin Communications. All rights reserved. 3 * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. 4 * Copyright (c) 2005 Mellanox Technologies. All rights reserved. 5 * Copyright (c) 2005 Voltaire, Inc. All rights reserved. 6 * Copyright (c) 2005 PathScale, Inc. All rights reserved. 7 * 8 * This software is available to you under a choice of one of two 9 * licenses. You may choose to be licensed under the terms of the GNU 10 * General Public License (GPL) Version 2, available from the file 11 * COPYING in the main directory of this source tree, or the 12 * OpenIB.org BSD license below: 13 * 14 * Redistribution and use in source and binary forms, with or 15 * without modification, are permitted provided that the following 16 * conditions are met: 17 * 18 * - Redistributions of source code must retain the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer. 21 * 22 * - Redistributions in binary form must reproduce the above 23 * copyright notice, this list of conditions and the following 24 * disclaimer in the documentation and/or other materials 25 * provided with the distribution. 26 * 27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 28 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 29 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 30 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 31 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 34 * SOFTWARE. 35 */ 36 37 #include <linux/module.h> 38 #include <linux/init.h> 39 #include <linux/device.h> 40 #include <linux/err.h> 41 #include <linux/fs.h> 42 #include <linux/poll.h> 43 #include <linux/sched.h> 44 #include <linux/file.h> 45 #include <linux/cdev.h> 46 #include <linux/anon_inodes.h> 47 #include <linux/slab.h> 48 #include <linux/sched/mm.h> 49 50 #include <linux/uaccess.h> 51 52 #include <rdma/ib.h> 53 #include <rdma/uverbs_std_types.h> 54 55 #include "uverbs.h" 56 #include "core_priv.h" 57 #include "rdma_core.h" 58 59 MODULE_AUTHOR("Roland Dreier"); 60 MODULE_DESCRIPTION("InfiniBand userspace verbs access"); 61 MODULE_LICENSE("Dual BSD/GPL"); 62 63 enum { 64 IB_UVERBS_MAJOR = 231, 65 IB_UVERBS_BASE_MINOR = 192, 66 IB_UVERBS_MAX_DEVICES = RDMA_MAX_PORTS, 67 IB_UVERBS_NUM_FIXED_MINOR = 32, 68 IB_UVERBS_NUM_DYNAMIC_MINOR = IB_UVERBS_MAX_DEVICES - IB_UVERBS_NUM_FIXED_MINOR, 69 }; 70 71 #define IB_UVERBS_BASE_DEV MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR) 72 73 static dev_t dynamic_uverbs_dev; 74 static struct class *uverbs_class; 75 76 static DEFINE_IDA(uverbs_ida); 77 78 static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, 79 const char __user *buf, int in_len, 80 int out_len) = { 81 [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context, 82 [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device, 83 [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port, 84 [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd, 85 [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd, 86 [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr, 87 [IB_USER_VERBS_CMD_REREG_MR] = ib_uverbs_rereg_mr, 88 [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr, 89 [IB_USER_VERBS_CMD_ALLOC_MW] = ib_uverbs_alloc_mw, 90 [IB_USER_VERBS_CMD_DEALLOC_MW] = ib_uverbs_dealloc_mw, 91 [IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel, 92 [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq, 93 [IB_USER_VERBS_CMD_RESIZE_CQ] = ib_uverbs_resize_cq, 94 [IB_USER_VERBS_CMD_POLL_CQ] = ib_uverbs_poll_cq, 95 [IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = ib_uverbs_req_notify_cq, 96 [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq, 97 [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp, 98 [IB_USER_VERBS_CMD_QUERY_QP] = ib_uverbs_query_qp, 99 [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp, 100 [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp, 101 [IB_USER_VERBS_CMD_POST_SEND] = ib_uverbs_post_send, 102 [IB_USER_VERBS_CMD_POST_RECV] = ib_uverbs_post_recv, 103 [IB_USER_VERBS_CMD_POST_SRQ_RECV] = ib_uverbs_post_srq_recv, 104 [IB_USER_VERBS_CMD_CREATE_AH] = ib_uverbs_create_ah, 105 [IB_USER_VERBS_CMD_DESTROY_AH] = ib_uverbs_destroy_ah, 106 [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast, 107 [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast, 108 [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq, 109 [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq, 110 [IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq, 111 [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq, 112 [IB_USER_VERBS_CMD_OPEN_XRCD] = ib_uverbs_open_xrcd, 113 [IB_USER_VERBS_CMD_CLOSE_XRCD] = ib_uverbs_close_xrcd, 114 [IB_USER_VERBS_CMD_CREATE_XSRQ] = ib_uverbs_create_xsrq, 115 [IB_USER_VERBS_CMD_OPEN_QP] = ib_uverbs_open_qp, 116 }; 117 118 static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file, 119 struct ib_udata *ucore, 120 struct ib_udata *uhw) = { 121 [IB_USER_VERBS_EX_CMD_CREATE_FLOW] = ib_uverbs_ex_create_flow, 122 [IB_USER_VERBS_EX_CMD_DESTROY_FLOW] = ib_uverbs_ex_destroy_flow, 123 [IB_USER_VERBS_EX_CMD_QUERY_DEVICE] = ib_uverbs_ex_query_device, 124 [IB_USER_VERBS_EX_CMD_CREATE_CQ] = ib_uverbs_ex_create_cq, 125 [IB_USER_VERBS_EX_CMD_CREATE_QP] = ib_uverbs_ex_create_qp, 126 [IB_USER_VERBS_EX_CMD_CREATE_WQ] = ib_uverbs_ex_create_wq, 127 [IB_USER_VERBS_EX_CMD_MODIFY_WQ] = ib_uverbs_ex_modify_wq, 128 [IB_USER_VERBS_EX_CMD_DESTROY_WQ] = ib_uverbs_ex_destroy_wq, 129 [IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL] = ib_uverbs_ex_create_rwq_ind_table, 130 [IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL] = ib_uverbs_ex_destroy_rwq_ind_table, 131 [IB_USER_VERBS_EX_CMD_MODIFY_QP] = ib_uverbs_ex_modify_qp, 132 [IB_USER_VERBS_EX_CMD_MODIFY_CQ] = ib_uverbs_ex_modify_cq, 133 }; 134 135 static void ib_uverbs_add_one(struct ib_device *device); 136 static void ib_uverbs_remove_one(struct ib_device *device, void *client_data); 137 138 /* 139 * Must be called with the ufile->device->disassociate_srcu held, and the lock 140 * must be held until use of the ucontext is finished. 141 */ 142 struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile) 143 { 144 /* 145 * We do not hold the hw_destroy_rwsem lock for this flow, instead 146 * srcu is used. It does not matter if someone races this with 147 * get_context, we get NULL or valid ucontext. 148 */ 149 struct ib_ucontext *ucontext = smp_load_acquire(&ufile->ucontext); 150 151 if (!srcu_dereference(ufile->device->ib_dev, 152 &ufile->device->disassociate_srcu)) 153 return ERR_PTR(-EIO); 154 155 if (!ucontext) 156 return ERR_PTR(-EINVAL); 157 158 return ucontext; 159 } 160 EXPORT_SYMBOL(ib_uverbs_get_ucontext); 161 162 int uverbs_dealloc_mw(struct ib_mw *mw) 163 { 164 struct ib_pd *pd = mw->pd; 165 int ret; 166 167 ret = mw->device->dealloc_mw(mw); 168 if (!ret) 169 atomic_dec(&pd->usecnt); 170 return ret; 171 } 172 173 static void ib_uverbs_release_dev(struct device *device) 174 { 175 struct ib_uverbs_device *dev = 176 container_of(device, struct ib_uverbs_device, dev); 177 178 uverbs_destroy_api(dev->uapi); 179 cleanup_srcu_struct(&dev->disassociate_srcu); 180 kfree(dev); 181 } 182 183 static void ib_uverbs_release_async_event_file(struct kref *ref) 184 { 185 struct ib_uverbs_async_event_file *file = 186 container_of(ref, struct ib_uverbs_async_event_file, ref); 187 188 kfree(file); 189 } 190 191 void ib_uverbs_release_ucq(struct ib_uverbs_file *file, 192 struct ib_uverbs_completion_event_file *ev_file, 193 struct ib_ucq_object *uobj) 194 { 195 struct ib_uverbs_event *evt, *tmp; 196 197 if (ev_file) { 198 spin_lock_irq(&ev_file->ev_queue.lock); 199 list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) { 200 list_del(&evt->list); 201 kfree(evt); 202 } 203 spin_unlock_irq(&ev_file->ev_queue.lock); 204 205 uverbs_uobject_put(&ev_file->uobj); 206 } 207 208 spin_lock_irq(&file->async_file->ev_queue.lock); 209 list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) { 210 list_del(&evt->list); 211 kfree(evt); 212 } 213 spin_unlock_irq(&file->async_file->ev_queue.lock); 214 } 215 216 void ib_uverbs_release_uevent(struct ib_uverbs_file *file, 217 struct ib_uevent_object *uobj) 218 { 219 struct ib_uverbs_event *evt, *tmp; 220 221 spin_lock_irq(&file->async_file->ev_queue.lock); 222 list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) { 223 list_del(&evt->list); 224 kfree(evt); 225 } 226 spin_unlock_irq(&file->async_file->ev_queue.lock); 227 } 228 229 void ib_uverbs_detach_umcast(struct ib_qp *qp, 230 struct ib_uqp_object *uobj) 231 { 232 struct ib_uverbs_mcast_entry *mcast, *tmp; 233 234 list_for_each_entry_safe(mcast, tmp, &uobj->mcast_list, list) { 235 ib_detach_mcast(qp, &mcast->gid, mcast->lid); 236 list_del(&mcast->list); 237 kfree(mcast); 238 } 239 } 240 241 static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev) 242 { 243 complete(&dev->comp); 244 } 245 246 void ib_uverbs_release_file(struct kref *ref) 247 { 248 struct ib_uverbs_file *file = 249 container_of(ref, struct ib_uverbs_file, ref); 250 struct ib_device *ib_dev; 251 int srcu_key; 252 253 release_ufile_idr_uobject(file); 254 255 srcu_key = srcu_read_lock(&file->device->disassociate_srcu); 256 ib_dev = srcu_dereference(file->device->ib_dev, 257 &file->device->disassociate_srcu); 258 if (ib_dev && !ib_dev->disassociate_ucontext) 259 module_put(ib_dev->owner); 260 srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); 261 262 if (atomic_dec_and_test(&file->device->refcount)) 263 ib_uverbs_comp_dev(file->device); 264 265 put_device(&file->device->dev); 266 kfree(file); 267 } 268 269 static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue, 270 struct ib_uverbs_file *uverbs_file, 271 struct file *filp, char __user *buf, 272 size_t count, loff_t *pos, 273 size_t eventsz) 274 { 275 struct ib_uverbs_event *event; 276 int ret = 0; 277 278 spin_lock_irq(&ev_queue->lock); 279 280 while (list_empty(&ev_queue->event_list)) { 281 spin_unlock_irq(&ev_queue->lock); 282 283 if (filp->f_flags & O_NONBLOCK) 284 return -EAGAIN; 285 286 if (wait_event_interruptible(ev_queue->poll_wait, 287 (!list_empty(&ev_queue->event_list) || 288 /* The barriers built into wait_event_interruptible() 289 * and wake_up() guarentee this will see the null set 290 * without using RCU 291 */ 292 !uverbs_file->device->ib_dev))) 293 return -ERESTARTSYS; 294 295 /* If device was disassociated and no event exists set an error */ 296 if (list_empty(&ev_queue->event_list) && 297 !uverbs_file->device->ib_dev) 298 return -EIO; 299 300 spin_lock_irq(&ev_queue->lock); 301 } 302 303 event = list_entry(ev_queue->event_list.next, struct ib_uverbs_event, list); 304 305 if (eventsz > count) { 306 ret = -EINVAL; 307 event = NULL; 308 } else { 309 list_del(ev_queue->event_list.next); 310 if (event->counter) { 311 ++(*event->counter); 312 list_del(&event->obj_list); 313 } 314 } 315 316 spin_unlock_irq(&ev_queue->lock); 317 318 if (event) { 319 if (copy_to_user(buf, event, eventsz)) 320 ret = -EFAULT; 321 else 322 ret = eventsz; 323 } 324 325 kfree(event); 326 327 return ret; 328 } 329 330 static ssize_t ib_uverbs_async_event_read(struct file *filp, char __user *buf, 331 size_t count, loff_t *pos) 332 { 333 struct ib_uverbs_async_event_file *file = filp->private_data; 334 335 return ib_uverbs_event_read(&file->ev_queue, file->uverbs_file, filp, 336 buf, count, pos, 337 sizeof(struct ib_uverbs_async_event_desc)); 338 } 339 340 static ssize_t ib_uverbs_comp_event_read(struct file *filp, char __user *buf, 341 size_t count, loff_t *pos) 342 { 343 struct ib_uverbs_completion_event_file *comp_ev_file = 344 filp->private_data; 345 346 return ib_uverbs_event_read(&comp_ev_file->ev_queue, 347 comp_ev_file->uobj.ufile, filp, 348 buf, count, pos, 349 sizeof(struct ib_uverbs_comp_event_desc)); 350 } 351 352 static __poll_t ib_uverbs_event_poll(struct ib_uverbs_event_queue *ev_queue, 353 struct file *filp, 354 struct poll_table_struct *wait) 355 { 356 __poll_t pollflags = 0; 357 358 poll_wait(filp, &ev_queue->poll_wait, wait); 359 360 spin_lock_irq(&ev_queue->lock); 361 if (!list_empty(&ev_queue->event_list)) 362 pollflags = EPOLLIN | EPOLLRDNORM; 363 spin_unlock_irq(&ev_queue->lock); 364 365 return pollflags; 366 } 367 368 static __poll_t ib_uverbs_async_event_poll(struct file *filp, 369 struct poll_table_struct *wait) 370 { 371 return ib_uverbs_event_poll(filp->private_data, filp, wait); 372 } 373 374 static __poll_t ib_uverbs_comp_event_poll(struct file *filp, 375 struct poll_table_struct *wait) 376 { 377 struct ib_uverbs_completion_event_file *comp_ev_file = 378 filp->private_data; 379 380 return ib_uverbs_event_poll(&comp_ev_file->ev_queue, filp, wait); 381 } 382 383 static int ib_uverbs_async_event_fasync(int fd, struct file *filp, int on) 384 { 385 struct ib_uverbs_event_queue *ev_queue = filp->private_data; 386 387 return fasync_helper(fd, filp, on, &ev_queue->async_queue); 388 } 389 390 static int ib_uverbs_comp_event_fasync(int fd, struct file *filp, int on) 391 { 392 struct ib_uverbs_completion_event_file *comp_ev_file = 393 filp->private_data; 394 395 return fasync_helper(fd, filp, on, &comp_ev_file->ev_queue.async_queue); 396 } 397 398 static int ib_uverbs_async_event_close(struct inode *inode, struct file *filp) 399 { 400 struct ib_uverbs_async_event_file *file = filp->private_data; 401 struct ib_uverbs_file *uverbs_file = file->uverbs_file; 402 struct ib_uverbs_event *entry, *tmp; 403 int closed_already = 0; 404 405 mutex_lock(&uverbs_file->device->lists_mutex); 406 spin_lock_irq(&file->ev_queue.lock); 407 closed_already = file->ev_queue.is_closed; 408 file->ev_queue.is_closed = 1; 409 list_for_each_entry_safe(entry, tmp, &file->ev_queue.event_list, list) { 410 if (entry->counter) 411 list_del(&entry->obj_list); 412 kfree(entry); 413 } 414 spin_unlock_irq(&file->ev_queue.lock); 415 if (!closed_already) { 416 list_del(&file->list); 417 ib_unregister_event_handler(&uverbs_file->event_handler); 418 } 419 mutex_unlock(&uverbs_file->device->lists_mutex); 420 421 kref_put(&uverbs_file->ref, ib_uverbs_release_file); 422 kref_put(&file->ref, ib_uverbs_release_async_event_file); 423 424 return 0; 425 } 426 427 static int ib_uverbs_comp_event_close(struct inode *inode, struct file *filp) 428 { 429 struct ib_uobject *uobj = filp->private_data; 430 struct ib_uverbs_completion_event_file *file = container_of( 431 uobj, struct ib_uverbs_completion_event_file, uobj); 432 struct ib_uverbs_event *entry, *tmp; 433 434 spin_lock_irq(&file->ev_queue.lock); 435 list_for_each_entry_safe(entry, tmp, &file->ev_queue.event_list, list) { 436 if (entry->counter) 437 list_del(&entry->obj_list); 438 kfree(entry); 439 } 440 file->ev_queue.is_closed = 1; 441 spin_unlock_irq(&file->ev_queue.lock); 442 443 uverbs_close_fd(filp); 444 445 return 0; 446 } 447 448 const struct file_operations uverbs_event_fops = { 449 .owner = THIS_MODULE, 450 .read = ib_uverbs_comp_event_read, 451 .poll = ib_uverbs_comp_event_poll, 452 .release = ib_uverbs_comp_event_close, 453 .fasync = ib_uverbs_comp_event_fasync, 454 .llseek = no_llseek, 455 }; 456 457 static const struct file_operations uverbs_async_event_fops = { 458 .owner = THIS_MODULE, 459 .read = ib_uverbs_async_event_read, 460 .poll = ib_uverbs_async_event_poll, 461 .release = ib_uverbs_async_event_close, 462 .fasync = ib_uverbs_async_event_fasync, 463 .llseek = no_llseek, 464 }; 465 466 void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context) 467 { 468 struct ib_uverbs_event_queue *ev_queue = cq_context; 469 struct ib_ucq_object *uobj; 470 struct ib_uverbs_event *entry; 471 unsigned long flags; 472 473 if (!ev_queue) 474 return; 475 476 spin_lock_irqsave(&ev_queue->lock, flags); 477 if (ev_queue->is_closed) { 478 spin_unlock_irqrestore(&ev_queue->lock, flags); 479 return; 480 } 481 482 entry = kmalloc(sizeof(*entry), GFP_ATOMIC); 483 if (!entry) { 484 spin_unlock_irqrestore(&ev_queue->lock, flags); 485 return; 486 } 487 488 uobj = container_of(cq->uobject, struct ib_ucq_object, uobject); 489 490 entry->desc.comp.cq_handle = cq->uobject->user_handle; 491 entry->counter = &uobj->comp_events_reported; 492 493 list_add_tail(&entry->list, &ev_queue->event_list); 494 list_add_tail(&entry->obj_list, &uobj->comp_list); 495 spin_unlock_irqrestore(&ev_queue->lock, flags); 496 497 wake_up_interruptible(&ev_queue->poll_wait); 498 kill_fasync(&ev_queue->async_queue, SIGIO, POLL_IN); 499 } 500 501 static void ib_uverbs_async_handler(struct ib_uverbs_file *file, 502 __u64 element, __u64 event, 503 struct list_head *obj_list, 504 u32 *counter) 505 { 506 struct ib_uverbs_event *entry; 507 unsigned long flags; 508 509 spin_lock_irqsave(&file->async_file->ev_queue.lock, flags); 510 if (file->async_file->ev_queue.is_closed) { 511 spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags); 512 return; 513 } 514 515 entry = kmalloc(sizeof(*entry), GFP_ATOMIC); 516 if (!entry) { 517 spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags); 518 return; 519 } 520 521 entry->desc.async.element = element; 522 entry->desc.async.event_type = event; 523 entry->desc.async.reserved = 0; 524 entry->counter = counter; 525 526 list_add_tail(&entry->list, &file->async_file->ev_queue.event_list); 527 if (obj_list) 528 list_add_tail(&entry->obj_list, obj_list); 529 spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags); 530 531 wake_up_interruptible(&file->async_file->ev_queue.poll_wait); 532 kill_fasync(&file->async_file->ev_queue.async_queue, SIGIO, POLL_IN); 533 } 534 535 void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr) 536 { 537 struct ib_ucq_object *uobj = container_of(event->element.cq->uobject, 538 struct ib_ucq_object, uobject); 539 540 ib_uverbs_async_handler(uobj->uobject.ufile, uobj->uobject.user_handle, 541 event->event, &uobj->async_list, 542 &uobj->async_events_reported); 543 } 544 545 void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr) 546 { 547 struct ib_uevent_object *uobj; 548 549 /* for XRC target qp's, check that qp is live */ 550 if (!event->element.qp->uobject) 551 return; 552 553 uobj = container_of(event->element.qp->uobject, 554 struct ib_uevent_object, uobject); 555 556 ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, 557 event->event, &uobj->event_list, 558 &uobj->events_reported); 559 } 560 561 void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr) 562 { 563 struct ib_uevent_object *uobj = container_of(event->element.wq->uobject, 564 struct ib_uevent_object, uobject); 565 566 ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, 567 event->event, &uobj->event_list, 568 &uobj->events_reported); 569 } 570 571 void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr) 572 { 573 struct ib_uevent_object *uobj; 574 575 uobj = container_of(event->element.srq->uobject, 576 struct ib_uevent_object, uobject); 577 578 ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, 579 event->event, &uobj->event_list, 580 &uobj->events_reported); 581 } 582 583 void ib_uverbs_event_handler(struct ib_event_handler *handler, 584 struct ib_event *event) 585 { 586 struct ib_uverbs_file *file = 587 container_of(handler, struct ib_uverbs_file, event_handler); 588 589 ib_uverbs_async_handler(file, event->element.port_num, event->event, 590 NULL, NULL); 591 } 592 593 void ib_uverbs_free_async_event_file(struct ib_uverbs_file *file) 594 { 595 kref_put(&file->async_file->ref, ib_uverbs_release_async_event_file); 596 file->async_file = NULL; 597 } 598 599 void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue) 600 { 601 spin_lock_init(&ev_queue->lock); 602 INIT_LIST_HEAD(&ev_queue->event_list); 603 init_waitqueue_head(&ev_queue->poll_wait); 604 ev_queue->is_closed = 0; 605 ev_queue->async_queue = NULL; 606 } 607 608 struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file, 609 struct ib_device *ib_dev) 610 { 611 struct ib_uverbs_async_event_file *ev_file; 612 struct file *filp; 613 614 ev_file = kzalloc(sizeof(*ev_file), GFP_KERNEL); 615 if (!ev_file) 616 return ERR_PTR(-ENOMEM); 617 618 ib_uverbs_init_event_queue(&ev_file->ev_queue); 619 ev_file->uverbs_file = uverbs_file; 620 kref_get(&ev_file->uverbs_file->ref); 621 kref_init(&ev_file->ref); 622 filp = anon_inode_getfile("[infinibandevent]", &uverbs_async_event_fops, 623 ev_file, O_RDONLY); 624 if (IS_ERR(filp)) 625 goto err_put_refs; 626 627 mutex_lock(&uverbs_file->device->lists_mutex); 628 list_add_tail(&ev_file->list, 629 &uverbs_file->device->uverbs_events_file_list); 630 mutex_unlock(&uverbs_file->device->lists_mutex); 631 632 WARN_ON(uverbs_file->async_file); 633 uverbs_file->async_file = ev_file; 634 kref_get(&uverbs_file->async_file->ref); 635 INIT_IB_EVENT_HANDLER(&uverbs_file->event_handler, 636 ib_dev, 637 ib_uverbs_event_handler); 638 ib_register_event_handler(&uverbs_file->event_handler); 639 /* At that point async file stuff was fully set */ 640 641 return filp; 642 643 err_put_refs: 644 kref_put(&ev_file->uverbs_file->ref, ib_uverbs_release_file); 645 kref_put(&ev_file->ref, ib_uverbs_release_async_event_file); 646 return filp; 647 } 648 649 static bool verify_command_mask(struct ib_uverbs_file *ufile, u32 command, 650 bool extended) 651 { 652 if (!extended) 653 return ufile->uverbs_cmd_mask & BIT_ULL(command); 654 655 return ufile->uverbs_ex_cmd_mask & BIT_ULL(command); 656 } 657 658 static bool verify_command_idx(u32 command, bool extended) 659 { 660 if (extended) 661 return command < ARRAY_SIZE(uverbs_ex_cmd_table) && 662 uverbs_ex_cmd_table[command]; 663 664 return command < ARRAY_SIZE(uverbs_cmd_table) && 665 uverbs_cmd_table[command]; 666 } 667 668 static ssize_t process_hdr(struct ib_uverbs_cmd_hdr *hdr, 669 u32 *command, bool *extended) 670 { 671 if (hdr->command & ~(u32)(IB_USER_VERBS_CMD_FLAG_EXTENDED | 672 IB_USER_VERBS_CMD_COMMAND_MASK)) 673 return -EINVAL; 674 675 *command = hdr->command & IB_USER_VERBS_CMD_COMMAND_MASK; 676 *extended = hdr->command & IB_USER_VERBS_CMD_FLAG_EXTENDED; 677 678 if (!verify_command_idx(*command, *extended)) 679 return -EOPNOTSUPP; 680 681 return 0; 682 } 683 684 static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr, 685 struct ib_uverbs_ex_cmd_hdr *ex_hdr, 686 size_t count, bool extended) 687 { 688 if (extended) { 689 count -= sizeof(*hdr) + sizeof(*ex_hdr); 690 691 if ((hdr->in_words + ex_hdr->provider_in_words) * 8 != count) 692 return -EINVAL; 693 694 if (ex_hdr->cmd_hdr_reserved) 695 return -EINVAL; 696 697 if (ex_hdr->response) { 698 if (!hdr->out_words && !ex_hdr->provider_out_words) 699 return -EINVAL; 700 701 if (!access_ok(VERIFY_WRITE, 702 u64_to_user_ptr(ex_hdr->response), 703 (hdr->out_words + ex_hdr->provider_out_words) * 8)) 704 return -EFAULT; 705 } else { 706 if (hdr->out_words || ex_hdr->provider_out_words) 707 return -EINVAL; 708 } 709 710 return 0; 711 } 712 713 /* not extended command */ 714 if (hdr->in_words * 4 != count) 715 return -EINVAL; 716 717 return 0; 718 } 719 720 static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, 721 size_t count, loff_t *pos) 722 { 723 struct ib_uverbs_file *file = filp->private_data; 724 struct ib_uverbs_ex_cmd_hdr ex_hdr; 725 struct ib_uverbs_cmd_hdr hdr; 726 bool extended; 727 int srcu_key; 728 u32 command; 729 ssize_t ret; 730 731 if (!ib_safe_file_access(filp)) { 732 pr_err_once("uverbs_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n", 733 task_tgid_vnr(current), current->comm); 734 return -EACCES; 735 } 736 737 if (count < sizeof(hdr)) 738 return -EINVAL; 739 740 if (copy_from_user(&hdr, buf, sizeof(hdr))) 741 return -EFAULT; 742 743 ret = process_hdr(&hdr, &command, &extended); 744 if (ret) 745 return ret; 746 747 if (extended) { 748 if (count < (sizeof(hdr) + sizeof(ex_hdr))) 749 return -EINVAL; 750 if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr))) 751 return -EFAULT; 752 } 753 754 ret = verify_hdr(&hdr, &ex_hdr, count, extended); 755 if (ret) 756 return ret; 757 758 srcu_key = srcu_read_lock(&file->device->disassociate_srcu); 759 760 if (!verify_command_mask(file, command, extended)) { 761 ret = -EOPNOTSUPP; 762 goto out; 763 } 764 765 buf += sizeof(hdr); 766 767 if (!extended) { 768 ret = uverbs_cmd_table[command](file, buf, 769 hdr.in_words * 4, 770 hdr.out_words * 4); 771 } else { 772 struct ib_udata ucore; 773 struct ib_udata uhw; 774 775 buf += sizeof(ex_hdr); 776 777 ib_uverbs_init_udata_buf_or_null(&ucore, buf, 778 u64_to_user_ptr(ex_hdr.response), 779 hdr.in_words * 8, hdr.out_words * 8); 780 781 ib_uverbs_init_udata_buf_or_null(&uhw, 782 buf + ucore.inlen, 783 u64_to_user_ptr(ex_hdr.response) + ucore.outlen, 784 ex_hdr.provider_in_words * 8, 785 ex_hdr.provider_out_words * 8); 786 787 ret = uverbs_ex_cmd_table[command](file, &ucore, &uhw); 788 ret = (ret) ? : count; 789 } 790 791 out: 792 srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); 793 return ret; 794 } 795 796 static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma) 797 { 798 struct ib_uverbs_file *file = filp->private_data; 799 struct ib_ucontext *ucontext; 800 int ret = 0; 801 int srcu_key; 802 803 srcu_key = srcu_read_lock(&file->device->disassociate_srcu); 804 ucontext = ib_uverbs_get_ucontext(file); 805 if (IS_ERR(ucontext)) { 806 ret = PTR_ERR(ucontext); 807 goto out; 808 } 809 810 ret = ucontext->device->mmap(ucontext, vma); 811 out: 812 srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); 813 return ret; 814 } 815 816 /* 817 * Each time we map IO memory into user space this keeps track of the mapping. 818 * When the device is hot-unplugged we 'zap' the mmaps in user space to point 819 * to the zero page and allow the hot unplug to proceed. 820 * 821 * This is necessary for cases like PCI physical hot unplug as the actual BAR 822 * memory may vanish after this and access to it from userspace could MCE. 823 * 824 * RDMA drivers supporting disassociation must have their user space designed 825 * to cope in some way with their IO pages going to the zero page. 826 */ 827 struct rdma_umap_priv { 828 struct vm_area_struct *vma; 829 struct list_head list; 830 }; 831 832 static const struct vm_operations_struct rdma_umap_ops; 833 834 static void rdma_umap_priv_init(struct rdma_umap_priv *priv, 835 struct vm_area_struct *vma) 836 { 837 struct ib_uverbs_file *ufile = vma->vm_file->private_data; 838 839 priv->vma = vma; 840 vma->vm_private_data = priv; 841 vma->vm_ops = &rdma_umap_ops; 842 843 mutex_lock(&ufile->umap_lock); 844 list_add(&priv->list, &ufile->umaps); 845 mutex_unlock(&ufile->umap_lock); 846 } 847 848 /* 849 * The VMA has been dup'd, initialize the vm_private_data with a new tracking 850 * struct 851 */ 852 static void rdma_umap_open(struct vm_area_struct *vma) 853 { 854 struct ib_uverbs_file *ufile = vma->vm_file->private_data; 855 struct rdma_umap_priv *opriv = vma->vm_private_data; 856 struct rdma_umap_priv *priv; 857 858 if (!opriv) 859 return; 860 861 /* We are racing with disassociation */ 862 if (!down_read_trylock(&ufile->hw_destroy_rwsem)) 863 goto out_zap; 864 /* 865 * Disassociation already completed, the VMA should already be zapped. 866 */ 867 if (!ufile->ucontext) 868 goto out_unlock; 869 870 priv = kzalloc(sizeof(*priv), GFP_KERNEL); 871 if (!priv) 872 goto out_unlock; 873 rdma_umap_priv_init(priv, vma); 874 875 up_read(&ufile->hw_destroy_rwsem); 876 return; 877 878 out_unlock: 879 up_read(&ufile->hw_destroy_rwsem); 880 out_zap: 881 /* 882 * We can't allow the VMA to be created with the actual IO pages, that 883 * would break our API contract, and it can't be stopped at this 884 * point, so zap it. 885 */ 886 vma->vm_private_data = NULL; 887 zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start); 888 } 889 890 static void rdma_umap_close(struct vm_area_struct *vma) 891 { 892 struct ib_uverbs_file *ufile = vma->vm_file->private_data; 893 struct rdma_umap_priv *priv = vma->vm_private_data; 894 895 if (!priv) 896 return; 897 898 /* 899 * The vma holds a reference on the struct file that created it, which 900 * in turn means that the ib_uverbs_file is guaranteed to exist at 901 * this point. 902 */ 903 mutex_lock(&ufile->umap_lock); 904 list_del(&priv->list); 905 mutex_unlock(&ufile->umap_lock); 906 kfree(priv); 907 } 908 909 static const struct vm_operations_struct rdma_umap_ops = { 910 .open = rdma_umap_open, 911 .close = rdma_umap_close, 912 }; 913 914 static struct rdma_umap_priv *rdma_user_mmap_pre(struct ib_ucontext *ucontext, 915 struct vm_area_struct *vma, 916 unsigned long size) 917 { 918 struct ib_uverbs_file *ufile = ucontext->ufile; 919 struct rdma_umap_priv *priv; 920 921 if (vma->vm_end - vma->vm_start != size) 922 return ERR_PTR(-EINVAL); 923 924 /* Driver is using this wrong, must be called by ib_uverbs_mmap */ 925 if (WARN_ON(!vma->vm_file || 926 vma->vm_file->private_data != ufile)) 927 return ERR_PTR(-EINVAL); 928 lockdep_assert_held(&ufile->device->disassociate_srcu); 929 930 priv = kzalloc(sizeof(*priv), GFP_KERNEL); 931 if (!priv) 932 return ERR_PTR(-ENOMEM); 933 return priv; 934 } 935 936 /* 937 * Map IO memory into a process. This is to be called by drivers as part of 938 * their mmap() functions if they wish to send something like PCI-E BAR memory 939 * to userspace. 940 */ 941 int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma, 942 unsigned long pfn, unsigned long size, pgprot_t prot) 943 { 944 struct rdma_umap_priv *priv = rdma_user_mmap_pre(ucontext, vma, size); 945 946 if (IS_ERR(priv)) 947 return PTR_ERR(priv); 948 949 vma->vm_page_prot = prot; 950 if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) { 951 kfree(priv); 952 return -EAGAIN; 953 } 954 955 rdma_umap_priv_init(priv, vma); 956 return 0; 957 } 958 EXPORT_SYMBOL(rdma_user_mmap_io); 959 960 /* 961 * The page case is here for a slightly different reason, the driver expects 962 * to be able to free the page it is sharing to user space when it destroys 963 * its ucontext, which means we need to zap the user space references. 964 * 965 * We could handle this differently by providing an API to allocate a shared 966 * page and then only freeing the shared page when the last ufile is 967 * destroyed. 968 */ 969 int rdma_user_mmap_page(struct ib_ucontext *ucontext, 970 struct vm_area_struct *vma, struct page *page, 971 unsigned long size) 972 { 973 struct rdma_umap_priv *priv = rdma_user_mmap_pre(ucontext, vma, size); 974 975 if (IS_ERR(priv)) 976 return PTR_ERR(priv); 977 978 if (remap_pfn_range(vma, vma->vm_start, page_to_pfn(page), size, 979 vma->vm_page_prot)) { 980 kfree(priv); 981 return -EAGAIN; 982 } 983 984 rdma_umap_priv_init(priv, vma); 985 return 0; 986 } 987 EXPORT_SYMBOL(rdma_user_mmap_page); 988 989 void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) 990 { 991 struct rdma_umap_priv *priv, *next_priv; 992 993 lockdep_assert_held(&ufile->hw_destroy_rwsem); 994 995 while (1) { 996 struct mm_struct *mm = NULL; 997 998 /* Get an arbitrary mm pointer that hasn't been cleaned yet */ 999 mutex_lock(&ufile->umap_lock); 1000 if (!list_empty(&ufile->umaps)) { 1001 mm = list_first_entry(&ufile->umaps, 1002 struct rdma_umap_priv, list) 1003 ->vma->vm_mm; 1004 mmget(mm); 1005 } 1006 mutex_unlock(&ufile->umap_lock); 1007 if (!mm) 1008 return; 1009 1010 /* 1011 * The umap_lock is nested under mmap_sem since it used within 1012 * the vma_ops callbacks, so we have to clean the list one mm 1013 * at a time to get the lock ordering right. Typically there 1014 * will only be one mm, so no big deal. 1015 */ 1016 down_write(&mm->mmap_sem); 1017 mutex_lock(&ufile->umap_lock); 1018 list_for_each_entry_safe (priv, next_priv, &ufile->umaps, 1019 list) { 1020 struct vm_area_struct *vma = priv->vma; 1021 1022 if (vma->vm_mm != mm) 1023 continue; 1024 list_del_init(&priv->list); 1025 1026 zap_vma_ptes(vma, vma->vm_start, 1027 vma->vm_end - vma->vm_start); 1028 vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE); 1029 } 1030 mutex_unlock(&ufile->umap_lock); 1031 up_write(&mm->mmap_sem); 1032 mmput(mm); 1033 } 1034 } 1035 1036 /* 1037 * ib_uverbs_open() does not need the BKL: 1038 * 1039 * - the ib_uverbs_device structures are properly reference counted and 1040 * everything else is purely local to the file being created, so 1041 * races against other open calls are not a problem; 1042 * - there is no ioctl method to race against; 1043 * - the open method will either immediately run -ENXIO, or all 1044 * required initialization will be done. 1045 */ 1046 static int ib_uverbs_open(struct inode *inode, struct file *filp) 1047 { 1048 struct ib_uverbs_device *dev; 1049 struct ib_uverbs_file *file; 1050 struct ib_device *ib_dev; 1051 int ret; 1052 int module_dependent; 1053 int srcu_key; 1054 1055 dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev); 1056 if (!atomic_inc_not_zero(&dev->refcount)) 1057 return -ENXIO; 1058 1059 get_device(&dev->dev); 1060 srcu_key = srcu_read_lock(&dev->disassociate_srcu); 1061 mutex_lock(&dev->lists_mutex); 1062 ib_dev = srcu_dereference(dev->ib_dev, 1063 &dev->disassociate_srcu); 1064 if (!ib_dev) { 1065 ret = -EIO; 1066 goto err; 1067 } 1068 1069 /* In case IB device supports disassociate ucontext, there is no hard 1070 * dependency between uverbs device and its low level device. 1071 */ 1072 module_dependent = !(ib_dev->disassociate_ucontext); 1073 1074 if (module_dependent) { 1075 if (!try_module_get(ib_dev->owner)) { 1076 ret = -ENODEV; 1077 goto err; 1078 } 1079 } 1080 1081 file = kzalloc(sizeof(*file), GFP_KERNEL); 1082 if (!file) { 1083 ret = -ENOMEM; 1084 if (module_dependent) 1085 goto err_module; 1086 1087 goto err; 1088 } 1089 1090 file->device = dev; 1091 kref_init(&file->ref); 1092 mutex_init(&file->ucontext_lock); 1093 1094 spin_lock_init(&file->uobjects_lock); 1095 INIT_LIST_HEAD(&file->uobjects); 1096 init_rwsem(&file->hw_destroy_rwsem); 1097 mutex_init(&file->umap_lock); 1098 INIT_LIST_HEAD(&file->umaps); 1099 1100 filp->private_data = file; 1101 list_add_tail(&file->list, &dev->uverbs_file_list); 1102 mutex_unlock(&dev->lists_mutex); 1103 srcu_read_unlock(&dev->disassociate_srcu, srcu_key); 1104 1105 file->uverbs_cmd_mask = ib_dev->uverbs_cmd_mask; 1106 file->uverbs_ex_cmd_mask = ib_dev->uverbs_ex_cmd_mask; 1107 1108 setup_ufile_idr_uobject(file); 1109 1110 return nonseekable_open(inode, filp); 1111 1112 err_module: 1113 module_put(ib_dev->owner); 1114 1115 err: 1116 mutex_unlock(&dev->lists_mutex); 1117 srcu_read_unlock(&dev->disassociate_srcu, srcu_key); 1118 if (atomic_dec_and_test(&dev->refcount)) 1119 ib_uverbs_comp_dev(dev); 1120 1121 put_device(&dev->dev); 1122 return ret; 1123 } 1124 1125 static int ib_uverbs_close(struct inode *inode, struct file *filp) 1126 { 1127 struct ib_uverbs_file *file = filp->private_data; 1128 1129 uverbs_destroy_ufile_hw(file, RDMA_REMOVE_CLOSE); 1130 1131 mutex_lock(&file->device->lists_mutex); 1132 list_del_init(&file->list); 1133 mutex_unlock(&file->device->lists_mutex); 1134 1135 if (file->async_file) 1136 kref_put(&file->async_file->ref, 1137 ib_uverbs_release_async_event_file); 1138 1139 kref_put(&file->ref, ib_uverbs_release_file); 1140 1141 return 0; 1142 } 1143 1144 static const struct file_operations uverbs_fops = { 1145 .owner = THIS_MODULE, 1146 .write = ib_uverbs_write, 1147 .open = ib_uverbs_open, 1148 .release = ib_uverbs_close, 1149 .llseek = no_llseek, 1150 .unlocked_ioctl = ib_uverbs_ioctl, 1151 .compat_ioctl = ib_uverbs_ioctl, 1152 }; 1153 1154 static const struct file_operations uverbs_mmap_fops = { 1155 .owner = THIS_MODULE, 1156 .write = ib_uverbs_write, 1157 .mmap = ib_uverbs_mmap, 1158 .open = ib_uverbs_open, 1159 .release = ib_uverbs_close, 1160 .llseek = no_llseek, 1161 .unlocked_ioctl = ib_uverbs_ioctl, 1162 .compat_ioctl = ib_uverbs_ioctl, 1163 }; 1164 1165 static struct ib_client uverbs_client = { 1166 .name = "uverbs", 1167 .add = ib_uverbs_add_one, 1168 .remove = ib_uverbs_remove_one 1169 }; 1170 1171 static ssize_t ibdev_show(struct device *device, struct device_attribute *attr, 1172 char *buf) 1173 { 1174 struct ib_uverbs_device *dev = 1175 container_of(device, struct ib_uverbs_device, dev); 1176 int ret = -ENODEV; 1177 int srcu_key; 1178 struct ib_device *ib_dev; 1179 1180 srcu_key = srcu_read_lock(&dev->disassociate_srcu); 1181 ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu); 1182 if (ib_dev) 1183 ret = sprintf(buf, "%s\n", dev_name(&ib_dev->dev)); 1184 srcu_read_unlock(&dev->disassociate_srcu, srcu_key); 1185 1186 return ret; 1187 } 1188 static DEVICE_ATTR_RO(ibdev); 1189 1190 static ssize_t abi_version_show(struct device *device, 1191 struct device_attribute *attr, char *buf) 1192 { 1193 struct ib_uverbs_device *dev = 1194 container_of(device, struct ib_uverbs_device, dev); 1195 int ret = -ENODEV; 1196 int srcu_key; 1197 struct ib_device *ib_dev; 1198 1199 srcu_key = srcu_read_lock(&dev->disassociate_srcu); 1200 ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu); 1201 if (ib_dev) 1202 ret = sprintf(buf, "%d\n", ib_dev->uverbs_abi_ver); 1203 srcu_read_unlock(&dev->disassociate_srcu, srcu_key); 1204 1205 return ret; 1206 } 1207 static DEVICE_ATTR_RO(abi_version); 1208 1209 static struct attribute *ib_dev_attrs[] = { 1210 &dev_attr_abi_version.attr, 1211 &dev_attr_ibdev.attr, 1212 NULL, 1213 }; 1214 1215 static const struct attribute_group dev_attr_group = { 1216 .attrs = ib_dev_attrs, 1217 }; 1218 1219 static CLASS_ATTR_STRING(abi_version, S_IRUGO, 1220 __stringify(IB_USER_VERBS_ABI_VERSION)); 1221 1222 static int ib_uverbs_create_uapi(struct ib_device *device, 1223 struct ib_uverbs_device *uverbs_dev) 1224 { 1225 struct uverbs_api *uapi; 1226 1227 uapi = uverbs_alloc_api(device->driver_specs, device->driver_id); 1228 if (IS_ERR(uapi)) 1229 return PTR_ERR(uapi); 1230 1231 uverbs_dev->uapi = uapi; 1232 return 0; 1233 } 1234 1235 static void ib_uverbs_add_one(struct ib_device *device) 1236 { 1237 int devnum; 1238 dev_t base; 1239 struct ib_uverbs_device *uverbs_dev; 1240 int ret; 1241 1242 if (!device->alloc_ucontext) 1243 return; 1244 1245 uverbs_dev = kzalloc(sizeof(*uverbs_dev), GFP_KERNEL); 1246 if (!uverbs_dev) 1247 return; 1248 1249 ret = init_srcu_struct(&uverbs_dev->disassociate_srcu); 1250 if (ret) { 1251 kfree(uverbs_dev); 1252 return; 1253 } 1254 1255 device_initialize(&uverbs_dev->dev); 1256 uverbs_dev->dev.class = uverbs_class; 1257 uverbs_dev->dev.parent = device->dev.parent; 1258 uverbs_dev->dev.release = ib_uverbs_release_dev; 1259 uverbs_dev->groups[0] = &dev_attr_group; 1260 uverbs_dev->dev.groups = uverbs_dev->groups; 1261 atomic_set(&uverbs_dev->refcount, 1); 1262 init_completion(&uverbs_dev->comp); 1263 uverbs_dev->xrcd_tree = RB_ROOT; 1264 mutex_init(&uverbs_dev->xrcd_tree_mutex); 1265 mutex_init(&uverbs_dev->lists_mutex); 1266 INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list); 1267 INIT_LIST_HEAD(&uverbs_dev->uverbs_events_file_list); 1268 rcu_assign_pointer(uverbs_dev->ib_dev, device); 1269 uverbs_dev->num_comp_vectors = device->num_comp_vectors; 1270 1271 devnum = ida_alloc_max(&uverbs_ida, IB_UVERBS_MAX_DEVICES - 1, 1272 GFP_KERNEL); 1273 if (devnum < 0) 1274 goto err; 1275 uverbs_dev->devnum = devnum; 1276 if (devnum >= IB_UVERBS_NUM_FIXED_MINOR) 1277 base = dynamic_uverbs_dev + devnum - IB_UVERBS_NUM_FIXED_MINOR; 1278 else 1279 base = IB_UVERBS_BASE_DEV + devnum; 1280 1281 if (ib_uverbs_create_uapi(device, uverbs_dev)) 1282 goto err_uapi; 1283 1284 uverbs_dev->dev.devt = base; 1285 dev_set_name(&uverbs_dev->dev, "uverbs%d", uverbs_dev->devnum); 1286 1287 cdev_init(&uverbs_dev->cdev, 1288 device->mmap ? &uverbs_mmap_fops : &uverbs_fops); 1289 uverbs_dev->cdev.owner = THIS_MODULE; 1290 1291 ret = cdev_device_add(&uverbs_dev->cdev, &uverbs_dev->dev); 1292 if (ret) 1293 goto err_uapi; 1294 1295 ib_set_client_data(device, &uverbs_client, uverbs_dev); 1296 return; 1297 1298 err_uapi: 1299 ida_free(&uverbs_ida, devnum); 1300 err: 1301 if (atomic_dec_and_test(&uverbs_dev->refcount)) 1302 ib_uverbs_comp_dev(uverbs_dev); 1303 wait_for_completion(&uverbs_dev->comp); 1304 put_device(&uverbs_dev->dev); 1305 return; 1306 } 1307 1308 static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, 1309 struct ib_device *ib_dev) 1310 { 1311 struct ib_uverbs_file *file; 1312 struct ib_uverbs_async_event_file *event_file; 1313 struct ib_event event; 1314 1315 /* Pending running commands to terminate */ 1316 uverbs_disassociate_api_pre(uverbs_dev); 1317 event.event = IB_EVENT_DEVICE_FATAL; 1318 event.element.port_num = 0; 1319 event.device = ib_dev; 1320 1321 mutex_lock(&uverbs_dev->lists_mutex); 1322 while (!list_empty(&uverbs_dev->uverbs_file_list)) { 1323 file = list_first_entry(&uverbs_dev->uverbs_file_list, 1324 struct ib_uverbs_file, list); 1325 list_del_init(&file->list); 1326 kref_get(&file->ref); 1327 1328 /* We must release the mutex before going ahead and calling 1329 * uverbs_cleanup_ufile, as it might end up indirectly calling 1330 * uverbs_close, for example due to freeing the resources (e.g 1331 * mmput). 1332 */ 1333 mutex_unlock(&uverbs_dev->lists_mutex); 1334 1335 ib_uverbs_event_handler(&file->event_handler, &event); 1336 uverbs_destroy_ufile_hw(file, RDMA_REMOVE_DRIVER_REMOVE); 1337 kref_put(&file->ref, ib_uverbs_release_file); 1338 1339 mutex_lock(&uverbs_dev->lists_mutex); 1340 } 1341 1342 while (!list_empty(&uverbs_dev->uverbs_events_file_list)) { 1343 event_file = list_first_entry(&uverbs_dev-> 1344 uverbs_events_file_list, 1345 struct ib_uverbs_async_event_file, 1346 list); 1347 spin_lock_irq(&event_file->ev_queue.lock); 1348 event_file->ev_queue.is_closed = 1; 1349 spin_unlock_irq(&event_file->ev_queue.lock); 1350 1351 list_del(&event_file->list); 1352 ib_unregister_event_handler( 1353 &event_file->uverbs_file->event_handler); 1354 event_file->uverbs_file->event_handler.device = 1355 NULL; 1356 1357 wake_up_interruptible(&event_file->ev_queue.poll_wait); 1358 kill_fasync(&event_file->ev_queue.async_queue, SIGIO, POLL_IN); 1359 } 1360 mutex_unlock(&uverbs_dev->lists_mutex); 1361 1362 uverbs_disassociate_api(uverbs_dev->uapi); 1363 } 1364 1365 static void ib_uverbs_remove_one(struct ib_device *device, void *client_data) 1366 { 1367 struct ib_uverbs_device *uverbs_dev = client_data; 1368 int wait_clients = 1; 1369 1370 if (!uverbs_dev) 1371 return; 1372 1373 cdev_device_del(&uverbs_dev->cdev, &uverbs_dev->dev); 1374 ida_free(&uverbs_ida, uverbs_dev->devnum); 1375 1376 if (device->disassociate_ucontext) { 1377 /* We disassociate HW resources and immediately return. 1378 * Userspace will see a EIO errno for all future access. 1379 * Upon returning, ib_device may be freed internally and is not 1380 * valid any more. 1381 * uverbs_device is still available until all clients close 1382 * their files, then the uverbs device ref count will be zero 1383 * and its resources will be freed. 1384 * Note: At this point no more files can be opened since the 1385 * cdev was deleted, however active clients can still issue 1386 * commands and close their open files. 1387 */ 1388 ib_uverbs_free_hw_resources(uverbs_dev, device); 1389 wait_clients = 0; 1390 } 1391 1392 if (atomic_dec_and_test(&uverbs_dev->refcount)) 1393 ib_uverbs_comp_dev(uverbs_dev); 1394 if (wait_clients) 1395 wait_for_completion(&uverbs_dev->comp); 1396 1397 put_device(&uverbs_dev->dev); 1398 } 1399 1400 static char *uverbs_devnode(struct device *dev, umode_t *mode) 1401 { 1402 if (mode) 1403 *mode = 0666; 1404 return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev)); 1405 } 1406 1407 static int __init ib_uverbs_init(void) 1408 { 1409 int ret; 1410 1411 ret = register_chrdev_region(IB_UVERBS_BASE_DEV, 1412 IB_UVERBS_NUM_FIXED_MINOR, 1413 "infiniband_verbs"); 1414 if (ret) { 1415 pr_err("user_verbs: couldn't register device number\n"); 1416 goto out; 1417 } 1418 1419 ret = alloc_chrdev_region(&dynamic_uverbs_dev, 0, 1420 IB_UVERBS_NUM_DYNAMIC_MINOR, 1421 "infiniband_verbs"); 1422 if (ret) { 1423 pr_err("couldn't register dynamic device number\n"); 1424 goto out_alloc; 1425 } 1426 1427 uverbs_class = class_create(THIS_MODULE, "infiniband_verbs"); 1428 if (IS_ERR(uverbs_class)) { 1429 ret = PTR_ERR(uverbs_class); 1430 pr_err("user_verbs: couldn't create class infiniband_verbs\n"); 1431 goto out_chrdev; 1432 } 1433 1434 uverbs_class->devnode = uverbs_devnode; 1435 1436 ret = class_create_file(uverbs_class, &class_attr_abi_version.attr); 1437 if (ret) { 1438 pr_err("user_verbs: couldn't create abi_version attribute\n"); 1439 goto out_class; 1440 } 1441 1442 ret = ib_register_client(&uverbs_client); 1443 if (ret) { 1444 pr_err("user_verbs: couldn't register client\n"); 1445 goto out_class; 1446 } 1447 1448 return 0; 1449 1450 out_class: 1451 class_destroy(uverbs_class); 1452 1453 out_chrdev: 1454 unregister_chrdev_region(dynamic_uverbs_dev, 1455 IB_UVERBS_NUM_DYNAMIC_MINOR); 1456 1457 out_alloc: 1458 unregister_chrdev_region(IB_UVERBS_BASE_DEV, 1459 IB_UVERBS_NUM_FIXED_MINOR); 1460 1461 out: 1462 return ret; 1463 } 1464 1465 static void __exit ib_uverbs_cleanup(void) 1466 { 1467 ib_unregister_client(&uverbs_client); 1468 class_destroy(uverbs_class); 1469 unregister_chrdev_region(IB_UVERBS_BASE_DEV, 1470 IB_UVERBS_NUM_FIXED_MINOR); 1471 unregister_chrdev_region(dynamic_uverbs_dev, 1472 IB_UVERBS_NUM_DYNAMIC_MINOR); 1473 } 1474 1475 module_init(ib_uverbs_init); 1476 module_exit(ib_uverbs_cleanup); 1477