/*
 * Copyright (c) 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005, 2006 Cisco Systems.  All rights reserved.
 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
 * Copyright (c) 2005 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/device.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/sched.h>
#include <linux/file.h>
#include <linux/cdev.h>
#include <linux/anon_inodes.h>
#include <linux/slab.h>

#include <asm/uaccess.h>

#include "uverbs.h"

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand userspace verbs access");
MODULE_LICENSE("Dual BSD/GPL");

enum {
	IB_UVERBS_MAJOR       = 231,
	IB_UVERBS_BASE_MINOR  = 192,
	IB_UVERBS_MAX_DEVICES = 32
};

#define IB_UVERBS_BASE_DEV	MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR)

static struct class *uverbs_class;

DEFINE_SPINLOCK(ib_uverbs_idr_lock);
DEFINE_IDR(ib_uverbs_pd_idr);
DEFINE_IDR(ib_uverbs_mr_idr);
DEFINE_IDR(ib_uverbs_mw_idr);
DEFINE_IDR(ib_uverbs_ah_idr);
DEFINE_IDR(ib_uverbs_cq_idr);
DEFINE_IDR(ib_uverbs_qp_idr);
DEFINE_IDR(ib_uverbs_srq_idr);
DEFINE_IDR(ib_uverbs_xrcd_idr);
DEFINE_IDR(ib_uverbs_rule_idr);

static DEFINE_SPINLOCK(map_lock);
static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
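/*
 * Dispatch table for the write() command ABI: each slot is indexed by
 * the command number carried in struct ib_uverbs_cmd_hdr and holds the
 * handler implemented in uverbs_cmd.c.  NULL slots and out-of-range
 * command numbers are rejected in ib_uverbs_write() below.
 */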
static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
				     const char __user *buf, int in_len,
				     int out_len) = {
	[IB_USER_VERBS_CMD_GET_CONTEXT]		= ib_uverbs_get_context,
	[IB_USER_VERBS_CMD_QUERY_DEVICE]	= ib_uverbs_query_device,
	[IB_USER_VERBS_CMD_QUERY_PORT]		= ib_uverbs_query_port,
	[IB_USER_VERBS_CMD_ALLOC_PD]		= ib_uverbs_alloc_pd,
	[IB_USER_VERBS_CMD_DEALLOC_PD]		= ib_uverbs_dealloc_pd,
	[IB_USER_VERBS_CMD_REG_MR]		= ib_uverbs_reg_mr,
	[IB_USER_VERBS_CMD_DEREG_MR]		= ib_uverbs_dereg_mr,
	[IB_USER_VERBS_CMD_ALLOC_MW]		= ib_uverbs_alloc_mw,
	[IB_USER_VERBS_CMD_DEALLOC_MW]		= ib_uverbs_dealloc_mw,
	[IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel,
	[IB_USER_VERBS_CMD_CREATE_CQ]		= ib_uverbs_create_cq,
	[IB_USER_VERBS_CMD_RESIZE_CQ]		= ib_uverbs_resize_cq,
	[IB_USER_VERBS_CMD_POLL_CQ]		= ib_uverbs_poll_cq,
	[IB_USER_VERBS_CMD_REQ_NOTIFY_CQ]	= ib_uverbs_req_notify_cq,
	[IB_USER_VERBS_CMD_DESTROY_CQ]		= ib_uverbs_destroy_cq,
	[IB_USER_VERBS_CMD_CREATE_QP]		= ib_uverbs_create_qp,
	[IB_USER_VERBS_CMD_QUERY_QP]		= ib_uverbs_query_qp,
	[IB_USER_VERBS_CMD_MODIFY_QP]		= ib_uverbs_modify_qp,
	[IB_USER_VERBS_CMD_DESTROY_QP]		= ib_uverbs_destroy_qp,
	[IB_USER_VERBS_CMD_POST_SEND]		= ib_uverbs_post_send,
	[IB_USER_VERBS_CMD_POST_RECV]		= ib_uverbs_post_recv,
	[IB_USER_VERBS_CMD_POST_SRQ_RECV]	= ib_uverbs_post_srq_recv,
	[IB_USER_VERBS_CMD_CREATE_AH]		= ib_uverbs_create_ah,
	[IB_USER_VERBS_CMD_DESTROY_AH]		= ib_uverbs_destroy_ah,
	[IB_USER_VERBS_CMD_ATTACH_MCAST]	= ib_uverbs_attach_mcast,
	[IB_USER_VERBS_CMD_DETACH_MCAST]	= ib_uverbs_detach_mcast,
	[IB_USER_VERBS_CMD_CREATE_SRQ]		= ib_uverbs_create_srq,
	[IB_USER_VERBS_CMD_MODIFY_SRQ]		= ib_uverbs_modify_srq,
	[IB_USER_VERBS_CMD_QUERY_SRQ]		= ib_uverbs_query_srq,
	[IB_USER_VERBS_CMD_DESTROY_SRQ]		= ib_uverbs_destroy_srq,
	[IB_USER_VERBS_CMD_OPEN_XRCD]		= ib_uverbs_open_xrcd,
	[IB_USER_VERBS_CMD_CLOSE_XRCD]		= ib_uverbs_close_xrcd,
	[IB_USER_VERBS_CMD_CREATE_XSRQ]		= ib_uverbs_create_xsrq,
	[IB_USER_VERBS_CMD_OPEN_QP]		= ib_uverbs_open_qp,
#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING
	[IB_USER_VERBS_CMD_CREATE_FLOW]		= ib_uverbs_create_flow,
	[IB_USER_VERBS_CMD_DESTROY_FLOW]	= ib_uverbs_destroy_flow
#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
};

static void ib_uverbs_add_one(struct ib_device *device);
static void ib_uverbs_remove_one(struct ib_device *device);

static void ib_uverbs_release_dev(struct kref *ref)
{
	struct ib_uverbs_device *dev =
		container_of(ref, struct ib_uverbs_device, ref);

	complete(&dev->comp);
}

static void ib_uverbs_release_event_file(struct kref *ref)
{
	struct ib_uverbs_event_file *file =
		container_of(ref, struct ib_uverbs_event_file, ref);

	kfree(file);
}

void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
			   struct ib_uverbs_event_file *ev_file,
			   struct ib_ucq_object *uobj)
{
	struct ib_uverbs_event *evt, *tmp;

	if (ev_file) {
		spin_lock_irq(&ev_file->lock);
		list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) {
			list_del(&evt->list);
			kfree(evt);
		}
		spin_unlock_irq(&ev_file->lock);

		kref_put(&ev_file->ref, ib_uverbs_release_event_file);
	}

	spin_lock_irq(&file->async_file->lock);
	list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) {
		list_del(&evt->list);
		kfree(evt);
	}
	spin_unlock_irq(&file->async_file->lock);
}

void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
			      struct ib_uevent_object *uobj)
{
	struct ib_uverbs_event *evt, *tmp;

	spin_lock_irq(&file->async_file->lock);
	list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
		list_del(&evt->list);
		kfree(evt);
	}
	spin_unlock_irq(&file->async_file->lock);
}

static void ib_uverbs_detach_umcast(struct ib_qp *qp,
				    struct ib_uqp_object *uobj)
{
	struct ib_uverbs_mcast_entry *mcast, *tmp;

	list_for_each_entry_safe(mcast, tmp, &uobj->mcast_list, list) {
		ib_detach_mcast(qp, &mcast->gid, mcast->lid);
		list_del(&mcast->list);
		kfree(mcast);
	}
}
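/*
 * Release every object still attached to a ucontext when its file is
 * closed.  Destruction order matters: MWs are removed before QPs (a
 * type 2A MW is bound to a QP), QPs before the CQs and SRQs they may
 * reference, and PDs last, since nearly every other object holds a PD
 * reference.
 */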
static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
				      struct ib_ucontext *context)
{
	struct ib_uobject *uobj, *tmp;

	if (!context)
		return 0;

	context->closing = 1;

	list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) {
		struct ib_ah *ah = uobj->object;

		idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
		ib_destroy_ah(ah);
		kfree(uobj);
	}

	/* Remove MWs before QPs, in order to support type 2A MWs. */
	list_for_each_entry_safe(uobj, tmp, &context->mw_list, list) {
		struct ib_mw *mw = uobj->object;

		idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
		ib_dealloc_mw(mw);
		kfree(uobj);
	}

	list_for_each_entry_safe(uobj, tmp, &context->rule_list, list) {
		struct ib_flow *flow_id = uobj->object;

		idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
		ib_destroy_flow(flow_id);
		kfree(uobj);
	}

	list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) {
		struct ib_qp *qp = uobj->object;
		struct ib_uqp_object *uqp =
			container_of(uobj, struct ib_uqp_object, uevent.uobject);

		idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
		if (qp != qp->real_qp) {
			ib_close_qp(qp);
		} else {
			ib_uverbs_detach_umcast(qp, uqp);
			ib_destroy_qp(qp);
		}
		ib_uverbs_release_uevent(file, &uqp->uevent);
		kfree(uqp);
	}

	list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) {
		struct ib_cq *cq = uobj->object;
		struct ib_uverbs_event_file *ev_file = cq->cq_context;
		struct ib_ucq_object *ucq =
			container_of(uobj, struct ib_ucq_object, uobject);

		idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
		ib_destroy_cq(cq);
		ib_uverbs_release_ucq(file, ev_file, ucq);
		kfree(ucq);
	}

	list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
		struct ib_srq *srq = uobj->object;
		struct ib_uevent_object *uevent =
			container_of(uobj, struct ib_uevent_object, uobject);

		idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
		ib_destroy_srq(srq);
		ib_uverbs_release_uevent(file, uevent);
		kfree(uevent);
	}

	list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
		struct ib_mr *mr = uobj->object;

		idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
		ib_dereg_mr(mr);
		kfree(uobj);
	}

	mutex_lock(&file->device->xrcd_tree_mutex);
	list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) {
		struct ib_xrcd *xrcd = uobj->object;
		struct ib_uxrcd_object *uxrcd =
			container_of(uobj, struct ib_uxrcd_object, uobject);

		idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj);
		ib_uverbs_dealloc_xrcd(file->device, xrcd);
		kfree(uxrcd);
	}
	mutex_unlock(&file->device->xrcd_tree_mutex);

	list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
		struct ib_pd *pd = uobj->object;

		idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
		ib_dealloc_pd(pd);
		kfree(uobj);
	}

	return context->device->dealloc_ucontext(context);
}

static void ib_uverbs_release_file(struct kref *ref)
{
	struct ib_uverbs_file *file =
		container_of(ref, struct ib_uverbs_file, ref);

	module_put(file->device->ib_dev->owner);
	kref_put(&file->device->ref, ib_uverbs_release_dev);

	kfree(file);
}
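/*
 * read() on an event file blocks (unless O_NONBLOCK is set) until an
 * event is queued, then copies out exactly one record: a struct
 * ib_uverbs_async_event_desc on the async file, or a struct
 * ib_uverbs_comp_event_desc on a completion channel.  A buffer smaller
 * than one record fails with -EINVAL rather than returning a truncated
 * event.
 */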
static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
				    size_t count, loff_t *pos)
{
	struct ib_uverbs_event_file *file = filp->private_data;
	struct ib_uverbs_event *event;
	int eventsz;
	int ret = 0;

	spin_lock_irq(&file->lock);

	while (list_empty(&file->event_list)) {
		spin_unlock_irq(&file->lock);

		if (filp->f_flags & O_NONBLOCK)
			return -EAGAIN;

		if (wait_event_interruptible(file->poll_wait,
					     !list_empty(&file->event_list)))
			return -ERESTARTSYS;

		spin_lock_irq(&file->lock);
	}

	event = list_entry(file->event_list.next, struct ib_uverbs_event, list);

	if (file->is_async)
		eventsz = sizeof (struct ib_uverbs_async_event_desc);
	else
		eventsz = sizeof (struct ib_uverbs_comp_event_desc);

	if (eventsz > count) {
		ret   = -EINVAL;
		event = NULL;
	} else {
		list_del(file->event_list.next);
		if (event->counter) {
			++(*event->counter);
			list_del(&event->obj_list);
		}
	}

	spin_unlock_irq(&file->lock);

	if (event) {
		if (copy_to_user(buf, event, eventsz))
			ret = -EFAULT;
		else
			ret = eventsz;
	}

	kfree(event);

	return ret;
}

static unsigned int ib_uverbs_event_poll(struct file *filp,
					 struct poll_table_struct *wait)
{
	unsigned int pollflags = 0;
	struct ib_uverbs_event_file *file = filp->private_data;

	poll_wait(filp, &file->poll_wait, wait);

	spin_lock_irq(&file->lock);
	if (!list_empty(&file->event_list))
		pollflags = POLLIN | POLLRDNORM;
	spin_unlock_irq(&file->lock);

	return pollflags;
}

static int ib_uverbs_event_fasync(int fd, struct file *filp, int on)
{
	struct ib_uverbs_event_file *file = filp->private_data;

	return fasync_helper(fd, filp, on, &file->async_queue);
}

static int ib_uverbs_event_close(struct inode *inode, struct file *filp)
{
	struct ib_uverbs_event_file *file = filp->private_data;
	struct ib_uverbs_event *entry, *tmp;

	spin_lock_irq(&file->lock);
	file->is_closed = 1;
	list_for_each_entry_safe(entry, tmp, &file->event_list, list) {
		if (entry->counter)
			list_del(&entry->obj_list);
		kfree(entry);
	}
	spin_unlock_irq(&file->lock);

	if (file->is_async) {
		ib_unregister_event_handler(&file->uverbs_file->event_handler);
		kref_put(&file->uverbs_file->ref, ib_uverbs_release_file);
	}
	kref_put(&file->ref, ib_uverbs_release_event_file);

	return 0;
}

static const struct file_operations uverbs_event_fops = {
	.owner	 = THIS_MODULE,
	.read	 = ib_uverbs_event_read,
	.poll	 = ib_uverbs_event_poll,
	.release = ib_uverbs_event_close,
	.fasync	 = ib_uverbs_event_fasync,
	.llseek	 = no_llseek,
};

void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
{
	struct ib_uverbs_event_file *file = cq_context;
	struct ib_ucq_object *uobj;
	struct ib_uverbs_event *entry;
	unsigned long flags;

	if (!file)
		return;

	spin_lock_irqsave(&file->lock, flags);
	if (file->is_closed) {
		spin_unlock_irqrestore(&file->lock, flags);
		return;
	}

	entry = kmalloc(sizeof *entry, GFP_ATOMIC);
	if (!entry) {
		spin_unlock_irqrestore(&file->lock, flags);
		return;
	}

	uobj = container_of(cq->uobject, struct ib_ucq_object, uobject);

	entry->desc.comp.cq_handle = cq->uobject->user_handle;
	entry->counter		   = &uobj->comp_events_reported;

	list_add_tail(&entry->list, &file->event_list);
	list_add_tail(&entry->obj_list, &uobj->comp_list);
	spin_unlock_irqrestore(&file->lock, flags);

	wake_up_interruptible(&file->poll_wait);
	kill_fasync(&file->async_queue, SIGIO, POLL_IN);
}
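/*
 * Common helper that queues one async event (CQ/QP/SRQ errors, port
 * events) on the file's async event file and wakes any readers.  When
 * obj_list and counter are non-NULL, the event is also linked to its
 * owning uobject so teardown can drain unreported events.  Allocation
 * is GFP_ATOMIC under a spinlock because the IB event callbacks that
 * land here may run in interrupt context.
 */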
static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
				    __u64 element, __u64 event,
				    struct list_head *obj_list,
				    u32 *counter)
{
	struct ib_uverbs_event *entry;
	unsigned long flags;

	spin_lock_irqsave(&file->async_file->lock, flags);
	if (file->async_file->is_closed) {
		spin_unlock_irqrestore(&file->async_file->lock, flags);
		return;
	}

	entry = kmalloc(sizeof *entry, GFP_ATOMIC);
	if (!entry) {
		spin_unlock_irqrestore(&file->async_file->lock, flags);
		return;
	}

	entry->desc.async.element    = element;
	entry->desc.async.event_type = event;
	entry->counter		     = counter;

	list_add_tail(&entry->list, &file->async_file->event_list);
	if (obj_list)
		list_add_tail(&entry->obj_list, obj_list);
	spin_unlock_irqrestore(&file->async_file->lock, flags);

	wake_up_interruptible(&file->async_file->poll_wait);
	kill_fasync(&file->async_file->async_queue, SIGIO, POLL_IN);
}

void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
{
	struct ib_ucq_object *uobj = container_of(event->element.cq->uobject,
						  struct ib_ucq_object, uobject);

	ib_uverbs_async_handler(uobj->uverbs_file, uobj->uobject.user_handle,
				event->event, &uobj->async_list,
				&uobj->async_events_reported);
}

void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
{
	struct ib_uevent_object *uobj;

	uobj = container_of(event->element.qp->uobject,
			    struct ib_uevent_object, uobject);

	ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
				event->event, &uobj->event_list,
				&uobj->events_reported);
}

void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr)
{
	struct ib_uevent_object *uobj;

	uobj = container_of(event->element.srq->uobject,
			    struct ib_uevent_object, uobject);

	ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
				event->event, &uobj->event_list,
				&uobj->events_reported);
}

void ib_uverbs_event_handler(struct ib_event_handler *handler,
			     struct ib_event *event)
{
	struct ib_uverbs_file *file =
		container_of(handler, struct ib_uverbs_file, event_handler);

	ib_uverbs_async_handler(file, event->element.port_num, event->event,
				NULL, NULL);
}

struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
					int is_async)
{
	struct ib_uverbs_event_file *ev_file;
	struct file *filp;

	ev_file = kmalloc(sizeof *ev_file, GFP_KERNEL);
	if (!ev_file)
		return ERR_PTR(-ENOMEM);

	kref_init(&ev_file->ref);
	spin_lock_init(&ev_file->lock);
	INIT_LIST_HEAD(&ev_file->event_list);
	init_waitqueue_head(&ev_file->poll_wait);
	ev_file->uverbs_file = uverbs_file;
	ev_file->async_queue = NULL;
	ev_file->is_async    = is_async;
	ev_file->is_closed   = 0;

	filp = anon_inode_getfile("[infinibandevent]", &uverbs_event_fops,
				  ev_file, O_RDONLY);
	if (IS_ERR(filp))
		kfree(ev_file);

	return filp;
}
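/*
 * For orientation, a hypothetical userspace counterpart: through
 * libibverbs, the fd produced above (via the CREATE_COMP_CHANNEL
 * command) is typically polled and then read for completion events,
 * along the lines of:
 *
 *	struct ibv_comp_channel *ch = ibv_create_comp_channel(ctx);
 *	struct pollfd pfd = { .fd = ch->fd, .events = POLLIN };
 *	poll(&pfd, 1, -1);	(woken by ib_uverbs_comp_handler())
 *
 * Sketch only; ctx is assumed to be an open struct ibv_context and is
 * not part of this file.
 */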
/*
 * Look up a completion event file by FD.  If lookup is successful,
 * takes a ref to the event file struct that it returns; if
 * unsuccessful, returns NULL.
 */
struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd)
{
	struct ib_uverbs_event_file *ev_file = NULL;
	struct fd f = fdget(fd);

	if (!f.file)
		return NULL;

	if (f.file->f_op != &uverbs_event_fops)
		goto out;

	ev_file = f.file->private_data;
	if (ev_file->is_async) {
		ev_file = NULL;
		goto out;
	}

	kref_get(&ev_file->ref);

out:
	fdput(f);
	return ev_file;
}
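/*
 * Every command written by userspace begins with the fixed header from
 * <rdma/ib_user_verbs.h>:
 *
 *	struct ib_uverbs_cmd_hdr {
 *		__u32 command;
 *		__u16 in_words;
 *		__u16 out_words;
 *	};
 *
 * in_words counts 32-bit words and covers the header plus the command
 * payload (hence the "* 4" length checks below); out_words sizes the
 * response buffer the command may write back.
 */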
static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
			       size_t count, loff_t *pos)
{
	struct ib_uverbs_file *file = filp->private_data;
	struct ib_uverbs_cmd_hdr hdr;

	if (count < sizeof hdr)
		return -EINVAL;

	if (copy_from_user(&hdr, buf, sizeof hdr))
		return -EFAULT;

	if (hdr.command >= ARRAY_SIZE(uverbs_cmd_table) ||
	    !uverbs_cmd_table[hdr.command])
		return -EINVAL;

	if (!file->ucontext &&
	    hdr.command != IB_USER_VERBS_CMD_GET_CONTEXT)
		return -EINVAL;

	if (!(file->device->ib_dev->uverbs_cmd_mask & (1ull << hdr.command)))
		return -ENOSYS;

#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING
	if (hdr.command >= IB_USER_VERBS_CMD_THRESHOLD) {
		struct ib_uverbs_cmd_hdr_ex hdr_ex;

		if (copy_from_user(&hdr_ex, buf, sizeof(hdr_ex)))
			return -EFAULT;

		if (((hdr_ex.in_words + hdr_ex.provider_in_words) * 4) != count)
			return -EINVAL;

		return uverbs_cmd_table[hdr.command](file,
						     buf + sizeof(hdr_ex),
						     (hdr_ex.in_words +
						      hdr_ex.provider_in_words) * 4,
						     (hdr_ex.out_words +
						      hdr_ex.provider_out_words) * 4);
	} else {
#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
		if (hdr.in_words * 4 != count)
			return -EINVAL;

		return uverbs_cmd_table[hdr.command](file,
						     buf + sizeof(hdr),
						     hdr.in_words * 4,
						     hdr.out_words * 4);
#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING
	}
#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
}

static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct ib_uverbs_file *file = filp->private_data;

	if (!file->ucontext)
		return -ENODEV;
	else
		return file->device->ib_dev->mmap(file->ucontext, vma);
}

/*
 * ib_uverbs_open() does not need the BKL:
 *
 *  - the ib_uverbs_device structures are properly reference counted and
 *    everything else is purely local to the file being created, so
 *    races against other open calls are not a problem;
 *  - there is no ioctl method to race against;
 *  - the open method will either fail immediately with -ENXIO, or all
 *    required initialization will be done.
 */
static int ib_uverbs_open(struct inode *inode, struct file *filp)
{
	struct ib_uverbs_device *dev;
	struct ib_uverbs_file *file;
	int ret;

	dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev);
	if (dev)
		kref_get(&dev->ref);
	else
		return -ENXIO;

	if (!try_module_get(dev->ib_dev->owner)) {
		ret = -ENODEV;
		goto err;
	}

	file = kmalloc(sizeof *file, GFP_KERNEL);
	if (!file) {
		ret = -ENOMEM;
		goto err_module;
	}

	file->device	 = dev;
	file->ucontext	 = NULL;
	file->async_file = NULL;
	kref_init(&file->ref);
	mutex_init(&file->mutex);

	filp->private_data = file;

	return nonseekable_open(inode, filp);

err_module:
	module_put(dev->ib_dev->owner);

err:
	kref_put(&dev->ref, ib_uverbs_release_dev);
	return ret;
}

static int ib_uverbs_close(struct inode *inode, struct file *filp)
{
	struct ib_uverbs_file *file = filp->private_data;

	ib_uverbs_cleanup_ucontext(file, file->ucontext);

	if (file->async_file)
		kref_put(&file->async_file->ref, ib_uverbs_release_event_file);

	kref_put(&file->ref, ib_uverbs_release_file);

	return 0;
}

static const struct file_operations uverbs_fops = {
	.owner	 = THIS_MODULE,
	.write	 = ib_uverbs_write,
	.open	 = ib_uverbs_open,
	.release = ib_uverbs_close,
	.llseek	 = no_llseek,
};

static const struct file_operations uverbs_mmap_fops = {
	.owner	 = THIS_MODULE,
	.write	 = ib_uverbs_write,
	.mmap	 = ib_uverbs_mmap,
	.open	 = ib_uverbs_open,
	.release = ib_uverbs_close,
	.llseek	 = no_llseek,
};

static struct ib_client uverbs_client = {
	.name	= "uverbs",
	.add	= ib_uverbs_add_one,
	.remove	= ib_uverbs_remove_one
};
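/*
 * Per-device sysfs attributes, exposed under
 * /sys/class/infiniband_verbs/uverbsN: "ibdev" names the underlying
 * ib_device and "abi_version" reports the ABI version that device
 * speaks, e.g. (device name illustrative):
 *
 *	$ cat /sys/class/infiniband_verbs/uverbs0/ibdev
 *	mlx4_0
 */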
static ssize_t show_ibdev(struct device *device, struct device_attribute *attr,
			  char *buf)
{
	struct ib_uverbs_device *dev = dev_get_drvdata(device);

	if (!dev)
		return -ENODEV;

	return sprintf(buf, "%s\n", dev->ib_dev->name);
}
static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);

static ssize_t show_dev_abi_version(struct device *device,
				    struct device_attribute *attr, char *buf)
{
	struct ib_uverbs_device *dev = dev_get_drvdata(device);

	if (!dev)
		return -ENODEV;

	return sprintf(buf, "%d\n", dev->ib_dev->uverbs_abi_ver);
}
static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL);

static CLASS_ATTR_STRING(abi_version, S_IRUGO,
			 __stringify(IB_USER_VERBS_ABI_VERSION));

static dev_t overflow_maj;
static DECLARE_BITMAP(overflow_map, IB_UVERBS_MAX_DEVICES);

/*
 * If we have more than IB_UVERBS_MAX_DEVICES, dynamically overflow by
 * requesting a new major number and doubling the number of max devices we
 * support. It's stupid, but simple.
 */
static int find_overflow_devnum(void)
{
	int ret;

	if (!overflow_maj) {
		ret = alloc_chrdev_region(&overflow_maj, 0, IB_UVERBS_MAX_DEVICES,
					  "infiniband_verbs");
		if (ret) {
			printk(KERN_ERR "user_verbs: couldn't register dynamic device number\n");
			return ret;
		}
	}

	ret = find_first_zero_bit(overflow_map, IB_UVERBS_MAX_DEVICES);
	if (ret >= IB_UVERBS_MAX_DEVICES)
		return -1;

	return ret;
}

static void ib_uverbs_add_one(struct ib_device *device)
{
	int devnum;
	dev_t base;
	struct ib_uverbs_device *uverbs_dev;

	if (!device->alloc_ucontext)
		return;

	uverbs_dev = kzalloc(sizeof *uverbs_dev, GFP_KERNEL);
	if (!uverbs_dev)
		return;

	kref_init(&uverbs_dev->ref);
	init_completion(&uverbs_dev->comp);
	uverbs_dev->xrcd_tree = RB_ROOT;
	mutex_init(&uverbs_dev->xrcd_tree_mutex);

	spin_lock(&map_lock);
	devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
	if (devnum >= IB_UVERBS_MAX_DEVICES) {
		spin_unlock(&map_lock);
		devnum = find_overflow_devnum();
		if (devnum < 0)
			goto err;

		spin_lock(&map_lock);
		uverbs_dev->devnum = devnum + IB_UVERBS_MAX_DEVICES;
		base = devnum + overflow_maj;
		set_bit(devnum, overflow_map);
	} else {
		uverbs_dev->devnum = devnum;
		base = devnum + IB_UVERBS_BASE_DEV;
		set_bit(devnum, dev_map);
	}
	spin_unlock(&map_lock);

	uverbs_dev->ib_dev	     = device;
	uverbs_dev->num_comp_vectors = device->num_comp_vectors;

	cdev_init(&uverbs_dev->cdev, NULL);
	uverbs_dev->cdev.owner = THIS_MODULE;
	uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
	kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum);
	if (cdev_add(&uverbs_dev->cdev, base, 1))
		goto err_cdev;

	uverbs_dev->dev = device_create(uverbs_class, device->dma_device,
					uverbs_dev->cdev.dev, uverbs_dev,
					"uverbs%d", uverbs_dev->devnum);
	if (IS_ERR(uverbs_dev->dev))
		goto err_cdev;

	if (device_create_file(uverbs_dev->dev, &dev_attr_ibdev))
		goto err_class;
	if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version))
		goto err_class;

	ib_set_client_data(device, &uverbs_client, uverbs_dev);

	return;

err_class:
	device_destroy(uverbs_class, uverbs_dev->cdev.dev);

err_cdev:
	cdev_del(&uverbs_dev->cdev);
	if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES)
		clear_bit(devnum, dev_map);
	else
		clear_bit(devnum, overflow_map);

err:
	kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
	wait_for_completion(&uverbs_dev->comp);
	kfree(uverbs_dev);
	return;
}

static void ib_uverbs_remove_one(struct ib_device *device)
{
	struct ib_uverbs_device *uverbs_dev = ib_get_client_data(device, &uverbs_client);

	if (!uverbs_dev)
		return;

	dev_set_drvdata(uverbs_dev->dev, NULL);
	device_destroy(uverbs_class, uverbs_dev->cdev.dev);
	cdev_del(&uverbs_dev->cdev);

	if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES)
		clear_bit(uverbs_dev->devnum, dev_map);
	else
		clear_bit(uverbs_dev->devnum - IB_UVERBS_MAX_DEVICES, overflow_map);

	kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
	wait_for_completion(&uverbs_dev->comp);
	kfree(uverbs_dev);
}

static char *uverbs_devnode(struct device *dev, umode_t *mode)
{
	if (mode)
		*mode = 0666;
	return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
}
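/*
 * Module init: reserve the static char-device region, create the
 * "infiniband_verbs" class (device nodes appear as
 * /dev/infiniband/uverbsN via uverbs_devnode() above), then register
 * with the IB core so ib_uverbs_add_one() runs for every existing and
 * future ib_device.
 */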
static int __init ib_uverbs_init(void)
{
	int ret;

	ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES,
				     "infiniband_verbs");
	if (ret) {
		printk(KERN_ERR "user_verbs: couldn't register device number\n");
		goto out;
	}

	uverbs_class = class_create(THIS_MODULE, "infiniband_verbs");
	if (IS_ERR(uverbs_class)) {
		ret = PTR_ERR(uverbs_class);
		printk(KERN_ERR "user_verbs: couldn't create class infiniband_verbs\n");
		goto out_chrdev;
	}

	uverbs_class->devnode = uverbs_devnode;

	ret = class_create_file(uverbs_class, &class_attr_abi_version.attr);
	if (ret) {
		printk(KERN_ERR "user_verbs: couldn't create abi_version attribute\n");
		goto out_class;
	}

	ret = ib_register_client(&uverbs_client);
	if (ret) {
		printk(KERN_ERR "user_verbs: couldn't register client\n");
		goto out_class;
	}

	return 0;

out_class:
	class_destroy(uverbs_class);

out_chrdev:
	unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);

out:
	return ret;
}

static void __exit ib_uverbs_cleanup(void)
{
	ib_unregister_client(&uverbs_client);
	class_destroy(uverbs_class);
	unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
	if (overflow_maj)
		unregister_chrdev_region(overflow_maj, IB_UVERBS_MAX_DEVICES);
	/* Destroy all IDRs defined above, including the xrcd and rule
	 * IDRs, which were previously missed here. */
	idr_destroy(&ib_uverbs_pd_idr);
	idr_destroy(&ib_uverbs_mr_idr);
	idr_destroy(&ib_uverbs_mw_idr);
	idr_destroy(&ib_uverbs_ah_idr);
	idr_destroy(&ib_uverbs_cq_idr);
	idr_destroy(&ib_uverbs_qp_idr);
	idr_destroy(&ib_uverbs_srq_idr);
	idr_destroy(&ib_uverbs_xrcd_idr);
	idr_destroy(&ib_uverbs_rule_idr);
}

module_init(ib_uverbs_init);
module_exit(ib_uverbs_cleanup);