/*
 * Copyright (c) 2016, Mellanox Technologies inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/file.h>
#include <linux/anon_inodes.h>
#include <rdma/ib_verbs.h>
#include <rdma/uverbs_types.h>
#include <linux/rcupdate.h>
#include "uverbs.h"
#include "core_priv.h"
#include "rdma_core.h"

void uverbs_uobject_get(struct ib_uobject *uobject)
{
	kref_get(&uobject->ref);
}

static void uverbs_uobject_free(struct kref *ref)
{
	struct ib_uobject *uobj =
		container_of(ref, struct ib_uobject, ref);

	if (uobj->type->type_class->needs_kfree_rcu)
		kfree_rcu(uobj, rcu);
	else
		kfree(uobj);
}

void uverbs_uobject_put(struct ib_uobject *uobject)
{
	kref_put(&uobject->ref, uverbs_uobject_free);
}

static int uverbs_try_lock_object(struct ib_uobject *uobj, bool exclusive)
{
	/*
	 * When shared access is required, we use a positive counter. Each
	 * shared access request checks that the value != -1 and increments it.
	 * Exclusive access is required for operations like write or destroy.
	 * In exclusive access mode, we check that the counter is zero (nobody
	 * claimed this object) and we set it to -1. Releasing a shared access
	 * lock is done simply by decreasing the counter. As for exclusive
	 * access locks, since only a single one of them is allowed
	 * concurrently, setting the counter to zero is enough for releasing
	 * this lock.
	 */
	if (!exclusive)
		return __atomic_add_unless(&uobj->usecnt, 1, -1) == -1 ?
			-EBUSY : 0;

	/* lock is either WRITE or DESTROY - should be exclusive */
	return atomic_cmpxchg(&uobj->usecnt, 0, -1) == 0 ? 0 : -EBUSY;
}
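
/*
 * In other words, usecnt encodes the lock state of the object:
 *	 0  - unlocked
 *	 n  - locked for shared (read) access by n concurrent users
 *	-1  - locked for exclusive (write/destroy) access
 * rdma_lookup_put_uobject() below releases the lock by decrementing the
 * counter (shared) or resetting it to zero (exclusive).
 */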

static struct ib_uobject *alloc_uobj(struct ib_ucontext *context,
				     const struct uverbs_obj_type *type)
{
	struct ib_uobject *uobj = kzalloc(type->obj_size, GFP_KERNEL);

	if (!uobj)
		return ERR_PTR(-ENOMEM);
	/*
	 * user_handle should be filled by the handler; the object is added
	 * to the list in the commit stage.
	 */
	uobj->context = context;
	uobj->type = type;
	atomic_set(&uobj->usecnt, 0);
	kref_init(&uobj->ref);

	return uobj;
}

static int idr_add_uobj(struct ib_uobject *uobj)
{
	int ret;

	idr_preload(GFP_KERNEL);
	spin_lock(&uobj->context->ufile->idr_lock);

	/*
	 * We start by allocating an idr entry pointing to NULL. This
	 * represents an object which isn't initialized yet. We'll replace
	 * it later on with the real object once we commit.
	 */
	ret = idr_alloc(&uobj->context->ufile->idr, NULL, 0,
			min_t(unsigned long, U32_MAX - 1, INT_MAX), GFP_NOWAIT);
	if (ret >= 0)
		uobj->id = ret;

	spin_unlock(&uobj->context->ufile->idr_lock);
	idr_preload_end();

	return ret < 0 ? ret : 0;
}

/*
 * This only removes the uobject from the IDR; uverbs_uobject_put() is still
 * required to drop the reference.
 */
static void uverbs_idr_remove_uobj(struct ib_uobject *uobj)
{
	spin_lock(&uobj->context->ufile->idr_lock);
	idr_remove(&uobj->context->ufile->idr, uobj->id);
	spin_unlock(&uobj->context->ufile->idr_lock);
}

/* Returns the ib_uobject or an error. The caller should check for IS_ERR. */
static struct ib_uobject *lookup_get_idr_uobject(const struct uverbs_obj_type *type,
						 struct ib_ucontext *ucontext,
						 int id, bool exclusive)
{
	struct ib_uobject *uobj;

	rcu_read_lock();
	/* object won't be released as we're protected in rcu */
	uobj = idr_find(&ucontext->ufile->idr, id);
	if (!uobj) {
		uobj = ERR_PTR(-ENOENT);
		goto free;
	}

	uverbs_uobject_get(uobj);
free:
	rcu_read_unlock();
	return uobj;
}

static struct ib_uobject *lookup_get_fd_uobject(const struct uverbs_obj_type *type,
						struct ib_ucontext *ucontext,
						int id, bool exclusive)
{
	struct file *f;
	struct ib_uobject *uobject;
	const struct uverbs_obj_fd_type *fd_type =
		container_of(type, struct uverbs_obj_fd_type, type);

	if (exclusive)
		return ERR_PTR(-EOPNOTSUPP);

	f = fget(id);
	if (!f)
		return ERR_PTR(-EBADF);

	uobject = f->private_data;
	/*
	 * fget(id) ensures we are not currently running uverbs_close_fd,
	 * and the caller is expected to ensure that uverbs_close_fd is never
	 * done while a call to lookup is possible.
	 */
	if (f->f_op != fd_type->fops) {
		fput(f);
		return ERR_PTR(-EBADF);
	}

	uverbs_uobject_get(uobject);
	return uobject;
}

struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type,
					   struct ib_ucontext *ucontext,
					   int id, bool exclusive)
{
	struct ib_uobject *uobj;
	int ret;

	uobj = type->type_class->lookup_get(type, ucontext, id, exclusive);
	if (IS_ERR(uobj))
		return uobj;

	if (uobj->type != type) {
		ret = -EINVAL;
		goto free;
	}

	ret = uverbs_try_lock_object(uobj, exclusive);
	if (ret) {
		WARN(ucontext->cleanup_reason,
		     "ib_uverbs: Trying to lookup_get while cleanup context\n");
		goto free;
	}

	return uobj;
free:
	uobj->type->type_class->lookup_put(uobj, exclusive);
	uverbs_uobject_put(uobj);
	return ERR_PTR(ret);
}
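
/*
 * Usage sketch (illustrative only, not compiled): a handler that needs
 * shared (read) access to an object referenced by a user-supplied handle
 * would typically do
 *
 *	uobj = rdma_lookup_get_uobject(type, ucontext, handle, false);
 *	if (IS_ERR(uobj))
 *		return PTR_ERR(uobj);
 *	...use uobj->object...
 *	rdma_lookup_put_uobject(uobj, false);
 *
 * where "handle" stands for whatever id the command payload carries.
 * Passing exclusive = true instead takes the write lock on the object.
 */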

static struct ib_uobject *alloc_begin_idr_uobject(const struct uverbs_obj_type *type,
						  struct ib_ucontext *ucontext)
{
	int ret;
	struct ib_uobject *uobj;

	uobj = alloc_uobj(ucontext, type);
	if (IS_ERR(uobj))
		return uobj;

	ret = idr_add_uobj(uobj);
	if (ret)
		goto uobj_put;

	ret = ib_rdmacg_try_charge(&uobj->cg_obj, ucontext->device,
				   RDMACG_RESOURCE_HCA_OBJECT);
	if (ret)
		goto idr_remove;

	return uobj;

idr_remove:
	uverbs_idr_remove_uobj(uobj);
uobj_put:
	uverbs_uobject_put(uobj);
	return ERR_PTR(ret);
}

static struct ib_uobject *alloc_begin_fd_uobject(const struct uverbs_obj_type *type,
						 struct ib_ucontext *ucontext)
{
	const struct uverbs_obj_fd_type *fd_type =
		container_of(type, struct uverbs_obj_fd_type, type);
	int new_fd;
	struct ib_uobject *uobj;
	struct ib_uobject_file *uobj_file;
	struct file *filp;

	new_fd = get_unused_fd_flags(O_CLOEXEC);
	if (new_fd < 0)
		return ERR_PTR(new_fd);

	uobj = alloc_uobj(ucontext, type);
	if (IS_ERR(uobj)) {
		put_unused_fd(new_fd);
		return uobj;
	}

	uobj_file = container_of(uobj, struct ib_uobject_file, uobj);
	filp = anon_inode_getfile(fd_type->name,
				  fd_type->fops,
				  uobj_file,
				  fd_type->flags);
	if (IS_ERR(filp)) {
		put_unused_fd(new_fd);
		uverbs_uobject_put(uobj);
		return (void *)filp;
	}

	uobj_file->uobj.id = new_fd;
	uobj_file->uobj.object = filp;
	uobj_file->ufile = ucontext->ufile;
	INIT_LIST_HEAD(&uobj->list);
	kref_get(&uobj_file->ufile->ref);

	return uobj;
}

struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type,
					    struct ib_ucontext *ucontext)
{
	return type->type_class->alloc_begin(type, ucontext);
}
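
/*
 * Allocation lifecycle sketch (illustrative only, not compiled):
 *
 *	uobj = rdma_alloc_begin_uobject(type, ucontext);
 *	if (IS_ERR(uobj))
 *		return PTR_ERR(uobj);
 *	uobj->object = <create the underlying object>;
 *	if (<creation failed>) {
 *		rdma_alloc_abort_uobject(uobj);
 *		return <error>;
 *	}
 *	return rdma_alloc_commit_uobject(uobj);
 *
 * Until the commit, the IDR entry still points to NULL (and the new fd is
 * not yet installed), so concurrent lookups cannot see the half-built
 * object.
 */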

static void uverbs_uobject_add(struct ib_uobject *uobject)
{
	mutex_lock(&uobject->context->uobjects_lock);
	list_add(&uobject->list, &uobject->context->uobjects);
	mutex_unlock(&uobject->context->uobjects_lock);
}

static int __must_check remove_commit_idr_uobject(struct ib_uobject *uobj,
						  enum rdma_remove_reason why)
{
	const struct uverbs_obj_idr_type *idr_type =
		container_of(uobj->type, struct uverbs_obj_idr_type,
			     type);
	int ret = idr_type->destroy_object(uobj, why);

	/*
	 * We can only fail gracefully if the user requested to destroy the
	 * object. In the rest of the cases, just remove whatever you can.
	 */
	if (why == RDMA_REMOVE_DESTROY && ret)
		return ret;

	ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
			   RDMACG_RESOURCE_HCA_OBJECT);
	uverbs_idr_remove_uobj(uobj);

	return ret;
}

static void alloc_abort_fd_uobject(struct ib_uobject *uobj)
{
	struct ib_uobject_file *uobj_file =
		container_of(uobj, struct ib_uobject_file, uobj);
	struct file *filp = uobj->object;
	int id = uobj_file->uobj.id;

	/* Unsuccessful NEW */
	fput(filp);
	put_unused_fd(id);
}

static int __must_check remove_commit_fd_uobject(struct ib_uobject *uobj,
						 enum rdma_remove_reason why)
{
	const struct uverbs_obj_fd_type *fd_type =
		container_of(uobj->type, struct uverbs_obj_fd_type, type);
	struct ib_uobject_file *uobj_file =
		container_of(uobj, struct ib_uobject_file, uobj);
	int ret = fd_type->context_closed(uobj_file, why);

	if (why == RDMA_REMOVE_DESTROY && ret)
		return ret;

	if (why == RDMA_REMOVE_DURING_CLEANUP) {
		alloc_abort_fd_uobject(uobj);
		return ret;
	}

	uobj_file->uobj.context = NULL;
	return ret;
}

static void lockdep_check(struct ib_uobject *uobj, bool exclusive)
{
#ifdef CONFIG_LOCKDEP
	if (exclusive)
		WARN_ON(atomic_read(&uobj->usecnt) > 0);
	else
		WARN_ON(atomic_read(&uobj->usecnt) == -1);
#endif
}

static int __must_check _rdma_remove_commit_uobject(struct ib_uobject *uobj,
						    enum rdma_remove_reason why)
{
	int ret;
	struct ib_ucontext *ucontext = uobj->context;

	ret = uobj->type->type_class->remove_commit(uobj, why);
	if (ret && why == RDMA_REMOVE_DESTROY) {
		/* We couldn't remove the object, so just unlock the uobject */
		atomic_set(&uobj->usecnt, 0);
		uobj->type->type_class->lookup_put(uobj, true);
	} else {
		mutex_lock(&ucontext->uobjects_lock);
		list_del(&uobj->list);
		mutex_unlock(&ucontext->uobjects_lock);
		/* put the ref we took when we created the object */
		uverbs_uobject_put(uobj);
	}

	return ret;
}

/* This is called only for user requested DESTROY reasons */
int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj)
{
	int ret;
	struct ib_ucontext *ucontext = uobj->context;

	/* put the ref count we took at lookup_get */
	uverbs_uobject_put(uobj);
	/* Cleanup is running. Calling this should have been impossible */
	if (!down_read_trylock(&ucontext->cleanup_rwsem)) {
		WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n");
		return 0;
	}
	lockdep_check(uobj, true);
	ret = _rdma_remove_commit_uobject(uobj, RDMA_REMOVE_DESTROY);

	up_read(&ucontext->cleanup_rwsem);
	return ret;
}
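
/*
 * Destroy sketch (illustrative only, not compiled): a destroy handler first
 * takes the object exclusively and then commits the removal, e.g.
 *
 *	uobj = rdma_lookup_get_uobject(type, ucontext, handle, true);
 *	if (IS_ERR(uobj))
 *		return PTR_ERR(uobj);
 *	return rdma_remove_commit_uobject(uobj);
 *
 * rdma_remove_commit_uobject() drops the lookup reference itself, so the
 * caller must not call rdma_lookup_put_uobject() afterwards.
 */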

static void alloc_commit_idr_uobject(struct ib_uobject *uobj)
{
	uverbs_uobject_add(uobj);
	spin_lock(&uobj->context->ufile->idr_lock);
	/*
	 * We already allocated this IDR with a NULL object, so
	 * this shouldn't fail.
	 */
	WARN_ON(idr_replace(&uobj->context->ufile->idr,
			    uobj, uobj->id));
	spin_unlock(&uobj->context->ufile->idr_lock);
}

static void alloc_commit_fd_uobject(struct ib_uobject *uobj)
{
	struct ib_uobject_file *uobj_file =
		container_of(uobj, struct ib_uobject_file, uobj);

	uverbs_uobject_add(&uobj_file->uobj);
	fd_install(uobj_file->uobj.id, uobj->object);
	/* The fd number shouldn't be used anymore. Use the file object instead */
	uobj_file->uobj.id = 0;
	/* Get another reference as we export this to the fops */
	uverbs_uobject_get(&uobj_file->uobj);
}

int rdma_alloc_commit_uobject(struct ib_uobject *uobj)
{
	/* Cleanup is running. Calling this should have been impossible */
	if (!down_read_trylock(&uobj->context->cleanup_rwsem)) {
		int ret;

		WARN(true, "ib_uverbs: Cleanup is running while allocating an uobject\n");
		ret = uobj->type->type_class->remove_commit(uobj,
							    RDMA_REMOVE_DURING_CLEANUP);
		if (ret)
			pr_warn("ib_uverbs: cleanup of idr object %d failed\n",
				uobj->id);
		return ret;
	}

	uobj->type->type_class->alloc_commit(uobj);
	up_read(&uobj->context->cleanup_rwsem);

	return 0;
}

static void alloc_abort_idr_uobject(struct ib_uobject *uobj)
{
	uverbs_idr_remove_uobj(uobj);
	ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
			   RDMACG_RESOURCE_HCA_OBJECT);
	uverbs_uobject_put(uobj);
}

void rdma_alloc_abort_uobject(struct ib_uobject *uobj)
{
	uobj->type->type_class->alloc_abort(uobj);
}

static void lookup_put_idr_uobject(struct ib_uobject *uobj, bool exclusive)
{
}

static void lookup_put_fd_uobject(struct ib_uobject *uobj, bool exclusive)
{
	struct file *filp = uobj->object;

	WARN_ON(exclusive);
	/* This indirectly calls uverbs_close_fd and frees the object */
	fput(filp);
}

void rdma_lookup_put_uobject(struct ib_uobject *uobj, bool exclusive)
{
	lockdep_check(uobj, exclusive);
	uobj->type->type_class->lookup_put(uobj, exclusive);
	/*
	 * In order to unlock an object, either decrease its usecnt for
	 * read access or zero it in case of exclusive access. See
	 * uverbs_try_lock_object for locking schema information.
	 */
	if (!exclusive)
		atomic_dec(&uobj->usecnt);
	else
		atomic_set(&uobj->usecnt, 0);

	uverbs_uobject_put(uobj);
}

const struct uverbs_obj_type_class uverbs_idr_class = {
	.alloc_begin = alloc_begin_idr_uobject,
	.lookup_get = lookup_get_idr_uobject,
	.alloc_commit = alloc_commit_idr_uobject,
	.alloc_abort = alloc_abort_idr_uobject,
	.lookup_put = lookup_put_idr_uobject,
	.remove_commit = remove_commit_idr_uobject,
	/*
	 * When we destroy an object, we first just lock it for WRITE and
	 * actually DESTROY it in the finalize stage. So, the problematic
	 * scenario is when we just started the finalize stage of the
	 * destruction (nothing was executed yet). Now, the other thread
	 * fetched the object for READ access, but it didn't lock it yet.
	 * The DESTROY thread continues and starts destroying the object.
	 * When the other thread continues - without RCU - it would access
	 * freed memory. However, the rcu_read_lock delays the free until
	 * the reader's rcu_read_lock critical section ends. Since the
	 * exclusive lock of the object is still taken by the DESTROY flow,
	 * the READ operation will get -EBUSY and it'll just bail out.
	 */
	.needs_kfree_rcu = true,
};
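
/*
 * Teardown path for fd-based uobjects: the final fput() of the anon file
 * (e.g. from lookup_put_fd_uobject() or the user closing the fd) ends up in
 * the fd type's release handler, which is expected to call uverbs_close_fd()
 * below. That drops the extra reference taken in alloc_commit_fd_uobject()
 * and the uverbs file reference taken in alloc_begin_fd_uobject().
 */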

static void _uverbs_close_fd(struct ib_uobject_file *uobj_file)
{
	struct ib_ucontext *ucontext;
	struct ib_uverbs_file *ufile = uobj_file->ufile;
	int ret;

	mutex_lock(&uobj_file->ufile->cleanup_mutex);

	/* uobject was either already cleaned up or is being cleaned up right now anyway */
	if (!uobj_file->uobj.context ||
	    !down_read_trylock(&uobj_file->uobj.context->cleanup_rwsem))
		goto unlock;

	ucontext = uobj_file->uobj.context;
	ret = _rdma_remove_commit_uobject(&uobj_file->uobj, RDMA_REMOVE_CLOSE);
	up_read(&ucontext->cleanup_rwsem);
	if (ret)
		pr_warn("uverbs: unable to clean up uobject file in uverbs_close_fd.\n");
unlock:
	mutex_unlock(&ufile->cleanup_mutex);
}

void uverbs_close_fd(struct file *f)
{
	struct ib_uobject_file *uobj_file = f->private_data;
	struct kref *uverbs_file_ref = &uobj_file->ufile->ref;

	_uverbs_close_fd(uobj_file);
	uverbs_uobject_put(&uobj_file->uobj);
	kref_put(uverbs_file_ref, ib_uverbs_release_file);
}

void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed)
{
	enum rdma_remove_reason reason = device_removed ?
		RDMA_REMOVE_DRIVER_REMOVE : RDMA_REMOVE_CLOSE;
	unsigned int cur_order = 0;

	ucontext->cleanup_reason = reason;
	/*
	 * Waits for all remove_commit and alloc_commit to finish. Logically, we
	 * want to hold this forever as the context is going to be destroyed,
	 * but we'll release it since it causes a "held lock freed" BUG message.
	 */
	down_write(&ucontext->cleanup_rwsem);

	while (!list_empty(&ucontext->uobjects)) {
		struct ib_uobject *obj, *next_obj;
		unsigned int next_order = UINT_MAX;

		/*
		 * This shouldn't run while executing other commands on this
		 * context. Thus, the only thing we should take care of is
		 * releasing an FD while traversing this list. The FD could be
		 * closed and released from the _release fop of this FD.
		 * In order to mitigate this, we add a lock.
		 * We take and release the lock per order traversal in order
		 * to give other threads (which might still use the FDs) a
		 * chance to run.
		 */
		mutex_lock(&ucontext->uobjects_lock);
		list_for_each_entry_safe(obj, next_obj, &ucontext->uobjects,
					 list) {
			if (obj->type->destroy_order == cur_order) {
				int ret;

				/*
				 * if we hit this WARN_ON, that means we are
				 * racing with a lookup_get.
				 */
				WARN_ON(uverbs_try_lock_object(obj, true));
				ret = obj->type->type_class->remove_commit(obj,
									   reason);
				list_del(&obj->list);
				if (ret)
					pr_warn("ib_uverbs: failed to remove uobject id %d order %u\n",
						obj->id, cur_order);
				/* put the ref we took when we created the object */
				uverbs_uobject_put(obj);
			} else {
				next_order = min(next_order,
						 obj->type->destroy_order);
			}
		}
		mutex_unlock(&ucontext->uobjects_lock);
		cur_order = next_order;
	}
	up_write(&ucontext->cleanup_rwsem);
}
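
/*
 * destroy_order gives a simple dependency ordering for cleanup: each pass of
 * the loop above removes every remaining object whose type has the lowest
 * destroy_order still present, then advances to the next order. Types whose
 * objects must disappear before others (for example, hypothetically, a type
 * holding a reference to a PD would need a lower destroy_order than the PD
 * type) can express that through this value.
 */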

void uverbs_initialize_ucontext(struct ib_ucontext *ucontext)
{
	ucontext->cleanup_reason = 0;
	mutex_init(&ucontext->uobjects_lock);
	INIT_LIST_HEAD(&ucontext->uobjects);
	init_rwsem(&ucontext->cleanup_rwsem);
}

const struct uverbs_obj_type_class uverbs_fd_class = {
	.alloc_begin = alloc_begin_fd_uobject,
	.lookup_get = lookup_get_fd_uobject,
	.alloc_commit = alloc_commit_fd_uobject,
	.alloc_abort = alloc_abort_fd_uobject,
	.lookup_put = lookup_put_fd_uobject,
	.remove_commit = remove_commit_fd_uobject,
	.needs_kfree_rcu = false,
};