/*
 * Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; see the file COPYING.  If not, write to
 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * fsnotify inode mark locking/lifetime/and refcnting
 *
 * REFCNT:
 * The group->refcnt and mark->refcnt tell how many "things" in the kernel
 * currently are referencing the objects. Both kinds of objects typically will
 * live inside the kernel with a refcnt of 2, one for its creation and one for
 * the reference a group and a mark hold to each other.
 * If you are holding the appropriate locks, you can take a reference and the
 * object itself is guaranteed to survive until the reference is dropped.
 *
 * LOCKING:
 * There are 3 locks involved with fsnotify inode marks and they MUST be taken
 * in order as follows:
 *
 * group->mark_mutex
 * mark->lock
 * mark->connector->lock
 *
 * group->mark_mutex protects the marks_list anchored inside a given group and
 * each mark is hooked via the g_list. It also protects the group's private
 * data (i.e. group limits).
 *
 * mark->lock protects the mark's attributes like its masks and flags.
 * Furthermore it protects the access to a reference of the group that the mark
 * is assigned to as well as the access to a reference of the inode/vfsmount
 * that is being watched by the mark.
 *
 * mark->connector->lock protects the list of marks anchored inside an
 * inode / vfsmount and each mark is hooked via the i_list.
 *
 * A list of notification marks relating to inode / mnt is contained in
 * fsnotify_mark_connector. That structure is alive as long as there are any
 * marks in the list and is also protected by fsnotify_mark_srcu. A mark gets
 * detached from fsnotify_mark_connector when the last reference to the mark
 * is dropped. Thus having a mark reference is enough to protect the
 * mark->connector pointer and to make sure fsnotify_mark_connector cannot
 * disappear. Also, because we remove a mark from g_list before dropping the
 * mark reference associated with it, any mark found through g_list is
 * guaranteed to have mark->connector set until we drop group->mark_mutex.
 *
 * LIFETIME:
 * Inode marks survive between when they are added to an inode and when their
 * refcnt==0. Marks are also protected by fsnotify_mark_srcu.
 *
 * The inode mark can be cleared for a number of different reasons including:
 * - The inode is unlinked for the last time.  (fsnotify_inode_remove)
 * - The inode is being evicted from cache. (fsnotify_inode_delete)
 * - The fs the inode is on is unmounted.  (fsnotify_inode_delete/fsnotify_unmount_inodes)
 * - Something explicitly requests that it be removed.  (fsnotify_destroy_mark)
 * - The fsnotify_group associated with the mark is going away and all such
 *   marks need to be cleaned up.
 *   (fsnotify_clear_marks_by_group)
 *
 * This has the very interesting property of being able to run concurrently
 * with any (or all) other directions.
 */

#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/srcu.h>

#include <linux/atomic.h>

#include <linux/fsnotify_backend.h>
#include "fsnotify.h"

#define FSNOTIFY_REAPER_DELAY	(1)	/* 1 jiffy */

struct srcu_struct fsnotify_mark_srcu;
struct kmem_cache *fsnotify_mark_connector_cachep;

static DEFINE_SPINLOCK(destroy_lock);
static LIST_HEAD(destroy_list);
static struct fsnotify_mark_connector *connector_destroy_list;

static void fsnotify_mark_destroy_workfn(struct work_struct *work);
static DECLARE_DELAYED_WORK(reaper_work, fsnotify_mark_destroy_workfn);

static void fsnotify_connector_destroy_workfn(struct work_struct *work);
static DECLARE_WORK(connector_reaper_work, fsnotify_connector_destroy_workfn);

void fsnotify_get_mark(struct fsnotify_mark *mark)
{
        WARN_ON_ONCE(!refcount_read(&mark->refcnt));
        refcount_inc(&mark->refcnt);
}

static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
{
        u32 new_mask = 0;
        struct fsnotify_mark *mark;

        assert_spin_locked(&conn->lock);
        hlist_for_each_entry(mark, &conn->list, obj_list) {
                if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)
                        new_mask |= mark->mask;
        }
        if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE)
                conn->inode->i_fsnotify_mask = new_mask;
        else if (conn->flags & FSNOTIFY_OBJ_TYPE_VFSMOUNT)
                real_mount(conn->mnt)->mnt_fsnotify_mask = new_mask;
}
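
/*
 * A minimal sketch of the lock ordering documented at the top of this file,
 * assuming 'group' and 'mark' are a valid, attached pair (both names are
 * hypothetical, not taken from this file). fsnotify_recalc_mask() takes
 * conn->lock internally, so all three locks end up being taken in the
 * documented order:
 *
 *	mutex_lock(&group->mark_mutex);
 *	spin_lock(&mark->lock);
 *	mark->mask |= FS_MODIFY;
 *	spin_unlock(&mark->lock);
 *	fsnotify_recalc_mask(mark->connector);
 *	mutex_unlock(&group->mark_mutex);
 */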
/*
 * Calculate mask of events for a list of marks. The caller must make sure
 * connector and connector->inode cannot disappear under us. Callers achieve
 * this by holding a mark->lock or mark->group->mark_mutex for a mark on this
 * list.
 */
void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
{
        if (!conn)
                return;

        spin_lock(&conn->lock);
        __fsnotify_recalc_mask(conn);
        spin_unlock(&conn->lock);
        if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE)
                __fsnotify_update_child_dentry_flags(conn->inode);
}

/* Free all connectors queued for freeing once SRCU period ends */
static void fsnotify_connector_destroy_workfn(struct work_struct *work)
{
        struct fsnotify_mark_connector *conn, *free;

        spin_lock(&destroy_lock);
        conn = connector_destroy_list;
        connector_destroy_list = NULL;
        spin_unlock(&destroy_lock);

        synchronize_srcu(&fsnotify_mark_srcu);
        while (conn) {
                free = conn;
                conn = conn->destroy_next;
                kmem_cache_free(fsnotify_mark_connector_cachep, free);
        }
}

static struct inode *fsnotify_detach_connector_from_object(
                                        struct fsnotify_mark_connector *conn)
{
        struct inode *inode = NULL;

        if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE) {
                inode = conn->inode;
                rcu_assign_pointer(inode->i_fsnotify_marks, NULL);
                inode->i_fsnotify_mask = 0;
                conn->inode = NULL;
                conn->flags &= ~FSNOTIFY_OBJ_TYPE_INODE;
        } else if (conn->flags & FSNOTIFY_OBJ_TYPE_VFSMOUNT) {
                rcu_assign_pointer(real_mount(conn->mnt)->mnt_fsnotify_marks,
                                   NULL);
                real_mount(conn->mnt)->mnt_fsnotify_mask = 0;
                conn->mnt = NULL;
                conn->flags &= ~FSNOTIFY_OBJ_TYPE_VFSMOUNT;
        }

        return inode;
}

static void fsnotify_final_mark_destroy(struct fsnotify_mark *mark)
{
        struct fsnotify_group *group = mark->group;

        if (WARN_ON_ONCE(!group))
                return;
        group->ops->free_mark(mark);
        fsnotify_put_group(group);
}

void fsnotify_put_mark(struct fsnotify_mark *mark)
{
        struct fsnotify_mark_connector *conn;
        struct inode *inode = NULL;
        bool free_conn = false;

        /* Catch marks that were actually never attached to object */
        if (!mark->connector) {
                if (refcount_dec_and_test(&mark->refcnt))
                        fsnotify_final_mark_destroy(mark);
                return;
        }

        /*
         * We have to be careful so that traversals of obj_list under lock can
         * safely grab mark reference.
         */
        if (!refcount_dec_and_lock(&mark->refcnt, &mark->connector->lock))
                return;

        conn = mark->connector;
        hlist_del_init_rcu(&mark->obj_list);
        if (hlist_empty(&conn->list)) {
                inode = fsnotify_detach_connector_from_object(conn);
                free_conn = true;
        } else {
                __fsnotify_recalc_mask(conn);
        }
        mark->connector = NULL;
        spin_unlock(&conn->lock);

        iput(inode);

        if (free_conn) {
                spin_lock(&destroy_lock);
                conn->destroy_next = connector_destroy_list;
                connector_destroy_list = conn;
                spin_unlock(&destroy_lock);
                queue_work(system_unbound_wq, &connector_reaper_work);
        }
        /*
         * Note that we didn't update flags telling whether inode cares about
         * what's happening with children. We update these flags from
         * __fsnotify_parent() lazily when next event happens on one of our
         * children.
         */
        spin_lock(&destroy_lock);
        list_add(&mark->g_list, &destroy_list);
        spin_unlock(&destroy_lock);
        queue_delayed_work(system_unbound_wq, &reaper_work,
                           FSNOTIFY_REAPER_DELAY);
}
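
/*
 * A hedged sketch of the usual get/put pairing when borrowing a mark found
 * on a group's list ('group' is hypothetical and the list is assumed
 * non-empty): take the reference while the list is stable under
 * group->mark_mutex, drop it when done. The final fsnotify_put_mark() may
 * detach the mark from its connector and queue it for destruction as
 * described above:
 *
 *	struct fsnotify_mark *mark;
 *
 *	mutex_lock(&group->mark_mutex);
 *	mark = list_first_entry(&group->marks_list,
 *				struct fsnotify_mark, g_list);
 *	fsnotify_get_mark(mark);
 *	mutex_unlock(&group->mark_mutex);
 *	... use mark ...
 *	fsnotify_put_mark(mark);
 */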
/*
 * Get mark reference when we found the mark via lockless traversal of object
 * list. The mark may already have been removed from the list by now and be on
 * its way to destruction once the SRCU period ends.
 *
 * Also pin the group so it doesn't disappear under us.
 */
static bool fsnotify_get_mark_safe(struct fsnotify_mark *mark)
{
        if (!mark)
                return true;

        if (refcount_inc_not_zero(&mark->refcnt)) {
                spin_lock(&mark->lock);
                if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) {
                        /* mark is attached, group is still alive then */
                        atomic_inc(&mark->group->user_waits);
                        spin_unlock(&mark->lock);
                        return true;
                }
                spin_unlock(&mark->lock);
                fsnotify_put_mark(mark);
        }
        return false;
}

/*
 * Puts marks and wakes up group destruction if necessary.
 *
 * Pairs with fsnotify_get_mark_safe()
 */
static void fsnotify_put_mark_wake(struct fsnotify_mark *mark)
{
        if (mark) {
                struct fsnotify_group *group = mark->group;

                fsnotify_put_mark(mark);
                /*
                 * We abuse notification_waitq on group shutdown for waiting
                 * for all marks pinned when waiting for userspace.
                 */
                if (atomic_dec_and_test(&group->user_waits) && group->shutdown)
                        wake_up(&group->notification_waitq);
        }
}

bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info)
{
        /* This can fail if mark is being removed */
        if (!fsnotify_get_mark_safe(iter_info->inode_mark))
                return false;
        if (!fsnotify_get_mark_safe(iter_info->vfsmount_mark)) {
                fsnotify_put_mark_wake(iter_info->inode_mark);
                return false;
        }

        /*
         * Now that both marks are pinned by refcount in the inode / vfsmount
         * lists, we can drop SRCU lock, and safely resume the list iteration
         * once userspace returns.
         */
        srcu_read_unlock(&fsnotify_mark_srcu, iter_info->srcu_idx);

        return true;
}

void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info)
{
        iter_info->srcu_idx = srcu_read_lock(&fsnotify_mark_srcu);
        fsnotify_put_mark_wake(iter_info->inode_mark);
        fsnotify_put_mark_wake(iter_info->vfsmount_mark);
}

/*
 * Mark the mark as detached and remove it from the group list. The mark still
 * stays in the object list until its last reference is dropped. Note that we
 * rely on the mark being removed from the group list before the corresponding
 * reference to it is dropped. In particular we rely on mark->connector being
 * valid while we hold group->mark_mutex if we found the mark through g_list.
 *
 * Must be called with group->mark_mutex held. The caller must either hold a
 * reference to the mark or be protected by fsnotify_mark_srcu.
 */
void fsnotify_detach_mark(struct fsnotify_mark *mark)
{
        struct fsnotify_group *group = mark->group;

        WARN_ON_ONCE(!mutex_is_locked(&group->mark_mutex));
        WARN_ON_ONCE(!srcu_read_lock_held(&fsnotify_mark_srcu) &&
                     refcount_read(&mark->refcnt) < 1 +
                        !!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED));

        spin_lock(&mark->lock);
        /* something else already called this function on this mark */
        if (!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
                spin_unlock(&mark->lock);
                return;
        }
        mark->flags &= ~FSNOTIFY_MARK_FLAG_ATTACHED;
        list_del_init(&mark->g_list);
        spin_unlock(&mark->lock);

        atomic_dec(&group->num_marks);

        /* Drop mark reference acquired in fsnotify_add_mark_locked() */
        fsnotify_put_mark(mark);
}
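
/*
 * A sketch of how a permission-event backend might bracket a round trip to
 * userspace with the pair above; 'handle_event' and 'wait_for_reply' are
 * hypothetical names, not functions from this subsystem:
 *
 *	static int handle_event(..., struct fsnotify_iter_info *iter_info)
 *	{
 *		int ret;
 *
 *		if (!fsnotify_prepare_user_wait(iter_info))
 *			return -EAGAIN;
 *		ret = wait_for_reply(...);	(sleeps; SRCU is dropped)
 *		fsnotify_finish_user_wait(iter_info);
 *		return ret;
 *	}
 */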
/*
 * Free fsnotify mark. The mark is actually only marked as being freed. The
 * freeing actually happens once the last reference to the mark is dropped,
 * from a workqueue which first waits for the SRCU period to end.
 *
 * Caller must have a reference to the mark or be protected by
 * fsnotify_mark_srcu.
 */
void fsnotify_free_mark(struct fsnotify_mark *mark)
{
        struct fsnotify_group *group = mark->group;

        spin_lock(&mark->lock);
        /* something else already called this function on this mark */
        if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) {
                spin_unlock(&mark->lock);
                return;
        }
        mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
        spin_unlock(&mark->lock);

        /*
         * Some groups like to know that marks are being freed. This is a
         * callback to the group function to let it know that this mark
         * is being freed.
         */
        if (group->ops->freeing_mark)
                group->ops->freeing_mark(mark, group);
}

void fsnotify_destroy_mark(struct fsnotify_mark *mark,
                           struct fsnotify_group *group)
{
        mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
        fsnotify_detach_mark(mark);
        mutex_unlock(&group->mark_mutex);
        fsnotify_free_mark(mark);
}

/*
 * Sorting function for lists of fsnotify marks.
 *
 * Fanotify supports different notification classes (reflected as priority of
 * notification group). Events shall be passed to notification groups in
 * decreasing priority order. To achieve this marks in notification lists for
 * inodes and vfsmounts are sorted so that priorities of corresponding groups
 * are descending.
 *
 * Furthermore correct handling of the ignore mask requires processing inode
 * and vfsmount marks of each group together. Using the group address as
 * further sort criterion provides a unique sorting order and thus we can
 * merge inode and vfsmount lists of marks in linear time and find groups
 * present in both lists.
 *
 * A return value of 1 signifies that b has priority over a.
 * A return value of 0 signifies that the two marks have to be handled together.
 * A return value of -1 signifies that a has priority over b.
 */
int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b)
{
        if (a == b)
                return 0;
        if (!a)
                return 1;
        if (!b)
                return -1;
        if (a->priority < b->priority)
                return 1;
        if (a->priority > b->priority)
                return -1;
        if (a < b)
                return 1;
        return -1;
}

static int fsnotify_attach_connector_to_object(
                                struct fsnotify_mark_connector __rcu **connp,
                                struct inode *inode,
                                struct vfsmount *mnt)
{
        struct fsnotify_mark_connector *conn;

        conn = kmem_cache_alloc(fsnotify_mark_connector_cachep, GFP_KERNEL);
        if (!conn)
                return -ENOMEM;
        spin_lock_init(&conn->lock);
        INIT_HLIST_HEAD(&conn->list);
        if (inode) {
                conn->flags = FSNOTIFY_OBJ_TYPE_INODE;
                conn->inode = igrab(inode);
        } else {
                conn->flags = FSNOTIFY_OBJ_TYPE_VFSMOUNT;
                conn->mnt = mnt;
        }
        /*
         * cmpxchg() provides the barrier so that readers of *connp can see
         * only initialized structure
         */
        if (cmpxchg(connp, NULL, conn)) {
                /* Someone else created list structure for us */
                if (inode)
                        iput(inode);
                kmem_cache_free(fsnotify_mark_connector_cachep, conn);
        }

        return 0;
}
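
/*
 * A small worked example of the comparator's contract ('ga' and 'gb' are
 * hypothetical groups): with ga->priority == FS_PRIO_1 and
 * gb->priority == FS_PRIO_0, ga's marks sort first in the object lists, so
 * its events are handled before gb's; for the same group the comparator
 * returns 0 and the inode and vfsmount marks are handled together:
 *
 *	fsnotify_compare_groups(ga, gb) == -1	(ga first, higher priority)
 *	fsnotify_compare_groups(gb, ga) ==  1
 *	fsnotify_compare_groups(ga, ga) ==  0	(handle together)
 */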
/*
 * Get mark connector, make sure it is alive and return with its lock held.
 * This is for users that get the connector pointer from an inode or mount.
 * Users that hold a reference to a mark on the list may directly lock
 * connector->lock as they are sure the list cannot go away under them.
 */
static struct fsnotify_mark_connector *fsnotify_grab_connector(
                                struct fsnotify_mark_connector __rcu **connp)
{
        struct fsnotify_mark_connector *conn;
        int idx;

        idx = srcu_read_lock(&fsnotify_mark_srcu);
        conn = srcu_dereference(*connp, &fsnotify_mark_srcu);
        if (!conn)
                goto out;
        spin_lock(&conn->lock);
        if (!(conn->flags & (FSNOTIFY_OBJ_TYPE_INODE |
                             FSNOTIFY_OBJ_TYPE_VFSMOUNT))) {
                spin_unlock(&conn->lock);
                srcu_read_unlock(&fsnotify_mark_srcu, idx);
                return NULL;
        }
out:
        srcu_read_unlock(&fsnotify_mark_srcu, idx);
        return conn;
}

/*
 * Add mark into proper place in given list of marks. These marks may be used
 * for the fsnotify backend to determine which event types should be delivered
 * to which group and for which inodes. These marks are ordered according to
 * priority, highest number first, and then by the group's location in memory.
 */
static int fsnotify_add_mark_list(struct fsnotify_mark *mark,
                                  struct inode *inode, struct vfsmount *mnt,
                                  int allow_dups)
{
        struct fsnotify_mark *lmark, *last = NULL;
        struct fsnotify_mark_connector *conn;
        struct fsnotify_mark_connector __rcu **connp;
        int cmp;
        int err = 0;

        if (WARN_ON(!inode && !mnt))
                return -EINVAL;
        if (inode)
                connp = &inode->i_fsnotify_marks;
        else
                connp = &real_mount(mnt)->mnt_fsnotify_marks;
restart:
        spin_lock(&mark->lock);
        conn = fsnotify_grab_connector(connp);
        if (!conn) {
                spin_unlock(&mark->lock);
                err = fsnotify_attach_connector_to_object(connp, inode, mnt);
                if (err)
                        return err;
                goto restart;
        }

        /* is mark the first mark? */
        if (hlist_empty(&conn->list)) {
                hlist_add_head_rcu(&mark->obj_list, &conn->list);
                goto added;
        }

        /* should mark be in the middle of the current list? */
        hlist_for_each_entry(lmark, &conn->list, obj_list) {
                last = lmark;

                if ((lmark->group == mark->group) &&
                    (lmark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) &&
                    !allow_dups) {
                        err = -EEXIST;
                        goto out_err;
                }

                cmp = fsnotify_compare_groups(lmark->group, mark->group);
                if (cmp >= 0) {
                        hlist_add_before_rcu(&mark->obj_list,
                                             &lmark->obj_list);
                        goto added;
                }
        }

        BUG_ON(last == NULL);
        /* mark should be the last entry.  last is the current last entry */
        hlist_add_behind_rcu(&mark->obj_list, &last->obj_list);
added:
        mark->connector = conn;
out_err:
        spin_unlock(&conn->lock);
        spin_unlock(&mark->lock);
        return err;
}
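
/*
 * A hedged sketch of the typical backend sequence for creating and attaching
 * a mark ('my_mark_cache' and the mask choice are hypothetical; the shape
 * follows the usual pattern of in-tree users). The put at the end drops the
 * creation reference; on success the g_list reference keeps the mark alive:
 *
 *	struct fsnotify_mark *mark;
 *	int ret;
 *
 *	mark = kmem_cache_alloc(my_mark_cache, GFP_KERNEL);
 *	if (!mark)
 *		return -ENOMEM;
 *	fsnotify_init_mark(mark, group);
 *	mark->mask = FS_MODIFY | FS_CLOSE_WRITE;
 *	ret = fsnotify_add_mark(mark, inode, NULL, 0);
 *	fsnotify_put_mark(mark);
 *	return ret;
 */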
/*
 * Attach an initialized mark to a given group and fs object.
 * These marks may be used for the fsnotify backend to determine which
 * event types should be delivered to which group.
 */
int fsnotify_add_mark_locked(struct fsnotify_mark *mark, struct inode *inode,
                             struct vfsmount *mnt, int allow_dups)
{
        struct fsnotify_group *group = mark->group;
        int ret = 0;

        BUG_ON(inode && mnt);
        BUG_ON(!inode && !mnt);
        BUG_ON(!mutex_is_locked(&group->mark_mutex));

        /*
         * LOCKING ORDER!!!!
         * group->mark_mutex
         * mark->lock
         * mark->connector->lock
         */
        spin_lock(&mark->lock);
        mark->flags |= FSNOTIFY_MARK_FLAG_ALIVE | FSNOTIFY_MARK_FLAG_ATTACHED;

        list_add(&mark->g_list, &group->marks_list);
        atomic_inc(&group->num_marks);
        fsnotify_get_mark(mark); /* for g_list */
        spin_unlock(&mark->lock);

        ret = fsnotify_add_mark_list(mark, inode, mnt, allow_dups);
        if (ret)
                goto err;

        if (mark->mask)
                fsnotify_recalc_mask(mark->connector);

        return ret;
err:
        spin_lock(&mark->lock);
        mark->flags &= ~(FSNOTIFY_MARK_FLAG_ALIVE |
                         FSNOTIFY_MARK_FLAG_ATTACHED);
        list_del_init(&mark->g_list);
        spin_unlock(&mark->lock);
        atomic_dec(&group->num_marks);

        fsnotify_put_mark(mark);
        return ret;
}

int fsnotify_add_mark(struct fsnotify_mark *mark, struct inode *inode,
                      struct vfsmount *mnt, int allow_dups)
{
        int ret;
        struct fsnotify_group *group = mark->group;

        mutex_lock(&group->mark_mutex);
        ret = fsnotify_add_mark_locked(mark, inode, mnt, allow_dups);
        mutex_unlock(&group->mark_mutex);
        return ret;
}

/*
 * Given a list of marks, find the mark associated with given group. If found
 * take a reference to that mark and return it, else return NULL.
 */
struct fsnotify_mark *fsnotify_find_mark(
                                struct fsnotify_mark_connector __rcu **connp,
                                struct fsnotify_group *group)
{
        struct fsnotify_mark_connector *conn;
        struct fsnotify_mark *mark;

        conn = fsnotify_grab_connector(connp);
        if (!conn)
                return NULL;

        hlist_for_each_entry(mark, &conn->list, obj_list) {
                if (mark->group == group &&
                    (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
                        fsnotify_get_mark(mark);
                        spin_unlock(&conn->lock);
                        return mark;
                }
        }
        spin_unlock(&conn->lock);
        return NULL;
}
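
/*
 * A sketch of the lookup-and-update pattern built on fsnotify_find_mark();
 * the particular update shown (adding FS_DELETE to an existing watch) is
 * hypothetical:
 *
 *	mutex_lock(&group->mark_mutex);
 *	mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group);
 *	if (mark) {
 *		spin_lock(&mark->lock);
 *		mark->mask |= FS_DELETE;
 *		spin_unlock(&mark->lock);
 *		fsnotify_recalc_mask(mark->connector);
 *	}
 *	mutex_unlock(&group->mark_mutex);
 *	if (mark)
 *		fsnotify_put_mark(mark);	(drop the find reference)
 */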
/* Clear any marks in a group with given type */
void fsnotify_clear_marks_by_group(struct fsnotify_group *group,
                                   unsigned int type)
{
        struct fsnotify_mark *lmark, *mark;
        LIST_HEAD(to_free);
        struct list_head *head = &to_free;

        /* Skip selection step if we want to clear all marks. */
        if (type == FSNOTIFY_OBJ_ALL_TYPES) {
                head = &group->marks_list;
                goto clear;
        }
        /*
         * We have to be really careful here. Anytime we drop mark_mutex, e.g.
         * fsnotify_clear_marks_by_inode() can come and free marks, even ones
         * already on our to_free list, so we have to hold mark_mutex even
         * when accessing that list. And freeing a mark requires us to drop
         * mark_mutex. So we can reliably free only the first mark in the
         * list. That's why we first move the marks to free to the to_free
         * list in one go and then free the marks in the to_free list one by
         * one.
         */
        mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
        list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) {
                if (mark->connector->flags & type)
                        list_move(&mark->g_list, &to_free);
        }
        mutex_unlock(&group->mark_mutex);

clear:
        while (1) {
                mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
                if (list_empty(head)) {
                        mutex_unlock(&group->mark_mutex);
                        break;
                }
                mark = list_first_entry(head, struct fsnotify_mark, g_list);
                fsnotify_get_mark(mark);
                fsnotify_detach_mark(mark);
                mutex_unlock(&group->mark_mutex);
                fsnotify_free_mark(mark);
                fsnotify_put_mark(mark);
        }
}

/* Destroy all marks attached to inode / vfsmount */
void fsnotify_destroy_marks(struct fsnotify_mark_connector __rcu **connp)
{
        struct fsnotify_mark_connector *conn;
        struct fsnotify_mark *mark, *old_mark = NULL;
        struct inode *inode;

        conn = fsnotify_grab_connector(connp);
        if (!conn)
                return;
        /*
         * We have to be careful since we can race with e.g.
         * fsnotify_clear_marks_by_group() and once we drop the conn->lock,
         * the list can get modified. However we are holding mark reference
         * and thus our mark cannot be removed from obj_list so we can
         * continue iteration after regaining conn->lock.
         */
        hlist_for_each_entry(mark, &conn->list, obj_list) {
                fsnotify_get_mark(mark);
                spin_unlock(&conn->lock);
                if (old_mark)
                        fsnotify_put_mark(old_mark);
                old_mark = mark;
                fsnotify_destroy_mark(mark, mark->group);
                spin_lock(&conn->lock);
        }
        /*
         * Detach list from object now so that we don't pin inode until all
         * mark references get dropped. It would lead to strange results such
         * as delaying inode deletion or blocking unmount.
         */
        inode = fsnotify_detach_connector_from_object(conn);
        spin_unlock(&conn->lock);
        if (old_mark)
                fsnotify_put_mark(old_mark);
        iput(inode);
}

/*
 * Nothing fancy, just initialize lists and locks and counters.
 */
void fsnotify_init_mark(struct fsnotify_mark *mark,
                        struct fsnotify_group *group)
{
        memset(mark, 0, sizeof(*mark));
        spin_lock_init(&mark->lock);
        refcount_set(&mark->refcnt, 1);
        fsnotify_get_group(group);
        mark->group = group;
}

/*
 * Destroy all marks in destroy_list, waiting for the SRCU period to finish
 * before actually freeing the marks.
 */
static void fsnotify_mark_destroy_workfn(struct work_struct *work)
{
        struct fsnotify_mark *mark, *next;
        struct list_head private_destroy_list;

        spin_lock(&destroy_lock);
        /* exchange the list head */
        list_replace_init(&destroy_list, &private_destroy_list);
        spin_unlock(&destroy_lock);

        synchronize_srcu(&fsnotify_mark_srcu);

        list_for_each_entry_safe(mark, next, &private_destroy_list, g_list) {
                list_del_init(&mark->g_list);
                fsnotify_final_mark_destroy(mark);
        }
}

/* Wait for all marks queued for destruction to be actually destroyed */
void fsnotify_wait_marks_destroyed(void)
{
        flush_delayed_work(&reaper_work);
}
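
/*
 * A hedged sketch of how group teardown is expected to combine the helpers
 * above; the exact call site lives outside this file, so treat the ordering
 * as an assumption rather than a guarantee:
 *
 *	fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_ALL_TYPES);
 *	... wait for marks pinned for userspace via
 *	    group->notification_waitq ...
 *	fsnotify_wait_marks_destroyed();
 *	(only now is it safe to tear down state that the group's free_mark
 *	 callback may still touch)
 */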