/*
 * Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; see the file COPYING.  If not, write to
 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * fsnotify inode mark locking/lifetime/and refcnting
 *
 * REFCNT:
 * The group->refcnt and mark->refcnt tell how many "things" in the kernel
 * currently are referencing the objects. Both kinds of objects typically will
 * live inside the kernel with a refcnt of 2, one for its creation and one for
 * the reference a group and a mark hold to each other.
 * If you are holding the appropriate locks, you can take a reference and the
 * object itself is guaranteed to survive until the reference is dropped.
 *
 * LOCKING:
 * There are 3 locks involved with fsnotify inode marks and they MUST be taken
 * in order as follows:
 *
 * group->mark_mutex
 * mark->lock
 * mark->connector->lock
 *
 * group->mark_mutex protects the marks_list anchored inside a given group and
 * each mark is hooked via the g_list. It also protects the group's private
 * data (i.e. group limits).
 *
 * mark->lock protects the mark's attributes, such as its mask and flags.
 * Furthermore it protects the access to a reference of the group that the mark
 * is assigned to as well as the access to a reference of the inode/vfsmount
 * that is being watched by the mark.
 *
 * mark->connector->lock protects the list of marks anchored inside an
 * inode / vfsmount and each mark is hooked via the i_list.
 *
 * A list of notification marks relating to inode / mnt is contained in
 * fsnotify_mark_connector. That structure is alive as long as there are any
 * marks in the list and is also protected by fsnotify_mark_srcu. A mark gets
 * detached from fsnotify_mark_connector when last reference to the mark is
 * dropped.  Thus having mark reference is enough to protect mark->connector
 * pointer and to make sure fsnotify_mark_connector cannot disappear. Also
 * because we remove mark from g_list before dropping mark reference associated
 * with that, any mark found through g_list is guaranteed to have
 * mark->connector set until we drop group->mark_mutex.
 *
 * LIFETIME:
 * Inode marks survive between when they are added to an inode and when their
 * refcnt==0. Marks are also protected by fsnotify_mark_srcu.
 *
 * The inode mark can be cleared for a number of different reasons including:
 * - The inode is unlinked for the last time.  (fsnotify_inode_remove)
 * - The inode is being evicted from cache. (fsnotify_inode_delete)
 * - The fs the inode is on is unmounted.  (fsnotify_inode_delete/fsnotify_unmount_inodes)
 * - Something explicitly requests that it be removed.  (fsnotify_destroy_mark)
 * - The fsnotify_group associated with the mark is going away and all such marks
 *   need to be cleaned up.  (fsnotify_clear_marks_by_group)
 *
 * This has the very interesting property of being able to run concurrently with
 * any (or all) other directions.
 */

#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/srcu.h>

#include <linux/atomic.h>

#include <linux/fsnotify_backend.h>
#include "fsnotify.h"

#define FSNOTIFY_REAPER_DELAY	(1)	/* 1 jiffy */

struct srcu_struct fsnotify_mark_srcu;
struct kmem_cache *fsnotify_mark_connector_cachep;

static DEFINE_SPINLOCK(destroy_lock);
static LIST_HEAD(destroy_list);
static struct fsnotify_mark_connector *connector_destroy_list;

static void fsnotify_mark_destroy_workfn(struct work_struct *work);
static DECLARE_DELAYED_WORK(reaper_work, fsnotify_mark_destroy_workfn);

static void fsnotify_connector_destroy_workfn(struct work_struct *work);
static DECLARE_WORK(connector_reaper_work, fsnotify_connector_destroy_workfn);
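
/*
 * Illustrative only: a minimal sketch of the lock nesting documented above,
 * assuming a caller that already holds references to both the group and the
 * mark. The helper name is hypothetical and the block is compiled out; it
 * exists purely to show the required acquisition order.
 */
#if 0
static void example_lock_nesting(struct fsnotify_group *group,
				 struct fsnotify_mark *mark)
{
	mutex_lock(&group->mark_mutex);		/* 1st: group's mark list */
	spin_lock(&mark->lock);			/* 2nd: the mark itself */
	spin_lock(&mark->connector->lock);	/* 3rd: object's mark list */

	/* ... inspect or modify the mark and its connector here ... */

	spin_unlock(&mark->connector->lock);
	spin_unlock(&mark->lock);
	mutex_unlock(&group->mark_mutex);
}
#endif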

void fsnotify_get_mark(struct fsnotify_mark *mark)
{
	WARN_ON_ONCE(!atomic_read(&mark->refcnt));
	atomic_inc(&mark->refcnt);
}

/*
 * Get mark reference when we found the mark via lockless traversal of object
 * list. Mark can be already removed from the list by now and on its way to be
 * destroyed once SRCU period ends.
 */
static bool fsnotify_get_mark_safe(struct fsnotify_mark *mark)
{
	return atomic_inc_not_zero(&mark->refcnt);
}
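
/*
 * Illustrative only: a simplified sketch of how a lockless walker is expected
 * to pin a mark found on an object list. The walker stays inside an SRCU
 * read-side section while traversing and skips marks whose refcount already
 * hit zero (they are on their way to destruction). The function name is
 * hypothetical and the block is compiled out; real walkers do additional
 * bookkeeping.
 */
#if 0
static struct fsnotify_mark *example_pin_first_mark(
					struct fsnotify_mark_connector *conn)
{
	struct fsnotify_mark *mark;
	int idx;

	idx = srcu_read_lock(&fsnotify_mark_srcu);
	hlist_for_each_entry_rcu(mark, &conn->list, obj_list) {
		if (fsnotify_get_mark_safe(mark)) {
			/* Pinned by refcount; SRCU may now be dropped. */
			srcu_read_unlock(&fsnotify_mark_srcu, idx);
			return mark;	/* caller drops it with fsnotify_put_mark() */
		}
	}
	srcu_read_unlock(&fsnotify_mark_srcu, idx);
	return NULL;
}
#endif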

static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
{
	u32 new_mask = 0;
	struct fsnotify_mark *mark;

	assert_spin_locked(&conn->lock);
	hlist_for_each_entry(mark, &conn->list, obj_list) {
		if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)
			new_mask |= mark->mask;
	}
	if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE)
		conn->inode->i_fsnotify_mask = new_mask;
	else if (conn->flags & FSNOTIFY_OBJ_TYPE_VFSMOUNT)
		real_mount(conn->mnt)->mnt_fsnotify_mask = new_mask;
}

/*
 * Calculate mask of events for a list of marks. The caller must make sure
 * connector and connector->inode cannot disappear under us. Callers achieve
 * this by holding a mark->lock or mark->group->mark_mutex for a mark on this
 * list.
 */
void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
{
	if (!conn)
		return;

	spin_lock(&conn->lock);
	__fsnotify_recalc_mask(conn);
	spin_unlock(&conn->lock);
	if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE)
		__fsnotify_update_child_dentry_flags(conn->inode);
}

/* Free all connectors queued for freeing once SRCU period ends */
static void fsnotify_connector_destroy_workfn(struct work_struct *work)
{
	struct fsnotify_mark_connector *conn, *free;

	spin_lock(&destroy_lock);
	conn = connector_destroy_list;
	connector_destroy_list = NULL;
	spin_unlock(&destroy_lock);

	synchronize_srcu(&fsnotify_mark_srcu);
	while (conn) {
		free = conn;
		conn = conn->destroy_next;
		kmem_cache_free(fsnotify_mark_connector_cachep, free);
	}
}

static struct inode *fsnotify_detach_connector_from_object(
					struct fsnotify_mark_connector *conn)
{
	struct inode *inode = NULL;

	if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE) {
		inode = conn->inode;
		rcu_assign_pointer(inode->i_fsnotify_marks, NULL);
		inode->i_fsnotify_mask = 0;
		conn->inode = NULL;
		conn->flags &= ~FSNOTIFY_OBJ_TYPE_INODE;
	} else if (conn->flags & FSNOTIFY_OBJ_TYPE_VFSMOUNT) {
		rcu_assign_pointer(real_mount(conn->mnt)->mnt_fsnotify_marks,
				   NULL);
		real_mount(conn->mnt)->mnt_fsnotify_mask = 0;
		conn->mnt = NULL;
		conn->flags &= ~FSNOTIFY_OBJ_TYPE_VFSMOUNT;
	}

	return inode;
}

static void fsnotify_final_mark_destroy(struct fsnotify_mark *mark)
{
	struct fsnotify_group *group = mark->group;

	if (WARN_ON_ONCE(!group))
		return;
	group->ops->free_mark(mark);
	fsnotify_put_group(group);
}

void fsnotify_put_mark(struct fsnotify_mark *mark)
{
	struct fsnotify_mark_connector *conn;
	struct inode *inode = NULL;
	bool free_conn = false;

	/* Catch marks that were actually never attached to object */
	if (!mark->connector) {
		if (atomic_dec_and_test(&mark->refcnt))
			fsnotify_final_mark_destroy(mark);
		return;
	}

	/*
	 * We have to be careful so that traversals of obj_list under lock can
	 * safely grab mark reference.
	 */
	if (!atomic_dec_and_lock(&mark->refcnt, &mark->connector->lock))
		return;

	conn = mark->connector;
	hlist_del_init_rcu(&mark->obj_list);
	if (hlist_empty(&conn->list)) {
		inode = fsnotify_detach_connector_from_object(conn);
		free_conn = true;
	} else {
		__fsnotify_recalc_mask(conn);
	}
	mark->connector = NULL;
	spin_unlock(&conn->lock);

	iput(inode);

	if (free_conn) {
		spin_lock(&destroy_lock);
		conn->destroy_next = connector_destroy_list;
		connector_destroy_list = conn;
		spin_unlock(&destroy_lock);
		queue_work(system_unbound_wq, &connector_reaper_work);
	}
	/*
	 * Note that we didn't update flags telling whether inode cares about
	 * what's happening with children. We update these flags from
	 * __fsnotify_parent() lazily when next event happens on one of our
	 * children.
	 */
	spin_lock(&destroy_lock);
	list_add(&mark->g_list, &destroy_list);
	spin_unlock(&destroy_lock);
	queue_delayed_work(system_unbound_wq, &reaper_work,
			   FSNOTIFY_REAPER_DELAY);
}

bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info)
{
	struct fsnotify_group *group;

	if (WARN_ON_ONCE(!iter_info->inode_mark && !iter_info->vfsmount_mark))
		return false;

	if (iter_info->inode_mark)
		group = iter_info->inode_mark->group;
	else
		group = iter_info->vfsmount_mark->group;

	/*
	 * Since acquisition of mark reference is an atomic op as well, we can
	 * be sure this inc is seen before any effect of refcount increment.
	 */
	atomic_inc(&group->user_waits);

	if (iter_info->inode_mark) {
		/* This can fail if mark is being removed */
		if (!fsnotify_get_mark_safe(iter_info->inode_mark))
			goto out_wait;
	}
	if (iter_info->vfsmount_mark) {
		if (!fsnotify_get_mark_safe(iter_info->vfsmount_mark))
			goto out_inode;
	}

	/*
	 * Now that both marks are pinned by refcount in the inode / vfsmount
	 * lists, we can drop SRCU lock, and safely resume the list iteration
	 * once userspace returns.
	 */
	srcu_read_unlock(&fsnotify_mark_srcu, iter_info->srcu_idx);

	return true;
out_inode:
	if (iter_info->inode_mark)
		fsnotify_put_mark(iter_info->inode_mark);
out_wait:
	if (atomic_dec_and_test(&group->user_waits) && group->shutdown)
		wake_up(&group->notification_waitq);
	return false;
}

void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info)
{
	struct fsnotify_group *group = NULL;

	iter_info->srcu_idx = srcu_read_lock(&fsnotify_mark_srcu);
	if (iter_info->inode_mark) {
		group = iter_info->inode_mark->group;
		fsnotify_put_mark(iter_info->inode_mark);
	}
	if (iter_info->vfsmount_mark) {
		group = iter_info->vfsmount_mark->group;
		fsnotify_put_mark(iter_info->vfsmount_mark);
	}
	/*
	 * We abuse notification_waitq on group shutdown for waiting for all
	 * marks pinned when waiting for userspace.
	 */
	if (atomic_dec_and_test(&group->user_waits) && group->shutdown)
		wake_up(&group->notification_waitq);
}
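
/*
 * Illustrative only: a minimal sketch of the intended pairing of the two
 * helpers above, modelled on how blocking (permission-style) events wait for
 * a userspace reply. wait_for_userspace_reply() is a hypothetical placeholder
 * and the block is compiled out.
 */
#if 0
static int example_wait_for_reply(struct fsnotify_iter_info *iter_info)
{
	int ret;

	/* Pin the marks and leave the SRCU read-side section. */
	if (!fsnotify_prepare_user_wait(iter_info))
		return -ENOENT;		/* marks are already being removed */

	ret = wait_for_userspace_reply();	/* may sleep for a long time */

	/* Re-enter SRCU and drop the mark references taken above. */
	fsnotify_finish_user_wait(iter_info);
	return ret;
}
#endif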

/*
 * Mark the mark as detached and remove it from the group list. The mark still
 * stays in the object list until its last reference is dropped. Note that we
 * rely on the mark being removed from the group list before the corresponding
 * reference to it is dropped. In particular we rely on mark->connector being
 * valid while we hold group->mark_mutex if we found the mark through g_list.
 *
 * Must be called with group->mark_mutex held. The caller must either hold
 * reference to the mark or be protected by fsnotify_mark_srcu.
 */
void fsnotify_detach_mark(struct fsnotify_mark *mark)
{
	struct fsnotify_group *group = mark->group;

	WARN_ON_ONCE(!mutex_is_locked(&group->mark_mutex));
	WARN_ON_ONCE(!srcu_read_lock_held(&fsnotify_mark_srcu) &&
		     atomic_read(&mark->refcnt) < 1 +
			!!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED));

	spin_lock(&mark->lock);
	/* something else already called this function on this mark */
	if (!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
		spin_unlock(&mark->lock);
		return;
	}
	mark->flags &= ~FSNOTIFY_MARK_FLAG_ATTACHED;
	list_del_init(&mark->g_list);
	spin_unlock(&mark->lock);

	atomic_dec(&group->num_marks);

	/* Drop mark reference acquired in fsnotify_add_mark_locked() */
	fsnotify_put_mark(mark);
}

/*
 * Free fsnotify mark. The mark is actually only marked as being freed. The
 * freeing actually happens only once the last reference to the mark is
 * dropped, from a workqueue which first waits for the SRCU period to end.
 *
 * Caller must have a reference to the mark or be protected by
 * fsnotify_mark_srcu.
 */
void fsnotify_free_mark(struct fsnotify_mark *mark)
{
	struct fsnotify_group *group = mark->group;

	spin_lock(&mark->lock);
	/* something else already called this function on this mark */
	if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) {
		spin_unlock(&mark->lock);
		return;
	}
	mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
	spin_unlock(&mark->lock);

	/*
	 * Some groups like to know that marks are being freed. This is a
	 * callback to the group function to let it know that this mark
	 * is being freed.
	 */
	if (group->ops->freeing_mark)
		group->ops->freeing_mark(mark, group);
}

void fsnotify_destroy_mark(struct fsnotify_mark *mark,
			   struct fsnotify_group *group)
{
	mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
	fsnotify_detach_mark(mark);
	mutex_unlock(&group->mark_mutex);
	fsnotify_free_mark(mark);
}

/*
 * Sorting function for lists of fsnotify marks.
 *
 * Fanotify supports different notification classes (reflected as priority of
 * notification group). Events shall be passed to notification groups in
 * decreasing priority order. To achieve this marks in notification lists for
 * inodes and vfsmounts are sorted so that priorities of corresponding groups
 * are descending.
 *
 * Furthermore correct handling of the ignore mask requires processing inode
 * and vfsmount marks of each group together. Using the group address as
 * further sort criterion provides a unique sorting order and thus we can
 * merge inode and vfsmount lists of marks in linear time and find groups
 * present in both lists.
 *
 * A return value of 1 signifies that b has priority over a.
 * A return value of 0 signifies that the two marks have to be handled together.
 * A return value of -1 signifies that a has priority over b.
 */
int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b)
{
	if (a == b)
		return 0;
	if (!a)
		return 1;
	if (!b)
		return -1;
	if (a->priority < b->priority)
		return 1;
	if (a->priority > b->priority)
		return -1;
	if (a < b)
		return 1;
	return -1;
}
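
/*
 * Illustrative only: a minimal sketch of how fsnotify_compare_groups() is
 * meant to be consumed when walking the inode and vfsmount mark lists in
 * parallel, merging them in priority order. handle_mark() and the helper
 * name are hypothetical; the block is compiled out.
 */
#if 0
static void example_merge_step(struct fsnotify_mark *inode_mark,
			       struct fsnotify_mark *vfsmount_mark)
{
	int cmp = fsnotify_compare_groups(
			inode_mark ? inode_mark->group : NULL,
			vfsmount_mark ? vfsmount_mark->group : NULL);

	if (cmp > 0)		/* vfsmount mark's group goes first */
		handle_mark(NULL, vfsmount_mark);
	else if (cmp < 0)	/* inode mark's group goes first */
		handle_mark(inode_mark, NULL);
	else			/* same group: ignore mask needs both together */
		handle_mark(inode_mark, vfsmount_mark);
}
#endif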

static int fsnotify_attach_connector_to_object(
				struct fsnotify_mark_connector __rcu **connp,
				struct inode *inode,
				struct vfsmount *mnt)
{
	struct fsnotify_mark_connector *conn;

	conn = kmem_cache_alloc(fsnotify_mark_connector_cachep, GFP_KERNEL);
	if (!conn)
		return -ENOMEM;
	spin_lock_init(&conn->lock);
	INIT_HLIST_HEAD(&conn->list);
	if (inode) {
		conn->flags = FSNOTIFY_OBJ_TYPE_INODE;
		conn->inode = igrab(inode);
	} else {
		conn->flags = FSNOTIFY_OBJ_TYPE_VFSMOUNT;
		conn->mnt = mnt;
	}
	/*
	 * cmpxchg() provides the barrier so that readers of *connp can see
	 * only a fully initialized structure.
	 */
	if (cmpxchg(connp, NULL, conn)) {
		/* Someone else created list structure for us */
		if (inode)
			iput(inode);
		kmem_cache_free(fsnotify_mark_connector_cachep, conn);
	}

	return 0;
}

/*
 * Get mark connector, make sure it is alive and return with its lock held.
 * This is for users that get connector pointer from inode or mount. Users that
 * hold reference to a mark on the list may directly lock connector->lock as
 * they are sure list cannot go away under them.
 */
static struct fsnotify_mark_connector *fsnotify_grab_connector(
				struct fsnotify_mark_connector __rcu **connp)
{
	struct fsnotify_mark_connector *conn;
	int idx;

	idx = srcu_read_lock(&fsnotify_mark_srcu);
	conn = srcu_dereference(*connp, &fsnotify_mark_srcu);
	if (!conn)
		goto out;
	spin_lock(&conn->lock);
	if (!(conn->flags & (FSNOTIFY_OBJ_TYPE_INODE |
			     FSNOTIFY_OBJ_TYPE_VFSMOUNT))) {
		spin_unlock(&conn->lock);
		srcu_read_unlock(&fsnotify_mark_srcu, idx);
		return NULL;
	}
out:
	srcu_read_unlock(&fsnotify_mark_srcu, idx);
	return conn;
}

/*
 * Add mark into proper place in given list of marks. These marks may be used
 * for the fsnotify backend to determine which event types should be delivered
 * to which group and for which inodes. These marks are ordered according to
 * priority, highest number first, and then by the group's location in memory.
 */
static int fsnotify_add_mark_list(struct fsnotify_mark *mark,
				  struct inode *inode, struct vfsmount *mnt,
				  int allow_dups)
{
	struct fsnotify_mark *lmark, *last = NULL;
	struct fsnotify_mark_connector *conn;
	struct fsnotify_mark_connector __rcu **connp;
	int cmp;
	int err = 0;

	if (WARN_ON(!inode && !mnt))
		return -EINVAL;
	if (inode)
		connp = &inode->i_fsnotify_marks;
	else
		connp = &real_mount(mnt)->mnt_fsnotify_marks;
restart:
	spin_lock(&mark->lock);
	conn = fsnotify_grab_connector(connp);
	if (!conn) {
		spin_unlock(&mark->lock);
		err = fsnotify_attach_connector_to_object(connp, inode, mnt);
		if (err)
			return err;
		goto restart;
	}

	/* is mark the first mark? */
	if (hlist_empty(&conn->list)) {
		hlist_add_head_rcu(&mark->obj_list, &conn->list);
		goto added;
	}

	/* should mark be in the middle of the current list? */
	hlist_for_each_entry(lmark, &conn->list, obj_list) {
		last = lmark;

		if ((lmark->group == mark->group) &&
		    (lmark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) &&
		    !allow_dups) {
			err = -EEXIST;
			goto out_err;
		}

		cmp = fsnotify_compare_groups(lmark->group, mark->group);
		if (cmp >= 0) {
			hlist_add_before_rcu(&mark->obj_list, &lmark->obj_list);
			goto added;
		}
	}

	BUG_ON(last == NULL);
	/* mark should be the last entry.  last is the current last entry */
	hlist_add_behind_rcu(&mark->obj_list, &last->obj_list);
added:
	mark->connector = conn;
out_err:
	spin_unlock(&conn->lock);
	spin_unlock(&mark->lock);
	return err;
}

/*
 * Attach an initialized mark to a given group and fs object.
 * These marks may be used for the fsnotify backend to determine which
 * event types should be delivered to which group.
 */
int fsnotify_add_mark_locked(struct fsnotify_mark *mark, struct inode *inode,
			     struct vfsmount *mnt, int allow_dups)
{
	struct fsnotify_group *group = mark->group;
	int ret = 0;

	BUG_ON(inode && mnt);
	BUG_ON(!inode && !mnt);
	BUG_ON(!mutex_is_locked(&group->mark_mutex));

	/*
	 * LOCKING ORDER!!!!
	 * group->mark_mutex
	 * mark->lock
	 * mark->connector->lock
	 */
	spin_lock(&mark->lock);
	mark->flags |= FSNOTIFY_MARK_FLAG_ALIVE | FSNOTIFY_MARK_FLAG_ATTACHED;

	list_add(&mark->g_list, &group->marks_list);
	atomic_inc(&group->num_marks);
	fsnotify_get_mark(mark); /* for g_list */
	spin_unlock(&mark->lock);

	ret = fsnotify_add_mark_list(mark, inode, mnt, allow_dups);
	if (ret)
		goto err;

	if (mark->mask)
		fsnotify_recalc_mask(mark->connector);

	return ret;
err:
	mark->flags &= ~(FSNOTIFY_MARK_FLAG_ALIVE |
			 FSNOTIFY_MARK_FLAG_ATTACHED);
	list_del_init(&mark->g_list);
	atomic_dec(&group->num_marks);

	fsnotify_put_mark(mark);
	return ret;
}

int fsnotify_add_mark(struct fsnotify_mark *mark, struct inode *inode,
		      struct vfsmount *mnt, int allow_dups)
{
	int ret;
	struct fsnotify_group *group = mark->group;

	mutex_lock(&group->mark_mutex);
	ret = fsnotify_add_mark_locked(mark, inode, mnt, allow_dups);
	mutex_unlock(&group->mark_mutex);
	return ret;
}

/*
 * Given a list of marks, find the mark associated with given group. If found
 * take a reference to that mark and return it, else return NULL.
 */
struct fsnotify_mark *fsnotify_find_mark(
				struct fsnotify_mark_connector __rcu **connp,
				struct fsnotify_group *group)
{
	struct fsnotify_mark_connector *conn;
	struct fsnotify_mark *mark;

	conn = fsnotify_grab_connector(connp);
	if (!conn)
		return NULL;

	hlist_for_each_entry(mark, &conn->list, obj_list) {
		if (mark->group == group &&
		    (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
			fsnotify_get_mark(mark);
			spin_unlock(&conn->lock);
			return mark;
		}
	}
	spin_unlock(&conn->lock);
	return NULL;
}

/* Clear any marks in a group with given type */
void fsnotify_clear_marks_by_group(struct fsnotify_group *group,
				   unsigned int type)
{
	struct fsnotify_mark *lmark, *mark;
	LIST_HEAD(to_free);
	struct list_head *head = &to_free;

	/* Skip selection step if we want to clear all marks. */
	if (type == FSNOTIFY_OBJ_ALL_TYPES) {
		head = &group->marks_list;
		goto clear;
	}
	/*
	 * We have to be really careful here. Anytime we drop mark_mutex, e.g.
	 * fsnotify_clear_marks_by_inode() can come and free marks, even marks
	 * on our to_free list, so we have to hold mark_mutex even when
	 * accessing that list. And freeing a mark requires us to drop
	 * mark_mutex. So we can reliably free only the first mark in the
	 * list. That's why we first move the marks to be freed to the to_free
	 * list in one go and then free the marks in the to_free list one by
	 * one.
	 */
	mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
	list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) {
		if (mark->connector->flags & type)
			list_move(&mark->g_list, &to_free);
	}
	mutex_unlock(&group->mark_mutex);

clear:
	while (1) {
		mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
		if (list_empty(head)) {
			mutex_unlock(&group->mark_mutex);
			break;
		}
		mark = list_first_entry(head, struct fsnotify_mark, g_list);
		fsnotify_get_mark(mark);
		fsnotify_detach_mark(mark);
		mutex_unlock(&group->mark_mutex);
		fsnotify_free_mark(mark);
		fsnotify_put_mark(mark);
	}
}

/* Destroy all marks attached to inode / vfsmount */
void fsnotify_destroy_marks(struct fsnotify_mark_connector __rcu **connp)
{
	struct fsnotify_mark_connector *conn;
	struct fsnotify_mark *mark, *old_mark = NULL;
	struct inode *inode;

	conn = fsnotify_grab_connector(connp);
	if (!conn)
		return;
	/*
	 * We have to be careful since we can race with e.g.
	 * fsnotify_clear_marks_by_group() and once we drop the conn->lock, the
	 * list can get modified. However we are holding mark reference and
	 * thus our mark cannot be removed from obj_list so we can continue
	 * iteration after regaining conn->lock.
	 */
	hlist_for_each_entry(mark, &conn->list, obj_list) {
		fsnotify_get_mark(mark);
		spin_unlock(&conn->lock);
		if (old_mark)
			fsnotify_put_mark(old_mark);
		old_mark = mark;
		fsnotify_destroy_mark(mark, mark->group);
		spin_lock(&conn->lock);
	}
	/*
	 * Detach list from object now so that we don't pin inode until all
	 * mark references get dropped. It would lead to strange results such
	 * as delaying inode deletion or blocking unmount.
	 */
	inode = fsnotify_detach_connector_from_object(conn);
	spin_unlock(&conn->lock);
	if (old_mark)
		fsnotify_put_mark(old_mark);
	iput(inode);
}

/*
 * Nothing fancy, just initialize lists and locks and counters.
 */
void fsnotify_init_mark(struct fsnotify_mark *mark,
			struct fsnotify_group *group)
{
	memset(mark, 0, sizeof(*mark));
	spin_lock_init(&mark->lock);
	atomic_set(&mark->refcnt, 1);
	fsnotify_get_group(group);
	mark->group = group;
}

/*
 * Destroy all marks in destroy_list, waits for SRCU period to finish before
 * actually freeing marks.
 */
static void fsnotify_mark_destroy_workfn(struct work_struct *work)
{
	struct fsnotify_mark *mark, *next;
	struct list_head private_destroy_list;

	spin_lock(&destroy_lock);
	/* exchange the list head */
	list_replace_init(&destroy_list, &private_destroy_list);
	spin_unlock(&destroy_lock);

	synchronize_srcu(&fsnotify_mark_srcu);

	list_for_each_entry_safe(mark, next, &private_destroy_list, g_list) {
		list_del_init(&mark->g_list);
		fsnotify_final_mark_destroy(mark);
	}
}

/* Wait for all marks queued for destruction to be actually destroyed */
void fsnotify_wait_marks_destroyed(void)
{
	flush_delayed_work(&reaper_work);
}
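
/*
 * Illustrative only: a hedged end-to-end sketch stringing together the
 * helpers in this file the way a backend typically uses them. The function
 * names are hypothetical, the mark allocation must match what the group's
 * ->free_mark() callback expects, and the block is compiled out.
 */
#if 0
static int example_watch_inode(struct fsnotify_group *group,
			       struct inode *inode, __u32 mask)
{
	struct fsnotify_mark *mark;
	int ret;

	mark = kzalloc(sizeof(*mark), GFP_KERNEL);	/* backend-specific in reality */
	if (!mark)
		return -ENOMEM;

	fsnotify_init_mark(mark, group);	/* refcnt == 1, group reference taken */
	mark->mask = mask;

	ret = fsnotify_add_mark(mark, inode, NULL, 0);	/* attach to the inode */
	if (ret)
		fsnotify_put_mark(mark);	/* drop the creator's reference on failure */
	return ret;
}

static void example_unwatch(struct fsnotify_mark *mark,
			    struct fsnotify_group *group)
{
	fsnotify_destroy_mark(mark, group);	/* detach from group and object lists */
	fsnotify_put_mark(mark);		/* final free is deferred past an SRCU period */
	fsnotify_wait_marks_destroyed();	/* typically only done at group teardown */
}
#endif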