/*
 * Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; see the file COPYING.  If not, write to
 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * fsnotify inode mark locking, lifetime and refcnting
 *
 * REFCNT:
 * The group->refcnt and mark->refcnt tell how many "things" in the kernel
 * currently are referencing the objects. Both kinds of objects typically live
 * inside the kernel with a refcnt of 2: one for the object's creation and one
 * for the reference a group and a mark hold to each other.
 * If you are holding the appropriate locks, you can take a reference and the
 * object itself is guaranteed to survive until the reference is dropped.
 *
 * LOCKING:
 * There are 3 locks involved with fsnotify inode marks and they MUST be taken
 * in order as follows:
 *
 * group->mark_mutex
 * mark->lock
 * inode->i_lock
 *
 * group->mark_mutex protects the marks_list anchored inside a given group and
 * each mark is hooked via the g_list.  It also protects the group's private
 * data (i.e. group limits).
 *
 * mark->lock protects the mark's attributes like its masks and flags.
 * Furthermore it protects the access to a reference of the group that the mark
 * is assigned to as well as the access to a reference of the inode/vfsmount
 * that is being watched by the mark.
 *
 * inode->i_lock protects the i_fsnotify_marks list anchored inside a
 * given inode and each mark is hooked via the i_list. (and sorta the
 * free_i_list)
 *
 *
 * LIFETIME:
 * Inode marks survive between when they are added to an inode and when their
 * refcnt==0.
 *
 * The inode mark can be cleared for a number of different reasons including:
 * - The inode is unlinked for the last time.  (fsnotify_inode_remove)
 * - The inode is being evicted from cache. (fsnotify_inode_delete)
 * - The fs the inode is on is unmounted.  (fsnotify_inode_delete/fsnotify_unmount_inodes)
 * - Something explicitly requests that it be removed.  (fsnotify_destroy_mark)
 * - The fsnotify_group associated with the mark is going away and all such marks
 *   need to be cleaned up.  (fsnotify_clear_marks_by_group)
 *
 * Worst case we are given an inode and need to clean up all the marks on that
 * inode.  We take i_lock and walk the i_fsnotify_marks safely.  For each
 * mark on the list we take a reference (so the mark can't disappear under us).
 * We remove that mark from the inode's list of marks and we add this mark to a
 * private list anchored on the stack using i_free_list; we walk i_free_list
 * and before we destroy the mark we make sure that we don't race with a
 * concurrent destroy_group by getting a ref to the mark's group and taking the
 * group's mutex.
 *
 * Very similarly for freeing by group, except we use free_g_list.
 *
 * This has the very interesting property of being able to run concurrently with
 * any (or all) other directions.
 */
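
/*
 * Illustrative sketch (not code from this file): readers such as the event
 * delivery path hold fsnotify_mark_srcu while walking an object list and
 * take a reference before using a mark outside the read-side critical
 * section. The surrounding loop and the event_mask check are assumptions
 * for illustration only:
 *
 *	struct fsnotify_mark *mark;
 *	int idx;
 *
 *	idx = srcu_read_lock(&fsnotify_mark_srcu);
 *	hlist_for_each_entry_rcu(mark, &inode->i_fsnotify_marks, obj_list) {
 *		if (mark->mask & event_mask) {
 *			fsnotify_get_mark(mark);	// survives the unlock below
 *			break;
 *		}
 *	}
 *	srcu_read_unlock(&fsnotify_mark_srcu, idx);
 */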

#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/srcu.h>

#include <linux/atomic.h>

#include <linux/fsnotify_backend.h>
#include "fsnotify.h"

struct srcu_struct fsnotify_mark_srcu;

void fsnotify_get_mark(struct fsnotify_mark *mark)
{
	atomic_inc(&mark->refcnt);
}

void fsnotify_put_mark(struct fsnotify_mark *mark)
{
	if (atomic_dec_and_test(&mark->refcnt)) {
		if (mark->group)
			fsnotify_put_group(mark->group);
		mark->free_mark(mark);
	}
}

/* Calculate mask of events for a list of marks */
u32 fsnotify_recalc_mask(struct hlist_head *head)
{
	u32 new_mask = 0;
	struct fsnotify_mark *mark;

	hlist_for_each_entry(mark, head, obj_list)
		new_mask |= mark->mask;
	return new_mask;
}

/*
 * Remove mark from inode / vfsmount list, group list, drop inode reference
 * if we got one.
 *
 * Must be called with group->mark_mutex held.
 */
void fsnotify_detach_mark(struct fsnotify_mark *mark)
{
	struct inode *inode = NULL;
	struct fsnotify_group *group = mark->group;

	BUG_ON(!mutex_is_locked(&group->mark_mutex));

	spin_lock(&mark->lock);

	/* something else already called this function on this mark */
	if (!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
		spin_unlock(&mark->lock);
		return;
	}

	mark->flags &= ~FSNOTIFY_MARK_FLAG_ATTACHED;

	if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) {
		inode = mark->inode;
		fsnotify_destroy_inode_mark(mark);
	} else if (mark->flags & FSNOTIFY_MARK_FLAG_VFSMOUNT)
		fsnotify_destroy_vfsmount_mark(mark);
	else
		BUG();
	/*
	 * Note that we didn't update flags telling whether inode cares about
	 * what's happening with children. We update these flags from
	 * __fsnotify_parent() lazily when next event happens on one of our
	 * children.
	 */

	list_del_init(&mark->g_list);

	spin_unlock(&mark->lock);

	if (inode && (mark->flags & FSNOTIFY_MARK_FLAG_OBJECT_PINNED))
		iput(inode);

	atomic_dec(&group->num_marks);
}

static void
fsnotify_mark_free_rcu(struct rcu_head *rcu)
{
	struct fsnotify_mark *mark;

	mark = container_of(rcu, struct fsnotify_mark, g_rcu);
	fsnotify_put_mark(mark);
}

/*
 * Free fsnotify mark. The freeing is actually happening from a call_srcu
 * callback. Caller must have a reference to the mark or be protected by
 * fsnotify_mark_srcu.
 */
void fsnotify_free_mark(struct fsnotify_mark *mark)
{
	struct fsnotify_group *group = mark->group;

	spin_lock(&mark->lock);
	/* something else already called this function on this mark */
	if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) {
		spin_unlock(&mark->lock);
		return;
	}
	mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
	spin_unlock(&mark->lock);

	call_srcu(&fsnotify_mark_srcu, &mark->g_rcu, fsnotify_mark_free_rcu);

	/*
	 * Some groups like to know that marks are being freed.  This is a
	 * callback to the group function to let it know that this mark
	 * is being freed.
	 */
	if (group->ops->freeing_mark)
		group->ops->freeing_mark(mark, group);
}
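
/*
 * Illustrative sketch (hypothetical caller, not code from this file): after
 * the set of marks on an inode has changed, the inode's cached event mask
 * would typically be refreshed from the surviving marks with
 * fsnotify_recalc_mask() above, under i_lock per the ordering rules in the
 * header comment:
 *
 *	spin_lock(&inode->i_lock);
 *	inode->i_fsnotify_mask = fsnotify_recalc_mask(&inode->i_fsnotify_marks);
 *	spin_unlock(&inode->i_lock);
 */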

void fsnotify_destroy_mark(struct fsnotify_mark *mark,
			   struct fsnotify_group *group)
{
	mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
	fsnotify_detach_mark(mark);
	mutex_unlock(&group->mark_mutex);
	fsnotify_free_mark(mark);
}

void fsnotify_destroy_marks(struct hlist_head *head, spinlock_t *lock)
{
	struct fsnotify_mark *mark;

	while (1) {
		/*
		 * We have to be careful since we can race with e.g.
		 * fsnotify_clear_marks_by_group() and once we drop 'lock',
		 * mark can get removed from the obj_list and destroyed. But
		 * we are holding mark reference so mark cannot be freed and
		 * calling fsnotify_destroy_mark() more than once is fine.
		 */
		spin_lock(lock);
		if (hlist_empty(head)) {
			spin_unlock(lock);
			break;
		}
		mark = hlist_entry(head->first, struct fsnotify_mark, obj_list);
		/*
		 * We don't update i_fsnotify_mask / mnt_fsnotify_mask here
		 * since inode / mount is going away anyway. So just remove
		 * mark from the list.
		 */
		hlist_del_init_rcu(&mark->obj_list);
		fsnotify_get_mark(mark);
		spin_unlock(lock);
		fsnotify_destroy_mark(mark, mark->group);
		fsnotify_put_mark(mark);
	}
}

void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask)
{
	assert_spin_locked(&mark->lock);

	mark->mask = mask;

	if (mark->flags & FSNOTIFY_MARK_FLAG_INODE)
		fsnotify_set_inode_mark_mask_locked(mark, mask);
}

void fsnotify_set_mark_ignored_mask_locked(struct fsnotify_mark *mark, __u32 mask)
{
	assert_spin_locked(&mark->lock);

	mark->ignored_mask = mask;
}

/*
 * Sorting function for lists of fsnotify marks.
 *
 * Fanotify supports different notification classes (reflected as priority of
 * notification group). Events shall be passed to notification groups in
 * decreasing priority order. To achieve this marks in notification lists for
 * inodes and vfsmounts are sorted so that priorities of corresponding groups
 * are descending.
 *
 * Furthermore correct handling of the ignore mask requires processing inode
 * and vfsmount marks of each group together. Using the group address as
 * further sort criterion provides a unique sorting order and thus we can
 * merge inode and vfsmount lists of marks in linear time and find groups
 * present in both lists.
 *
 * A return value of 1 signifies that b has priority over a.
 * A return value of 0 signifies that the two marks have to be handled together.
 * A return value of -1 signifies that a has priority over b.
 */
int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b)
{
	if (a == b)
		return 0;
	if (!a)
		return 1;
	if (!b)
		return -1;
	if (a->priority < b->priority)
		return 1;
	if (a->priority > b->priority)
		return -1;
	if (a < b)
		return 1;
	return -1;
}
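
/*
 * Worked example (values assumed for illustration): given two groups
 *
 *	ga->priority = 1;	// e.g. a FAN_CLASS_CONTENT fanotify group
 *	gb->priority = 0;	// e.g. a FAN_CLASS_NOTIF fanotify group
 *
 * fsnotify_compare_groups(ga, gb) returns -1, so ga's marks sort earlier
 * in the object lists and ga sees events first. For two distinct groups of
 * equal priority the pointer comparison breaks the tie, giving every list
 * one stable total order; 0 is returned only for a == b, i.e. an inode
 * mark and a vfsmount mark of the same group, which must be handled
 * together for the ignore mask to work.
 */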

/* Add mark into proper place in given list of marks */
int fsnotify_add_mark_list(struct hlist_head *head, struct fsnotify_mark *mark,
			   int allow_dups)
{
	struct fsnotify_mark *lmark, *last = NULL;
	int cmp;

	/* is mark the first mark? */
	if (hlist_empty(head)) {
		hlist_add_head_rcu(&mark->obj_list, head);
		return 0;
	}

	/* should mark be in the middle of the current list? */
	hlist_for_each_entry(lmark, head, obj_list) {
		last = lmark;

		if ((lmark->group == mark->group) && !allow_dups)
			return -EEXIST;

		cmp = fsnotify_compare_groups(lmark->group, mark->group);
		if (cmp >= 0) {
			hlist_add_before_rcu(&mark->obj_list, &lmark->obj_list);
			return 0;
		}
	}

	BUG_ON(last == NULL);
	/* mark should be the last entry.  last is the current last entry */
	hlist_add_behind_rcu(&mark->obj_list, &last->obj_list);
	return 0;
}

/*
 * Attach an initialized mark to a given group and fs object.
 * These marks may be used for the fsnotify backend to determine which
 * event types should be delivered to which group.
 */
int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
			     struct fsnotify_group *group, struct inode *inode,
			     struct vfsmount *mnt, int allow_dups)
{
	int ret = 0;

	BUG_ON(inode && mnt);
	BUG_ON(!inode && !mnt);
	BUG_ON(!mutex_is_locked(&group->mark_mutex));

	/*
	 * LOCKING ORDER!!!!
	 * group->mark_mutex
	 * mark->lock
	 * inode->i_lock
	 */
	spin_lock(&mark->lock);
	mark->flags |= FSNOTIFY_MARK_FLAG_ALIVE | FSNOTIFY_MARK_FLAG_ATTACHED;

	fsnotify_get_group(group);
	mark->group = group;
	list_add(&mark->g_list, &group->marks_list);
	atomic_inc(&group->num_marks);
	fsnotify_get_mark(mark); /* for i_list and g_list */

	if (inode) {
		ret = fsnotify_add_inode_mark(mark, group, inode, allow_dups);
		if (ret)
			goto err;
	} else if (mnt) {
		ret = fsnotify_add_vfsmount_mark(mark, group, mnt, allow_dups);
		if (ret)
			goto err;
	} else {
		BUG();
	}

	/* this will pin the object if appropriate */
	fsnotify_set_mark_mask_locked(mark, mark->mask);
	spin_unlock(&mark->lock);

	if (inode)
		__fsnotify_update_child_dentry_flags(inode);

	return ret;
err:
	mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
	list_del_init(&mark->g_list);
	fsnotify_put_group(group);
	mark->group = NULL;
	atomic_dec(&group->num_marks);

	spin_unlock(&mark->lock);

	call_srcu(&fsnotify_mark_srcu, &mark->g_rcu, fsnotify_mark_free_rcu);
	return ret;
}

int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group,
		      struct inode *inode, struct vfsmount *mnt, int allow_dups)
{
	int ret;

	mutex_lock(&group->mark_mutex);
	ret = fsnotify_add_mark_locked(mark, group, inode, mnt, allow_dups);
	mutex_unlock(&group->mark_mutex);
	return ret;
}

/*
 * Given a list of marks, find the mark associated with given group. If found
 * take a reference to that mark and return it, else return NULL.
 */
struct fsnotify_mark *fsnotify_find_mark(struct hlist_head *head,
					 struct fsnotify_group *group)
{
	struct fsnotify_mark *mark;

	hlist_for_each_entry(mark, head, obj_list) {
		if (mark->group == group) {
			fsnotify_get_mark(mark);
			return mark;
		}
	}
	return NULL;
}
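
/*
 * Illustrative usage sketch (hypothetical backend; my_free_mark and the
 * chosen mask are assumptions, not kernel API): attaching a new inode mark
 * and handling failure. fsnotify_init_mark() gives the caller one
 * reference; fsnotify_add_mark() takes its own for the g_list/obj_list, so
 * the caller can drop the creation reference unconditionally afterwards
 * (on the error path that drop, together with the SRCU-deferred put from
 * the err: label above, is what finally frees the mark):
 *
 *	static void my_free_mark(struct fsnotify_mark *mark)
 *	{
 *		kfree(mark);
 *	}
 *
 *	struct fsnotify_mark *mark;
 *	int ret;
 *
 *	mark = kmalloc(sizeof(*mark), GFP_KERNEL);
 *	if (!mark)
 *		return -ENOMEM;
 *	fsnotify_init_mark(mark, my_free_mark);	// refcnt == 1
 *	mark->mask = FS_MODIFY | FS_DELETE;
 *	ret = fsnotify_add_mark(mark, group, inode, NULL, 0);
 *	fsnotify_put_mark(mark);	// drop the creation reference
 *	if (ret)
 *		return ret;
 */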

/*
 * Clear any marks in a group for which mark->flags & flags is true.
 */
void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group,
					 unsigned int flags)
{
	struct fsnotify_mark *lmark, *mark;
	LIST_HEAD(to_free);

	/*
	 * We have to be really careful here. Anytime we drop mark_mutex, e.g.
	 * fsnotify_clear_marks_by_inode() can come in and free marks, even
	 * marks on our to_free list, so we have to hold mark_mutex even when
	 * accessing that list. And freeing a mark requires us to drop
	 * mark_mutex. So we can reliably free only the first mark in the
	 * list. That's why we first move the marks to be freed to the to_free
	 * list in one go and then free the marks from to_free one by one.
	 */
	mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
	list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) {
		if (mark->flags & flags)
			list_move(&mark->g_list, &to_free);
	}
	mutex_unlock(&group->mark_mutex);

	while (1) {
		mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
		if (list_empty(&to_free)) {
			mutex_unlock(&group->mark_mutex);
			break;
		}
		mark = list_first_entry(&to_free, struct fsnotify_mark, g_list);
		fsnotify_get_mark(mark);
		fsnotify_detach_mark(mark);
		mutex_unlock(&group->mark_mutex);
		fsnotify_free_mark(mark);
		fsnotify_put_mark(mark);
	}
}

/*
 * Given a group, destroy all of the marks associated with that group.
 */
void fsnotify_clear_marks_by_group(struct fsnotify_group *group)
{
	fsnotify_clear_marks_by_group_flags(group, (unsigned int)-1);
}

void fsnotify_duplicate_mark(struct fsnotify_mark *new, struct fsnotify_mark *old)
{
	assert_spin_locked(&old->lock);
	new->inode = old->inode;
	new->mnt = old->mnt;
	if (old->group)
		fsnotify_get_group(old->group);
	new->group = old->group;
	new->mask = old->mask;
	new->free_mark = old->free_mark;
}

/*
 * Nothing fancy, just initialize lists and locks and counters.
 */
void fsnotify_init_mark(struct fsnotify_mark *mark,
			void (*free_mark)(struct fsnotify_mark *mark))
{
	memset(mark, 0, sizeof(*mark));
	spin_lock_init(&mark->lock);
	atomic_set(&mark->refcnt, 1);
	mark->free_mark = free_mark;
}
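
/*
 * Illustrative sketch (hypothetical backend; the struct and function names
 * are assumptions): callers normally embed the fsnotify_mark in their own
 * per-watch structure and recover it with container_of() in the free_mark
 * callback handed to fsnotify_init_mark(), so private state is freed
 * together with the mark:
 *
 *	struct my_watch {
 *		struct fsnotify_mark fsn_mark;
 *		int wd;		// backend-private data
 *	};
 *
 *	static void my_free_mark(struct fsnotify_mark *mark)
 *	{
 *		struct my_watch *watch;
 *
 *		watch = container_of(mark, struct my_watch, fsn_mark);
 *		kfree(watch);
 *	}
 */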