/*
 * Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; see the file COPYING.  If not, write to
 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * fsnotify inode mark locking/lifetime/and refcnting
 *
 * REFCNT:
 * The group->refcnt and mark->refcnt tell how many "things" in the kernel
 * currently are referencing the objects. Both kinds of objects typically will
 * live inside the kernel with a refcnt of 2, one for its creation and one for
 * the reference a group and a mark hold to each other.
 * If you are holding the appropriate locks, you can take a reference and the
 * object itself is guaranteed to survive until the reference is dropped.
 *
 * LOCKING:
 * There are 3 locks involved with fsnotify inode marks and they MUST be taken
 * in order as follows:
 *
 * group->mark_mutex
 * mark->lock
 * inode->i_lock
 *
 * group->mark_mutex protects the marks_list anchored inside a given group and
 * each mark is hooked via the g_list.  It also protects the group's private
 * data (i.e. group limits).
 *
 * mark->lock protects the mark's attributes like its masks and flags.
 * Furthermore it protects the access to a reference of the group that the mark
 * is assigned to as well as the access to a reference of the inode/vfsmount
 * that is being watched by the mark.
 *
 * inode->i_lock protects the i_fsnotify_marks list anchored inside a
 * given inode and each mark is hooked via the i_list. (and sorta the
 * free_i_list)
 *
 *
 * LIFETIME:
 * Inode marks survive between when they are added to an inode and when their
 * refcnt==0.
 *
 * The inode mark can be cleared for a number of different reasons including:
 * - The inode is unlinked for the last time.  (fsnotify_inode_remove)
 * - The inode is being evicted from cache. (fsnotify_inode_delete)
 * - The fs the inode is on is unmounted.  (fsnotify_inode_delete/fsnotify_unmount_inodes)
 * - Something explicitly requests that it be removed.  (fsnotify_destroy_mark)
 * - The fsnotify_group associated with the mark is going away and all such marks
 *   need to be cleaned up. (fsnotify_clear_marks_by_group)
 *
 * Worst case we are given an inode and need to clean up all the marks on that
 * inode.  We take i_lock and walk the i_fsnotify_marks safely.  For each
 * mark on the list we take a reference (so the mark can't disappear under us).
 * We remove that mark from the inode's list of marks and we add this mark to a
 * private list anchored on the stack using i_free_list; we walk i_free_list
 * and before we destroy the mark we make sure that we don't race with a
 * concurrent destroy_group by getting a ref to the mark's group and taking the
 * group's mutex.
 *
 * Very similarly for freeing by group, except we use free_g_list.
 *
 * This has the very interesting property of being able to run concurrently with
 * any (or all) other directions.
77 */ 78 79 #include <linux/fs.h> 80 #include <linux/init.h> 81 #include <linux/kernel.h> 82 #include <linux/kthread.h> 83 #include <linux/module.h> 84 #include <linux/mutex.h> 85 #include <linux/slab.h> 86 #include <linux/spinlock.h> 87 #include <linux/srcu.h> 88 89 #include <linux/atomic.h> 90 91 #include <linux/fsnotify_backend.h> 92 #include "fsnotify.h" 93 94 struct srcu_struct fsnotify_mark_srcu; 95 static DEFINE_SPINLOCK(destroy_lock); 96 static LIST_HEAD(destroy_list); 97 static DECLARE_WAIT_QUEUE_HEAD(destroy_waitq); 98 99 void fsnotify_get_mark(struct fsnotify_mark *mark) 100 { 101 atomic_inc(&mark->refcnt); 102 } 103 104 void fsnotify_put_mark(struct fsnotify_mark *mark) 105 { 106 if (atomic_dec_and_test(&mark->refcnt)) { 107 if (mark->group) 108 fsnotify_put_group(mark->group); 109 mark->free_mark(mark); 110 } 111 } 112 113 /* 114 * Any time a mark is getting freed we end up here. 115 * The caller had better be holding a reference to this mark so we don't actually 116 * do the final put under the mark->lock 117 */ 118 void fsnotify_destroy_mark_locked(struct fsnotify_mark *mark, 119 struct fsnotify_group *group) 120 { 121 struct inode *inode = NULL; 122 123 BUG_ON(!mutex_is_locked(&group->mark_mutex)); 124 125 spin_lock(&mark->lock); 126 127 /* something else already called this function on this mark */ 128 if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) { 129 spin_unlock(&mark->lock); 130 return; 131 } 132 133 mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE; 134 135 if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) { 136 inode = mark->i.inode; 137 fsnotify_destroy_inode_mark(mark); 138 } else if (mark->flags & FSNOTIFY_MARK_FLAG_VFSMOUNT) 139 fsnotify_destroy_vfsmount_mark(mark); 140 else 141 BUG(); 142 143 list_del_init(&mark->g_list); 144 145 spin_unlock(&mark->lock); 146 147 if (inode && (mark->flags & FSNOTIFY_MARK_FLAG_OBJECT_PINNED)) 148 iput(inode); 149 /* release lock temporarily */ 150 mutex_unlock(&group->mark_mutex); 151 152 spin_lock(&destroy_lock); 153 
list_add(&mark->destroy_list, &destroy_list); 154 spin_unlock(&destroy_lock); 155 wake_up(&destroy_waitq); 156 /* 157 * We don't necessarily have a ref on mark from caller so the above destroy 158 * may have actually freed it, unless this group provides a 'freeing_mark' 159 * function which must be holding a reference. 160 */ 161 162 /* 163 * Some groups like to know that marks are being freed. This is a 164 * callback to the group function to let it know that this mark 165 * is being freed. 166 */ 167 if (group->ops->freeing_mark) 168 group->ops->freeing_mark(mark, group); 169 170 /* 171 * __fsnotify_update_child_dentry_flags(inode); 172 * 173 * I really want to call that, but we can't, we have no idea if the inode 174 * still exists the second we drop the mark->lock. 175 * 176 * The next time an event arrive to this inode from one of it's children 177 * __fsnotify_parent will see that the inode doesn't care about it's 178 * children and will update all of these flags then. So really this 179 * is just a lazy update (and could be a perf win...) 180 */ 181 182 atomic_dec(&group->num_marks); 183 184 mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); 185 } 186 187 void fsnotify_destroy_mark(struct fsnotify_mark *mark, 188 struct fsnotify_group *group) 189 { 190 mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); 191 fsnotify_destroy_mark_locked(mark, group); 192 mutex_unlock(&group->mark_mutex); 193 } 194 195 void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask) 196 { 197 assert_spin_locked(&mark->lock); 198 199 mark->mask = mask; 200 201 if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) 202 fsnotify_set_inode_mark_mask_locked(mark, mask); 203 } 204 205 void fsnotify_set_mark_ignored_mask_locked(struct fsnotify_mark *mark, __u32 mask) 206 { 207 assert_spin_locked(&mark->lock); 208 209 mark->ignored_mask = mask; 210 } 211 212 /* 213 * Attach an initialized mark to a given group and fs object. 
214 * These marks may be used for the fsnotify backend to determine which 215 * event types should be delivered to which group. 216 */ 217 int fsnotify_add_mark_locked(struct fsnotify_mark *mark, 218 struct fsnotify_group *group, struct inode *inode, 219 struct vfsmount *mnt, int allow_dups) 220 { 221 int ret = 0; 222 223 BUG_ON(inode && mnt); 224 BUG_ON(!inode && !mnt); 225 BUG_ON(!mutex_is_locked(&group->mark_mutex)); 226 227 /* 228 * LOCKING ORDER!!!! 229 * group->mark_mutex 230 * mark->lock 231 * inode->i_lock 232 */ 233 spin_lock(&mark->lock); 234 mark->flags |= FSNOTIFY_MARK_FLAG_ALIVE; 235 236 fsnotify_get_group(group); 237 mark->group = group; 238 list_add(&mark->g_list, &group->marks_list); 239 atomic_inc(&group->num_marks); 240 fsnotify_get_mark(mark); /* for i_list and g_list */ 241 242 if (inode) { 243 ret = fsnotify_add_inode_mark(mark, group, inode, allow_dups); 244 if (ret) 245 goto err; 246 } else if (mnt) { 247 ret = fsnotify_add_vfsmount_mark(mark, group, mnt, allow_dups); 248 if (ret) 249 goto err; 250 } else { 251 BUG(); 252 } 253 254 /* this will pin the object if appropriate */ 255 fsnotify_set_mark_mask_locked(mark, mark->mask); 256 spin_unlock(&mark->lock); 257 258 if (inode) 259 __fsnotify_update_child_dentry_flags(inode); 260 261 return ret; 262 err: 263 mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE; 264 list_del_init(&mark->g_list); 265 fsnotify_put_group(group); 266 mark->group = NULL; 267 atomic_dec(&group->num_marks); 268 269 spin_unlock(&mark->lock); 270 271 spin_lock(&destroy_lock); 272 list_add(&mark->destroy_list, &destroy_list); 273 spin_unlock(&destroy_lock); 274 wake_up(&destroy_waitq); 275 276 return ret; 277 } 278 279 int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group, 280 struct inode *inode, struct vfsmount *mnt, int allow_dups) 281 { 282 int ret; 283 mutex_lock(&group->mark_mutex); 284 ret = fsnotify_add_mark_locked(mark, group, inode, mnt, allow_dups); 285 mutex_unlock(&group->mark_mutex); 286 return 
ret; 287 } 288 289 /* 290 * clear any marks in a group in which mark->flags & flags is true 291 */ 292 void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, 293 unsigned int flags) 294 { 295 struct fsnotify_mark *lmark, *mark; 296 297 mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); 298 list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) { 299 if (mark->flags & flags) { 300 fsnotify_get_mark(mark); 301 fsnotify_destroy_mark_locked(mark, group); 302 fsnotify_put_mark(mark); 303 } 304 } 305 mutex_unlock(&group->mark_mutex); 306 } 307 308 /* 309 * Given a group, destroy all of the marks associated with that group. 310 */ 311 void fsnotify_clear_marks_by_group(struct fsnotify_group *group) 312 { 313 fsnotify_clear_marks_by_group_flags(group, (unsigned int)-1); 314 } 315 316 void fsnotify_duplicate_mark(struct fsnotify_mark *new, struct fsnotify_mark *old) 317 { 318 assert_spin_locked(&old->lock); 319 new->i.inode = old->i.inode; 320 new->m.mnt = old->m.mnt; 321 if (old->group) 322 fsnotify_get_group(old->group); 323 new->group = old->group; 324 new->mask = old->mask; 325 new->free_mark = old->free_mark; 326 } 327 328 /* 329 * Nothing fancy, just initialize lists and locks and counters. 
330 */ 331 void fsnotify_init_mark(struct fsnotify_mark *mark, 332 void (*free_mark)(struct fsnotify_mark *mark)) 333 { 334 memset(mark, 0, sizeof(*mark)); 335 spin_lock_init(&mark->lock); 336 atomic_set(&mark->refcnt, 1); 337 mark->free_mark = free_mark; 338 } 339 340 static int fsnotify_mark_destroy(void *ignored) 341 { 342 struct fsnotify_mark *mark, *next; 343 struct list_head private_destroy_list; 344 345 for (;;) { 346 spin_lock(&destroy_lock); 347 /* exchange the list head */ 348 list_replace_init(&destroy_list, &private_destroy_list); 349 spin_unlock(&destroy_lock); 350 351 synchronize_srcu(&fsnotify_mark_srcu); 352 353 list_for_each_entry_safe(mark, next, &private_destroy_list, destroy_list) { 354 list_del_init(&mark->destroy_list); 355 fsnotify_put_mark(mark); 356 } 357 358 wait_event_interruptible(destroy_waitq, !list_empty(&destroy_list)); 359 } 360 361 return 0; 362 } 363 364 static int __init fsnotify_mark_init(void) 365 { 366 struct task_struct *thread; 367 368 thread = kthread_run(fsnotify_mark_destroy, NULL, 369 "fsnotify_mark"); 370 if (IS_ERR(thread)) 371 panic("unable to start fsnotify mark destruction thread."); 372 373 return 0; 374 } 375 device_initcall(fsnotify_mark_init); 376