xref: /openbmc/linux/fs/notify/fsnotify.c (revision b8d312aa)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *  Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
4  */
5 
#include <linux/dcache.h>
#include <linux/fs.h>
#include <linux/gfp.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/sched.h>
#include <linux/srcu.h>

#include <linux/fsnotify_backend.h>
#include "fsnotify.h"
16 
/*
 * An inode is being evicted from core: drop every mark that is still
 * attached to it.
 */
void __fsnotify_inode_delete(struct inode *inode)
{
	fsnotify_clear_marks_by_inode(inode);
}
EXPORT_SYMBOL_GPL(__fsnotify_inode_delete);
25 
/*
 * Mount counterpart of __fsnotify_inode_delete(): hand @mnt to
 * fsnotify_clear_marks_by_mount() so the marks attached to it are dropped.
 */
void __fsnotify_vfsmount_delete(struct vfsmount *mnt)
{
	fsnotify_clear_marks_by_mount(mnt);
}
30 
31 /**
32  * fsnotify_unmount_inodes - an sb is unmounting.  handle any watched inodes.
33  * @sb: superblock being unmounted.
34  *
35  * Called during unmount with no locks held, so needs to be safe against
36  * concurrent modifiers. We temporarily drop sb->s_inode_list_lock and CAN block.
37  */
38 static void fsnotify_unmount_inodes(struct super_block *sb)
39 {
40 	struct inode *inode, *iput_inode = NULL;
41 
42 	spin_lock(&sb->s_inode_list_lock);
43 	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
44 		/*
45 		 * We cannot __iget() an inode in state I_FREEING,
46 		 * I_WILL_FREE, or I_NEW which is fine because by that point
47 		 * the inode cannot have any associated watches.
48 		 */
49 		spin_lock(&inode->i_lock);
50 		if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) {
51 			spin_unlock(&inode->i_lock);
52 			continue;
53 		}
54 
55 		/*
56 		 * If i_count is zero, the inode cannot have any watches and
57 		 * doing an __iget/iput with SB_ACTIVE clear would actually
58 		 * evict all inodes with zero i_count from icache which is
59 		 * unnecessarily violent and may in fact be illegal to do.
60 		 */
61 		if (!atomic_read(&inode->i_count)) {
62 			spin_unlock(&inode->i_lock);
63 			continue;
64 		}
65 
66 		__iget(inode);
67 		spin_unlock(&inode->i_lock);
68 		spin_unlock(&sb->s_inode_list_lock);
69 
70 		if (iput_inode)
71 			iput(iput_inode);
72 
73 		/* for each watch, send FS_UNMOUNT and then remove it */
74 		fsnotify(inode, FS_UNMOUNT, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
75 
76 		fsnotify_inode_delete(inode);
77 
78 		iput_inode = inode;
79 
80 		spin_lock(&sb->s_inode_list_lock);
81 	}
82 	spin_unlock(&sb->s_inode_list_lock);
83 
84 	if (iput_inode)
85 		iput(iput_inode);
86 	/* Wait for outstanding inode references from connectors */
87 	wait_var_event(&sb->s_fsnotify_inode_refs,
88 		       !atomic_long_read(&sb->s_fsnotify_inode_refs));
89 }
90 
/*
 * Superblock teardown: first deal with the inodes of @sb (FS_UNMOUNT events
 * and per-inode mark removal), then clear the marks attached to @sb itself.
 */
void fsnotify_sb_delete(struct super_block *sb)
{
	fsnotify_unmount_inodes(sb);
	fsnotify_clear_marks_by_sb(sb);
}
96 
97 /*
98  * Given an inode, first check if we care what happens to our children.  Inotify
99  * and dnotify both tell their parents about events.  If we care about any event
100  * on a child we run all of our children and set a dentry flag saying that the
101  * parent cares.  Thus when an event happens on a child it can quickly tell if
102  * if there is a need to find a parent and send the event to the parent.
103  */
104 void __fsnotify_update_child_dentry_flags(struct inode *inode)
105 {
106 	struct dentry *alias;
107 	int watched;
108 
109 	if (!S_ISDIR(inode->i_mode))
110 		return;
111 
112 	/* determine if the children should tell inode about their events */
113 	watched = fsnotify_inode_watches_children(inode);
114 
115 	spin_lock(&inode->i_lock);
116 	/* run all of the dentries associated with this inode.  Since this is a
117 	 * directory, there damn well better only be one item on this list */
118 	hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
119 		struct dentry *child;
120 
121 		/* run all of the children of the original inode and fix their
122 		 * d_flags to indicate parental interest (their parent is the
123 		 * original inode) */
124 		spin_lock(&alias->d_lock);
125 		list_for_each_entry(child, &alias->d_subdirs, d_child) {
126 			if (!child->d_inode)
127 				continue;
128 
129 			spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED);
130 			if (watched)
131 				child->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED;
132 			else
133 				child->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED;
134 			spin_unlock(&child->d_lock);
135 		}
136 		spin_unlock(&alias->d_lock);
137 	}
138 	spin_unlock(&inode->i_lock);
139 }
140 
141 /* Notify this dentry's parent about a child's events. */
142 int __fsnotify_parent(const struct path *path, struct dentry *dentry, __u32 mask)
143 {
144 	struct dentry *parent;
145 	struct inode *p_inode;
146 	int ret = 0;
147 
148 	if (!dentry)
149 		dentry = path->dentry;
150 
151 	if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED))
152 		return 0;
153 
154 	parent = dget_parent(dentry);
155 	p_inode = parent->d_inode;
156 
157 	if (unlikely(!fsnotify_inode_watches_children(p_inode))) {
158 		__fsnotify_update_child_dentry_flags(p_inode);
159 	} else if (p_inode->i_fsnotify_mask & mask & ALL_FSNOTIFY_EVENTS) {
160 		struct name_snapshot name;
161 
162 		/* we are notifying a parent so come up with the new mask which
163 		 * specifies these are events which came from a child. */
164 		mask |= FS_EVENT_ON_CHILD;
165 
166 		take_dentry_name_snapshot(&name, dentry);
167 		if (path)
168 			ret = fsnotify(p_inode, mask, path, FSNOTIFY_EVENT_PATH,
169 				       &name.name, 0);
170 		else
171 			ret = fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE,
172 				       &name.name, 0);
173 		release_dentry_name_snapshot(&name);
174 	}
175 
176 	dput(parent);
177 
178 	return ret;
179 }
180 EXPORT_SYMBOL_GPL(__fsnotify_parent);
181 
182 static int send_to_group(struct inode *to_tell,
183 			 __u32 mask, const void *data,
184 			 int data_is, u32 cookie,
185 			 const struct qstr *file_name,
186 			 struct fsnotify_iter_info *iter_info)
187 {
188 	struct fsnotify_group *group = NULL;
189 	__u32 test_mask = (mask & ALL_FSNOTIFY_EVENTS);
190 	__u32 marks_mask = 0;
191 	__u32 marks_ignored_mask = 0;
192 	struct fsnotify_mark *mark;
193 	int type;
194 
195 	if (WARN_ON(!iter_info->report_mask))
196 		return 0;
197 
198 	/* clear ignored on inode modification */
199 	if (mask & FS_MODIFY) {
200 		fsnotify_foreach_obj_type(type) {
201 			if (!fsnotify_iter_should_report_type(iter_info, type))
202 				continue;
203 			mark = iter_info->marks[type];
204 			if (mark &&
205 			    !(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
206 				mark->ignored_mask = 0;
207 		}
208 	}
209 
210 	fsnotify_foreach_obj_type(type) {
211 		if (!fsnotify_iter_should_report_type(iter_info, type))
212 			continue;
213 		mark = iter_info->marks[type];
214 		/* does the object mark tell us to do something? */
215 		if (mark) {
216 			group = mark->group;
217 			marks_mask |= mark->mask;
218 			marks_ignored_mask |= mark->ignored_mask;
219 		}
220 	}
221 
222 	pr_debug("%s: group=%p to_tell=%p mask=%x marks_mask=%x marks_ignored_mask=%x"
223 		 " data=%p data_is=%d cookie=%d\n",
224 		 __func__, group, to_tell, mask, marks_mask, marks_ignored_mask,
225 		 data, data_is, cookie);
226 
227 	if (!(test_mask & marks_mask & ~marks_ignored_mask))
228 		return 0;
229 
230 	return group->ops->handle_event(group, to_tell, mask, data, data_is,
231 					file_name, cookie, iter_info);
232 }
233 
234 static struct fsnotify_mark *fsnotify_first_mark(struct fsnotify_mark_connector **connp)
235 {
236 	struct fsnotify_mark_connector *conn;
237 	struct hlist_node *node = NULL;
238 
239 	conn = srcu_dereference(*connp, &fsnotify_mark_srcu);
240 	if (conn)
241 		node = srcu_dereference(conn->list.first, &fsnotify_mark_srcu);
242 
243 	return hlist_entry_safe(node, struct fsnotify_mark, obj_list);
244 }
245 
246 static struct fsnotify_mark *fsnotify_next_mark(struct fsnotify_mark *mark)
247 {
248 	struct hlist_node *node = NULL;
249 
250 	if (mark)
251 		node = srcu_dereference(mark->obj_list.next,
252 					&fsnotify_mark_srcu);
253 
254 	return hlist_entry_safe(node, struct fsnotify_mark, obj_list);
255 }
256 
257 /*
258  * iter_info is a multi head priority queue of marks.
259  * Pick a subset of marks from queue heads, all with the
260  * same group and set the report_mask for selected subset.
261  * Returns the report_mask of the selected subset.
262  */
263 static unsigned int fsnotify_iter_select_report_types(
264 		struct fsnotify_iter_info *iter_info)
265 {
266 	struct fsnotify_group *max_prio_group = NULL;
267 	struct fsnotify_mark *mark;
268 	int type;
269 
270 	/* Choose max prio group among groups of all queue heads */
271 	fsnotify_foreach_obj_type(type) {
272 		mark = iter_info->marks[type];
273 		if (mark &&
274 		    fsnotify_compare_groups(max_prio_group, mark->group) > 0)
275 			max_prio_group = mark->group;
276 	}
277 
278 	if (!max_prio_group)
279 		return 0;
280 
281 	/* Set the report mask for marks from same group as max prio group */
282 	iter_info->report_mask = 0;
283 	fsnotify_foreach_obj_type(type) {
284 		mark = iter_info->marks[type];
285 		if (mark &&
286 		    fsnotify_compare_groups(max_prio_group, mark->group) == 0)
287 			fsnotify_iter_set_report_type(iter_info, type);
288 	}
289 
290 	return iter_info->report_mask;
291 }
292 
293 /*
294  * Pop from iter_info multi head queue, the marks that were iterated in the
295  * current iteration step.
296  */
297 static void fsnotify_iter_next(struct fsnotify_iter_info *iter_info)
298 {
299 	int type;
300 
301 	fsnotify_foreach_obj_type(type) {
302 		if (fsnotify_iter_should_report_type(iter_info, type))
303 			iter_info->marks[type] =
304 				fsnotify_next_mark(iter_info->marks[type]);
305 	}
306 }
307 
308 /*
309  * This is the main call to fsnotify.  The VFS calls into hook specific functions
310  * in linux/fsnotify.h.  Those functions then in turn call here.  Here will call
311  * out to all of the registered fsnotify_group.  Those groups can then use the
312  * notification event in whatever means they feel necessary.
313  */
314 int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
315 	     const struct qstr *file_name, u32 cookie)
316 {
317 	struct fsnotify_iter_info iter_info = {};
318 	struct super_block *sb = to_tell->i_sb;
319 	struct mount *mnt = NULL;
320 	__u32 mnt_or_sb_mask = sb->s_fsnotify_mask;
321 	int ret = 0;
322 	__u32 test_mask = (mask & ALL_FSNOTIFY_EVENTS);
323 
324 	if (data_is == FSNOTIFY_EVENT_PATH) {
325 		mnt = real_mount(((const struct path *)data)->mnt);
326 		mnt_or_sb_mask |= mnt->mnt_fsnotify_mask;
327 	}
328 	/* An event "on child" is not intended for a mount/sb mark */
329 	if (mask & FS_EVENT_ON_CHILD)
330 		mnt_or_sb_mask = 0;
331 
332 	/*
333 	 * Optimization: srcu_read_lock() has a memory barrier which can
334 	 * be expensive.  It protects walking the *_fsnotify_marks lists.
335 	 * However, if we do not walk the lists, we do not have to do
336 	 * SRCU because we have no references to any objects and do not
337 	 * need SRCU to keep them "alive".
338 	 */
339 	if (!to_tell->i_fsnotify_marks && !sb->s_fsnotify_marks &&
340 	    (!mnt || !mnt->mnt_fsnotify_marks))
341 		return 0;
342 	/*
343 	 * if this is a modify event we may need to clear the ignored masks
344 	 * otherwise return if neither the inode nor the vfsmount/sb care about
345 	 * this type of event.
346 	 */
347 	if (!(mask & FS_MODIFY) &&
348 	    !(test_mask & (to_tell->i_fsnotify_mask | mnt_or_sb_mask)))
349 		return 0;
350 
351 	iter_info.srcu_idx = srcu_read_lock(&fsnotify_mark_srcu);
352 
353 	iter_info.marks[FSNOTIFY_OBJ_TYPE_INODE] =
354 		fsnotify_first_mark(&to_tell->i_fsnotify_marks);
355 	iter_info.marks[FSNOTIFY_OBJ_TYPE_SB] =
356 		fsnotify_first_mark(&sb->s_fsnotify_marks);
357 	if (mnt) {
358 		iter_info.marks[FSNOTIFY_OBJ_TYPE_VFSMOUNT] =
359 			fsnotify_first_mark(&mnt->mnt_fsnotify_marks);
360 	}
361 
362 	/*
363 	 * We need to merge inode/vfsmount/sb mark lists so that e.g. inode mark
364 	 * ignore masks are properly reflected for mount/sb mark notifications.
365 	 * That's why this traversal is so complicated...
366 	 */
367 	while (fsnotify_iter_select_report_types(&iter_info)) {
368 		ret = send_to_group(to_tell, mask, data, data_is, cookie,
369 				    file_name, &iter_info);
370 
371 		if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS))
372 			goto out;
373 
374 		fsnotify_iter_next(&iter_info);
375 	}
376 	ret = 0;
377 out:
378 	srcu_read_unlock(&fsnotify_mark_srcu, iter_info.srcu_idx);
379 
380 	return ret;
381 }
382 EXPORT_SYMBOL_GPL(fsnotify);
383 
384 extern struct kmem_cache *fsnotify_mark_connector_cachep;
385 
386 static __init int fsnotify_init(void)
387 {
388 	int ret;
389 
390 	BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 25);
391 
392 	ret = init_srcu_struct(&fsnotify_mark_srcu);
393 	if (ret)
394 		panic("initializing fsnotify_mark_srcu");
395 
396 	fsnotify_mark_connector_cachep = KMEM_CACHE(fsnotify_mark_connector,
397 						    SLAB_PANIC);
398 
399 	return 0;
400 }
401 core_initcall(fsnotify_init);
402