xref: /openbmc/linux/kernel/audit_tree.c (revision f7d84fa7)
1 #include "audit.h"
2 #include <linux/fsnotify_backend.h>
3 #include <linux/namei.h>
4 #include <linux/mount.h>
5 #include <linux/kthread.h>
6 #include <linux/refcount.h>
7 #include <linux/slab.h>
8 
9 struct audit_tree;
10 struct audit_chunk;
11 
12 struct audit_tree {
13 	refcount_t count;
14 	int goner;
15 	struct audit_chunk *root;
16 	struct list_head chunks;
17 	struct list_head rules;
18 	struct list_head list;
19 	struct list_head same_root;
20 	struct rcu_head head;
21 	char pathname[];
22 };
23 
24 struct audit_chunk {
25 	struct list_head hash;
26 	struct fsnotify_mark mark;
27 	struct list_head trees;		/* with root here */
28 	int dead;
29 	int count;
30 	atomic_long_t refs;
31 	struct rcu_head head;
32 	struct node {
33 		struct list_head list;
34 		struct audit_tree *owner;
35 		unsigned index;		/* index; upper bit indicates 'will prune' */
36 	} owners[];
37 };
38 
39 static LIST_HEAD(tree_list);
40 static LIST_HEAD(prune_list);
41 static struct task_struct *prune_thread;
42 
43 /*
44  * One struct chunk is attached to each inode of interest.
45  * We replace struct chunk on tagging/untagging.
46  * Rules have pointer to struct audit_tree.
47  * Rules have struct list_head rlist forming a list of rules over
48  * the same tree.
49  * References to struct chunk are collected at audit_inode{,_child}()
50  * time and used in AUDIT_TREE rule matching.
51  * These references are dropped at the same time we are calling
52  * audit_free_names(), etc.
53  *
54  * Cyclic lists galore:
55  * tree.chunks anchors chunk.owners[].list			hash_lock
56  * tree.rules anchors rule.rlist				audit_filter_mutex
57  * chunk.trees anchors tree.same_root				hash_lock
58  * chunk.hash is a hash with middle bits of watch.inode as
59  * a hash function.						RCU, hash_lock
60  *
61  * tree is refcounted; one reference for "some rules on rules_list refer to
62  * it", one for each chunk with pointer to it.
63  *
64  * chunk is refcounted by embedded fsnotify_mark + .refs (non-zero refcount
65  * of watch contributes 1 to .refs).
66  *
67  * node.index allows us to get from node.list to the containing chunk.
68  * The MSB of that field is stolen to mark taggings that we might have to
69  * revert - several operations have very unpleasant cleanup logic, and
70  * knowing which taggings are still tentative makes that somewhat easier.
71  */
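
/*
 * For illustration, a rough sketch of that node -> chunk step: given a
 * struct node *p living in some chunk's owners[] array,
 *
 *	int idx = p->index & ~(1U << 31);	(mask off the "will prune" bit)
 *	chunk = container_of(p - idx, struct audit_chunk, owners[0]);
 *
 * find_chunk() below is exactly this calculation.
 */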
72 
73 static struct fsnotify_group *audit_tree_group;
74 
75 static struct audit_tree *alloc_tree(const char *s)
76 {
77 	struct audit_tree *tree;
78 
79 	tree = kmalloc(sizeof(struct audit_tree) + strlen(s) + 1, GFP_KERNEL);
80 	if (tree) {
81 		refcount_set(&tree->count, 1);
82 		tree->goner = 0;
83 		INIT_LIST_HEAD(&tree->chunks);
84 		INIT_LIST_HEAD(&tree->rules);
85 		INIT_LIST_HEAD(&tree->list);
86 		INIT_LIST_HEAD(&tree->same_root);
87 		tree->root = NULL;
88 		strcpy(tree->pathname, s);
89 	}
90 	return tree;
91 }
92 
93 static inline void get_tree(struct audit_tree *tree)
94 {
95 	refcount_inc(&tree->count);
96 }
97 
98 static inline void put_tree(struct audit_tree *tree)
99 {
100 	if (refcount_dec_and_test(&tree->count))
101 		kfree_rcu(tree, head);
102 }
103 
104 /* to avoid bringing the entire thing into audit.h */
105 const char *audit_tree_path(struct audit_tree *tree)
106 {
107 	return tree->pathname;
108 }
109 
110 static void free_chunk(struct audit_chunk *chunk)
111 {
112 	int i;
113 
114 	for (i = 0; i < chunk->count; i++) {
115 		if (chunk->owners[i].owner)
116 			put_tree(chunk->owners[i].owner);
117 	}
118 	kfree(chunk);
119 }
120 
121 void audit_put_chunk(struct audit_chunk *chunk)
122 {
123 	if (atomic_long_dec_and_test(&chunk->refs))
124 		free_chunk(chunk);
125 }
126 
127 static void __put_chunk(struct rcu_head *rcu)
128 {
129 	struct audit_chunk *chunk = container_of(rcu, struct audit_chunk, head);
130 	audit_put_chunk(chunk);
131 }
132 
133 static void audit_tree_destroy_watch(struct fsnotify_mark *entry)
134 {
135 	struct audit_chunk *chunk = container_of(entry, struct audit_chunk, mark);
136 	call_rcu(&chunk->head, __put_chunk);
137 }
138 
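/*
 * Allocate a chunk with room for 'count' owners.  owners[] is a flexible
 * array member, so one allocation covers the header plus 'count' struct
 * node slots; the initial .refs of 1 is the reference contributed by the
 * embedded mark (dropped via audit_tree_destroy_watch() -> __put_chunk()).
 */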
139 static struct audit_chunk *alloc_chunk(int count)
140 {
141 	struct audit_chunk *chunk;
142 	size_t size;
143 	int i;
144 
145 	size = offsetof(struct audit_chunk, owners) + count * sizeof(struct node);
146 	chunk = kzalloc(size, GFP_KERNEL);
147 	if (!chunk)
148 		return NULL;
149 
150 	INIT_LIST_HEAD(&chunk->hash);
151 	INIT_LIST_HEAD(&chunk->trees);
152 	chunk->count = count;
153 	atomic_long_set(&chunk->refs, 1);
154 	for (i = 0; i < count; i++) {
155 		INIT_LIST_HEAD(&chunk->owners[i].list);
156 		chunk->owners[i].index = i;
157 	}
158 	fsnotify_init_mark(&chunk->mark, audit_tree_group);
159 	chunk->mark.mask = FS_IN_IGNORED;
160 	return chunk;
161 }
162 
163 enum {HASH_SIZE = 128};
164 static struct list_head chunk_hash_heads[HASH_SIZE];
165 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(hash_lock);
166 
167 /* Function to return search key in our hash from inode. */
168 static unsigned long inode_to_key(const struct inode *inode)
169 {
170 	return (unsigned long)inode;
171 }
172 
173 /*
174  * Function to return search key in our hash from chunk. Key 0 is special and
175  * should never be present in the hash.
176  */
177 static unsigned long chunk_to_key(struct audit_chunk *chunk)
178 {
179 	/*
180 	 * We have a reference to the mark so it should be attached to a
181 	 * connector.
182 	 */
183 	if (WARN_ON_ONCE(!chunk->mark.connector))
184 		return 0;
185 	return (unsigned long)chunk->mark.connector->inode;
186 }
187 
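/*
 * Bucket selection: the key is the inode address, and dividing by
 * L1_CACHE_BYTES discards the low-order bits so that the "middle bits of
 * watch.inode" mentioned in the comment at the top of this file pick the
 * hash chain.
 */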
188 static inline struct list_head *chunk_hash(unsigned long key)
189 {
190 	unsigned long n = key / L1_CACHE_BYTES;
191 	return chunk_hash_heads + n % HASH_SIZE;
192 }
193 
194 /* hash_lock & entry->lock are held by caller */
195 static void insert_hash(struct audit_chunk *chunk)
196 {
197 	unsigned long key = chunk_to_key(chunk);
198 	struct list_head *list;
199 
200 	if (!(chunk->mark.flags & FSNOTIFY_MARK_FLAG_ATTACHED))
201 		return;
202 	list = chunk_hash(key);
203 	list_add_rcu(&chunk->hash, list);
204 }
205 
206 /* called under rcu_read_lock */
207 struct audit_chunk *audit_tree_lookup(const struct inode *inode)
208 {
209 	unsigned long key = inode_to_key(inode);
210 	struct list_head *list = chunk_hash(key);
211 	struct audit_chunk *p;
212 
213 	list_for_each_entry_rcu(p, list, hash) {
214 		if (chunk_to_key(p) == key) {
215 			atomic_long_inc(&p->refs);
216 			return p;
217 		}
218 	}
219 	return NULL;
220 }
221 
222 bool audit_tree_match(struct audit_chunk *chunk, struct audit_tree *tree)
223 {
224 	int n;
225 	for (n = 0; n < chunk->count; n++)
226 		if (chunk->owners[n].owner == tree)
227 			return true;
228 	return false;
229 }
230 
231 /* tagging and untagging inodes with trees */
232 
233 static struct audit_chunk *find_chunk(struct node *p)
234 {
235 	int index = p->index & ~(1U<<31);
236 	p -= index;
237 	return container_of(p, struct audit_chunk, owners[0]);
238 }
239 
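/*
 * Drop one owner from a chunk.  Since owners[] is sized when the chunk is
 * allocated, this is done by building a replacement chunk one slot smaller
 * and swapping it into the hash (or by tearing the chunk down completely
 * when the last owner goes away).  Called with hash_lock held; the lock is
 * dropped while we deal with the fsnotify mark and re-acquired on return.
 */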
240 static void untag_chunk(struct node *p)
241 {
242 	struct audit_chunk *chunk = find_chunk(p);
243 	struct fsnotify_mark *entry = &chunk->mark;
244 	struct audit_chunk *new = NULL;
245 	struct audit_tree *owner;
246 	int size = chunk->count - 1;
247 	int i, j;
248 
249 	fsnotify_get_mark(entry);
250 
251 	spin_unlock(&hash_lock);
252 
253 	if (size)
254 		new = alloc_chunk(size);
255 
256 	mutex_lock(&entry->group->mark_mutex);
257 	spin_lock(&entry->lock);
258 	/*
259 	 * mark_mutex protects the mark from being detached and thus also keeps
260 	 * mark->connector->inode from becoming NULL.
261 	 */
262 	if (chunk->dead || !(entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
263 		spin_unlock(&entry->lock);
264 		mutex_unlock(&entry->group->mark_mutex);
265 		if (new)
266 			fsnotify_put_mark(&new->mark);
267 		goto out;
268 	}
269 
270 	owner = p->owner;
271 
272 	if (!size) {
273 		chunk->dead = 1;
274 		spin_lock(&hash_lock);
275 		list_del_init(&chunk->trees);
276 		if (owner->root == chunk)
277 			owner->root = NULL;
278 		list_del_init(&p->list);
279 		list_del_rcu(&chunk->hash);
280 		spin_unlock(&hash_lock);
281 		spin_unlock(&entry->lock);
282 		mutex_unlock(&entry->group->mark_mutex);
283 		fsnotify_destroy_mark(entry, audit_tree_group);
284 		goto out;
285 	}
286 
287 	if (!new)
288 		goto Fallback;
289 
290 	if (fsnotify_add_mark_locked(&new->mark, entry->connector->inode,
291 				     NULL, 1)) {
292 		fsnotify_put_mark(&new->mark);
293 		goto Fallback;
294 	}
295 
296 	chunk->dead = 1;
297 	spin_lock(&hash_lock);
298 	list_replace_init(&chunk->trees, &new->trees);
299 	if (owner->root == chunk) {
300 		list_del_init(&owner->same_root);
301 		owner->root = NULL;
302 	}
303 
304 	for (i = j = 0; j <= size; i++, j++) {
305 		struct audit_tree *s;
306 		if (&chunk->owners[j] == p) {
307 			list_del_init(&p->list);
308 			i--;
309 			continue;
310 		}
311 		s = chunk->owners[j].owner;
312 		new->owners[i].owner = s;
313 		new->owners[i].index = chunk->owners[j].index - j + i;
314 		if (!s) /* result of earlier fallback */
315 			continue;
316 		get_tree(s);
317 		list_replace_init(&chunk->owners[j].list, &new->owners[i].list);
318 	}
319 
320 	list_replace_rcu(&chunk->hash, &new->hash);
321 	list_for_each_entry(owner, &new->trees, same_root)
322 		owner->root = new;
323 	spin_unlock(&hash_lock);
324 	spin_unlock(&entry->lock);
325 	mutex_unlock(&entry->group->mark_mutex);
326 	fsnotify_destroy_mark(entry, audit_tree_group);
327 	fsnotify_put_mark(&new->mark);	/* drop initial reference */
328 	goto out;
329 
330 Fallback:
331 	/* do the best we can */
332 	spin_lock(&hash_lock);
333 	if (owner->root == chunk) {
334 		list_del_init(&owner->same_root);
335 		owner->root = NULL;
336 	}
337 	list_del_init(&p->list);
338 	p->owner = NULL;
339 	put_tree(owner);
340 	spin_unlock(&hash_lock);
341 	spin_unlock(&entry->lock);
342 	mutex_unlock(&entry->group->mark_mutex);
343 out:
344 	fsnotify_put_mark(entry);
345 	spin_lock(&hash_lock);
346 }
347 
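/*
 * Attach a brand new chunk, with this tree as its only owner, to an inode
 * that has no audit mark yet.  tree->goner is re-checked under the locks
 * because the tree may have been killed while we slept in allocation or
 * mark insertion.
 */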
348 static int create_chunk(struct inode *inode, struct audit_tree *tree)
349 {
350 	struct fsnotify_mark *entry;
351 	struct audit_chunk *chunk = alloc_chunk(1);
352 	if (!chunk)
353 		return -ENOMEM;
354 
355 	entry = &chunk->mark;
356 	if (fsnotify_add_mark(entry, inode, NULL, 0)) {
357 		fsnotify_put_mark(entry);
358 		return -ENOSPC;
359 	}
360 
361 	spin_lock(&entry->lock);
362 	spin_lock(&hash_lock);
363 	if (tree->goner) {
364 		spin_unlock(&hash_lock);
365 		chunk->dead = 1;
366 		spin_unlock(&entry->lock);
367 		fsnotify_destroy_mark(entry, audit_tree_group);
368 		fsnotify_put_mark(entry);
369 		return 0;
370 	}
371 	chunk->owners[0].index = (1U << 31);
372 	chunk->owners[0].owner = tree;
373 	get_tree(tree);
374 	list_add(&chunk->owners[0].list, &tree->chunks);
375 	if (!tree->root) {
376 		tree->root = chunk;
377 		list_add(&tree->same_root, &chunk->trees);
378 	}
379 	insert_hash(chunk);
380 	spin_unlock(&hash_lock);
381 	spin_unlock(&entry->lock);
382 	fsnotify_put_mark(entry);	/* drop initial reference */
383 	return 0;
384 }
385 
386 /* the first tagged inode becomes root of tree */
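/*
 * Add 'tree' as an owner of the chunk tagged onto 'inode', creating the
 * chunk if the inode is not tagged yet.  As in untag_chunk(), growing
 * owners[] means allocating a replacement chunk with one extra slot and
 * atomically swapping it for the old one in the hash and on the owners'
 * lists.
 */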
387 static int tag_chunk(struct inode *inode, struct audit_tree *tree)
388 {
389 	struct fsnotify_mark *old_entry, *chunk_entry;
390 	struct audit_tree *owner;
391 	struct audit_chunk *chunk, *old;
392 	struct node *p;
393 	int n;
394 
395 	old_entry = fsnotify_find_mark(&inode->i_fsnotify_marks,
396 				       audit_tree_group);
397 	if (!old_entry)
398 		return create_chunk(inode, tree);
399 
400 	old = container_of(old_entry, struct audit_chunk, mark);
401 
402 	/* are we already there? */
403 	spin_lock(&hash_lock);
404 	for (n = 0; n < old->count; n++) {
405 		if (old->owners[n].owner == tree) {
406 			spin_unlock(&hash_lock);
407 			fsnotify_put_mark(old_entry);
408 			return 0;
409 		}
410 	}
411 	spin_unlock(&hash_lock);
412 
413 	chunk = alloc_chunk(old->count + 1);
414 	if (!chunk) {
415 		fsnotify_put_mark(old_entry);
416 		return -ENOMEM;
417 	}
418 
419 	chunk_entry = &chunk->mark;
420 
421 	mutex_lock(&old_entry->group->mark_mutex);
422 	spin_lock(&old_entry->lock);
423 	/*
424 	 * mark_mutex protects the mark from being detached and thus also keeps
425 	 * mark->connector->inode from becoming NULL.
426 	 */
427 	if (!(old_entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
428 		/* old_entry is being shot, let's just lie */
429 		spin_unlock(&old_entry->lock);
430 		mutex_unlock(&old_entry->group->mark_mutex);
431 		fsnotify_put_mark(old_entry);
432 		fsnotify_put_mark(&chunk->mark);
433 		return -ENOENT;
434 	}
435 
436 	if (fsnotify_add_mark_locked(chunk_entry,
437 			     old_entry->connector->inode, NULL, 1)) {
438 		spin_unlock(&old_entry->lock);
439 		mutex_unlock(&old_entry->group->mark_mutex);
440 		fsnotify_put_mark(chunk_entry);
441 		fsnotify_put_mark(old_entry);
442 		return -ENOSPC;
443 	}
444 
445 	/*
 	 * Even though we hold old_entry->lock, this is safe since
 	 * chunk_entry->lock could NEVER have been grabbed before.
 	 */
446 	spin_lock(&chunk_entry->lock);
447 	spin_lock(&hash_lock);
448 
449 	/* we now hold old_entry->lock, chunk_entry->lock, and hash_lock */
450 	if (tree->goner) {
451 		spin_unlock(&hash_lock);
452 		chunk->dead = 1;
453 		spin_unlock(&chunk_entry->lock);
454 		spin_unlock(&old_entry->lock);
455 		mutex_unlock(&old_entry->group->mark_mutex);
456 
457 		fsnotify_destroy_mark(chunk_entry, audit_tree_group);
458 
459 		fsnotify_put_mark(chunk_entry);
460 		fsnotify_put_mark(old_entry);
461 		return 0;
462 	}
463 	list_replace_init(&old->trees, &chunk->trees);
464 	for (n = 0, p = chunk->owners; n < old->count; n++, p++) {
465 		struct audit_tree *s = old->owners[n].owner;
466 		p->owner = s;
467 		p->index = old->owners[n].index;
468 		if (!s) /* result of fallback in untag */
469 			continue;
470 		get_tree(s);
471 		list_replace_init(&old->owners[n].list, &p->list);
472 	}
473 	p->index = (chunk->count - 1) | (1U<<31);
474 	p->owner = tree;
475 	get_tree(tree);
476 	list_add(&p->list, &tree->chunks);
477 	list_replace_rcu(&old->hash, &chunk->hash);
478 	list_for_each_entry(owner, &chunk->trees, same_root)
479 		owner->root = chunk;
480 	old->dead = 1;
481 	if (!tree->root) {
482 		tree->root = chunk;
483 		list_add(&tree->same_root, &chunk->trees);
484 	}
485 	spin_unlock(&hash_lock);
486 	spin_unlock(&chunk_entry->lock);
487 	spin_unlock(&old_entry->lock);
488 	mutex_unlock(&old_entry->group->mark_mutex);
489 	fsnotify_destroy_mark(old_entry, audit_tree_group);
490 	fsnotify_put_mark(chunk_entry);	/* drop initial reference */
491 	fsnotify_put_mark(old_entry); /* pair to fsnotify_find mark_entry */
492 	return 0;
493 }
494 
495 static void audit_tree_log_remove_rule(struct audit_krule *rule)
496 {
497 	struct audit_buffer *ab;
498 
499 	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
500 	if (unlikely(!ab))
501 		return;
502 	audit_log_format(ab, "op=remove_rule");
503 	audit_log_format(ab, " dir=");
504 	audit_log_untrustedstring(ab, rule->tree->pathname);
505 	audit_log_key(ab, rule->filterkey);
506 	audit_log_format(ab, " list=%d res=1", rule->listnr);
507 	audit_log_end(ab);
508 }
509 
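/*
 * Detach every rule hanging off the tree; fully initialized ones get a
 * config-change record logged and are freed via RCU.  All callers hold
 * audit_filter_mutex.
 */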
510 static void kill_rules(struct audit_tree *tree)
511 {
512 	struct audit_krule *rule, *next;
513 	struct audit_entry *entry;
514 
515 	list_for_each_entry_safe(rule, next, &tree->rules, rlist) {
516 		entry = container_of(rule, struct audit_entry, rule);
517 
518 		list_del_init(&rule->rlist);
519 		if (rule->tree) {
520 			/* not a half-baked one */
521 			audit_tree_log_remove_rule(rule);
522 			if (entry->rule.exe)
523 				audit_remove_mark(entry->rule.exe);
524 			rule->tree = NULL;
525 			list_del_rcu(&entry->list);
526 			list_del(&entry->rule.list);
527 			call_rcu(&entry->rcu, audit_free_rule_rcu);
528 		}
529 	}
530 }
531 
532 /*
533  * finish killing struct audit_tree
534  */
535 static void prune_one(struct audit_tree *victim)
536 {
537 	spin_lock(&hash_lock);
538 	while (!list_empty(&victim->chunks)) {
539 		struct node *p;
540 
541 		p = list_entry(victim->chunks.next, struct node, list);
542 
543 		untag_chunk(p);
544 	}
545 	spin_unlock(&hash_lock);
546 	put_tree(victim);
547 }
548 
549 /* trim the uncommitted chunks from tree */
550 
551 static void trim_marked(struct audit_tree *tree)
552 {
553 	struct list_head *p, *q;
554 	spin_lock(&hash_lock);
555 	if (tree->goner) {
556 		spin_unlock(&hash_lock);
557 		return;
558 	}
559 	/* reorder: move nodes marked for pruning to the front */
560 	for (p = tree->chunks.next; p != &tree->chunks; p = q) {
561 		struct node *node = list_entry(p, struct node, list);
562 		q = p->next;
563 		if (node->index & (1U<<31)) {
564 			list_del_init(p);
565 			list_add(p, &tree->chunks);
566 		}
567 	}
568 
569 	while (!list_empty(&tree->chunks)) {
570 		struct node *node;
571 
572 		node = list_entry(tree->chunks.next, struct node, list);
573 
574 		/* have we run out of marked? */
575 		if (!(node->index & (1U<<31)))
576 			break;
577 
578 		untag_chunk(node);
579 	}
580 	if (!tree->root && !tree->goner) {
581 		tree->goner = 1;
582 		spin_unlock(&hash_lock);
583 		mutex_lock(&audit_filter_mutex);
584 		kill_rules(tree);
585 		list_del_init(&tree->list);
586 		mutex_unlock(&audit_filter_mutex);
587 		prune_one(tree);
588 	} else {
589 		spin_unlock(&hash_lock);
590 	}
591 }
592 
593 static void audit_schedule_prune(void);
594 
595 /* called with audit_filter_mutex */
596 int audit_remove_tree_rule(struct audit_krule *rule)
597 {
598 	struct audit_tree *tree;
599 	tree = rule->tree;
600 	if (tree) {
601 		spin_lock(&hash_lock);
602 		list_del_init(&rule->rlist);
603 		if (list_empty(&tree->rules) && !tree->goner) {
604 			tree->root = NULL;
605 			list_del_init(&tree->same_root);
606 			tree->goner = 1;
607 			list_move(&tree->list, &prune_list);
608 			rule->tree = NULL;
609 			spin_unlock(&hash_lock);
610 			audit_schedule_prune();
611 			return 1;
612 		}
613 		rule->tree = NULL;
614 		spin_unlock(&hash_lock);
615 		return 1;
616 	}
617 	return 0;
618 }
619 
620 static int compare_root(struct vfsmount *mnt, void *arg)
621 {
622 	return inode_to_key(d_backing_inode(mnt->mnt_root)) ==
623 	       (unsigned long)arg;
624 }
625 
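/*
 * Walk all trees and drop the chunks that are no longer reachable from the
 * mounts under each tree's root: every node is tentatively marked "will
 * prune", the mark is cleared again for chunks still found under the
 * collected mounts, and trim_marked() removes the rest.
 */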
626 void audit_trim_trees(void)
627 {
628 	struct list_head cursor;
629 
630 	mutex_lock(&audit_filter_mutex);
631 	list_add(&cursor, &tree_list);
632 	while (cursor.next != &tree_list) {
633 		struct audit_tree *tree;
634 		struct path path;
635 		struct vfsmount *root_mnt;
636 		struct node *node;
637 		int err;
638 
639 		tree = container_of(cursor.next, struct audit_tree, list);
640 		get_tree(tree);
641 		list_del(&cursor);
642 		list_add(&cursor, &tree->list);
643 		mutex_unlock(&audit_filter_mutex);
644 
645 		err = kern_path(tree->pathname, 0, &path);
646 		if (err)
647 			goto skip_it;
648 
649 		root_mnt = collect_mounts(&path);
650 		path_put(&path);
651 		if (IS_ERR(root_mnt))
652 			goto skip_it;
653 
654 		spin_lock(&hash_lock);
655 		list_for_each_entry(node, &tree->chunks, list) {
656 			struct audit_chunk *chunk = find_chunk(node);
657 			/* this could be NULL if the watch is dying elsewhere... */
658 			node->index |= 1U<<31;
659 			if (iterate_mounts(compare_root,
660 					   (void *)chunk_to_key(chunk),
661 					   root_mnt))
662 				node->index &= ~(1U<<31);
663 		}
664 		spin_unlock(&hash_lock);
665 		trim_marked(tree);
666 		drop_collected_mounts(root_mnt);
667 skip_it:
668 		put_tree(tree);
669 		mutex_lock(&audit_filter_mutex);
670 	}
671 	list_del(&cursor);
672 	mutex_unlock(&audit_filter_mutex);
673 }
674 
675 int audit_make_tree(struct audit_krule *rule, char *pathname, u32 op)
676 {
677 
678 	if (pathname[0] != '/' ||
679 	    rule->listnr != AUDIT_FILTER_EXIT ||
680 	    op != Audit_equal ||
681 	    rule->inode_f || rule->watch || rule->tree)
682 		return -EINVAL;
683 	rule->tree = alloc_tree(pathname);
684 	if (!rule->tree)
685 		return -ENOMEM;
686 	return 0;
687 }
688 
689 void audit_put_tree(struct audit_tree *tree)
690 {
691 	put_tree(tree);
692 }
693 
694 static int tag_mount(struct vfsmount *mnt, void *arg)
695 {
696 	return tag_chunk(d_backing_inode(mnt->mnt_root), arg);
697 }
698 
699 /*
700  * That gets run when evict_chunk() ends up needing to kill audit_tree.
701  * That gets run when evict_chunk() ends up needing to kill an audit_tree.
702  */
703 static int prune_tree_thread(void *unused)
704 {
705 	for (;;) {
706 		if (list_empty(&prune_list)) {
707 			set_current_state(TASK_INTERRUPTIBLE);
708 			schedule();
709 		}
710 
711 		mutex_lock(&audit_cmd_mutex);
712 		mutex_lock(&audit_filter_mutex);
713 
714 		while (!list_empty(&prune_list)) {
715 			struct audit_tree *victim;
716 
717 			victim = list_entry(prune_list.next,
718 					struct audit_tree, list);
719 			list_del_init(&victim->list);
720 
721 			mutex_unlock(&audit_filter_mutex);
722 
723 			prune_one(victim);
724 
725 			mutex_lock(&audit_filter_mutex);
726 		}
727 
728 		mutex_unlock(&audit_filter_mutex);
729 		mutex_unlock(&audit_cmd_mutex);
730 	}
731 	return 0;
732 }
733 
734 static int audit_launch_prune(void)
735 {
736 	if (prune_thread)
737 		return 0;
738 	prune_thread = kthread_run(prune_tree_thread, NULL,
739 				"audit_prune_tree");
740 	if (IS_ERR(prune_thread)) {
741 		pr_err("cannot start thread audit_prune_tree\n");
742 		prune_thread = NULL;
743 		return -ENOMEM;
744 	}
745 	return 0;
746 }
747 
748 /* called with audit_filter_mutex */
749 int audit_add_tree_rule(struct audit_krule *rule)
750 {
751 	struct audit_tree *seed = rule->tree, *tree;
752 	struct path path;
753 	struct vfsmount *mnt;
754 	int err;
755 
756 	rule->tree = NULL;
757 	list_for_each_entry(tree, &tree_list, list) {
758 		if (!strcmp(seed->pathname, tree->pathname)) {
759 			put_tree(seed);
760 			rule->tree = tree;
761 			list_add(&rule->rlist, &tree->rules);
762 			return 0;
763 		}
764 	}
765 	tree = seed;
766 	list_add(&tree->list, &tree_list);
767 	list_add(&rule->rlist, &tree->rules);
768 	/* do not set rule->tree yet */
769 	mutex_unlock(&audit_filter_mutex);
770 
771 	if (unlikely(!prune_thread)) {
772 		err = audit_launch_prune();
773 		if (err)
774 			goto Err;
775 	}
776 
777 	err = kern_path(tree->pathname, 0, &path);
778 	if (err)
779 		goto Err;
780 	mnt = collect_mounts(&path);
781 	path_put(&path);
782 	if (IS_ERR(mnt)) {
783 		err = PTR_ERR(mnt);
784 		goto Err;
785 	}
786 
787 	get_tree(tree);
788 	err = iterate_mounts(tag_mount, tree, mnt);
789 	drop_collected_mounts(mnt);
790 
791 	if (!err) {
792 		struct node *node;
793 		spin_lock(&hash_lock);
794 		list_for_each_entry(node, &tree->chunks, list)
795 			node->index &= ~(1U<<31);
796 		spin_unlock(&hash_lock);
797 	} else {
798 		trim_marked(tree);
799 		goto Err;
800 	}
801 
802 	mutex_lock(&audit_filter_mutex);
803 	if (list_empty(&rule->rlist)) {
804 		put_tree(tree);
805 		return -ENOENT;
806 	}
807 	rule->tree = tree;
808 	put_tree(tree);
809 
810 	return 0;
811 Err:
812 	mutex_lock(&audit_filter_mutex);
813 	list_del_init(&tree->list);
814 	list_del_init(&tree->rules);
815 	put_tree(tree);
816 	return err;
817 }
818 
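/*
 * For every tree whose watched directory contains 'old', also tag the
 * mounts collected at 'new'.  The cursor/barrier list heads keep our place
 * in tree_list while audit_filter_mutex is dropped around the filesystem
 * work; if tagging fails part-way, the affected trees are trimmed back.
 */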
819 int audit_tag_tree(char *old, char *new)
820 {
821 	struct list_head cursor, barrier;
822 	int failed = 0;
823 	struct path path1, path2;
824 	struct vfsmount *tagged;
825 	int err;
826 
827 	err = kern_path(new, 0, &path2);
828 	if (err)
829 		return err;
830 	tagged = collect_mounts(&path2);
831 	path_put(&path2);
832 	if (IS_ERR(tagged))
833 		return PTR_ERR(tagged);
834 
835 	err = kern_path(old, 0, &path1);
836 	if (err) {
837 		drop_collected_mounts(tagged);
838 		return err;
839 	}
840 
841 	mutex_lock(&audit_filter_mutex);
842 	list_add(&barrier, &tree_list);
843 	list_add(&cursor, &barrier);
844 
845 	while (cursor.next != &tree_list) {
846 		struct audit_tree *tree;
847 		int good_one = 0;
848 
849 		tree = container_of(cursor.next, struct audit_tree, list);
850 		get_tree(tree);
851 		list_del(&cursor);
852 		list_add(&cursor, &tree->list);
853 		mutex_unlock(&audit_filter_mutex);
854 
855 		err = kern_path(tree->pathname, 0, &path2);
856 		if (!err) {
857 			good_one = path_is_under(&path1, &path2);
858 			path_put(&path2);
859 		}
860 
861 		if (!good_one) {
862 			put_tree(tree);
863 			mutex_lock(&audit_filter_mutex);
864 			continue;
865 		}
866 
867 		failed = iterate_mounts(tag_mount, tree, tagged);
868 		if (failed) {
869 			put_tree(tree);
870 			mutex_lock(&audit_filter_mutex);
871 			break;
872 		}
873 
874 		mutex_lock(&audit_filter_mutex);
875 		spin_lock(&hash_lock);
876 		if (!tree->goner) {
877 			list_del(&tree->list);
878 			list_add(&tree->list, &tree_list);
879 		}
880 		spin_unlock(&hash_lock);
881 		put_tree(tree);
882 	}
883 
884 	while (barrier.prev != &tree_list) {
885 		struct audit_tree *tree;
886 
887 		tree = container_of(barrier.prev, struct audit_tree, list);
888 		get_tree(tree);
889 		list_del(&tree->list);
890 		list_add(&tree->list, &barrier);
891 		mutex_unlock(&audit_filter_mutex);
892 
893 		if (!failed) {
894 			struct node *node;
895 			spin_lock(&hash_lock);
896 			list_for_each_entry(node, &tree->chunks, list)
897 				node->index &= ~(1U<<31);
898 			spin_unlock(&hash_lock);
899 		} else {
900 			trim_marked(tree);
901 		}
902 
903 		put_tree(tree);
904 		mutex_lock(&audit_filter_mutex);
905 	}
906 	list_del(&barrier);
907 	list_del(&cursor);
908 	mutex_unlock(&audit_filter_mutex);
909 	path_put(&path1);
910 	drop_collected_mounts(tagged);
911 	return failed;
912 }
913 
914 
915 static void audit_schedule_prune(void)
916 {
917 	wake_up_process(prune_thread);
918 }
919 
920 /*
921  * ... and that one is done if evict_chunk() decides to delay until the end
922  * of the syscall.  Runs synchronously.
923  */
924 void audit_kill_trees(struct list_head *list)
925 {
926 	mutex_lock(&audit_cmd_mutex);
927 	mutex_lock(&audit_filter_mutex);
928 
929 	while (!list_empty(list)) {
930 		struct audit_tree *victim;
931 
932 		victim = list_entry(list->next, struct audit_tree, list);
933 		kill_rules(victim);
934 		list_del_init(&victim->list);
935 
936 		mutex_unlock(&audit_filter_mutex);
937 
938 		prune_one(victim);
939 
940 		mutex_lock(&audit_filter_mutex);
941 	}
942 
943 	mutex_unlock(&audit_filter_mutex);
944 	mutex_unlock(&audit_cmd_mutex);
945 }
946 
947 /*
948  *  Here comes the stuff asynchronous to auditctl operations
949  */
950 
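/*
 * Called from the fsnotify ->freeing_mark() path when the mark (and hence
 * the watched inode) is going away: detach the chunk from all of its trees
 * and pull it out of the hash.  If there is no syscall about to clean up
 * (audit_killed_trees() is NULL), rules are killed here and the trees are
 * handed to the prune thread; otherwise they are queued for
 * audit_kill_trees() at syscall exit.
 */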
951 static void evict_chunk(struct audit_chunk *chunk)
952 {
953 	struct audit_tree *owner;
954 	struct list_head *postponed = audit_killed_trees();
955 	int need_prune = 0;
956 	int n;
957 
958 	if (chunk->dead)
959 		return;
960 
961 	chunk->dead = 1;
962 	mutex_lock(&audit_filter_mutex);
963 	spin_lock(&hash_lock);
964 	while (!list_empty(&chunk->trees)) {
965 		owner = list_entry(chunk->trees.next,
966 				   struct audit_tree, same_root);
967 		owner->goner = 1;
968 		owner->root = NULL;
969 		list_del_init(&owner->same_root);
970 		spin_unlock(&hash_lock);
971 		if (!postponed) {
972 			kill_rules(owner);
973 			list_move(&owner->list, &prune_list);
974 			need_prune = 1;
975 		} else {
976 			list_move(&owner->list, postponed);
977 		}
978 		spin_lock(&hash_lock);
979 	}
980 	list_del_rcu(&chunk->hash);
981 	for (n = 0; n < chunk->count; n++)
982 		list_del_init(&chunk->owners[n].list);
983 	spin_unlock(&hash_lock);
984 	mutex_unlock(&audit_filter_mutex);
985 	if (need_prune)
986 		audit_schedule_prune();
987 }
988 
989 static int audit_tree_handle_event(struct fsnotify_group *group,
990 				   struct inode *to_tell,
991 				   struct fsnotify_mark *inode_mark,
992 				   struct fsnotify_mark *vfsmount_mark,
993 				   u32 mask, const void *data, int data_type,
994 				   const unsigned char *file_name, u32 cookie,
995 				   struct fsnotify_iter_info *iter_info)
996 {
997 	return 0;
998 }
999 
1000 static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify_group *group)
1001 {
1002 	struct audit_chunk *chunk = container_of(entry, struct audit_chunk, mark);
1003 
1004 	evict_chunk(chunk);
1005 
1006 	/*
1007 	 * We are guaranteed to have at least one reference to the mark from
1008 	 * either the inode or the caller of fsnotify_destroy_mark().
1009 	 */
1010 	BUG_ON(atomic_read(&entry->refcnt) < 1);
1011 }
1012 
1013 static const struct fsnotify_ops audit_tree_ops = {
1014 	.handle_event = audit_tree_handle_event,
1015 	.freeing_mark = audit_tree_freeing_mark,
1016 	.free_mark = audit_tree_destroy_watch,
1017 };
1018 
1019 static int __init audit_tree_init(void)
1020 {
1021 	int i;
1022 
1023 	audit_tree_group = fsnotify_alloc_group(&audit_tree_ops);
1024 	if (IS_ERR(audit_tree_group))
1025 		audit_panic("cannot initialize fsnotify group for rectree watches");
1026 
1027 	for (i = 0; i < HASH_SIZE; i++)
1028 		INIT_LIST_HEAD(&chunk_hash_heads[i]);
1029 
1030 	return 0;
1031 }
1032 __initcall(audit_tree_init);
1033