xref: /linux-6.15/kernel/audit_tree.c (revision 8cd0feb5)
// SPDX-License-Identifier: GPL-2.0
#include "audit.h"
#include <linux/fsnotify_backend.h>
#include <linux/namei.h>
#include <linux/mount.h>
#include <linux/kthread.h>
#include <linux/refcount.h>
#include <linux/slab.h>

struct audit_tree;
struct audit_chunk;

struct audit_tree {
	refcount_t count;
	int goner;
	struct audit_chunk *root;
	struct list_head chunks;
	struct list_head rules;
	struct list_head list;
	struct list_head same_root;
	struct rcu_head head;
	char pathname[];
};

struct audit_chunk {
	struct list_head hash;
	unsigned long key;
	struct fsnotify_mark mark;
	struct list_head trees;		/* with root here */
	int dead;
	int count;
	atomic_long_t refs;
	struct rcu_head head;
	struct node {
		struct list_head list;
		struct audit_tree *owner;
		unsigned index;		/* index; upper bit indicates 'will prune' */
	} owners[];
};

static LIST_HEAD(tree_list);
static LIST_HEAD(prune_list);
static struct task_struct *prune_thread;

/*
 * One struct chunk is attached to each inode of interest.
 * We replace struct chunk on tagging/untagging.
 * Rules have pointer to struct audit_tree.
 * Rules have struct list_head rlist forming a list of rules over
 * the same tree.
 * References to struct chunk are collected at audit_inode{,_child}()
 * time and used in AUDIT_TREE rule matching.
 * These references are dropped at the same time we are calling
 * audit_free_names(), etc.
 *
 * Cyclic lists galore:
 * tree.chunks anchors chunk.owners[].list			hash_lock
 * tree.rules anchors rule.rlist				audit_filter_mutex
 * chunk.trees anchors tree.same_root				hash_lock
 * chunk.hash is a hash with middle bits of chunk.key as
 * a hash function.						RCU, hash_lock
 *
 * tree is refcounted; one reference for "some rules on rules_list refer to
 * it", one for each chunk with pointer to it.
 *
 * chunk is refcounted by embedded fsnotify_mark + .refs (non-zero refcount
 * of watch contributes 1 to .refs).
 *
 * node.index allows getting from node.list to the containing chunk.
 * Its MSB is stolen to mark taggings that we might have to revert -
 * several operations have very unpleasant cleanup logic, and being able
 * to tell those taggings apart makes a difference.
 */

static struct fsnotify_group *audit_tree_group;

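/* allocate a tree for pathname @s; the caller holds the initial reference */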
static struct audit_tree *alloc_tree(const char *s)
{
	struct audit_tree *tree;

	tree = kmalloc(sizeof(struct audit_tree) + strlen(s) + 1, GFP_KERNEL);
	if (tree) {
		refcount_set(&tree->count, 1);
		tree->goner = 0;
		INIT_LIST_HEAD(&tree->chunks);
		INIT_LIST_HEAD(&tree->rules);
		INIT_LIST_HEAD(&tree->list);
		INIT_LIST_HEAD(&tree->same_root);
		tree->root = NULL;
		strcpy(tree->pathname, s);
	}
	return tree;
}

static inline void get_tree(struct audit_tree *tree)
{
	refcount_inc(&tree->count);
}

static inline void put_tree(struct audit_tree *tree)
{
	if (refcount_dec_and_test(&tree->count))
		kfree_rcu(tree, head);
}

/* to avoid bringing the entire thing into audit.h */
const char *audit_tree_path(struct audit_tree *tree)
{
	return tree->pathname;
}

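/* final teardown: drop the tree references held by the owner slots */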
static void free_chunk(struct audit_chunk *chunk)
{
	int i;

	for (i = 0; i < chunk->count; i++) {
		if (chunk->owners[i].owner)
			put_tree(chunk->owners[i].owner);
	}
	kfree(chunk);
}

void audit_put_chunk(struct audit_chunk *chunk)
{
	if (atomic_long_dec_and_test(&chunk->refs))
		free_chunk(chunk);
}

static void __put_chunk(struct rcu_head *rcu)
{
	struct audit_chunk *chunk = container_of(rcu, struct audit_chunk, head);
	audit_put_chunk(chunk);
}

static void audit_tree_destroy_watch(struct fsnotify_mark *entry)
{
	struct audit_chunk *chunk = container_of(entry, struct audit_chunk, mark);
	call_rcu(&chunk->head, __put_chunk);
}

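/*
 * Allocate a chunk with room for @count owner slots; the embedded fsnotify
 * mark is initialized but not yet attached to any inode.
 */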
static struct audit_chunk *alloc_chunk(int count)
{
	struct audit_chunk *chunk;
	size_t size;
	int i;

	size = offsetof(struct audit_chunk, owners) + count * sizeof(struct node);
	chunk = kzalloc(size, GFP_KERNEL);
	if (!chunk)
		return NULL;

	INIT_LIST_HEAD(&chunk->hash);
	INIT_LIST_HEAD(&chunk->trees);
	chunk->count = count;
	atomic_long_set(&chunk->refs, 1);
	for (i = 0; i < count; i++) {
		INIT_LIST_HEAD(&chunk->owners[i].list);
		chunk->owners[i].index = i;
	}
	fsnotify_init_mark(&chunk->mark, audit_tree_group);
	chunk->mark.mask = FS_IN_IGNORED;
	return chunk;
}

enum {HASH_SIZE = 128};
static struct list_head chunk_hash_heads[HASH_SIZE];
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(hash_lock);

/* Function to return search key in our hash from inode. */
static unsigned long inode_to_key(const struct inode *inode)
{
	/* Use address pointed to by connector->obj as the key */
	return (unsigned long)&inode->i_fsnotify_marks;
}

static inline struct list_head *chunk_hash(unsigned long key)
{
	unsigned long n = key / L1_CACHE_BYTES;
	return chunk_hash_heads + n % HASH_SIZE;
}

/* hash_lock & entry->group->mark_mutex are held by caller */
static void insert_hash(struct audit_chunk *chunk)
{
	struct list_head *list;

	/*
	 * Make sure chunk is fully initialized before making it visible in the
	 * hash. Pairs with a data dependency barrier in READ_ONCE() in
	 * audit_tree_lookup().
	 */
	smp_wmb();
	WARN_ON_ONCE(!chunk->key);
	list = chunk_hash(chunk->key);
	list_add_rcu(&chunk->hash, list);
}

/* called under rcu_read_lock */
struct audit_chunk *audit_tree_lookup(const struct inode *inode)
{
	unsigned long key = inode_to_key(inode);
	struct list_head *list = chunk_hash(key);
	struct audit_chunk *p;

	list_for_each_entry_rcu(p, list, hash) {
		/*
		 * We use a data dependency barrier in READ_ONCE() to make sure
		 * the chunk we see is fully initialized.
		 */
		if (READ_ONCE(p->key) == key) {
			atomic_long_inc(&p->refs);
			return p;
		}
	}
	return NULL;
}

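/* is @tree one of the owners of @chunk? */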
bool audit_tree_match(struct audit_chunk *chunk, struct audit_tree *tree)
{
	int n;
	for (n = 0; n < chunk->count; n++)
		if (chunk->owners[n].owner == tree)
			return true;
	return false;
}

/* tagging and untagging inodes with trees */

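/*
 * Recover the chunk containing @p: node.index (sans the MSB flag) is the
 * slot number, so stepping back that many entries lands on owners[0].
 */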
static struct audit_chunk *find_chunk(struct node *p)
{
	int index = p->index & ~(1U<<31);
	p -= index;
	return container_of(p, struct audit_chunk, owners[0]);
}

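/*
 * Build @new as a copy of @old minus the owner slot @skip (if any): copy the
 * key, move the tree list and owner entries across (re-pointing tree->root at
 * @new) and finally swap the chunks in the hash under RCU.  Called with
 * hash_lock and the mark group's mark_mutex held.
 */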
static void replace_chunk(struct audit_chunk *new, struct audit_chunk *old,
			  struct node *skip)
{
	struct audit_tree *owner;
	int i, j;

	new->key = old->key;
	list_splice_init(&old->trees, &new->trees);
	list_for_each_entry(owner, &new->trees, same_root)
		owner->root = new;
	for (i = j = 0; j < old->count; i++, j++) {
		if (&old->owners[j] == skip) {
			i--;
			continue;
		}
		owner = old->owners[j].owner;
		new->owners[i].owner = owner;
		new->owners[i].index = old->owners[j].index - j + i;
		if (!owner) /* result of earlier fallback */
			continue;
		get_tree(owner);
		list_replace_init(&old->owners[j].list, &new->owners[i].list);
	}
	/*
	 * Make sure chunk is fully initialized before making it visible in the
	 * hash. Pairs with a data dependency barrier in READ_ONCE() in
	 * audit_tree_lookup().
	 */
	smp_wmb();
	list_replace_rcu(&old->hash, &new->hash);
}

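/*
 * Drop @p's owner from the chunk containing it.  Called with hash_lock held;
 * drops and retakes it.  If other trees still use the chunk it is replaced by
 * a one-slot-smaller copy, otherwise the chunk and its mark are torn down.
 * On allocation failure we fall back to just clearing the owner slot.
 */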
static void untag_chunk(struct node *p)
{
	struct audit_chunk *chunk = find_chunk(p);
	struct fsnotify_mark *entry = &chunk->mark;
	struct audit_chunk *new = NULL;
	struct audit_tree *owner;
	int size = chunk->count - 1;

	fsnotify_get_mark(entry);

	spin_unlock(&hash_lock);

	if (size)
		new = alloc_chunk(size);

	mutex_lock(&entry->group->mark_mutex);
	/*
	 * mark_mutex protects mark from getting detached and thus also from
	 * mark->connector->obj getting NULL.
	 */
	if (chunk->dead || !(entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
		mutex_unlock(&entry->group->mark_mutex);
		if (new)
			fsnotify_put_mark(&new->mark);
		goto out;
	}

	owner = p->owner;

	if (!size) {
		chunk->dead = 1;
		spin_lock(&hash_lock);
		list_del_init(&chunk->trees);
		if (owner->root == chunk)
			owner->root = NULL;
		list_del_init(&p->list);
		list_del_rcu(&chunk->hash);
		spin_unlock(&hash_lock);
		fsnotify_detach_mark(entry);
		mutex_unlock(&entry->group->mark_mutex);
		fsnotify_free_mark(entry);
		goto out;
	}

	if (!new)
		goto Fallback;

	if (fsnotify_add_mark_locked(&new->mark, entry->connector->obj,
				     FSNOTIFY_OBJ_TYPE_INODE, 1)) {
		fsnotify_put_mark(&new->mark);
		goto Fallback;
	}

	chunk->dead = 1;
	spin_lock(&hash_lock);
	if (owner->root == chunk) {
		list_del_init(&owner->same_root);
		owner->root = NULL;
	}
	list_del_init(&p->list);
	/*
	 * This has to go last when updating chunk as once replace_chunk() is
	 * called, new RCU readers can see the new chunk.
	 */
	replace_chunk(new, chunk, p);
	spin_unlock(&hash_lock);
	fsnotify_detach_mark(entry);
	mutex_unlock(&entry->group->mark_mutex);
	fsnotify_free_mark(entry);
	fsnotify_put_mark(&new->mark);	/* drop initial reference */
	goto out;

Fallback:
	// do the best we can
	spin_lock(&hash_lock);
	if (owner->root == chunk) {
		list_del_init(&owner->same_root);
		owner->root = NULL;
	}
	list_del_init(&p->list);
	p->owner = NULL;
	put_tree(owner);
	spin_unlock(&hash_lock);
	mutex_unlock(&entry->group->mark_mutex);
out:
	fsnotify_put_mark(entry);
	spin_lock(&hash_lock);
}

/* Call with group->mark_mutex held, releases it */
static int create_chunk(struct inode *inode, struct audit_tree *tree)
{
	struct fsnotify_mark *entry;
	struct audit_chunk *chunk = alloc_chunk(1);

	if (!chunk) {
		mutex_unlock(&audit_tree_group->mark_mutex);
		return -ENOMEM;
	}

	entry = &chunk->mark;
	if (fsnotify_add_inode_mark_locked(entry, inode, 0)) {
		mutex_unlock(&audit_tree_group->mark_mutex);
		fsnotify_put_mark(entry);
		return -ENOSPC;
	}

	spin_lock(&hash_lock);
	if (tree->goner) {
		spin_unlock(&hash_lock);
		chunk->dead = 1;
		fsnotify_detach_mark(entry);
		mutex_unlock(&audit_tree_group->mark_mutex);
		fsnotify_free_mark(entry);
		fsnotify_put_mark(entry);
		return 0;
	}
	chunk->owners[0].index = (1U << 31);
	chunk->owners[0].owner = tree;
	get_tree(tree);
	list_add(&chunk->owners[0].list, &tree->chunks);
	if (!tree->root) {
		tree->root = chunk;
		list_add(&tree->same_root, &chunk->trees);
	}
	chunk->key = inode_to_key(inode);
	/*
	 * Inserting into the hash table has to go last as once we do that RCU
	 * readers can see the chunk.
	 */
	insert_hash(chunk);
	spin_unlock(&hash_lock);
	mutex_unlock(&audit_tree_group->mark_mutex);
	fsnotify_put_mark(entry);	/* drop initial reference */
	return 0;
}

/* the first tagged inode becomes root of tree */
static int tag_chunk(struct inode *inode, struct audit_tree *tree)
{
	struct fsnotify_mark *old_entry, *chunk_entry;
	struct audit_chunk *chunk, *old;
	struct node *p;
	int n;

	mutex_lock(&audit_tree_group->mark_mutex);
	old_entry = fsnotify_find_mark(&inode->i_fsnotify_marks,
				       audit_tree_group);
	if (!old_entry)
		return create_chunk(inode, tree);

	old = container_of(old_entry, struct audit_chunk, mark);

	/* are we already there? */
	spin_lock(&hash_lock);
	for (n = 0; n < old->count; n++) {
		if (old->owners[n].owner == tree) {
			spin_unlock(&hash_lock);
			mutex_unlock(&audit_tree_group->mark_mutex);
			fsnotify_put_mark(old_entry);
			return 0;
		}
	}
	spin_unlock(&hash_lock);

	chunk = alloc_chunk(old->count + 1);
	if (!chunk) {
		mutex_unlock(&audit_tree_group->mark_mutex);
		fsnotify_put_mark(old_entry);
		return -ENOMEM;
	}

	chunk_entry = &chunk->mark;

	/*
	 * mark_mutex protects mark from getting detached and thus also from
	 * mark->connector->obj getting NULL.
	 */
	if (!(old_entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
		/* old_entry is being shot, let's just lie */
		mutex_unlock(&audit_tree_group->mark_mutex);
		fsnotify_put_mark(old_entry);
		fsnotify_put_mark(&chunk->mark);
		return -ENOENT;
	}

	if (fsnotify_add_mark_locked(chunk_entry, old_entry->connector->obj,
				     FSNOTIFY_OBJ_TYPE_INODE, 1)) {
		mutex_unlock(&audit_tree_group->mark_mutex);
		fsnotify_put_mark(chunk_entry);
		fsnotify_put_mark(old_entry);
		return -ENOSPC;
	}

	spin_lock(&hash_lock);
	if (tree->goner) {
		spin_unlock(&hash_lock);
		chunk->dead = 1;
		fsnotify_detach_mark(chunk_entry);
		mutex_unlock(&audit_tree_group->mark_mutex);
		fsnotify_free_mark(chunk_entry);
		fsnotify_put_mark(chunk_entry);
		fsnotify_put_mark(old_entry);
		return 0;
	}
	p = &chunk->owners[chunk->count - 1];
	p->index = (chunk->count - 1) | (1U<<31);
	p->owner = tree;
	get_tree(tree);
	list_add(&p->list, &tree->chunks);
	old->dead = 1;
	if (!tree->root) {
		tree->root = chunk;
		list_add(&tree->same_root, &chunk->trees);
	}
	/*
	 * This has to go last when updating chunk as once replace_chunk() is
	 * called, new RCU readers can see the new chunk.
	 */
	replace_chunk(chunk, old, NULL);
	spin_unlock(&hash_lock);
	fsnotify_detach_mark(old_entry);
	mutex_unlock(&audit_tree_group->mark_mutex);
	fsnotify_free_mark(old_entry);
	fsnotify_put_mark(chunk_entry);	/* drop initial reference */
	fsnotify_put_mark(old_entry); /* pair to fsnotify_find_mark() */
	return 0;
}

static void audit_tree_log_remove_rule(struct audit_krule *rule)
{
	struct audit_buffer *ab;

	if (!audit_enabled)
		return;
	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
	if (unlikely(!ab))
		return;
	audit_log_format(ab, "op=remove_rule");
	audit_log_format(ab, " dir=");
	audit_log_untrustedstring(ab, rule->tree->pathname);
	audit_log_key(ab, rule->filterkey);
	audit_log_format(ab, " list=%d res=1", rule->listnr);
	audit_log_end(ab);
}

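/*
 * Detach and free every rule attached to @tree, logging each removal.
 * Called with audit_filter_mutex held.
 */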
static void kill_rules(struct audit_tree *tree)
{
	struct audit_krule *rule, *next;
	struct audit_entry *entry;

	list_for_each_entry_safe(rule, next, &tree->rules, rlist) {
		entry = container_of(rule, struct audit_entry, rule);

		list_del_init(&rule->rlist);
		if (rule->tree) {
			/* not a half-baked one */
			audit_tree_log_remove_rule(rule);
			if (entry->rule.exe)
				audit_remove_mark(entry->rule.exe);
			rule->tree = NULL;
			list_del_rcu(&entry->list);
			list_del(&entry->rule.list);
			call_rcu(&entry->rcu, audit_free_rule_rcu);
		}
	}
}

/*
 * finish killing struct audit_tree
 */
static void prune_one(struct audit_tree *victim)
{
	spin_lock(&hash_lock);
	while (!list_empty(&victim->chunks)) {
		struct node *p;

		p = list_entry(victim->chunks.next, struct node, list);

		untag_chunk(p);
	}
	spin_unlock(&hash_lock);
	put_tree(victim);
}

/* trim the uncommitted chunks from tree */

static void trim_marked(struct audit_tree *tree)
{
	struct list_head *p, *q;
	spin_lock(&hash_lock);
	if (tree->goner) {
		spin_unlock(&hash_lock);
		return;
	}
	/* reorder */
	for (p = tree->chunks.next; p != &tree->chunks; p = q) {
		struct node *node = list_entry(p, struct node, list);
		q = p->next;
		if (node->index & (1U<<31)) {
			list_del_init(p);
			list_add(p, &tree->chunks);
		}
	}

	while (!list_empty(&tree->chunks)) {
		struct node *node;

		node = list_entry(tree->chunks.next, struct node, list);

		/* have we run out of marked? */
		if (!(node->index & (1U<<31)))
			break;

		untag_chunk(node);
	}
	if (!tree->root && !tree->goner) {
		tree->goner = 1;
		spin_unlock(&hash_lock);
		mutex_lock(&audit_filter_mutex);
		kill_rules(tree);
		list_del_init(&tree->list);
		mutex_unlock(&audit_filter_mutex);
		prune_one(tree);
	} else {
		spin_unlock(&hash_lock);
	}
}

static void audit_schedule_prune(void);

/* called with audit_filter_mutex */
int audit_remove_tree_rule(struct audit_krule *rule)
{
	struct audit_tree *tree;
	tree = rule->tree;
	if (tree) {
		spin_lock(&hash_lock);
		list_del_init(&rule->rlist);
		if (list_empty(&tree->rules) && !tree->goner) {
			tree->root = NULL;
			list_del_init(&tree->same_root);
			tree->goner = 1;
			list_move(&tree->list, &prune_list);
			rule->tree = NULL;
			spin_unlock(&hash_lock);
			audit_schedule_prune();
			return 1;
		}
		rule->tree = NULL;
		spin_unlock(&hash_lock);
		return 1;
	}
	return 0;
}

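/* iterate_mounts() callback: does the root of @mnt hash to the key in @arg? */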
static int compare_root(struct vfsmount *mnt, void *arg)
{
	return inode_to_key(d_backing_inode(mnt->mnt_root)) ==
	       (unsigned long)arg;
}

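/*
 * Walk all trees on tree_list and untag chunks that are no longer reachable
 * under the tree's pathname.  The on-stack cursor lets us drop
 * audit_filter_mutex while each tree is being processed.
 */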
void audit_trim_trees(void)
{
	struct list_head cursor;

	mutex_lock(&audit_filter_mutex);
	list_add(&cursor, &tree_list);
	while (cursor.next != &tree_list) {
		struct audit_tree *tree;
		struct path path;
		struct vfsmount *root_mnt;
		struct node *node;
		int err;

		tree = container_of(cursor.next, struct audit_tree, list);
		get_tree(tree);
		list_del(&cursor);
		list_add(&cursor, &tree->list);
		mutex_unlock(&audit_filter_mutex);

		err = kern_path(tree->pathname, 0, &path);
		if (err)
			goto skip_it;

		root_mnt = collect_mounts(&path);
		path_put(&path);
		if (IS_ERR(root_mnt))
			goto skip_it;

		spin_lock(&hash_lock);
		list_for_each_entry(node, &tree->chunks, list) {
			struct audit_chunk *chunk = find_chunk(node);
			/* this could be NULL if the watch is dying elsewhere... */
			node->index |= 1U<<31;
			if (iterate_mounts(compare_root,
					   (void *)(chunk->key),
					   root_mnt))
				node->index &= ~(1U<<31);
		}
		spin_unlock(&hash_lock);
		trim_marked(tree);
		drop_collected_mounts(root_mnt);
skip_it:
		put_tree(tree);
		mutex_lock(&audit_filter_mutex);
	}
	list_del(&cursor);
	mutex_unlock(&audit_filter_mutex);
}

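/*
 * Attach a freshly allocated audit_tree for @pathname to @rule.  Only
 * absolute paths on the exit filter with Audit_equal are accepted, and the
 * rule must not already have an inode, watch or tree attached.
 */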
int audit_make_tree(struct audit_krule *rule, char *pathname, u32 op)
{

	if (pathname[0] != '/' ||
	    rule->listnr != AUDIT_FILTER_EXIT ||
	    op != Audit_equal ||
	    rule->inode_f || rule->watch || rule->tree)
		return -EINVAL;
	rule->tree = alloc_tree(pathname);
	if (!rule->tree)
		return -ENOMEM;
	return 0;
}

void audit_put_tree(struct audit_tree *tree)
{
	put_tree(tree);
}

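/* iterate_mounts() callback: tag @mnt's root inode with the tree in @arg */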
static int tag_mount(struct vfsmount *mnt, void *arg)
{
	return tag_chunk(d_backing_inode(mnt->mnt_root), arg);
}

/*
 * That gets run when evict_chunk() ends up needing to kill audit_tree.
 * Runs from a separate thread.
 */
static int prune_tree_thread(void *unused)
{
	for (;;) {
		if (list_empty(&prune_list)) {
			set_current_state(TASK_INTERRUPTIBLE);
			schedule();
		}

		audit_ctl_lock();
		mutex_lock(&audit_filter_mutex);

		while (!list_empty(&prune_list)) {
			struct audit_tree *victim;

			victim = list_entry(prune_list.next,
					struct audit_tree, list);
			list_del_init(&victim->list);

			mutex_unlock(&audit_filter_mutex);

			prune_one(victim);

			mutex_lock(&audit_filter_mutex);
		}

		mutex_unlock(&audit_filter_mutex);
		audit_ctl_unlock();
	}
	return 0;
}

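/* start the prune thread on first use; returns 0 if it is already running */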
static int audit_launch_prune(void)
{
	if (prune_thread)
		return 0;
	prune_thread = kthread_run(prune_tree_thread, NULL,
				"audit_prune_tree");
	if (IS_ERR(prune_thread)) {
		pr_err("cannot start thread audit_prune_tree\n");
		prune_thread = NULL;
		return -ENOMEM;
	}
	return 0;
}

/* called with audit_filter_mutex */
int audit_add_tree_rule(struct audit_krule *rule)
{
	struct audit_tree *seed = rule->tree, *tree;
	struct path path;
	struct vfsmount *mnt;
	int err;

	rule->tree = NULL;
	list_for_each_entry(tree, &tree_list, list) {
		if (!strcmp(seed->pathname, tree->pathname)) {
			put_tree(seed);
			rule->tree = tree;
			list_add(&rule->rlist, &tree->rules);
			return 0;
		}
	}
	tree = seed;
	list_add(&tree->list, &tree_list);
	list_add(&rule->rlist, &tree->rules);
	/* do not set rule->tree yet */
	mutex_unlock(&audit_filter_mutex);

	if (unlikely(!prune_thread)) {
		err = audit_launch_prune();
		if (err)
			goto Err;
	}

	err = kern_path(tree->pathname, 0, &path);
	if (err)
		goto Err;
	mnt = collect_mounts(&path);
	path_put(&path);
	if (IS_ERR(mnt)) {
		err = PTR_ERR(mnt);
		goto Err;
	}

	get_tree(tree);
	err = iterate_mounts(tag_mount, tree, mnt);
	drop_collected_mounts(mnt);

	if (!err) {
		struct node *node;
		spin_lock(&hash_lock);
		list_for_each_entry(node, &tree->chunks, list)
			node->index &= ~(1U<<31);
		spin_unlock(&hash_lock);
	} else {
		trim_marked(tree);
		goto Err;
	}

	mutex_lock(&audit_filter_mutex);
	if (list_empty(&rule->rlist)) {
		put_tree(tree);
		return -ENOENT;
	}
	rule->tree = tree;
	put_tree(tree);

	return 0;
Err:
	mutex_lock(&audit_filter_mutex);
	list_del_init(&tree->list);
	list_del_init(&tree->rules);
	put_tree(tree);
	return err;
}

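/*
 * For every tree whose pathname covers @old, also tag the mounts collected
 * at @new, so that objects reachable via the new location match as well.
 */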
int audit_tag_tree(char *old, char *new)
{
	struct list_head cursor, barrier;
	int failed = 0;
	struct path path1, path2;
	struct vfsmount *tagged;
	int err;

	err = kern_path(new, 0, &path2);
	if (err)
		return err;
	tagged = collect_mounts(&path2);
	path_put(&path2);
	if (IS_ERR(tagged))
		return PTR_ERR(tagged);

	err = kern_path(old, 0, &path1);
	if (err) {
		drop_collected_mounts(tagged);
		return err;
	}

	mutex_lock(&audit_filter_mutex);
	list_add(&barrier, &tree_list);
	list_add(&cursor, &barrier);

	while (cursor.next != &tree_list) {
		struct audit_tree *tree;
		int good_one = 0;

		tree = container_of(cursor.next, struct audit_tree, list);
		get_tree(tree);
		list_del(&cursor);
		list_add(&cursor, &tree->list);
		mutex_unlock(&audit_filter_mutex);

		err = kern_path(tree->pathname, 0, &path2);
		if (!err) {
			good_one = path_is_under(&path1, &path2);
			path_put(&path2);
		}

		if (!good_one) {
			put_tree(tree);
			mutex_lock(&audit_filter_mutex);
			continue;
		}

		failed = iterate_mounts(tag_mount, tree, tagged);
		if (failed) {
			put_tree(tree);
			mutex_lock(&audit_filter_mutex);
			break;
		}

		mutex_lock(&audit_filter_mutex);
		spin_lock(&hash_lock);
		if (!tree->goner) {
			list_del(&tree->list);
			list_add(&tree->list, &tree_list);
		}
		spin_unlock(&hash_lock);
		put_tree(tree);
	}

	while (barrier.prev != &tree_list) {
		struct audit_tree *tree;

		tree = container_of(barrier.prev, struct audit_tree, list);
		get_tree(tree);
		list_del(&tree->list);
		list_add(&tree->list, &barrier);
		mutex_unlock(&audit_filter_mutex);

		if (!failed) {
			struct node *node;
			spin_lock(&hash_lock);
			list_for_each_entry(node, &tree->chunks, list)
				node->index &= ~(1U<<31);
			spin_unlock(&hash_lock);
		} else {
			trim_marked(tree);
		}

		put_tree(tree);
		mutex_lock(&audit_filter_mutex);
	}
	list_del(&barrier);
	list_del(&cursor);
	mutex_unlock(&audit_filter_mutex);
	path_put(&path1);
	drop_collected_mounts(tagged);
	return failed;
}


static void audit_schedule_prune(void)
{
	wake_up_process(prune_thread);
}

/*
 * ... and that one is done if evict_chunk() decides to delay until the end
 * of syscall.  Runs synchronously.
 */
void audit_kill_trees(struct list_head *list)
{
	audit_ctl_lock();
	mutex_lock(&audit_filter_mutex);

	while (!list_empty(list)) {
		struct audit_tree *victim;

		victim = list_entry(list->next, struct audit_tree, list);
		kill_rules(victim);
		list_del_init(&victim->list);

		mutex_unlock(&audit_filter_mutex);

		prune_one(victim);

		mutex_lock(&audit_filter_mutex);
	}

	mutex_unlock(&audit_filter_mutex);
	audit_ctl_unlock();
}

/*
 *  Here comes the stuff asynchronous to auditctl operations
 */

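/*
 * The mark (and thus the inode it was attached to) is going away: detach the
 * chunk from its trees, kill or queue the orphaned trees for pruning and
 * drop the chunk from the hash.
 */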
static void evict_chunk(struct audit_chunk *chunk)
{
	struct audit_tree *owner;
	struct list_head *postponed = audit_killed_trees();
	int need_prune = 0;
	int n;

	if (chunk->dead)
		return;

	chunk->dead = 1;
	mutex_lock(&audit_filter_mutex);
	spin_lock(&hash_lock);
	while (!list_empty(&chunk->trees)) {
		owner = list_entry(chunk->trees.next,
				   struct audit_tree, same_root);
		owner->goner = 1;
		owner->root = NULL;
		list_del_init(&owner->same_root);
		spin_unlock(&hash_lock);
		if (!postponed) {
			kill_rules(owner);
			list_move(&owner->list, &prune_list);
			need_prune = 1;
		} else {
			list_move(&owner->list, postponed);
		}
		spin_lock(&hash_lock);
	}
	list_del_rcu(&chunk->hash);
	for (n = 0; n < chunk->count; n++)
		list_del_init(&chunk->owners[n].list);
	spin_unlock(&hash_lock);
	mutex_unlock(&audit_filter_mutex);
	if (need_prune)
		audit_schedule_prune();
}

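/* we only care about the mark lifetime, not about the events themselves */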
static int audit_tree_handle_event(struct fsnotify_group *group,
				   struct inode *to_tell,
				   u32 mask, const void *data, int data_type,
				   const unsigned char *file_name, u32 cookie,
				   struct fsnotify_iter_info *iter_info)
{
	return 0;
}

static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify_group *group)
{
	struct audit_chunk *chunk = container_of(entry, struct audit_chunk, mark);

	evict_chunk(chunk);

	/*
	 * We are guaranteed to have at least one reference to the mark from
	 * either the inode or the caller of fsnotify_destroy_mark().
	 */
	BUG_ON(refcount_read(&entry->refcnt) < 1);
}

static const struct fsnotify_ops audit_tree_ops = {
	.handle_event = audit_tree_handle_event,
	.freeing_mark = audit_tree_freeing_mark,
	.free_mark = audit_tree_destroy_watch,
};

static int __init audit_tree_init(void)
{
	int i;

	audit_tree_group = fsnotify_alloc_group(&audit_tree_ops);
	if (IS_ERR(audit_tree_group))
		audit_panic("cannot initialize fsnotify group for rectree watches");

	for (i = 0; i < HASH_SIZE; i++)
		INIT_LIST_HEAD(&chunk_hash_heads[i]);

	return 0;
}
__initcall(audit_tree_init);
1035