xref: /linux-6.15/kernel/audit_tree.c (revision c22fcde7)
1 // SPDX-License-Identifier: GPL-2.0
2 #include "audit.h"
3 #include <linux/fsnotify_backend.h>
4 #include <linux/namei.h>
5 #include <linux/mount.h>
6 #include <linux/kthread.h>
7 #include <linux/refcount.h>
8 #include <linux/slab.h>
9 
10 struct audit_tree;
11 struct audit_chunk;
12 
/*
 * One audited directory tree, identified by the pathname it was created
 * for.  Refcounted through .count and freed via RCU (see put_tree()).
 */
struct audit_tree {
	/* reference count; see get_tree()/put_tree() */
	refcount_t count;
	/* non-zero once the tree has been marked for killing */
	int goner;
	/* chunk this tree is rooted at, or NULL */
	struct audit_chunk *root;
	/* anchors chunk.owners[].list of every chunk tagged with this tree */
	struct list_head chunks;
	/* anchors rule.rlist of the rules referring to this tree */
	struct list_head rules;
	/* link in tree_list, prune_list or a caller-supplied kill list */
	struct list_head list;
	/* link in the root chunk's .trees list */
	struct list_head same_root;
	/* used by kfree_rcu() in put_tree() */
	struct rcu_head head;
	/* NUL-terminated path, copied in by alloc_tree() */
	char pathname[];
};
24 
/*
 * Per-inode tagging record: which trees an inode belongs to.  Allocated
 * with a variable number of owner slots by alloc_chunk().
 */
struct audit_chunk {
	/* link in one of the chunk_hash_heads[] buckets */
	struct list_head hash;
	/* hash key; set from inode_to_key() or copied from the old chunk */
	unsigned long key;
	/* separately allocated fsnotify mark that points back to this chunk */
	struct fsnotify_mark *mark;
	struct list_head trees;		/* with root here */
	/* set once the chunk is being replaced, removed or evicted */
	int dead;
	/* number of slots in owners[] */
	int count;
	/* reference count; starts at 1, dropped by audit_put_chunk() */
	atomic_long_t refs;
	/* used by call_rcu() in audit_mark_put_chunk() */
	struct rcu_head head;
	struct node {
		/* link in owner->chunks */
		struct list_head list;
		/* tree occupying this slot, or NULL if the slot is free */
		struct audit_tree *owner;
		unsigned index;		/* index; upper bit indicates 'will prune' */
	} owners[];
};
40 
/*
 * fsnotify mark wrapper used by this file: the generic mark plus a back
 * pointer to the chunk it was allocated for (see alloc_chunk()).
 */
struct audit_tree_mark {
	struct fsnotify_mark mark;
	struct audit_chunk *chunk;
};
45 
/* all live trees, linked through audit_tree.list */
static LIST_HEAD(tree_list);
/* trees queued for prune_tree_thread(), linked through audit_tree.list */
static LIST_HEAD(prune_list);
/* kthread running prune_tree_thread(); NULL until audit_launch_prune() */
static struct task_struct *prune_thread;
49 
50 /*
51  * One struct chunk is attached to each inode of interest.
52  * We replace struct chunk on tagging/untagging.
53  * Rules have pointer to struct audit_tree.
54  * Rules have struct list_head rlist forming a list of rules over
55  * the same tree.
56  * References to struct chunk are collected at audit_inode{,_child}()
57  * time and used in AUDIT_TREE rule matching.
58  * These references are dropped at the same time we are calling
59  * audit_free_names(), etc.
60  *
61  * Cyclic lists galore:
62  * tree.chunks anchors chunk.owners[].list			hash_lock
63  * tree.rules anchors rule.rlist				audit_filter_mutex
64  * chunk.trees anchors tree.same_root				hash_lock
 * chunk.hash links the chunk into a hash chain; the bucket is picked from
 * the middle bits of chunk.key (see chunk_hash()).		RCU, hash_lock
67  *
68  * tree is refcounted; one reference for "some rules on rules_list refer to
69  * it", one for each chunk with pointer to it.
70  *
 * chunk is refcounted by .refs.  The fsnotify_mark is allocated separately
 * (struct audit_tree_mark) and points back to the chunk; the mark accounts
 * for 1 in .refs, which is dropped when the mark is freed.
73  *
74  * node.index allows to get from node.list to containing chunk.
75  * MSB of that sucker is stolen to mark taggings that we might have to
76  * revert - several operations have very unpleasant cleanup logics and
77  * that makes a difference.  Some.
78  */
79 
/* fsnotify group owning every mark created here; set in audit_tree_init() */
static struct fsnotify_group *audit_tree_group;
/* slab cache for struct audit_tree_mark; set in audit_tree_init() */
static struct kmem_cache *audit_tree_mark_cachep __read_mostly;
82 
83 static struct audit_tree *alloc_tree(const char *s)
84 {
85 	struct audit_tree *tree;
86 
87 	tree = kmalloc(sizeof(struct audit_tree) + strlen(s) + 1, GFP_KERNEL);
88 	if (tree) {
89 		refcount_set(&tree->count, 1);
90 		tree->goner = 0;
91 		INIT_LIST_HEAD(&tree->chunks);
92 		INIT_LIST_HEAD(&tree->rules);
93 		INIT_LIST_HEAD(&tree->list);
94 		INIT_LIST_HEAD(&tree->same_root);
95 		tree->root = NULL;
96 		strcpy(tree->pathname, s);
97 	}
98 	return tree;
99 }
100 
/* Take an additional reference on @tree. */
static inline void get_tree(struct audit_tree *tree)
{
	refcount_inc(&tree->count);
}
105 
106 static inline void put_tree(struct audit_tree *tree)
107 {
108 	if (refcount_dec_and_test(&tree->count))
109 		kfree_rcu(tree, head);
110 }
111 
/*
 * Accessor for the tree's pathname, provided
 * to avoid bringing the entire thing in audit.h
 */
const char *audit_tree_path(struct audit_tree *tree)
{
	return tree->pathname;
}
117 
118 static void free_chunk(struct audit_chunk *chunk)
119 {
120 	int i;
121 
122 	for (i = 0; i < chunk->count; i++) {
123 		if (chunk->owners[i].owner)
124 			put_tree(chunk->owners[i].owner);
125 	}
126 	kfree(chunk);
127 }
128 
129 void audit_put_chunk(struct audit_chunk *chunk)
130 {
131 	if (atomic_long_dec_and_test(&chunk->refs))
132 		free_chunk(chunk);
133 }
134 
135 static void __put_chunk(struct rcu_head *rcu)
136 {
137 	struct audit_chunk *chunk = container_of(rcu, struct audit_chunk, head);
138 	audit_put_chunk(chunk);
139 }
140 
/*
 * Drop reference to the chunk that was held by the mark. This is the reference
 * that gets dropped after we've removed the chunk from the hash table and we
 * use it to make sure chunk cannot be freed before RCU grace period expires.
 *
 * The drop itself is deferred: call_rcu() queues __put_chunk(), which calls
 * audit_put_chunk() on the chunk.
 */
static void audit_mark_put_chunk(struct audit_chunk *chunk)
{
	call_rcu(&chunk->head, __put_chunk);
}
150 
/* Get the struct audit_tree_mark that embeds fsnotify mark @entry. */
static inline struct audit_tree_mark *audit_mark(struct fsnotify_mark *entry)
{
	return container_of(entry, struct audit_tree_mark, mark);
}
155 
/* Return the chunk that fsnotify mark @mark points back to. */
static struct audit_chunk *mark_chunk(struct fsnotify_mark *mark)
{
	return audit_mark(mark)->chunk;
}
160 
161 static void audit_tree_destroy_watch(struct fsnotify_mark *entry)
162 {
163 	struct audit_chunk *chunk = mark_chunk(entry);
164 	audit_mark_put_chunk(chunk);
165 	kmem_cache_free(audit_tree_mark_cachep, audit_mark(entry));
166 }
167 
168 static struct fsnotify_mark *alloc_mark(void)
169 {
170 	struct audit_tree_mark *amark;
171 
172 	amark = kmem_cache_zalloc(audit_tree_mark_cachep, GFP_KERNEL);
173 	if (!amark)
174 		return NULL;
175 	fsnotify_init_mark(&amark->mark, audit_tree_group);
176 	amark->mark.mask = FS_IN_IGNORED;
177 	return &amark->mark;
178 }
179 
180 static struct audit_chunk *alloc_chunk(int count)
181 {
182 	struct audit_chunk *chunk;
183 	size_t size;
184 	int i;
185 
186 	size = offsetof(struct audit_chunk, owners) + count * sizeof(struct node);
187 	chunk = kzalloc(size, GFP_KERNEL);
188 	if (!chunk)
189 		return NULL;
190 
191 	chunk->mark = alloc_mark();
192 	if (!chunk->mark) {
193 		kfree(chunk);
194 		return NULL;
195 	}
196 	audit_mark(chunk->mark)->chunk = chunk;
197 
198 	INIT_LIST_HEAD(&chunk->hash);
199 	INIT_LIST_HEAD(&chunk->trees);
200 	chunk->count = count;
201 	atomic_long_set(&chunk->refs, 1);
202 	for (i = 0; i < count; i++) {
203 		INIT_LIST_HEAD(&chunk->owners[i].list);
204 		chunk->owners[i].index = i;
205 	}
206 	return chunk;
207 }
208 
/* number of buckets in the chunk hash table */
enum {HASH_SIZE = 128};
/* hash buckets of chunks, indexed by chunk_hash(); set up in audit_tree_init() */
static struct list_head chunk_hash_heads[HASH_SIZE];
/* protects the hash chains and the tree/chunk lists (see the big comment above) */
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(hash_lock);
212 
/*
 * Function to return search key in our hash from inode.
 * The key is the address of the inode's i_fsnotify_marks field.
 */
static unsigned long inode_to_key(const struct inode *inode)
{
	/* Use address pointed to by connector->obj as the key */
	return (unsigned long)&inode->i_fsnotify_marks;
}
219 
220 static inline struct list_head *chunk_hash(unsigned long key)
221 {
222 	unsigned long n = key / L1_CACHE_BYTES;
223 	return chunk_hash_heads + n % HASH_SIZE;
224 }
225 
/*
 * Publish @chunk in the hash bucket selected by chunk->key.
 * hash_lock & entry->group->mark_mutex are held by caller.
 */
static void insert_hash(struct audit_chunk *chunk)
{
	struct list_head *list;

	/*
	 * Make sure chunk is fully initialized before making it visible in the
	 * hash. Pairs with a data dependency barrier in READ_ONCE() in
	 * audit_tree_lookup().
	 */
	smp_wmb();
	/* a chunk must have had its key set before it is hashed */
	WARN_ON_ONCE(!chunk->key);
	list = chunk_hash(chunk->key);
	list_add_rcu(&chunk->hash, list);
}
241 
/*
 * Find the chunk hashed for @inode.
 *
 * Returns the chunk with .refs incremented (audit_put_chunk() is the
 * matching drop), or NULL if no chunk with that key is hashed.
 *
 * called under rcu_read_lock
 */
struct audit_chunk *audit_tree_lookup(const struct inode *inode)
{
	unsigned long key = inode_to_key(inode);
	struct list_head *list = chunk_hash(key);
	struct audit_chunk *p;

	list_for_each_entry_rcu(p, list, hash) {
		/*
		 * We use a data dependency barrier in READ_ONCE() to make sure
		 * the chunk we see is fully initialized.
		 */
		if (READ_ONCE(p->key) == key) {
			atomic_long_inc(&p->refs);
			return p;
		}
	}
	return NULL;
}
261 
262 bool audit_tree_match(struct audit_chunk *chunk, struct audit_tree *tree)
263 {
264 	int n;
265 	for (n = 0; n < chunk->count; n++)
266 		if (chunk->owners[n].owner == tree)
267 			return true;
268 	return false;
269 }
270 
271 /* tagging and untagging inodes with trees */
272 
273 static struct audit_chunk *find_chunk(struct node *p)
274 {
275 	int index = p->index & ~(1U<<31);
276 	p -= index;
277 	return container_of(p, struct audit_chunk, owners[0]);
278 }
279 
280 static void replace_chunk(struct audit_chunk *new, struct audit_chunk *old)
281 {
282 	struct audit_tree *owner;
283 	int i, j;
284 
285 	new->key = old->key;
286 	list_splice_init(&old->trees, &new->trees);
287 	list_for_each_entry(owner, &new->trees, same_root)
288 		owner->root = new;
289 	for (i = j = 0; j < old->count; i++, j++) {
290 		if (!old->owners[j].owner) {
291 			i--;
292 			continue;
293 		}
294 		owner = old->owners[j].owner;
295 		new->owners[i].owner = owner;
296 		new->owners[i].index = old->owners[j].index - j + i;
297 		if (!owner) /* result of earlier fallback */
298 			continue;
299 		get_tree(owner);
300 		list_replace_init(&old->owners[j].list, &new->owners[i].list);
301 	}
302 	/*
303 	 * Make sure chunk is fully initialized before making it visible in the
304 	 * hash. Pairs with a data dependency barrier in READ_ONCE() in
305 	 * audit_tree_lookup().
306 	 */
307 	smp_wmb();
308 	list_replace_rcu(&old->hash, &new->hash);
309 }
310 
311 static void remove_chunk_node(struct audit_chunk *chunk, struct node *p)
312 {
313 	struct audit_tree *owner = p->owner;
314 
315 	if (owner->root == chunk) {
316 		list_del_init(&owner->same_root);
317 		owner->root = NULL;
318 	}
319 	list_del_init(&p->list);
320 	p->owner = NULL;
321 	put_tree(owner);
322 }
323 
324 static int chunk_count_trees(struct audit_chunk *chunk)
325 {
326 	int i;
327 	int ret = 0;
328 
329 	for (i = 0; i < chunk->count; i++)
330 		if (chunk->owners[i].owner)
331 			ret++;
332 	return ret;
333 }
334 
/*
 * Remove the tagging represented by owner slot @p from its chunk.
 *
 * Entered with hash_lock held; the lock is dropped while the mark mutex is
 * taken and is held again on return.  After the slot is released the chunk
 * is either removed entirely (no trees left) or replaced by a smaller
 * chunk holding only the remaining trees.  If the replacement cannot be
 * allocated or attached, the old chunk is simply left in place with the
 * slot freed.
 */
static void untag_chunk(struct node *p)
{
	struct audit_chunk *chunk = find_chunk(p);
	struct fsnotify_mark *entry = chunk->mark;
	struct audit_chunk *new = NULL;
	int size;

	remove_chunk_node(chunk, p);
	/* pin the mark so it stays around once hash_lock is dropped */
	fsnotify_get_mark(entry);
	spin_unlock(&hash_lock);

	mutex_lock(&entry->group->mark_mutex);
	/*
	 * mark_mutex protects mark from getting detached and thus also from
	 * mark->connector->obj getting NULL.
	 */
	if (chunk->dead || !(entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
		mutex_unlock(&entry->group->mark_mutex);
		/* new is still NULL at this point, so this put never runs */
		if (new)
			fsnotify_put_mark(new->mark);
		goto out;
	}

	size = chunk_count_trees(chunk);
	if (!size) {
		/* nobody left: unhash the chunk and destroy its mark */
		chunk->dead = 1;
		spin_lock(&hash_lock);
		list_del_init(&chunk->trees);
		list_del_rcu(&chunk->hash);
		spin_unlock(&hash_lock);
		fsnotify_detach_mark(entry);
		mutex_unlock(&entry->group->mark_mutex);
		fsnotify_free_mark(entry);
		goto out;
	}

	/* some trees remain: build a chunk with exactly that many slots */
	new = alloc_chunk(size);
	if (!new)
		goto out_mutex;

	if (fsnotify_add_mark_locked(new->mark, entry->connector->obj,
				     FSNOTIFY_OBJ_TYPE_INODE, 1)) {
		fsnotify_put_mark(new->mark);
		goto out_mutex;
	}

	chunk->dead = 1;
	spin_lock(&hash_lock);
	/*
	 * This has to go last when updating chunk as once replace_chunk() is
	 * called, new RCU readers can see the new chunk.
	 */
	replace_chunk(new, chunk);
	spin_unlock(&hash_lock);
	fsnotify_detach_mark(entry);
	mutex_unlock(&entry->group->mark_mutex);
	fsnotify_free_mark(entry);
	fsnotify_put_mark(new->mark);	/* drop initial reference */
	goto out;

out_mutex:
	mutex_unlock(&entry->group->mark_mutex);
out:
	/* pairs with fsnotify_get_mark() above */
	fsnotify_put_mark(entry);
	/* restore the locking state the caller expects */
	spin_lock(&hash_lock);
}
401 
/*
 * Tag @inode, which has no audit-tree mark yet, with @tree by creating a
 * fresh single-slot chunk for it.
 *
 * Returns 0 on success (including the case where @tree turned out to be a
 * goner and nothing was tagged), -ENOMEM if the chunk cannot be allocated,
 * -ENOSPC if the mark cannot be attached to the inode.
 *
 * Call with group->mark_mutex held, releases it
 */
static int create_chunk(struct inode *inode, struct audit_tree *tree)
{
	struct fsnotify_mark *entry;
	struct audit_chunk *chunk = alloc_chunk(1);

	if (!chunk) {
		mutex_unlock(&audit_tree_group->mark_mutex);
		return -ENOMEM;
	}

	entry = chunk->mark;
	if (fsnotify_add_inode_mark_locked(entry, inode, 0)) {
		mutex_unlock(&audit_tree_group->mark_mutex);
		fsnotify_put_mark(entry);
		return -ENOSPC;
	}

	spin_lock(&hash_lock);
	if (tree->goner) {
		/* tree is being killed: undo the mark we just attached */
		spin_unlock(&hash_lock);
		chunk->dead = 1;
		fsnotify_detach_mark(entry);
		mutex_unlock(&audit_tree_group->mark_mutex);
		fsnotify_free_mark(entry);
		fsnotify_put_mark(entry);
		return 0;
	}
	/* slot 0, flagged with the top bit as a not-yet-committed tagging */
	chunk->owners[0].index = (1U << 31);
	chunk->owners[0].owner = tree;
	get_tree(tree);
	list_add(&chunk->owners[0].list, &tree->chunks);
	if (!tree->root) {
		tree->root = chunk;
		list_add(&tree->same_root, &chunk->trees);
	}
	chunk->key = inode_to_key(inode);
	/*
	 * Inserting into the hash table has to go last as once we do that RCU
	 * readers can see the chunk.
	 */
	insert_hash(chunk);
	spin_unlock(&hash_lock);
	mutex_unlock(&audit_tree_group->mark_mutex);
	fsnotify_put_mark(entry);	/* drop initial reference */
	return 0;
}
449 
/*
 * Tag @inode with @tree.
 *
 * If the inode has no audit-tree mark, create_chunk() does the work.
 * Otherwise, unless @tree already owns a slot in the existing chunk, a new
 * chunk with one more slot is allocated, attached to the same object and
 * swapped in for the old one; the new slot is flagged with the top bit of
 * .index.
 *
 * Returns 0 on success (also when already tagged or when @tree is a
 * goner), -ENOMEM, -ENOSPC, or -ENOENT if the old mark is being detached.
 *
 * the first tagged inode becomes root of tree
 */
static int tag_chunk(struct inode *inode, struct audit_tree *tree)
{
	struct fsnotify_mark *old_entry, *chunk_entry;
	struct audit_chunk *chunk, *old;
	struct node *p;
	int n;

	mutex_lock(&audit_tree_group->mark_mutex);
	old_entry = fsnotify_find_mark(&inode->i_fsnotify_marks,
				       audit_tree_group);
	if (!old_entry)
		/* create_chunk() releases mark_mutex for us */
		return create_chunk(inode, tree);

	old = mark_chunk(old_entry);

	/* are we already there? */
	spin_lock(&hash_lock);
	for (n = 0; n < old->count; n++) {
		if (old->owners[n].owner == tree) {
			spin_unlock(&hash_lock);
			mutex_unlock(&audit_tree_group->mark_mutex);
			fsnotify_put_mark(old_entry);
			return 0;
		}
	}
	spin_unlock(&hash_lock);

	/* room for everything in old plus the new tagging */
	chunk = alloc_chunk(old->count + 1);
	if (!chunk) {
		mutex_unlock(&audit_tree_group->mark_mutex);
		fsnotify_put_mark(old_entry);
		return -ENOMEM;
	}

	chunk_entry = chunk->mark;

	/*
	 * mark_mutex protects mark from getting detached and thus also from
	 * mark->connector->obj getting NULL.
	 */
	if (!(old_entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
		/* old_entry is being shot, lets just lie */
		mutex_unlock(&audit_tree_group->mark_mutex);
		fsnotify_put_mark(old_entry);
		fsnotify_put_mark(chunk->mark);
		return -ENOENT;
	}

	if (fsnotify_add_mark_locked(chunk_entry, old_entry->connector->obj,
				     FSNOTIFY_OBJ_TYPE_INODE, 1)) {
		mutex_unlock(&audit_tree_group->mark_mutex);
		fsnotify_put_mark(chunk_entry);
		fsnotify_put_mark(old_entry);
		return -ENOSPC;
	}

	spin_lock(&hash_lock);
	if (tree->goner) {
		/* tree is being killed: undo the mark we just attached */
		spin_unlock(&hash_lock);
		chunk->dead = 1;
		fsnotify_detach_mark(chunk_entry);
		mutex_unlock(&audit_tree_group->mark_mutex);
		fsnotify_free_mark(chunk_entry);
		fsnotify_put_mark(chunk_entry);
		fsnotify_put_mark(old_entry);
		return 0;
	}
	/* the new tagging goes into the last slot, flagged as uncommitted */
	p = &chunk->owners[chunk->count - 1];
	p->index = (chunk->count - 1) | (1U<<31);
	p->owner = tree;
	get_tree(tree);
	list_add(&p->list, &tree->chunks);
	old->dead = 1;
	if (!tree->root) {
		tree->root = chunk;
		list_add(&tree->same_root, &chunk->trees);
	}
	/*
	 * This has to go last when updating chunk as once replace_chunk() is
	 * called, new RCU readers can see the new chunk.
	 */
	replace_chunk(chunk, old);
	spin_unlock(&hash_lock);
	fsnotify_detach_mark(old_entry);
	mutex_unlock(&audit_tree_group->mark_mutex);
	fsnotify_free_mark(old_entry);
	fsnotify_put_mark(chunk_entry);	/* drop initial reference */
	fsnotify_put_mark(old_entry); /* pair to fsnotify_find mark_entry */
	return 0;
}
541 
542 static void audit_tree_log_remove_rule(struct audit_krule *rule)
543 {
544 	struct audit_buffer *ab;
545 
546 	if (!audit_enabled)
547 		return;
548 	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
549 	if (unlikely(!ab))
550 		return;
551 	audit_log_format(ab, "op=remove_rule");
552 	audit_log_format(ab, " dir=");
553 	audit_log_untrustedstring(ab, rule->tree->pathname);
554 	audit_log_key(ab, rule->filterkey);
555 	audit_log_format(ab, " list=%d res=1", rule->listnr);
556 	audit_log_end(ab);
557 }
558 
559 static void kill_rules(struct audit_tree *tree)
560 {
561 	struct audit_krule *rule, *next;
562 	struct audit_entry *entry;
563 
564 	list_for_each_entry_safe(rule, next, &tree->rules, rlist) {
565 		entry = container_of(rule, struct audit_entry, rule);
566 
567 		list_del_init(&rule->rlist);
568 		if (rule->tree) {
569 			/* not a half-baked one */
570 			audit_tree_log_remove_rule(rule);
571 			if (entry->rule.exe)
572 				audit_remove_mark(entry->rule.exe);
573 			rule->tree = NULL;
574 			list_del_rcu(&entry->list);
575 			list_del(&entry->rule.list);
576 			call_rcu(&entry->rcu, audit_free_rule_rcu);
577 		}
578 	}
579 }
580 
581 /*
582  * finish killing struct audit_tree
583  */
584 static void prune_one(struct audit_tree *victim)
585 {
586 	spin_lock(&hash_lock);
587 	while (!list_empty(&victim->chunks)) {
588 		struct node *p;
589 
590 		p = list_entry(victim->chunks.next, struct node, list);
591 
592 		untag_chunk(p);
593 	}
594 	spin_unlock(&hash_lock);
595 	put_tree(victim);
596 }
597 
/*
 * trim the uncommitted chunks from tree
 *
 * Every tagging whose .index has the top bit set is removed.  If that
 * leaves the tree without a root, the tree itself is killed: its rules
 * are removed and its remaining chunks pruned.  Does nothing if the tree
 * is already a goner.
 */

static void trim_marked(struct audit_tree *tree)
{
	struct list_head *p, *q;
	spin_lock(&hash_lock);
	if (tree->goner) {
		spin_unlock(&hash_lock);
		return;
	}
	/* reorder */
	/* move every flagged node to the front of tree->chunks */
	for (p = tree->chunks.next; p != &tree->chunks; p = q) {
		struct node *node = list_entry(p, struct node, list);
		q = p->next;
		if (node->index & (1U<<31)) {
			list_del_init(p);
			list_add(p, &tree->chunks);
		}
	}

	/* flagged nodes are now at the head; untag until an unflagged one */
	while (!list_empty(&tree->chunks)) {
		struct node *node;

		node = list_entry(tree->chunks.next, struct node, list);

		/* have we run out of marked? */
		if (!(node->index & (1U<<31)))
			break;

		untag_chunk(node);
	}
	if (!tree->root && !tree->goner) {
		/* the root got trimmed away: kill the whole tree */
		tree->goner = 1;
		spin_unlock(&hash_lock);
		mutex_lock(&audit_filter_mutex);
		kill_rules(tree);
		list_del_init(&tree->list);
		mutex_unlock(&audit_filter_mutex);
		prune_one(tree);
	} else {
		spin_unlock(&hash_lock);
	}
}
641 
/* forward declaration; the definition follows audit_tag_tree() */
static void audit_schedule_prune(void);
643 
644 /* called with audit_filter_mutex */
645 int audit_remove_tree_rule(struct audit_krule *rule)
646 {
647 	struct audit_tree *tree;
648 	tree = rule->tree;
649 	if (tree) {
650 		spin_lock(&hash_lock);
651 		list_del_init(&rule->rlist);
652 		if (list_empty(&tree->rules) && !tree->goner) {
653 			tree->root = NULL;
654 			list_del_init(&tree->same_root);
655 			tree->goner = 1;
656 			list_move(&tree->list, &prune_list);
657 			rule->tree = NULL;
658 			spin_unlock(&hash_lock);
659 			audit_schedule_prune();
660 			return 1;
661 		}
662 		rule->tree = NULL;
663 		spin_unlock(&hash_lock);
664 		return 1;
665 	}
666 	return 0;
667 }
668 
669 static int compare_root(struct vfsmount *mnt, void *arg)
670 {
671 	return inode_to_key(d_backing_inode(mnt->mnt_root)) ==
672 	       (unsigned long)arg;
673 }
674 
/*
 * Walk all trees and drop taggings that no longer correspond to a mount
 * under the tree's path: for each tree the mounts below its pathname are
 * collected, every chunk whose key matches none of their root inodes is
 * flagged, and trim_marked() removes the flagged ones.
 *
 * A cursor entry is kept in tree_list so that audit_filter_mutex can be
 * dropped while a tree is being processed.
 */
void audit_trim_trees(void)
{
	struct list_head cursor;

	mutex_lock(&audit_filter_mutex);
	list_add(&cursor, &tree_list);
	while (cursor.next != &tree_list) {
		struct audit_tree *tree;
		struct path path;
		struct vfsmount *root_mnt;
		struct node *node;
		int err;

		tree = container_of(cursor.next, struct audit_tree, list);
		/* pin the tree, then park the cursor right behind it */
		get_tree(tree);
		list_del(&cursor);
		list_add(&cursor, &tree->list);
		mutex_unlock(&audit_filter_mutex);

		err = kern_path(tree->pathname, 0, &path);
		if (err)
			goto skip_it;

		root_mnt = collect_mounts(&path);
		path_put(&path);
		if (IS_ERR(root_mnt))
			goto skip_it;

		spin_lock(&hash_lock);
		list_for_each_entry(node, &tree->chunks, list) {
			struct audit_chunk *chunk = find_chunk(node);
			/* this could be NULL if the watch is dying elsewhere... */
			/*
			 * NOTE(review): the comment above looks stale -
			 * find_chunk() only does pointer arithmetic and does
			 * not return NULL; confirm before relying on it.
			 */
			/* flag first, clear again if some mount still matches */
			node->index |= 1U<<31;
			if (iterate_mounts(compare_root,
					   (void *)(chunk->key),
					   root_mnt))
				node->index &= ~(1U<<31);
		}
		spin_unlock(&hash_lock);
		trim_marked(tree);
		drop_collected_mounts(root_mnt);
skip_it:
		put_tree(tree);
		mutex_lock(&audit_filter_mutex);
	}
	list_del(&cursor);
	mutex_unlock(&audit_filter_mutex);
}
723 
724 int audit_make_tree(struct audit_krule *rule, char *pathname, u32 op)
725 {
726 
727 	if (pathname[0] != '/' ||
728 	    rule->listnr != AUDIT_FILTER_EXIT ||
729 	    op != Audit_equal ||
730 	    rule->inode_f || rule->watch || rule->tree)
731 		return -EINVAL;
732 	rule->tree = alloc_tree(pathname);
733 	if (!rule->tree)
734 		return -ENOMEM;
735 	return 0;
736 }
737 
/* Non-static wrapper around put_tree(): drop one reference on @tree. */
void audit_put_tree(struct audit_tree *tree)
{
	put_tree(tree);
}
742 
/*
 * iterate_mounts() callback: tag the root inode of @mnt with the tree
 * passed in @arg.  Returns whatever tag_chunk() returns.
 */
static int tag_mount(struct vfsmount *mnt, void *arg)
{
	return tag_chunk(d_backing_inode(mnt->mnt_root), arg);
}
747 
748 /*
749  * That gets run when evict_chunk() ends up needing to kill audit_tree.
750  * Runs from a separate thread.
751  */
752 static int prune_tree_thread(void *unused)
753 {
754 	for (;;) {
755 		if (list_empty(&prune_list)) {
756 			set_current_state(TASK_INTERRUPTIBLE);
757 			schedule();
758 		}
759 
760 		audit_ctl_lock();
761 		mutex_lock(&audit_filter_mutex);
762 
763 		while (!list_empty(&prune_list)) {
764 			struct audit_tree *victim;
765 
766 			victim = list_entry(prune_list.next,
767 					struct audit_tree, list);
768 			list_del_init(&victim->list);
769 
770 			mutex_unlock(&audit_filter_mutex);
771 
772 			prune_one(victim);
773 
774 			mutex_lock(&audit_filter_mutex);
775 		}
776 
777 		mutex_unlock(&audit_filter_mutex);
778 		audit_ctl_unlock();
779 	}
780 	return 0;
781 }
782 
783 static int audit_launch_prune(void)
784 {
785 	if (prune_thread)
786 		return 0;
787 	prune_thread = kthread_run(prune_tree_thread, NULL,
788 				"audit_prune_tree");
789 	if (IS_ERR(prune_thread)) {
790 		pr_err("cannot start thread audit_prune_tree");
791 		prune_thread = NULL;
792 		return -ENOMEM;
793 	}
794 	return 0;
795 }
796 
/*
 * Attach @rule to a tree for the path in its seed tree (rule->tree on
 * entry).  If a tree with the same pathname already exists the rule just
 * joins it and the seed is dropped; otherwise the seed becomes a new tree
 * and every mount under its path gets tagged.
 *
 * Returns 0 on success, -ENOENT if the rule was taken off the tree while
 * the mounts were being tagged, or the error from starting the prune
 * thread, resolving the path, collecting mounts or tagging.
 *
 * called with audit_filter_mutex; the mutex is dropped while the mounts
 * are processed and is held again on every return.
 */
int audit_add_tree_rule(struct audit_krule *rule)
{
	struct audit_tree *seed = rule->tree, *tree;
	struct path path;
	struct vfsmount *mnt;
	int err;

	rule->tree = NULL;
	list_for_each_entry(tree, &tree_list, list) {
		if (!strcmp(seed->pathname, tree->pathname)) {
			/* existing tree for this path: reuse it */
			put_tree(seed);
			rule->tree = tree;
			list_add(&rule->rlist, &tree->rules);
			return 0;
		}
	}
	tree = seed;
	list_add(&tree->list, &tree_list);
	list_add(&rule->rlist, &tree->rules);
	/* do not set rule->tree yet */
	mutex_unlock(&audit_filter_mutex);

	if (unlikely(!prune_thread)) {
		err = audit_launch_prune();
		if (err)
			goto Err;
	}

	err = kern_path(tree->pathname, 0, &path);
	if (err)
		goto Err;
	mnt = collect_mounts(&path);
	path_put(&path);
	if (IS_ERR(mnt)) {
		err = PTR_ERR(mnt);
		goto Err;
	}

	/* extra reference held across tagging; dropped on every path below */
	get_tree(tree);
	err = iterate_mounts(tag_mount, tree, mnt);
	drop_collected_mounts(mnt);

	if (!err) {
		struct node *node;
		/* commit: clear the 'will prune' flag on every tagging */
		spin_lock(&hash_lock);
		list_for_each_entry(node, &tree->chunks, list)
			node->index &= ~(1U<<31);
		spin_unlock(&hash_lock);
	} else {
		/* roll back the taggings made so far */
		trim_marked(tree);
		goto Err;
	}

	mutex_lock(&audit_filter_mutex);
	if (list_empty(&rule->rlist)) {
		/* the rule got unlinked from the tree while we were unlocked */
		put_tree(tree);
		return -ENOENT;
	}
	rule->tree = tree;
	put_tree(tree);

	return 0;
Err:
	mutex_lock(&audit_filter_mutex);
	list_del_init(&tree->list);
	list_del_init(&tree->rules);
	put_tree(tree);
	return err;
}
867 
/*
 * For every tree whose path has @old underneath it, tag the mounts
 * collected under @new with that tree.
 *
 * Two markers are kept in tree_list: 'cursor' tracks the walk so that
 * audit_filter_mutex can be dropped per tree, and 'barrier' separates
 * the trees that were successfully tagged (moved in front of it) from the
 * rest.  The second loop then either commits the new taggings of those
 * trees (clears the top bit of .index) or, after a failure, trims them.
 *
 * Returns 0 on success, an error from path lookup / mount collection, or
 * the non-zero result of the failed tagging.
 */
int audit_tag_tree(char *old, char *new)
{
	struct list_head cursor, barrier;
	int failed = 0;
	struct path path1, path2;
	struct vfsmount *tagged;
	int err;

	err = kern_path(new, 0, &path2);
	if (err)
		return err;
	tagged = collect_mounts(&path2);
	path_put(&path2);
	if (IS_ERR(tagged))
		return PTR_ERR(tagged);

	err = kern_path(old, 0, &path1);
	if (err) {
		drop_collected_mounts(tagged);
		return err;
	}

	mutex_lock(&audit_filter_mutex);
	list_add(&barrier, &tree_list);
	list_add(&cursor, &barrier);

	while (cursor.next != &tree_list) {
		struct audit_tree *tree;
		int good_one = 0;

		tree = container_of(cursor.next, struct audit_tree, list);
		/* pin the tree, then park the cursor right behind it */
		get_tree(tree);
		list_del(&cursor);
		list_add(&cursor, &tree->list);
		mutex_unlock(&audit_filter_mutex);

		err = kern_path(tree->pathname, 0, &path2);
		if (!err) {
			good_one = path_is_under(&path1, &path2);
			path_put(&path2);
		}

		if (!good_one) {
			put_tree(tree);
			mutex_lock(&audit_filter_mutex);
			continue;
		}

		failed = iterate_mounts(tag_mount, tree, tagged);
		if (failed) {
			put_tree(tree);
			mutex_lock(&audit_filter_mutex);
			break;
		}

		mutex_lock(&audit_filter_mutex);
		spin_lock(&hash_lock);
		if (!tree->goner) {
			/* move to the head of tree_list, i.e. before barrier */
			list_del(&tree->list);
			list_add(&tree->list, &tree_list);
		}
		spin_unlock(&hash_lock);
		put_tree(tree);
	}

	/* everything in front of the barrier was tagged above */
	while (barrier.prev != &tree_list) {
		struct audit_tree *tree;

		tree = container_of(barrier.prev, struct audit_tree, list);
		get_tree(tree);
		/* move the tree behind the barrier so it is visited once */
		list_del(&tree->list);
		list_add(&tree->list, &barrier);
		mutex_unlock(&audit_filter_mutex);

		if (!failed) {
			struct node *node;
			spin_lock(&hash_lock);
			list_for_each_entry(node, &tree->chunks, list)
				node->index &= ~(1U<<31);
			spin_unlock(&hash_lock);
		} else {
			trim_marked(tree);
		}

		put_tree(tree);
		mutex_lock(&audit_filter_mutex);
	}
	list_del(&barrier);
	list_del(&cursor);
	mutex_unlock(&audit_filter_mutex);
	path_put(&path1);
	drop_collected_mounts(tagged);
	return failed;
}
962 
963 
/*
 * Wake the prune thread so that it processes prune_list.
 * NOTE(review): prune_thread is dereferenced unconditionally; this assumes
 * the thread has already been started by audit_launch_prune() - confirm.
 */
static void audit_schedule_prune(void)
{
	wake_up_process(prune_thread);
}
968 
/*
 * ... and that one is done if evict_chunk() decides to delay until the end
 * of syscall.  Runs synchronously.
 *
 * Empties @list: for each tree on it the rules are killed and the tree is
 * unlinked under audit_filter_mutex, then prune_one() finishes it with
 * that mutex dropped.
 */
void audit_kill_trees(struct list_head *list)
{
	audit_ctl_lock();
	mutex_lock(&audit_filter_mutex);

	while (!list_empty(list)) {
		struct audit_tree *victim;

		victim = list_entry(list->next, struct audit_tree, list);
		kill_rules(victim);
		list_del_init(&victim->list);

		mutex_unlock(&audit_filter_mutex);

		prune_one(victim);

		mutex_lock(&audit_filter_mutex);
	}

	mutex_unlock(&audit_filter_mutex);
	audit_ctl_unlock();
}
995 
996 /*
997  *  Here comes the stuff asynchronous to auditctl operations
998  */
999 
/*
 * Called when the mark of @chunk is being freed (see
 * audit_tree_freeing_mark()).  Unless the chunk is already dead, every
 * tree rooted at it is marked goner and queued for killing, the chunk is
 * taken out of the hash and all of its owner slots are unlinked from
 * their trees' chunk lists.
 *
 * Trees go either to the list returned by audit_killed_trees() or, if
 * that returns NULL, have their rules killed here and are handed to the
 * prune thread.
 * NOTE(review): audit_killed_trees() is defined elsewhere; presumably it
 * returns the per-syscall list later passed to audit_kill_trees() - confirm.
 */
static void evict_chunk(struct audit_chunk *chunk)
{
	struct audit_tree *owner;
	struct list_head *postponed = audit_killed_trees();
	int need_prune = 0;
	int n;

	if (chunk->dead)
		return;

	chunk->dead = 1;
	mutex_lock(&audit_filter_mutex);
	spin_lock(&hash_lock);
	while (!list_empty(&chunk->trees)) {
		owner = list_entry(chunk->trees.next,
				   struct audit_tree, same_root);
		owner->goner = 1;
		owner->root = NULL;
		list_del_init(&owner->same_root);
		/* hash_lock is dropped around the rule/list handling below */
		spin_unlock(&hash_lock);
		if (!postponed) {
			kill_rules(owner);
			list_move(&owner->list, &prune_list);
			need_prune = 1;
		} else {
			list_move(&owner->list, postponed);
		}
		spin_lock(&hash_lock);
	}
	list_del_rcu(&chunk->hash);
	for (n = 0; n < chunk->count; n++)
		list_del_init(&chunk->owners[n].list);
	spin_unlock(&hash_lock);
	mutex_unlock(&audit_filter_mutex);
	if (need_prune)
		audit_schedule_prune();
}
1037 
/*
 * .handle_event callback of audit_tree_ops.  Ignores all of its arguments
 * and always returns 0.
 */
static int audit_tree_handle_event(struct fsnotify_group *group,
				   struct inode *to_tell,
				   u32 mask, const void *data, int data_type,
				   const unsigned char *file_name, u32 cookie,
				   struct fsnotify_iter_info *iter_info)
{
	return 0;
}
1046 
1047 static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify_group *group)
1048 {
1049 	struct audit_chunk *chunk = mark_chunk(entry);
1050 
1051 	evict_chunk(chunk);
1052 
1053 	/*
1054 	 * We are guaranteed to have at least one reference to the mark from
1055 	 * either the inode or the caller of fsnotify_destroy_mark().
1056 	 */
1057 	BUG_ON(refcount_read(&entry->refcnt) < 1);
1058 }
1059 
/* fsnotify callbacks for audit_tree_group (see audit_tree_init()) */
static const struct fsnotify_ops audit_tree_ops = {
	/* events themselves are ignored */
	.handle_event = audit_tree_handle_event,
	/* mark is going away: evict its chunk */
	.freeing_mark = audit_tree_freeing_mark,
	/* final free of the mark and of its chunk reference */
	.free_mark = audit_tree_destroy_watch,
};
1065 
1066 static int __init audit_tree_init(void)
1067 {
1068 	int i;
1069 
1070 	audit_tree_mark_cachep = KMEM_CACHE(audit_tree_mark, SLAB_PANIC);
1071 
1072 	audit_tree_group = fsnotify_alloc_group(&audit_tree_ops);
1073 	if (IS_ERR(audit_tree_group))
1074 		audit_panic("cannot initialize fsnotify group for rectree watches");
1075 
1076 	for (i = 0; i < HASH_SIZE; i++)
1077 		INIT_LIST_HEAD(&chunk_hash_heads[i]);
1078 
1079 	return 0;
1080 }
1081 __initcall(audit_tree_init);
1082