xref: /linux-6.15/kernel/bpf/arraymap.c (revision 96049f3a)
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016,2017 Facebook
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/filter.h>
#include <linux/perf_event.h>
#include <uapi/linux/btf.h>

#include "map_in_map.h"

#define ARRAY_CREATE_FLAG_MASK \
	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)

static void bpf_array_free_percpu(struct bpf_array *array)
{
	int i;

	for (i = 0; i < array->map.max_entries; i++) {
		free_percpu(array->pptrs[i]);
		cond_resched();
	}
}

static int bpf_array_alloc_percpu(struct bpf_array *array)
{
	void __percpu *ptr;
	int i;

	for (i = 0; i < array->map.max_entries; i++) {
		ptr = __alloc_percpu_gfp(array->elem_size, 8,
					 GFP_USER | __GFP_NOWARN);
		if (!ptr) {
			bpf_array_free_percpu(array);
			return -ENOMEM;
		}
		array->pptrs[i] = ptr;
		cond_resched();
	}

	return 0;
}

/* Called from syscall */
int array_map_alloc_check(union bpf_attr *attr)
{
	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
	int numa_node = bpf_map_attr_numa_node(attr);

	/* check sanity of attributes */
	if (attr->max_entries == 0 || attr->key_size != 4 ||
	    attr->value_size == 0 ||
	    attr->map_flags & ~ARRAY_CREATE_FLAG_MASK ||
	    (percpu && numa_node != NUMA_NO_NODE))
		return -EINVAL;

	if (attr->value_size > KMALLOC_MAX_SIZE)
		/* if value_size is bigger, the user space won't be able to
		 * access the elements.
		 */
		return -E2BIG;

	return 0;
}
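
/* Example: the attribute checks above constrain what userspace can ask
 * for. A minimal sketch of creating a plain array map via the bpf(2)
 * syscall (illustrative userspace code, not part of this file; error
 * handling trimmed):
 *
 *	#include <linux/bpf.h>
 *	#include <string.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	int create_array_map(void)
 *	{
 *		union bpf_attr attr;
 *
 *		memset(&attr, 0, sizeof(attr));
 *		attr.map_type    = BPF_MAP_TYPE_ARRAY;
 *		attr.key_size    = 4;	// must be exactly 4 (a u32 index)
 *		attr.value_size  = 64;	// > 0 and <= KMALLOC_MAX_SIZE
 *		attr.max_entries = 256;	// must be non-zero
 *
 *		return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
 *	}
 */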

static struct bpf_map *array_map_alloc(union bpf_attr *attr)
{
	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
	int ret, numa_node = bpf_map_attr_numa_node(attr);
	u32 elem_size, index_mask, max_entries;
	bool unpriv = !capable(CAP_SYS_ADMIN);
	u64 cost, array_size, mask64;
	struct bpf_array *array;

	elem_size = round_up(attr->value_size, 8);

	max_entries = attr->max_entries;

	/* On 32-bit archs, roundup_pow_of_two() with a max_entries that has
	 * the uppermost bit set in u32 space is undefined behavior due to
	 * the resulting 1U << 32, so do it manually here in u64 space.
	 */
	mask64 = fls_long(max_entries - 1);
	mask64 = 1ULL << mask64;
	mask64 -= 1;

	index_mask = mask64;
	if (unpriv) {
		/* round up array size to nearest power of 2,
		 * since cpu will speculate within index_mask limits
		 */
		max_entries = index_mask + 1;
		/* Check for overflows. */
		if (max_entries < attr->max_entries)
			return ERR_PTR(-E2BIG);
	}
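
	/* Worked example (editorial note; values follow the code above):
	 * with attr->max_entries == 5, fls_long(4) == 3, so mask64 becomes
	 * (1ULL << 3) - 1 == 7 and an unprivileged map is rounded up to
	 * max_entries == 8. With attr->max_entries == 0x80000001,
	 * fls_long(0x80000000) == 32 and mask64 == 0xffffffff; the u32
	 * "index_mask + 1" then wraps to 0, which the overflow check above
	 * catches and rejects with -E2BIG.
	 */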

	array_size = sizeof(*array);
	if (percpu)
		array_size += (u64) max_entries * sizeof(void *);
	else
		array_size += (u64) max_entries * elem_size;

	/* make sure there is no u32 overflow later in round_up() */
	cost = array_size;
	if (cost >= U32_MAX - PAGE_SIZE)
		return ERR_PTR(-ENOMEM);
	if (percpu) {
		cost += (u64)attr->max_entries * elem_size * num_possible_cpus();
		if (cost >= U32_MAX - PAGE_SIZE)
			return ERR_PTR(-ENOMEM);
	}
	cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;

	ret = bpf_map_precharge_memlock(cost);
	if (ret < 0)
		return ERR_PTR(ret);

	/* allocate all map elements and zero-initialize them */
	array = bpf_map_area_alloc(array_size, numa_node);
	if (!array)
		return ERR_PTR(-ENOMEM);
	array->index_mask = index_mask;
	array->map.unpriv_array = unpriv;

	/* copy mandatory map attributes */
	bpf_map_init_from_attr(&array->map, attr);
	array->map.pages = cost;
	array->elem_size = elem_size;

	if (percpu && bpf_array_alloc_percpu(array)) {
		bpf_map_area_free(array);
		return ERR_PTR(-ENOMEM);
	}

	return &array->map;
}

/* Called from syscall or from eBPF program */
static void *array_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;

	if (unlikely(index >= array->map.max_entries))
		return NULL;

	return array->value + array->elem_size * (index & array->index_mask);
}
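
/* Example: typical use of this lookup path from an eBPF program. A
 * minimal sketch in BPF C, assuming libbpf's bpf_helpers.h and a map
 * named "my_array" (both are assumptions, not part of this file):
 *
 *	__u32 idx = 0;
 *	__u64 *value;
 *
 *	value = bpf_map_lookup_elem(&my_array, &idx);
 *	if (value)	// NULL when idx >= max_entries
 *		__sync_fetch_and_add(value, 1);
 */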

/* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_insn *insn = insn_buf;
	u32 elem_size = round_up(map->value_size, 8);
	const int ret = BPF_REG_0;
	const int map_ptr = BPF_REG_1;
	const int index = BPF_REG_2;

	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
	if (map->unpriv_array) {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
		*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
	} else {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
	}

	if (is_power_of_2(elem_size)) {
		*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
	} else {
		*insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
	}
	*insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
	*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
	*insn++ = BPF_MOV64_IMM(ret, 0);
	return insn - insn_buf;
}
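
/* For reference, the sequence emitted above for a privileged map with a
 * power-of-2 elem_size reads roughly as follows in BPF assembler form
 * (r1 = map pointer, r2 = pointer to the key; editorial annotation):
 *
 *	r1 += offsetof(struct bpf_array, value)
 *	r0 = *(u32 *)(r2 + 0)		// load the index
 *	if r0 >= max_entries goto +3	// out of bounds -> NULL
 *	r0 <<= ilog2(elem_size)		// index -> byte offset
 *	r0 += r1			// pointer to the element
 *	goto +1
 *	r0 = 0				// NULL
 *
 * In the unprivileged case an extra "r0 &= index_mask" is emitted after
 * the bounds check so that even speculative execution stays inside the
 * array.
 */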

/* Called from eBPF program */
static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;

	if (unlikely(index >= array->map.max_entries))
		return NULL;

	return this_cpu_ptr(array->pptrs[index & array->index_mask]);
}

int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	int cpu, off = 0;
	u32 size;

	if (unlikely(index >= array->map.max_entries))
		return -ENOENT;

	/* per_cpu areas are zero-filled and bpf programs can only
	 * access 'value_size' of them, so copying rounded areas
	 * will not leak any kernel data
	 */
	size = round_up(map->value_size, 8);
	rcu_read_lock();
	pptr = array->pptrs[index & array->index_mask];
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}

/* Called from syscall */
static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = key ? *(u32 *)key : U32_MAX;
	u32 *next = (u32 *)next_key;

	if (index >= array->map.max_entries) {
		*next = 0;
		return 0;
	}

	if (index == array->map.max_entries - 1)
		return -ENOENT;

	*next = index + 1;
	return 0;
}
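
/* Example: how userspace typically drives the get_next_key semantics
 * above. A NULL key (or any out-of-range key) yields index 0, and the
 * last slot returns -ENOENT. Illustrative sketch using libbpf's
 * bpf_map_get_next_key() wrapper (an assumption, not part of this file):
 *
 *	__u32 key, next;
 *	int err;
 *
 *	err = bpf_map_get_next_key(map_fd, NULL, &next);  // first key: 0
 *	while (!err) {
 *		// ... look up "next" here ...
 *		key = next;
 *		err = bpf_map_get_next_key(map_fd, &key, &next);
 *	}
 *	// the loop ends when the last index reports ENOENT
 */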

/* Called from syscall or from eBPF program */
static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
				 u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	char *val;

	if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	if (unlikely(index >= array->map.max_entries))
		/* all elements were pre-allocated, cannot insert a new one */
		return -E2BIG;

	if (unlikely(map_flags & BPF_NOEXIST))
		/* all elements already exist */
		return -EEXIST;

	if (unlikely((map_flags & BPF_F_LOCK) &&
		     !map_value_has_spin_lock(map)))
		return -EINVAL;

	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]),
		       value, map->value_size);
	} else {
		val = array->value +
			array->elem_size * (index & array->index_mask);
		if (map_flags & BPF_F_LOCK)
			copy_map_value_locked(map, val, value, false);
		else
			copy_map_value(map, val, value);
	}
	return 0;
}
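
/* Example: the BPF_F_LOCK path above is only valid when the value type
 * carries a struct bpf_spin_lock that the map's BTF describes. A minimal
 * sketch of such an update from userspace (illustrative; the value
 * layout and map_fd are assumptions):
 *
 *	struct val {
 *		struct bpf_spin_lock lock;	// located via BTF
 *		__u64 counter;
 *	} v = { .counter = 42 };
 *	__u32 key = 0;
 *
 *	// copies v under v.lock instead of a plain memcpy
 *	bpf_map_update_elem(map_fd, &key, &v, BPF_F_LOCK);
 *
 * Without BTF describing the spin lock, map_value_has_spin_lock() is
 * false and the same call fails with -EINVAL.
 */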

int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
			    u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	int cpu, off = 0;
	u32 size;

	if (unlikely(map_flags > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	if (unlikely(index >= array->map.max_entries))
		/* all elements were pre-allocated, cannot insert a new one */
		return -E2BIG;

	if (unlikely(map_flags == BPF_NOEXIST))
		/* all elements already exist */
		return -EEXIST;

	/* the user space will provide round_up(value_size, 8) bytes that
	 * will be copied into per-cpu area. bpf programs can only access
	 * value_size of it. During lookup the same extra bytes will be
	 * returned or zeros which were zero-filled by percpu_alloc,
	 * so no kernel data leaks possible
	 */
	size = round_up(map->value_size, 8);
	rcu_read_lock();
	pptr = array->pptrs[index & array->index_mask];
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}
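
/* Example: both bpf_percpu_array_copy() and the update above move
 * num_possible_cpus() blocks of round_up(value_size, 8) bytes, so a
 * userspace caller must size its buffer accordingly. Illustrative
 * sketch; libbpf_num_possible_cpus() is an assumption (any helper that
 * parses /sys/devices/system/cpu/possible works):
 *
 *	int ncpus = libbpf_num_possible_cpus();
 *	__u32 key = 0, vsz = 8;		// value_size from map creation
 *	__u64 *values = calloc(ncpus, (vsz + 7) & ~7);
 *
 *	// one entry per possible CPU comes back in values[]
 *	bpf_map_lookup_elem(map_fd, &key, values);
 *
 * Passing a buffer sized for just one value would overflow here.
 */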

/* Called from syscall or from eBPF program */
static int array_map_delete_elem(struct bpf_map *map, void *key)
{
	return -EINVAL;
}

/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void array_map_free(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);

	/* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
	 * so the programs (there can be more than one that used this map)
	 * were disconnected from events. Wait for outstanding programs to
	 * complete and free the array
	 */
	synchronize_rcu();

	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		bpf_array_free_percpu(array);

	bpf_map_area_free(array);
}

static void array_map_seq_show_elem(struct bpf_map *map, void *key,
				    struct seq_file *m)
{
	void *value;

	rcu_read_lock();

	value = array_map_lookup_elem(map, key);
	if (!value) {
		rcu_read_unlock();
		return;
	}

	seq_printf(m, "%u: ", *(u32 *)key);
	btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
	seq_puts(m, "\n");

	rcu_read_unlock();
}

static void percpu_array_map_seq_show_elem(struct bpf_map *map, void *key,
					   struct seq_file *m)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	int cpu;

	rcu_read_lock();

	seq_printf(m, "%u: {\n", *(u32 *)key);
	pptr = array->pptrs[index & array->index_mask];
	for_each_possible_cpu(cpu) {
		seq_printf(m, "\tcpu%d: ", cpu);
		btf_type_seq_show(map->btf, map->btf_value_type_id,
				  per_cpu_ptr(pptr, cpu), m);
		seq_puts(m, "\n");
	}
	seq_puts(m, "}\n");

	rcu_read_unlock();
}

static int array_map_check_btf(const struct bpf_map *map,
			       const struct btf *btf,
			       const struct btf_type *key_type,
			       const struct btf_type *value_type)
{
	u32 int_data;

	if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
		return -EINVAL;

	int_data = *(u32 *)(key_type + 1);
	/* bpf array can only take a u32 key. This check makes sure
	 * that the btf matches the attr used during map_create.
	 */
	if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data))
		return -EINVAL;

	return 0;
}
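
/* Example: a map declaration whose BTF satisfies the check above. A
 * sketch in the newer libbpf ".maps" declaration style (an assumption;
 * this style lives in userspace BPF programs, not in this file):
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_ARRAY);
 *		__uint(max_entries, 4);
 *		__type(key, __u32);	// BTF_KIND_INT, 32 bits, offset 0
 *		__type(value, __u64);
 *	} my_array SEC(".maps");
 *
 * A key declared as anything but a plain 32-bit integer (say, __u64 or
 * a struct) makes map creation fail with -EINVAL.
 */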

const struct bpf_map_ops array_map_ops = {
	.map_alloc_check = array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = array_map_lookup_elem,
	.map_update_elem = array_map_update_elem,
	.map_delete_elem = array_map_delete_elem,
	.map_gen_lookup = array_map_gen_lookup,
	.map_seq_show_elem = array_map_seq_show_elem,
	.map_check_btf = array_map_check_btf,
};

const struct bpf_map_ops percpu_array_map_ops = {
	.map_alloc_check = array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = percpu_array_map_lookup_elem,
	.map_update_elem = array_map_update_elem,
	.map_delete_elem = array_map_delete_elem,
	.map_seq_show_elem = percpu_array_map_seq_show_elem,
	.map_check_btf = array_map_check_btf,
};

static int fd_array_map_alloc_check(union bpf_attr *attr)
{
	/* only file descriptors can be stored in this type of map */
	if (attr->value_size != sizeof(u32))
		return -EINVAL;
	return array_map_alloc_check(attr);
}

static void fd_array_map_free(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	synchronize_rcu();

	/* make sure it's empty */
	for (i = 0; i < array->map.max_entries; i++)
		BUG_ON(array->ptrs[i] != NULL);

	bpf_map_area_free(array);
}

static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key)
{
	return ERR_PTR(-EOPNOTSUPP);
}

/* only called from syscall */
int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
{
	void **elem, *ptr;
	int ret = 0;

	if (!map->ops->map_fd_sys_lookup_elem)
		return -ENOTSUPP;

	rcu_read_lock();
	elem = array_map_lookup_elem(map, key);
	if (elem && (ptr = READ_ONCE(*elem)))
		*value = map->ops->map_fd_sys_lookup_elem(ptr);
	else
		ret = -ENOENT;
	rcu_read_unlock();

	return ret;
}

/* only called from syscall */
int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
				 void *key, void *value, u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	void *new_ptr, *old_ptr;
	u32 index = *(u32 *)key, ufd;

	if (map_flags != BPF_ANY)
		return -EINVAL;

	if (index >= array->map.max_entries)
		return -E2BIG;

	ufd = *(u32 *)value;
	new_ptr = map->ops->map_fd_get_ptr(map, map_file, ufd);
	if (IS_ERR(new_ptr))
		return PTR_ERR(new_ptr);

	old_ptr = xchg(array->ptrs + index, new_ptr);
	if (old_ptr)
		map->ops->map_fd_put_ptr(old_ptr);

	return 0;
}
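
/* Example: the update path above is how userspace wires a program fd
 * into a BPF_MAP_TYPE_PROG_ARRAY for tail calls. On the eBPF side the
 * slot is consumed with bpf_tail_call(). Illustrative sketch; map and
 * fd names are assumptions:
 *
 *	// userspace: store prog_fd at index 1 (the value is the fd itself)
 *	__u32 key = 1, ufd = prog_fd;
 *	bpf_map_update_elem(jmp_table_fd, &key, &ufd, BPF_ANY);
 *
 *	// eBPF program: jump to whatever now lives at index 1
 *	bpf_tail_call(ctx, &jmp_table, 1);
 *	// only reached when slot 1 is empty or the tail call fails
 */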

static int fd_array_map_delete_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	void *old_ptr;
	u32 index = *(u32 *)key;

	if (index >= array->map.max_entries)
		return -E2BIG;

	old_ptr = xchg(array->ptrs + index, NULL);
	if (old_ptr) {
		map->ops->map_fd_put_ptr(old_ptr);
		return 0;
	} else {
		return -ENOENT;
	}
}

static void *prog_fd_array_get_ptr(struct bpf_map *map,
				   struct file *map_file, int fd)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_prog *prog = bpf_prog_get(fd);

	if (IS_ERR(prog))
		return prog;

	if (!bpf_prog_array_compatible(array, prog)) {
		bpf_prog_put(prog);
		return ERR_PTR(-EINVAL);
	}

	return prog;
}

static void prog_fd_array_put_ptr(void *ptr)
{
	bpf_prog_put(ptr);
}

static u32 prog_fd_array_sys_lookup_elem(void *ptr)
{
	return ((struct bpf_prog *)ptr)->aux->id;
}

/* decrement refcnt of all bpf_progs that are stored in this map */
static void bpf_fd_array_map_clear(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	for (i = 0; i < array->map.max_entries; i++)
		fd_array_map_delete_elem(map, &i);
}

static void prog_array_map_seq_show_elem(struct bpf_map *map, void *key,
					 struct seq_file *m)
{
	void **elem, *ptr;
	u32 prog_id;

	rcu_read_lock();

	elem = array_map_lookup_elem(map, key);
	if (elem) {
		ptr = READ_ONCE(*elem);
		if (ptr) {
			seq_printf(m, "%u: ", *(u32 *)key);
			prog_id = prog_fd_array_sys_lookup_elem(ptr);
			btf_type_seq_show(map->btf, map->btf_value_type_id,
					  &prog_id, m);
			seq_puts(m, "\n");
		}
	}

	rcu_read_unlock();
}

const struct bpf_map_ops prog_array_map_ops = {
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = fd_array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = prog_fd_array_get_ptr,
	.map_fd_put_ptr = prog_fd_array_put_ptr,
	.map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem,
	.map_release_uref = bpf_fd_array_map_clear,
	.map_seq_show_elem = prog_array_map_seq_show_elem,
};

static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file,
						   struct file *map_file)
{
	struct bpf_event_entry *ee;

	ee = kzalloc(sizeof(*ee), GFP_ATOMIC);
	if (ee) {
		ee->event = perf_file->private_data;
		ee->perf_file = perf_file;
		ee->map_file = map_file;
	}

	return ee;
}

static void __bpf_event_entry_free(struct rcu_head *rcu)
{
	struct bpf_event_entry *ee;

	ee = container_of(rcu, struct bpf_event_entry, rcu);
	fput(ee->perf_file);
	kfree(ee);
}

static void bpf_event_entry_free_rcu(struct bpf_event_entry *ee)
{
	call_rcu(&ee->rcu, __bpf_event_entry_free);
}

static void *perf_event_fd_array_get_ptr(struct bpf_map *map,
					 struct file *map_file, int fd)
{
	struct bpf_event_entry *ee;
	struct perf_event *event;
	struct file *perf_file;
	u64 value;

	perf_file = perf_event_get(fd);
	if (IS_ERR(perf_file))
		return perf_file;

	ee = ERR_PTR(-EOPNOTSUPP);
	event = perf_file->private_data;
	if (perf_event_read_local(event, &value, NULL, NULL) == -EOPNOTSUPP)
		goto err_out;

	ee = bpf_event_entry_gen(perf_file, map_file);
	if (ee)
		return ee;
	ee = ERR_PTR(-ENOMEM);
err_out:
	fput(perf_file);
	return ee;
}

static void perf_event_fd_array_put_ptr(void *ptr)
{
	bpf_event_entry_free_rcu(ptr);
}

static void perf_event_fd_array_release(struct bpf_map *map,
					struct file *map_file)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_event_entry *ee;
	int i;

	rcu_read_lock();
	for (i = 0; i < array->map.max_entries; i++) {
		ee = READ_ONCE(array->ptrs[i]);
		if (ee && ee->map_file == map_file)
			fd_array_map_delete_elem(map, &i);
	}
	rcu_read_unlock();
}

const struct bpf_map_ops perf_event_array_map_ops = {
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = fd_array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = perf_event_fd_array_get_ptr,
	.map_fd_put_ptr = perf_event_fd_array_put_ptr,
	.map_release = perf_event_fd_array_release,
	.map_check_btf = map_check_no_btf,
};
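
/* Example: once userspace has stored perf event fds in a map of this
 * type, an eBPF program typically streams samples through it with
 * bpf_perf_event_output(). Illustrative sketch; the map name and the
 * sample struct are assumptions:
 *
 *	struct event { __u32 pid; __u64 ts; } e = { .pid = 1, .ts = 2 };
 *
 *	// writes e to the perf ring buffer of the current CPU's slot
 *	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
 *			      &e, sizeof(e));
 */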

#ifdef CONFIG_CGROUPS
static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
				     struct file *map_file /* not used */,
				     int fd)
{
	return cgroup_get_from_fd(fd);
}

static void cgroup_fd_array_put_ptr(void *ptr)
{
	/* cgroup_put() frees cgrp after an RCU grace period */
	cgroup_put(ptr);
}

static void cgroup_fd_array_free(struct bpf_map *map)
{
	bpf_fd_array_map_clear(map);
	fd_array_map_free(map);
}

const struct bpf_map_ops cgroup_array_map_ops = {
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = cgroup_fd_array_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = cgroup_fd_array_get_ptr,
	.map_fd_put_ptr = cgroup_fd_array_put_ptr,
	.map_check_btf = map_check_no_btf,
};
#endif
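
/* Example: a cgroup array is usually consulted from an eBPF program via
 * the cgroup-membership helpers. Illustrative sketch; "cgrp_map" and
 * handle_cgroup_traffic() are assumptions:
 *
 *	// 1 if the skb's socket belongs to the cgroup stored at index 0
 *	if (bpf_skb_under_cgroup(skb, &cgrp_map, 0))
 *		return handle_cgroup_traffic(skb);
 */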

static struct bpf_map *array_of_map_alloc(union bpf_attr *attr)
{
	struct bpf_map *map, *inner_map_meta;

	inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd);
	if (IS_ERR(inner_map_meta))
		return inner_map_meta;

	map = array_map_alloc(attr);
	if (IS_ERR(map)) {
		bpf_map_meta_free(inner_map_meta);
		return map;
	}

	map->inner_map_meta = inner_map_meta;

	return map;
}

static void array_of_map_free(struct bpf_map *map)
{
	/* map->inner_map_meta is only accessed by syscall which
	 * is protected by fdget/fdput.
	 */
	bpf_map_meta_free(map->inner_map_meta);
	bpf_fd_array_map_clear(map);
	fd_array_map_free(map);
}

static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_map **inner_map = array_map_lookup_elem(map, key);

	if (!inner_map)
		return NULL;

	return READ_ONCE(*inner_map);
}
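
/* Example: an eBPF program dereferences an array-of-maps in two steps,
 * which is exactly what the lookup above (and its inlined form below)
 * implements. Illustrative sketch; map names are assumptions:
 *
 *	__u32 outer_key = 0, inner_key = 0;
 *	void *inner;
 *	__u64 *value;
 *
 *	inner = bpf_map_lookup_elem(&outer_array, &outer_key);
 *	if (!inner)
 *		return 0;	// empty slot
 *	value = bpf_map_lookup_elem(inner, &inner_key);
 */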

static u32 array_of_map_gen_lookup(struct bpf_map *map,
				   struct bpf_insn *insn_buf)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 elem_size = round_up(map->value_size, 8);
	struct bpf_insn *insn = insn_buf;
	const int ret = BPF_REG_0;
	const int map_ptr = BPF_REG_1;
	const int index = BPF_REG_2;

	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
	if (map->unpriv_array) {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6);
		*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
	} else {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
	}
	if (is_power_of_2(elem_size))
		*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
	else
		*insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
	*insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
	*insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0);
	*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1);
	*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
	*insn++ = BPF_MOV64_IMM(ret, 0);

	return insn - insn_buf;
}
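
/* Relative to array_map_gen_lookup(), the sequence above adds one
 * dereference and a NULL check once the element address is computed
 * (editorial annotation):
 *
 *	r0 += r1			// address of the slot
 *	r0 = *(u64 *)(r0 + 0)		// load the inner map pointer
 *	if r0 == 0 goto +1		// empty slot -> return NULL
 *	goto +1
 *	r0 = 0
 */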

const struct bpf_map_ops array_of_maps_map_ops = {
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_of_map_alloc,
	.map_free = array_of_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = array_of_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = bpf_map_fd_get_ptr,
	.map_fd_put_ptr = bpf_map_fd_put_ptr,
	.map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
	.map_gen_lookup = array_of_map_gen_lookup,
	.map_check_btf = map_check_no_btf,
};