xref: /linux-6.15/kernel/bpf/arraymap.c (revision a7c19db3)
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016,2017 Facebook
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/filter.h>
#include <linux/perf_event.h>
#include <uapi/linux/btf.h>

#include "map_in_map.h"

#define ARRAY_CREATE_FLAG_MASK \
	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)

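/* A percpu array stores, per element, a pointer to a per-cpu allocation in
 * array->pptrs[]; a plain array stores its elements inline in array->value[].
 * The cond_resched() calls below are scheduling points so that freeing or
 * allocating a map with millions of entries does not hog the CPU.
 */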
static void bpf_array_free_percpu(struct bpf_array *array)
{
	int i;

	for (i = 0; i < array->map.max_entries; i++) {
		free_percpu(array->pptrs[i]);
		cond_resched();
	}
}

static int bpf_array_alloc_percpu(struct bpf_array *array)
{
	void __percpu *ptr;
	int i;

	for (i = 0; i < array->map.max_entries; i++) {
		ptr = __alloc_percpu_gfp(array->elem_size, 8,
					 GFP_USER | __GFP_NOWARN);
		if (!ptr) {
			bpf_array_free_percpu(array);
			return -ENOMEM;
		}
		array->pptrs[i] = ptr;
		cond_resched();
	}

	return 0;
}

/* Called from syscall */
int array_map_alloc_check(union bpf_attr *attr)
{
	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
	int numa_node = bpf_map_attr_numa_node(attr);

	/* check sanity of attributes */
	if (attr->max_entries == 0 || attr->key_size != 4 ||
	    attr->value_size == 0 ||
	    attr->map_flags & ~ARRAY_CREATE_FLAG_MASK ||
	    (percpu && numa_node != NUMA_NO_NODE))
		return -EINVAL;

	if (attr->value_size > KMALLOC_MAX_SIZE)
		/* if value_size is bigger, the user space won't be able to
		 * access the elements.
		 */
		return -E2BIG;

	return 0;
}
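
/* Illustrative sketch of attributes that pass the check above, as user space
 * would set them up for BPF_MAP_CREATE (field names follow union bpf_attr):
 *
 *   union bpf_attr attr = {
 *       .map_type    = BPF_MAP_TYPE_ARRAY,
 *       .key_size    = 4,      // must be exactly a u32 index
 *       .value_size  = 64,     // non-zero, at most KMALLOC_MAX_SIZE
 *       .max_entries = 256,    // must be non-zero
 *   };
 */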

static struct bpf_map *array_map_alloc(union bpf_attr *attr)
{
	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
	int ret, numa_node = bpf_map_attr_numa_node(attr);
	u32 elem_size, index_mask, max_entries;
	bool unpriv = !capable(CAP_SYS_ADMIN);
	u64 cost, array_size, mask64;
	struct bpf_array *array;

	elem_size = round_up(attr->value_size, 8);

	max_entries = attr->max_entries;

	/* On 32 bit archs roundup_pow_of_two() with max_entries that has
	 * upper most bit set in u32 space is undefined behavior due to
	 * resulting 1U << 32, so do it manually here in u64 space.
	 */
	mask64 = fls_long(max_entries - 1);
	mask64 = 1ULL << mask64;
	mask64 -= 1;

	index_mask = mask64;
	if (unpriv) {
		/* round up array size to nearest power of 2,
		 * since cpu will speculate within index_mask limits
		 */
		max_entries = index_mask + 1;
		/* Check for overflows. */
		if (max_entries < attr->max_entries)
			return ERR_PTR(-E2BIG);
	}
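	/* For example, attr->max_entries == 5 gives fls_long(4) == 3, so
	 * index_mask == 7 and an unprivileged map is rounded up to 8 entries;
	 * masking the index then keeps even speculative accesses in bounds.
	 */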

	array_size = sizeof(*array);
	if (percpu)
		array_size += (u64) max_entries * sizeof(void *);
	else
		array_size += (u64) max_entries * elem_size;

	/* make sure there is no u32 overflow later in round_up() */
	cost = array_size;
	if (cost >= U32_MAX - PAGE_SIZE)
		return ERR_PTR(-ENOMEM);
	if (percpu) {
		cost += (u64)attr->max_entries * elem_size * num_possible_cpus();
		if (cost >= U32_MAX - PAGE_SIZE)
			return ERR_PTR(-ENOMEM);
	}
	cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;

	ret = bpf_map_precharge_memlock(cost);
	if (ret < 0)
		return ERR_PTR(ret);

	/* allocate all map elements and zero-initialize them */
	array = bpf_map_area_alloc(array_size, numa_node);
	if (!array)
		return ERR_PTR(-ENOMEM);
	array->index_mask = index_mask;
	array->map.unpriv_array = unpriv;

	/* copy mandatory map attributes */
	bpf_map_init_from_attr(&array->map, attr);
	array->map.pages = cost;
	array->elem_size = elem_size;

	if (percpu && bpf_array_alloc_percpu(array)) {
		bpf_map_area_free(array);
		return ERR_PTR(-ENOMEM);
	}

	return &array->map;
}

/* Called from syscall or from eBPF program */
static void *array_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;

	if (unlikely(index >= array->map.max_entries))
		return NULL;

	return array->value + array->elem_size * (index & array->index_mask);
}

/* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_insn *insn = insn_buf;
	u32 elem_size = round_up(map->value_size, 8);
	const int ret = BPF_REG_0;
	const int map_ptr = BPF_REG_1;
	const int index = BPF_REG_2;

	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
	if (map->unpriv_array) {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
		*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
	} else {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
	}

	if (is_power_of_2(elem_size)) {
		*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
	} else {
		*insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
	}
	*insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
	*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
	*insn++ = BPF_MOV64_IMM(ret, 0);
	return insn - insn_buf;
}
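
/* Roughly, the sequence emitted above inlines the lookup as:
 *
 *   r1 += offsetof(struct bpf_array, value)   // r1 = &array->value[0]
 *   r0 = *(u32 *)(r2 + 0)                     // r0 = index
 *   if r0 >= map->max_entries goto miss
 *   r0 &= index_mask                          // unprivileged maps only
 *   r0 <<= ilog2(elem_size)                   // or r0 *= elem_size
 *   r0 += r1                                  // r0 = &value[index]
 *   goto done
 * miss:
 *   r0 = 0                                    // NULL
 * done:
 */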

/* Called from eBPF program */
static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;

	if (unlikely(index >= array->map.max_entries))
		return NULL;

	return this_cpu_ptr(array->pptrs[index & array->index_mask]);
}

int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	int cpu, off = 0;
	u32 size;

	if (unlikely(index >= array->map.max_entries))
		return -ENOENT;

	/* per_cpu areas are zero-filled and bpf programs can only
	 * access 'value_size' of them, so copying rounded areas
	 * will not leak any kernel data
	 */
	size = round_up(map->value_size, 8);
	rcu_read_lock();
	pptr = array->pptrs[index & array->index_mask];
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}
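
/* The 'value' buffer is laid out as one rounded-up slot per possible CPU,
 * so the caller has to supply at least
 * round_up(map->value_size, 8) * num_possible_cpus() bytes.
 */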

/* Called from syscall */
static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = key ? *(u32 *)key : U32_MAX;
	u32 *next = (u32 *)next_key;

	if (index >= array->map.max_entries) {
		*next = 0;
		return 0;
	}

	if (index == array->map.max_entries - 1)
		return -ENOENT;

	*next = index + 1;
	return 0;
}
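
/* Illustrative user-space iteration sketch, assuming libbpf's
 * bpf_map_get_next_key() wrapper around this syscall:
 *
 *   __u32 key, next;
 *   int err = bpf_map_get_next_key(map_fd, NULL, &next);  // first key is 0
 *   while (!err) {
 *       key = next;
 *       // ... look up 'key' here ...
 *       err = bpf_map_get_next_key(map_fd, &key, &next);  // fails past the end
 *   }
 */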

/* Called from syscall or from eBPF program */
static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
				 u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;

	if (unlikely(map_flags > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	if (unlikely(index >= array->map.max_entries))
		/* all elements were pre-allocated, cannot insert a new one */
		return -E2BIG;

	if (unlikely(map_flags == BPF_NOEXIST))
		/* all elements already exist */
		return -EEXIST;

	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]),
		       value, map->value_size);
	else
		memcpy(array->value +
		       array->elem_size * (index & array->index_mask),
		       value, map->value_size);
	return 0;
}

int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
			    u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	int cpu, off = 0;
	u32 size;

	if (unlikely(map_flags > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	if (unlikely(index >= array->map.max_entries))
		/* all elements were pre-allocated, cannot insert a new one */
		return -E2BIG;

	if (unlikely(map_flags == BPF_NOEXIST))
		/* all elements already exist */
		return -EEXIST;

	/* the user space will provide round_up(value_size, 8) bytes that
	 * will be copied into per-cpu area. bpf programs can only access
	 * value_size of it. During lookup the same extra bytes will be
	 * returned or zeros which were zero-filled by percpu_alloc,
	 * so no kernel data leaks possible
	 */
	size = round_up(map->value_size, 8);
	rcu_read_lock();
	pptr = array->pptrs[index & array->index_mask];
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}

/* Called from syscall or from eBPF program */
static int array_map_delete_elem(struct bpf_map *map, void *key)
{
	return -EINVAL;
}
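
/* Deletion is rejected by design: every index of an array map is
 * pre-allocated and always "exists", so unlike a hash map there is no
 * element to remove; a slot is cleared by overwriting it (e.g. with zeroes).
 */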

/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void array_map_free(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);

	/* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
	 * so the programs (can be more than one that used this map) were
	 * disconnected from events. Wait for outstanding programs to complete
	 * and free the array
	 */
	synchronize_rcu();

	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		bpf_array_free_percpu(array);

	bpf_map_area_free(array);
}

static void array_map_seq_show_elem(struct bpf_map *map, void *key,
				    struct seq_file *m)
{
	void *value;

	rcu_read_lock();

	value = array_map_lookup_elem(map, key);
	if (!value) {
		rcu_read_unlock();
		return;
	}

	seq_printf(m, "%u: ", *(u32 *)key);
	btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
	seq_puts(m, "\n");

	rcu_read_unlock();
}

static void percpu_array_map_seq_show_elem(struct bpf_map *map, void *key,
					   struct seq_file *m)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	int cpu;

	rcu_read_lock();

	seq_printf(m, "%u: {\n", *(u32 *)key);
	pptr = array->pptrs[index & array->index_mask];
	for_each_possible_cpu(cpu) {
		seq_printf(m, "\tcpu%d: ", cpu);
		btf_type_seq_show(map->btf, map->btf_value_type_id,
				  per_cpu_ptr(pptr, cpu), m);
		seq_puts(m, "\n");
	}
	seq_puts(m, "}\n");

	rcu_read_unlock();
}

static int array_map_check_btf(const struct bpf_map *map,
			       const struct btf_type *key_type,
			       const struct btf_type *value_type)
{
	u32 int_data;

	if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
		return -EINVAL;

	int_data = *(u32 *)(key_type + 1);
	/* bpf array can only take a u32 key. This check makes sure
	 * that the btf matches the attr used during map_create.
	 */
	if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data))
		return -EINVAL;

	return 0;
}
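
/* In BTF terms this accepts only a plain 32-bit integer key with no bit
 * offset, i.e. a key declared as __u32 (or int) in the program source;
 * the value type is not constrained here.
 */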

const struct bpf_map_ops array_map_ops = {
	.map_alloc_check = array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = array_map_lookup_elem,
	.map_update_elem = array_map_update_elem,
	.map_delete_elem = array_map_delete_elem,
	.map_gen_lookup = array_map_gen_lookup,
	.map_seq_show_elem = array_map_seq_show_elem,
	.map_check_btf = array_map_check_btf,
};

const struct bpf_map_ops percpu_array_map_ops = {
	.map_alloc_check = array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = percpu_array_map_lookup_elem,
	.map_update_elem = array_map_update_elem,
	.map_delete_elem = array_map_delete_elem,
	.map_seq_show_elem = percpu_array_map_seq_show_elem,
	.map_check_btf = array_map_check_btf,
};

static int fd_array_map_alloc_check(union bpf_attr *attr)
{
	/* only file descriptors can be stored in this type of map */
	if (attr->value_size != sizeof(u32))
		return -EINVAL;
	return array_map_alloc_check(attr);
}

static void fd_array_map_free(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	synchronize_rcu();

	/* make sure it's empty */
	for (i = 0; i < array->map.max_entries; i++)
		BUG_ON(array->ptrs[i] != NULL);

	bpf_map_area_free(array);
}

static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key)
{
	return NULL;
}

/* only called from syscall */
int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
{
	void **elem, *ptr;
	int ret = 0;

	if (!map->ops->map_fd_sys_lookup_elem)
		return -ENOTSUPP;

	rcu_read_lock();
	elem = array_map_lookup_elem(map, key);
	if (elem && (ptr = READ_ONCE(*elem)))
		*value = map->ops->map_fd_sys_lookup_elem(ptr);
	else
		ret = -ENOENT;
	rcu_read_unlock();

	return ret;
}

/* only called from syscall */
int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
				 void *key, void *value, u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	void *new_ptr, *old_ptr;
	u32 index = *(u32 *)key, ufd;

	if (map_flags != BPF_ANY)
		return -EINVAL;

	if (index >= array->map.max_entries)
		return -E2BIG;

	ufd = *(u32 *)value;
	new_ptr = map->ops->map_fd_get_ptr(map, map_file, ufd);
	if (IS_ERR(new_ptr))
		return PTR_ERR(new_ptr);

	old_ptr = xchg(array->ptrs + index, new_ptr);
	if (old_ptr)
		map->ops->map_fd_put_ptr(old_ptr);

	return 0;
}
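
/* xchg() swaps the slot atomically, so a BPF program running concurrently
 * sees either the old or the new pointer, never a torn value; the old
 * object's reference is only dropped after the swap.
 */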

static int fd_array_map_delete_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	void *old_ptr;
	u32 index = *(u32 *)key;

	if (index >= array->map.max_entries)
		return -E2BIG;

	old_ptr = xchg(array->ptrs + index, NULL);
	if (old_ptr) {
		map->ops->map_fd_put_ptr(old_ptr);
		return 0;
	} else {
		return -ENOENT;
	}
}

static void *prog_fd_array_get_ptr(struct bpf_map *map,
				   struct file *map_file, int fd)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_prog *prog = bpf_prog_get(fd);

	if (IS_ERR(prog))
		return prog;

	if (!bpf_prog_array_compatible(array, prog)) {
		bpf_prog_put(prog);
		return ERR_PTR(-EINVAL);
	}

	return prog;
}
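
/* bpf_prog_array_compatible() pins the array to the first inserted
 * program's type and JIT flavour, so a tail call can only target programs
 * compatible with what the caller was verified against.
 */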

static void prog_fd_array_put_ptr(void *ptr)
{
	bpf_prog_put(ptr);
}

static u32 prog_fd_array_sys_lookup_elem(void *ptr)
{
	return ((struct bpf_prog *)ptr)->aux->id;
}

/* decrement refcnt of all bpf_progs that are stored in this map */
static void bpf_fd_array_map_clear(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	for (i = 0; i < array->map.max_entries; i++)
		fd_array_map_delete_elem(map, &i);
}

static void prog_array_map_seq_show_elem(struct bpf_map *map, void *key,
					 struct seq_file *m)
{
	void **elem, *ptr;
	u32 prog_id;

	rcu_read_lock();

	elem = array_map_lookup_elem(map, key);
	if (elem) {
		ptr = READ_ONCE(*elem);
		if (ptr) {
			seq_printf(m, "%u: ", *(u32 *)key);
			prog_id = prog_fd_array_sys_lookup_elem(ptr);
			btf_type_seq_show(map->btf, map->btf_value_type_id,
					  &prog_id, m);
			seq_puts(m, "\n");
		}
	}

	rcu_read_unlock();
}

const struct bpf_map_ops prog_array_map_ops = {
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = fd_array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = prog_fd_array_get_ptr,
	.map_fd_put_ptr = prog_fd_array_put_ptr,
	.map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem,
	.map_release_uref = bpf_fd_array_map_clear,
	.map_seq_show_elem = prog_array_map_seq_show_elem,
};

static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file,
						   struct file *map_file)
{
	struct bpf_event_entry *ee;

	ee = kzalloc(sizeof(*ee), GFP_ATOMIC);
	if (ee) {
		ee->event = perf_file->private_data;
		ee->perf_file = perf_file;
		ee->map_file = map_file;
	}

	return ee;
}

static void __bpf_event_entry_free(struct rcu_head *rcu)
{
	struct bpf_event_entry *ee;

	ee = container_of(rcu, struct bpf_event_entry, rcu);
	fput(ee->perf_file);
	kfree(ee);
}

static void bpf_event_entry_free_rcu(struct bpf_event_entry *ee)
{
	call_rcu(&ee->rcu, __bpf_event_entry_free);
}

static void *perf_event_fd_array_get_ptr(struct bpf_map *map,
					 struct file *map_file, int fd)
{
	struct bpf_event_entry *ee;
	struct perf_event *event;
	struct file *perf_file;
	u64 value;

	perf_file = perf_event_get(fd);
	if (IS_ERR(perf_file))
		return perf_file;

	ee = ERR_PTR(-EOPNOTSUPP);
	event = perf_file->private_data;
	if (perf_event_read_local(event, &value, NULL, NULL) == -EOPNOTSUPP)
		goto err_out;

	ee = bpf_event_entry_gen(perf_file, map_file);
	if (ee)
		return ee;
	ee = ERR_PTR(-ENOMEM);
err_out:
	fput(perf_file);
	return ee;
}
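
/* The perf_event_read_local() probe above filters out events that cannot
 * be read from BPF context on the local CPU; only events supporting local
 * reads are allowed into the map.
 */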

static void perf_event_fd_array_put_ptr(void *ptr)
{
	bpf_event_entry_free_rcu(ptr);
}

static void perf_event_fd_array_release(struct bpf_map *map,
					struct file *map_file)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_event_entry *ee;
	int i;

	rcu_read_lock();
	for (i = 0; i < array->map.max_entries; i++) {
		ee = READ_ONCE(array->ptrs[i]);
		if (ee && ee->map_file == map_file)
			fd_array_map_delete_elem(map, &i);
	}
	rcu_read_unlock();
}
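
/* Each entry remembers the map file it was installed through; when that
 * file is released, the entries it installed are flushed so a stored perf
 * event cannot outlive the user that put it there.
 */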

const struct bpf_map_ops perf_event_array_map_ops = {
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = fd_array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = perf_event_fd_array_get_ptr,
	.map_fd_put_ptr = perf_event_fd_array_put_ptr,
	.map_release = perf_event_fd_array_release,
	.map_check_btf = map_check_no_btf,
};

#ifdef CONFIG_CGROUPS
static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
				     struct file *map_file /* not used */,
				     int fd)
{
	return cgroup_get_from_fd(fd);
}

static void cgroup_fd_array_put_ptr(void *ptr)
{
	/* cgroup_put() frees cgrp after an RCU grace period */
	cgroup_put(ptr);
}

static void cgroup_fd_array_free(struct bpf_map *map)
{
	bpf_fd_array_map_clear(map);
	fd_array_map_free(map);
}

const struct bpf_map_ops cgroup_array_map_ops = {
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = cgroup_fd_array_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = cgroup_fd_array_get_ptr,
	.map_fd_put_ptr = cgroup_fd_array_put_ptr,
	.map_check_btf = map_check_no_btf,
};
#endif

static struct bpf_map *array_of_map_alloc(union bpf_attr *attr)
{
	struct bpf_map *map, *inner_map_meta;

	inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd);
	if (IS_ERR(inner_map_meta))
		return inner_map_meta;

	map = array_map_alloc(attr);
	if (IS_ERR(map)) {
		bpf_map_meta_free(inner_map_meta);
		return map;
	}

	map->inner_map_meta = inner_map_meta;

	return map;
}

static void array_of_map_free(struct bpf_map *map)
{
	/* map->inner_map_meta is only accessed by syscall which
	 * is protected by fdget/fdput.
	 */
	bpf_map_meta_free(map->inner_map_meta);
	bpf_fd_array_map_clear(map);
	fd_array_map_free(map);
}

static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_map **inner_map = array_map_lookup_elem(map, key);

	if (!inner_map)
		return NULL;

	return READ_ONCE(*inner_map);
}
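
/* The array slot holds a pointer to the inner map; READ_ONCE() pairs with
 * the xchg() in the update path so a concurrent lookup sees a consistent
 * pointer while entries are being replaced.
 */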

static u32 array_of_map_gen_lookup(struct bpf_map *map,
				   struct bpf_insn *insn_buf)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 elem_size = round_up(map->value_size, 8);
	struct bpf_insn *insn = insn_buf;
	const int ret = BPF_REG_0;
	const int map_ptr = BPF_REG_1;
	const int index = BPF_REG_2;

	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
	if (map->unpriv_array) {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6);
		*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
	} else {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
	}
	if (is_power_of_2(elem_size))
		*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
	else
		*insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
	*insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
	*insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0);
	*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1);
	*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
	*insn++ = BPF_MOV64_IMM(ret, 0);

	return insn - insn_buf;
}
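
/* Same shape as array_map_gen_lookup(), plus one extra load to fetch the
 * inner map pointer from the slot and a NULL check on it, which is why the
 * JGE offsets grow from 4/3 to 6/5.
 */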

const struct bpf_map_ops array_of_maps_map_ops = {
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_of_map_alloc,
	.map_free = array_of_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = array_of_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = bpf_map_fd_get_ptr,
	.map_fd_put_ptr = bpf_map_fd_put_ptr,
	.map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
	.map_gen_lookup = array_of_map_gen_lookup,
	.map_check_btf = map_check_no_btf,
};