/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016,2017 Facebook
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/filter.h>
#include <linux/perf_event.h>
#include <uapi/linux/btf.h>

#include "map_in_map.h"

#define ARRAY_CREATE_FLAG_MASK \
	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)

static void bpf_array_free_percpu(struct bpf_array *array)
{
	int i;

	for (i = 0; i < array->map.max_entries; i++) {
		free_percpu(array->pptrs[i]);
		cond_resched();
	}
}

static int bpf_array_alloc_percpu(struct bpf_array *array)
{
	void __percpu *ptr;
	int i;

	for (i = 0; i < array->map.max_entries; i++) {
		ptr = __alloc_percpu_gfp(array->elem_size, 8,
					 GFP_USER | __GFP_NOWARN);
		if (!ptr) {
			bpf_array_free_percpu(array);
			return -ENOMEM;
		}
		array->pptrs[i] = ptr;
		cond_resched();
	}

	return 0;
}

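/* Note: the cond_resched() calls in the two loops above are scheduling
 * points; max_entries can be large enough that allocating or freeing
 * every per-cpu element in one uninterrupted pass could otherwise hog
 * the CPU for long stretches.
 */
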
/* Called from syscall */
int array_map_alloc_check(union bpf_attr *attr)
{
	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
	int numa_node = bpf_map_attr_numa_node(attr);

	/* check sanity of attributes */
	if (attr->max_entries == 0 || attr->key_size != 4 ||
	    attr->value_size == 0 ||
	    attr->map_flags & ~ARRAY_CREATE_FLAG_MASK ||
	    (percpu && numa_node != NUMA_NO_NODE))
		return -EINVAL;

	if (attr->value_size > KMALLOC_MAX_SIZE)
		/* if value_size is bigger, the user space won't be able to
		 * access the elements.
		 */
		return -E2BIG;

	return 0;
}

static struct bpf_map *array_map_alloc(union bpf_attr *attr)
{
	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
	int ret, numa_node = bpf_map_attr_numa_node(attr);
	u32 elem_size, index_mask, max_entries;
	bool unpriv = !capable(CAP_SYS_ADMIN);
	u64 cost, array_size, mask64;
	struct bpf_array *array;

	elem_size = round_up(attr->value_size, 8);

	max_entries = attr->max_entries;

	/* On 32 bit archs roundup_pow_of_two() with max_entries that has
	 * upper most bit set in u32 space is undefined behavior due to
	 * resulting 1U << 32, so do it manually here in u64 space.
	 */
	mask64 = fls_long(max_entries - 1);
	mask64 = 1ULL << mask64;
	mask64 -= 1;

	index_mask = mask64;
	if (unpriv) {
		/* round up array size to nearest power of 2,
		 * since cpu will speculate within index_mask limits
		 */
		max_entries = index_mask + 1;
		/* Check for overflows. */
		if (max_entries < attr->max_entries)
			return ERR_PTR(-E2BIG);
	}
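
	/* Worked example (illustrative, not from the original source):
	 * attr->max_entries == 5 gives fls_long(4) == 3, so mask64 ==
	 * (1ULL << 3) - 1 == 7 and index_mask == 0x7. For an unprivileged
	 * map, max_entries is then rounded up to index_mask + 1 == 8, so
	 * "index & index_mask" can never leave the allocation, even when
	 * the CPU speculates past a mispredicted bounds check (Spectre v1).
	 */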

	array_size = sizeof(*array);
	if (percpu)
		array_size += (u64) max_entries * sizeof(void *);
	else
		array_size += (u64) max_entries * elem_size;

	/* make sure there is no u32 overflow later in round_up() */
	cost = array_size;
	if (cost >= U32_MAX - PAGE_SIZE)
		return ERR_PTR(-ENOMEM);
	if (percpu) {
		cost += (u64)attr->max_entries * elem_size * num_possible_cpus();
		if (cost >= U32_MAX - PAGE_SIZE)
			return ERR_PTR(-ENOMEM);
	}
	cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;

	ret = bpf_map_precharge_memlock(cost);
	if (ret < 0)
		return ERR_PTR(ret);

	/* allocate all map elements and zero-initialize them */
	array = bpf_map_area_alloc(array_size, numa_node);
	if (!array)
		return ERR_PTR(-ENOMEM);
	array->index_mask = index_mask;
	array->map.unpriv_array = unpriv;

	/* copy mandatory map attributes */
	bpf_map_init_from_attr(&array->map, attr);
	array->map.pages = cost;
	array->elem_size = elem_size;

	if (percpu && bpf_array_alloc_percpu(array)) {
		bpf_map_area_free(array);
		return ERR_PTR(-ENOMEM);
	}

	return &array->map;
}
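
/* Minimal user-space sketch of creating this map type via the bpf(2)
 * syscall (illustrative only; field names are from the UAPI in
 * include/uapi/linux/bpf.h):
 *
 *	union bpf_attr attr = {
 *		.map_type    = BPF_MAP_TYPE_ARRAY,
 *		.key_size    = 4,		// arrays require a u32 key
 *		.value_size  = sizeof(__u64),
 *		.max_entries = 256,
 *	};
 *	int map_fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
 */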

/* Called from syscall or from eBPF program */
static void *array_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;

	if (unlikely(index >= array->map.max_entries))
		return NULL;

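	/* The AND with index_mask below is kept even though the bounds
	 * check above already rejected out-of-range indices: it clamps
	 * the offset on speculatively executed paths too, as part of the
	 * Spectre v1 hardening for unprivileged users.
	 */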
	return array->value + array->elem_size * (index & array->index_mask);
}

/* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_insn *insn = insn_buf;
	u32 elem_size = round_up(map->value_size, 8);
	const int ret = BPF_REG_0;
	const int map_ptr = BPF_REG_1;
	const int index = BPF_REG_2;

	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
	if (map->unpriv_array) {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
		*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
	} else {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
	}

	if (is_power_of_2(elem_size)) {
		*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
	} else {
		*insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
	}
	*insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
	*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
	*insn++ = BPF_MOV64_IMM(ret, 0);
	return insn - insn_buf;
}
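
/* For reference, the sequence emitted above for an unprivileged map
 * with a power-of-2 elem_size decodes roughly as (illustrative
 * disassembly, not from the original source):
 *
 *	r1 += offsetof(struct bpf_array, value)
 *	r0 = *(u32 *)(r2 + 0)		// load index from key pointer
 *	if r0 >= max_entries goto miss	// JGE, offset 4
 *	w0 &= index_mask		// clamp speculative loads
 *	r0 <<= ilog2(elem_size)
 *	r0 += r1			// r0 = &array->value[index]
 *	goto out			// skip the miss path
 * miss:	r0 = 0			// NULL for a failed lookup
 * out:
 */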

/* Called from eBPF program */
static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;

	if (unlikely(index >= array->map.max_entries))
		return NULL;

	return this_cpu_ptr(array->pptrs[index & array->index_mask]);
}

int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	int cpu, off = 0;
	u32 size;

	if (unlikely(index >= array->map.max_entries))
		return -ENOENT;

	/* per_cpu areas are zero-filled and bpf programs can only
	 * access 'value_size' of them, so copying rounded areas
	 * will not leak any kernel data
	 */
	size = round_up(map->value_size, 8);
	rcu_read_lock();
	pptr = array->pptrs[index & array->index_mask];
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}
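
/* User space reading one element of a per-cpu array must pass a value
 * buffer of round_up(value_size, 8) * num-possible-CPUs bytes; the loop
 * above packs the per-CPU copies back to back into it. A hedged caller
 * sketch (buffer math as assumed from the code above):
 *
 *	__u32 sz = (value_size + 7) & ~7u;
 *	void *buf = malloc((size_t)sz * ncpus);	// ncpus: possible CPUs
 *	// a BPF_MAP_LOOKUP_ELEM syscall then fills buf + cpu * sz
 */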

/* Called from syscall */
static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = key ? *(u32 *)key : U32_MAX;
	u32 *next = (u32 *)next_key;

	if (index >= array->map.max_entries) {
		*next = 0;
		return 0;
	}

	if (index == array->map.max_entries - 1)
		return -ENOENT;

	*next = index + 1;
	return 0;
}
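
/* Iteration contract: a NULL key (mapped to U32_MAX above) or any
 * out-of-range key restarts iteration at index 0; asking for the
 * successor of the last index returns -ENOENT, which user-space
 * iteration loops treat as "done".
 */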

/* Called from syscall or from eBPF program */
static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
				 u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;

	if (unlikely(map_flags > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	if (unlikely(index >= array->map.max_entries))
		/* all elements were pre-allocated, cannot insert a new one */
		return -E2BIG;

	if (unlikely(map_flags == BPF_NOEXIST))
		/* all elements already exist */
		return -EEXIST;

	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]),
		       value, map->value_size);
	else
		memcpy(array->value +
		       array->elem_size * (index & array->index_mask),
		       value, map->value_size);
	return 0;
}

int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
			    u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	int cpu, off = 0;
	u32 size;

	if (unlikely(map_flags > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	if (unlikely(index >= array->map.max_entries))
		/* all elements were pre-allocated, cannot insert a new one */
		return -E2BIG;

	if (unlikely(map_flags == BPF_NOEXIST))
		/* all elements already exist */
		return -EEXIST;

	/* user space provides round_up(value_size, 8) bytes that are
	 * copied into the per-cpu area. bpf programs can only access
	 * value_size of it. during lookup the same extra bytes are
	 * returned, or the zeros which percpu_alloc zero-filled, so no
	 * kernel data can leak
	 */
	size = round_up(map->value_size, 8);
	rcu_read_lock();
	pptr = array->pptrs[index & array->index_mask];
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}

/* Called from syscall or from eBPF program */
static int array_map_delete_elem(struct bpf_map *map, void *key)
{
	return -EINVAL;
}

/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void array_map_free(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);

	/* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
	 * so the programs (can be more than one that used this map) were
	 * disconnected from events. Wait for outstanding programs to complete
	 * and free the array
	 */
	synchronize_rcu();

	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		bpf_array_free_percpu(array);

	bpf_map_area_free(array);
}

static void array_map_seq_show_elem(struct bpf_map *map, void *key,
				    struct seq_file *m)
{
	void *value;

	rcu_read_lock();

	value = array_map_lookup_elem(map, key);
	if (!value) {
		rcu_read_unlock();
		return;
	}

	seq_printf(m, "%u: ", *(u32 *)key);
	btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
	seq_puts(m, "\n");

	rcu_read_unlock();
}

static int array_map_check_btf(const struct bpf_map *map,
			       const struct btf_type *key_type,
			       const struct btf_type *value_type)
{
	u32 int_data;

	if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
		return -EINVAL;

	int_data = *(u32 *)(key_type + 1);
	/* bpf array can only take a u32 key. This check makes sure
	 * that the btf matches the attr used during map_create.
	 */
	if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data))
		return -EINVAL;

	return 0;
}

const struct bpf_map_ops array_map_ops = {
	.map_alloc_check = array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = array_map_lookup_elem,
	.map_update_elem = array_map_update_elem,
	.map_delete_elem = array_map_delete_elem,
	.map_gen_lookup = array_map_gen_lookup,
	.map_seq_show_elem = array_map_seq_show_elem,
	.map_check_btf = array_map_check_btf,
};

const struct bpf_map_ops percpu_array_map_ops = {
	.map_alloc_check = array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = percpu_array_map_lookup_elem,
	.map_update_elem = array_map_update_elem,
	.map_delete_elem = array_map_delete_elem,
	.map_check_btf = array_map_check_btf,
};

static int fd_array_map_alloc_check(union bpf_attr *attr)
{
	/* only file descriptors can be stored in this type of map */
	if (attr->value_size != sizeof(u32))
		return -EINVAL;
	return array_map_alloc_check(attr);
}

static void fd_array_map_free(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	synchronize_rcu();

	/* make sure it's empty */
	for (i = 0; i < array->map.max_entries; i++)
		BUG_ON(array->ptrs[i] != NULL);

	bpf_map_area_free(array);
}

static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key)
{
	return NULL;
}

/* only called from syscall */
int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
{
	void **elem, *ptr;
	int ret = 0;

	if (!map->ops->map_fd_sys_lookup_elem)
		return -ENOTSUPP;

	rcu_read_lock();
	elem = array_map_lookup_elem(map, key);
	if (elem && (ptr = READ_ONCE(*elem)))
		*value = map->ops->map_fd_sys_lookup_elem(ptr);
	else
		ret = -ENOENT;
	rcu_read_unlock();

	return ret;
}

/* only called from syscall */
int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
				 void *key, void *value, u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	void *new_ptr, *old_ptr;
	u32 index = *(u32 *)key, ufd;

	if (map_flags != BPF_ANY)
		return -EINVAL;

	if (index >= array->map.max_entries)
		return -E2BIG;

	ufd = *(u32 *)value;
	new_ptr = map->ops->map_fd_get_ptr(map, map_file, ufd);
	if (IS_ERR(new_ptr))
		return PTR_ERR(new_ptr);

	old_ptr = xchg(array->ptrs + index, new_ptr);
	if (old_ptr)
		map->ops->map_fd_put_ptr(old_ptr);

	return 0;
}
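
/* The xchg() above is the entire update protocol for fd arrays: the
 * new object is published atomically, and whatever pointer occupied
 * the slot before is released afterwards, so a concurrent lookup sees
 * either the old or the new element, never a torn or freed one.
 */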

static int fd_array_map_delete_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	void *old_ptr;
	u32 index = *(u32 *)key;

	if (index >= array->map.max_entries)
		return -E2BIG;

	old_ptr = xchg(array->ptrs + index, NULL);
	if (old_ptr) {
		map->ops->map_fd_put_ptr(old_ptr);
		return 0;
	} else {
		return -ENOENT;
	}
}

static void *prog_fd_array_get_ptr(struct bpf_map *map,
				   struct file *map_file, int fd)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_prog *prog = bpf_prog_get(fd);

	if (IS_ERR(prog))
		return prog;

	if (!bpf_prog_array_compatible(array, prog)) {
		bpf_prog_put(prog);
		return ERR_PTR(-EINVAL);
	}

	return prog;
}

static void prog_fd_array_put_ptr(void *ptr)
{
	bpf_prog_put(ptr);
}

static u32 prog_fd_array_sys_lookup_elem(void *ptr)
{
	return ((struct bpf_prog *)ptr)->aux->id;
}

/* decrement refcnt of all bpf_progs that are stored in this map */
static void bpf_fd_array_map_clear(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	for (i = 0; i < array->map.max_entries; i++)
		fd_array_map_delete_elem(map, &i);
}

const struct bpf_map_ops prog_array_map_ops = {
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = fd_array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = prog_fd_array_get_ptr,
	.map_fd_put_ptr = prog_fd_array_put_ptr,
	.map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem,
	.map_release_uref = bpf_fd_array_map_clear,
	.map_check_btf = map_check_no_btf,
};
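
/* prog_array maps are what bpf_tail_call() jumps through: the program
 * stored at an index becomes the tail-call target, which is why
 * prog_fd_array_get_ptr() above insists the stored program is
 * compatible with the owning program's type.
 */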

static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file,
						   struct file *map_file)
{
	struct bpf_event_entry *ee;

	ee = kzalloc(sizeof(*ee), GFP_ATOMIC);
	if (ee) {
		ee->event = perf_file->private_data;
		ee->perf_file = perf_file;
		ee->map_file = map_file;
	}

	return ee;
}

static void __bpf_event_entry_free(struct rcu_head *rcu)
{
	struct bpf_event_entry *ee;

	ee = container_of(rcu, struct bpf_event_entry, rcu);
	fput(ee->perf_file);
	kfree(ee);
}

static void bpf_event_entry_free_rcu(struct bpf_event_entry *ee)
{
	call_rcu(&ee->rcu, __bpf_event_entry_free);
}

static void *perf_event_fd_array_get_ptr(struct bpf_map *map,
					 struct file *map_file, int fd)
{
	struct bpf_event_entry *ee;
	struct perf_event *event;
	struct file *perf_file;
	u64 value;

	perf_file = perf_event_get(fd);
	if (IS_ERR(perf_file))
		return perf_file;

	ee = ERR_PTR(-EOPNOTSUPP);
	event = perf_file->private_data;
	if (perf_event_read_local(event, &value, NULL, NULL) == -EOPNOTSUPP)
		goto err_out;

	ee = bpf_event_entry_gen(perf_file, map_file);
	if (ee)
		return ee;
	ee = ERR_PTR(-ENOMEM);
err_out:
	fput(perf_file);
	return ee;
}
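
/* The perf_event_read_local() call above is only a probe: it checks
 * that the event can be read from BPF program context at all. Events
 * for which that can never work report -EOPNOTSUPP and are rejected
 * here; the sampled value itself is discarded.
 */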

static void perf_event_fd_array_put_ptr(void *ptr)
{
	bpf_event_entry_free_rcu(ptr);
}

static void perf_event_fd_array_release(struct bpf_map *map,
					struct file *map_file)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_event_entry *ee;
	int i;

	rcu_read_lock();
	for (i = 0; i < array->map.max_entries; i++) {
		ee = READ_ONCE(array->ptrs[i]);
		if (ee && ee->map_file == map_file)
			fd_array_map_delete_elem(map, &i);
	}
	rcu_read_unlock();
}

const struct bpf_map_ops perf_event_array_map_ops = {
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = fd_array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = perf_event_fd_array_get_ptr,
	.map_fd_put_ptr = perf_event_fd_array_put_ptr,
	.map_release = perf_event_fd_array_release,
	.map_check_btf = map_check_no_btf,
};

#ifdef CONFIG_CGROUPS
static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
				     struct file *map_file /* not used */,
				     int fd)
{
	return cgroup_get_from_fd(fd);
}

static void cgroup_fd_array_put_ptr(void *ptr)
{
	/* cgroup_put() frees the cgrp after an RCU grace period */
	cgroup_put(ptr);
}

static void cgroup_fd_array_free(struct bpf_map *map)
{
	bpf_fd_array_map_clear(map);
	fd_array_map_free(map);
}

const struct bpf_map_ops cgroup_array_map_ops = {
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = cgroup_fd_array_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = cgroup_fd_array_get_ptr,
	.map_fd_put_ptr = cgroup_fd_array_put_ptr,
	.map_check_btf = map_check_no_btf,
};
#endif

static struct bpf_map *array_of_map_alloc(union bpf_attr *attr)
{
	struct bpf_map *map, *inner_map_meta;

	inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd);
	if (IS_ERR(inner_map_meta))
		return inner_map_meta;

	map = array_map_alloc(attr);
	if (IS_ERR(map)) {
		bpf_map_meta_free(inner_map_meta);
		return map;
	}

	map->inner_map_meta = inner_map_meta;

	return map;
}

static void array_of_map_free(struct bpf_map *map)
{
	/* map->inner_map_meta is only accessed by syscall which
	 * is protected by fdget/fdput.
	 */
	bpf_map_meta_free(map->inner_map_meta);
	bpf_fd_array_map_clear(map);
	fd_array_map_free(map);
}

static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_map **inner_map = array_map_lookup_elem(map, key);

	if (!inner_map)
		return NULL;

	return READ_ONCE(*inner_map);
}

static u32 array_of_map_gen_lookup(struct bpf_map *map,
				   struct bpf_insn *insn_buf)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 elem_size = round_up(map->value_size, 8);
	struct bpf_insn *insn = insn_buf;
	const int ret = BPF_REG_0;
	const int map_ptr = BPF_REG_1;
	const int index = BPF_REG_2;

	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
	if (map->unpriv_array) {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6);
		*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
	} else {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
	}
	if (is_power_of_2(elem_size))
		*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
	else
		*insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
	*insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
	*insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0);
	*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1);
	*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
	*insn++ = BPF_MOV64_IMM(ret, 0);

	return insn - insn_buf;
}
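
/* Relative to array_map_gen_lookup() above, this sequence adds two
 * instructions: a BPF_DW load that dereferences the slot to fetch the
 * inner map pointer, and a JEQ that turns an empty (NULL) slot into a
 * NULL result; that is why the JGE miss offsets grow from 3/4 to 5/6.
 */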

const struct bpf_map_ops array_of_maps_map_ops = {
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_of_map_alloc,
	.map_free = array_of_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = array_of_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = bpf_map_fd_get_ptr,
	.map_fd_put_ptr = bpf_map_fd_put_ptr,
	.map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
	.map_gen_lookup = array_of_map_gen_lookup,
	.map_check_btf = map_check_no_btf,
};