xref: /linux-6.15/kernel/bpf/arraymap.c (revision 9b2cf328)
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016,2017 Facebook
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/filter.h>
#include <linux/perf_event.h>
#include <uapi/linux/btf.h>

#include "map_in_map.h"

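/* Creation-time flags accepted for array maps; anything else is rejected
 * by array_map_alloc_check().
 */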
#define ARRAY_CREATE_FLAG_MASK \
	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)

static void bpf_array_free_percpu(struct bpf_array *array)
{
	int i;

	for (i = 0; i < array->map.max_entries; i++) {
		free_percpu(array->pptrs[i]);
		cond_resched();
	}
}

static int bpf_array_alloc_percpu(struct bpf_array *array)
{
	void __percpu *ptr;
	int i;

	for (i = 0; i < array->map.max_entries; i++) {
		ptr = __alloc_percpu_gfp(array->elem_size, 8,
					 GFP_USER | __GFP_NOWARN);
		if (!ptr) {
			bpf_array_free_percpu(array);
			return -ENOMEM;
		}
		array->pptrs[i] = ptr;
		cond_resched();
	}

	return 0;
}

/* Called from syscall */
static int array_map_alloc_check(union bpf_attr *attr)
{
	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
	int numa_node = bpf_map_attr_numa_node(attr);

	/* check sanity of attributes */
	if (attr->max_entries == 0 || attr->key_size != 4 ||
	    attr->value_size == 0 ||
	    attr->map_flags & ~ARRAY_CREATE_FLAG_MASK ||
	    (percpu && numa_node != NUMA_NO_NODE))
		return -EINVAL;

	if (attr->value_size > KMALLOC_MAX_SIZE)
		/* if value_size is bigger, user space won't be able to
		 * access the elements.
		 */
		return -E2BIG;

	return 0;
}

static struct bpf_map *array_map_alloc(union bpf_attr *attr)
{
	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
	int ret, numa_node = bpf_map_attr_numa_node(attr);
	u32 elem_size, index_mask, max_entries;
	bool unpriv = !capable(CAP_SYS_ADMIN);
	u64 cost, array_size, mask64;
	struct bpf_array *array;

	elem_size = round_up(attr->value_size, 8);

	max_entries = attr->max_entries;

	/* On 32-bit archs, roundup_pow_of_two() with a max_entries that has
	 * the uppermost bit set in u32 space is undefined behavior (the
	 * result is 1U << 32), so do it manually here in u64 space.
	 */
	mask64 = fls_long(max_entries - 1);
	mask64 = 1ULL << mask64;
	mask64 -= 1;

	index_mask = mask64;
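	/* e.g. max_entries == 5: fls_long(4) == 3, so mask64 == 7 and any
	 * speculated index is confined to [0, 7].
	 */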
	if (unpriv) {
		/* round up array size to nearest power of 2,
		 * since cpu will speculate within index_mask limits
		 */
		max_entries = index_mask + 1;
		/* Check for overflows. */
		if (max_entries < attr->max_entries)
			return ERR_PTR(-E2BIG);
	}

	array_size = sizeof(*array);
	if (percpu)
		array_size += (u64) max_entries * sizeof(void *);
	else
		array_size += (u64) max_entries * elem_size;

	/* make sure there is no u32 overflow later in round_up() */
	cost = array_size;
	if (cost >= U32_MAX - PAGE_SIZE)
		return ERR_PTR(-ENOMEM);
	if (percpu) {
		cost += (u64)attr->max_entries * elem_size * num_possible_cpus();
		if (cost >= U32_MAX - PAGE_SIZE)
			return ERR_PTR(-ENOMEM);
	}
	cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;

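	/* Pre-check the page cost against RLIMIT_MEMLOCK before anything is
	 * allocated; the actual charge happens later in the map_create path.
	 */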
	ret = bpf_map_precharge_memlock(cost);
	if (ret < 0)
		return ERR_PTR(ret);

	/* allocate all map elements and zero-initialize them */
	array = bpf_map_area_alloc(array_size, numa_node);
	if (!array)
		return ERR_PTR(-ENOMEM);
	array->index_mask = index_mask;
	array->map.unpriv_array = unpriv;

	/* copy mandatory map attributes */
	bpf_map_init_from_attr(&array->map, attr);
	array->map.pages = cost;
	array->elem_size = elem_size;

	if (percpu && bpf_array_alloc_percpu(array)) {
		bpf_map_area_free(array);
		return ERR_PTR(-ENOMEM);
	}

	return &array->map;
}

/* Called from syscall or from eBPF program */
static void *array_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;

	if (unlikely(index >= array->map.max_entries))
		return NULL;

	return array->value + array->elem_size * (index & array->index_mask);
}

/* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_insn *insn = insn_buf;
	u32 elem_size = round_up(map->value_size, 8);
	const int ret = BPF_REG_0;
	const int map_ptr = BPF_REG_1;
	const int index = BPF_REG_2;

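	/* The sequence below emits, roughly:
	 *   map_ptr += offsetof(struct bpf_array, value);
	 *   ret = *(u32 *)index;
	 *   if (ret >= max_entries) { ret = NULL; } else {
	 *           (unpriv only: ret &= index_mask;)
	 *           ret = map_ptr + ret * elem_size;
	 *   }
	 */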
	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
	if (map->unpriv_array) {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
		*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
	} else {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
	}

	if (is_power_of_2(elem_size)) {
		*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
	} else {
		*insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
	}
	*insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
	*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
	*insn++ = BPF_MOV64_IMM(ret, 0);
	return insn - insn_buf;
}

/* Called from eBPF program */
static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;

	if (unlikely(index >= array->map.max_entries))
		return NULL;

	return this_cpu_ptr(array->pptrs[index & array->index_mask]);
}

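/* Copy the element's value from every possible CPU into one flat buffer
 * laid out as num_possible_cpus() consecutive round_up(value_size, 8)
 * chunks.
 */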
int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	int cpu, off = 0;
	u32 size;

	if (unlikely(index >= array->map.max_entries))
		return -ENOENT;

	/* per-cpu areas are zero-filled and bpf programs can only
	 * access value_size bytes of them, so copying the rounded-up
	 * areas will not leak any kernel data
	 */
	size = round_up(map->value_size, 8);
	rcu_read_lock();
	pptr = array->pptrs[index & array->index_mask];
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}

/* Called from syscall */
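/* A NULL key asks for the first key: index is then U32_MAX, which takes
 * the out-of-range branch below and yields *next == 0.
 */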
static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = key ? *(u32 *)key : U32_MAX;
	u32 *next = (u32 *)next_key;

	if (index >= array->map.max_entries) {
		*next = 0;
		return 0;
	}

	if (index == array->map.max_entries - 1)
		return -ENOENT;

	*next = index + 1;
	return 0;
}

/* Called from syscall or from eBPF program */
static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
				 u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;

	if (unlikely(map_flags > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	if (unlikely(index >= array->map.max_entries))
		/* all elements were pre-allocated, cannot insert a new one */
		return -E2BIG;

	if (unlikely(map_flags == BPF_NOEXIST))
		/* all elements already exist */
		return -EEXIST;

	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]),
		       value, map->value_size);
	else
		memcpy(array->value +
		       array->elem_size * (index & array->index_mask),
		       value, map->value_size);
	return 0;
}

int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
			    u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	int cpu, off = 0;
	u32 size;

	if (unlikely(map_flags > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	if (unlikely(index >= array->map.max_entries))
		/* all elements were pre-allocated, cannot insert a new one */
		return -E2BIG;

	if (unlikely(map_flags == BPF_NOEXIST))
		/* all elements already exist */
		return -EEXIST;

	/* user space provides round_up(value_size, 8) bytes that are
	 * copied into the per-cpu area. bpf programs can only access
	 * value_size bytes of it. During lookup the same extra bytes are
	 * returned, or zeros that were zero-filled by percpu_alloc, so
	 * no kernel data leaks are possible
	 */
	size = round_up(map->value_size, 8);
	rcu_read_lock();
	pptr = array->pptrs[index & array->index_mask];
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}

/* Called from syscall or from eBPF program */
static int array_map_delete_elem(struct bpf_map *map, void *key)
{
	return -EINVAL;
}

/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void array_map_free(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);

	/* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
	 * so the programs (there can be more than one that used this map)
	 * have been disconnected from events. Wait for outstanding programs
	 * to complete, then free the array
	 */
	synchronize_rcu();

	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		bpf_array_free_percpu(array);

	bpf_map_area_free(array);
}

static void array_map_seq_show_elem(struct bpf_map *map, void *key,
				    struct seq_file *m)
{
	void *value;

	rcu_read_lock();

	value = array_map_lookup_elem(map, key);
	if (!value) {
		rcu_read_unlock();
		return;
	}

	seq_printf(m, "%u: ", *(u32 *)key);
	btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
	seq_puts(m, "\n");

	rcu_read_unlock();
}

static int array_map_check_btf(const struct bpf_map *map, const struct btf *btf,
			       u32 btf_key_id, u32 btf_value_id)
{
	const struct btf_type *key_type, *value_type;
	u32 key_size, value_size;
	u32 int_data;

	key_type = btf_type_id_size(btf, &btf_key_id, &key_size);
	if (!key_type || BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
		return -EINVAL;

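	/* for BTF_KIND_INT, the u32 of encoding metadata immediately
	 * follows struct btf_type
	 */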
	int_data = *(u32 *)(key_type + 1);
	/* bpf array can only take a u32 key.  This check makes
	 * sure that the btf matches the attr used during map_create.
	 */
	if (BTF_INT_BITS(int_data) != 32 || key_size != 4 ||
	    BTF_INT_OFFSET(int_data))
		return -EINVAL;

	value_type = btf_type_id_size(btf, &btf_value_id, &value_size);
	if (!value_type || value_size > map->value_size)
		return -EINVAL;

	return 0;
}

const struct bpf_map_ops array_map_ops = {
	.map_alloc_check = array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = array_map_lookup_elem,
	.map_update_elem = array_map_update_elem,
	.map_delete_elem = array_map_delete_elem,
	.map_gen_lookup = array_map_gen_lookup,
	.map_seq_show_elem = array_map_seq_show_elem,
	.map_check_btf = array_map_check_btf,
};

const struct bpf_map_ops percpu_array_map_ops = {
	.map_alloc_check = array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = percpu_array_map_lookup_elem,
	.map_update_elem = array_map_update_elem,
	.map_delete_elem = array_map_delete_elem,
};

static int fd_array_map_alloc_check(union bpf_attr *attr)
{
	/* only file descriptors can be stored in this type of map */
	if (attr->value_size != sizeof(u32))
		return -EINVAL;
	return array_map_alloc_check(attr);
}

static void fd_array_map_free(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	synchronize_rcu();

	/* make sure it's empty */
	for (i = 0; i < array->map.max_entries; i++)
		BUG_ON(array->ptrs[i] != NULL);

	bpf_map_area_free(array);
}

static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key)
{
	return NULL;
}

/* only called from syscall */
int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
{
	void **elem, *ptr;
	int ret = 0;

	if (!map->ops->map_fd_sys_lookup_elem)
		return -ENOTSUPP;

	rcu_read_lock();
	elem = array_map_lookup_elem(map, key);
	if (elem && (ptr = READ_ONCE(*elem)))
		*value = map->ops->map_fd_sys_lookup_elem(ptr);
	else
		ret = -ENOENT;
	rcu_read_unlock();

	return ret;
}

/* only called from syscall */
int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
				 void *key, void *value, u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	void *new_ptr, *old_ptr;
	u32 index = *(u32 *)key, ufd;

	if (map_flags != BPF_ANY)
		return -EINVAL;

	if (index >= array->map.max_entries)
		return -E2BIG;

	ufd = *(u32 *)value;
	new_ptr = map->ops->map_fd_get_ptr(map, map_file, ufd);
	if (IS_ERR(new_ptr))
		return PTR_ERR(new_ptr);

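	/* xchg() publishes the new element atomically; a concurrent lookup
	 * sees either the old or the new pointer, never a torn value
	 */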
	old_ptr = xchg(array->ptrs + index, new_ptr);
	if (old_ptr)
		map->ops->map_fd_put_ptr(old_ptr);

	return 0;
}

static int fd_array_map_delete_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	void *old_ptr;
	u32 index = *(u32 *)key;

	if (index >= array->map.max_entries)
		return -E2BIG;

	old_ptr = xchg(array->ptrs + index, NULL);
	if (old_ptr) {
		map->ops->map_fd_put_ptr(old_ptr);
		return 0;
	} else {
		return -ENOENT;
	}
}

static void *prog_fd_array_get_ptr(struct bpf_map *map,
				   struct file *map_file, int fd)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_prog *prog = bpf_prog_get(fd);

	if (IS_ERR(prog))
		return prog;

	if (!bpf_prog_array_compatible(array, prog)) {
		bpf_prog_put(prog);
		return ERR_PTR(-EINVAL);
	}

	return prog;
}

static void prog_fd_array_put_ptr(void *ptr)
{
	bpf_prog_put(ptr);
}

static u32 prog_fd_array_sys_lookup_elem(void *ptr)
{
	return ((struct bpf_prog *)ptr)->aux->id;
}

/* decrement refcnt of all bpf_progs that are stored in this map */
static void bpf_fd_array_map_clear(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	for (i = 0; i < array->map.max_entries; i++)
		fd_array_map_delete_elem(map, &i);
}

const struct bpf_map_ops prog_array_map_ops = {
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = fd_array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = prog_fd_array_get_ptr,
	.map_fd_put_ptr = prog_fd_array_put_ptr,
	.map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem,
	.map_release_uref = bpf_fd_array_map_clear,
};

static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file,
						   struct file *map_file)
{
	struct bpf_event_entry *ee;

	ee = kzalloc(sizeof(*ee), GFP_ATOMIC);
	if (ee) {
		ee->event = perf_file->private_data;
		ee->perf_file = perf_file;
		ee->map_file = map_file;
	}

	return ee;
}

static void __bpf_event_entry_free(struct rcu_head *rcu)
{
	struct bpf_event_entry *ee;

	ee = container_of(rcu, struct bpf_event_entry, rcu);
	fput(ee->perf_file);
	kfree(ee);
}

static void bpf_event_entry_free_rcu(struct bpf_event_entry *ee)
{
	call_rcu(&ee->rcu, __bpf_event_entry_free);
}

static void *perf_event_fd_array_get_ptr(struct bpf_map *map,
					 struct file *map_file, int fd)
{
	struct bpf_event_entry *ee;
	struct perf_event *event;
	struct file *perf_file;
	u64 value;

	perf_file = perf_event_get(fd);
	if (IS_ERR(perf_file))
		return perf_file;

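	/* probe with a local read: events that cannot be read from this
	 * context are rejected up front, since bpf_perf_event_read() would
	 * never succeed on them either
	 */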
	ee = ERR_PTR(-EOPNOTSUPP);
	event = perf_file->private_data;
	if (perf_event_read_local(event, &value, NULL, NULL) == -EOPNOTSUPP)
		goto err_out;

	ee = bpf_event_entry_gen(perf_file, map_file);
	if (ee)
		return ee;
	ee = ERR_PTR(-ENOMEM);
err_out:
	fput(perf_file);
	return ee;
}

static void perf_event_fd_array_put_ptr(void *ptr)
{
	bpf_event_entry_free_rcu(ptr);
}

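/* Called when a map file is released: drop every event entry that was
 * installed through that particular struct file.
 */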
static void perf_event_fd_array_release(struct bpf_map *map,
					struct file *map_file)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_event_entry *ee;
	int i;

	rcu_read_lock();
	for (i = 0; i < array->map.max_entries; i++) {
		ee = READ_ONCE(array->ptrs[i]);
		if (ee && ee->map_file == map_file)
			fd_array_map_delete_elem(map, &i);
	}
	rcu_read_unlock();
}

const struct bpf_map_ops perf_event_array_map_ops = {
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = fd_array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = perf_event_fd_array_get_ptr,
	.map_fd_put_ptr = perf_event_fd_array_put_ptr,
	.map_release = perf_event_fd_array_release,
};

#ifdef CONFIG_CGROUPS
static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
				     struct file *map_file /* not used */,
				     int fd)
{
	return cgroup_get_from_fd(fd);
}

static void cgroup_fd_array_put_ptr(void *ptr)
{
	/* cgroup_put() frees the cgrp after an RCU grace period */
	cgroup_put(ptr);
}

static void cgroup_fd_array_free(struct bpf_map *map)
{
	bpf_fd_array_map_clear(map);
	fd_array_map_free(map);
}

const struct bpf_map_ops cgroup_array_map_ops = {
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = cgroup_fd_array_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = cgroup_fd_array_get_ptr,
	.map_fd_put_ptr = cgroup_fd_array_put_ptr,
};
#endif

static struct bpf_map *array_of_map_alloc(union bpf_attr *attr)
{
	struct bpf_map *map, *inner_map_meta;

	inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd);
	if (IS_ERR(inner_map_meta))
		return inner_map_meta;

	map = array_map_alloc(attr);
	if (IS_ERR(map)) {
		bpf_map_meta_free(inner_map_meta);
		return map;
	}

	map->inner_map_meta = inner_map_meta;

	return map;
}

static void array_of_map_free(struct bpf_map *map)
{
	/* map->inner_map_meta is only accessed by syscall which
	 * is protected by fdget/fdput.
	 */
	bpf_map_meta_free(map->inner_map_meta);
	bpf_fd_array_map_clear(map);
	fd_array_map_free(map);
}

static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_map **inner_map = array_map_lookup_elem(map, key);

	if (!inner_map)
		return NULL;

	return READ_ONCE(*inner_map);
}

static u32 array_of_map_gen_lookup(struct bpf_map *map,
				   struct bpf_insn *insn_buf)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 elem_size = round_up(map->value_size, 8);
	struct bpf_insn *insn = insn_buf;
	const int ret = BPF_REG_0;
	const int map_ptr = BPF_REG_1;
	const int index = BPF_REG_2;

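	/* Same inline sequence as array_map_gen_lookup(), plus one extra
	 * load: the element holds a pointer to the inner map, which is
	 * returned only if it is non-NULL.
	 */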
	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
	if (map->unpriv_array) {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6);
		*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
	} else {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
	}
	if (is_power_of_2(elem_size))
		*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
	else
		*insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
	*insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
	*insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0);
	*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1);
	*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
	*insn++ = BPF_MOV64_IMM(ret, 0);

	return insn - insn_buf;
}

const struct bpf_map_ops array_of_maps_map_ops = {
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_of_map_alloc,
	.map_free = array_of_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = array_of_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = bpf_map_fd_get_ptr,
	.map_fd_put_ptr = bpf_map_fd_put_ptr,
	.map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
	.map_gen_lookup = array_of_map_gen_lookup,
};