xref: /linux-6.15/kernel/bpf/bpf_struct_ops.c (revision 8d9f547f)
127ae7997SMartin KaFai Lau // SPDX-License-Identifier: GPL-2.0-only
227ae7997SMartin KaFai Lau /* Copyright (c) 2019 Facebook */
327ae7997SMartin KaFai Lau 
427ae7997SMartin KaFai Lau #include <linux/bpf.h>
527ae7997SMartin KaFai Lau #include <linux/bpf_verifier.h>
627ae7997SMartin KaFai Lau #include <linux/btf.h>
727ae7997SMartin KaFai Lau #include <linux/filter.h>
827ae7997SMartin KaFai Lau #include <linux/slab.h>
927ae7997SMartin KaFai Lau #include <linux/numa.h>
1027ae7997SMartin KaFai Lau #include <linux/seq_file.h>
1127ae7997SMartin KaFai Lau #include <linux/refcount.h>
1285d33df3SMartin KaFai Lau #include <linux/mutex.h>
13c317ab71SMenglong Dong #include <linux/btf_ids.h>
14b671c206SKui-Feng Lee #include <linux/rcupdate_wait.h>
151adddc97SKui-Feng Lee #include <linux/poll.h>
1627ae7997SMartin KaFai Lau 
1785d33df3SMartin KaFai Lau struct bpf_struct_ops_value {
18612d087dSKui-Feng Lee 	struct bpf_struct_ops_common_value common;
19d7f10df8SGustavo A. R. Silva 	char data[] ____cacheline_aligned_in_smp;
2085d33df3SMartin KaFai Lau };
2185d33df3SMartin KaFai Lau 
22187e2af0SKui-Feng Lee #define MAX_TRAMP_IMAGE_PAGES 8
23187e2af0SKui-Feng Lee 
2485d33df3SMartin KaFai Lau struct bpf_struct_ops_map {
2585d33df3SMartin KaFai Lau 	struct bpf_map map;
264c5763edSKui-Feng Lee 	const struct bpf_struct_ops_desc *st_ops_desc;
2785d33df3SMartin KaFai Lau 	/* protect map_update */
2885d33df3SMartin KaFai Lau 	struct mutex lock;
29f7e0beafSKui-Feng Lee 	/* link has all the bpf_links that is populated
3085d33df3SMartin KaFai Lau 	 * to the func ptr of the kernel's struct
3185d33df3SMartin KaFai Lau 	 * (in kvalue.data).
3285d33df3SMartin KaFai Lau 	 */
33f7e0beafSKui-Feng Lee 	struct bpf_link **links;
347c8ce4ffSXu Kuohai 	/* ksyms for bpf trampolines */
357c8ce4ffSXu Kuohai 	struct bpf_ksym **ksyms;
36821a3fa3SXu Kuohai 	u32 funcs_cnt;
37187e2af0SKui-Feng Lee 	u32 image_pages_cnt;
38187e2af0SKui-Feng Lee 	/* image_pages is an array of pages that has all the trampolines
3985d33df3SMartin KaFai Lau 	 * that stores the func args before calling the bpf_prog.
4085d33df3SMartin KaFai Lau 	 */
41187e2af0SKui-Feng Lee 	void *image_pages[MAX_TRAMP_IMAGE_PAGES];
4247f4f657SKui-Feng Lee 	/* The owner moduler's btf. */
4347f4f657SKui-Feng Lee 	struct btf *btf;
4485d33df3SMartin KaFai Lau 	/* uvalue->data stores the kernel struct
4585d33df3SMartin KaFai Lau 	 * (e.g. tcp_congestion_ops) that is more useful
4685d33df3SMartin KaFai Lau 	 * to userspace than the kvalue.  For example,
4785d33df3SMartin KaFai Lau 	 * the bpf_prog's id is stored instead of the kernel
4885d33df3SMartin KaFai Lau 	 * address of a func ptr.
4985d33df3SMartin KaFai Lau 	 */
5085d33df3SMartin KaFai Lau 	struct bpf_struct_ops_value *uvalue;
5185d33df3SMartin KaFai Lau 	/* kvalue.data stores the actual kernel's struct
5285d33df3SMartin KaFai Lau 	 * (e.g. tcp_congestion_ops) that will be
5385d33df3SMartin KaFai Lau 	 * registered to the kernel subsystem.
5485d33df3SMartin KaFai Lau 	 */
5585d33df3SMartin KaFai Lau 	struct bpf_struct_ops_value kvalue;
5685d33df3SMartin KaFai Lau };
5785d33df3SMartin KaFai Lau 
5868b04864SKui-Feng Lee struct bpf_struct_ops_link {
5968b04864SKui-Feng Lee 	struct bpf_link link;
6068b04864SKui-Feng Lee 	struct bpf_map __rcu *map;
611adddc97SKui-Feng Lee 	wait_queue_head_t wait_hup;
6268b04864SKui-Feng Lee };
6368b04864SKui-Feng Lee 
/* File-local mutex.
 * NOTE(review): presumably serializes updates of struct_ops links - confirm
 * against its users, which are outside this part of the file.
 */
static DEFINE_MUTEX(update_mutex);
65aef56f2eSKui-Feng Lee 
/* The value struct of a struct_ops type is named VALUE_PREFIX followed by
 * the struct_ops name.
 */
#define VALUE_PREFIX "bpf_struct_ops_"
/* Length of VALUE_PREFIX without its terminating NUL */
#define VALUE_PREFIX_LEN (sizeof(VALUE_PREFIX) - 1)
6885d33df3SMartin KaFai Lau 
6927ae7997SMartin KaFai Lau const struct bpf_verifier_ops bpf_struct_ops_verifier_ops = {
7027ae7997SMartin KaFai Lau };
7127ae7997SMartin KaFai Lau 
7227ae7997SMartin KaFai Lau const struct bpf_prog_ops bpf_struct_ops_prog_ops = {
73c196906dSHou Tao #ifdef CONFIG_NET
74c196906dSHou Tao 	.test_run = bpf_struct_ops_test_run,
75c196906dSHou Tao #endif
7627ae7997SMartin KaFai Lau };
7727ae7997SMartin KaFai Lau 
7895678395SKui-Feng Lee BTF_ID_LIST(st_ops_ids)
7995678395SKui-Feng Lee BTF_ID(struct, module)
80612d087dSKui-Feng Lee BTF_ID(struct, bpf_struct_ops_common_value)
8195678395SKui-Feng Lee 
8295678395SKui-Feng Lee enum {
8395678395SKui-Feng Lee 	IDX_MODULE_ID,
84612d087dSKui-Feng Lee 	IDX_ST_OPS_COMMON_VALUE_ID,
8595678395SKui-Feng Lee };
8685d33df3SMartin KaFai Lau 
87612d087dSKui-Feng Lee extern struct btf *btf_vmlinux;
88612d087dSKui-Feng Lee 
is_valid_value_type(struct btf * btf,s32 value_id,const struct btf_type * type,const char * value_name)89612d087dSKui-Feng Lee static bool is_valid_value_type(struct btf *btf, s32 value_id,
90612d087dSKui-Feng Lee 				const struct btf_type *type,
91612d087dSKui-Feng Lee 				const char *value_name)
92612d087dSKui-Feng Lee {
93612d087dSKui-Feng Lee 	const struct btf_type *common_value_type;
94612d087dSKui-Feng Lee 	const struct btf_member *member;
95612d087dSKui-Feng Lee 	const struct btf_type *vt, *mt;
96612d087dSKui-Feng Lee 
97612d087dSKui-Feng Lee 	vt = btf_type_by_id(btf, value_id);
98612d087dSKui-Feng Lee 	if (btf_vlen(vt) != 2) {
99612d087dSKui-Feng Lee 		pr_warn("The number of %s's members should be 2, but we get %d\n",
100612d087dSKui-Feng Lee 			value_name, btf_vlen(vt));
101612d087dSKui-Feng Lee 		return false;
102612d087dSKui-Feng Lee 	}
103612d087dSKui-Feng Lee 	member = btf_type_member(vt);
104612d087dSKui-Feng Lee 	mt = btf_type_by_id(btf, member->type);
105612d087dSKui-Feng Lee 	common_value_type = btf_type_by_id(btf_vmlinux,
106612d087dSKui-Feng Lee 					   st_ops_ids[IDX_ST_OPS_COMMON_VALUE_ID]);
107612d087dSKui-Feng Lee 	if (mt != common_value_type) {
108612d087dSKui-Feng Lee 		pr_warn("The first member of %s should be bpf_struct_ops_common_value\n",
109612d087dSKui-Feng Lee 			value_name);
110612d087dSKui-Feng Lee 		return false;
111612d087dSKui-Feng Lee 	}
112612d087dSKui-Feng Lee 	member++;
113612d087dSKui-Feng Lee 	mt = btf_type_by_id(btf, member->type);
114612d087dSKui-Feng Lee 	if (mt != type) {
115612d087dSKui-Feng Lee 		pr_warn("The second member of %s should be %s\n",
116612d087dSKui-Feng Lee 			value_name, btf_name_by_offset(btf, type->name_off));
117612d087dSKui-Feng Lee 		return false;
118612d087dSKui-Feng Lee 	}
119612d087dSKui-Feng Lee 
120612d087dSKui-Feng Lee 	return true;
121612d087dSKui-Feng Lee }
122612d087dSKui-Feng Lee 
bpf_struct_ops_image_alloc(void)123187e2af0SKui-Feng Lee static void *bpf_struct_ops_image_alloc(void)
124187e2af0SKui-Feng Lee {
125187e2af0SKui-Feng Lee 	void *image;
126187e2af0SKui-Feng Lee 	int err;
127187e2af0SKui-Feng Lee 
128187e2af0SKui-Feng Lee 	err = bpf_jit_charge_modmem(PAGE_SIZE);
129187e2af0SKui-Feng Lee 	if (err)
130187e2af0SKui-Feng Lee 		return ERR_PTR(err);
131187e2af0SKui-Feng Lee 	image = arch_alloc_bpf_trampoline(PAGE_SIZE);
132187e2af0SKui-Feng Lee 	if (!image) {
133187e2af0SKui-Feng Lee 		bpf_jit_uncharge_modmem(PAGE_SIZE);
134187e2af0SKui-Feng Lee 		return ERR_PTR(-ENOMEM);
135187e2af0SKui-Feng Lee 	}
136187e2af0SKui-Feng Lee 
137187e2af0SKui-Feng Lee 	return image;
138187e2af0SKui-Feng Lee }
139187e2af0SKui-Feng Lee 
bpf_struct_ops_image_free(void * image)140187e2af0SKui-Feng Lee void bpf_struct_ops_image_free(void *image)
141187e2af0SKui-Feng Lee {
142187e2af0SKui-Feng Lee 	if (image) {
143187e2af0SKui-Feng Lee 		arch_free_bpf_trampoline(image, PAGE_SIZE);
144187e2af0SKui-Feng Lee 		bpf_jit_uncharge_modmem(PAGE_SIZE);
145187e2af0SKui-Feng Lee 	}
146187e2af0SKui-Feng Lee }
147187e2af0SKui-Feng Lee 
/* Suffixes looked for on the parameters of a stub function; see
 * prepare_arg_info().
 */
#define MAYBE_NULL_SUFFIX "__nullable"
#define REFCOUNTED_SUFFIX "__ref"
15016116035SKui-Feng Lee 
15116116035SKui-Feng Lee /* Prepare argument info for every nullable argument of a member of a
15216116035SKui-Feng Lee  * struct_ops type.
15316116035SKui-Feng Lee  *
15416116035SKui-Feng Lee  * Initialize a struct bpf_struct_ops_arg_info according to type info of
15516116035SKui-Feng Lee  * the arguments of a stub function. (Check kCFI for more information about
15616116035SKui-Feng Lee  * stub functions.)
15716116035SKui-Feng Lee  *
15816116035SKui-Feng Lee  * Each member in the struct_ops type has a struct bpf_struct_ops_arg_info
15916116035SKui-Feng Lee  * to provide an array of struct bpf_ctx_arg_aux, which in turn provides
16016116035SKui-Feng Lee  * the information that used by the verifier to check the arguments of the
16116116035SKui-Feng Lee  * BPF struct_ops program assigned to the member. Here, we only care about
16216116035SKui-Feng Lee  * the arguments that are marked as __nullable.
16316116035SKui-Feng Lee  *
16416116035SKui-Feng Lee  * The array of struct bpf_ctx_arg_aux is eventually assigned to
16516116035SKui-Feng Lee  * prog->aux->ctx_arg_info of BPF struct_ops programs and passed to the
16616116035SKui-Feng Lee  * verifier. (See check_struct_ops_btf_id())
16716116035SKui-Feng Lee  *
16816116035SKui-Feng Lee  * arg_info->info will be the list of struct bpf_ctx_arg_aux if success. If
16916116035SKui-Feng Lee  * fails, it will be kept untouched.
17016116035SKui-Feng Lee  */
prepare_arg_info(struct btf * btf,const char * st_ops_name,const char * member_name,const struct btf_type * func_proto,void * stub_func_addr,struct bpf_struct_ops_arg_info * arg_info)17116116035SKui-Feng Lee static int prepare_arg_info(struct btf *btf,
17216116035SKui-Feng Lee 			    const char *st_ops_name,
17316116035SKui-Feng Lee 			    const char *member_name,
17412fdd29dSMartin KaFai Lau 			    const struct btf_type *func_proto, void *stub_func_addr,
17516116035SKui-Feng Lee 			    struct bpf_struct_ops_arg_info *arg_info)
17616116035SKui-Feng Lee {
17716116035SKui-Feng Lee 	const struct btf_type *stub_func_proto, *pointed_type;
178a687df20SAmery Hung 	bool is_nullable = false, is_refcounted = false;
17916116035SKui-Feng Lee 	const struct btf_param *stub_args, *args;
18016116035SKui-Feng Lee 	struct bpf_ctx_arg_aux *info, *info_buf;
18116116035SKui-Feng Lee 	u32 nargs, arg_no, info_cnt = 0;
18212fdd29dSMartin KaFai Lau 	char ksym[KSYM_SYMBOL_LEN];
18312fdd29dSMartin KaFai Lau 	const char *stub_fname;
184a687df20SAmery Hung 	const char *suffix;
18512fdd29dSMartin KaFai Lau 	s32 stub_func_id;
18616116035SKui-Feng Lee 	u32 arg_btf_id;
18716116035SKui-Feng Lee 	int offset;
18816116035SKui-Feng Lee 
18912fdd29dSMartin KaFai Lau 	stub_fname = kallsyms_lookup((unsigned long)stub_func_addr, NULL, NULL, NULL, ksym);
19012fdd29dSMartin KaFai Lau 	if (!stub_fname) {
19112fdd29dSMartin KaFai Lau 		pr_warn("Cannot find the stub function name for the %s in struct %s\n",
19212fdd29dSMartin KaFai Lau 			member_name, st_ops_name);
19312fdd29dSMartin KaFai Lau 		return -ENOENT;
19412fdd29dSMartin KaFai Lau 	}
19512fdd29dSMartin KaFai Lau 
19612fdd29dSMartin KaFai Lau 	stub_func_id = btf_find_by_name_kind(btf, stub_fname, BTF_KIND_FUNC);
19712fdd29dSMartin KaFai Lau 	if (stub_func_id < 0) {
19812fdd29dSMartin KaFai Lau 		pr_warn("Cannot find the stub function %s in btf\n", stub_fname);
19912fdd29dSMartin KaFai Lau 		return -ENOENT;
20012fdd29dSMartin KaFai Lau 	}
20112fdd29dSMartin KaFai Lau 
20212fdd29dSMartin KaFai Lau 	stub_func_proto = btf_type_by_id(btf, stub_func_id);
20312fdd29dSMartin KaFai Lau 	stub_func_proto = btf_type_by_id(btf, stub_func_proto->type);
20416116035SKui-Feng Lee 
20516116035SKui-Feng Lee 	/* Check if the number of arguments of the stub function is the same
20616116035SKui-Feng Lee 	 * as the number of arguments of the function pointer.
20716116035SKui-Feng Lee 	 */
20816116035SKui-Feng Lee 	nargs = btf_type_vlen(func_proto);
20916116035SKui-Feng Lee 	if (nargs != btf_type_vlen(stub_func_proto)) {
21012fdd29dSMartin KaFai Lau 		pr_warn("the number of arguments of the stub function %s does not match the number of arguments of the member %s of struct %s\n",
21112fdd29dSMartin KaFai Lau 			stub_fname, member_name, st_ops_name);
21216116035SKui-Feng Lee 		return -EINVAL;
21316116035SKui-Feng Lee 	}
21416116035SKui-Feng Lee 
21516116035SKui-Feng Lee 	if (!nargs)
21616116035SKui-Feng Lee 		return 0;
21716116035SKui-Feng Lee 
21816116035SKui-Feng Lee 	args = btf_params(func_proto);
21916116035SKui-Feng Lee 	stub_args = btf_params(stub_func_proto);
22016116035SKui-Feng Lee 
22116116035SKui-Feng Lee 	info_buf = kcalloc(nargs, sizeof(*info_buf), GFP_KERNEL);
22216116035SKui-Feng Lee 	if (!info_buf)
22316116035SKui-Feng Lee 		return -ENOMEM;
22416116035SKui-Feng Lee 
22516116035SKui-Feng Lee 	/* Prepare info for every nullable argument */
22616116035SKui-Feng Lee 	info = info_buf;
22716116035SKui-Feng Lee 	for (arg_no = 0; arg_no < nargs; arg_no++) {
22816116035SKui-Feng Lee 		/* Skip arguments that is not suffixed with
229a687df20SAmery Hung 		 * "__nullable or __ref".
23016116035SKui-Feng Lee 		 */
231a687df20SAmery Hung 		is_nullable = btf_param_match_suffix(btf, &stub_args[arg_no],
232a687df20SAmery Hung 						     MAYBE_NULL_SUFFIX);
233a687df20SAmery Hung 		is_refcounted = btf_param_match_suffix(btf, &stub_args[arg_no],
234a687df20SAmery Hung 						       REFCOUNTED_SUFFIX);
235a687df20SAmery Hung 
236a687df20SAmery Hung 		if (is_nullable)
237a687df20SAmery Hung 			suffix = MAYBE_NULL_SUFFIX;
238a687df20SAmery Hung 		else if (is_refcounted)
239a687df20SAmery Hung 			suffix = REFCOUNTED_SUFFIX;
240a687df20SAmery Hung 		else
24116116035SKui-Feng Lee 			continue;
24216116035SKui-Feng Lee 
24316116035SKui-Feng Lee 		/* Should be a pointer to struct */
24416116035SKui-Feng Lee 		pointed_type = btf_type_resolve_ptr(btf,
24516116035SKui-Feng Lee 						    args[arg_no].type,
24616116035SKui-Feng Lee 						    &arg_btf_id);
24716116035SKui-Feng Lee 		if (!pointed_type ||
24816116035SKui-Feng Lee 		    !btf_type_is_struct(pointed_type)) {
24912fdd29dSMartin KaFai Lau 			pr_warn("stub function %s has %s tagging to an unsupported type\n",
250a687df20SAmery Hung 				stub_fname, suffix);
25116116035SKui-Feng Lee 			goto err_out;
25216116035SKui-Feng Lee 		}
25316116035SKui-Feng Lee 
25416116035SKui-Feng Lee 		offset = btf_ctx_arg_offset(btf, func_proto, arg_no);
25516116035SKui-Feng Lee 		if (offset < 0) {
25612fdd29dSMartin KaFai Lau 			pr_warn("stub function %s has an invalid trampoline ctx offset for arg#%u\n",
25712fdd29dSMartin KaFai Lau 				stub_fname, arg_no);
25816116035SKui-Feng Lee 			goto err_out;
25916116035SKui-Feng Lee 		}
26016116035SKui-Feng Lee 
26116116035SKui-Feng Lee 		if (args[arg_no].type != stub_args[arg_no].type) {
26212fdd29dSMartin KaFai Lau 			pr_warn("arg#%u type in stub function %s does not match with its original func_proto\n",
26312fdd29dSMartin KaFai Lau 				arg_no, stub_fname);
26416116035SKui-Feng Lee 			goto err_out;
26516116035SKui-Feng Lee 		}
26616116035SKui-Feng Lee 
26716116035SKui-Feng Lee 		/* Fill the information of the new argument */
26816116035SKui-Feng Lee 		info->btf_id = arg_btf_id;
26916116035SKui-Feng Lee 		info->btf = btf;
27016116035SKui-Feng Lee 		info->offset = offset;
271a687df20SAmery Hung 		if (is_nullable) {
272a687df20SAmery Hung 			info->reg_type = PTR_TRUSTED | PTR_TO_BTF_ID | PTR_MAYBE_NULL;
273a687df20SAmery Hung 		} else if (is_refcounted) {
274a687df20SAmery Hung 			info->reg_type = PTR_TRUSTED | PTR_TO_BTF_ID;
275a687df20SAmery Hung 			info->refcounted = true;
276a687df20SAmery Hung 		}
27716116035SKui-Feng Lee 
27816116035SKui-Feng Lee 		info++;
27916116035SKui-Feng Lee 		info_cnt++;
28016116035SKui-Feng Lee 	}
28116116035SKui-Feng Lee 
28216116035SKui-Feng Lee 	if (info_cnt) {
28316116035SKui-Feng Lee 		arg_info->info = info_buf;
28416116035SKui-Feng Lee 		arg_info->cnt = info_cnt;
28516116035SKui-Feng Lee 	} else {
28616116035SKui-Feng Lee 		kfree(info_buf);
28716116035SKui-Feng Lee 	}
28816116035SKui-Feng Lee 
28916116035SKui-Feng Lee 	return 0;
29016116035SKui-Feng Lee 
29116116035SKui-Feng Lee err_out:
29216116035SKui-Feng Lee 	kfree(info_buf);
29316116035SKui-Feng Lee 
29416116035SKui-Feng Lee 	return -EINVAL;
29516116035SKui-Feng Lee }
29616116035SKui-Feng Lee 
29716116035SKui-Feng Lee /* Clean up the arg_info in a struct bpf_struct_ops_desc. */
bpf_struct_ops_desc_release(struct bpf_struct_ops_desc * st_ops_desc)29816116035SKui-Feng Lee void bpf_struct_ops_desc_release(struct bpf_struct_ops_desc *st_ops_desc)
29916116035SKui-Feng Lee {
30016116035SKui-Feng Lee 	struct bpf_struct_ops_arg_info *arg_info;
30116116035SKui-Feng Lee 	int i;
30216116035SKui-Feng Lee 
30316116035SKui-Feng Lee 	arg_info = st_ops_desc->arg_info;
30416116035SKui-Feng Lee 	for (i = 0; i < btf_type_vlen(st_ops_desc->type); i++)
30516116035SKui-Feng Lee 		kfree(arg_info[i].info);
30616116035SKui-Feng Lee 
30716116035SKui-Feng Lee 	kfree(arg_info);
30816116035SKui-Feng Lee }
30916116035SKui-Feng Lee 
is_module_member(const struct btf * btf,u32 id)31096ea081eSMartin KaFai Lau static bool is_module_member(const struct btf *btf, u32 id)
31196ea081eSMartin KaFai Lau {
31296ea081eSMartin KaFai Lau 	const struct btf_type *t;
31396ea081eSMartin KaFai Lau 
31496ea081eSMartin KaFai Lau 	t = btf_type_resolve_ptr(btf, id, NULL);
31596ea081eSMartin KaFai Lau 	if (!t)
31696ea081eSMartin KaFai Lau 		return false;
31796ea081eSMartin KaFai Lau 
31896ea081eSMartin KaFai Lau 	if (!__btf_type_is_struct(t) && !btf_type_is_fwd(t))
31996ea081eSMartin KaFai Lau 		return false;
32096ea081eSMartin KaFai Lau 
32196ea081eSMartin KaFai Lau 	return !strcmp(btf_name_by_offset(btf, t->name_off), "module");
32296ea081eSMartin KaFai Lau }
32396ea081eSMartin KaFai Lau 
/* Tell whether the member at byte offset @moff of @st_ops is supported,
 * i.e. whether the pointer stored at that offset in st_ops->cfi_stubs is
 * set.
 *
 * Return 0 if it is, -ENOTSUPP if the slot is NULL.
 */
int bpf_struct_ops_supported(const struct bpf_struct_ops *st_ops, u32 moff)
{
	void **stub_slot = (void **)(st_ops->cfi_stubs + moff);

	if (*stub_slot)
		return 0;

	return -ENOTSUPP;
}
33012fdd29dSMartin KaFai Lau 
bpf_struct_ops_desc_init(struct bpf_struct_ops_desc * st_ops_desc,struct btf * btf,struct bpf_verifier_log * log)331f6be98d1SKui-Feng Lee int bpf_struct_ops_desc_init(struct bpf_struct_ops_desc *st_ops_desc,
3323b1f89e7SKui-Feng Lee 			     struct btf *btf,
3333b1f89e7SKui-Feng Lee 			     struct bpf_verifier_log *log)
33427ae7997SMartin KaFai Lau {
3354c5763edSKui-Feng Lee 	struct bpf_struct_ops *st_ops = st_ops_desc->st_ops;
33616116035SKui-Feng Lee 	struct bpf_struct_ops_arg_info *arg_info;
33727ae7997SMartin KaFai Lau 	const struct btf_member *member;
33827ae7997SMartin KaFai Lau 	const struct btf_type *t;
3393b1f89e7SKui-Feng Lee 	s32 type_id, value_id;
34085d33df3SMartin KaFai Lau 	char value_name[128];
34127ae7997SMartin KaFai Lau 	const char *mname;
34216116035SKui-Feng Lee 	int i, err;
34327ae7997SMartin KaFai Lau 
34485d33df3SMartin KaFai Lau 	if (strlen(st_ops->name) + VALUE_PREFIX_LEN >=
34585d33df3SMartin KaFai Lau 	    sizeof(value_name)) {
34685d33df3SMartin KaFai Lau 		pr_warn("struct_ops name %s is too long\n",
34785d33df3SMartin KaFai Lau 			st_ops->name);
348f6be98d1SKui-Feng Lee 		return -EINVAL;
34985d33df3SMartin KaFai Lau 	}
35085d33df3SMartin KaFai Lau 	sprintf(value_name, "%s%s", VALUE_PREFIX, st_ops->name);
35185d33df3SMartin KaFai Lau 
3523e000833SKui-Feng Lee 	if (!st_ops->cfi_stubs) {
3533e000833SKui-Feng Lee 		pr_warn("struct_ops for %s has no cfi_stubs\n", st_ops->name);
3543e000833SKui-Feng Lee 		return -EINVAL;
3553e000833SKui-Feng Lee 	}
3563e000833SKui-Feng Lee 
35727ae7997SMartin KaFai Lau 	type_id = btf_find_by_name_kind(btf, st_ops->name,
35827ae7997SMartin KaFai Lau 					BTF_KIND_STRUCT);
35927ae7997SMartin KaFai Lau 	if (type_id < 0) {
3603b1f89e7SKui-Feng Lee 		pr_warn("Cannot find struct %s in %s\n",
3613b1f89e7SKui-Feng Lee 			st_ops->name, btf_get_name(btf));
362f6be98d1SKui-Feng Lee 		return -EINVAL;
36327ae7997SMartin KaFai Lau 	}
36427ae7997SMartin KaFai Lau 	t = btf_type_by_id(btf, type_id);
36527ae7997SMartin KaFai Lau 	if (btf_type_vlen(t) > BPF_STRUCT_OPS_MAX_NR_MEMBERS) {
36627ae7997SMartin KaFai Lau 		pr_warn("Cannot support #%u members in struct %s\n",
36727ae7997SMartin KaFai Lau 			btf_type_vlen(t), st_ops->name);
368f6be98d1SKui-Feng Lee 		return -EINVAL;
36927ae7997SMartin KaFai Lau 	}
37027ae7997SMartin KaFai Lau 
371612d087dSKui-Feng Lee 	value_id = btf_find_by_name_kind(btf, value_name,
372612d087dSKui-Feng Lee 					 BTF_KIND_STRUCT);
373612d087dSKui-Feng Lee 	if (value_id < 0) {
374612d087dSKui-Feng Lee 		pr_warn("Cannot find struct %s in %s\n",
375612d087dSKui-Feng Lee 			value_name, btf_get_name(btf));
376f6be98d1SKui-Feng Lee 		return -EINVAL;
377612d087dSKui-Feng Lee 	}
378612d087dSKui-Feng Lee 	if (!is_valid_value_type(btf, value_id, t, value_name))
379f6be98d1SKui-Feng Lee 		return -EINVAL;
380612d087dSKui-Feng Lee 
38116116035SKui-Feng Lee 	arg_info = kcalloc(btf_type_vlen(t), sizeof(*arg_info),
38216116035SKui-Feng Lee 			   GFP_KERNEL);
38316116035SKui-Feng Lee 	if (!arg_info)
38416116035SKui-Feng Lee 		return -ENOMEM;
38516116035SKui-Feng Lee 
38616116035SKui-Feng Lee 	st_ops_desc->arg_info = arg_info;
38716116035SKui-Feng Lee 	st_ops_desc->type = t;
38816116035SKui-Feng Lee 	st_ops_desc->type_id = type_id;
38916116035SKui-Feng Lee 	st_ops_desc->value_id = value_id;
39016116035SKui-Feng Lee 	st_ops_desc->value_type = btf_type_by_id(btf, value_id);
39116116035SKui-Feng Lee 
3923b1f89e7SKui-Feng Lee 	for_each_member(i, t, member) {
393*8d9f547fSAmery Hung 		const struct btf_type *func_proto, *ret_type;
39412fdd29dSMartin KaFai Lau 		void **stub_func_addr;
39512fdd29dSMartin KaFai Lau 		u32 moff;
39627ae7997SMartin KaFai Lau 
39712fdd29dSMartin KaFai Lau 		moff = __btf_member_bit_offset(t, member) / 8;
39827ae7997SMartin KaFai Lau 		mname = btf_name_by_offset(btf, member->name_off);
39927ae7997SMartin KaFai Lau 		if (!*mname) {
40027ae7997SMartin KaFai Lau 			pr_warn("anon member in struct %s is not supported\n",
40127ae7997SMartin KaFai Lau 				st_ops->name);
40216116035SKui-Feng Lee 			err = -EOPNOTSUPP;
40316116035SKui-Feng Lee 			goto errout;
40427ae7997SMartin KaFai Lau 		}
40527ae7997SMartin KaFai Lau 
4068293eb99SAlexei Starovoitov 		if (__btf_member_bitfield_size(t, member)) {
40727ae7997SMartin KaFai Lau 			pr_warn("bit field member %s in struct %s is not supported\n",
40827ae7997SMartin KaFai Lau 				mname, st_ops->name);
40916116035SKui-Feng Lee 			err = -EOPNOTSUPP;
41016116035SKui-Feng Lee 			goto errout;
41127ae7997SMartin KaFai Lau 		}
41227ae7997SMartin KaFai Lau 
41396ea081eSMartin KaFai Lau 		if (!st_ops_ids[IDX_MODULE_ID] && is_module_member(btf, member->type)) {
41496ea081eSMartin KaFai Lau 			pr_warn("'struct module' btf id not found. Is CONFIG_MODULES enabled? bpf_struct_ops '%s' needs module support.\n",
41596ea081eSMartin KaFai Lau 				st_ops->name);
41696ea081eSMartin KaFai Lau 			err = -EOPNOTSUPP;
41796ea081eSMartin KaFai Lau 			goto errout;
41896ea081eSMartin KaFai Lau 		}
41996ea081eSMartin KaFai Lau 
42027ae7997SMartin KaFai Lau 		func_proto = btf_type_resolve_func_ptr(btf,
42127ae7997SMartin KaFai Lau 						       member->type,
42227ae7997SMartin KaFai Lau 						       NULL);
42312fdd29dSMartin KaFai Lau 
42412fdd29dSMartin KaFai Lau 		/* The member is not a function pointer or
42512fdd29dSMartin KaFai Lau 		 * the function pointer is not supported.
42612fdd29dSMartin KaFai Lau 		 */
42712fdd29dSMartin KaFai Lau 		if (!func_proto || bpf_struct_ops_supported(st_ops, moff))
42816116035SKui-Feng Lee 			continue;
42916116035SKui-Feng Lee 
430*8d9f547fSAmery Hung 		if (func_proto->type) {
431*8d9f547fSAmery Hung 			ret_type = btf_type_resolve_ptr(btf, func_proto->type, NULL);
432*8d9f547fSAmery Hung 			if (ret_type && !__btf_type_is_struct(ret_type)) {
433*8d9f547fSAmery Hung 				pr_warn("func ptr %s in struct %s returns non-struct pointer, which is not supported\n",
434*8d9f547fSAmery Hung 					mname, st_ops->name);
435*8d9f547fSAmery Hung 				err = -EOPNOTSUPP;
436*8d9f547fSAmery Hung 				goto errout;
437*8d9f547fSAmery Hung 			}
438*8d9f547fSAmery Hung 		}
439*8d9f547fSAmery Hung 
44016116035SKui-Feng Lee 		if (btf_distill_func_proto(log, btf,
44127ae7997SMartin KaFai Lau 					   func_proto, mname,
4423b1f89e7SKui-Feng Lee 					   &st_ops->func_models[i])) {
44327ae7997SMartin KaFai Lau 			pr_warn("Error in parsing func ptr %s in struct %s\n",
44427ae7997SMartin KaFai Lau 				mname, st_ops->name);
44516116035SKui-Feng Lee 			err = -EINVAL;
44616116035SKui-Feng Lee 			goto errout;
44727ae7997SMartin KaFai Lau 		}
44816116035SKui-Feng Lee 
44912fdd29dSMartin KaFai Lau 		stub_func_addr = *(void **)(st_ops->cfi_stubs + moff);
45016116035SKui-Feng Lee 		err = prepare_arg_info(btf, st_ops->name, mname,
45112fdd29dSMartin KaFai Lau 				       func_proto, stub_func_addr,
45216116035SKui-Feng Lee 				       arg_info + i);
45316116035SKui-Feng Lee 		if (err)
45416116035SKui-Feng Lee 			goto errout;
45527ae7997SMartin KaFai Lau 	}
45627ae7997SMartin KaFai Lau 
45727ae7997SMartin KaFai Lau 	if (st_ops->init(btf)) {
45827ae7997SMartin KaFai Lau 		pr_warn("Error in init bpf_struct_ops %s\n",
45927ae7997SMartin KaFai Lau 			st_ops->name);
46016116035SKui-Feng Lee 		err = -EINVAL;
46116116035SKui-Feng Lee 		goto errout;
462df9705eaSKui-Feng Lee 	}
463df9705eaSKui-Feng Lee 
464f6be98d1SKui-Feng Lee 	return 0;
46516116035SKui-Feng Lee 
46616116035SKui-Feng Lee errout:
46716116035SKui-Feng Lee 	bpf_struct_ops_desc_release(st_ops_desc);
46816116035SKui-Feng Lee 
46916116035SKui-Feng Lee 	return err;
47027ae7997SMartin KaFai Lau }
47185d33df3SMartin KaFai Lau 
bpf_struct_ops_map_get_next_key(struct bpf_map * map,void * key,void * next_key)47285d33df3SMartin KaFai Lau static int bpf_struct_ops_map_get_next_key(struct bpf_map *map, void *key,
47385d33df3SMartin KaFai Lau 					   void *next_key)
47485d33df3SMartin KaFai Lau {
47585d33df3SMartin KaFai Lau 	if (key && *(u32 *)key == 0)
47685d33df3SMartin KaFai Lau 		return -ENOENT;
47785d33df3SMartin KaFai Lau 
47885d33df3SMartin KaFai Lau 	*(u32 *)next_key = 0;
47985d33df3SMartin KaFai Lau 	return 0;
48085d33df3SMartin KaFai Lau }
48185d33df3SMartin KaFai Lau 
bpf_struct_ops_map_sys_lookup_elem(struct bpf_map * map,void * key,void * value)48285d33df3SMartin KaFai Lau int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key,
48385d33df3SMartin KaFai Lau 				       void *value)
48485d33df3SMartin KaFai Lau {
48585d33df3SMartin KaFai Lau 	struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
48685d33df3SMartin KaFai Lau 	struct bpf_struct_ops_value *uvalue, *kvalue;
48785d33df3SMartin KaFai Lau 	enum bpf_struct_ops_state state;
488b671c206SKui-Feng Lee 	s64 refcnt;
48985d33df3SMartin KaFai Lau 
49085d33df3SMartin KaFai Lau 	if (unlikely(*(u32 *)key != 0))
49185d33df3SMartin KaFai Lau 		return -ENOENT;
49285d33df3SMartin KaFai Lau 
49385d33df3SMartin KaFai Lau 	kvalue = &st_map->kvalue;
49485d33df3SMartin KaFai Lau 	/* Pair with smp_store_release() during map_update */
495612d087dSKui-Feng Lee 	state = smp_load_acquire(&kvalue->common.state);
49685d33df3SMartin KaFai Lau 	if (state == BPF_STRUCT_OPS_STATE_INIT) {
49785d33df3SMartin KaFai Lau 		memset(value, 0, map->value_size);
49885d33df3SMartin KaFai Lau 		return 0;
49985d33df3SMartin KaFai Lau 	}
50085d33df3SMartin KaFai Lau 
50185d33df3SMartin KaFai Lau 	/* No lock is needed.  state and refcnt do not need
50285d33df3SMartin KaFai Lau 	 * to be updated together under atomic context.
50385d33df3SMartin KaFai Lau 	 */
504241d50ecSYu Zhe 	uvalue = value;
50585d33df3SMartin KaFai Lau 	memcpy(uvalue, st_map->uvalue, map->value_size);
506612d087dSKui-Feng Lee 	uvalue->common.state = state;
507b671c206SKui-Feng Lee 
508b671c206SKui-Feng Lee 	/* This value offers the user space a general estimate of how
509b671c206SKui-Feng Lee 	 * many sockets are still utilizing this struct_ops for TCP
510b671c206SKui-Feng Lee 	 * congestion control. The number might not be exact, but it
511b671c206SKui-Feng Lee 	 * should sufficiently meet our present goals.
512b671c206SKui-Feng Lee 	 */
513b671c206SKui-Feng Lee 	refcnt = atomic64_read(&map->refcnt) - atomic64_read(&map->usercnt);
514612d087dSKui-Feng Lee 	refcount_set(&uvalue->common.refcnt, max_t(s64, refcnt, 0));
51585d33df3SMartin KaFai Lau 
51685d33df3SMartin KaFai Lau 	return 0;
51785d33df3SMartin KaFai Lau }
51885d33df3SMartin KaFai Lau 
bpf_struct_ops_map_lookup_elem(struct bpf_map * map,void * key)51985d33df3SMartin KaFai Lau static void *bpf_struct_ops_map_lookup_elem(struct bpf_map *map, void *key)
52085d33df3SMartin KaFai Lau {
52185d33df3SMartin KaFai Lau 	return ERR_PTR(-EINVAL);
52285d33df3SMartin KaFai Lau }
52385d33df3SMartin KaFai Lau 
bpf_struct_ops_map_put_progs(struct bpf_struct_ops_map * st_map)52485d33df3SMartin KaFai Lau static void bpf_struct_ops_map_put_progs(struct bpf_struct_ops_map *st_map)
52585d33df3SMartin KaFai Lau {
52685d33df3SMartin KaFai Lau 	u32 i;
52785d33df3SMartin KaFai Lau 
528821a3fa3SXu Kuohai 	for (i = 0; i < st_map->funcs_cnt; i++) {
529821a3fa3SXu Kuohai 		if (!st_map->links[i])
530821a3fa3SXu Kuohai 			break;
531f7e0beafSKui-Feng Lee 		bpf_link_put(st_map->links[i]);
532f7e0beafSKui-Feng Lee 		st_map->links[i] = NULL;
53385d33df3SMartin KaFai Lau 	}
53485d33df3SMartin KaFai Lau }
53585d33df3SMartin KaFai Lau 
bpf_struct_ops_map_free_image(struct bpf_struct_ops_map * st_map)536187e2af0SKui-Feng Lee static void bpf_struct_ops_map_free_image(struct bpf_struct_ops_map *st_map)
537187e2af0SKui-Feng Lee {
538187e2af0SKui-Feng Lee 	int i;
539187e2af0SKui-Feng Lee 
540187e2af0SKui-Feng Lee 	for (i = 0; i < st_map->image_pages_cnt; i++)
541187e2af0SKui-Feng Lee 		bpf_struct_ops_image_free(st_map->image_pages[i]);
542187e2af0SKui-Feng Lee 	st_map->image_pages_cnt = 0;
543187e2af0SKui-Feng Lee }
544187e2af0SKui-Feng Lee 
check_zero_holes(const struct btf * btf,const struct btf_type * t,void * data)54547f4f657SKui-Feng Lee static int check_zero_holes(const struct btf *btf, const struct btf_type *t, void *data)
54685d33df3SMartin KaFai Lau {
54785d33df3SMartin KaFai Lau 	const struct btf_member *member;
54885d33df3SMartin KaFai Lau 	u32 i, moff, msize, prev_mend = 0;
54985d33df3SMartin KaFai Lau 	const struct btf_type *mtype;
55085d33df3SMartin KaFai Lau 
55185d33df3SMartin KaFai Lau 	for_each_member(i, t, member) {
5528293eb99SAlexei Starovoitov 		moff = __btf_member_bit_offset(t, member) / 8;
55385d33df3SMartin KaFai Lau 		if (moff > prev_mend &&
55485d33df3SMartin KaFai Lau 		    memchr_inv(data + prev_mend, 0, moff - prev_mend))
55585d33df3SMartin KaFai Lau 			return -EINVAL;
55685d33df3SMartin KaFai Lau 
55747f4f657SKui-Feng Lee 		mtype = btf_type_by_id(btf, member->type);
55847f4f657SKui-Feng Lee 		mtype = btf_resolve_size(btf, mtype, &msize);
55985d33df3SMartin KaFai Lau 		if (IS_ERR(mtype))
56085d33df3SMartin KaFai Lau 			return PTR_ERR(mtype);
56185d33df3SMartin KaFai Lau 		prev_mend = moff + msize;
56285d33df3SMartin KaFai Lau 	}
56385d33df3SMartin KaFai Lau 
56485d33df3SMartin KaFai Lau 	if (t->size > prev_mend &&
56585d33df3SMartin KaFai Lau 	    memchr_inv(data + prev_mend, 0, t->size - prev_mend))
56685d33df3SMartin KaFai Lau 		return -EINVAL;
56785d33df3SMartin KaFai Lau 
56885d33df3SMartin KaFai Lau 	return 0;
56985d33df3SMartin KaFai Lau }
57085d33df3SMartin KaFai Lau 
/* .release of bpf_struct_ops_link_lops: intentionally does nothing. */
static void bpf_struct_ops_link_release(struct bpf_link *link)
{
}
574f7e0beafSKui-Feng Lee 
bpf_struct_ops_link_dealloc(struct bpf_link * link)575f7e0beafSKui-Feng Lee static void bpf_struct_ops_link_dealloc(struct bpf_link *link)
576f7e0beafSKui-Feng Lee {
577f7e0beafSKui-Feng Lee 	struct bpf_tramp_link *tlink = container_of(link, struct bpf_tramp_link, link);
578f7e0beafSKui-Feng Lee 
579f7e0beafSKui-Feng Lee 	kfree(tlink);
580f7e0beafSKui-Feng Lee }
581f7e0beafSKui-Feng Lee 
582f7e0beafSKui-Feng Lee const struct bpf_link_ops bpf_struct_ops_link_lops = {
583f7e0beafSKui-Feng Lee 	.release = bpf_struct_ops_link_release,
584f7e0beafSKui-Feng Lee 	.dealloc = bpf_struct_ops_link_dealloc,
585f7e0beafSKui-Feng Lee };
586f7e0beafSKui-Feng Lee 
/* Emit one trampoline for @link into the image at *@_image + *@_image_off.
 *
 * @tlinks:      scratch array; its BPF_TRAMP_FENTRY slot is overwritten
 * @link:        the single link the trampoline will invoke
 * @model:       function model of the struct_ops member
 * @stub_func:   passed through to arch_prepare_bpf_trampoline()
 * @_image:      in/out, current image buffer (may be NULL)
 * @_image_off:  in/out, first unused byte in *@_image
 * @allow_alloc: whether a fresh image may be allocated when the current one
 *               is missing or too small
 *
 * On success *@_image and *@_image_off are advanced past the new trampoline
 * and 0 is returned.  On failure both are left untouched and a negative
 * errno is returned; an image allocated by this call is freed again.
 */
int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
				      struct bpf_tramp_link *link,
				      const struct btf_func_model *model,
				      void *stub_func,
				      void **_image, u32 *_image_off,
				      bool allow_alloc)
{
	u32 tramp_flags = BPF_TRAMP_F_INDIRECT;
	u32 off = *_image_off;
	void *buf = *_image;
	int len;

	tlinks[BPF_TRAMP_FENTRY].nr_links = 1;
	tlinks[BPF_TRAMP_FENTRY].links[0] = link;

	if (model->ret_size > 0)
		tramp_flags |= BPF_TRAMP_F_RET_FENTRY_RET;

	len = arch_bpf_trampoline_size(model, tramp_flags, tlinks, NULL);
	if (len < 0)
		return len;
	if (!len)
		return -EFAULT;

	/* Start a new image when there is none yet or the trampoline would
	 * not fit in what is left of the current one.
	 */
	if (!buf || len > PAGE_SIZE - off) {
		if (!allow_alloc)
			return -E2BIG;

		buf = bpf_struct_ops_image_alloc();
		if (IS_ERR(buf))
			return PTR_ERR(buf);
		off = 0;
	}

	len = arch_prepare_bpf_trampoline(NULL, buf + off, buf + off + len,
					  model, tramp_flags, tlinks,
					  stub_func);
	if (len <= 0) {
		/* Only release an image that was allocated just above. */
		if (buf != *_image)
			bpf_struct_ops_image_free(buf);
		return len ? len : -EFAULT;
	}

	*_image_off = off + len;
	*_image = buf;
	return 0;
}
63231a645aeSHou Tao 
bpf_struct_ops_ksym_init(const char * tname,const char * mname,void * image,unsigned int size,struct bpf_ksym * ksym)6337c8ce4ffSXu Kuohai static void bpf_struct_ops_ksym_init(const char *tname, const char *mname,
6347c8ce4ffSXu Kuohai 				     void *image, unsigned int size,
6357c8ce4ffSXu Kuohai 				     struct bpf_ksym *ksym)
6367c8ce4ffSXu Kuohai {
6377c8ce4ffSXu Kuohai 	snprintf(ksym->name, KSYM_NAME_LEN, "bpf__%s_%s", tname, mname);
6387c8ce4ffSXu Kuohai 	INIT_LIST_HEAD_RCU(&ksym->lnode);
6397c8ce4ffSXu Kuohai 	bpf_image_ksym_init(image, size, ksym);
6407c8ce4ffSXu Kuohai }
6417c8ce4ffSXu Kuohai 
bpf_struct_ops_map_add_ksyms(struct bpf_struct_ops_map * st_map)6427c8ce4ffSXu Kuohai static void bpf_struct_ops_map_add_ksyms(struct bpf_struct_ops_map *st_map)
6437c8ce4ffSXu Kuohai {
6447c8ce4ffSXu Kuohai 	u32 i;
6457c8ce4ffSXu Kuohai 
6467c8ce4ffSXu Kuohai 	for (i = 0; i < st_map->funcs_cnt; i++) {
6477c8ce4ffSXu Kuohai 		if (!st_map->ksyms[i])
6487c8ce4ffSXu Kuohai 			break;
6497c8ce4ffSXu Kuohai 		bpf_image_ksym_add(st_map->ksyms[i]);
6507c8ce4ffSXu Kuohai 	}
6517c8ce4ffSXu Kuohai }
6527c8ce4ffSXu Kuohai 
bpf_struct_ops_map_del_ksyms(struct bpf_struct_ops_map * st_map)6537c8ce4ffSXu Kuohai static void bpf_struct_ops_map_del_ksyms(struct bpf_struct_ops_map *st_map)
6547c8ce4ffSXu Kuohai {
6557c8ce4ffSXu Kuohai 	u32 i;
6567c8ce4ffSXu Kuohai 
6577c8ce4ffSXu Kuohai 	for (i = 0; i < st_map->funcs_cnt; i++) {
6587c8ce4ffSXu Kuohai 		if (!st_map->ksyms[i])
6597c8ce4ffSXu Kuohai 			break;
6607c8ce4ffSXu Kuohai 		bpf_image_ksym_del(st_map->ksyms[i]);
6617c8ce4ffSXu Kuohai 	}
6627c8ce4ffSXu Kuohai }
6637c8ce4ffSXu Kuohai 
bpf_struct_ops_map_free_ksyms(struct bpf_struct_ops_map * st_map)6647c8ce4ffSXu Kuohai static void bpf_struct_ops_map_free_ksyms(struct bpf_struct_ops_map *st_map)
6657c8ce4ffSXu Kuohai {
6667c8ce4ffSXu Kuohai 	u32 i;
6677c8ce4ffSXu Kuohai 
6687c8ce4ffSXu Kuohai 	for (i = 0; i < st_map->funcs_cnt; i++) {
6697c8ce4ffSXu Kuohai 		if (!st_map->ksyms[i])
6707c8ce4ffSXu Kuohai 			break;
6717c8ce4ffSXu Kuohai 		kfree(st_map->ksyms[i]);
6727c8ce4ffSXu Kuohai 		st_map->ksyms[i] = NULL;
6737c8ce4ffSXu Kuohai 	}
6747c8ce4ffSXu Kuohai }
6757c8ce4ffSXu Kuohai 
/* Populate the single element of a struct_ops map from a user supplied value.
 *
 * @map:   the struct_ops map
 * @key:   must point to a u32 holding 0
 * @value: user copy of the map value; function pointer members carry prog fds
 * @flags: must be 0
 *
 * For each member of the struct_ops type the user value is validated and the
 * kernel value (kvalue) is filled in: the module owner member is set to
 * BPF_MODULE_OWNER, members claimed by ->init_member() are left to it, other
 * non function pointer members must be zero, and each function pointer with
 * a non-zero prog fd gets a link, a ksym and a trampoline.  The prog fd in
 * the stored user value is replaced by the prog id.
 *
 * Afterwards the value is validated, the trampoline images are protected and
 * the map either becomes READY (BPF_F_LINK maps) or is registered through
 * ->reg() and becomes INUSE.
 *
 * Returns 0 on success or a negative errno.  On any failure after the state
 * check, uvalue/kvalue, links, ksyms and images are reset so the map can be
 * updated again.
 */
static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
					   void *value, u64 flags)
{
	struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
	const struct bpf_struct_ops_desc *st_ops_desc = st_map->st_ops_desc;
	const struct bpf_struct_ops *st_ops = st_ops_desc->st_ops;
	struct bpf_struct_ops_value *uvalue, *kvalue;
	const struct btf_type *module_type;
	const struct btf_member *member;
	const struct btf_type *t = st_ops_desc->type;
	struct bpf_tramp_links *tlinks;
	void *udata, *kdata;
	int prog_fd, err;
	u32 i, trampoline_start, image_off = 0;
	void *cur_image = NULL, *image = NULL;
	struct bpf_link **plink;
	struct bpf_ksym **pksym;
	const char *tname, *mname;

	if (flags)
		return -EINVAL;

	if (*(u32 *)key != 0)
		return -E2BIG;

	/* Padding in both the value wrapper and the embedded struct must be
	 * zero.
	 */
	err = check_zero_holes(st_map->btf, st_ops_desc->value_type, value);
	if (err)
		return err;

	uvalue = value;
	err = check_zero_holes(st_map->btf, t, uvalue->data);
	if (err)
		return err;

	/* state and refcnt are kernel owned; user space must pass them as 0 */
	if (uvalue->common.state || refcount_read(&uvalue->common.refcnt))
		return -EINVAL;

	tlinks = kcalloc(BPF_TRAMP_MAX, sizeof(*tlinks), GFP_KERNEL);
	if (!tlinks)
		return -ENOMEM;

	uvalue = (struct bpf_struct_ops_value *)st_map->uvalue;
	kvalue = (struct bpf_struct_ops_value *)&st_map->kvalue;

	mutex_lock(&st_map->lock);

	if (kvalue->common.state != BPF_STRUCT_OPS_STATE_INIT) {
		err = -EBUSY;
		goto unlock;
	}

	memcpy(uvalue, value, map->value_size);

	udata = &uvalue->data;
	kdata = &kvalue->data;

	plink = st_map->links;
	pksym = st_map->ksyms;
	tname = btf_name_by_offset(st_map->btf, t->name_off);
	module_type = btf_type_by_id(btf_vmlinux, st_ops_ids[IDX_MODULE_ID]);
	for_each_member(i, t, member) {
		const struct btf_type *mtype, *ptype;
		struct bpf_prog *prog;
		struct bpf_tramp_link *link;
		struct bpf_ksym *ksym;
		u32 moff;

		moff = __btf_member_bit_offset(t, member) / 8;
		mname = btf_name_by_offset(st_map->btf, member->name_off);
		ptype = btf_type_resolve_ptr(st_map->btf, member->type, NULL);
		if (ptype == module_type) {
			if (*(void **)(udata + moff)) {
				/* err still holds 0 or a positive
				 * ->init_member() result from an earlier
				 * member here.  Set a real error so the
				 * caller does not see success for a map
				 * that is about to be reset.
				 */
				err = -EINVAL;
				goto reset_unlock;
			}
			*(void **)(kdata + moff) = BPF_MODULE_OWNER;
			continue;
		}

		err = st_ops->init_member(t, member, kdata, udata);
		if (err < 0)
			goto reset_unlock;

		/* The ->init_member() has handled this member */
		if (err > 0)
			continue;

		/* If st_ops->init_member does not handle it,
		 * we will only handle func ptrs and zero-ed members
		 * here.  Reject everything else.
		 */

		/* All non func ptr member must be 0 */
		if (!ptype || !btf_type_is_func_proto(ptype)) {
			u32 msize;

			mtype = btf_type_by_id(st_map->btf, member->type);
			mtype = btf_resolve_size(st_map->btf, mtype, &msize);
			if (IS_ERR(mtype)) {
				err = PTR_ERR(mtype);
				goto reset_unlock;
			}

			if (memchr_inv(udata + moff, 0, msize)) {
				err = -EINVAL;
				goto reset_unlock;
			}

			continue;
		}

		prog_fd = (int)(*(unsigned long *)(udata + moff));
		/* Similar check as the attr->attach_prog_fd */
		if (!prog_fd)
			continue;

		prog = bpf_prog_get(prog_fd);
		if (IS_ERR(prog)) {
			err = PTR_ERR(prog);
			goto reset_unlock;
		}

		/* The prog must have been loaded for exactly this member of
		 * exactly this struct_ops type.
		 */
		if (prog->type != BPF_PROG_TYPE_STRUCT_OPS ||
		    prog->aux->attach_btf_id != st_ops_desc->type_id ||
		    prog->expected_attach_type != i) {
			bpf_prog_put(prog);
			err = -EINVAL;
			goto reset_unlock;
		}

		link = kzalloc(sizeof(*link), GFP_USER);
		if (!link) {
			bpf_prog_put(prog);
			err = -ENOMEM;
			goto reset_unlock;
		}
		bpf_link_init(&link->link, BPF_LINK_TYPE_STRUCT_OPS,
			      &bpf_struct_ops_link_lops, prog);
		/* Stored in st_map->links, so the reset path can find it. */
		*plink++ = &link->link;

		ksym = kzalloc(sizeof(*ksym), GFP_USER);
		if (!ksym) {
			err = -ENOMEM;
			goto reset_unlock;
		}
		*pksym++ = ksym;

		trampoline_start = image_off;
		err = bpf_struct_ops_prepare_trampoline(tlinks, link,
						&st_ops->func_models[i],
						*(void **)(st_ops->cfi_stubs + moff),
						&image, &image_off,
						st_map->image_pages_cnt < MAX_TRAMP_IMAGE_PAGES);
		if (err)
			goto reset_unlock;

		/* A new image page was started for this trampoline: record
		 * it and note that the trampoline sits at its beginning.
		 */
		if (cur_image != image) {
			st_map->image_pages[st_map->image_pages_cnt++] = image;
			cur_image = image;
			trampoline_start = 0;
		}

		*(void **)(kdata + moff) = image + trampoline_start + cfi_get_offset();

		/* put prog_id to udata */
		*(unsigned long *)(udata + moff) = prog->aux->id;

		/* init ksym for this trampoline */
		bpf_struct_ops_ksym_init(tname, mname,
					 image + trampoline_start,
					 image_off - trampoline_start,
					 ksym);
	}

	if (st_ops->validate) {
		err = st_ops->validate(kdata);
		if (err)
			goto reset_unlock;
	}
	for (i = 0; i < st_map->image_pages_cnt; i++) {
		err = arch_protect_bpf_trampoline(st_map->image_pages[i],
						  PAGE_SIZE);
		if (err)
			goto reset_unlock;
	}

	if (st_map->map.map_flags & BPF_F_LINK) {
		err = 0;
		/* Let bpf_link handle registration & unregistration.
		 *
		 * Pair with smp_load_acquire() during lookup_elem().
		 */
		smp_store_release(&kvalue->common.state, BPF_STRUCT_OPS_STATE_READY);
		goto unlock;
	}

	err = st_ops->reg(kdata, NULL);
	if (likely(!err)) {
		/* This refcnt increment on the map here after
		 * 'st_ops->reg()' is secure since the state of the
		 * map must be set to INIT at this moment, and thus
		 * bpf_struct_ops_map_delete_elem() can't unregister
		 * or transition it to TOBEFREE concurrently.
		 */
		bpf_map_inc(map);
		/* Pair with smp_load_acquire() during lookup_elem().
		 * It ensures the above udata updates (e.g. prog->aux->id)
		 * can be seen once BPF_STRUCT_OPS_STATE_INUSE is set.
		 */
		smp_store_release(&kvalue->common.state, BPF_STRUCT_OPS_STATE_INUSE);
		goto unlock;
	}

	/* Error during st_ops->reg(). Can happen if this struct_ops needs to be
	 * verified as a whole, after all init_member() calls. Can also happen if
	 * there was a race in registering the struct_ops (under the same name) to
	 * a sub-system through different struct_ops's maps.
	 */

reset_unlock:
	bpf_struct_ops_map_free_ksyms(st_map);
	bpf_struct_ops_map_free_image(st_map);
	bpf_struct_ops_map_put_progs(st_map);
	memset(uvalue, 0, map->value_size);
	memset(kvalue, 0, map->value_size);
unlock:
	kfree(tlinks);
	mutex_unlock(&st_map->lock);
	/* The ksyms are only added once the update has fully succeeded. */
	if (!err)
		bpf_struct_ops_map_add_ksyms(st_map);
	return err;
}
90685d33df3SMartin KaFai Lau 
bpf_struct_ops_map_delete_elem(struct bpf_map * map,void * key)907d7ba4cc9SJP Kobryn static long bpf_struct_ops_map_delete_elem(struct bpf_map *map, void *key)
90885d33df3SMartin KaFai Lau {
90985d33df3SMartin KaFai Lau 	enum bpf_struct_ops_state prev_state;
91085d33df3SMartin KaFai Lau 	struct bpf_struct_ops_map *st_map;
91185d33df3SMartin KaFai Lau 
91285d33df3SMartin KaFai Lau 	st_map = (struct bpf_struct_ops_map *)map;
91368b04864SKui-Feng Lee 	if (st_map->map.map_flags & BPF_F_LINK)
91468b04864SKui-Feng Lee 		return -EOPNOTSUPP;
91568b04864SKui-Feng Lee 
916612d087dSKui-Feng Lee 	prev_state = cmpxchg(&st_map->kvalue.common.state,
91785d33df3SMartin KaFai Lau 			     BPF_STRUCT_OPS_STATE_INUSE,
91885d33df3SMartin KaFai Lau 			     BPF_STRUCT_OPS_STATE_TOBEFREE);
9198e5290e7SMartin KaFai Lau 	switch (prev_state) {
9208e5290e7SMartin KaFai Lau 	case BPF_STRUCT_OPS_STATE_INUSE:
92173287fe2SKui-Feng Lee 		st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, NULL);
92285d33df3SMartin KaFai Lau 		bpf_map_put(map);
92385d33df3SMartin KaFai Lau 		return 0;
9248e5290e7SMartin KaFai Lau 	case BPF_STRUCT_OPS_STATE_TOBEFREE:
9258e5290e7SMartin KaFai Lau 		return -EINPROGRESS;
9268e5290e7SMartin KaFai Lau 	case BPF_STRUCT_OPS_STATE_INIT:
9278e5290e7SMartin KaFai Lau 		return -ENOENT;
9288e5290e7SMartin KaFai Lau 	default:
9298e5290e7SMartin KaFai Lau 		WARN_ON_ONCE(1);
9308e5290e7SMartin KaFai Lau 		/* Should never happen.  Treat it as not found. */
9318e5290e7SMartin KaFai Lau 		return -ENOENT;
9328e5290e7SMartin KaFai Lau 	}
93385d33df3SMartin KaFai Lau }
93485d33df3SMartin KaFai Lau 
bpf_struct_ops_map_seq_show_elem(struct bpf_map * map,void * key,struct seq_file * m)93585d33df3SMartin KaFai Lau static void bpf_struct_ops_map_seq_show_elem(struct bpf_map *map, void *key,
93685d33df3SMartin KaFai Lau 					     struct seq_file *m)
93785d33df3SMartin KaFai Lau {
93847f4f657SKui-Feng Lee 	struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
93985d33df3SMartin KaFai Lau 	void *value;
9403b413041SMartin KaFai Lau 	int err;
94185d33df3SMartin KaFai Lau 
9423b413041SMartin KaFai Lau 	value = kmalloc(map->value_size, GFP_USER | __GFP_NOWARN);
94385d33df3SMartin KaFai Lau 	if (!value)
94485d33df3SMartin KaFai Lau 		return;
94585d33df3SMartin KaFai Lau 
9463b413041SMartin KaFai Lau 	err = bpf_struct_ops_map_sys_lookup_elem(map, key, value);
9473b413041SMartin KaFai Lau 	if (!err) {
94847f4f657SKui-Feng Lee 		btf_type_seq_show(st_map->btf,
94947f4f657SKui-Feng Lee 				  map->btf_vmlinux_value_type_id,
95085d33df3SMartin KaFai Lau 				  value, m);
951df862de4SMarkus Elfring 		seq_putc(m, '\n');
95285d33df3SMartin KaFai Lau 	}
95385d33df3SMartin KaFai Lau 
9543b413041SMartin KaFai Lau 	kfree(value);
9553b413041SMartin KaFai Lau }
9563b413041SMartin KaFai Lau 
__bpf_struct_ops_map_free(struct bpf_map * map)957b671c206SKui-Feng Lee static void __bpf_struct_ops_map_free(struct bpf_map *map)
95885d33df3SMartin KaFai Lau {
95985d33df3SMartin KaFai Lau 	struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
96085d33df3SMartin KaFai Lau 
961f7e0beafSKui-Feng Lee 	if (st_map->links)
96285d33df3SMartin KaFai Lau 		bpf_struct_ops_map_put_progs(st_map);
9637c8ce4ffSXu Kuohai 	if (st_map->ksyms)
9647c8ce4ffSXu Kuohai 		bpf_struct_ops_map_free_ksyms(st_map);
965f7e0beafSKui-Feng Lee 	bpf_map_area_free(st_map->links);
9667c8ce4ffSXu Kuohai 	bpf_map_area_free(st_map->ksyms);
967187e2af0SKui-Feng Lee 	bpf_struct_ops_map_free_image(st_map);
96885d33df3SMartin KaFai Lau 	bpf_map_area_free(st_map->uvalue);
96985d33df3SMartin KaFai Lau 	bpf_map_area_free(st_map);
97085d33df3SMartin KaFai Lau }
97185d33df3SMartin KaFai Lau 
bpf_struct_ops_map_free(struct bpf_map * map)972b671c206SKui-Feng Lee static void bpf_struct_ops_map_free(struct bpf_map *map)
973b671c206SKui-Feng Lee {
974e3f87fdfSKui-Feng Lee 	struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
975e3f87fdfSKui-Feng Lee 
976e3f87fdfSKui-Feng Lee 	/* st_ops->owner was acquired during map_alloc to implicitly holds
977e3f87fdfSKui-Feng Lee 	 * the btf's refcnt. The acquire was only done when btf_is_module()
978e3f87fdfSKui-Feng Lee 	 * st_map->btf cannot be NULL here.
979e3f87fdfSKui-Feng Lee 	 */
980e3f87fdfSKui-Feng Lee 	if (btf_is_module(st_map->btf))
981e3f87fdfSKui-Feng Lee 		module_put(st_map->st_ops_desc->st_ops->owner);
982e3f87fdfSKui-Feng Lee 
9837c8ce4ffSXu Kuohai 	bpf_struct_ops_map_del_ksyms(st_map);
9847c8ce4ffSXu Kuohai 
985b671c206SKui-Feng Lee 	/* The struct_ops's function may switch to another struct_ops.
986b671c206SKui-Feng Lee 	 *
987b671c206SKui-Feng Lee 	 * For example, bpf_tcp_cc_x->init() may switch to
988b671c206SKui-Feng Lee 	 * another tcp_cc_y by calling
989b671c206SKui-Feng Lee 	 * setsockopt(TCP_CONGESTION, "tcp_cc_y").
990b671c206SKui-Feng Lee 	 * During the switch,  bpf_struct_ops_put(tcp_cc_x) is called
991b671c206SKui-Feng Lee 	 * and its refcount may reach 0 which then free its
992b671c206SKui-Feng Lee 	 * trampoline image while tcp_cc_x is still running.
993b671c206SKui-Feng Lee 	 *
994b671c206SKui-Feng Lee 	 * A vanilla rcu gp is to wait for all bpf-tcp-cc prog
995b671c206SKui-Feng Lee 	 * to finish. bpf-tcp-cc prog is non sleepable.
996b671c206SKui-Feng Lee 	 * A rcu_tasks gp is to wait for the last few insn
997b671c206SKui-Feng Lee 	 * in the tramopline image to finish before releasing
998b671c206SKui-Feng Lee 	 * the trampoline image.
999b671c206SKui-Feng Lee 	 */
1000b671c206SKui-Feng Lee 	synchronize_rcu_mult(call_rcu, call_rcu_tasks);
1001b671c206SKui-Feng Lee 
1002b671c206SKui-Feng Lee 	__bpf_struct_ops_map_free(map);
1003b671c206SKui-Feng Lee }
1004b671c206SKui-Feng Lee 
bpf_struct_ops_map_alloc_check(union bpf_attr * attr)100585d33df3SMartin KaFai Lau static int bpf_struct_ops_map_alloc_check(union bpf_attr *attr)
100685d33df3SMartin KaFai Lau {
100785d33df3SMartin KaFai Lau 	if (attr->key_size != sizeof(unsigned int) || attr->max_entries != 1 ||
1008fcc2c1fbSKui-Feng Lee 	    (attr->map_flags & ~(BPF_F_LINK | BPF_F_VTYPE_BTF_OBJ_FD)) ||
1009fcc2c1fbSKui-Feng Lee 	    !attr->btf_vmlinux_value_type_id)
101085d33df3SMartin KaFai Lau 		return -EINVAL;
101185d33df3SMartin KaFai Lau 	return 0;
101285d33df3SMartin KaFai Lau }
101385d33df3SMartin KaFai Lau 
count_func_ptrs(const struct btf * btf,const struct btf_type * t)1014821a3fa3SXu Kuohai static u32 count_func_ptrs(const struct btf *btf, const struct btf_type *t)
1015821a3fa3SXu Kuohai {
1016821a3fa3SXu Kuohai 	int i;
1017821a3fa3SXu Kuohai 	u32 count;
1018821a3fa3SXu Kuohai 	const struct btf_member *member;
1019821a3fa3SXu Kuohai 
1020821a3fa3SXu Kuohai 	count = 0;
1021821a3fa3SXu Kuohai 	for_each_member(i, t, member)
1022821a3fa3SXu Kuohai 		if (btf_type_resolve_func_ptr(btf, member->type, NULL))
1023821a3fa3SXu Kuohai 			count++;
1024821a3fa3SXu Kuohai 	return count;
1025821a3fa3SXu Kuohai }
1026821a3fa3SXu Kuohai 
bpf_struct_ops_map_alloc(union bpf_attr * attr)102785d33df3SMartin KaFai Lau static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
102885d33df3SMartin KaFai Lau {
10294c5763edSKui-Feng Lee 	const struct bpf_struct_ops_desc *st_ops_desc;
1030f043733fSRoman Gushchin 	size_t st_map_size;
103185d33df3SMartin KaFai Lau 	struct bpf_struct_ops_map *st_map;
103285d33df3SMartin KaFai Lau 	const struct btf_type *t, *vt;
1033e3f87fdfSKui-Feng Lee 	struct module *mod = NULL;
103485d33df3SMartin KaFai Lau 	struct bpf_map *map;
1035fcc2c1fbSKui-Feng Lee 	struct btf *btf;
10365c04433dSSong Liu 	int ret;
103785d33df3SMartin KaFai Lau 
1038fcc2c1fbSKui-Feng Lee 	if (attr->map_flags & BPF_F_VTYPE_BTF_OBJ_FD) {
1039fcc2c1fbSKui-Feng Lee 		/* The map holds btf for its whole life time. */
1040fcc2c1fbSKui-Feng Lee 		btf = btf_get_by_fd(attr->value_type_btf_obj_fd);
1041fcc2c1fbSKui-Feng Lee 		if (IS_ERR(btf))
1042fcc2c1fbSKui-Feng Lee 			return ERR_CAST(btf);
1043fcc2c1fbSKui-Feng Lee 		if (!btf_is_module(btf)) {
1044fcc2c1fbSKui-Feng Lee 			btf_put(btf);
1045fcc2c1fbSKui-Feng Lee 			return ERR_PTR(-EINVAL);
1046fcc2c1fbSKui-Feng Lee 		}
1047e3f87fdfSKui-Feng Lee 
1048e3f87fdfSKui-Feng Lee 		mod = btf_try_get_module(btf);
1049e3f87fdfSKui-Feng Lee 		/* mod holds a refcnt to btf. We don't need an extra refcnt
1050e3f87fdfSKui-Feng Lee 		 * here.
1051e3f87fdfSKui-Feng Lee 		 */
1052e3f87fdfSKui-Feng Lee 		btf_put(btf);
1053e3f87fdfSKui-Feng Lee 		if (!mod)
1054e3f87fdfSKui-Feng Lee 			return ERR_PTR(-EINVAL);
1055fcc2c1fbSKui-Feng Lee 	} else {
1056fcc2c1fbSKui-Feng Lee 		btf = bpf_get_btf_vmlinux();
1057fcc2c1fbSKui-Feng Lee 		if (IS_ERR(btf))
1058fcc2c1fbSKui-Feng Lee 			return ERR_CAST(btf);
1059e6be8cd5SKui-Feng Lee 		if (!btf)
1060e6be8cd5SKui-Feng Lee 			return ERR_PTR(-ENOTSUPP);
1061fcc2c1fbSKui-Feng Lee 	}
1062fcc2c1fbSKui-Feng Lee 
1063fcc2c1fbSKui-Feng Lee 	st_ops_desc = bpf_struct_ops_find_value(btf, attr->btf_vmlinux_value_type_id);
1064fcc2c1fbSKui-Feng Lee 	if (!st_ops_desc) {
1065fcc2c1fbSKui-Feng Lee 		ret = -ENOTSUPP;
1066fcc2c1fbSKui-Feng Lee 		goto errout;
1067fcc2c1fbSKui-Feng Lee 	}
106885d33df3SMartin KaFai Lau 
10694c5763edSKui-Feng Lee 	vt = st_ops_desc->value_type;
1070fcc2c1fbSKui-Feng Lee 	if (attr->value_size != vt->size) {
1071fcc2c1fbSKui-Feng Lee 		ret = -EINVAL;
1072fcc2c1fbSKui-Feng Lee 		goto errout;
1073fcc2c1fbSKui-Feng Lee 	}
107485d33df3SMartin KaFai Lau 
10754c5763edSKui-Feng Lee 	t = st_ops_desc->type;
107685d33df3SMartin KaFai Lau 
107785d33df3SMartin KaFai Lau 	st_map_size = sizeof(*st_map) +
107885d33df3SMartin KaFai Lau 		/* kvalue stores the
107985d33df3SMartin KaFai Lau 		 * struct bpf_struct_ops_tcp_congestions_ops
108085d33df3SMartin KaFai Lau 		 */
108185d33df3SMartin KaFai Lau 		(vt->size - sizeof(struct bpf_struct_ops_value));
108285d33df3SMartin KaFai Lau 
108385d33df3SMartin KaFai Lau 	st_map = bpf_map_area_alloc(st_map_size, NUMA_NO_NODE);
1084fcc2c1fbSKui-Feng Lee 	if (!st_map) {
1085fcc2c1fbSKui-Feng Lee 		ret = -ENOMEM;
1086fcc2c1fbSKui-Feng Lee 		goto errout;
1087fcc2c1fbSKui-Feng Lee 	}
1088f043733fSRoman Gushchin 
10894c5763edSKui-Feng Lee 	st_map->st_ops_desc = st_ops_desc;
109085d33df3SMartin KaFai Lau 	map = &st_map->map;
109185d33df3SMartin KaFai Lau 
109285d33df3SMartin KaFai Lau 	st_map->uvalue = bpf_map_area_alloc(vt->size, NUMA_NO_NODE);
1093821a3fa3SXu Kuohai 	st_map->funcs_cnt = count_func_ptrs(btf, t);
1094f7e0beafSKui-Feng Lee 	st_map->links =
1095821a3fa3SXu Kuohai 		bpf_map_area_alloc(st_map->funcs_cnt * sizeof(struct bpf_link *),
109685d33df3SMartin KaFai Lau 				   NUMA_NO_NODE);
10977c8ce4ffSXu Kuohai 
10987c8ce4ffSXu Kuohai 	st_map->ksyms =
10997c8ce4ffSXu Kuohai 		bpf_map_area_alloc(st_map->funcs_cnt * sizeof(struct bpf_ksym *),
11007c8ce4ffSXu Kuohai 				   NUMA_NO_NODE);
11017c8ce4ffSXu Kuohai 	if (!st_map->uvalue || !st_map->links || !st_map->ksyms) {
1102fcc2c1fbSKui-Feng Lee 		ret = -ENOMEM;
1103fcc2c1fbSKui-Feng Lee 		goto errout_free;
110485d33df3SMartin KaFai Lau 	}
1105fcc2c1fbSKui-Feng Lee 	st_map->btf = btf;
110647f4f657SKui-Feng Lee 
110785d33df3SMartin KaFai Lau 	mutex_init(&st_map->lock);
110885d33df3SMartin KaFai Lau 	bpf_map_init_from_attr(map, attr);
110985d33df3SMartin KaFai Lau 
111085d33df3SMartin KaFai Lau 	return map;
1111fcc2c1fbSKui-Feng Lee 
1112fcc2c1fbSKui-Feng Lee errout_free:
1113fcc2c1fbSKui-Feng Lee 	__bpf_struct_ops_map_free(map);
1114fcc2c1fbSKui-Feng Lee errout:
1115e3f87fdfSKui-Feng Lee 	module_put(mod);
1116fcc2c1fbSKui-Feng Lee 
1117fcc2c1fbSKui-Feng Lee 	return ERR_PTR(ret);
111885d33df3SMartin KaFai Lau }
111985d33df3SMartin KaFai Lau 
bpf_struct_ops_map_mem_usage(const struct bpf_map * map)1120f062226dSYafang Shao static u64 bpf_struct_ops_map_mem_usage(const struct bpf_map *map)
1121f062226dSYafang Shao {
1122f062226dSYafang Shao 	struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
11234c5763edSKui-Feng Lee 	const struct bpf_struct_ops_desc *st_ops_desc = st_map->st_ops_desc;
11244c5763edSKui-Feng Lee 	const struct btf_type *vt = st_ops_desc->value_type;
1125f062226dSYafang Shao 	u64 usage;
1126f062226dSYafang Shao 
1127f062226dSYafang Shao 	usage = sizeof(*st_map) +
1128f062226dSYafang Shao 			vt->size - sizeof(struct bpf_struct_ops_value);
1129f062226dSYafang Shao 	usage += vt->size;
1130821a3fa3SXu Kuohai 	usage += st_map->funcs_cnt * sizeof(struct bpf_link *);
11317c8ce4ffSXu Kuohai 	usage += st_map->funcs_cnt * sizeof(struct bpf_ksym *);
1132f062226dSYafang Shao 	usage += PAGE_SIZE;
1133f062226dSYafang Shao 	return usage;
1134f062226dSYafang Shao }
1135f062226dSYafang Shao 
1136c317ab71SMenglong Dong BTF_ID_LIST_SINGLE(bpf_struct_ops_map_btf_ids, struct, bpf_struct_ops_map)
113785d33df3SMartin KaFai Lau const struct bpf_map_ops bpf_struct_ops_map_ops = {
113885d33df3SMartin KaFai Lau 	.map_alloc_check = bpf_struct_ops_map_alloc_check,
113985d33df3SMartin KaFai Lau 	.map_alloc = bpf_struct_ops_map_alloc,
114085d33df3SMartin KaFai Lau 	.map_free = bpf_struct_ops_map_free,
114185d33df3SMartin KaFai Lau 	.map_get_next_key = bpf_struct_ops_map_get_next_key,
114285d33df3SMartin KaFai Lau 	.map_lookup_elem = bpf_struct_ops_map_lookup_elem,
114385d33df3SMartin KaFai Lau 	.map_delete_elem = bpf_struct_ops_map_delete_elem,
114485d33df3SMartin KaFai Lau 	.map_update_elem = bpf_struct_ops_map_update_elem,
114585d33df3SMartin KaFai Lau 	.map_seq_show_elem = bpf_struct_ops_map_seq_show_elem,
1146f062226dSYafang Shao 	.map_mem_usage = bpf_struct_ops_map_mem_usage,
1147c317ab71SMenglong Dong 	.map_btf_id = &bpf_struct_ops_map_btf_ids[0],
114885d33df3SMartin KaFai Lau };
114985d33df3SMartin KaFai Lau 
115085d33df3SMartin KaFai Lau /* "const void *" because some subsystem is
115185d33df3SMartin KaFai Lau  * passing a const (e.g. const struct tcp_congestion_ops *)
115285d33df3SMartin KaFai Lau  */
bpf_struct_ops_get(const void * kdata)115385d33df3SMartin KaFai Lau bool bpf_struct_ops_get(const void *kdata)
115485d33df3SMartin KaFai Lau {
115585d33df3SMartin KaFai Lau 	struct bpf_struct_ops_value *kvalue;
1156b671c206SKui-Feng Lee 	struct bpf_struct_ops_map *st_map;
1157b671c206SKui-Feng Lee 	struct bpf_map *map;
115885d33df3SMartin KaFai Lau 
115985d33df3SMartin KaFai Lau 	kvalue = container_of(kdata, struct bpf_struct_ops_value, data);
1160b671c206SKui-Feng Lee 	st_map = container_of(kvalue, struct bpf_struct_ops_map, kvalue);
116185d33df3SMartin KaFai Lau 
1162b671c206SKui-Feng Lee 	map = __bpf_map_inc_not_zero(&st_map->map, false);
1163b671c206SKui-Feng Lee 	return !IS_ERR(map);
1164eb18b49eSMartin KaFai Lau }
1165eb18b49eSMartin KaFai Lau 
bpf_struct_ops_put(const void * kdata)116685d33df3SMartin KaFai Lau void bpf_struct_ops_put(const void *kdata)
116785d33df3SMartin KaFai Lau {
116885d33df3SMartin KaFai Lau 	struct bpf_struct_ops_value *kvalue;
116985d33df3SMartin KaFai Lau 	struct bpf_struct_ops_map *st_map;
117085d33df3SMartin KaFai Lau 
1171b671c206SKui-Feng Lee 	kvalue = container_of(kdata, struct bpf_struct_ops_value, data);
1172b671c206SKui-Feng Lee 	st_map = container_of(kvalue, struct bpf_struct_ops_map, kvalue);
1173b671c206SKui-Feng Lee 
1174b671c206SKui-Feng Lee 	bpf_map_put(&st_map->map);
117585d33df3SMartin KaFai Lau }
117668b04864SKui-Feng Lee 
bpf_struct_ops_valid_to_reg(struct bpf_map * map)117768b04864SKui-Feng Lee static bool bpf_struct_ops_valid_to_reg(struct bpf_map *map)
117868b04864SKui-Feng Lee {
117968b04864SKui-Feng Lee 	struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
118068b04864SKui-Feng Lee 
118168b04864SKui-Feng Lee 	return map->map_type == BPF_MAP_TYPE_STRUCT_OPS &&
118268b04864SKui-Feng Lee 		map->map_flags & BPF_F_LINK &&
118368b04864SKui-Feng Lee 		/* Pair with smp_store_release() during map_update */
1184612d087dSKui-Feng Lee 		smp_load_acquire(&st_map->kvalue.common.state) == BPF_STRUCT_OPS_STATE_READY;
118568b04864SKui-Feng Lee }
118668b04864SKui-Feng Lee 
bpf_struct_ops_map_link_dealloc(struct bpf_link * link)118768b04864SKui-Feng Lee static void bpf_struct_ops_map_link_dealloc(struct bpf_link *link)
118868b04864SKui-Feng Lee {
118968b04864SKui-Feng Lee 	struct bpf_struct_ops_link *st_link;
119068b04864SKui-Feng Lee 	struct bpf_struct_ops_map *st_map;
119168b04864SKui-Feng Lee 
119268b04864SKui-Feng Lee 	st_link = container_of(link, struct bpf_struct_ops_link, link);
119368b04864SKui-Feng Lee 	st_map = (struct bpf_struct_ops_map *)
119468b04864SKui-Feng Lee 		rcu_dereference_protected(st_link->map, true);
119568b04864SKui-Feng Lee 	if (st_map) {
119673287fe2SKui-Feng Lee 		st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, link);
119768b04864SKui-Feng Lee 		bpf_map_put(&st_map->map);
119868b04864SKui-Feng Lee 	}
119968b04864SKui-Feng Lee 	kfree(st_link);
120068b04864SKui-Feng Lee }
120168b04864SKui-Feng Lee 
bpf_struct_ops_map_link_show_fdinfo(const struct bpf_link * link,struct seq_file * seq)120268b04864SKui-Feng Lee static void bpf_struct_ops_map_link_show_fdinfo(const struct bpf_link *link,
120368b04864SKui-Feng Lee 					    struct seq_file *seq)
120468b04864SKui-Feng Lee {
120568b04864SKui-Feng Lee 	struct bpf_struct_ops_link *st_link;
120668b04864SKui-Feng Lee 	struct bpf_map *map;
120768b04864SKui-Feng Lee 
120868b04864SKui-Feng Lee 	st_link = container_of(link, struct bpf_struct_ops_link, link);
120968b04864SKui-Feng Lee 	rcu_read_lock();
121068b04864SKui-Feng Lee 	map = rcu_dereference(st_link->map);
12116fb2544eSKui-Feng Lee 	if (map)
121268b04864SKui-Feng Lee 		seq_printf(seq, "map_id:\t%d\n", map->id);
121368b04864SKui-Feng Lee 	rcu_read_unlock();
121468b04864SKui-Feng Lee }
121568b04864SKui-Feng Lee 
bpf_struct_ops_map_link_fill_link_info(const struct bpf_link * link,struct bpf_link_info * info)121668b04864SKui-Feng Lee static int bpf_struct_ops_map_link_fill_link_info(const struct bpf_link *link,
121768b04864SKui-Feng Lee 					       struct bpf_link_info *info)
121868b04864SKui-Feng Lee {
121968b04864SKui-Feng Lee 	struct bpf_struct_ops_link *st_link;
122068b04864SKui-Feng Lee 	struct bpf_map *map;
122168b04864SKui-Feng Lee 
122268b04864SKui-Feng Lee 	st_link = container_of(link, struct bpf_struct_ops_link, link);
122368b04864SKui-Feng Lee 	rcu_read_lock();
122468b04864SKui-Feng Lee 	map = rcu_dereference(st_link->map);
12256fb2544eSKui-Feng Lee 	if (map)
122668b04864SKui-Feng Lee 		info->struct_ops.map_id = map->id;
122768b04864SKui-Feng Lee 	rcu_read_unlock();
122868b04864SKui-Feng Lee 	return 0;
122968b04864SKui-Feng Lee }
123068b04864SKui-Feng Lee 
bpf_struct_ops_map_link_update(struct bpf_link * link,struct bpf_map * new_map,struct bpf_map * expected_old_map)1231aef56f2eSKui-Feng Lee static int bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map *new_map,
1232aef56f2eSKui-Feng Lee 					  struct bpf_map *expected_old_map)
1233aef56f2eSKui-Feng Lee {
1234aef56f2eSKui-Feng Lee 	struct bpf_struct_ops_map *st_map, *old_st_map;
1235aef56f2eSKui-Feng Lee 	struct bpf_map *old_map;
1236aef56f2eSKui-Feng Lee 	struct bpf_struct_ops_link *st_link;
12375964d1e4SLi kunyu 	int err;
1238aef56f2eSKui-Feng Lee 
1239aef56f2eSKui-Feng Lee 	st_link = container_of(link, struct bpf_struct_ops_link, link);
1240aef56f2eSKui-Feng Lee 	st_map = container_of(new_map, struct bpf_struct_ops_map, map);
1241aef56f2eSKui-Feng Lee 
1242aef56f2eSKui-Feng Lee 	if (!bpf_struct_ops_valid_to_reg(new_map))
1243aef56f2eSKui-Feng Lee 		return -EINVAL;
1244aef56f2eSKui-Feng Lee 
12454c5763edSKui-Feng Lee 	if (!st_map->st_ops_desc->st_ops->update)
12468ba651edSDavid Vernet 		return -EOPNOTSUPP;
12478ba651edSDavid Vernet 
1248aef56f2eSKui-Feng Lee 	mutex_lock(&update_mutex);
1249aef56f2eSKui-Feng Lee 
1250aef56f2eSKui-Feng Lee 	old_map = rcu_dereference_protected(st_link->map, lockdep_is_held(&update_mutex));
12516fb2544eSKui-Feng Lee 	if (!old_map) {
12526fb2544eSKui-Feng Lee 		err = -ENOLINK;
12536fb2544eSKui-Feng Lee 		goto err_out;
12546fb2544eSKui-Feng Lee 	}
1255aef56f2eSKui-Feng Lee 	if (expected_old_map && old_map != expected_old_map) {
1256aef56f2eSKui-Feng Lee 		err = -EPERM;
1257aef56f2eSKui-Feng Lee 		goto err_out;
1258aef56f2eSKui-Feng Lee 	}
1259aef56f2eSKui-Feng Lee 
1260aef56f2eSKui-Feng Lee 	old_st_map = container_of(old_map, struct bpf_struct_ops_map, map);
1261aef56f2eSKui-Feng Lee 	/* The new and old struct_ops must be the same type. */
12624c5763edSKui-Feng Lee 	if (st_map->st_ops_desc != old_st_map->st_ops_desc) {
1263aef56f2eSKui-Feng Lee 		err = -EINVAL;
1264aef56f2eSKui-Feng Lee 		goto err_out;
1265aef56f2eSKui-Feng Lee 	}
1266aef56f2eSKui-Feng Lee 
126773287fe2SKui-Feng Lee 	err = st_map->st_ops_desc->st_ops->update(st_map->kvalue.data, old_st_map->kvalue.data, link);
1268aef56f2eSKui-Feng Lee 	if (err)
1269aef56f2eSKui-Feng Lee 		goto err_out;
1270aef56f2eSKui-Feng Lee 
1271aef56f2eSKui-Feng Lee 	bpf_map_inc(new_map);
1272aef56f2eSKui-Feng Lee 	rcu_assign_pointer(st_link->map, new_map);
1273aef56f2eSKui-Feng Lee 	bpf_map_put(old_map);
1274aef56f2eSKui-Feng Lee 
1275aef56f2eSKui-Feng Lee err_out:
1276aef56f2eSKui-Feng Lee 	mutex_unlock(&update_mutex);
1277aef56f2eSKui-Feng Lee 
1278aef56f2eSKui-Feng Lee 	return err;
1279aef56f2eSKui-Feng Lee }
1280aef56f2eSKui-Feng Lee 
bpf_struct_ops_map_link_detach(struct bpf_link * link)12816fb2544eSKui-Feng Lee static int bpf_struct_ops_map_link_detach(struct bpf_link *link)
12826fb2544eSKui-Feng Lee {
12836fb2544eSKui-Feng Lee 	struct bpf_struct_ops_link *st_link = container_of(link, struct bpf_struct_ops_link, link);
12846fb2544eSKui-Feng Lee 	struct bpf_struct_ops_map *st_map;
12856fb2544eSKui-Feng Lee 	struct bpf_map *map;
12866fb2544eSKui-Feng Lee 
12876fb2544eSKui-Feng Lee 	mutex_lock(&update_mutex);
12886fb2544eSKui-Feng Lee 
12896fb2544eSKui-Feng Lee 	map = rcu_dereference_protected(st_link->map, lockdep_is_held(&update_mutex));
12906fb2544eSKui-Feng Lee 	if (!map) {
12916fb2544eSKui-Feng Lee 		mutex_unlock(&update_mutex);
12926fb2544eSKui-Feng Lee 		return 0;
12936fb2544eSKui-Feng Lee 	}
12946fb2544eSKui-Feng Lee 	st_map = container_of(map, struct bpf_struct_ops_map, map);
12956fb2544eSKui-Feng Lee 
12966fb2544eSKui-Feng Lee 	st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, link);
12976fb2544eSKui-Feng Lee 
12986fb2544eSKui-Feng Lee 	RCU_INIT_POINTER(st_link->map, NULL);
12996fb2544eSKui-Feng Lee 	/* Pair with bpf_map_get() in bpf_struct_ops_link_create() or
13006fb2544eSKui-Feng Lee 	 * bpf_map_inc() in bpf_struct_ops_map_link_update().
13016fb2544eSKui-Feng Lee 	 */
13026fb2544eSKui-Feng Lee 	bpf_map_put(&st_map->map);
13036fb2544eSKui-Feng Lee 
13046fb2544eSKui-Feng Lee 	mutex_unlock(&update_mutex);
13056fb2544eSKui-Feng Lee 
13061adddc97SKui-Feng Lee 	wake_up_interruptible_poll(&st_link->wait_hup, EPOLLHUP);
13071adddc97SKui-Feng Lee 
13086fb2544eSKui-Feng Lee 	return 0;
13096fb2544eSKui-Feng Lee }
13106fb2544eSKui-Feng Lee 
bpf_struct_ops_map_link_poll(struct file * file,struct poll_table_struct * pts)13111adddc97SKui-Feng Lee static __poll_t bpf_struct_ops_map_link_poll(struct file *file,
13121adddc97SKui-Feng Lee 					     struct poll_table_struct *pts)
13131adddc97SKui-Feng Lee {
13141adddc97SKui-Feng Lee 	struct bpf_struct_ops_link *st_link = file->private_data;
13151adddc97SKui-Feng Lee 
13161adddc97SKui-Feng Lee 	poll_wait(file, &st_link->wait_hup, pts);
13171adddc97SKui-Feng Lee 
13181adddc97SKui-Feng Lee 	return rcu_access_pointer(st_link->map) ? 0 : EPOLLHUP;
13191adddc97SKui-Feng Lee }
13201adddc97SKui-Feng Lee 
132168b04864SKui-Feng Lee static const struct bpf_link_ops bpf_struct_ops_map_lops = {
132268b04864SKui-Feng Lee 	.dealloc = bpf_struct_ops_map_link_dealloc,
13236fb2544eSKui-Feng Lee 	.detach = bpf_struct_ops_map_link_detach,
132468b04864SKui-Feng Lee 	.show_fdinfo = bpf_struct_ops_map_link_show_fdinfo,
132568b04864SKui-Feng Lee 	.fill_link_info = bpf_struct_ops_map_link_fill_link_info,
1326aef56f2eSKui-Feng Lee 	.update_map = bpf_struct_ops_map_link_update,
13271adddc97SKui-Feng Lee 	.poll = bpf_struct_ops_map_link_poll,
132868b04864SKui-Feng Lee };
132968b04864SKui-Feng Lee 
bpf_struct_ops_link_create(union bpf_attr * attr)133068b04864SKui-Feng Lee int bpf_struct_ops_link_create(union bpf_attr *attr)
133168b04864SKui-Feng Lee {
133268b04864SKui-Feng Lee 	struct bpf_struct_ops_link *link = NULL;
133368b04864SKui-Feng Lee 	struct bpf_link_primer link_primer;
133468b04864SKui-Feng Lee 	struct bpf_struct_ops_map *st_map;
133568b04864SKui-Feng Lee 	struct bpf_map *map;
133668b04864SKui-Feng Lee 	int err;
133768b04864SKui-Feng Lee 
133868b04864SKui-Feng Lee 	map = bpf_map_get(attr->link_create.map_fd);
133955fbae05SMartin KaFai Lau 	if (IS_ERR(map))
134055fbae05SMartin KaFai Lau 		return PTR_ERR(map);
134168b04864SKui-Feng Lee 
134268b04864SKui-Feng Lee 	st_map = (struct bpf_struct_ops_map *)map;
134368b04864SKui-Feng Lee 
134468b04864SKui-Feng Lee 	if (!bpf_struct_ops_valid_to_reg(map)) {
134568b04864SKui-Feng Lee 		err = -EINVAL;
134668b04864SKui-Feng Lee 		goto err_out;
134768b04864SKui-Feng Lee 	}
134868b04864SKui-Feng Lee 
134968b04864SKui-Feng Lee 	link = kzalloc(sizeof(*link), GFP_USER);
135068b04864SKui-Feng Lee 	if (!link) {
135168b04864SKui-Feng Lee 		err = -ENOMEM;
135268b04864SKui-Feng Lee 		goto err_out;
135368b04864SKui-Feng Lee 	}
135468b04864SKui-Feng Lee 	bpf_link_init(&link->link, BPF_LINK_TYPE_STRUCT_OPS, &bpf_struct_ops_map_lops, NULL);
135568b04864SKui-Feng Lee 
135668b04864SKui-Feng Lee 	err = bpf_link_prime(&link->link, &link_primer);
135768b04864SKui-Feng Lee 	if (err)
135868b04864SKui-Feng Lee 		goto err_out;
135968b04864SKui-Feng Lee 
13601adddc97SKui-Feng Lee 	init_waitqueue_head(&link->wait_hup);
13611adddc97SKui-Feng Lee 
13626fb2544eSKui-Feng Lee 	/* Hold the update_mutex such that the subsystem cannot
13636fb2544eSKui-Feng Lee 	 * do link->ops->detach() before the link is fully initialized.
13646fb2544eSKui-Feng Lee 	 */
13656fb2544eSKui-Feng Lee 	mutex_lock(&update_mutex);
136673287fe2SKui-Feng Lee 	err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data, &link->link);
136768b04864SKui-Feng Lee 	if (err) {
13686fb2544eSKui-Feng Lee 		mutex_unlock(&update_mutex);
136968b04864SKui-Feng Lee 		bpf_link_cleanup(&link_primer);
137068b04864SKui-Feng Lee 		link = NULL;
137168b04864SKui-Feng Lee 		goto err_out;
137268b04864SKui-Feng Lee 	}
137368b04864SKui-Feng Lee 	RCU_INIT_POINTER(link->map, map);
13746fb2544eSKui-Feng Lee 	mutex_unlock(&update_mutex);
137568b04864SKui-Feng Lee 
137668b04864SKui-Feng Lee 	return bpf_link_settle(&link_primer);
137768b04864SKui-Feng Lee 
137868b04864SKui-Feng Lee err_out:
137968b04864SKui-Feng Lee 	bpf_map_put(map);
138068b04864SKui-Feng Lee 	kfree(link);
138168b04864SKui-Feng Lee 	return err;
138268b04864SKui-Feng Lee }
13831338b933SKui-Feng Lee 
bpf_map_struct_ops_info_fill(struct bpf_map_info * info,struct bpf_map * map)13841338b933SKui-Feng Lee void bpf_map_struct_ops_info_fill(struct bpf_map_info *info, struct bpf_map *map)
13851338b933SKui-Feng Lee {
13861338b933SKui-Feng Lee 	struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
13871338b933SKui-Feng Lee 
13881338b933SKui-Feng Lee 	info->btf_vmlinux_id = btf_obj_id(st_map->btf);
13891338b933SKui-Feng Lee }
1390