1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #ifndef _BPF_CGROUP_H 3 #define _BPF_CGROUP_H 4 5 #include <linux/bpf.h> 6 #include <linux/errno.h> 7 #include <linux/jump_label.h> 8 #include <linux/percpu.h> 9 #include <linux/percpu-refcount.h> 10 #include <linux/rbtree.h> 11 #include <uapi/linux/bpf.h> 12 13 struct sock; 14 struct sockaddr; 15 struct cgroup; 16 struct sk_buff; 17 struct bpf_map; 18 struct bpf_prog; 19 struct bpf_sock_ops_kern; 20 struct bpf_cgroup_storage; 21 struct ctl_table; 22 struct ctl_table_header; 23 struct task_struct; 24 25 #ifdef CONFIG_CGROUP_BPF 26 27 extern struct static_key_false cgroup_bpf_enabled_key[MAX_BPF_ATTACH_TYPE]; 28 #define cgroup_bpf_enabled(type) static_branch_unlikely(&cgroup_bpf_enabled_key[type]) 29 30 #define for_each_cgroup_storage_type(stype) \ 31 for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++) 32 33 struct bpf_cgroup_storage_map; 34 35 struct bpf_storage_buffer { 36 struct rcu_head rcu; 37 char data[]; 38 }; 39 40 struct bpf_cgroup_storage { 41 union { 42 struct bpf_storage_buffer *buf; 43 void __percpu *percpu_buf; 44 }; 45 struct bpf_cgroup_storage_map *map; 46 struct bpf_cgroup_storage_key key; 47 struct list_head list_map; 48 struct list_head list_cg; 49 struct rb_node node; 50 struct rcu_head rcu; 51 }; 52 53 struct bpf_cgroup_link { 54 struct bpf_link link; 55 struct cgroup *cgroup; 56 enum bpf_attach_type type; 57 }; 58 59 struct bpf_prog_list { 60 struct list_head node; 61 struct bpf_prog *prog; 62 struct bpf_cgroup_link *link; 63 struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]; 64 }; 65 66 struct bpf_prog_array; 67 68 struct cgroup_bpf { 69 /* array of effective progs in this cgroup */ 70 struct bpf_prog_array __rcu *effective[MAX_BPF_ATTACH_TYPE]; 71 72 /* attached progs to this cgroup and attach flags 73 * when flags == 0 or BPF_F_ALLOW_OVERRIDE the progs list will 74 * have either zero or one element 75 * when BPF_F_ALLOW_MULTI the list can have up to BPF_CGROUP_MAX_PROGS 76 */ 77 struct list_head progs[MAX_BPF_ATTACH_TYPE]; 78 u32 flags[MAX_BPF_ATTACH_TYPE]; 79 80 /* list of cgroup shared storages */ 81 struct list_head storages; 82 83 /* temp storage for effective prog array used by prog_attach/detach */ 84 struct bpf_prog_array *inactive; 85 86 /* reference counter used to detach bpf programs after cgroup removal */ 87 struct percpu_ref refcnt; 88 89 /* cgroup_bpf is released using a work queue */ 90 struct work_struct release_work; 91 }; 92 93 int cgroup_bpf_inherit(struct cgroup *cgrp); 94 void cgroup_bpf_offline(struct cgroup *cgrp); 95 96 int __cgroup_bpf_attach(struct cgroup *cgrp, 97 struct bpf_prog *prog, struct bpf_prog *replace_prog, 98 struct bpf_cgroup_link *link, 99 enum bpf_attach_type type, u32 flags); 100 int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, 101 struct bpf_cgroup_link *link, 102 enum bpf_attach_type type); 103 int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, 104 union bpf_attr __user *uattr); 105 106 /* Wrapper for __cgroup_bpf_*() protected by cgroup_mutex */ 107 int cgroup_bpf_attach(struct cgroup *cgrp, 108 struct bpf_prog *prog, struct bpf_prog *replace_prog, 109 struct bpf_cgroup_link *link, enum bpf_attach_type type, 110 u32 flags); 111 int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, 112 enum bpf_attach_type type); 113 int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, 114 union bpf_attr __user *uattr); 115 116 int __cgroup_bpf_run_filter_skb(struct sock *sk, 117 struct sk_buff *skb, 118 enum bpf_attach_type type); 119 120 int __cgroup_bpf_run_filter_sk(struct sock *sk, 121 enum bpf_attach_type type); 122 123 int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, 124 struct sockaddr *uaddr, 125 enum bpf_attach_type type, 126 void *t_ctx, 127 u32 *flags); 128 129 int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, 130 struct bpf_sock_ops_kern *sock_ops, 131 enum bpf_attach_type type); 132 133 int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, 134 short access, enum bpf_attach_type type); 135 136 int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, 137 struct ctl_table *table, int write, 138 char **buf, size_t *pcount, loff_t *ppos, 139 enum bpf_attach_type type); 140 141 int __cgroup_bpf_run_filter_setsockopt(struct sock *sock, int *level, 142 int *optname, char __user *optval, 143 int *optlen, char **kernel_optval); 144 int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, 145 int optname, char __user *optval, 146 int __user *optlen, int max_optlen, 147 int retval); 148 149 int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level, 150 int optname, void *optval, 151 int *optlen, int retval); 152 153 static inline enum bpf_cgroup_storage_type cgroup_storage_type( 154 struct bpf_map *map) 155 { 156 if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) 157 return BPF_CGROUP_STORAGE_PERCPU; 158 159 return BPF_CGROUP_STORAGE_SHARED; 160 } 161 162 struct bpf_cgroup_storage * 163 cgroup_storage_lookup(struct bpf_cgroup_storage_map *map, 164 void *key, bool locked); 165 struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog, 166 enum bpf_cgroup_storage_type stype); 167 void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage); 168 void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage, 169 struct cgroup *cgroup, 170 enum bpf_attach_type type); 171 void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage); 172 int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, struct bpf_map *map); 173 174 int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, void *value); 175 int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, 176 void *value, u64 flags); 177 178 /* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */ 179 #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \ 180 ({ \ 181 int __ret = 0; \ 182 if (cgroup_bpf_enabled(BPF_CGROUP_INET_INGRESS)) \ 183 __ret = __cgroup_bpf_run_filter_skb(sk, skb, \ 184 BPF_CGROUP_INET_INGRESS); \ 185 \ 186 __ret; \ 187 }) 188 189 #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb) \ 190 ({ \ 191 int __ret = 0; \ 192 if (cgroup_bpf_enabled(BPF_CGROUP_INET_EGRESS) && sk && sk == skb->sk) { \ 193 typeof(sk) __sk = sk_to_full_sk(sk); \ 194 if (sk_fullsock(__sk)) \ 195 __ret = __cgroup_bpf_run_filter_skb(__sk, skb, \ 196 BPF_CGROUP_INET_EGRESS); \ 197 } \ 198 __ret; \ 199 }) 200 201 #define BPF_CGROUP_RUN_SK_PROG(sk, type) \ 202 ({ \ 203 int __ret = 0; \ 204 if (cgroup_bpf_enabled(type)) { \ 205 __ret = __cgroup_bpf_run_filter_sk(sk, type); \ 206 } \ 207 __ret; \ 208 }) 209 210 #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) \ 211 BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET_SOCK_CREATE) 212 213 #define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) \ 214 BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET_SOCK_RELEASE) 215 216 #define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) \ 217 BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET4_POST_BIND) 218 219 #define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) \ 220 BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET6_POST_BIND) 221 222 #define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, type) \ 223 ({ \ 224 u32 __unused_flags; \ 225 int __ret = 0; \ 226 if (cgroup_bpf_enabled(type)) \ 227 __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type, \ 228 NULL, \ 229 &__unused_flags); \ 230 __ret; \ 231 }) 232 233 #define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type, t_ctx) \ 234 ({ \ 235 u32 __unused_flags; \ 236 int __ret = 0; \ 237 if (cgroup_bpf_enabled(type)) { \ 238 lock_sock(sk); \ 239 __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type, \ 240 t_ctx, \ 241 &__unused_flags); \ 242 release_sock(sk); \ 243 } \ 244 __ret; \ 245 }) 246 247 /* BPF_CGROUP_INET4_BIND and BPF_CGROUP_INET6_BIND can return extra flags 248 * via upper bits of return code. The only flag that is supported 249 * (at bit position 0) is to indicate CAP_NET_BIND_SERVICE capability check 250 * should be bypassed (BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE). 251 */ 252 #define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, type, bind_flags) \ 253 ({ \ 254 u32 __flags = 0; \ 255 int __ret = 0; \ 256 if (cgroup_bpf_enabled(type)) { \ 257 lock_sock(sk); \ 258 __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type, \ 259 NULL, &__flags); \ 260 release_sock(sk); \ 261 if (__flags & BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE) \ 262 *bind_flags |= BIND_NO_CAP_NET_BIND_SERVICE; \ 263 } \ 264 __ret; \ 265 }) 266 267 #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) \ 268 ((cgroup_bpf_enabled(BPF_CGROUP_INET4_CONNECT) || \ 269 cgroup_bpf_enabled(BPF_CGROUP_INET6_CONNECT)) && \ 270 (sk)->sk_prot->pre_connect) 271 272 #define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) \ 273 BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET4_CONNECT) 274 275 #define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) \ 276 BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET6_CONNECT) 277 278 #define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) \ 279 BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_CONNECT, NULL) 280 281 #define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) \ 282 BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_CONNECT, NULL) 283 284 #define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) \ 285 BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP4_SENDMSG, t_ctx) 286 287 #define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) \ 288 BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP6_SENDMSG, t_ctx) 289 290 #define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) \ 291 BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP4_RECVMSG, NULL) 292 293 #define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) \ 294 BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP6_RECVMSG, NULL) 295 296 /* The SOCK_OPS"_SK" macro should be used when sock_ops->sk is not a 297 * fullsock and its parent fullsock cannot be traced by 298 * sk_to_full_sk(). 299 * 300 * e.g. sock_ops->sk is a request_sock and it is under syncookie mode. 301 * Its listener-sk is not attached to the rsk_listener. 302 * In this case, the caller holds the listener-sk (unlocked), 303 * set its sock_ops->sk to req_sk, and call this SOCK_OPS"_SK" with 304 * the listener-sk such that the cgroup-bpf-progs of the 305 * listener-sk will be run. 306 * 307 * Regardless of syncookie mode or not, 308 * calling bpf_setsockopt on listener-sk will not make sense anyway, 309 * so passing 'sock_ops->sk == req_sk' to the bpf prog is appropriate here. 310 */ 311 #define BPF_CGROUP_RUN_PROG_SOCK_OPS_SK(sock_ops, sk) \ 312 ({ \ 313 int __ret = 0; \ 314 if (cgroup_bpf_enabled(BPF_CGROUP_SOCK_OPS)) \ 315 __ret = __cgroup_bpf_run_filter_sock_ops(sk, \ 316 sock_ops, \ 317 BPF_CGROUP_SOCK_OPS); \ 318 __ret; \ 319 }) 320 321 #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) \ 322 ({ \ 323 int __ret = 0; \ 324 if (cgroup_bpf_enabled(BPF_CGROUP_SOCK_OPS) && (sock_ops)->sk) { \ 325 typeof(sk) __sk = sk_to_full_sk((sock_ops)->sk); \ 326 if (__sk && sk_fullsock(__sk)) \ 327 __ret = __cgroup_bpf_run_filter_sock_ops(__sk, \ 328 sock_ops, \ 329 BPF_CGROUP_SOCK_OPS); \ 330 } \ 331 __ret; \ 332 }) 333 334 #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type, major, minor, access) \ 335 ({ \ 336 int __ret = 0; \ 337 if (cgroup_bpf_enabled(BPF_CGROUP_DEVICE)) \ 338 __ret = __cgroup_bpf_check_dev_permission(type, major, minor, \ 339 access, \ 340 BPF_CGROUP_DEVICE); \ 341 \ 342 __ret; \ 343 }) 344 345 346 #define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, count, pos) \ 347 ({ \ 348 int __ret = 0; \ 349 if (cgroup_bpf_enabled(BPF_CGROUP_SYSCTL)) \ 350 __ret = __cgroup_bpf_run_filter_sysctl(head, table, write, \ 351 buf, count, pos, \ 352 BPF_CGROUP_SYSCTL); \ 353 __ret; \ 354 }) 355 356 #define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \ 357 kernel_optval) \ 358 ({ \ 359 int __ret = 0; \ 360 if (cgroup_bpf_enabled(BPF_CGROUP_SETSOCKOPT)) \ 361 __ret = __cgroup_bpf_run_filter_setsockopt(sock, level, \ 362 optname, optval, \ 363 optlen, \ 364 kernel_optval); \ 365 __ret; \ 366 }) 367 368 #define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) \ 369 ({ \ 370 int __ret = 0; \ 371 if (cgroup_bpf_enabled(BPF_CGROUP_GETSOCKOPT)) \ 372 get_user(__ret, optlen); \ 373 __ret; \ 374 }) 375 376 #define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, optlen, \ 377 max_optlen, retval) \ 378 ({ \ 379 int __ret = retval; \ 380 if (cgroup_bpf_enabled(BPF_CGROUP_GETSOCKOPT)) \ 381 if (!(sock)->sk_prot->bpf_bypass_getsockopt || \ 382 !INDIRECT_CALL_INET_1((sock)->sk_prot->bpf_bypass_getsockopt, \ 383 tcp_bpf_bypass_getsockopt, \ 384 level, optname)) \ 385 __ret = __cgroup_bpf_run_filter_getsockopt( \ 386 sock, level, optname, optval, optlen, \ 387 max_optlen, retval); \ 388 __ret; \ 389 }) 390 391 #define BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sock, level, optname, optval, \ 392 optlen, retval) \ 393 ({ \ 394 int __ret = retval; \ 395 if (cgroup_bpf_enabled(BPF_CGROUP_GETSOCKOPT)) \ 396 __ret = __cgroup_bpf_run_filter_getsockopt_kern( \ 397 sock, level, optname, optval, optlen, retval); \ 398 __ret; \ 399 }) 400 401 int cgroup_bpf_prog_attach(const union bpf_attr *attr, 402 enum bpf_prog_type ptype, struct bpf_prog *prog); 403 int cgroup_bpf_prog_detach(const union bpf_attr *attr, 404 enum bpf_prog_type ptype); 405 int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); 406 int cgroup_bpf_prog_query(const union bpf_attr *attr, 407 union bpf_attr __user *uattr); 408 #else 409 410 struct cgroup_bpf {}; 411 static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; } 412 static inline void cgroup_bpf_offline(struct cgroup *cgrp) {} 413 414 static inline int cgroup_bpf_prog_attach(const union bpf_attr *attr, 415 enum bpf_prog_type ptype, 416 struct bpf_prog *prog) 417 { 418 return -EINVAL; 419 } 420 421 static inline int cgroup_bpf_prog_detach(const union bpf_attr *attr, 422 enum bpf_prog_type ptype) 423 { 424 return -EINVAL; 425 } 426 427 static inline int cgroup_bpf_link_attach(const union bpf_attr *attr, 428 struct bpf_prog *prog) 429 { 430 return -EINVAL; 431 } 432 433 static inline int cgroup_bpf_prog_query(const union bpf_attr *attr, 434 union bpf_attr __user *uattr) 435 { 436 return -EINVAL; 437 } 438 439 static inline int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, 440 struct bpf_map *map) { return 0; } 441 static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc( 442 struct bpf_prog *prog, enum bpf_cgroup_storage_type stype) { return NULL; } 443 static inline void bpf_cgroup_storage_free( 444 struct bpf_cgroup_storage *storage) {} 445 static inline int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, 446 void *value) { 447 return 0; 448 } 449 static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, 450 void *key, void *value, u64 flags) { 451 return 0; 452 } 453 454 #define cgroup_bpf_enabled(type) (0) 455 #define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type, t_ctx) ({ 0; }) 456 #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0) 457 #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; }) 458 #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) 459 #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; }) 460 #define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) ({ 0; }) 461 #define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, type, flags) ({ 0; }) 462 #define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; }) 463 #define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; }) 464 #define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; }) 465 #define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) ({ 0; }) 466 #define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) ({ 0; }) 467 #define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) ({ 0; }) 468 #define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; }) 469 #define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; }) 470 #define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) ({ 0; }) 471 #define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) ({ 0; }) 472 #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) 473 #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; }) 474 #define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos) ({ 0; }) 475 #define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) ({ 0; }) 476 #define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, \ 477 optlen, max_optlen, retval) ({ retval; }) 478 #define BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sock, level, optname, optval, \ 479 optlen, retval) ({ retval; }) 480 #define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \ 481 kernel_optval) ({ 0; }) 482 483 #define for_each_cgroup_storage_type(stype) for (; false; ) 484 485 #endif /* CONFIG_CGROUP_BPF */ 486 487 #endif /* _BPF_CGROUP_H */ 488