1 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com 2 * 3 * This program is free software; you can redistribute it and/or 4 * modify it under the terms of version 2 of the GNU General Public 5 * License as published by the Free Software Foundation. 6 * 7 * This program is distributed in the hope that it will be useful, but 8 * WITHOUT ANY WARRANTY; without even the implied warranty of 9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 10 * General Public License for more details. 11 */ 12 #include <linux/bpf.h> 13 #include <linux/bpf_trace.h> 14 #include <linux/syscalls.h> 15 #include <linux/slab.h> 16 #include <linux/sched/signal.h> 17 #include <linux/vmalloc.h> 18 #include <linux/mmzone.h> 19 #include <linux/anon_inodes.h> 20 #include <linux/file.h> 21 #include <linux/license.h> 22 #include <linux/filter.h> 23 #include <linux/version.h> 24 #include <linux/kernel.h> 25 #include <linux/idr.h> 26 #include <linux/cred.h> 27 #include <linux/timekeeping.h> 28 #include <linux/ctype.h> 29 30 #define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \ 31 (map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \ 32 (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \ 33 (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) 34 #define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) 35 #define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_HASH(map)) 36 37 #define BPF_OBJ_FLAG_MASK (BPF_F_RDONLY | BPF_F_WRONLY) 38 39 DEFINE_PER_CPU(int, bpf_prog_active); 40 static DEFINE_IDR(prog_idr); 41 static DEFINE_SPINLOCK(prog_idr_lock); 42 static DEFINE_IDR(map_idr); 43 static DEFINE_SPINLOCK(map_idr_lock); 44 45 int sysctl_unprivileged_bpf_disabled __read_mostly; 46 47 static const struct bpf_map_ops * const bpf_map_types[] = { 48 #define BPF_PROG_TYPE(_id, _ops) 49 #define BPF_MAP_TYPE(_id, _ops) \ 50 [_id] = &_ops, 51 #include <linux/bpf_types.h> 52 #undef BPF_PROG_TYPE 53 #undef BPF_MAP_TYPE 54 }; 55 56 /* 57 * If we're handed a bigger struct than we know of, ensure all the unknown bits 58 * are 0 - i.e. new user-space does not rely on any kernel feature extensions 59 * we don't know about yet. 60 * 61 * There is a ToCToU between this function call and the following 62 * copy_from_user() call. However, this is not a concern since this function is 63 * meant to be a future-proofing of bits. 64 */ 65 static int check_uarg_tail_zero(void __user *uaddr, 66 size_t expected_size, 67 size_t actual_size) 68 { 69 unsigned char __user *addr; 70 unsigned char __user *end; 71 unsigned char val; 72 int err; 73 74 if (unlikely(actual_size > PAGE_SIZE)) /* silly large */ 75 return -E2BIG; 76 77 if (unlikely(!access_ok(VERIFY_READ, uaddr, actual_size))) 78 return -EFAULT; 79 80 if (actual_size <= expected_size) 81 return 0; 82 83 addr = uaddr + expected_size; 84 end = uaddr + actual_size; 85 86 for (; addr < end; addr++) { 87 err = get_user(val, addr); 88 if (err) 89 return err; 90 if (val) 91 return -E2BIG; 92 } 93 94 return 0; 95 } 96 97 const struct bpf_map_ops bpf_map_offload_ops = { 98 .map_alloc = bpf_map_offload_map_alloc, 99 .map_free = bpf_map_offload_map_free, 100 }; 101 102 static struct bpf_map *find_and_alloc_map(union bpf_attr *attr) 103 { 104 const struct bpf_map_ops *ops; 105 struct bpf_map *map; 106 int err; 107 108 if (attr->map_type >= ARRAY_SIZE(bpf_map_types)) 109 return ERR_PTR(-EINVAL); 110 ops = bpf_map_types[attr->map_type]; 111 if (!ops) 112 return ERR_PTR(-EINVAL); 113 114 if (ops->map_alloc_check) { 115 err = ops->map_alloc_check(attr); 116 if (err) 117 return ERR_PTR(err); 118 } 119 if (attr->map_ifindex) 120 ops = &bpf_map_offload_ops; 121 map = ops->map_alloc(attr); 122 if (IS_ERR(map)) 123 return map; 124 map->ops = ops; 125 map->map_type = attr->map_type; 126 return map; 127 } 128 129 void *bpf_map_area_alloc(size_t size, int numa_node) 130 { 131 /* We definitely need __GFP_NORETRY, so OOM killer doesn't 132 * trigger under memory pressure as we really just want to 133 * fail instead. 134 */ 135 const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO; 136 void *area; 137 138 if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) { 139 area = kmalloc_node(size, GFP_USER | flags, numa_node); 140 if (area != NULL) 141 return area; 142 } 143 144 return __vmalloc_node_flags_caller(size, numa_node, GFP_KERNEL | flags, 145 __builtin_return_address(0)); 146 } 147 148 void bpf_map_area_free(void *area) 149 { 150 kvfree(area); 151 } 152 153 void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr) 154 { 155 map->map_type = attr->map_type; 156 map->key_size = attr->key_size; 157 map->value_size = attr->value_size; 158 map->max_entries = attr->max_entries; 159 map->map_flags = attr->map_flags; 160 map->numa_node = bpf_map_attr_numa_node(attr); 161 } 162 163 int bpf_map_precharge_memlock(u32 pages) 164 { 165 struct user_struct *user = get_current_user(); 166 unsigned long memlock_limit, cur; 167 168 memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; 169 cur = atomic_long_read(&user->locked_vm); 170 free_uid(user); 171 if (cur + pages > memlock_limit) 172 return -EPERM; 173 return 0; 174 } 175 176 static int bpf_map_charge_memlock(struct bpf_map *map) 177 { 178 struct user_struct *user = get_current_user(); 179 unsigned long memlock_limit; 180 181 memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; 182 183 atomic_long_add(map->pages, &user->locked_vm); 184 185 if (atomic_long_read(&user->locked_vm) > memlock_limit) { 186 atomic_long_sub(map->pages, &user->locked_vm); 187 free_uid(user); 188 return -EPERM; 189 } 190 map->user = user; 191 return 0; 192 } 193 194 static void bpf_map_uncharge_memlock(struct bpf_map *map) 195 { 196 struct user_struct *user = map->user; 197 198 atomic_long_sub(map->pages, &user->locked_vm); 199 free_uid(user); 200 } 201 202 static int bpf_map_alloc_id(struct bpf_map *map) 203 { 204 int id; 205 206 spin_lock_bh(&map_idr_lock); 207 id = idr_alloc_cyclic(&map_idr, map, 1, INT_MAX, GFP_ATOMIC); 208 if (id > 0) 209 map->id = id; 210 spin_unlock_bh(&map_idr_lock); 211 212 if (WARN_ON_ONCE(!id)) 213 return -ENOSPC; 214 215 return id > 0 ? 0 : id; 216 } 217 218 void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock) 219 { 220 unsigned long flags; 221 222 /* Offloaded maps are removed from the IDR store when their device 223 * disappears - even if someone holds an fd to them they are unusable, 224 * the memory is gone, all ops will fail; they are simply waiting for 225 * refcnt to drop to be freed. 226 */ 227 if (!map->id) 228 return; 229 230 if (do_idr_lock) 231 spin_lock_irqsave(&map_idr_lock, flags); 232 else 233 __acquire(&map_idr_lock); 234 235 idr_remove(&map_idr, map->id); 236 map->id = 0; 237 238 if (do_idr_lock) 239 spin_unlock_irqrestore(&map_idr_lock, flags); 240 else 241 __release(&map_idr_lock); 242 } 243 244 /* called from workqueue */ 245 static void bpf_map_free_deferred(struct work_struct *work) 246 { 247 struct bpf_map *map = container_of(work, struct bpf_map, work); 248 249 bpf_map_uncharge_memlock(map); 250 security_bpf_map_free(map); 251 /* implementation dependent freeing */ 252 map->ops->map_free(map); 253 } 254 255 static void bpf_map_put_uref(struct bpf_map *map) 256 { 257 if (atomic_dec_and_test(&map->usercnt)) { 258 if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) 259 bpf_fd_array_map_clear(map); 260 } 261 } 262 263 /* decrement map refcnt and schedule it for freeing via workqueue 264 * (unrelying map implementation ops->map_free() might sleep) 265 */ 266 static void __bpf_map_put(struct bpf_map *map, bool do_idr_lock) 267 { 268 if (atomic_dec_and_test(&map->refcnt)) { 269 /* bpf_map_free_id() must be called first */ 270 bpf_map_free_id(map, do_idr_lock); 271 INIT_WORK(&map->work, bpf_map_free_deferred); 272 schedule_work(&map->work); 273 } 274 } 275 276 void bpf_map_put(struct bpf_map *map) 277 { 278 __bpf_map_put(map, true); 279 } 280 281 void bpf_map_put_with_uref(struct bpf_map *map) 282 { 283 bpf_map_put_uref(map); 284 bpf_map_put(map); 285 } 286 287 static int bpf_map_release(struct inode *inode, struct file *filp) 288 { 289 struct bpf_map *map = filp->private_data; 290 291 if (map->ops->map_release) 292 map->ops->map_release(map, filp); 293 294 bpf_map_put_with_uref(map); 295 return 0; 296 } 297 298 #ifdef CONFIG_PROC_FS 299 static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) 300 { 301 const struct bpf_map *map = filp->private_data; 302 const struct bpf_array *array; 303 u32 owner_prog_type = 0; 304 u32 owner_jited = 0; 305 306 if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) { 307 array = container_of(map, struct bpf_array, map); 308 owner_prog_type = array->owner_prog_type; 309 owner_jited = array->owner_jited; 310 } 311 312 seq_printf(m, 313 "map_type:\t%u\n" 314 "key_size:\t%u\n" 315 "value_size:\t%u\n" 316 "max_entries:\t%u\n" 317 "map_flags:\t%#x\n" 318 "memlock:\t%llu\n", 319 map->map_type, 320 map->key_size, 321 map->value_size, 322 map->max_entries, 323 map->map_flags, 324 map->pages * 1ULL << PAGE_SHIFT); 325 326 if (owner_prog_type) { 327 seq_printf(m, "owner_prog_type:\t%u\n", 328 owner_prog_type); 329 seq_printf(m, "owner_jited:\t%u\n", 330 owner_jited); 331 } 332 } 333 #endif 334 335 static ssize_t bpf_dummy_read(struct file *filp, char __user *buf, size_t siz, 336 loff_t *ppos) 337 { 338 /* We need this handler such that alloc_file() enables 339 * f_mode with FMODE_CAN_READ. 340 */ 341 return -EINVAL; 342 } 343 344 static ssize_t bpf_dummy_write(struct file *filp, const char __user *buf, 345 size_t siz, loff_t *ppos) 346 { 347 /* We need this handler such that alloc_file() enables 348 * f_mode with FMODE_CAN_WRITE. 349 */ 350 return -EINVAL; 351 } 352 353 const struct file_operations bpf_map_fops = { 354 #ifdef CONFIG_PROC_FS 355 .show_fdinfo = bpf_map_show_fdinfo, 356 #endif 357 .release = bpf_map_release, 358 .read = bpf_dummy_read, 359 .write = bpf_dummy_write, 360 }; 361 362 int bpf_map_new_fd(struct bpf_map *map, int flags) 363 { 364 int ret; 365 366 ret = security_bpf_map(map, OPEN_FMODE(flags)); 367 if (ret < 0) 368 return ret; 369 370 return anon_inode_getfd("bpf-map", &bpf_map_fops, map, 371 flags | O_CLOEXEC); 372 } 373 374 int bpf_get_file_flag(int flags) 375 { 376 if ((flags & BPF_F_RDONLY) && (flags & BPF_F_WRONLY)) 377 return -EINVAL; 378 if (flags & BPF_F_RDONLY) 379 return O_RDONLY; 380 if (flags & BPF_F_WRONLY) 381 return O_WRONLY; 382 return O_RDWR; 383 } 384 385 /* helper macro to check that unused fields 'union bpf_attr' are zero */ 386 #define CHECK_ATTR(CMD) \ 387 memchr_inv((void *) &attr->CMD##_LAST_FIELD + \ 388 sizeof(attr->CMD##_LAST_FIELD), 0, \ 389 sizeof(*attr) - \ 390 offsetof(union bpf_attr, CMD##_LAST_FIELD) - \ 391 sizeof(attr->CMD##_LAST_FIELD)) != NULL 392 393 /* dst and src must have at least BPF_OBJ_NAME_LEN number of bytes. 394 * Return 0 on success and < 0 on error. 395 */ 396 static int bpf_obj_name_cpy(char *dst, const char *src) 397 { 398 const char *end = src + BPF_OBJ_NAME_LEN; 399 400 memset(dst, 0, BPF_OBJ_NAME_LEN); 401 402 /* Copy all isalnum() and '_' char */ 403 while (src < end && *src) { 404 if (!isalnum(*src) && *src != '_') 405 return -EINVAL; 406 *dst++ = *src++; 407 } 408 409 /* No '\0' found in BPF_OBJ_NAME_LEN number of bytes */ 410 if (src == end) 411 return -EINVAL; 412 413 return 0; 414 } 415 416 #define BPF_MAP_CREATE_LAST_FIELD map_ifindex 417 /* called via syscall */ 418 static int map_create(union bpf_attr *attr) 419 { 420 int numa_node = bpf_map_attr_numa_node(attr); 421 struct bpf_map *map; 422 int f_flags; 423 int err; 424 425 err = CHECK_ATTR(BPF_MAP_CREATE); 426 if (err) 427 return -EINVAL; 428 429 f_flags = bpf_get_file_flag(attr->map_flags); 430 if (f_flags < 0) 431 return f_flags; 432 433 if (numa_node != NUMA_NO_NODE && 434 ((unsigned int)numa_node >= nr_node_ids || 435 !node_online(numa_node))) 436 return -EINVAL; 437 438 /* find map type and init map: hashtable vs rbtree vs bloom vs ... */ 439 map = find_and_alloc_map(attr); 440 if (IS_ERR(map)) 441 return PTR_ERR(map); 442 443 err = bpf_obj_name_cpy(map->name, attr->map_name); 444 if (err) 445 goto free_map_nouncharge; 446 447 atomic_set(&map->refcnt, 1); 448 atomic_set(&map->usercnt, 1); 449 450 err = security_bpf_map_alloc(map); 451 if (err) 452 goto free_map_nouncharge; 453 454 err = bpf_map_charge_memlock(map); 455 if (err) 456 goto free_map_sec; 457 458 err = bpf_map_alloc_id(map); 459 if (err) 460 goto free_map; 461 462 err = bpf_map_new_fd(map, f_flags); 463 if (err < 0) { 464 /* failed to allocate fd. 465 * bpf_map_put() is needed because the above 466 * bpf_map_alloc_id() has published the map 467 * to the userspace and the userspace may 468 * have refcnt-ed it through BPF_MAP_GET_FD_BY_ID. 469 */ 470 bpf_map_put(map); 471 return err; 472 } 473 474 trace_bpf_map_create(map, err); 475 return err; 476 477 free_map: 478 bpf_map_uncharge_memlock(map); 479 free_map_sec: 480 security_bpf_map_free(map); 481 free_map_nouncharge: 482 map->ops->map_free(map); 483 return err; 484 } 485 486 /* if error is returned, fd is released. 487 * On success caller should complete fd access with matching fdput() 488 */ 489 struct bpf_map *__bpf_map_get(struct fd f) 490 { 491 if (!f.file) 492 return ERR_PTR(-EBADF); 493 if (f.file->f_op != &bpf_map_fops) { 494 fdput(f); 495 return ERR_PTR(-EINVAL); 496 } 497 498 return f.file->private_data; 499 } 500 501 /* prog's and map's refcnt limit */ 502 #define BPF_MAX_REFCNT 32768 503 504 struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref) 505 { 506 if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) { 507 atomic_dec(&map->refcnt); 508 return ERR_PTR(-EBUSY); 509 } 510 if (uref) 511 atomic_inc(&map->usercnt); 512 return map; 513 } 514 515 struct bpf_map *bpf_map_get_with_uref(u32 ufd) 516 { 517 struct fd f = fdget(ufd); 518 struct bpf_map *map; 519 520 map = __bpf_map_get(f); 521 if (IS_ERR(map)) 522 return map; 523 524 map = bpf_map_inc(map, true); 525 fdput(f); 526 527 return map; 528 } 529 530 /* map_idr_lock should have been held */ 531 static struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map, 532 bool uref) 533 { 534 int refold; 535 536 refold = __atomic_add_unless(&map->refcnt, 1, 0); 537 538 if (refold >= BPF_MAX_REFCNT) { 539 __bpf_map_put(map, false); 540 return ERR_PTR(-EBUSY); 541 } 542 543 if (!refold) 544 return ERR_PTR(-ENOENT); 545 546 if (uref) 547 atomic_inc(&map->usercnt); 548 549 return map; 550 } 551 552 int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value) 553 { 554 return -ENOTSUPP; 555 } 556 557 /* last field in 'union bpf_attr' used by this command */ 558 #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value 559 560 static int map_lookup_elem(union bpf_attr *attr) 561 { 562 void __user *ukey = u64_to_user_ptr(attr->key); 563 void __user *uvalue = u64_to_user_ptr(attr->value); 564 int ufd = attr->map_fd; 565 struct bpf_map *map; 566 void *key, *value, *ptr; 567 u32 value_size; 568 struct fd f; 569 int err; 570 571 if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM)) 572 return -EINVAL; 573 574 f = fdget(ufd); 575 map = __bpf_map_get(f); 576 if (IS_ERR(map)) 577 return PTR_ERR(map); 578 579 if (!(f.file->f_mode & FMODE_CAN_READ)) { 580 err = -EPERM; 581 goto err_put; 582 } 583 584 key = memdup_user(ukey, map->key_size); 585 if (IS_ERR(key)) { 586 err = PTR_ERR(key); 587 goto err_put; 588 } 589 590 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 591 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || 592 map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) 593 value_size = round_up(map->value_size, 8) * num_possible_cpus(); 594 else if (IS_FD_MAP(map)) 595 value_size = sizeof(u32); 596 else 597 value_size = map->value_size; 598 599 err = -ENOMEM; 600 value = kmalloc(value_size, GFP_USER | __GFP_NOWARN); 601 if (!value) 602 goto free_key; 603 604 if (bpf_map_is_dev_bound(map)) { 605 err = bpf_map_offload_lookup_elem(map, key, value); 606 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 607 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { 608 err = bpf_percpu_hash_copy(map, key, value); 609 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { 610 err = bpf_percpu_array_copy(map, key, value); 611 } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) { 612 err = bpf_stackmap_copy(map, key, value); 613 } else if (IS_FD_ARRAY(map)) { 614 err = bpf_fd_array_map_lookup_elem(map, key, value); 615 } else if (IS_FD_HASH(map)) { 616 err = bpf_fd_htab_map_lookup_elem(map, key, value); 617 } else { 618 rcu_read_lock(); 619 ptr = map->ops->map_lookup_elem(map, key); 620 if (ptr) 621 memcpy(value, ptr, value_size); 622 rcu_read_unlock(); 623 err = ptr ? 0 : -ENOENT; 624 } 625 626 if (err) 627 goto free_value; 628 629 err = -EFAULT; 630 if (copy_to_user(uvalue, value, value_size) != 0) 631 goto free_value; 632 633 trace_bpf_map_lookup_elem(map, ufd, key, value); 634 err = 0; 635 636 free_value: 637 kfree(value); 638 free_key: 639 kfree(key); 640 err_put: 641 fdput(f); 642 return err; 643 } 644 645 #define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags 646 647 static int map_update_elem(union bpf_attr *attr) 648 { 649 void __user *ukey = u64_to_user_ptr(attr->key); 650 void __user *uvalue = u64_to_user_ptr(attr->value); 651 int ufd = attr->map_fd; 652 struct bpf_map *map; 653 void *key, *value; 654 u32 value_size; 655 struct fd f; 656 int err; 657 658 if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM)) 659 return -EINVAL; 660 661 f = fdget(ufd); 662 map = __bpf_map_get(f); 663 if (IS_ERR(map)) 664 return PTR_ERR(map); 665 666 if (!(f.file->f_mode & FMODE_CAN_WRITE)) { 667 err = -EPERM; 668 goto err_put; 669 } 670 671 key = memdup_user(ukey, map->key_size); 672 if (IS_ERR(key)) { 673 err = PTR_ERR(key); 674 goto err_put; 675 } 676 677 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 678 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || 679 map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) 680 value_size = round_up(map->value_size, 8) * num_possible_cpus(); 681 else 682 value_size = map->value_size; 683 684 err = -ENOMEM; 685 value = kmalloc(value_size, GFP_USER | __GFP_NOWARN); 686 if (!value) 687 goto free_key; 688 689 err = -EFAULT; 690 if (copy_from_user(value, uvalue, value_size) != 0) 691 goto free_value; 692 693 /* Need to create a kthread, thus must support schedule */ 694 if (bpf_map_is_dev_bound(map)) { 695 err = bpf_map_offload_update_elem(map, key, value, attr->flags); 696 goto out; 697 } else if (map->map_type == BPF_MAP_TYPE_CPUMAP) { 698 err = map->ops->map_update_elem(map, key, value, attr->flags); 699 goto out; 700 } 701 702 /* must increment bpf_prog_active to avoid kprobe+bpf triggering from 703 * inside bpf map update or delete otherwise deadlocks are possible 704 */ 705 preempt_disable(); 706 __this_cpu_inc(bpf_prog_active); 707 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 708 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { 709 err = bpf_percpu_hash_update(map, key, value, attr->flags); 710 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { 711 err = bpf_percpu_array_update(map, key, value, attr->flags); 712 } else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || 713 map->map_type == BPF_MAP_TYPE_PROG_ARRAY || 714 map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || 715 map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) { 716 rcu_read_lock(); 717 err = bpf_fd_array_map_update_elem(map, f.file, key, value, 718 attr->flags); 719 rcu_read_unlock(); 720 } else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { 721 rcu_read_lock(); 722 err = bpf_fd_htab_map_update_elem(map, f.file, key, value, 723 attr->flags); 724 rcu_read_unlock(); 725 } else { 726 rcu_read_lock(); 727 err = map->ops->map_update_elem(map, key, value, attr->flags); 728 rcu_read_unlock(); 729 } 730 __this_cpu_dec(bpf_prog_active); 731 preempt_enable(); 732 out: 733 if (!err) 734 trace_bpf_map_update_elem(map, ufd, key, value); 735 free_value: 736 kfree(value); 737 free_key: 738 kfree(key); 739 err_put: 740 fdput(f); 741 return err; 742 } 743 744 #define BPF_MAP_DELETE_ELEM_LAST_FIELD key 745 746 static int map_delete_elem(union bpf_attr *attr) 747 { 748 void __user *ukey = u64_to_user_ptr(attr->key); 749 int ufd = attr->map_fd; 750 struct bpf_map *map; 751 struct fd f; 752 void *key; 753 int err; 754 755 if (CHECK_ATTR(BPF_MAP_DELETE_ELEM)) 756 return -EINVAL; 757 758 f = fdget(ufd); 759 map = __bpf_map_get(f); 760 if (IS_ERR(map)) 761 return PTR_ERR(map); 762 763 if (!(f.file->f_mode & FMODE_CAN_WRITE)) { 764 err = -EPERM; 765 goto err_put; 766 } 767 768 key = memdup_user(ukey, map->key_size); 769 if (IS_ERR(key)) { 770 err = PTR_ERR(key); 771 goto err_put; 772 } 773 774 if (bpf_map_is_dev_bound(map)) { 775 err = bpf_map_offload_delete_elem(map, key); 776 goto out; 777 } 778 779 preempt_disable(); 780 __this_cpu_inc(bpf_prog_active); 781 rcu_read_lock(); 782 err = map->ops->map_delete_elem(map, key); 783 rcu_read_unlock(); 784 __this_cpu_dec(bpf_prog_active); 785 preempt_enable(); 786 out: 787 if (!err) 788 trace_bpf_map_delete_elem(map, ufd, key); 789 kfree(key); 790 err_put: 791 fdput(f); 792 return err; 793 } 794 795 /* last field in 'union bpf_attr' used by this command */ 796 #define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key 797 798 static int map_get_next_key(union bpf_attr *attr) 799 { 800 void __user *ukey = u64_to_user_ptr(attr->key); 801 void __user *unext_key = u64_to_user_ptr(attr->next_key); 802 int ufd = attr->map_fd; 803 struct bpf_map *map; 804 void *key, *next_key; 805 struct fd f; 806 int err; 807 808 if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY)) 809 return -EINVAL; 810 811 f = fdget(ufd); 812 map = __bpf_map_get(f); 813 if (IS_ERR(map)) 814 return PTR_ERR(map); 815 816 if (!(f.file->f_mode & FMODE_CAN_READ)) { 817 err = -EPERM; 818 goto err_put; 819 } 820 821 if (ukey) { 822 key = memdup_user(ukey, map->key_size); 823 if (IS_ERR(key)) { 824 err = PTR_ERR(key); 825 goto err_put; 826 } 827 } else { 828 key = NULL; 829 } 830 831 err = -ENOMEM; 832 next_key = kmalloc(map->key_size, GFP_USER); 833 if (!next_key) 834 goto free_key; 835 836 if (bpf_map_is_dev_bound(map)) { 837 err = bpf_map_offload_get_next_key(map, key, next_key); 838 goto out; 839 } 840 841 rcu_read_lock(); 842 err = map->ops->map_get_next_key(map, key, next_key); 843 rcu_read_unlock(); 844 out: 845 if (err) 846 goto free_next_key; 847 848 err = -EFAULT; 849 if (copy_to_user(unext_key, next_key, map->key_size) != 0) 850 goto free_next_key; 851 852 trace_bpf_map_next_key(map, ufd, key, next_key); 853 err = 0; 854 855 free_next_key: 856 kfree(next_key); 857 free_key: 858 kfree(key); 859 err_put: 860 fdput(f); 861 return err; 862 } 863 864 static const struct bpf_prog_ops * const bpf_prog_types[] = { 865 #define BPF_PROG_TYPE(_id, _name) \ 866 [_id] = & _name ## _prog_ops, 867 #define BPF_MAP_TYPE(_id, _ops) 868 #include <linux/bpf_types.h> 869 #undef BPF_PROG_TYPE 870 #undef BPF_MAP_TYPE 871 }; 872 873 static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog) 874 { 875 if (type >= ARRAY_SIZE(bpf_prog_types) || !bpf_prog_types[type]) 876 return -EINVAL; 877 878 if (!bpf_prog_is_dev_bound(prog->aux)) 879 prog->aux->ops = bpf_prog_types[type]; 880 else 881 prog->aux->ops = &bpf_offload_prog_ops; 882 prog->type = type; 883 return 0; 884 } 885 886 /* drop refcnt on maps used by eBPF program and free auxilary data */ 887 static void free_used_maps(struct bpf_prog_aux *aux) 888 { 889 int i; 890 891 for (i = 0; i < aux->used_map_cnt; i++) 892 bpf_map_put(aux->used_maps[i]); 893 894 kfree(aux->used_maps); 895 } 896 897 int __bpf_prog_charge(struct user_struct *user, u32 pages) 898 { 899 unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; 900 unsigned long user_bufs; 901 902 if (user) { 903 user_bufs = atomic_long_add_return(pages, &user->locked_vm); 904 if (user_bufs > memlock_limit) { 905 atomic_long_sub(pages, &user->locked_vm); 906 return -EPERM; 907 } 908 } 909 910 return 0; 911 } 912 913 void __bpf_prog_uncharge(struct user_struct *user, u32 pages) 914 { 915 if (user) 916 atomic_long_sub(pages, &user->locked_vm); 917 } 918 919 static int bpf_prog_charge_memlock(struct bpf_prog *prog) 920 { 921 struct user_struct *user = get_current_user(); 922 int ret; 923 924 ret = __bpf_prog_charge(user, prog->pages); 925 if (ret) { 926 free_uid(user); 927 return ret; 928 } 929 930 prog->aux->user = user; 931 return 0; 932 } 933 934 static void bpf_prog_uncharge_memlock(struct bpf_prog *prog) 935 { 936 struct user_struct *user = prog->aux->user; 937 938 __bpf_prog_uncharge(user, prog->pages); 939 free_uid(user); 940 } 941 942 static int bpf_prog_alloc_id(struct bpf_prog *prog) 943 { 944 int id; 945 946 spin_lock_bh(&prog_idr_lock); 947 id = idr_alloc_cyclic(&prog_idr, prog, 1, INT_MAX, GFP_ATOMIC); 948 if (id > 0) 949 prog->aux->id = id; 950 spin_unlock_bh(&prog_idr_lock); 951 952 /* id is in [1, INT_MAX) */ 953 if (WARN_ON_ONCE(!id)) 954 return -ENOSPC; 955 956 return id > 0 ? 0 : id; 957 } 958 959 void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock) 960 { 961 /* cBPF to eBPF migrations are currently not in the idr store. 962 * Offloaded programs are removed from the store when their device 963 * disappears - even if someone grabs an fd to them they are unusable, 964 * simply waiting for refcnt to drop to be freed. 965 */ 966 if (!prog->aux->id) 967 return; 968 969 if (do_idr_lock) 970 spin_lock_bh(&prog_idr_lock); 971 else 972 __acquire(&prog_idr_lock); 973 974 idr_remove(&prog_idr, prog->aux->id); 975 prog->aux->id = 0; 976 977 if (do_idr_lock) 978 spin_unlock_bh(&prog_idr_lock); 979 else 980 __release(&prog_idr_lock); 981 } 982 983 static void __bpf_prog_put_rcu(struct rcu_head *rcu) 984 { 985 struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu); 986 987 free_used_maps(aux); 988 bpf_prog_uncharge_memlock(aux->prog); 989 security_bpf_prog_free(aux); 990 bpf_prog_free(aux->prog); 991 } 992 993 static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock) 994 { 995 if (atomic_dec_and_test(&prog->aux->refcnt)) { 996 int i; 997 998 trace_bpf_prog_put_rcu(prog); 999 /* bpf_prog_free_id() must be called first */ 1000 bpf_prog_free_id(prog, do_idr_lock); 1001 1002 for (i = 0; i < prog->aux->func_cnt; i++) 1003 bpf_prog_kallsyms_del(prog->aux->func[i]); 1004 bpf_prog_kallsyms_del(prog); 1005 1006 call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu); 1007 } 1008 } 1009 1010 void bpf_prog_put(struct bpf_prog *prog) 1011 { 1012 __bpf_prog_put(prog, true); 1013 } 1014 EXPORT_SYMBOL_GPL(bpf_prog_put); 1015 1016 static int bpf_prog_release(struct inode *inode, struct file *filp) 1017 { 1018 struct bpf_prog *prog = filp->private_data; 1019 1020 bpf_prog_put(prog); 1021 return 0; 1022 } 1023 1024 #ifdef CONFIG_PROC_FS 1025 static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp) 1026 { 1027 const struct bpf_prog *prog = filp->private_data; 1028 char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; 1029 1030 bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); 1031 seq_printf(m, 1032 "prog_type:\t%u\n" 1033 "prog_jited:\t%u\n" 1034 "prog_tag:\t%s\n" 1035 "memlock:\t%llu\n", 1036 prog->type, 1037 prog->jited, 1038 prog_tag, 1039 prog->pages * 1ULL << PAGE_SHIFT); 1040 } 1041 #endif 1042 1043 const struct file_operations bpf_prog_fops = { 1044 #ifdef CONFIG_PROC_FS 1045 .show_fdinfo = bpf_prog_show_fdinfo, 1046 #endif 1047 .release = bpf_prog_release, 1048 .read = bpf_dummy_read, 1049 .write = bpf_dummy_write, 1050 }; 1051 1052 int bpf_prog_new_fd(struct bpf_prog *prog) 1053 { 1054 int ret; 1055 1056 ret = security_bpf_prog(prog); 1057 if (ret < 0) 1058 return ret; 1059 1060 return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, 1061 O_RDWR | O_CLOEXEC); 1062 } 1063 1064 static struct bpf_prog *____bpf_prog_get(struct fd f) 1065 { 1066 if (!f.file) 1067 return ERR_PTR(-EBADF); 1068 if (f.file->f_op != &bpf_prog_fops) { 1069 fdput(f); 1070 return ERR_PTR(-EINVAL); 1071 } 1072 1073 return f.file->private_data; 1074 } 1075 1076 struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i) 1077 { 1078 if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) { 1079 atomic_sub(i, &prog->aux->refcnt); 1080 return ERR_PTR(-EBUSY); 1081 } 1082 return prog; 1083 } 1084 EXPORT_SYMBOL_GPL(bpf_prog_add); 1085 1086 void bpf_prog_sub(struct bpf_prog *prog, int i) 1087 { 1088 /* Only to be used for undoing previous bpf_prog_add() in some 1089 * error path. We still know that another entity in our call 1090 * path holds a reference to the program, thus atomic_sub() can 1091 * be safely used in such cases! 1092 */ 1093 WARN_ON(atomic_sub_return(i, &prog->aux->refcnt) == 0); 1094 } 1095 EXPORT_SYMBOL_GPL(bpf_prog_sub); 1096 1097 struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog) 1098 { 1099 return bpf_prog_add(prog, 1); 1100 } 1101 EXPORT_SYMBOL_GPL(bpf_prog_inc); 1102 1103 /* prog_idr_lock should have been held */ 1104 struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog) 1105 { 1106 int refold; 1107 1108 refold = __atomic_add_unless(&prog->aux->refcnt, 1, 0); 1109 1110 if (refold >= BPF_MAX_REFCNT) { 1111 __bpf_prog_put(prog, false); 1112 return ERR_PTR(-EBUSY); 1113 } 1114 1115 if (!refold) 1116 return ERR_PTR(-ENOENT); 1117 1118 return prog; 1119 } 1120 EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero); 1121 1122 bool bpf_prog_get_ok(struct bpf_prog *prog, 1123 enum bpf_prog_type *attach_type, bool attach_drv) 1124 { 1125 /* not an attachment, just a refcount inc, always allow */ 1126 if (!attach_type) 1127 return true; 1128 1129 if (prog->type != *attach_type) 1130 return false; 1131 if (bpf_prog_is_dev_bound(prog->aux) && !attach_drv) 1132 return false; 1133 1134 return true; 1135 } 1136 1137 static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type, 1138 bool attach_drv) 1139 { 1140 struct fd f = fdget(ufd); 1141 struct bpf_prog *prog; 1142 1143 prog = ____bpf_prog_get(f); 1144 if (IS_ERR(prog)) 1145 return prog; 1146 if (!bpf_prog_get_ok(prog, attach_type, attach_drv)) { 1147 prog = ERR_PTR(-EINVAL); 1148 goto out; 1149 } 1150 1151 prog = bpf_prog_inc(prog); 1152 out: 1153 fdput(f); 1154 return prog; 1155 } 1156 1157 struct bpf_prog *bpf_prog_get(u32 ufd) 1158 { 1159 return __bpf_prog_get(ufd, NULL, false); 1160 } 1161 1162 struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, 1163 bool attach_drv) 1164 { 1165 struct bpf_prog *prog = __bpf_prog_get(ufd, &type, attach_drv); 1166 1167 if (!IS_ERR(prog)) 1168 trace_bpf_prog_get_type(prog); 1169 return prog; 1170 } 1171 EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev); 1172 1173 /* last field in 'union bpf_attr' used by this command */ 1174 #define BPF_PROG_LOAD_LAST_FIELD prog_ifindex 1175 1176 static int bpf_prog_load(union bpf_attr *attr) 1177 { 1178 enum bpf_prog_type type = attr->prog_type; 1179 struct bpf_prog *prog; 1180 int err; 1181 char license[128]; 1182 bool is_gpl; 1183 1184 if (CHECK_ATTR(BPF_PROG_LOAD)) 1185 return -EINVAL; 1186 1187 if (attr->prog_flags & ~BPF_F_STRICT_ALIGNMENT) 1188 return -EINVAL; 1189 1190 /* copy eBPF program license from user space */ 1191 if (strncpy_from_user(license, u64_to_user_ptr(attr->license), 1192 sizeof(license) - 1) < 0) 1193 return -EFAULT; 1194 license[sizeof(license) - 1] = 0; 1195 1196 /* eBPF programs must be GPL compatible to use GPL-ed functions */ 1197 is_gpl = license_is_gpl_compatible(license); 1198 1199 if (attr->insn_cnt == 0 || attr->insn_cnt > BPF_MAXINSNS) 1200 return -E2BIG; 1201 1202 if (type == BPF_PROG_TYPE_KPROBE && 1203 attr->kern_version != LINUX_VERSION_CODE) 1204 return -EINVAL; 1205 1206 if (type != BPF_PROG_TYPE_SOCKET_FILTER && 1207 type != BPF_PROG_TYPE_CGROUP_SKB && 1208 !capable(CAP_SYS_ADMIN)) 1209 return -EPERM; 1210 1211 /* plain bpf_prog allocation */ 1212 prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER); 1213 if (!prog) 1214 return -ENOMEM; 1215 1216 prog->aux->offload_requested = !!attr->prog_ifindex; 1217 1218 err = security_bpf_prog_alloc(prog->aux); 1219 if (err) 1220 goto free_prog_nouncharge; 1221 1222 err = bpf_prog_charge_memlock(prog); 1223 if (err) 1224 goto free_prog_sec; 1225 1226 prog->len = attr->insn_cnt; 1227 1228 err = -EFAULT; 1229 if (copy_from_user(prog->insns, u64_to_user_ptr(attr->insns), 1230 bpf_prog_insn_size(prog)) != 0) 1231 goto free_prog; 1232 1233 prog->orig_prog = NULL; 1234 prog->jited = 0; 1235 1236 atomic_set(&prog->aux->refcnt, 1); 1237 prog->gpl_compatible = is_gpl ? 1 : 0; 1238 1239 if (bpf_prog_is_dev_bound(prog->aux)) { 1240 err = bpf_prog_offload_init(prog, attr); 1241 if (err) 1242 goto free_prog; 1243 } 1244 1245 /* find program type: socket_filter vs tracing_filter */ 1246 err = find_prog_type(type, prog); 1247 if (err < 0) 1248 goto free_prog; 1249 1250 prog->aux->load_time = ktime_get_boot_ns(); 1251 err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name); 1252 if (err) 1253 goto free_prog; 1254 1255 /* run eBPF verifier */ 1256 err = bpf_check(&prog, attr); 1257 if (err < 0) 1258 goto free_used_maps; 1259 1260 /* eBPF program is ready to be JITed */ 1261 if (!prog->bpf_func) 1262 prog = bpf_prog_select_runtime(prog, &err); 1263 if (err < 0) 1264 goto free_used_maps; 1265 1266 err = bpf_prog_alloc_id(prog); 1267 if (err) 1268 goto free_used_maps; 1269 1270 err = bpf_prog_new_fd(prog); 1271 if (err < 0) { 1272 /* failed to allocate fd. 1273 * bpf_prog_put() is needed because the above 1274 * bpf_prog_alloc_id() has published the prog 1275 * to the userspace and the userspace may 1276 * have refcnt-ed it through BPF_PROG_GET_FD_BY_ID. 1277 */ 1278 bpf_prog_put(prog); 1279 return err; 1280 } 1281 1282 bpf_prog_kallsyms_add(prog); 1283 trace_bpf_prog_load(prog, err); 1284 return err; 1285 1286 free_used_maps: 1287 free_used_maps(prog->aux); 1288 free_prog: 1289 bpf_prog_uncharge_memlock(prog); 1290 free_prog_sec: 1291 security_bpf_prog_free(prog->aux); 1292 free_prog_nouncharge: 1293 bpf_prog_free(prog); 1294 return err; 1295 } 1296 1297 #define BPF_OBJ_LAST_FIELD file_flags 1298 1299 static int bpf_obj_pin(const union bpf_attr *attr) 1300 { 1301 if (CHECK_ATTR(BPF_OBJ) || attr->file_flags != 0) 1302 return -EINVAL; 1303 1304 return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname)); 1305 } 1306 1307 static int bpf_obj_get(const union bpf_attr *attr) 1308 { 1309 if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0 || 1310 attr->file_flags & ~BPF_OBJ_FLAG_MASK) 1311 return -EINVAL; 1312 1313 return bpf_obj_get_user(u64_to_user_ptr(attr->pathname), 1314 attr->file_flags); 1315 } 1316 1317 #ifdef CONFIG_CGROUP_BPF 1318 1319 #define BPF_PROG_ATTACH_LAST_FIELD attach_flags 1320 1321 static int sockmap_get_from_fd(const union bpf_attr *attr, bool attach) 1322 { 1323 struct bpf_prog *prog = NULL; 1324 int ufd = attr->target_fd; 1325 struct bpf_map *map; 1326 struct fd f; 1327 int err; 1328 1329 f = fdget(ufd); 1330 map = __bpf_map_get(f); 1331 if (IS_ERR(map)) 1332 return PTR_ERR(map); 1333 1334 if (attach) { 1335 prog = bpf_prog_get_type(attr->attach_bpf_fd, 1336 BPF_PROG_TYPE_SK_SKB); 1337 if (IS_ERR(prog)) { 1338 fdput(f); 1339 return PTR_ERR(prog); 1340 } 1341 } 1342 1343 err = sock_map_prog(map, prog, attr->attach_type); 1344 if (err) { 1345 fdput(f); 1346 if (prog) 1347 bpf_prog_put(prog); 1348 return err; 1349 } 1350 1351 fdput(f); 1352 return 0; 1353 } 1354 1355 #define BPF_F_ATTACH_MASK \ 1356 (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI) 1357 1358 static int bpf_prog_attach(const union bpf_attr *attr) 1359 { 1360 enum bpf_prog_type ptype; 1361 struct bpf_prog *prog; 1362 struct cgroup *cgrp; 1363 int ret; 1364 1365 if (!capable(CAP_NET_ADMIN)) 1366 return -EPERM; 1367 1368 if (CHECK_ATTR(BPF_PROG_ATTACH)) 1369 return -EINVAL; 1370 1371 if (attr->attach_flags & ~BPF_F_ATTACH_MASK) 1372 return -EINVAL; 1373 1374 switch (attr->attach_type) { 1375 case BPF_CGROUP_INET_INGRESS: 1376 case BPF_CGROUP_INET_EGRESS: 1377 ptype = BPF_PROG_TYPE_CGROUP_SKB; 1378 break; 1379 case BPF_CGROUP_INET_SOCK_CREATE: 1380 ptype = BPF_PROG_TYPE_CGROUP_SOCK; 1381 break; 1382 case BPF_CGROUP_SOCK_OPS: 1383 ptype = BPF_PROG_TYPE_SOCK_OPS; 1384 break; 1385 case BPF_CGROUP_DEVICE: 1386 ptype = BPF_PROG_TYPE_CGROUP_DEVICE; 1387 break; 1388 case BPF_SK_SKB_STREAM_PARSER: 1389 case BPF_SK_SKB_STREAM_VERDICT: 1390 return sockmap_get_from_fd(attr, true); 1391 default: 1392 return -EINVAL; 1393 } 1394 1395 prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); 1396 if (IS_ERR(prog)) 1397 return PTR_ERR(prog); 1398 1399 cgrp = cgroup_get_from_fd(attr->target_fd); 1400 if (IS_ERR(cgrp)) { 1401 bpf_prog_put(prog); 1402 return PTR_ERR(cgrp); 1403 } 1404 1405 ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type, 1406 attr->attach_flags); 1407 if (ret) 1408 bpf_prog_put(prog); 1409 cgroup_put(cgrp); 1410 1411 return ret; 1412 } 1413 1414 #define BPF_PROG_DETACH_LAST_FIELD attach_type 1415 1416 static int bpf_prog_detach(const union bpf_attr *attr) 1417 { 1418 enum bpf_prog_type ptype; 1419 struct bpf_prog *prog; 1420 struct cgroup *cgrp; 1421 int ret; 1422 1423 if (!capable(CAP_NET_ADMIN)) 1424 return -EPERM; 1425 1426 if (CHECK_ATTR(BPF_PROG_DETACH)) 1427 return -EINVAL; 1428 1429 switch (attr->attach_type) { 1430 case BPF_CGROUP_INET_INGRESS: 1431 case BPF_CGROUP_INET_EGRESS: 1432 ptype = BPF_PROG_TYPE_CGROUP_SKB; 1433 break; 1434 case BPF_CGROUP_INET_SOCK_CREATE: 1435 ptype = BPF_PROG_TYPE_CGROUP_SOCK; 1436 break; 1437 case BPF_CGROUP_SOCK_OPS: 1438 ptype = BPF_PROG_TYPE_SOCK_OPS; 1439 break; 1440 case BPF_CGROUP_DEVICE: 1441 ptype = BPF_PROG_TYPE_CGROUP_DEVICE; 1442 break; 1443 case BPF_SK_SKB_STREAM_PARSER: 1444 case BPF_SK_SKB_STREAM_VERDICT: 1445 return sockmap_get_from_fd(attr, false); 1446 default: 1447 return -EINVAL; 1448 } 1449 1450 cgrp = cgroup_get_from_fd(attr->target_fd); 1451 if (IS_ERR(cgrp)) 1452 return PTR_ERR(cgrp); 1453 1454 prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); 1455 if (IS_ERR(prog)) 1456 prog = NULL; 1457 1458 ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0); 1459 if (prog) 1460 bpf_prog_put(prog); 1461 cgroup_put(cgrp); 1462 return ret; 1463 } 1464 1465 #define BPF_PROG_QUERY_LAST_FIELD query.prog_cnt 1466 1467 static int bpf_prog_query(const union bpf_attr *attr, 1468 union bpf_attr __user *uattr) 1469 { 1470 struct cgroup *cgrp; 1471 int ret; 1472 1473 if (!capable(CAP_NET_ADMIN)) 1474 return -EPERM; 1475 if (CHECK_ATTR(BPF_PROG_QUERY)) 1476 return -EINVAL; 1477 if (attr->query.query_flags & ~BPF_F_QUERY_EFFECTIVE) 1478 return -EINVAL; 1479 1480 switch (attr->query.attach_type) { 1481 case BPF_CGROUP_INET_INGRESS: 1482 case BPF_CGROUP_INET_EGRESS: 1483 case BPF_CGROUP_INET_SOCK_CREATE: 1484 case BPF_CGROUP_SOCK_OPS: 1485 case BPF_CGROUP_DEVICE: 1486 break; 1487 default: 1488 return -EINVAL; 1489 } 1490 cgrp = cgroup_get_from_fd(attr->query.target_fd); 1491 if (IS_ERR(cgrp)) 1492 return PTR_ERR(cgrp); 1493 ret = cgroup_bpf_query(cgrp, attr, uattr); 1494 cgroup_put(cgrp); 1495 return ret; 1496 } 1497 #endif /* CONFIG_CGROUP_BPF */ 1498 1499 #define BPF_PROG_TEST_RUN_LAST_FIELD test.duration 1500 1501 static int bpf_prog_test_run(const union bpf_attr *attr, 1502 union bpf_attr __user *uattr) 1503 { 1504 struct bpf_prog *prog; 1505 int ret = -ENOTSUPP; 1506 1507 if (CHECK_ATTR(BPF_PROG_TEST_RUN)) 1508 return -EINVAL; 1509 1510 prog = bpf_prog_get(attr->test.prog_fd); 1511 if (IS_ERR(prog)) 1512 return PTR_ERR(prog); 1513 1514 if (prog->aux->ops->test_run) 1515 ret = prog->aux->ops->test_run(prog, attr, uattr); 1516 1517 bpf_prog_put(prog); 1518 return ret; 1519 } 1520 1521 #define BPF_OBJ_GET_NEXT_ID_LAST_FIELD next_id 1522 1523 static int bpf_obj_get_next_id(const union bpf_attr *attr, 1524 union bpf_attr __user *uattr, 1525 struct idr *idr, 1526 spinlock_t *lock) 1527 { 1528 u32 next_id = attr->start_id; 1529 int err = 0; 1530 1531 if (CHECK_ATTR(BPF_OBJ_GET_NEXT_ID) || next_id >= INT_MAX) 1532 return -EINVAL; 1533 1534 if (!capable(CAP_SYS_ADMIN)) 1535 return -EPERM; 1536 1537 next_id++; 1538 spin_lock_bh(lock); 1539 if (!idr_get_next(idr, &next_id)) 1540 err = -ENOENT; 1541 spin_unlock_bh(lock); 1542 1543 if (!err) 1544 err = put_user(next_id, &uattr->next_id); 1545 1546 return err; 1547 } 1548 1549 #define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id 1550 1551 static int bpf_prog_get_fd_by_id(const union bpf_attr *attr) 1552 { 1553 struct bpf_prog *prog; 1554 u32 id = attr->prog_id; 1555 int fd; 1556 1557 if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID)) 1558 return -EINVAL; 1559 1560 if (!capable(CAP_SYS_ADMIN)) 1561 return -EPERM; 1562 1563 spin_lock_bh(&prog_idr_lock); 1564 prog = idr_find(&prog_idr, id); 1565 if (prog) 1566 prog = bpf_prog_inc_not_zero(prog); 1567 else 1568 prog = ERR_PTR(-ENOENT); 1569 spin_unlock_bh(&prog_idr_lock); 1570 1571 if (IS_ERR(prog)) 1572 return PTR_ERR(prog); 1573 1574 fd = bpf_prog_new_fd(prog); 1575 if (fd < 0) 1576 bpf_prog_put(prog); 1577 1578 return fd; 1579 } 1580 1581 #define BPF_MAP_GET_FD_BY_ID_LAST_FIELD open_flags 1582 1583 static int bpf_map_get_fd_by_id(const union bpf_attr *attr) 1584 { 1585 struct bpf_map *map; 1586 u32 id = attr->map_id; 1587 int f_flags; 1588 int fd; 1589 1590 if (CHECK_ATTR(BPF_MAP_GET_FD_BY_ID) || 1591 attr->open_flags & ~BPF_OBJ_FLAG_MASK) 1592 return -EINVAL; 1593 1594 if (!capable(CAP_SYS_ADMIN)) 1595 return -EPERM; 1596 1597 f_flags = bpf_get_file_flag(attr->open_flags); 1598 if (f_flags < 0) 1599 return f_flags; 1600 1601 spin_lock_bh(&map_idr_lock); 1602 map = idr_find(&map_idr, id); 1603 if (map) 1604 map = bpf_map_inc_not_zero(map, true); 1605 else 1606 map = ERR_PTR(-ENOENT); 1607 spin_unlock_bh(&map_idr_lock); 1608 1609 if (IS_ERR(map)) 1610 return PTR_ERR(map); 1611 1612 fd = bpf_map_new_fd(map, f_flags); 1613 if (fd < 0) 1614 bpf_map_put(map); 1615 1616 return fd; 1617 } 1618 1619 static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog, 1620 unsigned long addr) 1621 { 1622 int i; 1623 1624 for (i = 0; i < prog->aux->used_map_cnt; i++) 1625 if (prog->aux->used_maps[i] == (void *)addr) 1626 return prog->aux->used_maps[i]; 1627 return NULL; 1628 } 1629 1630 static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog) 1631 { 1632 const struct bpf_map *map; 1633 struct bpf_insn *insns; 1634 u64 imm; 1635 int i; 1636 1637 insns = kmemdup(prog->insnsi, bpf_prog_insn_size(prog), 1638 GFP_USER); 1639 if (!insns) 1640 return insns; 1641 1642 for (i = 0; i < prog->len; i++) { 1643 if (insns[i].code == (BPF_JMP | BPF_TAIL_CALL)) { 1644 insns[i].code = BPF_JMP | BPF_CALL; 1645 insns[i].imm = BPF_FUNC_tail_call; 1646 /* fall-through */ 1647 } 1648 if (insns[i].code == (BPF_JMP | BPF_CALL) || 1649 insns[i].code == (BPF_JMP | BPF_CALL_ARGS)) { 1650 if (insns[i].code == (BPF_JMP | BPF_CALL_ARGS)) 1651 insns[i].code = BPF_JMP | BPF_CALL; 1652 if (!bpf_dump_raw_ok()) 1653 insns[i].imm = 0; 1654 continue; 1655 } 1656 1657 if (insns[i].code != (BPF_LD | BPF_IMM | BPF_DW)) 1658 continue; 1659 1660 imm = ((u64)insns[i + 1].imm << 32) | (u32)insns[i].imm; 1661 map = bpf_map_from_imm(prog, imm); 1662 if (map) { 1663 insns[i].src_reg = BPF_PSEUDO_MAP_FD; 1664 insns[i].imm = map->id; 1665 insns[i + 1].imm = 0; 1666 continue; 1667 } 1668 1669 if (!bpf_dump_raw_ok() && 1670 imm == (unsigned long)prog->aux) { 1671 insns[i].imm = 0; 1672 insns[i + 1].imm = 0; 1673 continue; 1674 } 1675 } 1676 1677 return insns; 1678 } 1679 1680 static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, 1681 const union bpf_attr *attr, 1682 union bpf_attr __user *uattr) 1683 { 1684 struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info); 1685 struct bpf_prog_info info = {}; 1686 u32 info_len = attr->info.info_len; 1687 char __user *uinsns; 1688 u32 ulen; 1689 int err; 1690 1691 err = check_uarg_tail_zero(uinfo, sizeof(info), info_len); 1692 if (err) 1693 return err; 1694 info_len = min_t(u32, sizeof(info), info_len); 1695 1696 if (copy_from_user(&info, uinfo, info_len)) 1697 return -EFAULT; 1698 1699 info.type = prog->type; 1700 info.id = prog->aux->id; 1701 info.load_time = prog->aux->load_time; 1702 info.created_by_uid = from_kuid_munged(current_user_ns(), 1703 prog->aux->user->uid); 1704 1705 memcpy(info.tag, prog->tag, sizeof(prog->tag)); 1706 memcpy(info.name, prog->aux->name, sizeof(prog->aux->name)); 1707 1708 ulen = info.nr_map_ids; 1709 info.nr_map_ids = prog->aux->used_map_cnt; 1710 ulen = min_t(u32, info.nr_map_ids, ulen); 1711 if (ulen) { 1712 u32 __user *user_map_ids = u64_to_user_ptr(info.map_ids); 1713 u32 i; 1714 1715 for (i = 0; i < ulen; i++) 1716 if (put_user(prog->aux->used_maps[i]->id, 1717 &user_map_ids[i])) 1718 return -EFAULT; 1719 } 1720 1721 if (!capable(CAP_SYS_ADMIN)) { 1722 info.jited_prog_len = 0; 1723 info.xlated_prog_len = 0; 1724 goto done; 1725 } 1726 1727 ulen = info.jited_prog_len; 1728 info.jited_prog_len = prog->jited_len; 1729 if (info.jited_prog_len && ulen) { 1730 if (bpf_dump_raw_ok()) { 1731 uinsns = u64_to_user_ptr(info.jited_prog_insns); 1732 ulen = min_t(u32, info.jited_prog_len, ulen); 1733 if (copy_to_user(uinsns, prog->bpf_func, ulen)) 1734 return -EFAULT; 1735 } else { 1736 info.jited_prog_insns = 0; 1737 } 1738 } 1739 1740 ulen = info.xlated_prog_len; 1741 info.xlated_prog_len = bpf_prog_insn_size(prog); 1742 if (info.xlated_prog_len && ulen) { 1743 struct bpf_insn *insns_sanitized; 1744 bool fault; 1745 1746 if (prog->blinded && !bpf_dump_raw_ok()) { 1747 info.xlated_prog_insns = 0; 1748 goto done; 1749 } 1750 insns_sanitized = bpf_insn_prepare_dump(prog); 1751 if (!insns_sanitized) 1752 return -ENOMEM; 1753 uinsns = u64_to_user_ptr(info.xlated_prog_insns); 1754 ulen = min_t(u32, info.xlated_prog_len, ulen); 1755 fault = copy_to_user(uinsns, insns_sanitized, ulen); 1756 kfree(insns_sanitized); 1757 if (fault) 1758 return -EFAULT; 1759 } 1760 1761 if (bpf_prog_is_dev_bound(prog->aux)) { 1762 err = bpf_prog_offload_info_fill(&info, prog); 1763 if (err) 1764 return err; 1765 } 1766 1767 done: 1768 if (copy_to_user(uinfo, &info, info_len) || 1769 put_user(info_len, &uattr->info.info_len)) 1770 return -EFAULT; 1771 1772 return 0; 1773 } 1774 1775 static int bpf_map_get_info_by_fd(struct bpf_map *map, 1776 const union bpf_attr *attr, 1777 union bpf_attr __user *uattr) 1778 { 1779 struct bpf_map_info __user *uinfo = u64_to_user_ptr(attr->info.info); 1780 struct bpf_map_info info = {}; 1781 u32 info_len = attr->info.info_len; 1782 int err; 1783 1784 err = check_uarg_tail_zero(uinfo, sizeof(info), info_len); 1785 if (err) 1786 return err; 1787 info_len = min_t(u32, sizeof(info), info_len); 1788 1789 info.type = map->map_type; 1790 info.id = map->id; 1791 info.key_size = map->key_size; 1792 info.value_size = map->value_size; 1793 info.max_entries = map->max_entries; 1794 info.map_flags = map->map_flags; 1795 memcpy(info.name, map->name, sizeof(map->name)); 1796 1797 if (copy_to_user(uinfo, &info, info_len) || 1798 put_user(info_len, &uattr->info.info_len)) 1799 return -EFAULT; 1800 1801 return 0; 1802 } 1803 1804 #define BPF_OBJ_GET_INFO_BY_FD_LAST_FIELD info.info 1805 1806 static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, 1807 union bpf_attr __user *uattr) 1808 { 1809 int ufd = attr->info.bpf_fd; 1810 struct fd f; 1811 int err; 1812 1813 if (CHECK_ATTR(BPF_OBJ_GET_INFO_BY_FD)) 1814 return -EINVAL; 1815 1816 f = fdget(ufd); 1817 if (!f.file) 1818 return -EBADFD; 1819 1820 if (f.file->f_op == &bpf_prog_fops) 1821 err = bpf_prog_get_info_by_fd(f.file->private_data, attr, 1822 uattr); 1823 else if (f.file->f_op == &bpf_map_fops) 1824 err = bpf_map_get_info_by_fd(f.file->private_data, attr, 1825 uattr); 1826 else 1827 err = -EINVAL; 1828 1829 fdput(f); 1830 return err; 1831 } 1832 1833 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) 1834 { 1835 union bpf_attr attr = {}; 1836 int err; 1837 1838 if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled) 1839 return -EPERM; 1840 1841 err = check_uarg_tail_zero(uattr, sizeof(attr), size); 1842 if (err) 1843 return err; 1844 size = min_t(u32, size, sizeof(attr)); 1845 1846 /* copy attributes from user space, may be less than sizeof(bpf_attr) */ 1847 if (copy_from_user(&attr, uattr, size) != 0) 1848 return -EFAULT; 1849 1850 err = security_bpf(cmd, &attr, size); 1851 if (err < 0) 1852 return err; 1853 1854 switch (cmd) { 1855 case BPF_MAP_CREATE: 1856 err = map_create(&attr); 1857 break; 1858 case BPF_MAP_LOOKUP_ELEM: 1859 err = map_lookup_elem(&attr); 1860 break; 1861 case BPF_MAP_UPDATE_ELEM: 1862 err = map_update_elem(&attr); 1863 break; 1864 case BPF_MAP_DELETE_ELEM: 1865 err = map_delete_elem(&attr); 1866 break; 1867 case BPF_MAP_GET_NEXT_KEY: 1868 err = map_get_next_key(&attr); 1869 break; 1870 case BPF_PROG_LOAD: 1871 err = bpf_prog_load(&attr); 1872 break; 1873 case BPF_OBJ_PIN: 1874 err = bpf_obj_pin(&attr); 1875 break; 1876 case BPF_OBJ_GET: 1877 err = bpf_obj_get(&attr); 1878 break; 1879 #ifdef CONFIG_CGROUP_BPF 1880 case BPF_PROG_ATTACH: 1881 err = bpf_prog_attach(&attr); 1882 break; 1883 case BPF_PROG_DETACH: 1884 err = bpf_prog_detach(&attr); 1885 break; 1886 case BPF_PROG_QUERY: 1887 err = bpf_prog_query(&attr, uattr); 1888 break; 1889 #endif 1890 case BPF_PROG_TEST_RUN: 1891 err = bpf_prog_test_run(&attr, uattr); 1892 break; 1893 case BPF_PROG_GET_NEXT_ID: 1894 err = bpf_obj_get_next_id(&attr, uattr, 1895 &prog_idr, &prog_idr_lock); 1896 break; 1897 case BPF_MAP_GET_NEXT_ID: 1898 err = bpf_obj_get_next_id(&attr, uattr, 1899 &map_idr, &map_idr_lock); 1900 break; 1901 case BPF_PROG_GET_FD_BY_ID: 1902 err = bpf_prog_get_fd_by_id(&attr); 1903 break; 1904 case BPF_MAP_GET_FD_BY_ID: 1905 err = bpf_map_get_fd_by_id(&attr); 1906 break; 1907 case BPF_OBJ_GET_INFO_BY_FD: 1908 err = bpf_obj_get_info_by_fd(&attr, uattr); 1909 break; 1910 default: 1911 err = -EINVAL; 1912 break; 1913 } 1914 1915 return err; 1916 } 1917