128fbcfa0SAlexei Starovoitov /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com 228fbcfa0SAlexei Starovoitov * 328fbcfa0SAlexei Starovoitov * This program is free software; you can redistribute it and/or 428fbcfa0SAlexei Starovoitov * modify it under the terms of version 2 of the GNU General Public 528fbcfa0SAlexei Starovoitov * License as published by the Free Software Foundation. 628fbcfa0SAlexei Starovoitov * 728fbcfa0SAlexei Starovoitov * This program is distributed in the hope that it will be useful, but 828fbcfa0SAlexei Starovoitov * WITHOUT ANY WARRANTY; without even the implied warranty of 928fbcfa0SAlexei Starovoitov * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 1028fbcfa0SAlexei Starovoitov * General Public License for more details. 1128fbcfa0SAlexei Starovoitov */ 1228fbcfa0SAlexei Starovoitov #include <linux/bpf.h> 1328fbcfa0SAlexei Starovoitov #include <linux/err.h> 1428fbcfa0SAlexei Starovoitov #include <linux/vmalloc.h> 1528fbcfa0SAlexei Starovoitov #include <linux/slab.h> 1628fbcfa0SAlexei Starovoitov #include <linux/mm.h> 1704fd61abSAlexei Starovoitov #include <linux/filter.h> 180cdf5640SDaniel Borkmann #include <linux/perf_event.h> 1928fbcfa0SAlexei Starovoitov 2028fbcfa0SAlexei Starovoitov /* Called from syscall */ 2128fbcfa0SAlexei Starovoitov static struct bpf_map *array_map_alloc(union bpf_attr *attr) 2228fbcfa0SAlexei Starovoitov { 2328fbcfa0SAlexei Starovoitov struct bpf_array *array; 24daaf427cSAlexei Starovoitov u32 elem_size, array_size; 2528fbcfa0SAlexei Starovoitov 2628fbcfa0SAlexei Starovoitov /* check sanity of attributes */ 2728fbcfa0SAlexei Starovoitov if (attr->max_entries == 0 || attr->key_size != 4 || 2828fbcfa0SAlexei Starovoitov attr->value_size == 0) 2928fbcfa0SAlexei Starovoitov return ERR_PTR(-EINVAL); 3028fbcfa0SAlexei Starovoitov 3101b3f521SAlexei Starovoitov if (attr->value_size >= 1 << (KMALLOC_SHIFT_MAX - 1)) 3201b3f521SAlexei Starovoitov /* if value_size is bigger, the user space won't be able to 3301b3f521SAlexei Starovoitov * access the elements. 3401b3f521SAlexei Starovoitov */ 3501b3f521SAlexei Starovoitov return ERR_PTR(-E2BIG); 3601b3f521SAlexei Starovoitov 3728fbcfa0SAlexei Starovoitov elem_size = round_up(attr->value_size, 8); 3828fbcfa0SAlexei Starovoitov 39daaf427cSAlexei Starovoitov /* check round_up into zero and u32 overflow */ 40daaf427cSAlexei Starovoitov if (elem_size == 0 || 4101b3f521SAlexei Starovoitov attr->max_entries > (U32_MAX - PAGE_SIZE - sizeof(*array)) / elem_size) 42daaf427cSAlexei Starovoitov return ERR_PTR(-ENOMEM); 43daaf427cSAlexei Starovoitov 44daaf427cSAlexei Starovoitov array_size = sizeof(*array) + attr->max_entries * elem_size; 45daaf427cSAlexei Starovoitov 4628fbcfa0SAlexei Starovoitov /* allocate all map elements and zero-initialize them */ 47daaf427cSAlexei Starovoitov array = kzalloc(array_size, GFP_USER | __GFP_NOWARN); 4828fbcfa0SAlexei Starovoitov if (!array) { 49daaf427cSAlexei Starovoitov array = vzalloc(array_size); 5028fbcfa0SAlexei Starovoitov if (!array) 5128fbcfa0SAlexei Starovoitov return ERR_PTR(-ENOMEM); 5228fbcfa0SAlexei Starovoitov } 5328fbcfa0SAlexei Starovoitov 5428fbcfa0SAlexei Starovoitov /* copy mandatory map attributes */ 5528fbcfa0SAlexei Starovoitov array->map.key_size = attr->key_size; 5628fbcfa0SAlexei Starovoitov array->map.value_size = attr->value_size; 5728fbcfa0SAlexei Starovoitov array->map.max_entries = attr->max_entries; 58aaac3ba9SAlexei Starovoitov array->map.pages = round_up(array_size, PAGE_SIZE) >> PAGE_SHIFT; 5928fbcfa0SAlexei Starovoitov array->elem_size = elem_size; 6028fbcfa0SAlexei Starovoitov 6128fbcfa0SAlexei Starovoitov return &array->map; 6228fbcfa0SAlexei Starovoitov } 6328fbcfa0SAlexei Starovoitov 6428fbcfa0SAlexei Starovoitov /* Called from syscall or from eBPF program */ 6528fbcfa0SAlexei Starovoitov static void *array_map_lookup_elem(struct bpf_map *map, void *key) 6628fbcfa0SAlexei Starovoitov { 6728fbcfa0SAlexei Starovoitov struct bpf_array *array = container_of(map, struct bpf_array, map); 6828fbcfa0SAlexei Starovoitov u32 index = *(u32 *)key; 6928fbcfa0SAlexei Starovoitov 7028fbcfa0SAlexei Starovoitov if (index >= array->map.max_entries) 7128fbcfa0SAlexei Starovoitov return NULL; 7228fbcfa0SAlexei Starovoitov 7328fbcfa0SAlexei Starovoitov return array->value + array->elem_size * index; 7428fbcfa0SAlexei Starovoitov } 7528fbcfa0SAlexei Starovoitov 7628fbcfa0SAlexei Starovoitov /* Called from syscall */ 7728fbcfa0SAlexei Starovoitov static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key) 7828fbcfa0SAlexei Starovoitov { 7928fbcfa0SAlexei Starovoitov struct bpf_array *array = container_of(map, struct bpf_array, map); 8028fbcfa0SAlexei Starovoitov u32 index = *(u32 *)key; 8128fbcfa0SAlexei Starovoitov u32 *next = (u32 *)next_key; 8228fbcfa0SAlexei Starovoitov 8328fbcfa0SAlexei Starovoitov if (index >= array->map.max_entries) { 8428fbcfa0SAlexei Starovoitov *next = 0; 8528fbcfa0SAlexei Starovoitov return 0; 8628fbcfa0SAlexei Starovoitov } 8728fbcfa0SAlexei Starovoitov 8828fbcfa0SAlexei Starovoitov if (index == array->map.max_entries - 1) 8928fbcfa0SAlexei Starovoitov return -ENOENT; 9028fbcfa0SAlexei Starovoitov 9128fbcfa0SAlexei Starovoitov *next = index + 1; 9228fbcfa0SAlexei Starovoitov return 0; 9328fbcfa0SAlexei Starovoitov } 9428fbcfa0SAlexei Starovoitov 9528fbcfa0SAlexei Starovoitov /* Called from syscall or from eBPF program */ 9628fbcfa0SAlexei Starovoitov static int array_map_update_elem(struct bpf_map *map, void *key, void *value, 9728fbcfa0SAlexei Starovoitov u64 map_flags) 9828fbcfa0SAlexei Starovoitov { 9928fbcfa0SAlexei Starovoitov struct bpf_array *array = container_of(map, struct bpf_array, map); 10028fbcfa0SAlexei Starovoitov u32 index = *(u32 *)key; 10128fbcfa0SAlexei Starovoitov 10228fbcfa0SAlexei Starovoitov if (map_flags > BPF_EXIST) 10328fbcfa0SAlexei Starovoitov /* unknown flags */ 10428fbcfa0SAlexei Starovoitov return -EINVAL; 10528fbcfa0SAlexei Starovoitov 10628fbcfa0SAlexei Starovoitov if (index >= array->map.max_entries) 10728fbcfa0SAlexei Starovoitov /* all elements were pre-allocated, cannot insert a new one */ 10828fbcfa0SAlexei Starovoitov return -E2BIG; 10928fbcfa0SAlexei Starovoitov 11028fbcfa0SAlexei Starovoitov if (map_flags == BPF_NOEXIST) 111daaf427cSAlexei Starovoitov /* all elements already exist */ 11228fbcfa0SAlexei Starovoitov return -EEXIST; 11328fbcfa0SAlexei Starovoitov 114fbca9d2dSDaniel Borkmann memcpy(array->value + array->elem_size * index, value, map->value_size); 11528fbcfa0SAlexei Starovoitov return 0; 11628fbcfa0SAlexei Starovoitov } 11728fbcfa0SAlexei Starovoitov 11828fbcfa0SAlexei Starovoitov /* Called from syscall or from eBPF program */ 11928fbcfa0SAlexei Starovoitov static int array_map_delete_elem(struct bpf_map *map, void *key) 12028fbcfa0SAlexei Starovoitov { 12128fbcfa0SAlexei Starovoitov return -EINVAL; 12228fbcfa0SAlexei Starovoitov } 12328fbcfa0SAlexei Starovoitov 12428fbcfa0SAlexei Starovoitov /* Called when map->refcnt goes to zero, either from workqueue or from syscall */ 12528fbcfa0SAlexei Starovoitov static void array_map_free(struct bpf_map *map) 12628fbcfa0SAlexei Starovoitov { 12728fbcfa0SAlexei Starovoitov struct bpf_array *array = container_of(map, struct bpf_array, map); 12828fbcfa0SAlexei Starovoitov 12928fbcfa0SAlexei Starovoitov /* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0, 13028fbcfa0SAlexei Starovoitov * so the programs (can be more than one that used this map) were 13128fbcfa0SAlexei Starovoitov * disconnected from events. Wait for outstanding programs to complete 13228fbcfa0SAlexei Starovoitov * and free the array 13328fbcfa0SAlexei Starovoitov */ 13428fbcfa0SAlexei Starovoitov synchronize_rcu(); 13528fbcfa0SAlexei Starovoitov 13628fbcfa0SAlexei Starovoitov kvfree(array); 13728fbcfa0SAlexei Starovoitov } 13828fbcfa0SAlexei Starovoitov 139a2c83fffSDaniel Borkmann static const struct bpf_map_ops array_ops = { 14028fbcfa0SAlexei Starovoitov .map_alloc = array_map_alloc, 14128fbcfa0SAlexei Starovoitov .map_free = array_map_free, 14228fbcfa0SAlexei Starovoitov .map_get_next_key = array_map_get_next_key, 14328fbcfa0SAlexei Starovoitov .map_lookup_elem = array_map_lookup_elem, 14428fbcfa0SAlexei Starovoitov .map_update_elem = array_map_update_elem, 14528fbcfa0SAlexei Starovoitov .map_delete_elem = array_map_delete_elem, 14628fbcfa0SAlexei Starovoitov }; 14728fbcfa0SAlexei Starovoitov 148a2c83fffSDaniel Borkmann static struct bpf_map_type_list array_type __read_mostly = { 14928fbcfa0SAlexei Starovoitov .ops = &array_ops, 15028fbcfa0SAlexei Starovoitov .type = BPF_MAP_TYPE_ARRAY, 15128fbcfa0SAlexei Starovoitov }; 15228fbcfa0SAlexei Starovoitov 15328fbcfa0SAlexei Starovoitov static int __init register_array_map(void) 15428fbcfa0SAlexei Starovoitov { 155a2c83fffSDaniel Borkmann bpf_register_map_type(&array_type); 15628fbcfa0SAlexei Starovoitov return 0; 15728fbcfa0SAlexei Starovoitov } 15828fbcfa0SAlexei Starovoitov late_initcall(register_array_map); 15904fd61abSAlexei Starovoitov 1602a36f0b9SWang Nan static struct bpf_map *fd_array_map_alloc(union bpf_attr *attr) 16104fd61abSAlexei Starovoitov { 1622a36f0b9SWang Nan /* only file descriptors can be stored in this type of map */ 16304fd61abSAlexei Starovoitov if (attr->value_size != sizeof(u32)) 16404fd61abSAlexei Starovoitov return ERR_PTR(-EINVAL); 16504fd61abSAlexei Starovoitov return array_map_alloc(attr); 16604fd61abSAlexei Starovoitov } 16704fd61abSAlexei Starovoitov 1682a36f0b9SWang Nan static void fd_array_map_free(struct bpf_map *map) 16904fd61abSAlexei Starovoitov { 17004fd61abSAlexei Starovoitov struct bpf_array *array = container_of(map, struct bpf_array, map); 17104fd61abSAlexei Starovoitov int i; 17204fd61abSAlexei Starovoitov 17304fd61abSAlexei Starovoitov synchronize_rcu(); 17404fd61abSAlexei Starovoitov 17504fd61abSAlexei Starovoitov /* make sure it's empty */ 17604fd61abSAlexei Starovoitov for (i = 0; i < array->map.max_entries; i++) 1772a36f0b9SWang Nan BUG_ON(array->ptrs[i] != NULL); 17804fd61abSAlexei Starovoitov kvfree(array); 17904fd61abSAlexei Starovoitov } 18004fd61abSAlexei Starovoitov 1812a36f0b9SWang Nan static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key) 18204fd61abSAlexei Starovoitov { 18304fd61abSAlexei Starovoitov return NULL; 18404fd61abSAlexei Starovoitov } 18504fd61abSAlexei Starovoitov 18604fd61abSAlexei Starovoitov /* only called from syscall */ 1872a36f0b9SWang Nan static int fd_array_map_update_elem(struct bpf_map *map, void *key, 18804fd61abSAlexei Starovoitov void *value, u64 map_flags) 18904fd61abSAlexei Starovoitov { 19004fd61abSAlexei Starovoitov struct bpf_array *array = container_of(map, struct bpf_array, map); 1912a36f0b9SWang Nan void *new_ptr, *old_ptr; 19204fd61abSAlexei Starovoitov u32 index = *(u32 *)key, ufd; 19304fd61abSAlexei Starovoitov 19404fd61abSAlexei Starovoitov if (map_flags != BPF_ANY) 19504fd61abSAlexei Starovoitov return -EINVAL; 19604fd61abSAlexei Starovoitov 19704fd61abSAlexei Starovoitov if (index >= array->map.max_entries) 19804fd61abSAlexei Starovoitov return -E2BIG; 19904fd61abSAlexei Starovoitov 20004fd61abSAlexei Starovoitov ufd = *(u32 *)value; 2012a36f0b9SWang Nan new_ptr = map->ops->map_fd_get_ptr(map, ufd); 2022a36f0b9SWang Nan if (IS_ERR(new_ptr)) 2032a36f0b9SWang Nan return PTR_ERR(new_ptr); 20404fd61abSAlexei Starovoitov 2052a36f0b9SWang Nan old_ptr = xchg(array->ptrs + index, new_ptr); 2062a36f0b9SWang Nan if (old_ptr) 2072a36f0b9SWang Nan map->ops->map_fd_put_ptr(old_ptr); 20804fd61abSAlexei Starovoitov 20904fd61abSAlexei Starovoitov return 0; 21004fd61abSAlexei Starovoitov } 21104fd61abSAlexei Starovoitov 2122a36f0b9SWang Nan static int fd_array_map_delete_elem(struct bpf_map *map, void *key) 21304fd61abSAlexei Starovoitov { 21404fd61abSAlexei Starovoitov struct bpf_array *array = container_of(map, struct bpf_array, map); 2152a36f0b9SWang Nan void *old_ptr; 21604fd61abSAlexei Starovoitov u32 index = *(u32 *)key; 21704fd61abSAlexei Starovoitov 21804fd61abSAlexei Starovoitov if (index >= array->map.max_entries) 21904fd61abSAlexei Starovoitov return -E2BIG; 22004fd61abSAlexei Starovoitov 2212a36f0b9SWang Nan old_ptr = xchg(array->ptrs + index, NULL); 2222a36f0b9SWang Nan if (old_ptr) { 2232a36f0b9SWang Nan map->ops->map_fd_put_ptr(old_ptr); 22404fd61abSAlexei Starovoitov return 0; 22504fd61abSAlexei Starovoitov } else { 22604fd61abSAlexei Starovoitov return -ENOENT; 22704fd61abSAlexei Starovoitov } 22804fd61abSAlexei Starovoitov } 22904fd61abSAlexei Starovoitov 2302a36f0b9SWang Nan static void *prog_fd_array_get_ptr(struct bpf_map *map, int fd) 2312a36f0b9SWang Nan { 2322a36f0b9SWang Nan struct bpf_array *array = container_of(map, struct bpf_array, map); 2332a36f0b9SWang Nan struct bpf_prog *prog = bpf_prog_get(fd); 2342a36f0b9SWang Nan if (IS_ERR(prog)) 2352a36f0b9SWang Nan return prog; 2362a36f0b9SWang Nan 2372a36f0b9SWang Nan if (!bpf_prog_array_compatible(array, prog)) { 2382a36f0b9SWang Nan bpf_prog_put(prog); 2392a36f0b9SWang Nan return ERR_PTR(-EINVAL); 2402a36f0b9SWang Nan } 2412a36f0b9SWang Nan return prog; 2422a36f0b9SWang Nan } 2432a36f0b9SWang Nan 2442a36f0b9SWang Nan static void prog_fd_array_put_ptr(void *ptr) 2452a36f0b9SWang Nan { 2462a36f0b9SWang Nan struct bpf_prog *prog = ptr; 2472a36f0b9SWang Nan 2482a36f0b9SWang Nan bpf_prog_put_rcu(prog); 2492a36f0b9SWang Nan } 2502a36f0b9SWang Nan 25104fd61abSAlexei Starovoitov /* decrement refcnt of all bpf_progs that are stored in this map */ 2522a36f0b9SWang Nan void bpf_fd_array_map_clear(struct bpf_map *map) 25304fd61abSAlexei Starovoitov { 25404fd61abSAlexei Starovoitov struct bpf_array *array = container_of(map, struct bpf_array, map); 25504fd61abSAlexei Starovoitov int i; 25604fd61abSAlexei Starovoitov 25704fd61abSAlexei Starovoitov for (i = 0; i < array->map.max_entries; i++) 2582a36f0b9SWang Nan fd_array_map_delete_elem(map, &i); 25904fd61abSAlexei Starovoitov } 26004fd61abSAlexei Starovoitov 26104fd61abSAlexei Starovoitov static const struct bpf_map_ops prog_array_ops = { 2622a36f0b9SWang Nan .map_alloc = fd_array_map_alloc, 2632a36f0b9SWang Nan .map_free = fd_array_map_free, 26404fd61abSAlexei Starovoitov .map_get_next_key = array_map_get_next_key, 2652a36f0b9SWang Nan .map_lookup_elem = fd_array_map_lookup_elem, 2662a36f0b9SWang Nan .map_update_elem = fd_array_map_update_elem, 2672a36f0b9SWang Nan .map_delete_elem = fd_array_map_delete_elem, 2682a36f0b9SWang Nan .map_fd_get_ptr = prog_fd_array_get_ptr, 2692a36f0b9SWang Nan .map_fd_put_ptr = prog_fd_array_put_ptr, 27004fd61abSAlexei Starovoitov }; 27104fd61abSAlexei Starovoitov 27204fd61abSAlexei Starovoitov static struct bpf_map_type_list prog_array_type __read_mostly = { 27304fd61abSAlexei Starovoitov .ops = &prog_array_ops, 27404fd61abSAlexei Starovoitov .type = BPF_MAP_TYPE_PROG_ARRAY, 27504fd61abSAlexei Starovoitov }; 27604fd61abSAlexei Starovoitov 27704fd61abSAlexei Starovoitov static int __init register_prog_array_map(void) 27804fd61abSAlexei Starovoitov { 27904fd61abSAlexei Starovoitov bpf_register_map_type(&prog_array_type); 28004fd61abSAlexei Starovoitov return 0; 28104fd61abSAlexei Starovoitov } 28204fd61abSAlexei Starovoitov late_initcall(register_prog_array_map); 283ea317b26SKaixu Xia 284ea317b26SKaixu Xia static void perf_event_array_map_free(struct bpf_map *map) 285ea317b26SKaixu Xia { 286ea317b26SKaixu Xia bpf_fd_array_map_clear(map); 287ea317b26SKaixu Xia fd_array_map_free(map); 288ea317b26SKaixu Xia } 289ea317b26SKaixu Xia 290ea317b26SKaixu Xia static void *perf_event_fd_array_get_ptr(struct bpf_map *map, int fd) 291ea317b26SKaixu Xia { 292ea317b26SKaixu Xia struct perf_event *event; 293ea317b26SKaixu Xia const struct perf_event_attr *attr; 294*e03e7ee3SAlexei Starovoitov struct file *file; 295ea317b26SKaixu Xia 296*e03e7ee3SAlexei Starovoitov file = perf_event_get(fd); 297*e03e7ee3SAlexei Starovoitov if (IS_ERR(file)) 298*e03e7ee3SAlexei Starovoitov return file; 299*e03e7ee3SAlexei Starovoitov 300*e03e7ee3SAlexei Starovoitov event = file->private_data; 301ea317b26SKaixu Xia 302ea317b26SKaixu Xia attr = perf_event_attrs(event); 303ea317b26SKaixu Xia if (IS_ERR(attr)) 30462544ce8SAlexei Starovoitov goto err; 305ea317b26SKaixu Xia 30662544ce8SAlexei Starovoitov if (attr->inherit) 30762544ce8SAlexei Starovoitov goto err; 30862544ce8SAlexei Starovoitov 30962544ce8SAlexei Starovoitov if (attr->type == PERF_TYPE_RAW) 310*e03e7ee3SAlexei Starovoitov return file; 31162544ce8SAlexei Starovoitov 31262544ce8SAlexei Starovoitov if (attr->type == PERF_TYPE_HARDWARE) 313*e03e7ee3SAlexei Starovoitov return file; 31462544ce8SAlexei Starovoitov 31562544ce8SAlexei Starovoitov if (attr->type == PERF_TYPE_SOFTWARE && 31662544ce8SAlexei Starovoitov attr->config == PERF_COUNT_SW_BPF_OUTPUT) 317*e03e7ee3SAlexei Starovoitov return file; 31862544ce8SAlexei Starovoitov err: 319*e03e7ee3SAlexei Starovoitov fput(file); 320ea317b26SKaixu Xia return ERR_PTR(-EINVAL); 321ea317b26SKaixu Xia } 322ea317b26SKaixu Xia 323ea317b26SKaixu Xia static void perf_event_fd_array_put_ptr(void *ptr) 324ea317b26SKaixu Xia { 325*e03e7ee3SAlexei Starovoitov fput((struct file *)ptr); 326ea317b26SKaixu Xia } 327ea317b26SKaixu Xia 328ea317b26SKaixu Xia static const struct bpf_map_ops perf_event_array_ops = { 329ea317b26SKaixu Xia .map_alloc = fd_array_map_alloc, 330ea317b26SKaixu Xia .map_free = perf_event_array_map_free, 331ea317b26SKaixu Xia .map_get_next_key = array_map_get_next_key, 332ea317b26SKaixu Xia .map_lookup_elem = fd_array_map_lookup_elem, 333ea317b26SKaixu Xia .map_update_elem = fd_array_map_update_elem, 334ea317b26SKaixu Xia .map_delete_elem = fd_array_map_delete_elem, 335ea317b26SKaixu Xia .map_fd_get_ptr = perf_event_fd_array_get_ptr, 336ea317b26SKaixu Xia .map_fd_put_ptr = perf_event_fd_array_put_ptr, 337ea317b26SKaixu Xia }; 338ea317b26SKaixu Xia 339ea317b26SKaixu Xia static struct bpf_map_type_list perf_event_array_type __read_mostly = { 340ea317b26SKaixu Xia .ops = &perf_event_array_ops, 341ea317b26SKaixu Xia .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, 342ea317b26SKaixu Xia }; 343ea317b26SKaixu Xia 344ea317b26SKaixu Xia static int __init register_perf_event_array_map(void) 345ea317b26SKaixu Xia { 346ea317b26SKaixu Xia bpf_register_map_type(&perf_event_array_type); 347ea317b26SKaixu Xia return 0; 348ea317b26SKaixu Xia } 349ea317b26SKaixu Xia late_initcall(register_perf_event_array_map); 350