125763b3cSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only 2d5a3b1f6SAlexei Starovoitov /* Copyright (c) 2016 Facebook 3d5a3b1f6SAlexei Starovoitov */ 4d5a3b1f6SAlexei Starovoitov #include <linux/bpf.h> 5d5a3b1f6SAlexei Starovoitov #include <linux/jhash.h> 6d5a3b1f6SAlexei Starovoitov #include <linux/filter.h> 77b04d6d6SSong Liu #include <linux/kernel.h> 8d5a3b1f6SAlexei Starovoitov #include <linux/stacktrace.h> 9d5a3b1f6SAlexei Starovoitov #include <linux/perf_event.h> 10c9a0f3b8SJiri Olsa #include <linux/btf_ids.h> 11bd7525daSJiri Olsa #include <linux/buildid.h> 12557c0c6eSAlexei Starovoitov #include "percpu_freelist.h" 137c7e3d31SSong Liu #include "mmap_unlock_work.h" 14d5a3b1f6SAlexei Starovoitov 156e71b04aSChenbo Feng #define STACK_CREATE_FLAG_MASK \ 16615755a7SSong Liu (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY | \ 17615755a7SSong Liu BPF_F_STACK_BUILD_ID) 186e71b04aSChenbo Feng 19d5a3b1f6SAlexei Starovoitov struct stack_map_bucket { 20557c0c6eSAlexei Starovoitov struct pcpu_freelist_node fnode; 21d5a3b1f6SAlexei Starovoitov u32 hash; 22d5a3b1f6SAlexei Starovoitov u32 nr; 23615755a7SSong Liu u64 data[]; 24d5a3b1f6SAlexei Starovoitov }; 25d5a3b1f6SAlexei Starovoitov 26d5a3b1f6SAlexei Starovoitov struct bpf_stack_map { 27d5a3b1f6SAlexei Starovoitov struct bpf_map map; 28557c0c6eSAlexei Starovoitov void *elems; 29557c0c6eSAlexei Starovoitov struct pcpu_freelist freelist; 30d5a3b1f6SAlexei Starovoitov u32 n_buckets; 3184cb9cbdSKees Cook struct stack_map_bucket *buckets[] __counted_by(n_buckets); 32d5a3b1f6SAlexei Starovoitov }; 33d5a3b1f6SAlexei Starovoitov 34615755a7SSong Liu static inline bool stack_map_use_build_id(struct bpf_map *map) 35615755a7SSong Liu { 36615755a7SSong Liu return (map->map_flags & BPF_F_STACK_BUILD_ID); 37615755a7SSong Liu } 38615755a7SSong Liu 39615755a7SSong Liu static inline int stack_map_data_size(struct bpf_map *map) 40615755a7SSong Liu { 41615755a7SSong Liu return stack_map_use_build_id(map) ? 42615755a7SSong Liu sizeof(struct bpf_stack_build_id) : sizeof(u64); 43615755a7SSong Liu } 44615755a7SSong Liu 45557c0c6eSAlexei Starovoitov static int prealloc_elems_and_freelist(struct bpf_stack_map *smap) 46557c0c6eSAlexei Starovoitov { 4730e29a9aSTatsuhiko Yasumatsu u64 elem_size = sizeof(struct stack_map_bucket) + 4830e29a9aSTatsuhiko Yasumatsu (u64)smap->map.value_size; 49557c0c6eSAlexei Starovoitov int err; 50557c0c6eSAlexei Starovoitov 5196eabe7aSMartin KaFai Lau smap->elems = bpf_map_area_alloc(elem_size * smap->map.max_entries, 5296eabe7aSMartin KaFai Lau smap->map.numa_node); 53557c0c6eSAlexei Starovoitov if (!smap->elems) 54557c0c6eSAlexei Starovoitov return -ENOMEM; 55557c0c6eSAlexei Starovoitov 56557c0c6eSAlexei Starovoitov err = pcpu_freelist_init(&smap->freelist); 57557c0c6eSAlexei Starovoitov if (err) 58557c0c6eSAlexei Starovoitov goto free_elems; 59557c0c6eSAlexei Starovoitov 60557c0c6eSAlexei Starovoitov pcpu_freelist_populate(&smap->freelist, smap->elems, elem_size, 61557c0c6eSAlexei Starovoitov smap->map.max_entries); 62557c0c6eSAlexei Starovoitov return 0; 63557c0c6eSAlexei Starovoitov 64557c0c6eSAlexei Starovoitov free_elems: 65d407bd25SDaniel Borkmann bpf_map_area_free(smap->elems); 66557c0c6eSAlexei Starovoitov return err; 67557c0c6eSAlexei Starovoitov } 68557c0c6eSAlexei Starovoitov 69d5a3b1f6SAlexei Starovoitov /* Called from syscall */ 70d5a3b1f6SAlexei Starovoitov static struct bpf_map *stack_map_alloc(union bpf_attr *attr) 71d5a3b1f6SAlexei Starovoitov { 72d5a3b1f6SAlexei Starovoitov u32 value_size = attr->value_size; 73d5a3b1f6SAlexei Starovoitov struct bpf_stack_map *smap; 74d5a3b1f6SAlexei Starovoitov u64 cost, n_buckets; 75d5a3b1f6SAlexei Starovoitov int err; 76d5a3b1f6SAlexei Starovoitov 776e71b04aSChenbo Feng if (attr->map_flags & ~STACK_CREATE_FLAG_MASK) 78823707b6SAlexei Starovoitov return ERR_PTR(-EINVAL); 79823707b6SAlexei Starovoitov 80d5a3b1f6SAlexei Starovoitov /* check sanity of attributes */ 81d5a3b1f6SAlexei Starovoitov if (attr->max_entries == 0 || attr->key_size != 4 || 82615755a7SSong Liu value_size < 8 || value_size % 8) 83615755a7SSong Liu return ERR_PTR(-EINVAL); 84615755a7SSong Liu 85615755a7SSong Liu BUILD_BUG_ON(sizeof(struct bpf_stack_build_id) % sizeof(u64)); 86615755a7SSong Liu if (attr->map_flags & BPF_F_STACK_BUILD_ID) { 87615755a7SSong Liu if (value_size % sizeof(struct bpf_stack_build_id) || 88615755a7SSong Liu value_size / sizeof(struct bpf_stack_build_id) 89615755a7SSong Liu > sysctl_perf_event_max_stack) 90615755a7SSong Liu return ERR_PTR(-EINVAL); 91615755a7SSong Liu } else if (value_size / 8 > sysctl_perf_event_max_stack) 92d5a3b1f6SAlexei Starovoitov return ERR_PTR(-EINVAL); 93d5a3b1f6SAlexei Starovoitov 947a4b2125SToke Høiland-Jørgensen /* hash table size must be power of 2; roundup_pow_of_two() can overflow 957a4b2125SToke Høiland-Jørgensen * into UB on 32-bit arches, so check that first 967a4b2125SToke Høiland-Jørgensen */ 977a4b2125SToke Høiland-Jørgensen if (attr->max_entries > 1UL << 31) 986183f4d3SBui Quang Minh return ERR_PTR(-E2BIG); 99d5a3b1f6SAlexei Starovoitov 1007a4b2125SToke Høiland-Jørgensen n_buckets = roundup_pow_of_two(attr->max_entries); 1017a4b2125SToke Høiland-Jørgensen 102d5a3b1f6SAlexei Starovoitov cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap); 103b936ca64SRoman Gushchin smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr)); 10437086810SRoman Gushchin if (!smap) 105b936ca64SRoman Gushchin return ERR_PTR(-ENOMEM); 106d5a3b1f6SAlexei Starovoitov 107bd475643SJakub Kicinski bpf_map_init_from_attr(&smap->map, attr); 108d5a3b1f6SAlexei Starovoitov smap->n_buckets = n_buckets; 109557c0c6eSAlexei Starovoitov 11097c79a38SArnaldo Carvalho de Melo err = get_callchain_buffers(sysctl_perf_event_max_stack); 111d5a3b1f6SAlexei Starovoitov if (err) 11237086810SRoman Gushchin goto free_smap; 113d5a3b1f6SAlexei Starovoitov 114557c0c6eSAlexei Starovoitov err = prealloc_elems_and_freelist(smap); 115557c0c6eSAlexei Starovoitov if (err) 116557c0c6eSAlexei Starovoitov goto put_buffers; 117557c0c6eSAlexei Starovoitov 118d5a3b1f6SAlexei Starovoitov return &smap->map; 119d5a3b1f6SAlexei Starovoitov 120557c0c6eSAlexei Starovoitov put_buffers: 121557c0c6eSAlexei Starovoitov put_callchain_buffers(); 12237086810SRoman Gushchin free_smap: 123d407bd25SDaniel Borkmann bpf_map_area_free(smap); 124d5a3b1f6SAlexei Starovoitov return ERR_PTR(err); 125d5a3b1f6SAlexei Starovoitov } 126d5a3b1f6SAlexei Starovoitov 1275f412632SYonghong Song static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, 128615755a7SSong Liu u64 *ips, u32 trace_nr, bool user) 129615755a7SSong Liu { 130615755a7SSong Liu int i; 1317c7e3d31SSong Liu struct mmap_unlock_irq_work *work = NULL; 1327c7e3d31SSong Liu bool irq_work_busy = bpf_mmap_unlock_get_irq_work(&work); 133ceac059eSHao Luo struct vm_area_struct *vma, *prev_vma = NULL; 134ceac059eSHao Luo const char *prev_build_id; 135bae77c5eSSong Liu 1367c7e3d31SSong Liu /* If the irq_work is in use, fall back to report ips. Same 1377c7e3d31SSong Liu * fallback is used for kernel stack (!user) on a stackmap with 1387c7e3d31SSong Liu * build_id. 139615755a7SSong Liu */ 140bae77c5eSSong Liu if (!user || !current || !current->mm || irq_work_busy || 1412f1aaf3eSYonghong Song !mmap_read_trylock(current->mm)) { 142615755a7SSong Liu /* cannot access current->mm, fall back to ips */ 143615755a7SSong Liu for (i = 0; i < trace_nr; i++) { 144615755a7SSong Liu id_offs[i].status = BPF_STACK_BUILD_ID_IP; 145615755a7SSong Liu id_offs[i].ip = ips[i]; 146bd7525daSJiri Olsa memset(id_offs[i].build_id, 0, BUILD_ID_SIZE_MAX); 147615755a7SSong Liu } 148615755a7SSong Liu return; 149615755a7SSong Liu } 150615755a7SSong Liu 151615755a7SSong Liu for (i = 0; i < trace_nr; i++) { 152ceac059eSHao Luo if (range_in_vma(prev_vma, ips[i], ips[i])) { 153ceac059eSHao Luo vma = prev_vma; 154ceac059eSHao Luo memcpy(id_offs[i].build_id, prev_build_id, 155ceac059eSHao Luo BUILD_ID_SIZE_MAX); 156ceac059eSHao Luo goto build_id_valid; 157ceac059eSHao Luo } 158615755a7SSong Liu vma = find_vma(current->mm, ips[i]); 159*45b8fc30SAndrii Nakryiko if (!vma || build_id_parse_nofault(vma, id_offs[i].build_id, NULL)) { 160615755a7SSong Liu /* per entry fall back to ips */ 161615755a7SSong Liu id_offs[i].status = BPF_STACK_BUILD_ID_IP; 162615755a7SSong Liu id_offs[i].ip = ips[i]; 163bd7525daSJiri Olsa memset(id_offs[i].build_id, 0, BUILD_ID_SIZE_MAX); 164615755a7SSong Liu continue; 165615755a7SSong Liu } 166ceac059eSHao Luo build_id_valid: 167615755a7SSong Liu id_offs[i].offset = (vma->vm_pgoff << PAGE_SHIFT) + ips[i] 168615755a7SSong Liu - vma->vm_start; 169615755a7SSong Liu id_offs[i].status = BPF_STACK_BUILD_ID_VALID; 170ceac059eSHao Luo prev_vma = vma; 171ceac059eSHao Luo prev_build_id = id_offs[i].build_id; 172615755a7SSong Liu } 1737c7e3d31SSong Liu bpf_mmap_unlock_mm(work, current->mm); 174615755a7SSong Liu } 175615755a7SSong Liu 176fa28dcb8SSong Liu static struct perf_callchain_entry * 177ee2a0988SNamhyung Kim get_callchain_entry_for_task(struct task_struct *task, u32 max_depth) 178fa28dcb8SSong Liu { 179046cc3ddSSong Liu #ifdef CONFIG_STACKTRACE 180fa28dcb8SSong Liu struct perf_callchain_entry *entry; 181fa28dcb8SSong Liu int rctx; 182fa28dcb8SSong Liu 183fa28dcb8SSong Liu entry = get_callchain_entry(&rctx); 184fa28dcb8SSong Liu 185fa28dcb8SSong Liu if (!entry) 186fa28dcb8SSong Liu return NULL; 187fa28dcb8SSong Liu 188ee2a0988SNamhyung Kim entry->nr = stack_trace_save_tsk(task, (unsigned long *)entry->ip, 189ee2a0988SNamhyung Kim max_depth, 0); 190fa28dcb8SSong Liu 191fa28dcb8SSong Liu /* stack_trace_save_tsk() works on unsigned long array, while 192fa28dcb8SSong Liu * perf_callchain_entry uses u64 array. For 32-bit systems, it is 193fa28dcb8SSong Liu * necessary to fix this mismatch. 194fa28dcb8SSong Liu */ 195fa28dcb8SSong Liu if (__BITS_PER_LONG != 64) { 196fa28dcb8SSong Liu unsigned long *from = (unsigned long *) entry->ip; 197fa28dcb8SSong Liu u64 *to = entry->ip; 198fa28dcb8SSong Liu int i; 199fa28dcb8SSong Liu 200fa28dcb8SSong Liu /* copy data from the end to avoid using extra buffer */ 201ee2a0988SNamhyung Kim for (i = entry->nr - 1; i >= 0; i--) 202fa28dcb8SSong Liu to[i] = (u64)(from[i]); 203fa28dcb8SSong Liu } 204fa28dcb8SSong Liu 205fa28dcb8SSong Liu put_callchain_entry(rctx); 206fa28dcb8SSong Liu 207fa28dcb8SSong Liu return entry; 208046cc3ddSSong Liu #else /* CONFIG_STACKTRACE */ 209046cc3ddSSong Liu return NULL; 210046cc3ddSSong Liu #endif 211fa28dcb8SSong Liu } 212fa28dcb8SSong Liu 2137b04d6d6SSong Liu static long __bpf_get_stackid(struct bpf_map *map, 2147b04d6d6SSong Liu struct perf_callchain_entry *trace, u64 flags) 215d5a3b1f6SAlexei Starovoitov { 216d5a3b1f6SAlexei Starovoitov struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); 217d5a3b1f6SAlexei Starovoitov struct stack_map_bucket *bucket, *new_bucket, *old_bucket; 218d5a3b1f6SAlexei Starovoitov u32 skip = flags & BPF_F_SKIP_FIELD_MASK; 219d5a3b1f6SAlexei Starovoitov u32 hash, id, trace_nr, trace_len; 220d5a3b1f6SAlexei Starovoitov bool user = flags & BPF_F_USER_STACK; 221d5a3b1f6SAlexei Starovoitov u64 *ips; 222615755a7SSong Liu bool hash_matches; 223d5a3b1f6SAlexei Starovoitov 224ee2a0988SNamhyung Kim if (trace->nr <= skip) 225d5a3b1f6SAlexei Starovoitov /* skipping more than usable stack trace */ 226d5a3b1f6SAlexei Starovoitov return -EFAULT; 227d5a3b1f6SAlexei Starovoitov 228ee2a0988SNamhyung Kim trace_nr = trace->nr - skip; 229d5a3b1f6SAlexei Starovoitov trace_len = trace_nr * sizeof(u64); 230ee2a0988SNamhyung Kim ips = trace->ip + skip; 231d5a3b1f6SAlexei Starovoitov hash = jhash2((u32 *)ips, trace_len / sizeof(u32), 0); 232d5a3b1f6SAlexei Starovoitov id = hash & (smap->n_buckets - 1); 233557c0c6eSAlexei Starovoitov bucket = READ_ONCE(smap->buckets[id]); 234d5a3b1f6SAlexei Starovoitov 235615755a7SSong Liu hash_matches = bucket && bucket->hash == hash; 236615755a7SSong Liu /* fast cmp */ 237615755a7SSong Liu if (hash_matches && flags & BPF_F_FAST_STACK_CMP) 238d5a3b1f6SAlexei Starovoitov return id; 239615755a7SSong Liu 240615755a7SSong Liu if (stack_map_use_build_id(map)) { 241615755a7SSong Liu /* for build_id+offset, pop a bucket before slow cmp */ 242615755a7SSong Liu new_bucket = (struct stack_map_bucket *) 243615755a7SSong Liu pcpu_freelist_pop(&smap->freelist); 244615755a7SSong Liu if (unlikely(!new_bucket)) 245615755a7SSong Liu return -ENOMEM; 2465f412632SYonghong Song new_bucket->nr = trace_nr; 2475f412632SYonghong Song stack_map_get_build_id_offset( 2485f412632SYonghong Song (struct bpf_stack_build_id *)new_bucket->data, 2495f412632SYonghong Song ips, trace_nr, user); 250615755a7SSong Liu trace_len = trace_nr * sizeof(struct bpf_stack_build_id); 251615755a7SSong Liu if (hash_matches && bucket->nr == trace_nr && 252615755a7SSong Liu memcmp(bucket->data, new_bucket->data, trace_len) == 0) { 253615755a7SSong Liu pcpu_freelist_push(&smap->freelist, &new_bucket->fnode); 254d5a3b1f6SAlexei Starovoitov return id; 255d5a3b1f6SAlexei Starovoitov } 256615755a7SSong Liu if (bucket && !(flags & BPF_F_REUSE_STACKID)) { 257615755a7SSong Liu pcpu_freelist_push(&smap->freelist, &new_bucket->fnode); 258615755a7SSong Liu return -EEXIST; 259615755a7SSong Liu } 260615755a7SSong Liu } else { 261615755a7SSong Liu if (hash_matches && bucket->nr == trace_nr && 262615755a7SSong Liu memcmp(bucket->data, ips, trace_len) == 0) 263615755a7SSong Liu return id; 264d5a3b1f6SAlexei Starovoitov if (bucket && !(flags & BPF_F_REUSE_STACKID)) 265d5a3b1f6SAlexei Starovoitov return -EEXIST; 266d5a3b1f6SAlexei Starovoitov 267557c0c6eSAlexei Starovoitov new_bucket = (struct stack_map_bucket *) 268557c0c6eSAlexei Starovoitov pcpu_freelist_pop(&smap->freelist); 269d5a3b1f6SAlexei Starovoitov if (unlikely(!new_bucket)) 270d5a3b1f6SAlexei Starovoitov return -ENOMEM; 271615755a7SSong Liu memcpy(new_bucket->data, ips, trace_len); 272615755a7SSong Liu } 273d5a3b1f6SAlexei Starovoitov 274d5a3b1f6SAlexei Starovoitov new_bucket->hash = hash; 275d5a3b1f6SAlexei Starovoitov new_bucket->nr = trace_nr; 276d5a3b1f6SAlexei Starovoitov 277d5a3b1f6SAlexei Starovoitov old_bucket = xchg(&smap->buckets[id], new_bucket); 278d5a3b1f6SAlexei Starovoitov if (old_bucket) 279557c0c6eSAlexei Starovoitov pcpu_freelist_push(&smap->freelist, &old_bucket->fnode); 280d5a3b1f6SAlexei Starovoitov return id; 281d5a3b1f6SAlexei Starovoitov } 282d5a3b1f6SAlexei Starovoitov 2837b04d6d6SSong Liu BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map, 2847b04d6d6SSong Liu u64, flags) 2857b04d6d6SSong Liu { 2867b04d6d6SSong Liu u32 max_depth = map->value_size / stack_map_data_size(map); 287ee2a0988SNamhyung Kim u32 skip = flags & BPF_F_SKIP_FIELD_MASK; 2887b04d6d6SSong Liu bool user = flags & BPF_F_USER_STACK; 2897b04d6d6SSong Liu struct perf_callchain_entry *trace; 2907b04d6d6SSong Liu bool kernel = !user; 2917b04d6d6SSong Liu 2927b04d6d6SSong Liu if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK | 2937b04d6d6SSong Liu BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID))) 2947b04d6d6SSong Liu return -EINVAL; 2957b04d6d6SSong Liu 296ee2a0988SNamhyung Kim max_depth += skip; 297ee2a0988SNamhyung Kim if (max_depth > sysctl_perf_event_max_stack) 298ee2a0988SNamhyung Kim max_depth = sysctl_perf_event_max_stack; 299ee2a0988SNamhyung Kim 300ee2a0988SNamhyung Kim trace = get_perf_callchain(regs, 0, kernel, user, max_depth, 301ee2a0988SNamhyung Kim false, false); 3027b04d6d6SSong Liu 3037b04d6d6SSong Liu if (unlikely(!trace)) 3047b04d6d6SSong Liu /* couldn't fetch the stack trace */ 3057b04d6d6SSong Liu return -EFAULT; 3067b04d6d6SSong Liu 3077b04d6d6SSong Liu return __bpf_get_stackid(map, trace, flags); 3087b04d6d6SSong Liu } 3097b04d6d6SSong Liu 310d5a3b1f6SAlexei Starovoitov const struct bpf_func_proto bpf_get_stackid_proto = { 311d5a3b1f6SAlexei Starovoitov .func = bpf_get_stackid, 312d5a3b1f6SAlexei Starovoitov .gpl_only = true, 313d5a3b1f6SAlexei Starovoitov .ret_type = RET_INTEGER, 314d5a3b1f6SAlexei Starovoitov .arg1_type = ARG_PTR_TO_CTX, 315d5a3b1f6SAlexei Starovoitov .arg2_type = ARG_CONST_MAP_PTR, 316d5a3b1f6SAlexei Starovoitov .arg3_type = ARG_ANYTHING, 317d5a3b1f6SAlexei Starovoitov }; 318d5a3b1f6SAlexei Starovoitov 3197b04d6d6SSong Liu static __u64 count_kernel_ip(struct perf_callchain_entry *trace) 3207b04d6d6SSong Liu { 3217b04d6d6SSong Liu __u64 nr_kernel = 0; 3227b04d6d6SSong Liu 3237b04d6d6SSong Liu while (nr_kernel < trace->nr) { 3247b04d6d6SSong Liu if (trace->ip[nr_kernel] == PERF_CONTEXT_USER) 3257b04d6d6SSong Liu break; 3267b04d6d6SSong Liu nr_kernel++; 3277b04d6d6SSong Liu } 3287b04d6d6SSong Liu return nr_kernel; 3297b04d6d6SSong Liu } 3307b04d6d6SSong Liu 3317b04d6d6SSong Liu BPF_CALL_3(bpf_get_stackid_pe, struct bpf_perf_event_data_kern *, ctx, 3327b04d6d6SSong Liu struct bpf_map *, map, u64, flags) 3337b04d6d6SSong Liu { 3347b04d6d6SSong Liu struct perf_event *event = ctx->event; 3357b04d6d6SSong Liu struct perf_callchain_entry *trace; 3367b04d6d6SSong Liu bool kernel, user; 3377b04d6d6SSong Liu __u64 nr_kernel; 3387b04d6d6SSong Liu int ret; 3397b04d6d6SSong Liu 3407b04d6d6SSong Liu /* perf_sample_data doesn't have callchain, use bpf_get_stackid */ 34116817ad7SNamhyung Kim if (!(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)) 3427b04d6d6SSong Liu return bpf_get_stackid((unsigned long)(ctx->regs), 3437b04d6d6SSong Liu (unsigned long) map, flags, 0, 0); 3447b04d6d6SSong Liu 3457b04d6d6SSong Liu if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK | 3467b04d6d6SSong Liu BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID))) 3477b04d6d6SSong Liu return -EINVAL; 3487b04d6d6SSong Liu 3497b04d6d6SSong Liu user = flags & BPF_F_USER_STACK; 3507b04d6d6SSong Liu kernel = !user; 3517b04d6d6SSong Liu 3527b04d6d6SSong Liu trace = ctx->data->callchain; 3537b04d6d6SSong Liu if (unlikely(!trace)) 3547b04d6d6SSong Liu return -EFAULT; 3557b04d6d6SSong Liu 3567b04d6d6SSong Liu nr_kernel = count_kernel_ip(trace); 3577b04d6d6SSong Liu 3587b04d6d6SSong Liu if (kernel) { 3597b04d6d6SSong Liu __u64 nr = trace->nr; 3607b04d6d6SSong Liu 3617b04d6d6SSong Liu trace->nr = nr_kernel; 3627b04d6d6SSong Liu ret = __bpf_get_stackid(map, trace, flags); 3637b04d6d6SSong Liu 3647b04d6d6SSong Liu /* restore nr */ 3657b04d6d6SSong Liu trace->nr = nr; 3667b04d6d6SSong Liu } else { /* user */ 3677b04d6d6SSong Liu u64 skip = flags & BPF_F_SKIP_FIELD_MASK; 3687b04d6d6SSong Liu 3697b04d6d6SSong Liu skip += nr_kernel; 3707b04d6d6SSong Liu if (skip > BPF_F_SKIP_FIELD_MASK) 3717b04d6d6SSong Liu return -EFAULT; 3727b04d6d6SSong Liu 3737b04d6d6SSong Liu flags = (flags & ~BPF_F_SKIP_FIELD_MASK) | skip; 3747b04d6d6SSong Liu ret = __bpf_get_stackid(map, trace, flags); 3757b04d6d6SSong Liu } 3767b04d6d6SSong Liu return ret; 3777b04d6d6SSong Liu } 3787b04d6d6SSong Liu 3797b04d6d6SSong Liu const struct bpf_func_proto bpf_get_stackid_proto_pe = { 3807b04d6d6SSong Liu .func = bpf_get_stackid_pe, 3817b04d6d6SSong Liu .gpl_only = false, 3827b04d6d6SSong Liu .ret_type = RET_INTEGER, 3837b04d6d6SSong Liu .arg1_type = ARG_PTR_TO_CTX, 3847b04d6d6SSong Liu .arg2_type = ARG_CONST_MAP_PTR, 3857b04d6d6SSong Liu .arg3_type = ARG_ANYTHING, 3867b04d6d6SSong Liu }; 3877b04d6d6SSong Liu 388fa28dcb8SSong Liu static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task, 3897b04d6d6SSong Liu struct perf_callchain_entry *trace_in, 390fa28dcb8SSong Liu void *buf, u32 size, u64 flags) 391c195651eSYonghong Song { 392ee2a0988SNamhyung Kim u32 trace_nr, copy_len, elem_size, num_elem, max_depth; 393c195651eSYonghong Song bool user_build_id = flags & BPF_F_USER_BUILD_ID; 394b8e3a87aSJordan Rome bool crosstask = task && task != current; 395c195651eSYonghong Song u32 skip = flags & BPF_F_SKIP_FIELD_MASK; 396c195651eSYonghong Song bool user = flags & BPF_F_USER_STACK; 397c195651eSYonghong Song struct perf_callchain_entry *trace; 398c195651eSYonghong Song bool kernel = !user; 399c195651eSYonghong Song int err = -EINVAL; 400c195651eSYonghong Song u64 *ips; 401c195651eSYonghong Song 402c195651eSYonghong Song if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK | 403c195651eSYonghong Song BPF_F_USER_BUILD_ID))) 404c195651eSYonghong Song goto clear; 405c195651eSYonghong Song if (kernel && user_build_id) 406c195651eSYonghong Song goto clear; 407c195651eSYonghong Song 408c195651eSYonghong Song elem_size = (user && user_build_id) ? sizeof(struct bpf_stack_build_id) 409c195651eSYonghong Song : sizeof(u64); 410c195651eSYonghong Song if (unlikely(size % elem_size)) 411c195651eSYonghong Song goto clear; 412c195651eSYonghong Song 413fa28dcb8SSong Liu /* cannot get valid user stack for task without user_mode regs */ 414fa28dcb8SSong Liu if (task && user && !user_mode(regs)) 415fa28dcb8SSong Liu goto err_fault; 416fa28dcb8SSong Liu 417b8e3a87aSJordan Rome /* get_perf_callchain does not support crosstask user stack walking 418b8e3a87aSJordan Rome * but returns an empty stack instead of NULL. 419b8e3a87aSJordan Rome */ 420b8e3a87aSJordan Rome if (crosstask && user) { 421b8e3a87aSJordan Rome err = -EOPNOTSUPP; 422b8e3a87aSJordan Rome goto clear; 423b8e3a87aSJordan Rome } 424b8e3a87aSJordan Rome 425c195651eSYonghong Song num_elem = size / elem_size; 426ee2a0988SNamhyung Kim max_depth = num_elem + skip; 427ee2a0988SNamhyung Kim if (sysctl_perf_event_max_stack < max_depth) 428ee2a0988SNamhyung Kim max_depth = sysctl_perf_event_max_stack; 429fa28dcb8SSong Liu 4307b04d6d6SSong Liu if (trace_in) 4317b04d6d6SSong Liu trace = trace_in; 4327b04d6d6SSong Liu else if (kernel && task) 433ee2a0988SNamhyung Kim trace = get_callchain_entry_for_task(task, max_depth); 434fa28dcb8SSong Liu else 435ee2a0988SNamhyung Kim trace = get_perf_callchain(regs, 0, kernel, user, max_depth, 436b8e3a87aSJordan Rome crosstask, false); 437c195651eSYonghong Song if (unlikely(!trace)) 438c195651eSYonghong Song goto err_fault; 439c195651eSYonghong Song 440ee2a0988SNamhyung Kim if (trace->nr < skip) 441c195651eSYonghong Song goto err_fault; 442c195651eSYonghong Song 443ee2a0988SNamhyung Kim trace_nr = trace->nr - skip; 444c195651eSYonghong Song trace_nr = (trace_nr <= num_elem) ? trace_nr : num_elem; 445c195651eSYonghong Song copy_len = trace_nr * elem_size; 446ee2a0988SNamhyung Kim 447ee2a0988SNamhyung Kim ips = trace->ip + skip; 448c195651eSYonghong Song if (user && user_build_id) 449c195651eSYonghong Song stack_map_get_build_id_offset(buf, ips, trace_nr, user); 450c195651eSYonghong Song else 451c195651eSYonghong Song memcpy(buf, ips, copy_len); 452c195651eSYonghong Song 453c195651eSYonghong Song if (size > copy_len) 454c195651eSYonghong Song memset(buf + copy_len, 0, size - copy_len); 455c195651eSYonghong Song return copy_len; 456c195651eSYonghong Song 457c195651eSYonghong Song err_fault: 458c195651eSYonghong Song err = -EFAULT; 459c195651eSYonghong Song clear: 460c195651eSYonghong Song memset(buf, 0, size); 461c195651eSYonghong Song return err; 462c195651eSYonghong Song } 463c195651eSYonghong Song 464fa28dcb8SSong Liu BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size, 465fa28dcb8SSong Liu u64, flags) 466fa28dcb8SSong Liu { 4677b04d6d6SSong Liu return __bpf_get_stack(regs, NULL, NULL, buf, size, flags); 468fa28dcb8SSong Liu } 469fa28dcb8SSong Liu 470c195651eSYonghong Song const struct bpf_func_proto bpf_get_stack_proto = { 471c195651eSYonghong Song .func = bpf_get_stack, 472c195651eSYonghong Song .gpl_only = true, 473c195651eSYonghong Song .ret_type = RET_INTEGER, 474c195651eSYonghong Song .arg1_type = ARG_PTR_TO_CTX, 475c195651eSYonghong Song .arg2_type = ARG_PTR_TO_UNINIT_MEM, 476c195651eSYonghong Song .arg3_type = ARG_CONST_SIZE_OR_ZERO, 477c195651eSYonghong Song .arg4_type = ARG_ANYTHING, 478c195651eSYonghong Song }; 479c195651eSYonghong Song 480fa28dcb8SSong Liu BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf, 481fa28dcb8SSong Liu u32, size, u64, flags) 482fa28dcb8SSong Liu { 48306ab134cSDave Marchevsky struct pt_regs *regs; 484b992f01eSNaveen N. Rao long res = -EINVAL; 485fa28dcb8SSong Liu 48606ab134cSDave Marchevsky if (!try_get_task_stack(task)) 48706ab134cSDave Marchevsky return -EFAULT; 48806ab134cSDave Marchevsky 48906ab134cSDave Marchevsky regs = task_pt_regs(task); 490b992f01eSNaveen N. Rao if (regs) 49106ab134cSDave Marchevsky res = __bpf_get_stack(regs, task, NULL, buf, size, flags); 49206ab134cSDave Marchevsky put_task_stack(task); 49306ab134cSDave Marchevsky 49406ab134cSDave Marchevsky return res; 495fa28dcb8SSong Liu } 496fa28dcb8SSong Liu 497fa28dcb8SSong Liu const struct bpf_func_proto bpf_get_task_stack_proto = { 498fa28dcb8SSong Liu .func = bpf_get_task_stack, 499fa28dcb8SSong Liu .gpl_only = false, 500fa28dcb8SSong Liu .ret_type = RET_INTEGER, 501fa28dcb8SSong Liu .arg1_type = ARG_PTR_TO_BTF_ID, 502d19ddb47SSong Liu .arg1_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], 503fa28dcb8SSong Liu .arg2_type = ARG_PTR_TO_UNINIT_MEM, 504fa28dcb8SSong Liu .arg3_type = ARG_CONST_SIZE_OR_ZERO, 505fa28dcb8SSong Liu .arg4_type = ARG_ANYTHING, 506fa28dcb8SSong Liu }; 507fa28dcb8SSong Liu 5087b04d6d6SSong Liu BPF_CALL_4(bpf_get_stack_pe, struct bpf_perf_event_data_kern *, ctx, 5097b04d6d6SSong Liu void *, buf, u32, size, u64, flags) 5107b04d6d6SSong Liu { 5112b9b305fSSong Liu struct pt_regs *regs = (struct pt_regs *)(ctx->regs); 5127b04d6d6SSong Liu struct perf_event *event = ctx->event; 5137b04d6d6SSong Liu struct perf_callchain_entry *trace; 5147b04d6d6SSong Liu bool kernel, user; 5157b04d6d6SSong Liu int err = -EINVAL; 5167b04d6d6SSong Liu __u64 nr_kernel; 5177b04d6d6SSong Liu 51816817ad7SNamhyung Kim if (!(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)) 5192b9b305fSSong Liu return __bpf_get_stack(regs, NULL, NULL, buf, size, flags); 5207b04d6d6SSong Liu 5217b04d6d6SSong Liu if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK | 5227b04d6d6SSong Liu BPF_F_USER_BUILD_ID))) 5237b04d6d6SSong Liu goto clear; 5247b04d6d6SSong Liu 5257b04d6d6SSong Liu user = flags & BPF_F_USER_STACK; 5267b04d6d6SSong Liu kernel = !user; 5277b04d6d6SSong Liu 5287b04d6d6SSong Liu err = -EFAULT; 5297b04d6d6SSong Liu trace = ctx->data->callchain; 5307b04d6d6SSong Liu if (unlikely(!trace)) 5317b04d6d6SSong Liu goto clear; 5327b04d6d6SSong Liu 5337b04d6d6SSong Liu nr_kernel = count_kernel_ip(trace); 5347b04d6d6SSong Liu 5357b04d6d6SSong Liu if (kernel) { 5367b04d6d6SSong Liu __u64 nr = trace->nr; 5377b04d6d6SSong Liu 5387b04d6d6SSong Liu trace->nr = nr_kernel; 5392b9b305fSSong Liu err = __bpf_get_stack(regs, NULL, trace, buf, size, flags); 5407b04d6d6SSong Liu 5417b04d6d6SSong Liu /* restore nr */ 5427b04d6d6SSong Liu trace->nr = nr; 5437b04d6d6SSong Liu } else { /* user */ 5447b04d6d6SSong Liu u64 skip = flags & BPF_F_SKIP_FIELD_MASK; 5457b04d6d6SSong Liu 5467b04d6d6SSong Liu skip += nr_kernel; 5477b04d6d6SSong Liu if (skip > BPF_F_SKIP_FIELD_MASK) 5487b04d6d6SSong Liu goto clear; 5497b04d6d6SSong Liu 5507b04d6d6SSong Liu flags = (flags & ~BPF_F_SKIP_FIELD_MASK) | skip; 5512b9b305fSSong Liu err = __bpf_get_stack(regs, NULL, trace, buf, size, flags); 5527b04d6d6SSong Liu } 5537b04d6d6SSong Liu return err; 5547b04d6d6SSong Liu 5557b04d6d6SSong Liu clear: 5567b04d6d6SSong Liu memset(buf, 0, size); 5577b04d6d6SSong Liu return err; 5587b04d6d6SSong Liu 5597b04d6d6SSong Liu } 5607b04d6d6SSong Liu 5617b04d6d6SSong Liu const struct bpf_func_proto bpf_get_stack_proto_pe = { 5627b04d6d6SSong Liu .func = bpf_get_stack_pe, 5637b04d6d6SSong Liu .gpl_only = true, 5647b04d6d6SSong Liu .ret_type = RET_INTEGER, 5657b04d6d6SSong Liu .arg1_type = ARG_PTR_TO_CTX, 5667b04d6d6SSong Liu .arg2_type = ARG_PTR_TO_UNINIT_MEM, 5677b04d6d6SSong Liu .arg3_type = ARG_CONST_SIZE_OR_ZERO, 5687b04d6d6SSong Liu .arg4_type = ARG_ANYTHING, 5697b04d6d6SSong Liu }; 5707b04d6d6SSong Liu 571557c0c6eSAlexei Starovoitov /* Called from eBPF program */ 572d5a3b1f6SAlexei Starovoitov static void *stack_map_lookup_elem(struct bpf_map *map, void *key) 573d5a3b1f6SAlexei Starovoitov { 5743b4a63f6SPrashant Bhole return ERR_PTR(-EOPNOTSUPP); 575557c0c6eSAlexei Starovoitov } 576557c0c6eSAlexei Starovoitov 577557c0c6eSAlexei Starovoitov /* Called from syscall */ 578557c0c6eSAlexei Starovoitov int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value) 579557c0c6eSAlexei Starovoitov { 580d5a3b1f6SAlexei Starovoitov struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); 581557c0c6eSAlexei Starovoitov struct stack_map_bucket *bucket, *old_bucket; 582557c0c6eSAlexei Starovoitov u32 id = *(u32 *)key, trace_len; 583d5a3b1f6SAlexei Starovoitov 584d5a3b1f6SAlexei Starovoitov if (unlikely(id >= smap->n_buckets)) 585557c0c6eSAlexei Starovoitov return -ENOENT; 586557c0c6eSAlexei Starovoitov 587557c0c6eSAlexei Starovoitov bucket = xchg(&smap->buckets[id], NULL); 588557c0c6eSAlexei Starovoitov if (!bucket) 589557c0c6eSAlexei Starovoitov return -ENOENT; 590557c0c6eSAlexei Starovoitov 591615755a7SSong Liu trace_len = bucket->nr * stack_map_data_size(map); 592615755a7SSong Liu memcpy(value, bucket->data, trace_len); 593557c0c6eSAlexei Starovoitov memset(value + trace_len, 0, map->value_size - trace_len); 594557c0c6eSAlexei Starovoitov 595557c0c6eSAlexei Starovoitov old_bucket = xchg(&smap->buckets[id], bucket); 596557c0c6eSAlexei Starovoitov if (old_bucket) 597557c0c6eSAlexei Starovoitov pcpu_freelist_push(&smap->freelist, &old_bucket->fnode); 598557c0c6eSAlexei Starovoitov return 0; 599d5a3b1f6SAlexei Starovoitov } 600d5a3b1f6SAlexei Starovoitov 60116f07c55SYonghong Song static int stack_map_get_next_key(struct bpf_map *map, void *key, 60216f07c55SYonghong Song void *next_key) 603d5a3b1f6SAlexei Starovoitov { 60416f07c55SYonghong Song struct bpf_stack_map *smap = container_of(map, 60516f07c55SYonghong Song struct bpf_stack_map, map); 60616f07c55SYonghong Song u32 id; 60716f07c55SYonghong Song 60816f07c55SYonghong Song WARN_ON_ONCE(!rcu_read_lock_held()); 60916f07c55SYonghong Song 61016f07c55SYonghong Song if (!key) { 61116f07c55SYonghong Song id = 0; 61216f07c55SYonghong Song } else { 61316f07c55SYonghong Song id = *(u32 *)key; 61416f07c55SYonghong Song if (id >= smap->n_buckets || !smap->buckets[id]) 61516f07c55SYonghong Song id = 0; 61616f07c55SYonghong Song else 61716f07c55SYonghong Song id++; 61816f07c55SYonghong Song } 61916f07c55SYonghong Song 62016f07c55SYonghong Song while (id < smap->n_buckets && !smap->buckets[id]) 62116f07c55SYonghong Song id++; 62216f07c55SYonghong Song 62316f07c55SYonghong Song if (id >= smap->n_buckets) 62416f07c55SYonghong Song return -ENOENT; 62516f07c55SYonghong Song 62616f07c55SYonghong Song *(u32 *)next_key = id; 62716f07c55SYonghong Song return 0; 628d5a3b1f6SAlexei Starovoitov } 629d5a3b1f6SAlexei Starovoitov 630d7ba4cc9SJP Kobryn static long stack_map_update_elem(struct bpf_map *map, void *key, void *value, 631d5a3b1f6SAlexei Starovoitov u64 map_flags) 632d5a3b1f6SAlexei Starovoitov { 633d5a3b1f6SAlexei Starovoitov return -EINVAL; 634d5a3b1f6SAlexei Starovoitov } 635d5a3b1f6SAlexei Starovoitov 636d5a3b1f6SAlexei Starovoitov /* Called from syscall or from eBPF program */ 637d7ba4cc9SJP Kobryn static long stack_map_delete_elem(struct bpf_map *map, void *key) 638d5a3b1f6SAlexei Starovoitov { 639d5a3b1f6SAlexei Starovoitov struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); 640d5a3b1f6SAlexei Starovoitov struct stack_map_bucket *old_bucket; 641d5a3b1f6SAlexei Starovoitov u32 id = *(u32 *)key; 642d5a3b1f6SAlexei Starovoitov 643d5a3b1f6SAlexei Starovoitov if (unlikely(id >= smap->n_buckets)) 644d5a3b1f6SAlexei Starovoitov return -E2BIG; 645d5a3b1f6SAlexei Starovoitov 646d5a3b1f6SAlexei Starovoitov old_bucket = xchg(&smap->buckets[id], NULL); 647d5a3b1f6SAlexei Starovoitov if (old_bucket) { 648557c0c6eSAlexei Starovoitov pcpu_freelist_push(&smap->freelist, &old_bucket->fnode); 649d5a3b1f6SAlexei Starovoitov return 0; 650d5a3b1f6SAlexei Starovoitov } else { 651d5a3b1f6SAlexei Starovoitov return -ENOENT; 652d5a3b1f6SAlexei Starovoitov } 653d5a3b1f6SAlexei Starovoitov } 654d5a3b1f6SAlexei Starovoitov 655d5a3b1f6SAlexei Starovoitov /* Called when map->refcnt goes to zero, either from workqueue or from syscall */ 656d5a3b1f6SAlexei Starovoitov static void stack_map_free(struct bpf_map *map) 657d5a3b1f6SAlexei Starovoitov { 658d5a3b1f6SAlexei Starovoitov struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); 659d5a3b1f6SAlexei Starovoitov 660d407bd25SDaniel Borkmann bpf_map_area_free(smap->elems); 661557c0c6eSAlexei Starovoitov pcpu_freelist_destroy(&smap->freelist); 662d407bd25SDaniel Borkmann bpf_map_area_free(smap); 663d5a3b1f6SAlexei Starovoitov put_callchain_buffers(); 664d5a3b1f6SAlexei Starovoitov } 665d5a3b1f6SAlexei Starovoitov 666cbb9b606SYafang Shao static u64 stack_map_mem_usage(const struct bpf_map *map) 667cbb9b606SYafang Shao { 668cbb9b606SYafang Shao struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); 669cbb9b606SYafang Shao u64 value_size = map->value_size; 670cbb9b606SYafang Shao u64 n_buckets = smap->n_buckets; 671cbb9b606SYafang Shao u64 enties = map->max_entries; 672cbb9b606SYafang Shao u64 usage = sizeof(*smap); 673cbb9b606SYafang Shao 674cbb9b606SYafang Shao usage += n_buckets * sizeof(struct stack_map_bucket *); 675cbb9b606SYafang Shao usage += enties * (sizeof(struct stack_map_bucket) + value_size); 676cbb9b606SYafang Shao return usage; 677cbb9b606SYafang Shao } 678cbb9b606SYafang Shao 679c317ab71SMenglong Dong BTF_ID_LIST_SINGLE(stack_trace_map_btf_ids, struct, bpf_stack_map) 68014499160SMauricio Vasquez B const struct bpf_map_ops stack_trace_map_ops = { 681f4d05259SMartin KaFai Lau .map_meta_equal = bpf_map_meta_equal, 682d5a3b1f6SAlexei Starovoitov .map_alloc = stack_map_alloc, 683d5a3b1f6SAlexei Starovoitov .map_free = stack_map_free, 684d5a3b1f6SAlexei Starovoitov .map_get_next_key = stack_map_get_next_key, 685d5a3b1f6SAlexei Starovoitov .map_lookup_elem = stack_map_lookup_elem, 686d5a3b1f6SAlexei Starovoitov .map_update_elem = stack_map_update_elem, 687d5a3b1f6SAlexei Starovoitov .map_delete_elem = stack_map_delete_elem, 688e8d2bec0SDaniel Borkmann .map_check_btf = map_check_no_btf, 689cbb9b606SYafang Shao .map_mem_usage = stack_map_mem_usage, 690c317ab71SMenglong Dong .map_btf_id = &stack_trace_map_btf_ids[0], 691d5a3b1f6SAlexei Starovoitov }; 692