125763b3cSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only 2d5a3b1f6SAlexei Starovoitov /* Copyright (c) 2016 Facebook 3d5a3b1f6SAlexei Starovoitov */ 4d5a3b1f6SAlexei Starovoitov #include <linux/bpf.h> 5d5a3b1f6SAlexei Starovoitov #include <linux/jhash.h> 6d5a3b1f6SAlexei Starovoitov #include <linux/filter.h> 77b04d6d6SSong Liu #include <linux/kernel.h> 8d5a3b1f6SAlexei Starovoitov #include <linux/stacktrace.h> 9d5a3b1f6SAlexei Starovoitov #include <linux/perf_event.h> 10615755a7SSong Liu #include <linux/elf.h> 11615755a7SSong Liu #include <linux/pagemap.h> 12bae77c5eSSong Liu #include <linux/irq_work.h> 13c9a0f3b8SJiri Olsa #include <linux/btf_ids.h> 14557c0c6eSAlexei Starovoitov #include "percpu_freelist.h" 15d5a3b1f6SAlexei Starovoitov 166e71b04aSChenbo Feng #define STACK_CREATE_FLAG_MASK \ 17615755a7SSong Liu (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY | \ 18615755a7SSong Liu BPF_F_STACK_BUILD_ID) 196e71b04aSChenbo Feng 20d5a3b1f6SAlexei Starovoitov struct stack_map_bucket { 21557c0c6eSAlexei Starovoitov struct pcpu_freelist_node fnode; 22d5a3b1f6SAlexei Starovoitov u32 hash; 23d5a3b1f6SAlexei Starovoitov u32 nr; 24615755a7SSong Liu u64 data[]; 25d5a3b1f6SAlexei Starovoitov }; 26d5a3b1f6SAlexei Starovoitov 27d5a3b1f6SAlexei Starovoitov struct bpf_stack_map { 28d5a3b1f6SAlexei Starovoitov struct bpf_map map; 29557c0c6eSAlexei Starovoitov void *elems; 30557c0c6eSAlexei Starovoitov struct pcpu_freelist freelist; 31d5a3b1f6SAlexei Starovoitov u32 n_buckets; 32557c0c6eSAlexei Starovoitov struct stack_map_bucket *buckets[]; 33d5a3b1f6SAlexei Starovoitov }; 34d5a3b1f6SAlexei Starovoitov 35bae77c5eSSong Liu /* irq_work to run up_read() for build_id lookup in nmi context */ 36bae77c5eSSong Liu struct stack_map_irq_work { 37bae77c5eSSong Liu struct irq_work irq_work; 380cc55a02SMichel Lespinasse struct mm_struct *mm; 39bae77c5eSSong Liu }; 40bae77c5eSSong Liu 41bae77c5eSSong Liu static void do_up_read(struct irq_work *entry) 42bae77c5eSSong Liu { 43bae77c5eSSong Liu struct stack_map_irq_work *work; 44bae77c5eSSong Liu 45099bfaa7SDavid Miller if (WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_RT))) 46099bfaa7SDavid Miller return; 47099bfaa7SDavid Miller 48bae77c5eSSong Liu work = container_of(entry, struct stack_map_irq_work, irq_work); 490cc55a02SMichel Lespinasse mmap_read_unlock_non_owner(work->mm); 50bae77c5eSSong Liu } 51bae77c5eSSong Liu 52bae77c5eSSong Liu static DEFINE_PER_CPU(struct stack_map_irq_work, up_read_work); 53bae77c5eSSong Liu 54615755a7SSong Liu static inline bool stack_map_use_build_id(struct bpf_map *map) 55615755a7SSong Liu { 56615755a7SSong Liu return (map->map_flags & BPF_F_STACK_BUILD_ID); 57615755a7SSong Liu } 58615755a7SSong Liu 59615755a7SSong Liu static inline int stack_map_data_size(struct bpf_map *map) 60615755a7SSong Liu { 61615755a7SSong Liu return stack_map_use_build_id(map) ? 62615755a7SSong Liu sizeof(struct bpf_stack_build_id) : sizeof(u64); 63615755a7SSong Liu } 64615755a7SSong Liu 65557c0c6eSAlexei Starovoitov static int prealloc_elems_and_freelist(struct bpf_stack_map *smap) 66557c0c6eSAlexei Starovoitov { 67557c0c6eSAlexei Starovoitov u32 elem_size = sizeof(struct stack_map_bucket) + smap->map.value_size; 68557c0c6eSAlexei Starovoitov int err; 69557c0c6eSAlexei Starovoitov 7096eabe7aSMartin KaFai Lau smap->elems = bpf_map_area_alloc(elem_size * smap->map.max_entries, 7196eabe7aSMartin KaFai Lau smap->map.numa_node); 72557c0c6eSAlexei Starovoitov if (!smap->elems) 73557c0c6eSAlexei Starovoitov return -ENOMEM; 74557c0c6eSAlexei Starovoitov 75557c0c6eSAlexei Starovoitov err = pcpu_freelist_init(&smap->freelist); 76557c0c6eSAlexei Starovoitov if (err) 77557c0c6eSAlexei Starovoitov goto free_elems; 78557c0c6eSAlexei Starovoitov 79557c0c6eSAlexei Starovoitov pcpu_freelist_populate(&smap->freelist, smap->elems, elem_size, 80557c0c6eSAlexei Starovoitov smap->map.max_entries); 81557c0c6eSAlexei Starovoitov return 0; 82557c0c6eSAlexei Starovoitov 83557c0c6eSAlexei Starovoitov free_elems: 84d407bd25SDaniel Borkmann bpf_map_area_free(smap->elems); 85557c0c6eSAlexei Starovoitov return err; 86557c0c6eSAlexei Starovoitov } 87557c0c6eSAlexei Starovoitov 88d5a3b1f6SAlexei Starovoitov /* Called from syscall */ 89d5a3b1f6SAlexei Starovoitov static struct bpf_map *stack_map_alloc(union bpf_attr *attr) 90d5a3b1f6SAlexei Starovoitov { 91d5a3b1f6SAlexei Starovoitov u32 value_size = attr->value_size; 92d5a3b1f6SAlexei Starovoitov struct bpf_stack_map *smap; 93b936ca64SRoman Gushchin struct bpf_map_memory mem; 94d5a3b1f6SAlexei Starovoitov u64 cost, n_buckets; 95d5a3b1f6SAlexei Starovoitov int err; 96d5a3b1f6SAlexei Starovoitov 972c78ee89SAlexei Starovoitov if (!bpf_capable()) 98d5a3b1f6SAlexei Starovoitov return ERR_PTR(-EPERM); 99d5a3b1f6SAlexei Starovoitov 1006e71b04aSChenbo Feng if (attr->map_flags & ~STACK_CREATE_FLAG_MASK) 101823707b6SAlexei Starovoitov return ERR_PTR(-EINVAL); 102823707b6SAlexei Starovoitov 103d5a3b1f6SAlexei Starovoitov /* check sanity of attributes */ 104d5a3b1f6SAlexei Starovoitov if (attr->max_entries == 0 || attr->key_size != 4 || 105615755a7SSong Liu value_size < 8 || value_size % 8) 106615755a7SSong Liu return ERR_PTR(-EINVAL); 107615755a7SSong Liu 108615755a7SSong Liu BUILD_BUG_ON(sizeof(struct bpf_stack_build_id) % sizeof(u64)); 109615755a7SSong Liu if (attr->map_flags & BPF_F_STACK_BUILD_ID) { 110615755a7SSong Liu if (value_size % sizeof(struct bpf_stack_build_id) || 111615755a7SSong Liu value_size / sizeof(struct bpf_stack_build_id) 112615755a7SSong Liu > sysctl_perf_event_max_stack) 113615755a7SSong Liu return ERR_PTR(-EINVAL); 114615755a7SSong Liu } else if (value_size / 8 > sysctl_perf_event_max_stack) 115d5a3b1f6SAlexei Starovoitov return ERR_PTR(-EINVAL); 116d5a3b1f6SAlexei Starovoitov 117d5a3b1f6SAlexei Starovoitov /* hash table size must be power of 2 */ 118d5a3b1f6SAlexei Starovoitov n_buckets = roundup_pow_of_two(attr->max_entries); 119d5a3b1f6SAlexei Starovoitov 120d5a3b1f6SAlexei Starovoitov cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap); 121d5a3b1f6SAlexei Starovoitov cost += n_buckets * (value_size + sizeof(struct stack_map_bucket)); 122c85d6913SRoman Gushchin err = bpf_map_charge_init(&mem, cost); 123b936ca64SRoman Gushchin if (err) 124b936ca64SRoman Gushchin return ERR_PTR(err); 125b936ca64SRoman Gushchin 126b936ca64SRoman Gushchin smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr)); 127b936ca64SRoman Gushchin if (!smap) { 128b936ca64SRoman Gushchin bpf_map_charge_finish(&mem); 129b936ca64SRoman Gushchin return ERR_PTR(-ENOMEM); 130b936ca64SRoman Gushchin } 131d5a3b1f6SAlexei Starovoitov 132bd475643SJakub Kicinski bpf_map_init_from_attr(&smap->map, attr); 133d5a3b1f6SAlexei Starovoitov smap->map.value_size = value_size; 134d5a3b1f6SAlexei Starovoitov smap->n_buckets = n_buckets; 135557c0c6eSAlexei Starovoitov 13697c79a38SArnaldo Carvalho de Melo err = get_callchain_buffers(sysctl_perf_event_max_stack); 137d5a3b1f6SAlexei Starovoitov if (err) 138b936ca64SRoman Gushchin goto free_charge; 139d5a3b1f6SAlexei Starovoitov 140557c0c6eSAlexei Starovoitov err = prealloc_elems_and_freelist(smap); 141557c0c6eSAlexei Starovoitov if (err) 142557c0c6eSAlexei Starovoitov goto put_buffers; 143557c0c6eSAlexei Starovoitov 144b936ca64SRoman Gushchin bpf_map_charge_move(&smap->map.memory, &mem); 145b936ca64SRoman Gushchin 146d5a3b1f6SAlexei Starovoitov return &smap->map; 147d5a3b1f6SAlexei Starovoitov 148557c0c6eSAlexei Starovoitov put_buffers: 149557c0c6eSAlexei Starovoitov put_callchain_buffers(); 150b936ca64SRoman Gushchin free_charge: 151b936ca64SRoman Gushchin bpf_map_charge_finish(&mem); 152d407bd25SDaniel Borkmann bpf_map_area_free(smap); 153d5a3b1f6SAlexei Starovoitov return ERR_PTR(err); 154d5a3b1f6SAlexei Starovoitov } 155d5a3b1f6SAlexei Starovoitov 156615755a7SSong Liu #define BPF_BUILD_ID 3 157615755a7SSong Liu /* 158615755a7SSong Liu * Parse build id from the note segment. This logic can be shared between 159615755a7SSong Liu * 32-bit and 64-bit system, because Elf32_Nhdr and Elf64_Nhdr are 160615755a7SSong Liu * identical. 161615755a7SSong Liu */ 162615755a7SSong Liu static inline int stack_map_parse_build_id(void *page_addr, 163615755a7SSong Liu unsigned char *build_id, 164615755a7SSong Liu void *note_start, 165615755a7SSong Liu Elf32_Word note_size) 166615755a7SSong Liu { 167615755a7SSong Liu Elf32_Word note_offs = 0, new_offs; 168615755a7SSong Liu 169615755a7SSong Liu /* check for overflow */ 170615755a7SSong Liu if (note_start < page_addr || note_start + note_size < note_start) 171615755a7SSong Liu return -EINVAL; 172615755a7SSong Liu 173615755a7SSong Liu /* only supports note that fits in the first page */ 174615755a7SSong Liu if (note_start + note_size > page_addr + PAGE_SIZE) 175615755a7SSong Liu return -EINVAL; 176615755a7SSong Liu 177615755a7SSong Liu while (note_offs + sizeof(Elf32_Nhdr) < note_size) { 178615755a7SSong Liu Elf32_Nhdr *nhdr = (Elf32_Nhdr *)(note_start + note_offs); 179615755a7SSong Liu 180615755a7SSong Liu if (nhdr->n_type == BPF_BUILD_ID && 181615755a7SSong Liu nhdr->n_namesz == sizeof("GNU") && 1820b698005SStanislav Fomichev nhdr->n_descsz > 0 && 1830b698005SStanislav Fomichev nhdr->n_descsz <= BPF_BUILD_ID_SIZE) { 184615755a7SSong Liu memcpy(build_id, 185615755a7SSong Liu note_start + note_offs + 186615755a7SSong Liu ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr), 1870b698005SStanislav Fomichev nhdr->n_descsz); 1880b698005SStanislav Fomichev memset(build_id + nhdr->n_descsz, 0, 1890b698005SStanislav Fomichev BPF_BUILD_ID_SIZE - nhdr->n_descsz); 190615755a7SSong Liu return 0; 191615755a7SSong Liu } 192615755a7SSong Liu new_offs = note_offs + sizeof(Elf32_Nhdr) + 193615755a7SSong Liu ALIGN(nhdr->n_namesz, 4) + ALIGN(nhdr->n_descsz, 4); 194615755a7SSong Liu if (new_offs <= note_offs) /* overflow */ 195615755a7SSong Liu break; 196615755a7SSong Liu note_offs = new_offs; 197615755a7SSong Liu } 198615755a7SSong Liu return -EINVAL; 199615755a7SSong Liu } 200615755a7SSong Liu 201615755a7SSong Liu /* Parse build ID from 32-bit ELF */ 202615755a7SSong Liu static int stack_map_get_build_id_32(void *page_addr, 203615755a7SSong Liu unsigned char *build_id) 204615755a7SSong Liu { 205615755a7SSong Liu Elf32_Ehdr *ehdr = (Elf32_Ehdr *)page_addr; 206615755a7SSong Liu Elf32_Phdr *phdr; 207615755a7SSong Liu int i; 208615755a7SSong Liu 209615755a7SSong Liu /* only supports phdr that fits in one page */ 210615755a7SSong Liu if (ehdr->e_phnum > 211615755a7SSong Liu (PAGE_SIZE - sizeof(Elf32_Ehdr)) / sizeof(Elf32_Phdr)) 212615755a7SSong Liu return -EINVAL; 213615755a7SSong Liu 214615755a7SSong Liu phdr = (Elf32_Phdr *)(page_addr + sizeof(Elf32_Ehdr)); 215615755a7SSong Liu 216*b33164f2SJiri Olsa for (i = 0; i < ehdr->e_phnum; ++i) { 217*b33164f2SJiri Olsa if (phdr[i].p_type == PT_NOTE && 218*b33164f2SJiri Olsa !stack_map_parse_build_id(page_addr, build_id, 219615755a7SSong Liu page_addr + phdr[i].p_offset, 220*b33164f2SJiri Olsa phdr[i].p_filesz)) 221*b33164f2SJiri Olsa return 0; 222*b33164f2SJiri Olsa } 223615755a7SSong Liu return -EINVAL; 224615755a7SSong Liu } 225615755a7SSong Liu 226615755a7SSong Liu /* Parse build ID from 64-bit ELF */ 227615755a7SSong Liu static int stack_map_get_build_id_64(void *page_addr, 228615755a7SSong Liu unsigned char *build_id) 229615755a7SSong Liu { 230615755a7SSong Liu Elf64_Ehdr *ehdr = (Elf64_Ehdr *)page_addr; 231615755a7SSong Liu Elf64_Phdr *phdr; 232615755a7SSong Liu int i; 233615755a7SSong Liu 234615755a7SSong Liu /* only supports phdr that fits in one page */ 235615755a7SSong Liu if (ehdr->e_phnum > 236615755a7SSong Liu (PAGE_SIZE - sizeof(Elf64_Ehdr)) / sizeof(Elf64_Phdr)) 237615755a7SSong Liu return -EINVAL; 238615755a7SSong Liu 239615755a7SSong Liu phdr = (Elf64_Phdr *)(page_addr + sizeof(Elf64_Ehdr)); 240615755a7SSong Liu 241*b33164f2SJiri Olsa for (i = 0; i < ehdr->e_phnum; ++i) { 242*b33164f2SJiri Olsa if (phdr[i].p_type == PT_NOTE && 243*b33164f2SJiri Olsa !stack_map_parse_build_id(page_addr, build_id, 244615755a7SSong Liu page_addr + phdr[i].p_offset, 245*b33164f2SJiri Olsa phdr[i].p_filesz)) 246*b33164f2SJiri Olsa return 0; 247*b33164f2SJiri Olsa } 248615755a7SSong Liu return -EINVAL; 249615755a7SSong Liu } 250615755a7SSong Liu 251615755a7SSong Liu /* Parse build ID of ELF file mapped to vma */ 252615755a7SSong Liu static int stack_map_get_build_id(struct vm_area_struct *vma, 253615755a7SSong Liu unsigned char *build_id) 254615755a7SSong Liu { 255615755a7SSong Liu Elf32_Ehdr *ehdr; 256615755a7SSong Liu struct page *page; 257615755a7SSong Liu void *page_addr; 258615755a7SSong Liu int ret; 259615755a7SSong Liu 260615755a7SSong Liu /* only works for page backed storage */ 261615755a7SSong Liu if (!vma->vm_file) 262615755a7SSong Liu return -EINVAL; 263615755a7SSong Liu 264615755a7SSong Liu page = find_get_page(vma->vm_file->f_mapping, 0); 265615755a7SSong Liu if (!page) 266615755a7SSong Liu return -EFAULT; /* page not mapped */ 267615755a7SSong Liu 268615755a7SSong Liu ret = -EINVAL; 269beaf3d19SSong Liu page_addr = kmap_atomic(page); 270615755a7SSong Liu ehdr = (Elf32_Ehdr *)page_addr; 271615755a7SSong Liu 272615755a7SSong Liu /* compare magic x7f "ELF" */ 273615755a7SSong Liu if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0) 274615755a7SSong Liu goto out; 275615755a7SSong Liu 276615755a7SSong Liu /* only support executable file and shared object file */ 277615755a7SSong Liu if (ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN) 278615755a7SSong Liu goto out; 279615755a7SSong Liu 280615755a7SSong Liu if (ehdr->e_ident[EI_CLASS] == ELFCLASS32) 281615755a7SSong Liu ret = stack_map_get_build_id_32(page_addr, build_id); 282615755a7SSong Liu else if (ehdr->e_ident[EI_CLASS] == ELFCLASS64) 283615755a7SSong Liu ret = stack_map_get_build_id_64(page_addr, build_id); 284615755a7SSong Liu out: 285beaf3d19SSong Liu kunmap_atomic(page_addr); 286615755a7SSong Liu put_page(page); 287615755a7SSong Liu return ret; 288615755a7SSong Liu } 289615755a7SSong Liu 2905f412632SYonghong Song static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, 291615755a7SSong Liu u64 *ips, u32 trace_nr, bool user) 292615755a7SSong Liu { 293615755a7SSong Liu int i; 294615755a7SSong Liu struct vm_area_struct *vma; 295bae77c5eSSong Liu bool irq_work_busy = false; 296dc3b8ae9SArnd Bergmann struct stack_map_irq_work *work = NULL; 297bae77c5eSSong Liu 298eac9153fSSong Liu if (irqs_disabled()) { 299099bfaa7SDavid Miller if (!IS_ENABLED(CONFIG_PREEMPT_RT)) { 300bae77c5eSSong Liu work = this_cpu_ptr(&up_read_work); 301099bfaa7SDavid Miller if (atomic_read(&work->irq_work.flags) & IRQ_WORK_BUSY) { 302bae77c5eSSong Liu /* cannot queue more up_read, fallback */ 303bae77c5eSSong Liu irq_work_busy = true; 304bae77c5eSSong Liu } 305099bfaa7SDavid Miller } else { 306099bfaa7SDavid Miller /* 307099bfaa7SDavid Miller * PREEMPT_RT does not allow to trylock mmap sem in 308099bfaa7SDavid Miller * interrupt disabled context. Force the fallback code. 309099bfaa7SDavid Miller */ 310099bfaa7SDavid Miller irq_work_busy = true; 311099bfaa7SDavid Miller } 312099bfaa7SDavid Miller } 313615755a7SSong Liu 314615755a7SSong Liu /* 315eac9153fSSong Liu * We cannot do up_read() when the irq is disabled, because of 316eac9153fSSong Liu * risk to deadlock with rq_lock. To do build_id lookup when the 317eac9153fSSong Liu * irqs are disabled, we need to run up_read() in irq_work. We use 318bae77c5eSSong Liu * a percpu variable to do the irq_work. If the irq_work is 319bae77c5eSSong Liu * already used by another lookup, we fall back to report ips. 320615755a7SSong Liu * 321615755a7SSong Liu * Same fallback is used for kernel stack (!user) on a stackmap 322615755a7SSong Liu * with build_id. 323615755a7SSong Liu */ 324bae77c5eSSong Liu if (!user || !current || !current->mm || irq_work_busy || 3250cc55a02SMichel Lespinasse !mmap_read_trylock_non_owner(current->mm)) { 326615755a7SSong Liu /* cannot access current->mm, fall back to ips */ 327615755a7SSong Liu for (i = 0; i < trace_nr; i++) { 328615755a7SSong Liu id_offs[i].status = BPF_STACK_BUILD_ID_IP; 329615755a7SSong Liu id_offs[i].ip = ips[i]; 3304af396aeSStanislav Fomichev memset(id_offs[i].build_id, 0, BPF_BUILD_ID_SIZE); 331615755a7SSong Liu } 332615755a7SSong Liu return; 333615755a7SSong Liu } 334615755a7SSong Liu 335615755a7SSong Liu for (i = 0; i < trace_nr; i++) { 336615755a7SSong Liu vma = find_vma(current->mm, ips[i]); 337615755a7SSong Liu if (!vma || stack_map_get_build_id(vma, id_offs[i].build_id)) { 338615755a7SSong Liu /* per entry fall back to ips */ 339615755a7SSong Liu id_offs[i].status = BPF_STACK_BUILD_ID_IP; 340615755a7SSong Liu id_offs[i].ip = ips[i]; 3414af396aeSStanislav Fomichev memset(id_offs[i].build_id, 0, BPF_BUILD_ID_SIZE); 342615755a7SSong Liu continue; 343615755a7SSong Liu } 344615755a7SSong Liu id_offs[i].offset = (vma->vm_pgoff << PAGE_SHIFT) + ips[i] 345615755a7SSong Liu - vma->vm_start; 346615755a7SSong Liu id_offs[i].status = BPF_STACK_BUILD_ID_VALID; 347615755a7SSong Liu } 348bae77c5eSSong Liu 349dc3b8ae9SArnd Bergmann if (!work) { 3500cc55a02SMichel Lespinasse mmap_read_unlock_non_owner(current->mm); 351bae77c5eSSong Liu } else { 3520cc55a02SMichel Lespinasse work->mm = current->mm; 353bae77c5eSSong Liu irq_work_queue(&work->irq_work); 354bae77c5eSSong Liu } 355615755a7SSong Liu } 356615755a7SSong Liu 357fa28dcb8SSong Liu static struct perf_callchain_entry * 358fa28dcb8SSong Liu get_callchain_entry_for_task(struct task_struct *task, u32 init_nr) 359fa28dcb8SSong Liu { 360046cc3ddSSong Liu #ifdef CONFIG_STACKTRACE 361fa28dcb8SSong Liu struct perf_callchain_entry *entry; 362fa28dcb8SSong Liu int rctx; 363fa28dcb8SSong Liu 364fa28dcb8SSong Liu entry = get_callchain_entry(&rctx); 365fa28dcb8SSong Liu 366fa28dcb8SSong Liu if (!entry) 367fa28dcb8SSong Liu return NULL; 368fa28dcb8SSong Liu 369fa28dcb8SSong Liu entry->nr = init_nr + 370fa28dcb8SSong Liu stack_trace_save_tsk(task, (unsigned long *)(entry->ip + init_nr), 371fa28dcb8SSong Liu sysctl_perf_event_max_stack - init_nr, 0); 372fa28dcb8SSong Liu 373fa28dcb8SSong Liu /* stack_trace_save_tsk() works on unsigned long array, while 374fa28dcb8SSong Liu * perf_callchain_entry uses u64 array. For 32-bit systems, it is 375fa28dcb8SSong Liu * necessary to fix this mismatch. 376fa28dcb8SSong Liu */ 377fa28dcb8SSong Liu if (__BITS_PER_LONG != 64) { 378fa28dcb8SSong Liu unsigned long *from = (unsigned long *) entry->ip; 379fa28dcb8SSong Liu u64 *to = entry->ip; 380fa28dcb8SSong Liu int i; 381fa28dcb8SSong Liu 382fa28dcb8SSong Liu /* copy data from the end to avoid using extra buffer */ 383fa28dcb8SSong Liu for (i = entry->nr - 1; i >= (int)init_nr; i--) 384fa28dcb8SSong Liu to[i] = (u64)(from[i]); 385fa28dcb8SSong Liu } 386fa28dcb8SSong Liu 387fa28dcb8SSong Liu put_callchain_entry(rctx); 388fa28dcb8SSong Liu 389fa28dcb8SSong Liu return entry; 390046cc3ddSSong Liu #else /* CONFIG_STACKTRACE */ 391046cc3ddSSong Liu return NULL; 392046cc3ddSSong Liu #endif 393fa28dcb8SSong Liu } 394fa28dcb8SSong Liu 3957b04d6d6SSong Liu static long __bpf_get_stackid(struct bpf_map *map, 3967b04d6d6SSong Liu struct perf_callchain_entry *trace, u64 flags) 397d5a3b1f6SAlexei Starovoitov { 398d5a3b1f6SAlexei Starovoitov struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); 399d5a3b1f6SAlexei Starovoitov struct stack_map_bucket *bucket, *new_bucket, *old_bucket; 400615755a7SSong Liu u32 max_depth = map->value_size / stack_map_data_size(map); 401c5dfd78eSArnaldo Carvalho de Melo /* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */ 402c5dfd78eSArnaldo Carvalho de Melo u32 init_nr = sysctl_perf_event_max_stack - max_depth; 403d5a3b1f6SAlexei Starovoitov u32 skip = flags & BPF_F_SKIP_FIELD_MASK; 404d5a3b1f6SAlexei Starovoitov u32 hash, id, trace_nr, trace_len; 405d5a3b1f6SAlexei Starovoitov bool user = flags & BPF_F_USER_STACK; 406d5a3b1f6SAlexei Starovoitov u64 *ips; 407615755a7SSong Liu bool hash_matches; 408d5a3b1f6SAlexei Starovoitov 409d5a3b1f6SAlexei Starovoitov /* get_perf_callchain() guarantees that trace->nr >= init_nr 410c5dfd78eSArnaldo Carvalho de Melo * and trace-nr <= sysctl_perf_event_max_stack, so trace_nr <= max_depth 411d5a3b1f6SAlexei Starovoitov */ 412d5a3b1f6SAlexei Starovoitov trace_nr = trace->nr - init_nr; 413d5a3b1f6SAlexei Starovoitov 414d5a3b1f6SAlexei Starovoitov if (trace_nr <= skip) 415d5a3b1f6SAlexei Starovoitov /* skipping more than usable stack trace */ 416d5a3b1f6SAlexei Starovoitov return -EFAULT; 417d5a3b1f6SAlexei Starovoitov 418d5a3b1f6SAlexei Starovoitov trace_nr -= skip; 419d5a3b1f6SAlexei Starovoitov trace_len = trace_nr * sizeof(u64); 420d5a3b1f6SAlexei Starovoitov ips = trace->ip + skip + init_nr; 421d5a3b1f6SAlexei Starovoitov hash = jhash2((u32 *)ips, trace_len / sizeof(u32), 0); 422d5a3b1f6SAlexei Starovoitov id = hash & (smap->n_buckets - 1); 423557c0c6eSAlexei Starovoitov bucket = READ_ONCE(smap->buckets[id]); 424d5a3b1f6SAlexei Starovoitov 425615755a7SSong Liu hash_matches = bucket && bucket->hash == hash; 426615755a7SSong Liu /* fast cmp */ 427615755a7SSong Liu if (hash_matches && flags & BPF_F_FAST_STACK_CMP) 428d5a3b1f6SAlexei Starovoitov return id; 429615755a7SSong Liu 430615755a7SSong Liu if (stack_map_use_build_id(map)) { 431615755a7SSong Liu /* for build_id+offset, pop a bucket before slow cmp */ 432615755a7SSong Liu new_bucket = (struct stack_map_bucket *) 433615755a7SSong Liu pcpu_freelist_pop(&smap->freelist); 434615755a7SSong Liu if (unlikely(!new_bucket)) 435615755a7SSong Liu return -ENOMEM; 4365f412632SYonghong Song new_bucket->nr = trace_nr; 4375f412632SYonghong Song stack_map_get_build_id_offset( 4385f412632SYonghong Song (struct bpf_stack_build_id *)new_bucket->data, 4395f412632SYonghong Song ips, trace_nr, user); 440615755a7SSong Liu trace_len = trace_nr * sizeof(struct bpf_stack_build_id); 441615755a7SSong Liu if (hash_matches && bucket->nr == trace_nr && 442615755a7SSong Liu memcmp(bucket->data, new_bucket->data, trace_len) == 0) { 443615755a7SSong Liu pcpu_freelist_push(&smap->freelist, &new_bucket->fnode); 444d5a3b1f6SAlexei Starovoitov return id; 445d5a3b1f6SAlexei Starovoitov } 446615755a7SSong Liu if (bucket && !(flags & BPF_F_REUSE_STACKID)) { 447615755a7SSong Liu pcpu_freelist_push(&smap->freelist, &new_bucket->fnode); 448615755a7SSong Liu return -EEXIST; 449615755a7SSong Liu } 450615755a7SSong Liu } else { 451615755a7SSong Liu if (hash_matches && bucket->nr == trace_nr && 452615755a7SSong Liu memcmp(bucket->data, ips, trace_len) == 0) 453615755a7SSong Liu return id; 454d5a3b1f6SAlexei Starovoitov if (bucket && !(flags & BPF_F_REUSE_STACKID)) 455d5a3b1f6SAlexei Starovoitov return -EEXIST; 456d5a3b1f6SAlexei Starovoitov 457557c0c6eSAlexei Starovoitov new_bucket = (struct stack_map_bucket *) 458557c0c6eSAlexei Starovoitov pcpu_freelist_pop(&smap->freelist); 459d5a3b1f6SAlexei Starovoitov if (unlikely(!new_bucket)) 460d5a3b1f6SAlexei Starovoitov return -ENOMEM; 461615755a7SSong Liu memcpy(new_bucket->data, ips, trace_len); 462615755a7SSong Liu } 463d5a3b1f6SAlexei Starovoitov 464d5a3b1f6SAlexei Starovoitov new_bucket->hash = hash; 465d5a3b1f6SAlexei Starovoitov new_bucket->nr = trace_nr; 466d5a3b1f6SAlexei Starovoitov 467d5a3b1f6SAlexei Starovoitov old_bucket = xchg(&smap->buckets[id], new_bucket); 468d5a3b1f6SAlexei Starovoitov if (old_bucket) 469557c0c6eSAlexei Starovoitov pcpu_freelist_push(&smap->freelist, &old_bucket->fnode); 470d5a3b1f6SAlexei Starovoitov return id; 471d5a3b1f6SAlexei Starovoitov } 472d5a3b1f6SAlexei Starovoitov 4737b04d6d6SSong Liu BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map, 4747b04d6d6SSong Liu u64, flags) 4757b04d6d6SSong Liu { 4767b04d6d6SSong Liu u32 max_depth = map->value_size / stack_map_data_size(map); 4777b04d6d6SSong Liu /* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */ 4787b04d6d6SSong Liu u32 init_nr = sysctl_perf_event_max_stack - max_depth; 4797b04d6d6SSong Liu bool user = flags & BPF_F_USER_STACK; 4807b04d6d6SSong Liu struct perf_callchain_entry *trace; 4817b04d6d6SSong Liu bool kernel = !user; 4827b04d6d6SSong Liu 4837b04d6d6SSong Liu if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK | 4847b04d6d6SSong Liu BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID))) 4857b04d6d6SSong Liu return -EINVAL; 4867b04d6d6SSong Liu 4877b04d6d6SSong Liu trace = get_perf_callchain(regs, init_nr, kernel, user, 4887b04d6d6SSong Liu sysctl_perf_event_max_stack, false, false); 4897b04d6d6SSong Liu 4907b04d6d6SSong Liu if (unlikely(!trace)) 4917b04d6d6SSong Liu /* couldn't fetch the stack trace */ 4927b04d6d6SSong Liu return -EFAULT; 4937b04d6d6SSong Liu 4947b04d6d6SSong Liu return __bpf_get_stackid(map, trace, flags); 4957b04d6d6SSong Liu } 4967b04d6d6SSong Liu 497d5a3b1f6SAlexei Starovoitov const struct bpf_func_proto bpf_get_stackid_proto = { 498d5a3b1f6SAlexei Starovoitov .func = bpf_get_stackid, 499d5a3b1f6SAlexei Starovoitov .gpl_only = true, 500d5a3b1f6SAlexei Starovoitov .ret_type = RET_INTEGER, 501d5a3b1f6SAlexei Starovoitov .arg1_type = ARG_PTR_TO_CTX, 502d5a3b1f6SAlexei Starovoitov .arg2_type = ARG_CONST_MAP_PTR, 503d5a3b1f6SAlexei Starovoitov .arg3_type = ARG_ANYTHING, 504d5a3b1f6SAlexei Starovoitov }; 505d5a3b1f6SAlexei Starovoitov 5067b04d6d6SSong Liu static __u64 count_kernel_ip(struct perf_callchain_entry *trace) 5077b04d6d6SSong Liu { 5087b04d6d6SSong Liu __u64 nr_kernel = 0; 5097b04d6d6SSong Liu 5107b04d6d6SSong Liu while (nr_kernel < trace->nr) { 5117b04d6d6SSong Liu if (trace->ip[nr_kernel] == PERF_CONTEXT_USER) 5127b04d6d6SSong Liu break; 5137b04d6d6SSong Liu nr_kernel++; 5147b04d6d6SSong Liu } 5157b04d6d6SSong Liu return nr_kernel; 5167b04d6d6SSong Liu } 5177b04d6d6SSong Liu 5187b04d6d6SSong Liu BPF_CALL_3(bpf_get_stackid_pe, struct bpf_perf_event_data_kern *, ctx, 5197b04d6d6SSong Liu struct bpf_map *, map, u64, flags) 5207b04d6d6SSong Liu { 5217b04d6d6SSong Liu struct perf_event *event = ctx->event; 5227b04d6d6SSong Liu struct perf_callchain_entry *trace; 5237b04d6d6SSong Liu bool kernel, user; 5247b04d6d6SSong Liu __u64 nr_kernel; 5257b04d6d6SSong Liu int ret; 5267b04d6d6SSong Liu 5277b04d6d6SSong Liu /* perf_sample_data doesn't have callchain, use bpf_get_stackid */ 5287b04d6d6SSong Liu if (!(event->attr.sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY)) 5297b04d6d6SSong Liu return bpf_get_stackid((unsigned long)(ctx->regs), 5307b04d6d6SSong Liu (unsigned long) map, flags, 0, 0); 5317b04d6d6SSong Liu 5327b04d6d6SSong Liu if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK | 5337b04d6d6SSong Liu BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID))) 5347b04d6d6SSong Liu return -EINVAL; 5357b04d6d6SSong Liu 5367b04d6d6SSong Liu user = flags & BPF_F_USER_STACK; 5377b04d6d6SSong Liu kernel = !user; 5387b04d6d6SSong Liu 5397b04d6d6SSong Liu trace = ctx->data->callchain; 5407b04d6d6SSong Liu if (unlikely(!trace)) 5417b04d6d6SSong Liu return -EFAULT; 5427b04d6d6SSong Liu 5437b04d6d6SSong Liu nr_kernel = count_kernel_ip(trace); 5447b04d6d6SSong Liu 5457b04d6d6SSong Liu if (kernel) { 5467b04d6d6SSong Liu __u64 nr = trace->nr; 5477b04d6d6SSong Liu 5487b04d6d6SSong Liu trace->nr = nr_kernel; 5497b04d6d6SSong Liu ret = __bpf_get_stackid(map, trace, flags); 5507b04d6d6SSong Liu 5517b04d6d6SSong Liu /* restore nr */ 5527b04d6d6SSong Liu trace->nr = nr; 5537b04d6d6SSong Liu } else { /* user */ 5547b04d6d6SSong Liu u64 skip = flags & BPF_F_SKIP_FIELD_MASK; 5557b04d6d6SSong Liu 5567b04d6d6SSong Liu skip += nr_kernel; 5577b04d6d6SSong Liu if (skip > BPF_F_SKIP_FIELD_MASK) 5587b04d6d6SSong Liu return -EFAULT; 5597b04d6d6SSong Liu 5607b04d6d6SSong Liu flags = (flags & ~BPF_F_SKIP_FIELD_MASK) | skip; 5617b04d6d6SSong Liu ret = __bpf_get_stackid(map, trace, flags); 5627b04d6d6SSong Liu } 5637b04d6d6SSong Liu return ret; 5647b04d6d6SSong Liu } 5657b04d6d6SSong Liu 5667b04d6d6SSong Liu const struct bpf_func_proto bpf_get_stackid_proto_pe = { 5677b04d6d6SSong Liu .func = bpf_get_stackid_pe, 5687b04d6d6SSong Liu .gpl_only = false, 5697b04d6d6SSong Liu .ret_type = RET_INTEGER, 5707b04d6d6SSong Liu .arg1_type = ARG_PTR_TO_CTX, 5717b04d6d6SSong Liu .arg2_type = ARG_CONST_MAP_PTR, 5727b04d6d6SSong Liu .arg3_type = ARG_ANYTHING, 5737b04d6d6SSong Liu }; 5747b04d6d6SSong Liu 575fa28dcb8SSong Liu static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task, 5767b04d6d6SSong Liu struct perf_callchain_entry *trace_in, 577fa28dcb8SSong Liu void *buf, u32 size, u64 flags) 578c195651eSYonghong Song { 579c195651eSYonghong Song u32 init_nr, trace_nr, copy_len, elem_size, num_elem; 580c195651eSYonghong Song bool user_build_id = flags & BPF_F_USER_BUILD_ID; 581c195651eSYonghong Song u32 skip = flags & BPF_F_SKIP_FIELD_MASK; 582c195651eSYonghong Song bool user = flags & BPF_F_USER_STACK; 583c195651eSYonghong Song struct perf_callchain_entry *trace; 584c195651eSYonghong Song bool kernel = !user; 585c195651eSYonghong Song int err = -EINVAL; 586c195651eSYonghong Song u64 *ips; 587c195651eSYonghong Song 588c195651eSYonghong Song if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK | 589c195651eSYonghong Song BPF_F_USER_BUILD_ID))) 590c195651eSYonghong Song goto clear; 591c195651eSYonghong Song if (kernel && user_build_id) 592c195651eSYonghong Song goto clear; 593c195651eSYonghong Song 594c195651eSYonghong Song elem_size = (user && user_build_id) ? sizeof(struct bpf_stack_build_id) 595c195651eSYonghong Song : sizeof(u64); 596c195651eSYonghong Song if (unlikely(size % elem_size)) 597c195651eSYonghong Song goto clear; 598c195651eSYonghong Song 599fa28dcb8SSong Liu /* cannot get valid user stack for task without user_mode regs */ 600fa28dcb8SSong Liu if (task && user && !user_mode(regs)) 601fa28dcb8SSong Liu goto err_fault; 602fa28dcb8SSong Liu 603c195651eSYonghong Song num_elem = size / elem_size; 604c195651eSYonghong Song if (sysctl_perf_event_max_stack < num_elem) 605c195651eSYonghong Song init_nr = 0; 606c195651eSYonghong Song else 607c195651eSYonghong Song init_nr = sysctl_perf_event_max_stack - num_elem; 608fa28dcb8SSong Liu 6097b04d6d6SSong Liu if (trace_in) 6107b04d6d6SSong Liu trace = trace_in; 6117b04d6d6SSong Liu else if (kernel && task) 612fa28dcb8SSong Liu trace = get_callchain_entry_for_task(task, init_nr); 613fa28dcb8SSong Liu else 614c195651eSYonghong Song trace = get_perf_callchain(regs, init_nr, kernel, user, 615fa28dcb8SSong Liu sysctl_perf_event_max_stack, 616fa28dcb8SSong Liu false, false); 617c195651eSYonghong Song if (unlikely(!trace)) 618c195651eSYonghong Song goto err_fault; 619c195651eSYonghong Song 620c195651eSYonghong Song trace_nr = trace->nr - init_nr; 621c195651eSYonghong Song if (trace_nr < skip) 622c195651eSYonghong Song goto err_fault; 623c195651eSYonghong Song 624c195651eSYonghong Song trace_nr -= skip; 625c195651eSYonghong Song trace_nr = (trace_nr <= num_elem) ? trace_nr : num_elem; 626c195651eSYonghong Song copy_len = trace_nr * elem_size; 627c195651eSYonghong Song ips = trace->ip + skip + init_nr; 628c195651eSYonghong Song if (user && user_build_id) 629c195651eSYonghong Song stack_map_get_build_id_offset(buf, ips, trace_nr, user); 630c195651eSYonghong Song else 631c195651eSYonghong Song memcpy(buf, ips, copy_len); 632c195651eSYonghong Song 633c195651eSYonghong Song if (size > copy_len) 634c195651eSYonghong Song memset(buf + copy_len, 0, size - copy_len); 635c195651eSYonghong Song return copy_len; 636c195651eSYonghong Song 637c195651eSYonghong Song err_fault: 638c195651eSYonghong Song err = -EFAULT; 639c195651eSYonghong Song clear: 640c195651eSYonghong Song memset(buf, 0, size); 641c195651eSYonghong Song return err; 642c195651eSYonghong Song } 643c195651eSYonghong Song 644fa28dcb8SSong Liu BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size, 645fa28dcb8SSong Liu u64, flags) 646fa28dcb8SSong Liu { 6477b04d6d6SSong Liu return __bpf_get_stack(regs, NULL, NULL, buf, size, flags); 648fa28dcb8SSong Liu } 649fa28dcb8SSong Liu 650c195651eSYonghong Song const struct bpf_func_proto bpf_get_stack_proto = { 651c195651eSYonghong Song .func = bpf_get_stack, 652c195651eSYonghong Song .gpl_only = true, 653c195651eSYonghong Song .ret_type = RET_INTEGER, 654c195651eSYonghong Song .arg1_type = ARG_PTR_TO_CTX, 655c195651eSYonghong Song .arg2_type = ARG_PTR_TO_UNINIT_MEM, 656c195651eSYonghong Song .arg3_type = ARG_CONST_SIZE_OR_ZERO, 657c195651eSYonghong Song .arg4_type = ARG_ANYTHING, 658c195651eSYonghong Song }; 659c195651eSYonghong Song 660fa28dcb8SSong Liu BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf, 661fa28dcb8SSong Liu u32, size, u64, flags) 662fa28dcb8SSong Liu { 663fa28dcb8SSong Liu struct pt_regs *regs = task_pt_regs(task); 664fa28dcb8SSong Liu 6657b04d6d6SSong Liu return __bpf_get_stack(regs, task, NULL, buf, size, flags); 666fa28dcb8SSong Liu } 667fa28dcb8SSong Liu 668c9a0f3b8SJiri Olsa BTF_ID_LIST(bpf_get_task_stack_btf_ids) 669c9a0f3b8SJiri Olsa BTF_ID(struct, task_struct) 670c9a0f3b8SJiri Olsa 671fa28dcb8SSong Liu const struct bpf_func_proto bpf_get_task_stack_proto = { 672fa28dcb8SSong Liu .func = bpf_get_task_stack, 673fa28dcb8SSong Liu .gpl_only = false, 674fa28dcb8SSong Liu .ret_type = RET_INTEGER, 675fa28dcb8SSong Liu .arg1_type = ARG_PTR_TO_BTF_ID, 676fa28dcb8SSong Liu .arg2_type = ARG_PTR_TO_UNINIT_MEM, 677fa28dcb8SSong Liu .arg3_type = ARG_CONST_SIZE_OR_ZERO, 678fa28dcb8SSong Liu .arg4_type = ARG_ANYTHING, 679fa28dcb8SSong Liu .btf_id = bpf_get_task_stack_btf_ids, 680fa28dcb8SSong Liu }; 681fa28dcb8SSong Liu 6827b04d6d6SSong Liu BPF_CALL_4(bpf_get_stack_pe, struct bpf_perf_event_data_kern *, ctx, 6837b04d6d6SSong Liu void *, buf, u32, size, u64, flags) 6847b04d6d6SSong Liu { 6852b9b305fSSong Liu struct pt_regs *regs = (struct pt_regs *)(ctx->regs); 6867b04d6d6SSong Liu struct perf_event *event = ctx->event; 6877b04d6d6SSong Liu struct perf_callchain_entry *trace; 6887b04d6d6SSong Liu bool kernel, user; 6897b04d6d6SSong Liu int err = -EINVAL; 6907b04d6d6SSong Liu __u64 nr_kernel; 6917b04d6d6SSong Liu 6927b04d6d6SSong Liu if (!(event->attr.sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY)) 6932b9b305fSSong Liu return __bpf_get_stack(regs, NULL, NULL, buf, size, flags); 6947b04d6d6SSong Liu 6957b04d6d6SSong Liu if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK | 6967b04d6d6SSong Liu BPF_F_USER_BUILD_ID))) 6977b04d6d6SSong Liu goto clear; 6987b04d6d6SSong Liu 6997b04d6d6SSong Liu user = flags & BPF_F_USER_STACK; 7007b04d6d6SSong Liu kernel = !user; 7017b04d6d6SSong Liu 7027b04d6d6SSong Liu err = -EFAULT; 7037b04d6d6SSong Liu trace = ctx->data->callchain; 7047b04d6d6SSong Liu if (unlikely(!trace)) 7057b04d6d6SSong Liu goto clear; 7067b04d6d6SSong Liu 7077b04d6d6SSong Liu nr_kernel = count_kernel_ip(trace); 7087b04d6d6SSong Liu 7097b04d6d6SSong Liu if (kernel) { 7107b04d6d6SSong Liu __u64 nr = trace->nr; 7117b04d6d6SSong Liu 7127b04d6d6SSong Liu trace->nr = nr_kernel; 7132b9b305fSSong Liu err = __bpf_get_stack(regs, NULL, trace, buf, size, flags); 7147b04d6d6SSong Liu 7157b04d6d6SSong Liu /* restore nr */ 7167b04d6d6SSong Liu trace->nr = nr; 7177b04d6d6SSong Liu } else { /* user */ 7187b04d6d6SSong Liu u64 skip = flags & BPF_F_SKIP_FIELD_MASK; 7197b04d6d6SSong Liu 7207b04d6d6SSong Liu skip += nr_kernel; 7217b04d6d6SSong Liu if (skip > BPF_F_SKIP_FIELD_MASK) 7227b04d6d6SSong Liu goto clear; 7237b04d6d6SSong Liu 7247b04d6d6SSong Liu flags = (flags & ~BPF_F_SKIP_FIELD_MASK) | skip; 7252b9b305fSSong Liu err = __bpf_get_stack(regs, NULL, trace, buf, size, flags); 7267b04d6d6SSong Liu } 7277b04d6d6SSong Liu return err; 7287b04d6d6SSong Liu 7297b04d6d6SSong Liu clear: 7307b04d6d6SSong Liu memset(buf, 0, size); 7317b04d6d6SSong Liu return err; 7327b04d6d6SSong Liu 7337b04d6d6SSong Liu } 7347b04d6d6SSong Liu 7357b04d6d6SSong Liu const struct bpf_func_proto bpf_get_stack_proto_pe = { 7367b04d6d6SSong Liu .func = bpf_get_stack_pe, 7377b04d6d6SSong Liu .gpl_only = true, 7387b04d6d6SSong Liu .ret_type = RET_INTEGER, 7397b04d6d6SSong Liu .arg1_type = ARG_PTR_TO_CTX, 7407b04d6d6SSong Liu .arg2_type = ARG_PTR_TO_UNINIT_MEM, 7417b04d6d6SSong Liu .arg3_type = ARG_CONST_SIZE_OR_ZERO, 7427b04d6d6SSong Liu .arg4_type = ARG_ANYTHING, 7437b04d6d6SSong Liu }; 7447b04d6d6SSong Liu 745557c0c6eSAlexei Starovoitov /* Called from eBPF program */ 746d5a3b1f6SAlexei Starovoitov static void *stack_map_lookup_elem(struct bpf_map *map, void *key) 747d5a3b1f6SAlexei Starovoitov { 7483b4a63f6SPrashant Bhole return ERR_PTR(-EOPNOTSUPP); 749557c0c6eSAlexei Starovoitov } 750557c0c6eSAlexei Starovoitov 751557c0c6eSAlexei Starovoitov /* Called from syscall */ 752557c0c6eSAlexei Starovoitov int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value) 753557c0c6eSAlexei Starovoitov { 754d5a3b1f6SAlexei Starovoitov struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); 755557c0c6eSAlexei Starovoitov struct stack_map_bucket *bucket, *old_bucket; 756557c0c6eSAlexei Starovoitov u32 id = *(u32 *)key, trace_len; 757d5a3b1f6SAlexei Starovoitov 758d5a3b1f6SAlexei Starovoitov if (unlikely(id >= smap->n_buckets)) 759557c0c6eSAlexei Starovoitov return -ENOENT; 760557c0c6eSAlexei Starovoitov 761557c0c6eSAlexei Starovoitov bucket = xchg(&smap->buckets[id], NULL); 762557c0c6eSAlexei Starovoitov if (!bucket) 763557c0c6eSAlexei Starovoitov return -ENOENT; 764557c0c6eSAlexei Starovoitov 765615755a7SSong Liu trace_len = bucket->nr * stack_map_data_size(map); 766615755a7SSong Liu memcpy(value, bucket->data, trace_len); 767557c0c6eSAlexei Starovoitov memset(value + trace_len, 0, map->value_size - trace_len); 768557c0c6eSAlexei Starovoitov 769557c0c6eSAlexei Starovoitov old_bucket = xchg(&smap->buckets[id], bucket); 770557c0c6eSAlexei Starovoitov if (old_bucket) 771557c0c6eSAlexei Starovoitov pcpu_freelist_push(&smap->freelist, &old_bucket->fnode); 772557c0c6eSAlexei Starovoitov return 0; 773d5a3b1f6SAlexei Starovoitov } 774d5a3b1f6SAlexei Starovoitov 77516f07c55SYonghong Song static int stack_map_get_next_key(struct bpf_map *map, void *key, 77616f07c55SYonghong Song void *next_key) 777d5a3b1f6SAlexei Starovoitov { 77816f07c55SYonghong Song struct bpf_stack_map *smap = container_of(map, 77916f07c55SYonghong Song struct bpf_stack_map, map); 78016f07c55SYonghong Song u32 id; 78116f07c55SYonghong Song 78216f07c55SYonghong Song WARN_ON_ONCE(!rcu_read_lock_held()); 78316f07c55SYonghong Song 78416f07c55SYonghong Song if (!key) { 78516f07c55SYonghong Song id = 0; 78616f07c55SYonghong Song } else { 78716f07c55SYonghong Song id = *(u32 *)key; 78816f07c55SYonghong Song if (id >= smap->n_buckets || !smap->buckets[id]) 78916f07c55SYonghong Song id = 0; 79016f07c55SYonghong Song else 79116f07c55SYonghong Song id++; 79216f07c55SYonghong Song } 79316f07c55SYonghong Song 79416f07c55SYonghong Song while (id < smap->n_buckets && !smap->buckets[id]) 79516f07c55SYonghong Song id++; 79616f07c55SYonghong Song 79716f07c55SYonghong Song if (id >= smap->n_buckets) 79816f07c55SYonghong Song return -ENOENT; 79916f07c55SYonghong Song 80016f07c55SYonghong Song *(u32 *)next_key = id; 80116f07c55SYonghong Song return 0; 802d5a3b1f6SAlexei Starovoitov } 803d5a3b1f6SAlexei Starovoitov 804d5a3b1f6SAlexei Starovoitov static int stack_map_update_elem(struct bpf_map *map, void *key, void *value, 805d5a3b1f6SAlexei Starovoitov u64 map_flags) 806d5a3b1f6SAlexei Starovoitov { 807d5a3b1f6SAlexei Starovoitov return -EINVAL; 808d5a3b1f6SAlexei Starovoitov } 809d5a3b1f6SAlexei Starovoitov 810d5a3b1f6SAlexei Starovoitov /* Called from syscall or from eBPF program */ 811d5a3b1f6SAlexei Starovoitov static int stack_map_delete_elem(struct bpf_map *map, void *key) 812d5a3b1f6SAlexei Starovoitov { 813d5a3b1f6SAlexei Starovoitov struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); 814d5a3b1f6SAlexei Starovoitov struct stack_map_bucket *old_bucket; 815d5a3b1f6SAlexei Starovoitov u32 id = *(u32 *)key; 816d5a3b1f6SAlexei Starovoitov 817d5a3b1f6SAlexei Starovoitov if (unlikely(id >= smap->n_buckets)) 818d5a3b1f6SAlexei Starovoitov return -E2BIG; 819d5a3b1f6SAlexei Starovoitov 820d5a3b1f6SAlexei Starovoitov old_bucket = xchg(&smap->buckets[id], NULL); 821d5a3b1f6SAlexei Starovoitov if (old_bucket) { 822557c0c6eSAlexei Starovoitov pcpu_freelist_push(&smap->freelist, &old_bucket->fnode); 823d5a3b1f6SAlexei Starovoitov return 0; 824d5a3b1f6SAlexei Starovoitov } else { 825d5a3b1f6SAlexei Starovoitov return -ENOENT; 826d5a3b1f6SAlexei Starovoitov } 827d5a3b1f6SAlexei Starovoitov } 828d5a3b1f6SAlexei Starovoitov 829d5a3b1f6SAlexei Starovoitov /* Called when map->refcnt goes to zero, either from workqueue or from syscall */ 830d5a3b1f6SAlexei Starovoitov static void stack_map_free(struct bpf_map *map) 831d5a3b1f6SAlexei Starovoitov { 832d5a3b1f6SAlexei Starovoitov struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); 833d5a3b1f6SAlexei Starovoitov 834d407bd25SDaniel Borkmann bpf_map_area_free(smap->elems); 835557c0c6eSAlexei Starovoitov pcpu_freelist_destroy(&smap->freelist); 836d407bd25SDaniel Borkmann bpf_map_area_free(smap); 837d5a3b1f6SAlexei Starovoitov put_callchain_buffers(); 838d5a3b1f6SAlexei Starovoitov } 839d5a3b1f6SAlexei Starovoitov 8402872e9acSAndrey Ignatov static int stack_trace_map_btf_id; 84114499160SMauricio Vasquez B const struct bpf_map_ops stack_trace_map_ops = { 842d5a3b1f6SAlexei Starovoitov .map_alloc = stack_map_alloc, 843d5a3b1f6SAlexei Starovoitov .map_free = stack_map_free, 844d5a3b1f6SAlexei Starovoitov .map_get_next_key = stack_map_get_next_key, 845d5a3b1f6SAlexei Starovoitov .map_lookup_elem = stack_map_lookup_elem, 846d5a3b1f6SAlexei Starovoitov .map_update_elem = stack_map_update_elem, 847d5a3b1f6SAlexei Starovoitov .map_delete_elem = stack_map_delete_elem, 848e8d2bec0SDaniel Borkmann .map_check_btf = map_check_no_btf, 8492872e9acSAndrey Ignatov .map_btf_name = "bpf_stack_map", 8502872e9acSAndrey Ignatov .map_btf_id = &stack_trace_map_btf_id, 851d5a3b1f6SAlexei Starovoitov }; 852bae77c5eSSong Liu 853bae77c5eSSong Liu static int __init stack_map_init(void) 854bae77c5eSSong Liu { 855bae77c5eSSong Liu int cpu; 856bae77c5eSSong Liu struct stack_map_irq_work *work; 857bae77c5eSSong Liu 858bae77c5eSSong Liu for_each_possible_cpu(cpu) { 859bae77c5eSSong Liu work = per_cpu_ptr(&up_read_work, cpu); 860bae77c5eSSong Liu init_irq_work(&work->irq_work, do_up_read); 861bae77c5eSSong Liu } 862bae77c5eSSong Liu return 0; 863bae77c5eSSong Liu } 864bae77c5eSSong Liu subsys_initcall(stack_map_init); 865