xref: /linux-6.15/kernel/bpf/stackmap.c (revision b33164f2)
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2016 Facebook
 */
#include <linux/bpf.h>
#include <linux/jhash.h>
#include <linux/filter.h>
#include <linux/kernel.h>
#include <linux/stacktrace.h>
#include <linux/perf_event.h>
#include <linux/elf.h>
#include <linux/pagemap.h>
#include <linux/irq_work.h>
#include <linux/btf_ids.h>
#include "percpu_freelist.h"

#define STACK_CREATE_FLAG_MASK					\
	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY |	\
	 BPF_F_STACK_BUILD_ID)

struct stack_map_bucket {
	struct pcpu_freelist_node fnode;
	u32 hash;
	u32 nr;
	u64 data[];
};

struct bpf_stack_map {
	struct bpf_map map;
	void *elems;
	struct pcpu_freelist freelist;
	u32 n_buckets;
	struct stack_map_bucket *buckets[];
};

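/*
 * Layout note (derived from the code below): all stack_map_bucket
 * elements are preallocated in a single area and handed out through a
 * per-cpu freelist, while buckets[] is the hash table proper, indexed
 * by the stack hash. Buckets move between the freelist and the table
 * with xchg(), so lookups and updates never block in the fast path.
 */
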
/* irq_work to run up_read() for build_id lookup in nmi context */
struct stack_map_irq_work {
	struct irq_work irq_work;
	struct mm_struct *mm;
};

static void do_up_read(struct irq_work *entry)
{
	struct stack_map_irq_work *work;

	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_RT)))
		return;

	work = container_of(entry, struct stack_map_irq_work, irq_work);
	mmap_read_unlock_non_owner(work->mm);
}

static DEFINE_PER_CPU(struct stack_map_irq_work, up_read_work);

static inline bool stack_map_use_build_id(struct bpf_map *map)
{
	return (map->map_flags & BPF_F_STACK_BUILD_ID);
}

static inline int stack_map_data_size(struct bpf_map *map)
{
	return stack_map_use_build_id(map) ?
		sizeof(struct bpf_stack_build_id) : sizeof(u64);
}

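/*
 * A map value is an array of max_depth entries, where max_depth =
 * value_size / stack_map_data_size(map). As an illustrative example,
 * value_size == 1024 holds up to 128 plain instruction pointers
 * (8 bytes each), or up to 32 struct bpf_stack_build_id records
 * (32 bytes each).
 */
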
static int prealloc_elems_and_freelist(struct bpf_stack_map *smap)
{
	u32 elem_size = sizeof(struct stack_map_bucket) + smap->map.value_size;
	int err;

	smap->elems = bpf_map_area_alloc(elem_size * smap->map.max_entries,
					 smap->map.numa_node);
	if (!smap->elems)
		return -ENOMEM;

	err = pcpu_freelist_init(&smap->freelist);
	if (err)
		goto free_elems;

	pcpu_freelist_populate(&smap->freelist, smap->elems, elem_size,
			       smap->map.max_entries);
	return 0;

free_elems:
	bpf_map_area_free(smap->elems);
	return err;
}

/* Called from syscall */
static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
{
	u32 value_size = attr->value_size;
	struct bpf_stack_map *smap;
	struct bpf_map_memory mem;
	u64 cost, n_buckets;
	int err;

	if (!bpf_capable())
		return ERR_PTR(-EPERM);

	if (attr->map_flags & ~STACK_CREATE_FLAG_MASK)
		return ERR_PTR(-EINVAL);

	/* check sanity of attributes */
	if (attr->max_entries == 0 || attr->key_size != 4 ||
	    value_size < 8 || value_size % 8)
		return ERR_PTR(-EINVAL);

	BUILD_BUG_ON(sizeof(struct bpf_stack_build_id) % sizeof(u64));
	if (attr->map_flags & BPF_F_STACK_BUILD_ID) {
		if (value_size % sizeof(struct bpf_stack_build_id) ||
		    value_size / sizeof(struct bpf_stack_build_id)
		    > sysctl_perf_event_max_stack)
			return ERR_PTR(-EINVAL);
	} else if (value_size / 8 > sysctl_perf_event_max_stack)
		return ERR_PTR(-EINVAL);

	/* hash table size must be power of 2 */
	n_buckets = roundup_pow_of_two(attr->max_entries);

	cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap);
	cost += n_buckets * (value_size + sizeof(struct stack_map_bucket));
	err = bpf_map_charge_init(&mem, cost);
	if (err)
		return ERR_PTR(err);

	smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr));
	if (!smap) {
		bpf_map_charge_finish(&mem);
		return ERR_PTR(-ENOMEM);
	}

	bpf_map_init_from_attr(&smap->map, attr);
	smap->map.value_size = value_size;
	smap->n_buckets = n_buckets;

	err = get_callchain_buffers(sysctl_perf_event_max_stack);
	if (err)
		goto free_charge;

	err = prealloc_elems_and_freelist(smap);
	if (err)
		goto put_buffers;

	bpf_map_charge_move(&smap->map.memory, &mem);

	return &smap->map;

put_buffers:
	put_callchain_buffers();
free_charge:
	bpf_map_charge_finish(&mem);
	bpf_map_area_free(smap);
	return ERR_PTR(err);
}

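/*
 * User-space sketch (illustrative, not part of this file): creating a
 * stack trace map that stores up to 127 instruction pointers per trace.
 * The helper name follows modern libbpf; older versions spell this
 * bpf_create_map().
 *
 *	int fd = bpf_map_create(BPF_MAP_TYPE_STACK_TRACE, "stacks",
 *				sizeof(__u32),		// key: stack id
 *				127 * sizeof(__u64),	// value: ip array
 *				10000, NULL);		// max_entries, opts
 *
 * key_size must be 4 and value_size a non-zero multiple of 8, or
 * stack_map_alloc() above rejects the attributes with -EINVAL.
 */
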
#define BPF_BUILD_ID 3
/*
 * Parse build id from the note segment. This logic can be shared between
 * 32-bit and 64-bit systems, because Elf32_Nhdr and Elf64_Nhdr are
 * identical.
 */
static inline int stack_map_parse_build_id(void *page_addr,
					   unsigned char *build_id,
					   void *note_start,
					   Elf32_Word note_size)
{
	Elf32_Word note_offs = 0, new_offs;

	/* check for overflow */
	if (note_start < page_addr || note_start + note_size < note_start)
		return -EINVAL;

	/* only supports notes that fit in the first page */
	if (note_start + note_size > page_addr + PAGE_SIZE)
		return -EINVAL;

	while (note_offs + sizeof(Elf32_Nhdr) < note_size) {
		Elf32_Nhdr *nhdr = (Elf32_Nhdr *)(note_start + note_offs);

		if (nhdr->n_type == BPF_BUILD_ID &&
		    nhdr->n_namesz == sizeof("GNU") &&
		    nhdr->n_descsz > 0 &&
		    nhdr->n_descsz <= BPF_BUILD_ID_SIZE) {
			memcpy(build_id,
			       note_start + note_offs +
			       ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr),
			       nhdr->n_descsz);
			memset(build_id + nhdr->n_descsz, 0,
			       BPF_BUILD_ID_SIZE - nhdr->n_descsz);
			return 0;
		}
		new_offs = note_offs + sizeof(Elf32_Nhdr) +
			ALIGN(nhdr->n_namesz, 4) + ALIGN(nhdr->n_descsz, 4);
		if (new_offs <= note_offs)  /* overflow */
			break;
		note_offs = new_offs;
	}
	return -EINVAL;
}

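/*
 * On-disk layout of the note being matched above (standard ELF note
 * format, shown for reference):
 *
 *	Elf32_Nhdr { n_namesz = 4, n_descsz = <id len>, n_type = 3 }
 *	"GNU\0"			(name, padded to a 4-byte boundary)
 *	<build id bytes>	(descriptor, up to BPF_BUILD_ID_SIZE)
 *
 * n_type 3 is NT_GNU_BUILD_ID; this file carries its own BPF_BUILD_ID
 * define instead of relying on the uapi header.
 */
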
/* Parse build ID from 32-bit ELF */
static int stack_map_get_build_id_32(void *page_addr,
				     unsigned char *build_id)
{
	Elf32_Ehdr *ehdr = (Elf32_Ehdr *)page_addr;
	Elf32_Phdr *phdr;
	int i;

	/* only supports phdr that fits in one page */
	if (ehdr->e_phnum >
	    (PAGE_SIZE - sizeof(Elf32_Ehdr)) / sizeof(Elf32_Phdr))
		return -EINVAL;

	phdr = (Elf32_Phdr *)(page_addr + sizeof(Elf32_Ehdr));

	for (i = 0; i < ehdr->e_phnum; ++i) {
		if (phdr[i].p_type == PT_NOTE &&
		    !stack_map_parse_build_id(page_addr, build_id,
					      page_addr + phdr[i].p_offset,
					      phdr[i].p_filesz))
			return 0;
	}
	return -EINVAL;
}

/* Parse build ID from 64-bit ELF */
static int stack_map_get_build_id_64(void *page_addr,
				     unsigned char *build_id)
{
	Elf64_Ehdr *ehdr = (Elf64_Ehdr *)page_addr;
	Elf64_Phdr *phdr;
	int i;

	/* only supports phdr that fits in one page */
	if (ehdr->e_phnum >
	    (PAGE_SIZE - sizeof(Elf64_Ehdr)) / sizeof(Elf64_Phdr))
		return -EINVAL;

	phdr = (Elf64_Phdr *)(page_addr + sizeof(Elf64_Ehdr));

	for (i = 0; i < ehdr->e_phnum; ++i) {
		if (phdr[i].p_type == PT_NOTE &&
		    !stack_map_parse_build_id(page_addr, build_id,
					      page_addr + phdr[i].p_offset,
					      phdr[i].p_filesz))
			return 0;
	}
	return -EINVAL;
}

/* Parse build ID of ELF file mapped to vma */
static int stack_map_get_build_id(struct vm_area_struct *vma,
				  unsigned char *build_id)
{
	Elf32_Ehdr *ehdr;
	struct page *page;
	void *page_addr;
	int ret;

	/* only works for page backed storage */
	if (!vma->vm_file)
		return -EINVAL;

	page = find_get_page(vma->vm_file->f_mapping, 0);
	if (!page)
		return -EFAULT;	/* page not mapped */

	ret = -EINVAL;
	page_addr = kmap_atomic(page);
	ehdr = (Elf32_Ehdr *)page_addr;

	/* compare magic "\x7fELF" */
	if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	/* only support executable file and shared object file */
	if (ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN)
		goto out;

	if (ehdr->e_ident[EI_CLASS] == ELFCLASS32)
		ret = stack_map_get_build_id_32(page_addr, build_id);
	else if (ehdr->e_ident[EI_CLASS] == ELFCLASS64)
		ret = stack_map_get_build_id_64(page_addr, build_id);
out:
	kunmap_atomic(page_addr);
	put_page(page);
	return ret;
}

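/*
 * Note on failure modes above: find_get_page() does not fault the page
 * in, so if the first page of the ELF file is not already in the page
 * cache the lookup fails with -EFAULT and the caller falls back to raw
 * instruction pointers. The parsers also require the program headers
 * and the note segment to live entirely within that first page.
 */
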
static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
					  u64 *ips, u32 trace_nr, bool user)
{
	int i;
	struct vm_area_struct *vma;
	bool irq_work_busy = false;
	struct stack_map_irq_work *work = NULL;

	if (irqs_disabled()) {
		if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
			work = this_cpu_ptr(&up_read_work);
			if (atomic_read(&work->irq_work.flags) & IRQ_WORK_BUSY) {
				/* cannot queue more up_read, fallback */
				irq_work_busy = true;
			}
		} else {
			/*
			 * PREEMPT_RT does not allow trylocking the mmap sem
			 * in an interrupt-disabled context. Force the
			 * fallback code.
			 */
			irq_work_busy = true;
		}
	}

	/*
	 * We cannot do up_read() while irqs are disabled, because of the
	 * risk of deadlocking on rq_lock. To do the build_id lookup with
	 * irqs disabled, up_read() has to run from irq_work, backed by a
	 * percpu variable. If that irq_work is already in use by another
	 * lookup, we fall back to reporting raw ips.
	 *
	 * The same fallback is used for kernel stacks (!user) on a
	 * stackmap with build_id.
	 */
	if (!user || !current || !current->mm || irq_work_busy ||
	    !mmap_read_trylock_non_owner(current->mm)) {
		/* cannot access current->mm, fall back to ips */
		for (i = 0; i < trace_nr; i++) {
			id_offs[i].status = BPF_STACK_BUILD_ID_IP;
			id_offs[i].ip = ips[i];
			memset(id_offs[i].build_id, 0, BPF_BUILD_ID_SIZE);
		}
		return;
	}

	for (i = 0; i < trace_nr; i++) {
		vma = find_vma(current->mm, ips[i]);
		if (!vma || stack_map_get_build_id(vma, id_offs[i].build_id)) {
			/* per entry fall back to ips */
			id_offs[i].status = BPF_STACK_BUILD_ID_IP;
			id_offs[i].ip = ips[i];
			memset(id_offs[i].build_id, 0, BPF_BUILD_ID_SIZE);
			continue;
		}
		id_offs[i].offset = (vma->vm_pgoff << PAGE_SHIFT) + ips[i]
			- vma->vm_start;
		id_offs[i].status = BPF_STACK_BUILD_ID_VALID;
	}

	if (!work) {
		mmap_read_unlock_non_owner(current->mm);
	} else {
		work->mm = current->mm;
		irq_work_queue(&work->irq_work);
	}
}

static struct perf_callchain_entry *
get_callchain_entry_for_task(struct task_struct *task, u32 init_nr)
{
#ifdef CONFIG_STACKTRACE
	struct perf_callchain_entry *entry;
	int rctx;

	entry = get_callchain_entry(&rctx);

	if (!entry)
		return NULL;

	entry->nr = init_nr +
		stack_trace_save_tsk(task, (unsigned long *)(entry->ip + init_nr),
				     sysctl_perf_event_max_stack - init_nr, 0);

	/* stack_trace_save_tsk() works on unsigned long array, while
	 * perf_callchain_entry uses u64 array. For 32-bit systems, it is
	 * necessary to fix this mismatch.
	 */
	if (__BITS_PER_LONG != 64) {
		unsigned long *from = (unsigned long *) entry->ip;
		u64 *to = entry->ip;
		int i;

		/* copy data from the end to avoid using extra buffer */
		for (i = entry->nr - 1; i >= (int)init_nr; i--)
			to[i] = (u64)(from[i]);
	}

	put_callchain_entry(rctx);

	return entry;
#else /* CONFIG_STACKTRACE */
	return NULL;
#endif
}

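/*
 * init_nr convention used below: the callchain helpers are asked for up
 * to sysctl_perf_event_max_stack entries but only the last max_depth
 * slots are of interest, so init_nr = sysctl_perf_event_max_stack -
 * max_depth and the useful ips start at trace->ip + init_nr.
 *
 * __bpf_get_stackid() hashes those ips with jhash2() and indexes the
 * bucket at hash & (n_buckets - 1); it returns the id of a matching
 * existing trace, inserts a new bucket into a free slot, or returns
 * -EEXIST on a hash-slot collision unless BPF_F_REUSE_STACKID allows
 * the old trace to be overwritten.
 */
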
static long __bpf_get_stackid(struct bpf_map *map,
			      struct perf_callchain_entry *trace, u64 flags)
{
	struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
	struct stack_map_bucket *bucket, *new_bucket, *old_bucket;
	u32 max_depth = map->value_size / stack_map_data_size(map);
	/* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */
	u32 init_nr = sysctl_perf_event_max_stack - max_depth;
	u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
	u32 hash, id, trace_nr, trace_len;
	bool user = flags & BPF_F_USER_STACK;
	u64 *ips;
	bool hash_matches;

	/* get_perf_callchain() guarantees that trace->nr >= init_nr
	 * and trace->nr <= sysctl_perf_event_max_stack, so trace_nr <= max_depth
	 */
	trace_nr = trace->nr - init_nr;

	if (trace_nr <= skip)
		/* skipping more than usable stack trace */
		return -EFAULT;

	trace_nr -= skip;
	trace_len = trace_nr * sizeof(u64);
	ips = trace->ip + skip + init_nr;
	hash = jhash2((u32 *)ips, trace_len / sizeof(u32), 0);
	id = hash & (smap->n_buckets - 1);
	bucket = READ_ONCE(smap->buckets[id]);

	hash_matches = bucket && bucket->hash == hash;
	/* fast cmp */
	if (hash_matches && flags & BPF_F_FAST_STACK_CMP)
		return id;

	if (stack_map_use_build_id(map)) {
		/* for build_id+offset, pop a bucket before slow cmp */
		new_bucket = (struct stack_map_bucket *)
			pcpu_freelist_pop(&smap->freelist);
		if (unlikely(!new_bucket))
			return -ENOMEM;
		new_bucket->nr = trace_nr;
		stack_map_get_build_id_offset(
			(struct bpf_stack_build_id *)new_bucket->data,
			ips, trace_nr, user);
		trace_len = trace_nr * sizeof(struct bpf_stack_build_id);
		if (hash_matches && bucket->nr == trace_nr &&
		    memcmp(bucket->data, new_bucket->data, trace_len) == 0) {
			pcpu_freelist_push(&smap->freelist, &new_bucket->fnode);
			return id;
		}
		if (bucket && !(flags & BPF_F_REUSE_STACKID)) {
			pcpu_freelist_push(&smap->freelist, &new_bucket->fnode);
			return -EEXIST;
		}
	} else {
		if (hash_matches && bucket->nr == trace_nr &&
		    memcmp(bucket->data, ips, trace_len) == 0)
			return id;
		if (bucket && !(flags & BPF_F_REUSE_STACKID))
			return -EEXIST;

		new_bucket = (struct stack_map_bucket *)
			pcpu_freelist_pop(&smap->freelist);
		if (unlikely(!new_bucket))
			return -ENOMEM;
		memcpy(new_bucket->data, ips, trace_len);
	}

	new_bucket->hash = hash;
	new_bucket->nr = trace_nr;

	old_bucket = xchg(&smap->buckets[id], new_bucket);
	if (old_bucket)
		pcpu_freelist_push(&smap->freelist, &old_bucket->fnode);
	return id;
}

BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
	   u64, flags)
{
	u32 max_depth = map->value_size / stack_map_data_size(map);
	/* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */
	u32 init_nr = sysctl_perf_event_max_stack - max_depth;
	bool user = flags & BPF_F_USER_STACK;
	struct perf_callchain_entry *trace;
	bool kernel = !user;

	if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
			       BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
		return -EINVAL;

	trace = get_perf_callchain(regs, init_nr, kernel, user,
				   sysctl_perf_event_max_stack, false, false);

	if (unlikely(!trace))
		/* couldn't fetch the stack trace */
		return -EFAULT;

	return __bpf_get_stackid(map, trace, flags);
}

const struct bpf_func_proto bpf_get_stackid_proto = {
	.func		= bpf_get_stackid,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
};

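/*
 * BPF program sketch (illustrative): deduplicating user stacks from a
 * kprobe program. "stacks" is an assumed BPF_MAP_TYPE_STACK_TRACE map,
 * created as in the user-space sketch earlier.
 *
 *	long id = bpf_get_stackid(ctx, &stacks, BPF_F_USER_STACK);
 *	if (id >= 0)
 *		;	// id is the key to read the trace back via the
 *			// syscall side (bpf_stackmap_copy() below)
 *
 * A perf callchain stores kernel ips first, then the PERF_CONTEXT_USER
 * marker, then user ips; count_kernel_ip() below counts the leading
 * kernel portion by scanning for that marker.
 */
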
static __u64 count_kernel_ip(struct perf_callchain_entry *trace)
{
	__u64 nr_kernel = 0;

	while (nr_kernel < trace->nr) {
		if (trace->ip[nr_kernel] == PERF_CONTEXT_USER)
			break;
		nr_kernel++;
	}
	return nr_kernel;
}

BPF_CALL_3(bpf_get_stackid_pe, struct bpf_perf_event_data_kern *, ctx,
	   struct bpf_map *, map, u64, flags)
{
	struct perf_event *event = ctx->event;
	struct perf_callchain_entry *trace;
	bool kernel, user;
	__u64 nr_kernel;
	int ret;

	/* perf_sample_data doesn't have callchain, use bpf_get_stackid */
	if (!(event->attr.sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY))
		return bpf_get_stackid((unsigned long)(ctx->regs),
				       (unsigned long) map, flags, 0, 0);

	if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
			       BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
		return -EINVAL;

	user = flags & BPF_F_USER_STACK;
	kernel = !user;

	trace = ctx->data->callchain;
	if (unlikely(!trace))
		return -EFAULT;

	nr_kernel = count_kernel_ip(trace);

	if (kernel) {
		__u64 nr = trace->nr;

		trace->nr = nr_kernel;
		ret = __bpf_get_stackid(map, trace, flags);

		/* restore nr */
		trace->nr = nr;
	} else { /* user */
		u64 skip = flags & BPF_F_SKIP_FIELD_MASK;

		skip += nr_kernel;
		if (skip > BPF_F_SKIP_FIELD_MASK)
			return -EFAULT;

		flags = (flags & ~BPF_F_SKIP_FIELD_MASK) | skip;
		ret = __bpf_get_stackid(map, trace, flags);
	}
	return ret;
}

const struct bpf_func_proto bpf_get_stackid_proto_pe = {
	.func		= bpf_get_stackid_pe,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
};

static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
			    struct perf_callchain_entry *trace_in,
			    void *buf, u32 size, u64 flags)
{
	u32 init_nr, trace_nr, copy_len, elem_size, num_elem;
	bool user_build_id = flags & BPF_F_USER_BUILD_ID;
	u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
	bool user = flags & BPF_F_USER_STACK;
	struct perf_callchain_entry *trace;
	bool kernel = !user;
	int err = -EINVAL;
	u64 *ips;

	if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
			       BPF_F_USER_BUILD_ID)))
		goto clear;
	if (kernel && user_build_id)
		goto clear;

	elem_size = (user && user_build_id) ? sizeof(struct bpf_stack_build_id)
					    : sizeof(u64);
	if (unlikely(size % elem_size))
		goto clear;

	/* cannot get valid user stack for task without user_mode regs */
	if (task && user && !user_mode(regs))
		goto err_fault;

	num_elem = size / elem_size;
	if (sysctl_perf_event_max_stack < num_elem)
		init_nr = 0;
	else
		init_nr = sysctl_perf_event_max_stack - num_elem;

	if (trace_in)
		trace = trace_in;
	else if (kernel && task)
		trace = get_callchain_entry_for_task(task, init_nr);
	else
		trace = get_perf_callchain(regs, init_nr, kernel, user,
					   sysctl_perf_event_max_stack,
					   false, false);
	if (unlikely(!trace))
		goto err_fault;

	trace_nr = trace->nr - init_nr;
	if (trace_nr < skip)
		goto err_fault;

	trace_nr -= skip;
	trace_nr = (trace_nr <= num_elem) ? trace_nr : num_elem;
	copy_len = trace_nr * elem_size;
	ips = trace->ip + skip + init_nr;
	if (user && user_build_id)
		stack_map_get_build_id_offset(buf, ips, trace_nr, user);
	else
		memcpy(buf, ips, copy_len);

	if (size > copy_len)
		memset(buf + copy_len, 0, size - copy_len);
	return copy_len;

err_fault:
	err = -EFAULT;
clear:
	memset(buf, 0, size);
	return err;
}

BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size,
	   u64, flags)
{
	return __bpf_get_stack(regs, NULL, NULL, buf, size, flags);
}

const struct bpf_func_proto bpf_get_stack_proto = {
	.func		= bpf_get_stack,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg4_type	= ARG_ANYTHING,
};

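/*
 * BPF program sketch (illustrative): copying the raw stack into a local
 * buffer instead of deduplicating it in a map.
 *
 *	__u64 ips[32];
 *	long len = bpf_get_stack(ctx, ips, sizeof(ips), 0);
 *	// len is the number of bytes written, or a negative error
 *
 * With BPF_F_USER_BUILD_ID the same call fills struct
 * bpf_stack_build_id records instead of raw instruction pointers.
 */
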
BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf,
	   u32, size, u64, flags)
{
	struct pt_regs *regs = task_pt_regs(task);

	return __bpf_get_stack(regs, task, NULL, buf, size, flags);
}

BTF_ID_LIST(bpf_get_task_stack_btf_ids)
BTF_ID(struct, task_struct)

const struct bpf_func_proto bpf_get_task_stack_proto = {
	.func		= bpf_get_task_stack,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_BTF_ID,
	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg4_type	= ARG_ANYTHING,
	.btf_id		= bpf_get_task_stack_btf_ids,
};

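/*
 * Unlike bpf_get_stack(), bpf_get_task_stack() can walk a task other
 * than current; the kernel path goes through stack_trace_save_tsk() via
 * get_callchain_entry_for_task() above and therefore needs
 * CONFIG_STACKTRACE.
 */
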
BPF_CALL_4(bpf_get_stack_pe, struct bpf_perf_event_data_kern *, ctx,
	   void *, buf, u32, size, u64, flags)
{
	struct pt_regs *regs = (struct pt_regs *)(ctx->regs);
	struct perf_event *event = ctx->event;
	struct perf_callchain_entry *trace;
	bool kernel, user;
	int err = -EINVAL;
	__u64 nr_kernel;

	if (!(event->attr.sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY))
		return __bpf_get_stack(regs, NULL, NULL, buf, size, flags);

	if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
			       BPF_F_USER_BUILD_ID)))
		goto clear;

	user = flags & BPF_F_USER_STACK;
	kernel = !user;

	err = -EFAULT;
	trace = ctx->data->callchain;
	if (unlikely(!trace))
		goto clear;

	nr_kernel = count_kernel_ip(trace);

	if (kernel) {
		__u64 nr = trace->nr;

		trace->nr = nr_kernel;
		err = __bpf_get_stack(regs, NULL, trace, buf, size, flags);

		/* restore nr */
		trace->nr = nr;
	} else { /* user */
		u64 skip = flags & BPF_F_SKIP_FIELD_MASK;

		skip += nr_kernel;
		if (skip > BPF_F_SKIP_FIELD_MASK)
			goto clear;

		flags = (flags & ~BPF_F_SKIP_FIELD_MASK) | skip;
		err = __bpf_get_stack(regs, NULL, trace, buf, size, flags);
	}
	return err;

clear:
	memset(buf, 0, size);
	return err;
}

const struct bpf_func_proto bpf_get_stack_proto_pe = {
	.func		= bpf_get_stack_pe,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg4_type	= ARG_ANYTHING,
};

/* Called from eBPF program */
static void *stack_map_lookup_elem(struct bpf_map *map, void *key)
{
	return ERR_PTR(-EOPNOTSUPP);
}

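/*
 * bpf_stackmap_copy() detaches the bucket with xchg() before copying,
 * so a concurrent bpf_get_stackid() cannot rewrite the data mid-copy;
 * the bucket is swapped back in afterwards, and any bucket that raced
 * into the slot in the meantime is returned to the freelist.
 */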
/* Called from syscall */
int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
{
	struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
	struct stack_map_bucket *bucket, *old_bucket;
	u32 id = *(u32 *)key, trace_len;

	if (unlikely(id >= smap->n_buckets))
		return -ENOENT;

	bucket = xchg(&smap->buckets[id], NULL);
	if (!bucket)
		return -ENOENT;

	trace_len = bucket->nr * stack_map_data_size(map);
	memcpy(value, bucket->data, trace_len);
	memset(value + trace_len, 0, map->value_size - trace_len);

	old_bucket = xchg(&smap->buckets[id], bucket);
	if (old_bucket)
		pcpu_freelist_push(&smap->freelist, &old_bucket->fnode);
	return 0;
}

static int stack_map_get_next_key(struct bpf_map *map, void *key,
				  void *next_key)
{
	struct bpf_stack_map *smap = container_of(map,
						  struct bpf_stack_map, map);
	u32 id;

	WARN_ON_ONCE(!rcu_read_lock_held());

	if (!key) {
		id = 0;
	} else {
		id = *(u32 *)key;
		if (id >= smap->n_buckets || !smap->buckets[id])
			id = 0;
		else
			id++;
	}

	while (id < smap->n_buckets && !smap->buckets[id])
		id++;

	if (id >= smap->n_buckets)
		return -ENOENT;

	*(u32 *)next_key = id;
	return 0;
}

static int stack_map_update_elem(struct bpf_map *map, void *key, void *value,
				 u64 map_flags)
{
	return -EINVAL;
}

/* Called from syscall or from eBPF program */
static int stack_map_delete_elem(struct bpf_map *map, void *key)
{
	struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
	struct stack_map_bucket *old_bucket;
	u32 id = *(u32 *)key;

	if (unlikely(id >= smap->n_buckets))
		return -E2BIG;

	old_bucket = xchg(&smap->buckets[id], NULL);
	if (old_bucket) {
		pcpu_freelist_push(&smap->freelist, &old_bucket->fnode);
		return 0;
	} else {
		return -ENOENT;
	}
}

/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void stack_map_free(struct bpf_map *map)
{
	struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);

	bpf_map_area_free(smap->elems);
	pcpu_freelist_destroy(&smap->freelist);
	bpf_map_area_free(smap);
	put_callchain_buffers();
}

static int stack_trace_map_btf_id;
const struct bpf_map_ops stack_trace_map_ops = {
	.map_alloc = stack_map_alloc,
	.map_free = stack_map_free,
	.map_get_next_key = stack_map_get_next_key,
	.map_lookup_elem = stack_map_lookup_elem,
	.map_update_elem = stack_map_update_elem,
	.map_delete_elem = stack_map_delete_elem,
	.map_check_btf = map_check_no_btf,
	.map_btf_name = "bpf_stack_map",
	.map_btf_id = &stack_trace_map_btf_id,
};

static int __init stack_map_init(void)
{
	int cpu;
	struct stack_map_irq_work *work;

	for_each_possible_cpu(cpu) {
		work = per_cpu_ptr(&up_read_work, cpu);
		init_irq_work(&work->irq_work, do_up_read);
	}
	return 0;
}
subsys_initcall(stack_map_init);