// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016 Facebook
 */
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/jhash.h>
#include <linux/filter.h>
#include <linux/rculist_nulls.h>
#include <linux/rcupdate_wait.h>
#include <linux/random.h>
#include <uapi/linux/btf.h>
#include <linux/rcupdate_trace.h>
#include <linux/btf_ids.h>
#include "percpu_freelist.h"
#include "bpf_lru_list.h"
#include "map_in_map.h"
#include <linux/bpf_mem_alloc.h>
#include <asm/rqspinlock.h>

#define HTAB_CREATE_FLAG_MASK						\
	(BPF_F_NO_PREALLOC | BPF_F_NO_COMMON_LRU | BPF_F_NUMA_NODE |	\
	 BPF_F_ACCESS_MASK | BPF_F_ZERO_SEED)

#define BATCH_OPS(_name)			\
	.map_lookup_batch =			\
	_name##_map_lookup_batch,		\
	.map_lookup_and_delete_batch =		\
	_name##_map_lookup_and_delete_batch,	\
	.map_update_batch =			\
	generic_map_update_batch,		\
	.map_delete_batch =			\
	generic_map_delete_batch

/*
 * The bucket lock has two protection scopes:
 *
 * 1) Serializing concurrent operations from BPF programs on different
 *    CPUs
 *
 * 2) Serializing concurrent operations from BPF programs and sys_bpf()
 *
 * BPF programs can execute in any context including perf, kprobes and
 * tracing. As there are almost no limits on where perf, kprobes and tracing
 * can be invoked from, the lock operations need to be protected against
 * deadlocks. Deadlocks can be caused by recursion and by an invocation in
 * the lock held section when functions which acquire this lock are invoked
 * from sys_bpf(). BPF recursion is prevented by incrementing the per CPU
 * variable bpf_prog_active, which prevents BPF programs attached to perf
 * events, kprobes and tracing from being invoked before the prior invocation
 * from one of these contexts has completed. sys_bpf() uses the same mechanism
 * by pinning the task to the current CPU and incrementing the recursion
 * protection across the map operation.
 *
 * This has subtle implications on PREEMPT_RT. PREEMPT_RT forbids certain
 * operations like memory allocations (even with GFP_ATOMIC) from atomic
 * contexts. This is required because even with GFP_ATOMIC the memory
 * allocator calls into code paths which acquire locks with long held lock
 * sections. To ensure the deterministic behaviour these locks are regular
 * spinlocks, which are converted to 'sleepable' spinlocks on RT. The only
 * true atomic contexts on an RT kernel are the low level hardware
 * handling, scheduling, low level interrupt handling, NMIs etc. None of
 * these contexts should ever do memory allocations.
 *
 * As regular device interrupt handlers and soft interrupts are forced into
 * thread context, the existing code which does
 *   spin_lock*(); alloc(GFP_ATOMIC); spin_unlock*();
 * just works.
 *
 * In theory the BPF locks could be converted to regular spinlocks as well,
 * but the bucket locks and percpu_freelist locks can be taken from
 * arbitrary contexts (perf, kprobes, tracepoints) which are required to be
 * atomic contexts even on RT. Before the introduction of bpf_mem_alloc,
 * it was only safe to use a raw spinlock for a preallocated hash map on an
 * RT kernel, because there was no memory allocation within the lock held
 * sections. Now that the hash map has been fully converted to bpf_mem_alloc,
 * allocations for non-preallocated hash maps are served from pre-filled
 * caches instead of calling into the memory allocator synchronously, so it
 * is safe to always use a raw spinlock for the bucket lock.
 */
struct bucket {
	struct hlist_nulls_head head;
	rqspinlock_t raw_lock;
};

#define HASHTAB_MAP_LOCK_COUNT 8
#define HASHTAB_MAP_LOCK_MASK (HASHTAB_MAP_LOCK_COUNT - 1)

struct bpf_htab {
	struct bpf_map map;
	struct bpf_mem_alloc ma;
	struct bpf_mem_alloc pcpu_ma;
	struct bucket *buckets;
	void *elems;
	union {
		struct pcpu_freelist freelist;
		struct bpf_lru lru;
	};
	struct htab_elem *__percpu *extra_elems;
	/* The number of elements in a non-preallocated hashtable is kept
	 * in either pcount or count.
	 */
	struct percpu_counter pcount;
	atomic_t count;
	bool use_percpu_counter;
	u32 n_buckets;	/* number of hash buckets */
	u32 elem_size;	/* size of each element in bytes */
	u32 hashrnd;
};

/* each htab element is struct htab_elem + key + value */
struct htab_elem {
	union {
		struct hlist_nulls_node hash_node;
		struct {
			void *padding;
			union {
				struct pcpu_freelist_node fnode;
				struct htab_elem *batch_flink;
			};
		};
	};
	union {
		/* pointer to per-cpu pointer */
		void *ptr_to_pptr;
		struct bpf_lru_node lru_node;
	};
	u32 hash;
	char key[] __aligned(8);
};

static inline bool htab_is_prealloc(const struct bpf_htab *htab)
{
	return !(htab->map.map_flags & BPF_F_NO_PREALLOC);
}

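/* Set up each bucket's nulls-terminated list head and its rqspinlock.
 * The nulls value encodes the bucket index, which lets lockless lookups
 * detect that an element migrated to another bucket and restart.
 */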
static void htab_init_buckets(struct bpf_htab *htab)
{
	unsigned int i;

	for (i = 0; i < htab->n_buckets; i++) {
		INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i);
		raw_res_spin_lock_init(&htab->buckets[i].raw_lock);
		cond_resched();
	}
}

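/* Bucket locking uses resilient queued spinlocks: raw_res_spin_lock_irqsave()
 * can fail instead of spinning forever, so htab_lock_bucket() propagates the
 * error and callers must handle the lock not being taken.
 */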
static inline int htab_lock_bucket(struct bucket *b, unsigned long *pflags)
{
	unsigned long flags;
	int ret;

	ret = raw_res_spin_lock_irqsave(&b->raw_lock, flags);
	if (ret)
		return ret;
	*pflags = flags;
	return 0;
}

static inline void htab_unlock_bucket(struct bucket *b, unsigned long flags)
{
	raw_res_spin_unlock_irqrestore(&b->raw_lock, flags);
}

static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node);

static bool htab_is_lru(const struct bpf_htab *htab)
{
	return htab->map.map_type == BPF_MAP_TYPE_LRU_HASH ||
	       htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
}

static bool htab_is_percpu(const struct bpf_htab *htab)
{
	return htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	       htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
}

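/* For per-cpu maps the element stores a pointer to the per-cpu value area
 * right after the key, rounded up to 8 bytes.
 */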
static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size,
				     void __percpu *pptr)
{
	*(void __percpu **)(l->key + roundup(key_size, 8)) = pptr;
}

static inline void __percpu *htab_elem_get_ptr(struct htab_elem *l, u32 key_size)
{
	return *(void __percpu **)(l->key + roundup(key_size, 8));
}

static void *fd_htab_map_get_ptr(const struct bpf_map *map, struct htab_elem *l)
{
	return *(void **)(l->key + roundup(map->key_size, 8));
}

static struct htab_elem *get_htab_elem(struct bpf_htab *htab, int i)
{
	return (struct htab_elem *) (htab->elems + i * (u64)htab->elem_size);
}

static bool htab_has_extra_elems(struct bpf_htab *htab)
{
	return !htab_is_percpu(htab) && !htab_is_lru(htab);
}

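/* Release the timers and workqueues embedded in the values of preallocated
 * elements, so that none of them outlives the map.
 */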
static void htab_free_prealloced_timers_and_wq(struct bpf_htab *htab)
{
	u32 num_entries = htab->map.max_entries;
	int i;

	if (htab_has_extra_elems(htab))
		num_entries += num_possible_cpus();

	for (i = 0; i < num_entries; i++) {
		struct htab_elem *elem;

		elem = get_htab_elem(htab, i);
		if (btf_record_has_field(htab->map.record, BPF_TIMER))
			bpf_obj_free_timer(htab->map.record,
					   elem->key + round_up(htab->map.key_size, 8));
		if (btf_record_has_field(htab->map.record, BPF_WORKQUEUE))
			bpf_obj_free_workqueue(htab->map.record,
					       elem->key + round_up(htab->map.key_size, 8));
		cond_resched();
	}
}

static void htab_free_prealloced_fields(struct bpf_htab *htab)
{
	u32 num_entries = htab->map.max_entries;
	int i;

	if (IS_ERR_OR_NULL(htab->map.record))
		return;
	if (htab_has_extra_elems(htab))
		num_entries += num_possible_cpus();
	for (i = 0; i < num_entries; i++) {
		struct htab_elem *elem;

		elem = get_htab_elem(htab, i);
		if (htab_is_percpu(htab)) {
			void __percpu *pptr = htab_elem_get_ptr(elem, htab->map.key_size);
			int cpu;

			for_each_possible_cpu(cpu) {
				bpf_obj_free_fields(htab->map.record, per_cpu_ptr(pptr, cpu));
				cond_resched();
			}
		} else {
			bpf_obj_free_fields(htab->map.record, elem->key + round_up(htab->map.key_size, 8));
			cond_resched();
		}
		cond_resched();
	}
}

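/* Free the per-cpu value areas of a preallocated per-cpu map, then the flat
 * element array itself.
 */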
static void htab_free_elems(struct bpf_htab *htab)
{
	int i;

	if (!htab_is_percpu(htab))
		goto free_elems;

	for (i = 0; i < htab->map.max_entries; i++) {
		void __percpu *pptr;

		pptr = htab_elem_get_ptr(get_htab_elem(htab, i),
					 htab->map.key_size);
		free_percpu(pptr);
		cond_resched();
	}
free_elems:
	bpf_map_area_free(htab->elems);
}

/* The LRU list has a lock (lru_lock). Each htab bucket has a lock
 * (bucket_lock). If both locks need to be acquired together, the lock
 * order is always lru_lock -> bucket_lock and this only happens in
 * bpf_lru_list.c logic. For example, certain code paths of
 * bpf_lru_pop_free(), which is called by prealloc_lru_pop(), will
 * acquire lru_lock first and then bucket_lock.
 *
 * In hashtab.c, to avoid deadlock, lock acquisition of
 * bucket_lock followed by lru_lock is not allowed. In such cases,
 * bucket_lock needs to be released first before acquiring lru_lock.
 */
static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key,
					  u32 hash)
{
	struct bpf_lru_node *node = bpf_lru_pop_free(&htab->lru, hash);
	struct htab_elem *l;

	if (node) {
		bpf_map_inc_elem_count(&htab->map);
		l = container_of(node, struct htab_elem, lru_node);
		memcpy(l->key, key, htab->map.key_size);
		return l;
	}

	return NULL;
}

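/* Preallocate all elements (plus one extra element per possible CPU for
 * plain hash maps) and hand them to either the LRU list or the per-cpu
 * freelist, depending on the map type.
 */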
static int prealloc_init(struct bpf_htab *htab)
{
	u32 num_entries = htab->map.max_entries;
	int err = -ENOMEM, i;

	if (htab_has_extra_elems(htab))
		num_entries += num_possible_cpus();

	htab->elems = bpf_map_area_alloc((u64)htab->elem_size * num_entries,
					 htab->map.numa_node);
	if (!htab->elems)
		return -ENOMEM;

	if (!htab_is_percpu(htab))
		goto skip_percpu_elems;

	for (i = 0; i < num_entries; i++) {
		u32 size = round_up(htab->map.value_size, 8);
		void __percpu *pptr;

		pptr = bpf_map_alloc_percpu(&htab->map, size, 8,
					    GFP_USER | __GFP_NOWARN);
		if (!pptr)
			goto free_elems;
		htab_elem_set_ptr(get_htab_elem(htab, i), htab->map.key_size,
				  pptr);
		cond_resched();
	}

skip_percpu_elems:
	if (htab_is_lru(htab))
		err = bpf_lru_init(&htab->lru,
				   htab->map.map_flags & BPF_F_NO_COMMON_LRU,
				   offsetof(struct htab_elem, hash) -
				   offsetof(struct htab_elem, lru_node),
				   htab_lru_map_delete_node,
				   htab);
	else
		err = pcpu_freelist_init(&htab->freelist);

	if (err)
		goto free_elems;

	if (htab_is_lru(htab))
		bpf_lru_populate(&htab->lru, htab->elems,
				 offsetof(struct htab_elem, lru_node),
				 htab->elem_size, num_entries);
	else
		pcpu_freelist_populate(&htab->freelist,
				       htab->elems + offsetof(struct htab_elem, fnode),
				       htab->elem_size, num_entries);

	return 0;

free_elems:
	htab_free_elems(htab);
	return err;
}

static void prealloc_destroy(struct bpf_htab *htab)
{
	htab_free_elems(htab);

	if (htab_is_lru(htab))
		bpf_lru_destroy(&htab->lru);
	else
		pcpu_freelist_destroy(&htab->freelist);
}

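/* Stash one preallocated element per possible CPU in htab->extra_elems.
 * These act as scratch elements when an update replaces an existing entry,
 * avoiding a freelist pop/push in that path.
 */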
static int alloc_extra_elems(struct bpf_htab *htab)
{
	struct htab_elem *__percpu *pptr, *l_new;
	struct pcpu_freelist_node *l;
	int cpu;

	pptr = bpf_map_alloc_percpu(&htab->map, sizeof(struct htab_elem *), 8,
				    GFP_USER | __GFP_NOWARN);
	if (!pptr)
		return -ENOMEM;

	for_each_possible_cpu(cpu) {
		l = pcpu_freelist_pop(&htab->freelist);
		/* pop will succeed, since prealloc_init()
		 * preallocated extra num_possible_cpus elements
		 */
		l_new = container_of(l, struct htab_elem, fnode);
		*per_cpu_ptr(pptr, cpu) = l_new;
	}
	htab->extra_elems = pptr;
	return 0;
}

/* Called from syscall */
static int htab_map_alloc_check(union bpf_attr *attr)
{
	bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
		       attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
	bool lru = (attr->map_type == BPF_MAP_TYPE_LRU_HASH ||
		    attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
	/* percpu_lru means each cpu has its own LRU list.
	 * it is different from BPF_MAP_TYPE_PERCPU_HASH where
	 * the map's value itself is percpu.  percpu_lru has
	 * nothing to do with the map's value.
	 */
	bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
	bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
	bool zero_seed = (attr->map_flags & BPF_F_ZERO_SEED);
	int numa_node = bpf_map_attr_numa_node(attr);

	BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) !=
		     offsetof(struct htab_elem, hash_node.pprev));

	if (zero_seed && !capable(CAP_SYS_ADMIN))
		/* Guard against local DoS, and discourage production use. */
		return -EPERM;

	if (attr->map_flags & ~HTAB_CREATE_FLAG_MASK ||
	    !bpf_map_flags_access_ok(attr->map_flags))
		return -EINVAL;

	if (!lru && percpu_lru)
		return -EINVAL;

	if (lru && !prealloc)
		return -ENOTSUPP;

	if (numa_node != NUMA_NO_NODE && (percpu || percpu_lru))
		return -EINVAL;

	/* check sanity of attributes.
	 * value_size == 0 may be allowed in the future to use map as a set
	 */
	if (attr->max_entries == 0 || attr->key_size == 0 ||
	    attr->value_size == 0)
		return -EINVAL;

	if ((u64)attr->key_size + attr->value_size >= KMALLOC_MAX_SIZE -
	    sizeof(struct htab_elem))
		/* if key_size + value_size is bigger, the user space won't be
		 * able to access the elements via bpf syscall. This check
		 * also makes sure that the elem_size doesn't overflow and it's
		 * kmalloc-able later in htab_map_update_elem()
		 */
		return -E2BIG;
	/* percpu map value size is bound by PCPU_MIN_UNIT_SIZE */
	if (percpu && round_up(attr->value_size, 8) > PCPU_MIN_UNIT_SIZE)
		return -E2BIG;

	return 0;
}

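/* Called from syscall. Allocates the bpf_htab, the bucket array and, unless
 * BPF_F_NO_PREALLOC was requested, all elements; otherwise it sets up the
 * bpf_mem_alloc caches used for on-demand element allocation.
 */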
static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
{
	bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
		       attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
	bool lru = (attr->map_type == BPF_MAP_TYPE_LRU_HASH ||
		    attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
	/* percpu_lru means each cpu has its own LRU list.
	 * it is different from BPF_MAP_TYPE_PERCPU_HASH where
	 * the map's value itself is percpu.  percpu_lru has
	 * nothing to do with the map's value.
	 */
	bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
	bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
	struct bpf_htab *htab;
	int err;

	htab = bpf_map_area_alloc(sizeof(*htab), NUMA_NO_NODE);
	if (!htab)
		return ERR_PTR(-ENOMEM);

	bpf_map_init_from_attr(&htab->map, attr);

	if (percpu_lru) {
		/* ensure each CPU's lru list has >=1 elements.
		 * since we are at it, make each lru list have the same
		 * number of elements.
		 */
		htab->map.max_entries = roundup(attr->max_entries,
						num_possible_cpus());
		if (htab->map.max_entries < attr->max_entries)
			htab->map.max_entries = rounddown(attr->max_entries,
							  num_possible_cpus());
	}

	/* hash table size must be power of 2; roundup_pow_of_two() can overflow
	 * into UB on 32-bit arches, so check that first
	 */
	err = -E2BIG;
	if (htab->map.max_entries > 1UL << 31)
		goto free_htab;

	htab->n_buckets = roundup_pow_of_two(htab->map.max_entries);

	htab->elem_size = sizeof(struct htab_elem) +
			  round_up(htab->map.key_size, 8);
	if (percpu)
		htab->elem_size += sizeof(void *);
	else
		htab->elem_size += round_up(htab->map.value_size, 8);

	/* check for u32 overflow */
	if (htab->n_buckets > U32_MAX / sizeof(struct bucket))
		goto free_htab;

	err = bpf_map_init_elem_count(&htab->map);
	if (err)
		goto free_htab;

	err = -ENOMEM;
	htab->buckets = bpf_map_area_alloc(htab->n_buckets *
					   sizeof(struct bucket),
					   htab->map.numa_node);
	if (!htab->buckets)
		goto free_elem_count;

	if (htab->map.map_flags & BPF_F_ZERO_SEED)
		htab->hashrnd = 0;
	else
		htab->hashrnd = get_random_u32();

	htab_init_buckets(htab);

	/* compute_batch_value() computes batch value as num_online_cpus() * 2
	 * and __percpu_counter_compare() needs
	 * htab->max_entries - cur_number_of_elems to be more than batch * num_online_cpus()
	 * for percpu_counter to be faster than atomic_t. In practice the average bpf
	 * hash map size is 10k, which means that a system with 64 cpus will fill
	 * hashmap to 20% of 10k before percpu_counter becomes ineffective. Therefore
	 * define our own batch count as 32, so that a 10k hash map can be filled up to 80%:
	 * 10k - 8k > 32 _batch_ * 64 _cpus_
	 * and __percpu_counter_compare() will still be fast. At that point hash map
	 * collisions will dominate its performance anyway. Assume that a hash map filled
	 * to 50+% isn't going to be O(1) and use the following formula to choose
	 * between percpu_counter and atomic_t.
	 */
#define PERCPU_COUNTER_BATCH 32
	if (attr->max_entries / 2 > num_online_cpus() * PERCPU_COUNTER_BATCH)
		htab->use_percpu_counter = true;

	if (htab->use_percpu_counter) {
		err = percpu_counter_init(&htab->pcount, 0, GFP_KERNEL);
		if (err)
			goto free_map_locked;
	}

	if (prealloc) {
		err = prealloc_init(htab);
		if (err)
			goto free_map_locked;

		if (!percpu && !lru) {
			/* lru itself can remove the least used element, so
			 * there is no need for an extra elem during map_update.
			 */
			err = alloc_extra_elems(htab);
			if (err)
				goto free_prealloc;
		}
	} else {
		err = bpf_mem_alloc_init(&htab->ma, htab->elem_size, false);
		if (err)
			goto free_map_locked;
		if (percpu) {
			err = bpf_mem_alloc_init(&htab->pcpu_ma,
						 round_up(htab->map.value_size, 8), true);
			if (err)
				goto free_map_locked;
		}
	}

	return &htab->map;

free_prealloc:
	prealloc_destroy(htab);
free_map_locked:
	if (htab->use_percpu_counter)
		percpu_counter_destroy(&htab->pcount);
	bpf_map_area_free(htab->buckets);
	bpf_mem_alloc_destroy(&htab->pcpu_ma);
	bpf_mem_alloc_destroy(&htab->ma);
free_elem_count:
	bpf_map_free_elem_count(&htab->map);
free_htab:
	bpf_map_area_free(htab);
	return ERR_PTR(err);
}

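/* jhash2() operates on 32-bit words, so prefer it when the key size is a
 * multiple of 4; fall back to byte-wise jhash() otherwise.
 */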
static inline u32 htab_map_hash(const void *key, u32 key_len, u32 hashrnd)
{
	if (likely(key_len % 4 == 0))
		return jhash2(key, key_len / 4, hashrnd);
	return jhash(key, key_len, hashrnd);
}

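/* n_buckets is always a power of two, so masking the hash selects the bucket. */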
static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
{
	return &htab->buckets[hash & (htab->n_buckets - 1)];
}

static inline struct hlist_nulls_head *select_bucket(struct bpf_htab *htab, u32 hash)
{
	return &__select_bucket(htab, hash)->head;
}

/* this lookup function can only be called with bucket lock taken */
static struct htab_elem *lookup_elem_raw(struct hlist_nulls_head *head, u32 hash,
					 void *key, u32 key_size)
{
	struct hlist_nulls_node *n;
	struct htab_elem *l;

	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
		if (l->hash == hash && !memcmp(&l->key, key, key_size))
			return l;

	return NULL;
}

/* can be called without the bucket lock. It will repeat the loop in
 * the unlikely event that elements moved from one bucket into another
 * while the link list is being walked
 */
static struct htab_elem *lookup_nulls_elem_raw(struct hlist_nulls_head *head,
					       u32 hash, void *key,
					       u32 key_size, u32 n_buckets)
{
	struct hlist_nulls_node *n;
	struct htab_elem *l;

again:
	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
		if (l->hash == hash && !memcmp(&l->key, key, key_size))
			return l;

	if (unlikely(get_nulls_value(n) != (hash & (n_buckets - 1))))
		goto again;

	return NULL;
}

/* Called from syscall or from eBPF program directly, so
 * arguments have to match bpf_map_lookup_elem() exactly.
 * The return value is adjusted by BPF instructions
 * in htab_map_gen_lookup().
 */
static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct hlist_nulls_head *head;
	struct htab_elem *l;
	u32 hash, key_size;

	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
		     !rcu_read_lock_bh_held());

	key_size = map->key_size;

	hash = htab_map_hash(key, key_size, htab->hashrnd);

	head = select_bucket(htab, hash);

	l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);

	return l;
}

static void *htab_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct htab_elem *l = __htab_map_lookup_elem(map, key);

	if (l)
		return l->key + round_up(map->key_size, 8);

	return NULL;
}

/* inline bpf_map_lookup_elem() call.
 * Instead of:
 * bpf_prog
 *   bpf_map_lookup_elem
 *     map->ops->map_lookup_elem
 *       htab_map_lookup_elem
 *         __htab_map_lookup_elem
 * do:
 * bpf_prog
 *   __htab_map_lookup_elem
 */
static int htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
	struct bpf_insn *insn = insn_buf;
	const int ret = BPF_REG_0;

	BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem,
		     (void *(*)(struct bpf_map *map, void *key))NULL));
	*insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem);
	*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1);
	*insn++ = BPF_ALU64_IMM(BPF_ADD, ret,
				offsetof(struct htab_elem, key) +
				round_up(map->key_size, 8));
	return insn - insn_buf;
}

static __always_inline void *__htab_lru_map_lookup_elem(struct bpf_map *map,
							void *key, const bool mark)
{
	struct htab_elem *l = __htab_map_lookup_elem(map, key);

	if (l) {
		if (mark)
			bpf_lru_node_set_ref(&l->lru_node);
		return l->key + round_up(map->key_size, 8);
	}

	return NULL;
}

static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key)
{
	return __htab_lru_map_lookup_elem(map, key, true);
}

static void *htab_lru_map_lookup_elem_sys(struct bpf_map *map, void *key)
{
	return __htab_lru_map_lookup_elem(map, key, false);
}

static int htab_lru_map_gen_lookup(struct bpf_map *map,
				   struct bpf_insn *insn_buf)
{
	struct bpf_insn *insn = insn_buf;
	const int ret = BPF_REG_0;
	const int ref_reg = BPF_REG_1;

	BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem,
		     (void *(*)(struct bpf_map *map, void *key))NULL));
	*insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem);
	*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 4);
	*insn++ = BPF_LDX_MEM(BPF_B, ref_reg, ret,
			      offsetof(struct htab_elem, lru_node) +
			      offsetof(struct bpf_lru_node, ref));
	*insn++ = BPF_JMP_IMM(BPF_JNE, ref_reg, 0, 1);
	*insn++ = BPF_ST_MEM(BPF_B, ret,
			     offsetof(struct htab_elem, lru_node) +
			     offsetof(struct bpf_lru_node, ref),
			     1);
	*insn++ = BPF_ALU64_IMM(BPF_ADD, ret,
				offsetof(struct htab_elem, key) +
				round_up(map->key_size, 8));
	return insn - insn_buf;
}

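/* Free the BTF-managed special fields recorded in the map's btf_record for an
 * element's value, covering every CPU's copy for per-cpu maps.
 */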
static void check_and_free_fields(struct bpf_htab *htab,
				  struct htab_elem *elem)
{
	if (IS_ERR_OR_NULL(htab->map.record))
		return;

	if (htab_is_percpu(htab)) {
		void __percpu *pptr = htab_elem_get_ptr(elem, htab->map.key_size);
		int cpu;

		for_each_possible_cpu(cpu)
			bpf_obj_free_fields(htab->map.record, per_cpu_ptr(pptr, cpu));
	} else {
		void *map_value = elem->key + round_up(htab->map.key_size, 8);

		bpf_obj_free_fields(htab->map.record, map_value);
	}
}

/* It is called from the bpf_lru_list when the LRU needs to delete
 * older elements from the htab.
 */
static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
{
	struct bpf_htab *htab = arg;
	struct htab_elem *l = NULL, *tgt_l;
	struct hlist_nulls_head *head;
	struct hlist_nulls_node *n;
	unsigned long flags;
	struct bucket *b;
	int ret;

	tgt_l = container_of(node, struct htab_elem, lru_node);
	b = __select_bucket(htab, tgt_l->hash);
	head = &b->head;

	ret = htab_lock_bucket(b, &flags);
	if (ret)
		return false;

	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
		if (l == tgt_l) {
			hlist_nulls_del_rcu(&l->hash_node);
			bpf_map_dec_elem_count(&htab->map);
			break;
		}

	htab_unlock_bucket(b, flags);

	if (l == tgt_l)
		check_and_free_fields(htab, l);
	return l == tgt_l;
}

/* Called from syscall */
static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct hlist_nulls_head *head;
	struct htab_elem *l, *next_l;
	u32 hash, key_size;
	int i = 0;

	WARN_ON_ONCE(!rcu_read_lock_held());

	key_size = map->key_size;

	if (!key)
		goto find_first_elem;

	hash = htab_map_hash(key, key_size, htab->hashrnd);

	head = select_bucket(htab, hash);

	/* lookup the key */
	l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);

	if (!l)
		goto find_first_elem;

	/* key was found, get next key in the same bucket */
	next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_next_rcu(&l->hash_node)),
					struct htab_elem, hash_node);

	if (next_l) {
		/* if next elem in this hash list is non-zero, just return it */
		memcpy(next_key, next_l->key, key_size);
		return 0;
	}

	/* no more elements in this hash list, go to the next bucket */
	i = hash & (htab->n_buckets - 1);
	i++;

find_first_elem:
	/* iterate over buckets */
	for (; i < htab->n_buckets; i++) {
		head = select_bucket(htab, i);

		/* pick first element in the bucket */
		next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_first_rcu(head)),
						struct htab_elem, hash_node);
		if (next_l) {
			/* if it's not empty, just return it */
			memcpy(next_key, next_l->key, key_size);
			return 0;
		}
	}

	/* iterated over all buckets and all elements */
	return -ENOENT;
}

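/* Free a non-preallocated element: release its special fields, its per-cpu
 * value area (for per-cpu maps) and finally the element itself back to the
 * bpf_mem_alloc cache.
 */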
static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l)
{
	check_and_free_fields(htab, l);

	if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH)
		bpf_mem_cache_free(&htab->pcpu_ma, l->ptr_to_pptr);
	bpf_mem_cache_free(&htab->ma, l);
}

static void htab_put_fd_value(struct bpf_htab *htab, struct htab_elem *l)
{
	struct bpf_map *map = &htab->map;
	void *ptr;

	if (map->ops->map_fd_put_ptr) {
		ptr = fd_htab_map_get_ptr(map, l);
		map->ops->map_fd_put_ptr(map, ptr, true);
	}
}

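/* Element accounting: large maps use a percpu_counter, small ones a plain
 * atomic_t; see the PERCPU_COUNTER_BATCH comment in htab_map_alloc().
 */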
static bool is_map_full(struct bpf_htab *htab)
{
	if (htab->use_percpu_counter)
		return __percpu_counter_compare(&htab->pcount, htab->map.max_entries,
						PERCPU_COUNTER_BATCH) >= 0;
	return atomic_read(&htab->count) >= htab->map.max_entries;
}

static void inc_elem_count(struct bpf_htab *htab)
{
	bpf_map_inc_elem_count(&htab->map);

	if (htab->use_percpu_counter)
		percpu_counter_add_batch(&htab->pcount, 1, PERCPU_COUNTER_BATCH);
	else
		atomic_inc(&htab->count);
}

static void dec_elem_count(struct bpf_htab *htab)
{
	bpf_map_dec_elem_count(&htab->map);

	if (htab->use_percpu_counter)
		percpu_counter_add_batch(&htab->pcount, -1, PERCPU_COUNTER_BATCH);
	else
		atomic_dec(&htab->count);
}

static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
{
	htab_put_fd_value(htab, l);

	if (htab_is_prealloc(htab)) {
		bpf_map_dec_elem_count(&htab->map);
		check_and_free_fields(htab, l);
		pcpu_freelist_push(&htab->freelist, &l->fnode);
	} else {
		dec_elem_count(htab);
		htab_elem_free(htab, l);
	}
}

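/* Copy a value into per-cpu storage: only this CPU's slot when onallcpus is
 * false, or every CPU's slot (laid out round_up(value_size, 8) bytes apart)
 * when onallcpus is true.
 */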
pcpu_copy_value(struct bpf_htab * htab,void __percpu * pptr,void * value,bool onallcpus)929fd91de7bSMartin KaFai Lau static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
930fd91de7bSMartin KaFai Lau void *value, bool onallcpus)
931fd91de7bSMartin KaFai Lau {
932fd91de7bSMartin KaFai Lau if (!onallcpus) {
933fd91de7bSMartin KaFai Lau /* copy true value_size bytes */
93465334e64SKumar Kartikeya Dwivedi copy_map_value(&htab->map, this_cpu_ptr(pptr), value);
935fd91de7bSMartin KaFai Lau } else {
936fd91de7bSMartin KaFai Lau u32 size = round_up(htab->map.value_size, 8);
937fd91de7bSMartin KaFai Lau int off = 0, cpu;
938fd91de7bSMartin KaFai Lau
939fd91de7bSMartin KaFai Lau for_each_possible_cpu(cpu) {
94065334e64SKumar Kartikeya Dwivedi copy_map_value_long(&htab->map, per_cpu_ptr(pptr, cpu), value + off);
941fd91de7bSMartin KaFai Lau off += size;
942fd91de7bSMartin KaFai Lau }
943fd91de7bSMartin KaFai Lau }
944fd91de7bSMartin KaFai Lau }
945fd91de7bSMartin KaFai Lau
pcpu_init_value(struct bpf_htab * htab,void __percpu * pptr,void * value,bool onallcpus)946d3bec013SDavid Verbeiren static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr,
947d3bec013SDavid Verbeiren void *value, bool onallcpus)
948d3bec013SDavid Verbeiren {
949ee4ed53cSAlexei Starovoitov /* When not setting the initial value on all cpus, zero-fill element
950ee4ed53cSAlexei Starovoitov * values for other cpus. Otherwise, bpf program has no way to ensure
951d3bec013SDavid Verbeiren * known initial values for cpus other than current one
952d3bec013SDavid Verbeiren * (onallcpus=false always when coming from bpf prog).
953d3bec013SDavid Verbeiren */
954ee4ed53cSAlexei Starovoitov if (!onallcpus) {
955d3bec013SDavid Verbeiren int current_cpu = raw_smp_processor_id();
956d3bec013SDavid Verbeiren int cpu;
957d3bec013SDavid Verbeiren
958d3bec013SDavid Verbeiren for_each_possible_cpu(cpu) {
959d3bec013SDavid Verbeiren if (cpu == current_cpu)
96065334e64SKumar Kartikeya Dwivedi copy_map_value_long(&htab->map, per_cpu_ptr(pptr, cpu), value);
96165334e64SKumar Kartikeya Dwivedi else /* Since elem is preallocated, we cannot touch special fields */
96265334e64SKumar Kartikeya Dwivedi zero_map_value(&htab->map, per_cpu_ptr(pptr, cpu));
963d3bec013SDavid Verbeiren }
964d3bec013SDavid Verbeiren } else {
965d3bec013SDavid Verbeiren pcpu_copy_value(htab, pptr, value, onallcpus);
966d3bec013SDavid Verbeiren }
967d3bec013SDavid Verbeiren }
968d3bec013SDavid Verbeiren
969cd36c3a2SDaniel Borkmann static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab)
970cd36c3a2SDaniel Borkmann {
971cd36c3a2SDaniel Borkmann return htab->map.map_type == BPF_MAP_TYPE_HASH_OF_MAPS &&
972cd36c3a2SDaniel Borkmann BITS_PER_LONG == 64;
973cd36c3a2SDaniel Borkmann }
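
/*
 * A hash-of-maps stores a struct bpf_map pointer in the value, so on
 * 64-bit kernels the stored value is wider than the 4-byte fd/id that
 * userspace works with; that is what the adjustment gated by
 * fd_htab_map_needs_adjust() accounts for.  Illustrative sketch only,
 * assuming an existing inner hash map fd in inner_fd:
 *
 *	LIBBPF_OPTS(bpf_map_create_opts, opts, .inner_map_fd = inner_fd);
 *	int outer_fd = bpf_map_create(BPF_MAP_TYPE_HASH_OF_MAPS, "outer",
 *				      sizeof(__u32), sizeof(__u32), 16, &opts);
 *	int err = bpf_map_update_elem(outer_fd, &key, &inner_fd, BPF_ANY);
 *
 * A syscall-side lookup on such a map returns the inner map id rather
 * than an fd.
 */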
974cd36c3a2SDaniel Borkmann
975824bd0ceSAlexei Starovoitov static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
976824bd0ceSAlexei Starovoitov void *value, u32 key_size, u32 hash,
977a6ed3ea6SAlexei Starovoitov bool percpu, bool onallcpus,
9788c290e60SAlexei Starovoitov struct htab_elem *old_elem)
979824bd0ceSAlexei Starovoitov {
980d83525caSAlexei Starovoitov u32 size = htab->map.value_size;
9818c290e60SAlexei Starovoitov bool prealloc = htab_is_prealloc(htab);
9828c290e60SAlexei Starovoitov struct htab_elem *l_new, **pl_new;
983824bd0ceSAlexei Starovoitov void __percpu *pptr;
984824bd0ceSAlexei Starovoitov
9856c905981SAlexei Starovoitov if (prealloc) {
9868c290e60SAlexei Starovoitov if (old_elem) {
9878c290e60SAlexei Starovoitov /* if we're updating the existing element,
9888c290e60SAlexei Starovoitov * use per-cpu extra elems to avoid freelist_pop/push
9898c290e60SAlexei Starovoitov */
9908c290e60SAlexei Starovoitov pl_new = this_cpu_ptr(htab->extra_elems);
9918c290e60SAlexei Starovoitov l_new = *pl_new;
9928c290e60SAlexei Starovoitov *pl_new = old_elem;
9938c290e60SAlexei Starovoitov } else {
9949f691549SAlexei Starovoitov struct pcpu_freelist_node *l;
9959f691549SAlexei Starovoitov
996a89fac57SAlexei Starovoitov l = __pcpu_freelist_pop(&htab->freelist);
9979f691549SAlexei Starovoitov if (!l)
9988c290e60SAlexei Starovoitov return ERR_PTR(-E2BIG);
9999f691549SAlexei Starovoitov l_new = container_of(l, struct htab_elem, fnode);
10009bc421b6SAnton Protopopov bpf_map_inc_elem_count(&htab->map);
10018c290e60SAlexei Starovoitov }
10026c905981SAlexei Starovoitov } else {
100386fe28f7SAlexei Starovoitov if (is_map_full(htab))
100486fe28f7SAlexei Starovoitov if (!old_elem)
10058c290e60SAlexei Starovoitov /* when map is full and update() is replacing
10068c290e60SAlexei Starovoitov * old element, it's ok to allocate, since
10078c290e60SAlexei Starovoitov * old element will be freed immediately.
10088c290e60SAlexei Starovoitov * Otherwise return an error
10098c290e60SAlexei Starovoitov */
101086fe28f7SAlexei Starovoitov return ERR_PTR(-E2BIG);
101186fe28f7SAlexei Starovoitov inc_elem_count(htab);
1012fba1a1c6SAlexei Starovoitov l_new = bpf_mem_cache_alloc(&htab->ma);
1013ed2b82c0SMauricio Vasquez B if (!l_new) {
1014ed2b82c0SMauricio Vasquez B l_new = ERR_PTR(-ENOMEM);
1015ed2b82c0SMauricio Vasquez B goto dec_count;
1016ed2b82c0SMauricio Vasquez B }
10176c905981SAlexei Starovoitov }
1018824bd0ceSAlexei Starovoitov
1019824bd0ceSAlexei Starovoitov memcpy(l_new->key, key, key_size);
1020824bd0ceSAlexei Starovoitov if (percpu) {
10216c905981SAlexei Starovoitov if (prealloc) {
10226c905981SAlexei Starovoitov pptr = htab_elem_get_ptr(l_new, key_size);
10236c905981SAlexei Starovoitov } else {
1024824bd0ceSAlexei Starovoitov /* alloc_percpu zero-fills */
10256d641ca5SUros Bizjak void *ptr = bpf_mem_cache_alloc(&htab->pcpu_ma);
10266d641ca5SUros Bizjak
10276d641ca5SUros Bizjak if (!ptr) {
1028fba1a1c6SAlexei Starovoitov bpf_mem_cache_free(&htab->ma, l_new);
1029ed2b82c0SMauricio Vasquez B l_new = ERR_PTR(-ENOMEM);
1030ed2b82c0SMauricio Vasquez B goto dec_count;
10316c905981SAlexei Starovoitov }
10326d641ca5SUros Bizjak l_new->ptr_to_pptr = ptr;
10336d641ca5SUros Bizjak pptr = *(void __percpu **)ptr;
1034824bd0ceSAlexei Starovoitov }
1035824bd0ceSAlexei Starovoitov
1036d3bec013SDavid Verbeiren pcpu_init_value(htab, pptr, value, onallcpus);
103715a07b33SAlexei Starovoitov
10386c905981SAlexei Starovoitov if (!prealloc)
1039824bd0ceSAlexei Starovoitov htab_elem_set_ptr(l_new, key_size, pptr);
1040d83525caSAlexei Starovoitov } else if (fd_htab_map_needs_adjust(htab)) {
1041d83525caSAlexei Starovoitov size = round_up(size, 8);
1042824bd0ceSAlexei Starovoitov memcpy(l_new->key + round_up(key_size, 8), value, size);
1043d83525caSAlexei Starovoitov } else {
1044d83525caSAlexei Starovoitov copy_map_value(&htab->map,
1045d83525caSAlexei Starovoitov l_new->key + round_up(key_size, 8),
1046d83525caSAlexei Starovoitov value);
1047824bd0ceSAlexei Starovoitov }
1048824bd0ceSAlexei Starovoitov
1049824bd0ceSAlexei Starovoitov l_new->hash = hash;
1050824bd0ceSAlexei Starovoitov return l_new;
1051ed2b82c0SMauricio Vasquez B dec_count:
105286fe28f7SAlexei Starovoitov dec_elem_count(htab);
1053ed2b82c0SMauricio Vasquez B return l_new;
1054824bd0ceSAlexei Starovoitov }
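
/*
 * Whether elements come from the pre-allocated freelist or from
 * bpf_mem_alloc above is fixed at map creation time.  A hash map that
 * allocates on demand (and can therefore hit the is_map_full() check) is
 * created with BPF_F_NO_PREALLOC; a minimal libbpf sketch, names assumed:
 *
 *	LIBBPF_OPTS(bpf_map_create_opts, opts,
 *		    .map_flags = BPF_F_NO_PREALLOC);
 *	int fd = bpf_map_create(BPF_MAP_TYPE_HASH, "dyn_hash",
 *				sizeof(__u32), sizeof(__u64), 1024, &opts);
 *
 * In either mode, inserting a new key once max_entries is reached fails
 * with E2BIG, matching the ERR_PTR(-E2BIG) returns above.
 */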
1055824bd0ceSAlexei Starovoitov
1056824bd0ceSAlexei Starovoitov static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old,
1057824bd0ceSAlexei Starovoitov u64 map_flags)
1058824bd0ceSAlexei Starovoitov {
105996049f3aSAlexei Starovoitov if (l_old && (map_flags & ~BPF_F_LOCK) == BPF_NOEXIST)
1060824bd0ceSAlexei Starovoitov /* elem already exists */
1061824bd0ceSAlexei Starovoitov return -EEXIST;
1062824bd0ceSAlexei Starovoitov
106396049f3aSAlexei Starovoitov if (!l_old && (map_flags & ~BPF_F_LOCK) == BPF_EXIST)
1064824bd0ceSAlexei Starovoitov /* elem doesn't exist, cannot update it */
1065824bd0ceSAlexei Starovoitov return -ENOENT;
1066824bd0ceSAlexei Starovoitov
1067824bd0ceSAlexei Starovoitov return 0;
1068824bd0ceSAlexei Starovoitov }
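
/*
 * The flag semantics enforced by check_flags() are visible to userspace
 * through errno: BPF_NOEXIST fails with EEXIST when the key is already
 * present, BPF_EXIST fails with ENOENT when it is not, and BPF_ANY
 * creates or replaces unconditionally.  Minimal sketch, assuming map_fd,
 * key and val buffers:
 *
 *	int err;
 *
 *	err = bpf_map_update_elem(map_fd, &key, &val, BPF_NOEXIST);
 *	if (err && errno == EEXIST)
 *		err = bpf_map_update_elem(map_fd, &key, &val, BPF_EXIST);
 */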
1069824bd0ceSAlexei Starovoitov
10700f8e4bd8SAlexei Starovoitov /* Called from syscall or from eBPF program */
1071d7ba4cc9SJP Kobryn static long htab_map_update_elem(struct bpf_map *map, void *key, void *value,
10720f8e4bd8SAlexei Starovoitov u64 map_flags)
10730f8e4bd8SAlexei Starovoitov {
10740f8e4bd8SAlexei Starovoitov struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
1075824bd0ceSAlexei Starovoitov struct htab_elem *l_new = NULL, *l_old;
10764fe84359SAlexei Starovoitov struct hlist_nulls_head *head;
10770f8e4bd8SAlexei Starovoitov unsigned long flags;
1078b9e9ed90SHou Tao void *old_map_ptr;
1079824bd0ceSAlexei Starovoitov struct bucket *b;
1080824bd0ceSAlexei Starovoitov u32 key_size, hash;
10810f8e4bd8SAlexei Starovoitov int ret;
10820f8e4bd8SAlexei Starovoitov
108396049f3aSAlexei Starovoitov if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST))
10840f8e4bd8SAlexei Starovoitov /* unknown flags */
10850f8e4bd8SAlexei Starovoitov return -EINVAL;
10860f8e4bd8SAlexei Starovoitov
1087694cea39SToke Høiland-Jørgensen WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
1088694cea39SToke Høiland-Jørgensen !rcu_read_lock_bh_held());
10890f8e4bd8SAlexei Starovoitov
1090824bd0ceSAlexei Starovoitov key_size = map->key_size;
1091824bd0ceSAlexei Starovoitov
1092c0203475SDaniel Borkmann hash = htab_map_hash(key, key_size, htab->hashrnd);
1093824bd0ceSAlexei Starovoitov
1094824bd0ceSAlexei Starovoitov b = __select_bucket(htab, hash);
1095688ecfe6S[email protected] head = &b->head;
10960f8e4bd8SAlexei Starovoitov
109796049f3aSAlexei Starovoitov if (unlikely(map_flags & BPF_F_LOCK)) {
1098db559117SKumar Kartikeya Dwivedi if (unlikely(!btf_record_has_field(map->record, BPF_SPIN_LOCK)))
109996049f3aSAlexei Starovoitov return -EINVAL;
110096049f3aSAlexei Starovoitov /* find an element without taking the bucket lock */
110196049f3aSAlexei Starovoitov l_old = lookup_nulls_elem_raw(head, hash, key, key_size,
110296049f3aSAlexei Starovoitov htab->n_buckets);
110396049f3aSAlexei Starovoitov ret = check_flags(htab, l_old, map_flags);
110496049f3aSAlexei Starovoitov if (ret)
110596049f3aSAlexei Starovoitov return ret;
110696049f3aSAlexei Starovoitov if (l_old) {
110796049f3aSAlexei Starovoitov /* grab the element lock and update value in place */
110896049f3aSAlexei Starovoitov copy_map_value_locked(map,
110996049f3aSAlexei Starovoitov l_old->key + round_up(key_size, 8),
111096049f3aSAlexei Starovoitov value, false);
111196049f3aSAlexei Starovoitov return 0;
111296049f3aSAlexei Starovoitov }
111396049f3aSAlexei Starovoitov /* fall through, grab the bucket lock and lookup again.
111496049f3aSAlexei Starovoitov * 99.9% chance that the element won't be found,
111596049f3aSAlexei Starovoitov * but second lookup under lock has to be done.
111696049f3aSAlexei Starovoitov */
111796049f3aSAlexei Starovoitov }
111896049f3aSAlexei Starovoitov
11194fa8d68aSKumar Kartikeya Dwivedi ret = htab_lock_bucket(b, &flags);
112020b6cc34SSong Liu if (ret)
112120b6cc34SSong Liu return ret;
11220f8e4bd8SAlexei Starovoitov
1123824bd0ceSAlexei Starovoitov l_old = lookup_elem_raw(head, hash, key, key_size);
11240f8e4bd8SAlexei Starovoitov
1125824bd0ceSAlexei Starovoitov ret = check_flags(htab, l_old, map_flags);
1126824bd0ceSAlexei Starovoitov if (ret)
11270f8e4bd8SAlexei Starovoitov goto err;
11280f8e4bd8SAlexei Starovoitov
112996049f3aSAlexei Starovoitov if (unlikely(l_old && (map_flags & BPF_F_LOCK))) {
113096049f3aSAlexei Starovoitov /* first lookup without the bucket lock didn't find the element,
113196049f3aSAlexei Starovoitov * but second lookup with the bucket lock found it.
113296049f3aSAlexei Starovoitov * This case is highly unlikely, but has to be dealt with:
113396049f3aSAlexei Starovoitov * grab the element lock in addition to the bucket lock
113496049f3aSAlexei Starovoitov * and update element in place
113596049f3aSAlexei Starovoitov */
113696049f3aSAlexei Starovoitov copy_map_value_locked(map,
113796049f3aSAlexei Starovoitov l_old->key + round_up(key_size, 8),
113896049f3aSAlexei Starovoitov value, false);
113996049f3aSAlexei Starovoitov ret = 0;
114096049f3aSAlexei Starovoitov goto err;
114196049f3aSAlexei Starovoitov }
114296049f3aSAlexei Starovoitov
1143a6ed3ea6SAlexei Starovoitov l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
11448c290e60SAlexei Starovoitov l_old);
11456c905981SAlexei Starovoitov if (IS_ERR(l_new)) {
11466c905981SAlexei Starovoitov /* all pre-allocated elements are in use or memory exhausted */
11476c905981SAlexei Starovoitov ret = PTR_ERR(l_new);
11486c905981SAlexei Starovoitov goto err;
11496c905981SAlexei Starovoitov }
11506c905981SAlexei Starovoitov
1151824bd0ceSAlexei Starovoitov /* add new element to the head of the list, so that
1152824bd0ceSAlexei Starovoitov * concurrent search will find it before old elem
11530f8e4bd8SAlexei Starovoitov */
11544fe84359SAlexei Starovoitov hlist_nulls_add_head_rcu(&l_new->hash_node, head);
11550f8e4bd8SAlexei Starovoitov if (l_old) {
11564fe84359SAlexei Starovoitov hlist_nulls_del_rcu(&l_old->hash_node);
1157b9e9ed90SHou Tao
1158b9e9ed90SHou Tao /* l_old has already been stashed in htab->extra_elems, free
1159b9e9ed90SHou Tao * its special fields before it is available for reuse. Also
1160b9e9ed90SHou Tao * save the old map pointer in htab of maps before unlock
1161b9e9ed90SHou Tao * and release it after unlock.
1162b9e9ed90SHou Tao */
1163b9e9ed90SHou Tao old_map_ptr = NULL;
1164b9e9ed90SHou Tao if (htab_is_prealloc(htab)) {
1165b9e9ed90SHou Tao if (map->ops->map_fd_put_ptr)
1166b9e9ed90SHou Tao old_map_ptr = fd_htab_map_get_ptr(map, l_old);
116714a324f6SKumar Kartikeya Dwivedi check_and_free_fields(htab, l_old);
11680f8e4bd8SAlexei Starovoitov }
1169b9e9ed90SHou Tao }
11704fa8d68aSKumar Kartikeya Dwivedi htab_unlock_bucket(b, flags);
1171b9e9ed90SHou Tao if (l_old) {
1172b9e9ed90SHou Tao if (old_map_ptr)
1173b9e9ed90SHou Tao map->ops->map_fd_put_ptr(map, old_map_ptr, true);
1174b9e9ed90SHou Tao if (!htab_is_prealloc(htab))
1175b9e9ed90SHou Tao free_htab_elem(htab, l_old);
1176b9e9ed90SHou Tao }
1177b9e9ed90SHou Tao return 0;
11780f8e4bd8SAlexei Starovoitov err:
11794fa8d68aSKumar Kartikeya Dwivedi htab_unlock_bucket(b, flags);
11800f8e4bd8SAlexei Starovoitov return ret;
11810f8e4bd8SAlexei Starovoitov }
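
/*
 * Minimal sketch of the BPF_F_LOCK path handled above, with assumed
 * names: the value type must carry a struct bpf_spin_lock described by
 * the map's BTF, and the flag then updates the value in place under the
 * element lock instead of replacing the element:
 *
 *	struct map_val {
 *		struct bpf_spin_lock lock;
 *		__u64 counter;
 *	};
 *
 *	struct map_val v = { .counter = 1 };
 *	int err = bpf_map_update_elem(map_fd, &key, &v, BPF_F_LOCK);
 */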
11820f8e4bd8SAlexei Starovoitov
118368134668SAlexei Starovoitov static void htab_lru_push_free(struct bpf_htab *htab, struct htab_elem *elem)
118468134668SAlexei Starovoitov {
118514a324f6SKumar Kartikeya Dwivedi check_and_free_fields(htab, elem);
11869bc421b6SAnton Protopopov bpf_map_dec_elem_count(&htab->map);
118768134668SAlexei Starovoitov bpf_lru_push_free(&htab->lru, &elem->lru_node);
118868134668SAlexei Starovoitov }
118968134668SAlexei Starovoitov
1190d7ba4cc9SJP Kobryn static long htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
119129ba732aSMartin KaFai Lau u64 map_flags)
119229ba732aSMartin KaFai Lau {
119329ba732aSMartin KaFai Lau struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
119429ba732aSMartin KaFai Lau struct htab_elem *l_new, *l_old = NULL;
11954fe84359SAlexei Starovoitov struct hlist_nulls_head *head;
119629ba732aSMartin KaFai Lau unsigned long flags;
119729ba732aSMartin KaFai Lau struct bucket *b;
119829ba732aSMartin KaFai Lau u32 key_size, hash;
119929ba732aSMartin KaFai Lau int ret;
120029ba732aSMartin KaFai Lau
120129ba732aSMartin KaFai Lau if (unlikely(map_flags > BPF_EXIST))
120229ba732aSMartin KaFai Lau /* unknown flags */
120329ba732aSMartin KaFai Lau return -EINVAL;
120429ba732aSMartin KaFai Lau
1205694cea39SToke Høiland-Jørgensen WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
1206694cea39SToke Høiland-Jørgensen !rcu_read_lock_bh_held());
120729ba732aSMartin KaFai Lau
120829ba732aSMartin KaFai Lau key_size = map->key_size;
120929ba732aSMartin KaFai Lau
1210c0203475SDaniel Borkmann hash = htab_map_hash(key, key_size, htab->hashrnd);
121129ba732aSMartin KaFai Lau
121229ba732aSMartin KaFai Lau b = __select_bucket(htab, hash);
121329ba732aSMartin KaFai Lau head = &b->head;
121429ba732aSMartin KaFai Lau
121529ba732aSMartin KaFai Lau /* For LRU, we need to alloc before taking bucket's
121629ba732aSMartin KaFai Lau * spinlock because getting free nodes from LRU may need
121729ba732aSMartin KaFai Lau * to remove older elements from htab and this removal
121829ba732aSMartin KaFai Lau * operation will need a bucket lock.
121929ba732aSMartin KaFai Lau */
122029ba732aSMartin KaFai Lau l_new = prealloc_lru_pop(htab, key, hash);
122129ba732aSMartin KaFai Lau if (!l_new)
122229ba732aSMartin KaFai Lau return -ENOMEM;
122368134668SAlexei Starovoitov copy_map_value(&htab->map,
122468134668SAlexei Starovoitov l_new->key + round_up(map->key_size, 8), value);
122529ba732aSMartin KaFai Lau
12264fa8d68aSKumar Kartikeya Dwivedi ret = htab_lock_bucket(b, &flags);
122720b6cc34SSong Liu if (ret)
1228b34ffb0cSAnton Protopopov goto err_lock_bucket;
122929ba732aSMartin KaFai Lau
123029ba732aSMartin KaFai Lau l_old = lookup_elem_raw(head, hash, key, key_size);
123129ba732aSMartin KaFai Lau
123229ba732aSMartin KaFai Lau ret = check_flags(htab, l_old, map_flags);
123329ba732aSMartin KaFai Lau if (ret)
123429ba732aSMartin KaFai Lau goto err;
123529ba732aSMartin KaFai Lau
123629ba732aSMartin KaFai Lau /* add new element to the head of the list, so that
123729ba732aSMartin KaFai Lau * concurrent search will find it before old elem
123829ba732aSMartin KaFai Lau */
12394fe84359SAlexei Starovoitov hlist_nulls_add_head_rcu(&l_new->hash_node, head);
124029ba732aSMartin KaFai Lau if (l_old) {
124129ba732aSMartin KaFai Lau bpf_lru_node_set_ref(&l_new->lru_node);
12424fe84359SAlexei Starovoitov hlist_nulls_del_rcu(&l_old->hash_node);
124329ba732aSMartin KaFai Lau }
124429ba732aSMartin KaFai Lau ret = 0;
124529ba732aSMartin KaFai Lau
124629ba732aSMartin KaFai Lau err:
12474fa8d68aSKumar Kartikeya Dwivedi htab_unlock_bucket(b, flags);
124829ba732aSMartin KaFai Lau
1249b34ffb0cSAnton Protopopov err_lock_bucket:
125029ba732aSMartin KaFai Lau if (ret)
125168134668SAlexei Starovoitov htab_lru_push_free(htab, l_new);
125229ba732aSMartin KaFai Lau else if (l_old)
125368134668SAlexei Starovoitov htab_lru_push_free(htab, l_old);
125429ba732aSMartin KaFai Lau
125529ba732aSMartin KaFai Lau return ret;
125629ba732aSMartin KaFai Lau }
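
/*
 * LRU hash maps are always pre-allocated; once max_entries is reached a
 * new update evicts an older element via prealloc_lru_pop() instead of
 * failing.  Minimal creation sketch, names assumed; BPF_F_NO_COMMON_LRU
 * gives each CPU its own LRU list instead of one shared list:
 *
 *	LIBBPF_OPTS(bpf_map_create_opts, opts,
 *		    .map_flags = BPF_F_NO_COMMON_LRU);
 *	int fd = bpf_map_create(BPF_MAP_TYPE_LRU_HASH, "lru_hash",
 *				sizeof(__u32), sizeof(__u64), 4096, &opts);
 */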
125729ba732aSMartin KaFai Lau
1258d7ba4cc9SJP Kobryn static long __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
125915a07b33SAlexei Starovoitov void *value, u64 map_flags,
126015a07b33SAlexei Starovoitov bool onallcpus)
1261824bd0ceSAlexei Starovoitov {
1262824bd0ceSAlexei Starovoitov struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
1263824bd0ceSAlexei Starovoitov struct htab_elem *l_new = NULL, *l_old;
12644fe84359SAlexei Starovoitov struct hlist_nulls_head *head;
1265824bd0ceSAlexei Starovoitov unsigned long flags;
1266824bd0ceSAlexei Starovoitov struct bucket *b;
1267824bd0ceSAlexei Starovoitov u32 key_size, hash;
1268824bd0ceSAlexei Starovoitov int ret;
1269824bd0ceSAlexei Starovoitov
1270824bd0ceSAlexei Starovoitov if (unlikely(map_flags > BPF_EXIST))
1271824bd0ceSAlexei Starovoitov /* unknown flags */
1272824bd0ceSAlexei Starovoitov return -EINVAL;
1273824bd0ceSAlexei Starovoitov
1274694cea39SToke Høiland-Jørgensen WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
1275694cea39SToke Høiland-Jørgensen !rcu_read_lock_bh_held());
1276824bd0ceSAlexei Starovoitov
1277824bd0ceSAlexei Starovoitov key_size = map->key_size;
1278824bd0ceSAlexei Starovoitov
1279c0203475SDaniel Borkmann hash = htab_map_hash(key, key_size, htab->hashrnd);
1280824bd0ceSAlexei Starovoitov
1281824bd0ceSAlexei Starovoitov b = __select_bucket(htab, hash);
1282824bd0ceSAlexei Starovoitov head = &b->head;
1283824bd0ceSAlexei Starovoitov
12844fa8d68aSKumar Kartikeya Dwivedi ret = htab_lock_bucket(b, &flags);
128520b6cc34SSong Liu if (ret)
128620b6cc34SSong Liu return ret;
1287824bd0ceSAlexei Starovoitov
1288824bd0ceSAlexei Starovoitov l_old = lookup_elem_raw(head, hash, key, key_size);
1289824bd0ceSAlexei Starovoitov
1290824bd0ceSAlexei Starovoitov ret = check_flags(htab, l_old, map_flags);
1291824bd0ceSAlexei Starovoitov if (ret)
1292824bd0ceSAlexei Starovoitov goto err;
1293824bd0ceSAlexei Starovoitov
1294824bd0ceSAlexei Starovoitov if (l_old) {
1295824bd0ceSAlexei Starovoitov /* per-cpu hash map can update value in-place */
1296fd91de7bSMartin KaFai Lau pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
1297fd91de7bSMartin KaFai Lau value, onallcpus);
1298824bd0ceSAlexei Starovoitov } else {
1299824bd0ceSAlexei Starovoitov l_new = alloc_htab_elem(htab, key, value, key_size,
13008c290e60SAlexei Starovoitov hash, true, onallcpus, NULL);
13016c905981SAlexei Starovoitov if (IS_ERR(l_new)) {
13026c905981SAlexei Starovoitov ret = PTR_ERR(l_new);
1303824bd0ceSAlexei Starovoitov goto err;
1304824bd0ceSAlexei Starovoitov }
13054fe84359SAlexei Starovoitov hlist_nulls_add_head_rcu(&l_new->hash_node, head);
1306824bd0ceSAlexei Starovoitov }
1307824bd0ceSAlexei Starovoitov ret = 0;
1308824bd0ceSAlexei Starovoitov err:
13094fa8d68aSKumar Kartikeya Dwivedi htab_unlock_bucket(b, flags);
1310824bd0ceSAlexei Starovoitov return ret;
1311824bd0ceSAlexei Starovoitov }
1312824bd0ceSAlexei Starovoitov
1313d7ba4cc9SJP Kobryn static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
13148f844938SMartin KaFai Lau void *value, u64 map_flags,
13158f844938SMartin KaFai Lau bool onallcpus)
13168f844938SMartin KaFai Lau {
13178f844938SMartin KaFai Lau struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
13188f844938SMartin KaFai Lau struct htab_elem *l_new = NULL, *l_old;
13194fe84359SAlexei Starovoitov struct hlist_nulls_head *head;
13208f844938SMartin KaFai Lau unsigned long flags;
13218f844938SMartin KaFai Lau struct bucket *b;
13228f844938SMartin KaFai Lau u32 key_size, hash;
13238f844938SMartin KaFai Lau int ret;
13248f844938SMartin KaFai Lau
13258f844938SMartin KaFai Lau if (unlikely(map_flags > BPF_EXIST))
13268f844938SMartin KaFai Lau /* unknown flags */
13278f844938SMartin KaFai Lau return -EINVAL;
13288f844938SMartin KaFai Lau
1329694cea39SToke Høiland-Jørgensen WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
1330694cea39SToke Høiland-Jørgensen !rcu_read_lock_bh_held());
13318f844938SMartin KaFai Lau
13328f844938SMartin KaFai Lau key_size = map->key_size;
13338f844938SMartin KaFai Lau
1334c0203475SDaniel Borkmann hash = htab_map_hash(key, key_size, htab->hashrnd);
13358f844938SMartin KaFai Lau
13368f844938SMartin KaFai Lau b = __select_bucket(htab, hash);
13378f844938SMartin KaFai Lau head = &b->head;
13388f844938SMartin KaFai Lau
13398f844938SMartin KaFai Lau /* For LRU, we need to alloc before taking bucket's
13408f844938SMartin KaFai Lau * spinlock because LRU's elem alloc may need
13418f844938SMartin KaFai Lau * to remove older elem from htab and this removal
13428f844938SMartin KaFai Lau * operation will need a bucket lock.
13438f844938SMartin KaFai Lau */
13448f844938SMartin KaFai Lau if (map_flags != BPF_EXIST) {
13458f844938SMartin KaFai Lau l_new = prealloc_lru_pop(htab, key, hash);
13468f844938SMartin KaFai Lau if (!l_new)
13478f844938SMartin KaFai Lau return -ENOMEM;
13488f844938SMartin KaFai Lau }
13498f844938SMartin KaFai Lau
13504fa8d68aSKumar Kartikeya Dwivedi ret = htab_lock_bucket(b, &flags);
135120b6cc34SSong Liu if (ret)
1352b34ffb0cSAnton Protopopov goto err_lock_bucket;
13538f844938SMartin KaFai Lau
13548f844938SMartin KaFai Lau l_old = lookup_elem_raw(head, hash, key, key_size);
13558f844938SMartin KaFai Lau
13568f844938SMartin KaFai Lau ret = check_flags(htab, l_old, map_flags);
13578f844938SMartin KaFai Lau if (ret)
13588f844938SMartin KaFai Lau goto err;
13598f844938SMartin KaFai Lau
13608f844938SMartin KaFai Lau if (l_old) {
13618f844938SMartin KaFai Lau bpf_lru_node_set_ref(&l_old->lru_node);
13628f844938SMartin KaFai Lau
13638f844938SMartin KaFai Lau /* per-cpu hash map can update value in-place */
13648f844938SMartin KaFai Lau pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
13658f844938SMartin KaFai Lau value, onallcpus);
13668f844938SMartin KaFai Lau } else {
1367d3bec013SDavid Verbeiren pcpu_init_value(htab, htab_elem_get_ptr(l_new, key_size),
13688f844938SMartin KaFai Lau value, onallcpus);
13694fe84359SAlexei Starovoitov hlist_nulls_add_head_rcu(&l_new->hash_node, head);
13708f844938SMartin KaFai Lau l_new = NULL;
13718f844938SMartin KaFai Lau }
13728f844938SMartin KaFai Lau ret = 0;
13738f844938SMartin KaFai Lau err:
13744fa8d68aSKumar Kartikeya Dwivedi htab_unlock_bucket(b, flags);
1375b34ffb0cSAnton Protopopov err_lock_bucket:
13769bc421b6SAnton Protopopov if (l_new) {
13779bc421b6SAnton Protopopov bpf_map_dec_elem_count(&htab->map);
13788f844938SMartin KaFai Lau bpf_lru_push_free(&htab->lru, &l_new->lru_node);
13799bc421b6SAnton Protopopov }
13808f844938SMartin KaFai Lau return ret;
13818f844938SMartin KaFai Lau }
13828f844938SMartin KaFai Lau
1383d7ba4cc9SJP Kobryn static long htab_percpu_map_update_elem(struct bpf_map *map, void *key,
138415a07b33SAlexei Starovoitov void *value, u64 map_flags)
138515a07b33SAlexei Starovoitov {
138615a07b33SAlexei Starovoitov return __htab_percpu_map_update_elem(map, key, value, map_flags, false);
138715a07b33SAlexei Starovoitov }
138815a07b33SAlexei Starovoitov
1389d7ba4cc9SJP Kobryn static long htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
13908f844938SMartin KaFai Lau void *value, u64 map_flags)
13918f844938SMartin KaFai Lau {
13928f844938SMartin KaFai Lau return __htab_lru_percpu_map_update_elem(map, key, value, map_flags,
13938f844938SMartin KaFai Lau false);
13948f844938SMartin KaFai Lau }
13958f844938SMartin KaFai Lau
13960f8e4bd8SAlexei Starovoitov /* Called from syscall or from eBPF program */
1397d7ba4cc9SJP Kobryn static long htab_map_delete_elem(struct bpf_map *map, void *key)
13980f8e4bd8SAlexei Starovoitov {
13990f8e4bd8SAlexei Starovoitov struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
14004fe84359SAlexei Starovoitov struct hlist_nulls_head *head;
1401688ecfe6S[email protected] struct bucket *b;
14020f8e4bd8SAlexei Starovoitov struct htab_elem *l;
14030f8e4bd8SAlexei Starovoitov unsigned long flags;
14040f8e4bd8SAlexei Starovoitov u32 hash, key_size;
140520b6cc34SSong Liu int ret;
14060f8e4bd8SAlexei Starovoitov
1407694cea39SToke Høiland-Jørgensen WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
1408694cea39SToke Høiland-Jørgensen !rcu_read_lock_bh_held());
14090f8e4bd8SAlexei Starovoitov
14100f8e4bd8SAlexei Starovoitov key_size = map->key_size;
14110f8e4bd8SAlexei Starovoitov
1412c0203475SDaniel Borkmann hash = htab_map_hash(key, key_size, htab->hashrnd);
1413688ecfe6S[email protected] b = __select_bucket(htab, hash);
1414688ecfe6S[email protected] head = &b->head;
14150f8e4bd8SAlexei Starovoitov
14164fa8d68aSKumar Kartikeya Dwivedi ret = htab_lock_bucket(b, &flags);
141720b6cc34SSong Liu if (ret)
141820b6cc34SSong Liu return ret;
14190f8e4bd8SAlexei Starovoitov
14200f8e4bd8SAlexei Starovoitov l = lookup_elem_raw(head, hash, key, key_size);
1421b9e9ed90SHou Tao if (l)
14224fe84359SAlexei Starovoitov hlist_nulls_del_rcu(&l->hash_node);
1423b9e9ed90SHou Tao else
142420b6cc34SSong Liu ret = -ENOENT;
14250f8e4bd8SAlexei Starovoitov
14264fa8d68aSKumar Kartikeya Dwivedi htab_unlock_bucket(b, flags);
1427b9e9ed90SHou Tao
1428b9e9ed90SHou Tao if (l)
1429b9e9ed90SHou Tao free_htab_elem(htab, l);
14300f8e4bd8SAlexei Starovoitov return ret;
14310f8e4bd8SAlexei Starovoitov }
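
/*
 * Deletion is exposed both as the bpf_map_delete_elem() BPF helper and as
 * the identically named libbpf syscall wrapper; a missing key surfaces as
 * -ENOENT just as above.  Userspace sketch, assuming map_fd and key:
 *
 *	if (bpf_map_delete_elem(map_fd, &key) && errno == ENOENT)
 *		fprintf(stderr, "key was not present\n");
 */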
14320f8e4bd8SAlexei Starovoitov
1433d7ba4cc9SJP Kobryn static long htab_lru_map_delete_elem(struct bpf_map *map, void *key)
143429ba732aSMartin KaFai Lau {
143529ba732aSMartin KaFai Lau struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
14364fe84359SAlexei Starovoitov struct hlist_nulls_head *head;
143729ba732aSMartin KaFai Lau struct bucket *b;
143829ba732aSMartin KaFai Lau struct htab_elem *l;
143929ba732aSMartin KaFai Lau unsigned long flags;
144029ba732aSMartin KaFai Lau u32 hash, key_size;
144120b6cc34SSong Liu int ret;
144229ba732aSMartin KaFai Lau
1443694cea39SToke Høiland-Jørgensen WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
1444694cea39SToke Høiland-Jørgensen !rcu_read_lock_bh_held());
144529ba732aSMartin KaFai Lau
144629ba732aSMartin KaFai Lau key_size = map->key_size;
144729ba732aSMartin KaFai Lau
1448c0203475SDaniel Borkmann hash = htab_map_hash(key, key_size, htab->hashrnd);
144929ba732aSMartin KaFai Lau b = __select_bucket(htab, hash);
145029ba732aSMartin KaFai Lau head = &b->head;
145129ba732aSMartin KaFai Lau
14524fa8d68aSKumar Kartikeya Dwivedi ret = htab_lock_bucket(b, &flags);
145320b6cc34SSong Liu if (ret)
145420b6cc34SSong Liu return ret;
145529ba732aSMartin KaFai Lau
145629ba732aSMartin KaFai Lau l = lookup_elem_raw(head, hash, key, key_size);
145729ba732aSMartin KaFai Lau
145820b6cc34SSong Liu if (l)
14594fe84359SAlexei Starovoitov hlist_nulls_del_rcu(&l->hash_node);
146020b6cc34SSong Liu else
146120b6cc34SSong Liu ret = -ENOENT;
146229ba732aSMartin KaFai Lau
14634fa8d68aSKumar Kartikeya Dwivedi htab_unlock_bucket(b, flags);
146429ba732aSMartin KaFai Lau if (l)
146568134668SAlexei Starovoitov htab_lru_push_free(htab, l);
146629ba732aSMartin KaFai Lau return ret;
146729ba732aSMartin KaFai Lau }
146829ba732aSMartin KaFai Lau
14690f8e4bd8SAlexei Starovoitov static void delete_all_elements(struct bpf_htab *htab)
14700f8e4bd8SAlexei Starovoitov {
14710f8e4bd8SAlexei Starovoitov int i;
14720f8e4bd8SAlexei Starovoitov
14734b7e7cd1SHou Tao /* It's called from a worker thread and migration has been disabled,
14744b7e7cd1SHou Tao * therefore, it is OK to invoke bpf_mem_cache_free() directly.
1475fba1a1c6SAlexei Starovoitov */
14760f8e4bd8SAlexei Starovoitov for (i = 0; i < htab->n_buckets; i++) {
14774fe84359SAlexei Starovoitov struct hlist_nulls_head *head = select_bucket(htab, i);
14784fe84359SAlexei Starovoitov struct hlist_nulls_node *n;
14790f8e4bd8SAlexei Starovoitov struct htab_elem *l;
14800f8e4bd8SAlexei Starovoitov
14814fe84359SAlexei Starovoitov hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
14824fe84359SAlexei Starovoitov hlist_nulls_del_rcu(&l->hash_node);
14836c905981SAlexei Starovoitov htab_elem_free(htab, l);
14840f8e4bd8SAlexei Starovoitov }
1485ee3bad03SYafang Shao cond_resched();
14860f8e4bd8SAlexei Starovoitov }
14870f8e4bd8SAlexei Starovoitov }
1488bcc6b1b7SMartin KaFai Lau
1489a891711dSBenjamin Tissoires static void htab_free_malloced_timers_and_wq(struct bpf_htab *htab)
149068134668SAlexei Starovoitov {
149168134668SAlexei Starovoitov int i;
149268134668SAlexei Starovoitov
149368134668SAlexei Starovoitov rcu_read_lock();
149468134668SAlexei Starovoitov for (i = 0; i < htab->n_buckets; i++) {
149568134668SAlexei Starovoitov struct hlist_nulls_head *head = select_bucket(htab, i);
149668134668SAlexei Starovoitov struct hlist_nulls_node *n;
149768134668SAlexei Starovoitov struct htab_elem *l;
149868134668SAlexei Starovoitov
149914a324f6SKumar Kartikeya Dwivedi hlist_nulls_for_each_entry(l, n, head, hash_node) {
1500db559117SKumar Kartikeya Dwivedi /* We only free timer on uref dropping to zero */
1501a891711dSBenjamin Tissoires if (btf_record_has_field(htab->map.record, BPF_TIMER))
1502246331e3SBenjamin Tissoires bpf_obj_free_timer(htab->map.record,
1503246331e3SBenjamin Tissoires l->key + round_up(htab->map.key_size, 8));
1504a891711dSBenjamin Tissoires if (btf_record_has_field(htab->map.record, BPF_WORKQUEUE))
1505246331e3SBenjamin Tissoires bpf_obj_free_workqueue(htab->map.record,
1506246331e3SBenjamin Tissoires l->key + round_up(htab->map.key_size, 8));
150714a324f6SKumar Kartikeya Dwivedi }
150868134668SAlexei Starovoitov cond_resched_rcu();
150968134668SAlexei Starovoitov }
151068134668SAlexei Starovoitov rcu_read_unlock();
151168134668SAlexei Starovoitov }
151268134668SAlexei Starovoitov
1513246331e3SBenjamin Tissoires static void htab_map_free_timers_and_wq(struct bpf_map *map)
151468134668SAlexei Starovoitov {
151568134668SAlexei Starovoitov struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
151668134668SAlexei Starovoitov
1517246331e3SBenjamin Tissoires /* We only free timer and workqueue on uref dropping to zero */
1518a891711dSBenjamin Tissoires if (btf_record_has_field(htab->map.record, BPF_TIMER | BPF_WORKQUEUE)) {
151968134668SAlexei Starovoitov if (!htab_is_prealloc(htab))
1520a891711dSBenjamin Tissoires htab_free_malloced_timers_and_wq(htab);
152168134668SAlexei Starovoitov else
1522a891711dSBenjamin Tissoires htab_free_prealloced_timers_and_wq(htab);
1523246331e3SBenjamin Tissoires }
1524246331e3SBenjamin Tissoires }
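
/*
 * Sketch of how such timers typically land in a hash map value in the
 * first place (BPF program side, map and callback names assumed):
 *
 *	struct map_val {
 *		struct bpf_timer t;
 *	};
 *
 *	struct map_val *v = bpf_map_lookup_elem(&my_hash, &key);
 *	if (v) {
 *		bpf_timer_init(&v->t, &my_hash, CLOCK_MONOTONIC);
 *		bpf_timer_set_callback(&v->t, timer_cb);
 *		bpf_timer_start(&v->t, 1000000000, 0);
 *	}
 *
 * Any timer or workqueue embedded in a value is cancelled and freed here
 * once the last user reference to the map is dropped.
 */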
152568134668SAlexei Starovoitov
15260f8e4bd8SAlexei Starovoitov /* Called when map->refcnt goes to zero, either from workqueue or from syscall */
15270f8e4bd8SAlexei Starovoitov static void htab_map_free(struct bpf_map *map)
15280f8e4bd8SAlexei Starovoitov {
15290f8e4bd8SAlexei Starovoitov struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
15300f8e4bd8SAlexei Starovoitov
1531bba1dc0bSAlexei Starovoitov /* bpf_free_used_maps() or close(map_fd) will trigger this map_free callback.
1532bba1dc0bSAlexei Starovoitov * bpf_free_used_maps() is called after bpf prog is no longer executing.
1533bba1dc0bSAlexei Starovoitov * There is no need to synchronize_rcu() here to protect map elements.
15340f8e4bd8SAlexei Starovoitov */
15350f8e4bd8SAlexei Starovoitov
15369f2c6e96SAlexei Starovoitov /* htab no longer uses call_rcu() directly. bpf_mem_alloc does it
1537a7de265cSRafael Passos * underneath and is responsible for waiting for callbacks to finish
15389f2c6e96SAlexei Starovoitov * during bpf_mem_alloc_destroy().
15390f8e4bd8SAlexei Starovoitov */
154014a324f6SKumar Kartikeya Dwivedi if (!htab_is_prealloc(htab)) {
15410f8e4bd8SAlexei Starovoitov delete_all_elements(htab);
154214a324f6SKumar Kartikeya Dwivedi } else {
1543aa3496acSKumar Kartikeya Dwivedi htab_free_prealloced_fields(htab);
154429ba732aSMartin KaFai Lau prealloc_destroy(htab);
154514a324f6SKumar Kartikeya Dwivedi }
154629ba732aSMartin KaFai Lau
15479bc421b6SAnton Protopopov bpf_map_free_elem_count(map);
1548a6ed3ea6SAlexei Starovoitov free_percpu(htab->extra_elems);
1549d407bd25SDaniel Borkmann bpf_map_area_free(htab->buckets);
1550ee4ed53cSAlexei Starovoitov bpf_mem_alloc_destroy(&htab->pcpu_ma);
1551fba1a1c6SAlexei Starovoitov bpf_mem_alloc_destroy(&htab->ma);
155286fe28f7SAlexei Starovoitov if (htab->use_percpu_counter)
155386fe28f7SAlexei Starovoitov percpu_counter_destroy(&htab->pcount);
155473cf09a3SYafang Shao bpf_map_area_free(htab);
15550f8e4bd8SAlexei Starovoitov }
15560f8e4bd8SAlexei Starovoitov
1557699c86d6SYonghong Song static void htab_map_seq_show_elem(struct bpf_map *map, void *key,
1558699c86d6SYonghong Song struct seq_file *m)
1559699c86d6SYonghong Song {
1560699c86d6SYonghong Song void *value;
1561699c86d6SYonghong Song
1562699c86d6SYonghong Song rcu_read_lock();
1563699c86d6SYonghong Song
1564699c86d6SYonghong Song value = htab_map_lookup_elem(map, key);
1565699c86d6SYonghong Song if (!value) {
1566699c86d6SYonghong Song rcu_read_unlock();
1567699c86d6SYonghong Song return;
1568699c86d6SYonghong Song }
1569699c86d6SYonghong Song
1570699c86d6SYonghong Song btf_type_seq_show(map->btf, map->btf_key_type_id, key, m);
1571699c86d6SYonghong Song seq_puts(m, ": ");
1572699c86d6SYonghong Song btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
1573df862de4SMarkus Elfring seq_putc(m, '\n');
1574699c86d6SYonghong Song
1575699c86d6SYonghong Song rcu_read_unlock();
1576699c86d6SYonghong Song }
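
/*
 * This seq_show callback is what backs reading a pinned hash map through
 * bpffs when the map carries BTF for its key and value types.  Sketch
 * with an assumed pin path:
 *
 *	bpf_obj_pin(map_fd, "/sys/fs/bpf/my_htab");
 *
 * after which reading /sys/fs/bpf/my_htab (e.g. with cat) prints each
 * element as "key: value" using the BTF type information.
 */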
1577699c86d6SYonghong Song
15783e87f192SDenis Salopek static int __htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
15793e87f192SDenis Salopek void *value, bool is_lru_map,
15803e87f192SDenis Salopek bool is_percpu, u64 flags)
15813e87f192SDenis Salopek {
15823e87f192SDenis Salopek struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
15833e87f192SDenis Salopek struct hlist_nulls_head *head;
15843e87f192SDenis Salopek unsigned long bflags;
15853e87f192SDenis Salopek struct htab_elem *l;
15863e87f192SDenis Salopek u32 hash, key_size;
15873e87f192SDenis Salopek struct bucket *b;
15883e87f192SDenis Salopek int ret;
15893e87f192SDenis Salopek
15903e87f192SDenis Salopek key_size = map->key_size;
15913e87f192SDenis Salopek
15923e87f192SDenis Salopek hash = htab_map_hash(key, key_size, htab->hashrnd);
15933e87f192SDenis Salopek b = __select_bucket(htab, hash);
15943e87f192SDenis Salopek head = &b->head;
15953e87f192SDenis Salopek
15964fa8d68aSKumar Kartikeya Dwivedi ret = htab_lock_bucket(b, &bflags);
15973e87f192SDenis Salopek if (ret)
15983e87f192SDenis Salopek return ret;
15993e87f192SDenis Salopek
16003e87f192SDenis Salopek l = lookup_elem_raw(head, hash, key, key_size);
16013e87f192SDenis Salopek if (!l) {
16023e87f192SDenis Salopek ret = -ENOENT;
1603588c6eadSHou Tao goto out_unlock;
1604588c6eadSHou Tao }
1605588c6eadSHou Tao
16063e87f192SDenis Salopek if (is_percpu) {
16073e87f192SDenis Salopek u32 roundup_value_size = round_up(map->value_size, 8);
16083e87f192SDenis Salopek void __percpu *pptr;
16093e87f192SDenis Salopek int off = 0, cpu;
16103e87f192SDenis Salopek
16113e87f192SDenis Salopek pptr = htab_elem_get_ptr(l, key_size);
16123e87f192SDenis Salopek for_each_possible_cpu(cpu) {
161365334e64SKumar Kartikeya Dwivedi copy_map_value_long(&htab->map, value + off, per_cpu_ptr(pptr, cpu));
161465334e64SKumar Kartikeya Dwivedi check_and_init_map_value(&htab->map, value + off);
16153e87f192SDenis Salopek off += roundup_value_size;
16163e87f192SDenis Salopek }
16173e87f192SDenis Salopek } else {
16183e87f192SDenis Salopek u32 roundup_key_size = round_up(map->key_size, 8);
16193e87f192SDenis Salopek
16203e87f192SDenis Salopek if (flags & BPF_F_LOCK)
16213e87f192SDenis Salopek copy_map_value_locked(map, value, l->key +
16223e87f192SDenis Salopek roundup_key_size,
16233e87f192SDenis Salopek true);
16243e87f192SDenis Salopek else
16253e87f192SDenis Salopek copy_map_value(map, value, l->key +
16263e87f192SDenis Salopek roundup_key_size);
1627997849c4SHou Tao /* Zeroing special fields in the temp buffer */
162868134668SAlexei Starovoitov check_and_init_map_value(map, value);
16293e87f192SDenis Salopek }
16303e87f192SDenis Salopek hlist_nulls_del_rcu(&l->hash_node);
16313e87f192SDenis Salopek
1632588c6eadSHou Tao out_unlock:
16334fa8d68aSKumar Kartikeya Dwivedi htab_unlock_bucket(b, bflags);
16343e87f192SDenis Salopek
163547363f15SHou Tao if (l) {
163647363f15SHou Tao if (is_lru_map)
163768134668SAlexei Starovoitov htab_lru_push_free(htab, l);
163847363f15SHou Tao else
163947363f15SHou Tao free_htab_elem(htab, l);
164047363f15SHou Tao }
16413e87f192SDenis Salopek
16423e87f192SDenis Salopek return ret;
16433e87f192SDenis Salopek }
16443e87f192SDenis Salopek
16453e87f192SDenis Salopek static int htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
16463e87f192SDenis Salopek void *value, u64 flags)
16473e87f192SDenis Salopek {
16483e87f192SDenis Salopek return __htab_map_lookup_and_delete_elem(map, key, value, false, false,
16493e87f192SDenis Salopek flags);
16503e87f192SDenis Salopek }
16513e87f192SDenis Salopek
16523e87f192SDenis Salopek static int htab_percpu_map_lookup_and_delete_elem(struct bpf_map *map,
16533e87f192SDenis Salopek void *key, void *value,
16543e87f192SDenis Salopek u64 flags)
16553e87f192SDenis Salopek {
16563e87f192SDenis Salopek return __htab_map_lookup_and_delete_elem(map, key, value, false, true,
16573e87f192SDenis Salopek flags);
16583e87f192SDenis Salopek }
16593e87f192SDenis Salopek
16603e87f192SDenis Salopek static int htab_lru_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
16613e87f192SDenis Salopek void *value, u64 flags)
16623e87f192SDenis Salopek {
16633e87f192SDenis Salopek return __htab_map_lookup_and_delete_elem(map, key, value, true, false,
16643e87f192SDenis Salopek flags);
16653e87f192SDenis Salopek }
16663e87f192SDenis Salopek
16673e87f192SDenis Salopek static int htab_lru_percpu_map_lookup_and_delete_elem(struct bpf_map *map,
16683e87f192SDenis Salopek void *key, void *value,
16693e87f192SDenis Salopek u64 flags)
16703e87f192SDenis Salopek {
16713e87f192SDenis Salopek return __htab_map_lookup_and_delete_elem(map, key, value, true, true,
16723e87f192SDenis Salopek flags);
16733e87f192SDenis Salopek }
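
/*
 * All four variants above back the BPF_MAP_LOOKUP_AND_DELETE_ELEM
 * command.  Userspace sketch via libbpf, names assumed:
 *
 *	__u64 val;
 *	int err = bpf_map_lookup_and_delete_elem(map_fd, &key, &val);
 *
 * The element is removed and its last value returned under the bucket
 * lock; bpf_map_lookup_and_delete_elem_flags() additionally accepts
 * BPF_F_LOCK for spin-lock protected values.
 */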
16743e87f192SDenis Salopek
167505799638SYonghong Song static int
167605799638SYonghong Song __htab_map_lookup_and_delete_batch(struct bpf_map *map,
167705799638SYonghong Song const union bpf_attr *attr,
167805799638SYonghong Song union bpf_attr __user *uattr,
167905799638SYonghong Song bool do_delete, bool is_lru_map,
168005799638SYonghong Song bool is_percpu)
168105799638SYonghong Song {
168205799638SYonghong Song struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
168305799638SYonghong Song u32 bucket_cnt, total, key_size, value_size, roundup_key_size;
168405799638SYonghong Song void *keys = NULL, *values = NULL, *value, *dst_key, *dst_val;
168505799638SYonghong Song void __user *uvalues = u64_to_user_ptr(attr->batch.values);
168605799638SYonghong Song void __user *ukeys = u64_to_user_ptr(attr->batch.keys);
16872f4b0319SLukas Bulwahn void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch);
16889263dddcSTakshak Chahande u32 batch, max_count, size, bucket_size, map_id;
1689b9aff38dSYonghong Song struct htab_elem *node_to_free = NULL;
169005799638SYonghong Song u64 elem_map_flags, map_flags;
169105799638SYonghong Song struct hlist_nulls_head *head;
169205799638SYonghong Song struct hlist_nulls_node *n;
1693492e0d0dSBrian Vazquez unsigned long flags = 0;
1694492e0d0dSBrian Vazquez bool locked = false;
169505799638SYonghong Song struct htab_elem *l;
169605799638SYonghong Song struct bucket *b;
169705799638SYonghong Song int ret = 0;
169805799638SYonghong Song
169905799638SYonghong Song elem_map_flags = attr->batch.elem_flags;
170005799638SYonghong Song if ((elem_map_flags & ~BPF_F_LOCK) ||
1701db559117SKumar Kartikeya Dwivedi ((elem_map_flags & BPF_F_LOCK) && !btf_record_has_field(map->record, BPF_SPIN_LOCK)))
170205799638SYonghong Song return -EINVAL;
170305799638SYonghong Song
170405799638SYonghong Song map_flags = attr->batch.flags;
170505799638SYonghong Song if (map_flags)
170605799638SYonghong Song return -EINVAL;
170705799638SYonghong Song
170805799638SYonghong Song max_count = attr->batch.count;
170905799638SYonghong Song if (!max_count)
171005799638SYonghong Song return 0;
171105799638SYonghong Song
171205799638SYonghong Song if (put_user(0, &uattr->batch.count))
171305799638SYonghong Song return -EFAULT;
171405799638SYonghong Song
171505799638SYonghong Song batch = 0;
171605799638SYonghong Song if (ubatch && copy_from_user(&batch, ubatch, sizeof(batch)))
171705799638SYonghong Song return -EFAULT;
171805799638SYonghong Song
171905799638SYonghong Song if (batch >= htab->n_buckets)
172005799638SYonghong Song return -ENOENT;
172105799638SYonghong Song
172205799638SYonghong Song key_size = htab->map.key_size;
172305799638SYonghong Song roundup_key_size = round_up(htab->map.key_size, 8);
172405799638SYonghong Song value_size = htab->map.value_size;
172505799638SYonghong Song size = round_up(value_size, 8);
172605799638SYonghong Song if (is_percpu)
172705799638SYonghong Song value_size = size * num_possible_cpus();
172805799638SYonghong Song total = 0;
172905799638SYonghong Song /* while experimenting with hash tables with sizes ranging from 10 to
173005799638SYonghong Song * 1000, it was observed that a bucket can have up to 5 entries.
173105799638SYonghong Song */
173205799638SYonghong Song bucket_size = 5;
173305799638SYonghong Song
173405799638SYonghong Song alloc:
173505799638SYonghong Song /* We cannot do copy_from_user or copy_to_user inside
173605799638SYonghong Song * the rcu_read_lock. Allocate enough space here.
173705799638SYonghong Song */
1738c4eb1f40STatsuhiko Yasumatsu keys = kvmalloc_array(key_size, bucket_size, GFP_USER | __GFP_NOWARN);
1739c4eb1f40STatsuhiko Yasumatsu values = kvmalloc_array(value_size, bucket_size, GFP_USER | __GFP_NOWARN);
174005799638SYonghong Song if (!keys || !values) {
174105799638SYonghong Song ret = -ENOMEM;
174205799638SYonghong Song goto after_loop;
174305799638SYonghong Song }
174405799638SYonghong Song
174505799638SYonghong Song again:
1746085fee1aSThomas Gleixner bpf_disable_instrumentation();
174705799638SYonghong Song rcu_read_lock();
174805799638SYonghong Song again_nocopy:
174905799638SYonghong Song dst_key = keys;
175005799638SYonghong Song dst_val = values;
175105799638SYonghong Song b = &htab->buckets[batch];
175205799638SYonghong Song head = &b->head;
1753492e0d0dSBrian Vazquez /* do not grab the lock unless we need it (bucket_cnt > 0). */
175420b6cc34SSong Liu if (locked) {
17554fa8d68aSKumar Kartikeya Dwivedi ret = htab_lock_bucket(b, &flags);
175666a7a92eSHou Tao if (ret) {
175766a7a92eSHou Tao rcu_read_unlock();
175866a7a92eSHou Tao bpf_enable_instrumentation();
175966a7a92eSHou Tao goto after_loop;
176066a7a92eSHou Tao }
176120b6cc34SSong Liu }
176205799638SYonghong Song
176305799638SYonghong Song bucket_cnt = 0;
176405799638SYonghong Song hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
176505799638SYonghong Song bucket_cnt++;
176605799638SYonghong Song
1767492e0d0dSBrian Vazquez if (bucket_cnt && !locked) {
1768492e0d0dSBrian Vazquez locked = true;
1769492e0d0dSBrian Vazquez goto again_nocopy;
1770492e0d0dSBrian Vazquez }
1771492e0d0dSBrian Vazquez
177205799638SYonghong Song if (bucket_cnt > (max_count - total)) {
177305799638SYonghong Song if (total == 0)
177405799638SYonghong Song ret = -ENOSPC;
1775492e0d0dSBrian Vazquez /* Note that since bucket_cnt > 0 here, it is implicit
1776492e0d0dSBrian Vazquez * that the lock was grabbed, so release it.
1777492e0d0dSBrian Vazquez */
17784fa8d68aSKumar Kartikeya Dwivedi htab_unlock_bucket(b, flags);
177905799638SYonghong Song rcu_read_unlock();
1780085fee1aSThomas Gleixner bpf_enable_instrumentation();
178105799638SYonghong Song goto after_loop;
178205799638SYonghong Song }
178305799638SYonghong Song
178405799638SYonghong Song if (bucket_cnt > bucket_size) {
178505799638SYonghong Song bucket_size = bucket_cnt;
1786492e0d0dSBrian Vazquez /* Note that since bucket_cnt > 0 here, it is implicit
1787492e0d0dSBrian Vazquez * that the lock was grabbed, so release it.
1788492e0d0dSBrian Vazquez */
17894fa8d68aSKumar Kartikeya Dwivedi htab_unlock_bucket(b, flags);
179005799638SYonghong Song rcu_read_unlock();
1791085fee1aSThomas Gleixner bpf_enable_instrumentation();
179205799638SYonghong Song kvfree(keys);
179305799638SYonghong Song kvfree(values);
179405799638SYonghong Song goto alloc;
179505799638SYonghong Song }
179605799638SYonghong Song
1797492e0d0dSBrian Vazquez /* Next block is only safe to run if you have grabbed the lock */
1798492e0d0dSBrian Vazquez if (!locked)
1799492e0d0dSBrian Vazquez goto next_batch;
1800492e0d0dSBrian Vazquez
180105799638SYonghong Song hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
180205799638SYonghong Song memcpy(dst_key, l->key, key_size);
180305799638SYonghong Song
180405799638SYonghong Song if (is_percpu) {
180505799638SYonghong Song int off = 0, cpu;
180605799638SYonghong Song void __percpu *pptr;
180705799638SYonghong Song
180805799638SYonghong Song pptr = htab_elem_get_ptr(l, map->key_size);
180905799638SYonghong Song for_each_possible_cpu(cpu) {
181065334e64SKumar Kartikeya Dwivedi copy_map_value_long(&htab->map, dst_val + off, per_cpu_ptr(pptr, cpu));
181165334e64SKumar Kartikeya Dwivedi check_and_init_map_value(&htab->map, dst_val + off);
181205799638SYonghong Song off += size;
181305799638SYonghong Song }
181405799638SYonghong Song } else {
181505799638SYonghong Song value = l->key + roundup_key_size;
18169263dddcSTakshak Chahande if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
18179263dddcSTakshak Chahande struct bpf_map **inner_map = value;
18189263dddcSTakshak Chahande
18199263dddcSTakshak Chahande /* Actual value is the id of the inner map */
18209263dddcSTakshak Chahande map_id = map->ops->map_fd_sys_lookup_elem(*inner_map);
18219263dddcSTakshak Chahande value = &map_id;
18229263dddcSTakshak Chahande }
18239263dddcSTakshak Chahande
182405799638SYonghong Song if (elem_map_flags & BPF_F_LOCK)
182505799638SYonghong Song copy_map_value_locked(map, dst_val, value,
182605799638SYonghong Song true);
182705799638SYonghong Song else
182805799638SYonghong Song copy_map_value(map, dst_val, value);
1829997849c4SHou Tao /* Zeroing special fields in the temp buffer */
183068134668SAlexei Starovoitov check_and_init_map_value(map, dst_val);
183105799638SYonghong Song }
183205799638SYonghong Song if (do_delete) {
183305799638SYonghong Song hlist_nulls_del_rcu(&l->hash_node);
1834b9aff38dSYonghong Song
1835b9aff38dSYonghong Song /* bpf_lru_push_free() will acquire lru_lock, which
1836b9aff38dSYonghong Song * may cause deadlock. See comments in function
1837b9aff38dSYonghong Song * prealloc_lru_pop(). Let us do bpf_lru_push_free()
1838b9aff38dSYonghong Song * after releasing the bucket lock.
1839b9e9ed90SHou Tao *
1840b9e9ed90SHou Tao * For htab of maps, htab_put_fd_value() in
1841b9e9ed90SHou Tao * free_htab_elem() may acquire a spinlock with bucket
1842b9e9ed90SHou Tao * lock being held and it violates the lock rule, so
1843b9e9ed90SHou Tao * invoke free_htab_elem() after unlock as well.
1844b9aff38dSYonghong Song */
1845b9aff38dSYonghong Song l->batch_flink = node_to_free;
1846b9aff38dSYonghong Song node_to_free = l;
1847b9aff38dSYonghong Song }
184805799638SYonghong Song dst_key += key_size;
184905799638SYonghong Song dst_val += value_size;
185005799638SYonghong Song }
185105799638SYonghong Song
18524fa8d68aSKumar Kartikeya Dwivedi htab_unlock_bucket(b, flags);
1853492e0d0dSBrian Vazquez locked = false;
1854b9aff38dSYonghong Song
1855b9aff38dSYonghong Song while (node_to_free) {
1856b9aff38dSYonghong Song l = node_to_free;
1857b9aff38dSYonghong Song node_to_free = node_to_free->batch_flink;
1858b9e9ed90SHou Tao if (is_lru_map)
185968134668SAlexei Starovoitov htab_lru_push_free(htab, l);
1860b9e9ed90SHou Tao else
1861b9e9ed90SHou Tao free_htab_elem(htab, l);
1862b9aff38dSYonghong Song }
1863b9aff38dSYonghong Song
1864492e0d0dSBrian Vazquez next_batch:
186505799638SYonghong Song /* If we are not copying data, we can go to the next bucket and avoid
186605799638SYonghong Song * unlocking the RCU read lock.
186705799638SYonghong Song */
186805799638SYonghong Song if (!bucket_cnt && (batch + 1 < htab->n_buckets)) {
186905799638SYonghong Song batch++;
187005799638SYonghong Song goto again_nocopy;
187105799638SYonghong Song }
187205799638SYonghong Song
187305799638SYonghong Song rcu_read_unlock();
1874085fee1aSThomas Gleixner bpf_enable_instrumentation();
187505799638SYonghong Song if (bucket_cnt && (copy_to_user(ukeys + total * key_size, keys,
187605799638SYonghong Song key_size * bucket_cnt) ||
187705799638SYonghong Song copy_to_user(uvalues + total * value_size, values,
187805799638SYonghong Song value_size * bucket_cnt))) {
187905799638SYonghong Song ret = -EFAULT;
188005799638SYonghong Song goto after_loop;
188105799638SYonghong Song }
188205799638SYonghong Song
188305799638SYonghong Song total += bucket_cnt;
188405799638SYonghong Song batch++;
188505799638SYonghong Song if (batch >= htab->n_buckets) {
188605799638SYonghong Song ret = -ENOENT;
188705799638SYonghong Song goto after_loop;
188805799638SYonghong Song }
188905799638SYonghong Song goto again;
189005799638SYonghong Song
189105799638SYonghong Song after_loop:
189205799638SYonghong Song if (ret == -EFAULT)
189305799638SYonghong Song goto out;
189405799638SYonghong Song
189505799638SYonghong Song /* copy # of entries and next batch */
189605799638SYonghong Song ubatch = u64_to_user_ptr(attr->batch.out_batch);
189705799638SYonghong Song if (copy_to_user(ubatch, &batch, sizeof(batch)) ||
189805799638SYonghong Song put_user(total, &uattr->batch.count))
189905799638SYonghong Song ret = -EFAULT;
190005799638SYonghong Song
190105799638SYonghong Song out:
190205799638SYonghong Song kvfree(keys);
190305799638SYonghong Song kvfree(values);
190405799638SYonghong Song return ret;
190505799638SYonghong Song }
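
/*
 * Sketch of driving this batch interface from userspace via libbpf, with
 * assumed map_fd, keys/vals buffers sized for 128 entries and a process()
 * consumer; the out_batch cursor of one call feeds the in_batch of the
 * next, and the final batch is signalled with ENOENT:
 *
 *	__u32 in = 0, out = 0, count;
 *	void *in_ptr = NULL;
 *	int err;
 *
 *	do {
 *		count = 128;
 *		err = bpf_map_lookup_batch(map_fd, in_ptr, &out,
 *					   keys, vals, &count, NULL);
 *		process(keys, vals, count);
 *		in = out;
 *		in_ptr = &in;
 *	} while (!err);
 */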
190605799638SYonghong Song
190705799638SYonghong Song static int
190805799638SYonghong Song htab_percpu_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr,
190905799638SYonghong Song union bpf_attr __user *uattr)
191005799638SYonghong Song {
191105799638SYonghong Song return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
191205799638SYonghong Song false, true);
191305799638SYonghong Song }
191405799638SYonghong Song
191505799638SYonghong Song static int
191605799638SYonghong Song htab_percpu_map_lookup_and_delete_batch(struct bpf_map *map,
191705799638SYonghong Song const union bpf_attr *attr,
191805799638SYonghong Song union bpf_attr __user *uattr)
191905799638SYonghong Song {
192005799638SYonghong Song return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
192105799638SYonghong Song false, true);
192205799638SYonghong Song }
192305799638SYonghong Song
192405799638SYonghong Song static int
192505799638SYonghong Song htab_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr,
192605799638SYonghong Song union bpf_attr __user *uattr)
192705799638SYonghong Song {
192805799638SYonghong Song return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
192905799638SYonghong Song false, false);
193005799638SYonghong Song }
193105799638SYonghong Song
193205799638SYonghong Song static int
193305799638SYonghong Song htab_map_lookup_and_delete_batch(struct bpf_map *map,
193405799638SYonghong Song const union bpf_attr *attr,
193505799638SYonghong Song union bpf_attr __user *uattr)
193605799638SYonghong Song {
193705799638SYonghong Song return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
193805799638SYonghong Song false, false);
193905799638SYonghong Song }
194005799638SYonghong Song
194105799638SYonghong Song static int
194205799638SYonghong Song htab_lru_percpu_map_lookup_batch(struct bpf_map *map,
194305799638SYonghong Song const union bpf_attr *attr,
194405799638SYonghong Song union bpf_attr __user *uattr)
194505799638SYonghong Song {
194605799638SYonghong Song return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
194705799638SYonghong Song true, true);
194805799638SYonghong Song }
194905799638SYonghong Song
195005799638SYonghong Song static int
195105799638SYonghong Song htab_lru_percpu_map_lookup_and_delete_batch(struct bpf_map *map,
195205799638SYonghong Song const union bpf_attr *attr,
195305799638SYonghong Song union bpf_attr __user *uattr)
195405799638SYonghong Song {
195505799638SYonghong Song return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
195605799638SYonghong Song true, true);
195705799638SYonghong Song }
195805799638SYonghong Song
195905799638SYonghong Song static int
196005799638SYonghong Song htab_lru_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr,
196105799638SYonghong Song union bpf_attr __user *uattr)
196205799638SYonghong Song {
196305799638SYonghong Song return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
196405799638SYonghong Song true, false);
196505799638SYonghong Song }
196605799638SYonghong Song
196705799638SYonghong Song static int
196805799638SYonghong Song htab_lru_map_lookup_and_delete_batch(struct bpf_map *map,
196905799638SYonghong Song const union bpf_attr *attr,
197005799638SYonghong Song union bpf_attr __user *uattr)
197105799638SYonghong Song {
197205799638SYonghong Song return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
197305799638SYonghong Song true, false);
197405799638SYonghong Song }
197505799638SYonghong Song
1976d6c4503cSYonghong Song struct bpf_iter_seq_hash_map_info {
1977d6c4503cSYonghong Song struct bpf_map *map;
1978d6c4503cSYonghong Song struct bpf_htab *htab;
1979d6c4503cSYonghong Song void *percpu_value_buf; // non-zero means percpu hash
1980d6c4503cSYonghong Song u32 bucket_id;
1981d6c4503cSYonghong Song u32 skip_elems;
1982d6c4503cSYonghong Song };
1983d6c4503cSYonghong Song
1984d6c4503cSYonghong Song static struct htab_elem *
1985d6c4503cSYonghong Song bpf_hash_map_seq_find_next(struct bpf_iter_seq_hash_map_info *info,
1986d6c4503cSYonghong Song struct htab_elem *prev_elem)
1987d6c4503cSYonghong Song {
1988d6c4503cSYonghong Song const struct bpf_htab *htab = info->htab;
1989d6c4503cSYonghong Song u32 skip_elems = info->skip_elems;
1990d6c4503cSYonghong Song u32 bucket_id = info->bucket_id;
1991d6c4503cSYonghong Song struct hlist_nulls_head *head;
1992d6c4503cSYonghong Song struct hlist_nulls_node *n;
1993d6c4503cSYonghong Song struct htab_elem *elem;
1994d6c4503cSYonghong Song struct bucket *b;
1995d6c4503cSYonghong Song u32 i, count;
1996d6c4503cSYonghong Song
1997d6c4503cSYonghong Song if (bucket_id >= htab->n_buckets)
1998d6c4503cSYonghong Song return NULL;
1999d6c4503cSYonghong Song
2000d6c4503cSYonghong Song /* try to find next elem in the same bucket */
2001d6c4503cSYonghong Song if (prev_elem) {
2002d6c4503cSYonghong Song /* no update/deletion on this bucket, prev_elem should still be valid
2003d6c4503cSYonghong Song * and we won't skip elements.
2004d6c4503cSYonghong Song */
2005d6c4503cSYonghong Song n = rcu_dereference_raw(hlist_nulls_next_rcu(&prev_elem->hash_node));
2006d6c4503cSYonghong Song elem = hlist_nulls_entry_safe(n, struct htab_elem, hash_node);
2007d6c4503cSYonghong Song if (elem)
2008d6c4503cSYonghong Song return elem;
2009d6c4503cSYonghong Song
2010d6c4503cSYonghong Song /* not found, unlock and go to the next bucket */
2011d6c4503cSYonghong Song b = &htab->buckets[bucket_id++];
2012dc0988bbSYonghong Song rcu_read_unlock();
2013d6c4503cSYonghong Song skip_elems = 0;
2014d6c4503cSYonghong Song }
2015d6c4503cSYonghong Song
2016d6c4503cSYonghong Song for (i = bucket_id; i < htab->n_buckets; i++) {
2017d6c4503cSYonghong Song b = &htab->buckets[i];
2018dc0988bbSYonghong Song rcu_read_lock();
2019d6c4503cSYonghong Song
2020d6c4503cSYonghong Song count = 0;
2021d6c4503cSYonghong Song head = &b->head;
2022d6c4503cSYonghong Song hlist_nulls_for_each_entry_rcu(elem, n, head, hash_node) {
2023d6c4503cSYonghong Song if (count >= skip_elems) {
2024d6c4503cSYonghong Song info->bucket_id = i;
2025d6c4503cSYonghong Song info->skip_elems = count;
2026d6c4503cSYonghong Song return elem;
2027d6c4503cSYonghong Song }
2028d6c4503cSYonghong Song count++;
2029d6c4503cSYonghong Song }
2030d6c4503cSYonghong Song
2031dc0988bbSYonghong Song rcu_read_unlock();
2032d6c4503cSYonghong Song skip_elems = 0;
2033d6c4503cSYonghong Song }
2034d6c4503cSYonghong Song
2035d6c4503cSYonghong Song info->bucket_id = i;
2036d6c4503cSYonghong Song info->skip_elems = 0;
2037d6c4503cSYonghong Song return NULL;
2038d6c4503cSYonghong Song }
2039d6c4503cSYonghong Song
2040d6c4503cSYonghong Song static void *bpf_hash_map_seq_start(struct seq_file *seq, loff_t *pos)
2041d6c4503cSYonghong Song {
2042d6c4503cSYonghong Song struct bpf_iter_seq_hash_map_info *info = seq->private;
2043d6c4503cSYonghong Song struct htab_elem *elem;
2044d6c4503cSYonghong Song
2045d6c4503cSYonghong Song elem = bpf_hash_map_seq_find_next(info, NULL);
2046d6c4503cSYonghong Song if (!elem)
2047d6c4503cSYonghong Song return NULL;
2048d6c4503cSYonghong Song
2049d6c4503cSYonghong Song if (*pos == 0)
2050d6c4503cSYonghong Song ++*pos;
2051d6c4503cSYonghong Song return elem;
2052d6c4503cSYonghong Song }
2053d6c4503cSYonghong Song
2054d6c4503cSYonghong Song static void *bpf_hash_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2055d6c4503cSYonghong Song {
2056d6c4503cSYonghong Song struct bpf_iter_seq_hash_map_info *info = seq->private;
2057d6c4503cSYonghong Song
2058d6c4503cSYonghong Song ++*pos;
2059d6c4503cSYonghong Song ++info->skip_elems;
2060d6c4503cSYonghong Song return bpf_hash_map_seq_find_next(info, v);
2061d6c4503cSYonghong Song }
2062d6c4503cSYonghong Song
2063d6c4503cSYonghong Song static int __bpf_hash_map_seq_show(struct seq_file *seq, struct htab_elem *elem)
2064d6c4503cSYonghong Song {
2065d6c4503cSYonghong Song struct bpf_iter_seq_hash_map_info *info = seq->private;
2066d6c4503cSYonghong Song u32 roundup_key_size, roundup_value_size;
2067d6c4503cSYonghong Song struct bpf_iter__bpf_map_elem ctx = {};
2068d6c4503cSYonghong Song struct bpf_map *map = info->map;
2069d6c4503cSYonghong Song struct bpf_iter_meta meta;
2070d6c4503cSYonghong Song int ret = 0, off = 0, cpu;
2071d6c4503cSYonghong Song struct bpf_prog *prog;
2072d6c4503cSYonghong Song void __percpu *pptr;
2073d6c4503cSYonghong Song
2074d6c4503cSYonghong Song meta.seq = seq;
2075d6c4503cSYonghong Song prog = bpf_iter_get_info(&meta, elem == NULL);
2076d6c4503cSYonghong Song if (prog) {
2077d6c4503cSYonghong Song ctx.meta = &meta;
2078d6c4503cSYonghong Song ctx.map = info->map;
2079d6c4503cSYonghong Song if (elem) {
2080d6c4503cSYonghong Song roundup_key_size = round_up(map->key_size, 8);
2081d6c4503cSYonghong Song ctx.key = elem->key;
2082d6c4503cSYonghong Song if (!info->percpu_value_buf) {
2083d6c4503cSYonghong Song ctx.value = elem->key + roundup_key_size;
2084d6c4503cSYonghong Song } else {
2085d6c4503cSYonghong Song roundup_value_size = round_up(map->value_size, 8);
2086d6c4503cSYonghong Song pptr = htab_elem_get_ptr(elem, map->key_size);
2087d6c4503cSYonghong Song for_each_possible_cpu(cpu) {
208865334e64SKumar Kartikeya Dwivedi copy_map_value_long(map, info->percpu_value_buf + off,
208965334e64SKumar Kartikeya Dwivedi per_cpu_ptr(pptr, cpu));
209065334e64SKumar Kartikeya Dwivedi check_and_init_map_value(map, info->percpu_value_buf + off);
2091d6c4503cSYonghong Song off += roundup_value_size;
2092d6c4503cSYonghong Song }
2093d6c4503cSYonghong Song ctx.value = info->percpu_value_buf;
2094d6c4503cSYonghong Song }
2095d6c4503cSYonghong Song }
2096d6c4503cSYonghong Song ret = bpf_iter_run_prog(prog, &ctx);
2097d6c4503cSYonghong Song }
2098d6c4503cSYonghong Song
2099d6c4503cSYonghong Song return ret;
2100d6c4503cSYonghong Song }
2101d6c4503cSYonghong Song
2102d6c4503cSYonghong Song static int bpf_hash_map_seq_show(struct seq_file *seq, void *v)
2103d6c4503cSYonghong Song {
2104d6c4503cSYonghong Song return __bpf_hash_map_seq_show(seq, v);
2105d6c4503cSYonghong Song }
2106d6c4503cSYonghong Song
2107d6c4503cSYonghong Song static void bpf_hash_map_seq_stop(struct seq_file *seq, void *v)
2108d6c4503cSYonghong Song {
2109d6c4503cSYonghong Song if (!v)
2110d6c4503cSYonghong Song (void)__bpf_hash_map_seq_show(seq, NULL);
2111d6c4503cSYonghong Song else
2112dc0988bbSYonghong Song rcu_read_unlock();
2113d6c4503cSYonghong Song }
2114d6c4503cSYonghong Song
2115d6c4503cSYonghong Song static int bpf_iter_init_hash_map(void *priv_data,
2116d6c4503cSYonghong Song struct bpf_iter_aux_info *aux)
2117d6c4503cSYonghong Song {
2118d6c4503cSYonghong Song struct bpf_iter_seq_hash_map_info *seq_info = priv_data;
2119d6c4503cSYonghong Song struct bpf_map *map = aux->map;
2120d6c4503cSYonghong Song void *value_buf;
2121d6c4503cSYonghong Song u32 buf_size;
2122d6c4503cSYonghong Song
2123d6c4503cSYonghong Song if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
2124d6c4503cSYonghong Song map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
2125d6c4503cSYonghong Song buf_size = round_up(map->value_size, 8) * num_possible_cpus();
2126d6c4503cSYonghong Song value_buf = kmalloc(buf_size, GFP_USER | __GFP_NOWARN);
2127d6c4503cSYonghong Song if (!value_buf)
2128d6c4503cSYonghong Song return -ENOMEM;
2129d6c4503cSYonghong Song
2130d6c4503cSYonghong Song seq_info->percpu_value_buf = value_buf;
2131d6c4503cSYonghong Song }
2132d6c4503cSYonghong Song
2133ef1e93d2SHou Tao bpf_map_inc_with_uref(map);
2134d6c4503cSYonghong Song seq_info->map = map;
2135d6c4503cSYonghong Song seq_info->htab = container_of(map, struct bpf_htab, map);
2136d6c4503cSYonghong Song return 0;
2137d6c4503cSYonghong Song }
2138d6c4503cSYonghong Song
2139d6c4503cSYonghong Song static void bpf_iter_fini_hash_map(void *priv_data)
2140d6c4503cSYonghong Song {
2141d6c4503cSYonghong Song struct bpf_iter_seq_hash_map_info *seq_info = priv_data;
2142d6c4503cSYonghong Song
2143ef1e93d2SHou Tao bpf_map_put_with_uref(seq_info->map);
2144d6c4503cSYonghong Song kfree(seq_info->percpu_value_buf);
2145d6c4503cSYonghong Song }
2146d6c4503cSYonghong Song
2147d6c4503cSYonghong Song static const struct seq_operations bpf_hash_map_seq_ops = {
2148d6c4503cSYonghong Song .start = bpf_hash_map_seq_start,
2149d6c4503cSYonghong Song .next = bpf_hash_map_seq_next,
2150d6c4503cSYonghong Song .stop = bpf_hash_map_seq_stop,
2151d6c4503cSYonghong Song .show = bpf_hash_map_seq_show,
2152d6c4503cSYonghong Song };
2153d6c4503cSYonghong Song
2154d6c4503cSYonghong Song static const struct bpf_iter_seq_info iter_seq_info = {
2155d6c4503cSYonghong Song .seq_ops = &bpf_hash_map_seq_ops,
2156d6c4503cSYonghong Song .init_seq_private = bpf_iter_init_hash_map,
2157d6c4503cSYonghong Song .fini_seq_private = bpf_iter_fini_hash_map,
2158d6c4503cSYonghong Song .seq_priv_size = sizeof(struct bpf_iter_seq_hash_map_info),
2159d6c4503cSYonghong Song };
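
/*
 * A minimal BPF-side sketch (not part of this file) of an iterator program
 * driven by the seq_ops above, assuming a hash map with u32 keys and u64
 * values:
 *
 *	SEC("iter/bpf_map_elem")
 *	int dump_elem(struct bpf_iter__bpf_map_elem *ctx)
 *	{
 *		struct seq_file *seq = ctx->meta->seq;
 *		u32 *key = ctx->key;
 *		u64 *val = ctx->value;
 *
 *		if (!key || !val)
 *			return 0;
 *		BPF_SEQ_PRINTF(seq, "%u: %llu\n", *key, *val);
 *		return 0;
 *	}
 *
 * The NULL key/value case corresponds to the final invocation made from
 * bpf_hash_map_seq_stop() with a NULL element.
 */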
2160d6c4503cSYonghong Song
2161d7ba4cc9SJP Kobryn static long bpf_for_each_hash_elem(struct bpf_map *map, bpf_callback_t callback_fn,
2162314ee05eSYonghong Song void *callback_ctx, u64 flags)
2163314ee05eSYonghong Song {
2164314ee05eSYonghong Song struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
2165314ee05eSYonghong Song struct hlist_nulls_head *head;
2166314ee05eSYonghong Song struct hlist_nulls_node *n;
2167314ee05eSYonghong Song struct htab_elem *elem;
2168314ee05eSYonghong Song u32 roundup_key_size;
2169314ee05eSYonghong Song int i, num_elems = 0;
2170314ee05eSYonghong Song void __percpu *pptr;
2171314ee05eSYonghong Song struct bucket *b;
2172314ee05eSYonghong Song void *key, *val;
2173314ee05eSYonghong Song bool is_percpu;
2174314ee05eSYonghong Song u64 ret = 0;
2175314ee05eSYonghong Song
2176ea5b2296SHou Tao cant_migrate();
2177ea5b2296SHou Tao
2178314ee05eSYonghong Song if (flags != 0)
2179314ee05eSYonghong Song return -EINVAL;
2180314ee05eSYonghong Song
2181314ee05eSYonghong Song is_percpu = htab_is_percpu(htab);
2182314ee05eSYonghong Song
2183314ee05eSYonghong Song roundup_key_size = round_up(map->key_size, 8);
2184ea5b2296SHou Tao /* migration has been disabled, so the percpu value prepared here will be
2185ea5b2296SHou Tao * the same as the one seen by the bpf program with
2186ea5b2296SHou Tao * bpf_map_lookup_elem().
2187314ee05eSYonghong Song */
2188314ee05eSYonghong Song for (i = 0; i < htab->n_buckets; i++) {
2189314ee05eSYonghong Song b = &htab->buckets[i];
2190314ee05eSYonghong Song rcu_read_lock();
2191314ee05eSYonghong Song head = &b->head;
2192*75673fdaSBrandon Kammerdiener hlist_nulls_for_each_entry_safe(elem, n, head, hash_node) {
2193314ee05eSYonghong Song key = elem->key;
2194314ee05eSYonghong Song if (is_percpu) {
2195314ee05eSYonghong Song /* current cpu value for percpu map */
2196314ee05eSYonghong Song pptr = htab_elem_get_ptr(elem, map->key_size);
2197314ee05eSYonghong Song val = this_cpu_ptr(pptr);
2198314ee05eSYonghong Song } else {
2199314ee05eSYonghong Song val = elem->key + roundup_key_size;
2200314ee05eSYonghong Song }
2201314ee05eSYonghong Song num_elems++;
2202102acbacSKees Cook ret = callback_fn((u64)(long)map, (u64)(long)key,
2203102acbacSKees Cook (u64)(long)val, (u64)(long)callback_ctx, 0);
2204314ee05eSYonghong Song /* return value: 0 - continue, 1 - stop and return */
2205314ee05eSYonghong Song if (ret) {
2206314ee05eSYonghong Song rcu_read_unlock();
2207314ee05eSYonghong Song goto out;
2208314ee05eSYonghong Song }
2209314ee05eSYonghong Song }
2210314ee05eSYonghong Song rcu_read_unlock();
2211314ee05eSYonghong Song }
2212314ee05eSYonghong Song out:
2213314ee05eSYonghong Song return num_elems;
2214314ee05eSYonghong Song }
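
/*
 * BPF-side sketch (not part of this file) of the callback contract used by
 * bpf_for_each_hash_elem() via the bpf_for_each_map_elem() helper; the map
 * name and value types are illustrative:
 *
 *	static u64 count_big(struct bpf_map *map, u32 *key, u64 *val, void *ctx)
 *	{
 *		u64 *nr_big = ctx;
 *
 *		if (*val > 1000)
 *			(*nr_big)++;
 *		return 0;	// 0 - continue, 1 - stop the walk
 *	}
 *
 *	u64 nr_big = 0;
 *	long walked = bpf_for_each_map_elem(&hmap, count_big, &nr_big, 0);
 *
 * The helper's return value is the number of elements visited, matching
 * num_elems above.
 */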
2215314ee05eSYonghong Song
2216304849a2SYafang Shao static u64 htab_map_mem_usage(const struct bpf_map *map)
2217304849a2SYafang Shao {
2218304849a2SYafang Shao struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
2219304849a2SYafang Shao u32 value_size = round_up(htab->map.value_size, 8);
2220304849a2SYafang Shao bool prealloc = htab_is_prealloc(htab);
2221304849a2SYafang Shao bool percpu = htab_is_percpu(htab);
2222304849a2SYafang Shao bool lru = htab_is_lru(htab);
2223304849a2SYafang Shao u64 num_entries;
2224304849a2SYafang Shao u64 usage = sizeof(struct bpf_htab);
2225304849a2SYafang Shao
2226304849a2SYafang Shao usage += sizeof(struct bucket) * htab->n_buckets;
2227304849a2SYafang Shao usage += sizeof(int) * num_possible_cpus() * HASHTAB_MAP_LOCK_COUNT;
2228304849a2SYafang Shao if (prealloc) {
2229304849a2SYafang Shao num_entries = map->max_entries;
2230304849a2SYafang Shao if (htab_has_extra_elems(htab))
2231304849a2SYafang Shao num_entries += num_possible_cpus();
2232304849a2SYafang Shao
2233304849a2SYafang Shao usage += htab->elem_size * num_entries;
2234304849a2SYafang Shao
2235304849a2SYafang Shao if (percpu)
2236304849a2SYafang Shao usage += value_size * num_possible_cpus() * num_entries;
2237304849a2SYafang Shao else if (!lru)
2238304849a2SYafang Shao usage += sizeof(struct htab_elem *) * num_possible_cpus();
2239304849a2SYafang Shao } else {
2240304849a2SYafang Shao #define LLIST_NODE_SZ sizeof(struct llist_node)
2241304849a2SYafang Shao
2242304849a2SYafang Shao num_entries = htab->use_percpu_counter ?
2243304849a2SYafang Shao percpu_counter_sum(&htab->pcount) :
2244304849a2SYafang Shao atomic_read(&htab->count);
2245304849a2SYafang Shao usage += (htab->elem_size + LLIST_NODE_SZ) * num_entries;
2246304849a2SYafang Shao if (percpu) {
2247304849a2SYafang Shao usage += (LLIST_NODE_SZ + sizeof(void *)) * num_entries;
2248304849a2SYafang Shao usage += value_size * num_possible_cpus() * num_entries;
2249304849a2SYafang Shao }
2250304849a2SYafang Shao }
2251304849a2SYafang Shao return usage;
2252304849a2SYafang Shao }
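
/*
 * Rough shape of the accounting above for a preallocated, non-percpu,
 * non-LRU hash map (a sketch, not an exact byte count):
 *
 *	usage ~= sizeof(struct bpf_htab)
 *	       + n_buckets * sizeof(struct bucket)
 *	       + num_possible_cpus() * HASHTAB_MAP_LOCK_COUNT * sizeof(int)
 *	       + (max_entries + num_possible_cpus()) * elem_size
 *	       + num_possible_cpus() * sizeof(struct htab_elem *)
 *
 * where the num_possible_cpus() extra entries and per-CPU element pointers
 * cover the extra elements used by the update path, and elem_size accounts
 * for struct htab_elem plus the rounded-up key and value sizes.
 */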
2253304849a2SYafang Shao
2254c317ab71SMenglong Dong BTF_ID_LIST_SINGLE(htab_map_btf_ids, struct, bpf_htab)
225540077e0cSJohannes Berg const struct bpf_map_ops htab_map_ops = {
2256f4d05259SMartin KaFai Lau .map_meta_equal = bpf_map_meta_equal,
22579328e0d1SJakub Kicinski .map_alloc_check = htab_map_alloc_check,
22580f8e4bd8SAlexei Starovoitov .map_alloc = htab_map_alloc,
22590f8e4bd8SAlexei Starovoitov .map_free = htab_map_free,
22600f8e4bd8SAlexei Starovoitov .map_get_next_key = htab_map_get_next_key,
2261246331e3SBenjamin Tissoires .map_release_uref = htab_map_free_timers_and_wq,
22620f8e4bd8SAlexei Starovoitov .map_lookup_elem = htab_map_lookup_elem,
22633e87f192SDenis Salopek .map_lookup_and_delete_elem = htab_map_lookup_and_delete_elem,
22640f8e4bd8SAlexei Starovoitov .map_update_elem = htab_map_update_elem,
22650f8e4bd8SAlexei Starovoitov .map_delete_elem = htab_map_delete_elem,
22669015d2f5SAlexei Starovoitov .map_gen_lookup = htab_map_gen_lookup,
2267699c86d6SYonghong Song .map_seq_show_elem = htab_map_seq_show_elem,
2268314ee05eSYonghong Song .map_set_for_each_callback_args = map_set_for_each_callback_args,
2269314ee05eSYonghong Song .map_for_each_callback = bpf_for_each_hash_elem,
2270304849a2SYafang Shao .map_mem_usage = htab_map_mem_usage,
227105799638SYonghong Song BATCH_OPS(htab),
2272c317ab71SMenglong Dong .map_btf_id = &htab_map_btf_ids[0],
2273d6c4503cSYonghong Song .iter_seq_info = &iter_seq_info,
22740f8e4bd8SAlexei Starovoitov };
22750f8e4bd8SAlexei Starovoitov
227640077e0cSJohannes Berg const struct bpf_map_ops htab_lru_map_ops = {
2277f4d05259SMartin KaFai Lau .map_meta_equal = bpf_map_meta_equal,
22789328e0d1SJakub Kicinski .map_alloc_check = htab_map_alloc_check,
227929ba732aSMartin KaFai Lau .map_alloc = htab_map_alloc,
228029ba732aSMartin KaFai Lau .map_free = htab_map_free,
228129ba732aSMartin KaFai Lau .map_get_next_key = htab_map_get_next_key,
2282246331e3SBenjamin Tissoires .map_release_uref = htab_map_free_timers_and_wq,
228329ba732aSMartin KaFai Lau .map_lookup_elem = htab_lru_map_lookup_elem,
22843e87f192SDenis Salopek .map_lookup_and_delete_elem = htab_lru_map_lookup_and_delete_elem,
228550b045a8SDaniel Borkmann .map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys,
228629ba732aSMartin KaFai Lau .map_update_elem = htab_lru_map_update_elem,
228729ba732aSMartin KaFai Lau .map_delete_elem = htab_lru_map_delete_elem,
2288cc555421SMartin KaFai Lau .map_gen_lookup = htab_lru_map_gen_lookup,
2289699c86d6SYonghong Song .map_seq_show_elem = htab_map_seq_show_elem,
2290314ee05eSYonghong Song .map_set_for_each_callback_args = map_set_for_each_callback_args,
2291314ee05eSYonghong Song .map_for_each_callback = bpf_for_each_hash_elem,
2292304849a2SYafang Shao .map_mem_usage = htab_map_mem_usage,
229305799638SYonghong Song BATCH_OPS(htab_lru),
2294c317ab71SMenglong Dong .map_btf_id = &htab_map_btf_ids[0],
2295d6c4503cSYonghong Song .iter_seq_info = &iter_seq_info,
229629ba732aSMartin KaFai Lau };
229729ba732aSMartin KaFai Lau
2298824bd0ceSAlexei Starovoitov /* Called from eBPF program */
2299824bd0ceSAlexei Starovoitov static void *htab_percpu_map_lookup_elem(struct bpf_map *map, void *key)
2300824bd0ceSAlexei Starovoitov {
2301824bd0ceSAlexei Starovoitov struct htab_elem *l = __htab_map_lookup_elem(map, key);
2302824bd0ceSAlexei Starovoitov
2303824bd0ceSAlexei Starovoitov if (l)
2304824bd0ceSAlexei Starovoitov return this_cpu_ptr(htab_elem_get_ptr(l, map->key_size));
2305824bd0ceSAlexei Starovoitov else
2306824bd0ceSAlexei Starovoitov return NULL;
2307824bd0ceSAlexei Starovoitov }
2308824bd0ceSAlexei Starovoitov
23090b56e637SAndrii Nakryiko /* inline bpf_map_lookup_elem() call for per-CPU hashmap */
23100b56e637SAndrii Nakryiko static int htab_percpu_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
23110b56e637SAndrii Nakryiko {
23120b56e637SAndrii Nakryiko struct bpf_insn *insn = insn_buf;
23130b56e637SAndrii Nakryiko
23140b56e637SAndrii Nakryiko if (!bpf_jit_supports_percpu_insn())
23150b56e637SAndrii Nakryiko return -EOPNOTSUPP;
23160b56e637SAndrii Nakryiko
23170b56e637SAndrii Nakryiko BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem,
23180b56e637SAndrii Nakryiko (void *(*)(struct bpf_map *map, void *key))NULL));
23190b56e637SAndrii Nakryiko *insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem);
23200b56e637SAndrii Nakryiko *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3);
23210b56e637SAndrii Nakryiko *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_0,
232211ba7ce0SYonghong Song offsetof(struct htab_elem, key) + roundup(map->key_size, 8));
23230b56e637SAndrii Nakryiko *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0);
23240b56e637SAndrii Nakryiko *insn++ = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0);
23250b56e637SAndrii Nakryiko
23260b56e637SAndrii Nakryiko return insn - insn_buf;
23270b56e637SAndrii Nakryiko }
23280b56e637SAndrii Nakryiko
232907343110SFeng Zhou static void *htab_percpu_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu)
233007343110SFeng Zhou {
233107343110SFeng Zhou struct htab_elem *l;
233207343110SFeng Zhou
233307343110SFeng Zhou if (cpu >= nr_cpu_ids)
233407343110SFeng Zhou return NULL;
233507343110SFeng Zhou
233607343110SFeng Zhou l = __htab_map_lookup_elem(map, key);
233707343110SFeng Zhou if (l)
233807343110SFeng Zhou return per_cpu_ptr(htab_elem_get_ptr(l, map->key_size), cpu);
233907343110SFeng Zhou else
234007343110SFeng Zhou return NULL;
234107343110SFeng Zhou }
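
/*
 * From a BPF program the lookup above is reached through the
 * bpf_map_lookup_percpu_elem() helper; a rough sketch (not part of this
 * file), assuming a BPF_MAP_TYPE_PERCPU_HASH map "pcpu_hmap" with u32 keys
 * and u64 values:
 *
 *	u32 key = 0, cpu = 1;
 *	u64 *val;
 *
 *	val = bpf_map_lookup_percpu_elem(&pcpu_hmap, &key, cpu);
 *	if (val)
 *		bpf_printk("cpu1 value %llu", *val);
 *
 * A cpu >= nr_cpu_ids simply yields NULL, as checked above.
 */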
234207343110SFeng Zhou
23438f844938SMartin KaFai Lau static void *htab_lru_percpu_map_lookup_elem(struct bpf_map *map, void *key)
23448f844938SMartin KaFai Lau {
23458f844938SMartin KaFai Lau struct htab_elem *l = __htab_map_lookup_elem(map, key);
23468f844938SMartin KaFai Lau
23478f844938SMartin KaFai Lau if (l) {
23488f844938SMartin KaFai Lau bpf_lru_node_set_ref(&l->lru_node);
23498f844938SMartin KaFai Lau return this_cpu_ptr(htab_elem_get_ptr(l, map->key_size));
23508f844938SMartin KaFai Lau }
23518f844938SMartin KaFai Lau
23528f844938SMartin KaFai Lau return NULL;
23538f844938SMartin KaFai Lau }
23548f844938SMartin KaFai Lau
235507343110SFeng Zhou static void *htab_lru_percpu_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu)
235607343110SFeng Zhou {
235707343110SFeng Zhou struct htab_elem *l;
235807343110SFeng Zhou
235907343110SFeng Zhou if (cpu >= nr_cpu_ids)
236007343110SFeng Zhou return NULL;
236107343110SFeng Zhou
236207343110SFeng Zhou l = __htab_map_lookup_elem(map, key);
236307343110SFeng Zhou if (l) {
236407343110SFeng Zhou bpf_lru_node_set_ref(&l->lru_node);
236507343110SFeng Zhou return per_cpu_ptr(htab_elem_get_ptr(l, map->key_size), cpu);
236607343110SFeng Zhou }
236707343110SFeng Zhou
236807343110SFeng Zhou return NULL;
236907343110SFeng Zhou }
237007343110SFeng Zhou
237115a07b33SAlexei Starovoitov int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
237215a07b33SAlexei Starovoitov {
237315a07b33SAlexei Starovoitov struct htab_elem *l;
237415a07b33SAlexei Starovoitov void __percpu *pptr;
237515a07b33SAlexei Starovoitov int ret = -ENOENT;
237615a07b33SAlexei Starovoitov int cpu, off = 0;
237715a07b33SAlexei Starovoitov u32 size;
237815a07b33SAlexei Starovoitov
237915a07b33SAlexei Starovoitov /* per_cpu areas are zero-filled and bpf programs can only
238015a07b33SAlexei Starovoitov * access 'value_size' of them, so copying rounded areas
238115a07b33SAlexei Starovoitov * will not leak any kernel data
238215a07b33SAlexei Starovoitov */
238315a07b33SAlexei Starovoitov size = round_up(map->value_size, 8);
238415a07b33SAlexei Starovoitov rcu_read_lock();
238515a07b33SAlexei Starovoitov l = __htab_map_lookup_elem(map, key);
238615a07b33SAlexei Starovoitov if (!l)
238715a07b33SAlexei Starovoitov goto out;
238850b045a8SDaniel Borkmann /* We do not mark LRU map element here in order to not mess up
238950b045a8SDaniel Borkmann * eviction heuristics when user space does a map walk.
239050b045a8SDaniel Borkmann */
239115a07b33SAlexei Starovoitov pptr = htab_elem_get_ptr(l, map->key_size);
239215a07b33SAlexei Starovoitov for_each_possible_cpu(cpu) {
239365334e64SKumar Kartikeya Dwivedi copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu));
239465334e64SKumar Kartikeya Dwivedi check_and_init_map_value(map, value + off);
239515a07b33SAlexei Starovoitov off += size;
239615a07b33SAlexei Starovoitov }
239715a07b33SAlexei Starovoitov ret = 0;
239815a07b33SAlexei Starovoitov out:
239915a07b33SAlexei Starovoitov rcu_read_unlock();
240015a07b33SAlexei Starovoitov return ret;
240115a07b33SAlexei Starovoitov }
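
/*
 * User-space counterpart sketch for bpf_percpu_hash_copy() (assumes libbpf;
 * names are illustrative): the value buffer handed to bpf_map_lookup_elem()
 * must provide one 8-byte-aligned value slot per possible CPU.
 *
 *	int ncpus = libbpf_num_possible_cpus();
 *	__u64 *vals = calloc(ncpus, sizeof(__u64));	// value_size rounded to 8
 *	__u32 key = 0;
 *
 *	if (!bpf_map_lookup_elem(map_fd, &key, vals)) {
 *		for (int cpu = 0; cpu < ncpus; cpu++)
 *			printf("cpu%d: %llu\n", cpu, (unsigned long long)vals[cpu]);
 *	}
 */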
240215a07b33SAlexei Starovoitov
240315a07b33SAlexei Starovoitov int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
240415a07b33SAlexei Starovoitov u64 map_flags)
240515a07b33SAlexei Starovoitov {
24068f844938SMartin KaFai Lau struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
24076bbd9a05SSasha Levin int ret;
24086bbd9a05SSasha Levin
24096bbd9a05SSasha Levin rcu_read_lock();
24108f844938SMartin KaFai Lau if (htab_is_lru(htab))
24118f844938SMartin KaFai Lau ret = __htab_lru_percpu_map_update_elem(map, key, value,
24128f844938SMartin KaFai Lau map_flags, true);
24138f844938SMartin KaFai Lau else
24148f844938SMartin KaFai Lau ret = __htab_percpu_map_update_elem(map, key, value, map_flags,
24158f844938SMartin KaFai Lau true);
24166bbd9a05SSasha Levin rcu_read_unlock();
24176bbd9a05SSasha Levin
24186bbd9a05SSasha Levin return ret;
241915a07b33SAlexei Starovoitov }
242015a07b33SAlexei Starovoitov
2421c7b27c37SYonghong Song static void htab_percpu_map_seq_show_elem(struct bpf_map *map, void *key,
2422c7b27c37SYonghong Song struct seq_file *m)
2423c7b27c37SYonghong Song {
2424c7b27c37SYonghong Song struct htab_elem *l;
2425c7b27c37SYonghong Song void __percpu *pptr;
2426c7b27c37SYonghong Song int cpu;
2427c7b27c37SYonghong Song
2428c7b27c37SYonghong Song rcu_read_lock();
2429c7b27c37SYonghong Song
2430c7b27c37SYonghong Song l = __htab_map_lookup_elem(map, key);
2431c7b27c37SYonghong Song if (!l) {
2432c7b27c37SYonghong Song rcu_read_unlock();
2433c7b27c37SYonghong Song return;
2434c7b27c37SYonghong Song }
2435c7b27c37SYonghong Song
2436c7b27c37SYonghong Song btf_type_seq_show(map->btf, map->btf_key_type_id, key, m);
2437c7b27c37SYonghong Song seq_puts(m, ": {\n");
2438c7b27c37SYonghong Song pptr = htab_elem_get_ptr(l, map->key_size);
2439c7b27c37SYonghong Song for_each_possible_cpu(cpu) {
2440c7b27c37SYonghong Song seq_printf(m, "\tcpu%d: ", cpu);
2441c7b27c37SYonghong Song btf_type_seq_show(map->btf, map->btf_value_type_id,
2442c7b27c37SYonghong Song per_cpu_ptr(pptr, cpu), m);
2443df862de4SMarkus Elfring seq_putc(m, '\n');
2444c7b27c37SYonghong Song }
2445c7b27c37SYonghong Song seq_puts(m, "}\n");
2446c7b27c37SYonghong Song
2447c7b27c37SYonghong Song rcu_read_unlock();
2448c7b27c37SYonghong Song }
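
/*
 * When a pinned percpu hash map is read back through bpffs, the show
 * callback above emits one block per key, roughly of the form (values
 * illustrative):
 *
 *	<key>: {
 *		cpu0: <value>
 *		cpu1: <value>
 *		...
 *	}
 */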
2449c7b27c37SYonghong Song
245040077e0cSJohannes Berg const struct bpf_map_ops htab_percpu_map_ops = {
2451f4d05259SMartin KaFai Lau .map_meta_equal = bpf_map_meta_equal,
24529328e0d1SJakub Kicinski .map_alloc_check = htab_map_alloc_check,
2453824bd0ceSAlexei Starovoitov .map_alloc = htab_map_alloc,
2454824bd0ceSAlexei Starovoitov .map_free = htab_map_free,
2455824bd0ceSAlexei Starovoitov .map_get_next_key = htab_map_get_next_key,
2456824bd0ceSAlexei Starovoitov .map_lookup_elem = htab_percpu_map_lookup_elem,
24570b56e637SAndrii Nakryiko .map_gen_lookup = htab_percpu_map_gen_lookup,
24583e87f192SDenis Salopek .map_lookup_and_delete_elem = htab_percpu_map_lookup_and_delete_elem,
2459824bd0ceSAlexei Starovoitov .map_update_elem = htab_percpu_map_update_elem,
2460824bd0ceSAlexei Starovoitov .map_delete_elem = htab_map_delete_elem,
246107343110SFeng Zhou .map_lookup_percpu_elem = htab_percpu_map_lookup_percpu_elem,
2462c7b27c37SYonghong Song .map_seq_show_elem = htab_percpu_map_seq_show_elem,
2463314ee05eSYonghong Song .map_set_for_each_callback_args = map_set_for_each_callback_args,
2464314ee05eSYonghong Song .map_for_each_callback = bpf_for_each_hash_elem,
2465304849a2SYafang Shao .map_mem_usage = htab_map_mem_usage,
246605799638SYonghong Song BATCH_OPS(htab_percpu),
2467c317ab71SMenglong Dong .map_btf_id = &htab_map_btf_ids[0],
2468d6c4503cSYonghong Song .iter_seq_info = &iter_seq_info,
2469824bd0ceSAlexei Starovoitov };
2470824bd0ceSAlexei Starovoitov
247140077e0cSJohannes Berg const struct bpf_map_ops htab_lru_percpu_map_ops = {
2472f4d05259SMartin KaFai Lau .map_meta_equal = bpf_map_meta_equal,
24739328e0d1SJakub Kicinski .map_alloc_check = htab_map_alloc_check,
24748f844938SMartin KaFai Lau .map_alloc = htab_map_alloc,
24758f844938SMartin KaFai Lau .map_free = htab_map_free,
24768f844938SMartin KaFai Lau .map_get_next_key = htab_map_get_next_key,
24778f844938SMartin KaFai Lau .map_lookup_elem = htab_lru_percpu_map_lookup_elem,
24783e87f192SDenis Salopek .map_lookup_and_delete_elem = htab_lru_percpu_map_lookup_and_delete_elem,
24798f844938SMartin KaFai Lau .map_update_elem = htab_lru_percpu_map_update_elem,
24808f844938SMartin KaFai Lau .map_delete_elem = htab_lru_map_delete_elem,
248107343110SFeng Zhou .map_lookup_percpu_elem = htab_lru_percpu_map_lookup_percpu_elem,
2482c7b27c37SYonghong Song .map_seq_show_elem = htab_percpu_map_seq_show_elem,
2483314ee05eSYonghong Song .map_set_for_each_callback_args = map_set_for_each_callback_args,
2484314ee05eSYonghong Song .map_for_each_callback = bpf_for_each_hash_elem,
2485304849a2SYafang Shao .map_mem_usage = htab_map_mem_usage,
248605799638SYonghong Song BATCH_OPS(htab_lru_percpu),
2487c317ab71SMenglong Dong .map_btf_id = &htab_map_btf_ids[0],
2488d6c4503cSYonghong Song .iter_seq_info = &iter_seq_info,
24898f844938SMartin KaFai Lau };
24908f844938SMartin KaFai Lau
24919328e0d1SJakub Kicinski static int fd_htab_map_alloc_check(union bpf_attr *attr)
2492bcc6b1b7SMartin KaFai Lau {
2493bcc6b1b7SMartin KaFai Lau if (attr->value_size != sizeof(u32))
24949328e0d1SJakub Kicinski return -EINVAL;
24959328e0d1SJakub Kicinski return htab_map_alloc_check(attr);
2496bcc6b1b7SMartin KaFai Lau }
2497bcc6b1b7SMartin KaFai Lau
2498bcc6b1b7SMartin KaFai Lau static void fd_htab_map_free(struct bpf_map *map)
2499bcc6b1b7SMartin KaFai Lau {
2500bcc6b1b7SMartin KaFai Lau struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
2501bcc6b1b7SMartin KaFai Lau struct hlist_nulls_node *n;
2502bcc6b1b7SMartin KaFai Lau struct hlist_nulls_head *head;
2503bcc6b1b7SMartin KaFai Lau struct htab_elem *l;
2504bcc6b1b7SMartin KaFai Lau int i;
2505bcc6b1b7SMartin KaFai Lau
2506bcc6b1b7SMartin KaFai Lau for (i = 0; i < htab->n_buckets; i++) {
2507bcc6b1b7SMartin KaFai Lau head = select_bucket(htab, i);
2508bcc6b1b7SMartin KaFai Lau
2509bcc6b1b7SMartin KaFai Lau hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
2510bcc6b1b7SMartin KaFai Lau void *ptr = fd_htab_map_get_ptr(map, l);
2511bcc6b1b7SMartin KaFai Lau
251220c20bd1SHou Tao map->ops->map_fd_put_ptr(map, ptr, false);
2513bcc6b1b7SMartin KaFai Lau }
2514bcc6b1b7SMartin KaFai Lau }
2515bcc6b1b7SMartin KaFai Lau
2516bcc6b1b7SMartin KaFai Lau htab_map_free(map);
2517bcc6b1b7SMartin KaFai Lau }
2518bcc6b1b7SMartin KaFai Lau
2519bcc6b1b7SMartin KaFai Lau /* only called from syscall */
252014dc6f04SMartin KaFai Lau int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
252114dc6f04SMartin KaFai Lau {
252214dc6f04SMartin KaFai Lau void **ptr;
252314dc6f04SMartin KaFai Lau int ret = 0;
252414dc6f04SMartin KaFai Lau
252514dc6f04SMartin KaFai Lau if (!map->ops->map_fd_sys_lookup_elem)
252614dc6f04SMartin KaFai Lau return -ENOTSUPP;
252714dc6f04SMartin KaFai Lau
252814dc6f04SMartin KaFai Lau rcu_read_lock();
252914dc6f04SMartin KaFai Lau ptr = htab_map_lookup_elem(map, key);
253014dc6f04SMartin KaFai Lau if (ptr)
253114dc6f04SMartin KaFai Lau *value = map->ops->map_fd_sys_lookup_elem(READ_ONCE(*ptr));
253214dc6f04SMartin KaFai Lau else
253314dc6f04SMartin KaFai Lau ret = -ENOENT;
253414dc6f04SMartin KaFai Lau rcu_read_unlock();
253514dc6f04SMartin KaFai Lau
253614dc6f04SMartin KaFai Lau return ret;
253714dc6f04SMartin KaFai Lau }
253814dc6f04SMartin KaFai Lau
253914dc6f04SMartin KaFai Lau /* only called from syscall */
2540bcc6b1b7SMartin KaFai Lau int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
2541bcc6b1b7SMartin KaFai Lau void *key, void *value, u64 map_flags)
2542bcc6b1b7SMartin KaFai Lau {
2543bcc6b1b7SMartin KaFai Lau void *ptr;
2544bcc6b1b7SMartin KaFai Lau int ret;
2545bcc6b1b7SMartin KaFai Lau u32 ufd = *(u32 *)value;
2546bcc6b1b7SMartin KaFai Lau
2547bcc6b1b7SMartin KaFai Lau ptr = map->ops->map_fd_get_ptr(map, map_file, ufd);
2548bcc6b1b7SMartin KaFai Lau if (IS_ERR(ptr))
2549bcc6b1b7SMartin KaFai Lau return PTR_ERR(ptr);
2550bcc6b1b7SMartin KaFai Lau
25518f82583fSHou Tao /* The htab bucket lock is always held during update operations in fd
25528f82583fSHou Tao * htab map, and the following rcu_read_lock() is only used to avoid
25538f82583fSHou Tao * the WARN_ON_ONCE in htab_map_update_elem().
25548f82583fSHou Tao */
25558f82583fSHou Tao rcu_read_lock();
2556bcc6b1b7SMartin KaFai Lau ret = htab_map_update_elem(map, key, &ptr, map_flags);
25578f82583fSHou Tao rcu_read_unlock();
2558bcc6b1b7SMartin KaFai Lau if (ret)
255920c20bd1SHou Tao map->ops->map_fd_put_ptr(map, ptr, false);
2560bcc6b1b7SMartin KaFai Lau
2561bcc6b1b7SMartin KaFai Lau return ret;
2562bcc6b1b7SMartin KaFai Lau }
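
/*
 * Syscall-side usage sketch for the fd-map update above (not part of this
 * file; map names and sizes are illustrative): the "value" written into a
 * hash-of-maps is the file descriptor of the inner map, which is converted
 * into a map pointer via ->map_fd_get_ptr().
 *
 *	__u32 key = 1;
 *	int inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "inner",
 *				      sizeof(__u32), sizeof(__u64), 4, NULL);
 *
 *	bpf_map_update_elem(outer_map_fd, &key, &inner_fd, BPF_ANY);
 */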
2563bcc6b1b7SMartin KaFai Lau
2564bcc6b1b7SMartin KaFai Lau static struct bpf_map *htab_of_map_alloc(union bpf_attr *attr)
2565bcc6b1b7SMartin KaFai Lau {
2566bcc6b1b7SMartin KaFai Lau struct bpf_map *map, *inner_map_meta;
2567bcc6b1b7SMartin KaFai Lau
2568bcc6b1b7SMartin KaFai Lau inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd);
2569bcc6b1b7SMartin KaFai Lau if (IS_ERR(inner_map_meta))
2570bcc6b1b7SMartin KaFai Lau return inner_map_meta;
2571bcc6b1b7SMartin KaFai Lau
25729328e0d1SJakub Kicinski map = htab_map_alloc(attr);
2573bcc6b1b7SMartin KaFai Lau if (IS_ERR(map)) {
2574bcc6b1b7SMartin KaFai Lau bpf_map_meta_free(inner_map_meta);
2575bcc6b1b7SMartin KaFai Lau return map;
2576bcc6b1b7SMartin KaFai Lau }
2577bcc6b1b7SMartin KaFai Lau
2578bcc6b1b7SMartin KaFai Lau map->inner_map_meta = inner_map_meta;
2579bcc6b1b7SMartin KaFai Lau
2580bcc6b1b7SMartin KaFai Lau return map;
2581bcc6b1b7SMartin KaFai Lau }
2582bcc6b1b7SMartin KaFai Lau
2583bcc6b1b7SMartin KaFai Lau static void *htab_of_map_lookup_elem(struct bpf_map *map, void *key)
2584bcc6b1b7SMartin KaFai Lau {
2585bcc6b1b7SMartin KaFai Lau struct bpf_map **inner_map = htab_map_lookup_elem(map, key);
2586bcc6b1b7SMartin KaFai Lau
2587bcc6b1b7SMartin KaFai Lau if (!inner_map)
2588bcc6b1b7SMartin KaFai Lau return NULL;
2589bcc6b1b7SMartin KaFai Lau
2590bcc6b1b7SMartin KaFai Lau return READ_ONCE(*inner_map);
2591bcc6b1b7SMartin KaFai Lau }
2592bcc6b1b7SMartin KaFai Lau
25934a8f87e6SDaniel Borkmann static int htab_of_map_gen_lookup(struct bpf_map *map,
25947b0c2a05SDaniel Borkmann struct bpf_insn *insn_buf)
25957b0c2a05SDaniel Borkmann {
25967b0c2a05SDaniel Borkmann struct bpf_insn *insn = insn_buf;
25977b0c2a05SDaniel Borkmann const int ret = BPF_REG_0;
25987b0c2a05SDaniel Borkmann
259909772d92SDaniel Borkmann BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem,
260009772d92SDaniel Borkmann (void *(*)(struct bpf_map *map, void *key))NULL));
26013d717fadSKees Cook *insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem);
26027b0c2a05SDaniel Borkmann *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 2);
26037b0c2a05SDaniel Borkmann *insn++ = BPF_ALU64_IMM(BPF_ADD, ret,
26047b0c2a05SDaniel Borkmann offsetof(struct htab_elem, key) +
26057b0c2a05SDaniel Borkmann round_up(map->key_size, 8));
26067b0c2a05SDaniel Borkmann *insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0);
26077b0c2a05SDaniel Borkmann
26087b0c2a05SDaniel Borkmann return insn - insn_buf;
26097b0c2a05SDaniel Borkmann }
26107b0c2a05SDaniel Borkmann
2611bcc6b1b7SMartin KaFai Lau static void htab_of_map_free(struct bpf_map *map)
2612bcc6b1b7SMartin KaFai Lau {
2613bcc6b1b7SMartin KaFai Lau bpf_map_meta_free(map->inner_map_meta);
2614bcc6b1b7SMartin KaFai Lau fd_htab_map_free(map);
2615bcc6b1b7SMartin KaFai Lau }
2616bcc6b1b7SMartin KaFai Lau
261740077e0cSJohannes Berg const struct bpf_map_ops htab_of_maps_map_ops = {
26189328e0d1SJakub Kicinski .map_alloc_check = fd_htab_map_alloc_check,
2619bcc6b1b7SMartin KaFai Lau .map_alloc = htab_of_map_alloc,
2620bcc6b1b7SMartin KaFai Lau .map_free = htab_of_map_free,
2621bcc6b1b7SMartin KaFai Lau .map_get_next_key = htab_map_get_next_key,
2622bcc6b1b7SMartin KaFai Lau .map_lookup_elem = htab_of_map_lookup_elem,
2623bcc6b1b7SMartin KaFai Lau .map_delete_elem = htab_map_delete_elem,
2624bcc6b1b7SMartin KaFai Lau .map_fd_get_ptr = bpf_map_fd_get_ptr,
2625bcc6b1b7SMartin KaFai Lau .map_fd_put_ptr = bpf_map_fd_put_ptr,
262614dc6f04SMartin KaFai Lau .map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
26277b0c2a05SDaniel Borkmann .map_gen_lookup = htab_of_map_gen_lookup,
2628e8d2bec0SDaniel Borkmann .map_check_btf = map_check_no_btf,
2629304849a2SYafang Shao .map_mem_usage = htab_map_mem_usage,
26309263dddcSTakshak Chahande BATCH_OPS(htab),
2631c317ab71SMenglong Dong .map_btf_id = &htab_map_btf_ids[0],
2632bcc6b1b7SMartin KaFai Lau };
2633