// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io
 */

/* A devmap's primary use is as a backend map for the XDP BPF helper call
 * bpf_redirect_map(). Because XDP is mostly concerned with performance we
 * spent some effort to ensure the datapath with redirect maps does not use
 * any locking. This is a quick note on the details.
 *
 * We have three possible paths into the devmap control plane: bpf syscalls,
 * bpf programs, and driver-side xmit/flush operations. A bpf syscall will
 * invoke an update, delete, or lookup operation. To ensure updates and
 * deletes appear atomic from the datapath side, xchg() is used to modify the
 * netdev_map array. Then, because the datapath does a lookup into the
 * netdev_map array (read-only) from an RCU critical section, we use
 * call_rcu() to wait for an RCU grace period before freeing the old data
 * structures. This ensures the datapath always has a valid copy. However,
 * the datapath does a "flush" operation that pushes any pending packets in
 * the driver outside the RCU critical section. Each bpf_dtab_netdev tracks
 * these pending operations using a per-cpu flush list. The bpf_dtab_netdev
 * object will not be destroyed until this list is empty, indicating that all
 * outstanding flush operations have completed.
 *
 * BPF syscalls may race with BPF program calls on any of the update, delete
 * or lookup operations. As noted above, the xchg() operation also keeps the
 * netdev_map consistent in this case. From the devmap side, BPF programs
 * calling into these operations are the same as multiple user space threads
 * making system calls.
 *
 * Finally, any of the above may race with a netdev_unregister notifier. The
 * unregister notifier must search the map structure for entries that hold a
 * reference to the net device being removed, and remove them. This is a
 * two-step process: (a) dereference the bpf_dtab_netdev object in netdev_map
 * and (b) check whether its ifindex is the same as that of the net_device
 * being removed. When removing the dev, a cmpxchg() is used to ensure the
 * correct dev is removed; in the case of a concurrent update or delete
 * operation it is possible that the initially referenced dev is no longer in
 * the map. As the notifier hook walks the map we know that new dev
 * references cannot be added by the user because core infrastructure ensures
 * dev_get_by_index() calls will fail at this point.
 *
 * The devmap_hash type is a map type which interprets keys as ifindexes and
 * indexes these using a hashmap. This allows maps that use ifindex as key to
 * be densely packed instead of having holes in the lookup array for unused
 * ifindexes. The setup and packet enqueue/send code is shared between the
 * two types of devmap; only the lookup and insertion differ. (An illustrative
 * user-space usage sketch follows dev_map_create_hash() below.)
 */
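/* For orientation, a minimal XDP program driving this datapath might look
 * like the sketch below (illustrative only, not part of this file; the map
 * name "tx_ports" and the use of index 0 are assumptions):
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_DEVMAP);
 *		__uint(key_size, sizeof(__u32));
 *		__uint(value_size, sizeof(__u32));
 *		__uint(max_entries, 64);
 *	} tx_ports SEC(".maps");
 *
 *	SEC("xdp")
 *	int xdp_redirect_prog(struct xdp_md *ctx)
 *	{
 *		return bpf_redirect_map(&tx_ports, 0, 0);
 *	}
 *
 * On XDP_REDIRECT the frame is enqueued via dev_map_enqueue() below and only
 * hits the wire when the driver flushes at the end of its NAPI poll.
 */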
#include <linux/bpf.h>
#include <net/xdp.h>
#include <linux/filter.h>
#include <trace/events/xdp.h>

#define DEV_CREATE_FLAG_MASK \
	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)

#define DEV_MAP_BULK_SIZE 16
struct xdp_dev_bulk_queue {
	struct xdp_frame *q[DEV_MAP_BULK_SIZE];
	struct list_head flush_node;
	struct net_device *dev;
	struct net_device *dev_rx;
	unsigned int count;
};

struct bpf_dtab_netdev {
	struct net_device *dev; /* must be first member, due to tracepoint */
	struct hlist_node index_hlist;
	struct bpf_dtab *dtab;
	struct rcu_head rcu;
	unsigned int idx;
};

struct bpf_dtab {
	struct bpf_map map;
	struct bpf_dtab_netdev **netdev_map; /* DEVMAP type only */
	struct list_head list;

	/* these are only used for DEVMAP_HASH type maps */
	struct hlist_head *dev_index_head;
	spinlock_t index_lock;
	unsigned int items;
	u32 n_buckets;
};

static DEFINE_PER_CPU(struct list_head, dev_flush_list);
static DEFINE_SPINLOCK(dev_map_lock);
static LIST_HEAD(dev_map_list);

static struct hlist_head *dev_map_create_hash(unsigned int entries)
{
	int i;
	struct hlist_head *hash;

	hash = kmalloc_array(entries, sizeof(*hash), GFP_KERNEL);
	if (hash != NULL)
		for (i = 0; i < entries; i++)
			INIT_HLIST_HEAD(&hash[i]);

	return hash;
}
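/* Control-plane usage from user space is plain map updates; a sketch
 * assuming libbpf, with "devmap_fd" and "devmap_hash_fd" as hypothetical
 * map fds and an ifindex obtained via if_nametoindex():
 *
 *	__u32 key, val = ifindex;
 *
 *	key = ifindex;	// DEVMAP_HASH: the ifindex itself can be the key
 *	bpf_map_update_elem(devmap_hash_fd, &key, &val, BPF_ANY);
 *
 *	key = 0;	// DEVMAP: keys are array indexes
 *	bpf_map_update_elem(devmap_fd, &key, &val, BPF_ANY);
 *
 *	val = 0;	// writing ifindex 0 clears a DEVMAP slot
 *	bpf_map_update_elem(devmap_fd, &key, &val, BPF_ANY);
 */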
static inline struct hlist_head *dev_map_index_hash(struct bpf_dtab *dtab,
						    int idx)
{
	return &dtab->dev_index_head[idx & (dtab->n_buckets - 1)];
}

static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
{
	u64 cost = 0;
	int err;

	/* check sanity of attributes */
	if (attr->max_entries == 0 || attr->key_size != 4 ||
	    attr->value_size != 4 || attr->map_flags & ~DEV_CREATE_FLAG_MASK)
		return -EINVAL;

	/* Lookup returns a pointer straight to dev->ifindex, so make sure the
	 * verifier prevents writes from the BPF side
	 */
	attr->map_flags |= BPF_F_RDONLY_PROG;

	bpf_map_init_from_attr(&dtab->map, attr);

	if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
		dtab->n_buckets = roundup_pow_of_two(dtab->map.max_entries);

		if (!dtab->n_buckets) /* Overflow check */
			return -EINVAL;
		cost += (u64) sizeof(struct hlist_head) * dtab->n_buckets;
	} else {
		cost += (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *);
	}

	/* if map size is larger than memlock limit, reject it */
	err = bpf_map_charge_init(&dtab->map.memory, cost);
	if (err)
		return -EINVAL;

	if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
		dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets);
		if (!dtab->dev_index_head)
			goto free_charge;

		spin_lock_init(&dtab->index_lock);
	} else {
		dtab->netdev_map = bpf_map_area_alloc(dtab->map.max_entries *
						      sizeof(struct bpf_dtab_netdev *),
						      dtab->map.numa_node);
		if (!dtab->netdev_map)
			goto free_charge;
	}

	return 0;

free_charge:
	bpf_map_charge_finish(&dtab->map.memory);
	return -ENOMEM;
}

static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
{
	struct bpf_dtab *dtab;
	int err;

	if (!capable(CAP_NET_ADMIN))
		return ERR_PTR(-EPERM);

	dtab = kzalloc(sizeof(*dtab), GFP_USER);
	if (!dtab)
		return ERR_PTR(-ENOMEM);

	err = dev_map_init_map(dtab, attr);
	if (err) {
		kfree(dtab);
		return ERR_PTR(err);
	}

	spin_lock(&dev_map_lock);
	list_add_tail_rcu(&dtab->list, &dev_map_list);
	spin_unlock(&dev_map_lock);

	return &dtab->map;
}

static void dev_map_free(struct bpf_map *map)
{
	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
	int i;

	/* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
	 * so the programs (there can be more than one that used this map)
	 * have been disconnected from events. The following synchronize_rcu()
	 * guarantees that both rcu read critical sections complete and waits
	 * for preempt-disable regions (NAPI being the relevant context here),
	 * so we are certain there will be no further reads against the
	 * netdev_map and all flush operations are complete. Flush operations
	 * can only be done from NAPI context for this reason.
	 */

	spin_lock(&dev_map_lock);
	list_del_rcu(&dtab->list);
	spin_unlock(&dev_map_lock);

	bpf_clear_redirect_map(map);
	synchronize_rcu();

	/* Make sure prior __dev_map_entry_free() callbacks have completed. */
	rcu_barrier();

	if (dtab->map.map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
		for (i = 0; i < dtab->n_buckets; i++) {
			struct bpf_dtab_netdev *dev;
			struct hlist_head *head;
			struct hlist_node *next;

			head = dev_map_index_hash(dtab, i);

			hlist_for_each_entry_safe(dev, next, head, index_hlist) {
				hlist_del_rcu(&dev->index_hlist);
				dev_put(dev->dev);
				kfree(dev);
			}
		}

		kfree(dtab->dev_index_head);
	} else {
		for (i = 0; i < dtab->map.max_entries; i++) {
			struct bpf_dtab_netdev *dev;

			dev = dtab->netdev_map[i];
			if (!dev)
				continue;

			dev_put(dev->dev);
			kfree(dev);
		}

		bpf_map_area_free(dtab->netdev_map);
	}

	kfree(dtab);
}

static int dev_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
	u32 index = key ? *(u32 *)key : U32_MAX;
	u32 *next = next_key;

	if (index >= dtab->map.max_entries) {
		*next = 0;
		return 0;
	}

	if (index == dtab->map.max_entries - 1)
		return -ENOENT;
	*next = index + 1;
	return 0;
}

struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key)
{
	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
	struct hlist_head *head = dev_map_index_hash(dtab, key);
	struct bpf_dtab_netdev *dev;

	hlist_for_each_entry_rcu(dev, head, index_hlist,
				 lockdep_is_held(&dtab->index_lock))
		if (dev->idx == key)
			return dev;

	return NULL;
}
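/* dev_map_hash_get_next_key() below implements the traversal behind
 * user-space map iteration; a sketch of such iteration, assuming libbpf
 * and a hypothetical "map_fd":
 *
 *	__u32 key, next;
 *	int err = bpf_map_get_next_key(map_fd, NULL, &next);	// first key
 *
 *	while (!err) {
 *		key = next;
 *		// ... act on "key" (an ifindex for devmap_hash) ...
 *		err = bpf_map_get_next_key(map_fd, &key, &next);
 *	}
 *	// the loop ends when no further key exists (-ENOENT from the map)
 */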
static int dev_map_hash_get_next_key(struct bpf_map *map, void *key,
				     void *next_key)
{
	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
	u32 idx, *next = next_key;
	struct bpf_dtab_netdev *dev, *next_dev;
	struct hlist_head *head;
	int i = 0;

	if (!key)
		goto find_first;

	idx = *(u32 *)key;

	dev = __dev_map_hash_lookup_elem(map, idx);
	if (!dev)
		goto find_first;

	next_dev = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&dev->index_hlist)),
				    struct bpf_dtab_netdev, index_hlist);

	if (next_dev) {
		*next = next_dev->idx;
		return 0;
	}

	i = idx & (dtab->n_buckets - 1);
	i++;

find_first:
	for (; i < dtab->n_buckets; i++) {
		head = dev_map_index_hash(dtab, i);

		next_dev = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),
					    struct bpf_dtab_netdev,
					    index_hlist);
		if (next_dev) {
			*next = next_dev->idx;
			return 0;
		}
	}

	return -ENOENT;
}

static int bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
{
	struct net_device *dev = bq->dev;
	int sent = 0, drops = 0, err = 0;
	int i;

	if (unlikely(!bq->count))
		return 0;

	for (i = 0; i < bq->count; i++) {
		struct xdp_frame *xdpf = bq->q[i];

		prefetch(xdpf);
	}

	sent = dev->netdev_ops->ndo_xdp_xmit(dev, bq->count, bq->q, flags);
	if (sent < 0) {
		err = sent;
		sent = 0;
		goto error;
	}
	drops = bq->count - sent;
out:
	bq->count = 0;

	trace_xdp_devmap_xmit(bq->dev_rx, dev, sent, drops, err);
	bq->dev_rx = NULL;
	__list_del_clearprev(&bq->flush_node);
	return 0;
error:
	/* If ndo_xdp_xmit fails with an errno, no frames have been
	 * xmit'ed and it's our responsibility to free them all.
	 */
	for (i = 0; i < bq->count; i++) {
		struct xdp_frame *xdpf = bq->q[i];

		xdp_return_frame_rx_napi(xdpf);
		drops++;
	}
	goto out;
}
/* __dev_flush is called from xdp_do_flush() which _must_ be signalled from
 * the driver before returning from its napi->poll() routine. The poll()
 * routine is called either from busy_poll context or net_rx_action signalled
 * from NET_RX_SOFTIRQ. Either way the poll routine must complete before the
 * net device can be torn down. On devmap tear down we ensure the flush list
 * is empty before completing to ensure all flush operations have completed.
 * When drivers update the bpf program they may need to ensure any flush ops
 * are also complete. Using synchronize_rcu or call_rcu will suffice for this
 * because both wait for napi context to exit.
 */
void __dev_flush(void)
{
	struct list_head *flush_list = this_cpu_ptr(&dev_flush_list);
	struct xdp_dev_bulk_queue *bq, *tmp;

	list_for_each_entry_safe(bq, tmp, flush_list, flush_node)
		bq_xmit_all(bq, XDP_XMIT_FLUSH);
}

/* rcu_read_lock (from syscall and BPF contexts) ensures that if a delete
 * and/or update happens in parallel here, a dev_put won't happen until
 * after reading the ifindex.
 */
struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
{
	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
	struct bpf_dtab_netdev *obj;

	if (key >= map->max_entries)
		return NULL;

	obj = READ_ONCE(dtab->netdev_map[key]);
	return obj;
}

/* Runs under RCU-read-side, plus in softirq under NAPI protection.
 * Thus, safe percpu variable access.
 */
static int bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
		      struct net_device *dev_rx)
{
	struct list_head *flush_list = this_cpu_ptr(&dev_flush_list);
	struct xdp_dev_bulk_queue *bq = this_cpu_ptr(dev->xdp_bulkq);

	if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
		bq_xmit_all(bq, 0);

	/* Ingress dev_rx will be the same for all xdp_frame's in the
	 * bulk_queue, because bq is stored per-CPU and must be flushed
	 * from the net_device driver's NAPI function before it returns.
	 */
	if (!bq->dev_rx)
		bq->dev_rx = dev_rx;

	bq->q[bq->count++] = xdpf;

	if (!bq->flush_node.prev)
		list_add(&bq->flush_node, flush_list);

	return 0;
}
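/* For context, the enqueue/flush pair above is driven from a driver's NAPI
 * poll loop; an illustrative skeleton (hypothetical dummy_* helpers, not a
 * real driver):
 *
 *	static int dummy_poll(struct napi_struct *napi, int budget)
 *	{
 *		int work = 0;
 *
 *		while (work < budget && dummy_rx_frame_pending(napi)) {
 *			// XDP_REDIRECT verdicts land in bq_enqueue() via
 *			// dev_map_enqueue()
 *			dummy_rx_one_frame(napi);
 *			work++;
 *		}
 *		xdp_do_flush();	// drains dev_flush_list via __dev_flush()
 *		return work;
 *	}
 */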
static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
				struct net_device *dev_rx)
{
	struct xdp_frame *xdpf;
	int err;

	if (!dev->netdev_ops->ndo_xdp_xmit)
		return -EOPNOTSUPP;

	err = xdp_ok_fwd_dev(dev, xdp->data_end - xdp->data);
	if (unlikely(err))
		return err;

	xdpf = convert_to_xdp_frame(xdp);
	if (unlikely(!xdpf))
		return -EOVERFLOW;

	return bq_enqueue(dev, xdpf, dev_rx);
}

int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
		    struct net_device *dev_rx)
{
	return __xdp_enqueue(dev, xdp, dev_rx);
}

int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
		    struct net_device *dev_rx)
{
	struct net_device *dev = dst->dev;

	return __xdp_enqueue(dev, xdp, dev_rx);
}

int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
			     struct bpf_prog *xdp_prog)
{
	int err;

	err = xdp_ok_fwd_dev(dst->dev, skb->len);
	if (unlikely(err))
		return err;
	skb->dev = dst->dev;
	generic_xdp_tx(skb, xdp_prog);

	return 0;
}
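/* Putting the redirect datapath together, as a sketch of the call chain
 * (the xdp_do_redirect() step lives in net/core/filter.c):
 *
 *	bpf_redirect_map()			BPF program verdict
 *	  -> xdp_do_redirect()			driver RX path
 *	       -> dev_map_enqueue()		this file
 *	            -> __xdp_enqueue() -> bq_enqueue()
 *	  ...
 *	  xdp_do_flush()			end of napi->poll()
 *	    -> __dev_flush() -> bq_xmit_all() -> ndo_xdp_xmit()
 */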
static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_dtab_netdev *obj = __dev_map_lookup_elem(map, *(u32 *)key);
	struct net_device *dev = obj ? obj->dev : NULL;

	return dev ? &dev->ifindex : NULL;
}

static void *dev_map_hash_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_dtab_netdev *obj = __dev_map_hash_lookup_elem(map,
								 *(u32 *)key);
	struct net_device *dev = obj ? obj->dev : NULL;

	return dev ? &dev->ifindex : NULL;
}

static void __dev_map_entry_free(struct rcu_head *rcu)
{
	struct bpf_dtab_netdev *dev;

	dev = container_of(rcu, struct bpf_dtab_netdev, rcu);
	dev_put(dev->dev);
	kfree(dev);
}

static int dev_map_delete_elem(struct bpf_map *map, void *key)
{
	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
	struct bpf_dtab_netdev *old_dev;
	int k = *(u32 *)key;

	if (k >= map->max_entries)
		return -EINVAL;

	/* Use call_rcu() here to ensure any rcu critical sections have
	 * completed as well as any flush operations, because call_rcu
	 * will wait for the preempt-disable region (NAPI in this context)
	 * to complete. Additionally, driver tear down ensures that all
	 * soft irqs are complete before the net device is removed once
	 * its refcount drops to zero.
	 */
	old_dev = xchg(&dtab->netdev_map[k], NULL);
	if (old_dev)
		call_rcu(&old_dev->rcu, __dev_map_entry_free);
	return 0;
}

static int dev_map_hash_delete_elem(struct bpf_map *map, void *key)
{
	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
	struct bpf_dtab_netdev *old_dev;
	int k = *(u32 *)key;
	unsigned long flags;
	int ret = -ENOENT;

	spin_lock_irqsave(&dtab->index_lock, flags);

	old_dev = __dev_map_hash_lookup_elem(map, k);
	if (old_dev) {
		dtab->items--;
		hlist_del_init_rcu(&old_dev->index_hlist);
		call_rcu(&old_dev->rcu, __dev_map_entry_free);
		ret = 0;
	}
	spin_unlock_irqrestore(&dtab->index_lock, flags);

	return ret;
}

static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
						    struct bpf_dtab *dtab,
						    u32 ifindex,
						    unsigned int idx)
{
	struct bpf_dtab_netdev *dev;

	dev = kmalloc_node(sizeof(*dev), GFP_ATOMIC | __GFP_NOWARN,
			   dtab->map.numa_node);
	if (!dev)
		return ERR_PTR(-ENOMEM);

	dev->dev = dev_get_by_index(net, ifindex);
	if (!dev->dev) {
		kfree(dev);
		return ERR_PTR(-EINVAL);
	}

	dev->idx = idx;
	dev->dtab = dtab;

	return dev;
}

static int __dev_map_update_elem(struct net *net, struct bpf_map *map,
				 void *key, void *value, u64 map_flags)
{
	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
	struct bpf_dtab_netdev *dev, *old_dev;
	u32 ifindex = *(u32 *)value;
	u32 i = *(u32 *)key;

	if (unlikely(map_flags > BPF_EXIST))
		return -EINVAL;
	if (unlikely(i >= dtab->map.max_entries))
		return -E2BIG;
	if (unlikely(map_flags == BPF_NOEXIST))
		return -EEXIST;

	if (!ifindex) {
		dev = NULL;
	} else {
		dev = __dev_map_alloc_node(net, dtab, ifindex, i);
		if (IS_ERR(dev))
			return PTR_ERR(dev);
	}

	/* Use call_rcu() here to ensure that rcu critical sections have
	 * completed, remembering that the driver-side flush operation will
	 * happen before the net device is removed.
	 */
	old_dev = xchg(&dtab->netdev_map[i], dev);
	if (old_dev)
		call_rcu(&old_dev->rcu, __dev_map_entry_free);

	return 0;
}

static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
			       u64 map_flags)
{
	return __dev_map_update_elem(current->nsproxy->net_ns,
				     map, key, value, map_flags);
}

static int __dev_map_hash_update_elem(struct net *net, struct bpf_map *map,
				      void *key, void *value, u64 map_flags)
{
	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
	struct bpf_dtab_netdev *dev, *old_dev;
	u32 ifindex = *(u32 *)value;
	u32 idx = *(u32 *)key;
	unsigned long flags;
	int err = -EEXIST;

	if (unlikely(map_flags > BPF_EXIST || !ifindex))
		return -EINVAL;

	spin_lock_irqsave(&dtab->index_lock, flags);

	old_dev = __dev_map_hash_lookup_elem(map, idx);
	if (old_dev && (map_flags & BPF_NOEXIST))
		goto out_err;

	dev = __dev_map_alloc_node(net, dtab, ifindex, idx);
	if (IS_ERR(dev)) {
		err = PTR_ERR(dev);
		goto out_err;
	}

	if (old_dev) {
		hlist_del_rcu(&old_dev->index_hlist);
	} else {
		if (dtab->items >= dtab->map.max_entries) {
			spin_unlock_irqrestore(&dtab->index_lock, flags);
			call_rcu(&dev->rcu, __dev_map_entry_free);
			return -E2BIG;
		}
		dtab->items++;
	}

	hlist_add_head_rcu(&dev->index_hlist,
			   dev_map_index_hash(dtab, idx));
	spin_unlock_irqrestore(&dtab->index_lock, flags);

	if (old_dev)
		call_rcu(&old_dev->rcu, __dev_map_entry_free);

	return 0;

out_err:
	spin_unlock_irqrestore(&dtab->index_lock, flags);
	return err;
}

static int dev_map_hash_update_elem(struct bpf_map *map, void *key, void *value,
				    u64 map_flags)
{
	return __dev_map_hash_update_elem(current->nsproxy->net_ns,
					  map, key, value, map_flags);
}

const struct bpf_map_ops dev_map_ops = {
	.map_alloc = dev_map_alloc,
	.map_free = dev_map_free,
	.map_get_next_key = dev_map_get_next_key,
	.map_lookup_elem = dev_map_lookup_elem,
	.map_update_elem = dev_map_update_elem,
	.map_delete_elem = dev_map_delete_elem,
	.map_check_btf = map_check_no_btf,
};

const struct bpf_map_ops dev_map_hash_ops = {
	.map_alloc = dev_map_alloc,
	.map_free = dev_map_free,
	.map_get_next_key = dev_map_hash_get_next_key,
	.map_lookup_elem = dev_map_hash_lookup_elem,
	.map_update_elem = dev_map_hash_update_elem,
	.map_delete_elem = dev_map_hash_delete_elem,
	.map_check_btf = map_check_no_btf,
};

static void dev_map_hash_remove_netdev(struct bpf_dtab *dtab,
				       struct net_device *netdev)
{
	unsigned long flags;
	u32 i;

	spin_lock_irqsave(&dtab->index_lock, flags);
	for (i = 0; i < dtab->n_buckets; i++) {
		struct bpf_dtab_netdev *dev;
		struct hlist_head *head;
		struct hlist_node *next;

		head = dev_map_index_hash(dtab, i);

		hlist_for_each_entry_safe(dev, next, head, index_hlist) {
			if (netdev != dev->dev)
				continue;

			dtab->items--;
			hlist_del_rcu(&dev->index_hlist);
			call_rcu(&dev->rcu, __dev_map_entry_free);
		}
	}
	spin_unlock_irqrestore(&dtab->index_lock, flags);
}

static int dev_map_notification(struct notifier_block *notifier,
				ulong event, void *ptr)
{
	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
	struct bpf_dtab *dtab;
	int i, cpu;

	switch (event) {
	case NETDEV_REGISTER:
		if (!netdev->netdev_ops->ndo_xdp_xmit || netdev->xdp_bulkq)
			break;

		/* will be freed in free_netdev() */
		netdev->xdp_bulkq =
			__alloc_percpu_gfp(sizeof(struct xdp_dev_bulk_queue),
					   sizeof(void *), GFP_ATOMIC);
		if (!netdev->xdp_bulkq)
			return NOTIFY_BAD;

		for_each_possible_cpu(cpu)
			per_cpu_ptr(netdev->xdp_bulkq, cpu)->dev = netdev;
		break;
	case NETDEV_UNREGISTER:
		/* This rcu_read_lock/unlock pair is needed because
		 * dev_map_list is an RCU list AND to ensure a delete
		 * operation does not free a netdev_map entry while we
		 * are comparing it against the netdev being unregistered.
		 */
		rcu_read_lock();
		list_for_each_entry_rcu(dtab, &dev_map_list, list) {
			if (dtab->map.map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
				dev_map_hash_remove_netdev(dtab, netdev);
				continue;
			}

			for (i = 0; i < dtab->map.max_entries; i++) {
				struct bpf_dtab_netdev *dev, *odev;

				dev = READ_ONCE(dtab->netdev_map[i]);
				if (!dev || netdev != dev->dev)
					continue;
				odev = cmpxchg(&dtab->netdev_map[i], dev, NULL);
				if (dev == odev)
					call_rcu(&dev->rcu,
						 __dev_map_entry_free);
			}
		}
		rcu_read_unlock();
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block dev_map_notifier = {
	.notifier_call = dev_map_notification,
};

static int __init dev_map_init(void)
{
	int cpu;

	/* Assure tracepoint shadow struct _bpf_dtab_netdev is in sync */
	BUILD_BUG_ON(offsetof(struct bpf_dtab_netdev, dev) !=
		     offsetof(struct _bpf_dtab_netdev, dev));
	register_netdevice_notifier(&dev_map_notifier);

	for_each_possible_cpu(cpu)
		INIT_LIST_HEAD(&per_cpu(dev_flush_list, cpu));
	return 0;
}

subsys_initcall(dev_map_init);