// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io
 */

/* A devmap's primary use is as a backend map for the XDP BPF helper call
 * bpf_redirect_map(). Because XDP is mostly concerned with performance we
 * spent some effort to ensure the datapath with redirect maps does not use
 * any locking. This is a quick note on the details.
 *
 * We have three possible paths to get into the devmap control plane: bpf
 * syscalls, bpf programs, and driver side xmit/flush operations. A bpf syscall
 * will invoke an update, delete, or lookup operation. To ensure updates and
 * deletes appear atomic from the datapath side, xchg() is used to modify the
 * netdev_map array. Then, because the datapath does a lookup into the
 * netdev_map array (read-only) from an RCU critical section, we use call_rcu()
 * to wait for an RCU grace period before freeing the old data structures. This
 * ensures the datapath always has a valid copy. However, the datapath does a
 * "flush" operation that pushes any pending packets in the driver outside the
 * RCU critical section. Each bpf_dtab_netdev tracks these pending operations
 * using a per-cpu flush list. The bpf_dtab_netdev object will not be destroyed
 * until this list is empty, indicating all outstanding flush operations have
 * completed.
 *
 * BPF syscalls may race with BPF program calls on any of the update, delete
 * or lookup operations. As noted above, the xchg() operation also keeps the
 * netdev_map consistent in this case. From the devmap side, BPF programs
 * calling into these operations are the same as multiple user space threads
 * making system calls.
 *
 * Finally, any of the above may race with a netdev_unregister notifier. The
 * unregister notifier must search the map structures for entries that hold a
 * reference to the net device being removed and delete them. This is a two
 * step process: (a) dereference the bpf_dtab_netdev object in netdev_map and
 * (b) check whether the ifindex is the same as the net_device being removed.
 * When removing the dev a cmpxchg() is used to ensure the correct dev is
 * removed; in the case of a concurrent update or delete operation it is
 * possible that the initially referenced dev is no longer in the map. As the
 * notifier hook walks the map we know that new dev references can not be
 * added by the user because core infrastructure ensures dev_get_by_index()
 * calls will fail at this point.
 *
 * The devmap_hash type is a map type which interprets keys as ifindexes and
 * indexes these using a hashmap. This allows maps that use ifindex as the key
 * to be densely packed instead of having holes in the lookup array for unused
 * ifindexes. The setup and packet enqueue/send code is shared between the two
 * types of devmap; only the lookup and insertion is different.
 */
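
/* Illustrative sketch (not part of this file): a minimal XDP program that
 * uses a DEVMAP as the backend for bpf_redirect_map(). The map name, key and
 * flags value are made up for the example; on success the helper returns
 * XDP_REDIRECT.
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_DEVMAP);
 *		__uint(key_size, sizeof(__u32));
 *		__uint(value_size, sizeof(__u32));
 *		__uint(max_entries, 8);
 *	} tx_ports SEC(".maps");
 *
 *	SEC("xdp")
 *	int redirect_prog(struct xdp_md *ctx)
 *	{
 *		__u32 key = 0;
 *
 *		return bpf_redirect_map(&tx_ports, key, 0);
 *	}
 */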

#include <linux/bpf.h>
#include <net/xdp.h>
#include <linux/filter.h>
#include <trace/events/xdp.h>

#define DEV_CREATE_FLAG_MASK \
	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)

struct xdp_dev_bulk_queue {
	struct xdp_frame *q[DEV_MAP_BULK_SIZE];
	struct list_head flush_node;
	struct net_device *dev;
	struct net_device *dev_rx;
	unsigned int count;
};

struct bpf_dtab_netdev {
	struct net_device *dev; /* must be first member, due to tracepoint */
	struct hlist_node index_hlist;
	struct bpf_dtab *dtab;
	struct bpf_prog *xdp_prog;
	struct rcu_head rcu;
	unsigned int idx;
	struct bpf_devmap_val val;
};

struct bpf_dtab {
	struct bpf_map map;
	struct bpf_dtab_netdev **netdev_map; /* DEVMAP type only */
	struct list_head list;

	/* these are only used for DEVMAP_HASH type maps */
	struct hlist_head *dev_index_head;
	spinlock_t index_lock;
	unsigned int items;
	u32 n_buckets;
};

static DEFINE_PER_CPU(struct list_head, dev_flush_list);
static DEFINE_SPINLOCK(dev_map_lock);
static LIST_HEAD(dev_map_list);

static struct hlist_head *dev_map_create_hash(unsigned int entries,
					      int numa_node)
{
	int i;
	struct hlist_head *hash;

	hash = bpf_map_area_alloc(entries * sizeof(*hash), numa_node);
	if (hash != NULL)
		for (i = 0; i < entries; i++)
			INIT_HLIST_HEAD(&hash[i]);

	return hash;
}

static inline struct hlist_head *dev_map_index_hash(struct bpf_dtab *dtab,
						    int idx)
{
	return &dtab->dev_index_head[idx & (dtab->n_buckets - 1)];
}

static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
{
	u32 valsize = attr->value_size;
	u64 cost = 0;
	int err;

	/* check sanity of attributes. 2 value sizes supported:
	 * 4 bytes: ifindex
	 * 8 bytes: ifindex + prog fd
	 */
	if (attr->max_entries == 0 || attr->key_size != 4 ||
	    (valsize != offsetofend(struct bpf_devmap_val, ifindex) &&
	     valsize != offsetofend(struct bpf_devmap_val, bpf_prog.fd)) ||
	    attr->map_flags & ~DEV_CREATE_FLAG_MASK)
		return -EINVAL;

	/* Lookup returns a pointer straight to dev->ifindex, so make sure the
	 * verifier prevents writes from the BPF side
	 */
	attr->map_flags |= BPF_F_RDONLY_PROG;

	bpf_map_init_from_attr(&dtab->map, attr);

	if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
		dtab->n_buckets = roundup_pow_of_two(dtab->map.max_entries);

		if (!dtab->n_buckets) /* Overflow check */
			return -EINVAL;
		cost += (u64) sizeof(struct hlist_head) * dtab->n_buckets;
	} else {
		cost += (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *);
	}

	/* if map size is larger than memlock limit, reject it */
	err = bpf_map_charge_init(&dtab->map.memory, cost);
	if (err)
		return -EINVAL;

	if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
		dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets,
							   dtab->map.numa_node);
		if (!dtab->dev_index_head)
			goto free_charge;

		spin_lock_init(&dtab->index_lock);
	} else {
		dtab->netdev_map = bpf_map_area_alloc(dtab->map.max_entries *
						      sizeof(struct bpf_dtab_netdev *),
						      dtab->map.numa_node);
		if (!dtab->netdev_map)
			goto free_charge;
	}

	return 0;

free_charge:
	bpf_map_charge_finish(&dtab->map.memory);
	return -ENOMEM;
}
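
/* Illustrative sketch (not part of this file): creating a DEVMAP and
 * installing an entry from user space with the libbpf syscall wrappers of
 * this era. With an 8-byte value, bpf_devmap_val.bpf_prog.fd may name a
 * BPF_XDP_DEVMAP program to run on egress; per the fd > 0 check in this file,
 * a non-positive fd means none. The variable names are made up.
 *
 *	struct bpf_devmap_val val = { .ifindex = ifindex, .bpf_prog.fd = -1 };
 *	__u32 key = 0;
 *	int map_fd;
 *
 *	map_fd = bpf_create_map(BPF_MAP_TYPE_DEVMAP, sizeof(__u32),
 *				sizeof(struct bpf_devmap_val), 8, 0);
 *	if (map_fd < 0 || bpf_map_update_elem(map_fd, &key, &val, BPF_ANY))
 *		return -1;
 */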

static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
{
	struct bpf_dtab *dtab;
	int err;

	if (!capable(CAP_NET_ADMIN))
		return ERR_PTR(-EPERM);

	dtab = kzalloc(sizeof(*dtab), GFP_USER);
	if (!dtab)
		return ERR_PTR(-ENOMEM);

	err = dev_map_init_map(dtab, attr);
	if (err) {
		kfree(dtab);
		return ERR_PTR(err);
	}

	spin_lock(&dev_map_lock);
	list_add_tail_rcu(&dtab->list, &dev_map_list);
	spin_unlock(&dev_map_lock);

	return &dtab->map;
}

static void dev_map_free(struct bpf_map *map)
{
	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
	int i;

	/* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
	 * so the programs (there can be more than one that used this map)
	 * have been disconnected from events. The following synchronize_rcu()
	 * guarantees that both rcu read critical sections have completed and
	 * waits for preempt-disable regions (NAPI being the relevant context
	 * here), so we are certain there will be no further reads against the
	 * netdev_map and all flush operations are complete. Flush operations
	 * can only be done from NAPI context for this reason.
	 */

	spin_lock(&dev_map_lock);
	list_del_rcu(&dtab->list);
	spin_unlock(&dev_map_lock);

	bpf_clear_redirect_map(map);
	synchronize_rcu();

	/* Make sure prior __dev_map_entry_free() calls have completed. */
	rcu_barrier();

	if (dtab->map.map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
		for (i = 0; i < dtab->n_buckets; i++) {
			struct bpf_dtab_netdev *dev;
			struct hlist_head *head;
			struct hlist_node *next;

			head = dev_map_index_hash(dtab, i);

			hlist_for_each_entry_safe(dev, next, head, index_hlist) {
				hlist_del_rcu(&dev->index_hlist);
				if (dev->xdp_prog)
					bpf_prog_put(dev->xdp_prog);
				dev_put(dev->dev);
				kfree(dev);
			}
		}

		bpf_map_area_free(dtab->dev_index_head);
	} else {
		for (i = 0; i < dtab->map.max_entries; i++) {
			struct bpf_dtab_netdev *dev;

			dev = dtab->netdev_map[i];
			if (!dev)
				continue;

			if (dev->xdp_prog)
				bpf_prog_put(dev->xdp_prog);
			dev_put(dev->dev);
			kfree(dev);
		}

		bpf_map_area_free(dtab->netdev_map);
	}

	kfree(dtab);
}
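
/* Illustrative sketch (not part of this file): walking all keys of a devmap
 * from user space with the get_next_key semantics implemented below. For the
 * array type, a NULL or out-of-range key restarts at slot 0 and -ENOENT marks
 * the end; the hash type starts from the first occupied bucket instead. The
 * handle_key() helper is made up.
 *
 *	__u32 key, next_key;
 *	int err;
 *
 *	err = bpf_map_get_next_key(map_fd, NULL, &next_key);
 *	while (!err) {
 *		handle_key(next_key);
 *		key = next_key;
 *		err = bpf_map_get_next_key(map_fd, &key, &next_key);
 *	}
 */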

static int dev_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
	u32 index = key ? *(u32 *)key : U32_MAX;
	u32 *next = next_key;

	if (index >= dtab->map.max_entries) {
		*next = 0;
		return 0;
	}

	if (index == dtab->map.max_entries - 1)
		return -ENOENT;
	*next = index + 1;
	return 0;
}

struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key)
{
	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
	struct hlist_head *head = dev_map_index_hash(dtab, key);
	struct bpf_dtab_netdev *dev;

	hlist_for_each_entry_rcu(dev, head, index_hlist,
				 lockdep_is_held(&dtab->index_lock))
		if (dev->idx == key)
			return dev;

	return NULL;
}

static int dev_map_hash_get_next_key(struct bpf_map *map, void *key,
				     void *next_key)
{
	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
	u32 idx, *next = next_key;
	struct bpf_dtab_netdev *dev, *next_dev;
	struct hlist_head *head;
	int i = 0;

	if (!key)
		goto find_first;

	idx = *(u32 *)key;

	dev = __dev_map_hash_lookup_elem(map, idx);
	if (!dev)
		goto find_first;

	next_dev = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&dev->index_hlist)),
				    struct bpf_dtab_netdev, index_hlist);

	if (next_dev) {
		*next = next_dev->idx;
		return 0;
	}

	i = idx & (dtab->n_buckets - 1);
	i++;

find_first:
	for (; i < dtab->n_buckets; i++) {
		head = dev_map_index_hash(dtab, i);

		next_dev = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),
					    struct bpf_dtab_netdev,
					    index_hlist);
		if (next_dev) {
			*next = next_dev->idx;
			return 0;
		}
	}

	return -ENOENT;
}

bool dev_map_can_have_prog(struct bpf_map *map)
{
	if ((map->map_type == BPF_MAP_TYPE_DEVMAP ||
	     map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) &&
	    map->value_size != offsetofend(struct bpf_devmap_val, ifindex))
		return true;

	return false;
}

static void bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
{
	struct net_device *dev = bq->dev;
	int sent = 0, drops = 0, err = 0;
	int i;

	if (unlikely(!bq->count))
		return;

	for (i = 0; i < bq->count; i++) {
		struct xdp_frame *xdpf = bq->q[i];

		prefetch(xdpf);
	}

	sent = dev->netdev_ops->ndo_xdp_xmit(dev, bq->count, bq->q, flags);
	if (sent < 0) {
		err = sent;
		sent = 0;
		goto error;
	}
	drops = bq->count - sent;
out:
	bq->count = 0;

	trace_xdp_devmap_xmit(bq->dev_rx, dev, sent, drops, err);
	bq->dev_rx = NULL;
	__list_del_clearprev(&bq->flush_node);
	return;
error:
	/* If ndo_xdp_xmit fails with an errno, no frames have been
	 * xmit'ed and it's our responsibility to free them all.
	 */
	for (i = 0; i < bq->count; i++) {
		struct xdp_frame *xdpf = bq->q[i];

		xdp_return_frame_rx_napi(xdpf);
		drops++;
	}
	goto out;
}
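
/* Illustrative sketch (not part of this file): the driver contract that
 * bq_xmit_all() relies on. ndo_xdp_xmit() returns the number of frames it
 * transmitted, freeing any frames it could not send (counted as drops above),
 * or a negative errno, in which case no frames were consumed and bq_xmit_all()
 * frees the whole bulk itself. The example_* helpers are made up.
 *
 *	static int example_xdp_xmit(struct net_device *dev, int n,
 *				    struct xdp_frame **frames, u32 flags)
 *	{
 *		int i, drops = 0;
 *
 *		if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
 *			return -EINVAL;
 *		for (i = 0; i < n; i++) {
 *			if (example_tx_one(dev, frames[i])) {
 *				xdp_return_frame_rx_napi(frames[i]);
 *				drops++;
 *			}
 *		}
 *		if (flags & XDP_XMIT_FLUSH)
 *			example_tx_kick(dev);
 *		return n - drops;
 *	}
 */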

/* __dev_flush is called from xdp_do_flush() which _must_ be signaled from the
 * driver before returning from its napi->poll() routine. The poll() routine
 * is called either from busy_poll context or net_rx_action signaled from
 * NET_RX_SOFTIRQ. Either way the poll routine must complete before the net
 * device can be torn down. On devmap tear down we wait for the flush list to
 * be empty before completing, so that all flush operations have finished.
 * When drivers update the bpf program they may need to ensure any flush ops
 * are also complete. Using synchronize_rcu or call_rcu will suffice for this
 * because both wait for napi context to exit.
 */
void __dev_flush(void)
{
	struct list_head *flush_list = this_cpu_ptr(&dev_flush_list);
	struct xdp_dev_bulk_queue *bq, *tmp;

	list_for_each_entry_safe(bq, tmp, flush_list, flush_node)
		bq_xmit_all(bq, XDP_XMIT_FLUSH);
}
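
/* Illustrative sketch (not part of this file): how a driver's NAPI poll
 * routine pairs with __dev_flush(). Processing RX may enqueue frames via
 * dev_map_enqueue(); the driver then calls xdp_do_flush(), which drains this
 * CPU's dev_flush_list, before completing NAPI. The example_* helpers are
 * made up.
 *
 *	static int example_napi_poll(struct napi_struct *napi, int budget)
 *	{
 *		int work = example_process_rx(napi, budget);
 *
 *		xdp_do_flush();
 *		if (work < budget)
 *			napi_complete_done(napi, work);
 *		return work;
 *	}
 */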

/* rcu_read_lock (from syscall and BPF contexts) ensures that if a delete
 * and/or update happens in parallel here, a dev_put won't happen until after
 * reading the ifindex.
 */
struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
{
	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
	struct bpf_dtab_netdev *obj;

	if (key >= map->max_entries)
		return NULL;

	obj = READ_ONCE(dtab->netdev_map[key]);
	return obj;
}

/* Runs under RCU-read-side, plus in softirq under NAPI protection.
 * Thus, safe percpu variable access.
 */
static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
		       struct net_device *dev_rx)
{
	struct list_head *flush_list = this_cpu_ptr(&dev_flush_list);
	struct xdp_dev_bulk_queue *bq = this_cpu_ptr(dev->xdp_bulkq);

	if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
		bq_xmit_all(bq, 0);

	/* Ingress dev_rx will be the same for all xdp_frame's in bulk_queue,
	 * because bq is stored per-CPU and must be flushed at the end of the
	 * net_device driver's NAPI function.
	 */
	if (!bq->dev_rx)
		bq->dev_rx = dev_rx;

	bq->q[bq->count++] = xdpf;

	if (!bq->flush_node.prev)
		list_add(&bq->flush_node, flush_list);
}

static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
				struct net_device *dev_rx)
{
	struct xdp_frame *xdpf;
	int err;

	if (!dev->netdev_ops->ndo_xdp_xmit)
		return -EOPNOTSUPP;

	err = xdp_ok_fwd_dev(dev, xdp->data_end - xdp->data);
	if (unlikely(err))
		return err;

	xdpf = xdp_convert_buff_to_frame(xdp);
	if (unlikely(!xdpf))
		return -EOVERFLOW;

	bq_enqueue(dev, xdpf, dev_rx);
	return 0;
}

static struct xdp_buff *dev_map_run_prog(struct net_device *dev,
					 struct xdp_buff *xdp,
					 struct bpf_prog *xdp_prog)
{
	struct xdp_txq_info txq = { .dev = dev };
	u32 act;

	xdp_set_data_meta_invalid(xdp);
	xdp->txq = &txq;

	act = bpf_prog_run_xdp(xdp_prog, xdp);
	switch (act) {
	case XDP_PASS:
		return xdp;
	case XDP_DROP:
		break;
	default:
		bpf_warn_invalid_xdp_action(act);
		fallthrough;
	case XDP_ABORTED:
		trace_xdp_exception(dev, xdp_prog, act);
		break;
	}

	xdp_return_buff(xdp);
	return NULL;
}

int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
		    struct net_device *dev_rx)
{
	return __xdp_enqueue(dev, xdp, dev_rx);
}

int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
		    struct net_device *dev_rx)
{
	struct net_device *dev = dst->dev;

	if (dst->xdp_prog) {
		xdp = dev_map_run_prog(dev, xdp, dst->xdp_prog);
		if (!xdp)
			return 0;
	}
	return __xdp_enqueue(dev, xdp, dev_rx);
}
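
/* Illustrative sketch (not part of this file): a program loaded with expected
 * attach type BPF_XDP_DEVMAP, as required by __dev_map_alloc_node() when a
 * prog fd is set in the map value. dev_map_run_prog() runs it before enqueue
 * and honors XDP_PASS/XDP_DROP. The section name follows the libbpf
 * convention of this era; the ALLOWED_IFINDEX policy check is made up.
 *
 *	SEC("xdp_devmap")
 *	int xdp_devmap_prog(struct xdp_md *ctx)
 *	{
 *		if (ctx->egress_ifindex != ALLOWED_IFINDEX)
 *			return XDP_DROP;
 *		return XDP_PASS;
 *	}
 */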

int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
			     struct bpf_prog *xdp_prog)
{
	int err;

	err = xdp_ok_fwd_dev(dst->dev, skb->len);
	if (unlikely(err))
		return err;
	skb->dev = dst->dev;
	generic_xdp_tx(skb, xdp_prog);

	return 0;
}

static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_dtab_netdev *obj = __dev_map_lookup_elem(map, *(u32 *)key);

	return obj ? &obj->val : NULL;
}

static void *dev_map_hash_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_dtab_netdev *obj = __dev_map_hash_lookup_elem(map,
								 *(u32 *)key);
	return obj ? &obj->val : NULL;
}

static void __dev_map_entry_free(struct rcu_head *rcu)
{
	struct bpf_dtab_netdev *dev;

	dev = container_of(rcu, struct bpf_dtab_netdev, rcu);
	if (dev->xdp_prog)
		bpf_prog_put(dev->xdp_prog);
	dev_put(dev->dev);
	kfree(dev);
}

static int dev_map_delete_elem(struct bpf_map *map, void *key)
{
	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
	struct bpf_dtab_netdev *old_dev;
	int k = *(u32 *)key;

	if (k >= map->max_entries)
		return -EINVAL;

	/* Use call_rcu() here to ensure any rcu critical sections have
	 * completed as well as any flush operations, because call_rcu
	 * will wait for the preempt-disable region to complete, NAPI in
	 * this context. Additionally, the driver tear down ensures all
	 * soft irqs are complete before removing the net device in the
	 * case where dev_put reaches zero.
	 */
	old_dev = xchg(&dtab->netdev_map[k], NULL);
	if (old_dev)
		call_rcu(&old_dev->rcu, __dev_map_entry_free);
	return 0;
}

static int dev_map_hash_delete_elem(struct bpf_map *map, void *key)
{
	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
	struct bpf_dtab_netdev *old_dev;
	int k = *(u32 *)key;
	unsigned long flags;
	int ret = -ENOENT;

	spin_lock_irqsave(&dtab->index_lock, flags);

	old_dev = __dev_map_hash_lookup_elem(map, k);
	if (old_dev) {
		dtab->items--;
		hlist_del_init_rcu(&old_dev->index_hlist);
		call_rcu(&old_dev->rcu, __dev_map_entry_free);
		ret = 0;
	}
	spin_unlock_irqrestore(&dtab->index_lock, flags);

	return ret;
}

static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
						    struct bpf_dtab *dtab,
						    struct bpf_devmap_val *val,
						    unsigned int idx)
{
	struct bpf_prog *prog = NULL;
	struct bpf_dtab_netdev *dev;

	dev = kmalloc_node(sizeof(*dev), GFP_ATOMIC | __GFP_NOWARN,
			   dtab->map.numa_node);
	if (!dev)
		return ERR_PTR(-ENOMEM);

	dev->dev = dev_get_by_index(net, val->ifindex);
	if (!dev->dev)
		goto err_out;

	if (val->bpf_prog.fd > 0) {
		prog = bpf_prog_get_type_dev(val->bpf_prog.fd,
					     BPF_PROG_TYPE_XDP, false);
		if (IS_ERR(prog))
			goto err_put_dev;
		if (prog->expected_attach_type != BPF_XDP_DEVMAP)
			goto err_put_prog;
	}

	dev->idx = idx;
	dev->dtab = dtab;
	if (prog) {
		dev->xdp_prog = prog;
		dev->val.bpf_prog.id = prog->aux->id;
	} else {
		dev->xdp_prog = NULL;
		dev->val.bpf_prog.id = 0;
	}
	dev->val.ifindex = val->ifindex;

	return dev;
err_put_prog:
	bpf_prog_put(prog);
err_put_dev:
	dev_put(dev->dev);
err_out:
	kfree(dev);
	return ERR_PTR(-EINVAL);
}

static int __dev_map_update_elem(struct net *net, struct bpf_map *map,
				 void *key, void *value, u64 map_flags)
{
	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
	struct bpf_dtab_netdev *dev, *old_dev;
	struct bpf_devmap_val val = {};
	u32 i = *(u32 *)key;

	if (unlikely(map_flags > BPF_EXIST))
		return -EINVAL;
	if (unlikely(i >= dtab->map.max_entries))
		return -E2BIG;
	if (unlikely(map_flags == BPF_NOEXIST))
		return -EEXIST;

	/* already verified value_size <= sizeof val */
	memcpy(&val, value, map->value_size);

	if (!val.ifindex) {
		dev = NULL;
		/* can not specify fd if ifindex is 0 */
		if (val.bpf_prog.fd > 0)
			return -EINVAL;
	} else {
		dev = __dev_map_alloc_node(net, dtab, &val, i);
		if (IS_ERR(dev))
			return PTR_ERR(dev);
	}

	/* Use call_rcu() here to ensure rcu critical sections have completed,
	 * remembering that the driver side flush operation will happen before
	 * the net device is removed.
	 */
	old_dev = xchg(&dtab->netdev_map[i], dev);
	if (old_dev)
		call_rcu(&old_dev->rcu, __dev_map_entry_free);

	return 0;
}

static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
			       u64 map_flags)
{
	return __dev_map_update_elem(current->nsproxy->net_ns,
				     map, key, value, map_flags);
}

static int __dev_map_hash_update_elem(struct net *net, struct bpf_map *map,
				      void *key, void *value, u64 map_flags)
{
	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
	struct bpf_dtab_netdev *dev, *old_dev;
	struct bpf_devmap_val val = {};
	u32 idx = *(u32 *)key;
	unsigned long flags;
	int err = -EEXIST;

	/* already verified value_size <= sizeof val */
	memcpy(&val, value, map->value_size);

	if (unlikely(map_flags > BPF_EXIST || !val.ifindex))
		return -EINVAL;

	spin_lock_irqsave(&dtab->index_lock, flags);

	old_dev = __dev_map_hash_lookup_elem(map, idx);
	if (old_dev && (map_flags & BPF_NOEXIST))
		goto out_err;

	dev = __dev_map_alloc_node(net, dtab, &val, idx);
	if (IS_ERR(dev)) {
		err = PTR_ERR(dev);
		goto out_err;
	}

	if (old_dev) {
		hlist_del_rcu(&old_dev->index_hlist);
	} else {
		if (dtab->items >= dtab->map.max_entries) {
			spin_unlock_irqrestore(&dtab->index_lock, flags);
			call_rcu(&dev->rcu, __dev_map_entry_free);
			return -E2BIG;
		}
		dtab->items++;
	}

	hlist_add_head_rcu(&dev->index_hlist,
			   dev_map_index_hash(dtab, idx));
	spin_unlock_irqrestore(&dtab->index_lock, flags);

	if (old_dev)
		call_rcu(&old_dev->rcu, __dev_map_entry_free);

	return 0;

out_err:
	spin_unlock_irqrestore(&dtab->index_lock, flags);
	return err;
}

static int dev_map_hash_update_elem(struct bpf_map *map, void *key, void *value,
				    u64 map_flags)
{
	return __dev_map_hash_update_elem(current->nsproxy->net_ns,
					  map, key, value, map_flags);
}

static int dev_map_btf_id;
const struct bpf_map_ops dev_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc = dev_map_alloc,
	.map_free = dev_map_free,
	.map_get_next_key = dev_map_get_next_key,
	.map_lookup_elem = dev_map_lookup_elem,
	.map_update_elem = dev_map_update_elem,
	.map_delete_elem = dev_map_delete_elem,
	.map_check_btf = map_check_no_btf,
	.map_btf_name = "bpf_dtab",
	.map_btf_id = &dev_map_btf_id,
};

static int dev_map_hash_map_btf_id;
const struct bpf_map_ops dev_map_hash_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc = dev_map_alloc,
	.map_free = dev_map_free,
	.map_get_next_key = dev_map_hash_get_next_key,
	.map_lookup_elem = dev_map_hash_lookup_elem,
	.map_update_elem = dev_map_hash_update_elem,
	.map_delete_elem = dev_map_hash_delete_elem,
	.map_check_btf = map_check_no_btf,
	.map_btf_name = "bpf_dtab",
	.map_btf_id = &dev_map_hash_map_btf_id,
};

static void dev_map_hash_remove_netdev(struct bpf_dtab *dtab,
				       struct net_device *netdev)
{
	unsigned long flags;
	u32 i;

	spin_lock_irqsave(&dtab->index_lock, flags);
	for (i = 0; i < dtab->n_buckets; i++) {
		struct bpf_dtab_netdev *dev;
		struct hlist_head *head;
		struct hlist_node *next;

		head = dev_map_index_hash(dtab, i);

		hlist_for_each_entry_safe(dev, next, head, index_hlist) {
			if (netdev != dev->dev)
				continue;

			dtab->items--;
			hlist_del_rcu(&dev->index_hlist);
			call_rcu(&dev->rcu, __dev_map_entry_free);
		}
	}
	spin_unlock_irqrestore(&dtab->index_lock, flags);
}

static int dev_map_notification(struct notifier_block *notifier,
				ulong event, void *ptr)
{
	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
	struct bpf_dtab *dtab;
	int i, cpu;

	switch (event) {
	case NETDEV_REGISTER:
		if (!netdev->netdev_ops->ndo_xdp_xmit || netdev->xdp_bulkq)
			break;

		/* will be freed in free_netdev() */
		netdev->xdp_bulkq =
			__alloc_percpu_gfp(sizeof(struct xdp_dev_bulk_queue),
					   sizeof(void *), GFP_ATOMIC);
		if (!netdev->xdp_bulkq)
			return NOTIFY_BAD;

		for_each_possible_cpu(cpu)
			per_cpu_ptr(netdev->xdp_bulkq, cpu)->dev = netdev;
		break;
	case NETDEV_UNREGISTER:
		/* This rcu_read_lock/unlock pair is needed because
		 * dev_map_list is an RCU list AND to ensure a delete
		 * operation does not free a netdev_map entry while we
		 * are comparing it against the netdev being unregistered.
		 */
		rcu_read_lock();
		list_for_each_entry_rcu(dtab, &dev_map_list, list) {
			if (dtab->map.map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
				dev_map_hash_remove_netdev(dtab, netdev);
				continue;
			}

			for (i = 0; i < dtab->map.max_entries; i++) {
				struct bpf_dtab_netdev *dev, *odev;

				dev = READ_ONCE(dtab->netdev_map[i]);
				if (!dev || netdev != dev->dev)
					continue;
				odev = cmpxchg(&dtab->netdev_map[i], dev, NULL);
				if (dev == odev)
					call_rcu(&dev->rcu,
						 __dev_map_entry_free);
			}
		}
		rcu_read_unlock();
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block dev_map_notifier = {
	.notifier_call = dev_map_notification,
};

static int __init dev_map_init(void)
{
	int cpu;

	/* Assure tracepoint shadow struct _bpf_dtab_netdev is in sync */
	BUILD_BUG_ON(offsetof(struct bpf_dtab_netdev, dev) !=
		     offsetof(struct _bpf_dtab_netdev, dev));
	register_netdevice_notifier(&dev_map_notifier);

	for_each_possible_cpu(cpu)
		INIT_LIST_HEAD(&per_cpu(dev_flush_list, cpu));
	return 0;
}

subsys_initcall(dev_map_init);