xref: /linux-6.15/drivers/net/netdevsim/netdev.c (revision cae03e5b)
1 /*
2  * Copyright (C) 2017 Netronome Systems, Inc.
3  *
4  * This software is licensed under the GNU General License Version 2,
5  * June 1991 as shown in the file COPYING in the top-level directory of this
6  * source tree.
7  *
8  * THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS"
9  * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
10  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
11  * FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE
12  * OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME
13  * THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
14  */
15 
16 #include <linux/debugfs.h>
17 #include <linux/etherdevice.h>
18 #include <linux/ethtool_netlink.h>
19 #include <linux/kernel.h>
20 #include <linux/module.h>
21 #include <linux/netdevice.h>
22 #include <linux/slab.h>
23 #include <net/netdev_queues.h>
24 #include <net/netdev_rx_queue.h>
25 #include <net/page_pool/helpers.h>
26 #include <net/netlink.h>
27 #include <net/net_shaper.h>
28 #include <net/pkt_cls.h>
29 #include <net/rtnetlink.h>
30 #include <net/udp_tunnel.h>
31 
32 #include "netdevsim.h"
33 
34 MODULE_IMPORT_NS("NETDEV_INTERNAL");
35 
/* Used in this file as the backlog threshold for a queue's skb list and as
 * the page pool size.
 */
#define NSIM_RING_SIZE		256
37 
38 static int nsim_napi_rx(struct nsim_rq *rq, struct sk_buff *skb)
39 {
40 	if (skb_queue_len(&rq->skb_queue) > NSIM_RING_SIZE) {
41 		dev_kfree_skb_any(skb);
42 		return NET_RX_DROP;
43 	}
44 
45 	skb_queue_tail(&rq->skb_queue, skb);
46 	return NET_RX_SUCCESS;
47 }
48 
/* Run @skb through __dev_forward_skb() towards @dev and, if that returned
 * zero, queue it on @rq.  A non-zero result from the forward step is
 * returned as is; otherwise the result of nsim_napi_rx() is returned.
 */
static int nsim_forward_skb(struct net_device *dev, struct sk_buff *skb,
			    struct nsim_rq *rq)
{
	int ret;

	ret = __dev_forward_skb(dev, skb);
	if (ret)
		return ret;

	return nsim_napi_rx(rq, skb);
}
54 
55 static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev)
56 {
57 	struct netdevsim *ns = netdev_priv(dev);
58 	struct net_device *peer_dev;
59 	unsigned int len = skb->len;
60 	struct netdevsim *peer_ns;
61 	struct netdev_config *cfg;
62 	struct nsim_rq *rq;
63 	int rxq;
64 
65 	rcu_read_lock();
66 	if (!nsim_ipsec_tx(ns, skb))
67 		goto out_drop_free;
68 
69 	peer_ns = rcu_dereference(ns->peer);
70 	if (!peer_ns)
71 		goto out_drop_free;
72 
73 	peer_dev = peer_ns->netdev;
74 	rxq = skb_get_queue_mapping(skb);
75 	if (rxq >= peer_dev->num_rx_queues)
76 		rxq = rxq % peer_dev->num_rx_queues;
77 	rq = peer_ns->rq[rxq];
78 
79 	cfg = peer_dev->cfg;
80 	if (skb_is_nonlinear(skb) &&
81 	    (cfg->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED ||
82 	     (cfg->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED &&
83 	      cfg->hds_thresh > len)))
84 		skb_linearize(skb);
85 
86 	skb_tx_timestamp(skb);
87 	if (unlikely(nsim_forward_skb(peer_dev, skb, rq) == NET_RX_DROP))
88 		goto out_drop_cnt;
89 
90 	if (!hrtimer_active(&rq->napi_timer))
91 		hrtimer_start(&rq->napi_timer, us_to_ktime(5), HRTIMER_MODE_REL);
92 
93 	rcu_read_unlock();
94 	u64_stats_update_begin(&ns->syncp);
95 	ns->tx_packets++;
96 	ns->tx_bytes += len;
97 	u64_stats_update_end(&ns->syncp);
98 	return NETDEV_TX_OK;
99 
100 out_drop_free:
101 	dev_kfree_skb(skb);
102 out_drop_cnt:
103 	rcu_read_unlock();
104 	u64_stats_update_begin(&ns->syncp);
105 	ns->tx_dropped++;
106 	u64_stats_update_end(&ns->syncp);
107 	return NETDEV_TX_OK;
108 }
109 
/* .ndo_set_rx_mode handler; intentionally does nothing. */
static void nsim_set_rx_mode(struct net_device *dev)
{
}
113 
114 static int nsim_change_mtu(struct net_device *dev, int new_mtu)
115 {
116 	struct netdevsim *ns = netdev_priv(dev);
117 
118 	if (ns->xdp.prog && new_mtu > NSIM_XDP_MAX_MTU)
119 		return -EBUSY;
120 
121 	WRITE_ONCE(dev->mtu, new_mtu);
122 
123 	return 0;
124 }
125 
126 static void
127 nsim_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
128 {
129 	struct netdevsim *ns = netdev_priv(dev);
130 	unsigned int start;
131 
132 	do {
133 		start = u64_stats_fetch_begin(&ns->syncp);
134 		stats->tx_bytes = ns->tx_bytes;
135 		stats->tx_packets = ns->tx_packets;
136 		stats->tx_dropped = ns->tx_dropped;
137 	} while (u64_stats_fetch_retry(&ns->syncp, start));
138 }
139 
/* Flow block callback; passes all arguments straight through to
 * nsim_bpf_setup_tc_block_cb() and returns its result.
 */
static int
nsim_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
{
	return nsim_bpf_setup_tc_block_cb(type, type_data, cb_priv);
}
145 
146 static int nsim_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
147 {
148 	struct netdevsim *ns = netdev_priv(dev);
149 	struct nsim_dev *nsim_dev = ns->nsim_dev;
150 
151 	/* Only refuse multicast addresses, zero address can mean unset/any. */
152 	if (vf >= nsim_dev_get_vfs(nsim_dev) || is_multicast_ether_addr(mac))
153 		return -EINVAL;
154 	memcpy(nsim_dev->vfconfigs[vf].vf_mac, mac, ETH_ALEN);
155 
156 	return 0;
157 }
158 
159 static int nsim_set_vf_vlan(struct net_device *dev, int vf,
160 			    u16 vlan, u8 qos, __be16 vlan_proto)
161 {
162 	struct netdevsim *ns = netdev_priv(dev);
163 	struct nsim_dev *nsim_dev = ns->nsim_dev;
164 
165 	if (vf >= nsim_dev_get_vfs(nsim_dev) || vlan > 4095 || qos > 7)
166 		return -EINVAL;
167 
168 	nsim_dev->vfconfigs[vf].vlan = vlan;
169 	nsim_dev->vfconfigs[vf].qos = qos;
170 	nsim_dev->vfconfigs[vf].vlan_proto = vlan_proto;
171 
172 	return 0;
173 }
174 
175 static int nsim_set_vf_rate(struct net_device *dev, int vf, int min, int max)
176 {
177 	struct netdevsim *ns = netdev_priv(dev);
178 	struct nsim_dev *nsim_dev = ns->nsim_dev;
179 
180 	if (nsim_esw_mode_is_switchdev(ns->nsim_dev)) {
181 		pr_err("Not supported in switchdev mode. Please use devlink API.\n");
182 		return -EOPNOTSUPP;
183 	}
184 
185 	if (vf >= nsim_dev_get_vfs(nsim_dev))
186 		return -EINVAL;
187 
188 	nsim_dev->vfconfigs[vf].min_tx_rate = min;
189 	nsim_dev->vfconfigs[vf].max_tx_rate = max;
190 
191 	return 0;
192 }
193 
194 static int nsim_set_vf_spoofchk(struct net_device *dev, int vf, bool val)
195 {
196 	struct netdevsim *ns = netdev_priv(dev);
197 	struct nsim_dev *nsim_dev = ns->nsim_dev;
198 
199 	if (vf >= nsim_dev_get_vfs(nsim_dev))
200 		return -EINVAL;
201 	nsim_dev->vfconfigs[vf].spoofchk_enabled = val;
202 
203 	return 0;
204 }
205 
206 static int nsim_set_vf_rss_query_en(struct net_device *dev, int vf, bool val)
207 {
208 	struct netdevsim *ns = netdev_priv(dev);
209 	struct nsim_dev *nsim_dev = ns->nsim_dev;
210 
211 	if (vf >= nsim_dev_get_vfs(nsim_dev))
212 		return -EINVAL;
213 	nsim_dev->vfconfigs[vf].rss_query_enabled = val;
214 
215 	return 0;
216 }
217 
218 static int nsim_set_vf_trust(struct net_device *dev, int vf, bool val)
219 {
220 	struct netdevsim *ns = netdev_priv(dev);
221 	struct nsim_dev *nsim_dev = ns->nsim_dev;
222 
223 	if (vf >= nsim_dev_get_vfs(nsim_dev))
224 		return -EINVAL;
225 	nsim_dev->vfconfigs[vf].trusted = val;
226 
227 	return 0;
228 }
229 
230 static int
231 nsim_get_vf_config(struct net_device *dev, int vf, struct ifla_vf_info *ivi)
232 {
233 	struct netdevsim *ns = netdev_priv(dev);
234 	struct nsim_dev *nsim_dev = ns->nsim_dev;
235 
236 	if (vf >= nsim_dev_get_vfs(nsim_dev))
237 		return -EINVAL;
238 
239 	ivi->vf = vf;
240 	ivi->linkstate = nsim_dev->vfconfigs[vf].link_state;
241 	ivi->min_tx_rate = nsim_dev->vfconfigs[vf].min_tx_rate;
242 	ivi->max_tx_rate = nsim_dev->vfconfigs[vf].max_tx_rate;
243 	ivi->vlan = nsim_dev->vfconfigs[vf].vlan;
244 	ivi->vlan_proto = nsim_dev->vfconfigs[vf].vlan_proto;
245 	ivi->qos = nsim_dev->vfconfigs[vf].qos;
246 	memcpy(&ivi->mac, nsim_dev->vfconfigs[vf].vf_mac, ETH_ALEN);
247 	ivi->spoofchk = nsim_dev->vfconfigs[vf].spoofchk_enabled;
248 	ivi->trusted = nsim_dev->vfconfigs[vf].trusted;
249 	ivi->rss_query_en = nsim_dev->vfconfigs[vf].rss_query_enabled;
250 
251 	return 0;
252 }
253 
254 static int nsim_set_vf_link_state(struct net_device *dev, int vf, int state)
255 {
256 	struct netdevsim *ns = netdev_priv(dev);
257 	struct nsim_dev *nsim_dev = ns->nsim_dev;
258 
259 	if (vf >= nsim_dev_get_vfs(nsim_dev))
260 		return -EINVAL;
261 
262 	switch (state) {
263 	case IFLA_VF_LINK_STATE_AUTO:
264 	case IFLA_VF_LINK_STATE_ENABLE:
265 	case IFLA_VF_LINK_STATE_DISABLE:
266 		break;
267 	default:
268 		return -EINVAL;
269 	}
270 
271 	nsim_dev->vfconfigs[vf].link_state = state;
272 
273 	return 0;
274 }
275 
276 static void nsim_taprio_stats(struct tc_taprio_qopt_stats *stats)
277 {
278 	stats->window_drops = 0;
279 	stats->tx_overruns = 0;
280 }
281 
282 static int nsim_setup_tc_taprio(struct net_device *dev,
283 				struct tc_taprio_qopt_offload *offload)
284 {
285 	int err = 0;
286 
287 	switch (offload->cmd) {
288 	case TAPRIO_CMD_REPLACE:
289 	case TAPRIO_CMD_DESTROY:
290 		break;
291 	case TAPRIO_CMD_STATS:
292 		nsim_taprio_stats(&offload->stats);
293 		break;
294 	default:
295 		err = -EOPNOTSUPP;
296 	}
297 
298 	return err;
299 }
300 
301 static LIST_HEAD(nsim_block_cb_list);
302 
303 static int
304 nsim_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data)
305 {
306 	struct netdevsim *ns = netdev_priv(dev);
307 
308 	switch (type) {
309 	case TC_SETUP_QDISC_TAPRIO:
310 		return nsim_setup_tc_taprio(dev, type_data);
311 	case TC_SETUP_BLOCK:
312 		return flow_block_cb_setup_simple(type_data,
313 						  &nsim_block_cb_list,
314 						  nsim_setup_tc_block_cb,
315 						  ns, ns, true);
316 	default:
317 		return -EOPNOTSUPP;
318 	}
319 }
320 
321 static int
322 nsim_set_features(struct net_device *dev, netdev_features_t features)
323 {
324 	struct netdevsim *ns = netdev_priv(dev);
325 
326 	if ((dev->features & NETIF_F_HW_TC) > (features & NETIF_F_HW_TC))
327 		return nsim_bpf_disable_tc(ns);
328 
329 	return 0;
330 }
331 
332 static int nsim_get_iflink(const struct net_device *dev)
333 {
334 	struct netdevsim *nsim, *peer;
335 	int iflink;
336 
337 	nsim = netdev_priv(dev);
338 
339 	rcu_read_lock();
340 	peer = rcu_dereference(nsim->peer);
341 	iflink = peer ? READ_ONCE(peer->netdev->ifindex) :
342 			READ_ONCE(dev->ifindex);
343 	rcu_read_unlock();
344 
345 	return iflink;
346 }
347 
348 static int nsim_rcv(struct nsim_rq *rq, int budget)
349 {
350 	struct sk_buff *skb;
351 	int i;
352 
353 	for (i = 0; i < budget; i++) {
354 		if (skb_queue_empty(&rq->skb_queue))
355 			break;
356 
357 		skb = skb_dequeue(&rq->skb_queue);
358 		netif_receive_skb(skb);
359 	}
360 
361 	return i;
362 }
363 
364 static int nsim_poll(struct napi_struct *napi, int budget)
365 {
366 	struct nsim_rq *rq = container_of(napi, struct nsim_rq, napi);
367 	int done;
368 
369 	done = nsim_rcv(rq, budget);
370 	napi_complete(napi);
371 
372 	return done;
373 }
374 
375 static int nsim_create_page_pool(struct page_pool **p, struct napi_struct *napi)
376 {
377 	struct page_pool_params params = {
378 		.order = 0,
379 		.pool_size = NSIM_RING_SIZE,
380 		.nid = NUMA_NO_NODE,
381 		.dev = &napi->dev->dev,
382 		.napi = napi,
383 		.dma_dir = DMA_BIDIRECTIONAL,
384 		.netdev = napi->dev,
385 	};
386 	struct page_pool *pool;
387 
388 	pool = page_pool_create(&params);
389 	if (IS_ERR(pool))
390 		return PTR_ERR(pool);
391 
392 	*p = pool;
393 	return 0;
394 }
395 
396 static int nsim_init_napi(struct netdevsim *ns)
397 {
398 	struct net_device *dev = ns->netdev;
399 	struct nsim_rq *rq;
400 	int err, i;
401 
402 	for (i = 0; i < dev->num_rx_queues; i++) {
403 		rq = ns->rq[i];
404 
405 		netif_napi_add_config_locked(dev, &rq->napi, nsim_poll, i);
406 	}
407 
408 	for (i = 0; i < dev->num_rx_queues; i++) {
409 		rq = ns->rq[i];
410 
411 		err = nsim_create_page_pool(&rq->page_pool, &rq->napi);
412 		if (err)
413 			goto err_pp_destroy;
414 	}
415 
416 	return 0;
417 
418 err_pp_destroy:
419 	while (i--) {
420 		page_pool_destroy(ns->rq[i]->page_pool);
421 		ns->rq[i]->page_pool = NULL;
422 	}
423 
424 	for (i = 0; i < dev->num_rx_queues; i++)
425 		__netif_napi_del_locked(&ns->rq[i]->napi);
426 
427 	return err;
428 }
429 
430 static enum hrtimer_restart nsim_napi_schedule(struct hrtimer *timer)
431 {
432 	struct nsim_rq *rq;
433 
434 	rq = container_of(timer, struct nsim_rq, napi_timer);
435 	napi_schedule(&rq->napi);
436 
437 	return HRTIMER_NORESTART;
438 }
439 
440 static void nsim_rq_timer_init(struct nsim_rq *rq)
441 {
442 	hrtimer_init(&rq->napi_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
443 	rq->napi_timer.function = nsim_napi_schedule;
444 }
445 
446 static void nsim_enable_napi(struct netdevsim *ns)
447 {
448 	struct net_device *dev = ns->netdev;
449 	int i;
450 
451 	for (i = 0; i < dev->num_rx_queues; i++) {
452 		struct nsim_rq *rq = ns->rq[i];
453 
454 		netif_queue_set_napi(dev, i, NETDEV_QUEUE_TYPE_RX, &rq->napi);
455 		napi_enable_locked(&rq->napi);
456 	}
457 }
458 
/* .ndo_open: set up and enable NAPI for all RX queues.
 * Asserts that the netdev instance lock is held.
 * Returns 0 or the error from nsim_init_napi().
 */
static int nsim_open(struct net_device *dev)
{
	struct netdevsim *ns = netdev_priv(dev);
	int err;

	netdev_assert_locked(dev);

	err = nsim_init_napi(ns);
	if (!err)
		nsim_enable_napi(ns);

	return err;
}
474 
475 static void nsim_del_napi(struct netdevsim *ns)
476 {
477 	struct net_device *dev = ns->netdev;
478 	int i;
479 
480 	for (i = 0; i < dev->num_rx_queues; i++) {
481 		struct nsim_rq *rq = ns->rq[i];
482 
483 		napi_disable_locked(&rq->napi);
484 		__netif_napi_del_locked(&rq->napi);
485 	}
486 	synchronize_net();
487 
488 	for (i = 0; i < dev->num_rx_queues; i++) {
489 		page_pool_destroy(ns->rq[i]->page_pool);
490 		ns->rq[i]->page_pool = NULL;
491 	}
492 }
493 
494 static int nsim_stop(struct net_device *dev)
495 {
496 	struct netdevsim *ns = netdev_priv(dev);
497 	struct netdevsim *peer;
498 
499 	netdev_assert_locked(dev);
500 
501 	netif_carrier_off(dev);
502 	peer = rtnl_dereference(ns->peer);
503 	if (peer)
504 		netif_carrier_off(peer->netdev);
505 
506 	nsim_del_napi(ns);
507 
508 	return 0;
509 }
510 
/* net_shaper .set handler: accepts every request and returns 0 without
 * recording anything.
 */
static int nsim_shaper_set(struct net_shaper_binding *binding,
			   const struct net_shaper *shaper,
			   struct netlink_ext_ack *extack)
{
	return 0;
}
517 
/* net_shaper .delete handler: accepts every request and returns 0. */
static int nsim_shaper_del(struct net_shaper_binding *binding,
			   const struct net_shaper_handle *handle,
			   struct netlink_ext_ack *extack)
{
	return 0;
}
524 
/* net_shaper .group handler: accepts every request and returns 0. */
static int nsim_shaper_group(struct net_shaper_binding *binding,
			     int leaves_count,
			     const struct net_shaper *leaves,
			     const struct net_shaper *root,
			     struct netlink_ext_ack *extack)
{
	return 0;
}
533 
/* net_shaper .capabilities handler: sets every bit of *@flags regardless
 * of @scope (presumably advertising all capabilities - confirm against the
 * net_shaper API).
 */
static void nsim_shaper_cap(struct net_shaper_binding *binding,
			    enum net_shaper_scope scope,
			    unsigned long *flags)
{
	*flags = ULONG_MAX;
}
540 
/* Shaper operations; referenced from nsim_netdev_ops. */
static const struct net_shaper_ops nsim_shaper_ops = {
	.set			= nsim_shaper_set,
	.delete			= nsim_shaper_del,
	.group			= nsim_shaper_group,
	.capabilities		= nsim_shaper_cap,
};
547 
/* Device operations installed by nsim_init_netdevsim().  netdev_is_nsim()
 * identifies these devices by comparing against this table.
 */
static const struct net_device_ops nsim_netdev_ops = {
	.ndo_start_xmit		= nsim_start_xmit,
	.ndo_set_rx_mode	= nsim_set_rx_mode,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= nsim_change_mtu,
	.ndo_get_stats64	= nsim_get_stats64,
	.ndo_set_vf_mac		= nsim_set_vf_mac,
	.ndo_set_vf_vlan	= nsim_set_vf_vlan,
	.ndo_set_vf_rate	= nsim_set_vf_rate,
	.ndo_set_vf_spoofchk	= nsim_set_vf_spoofchk,
	.ndo_set_vf_trust	= nsim_set_vf_trust,
	.ndo_get_vf_config	= nsim_get_vf_config,
	.ndo_set_vf_link_state	= nsim_set_vf_link_state,
	.ndo_set_vf_rss_query_en = nsim_set_vf_rss_query_en,
	.ndo_setup_tc		= nsim_setup_tc,
	.ndo_set_features	= nsim_set_features,
	.ndo_get_iflink		= nsim_get_iflink,
	.ndo_bpf		= nsim_bpf,
	.ndo_open		= nsim_open,
	.ndo_stop		= nsim_stop,
	.net_shaper_ops		= &nsim_shaper_ops,
};
571 
/* Reduced device operations installed by nsim_init_netdevsim_vf(): no VF
 * management, open/stop, BPF, iflink or shaper callbacks.
 */
static const struct net_device_ops nsim_vf_netdev_ops = {
	.ndo_start_xmit		= nsim_start_xmit,
	.ndo_set_rx_mode	= nsim_set_rx_mode,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= nsim_change_mtu,
	.ndo_get_stats64	= nsim_get_stats64,
	.ndo_setup_tc		= nsim_setup_tc,
	.ndo_set_features	= nsim_set_features,
};
582 
583 /* We don't have true per-queue stats, yet, so do some random fakery here.
584  * Only report stuff for queue 0.
585  */
586 static void nsim_get_queue_stats_rx(struct net_device *dev, int idx,
587 				    struct netdev_queue_stats_rx *stats)
588 {
589 	struct rtnl_link_stats64 rtstats = {};
590 
591 	if (!idx)
592 		nsim_get_stats64(dev, &rtstats);
593 
594 	stats->packets = rtstats.rx_packets - !!rtstats.rx_packets;
595 	stats->bytes = rtstats.rx_bytes;
596 }
597 
598 static void nsim_get_queue_stats_tx(struct net_device *dev, int idx,
599 				    struct netdev_queue_stats_tx *stats)
600 {
601 	struct rtnl_link_stats64 rtstats = {};
602 
603 	if (!idx)
604 		nsim_get_stats64(dev, &rtstats);
605 
606 	stats->packets = rtstats.tx_packets - !!rtstats.tx_packets;
607 	stats->bytes = rtstats.tx_bytes;
608 }
609 
610 static void nsim_get_base_stats(struct net_device *dev,
611 				struct netdev_queue_stats_rx *rx,
612 				struct netdev_queue_stats_tx *tx)
613 {
614 	struct rtnl_link_stats64 rtstats = {};
615 
616 	nsim_get_stats64(dev, &rtstats);
617 
618 	rx->packets = !!rtstats.rx_packets;
619 	rx->bytes = 0;
620 	tx->packets = !!rtstats.tx_packets;
621 	tx->bytes = 0;
622 }
623 
/* Queue statistics callbacks installed by nsim_init_netdevsim(). */
static const struct netdev_stat_ops nsim_stat_ops = {
	.get_queue_stats_tx	= nsim_get_queue_stats_tx,
	.get_queue_stats_rx	= nsim_get_queue_stats_rx,
	.get_base_stats		= nsim_get_base_stats,
};
629 
630 static struct nsim_rq *nsim_queue_alloc(void)
631 {
632 	struct nsim_rq *rq;
633 
634 	rq = kzalloc(sizeof(*rq), GFP_KERNEL_ACCOUNT);
635 	if (!rq)
636 		return NULL;
637 
638 	skb_queue_head_init(&rq->skb_queue);
639 	nsim_rq_timer_init(rq);
640 	return rq;
641 }
642 
/* Release an RX queue: cancel its timer, purge any skbs still queued
 * (drop reason QUEUE_PURGE), then free the structure.  The queue's page
 * pool is not touched here.
 */
static void nsim_queue_free(struct nsim_rq *rq)
{
	hrtimer_cancel(&rq->napi_timer);
	skb_queue_purge_reason(&rq->skb_queue, SKB_DROP_REASON_QUEUE_PURGE);
	kfree(rq);
}
649 
/* Queue reset mode is controlled by ns->rq_reset_mode.
 * - normal - new NAPI new pool (old NAPI enabled when new added)
 * - mode 1 - allocate new pool (NAPI is only disabled / enabled)
 * - mode 2 - new NAPI new pool (old NAPI removed before new added)
 * - mode 3 - new NAPI new pool (old NAPI disabled when new added)
 */
struct nsim_queue_mem {
	/* Whole replacement / retired queue; used in every mode except 1. */
	struct nsim_rq *rq;
	/* Replacement / retired page pool; used in mode 1 only. */
	struct page_pool *pp;
};
660 
661 static int
662 nsim_queue_mem_alloc(struct net_device *dev, void *per_queue_mem, int idx)
663 {
664 	struct nsim_queue_mem *qmem = per_queue_mem;
665 	struct netdevsim *ns = netdev_priv(dev);
666 	int err;
667 
668 	if (ns->rq_reset_mode > 3)
669 		return -EINVAL;
670 
671 	if (ns->rq_reset_mode == 1) {
672 		if (!netif_running(ns->netdev))
673 			return -ENETDOWN;
674 		return nsim_create_page_pool(&qmem->pp, &ns->rq[idx]->napi);
675 	}
676 
677 	qmem->rq = nsim_queue_alloc();
678 	if (!qmem->rq)
679 		return -ENOMEM;
680 
681 	err = nsim_create_page_pool(&qmem->rq->page_pool, &qmem->rq->napi);
682 	if (err)
683 		goto err_free;
684 
685 	if (!ns->rq_reset_mode)
686 		netif_napi_add_config_locked(dev, &qmem->rq->napi, nsim_poll,
687 					     idx);
688 
689 	return 0;
690 
691 err_free:
692 	nsim_queue_free(qmem->rq);
693 	return err;
694 }
695 
696 static void nsim_queue_mem_free(struct net_device *dev, void *per_queue_mem)
697 {
698 	struct nsim_queue_mem *qmem = per_queue_mem;
699 	struct netdevsim *ns = netdev_priv(dev);
700 
701 	page_pool_destroy(qmem->pp);
702 	if (qmem->rq) {
703 		if (!ns->rq_reset_mode)
704 			netif_napi_del_locked(&qmem->rq->napi);
705 		page_pool_destroy(qmem->rq->page_pool);
706 		nsim_queue_free(qmem->rq);
707 	}
708 }
709 
710 static int
711 nsim_queue_start(struct net_device *dev, void *per_queue_mem, int idx)
712 {
713 	struct nsim_queue_mem *qmem = per_queue_mem;
714 	struct netdevsim *ns = netdev_priv(dev);
715 
716 	netdev_assert_locked(dev);
717 
718 	if (ns->rq_reset_mode == 1) {
719 		ns->rq[idx]->page_pool = qmem->pp;
720 		napi_enable_locked(&ns->rq[idx]->napi);
721 		return 0;
722 	}
723 
724 	/* netif_napi_add()/_del() should normally be called from alloc/free,
725 	 * here we want to test various call orders.
726 	 */
727 	if (ns->rq_reset_mode == 2) {
728 		netif_napi_del_locked(&ns->rq[idx]->napi);
729 		netif_napi_add_config_locked(dev, &qmem->rq->napi, nsim_poll,
730 					     idx);
731 	} else if (ns->rq_reset_mode == 3) {
732 		netif_napi_add_config_locked(dev, &qmem->rq->napi, nsim_poll,
733 					     idx);
734 		netif_napi_del_locked(&ns->rq[idx]->napi);
735 	}
736 
737 	ns->rq[idx] = qmem->rq;
738 	napi_enable_locked(&ns->rq[idx]->napi);
739 
740 	return 0;
741 }
742 
743 static int nsim_queue_stop(struct net_device *dev, void *per_queue_mem, int idx)
744 {
745 	struct nsim_queue_mem *qmem = per_queue_mem;
746 	struct netdevsim *ns = netdev_priv(dev);
747 
748 	netdev_assert_locked(dev);
749 
750 	napi_disable_locked(&ns->rq[idx]->napi);
751 
752 	if (ns->rq_reset_mode == 1) {
753 		qmem->pp = ns->rq[idx]->page_pool;
754 		page_pool_disable_direct_recycling(qmem->pp);
755 	} else {
756 		qmem->rq = ns->rq[idx];
757 	}
758 
759 	return 0;
760 }
761 
/* Queue management callbacks installed by nsim_init_netdevsim(); the
 * per-queue memory blob is a struct nsim_queue_mem.
 */
static const struct netdev_queue_mgmt_ops nsim_queue_mgmt_ops = {
	.ndo_queue_mem_size	= sizeof(struct nsim_queue_mem),
	.ndo_queue_mem_alloc	= nsim_queue_mem_alloc,
	.ndo_queue_mem_free	= nsim_queue_mem_free,
	.ndo_queue_start	= nsim_queue_start,
	.ndo_queue_stop		= nsim_queue_stop,
};
769 
770 static ssize_t
771 nsim_qreset_write(struct file *file, const char __user *data,
772 		  size_t count, loff_t *ppos)
773 {
774 	struct netdevsim *ns = file->private_data;
775 	unsigned int queue, mode;
776 	char buf[32];
777 	ssize_t ret;
778 
779 	if (count >= sizeof(buf))
780 		return -EINVAL;
781 	if (copy_from_user(buf, data, count))
782 		return -EFAULT;
783 	buf[count] = '\0';
784 
785 	ret = sscanf(buf, "%u %u", &queue, &mode);
786 	if (ret != 2)
787 		return -EINVAL;
788 
789 	rtnl_lock();
790 	if (queue >= ns->netdev->real_num_rx_queues) {
791 		ret = -EINVAL;
792 		goto exit_unlock;
793 	}
794 
795 	ns->rq_reset_mode = mode;
796 	ret = netdev_rx_queue_restart(ns->netdev, queue);
797 	ns->rq_reset_mode = 0;
798 	if (ret)
799 		goto exit_unlock;
800 
801 	ret = count;
802 exit_unlock:
803 	rtnl_unlock();
804 	return ret;
805 }
806 
/* Write-only file operations for the "queue_reset" debugfs file. */
static const struct file_operations nsim_qreset_fops = {
	.open = simple_open,
	.write = nsim_qreset_write,
	.owner = THIS_MODULE,
};
812 
813 static ssize_t
814 nsim_pp_hold_read(struct file *file, char __user *data,
815 		  size_t count, loff_t *ppos)
816 {
817 	struct netdevsim *ns = file->private_data;
818 	char buf[3] = "n\n";
819 
820 	if (ns->page)
821 		buf[0] = 'y';
822 
823 	return simple_read_from_buffer(data, count, ppos, buf, 2);
824 }
825 
826 static ssize_t
827 nsim_pp_hold_write(struct file *file, const char __user *data,
828 		   size_t count, loff_t *ppos)
829 {
830 	struct netdevsim *ns = file->private_data;
831 	ssize_t ret;
832 	bool val;
833 
834 	ret = kstrtobool_from_user(data, count, &val);
835 	if (ret)
836 		return ret;
837 
838 	rtnl_lock();
839 	ret = count;
840 	if (val == !!ns->page)
841 		goto exit;
842 
843 	if (!netif_running(ns->netdev) && val) {
844 		ret = -ENETDOWN;
845 	} else if (val) {
846 		ns->page = page_pool_dev_alloc_pages(ns->rq[0]->page_pool);
847 		if (!ns->page)
848 			ret = -ENOMEM;
849 	} else {
850 		page_pool_put_full_page(ns->page->pp, ns->page, false);
851 		ns->page = NULL;
852 	}
853 
854 exit:
855 	rtnl_unlock();
856 	return ret;
857 }
858 
/* File operations for the "pp_hold" debugfs file. */
static const struct file_operations nsim_pp_hold_fops = {
	.open = simple_open,
	.read = nsim_pp_hold_read,
	.write = nsim_pp_hold_write,
	.llseek = generic_file_llseek,
	.owner = THIS_MODULE,
};
866 
867 static void nsim_setup(struct net_device *dev)
868 {
869 	ether_setup(dev);
870 	eth_hw_addr_random(dev);
871 
872 	dev->tx_queue_len = 0;
873 	dev->flags &= ~IFF_MULTICAST;
874 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE |
875 			   IFF_NO_QUEUE;
876 	dev->features |= NETIF_F_HIGHDMA |
877 			 NETIF_F_SG |
878 			 NETIF_F_FRAGLIST |
879 			 NETIF_F_HW_CSUM |
880 			 NETIF_F_TSO;
881 	dev->hw_features |= NETIF_F_HW_TC |
882 			    NETIF_F_SG |
883 			    NETIF_F_FRAGLIST |
884 			    NETIF_F_HW_CSUM |
885 			    NETIF_F_TSO;
886 	dev->max_mtu = ETH_MAX_MTU;
887 	dev->xdp_features = NETDEV_XDP_ACT_HW_OFFLOAD;
888 }
889 
890 static int nsim_queue_init(struct netdevsim *ns)
891 {
892 	struct net_device *dev = ns->netdev;
893 	int i;
894 
895 	ns->rq = kcalloc(dev->num_rx_queues, sizeof(*ns->rq),
896 			 GFP_KERNEL_ACCOUNT);
897 	if (!ns->rq)
898 		return -ENOMEM;
899 
900 	for (i = 0; i < dev->num_rx_queues; i++) {
901 		ns->rq[i] = nsim_queue_alloc();
902 		if (!ns->rq[i])
903 			goto err_free_prev;
904 	}
905 
906 	return 0;
907 
908 err_free_prev:
909 	while (i--)
910 		kfree(ns->rq[i]);
911 	kfree(ns->rq);
912 	return -ENOMEM;
913 }
914 
915 static void nsim_queue_uninit(struct netdevsim *ns)
916 {
917 	struct net_device *dev = ns->netdev;
918 	int i;
919 
920 	for (i = 0; i < dev->num_rx_queues; i++)
921 		nsim_queue_free(ns->rq[i]);
922 
923 	kfree(ns->rq);
924 	ns->rq = NULL;
925 }
926 
927 static int nsim_init_netdevsim(struct netdevsim *ns)
928 {
929 	struct mock_phc *phc;
930 	int err;
931 
932 	phc = mock_phc_create(&ns->nsim_bus_dev->dev);
933 	if (IS_ERR(phc))
934 		return PTR_ERR(phc);
935 
936 	ns->phc = phc;
937 	ns->netdev->netdev_ops = &nsim_netdev_ops;
938 	ns->netdev->stat_ops = &nsim_stat_ops;
939 	ns->netdev->queue_mgmt_ops = &nsim_queue_mgmt_ops;
940 
941 	err = nsim_udp_tunnels_info_create(ns->nsim_dev, ns->netdev);
942 	if (err)
943 		goto err_phc_destroy;
944 
945 	rtnl_lock();
946 	err = nsim_queue_init(ns);
947 	if (err)
948 		goto err_utn_destroy;
949 
950 	err = nsim_bpf_init(ns);
951 	if (err)
952 		goto err_rq_destroy;
953 
954 	nsim_macsec_init(ns);
955 	nsim_ipsec_init(ns);
956 
957 	err = register_netdevice(ns->netdev);
958 	if (err)
959 		goto err_ipsec_teardown;
960 	rtnl_unlock();
961 	return 0;
962 
963 err_ipsec_teardown:
964 	nsim_ipsec_teardown(ns);
965 	nsim_macsec_teardown(ns);
966 	nsim_bpf_uninit(ns);
967 err_rq_destroy:
968 	nsim_queue_uninit(ns);
969 err_utn_destroy:
970 	rtnl_unlock();
971 	nsim_udp_tunnels_info_destroy(ns->netdev);
972 err_phc_destroy:
973 	mock_phc_destroy(ns->phc);
974 	return err;
975 }
976 
977 static int nsim_init_netdevsim_vf(struct netdevsim *ns)
978 {
979 	int err;
980 
981 	ns->netdev->netdev_ops = &nsim_vf_netdev_ops;
982 	rtnl_lock();
983 	err = register_netdevice(ns->netdev);
984 	rtnl_unlock();
985 	return err;
986 }
987 
988 static void nsim_exit_netdevsim(struct netdevsim *ns)
989 {
990 	nsim_udp_tunnels_info_destroy(ns->netdev);
991 	mock_phc_destroy(ns->phc);
992 }
993 
994 struct netdevsim *
995 nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port)
996 {
997 	struct net_device *dev;
998 	struct netdevsim *ns;
999 	int err;
1000 
1001 	dev = alloc_netdev_mq(sizeof(*ns), "eth%d", NET_NAME_UNKNOWN, nsim_setup,
1002 			      nsim_dev->nsim_bus_dev->num_queues);
1003 	if (!dev)
1004 		return ERR_PTR(-ENOMEM);
1005 
1006 	dev_net_set(dev, nsim_dev_net(nsim_dev));
1007 	ns = netdev_priv(dev);
1008 	ns->netdev = dev;
1009 	u64_stats_init(&ns->syncp);
1010 	ns->nsim_dev = nsim_dev;
1011 	ns->nsim_dev_port = nsim_dev_port;
1012 	ns->nsim_bus_dev = nsim_dev->nsim_bus_dev;
1013 	SET_NETDEV_DEV(dev, &ns->nsim_bus_dev->dev);
1014 	SET_NETDEV_DEVLINK_PORT(dev, &nsim_dev_port->devlink_port);
1015 	nsim_ethtool_init(ns);
1016 	if (nsim_dev_port_is_pf(nsim_dev_port))
1017 		err = nsim_init_netdevsim(ns);
1018 	else
1019 		err = nsim_init_netdevsim_vf(ns);
1020 	if (err)
1021 		goto err_free_netdev;
1022 
1023 	ns->pp_dfs = debugfs_create_file("pp_hold", 0600, nsim_dev_port->ddir,
1024 					 ns, &nsim_pp_hold_fops);
1025 	ns->qr_dfs = debugfs_create_file("queue_reset", 0200,
1026 					 nsim_dev_port->ddir, ns,
1027 					 &nsim_qreset_fops);
1028 
1029 	return ns;
1030 
1031 err_free_netdev:
1032 	free_netdev(dev);
1033 	return ERR_PTR(err);
1034 }
1035 
/* Tear down a netdevsim created by nsim_create(): remove its debugfs
 * files, unlink it from any peer, unregister the netdev, release the
 * PF-only state, drop a held page pool page and free the netdev.
 */
void nsim_destroy(struct netdevsim *ns)
{
	struct net_device *dev = ns->netdev;
	struct netdevsim *peer;

	debugfs_remove(ns->qr_dfs);
	debugfs_remove(ns->pp_dfs);

	rtnl_lock();
	/* Clear the peer link in both directions. */
	peer = rtnl_dereference(ns->peer);
	if (peer)
		RCU_INIT_POINTER(peer->peer, NULL);
	RCU_INIT_POINTER(ns->peer, NULL);
	unregister_netdevice(dev);
	/* This state is only set up by nsim_init_netdevsim(). */
	if (nsim_dev_port_is_pf(ns->nsim_dev_port)) {
		nsim_macsec_teardown(ns);
		nsim_ipsec_teardown(ns);
		nsim_bpf_uninit(ns);
		nsim_queue_uninit(ns);
	}
	rtnl_unlock();
	if (nsim_dev_port_is_pf(ns->nsim_dev_port))
		nsim_exit_netdevsim(ns);

	/* Put this intentionally late to exercise the orphaning path */
	if (ns->page) {
		page_pool_put_full_page(ns->page->pp, ns->page, false);
		ns->page = NULL;
	}

	free_netdev(dev);
}
1068 
1069 bool netdev_is_nsim(struct net_device *dev)
1070 {
1071 	return dev->netdev_ops == &nsim_netdev_ops;
1072 }
1073 
/* rtnl link .validate handler: always fails with -EOPNOTSUPP and an extack
 * message that points the user at the sysfs new_device interface.
 */
static int nsim_validate(struct nlattr *tb[], struct nlattr *data[],
			 struct netlink_ext_ack *extack)
{
	NL_SET_ERR_MSG_MOD(extack,
			   "Please use: echo \"[ID] [PORT_COUNT] [NUM_QUEUES]\" > /sys/bus/netdevsim/new_device");
	return -EOPNOTSUPP;
}
1081 
/* rtnl link ops registered at module init; the only callback is
 * nsim_validate(), which rejects every request.
 */
static struct rtnl_link_ops nsim_link_ops __read_mostly = {
	.kind		= DRV_NAME,
	.validate	= nsim_validate,
};
1086 
1087 static int __init nsim_module_init(void)
1088 {
1089 	int err;
1090 
1091 	err = nsim_dev_init();
1092 	if (err)
1093 		return err;
1094 
1095 	err = nsim_bus_init();
1096 	if (err)
1097 		goto err_dev_exit;
1098 
1099 	err = rtnl_link_register(&nsim_link_ops);
1100 	if (err)
1101 		goto err_bus_exit;
1102 
1103 	return 0;
1104 
1105 err_bus_exit:
1106 	nsim_bus_exit();
1107 err_dev_exit:
1108 	nsim_dev_exit();
1109 	return err;
1110 }
1111 
/* Module exit: undo nsim_module_init() in reverse order. */
static void __exit nsim_module_exit(void)
{
	rtnl_link_unregister(&nsim_link_ops);
	nsim_bus_exit();
	nsim_dev_exit();
}
1118 
/* Module entry/exit points and metadata. */
module_init(nsim_module_init);
module_exit(nsim_module_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Simulated networking device for testing");
MODULE_ALIAS_RTNL_LINK(DRV_NAME);
1124