/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2020 Intel Corporation
 */

#include <errno.h>
#include <stdbool.h>
#include <sys/queue.h>
#include <sys/types.h>
#include <unistd.h>

#include <rte_interrupts.h>
#include <rte_debug.h>
#include <rte_pci.h>
#include <rte_atomic.h>
#include <rte_eal.h>
#include <rte_ether.h>
#include <ethdev_pci.h>
#include <rte_kvargs.h>
#include <rte_malloc.h>
#include <rte_memzone.h>
#include <rte_dev.h>

#include <iavf_devids.h>

#include "ice_generic_flow.h"
#include "ice_dcf_ethdev.h"
#include "ice_rxtx.h"

static int
ice_dcf_dev_udp_tunnel_port_add(struct rte_eth_dev *dev,
				struct rte_eth_udp_tunnel *udp_tunnel);
static int
ice_dcf_dev_udp_tunnel_port_del(struct rte_eth_dev *dev,
				struct rte_eth_udp_tunnel *udp_tunnel);

static int
ice_dcf_dev_init(struct rte_eth_dev *eth_dev);

static int
ice_dcf_dev_uninit(struct rte_eth_dev *eth_dev);

static uint16_t
ice_dcf_recv_pkts(__rte_unused void *rx_queue,
		  __rte_unused struct rte_mbuf **bufs,
		  __rte_unused uint16_t nb_pkts)
{
	return 0;
}

static uint16_t
ice_dcf_xmit_pkts(__rte_unused void *tx_queue,
		  __rte_unused struct rte_mbuf **bufs,
		  __rte_unused uint16_t nb_pkts)
{
	return 0;
}

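/* Set up one Rx queue: derive the Rx buffer length and the maximum packet
 * length from the mempool and MTU, enable scattered Rx when needed, and
 * program the queue tail register.
 */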
static int
ice_dcf_init_rxq(struct rte_eth_dev *dev, struct ice_rx_queue *rxq)
{
	struct ice_dcf_adapter *dcf_ad = dev->data->dev_private;
	struct rte_eth_dev_data *dev_data = dev->data;
	struct iavf_hw *hw = &dcf_ad->real_hw.avf;
	uint16_t buf_size, max_pkt_len;

	buf_size = rte_pktmbuf_data_room_size(rxq->mp) - RTE_PKTMBUF_HEADROOM;
	rxq->rx_hdr_len = 0;
	rxq->rx_buf_len = RTE_ALIGN(buf_size, (1 << ICE_RLAN_CTX_DBUF_S));
	max_pkt_len = RTE_MIN(ICE_SUPPORT_CHAIN_NUM * rxq->rx_buf_len,
			      dev->data->mtu + ICE_ETH_OVERHEAD);

	/* Check that the maximum packet length is set correctly. */
	if (max_pkt_len <= RTE_ETHER_MIN_LEN ||
	    max_pkt_len > ICE_FRAME_SIZE_MAX) {
		PMD_DRV_LOG(ERR, "maximum packet length must be "
			    "larger than %u and smaller than %u",
			    (uint32_t)RTE_ETHER_MIN_LEN,
			    (uint32_t)ICE_FRAME_SIZE_MAX);
		return -EINVAL;
	}

	rxq->max_pkt_len = max_pkt_len;
	if ((dev_data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_SCATTER) ||
	    (rxq->max_pkt_len + 2 * RTE_VLAN_HLEN) > buf_size) {
		dev_data->scattered_rx = 1;
	}
	rxq->qrx_tail = hw->hw_addr + IAVF_QRX_TAIL1(rxq->queue_id);
	IAVF_PCI_REG_WRITE(rxq->qrx_tail, rxq->nb_rx_desc - 1);
	IAVF_WRITE_FLUSH(hw);

	return 0;
}

static int
ice_dcf_init_rx_queues(struct rte_eth_dev *dev)
{
	struct ice_rx_queue **rxq =
		(struct ice_rx_queue **)dev->data->rx_queues;
	int i, ret;

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		if (!rxq[i] || !rxq[i]->q_set)
			continue;
		ret = ice_dcf_init_rxq(dev, rxq[i]);
		if (ret)
			return ret;
	}

	ice_set_rx_function(dev);
	ice_set_tx_function(dev);

	return 0;
}

#define IAVF_MISC_VEC_ID                RTE_INTR_VEC_ZERO_OFFSET
#define IAVF_RX_VEC_START               RTE_INTR_VEC_RXTX_OFFSET

#define IAVF_ITR_INDEX_DEFAULT          0
#define IAVF_QUEUE_ITR_INTERVAL_DEFAULT 32 /* 32 us */
#define IAVF_QUEUE_ITR_INTERVAL_MAX     8160 /* 8160 us */

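/* Clamp an ITR interval (in microseconds) to the supported range and convert
 * it to the 2 us hardware unit; e.g. the 8160 us maximum is written as 4080.
 */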
static inline uint16_t
iavf_calc_itr_interval(int16_t interval)
{
	if (interval < 0 || interval > IAVF_QUEUE_ITR_INTERVAL_MAX)
		interval = IAVF_QUEUE_ITR_INTERVAL_DEFAULT;

	/* Convert to a hardware count; each written unit represents 2 us */
	return interval / 2;
}

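/* Map Rx queues to MSI-X vectors. Without Rx interrupts, a single vector is
 * used only for descriptor write-back (via WB_ON_ITR when the VF supports
 * it); otherwise the queues are spread across the available vectors, or all
 * share the miscellaneous vector when only one interrupt is allowed.
 */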
static int
ice_dcf_config_rx_queues_irqs(struct rte_eth_dev *dev,
				     struct rte_intr_handle *intr_handle)
{
	struct ice_dcf_adapter *adapter = dev->data->dev_private;
	struct ice_dcf_hw *hw = &adapter->real_hw;
	uint16_t interval, i;
	int vec;

	if (rte_intr_cap_multiple(intr_handle) &&
	    dev->data->dev_conf.intr_conf.rxq) {
		if (rte_intr_efd_enable(intr_handle, dev->data->nb_rx_queues))
			return -1;
	}

	if (rte_intr_dp_is_en(intr_handle)) {
		if (rte_intr_vec_list_alloc(intr_handle, "intr_vec",
						   dev->data->nb_rx_queues)) {
			PMD_DRV_LOG(ERR, "Failed to allocate %d rx intr_vec",
				    dev->data->nb_rx_queues);
			return -1;
		}
	}

	if (!dev->data->dev_conf.intr_conf.rxq ||
	    !rte_intr_dp_is_en(intr_handle)) {
		/* Rx interrupt is disabled; map an interrupt only for
		 * descriptor write-back.
		 */
		hw->nb_msix = 1;
		if (hw->vf_res->vf_cap_flags &
		    VIRTCHNL_VF_OFFLOAD_WB_ON_ITR) {
			/* If WB_ON_ITR is supported, enable it */
			hw->msix_base = IAVF_RX_VEC_START;
			/* Set the ITR for index zero to 2 us, to make sure
			 * that we leave time for aggregation to occur, but
			 * don't increase latency dramatically.
			 */
			IAVF_WRITE_REG(&hw->avf,
				       IAVF_VFINT_DYN_CTLN1(hw->msix_base - 1),
				       (0 << IAVF_VFINT_DYN_CTLN1_ITR_INDX_SHIFT) |
				       IAVF_VFINT_DYN_CTLN1_WB_ON_ITR_MASK |
				       (2UL << IAVF_VFINT_DYN_CTLN1_INTERVAL_SHIFT));
		} else {
			/* If the WB_ON_ITR offload flag is not supported, an
			 * interrupt is needed for descriptor write-back.
			 */
			hw->msix_base = IAVF_MISC_VEC_ID;

			/* set ITR to max */
			interval =
			iavf_calc_itr_interval(IAVF_QUEUE_ITR_INTERVAL_MAX);
			IAVF_WRITE_REG(&hw->avf, IAVF_VFINT_DYN_CTL01,
				       IAVF_VFINT_DYN_CTL01_INTENA_MASK |
				       (IAVF_ITR_INDEX_DEFAULT <<
					IAVF_VFINT_DYN_CTL01_ITR_INDX_SHIFT) |
				       (interval <<
					IAVF_VFINT_DYN_CTL01_INTERVAL_SHIFT));
		}
		IAVF_WRITE_FLUSH(&hw->avf);
		/* map all queues to the same interrupt */
		for (i = 0; i < dev->data->nb_rx_queues; i++)
			hw->rxq_map[hw->msix_base] |= 1 << i;
	} else {
		if (!rte_intr_allow_others(intr_handle)) {
			hw->nb_msix = 1;
			hw->msix_base = IAVF_MISC_VEC_ID;
			for (i = 0; i < dev->data->nb_rx_queues; i++) {
				hw->rxq_map[hw->msix_base] |= 1 << i;
				rte_intr_vec_list_index_set(intr_handle,
							i, IAVF_MISC_VEC_ID);
			}
			PMD_DRV_LOG(DEBUG,
				    "vector %u is mapped to all Rx queues",
				    hw->msix_base);
		} else {
			/* If Rx interrupts are required and multiple
			 * interrupts can be used, queue vectors start from 1.
			 */
			hw->nb_msix = RTE_MIN(hw->vf_res->max_vectors,
				      rte_intr_nb_efd_get(intr_handle));
			hw->msix_base = IAVF_MISC_VEC_ID;
			vec = IAVF_MISC_VEC_ID;
			for (i = 0; i < dev->data->nb_rx_queues; i++) {
				hw->rxq_map[vec] |= 1 << i;
				rte_intr_vec_list_index_set(intr_handle,
								   i, vec++);
				if (vec >= hw->nb_msix)
					vec = IAVF_RX_VEC_START;
			}
			PMD_DRV_LOG(DEBUG,
				    "%u vectors are mapped to %u Rx queues",
				    hw->nb_msix, dev->data->nb_rx_queues);
		}
	}

	if (ice_dcf_config_irq_map(hw)) {
		PMD_DRV_LOG(ERR, "config interrupt mapping failed");
		return -1;
	}
	return 0;
}

static int
alloc_rxq_mbufs(struct ice_rx_queue *rxq)
{
	volatile union ice_rx_flex_desc *rxd;
	struct rte_mbuf *mbuf = NULL;
	uint64_t dma_addr;
	uint16_t i;

	for (i = 0; i < rxq->nb_rx_desc; i++) {
		mbuf = rte_mbuf_raw_alloc(rxq->mp);
		if (unlikely(!mbuf)) {
			PMD_DRV_LOG(ERR, "Failed to allocate mbuf for RX");
			return -ENOMEM;
		}

		rte_mbuf_refcnt_set(mbuf, 1);
		mbuf->next = NULL;
		mbuf->data_off = RTE_PKTMBUF_HEADROOM;
		mbuf->nb_segs = 1;
		mbuf->port = rxq->port_id;

		dma_addr =
			rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));

		rxd = &rxq->rx_ring[i];
		rxd->read.pkt_addr = dma_addr;
		rxd->read.hdr_addr = 0;
#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
		rxd->read.rsvd1 = 0;
		rxd->read.rsvd2 = 0;
#endif

		rxq->sw_ring[i].mbuf = (void *)mbuf;
	}

	return 0;
}

static int
ice_dcf_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
	struct ice_dcf_adapter *ad = dev->data->dev_private;
	struct iavf_hw *hw = &ad->real_hw.avf;
	struct ice_rx_queue *rxq;
	int err = 0;

	if (rx_queue_id >= dev->data->nb_rx_queues)
		return -EINVAL;

	rxq = dev->data->rx_queues[rx_queue_id];

	err = alloc_rxq_mbufs(rxq);
	if (err) {
		PMD_DRV_LOG(ERR, "Failed to allocate RX queue mbuf");
		return err;
	}

	rte_wmb();

	/* Init the RX tail register. */
	IAVF_PCI_REG_WRITE(rxq->qrx_tail, rxq->nb_rx_desc - 1);
	IAVF_WRITE_FLUSH(hw);

	/* Ready to switch the queue on */
	err = ice_dcf_switch_queue(&ad->real_hw, rx_queue_id, true, true);
	if (err) {
		PMD_DRV_LOG(ERR, "Failed to switch RX queue %u on",
			    rx_queue_id);
		return err;
	}

	dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;

	return 0;
}

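/* The reset helpers below return a stopped queue to its post-setup state:
 * the descriptor ring is zeroed and the software ring and ring indexes are
 * reinitialized.
 */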
static inline void
reset_rx_queue(struct ice_rx_queue *rxq)
{
	uint16_t len;
	uint32_t i;

	if (!rxq)
		return;

	len = rxq->nb_rx_desc + ICE_RX_MAX_BURST;

	for (i = 0; i < len * sizeof(union ice_rx_flex_desc); i++)
		((volatile char *)rxq->rx_ring)[i] = 0;

	memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));

	for (i = 0; i < ICE_RX_MAX_BURST; i++)
		rxq->sw_ring[rxq->nb_rx_desc + i].mbuf = &rxq->fake_mbuf;

	/* for rx bulk */
	rxq->rx_nb_avail = 0;
	rxq->rx_next_avail = 0;
	rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);

	rxq->rx_tail = 0;
	rxq->nb_rx_hold = 0;
	rxq->pkt_first_seg = NULL;
	rxq->pkt_last_seg = NULL;
}

static inline void
reset_tx_queue(struct ice_tx_queue *txq)
{
	struct ice_tx_entry *txe;
	uint32_t i, size;
	uint16_t prev;

	if (!txq) {
		PMD_DRV_LOG(DEBUG, "Pointer to txq is NULL");
		return;
	}

	txe = txq->sw_ring;
	size = sizeof(struct ice_tx_desc) * txq->nb_tx_desc;
	for (i = 0; i < size; i++)
		((volatile char *)txq->tx_ring)[i] = 0;

	prev = (uint16_t)(txq->nb_tx_desc - 1);
	for (i = 0; i < txq->nb_tx_desc; i++) {
		txq->tx_ring[i].cmd_type_offset_bsz =
			rte_cpu_to_le_64(IAVF_TX_DESC_DTYPE_DESC_DONE);
		txe[i].mbuf = NULL;
		txe[i].last_id = i;
		txe[prev].next_id = i;
		prev = i;
	}

	txq->tx_tail = 0;
	txq->nb_tx_used = 0;

	txq->last_desc_cleaned = txq->nb_tx_desc - 1;
	txq->nb_tx_free = txq->nb_tx_desc - 1;

	txq->tx_next_dd = txq->tx_rs_thresh - 1;
	txq->tx_next_rs = txq->tx_rs_thresh - 1;
}

static int
ice_dcf_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
	struct ice_dcf_adapter *ad = dev->data->dev_private;
	struct ice_dcf_hw *hw = &ad->real_hw;
	struct ice_rx_queue *rxq;
	int err;

	if (rx_queue_id >= dev->data->nb_rx_queues)
		return -EINVAL;

	err = ice_dcf_switch_queue(hw, rx_queue_id, true, false);
	if (err) {
		PMD_DRV_LOG(ERR, "Failed to switch RX queue %u off",
			    rx_queue_id);
		return err;
	}

	rxq = dev->data->rx_queues[rx_queue_id];
	rxq->rx_rel_mbufs(rxq);
	reset_rx_queue(rxq);
	dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;

	return 0;
}

static int
ice_dcf_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
{
	struct ice_dcf_adapter *ad = dev->data->dev_private;
	struct iavf_hw *hw = &ad->real_hw.avf;
	struct ice_tx_queue *txq;
	int err = 0;

	if (tx_queue_id >= dev->data->nb_tx_queues)
		return -EINVAL;

	txq = dev->data->tx_queues[tx_queue_id];

	/* Init the TX tail register. */
	txq->qtx_tail = hw->hw_addr + IAVF_QTX_TAIL1(tx_queue_id);
	IAVF_PCI_REG_WRITE(txq->qtx_tail, 0);
	IAVF_WRITE_FLUSH(hw);

	/* Ready to switch the queue on */
	err = ice_dcf_switch_queue(&ad->real_hw, tx_queue_id, false, true);

	if (err) {
		PMD_DRV_LOG(ERR, "Failed to switch TX queue %u on",
			    tx_queue_id);
		return err;
	}

	dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;

	return 0;
}

static int
ice_dcf_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
{
	struct ice_dcf_adapter *ad = dev->data->dev_private;
	struct ice_dcf_hw *hw = &ad->real_hw;
	struct ice_tx_queue *txq;
	int err;

	if (tx_queue_id >= dev->data->nb_tx_queues)
		return -EINVAL;

	err = ice_dcf_switch_queue(hw, tx_queue_id, false, false);
	if (err) {
		PMD_DRV_LOG(ERR, "Failed to switch TX queue %u off",
			    tx_queue_id);
		return err;
	}

	txq = dev->data->tx_queues[tx_queue_id];
	txq->tx_rel_mbufs(txq);
	reset_tx_queue(txq);
	dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;

	return 0;
}

static int
ice_dcf_start_queues(struct rte_eth_dev *dev)
{
	struct ice_rx_queue *rxq;
	struct ice_tx_queue *txq;
	int nb_rxq = 0;
	int nb_txq, i;

	for (nb_txq = 0; nb_txq < dev->data->nb_tx_queues; nb_txq++) {
		txq = dev->data->tx_queues[nb_txq];
		if (txq->tx_deferred_start)
			continue;
		if (ice_dcf_tx_queue_start(dev, nb_txq) != 0) {
			PMD_DRV_LOG(ERR, "Failed to start TX queue %u", nb_txq);
			goto tx_err;
		}
	}

	for (nb_rxq = 0; nb_rxq < dev->data->nb_rx_queues; nb_rxq++) {
		rxq = dev->data->rx_queues[nb_rxq];
		if (rxq->rx_deferred_start)
			continue;
		if (ice_dcf_rx_queue_start(dev, nb_rxq) != 0) {
			PMD_DRV_LOG(ERR, "Failed to start RX queue %u", nb_rxq);
			goto rx_err;
		}
	}

	return 0;

	/* stop the queues that were started if not all queues could start */
rx_err:
	for (i = 0; i < nb_rxq; i++)
		ice_dcf_rx_queue_stop(dev, i);
tx_err:
	for (i = 0; i < nb_txq; i++)
		ice_dcf_tx_queue_stop(dev, i);

	return -1;
}

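/* Bring the port up: initialize the Rx queues, RSS and the queue/IRQ
 * configuration through virtchnl, start all non-deferred queues and add the
 * MAC addresses before reporting the link as up.
 */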
static int
ice_dcf_dev_start(struct rte_eth_dev *dev)
{
	struct ice_dcf_adapter *dcf_ad = dev->data->dev_private;
	struct rte_intr_handle *intr_handle = dev->intr_handle;
	struct ice_adapter *ad = &dcf_ad->parent;
	struct ice_dcf_hw *hw = &dcf_ad->real_hw;
	int ret;

	if (hw->resetting) {
		PMD_DRV_LOG(ERR,
			    "The DCF has been reset by PF, please reinit first");
		return -EIO;
	}

	if (hw->tm_conf.root && !hw->tm_conf.committed) {
		PMD_DRV_LOG(ERR,
			"please call hierarchy_commit() before starting the port");
		return -EIO;
	}

	ad->pf.adapter_stopped = 0;

	hw->num_queue_pairs = RTE_MAX(dev->data->nb_rx_queues,
				      dev->data->nb_tx_queues);

	ret = ice_dcf_init_rx_queues(dev);
	if (ret) {
		PMD_DRV_LOG(ERR, "Failed to init queues");
		return ret;
	}

	if (hw->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RSS_PF) {
		ret = ice_dcf_init_rss(hw);
		if (ret) {
			PMD_DRV_LOG(ERR, "Failed to configure RSS");
			return ret;
		}
	}

	ret = ice_dcf_configure_queues(hw);
	if (ret) {
		PMD_DRV_LOG(ERR, "Failed to config queues");
		return ret;
	}

	ret = ice_dcf_config_rx_queues_irqs(dev, intr_handle);
	if (ret) {
		PMD_DRV_LOG(ERR, "Failed to config rx queues' irqs");
		return ret;
	}

	if (dev->data->dev_conf.intr_conf.rxq != 0) {
		rte_intr_disable(intr_handle);
		rte_intr_enable(intr_handle);
	}

	ret = ice_dcf_start_queues(dev);
	if (ret) {
		PMD_DRV_LOG(ERR, "Failed to enable queues");
		return ret;
	}

	ret = ice_dcf_add_del_all_mac_addr(hw, true);
	if (ret) {
		PMD_DRV_LOG(ERR, "Failed to add mac addr");
		return ret;
	}

	dev->data->dev_link.link_status = RTE_ETH_LINK_UP;

	return 0;
}

static void
ice_dcf_stop_queues(struct rte_eth_dev *dev)
{
	struct ice_dcf_adapter *ad = dev->data->dev_private;
	struct ice_dcf_hw *hw = &ad->real_hw;
	struct ice_rx_queue *rxq;
	struct ice_tx_queue *txq;
	int ret, i;

	/* Stop all queues */
	ret = ice_dcf_disable_queues(hw);
	if (ret)
		PMD_DRV_LOG(WARNING, "Failed to stop queues");

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		txq = dev->data->tx_queues[i];
		if (!txq)
			continue;
		txq->tx_rel_mbufs(txq);
		reset_tx_queue(txq);
		dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED;
	}
	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		rxq = dev->data->rx_queues[i];
		if (!rxq)
			continue;
		rxq->rx_rel_mbufs(rxq);
		reset_rx_queue(rxq);
		dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED;
	}
}

static int
ice_dcf_dev_stop(struct rte_eth_dev *dev)
{
	struct ice_dcf_adapter *dcf_ad = dev->data->dev_private;
	struct rte_intr_handle *intr_handle = dev->intr_handle;
	struct ice_adapter *ad = &dcf_ad->parent;
	struct ice_dcf_hw *hw = &dcf_ad->real_hw;

	if (ad->pf.adapter_stopped == 1) {
		PMD_DRV_LOG(DEBUG, "Port is already stopped");
		return 0;
	}

	/* Stop the VF representors for this device */
	ice_dcf_vf_repr_stop_all(dcf_ad);

	ice_dcf_stop_queues(dev);

	rte_intr_efd_disable(intr_handle);
	rte_intr_vec_list_free(intr_handle);

	ice_dcf_add_del_all_mac_addr(&dcf_ad->real_hw, false);
	dev->data->dev_link.link_status = RTE_ETH_LINK_DOWN;
	ad->pf.adapter_stopped = 1;
	hw->tm_conf.committed = false;

	return 0;
}

static int
ice_dcf_dev_configure(struct rte_eth_dev *dev)
{
	struct ice_dcf_adapter *dcf_ad = dev->data->dev_private;
	struct ice_adapter *ad = &dcf_ad->parent;

	ad->rx_bulk_alloc_allowed = true;
	ad->tx_simple_allowed = true;

	if (dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG)
		dev->data->dev_conf.rxmode.offloads |= RTE_ETH_RX_OFFLOAD_RSS_HASH;

	return 0;
}

static int
ice_dcf_dev_info_get(struct rte_eth_dev *dev,
		     struct rte_eth_dev_info *dev_info)
{
	struct ice_dcf_adapter *adapter = dev->data->dev_private;
	struct ice_dcf_hw *hw = &adapter->real_hw;

	dev_info->max_mac_addrs = 1;
	dev_info->max_rx_queues = hw->vsi_res->num_queue_pairs;
	dev_info->max_tx_queues = hw->vsi_res->num_queue_pairs;
	dev_info->min_rx_bufsize = ICE_BUF_SIZE_MIN;
	dev_info->max_rx_pktlen = ICE_FRAME_SIZE_MAX;
	dev_info->hash_key_size = hw->vf_res->rss_key_size;
	dev_info->reta_size = hw->vf_res->rss_lut_size;
	dev_info->flow_type_rss_offloads = ICE_RSS_OFFLOAD_ALL;
	dev_info->dev_capa &= ~RTE_ETH_DEV_CAPA_FLOW_RULE_KEEP;

	dev_info->rx_offload_capa =
		RTE_ETH_RX_OFFLOAD_VLAN_STRIP |
		RTE_ETH_RX_OFFLOAD_IPV4_CKSUM |
		RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
		RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
		RTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM |
		RTE_ETH_RX_OFFLOAD_SCATTER |
		RTE_ETH_RX_OFFLOAD_VLAN_FILTER |
		RTE_ETH_RX_OFFLOAD_RSS_HASH;
	dev_info->tx_offload_capa =
		RTE_ETH_TX_OFFLOAD_VLAN_INSERT |
		RTE_ETH_TX_OFFLOAD_IPV4_CKSUM |
		RTE_ETH_TX_OFFLOAD_UDP_CKSUM |
		RTE_ETH_TX_OFFLOAD_TCP_CKSUM |
		RTE_ETH_TX_OFFLOAD_SCTP_CKSUM |
		RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM |
		RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM |
		RTE_ETH_TX_OFFLOAD_TCP_TSO |
		RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO |
		RTE_ETH_TX_OFFLOAD_GRE_TNL_TSO |
		RTE_ETH_TX_OFFLOAD_IPIP_TNL_TSO |
		RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO |
		RTE_ETH_TX_OFFLOAD_MULTI_SEGS;

	dev_info->default_rxconf = (struct rte_eth_rxconf) {
		.rx_thresh = {
			.pthresh = ICE_DEFAULT_RX_PTHRESH,
			.hthresh = ICE_DEFAULT_RX_HTHRESH,
			.wthresh = ICE_DEFAULT_RX_WTHRESH,
		},
		.rx_free_thresh = ICE_DEFAULT_RX_FREE_THRESH,
		.rx_drop_en = 0,
		.offloads = 0,
	};

	dev_info->default_txconf = (struct rte_eth_txconf) {
		.tx_thresh = {
			.pthresh = ICE_DEFAULT_TX_PTHRESH,
			.hthresh = ICE_DEFAULT_TX_HTHRESH,
			.wthresh = ICE_DEFAULT_TX_WTHRESH,
		},
		.tx_free_thresh = ICE_DEFAULT_TX_FREE_THRESH,
		.tx_rs_thresh = ICE_DEFAULT_TX_RSBIT_THRESH,
		.offloads = 0,
	};

	dev_info->rx_desc_lim = (struct rte_eth_desc_lim) {
		.nb_max = ICE_MAX_RING_DESC,
		.nb_min = ICE_MIN_RING_DESC,
		.nb_align = ICE_ALIGN_RING_DESC,
	};

	dev_info->tx_desc_lim = (struct rte_eth_desc_lim) {
		.nb_max = ICE_MAX_RING_DESC,
		.nb_min = ICE_MIN_RING_DESC,
		.nb_align = ICE_ALIGN_RING_DESC,
	};

	return 0;
}

static int
ice_dcf_dev_promiscuous_enable(__rte_unused struct rte_eth_dev *dev)
{
	return 0;
}

static int
ice_dcf_dev_promiscuous_disable(__rte_unused struct rte_eth_dev *dev)
{
	return 0;
}

static int
ice_dcf_dev_allmulticast_enable(__rte_unused struct rte_eth_dev *dev)
{
	return 0;
}

static int
ice_dcf_dev_allmulticast_disable(__rte_unused struct rte_eth_dev *dev)
{
	return 0;
}

static int
ice_dcf_dev_flow_ops_get(struct rte_eth_dev *dev,
			 const struct rte_flow_ops **ops)
{
	if (!dev)
		return -EINVAL;

	*ops = &ice_flow_ops;
	return 0;
}

#define ICE_DCF_32_BIT_WIDTH (CHAR_BIT * 4)
#define ICE_DCF_48_BIT_WIDTH (CHAR_BIT * 6)
#define ICE_DCF_48_BIT_MASK  RTE_LEN2MASK(ICE_DCF_48_BIT_WIDTH, uint64_t)

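/* The hardware keeps 48-bit (and some 32-bit) rolling counters. The helpers
 * below subtract the stored offset and handle a single wrap-around; e.g. with
 * an offset of 2^48 - 10, a raw reading of 5 yields 15.
 */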
static void
ice_dcf_stat_update_48(uint64_t *offset, uint64_t *stat)
{
	if (*stat >= *offset)
		*stat = *stat - *offset;
	else
		*stat = (uint64_t)((*stat +
			((uint64_t)1 << ICE_DCF_48_BIT_WIDTH)) - *offset);

	*stat &= ICE_DCF_48_BIT_MASK;
}

static void
ice_dcf_stat_update_32(uint64_t *offset, uint64_t *stat)
{
	if (*stat >= *offset)
		*stat = (uint64_t)(*stat - *offset);
	else
		*stat = (uint64_t)((*stat +
			((uint64_t)1 << ICE_DCF_32_BIT_WIDTH)) - *offset);
}

static void
ice_dcf_update_stats(struct virtchnl_eth_stats *oes,
		     struct virtchnl_eth_stats *nes)
{
	ice_dcf_stat_update_48(&oes->rx_bytes, &nes->rx_bytes);
	ice_dcf_stat_update_48(&oes->rx_unicast, &nes->rx_unicast);
	ice_dcf_stat_update_48(&oes->rx_multicast, &nes->rx_multicast);
	ice_dcf_stat_update_48(&oes->rx_broadcast, &nes->rx_broadcast);
	ice_dcf_stat_update_32(&oes->rx_discards, &nes->rx_discards);
	ice_dcf_stat_update_48(&oes->tx_bytes, &nes->tx_bytes);
	ice_dcf_stat_update_48(&oes->tx_unicast, &nes->tx_unicast);
	ice_dcf_stat_update_48(&oes->tx_multicast, &nes->tx_multicast);
	ice_dcf_stat_update_48(&oes->tx_broadcast, &nes->tx_broadcast);
	ice_dcf_stat_update_32(&oes->tx_errors, &nes->tx_errors);
	ice_dcf_stat_update_32(&oes->tx_discards, &nes->tx_discards);
}

static int
ice_dcf_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
	struct ice_dcf_adapter *ad = dev->data->dev_private;
	struct ice_dcf_hw *hw = &ad->real_hw;
	struct virtchnl_eth_stats pstats;
	int ret;

	if (hw->resetting) {
		PMD_DRV_LOG(ERR,
			    "The DCF has been reset by PF, please reinit first");
		return -EIO;
	}

	ret = ice_dcf_query_stats(hw, &pstats);
	if (ret == 0) {
		ice_dcf_update_stats(&hw->eth_stats_offset, &pstats);
		stats->ipackets = pstats.rx_unicast + pstats.rx_multicast +
				pstats.rx_broadcast - pstats.rx_discards;
		stats->opackets = pstats.tx_broadcast + pstats.tx_multicast +
						pstats.tx_unicast;
		stats->imissed = pstats.rx_discards;
		stats->oerrors = pstats.tx_errors + pstats.tx_discards;
		stats->ibytes = pstats.rx_bytes;
		stats->ibytes -= stats->ipackets * RTE_ETHER_CRC_LEN;
		stats->obytes = pstats.tx_bytes;
	} else {
		PMD_DRV_LOG(ERR, "Get statistics failed");
	}
	return ret;
}

static int
ice_dcf_stats_reset(struct rte_eth_dev *dev)
{
	struct ice_dcf_adapter *ad = dev->data->dev_private;
	struct ice_dcf_hw *hw = &ad->real_hw;
	struct virtchnl_eth_stats pstats;
	int ret;

	if (hw->resetting)
		return 0;

	/* read stat values to clear hardware registers */
	ret = ice_dcf_query_stats(hw, &pstats);
	if (ret != 0)
		return ret;

	/* set the stats offset based on the current values */
	hw->eth_stats_offset = pstats;

	return 0;
}

static void
ice_dcf_free_repr_info(struct ice_dcf_adapter *dcf_adapter)
{
	if (dcf_adapter->repr_infos) {
		rte_free(dcf_adapter->repr_infos);
		dcf_adapter->repr_infos = NULL;
	}
}

static int
ice_dcf_init_repr_info(struct ice_dcf_adapter *dcf_adapter)
{
	dcf_adapter->repr_infos =
			rte_calloc("ice_dcf_rep_info",
				   dcf_adapter->real_hw.num_vfs,
				   sizeof(dcf_adapter->repr_infos[0]), 0);
	if (!dcf_adapter->repr_infos) {
		PMD_DRV_LOG(ERR, "Failed to alloc memory for VF representors");
		return -ENOMEM;
	}

	return 0;
}

static int
ice_dcf_dev_close(struct rte_eth_dev *dev)
{
	struct ice_dcf_adapter *adapter = dev->data->dev_private;

	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return 0;

	(void)ice_dcf_dev_stop(dev);

	ice_free_queues(dev);

	ice_dcf_free_repr_info(adapter);
	ice_dcf_uninit_parent_adapter(dev);
	ice_dcf_uninit_hw(dev, &adapter->real_hw);

	return 0;
}

int
ice_dcf_link_update(struct rte_eth_dev *dev,
		    __rte_unused int wait_to_complete)
{
	struct ice_dcf_adapter *ad = dev->data->dev_private;
	struct ice_dcf_hw *hw = &ad->real_hw;
	struct rte_eth_link new_link;

	memset(&new_link, 0, sizeof(new_link));

	/* Only read the link status stored in the VF; it is updated when a
	 * LINK_CHANGE event is received from the PF over virtchnl.
	 */
	switch (hw->link_speed) {
	case 10:
		new_link.link_speed = RTE_ETH_SPEED_NUM_10M;
		break;
	case 100:
		new_link.link_speed = RTE_ETH_SPEED_NUM_100M;
		break;
	case 1000:
		new_link.link_speed = RTE_ETH_SPEED_NUM_1G;
		break;
	case 10000:
		new_link.link_speed = RTE_ETH_SPEED_NUM_10G;
		break;
	case 20000:
		new_link.link_speed = RTE_ETH_SPEED_NUM_20G;
		break;
	case 25000:
		new_link.link_speed = RTE_ETH_SPEED_NUM_25G;
		break;
	case 40000:
		new_link.link_speed = RTE_ETH_SPEED_NUM_40G;
		break;
	case 50000:
		new_link.link_speed = RTE_ETH_SPEED_NUM_50G;
		break;
	case 100000:
		new_link.link_speed = RTE_ETH_SPEED_NUM_100G;
		break;
	default:
		new_link.link_speed = RTE_ETH_SPEED_NUM_NONE;
		break;
	}

	new_link.link_duplex = RTE_ETH_LINK_FULL_DUPLEX;
	new_link.link_status = hw->link_up ? RTE_ETH_LINK_UP :
					     RTE_ETH_LINK_DOWN;
	new_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
				RTE_ETH_LINK_SPEED_FIXED);

	return rte_eth_linkstatus_set(dev, &new_link);
}

bool
ice_dcf_adminq_need_retry(struct ice_adapter *ad)
{
	return ad->hw.dcf_enabled &&
	       !__atomic_load_n(&ad->dcf_state_on, __ATOMIC_RELAXED);
}

/* Add UDP tunneling port */
static int
ice_dcf_dev_udp_tunnel_port_add(struct rte_eth_dev *dev,
				struct rte_eth_udp_tunnel *udp_tunnel)
{
	struct ice_dcf_adapter *adapter = dev->data->dev_private;
	struct ice_adapter *parent_adapter = &adapter->parent;
	struct ice_hw *parent_hw = &parent_adapter->hw;
	int ret = 0;

	if (!udp_tunnel)
		return -EINVAL;

	switch (udp_tunnel->prot_type) {
	case RTE_ETH_TUNNEL_TYPE_VXLAN:
		ret = ice_create_tunnel(parent_hw, TNL_VXLAN,
					udp_tunnel->udp_port);
		break;
	case RTE_ETH_TUNNEL_TYPE_ECPRI:
		ret = ice_create_tunnel(parent_hw, TNL_ECPRI,
					udp_tunnel->udp_port);
		break;
	default:
		PMD_DRV_LOG(ERR, "Invalid tunnel type");
		ret = -EINVAL;
		break;
	}

	return ret;
}

/* Delete UDP tunneling port */
static int
ice_dcf_dev_udp_tunnel_port_del(struct rte_eth_dev *dev,
				struct rte_eth_udp_tunnel *udp_tunnel)
{
	struct ice_dcf_adapter *adapter = dev->data->dev_private;
	struct ice_adapter *parent_adapter = &adapter->parent;
	struct ice_hw *parent_hw = &parent_adapter->hw;
	int ret = 0;

	if (!udp_tunnel)
		return -EINVAL;

	switch (udp_tunnel->prot_type) {
	case RTE_ETH_TUNNEL_TYPE_VXLAN:
	case RTE_ETH_TUNNEL_TYPE_ECPRI:
		ret = ice_destroy_tunnel(parent_hw, udp_tunnel->udp_port, 0);
		break;
	default:
		PMD_DRV_LOG(ERR, "Invalid tunnel type");
		ret = -EINVAL;
		break;
	}

	return ret;
}

static int
ice_dcf_tm_ops_get(struct rte_eth_dev *dev __rte_unused,
		void *arg)
{
	if (!arg)
		return -EINVAL;

	*(const void **)arg = &ice_dcf_tm_ops;

	return 0;
}

static inline void
ice_dcf_reset_hw(struct rte_eth_dev *eth_dev, struct ice_dcf_hw *hw)
{
	ice_dcf_uninit_hw(eth_dev, hw);
	ice_dcf_init_hw(eth_dev, hw);
}

/* Check if a reset has been triggered by the PF */
static inline bool
ice_dcf_is_reset(struct rte_eth_dev *dev)
{
	struct ice_dcf_adapter *ad = dev->data->dev_private;
	struct iavf_hw *hw = &ad->real_hw.avf;

	return !(IAVF_READ_REG(hw, IAVF_VF_ARQLEN1) &
		 IAVF_VF_ARQLEN1_ARQENABLE_MASK);
}

static int
ice_dcf_dev_reset(struct rte_eth_dev *dev)
{
	struct ice_dcf_adapter *ad = dev->data->dev_private;
	struct ice_dcf_hw *hw = &ad->real_hw;
	int ret;

	if (ice_dcf_is_reset(dev)) {
		if (!ad->real_hw.resetting)
			ad->real_hw.resetting = true;
		PMD_DRV_LOG(ERR, "The DCF has been reset by PF");

		/*
		 * Simply reset the hw to trigger an additional DCF
		 * enable/disable cycle, which helps to work around the issue
		 * that the kernel driver may not clean up resources during
		 * the previous reset.
		 */
		ice_dcf_reset_hw(dev, hw);
	}

	ret = ice_dcf_dev_uninit(dev);
	if (ret)
		return ret;

	ret = ice_dcf_dev_init(dev);

	return ret;
}

static const struct eth_dev_ops ice_dcf_eth_dev_ops = {
	.dev_start               = ice_dcf_dev_start,
	.dev_stop                = ice_dcf_dev_stop,
	.dev_close               = ice_dcf_dev_close,
	.dev_reset               = ice_dcf_dev_reset,
	.dev_configure           = ice_dcf_dev_configure,
	.dev_infos_get           = ice_dcf_dev_info_get,
	.rx_queue_setup          = ice_rx_queue_setup,
	.tx_queue_setup          = ice_tx_queue_setup,
	.rx_queue_release        = ice_dev_rx_queue_release,
	.tx_queue_release        = ice_dev_tx_queue_release,
	.rx_queue_start          = ice_dcf_rx_queue_start,
	.tx_queue_start          = ice_dcf_tx_queue_start,
	.rx_queue_stop           = ice_dcf_rx_queue_stop,
	.tx_queue_stop           = ice_dcf_tx_queue_stop,
	.link_update             = ice_dcf_link_update,
	.stats_get               = ice_dcf_stats_get,
	.stats_reset             = ice_dcf_stats_reset,
	.promiscuous_enable      = ice_dcf_dev_promiscuous_enable,
	.promiscuous_disable     = ice_dcf_dev_promiscuous_disable,
	.allmulticast_enable     = ice_dcf_dev_allmulticast_enable,
	.allmulticast_disable    = ice_dcf_dev_allmulticast_disable,
	.flow_ops_get            = ice_dcf_dev_flow_ops_get,
	.udp_tunnel_port_add     = ice_dcf_dev_udp_tunnel_port_add,
	.udp_tunnel_port_del     = ice_dcf_dev_udp_tunnel_port_del,
	.tm_ops_get              = ice_dcf_tm_ops_get,
};

static int
ice_dcf_dev_init(struct rte_eth_dev *eth_dev)
{
	struct ice_dcf_adapter *adapter = eth_dev->data->dev_private;
	struct ice_adapter *parent_adapter = &adapter->parent;

	eth_dev->dev_ops = &ice_dcf_eth_dev_ops;
	eth_dev->rx_pkt_burst = ice_dcf_recv_pkts;
	eth_dev->tx_pkt_burst = ice_dcf_xmit_pkts;

	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return 0;

	adapter->real_hw.vc_event_msg_cb = ice_dcf_handle_pf_event_msg;
	if (ice_dcf_init_hw(eth_dev, &adapter->real_hw) != 0) {
		PMD_INIT_LOG(ERR, "Failed to init DCF hardware");
		__atomic_store_n(&parent_adapter->dcf_state_on, false,
				 __ATOMIC_RELAXED);
		return -1;
	}

	__atomic_store_n(&parent_adapter->dcf_state_on, true, __ATOMIC_RELAXED);

	if (ice_dcf_init_parent_adapter(eth_dev) != 0) {
		PMD_INIT_LOG(ERR, "Failed to init DCF parent adapter");
		ice_dcf_uninit_hw(eth_dev, &adapter->real_hw);
		return -1;
	}

	return 0;
}

static int
ice_dcf_dev_uninit(struct rte_eth_dev *eth_dev)
{
	ice_dcf_dev_close(eth_dev);

	return 0;
}

static int
ice_dcf_cap_check_handler(__rte_unused const char *key,
			  const char *value, __rte_unused void *opaque)
{
	if (strcmp(value, "dcf"))
		return -1;

	return 0;
}

static int
ice_dcf_cap_selected(struct rte_devargs *devargs)
{
	struct rte_kvargs *kvlist;
	const char *key = "cap";
	int ret = 0;

	if (devargs == NULL)
		return 0;

	kvlist = rte_kvargs_parse(devargs->args, NULL);
	if (kvlist == NULL)
		return 0;

	if (!rte_kvargs_count(kvlist, key))
		goto exit;

	/* The DCF capability is selected when the key-value pair cap=dcf
	 * is present.
	 */
	if (rte_kvargs_process(kvlist, key,
			       ice_dcf_cap_check_handler, NULL) < 0)
		goto exit;

	ret = 1;

exit:
	rte_kvargs_free(kvlist);
	return ret;
}

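/* The DCF PMD only binds when the "cap=dcf" devarg is present; VF
 * representors are requested with the standard "representor" devarg. An
 * illustrative (not prescriptive) EAL invocation:
 *   dpdk-testpmd -a 0000:18:01.0,cap=dcf,representor=[0-2] -- -i
 * where the PCI address and VF list are examples only.
 */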
static int
eth_ice_dcf_pci_probe(__rte_unused struct rte_pci_driver *pci_drv,
		      struct rte_pci_device *pci_dev)
{
	struct rte_eth_devargs eth_da = { .nb_representor_ports = 0 };
	struct ice_dcf_vf_repr_param repr_param;
	char repr_name[RTE_ETH_NAME_MAX_LEN];
	struct ice_dcf_adapter *dcf_adapter;
	struct rte_eth_dev *dcf_ethdev;
	uint16_t dcf_vsi_id;
	int i, ret;

	if (!ice_dcf_cap_selected(pci_dev->device.devargs))
		return 1;

	ret = rte_eth_devargs_parse(pci_dev->device.devargs->args, &eth_da);
	if (ret)
		return ret;

	ret = rte_eth_dev_pci_generic_probe(pci_dev,
					    sizeof(struct ice_dcf_adapter),
					    ice_dcf_dev_init);
	if (ret || !eth_da.nb_representor_ports)
		return ret;
	if (eth_da.type != RTE_ETH_REPRESENTOR_VF)
		return -ENOTSUP;

	dcf_ethdev = rte_eth_dev_allocated(pci_dev->device.name);
	if (dcf_ethdev == NULL)
		return -ENODEV;

	dcf_adapter = dcf_ethdev->data->dev_private;
	ret = ice_dcf_init_repr_info(dcf_adapter);
	if (ret)
		return ret;

	if (eth_da.nb_representor_ports > dcf_adapter->real_hw.num_vfs ||
	    eth_da.nb_representor_ports >= RTE_MAX_ETHPORTS) {
		PMD_DRV_LOG(ERR, "the number of port representors is too large: %u",
			    eth_da.nb_representor_ports);
		ice_dcf_free_repr_info(dcf_adapter);
		return -EINVAL;
	}

	dcf_vsi_id = dcf_adapter->real_hw.vsi_id | VIRTCHNL_DCF_VF_VSI_VALID;

	repr_param.dcf_eth_dev = dcf_ethdev;
	repr_param.switch_domain_id = 0;

	for (i = 0; i < eth_da.nb_representor_ports; i++) {
		uint16_t vf_id = eth_da.representor_ports[i];
		struct rte_eth_dev *vf_rep_eth_dev;

		if (vf_id >= dcf_adapter->real_hw.num_vfs) {
			PMD_DRV_LOG(ERR, "VF ID %u is out of range (0 ~ %u)",
				    vf_id, dcf_adapter->real_hw.num_vfs - 1);
			ret = -EINVAL;
			break;
		}

		if (dcf_adapter->real_hw.vf_vsi_map[vf_id] == dcf_vsi_id) {
			PMD_DRV_LOG(ERR, "VF ID %u is the DCF's own ID", vf_id);
			ret = -EINVAL;
			break;
		}

		repr_param.vf_id = vf_id;
		snprintf(repr_name, sizeof(repr_name), "net_%s_representor_%u",
			 pci_dev->device.name, vf_id);
		ret = rte_eth_dev_create(&pci_dev->device, repr_name,
					 sizeof(struct ice_dcf_vf_repr),
					 NULL, NULL, ice_dcf_vf_repr_init,
					 &repr_param);
		if (ret) {
			PMD_DRV_LOG(ERR, "failed to create DCF VF representor %s",
				    repr_name);
			break;
		}

		vf_rep_eth_dev = rte_eth_dev_allocated(repr_name);
		if (!vf_rep_eth_dev) {
			PMD_DRV_LOG(ERR,
				    "Failed to find the ethdev for DCF VF representor: %s",
				    repr_name);
			ret = -ENODEV;
			break;
		}

		dcf_adapter->repr_infos[vf_id].vf_rep_eth_dev = vf_rep_eth_dev;
		dcf_adapter->num_reprs++;
	}

	return ret;
}

static int
eth_ice_dcf_pci_remove(struct rte_pci_device *pci_dev)
{
	struct rte_eth_dev *eth_dev;

	eth_dev = rte_eth_dev_allocated(pci_dev->device.name);
	if (!eth_dev)
		return 0;

	if (eth_dev->data->dev_flags & RTE_ETH_DEV_REPRESENTOR)
		return rte_eth_dev_pci_generic_remove(pci_dev,
						      ice_dcf_vf_repr_uninit);
	else
		return rte_eth_dev_pci_generic_remove(pci_dev,
						      ice_dcf_dev_uninit);
}

static const struct rte_pci_id pci_id_ice_dcf_map[] = {
	{ RTE_PCI_DEVICE(IAVF_INTEL_VENDOR_ID, IAVF_DEV_ID_ADAPTIVE_VF) },
	{ .vendor_id = 0, /* sentinel */ },
};

static struct rte_pci_driver rte_ice_dcf_pmd = {
	.id_table = pci_id_ice_dcf_map,
	.drv_flags = RTE_PCI_DRV_NEED_MAPPING,
	.probe = eth_ice_dcf_pci_probe,
	.remove = eth_ice_dcf_pci_remove,
};

RTE_PMD_REGISTER_PCI(net_ice_dcf, rte_ice_dcf_pmd);
RTE_PMD_REGISTER_PCI_TABLE(net_ice_dcf, pci_id_ice_dcf_map);
RTE_PMD_REGISTER_KMOD_DEP(net_ice_dcf, "* igb_uio | vfio-pci");
RTE_PMD_REGISTER_PARAM_STRING(net_ice_dcf, "cap=dcf");