xref: /f-stack/dpdk/drivers/net/igc/igc_txrx.c (revision 2d9fd380)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2020 Intel Corporation
3  */
4 
5 #include <rte_config.h>
6 #include <rte_malloc.h>
7 #include <rte_ethdev_driver.h>
8 #include <rte_net.h>
9 
10 #include "igc_logs.h"
11 #include "igc_txrx.h"
12 
13 #ifdef RTE_PMD_USE_PREFETCH
14 #define rte_igc_prefetch(p)		rte_prefetch0(p)
15 #else
16 #define rte_igc_prefetch(p)		do {} while (0)
17 #endif
18 
19 #ifdef RTE_PMD_PACKET_PREFETCH
20 #define rte_packet_prefetch(p)		rte_prefetch1(p)
21 #else
22 #define rte_packet_prefetch(p)		do {} while (0)
23 #endif
24 
25 /* Multicast / Unicast table offset mask. */
26 #define IGC_RCTL_MO_MSK			(3u << IGC_RCTL_MO_SHIFT)
27 
28 /* Loopback mode. */
29 #define IGC_RCTL_LBM_SHIFT		6
30 #define IGC_RCTL_LBM_MSK		(3u << IGC_RCTL_LBM_SHIFT)
31 
32 /* Hash select for MTA */
33 #define IGC_RCTL_HSEL_SHIFT		8
34 #define IGC_RCTL_HSEL_MSK		(3u << IGC_RCTL_HSEL_SHIFT)
35 #define IGC_RCTL_PSP			(1u << 21)
36 
37 /* Receive buffer size for header buffer */
38 #define IGC_SRRCTL_BSIZEHEADER_SHIFT	8
39 
40 /* RX descriptor status and error flags */
41 #define IGC_RXD_STAT_L4CS		(1u << 5)
42 #define IGC_RXD_STAT_VEXT		(1u << 9)
43 #define IGC_RXD_STAT_LLINT		(1u << 11)
44 #define IGC_RXD_STAT_SCRC		(1u << 12)
45 #define IGC_RXD_STAT_SMDT_MASK		(3u << 13)
46 #define IGC_RXD_STAT_MC			(1u << 19)
47 #define IGC_RXD_EXT_ERR_L4E		(1u << 29)
48 #define IGC_RXD_EXT_ERR_IPE		(1u << 30)
49 #define IGC_RXD_EXT_ERR_RXE		(1u << 31)
50 #define IGC_RXD_RSS_TYPE_MASK		0xfu
51 #define IGC_RXD_PCTYPE_MASK		(0x7fu << 4)
52 #define IGC_RXD_ETQF_SHIFT		12
53 #define IGC_RXD_ETQF_MSK		(0xfu << IGC_RXD_ETQF_SHIFT)
54 #define IGC_RXD_VPKT			(1u << 16)
55 
56 /* TXD control bits */
57 #define IGC_TXDCTL_PTHRESH_SHIFT	0
58 #define IGC_TXDCTL_HTHRESH_SHIFT	8
59 #define IGC_TXDCTL_WTHRESH_SHIFT	16
60 #define IGC_TXDCTL_PTHRESH_MSK		(0x1fu << IGC_TXDCTL_PTHRESH_SHIFT)
61 #define IGC_TXDCTL_HTHRESH_MSK		(0x1fu << IGC_TXDCTL_HTHRESH_SHIFT)
62 #define IGC_TXDCTL_WTHRESH_MSK		(0x1fu << IGC_TXDCTL_WTHRESH_SHIFT)
63 
64 /* RXD control bits */
65 #define IGC_RXDCTL_PTHRESH_SHIFT	0
66 #define IGC_RXDCTL_HTHRESH_SHIFT	8
67 #define IGC_RXDCTL_WTHRESH_SHIFT	16
68 #define IGC_RXDCTL_PTHRESH_MSK		(0x1fu << IGC_RXDCTL_PTHRESH_SHIFT)
69 #define IGC_RXDCTL_HTHRESH_MSK		(0x1fu << IGC_RXDCTL_HTHRESH_SHIFT)
70 #define IGC_RXDCTL_WTHRESH_MSK		(0x1fu << IGC_RXDCTL_WTHRESH_SHIFT)
71 
72 #define IGC_TSO_MAX_HDRLEN		512
73 #define IGC_TSO_MAX_MSS			9216
74 
75 /* Bit Mask to indicate what bits required for building TX context */
76 #define IGC_TX_OFFLOAD_MASK (		\
77 		PKT_TX_OUTER_IPV4 |	\
78 		PKT_TX_IPV6 |		\
79 		PKT_TX_IPV4 |		\
80 		PKT_TX_VLAN_PKT |	\
81 		PKT_TX_IP_CKSUM |	\
82 		PKT_TX_L4_MASK |	\
83 		PKT_TX_TCP_SEG |	\
84 		PKT_TX_UDP_SEG)
85 
86 #define IGC_TX_OFFLOAD_SEG	(PKT_TX_TCP_SEG | PKT_TX_UDP_SEG)
87 
88 #define IGC_ADVTXD_POPTS_TXSM	0x00000200 /* L4 Checksum offload request */
89 #define IGC_ADVTXD_POPTS_IXSM	0x00000100 /* IP Checksum offload request */
90 
91 /* L4 Packet TYPE of Reserved */
92 #define IGC_ADVTXD_TUCMD_L4T_RSV	0x00001800
93 
94 #define IGC_TX_OFFLOAD_NOTSUP_MASK (PKT_TX_OFFLOAD_MASK ^ IGC_TX_OFFLOAD_MASK)
95 
96 /**
97  * Structure associated with each descriptor of the RX ring of a RX queue.
98  */
99 struct igc_rx_entry {
100 	struct rte_mbuf *mbuf; /**< mbuf associated with RX descriptor. */
101 };
102 
103 /**
104  * Structure associated with each RX queue.
105  */
106 struct igc_rx_queue {
107 	struct rte_mempool  *mb_pool;   /**< mbuf pool to populate RX ring. */
108 	volatile union igc_adv_rx_desc *rx_ring;
109 	/**< RX ring virtual address. */
110 	uint64_t            rx_ring_phys_addr; /**< RX ring DMA address. */
111 	volatile uint32_t   *rdt_reg_addr; /**< RDT register address. */
112 	volatile uint32_t   *rdh_reg_addr; /**< RDH register address. */
113 	struct igc_rx_entry *sw_ring;   /**< address of RX software ring. */
114 	struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
115 	struct rte_mbuf *pkt_last_seg;  /**< Last segment of current packet. */
116 	uint16_t            nb_rx_desc; /**< number of RX descriptors. */
117 	uint16_t            rx_tail;    /**< current value of RDT register. */
118 	uint16_t            nb_rx_hold; /**< number of held free RX desc. */
119 	uint16_t            rx_free_thresh; /**< max free RX desc to hold. */
120 	uint16_t            queue_id;   /**< RX queue index. */
121 	uint16_t            reg_idx;    /**< RX queue register index. */
122 	uint16_t            port_id;    /**< Device port identifier. */
123 	uint8_t             pthresh;    /**< Prefetch threshold register. */
124 	uint8_t             hthresh;    /**< Host threshold register. */
125 	uint8_t             wthresh;    /**< Write-back threshold register. */
126 	uint8_t             crc_len;    /**< 0 if CRC stripped, 4 otherwise. */
127 	uint8_t             drop_en;	/**< If not 0, set SRRCTL.Drop_En. */
128 	uint32_t            flags;      /**< RX flags. */
129 	uint64_t	    offloads;   /**< offloads of DEV_RX_OFFLOAD_* */
130 };
131 
132 /** Offload features */
133 union igc_tx_offload {
134 	uint64_t data;
135 	struct {
136 		uint64_t l3_len:9; /**< L3 (IP) Header Length. */
137 		uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
138 		uint64_t vlan_tci:16;
139 		/**< VLAN Tag Control Identifier(CPU order). */
140 		uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
141 		uint64_t tso_segsz:16; /**< TCP TSO segment size. */
142 		/* uint64_t unused:8; */
143 	};
144 };
145 
146 /*
147  * Compare mask for igc_tx_offload.data,
148  * should be in sync with igc_tx_offload layout.
149  */
150 #define TX_MACIP_LEN_CMP_MASK	0x000000000000FFFFULL /**< L2L3 header mask. */
151 #define TX_VLAN_CMP_MASK	0x00000000FFFF0000ULL /**< Vlan mask. */
152 #define TX_TCP_LEN_CMP_MASK	0x000000FF00000000ULL /**< TCP header mask. */
153 #define TX_TSO_MSS_CMP_MASK	0x00FFFF0000000000ULL /**< TSO segsz mask. */
154 /** Mac + IP + TCP + Mss mask. */
155 #define TX_TSO_CMP_MASK	\
156 	(TX_MACIP_LEN_CMP_MASK | TX_TCP_LEN_CMP_MASK | TX_TSO_MSS_CMP_MASK)
157 
158 /**
159  * Structure to check if new context need be built
160  */
161 struct igc_advctx_info {
162 	uint64_t flags;           /**< ol_flags related to context build. */
163 	/** tx offload: vlan, tso, l2-l3-l4 lengths. */
164 	union igc_tx_offload tx_offload;
165 	/** compare mask for tx offload. */
166 	union igc_tx_offload tx_offload_mask;
167 };
168 
169 /**
170  * Hardware context number
171  */
172 enum {
173 	IGC_CTX_0    = 0, /**< CTX0    */
174 	IGC_CTX_1    = 1, /**< CTX1    */
175 	IGC_CTX_NUM  = 2, /**< CTX_NUM */
176 };
177 
178 /**
179  * Structure associated with each descriptor of the TX ring of a TX queue.
180  */
181 struct igc_tx_entry {
182 	struct rte_mbuf *mbuf; /**< mbuf associated with TX desc, if any. */
183 	uint16_t next_id; /**< Index of next descriptor in ring. */
184 	uint16_t last_id; /**< Index of last scattered descriptor. */
185 };
186 
187 /**
188  * Structure associated with each TX queue.
189  */
190 struct igc_tx_queue {
191 	volatile union igc_adv_tx_desc *tx_ring; /**< TX ring address */
192 	uint64_t               tx_ring_phys_addr; /**< TX ring DMA address. */
193 	struct igc_tx_entry    *sw_ring; /**< virtual address of SW ring. */
194 	volatile uint32_t      *tdt_reg_addr; /**< Address of TDT register. */
195 	uint32_t               txd_type;      /**< Device-specific TXD type */
196 	uint16_t               nb_tx_desc;    /**< number of TX descriptors. */
197 	uint16_t               tx_tail;  /**< Current value of TDT register. */
198 	uint16_t               tx_head;
199 	/**< Index of first used TX descriptor. */
200 	uint16_t               queue_id; /**< TX queue index. */
201 	uint16_t               reg_idx;  /**< TX queue register index. */
202 	uint16_t               port_id;  /**< Device port identifier. */
203 	uint8_t                pthresh;  /**< Prefetch threshold register. */
204 	uint8_t                hthresh;  /**< Host threshold register. */
205 	uint8_t                wthresh;  /**< Write-back threshold register. */
206 	uint8_t                ctx_curr;
207 
208 	/**< Start context position for transmit queue. */
209 	struct igc_advctx_info ctx_cache[IGC_CTX_NUM];
210 	/**< Hardware context history.*/
211 	uint64_t	       offloads; /**< offloads of DEV_TX_OFFLOAD_* */
212 };
213 
214 static inline uint64_t
rx_desc_statuserr_to_pkt_flags(uint32_t statuserr)215 rx_desc_statuserr_to_pkt_flags(uint32_t statuserr)
216 {
217 	static uint64_t l4_chksum_flags[] = {0, 0, PKT_RX_L4_CKSUM_GOOD,
218 			PKT_RX_L4_CKSUM_BAD};
219 
220 	static uint64_t l3_chksum_flags[] = {0, 0, PKT_RX_IP_CKSUM_GOOD,
221 			PKT_RX_IP_CKSUM_BAD};
222 	uint64_t pkt_flags = 0;
223 	uint32_t tmp;
224 
225 	if (statuserr & IGC_RXD_STAT_VP)
226 		pkt_flags |= PKT_RX_VLAN_STRIPPED;
227 
228 	tmp = !!(statuserr & (IGC_RXD_STAT_L4CS | IGC_RXD_STAT_UDPCS));
229 	tmp = (tmp << 1) | (uint32_t)!!(statuserr & IGC_RXD_EXT_ERR_L4E);
230 	pkt_flags |= l4_chksum_flags[tmp];
231 
232 	tmp = !!(statuserr & IGC_RXD_STAT_IPCS);
233 	tmp = (tmp << 1) | (uint32_t)!!(statuserr & IGC_RXD_EXT_ERR_IPE);
234 	pkt_flags |= l3_chksum_flags[tmp];
235 
236 	return pkt_flags;
237 }
238 
239 #define IGC_PACKET_TYPE_IPV4              0X01
240 #define IGC_PACKET_TYPE_IPV4_TCP          0X11
241 #define IGC_PACKET_TYPE_IPV4_UDP          0X21
242 #define IGC_PACKET_TYPE_IPV4_SCTP         0X41
243 #define IGC_PACKET_TYPE_IPV4_EXT          0X03
244 #define IGC_PACKET_TYPE_IPV4_EXT_SCTP     0X43
245 #define IGC_PACKET_TYPE_IPV6              0X04
246 #define IGC_PACKET_TYPE_IPV6_TCP          0X14
247 #define IGC_PACKET_TYPE_IPV6_UDP          0X24
248 #define IGC_PACKET_TYPE_IPV6_EXT          0X0C
249 #define IGC_PACKET_TYPE_IPV6_EXT_TCP      0X1C
250 #define IGC_PACKET_TYPE_IPV6_EXT_UDP      0X2C
251 #define IGC_PACKET_TYPE_IPV4_IPV6         0X05
252 #define IGC_PACKET_TYPE_IPV4_IPV6_TCP     0X15
253 #define IGC_PACKET_TYPE_IPV4_IPV6_UDP     0X25
254 #define IGC_PACKET_TYPE_IPV4_IPV6_EXT     0X0D
255 #define IGC_PACKET_TYPE_IPV4_IPV6_EXT_TCP 0X1D
256 #define IGC_PACKET_TYPE_IPV4_IPV6_EXT_UDP 0X2D
257 #define IGC_PACKET_TYPE_MAX               0X80
258 #define IGC_PACKET_TYPE_MASK              0X7F
259 #define IGC_PACKET_TYPE_SHIFT             0X04
260 
261 static inline uint32_t
rx_desc_pkt_info_to_pkt_type(uint32_t pkt_info)262 rx_desc_pkt_info_to_pkt_type(uint32_t pkt_info)
263 {
264 	static const uint32_t
265 		ptype_table[IGC_PACKET_TYPE_MAX] __rte_cache_aligned = {
266 		[IGC_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
267 			RTE_PTYPE_L3_IPV4,
268 		[IGC_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
269 			RTE_PTYPE_L3_IPV4_EXT,
270 		[IGC_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
271 			RTE_PTYPE_L3_IPV6,
272 		[IGC_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
273 			RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
274 			RTE_PTYPE_INNER_L3_IPV6,
275 		[IGC_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
276 			RTE_PTYPE_L3_IPV6_EXT,
277 		[IGC_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
278 			RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
279 			RTE_PTYPE_INNER_L3_IPV6_EXT,
280 		[IGC_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
281 			RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
282 		[IGC_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
283 			RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
284 		[IGC_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
285 			RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
286 			RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
287 		[IGC_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
288 			RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
289 		[IGC_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
290 			RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
291 			RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
292 		[IGC_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
293 			RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
294 		[IGC_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
295 			RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
296 		[IGC_PACKET_TYPE_IPV4_IPV6_UDP] =  RTE_PTYPE_L2_ETHER |
297 			RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
298 			RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
299 		[IGC_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
300 			RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
301 		[IGC_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
302 			RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
303 			RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
304 		[IGC_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
305 			RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
306 		[IGC_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
307 			RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
308 	};
309 	if (unlikely(pkt_info & IGC_RXDADV_PKTTYPE_ETQF))
310 		return RTE_PTYPE_UNKNOWN;
311 
312 	pkt_info = (pkt_info >> IGC_PACKET_TYPE_SHIFT) & IGC_PACKET_TYPE_MASK;
313 
314 	return ptype_table[pkt_info];
315 }
316 
317 static inline void
rx_desc_get_pkt_info(struct igc_rx_queue * rxq,struct rte_mbuf * rxm,union igc_adv_rx_desc * rxd,uint32_t staterr)318 rx_desc_get_pkt_info(struct igc_rx_queue *rxq, struct rte_mbuf *rxm,
319 		union igc_adv_rx_desc *rxd, uint32_t staterr)
320 {
321 	uint64_t pkt_flags;
322 	uint32_t hlen_type_rss;
323 	uint16_t pkt_info;
324 
325 	/* Prefetch data of first segment, if configured to do so. */
326 	rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
327 
328 	rxm->port = rxq->port_id;
329 	hlen_type_rss = rte_le_to_cpu_32(rxd->wb.lower.lo_dword.data);
330 	rxm->hash.rss = rte_le_to_cpu_32(rxd->wb.lower.hi_dword.rss);
331 	rxm->vlan_tci = rte_le_to_cpu_16(rxd->wb.upper.vlan);
332 
333 	pkt_flags = (hlen_type_rss & IGC_RXD_RSS_TYPE_MASK) ?
334 			PKT_RX_RSS_HASH : 0;
335 
336 	if (hlen_type_rss & IGC_RXD_VPKT)
337 		pkt_flags |= PKT_RX_VLAN;
338 
339 	pkt_flags |= rx_desc_statuserr_to_pkt_flags(staterr);
340 
341 	rxm->ol_flags = pkt_flags;
342 	pkt_info = rte_le_to_cpu_16(rxd->wb.lower.lo_dword.hs_rss.pkt_info);
343 	rxm->packet_type = rx_desc_pkt_info_to_pkt_type(pkt_info);
344 }
345 
346 static uint16_t
igc_recv_pkts(void * rx_queue,struct rte_mbuf ** rx_pkts,uint16_t nb_pkts)347 igc_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
348 {
349 	struct igc_rx_queue * const rxq = rx_queue;
350 	volatile union igc_adv_rx_desc * const rx_ring = rxq->rx_ring;
351 	struct igc_rx_entry * const sw_ring = rxq->sw_ring;
352 	uint16_t rx_id = rxq->rx_tail;
353 	uint16_t nb_rx = 0;
354 	uint16_t nb_hold = 0;
355 
356 	while (nb_rx < nb_pkts) {
357 		volatile union igc_adv_rx_desc *rxdp;
358 		struct igc_rx_entry *rxe;
359 		struct rte_mbuf *rxm;
360 		struct rte_mbuf *nmb;
361 		union igc_adv_rx_desc rxd;
362 		uint32_t staterr;
363 		uint16_t data_len;
364 
365 		/*
366 		 * The order of operations here is important as the DD status
367 		 * bit must not be read after any other descriptor fields.
368 		 * rx_ring and rxdp are pointing to volatile data so the order
369 		 * of accesses cannot be reordered by the compiler. If they were
370 		 * not volatile, they could be reordered which could lead to
371 		 * using invalid descriptor fields when read from rxd.
372 		 */
373 		rxdp = &rx_ring[rx_id];
374 		staterr = rte_cpu_to_le_32(rxdp->wb.upper.status_error);
375 		if (!(staterr & IGC_RXD_STAT_DD))
376 			break;
377 		rxd = *rxdp;
378 
379 		/*
380 		 * End of packet.
381 		 *
382 		 * If the IGC_RXD_STAT_EOP flag is not set, the RX packet is
383 		 * likely to be invalid and to be dropped by the various
384 		 * validation checks performed by the network stack.
385 		 *
386 		 * Allocate a new mbuf to replenish the RX ring descriptor.
387 		 * If the allocation fails:
388 		 *    - arrange for that RX descriptor to be the first one
389 		 *      being parsed the next time the receive function is
390 		 *      invoked [on the same queue].
391 		 *
392 		 *    - Stop parsing the RX ring and return immediately.
393 		 *
394 		 * This policy does not drop the packet received in the RX
395 		 * descriptor for which the allocation of a new mbuf failed.
396 		 * Thus, it allows that packet to be later retrieved if
397 		 * mbuf have been freed in the mean time.
398 		 * As a side effect, holding RX descriptors instead of
399 		 * systematically giving them back to the NIC may lead to
400 		 * RX ring exhaustion situations.
401 		 * However, the NIC can gracefully prevent such situations
402 		 * to happen by sending specific "back-pressure" flow control
403 		 * frames to its peer(s).
404 		 */
405 		PMD_RX_LOG(DEBUG,
406 			"port_id=%u queue_id=%u rx_id=%u staterr=0x%x data_len=%u",
407 			rxq->port_id, rxq->queue_id, rx_id, staterr,
408 			rte_le_to_cpu_16(rxd.wb.upper.length));
409 
410 		nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
411 		if (nmb == NULL) {
412 			unsigned int id;
413 			PMD_RX_LOG(DEBUG,
414 				"RX mbuf alloc failed, port_id=%u queue_id=%u",
415 				rxq->port_id, rxq->queue_id);
416 			id = rxq->port_id;
417 			rte_eth_devices[id].data->rx_mbuf_alloc_failed++;
418 			break;
419 		}
420 
421 		nb_hold++;
422 		rxe = &sw_ring[rx_id];
423 		rx_id++;
424 		if (rx_id >= rxq->nb_rx_desc)
425 			rx_id = 0;
426 
427 		/* Prefetch next mbuf while processing current one. */
428 		rte_igc_prefetch(sw_ring[rx_id].mbuf);
429 
430 		/*
431 		 * When next RX descriptor is on a cache-line boundary,
432 		 * prefetch the next 4 RX descriptors and the next 8 pointers
433 		 * to mbufs.
434 		 */
435 		if ((rx_id & 0x3) == 0) {
436 			rte_igc_prefetch(&rx_ring[rx_id]);
437 			rte_igc_prefetch(&sw_ring[rx_id]);
438 		}
439 
440 		/*
441 		 * Update RX descriptor with the physical address of the new
442 		 * data buffer of the new allocated mbuf.
443 		 */
444 		rxm = rxe->mbuf;
445 		rxe->mbuf = nmb;
446 		rxdp->read.hdr_addr = 0;
447 		rxdp->read.pkt_addr =
448 			rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
449 		rxm->next = NULL;
450 
451 		rxm->data_off = RTE_PKTMBUF_HEADROOM;
452 		data_len = rte_le_to_cpu_16(rxd.wb.upper.length) - rxq->crc_len;
453 		rxm->data_len = data_len;
454 		rxm->pkt_len = data_len;
455 		rxm->nb_segs = 1;
456 
457 		rx_desc_get_pkt_info(rxq, rxm, &rxd, staterr);
458 
459 		/*
460 		 * Store the mbuf address into the next entry of the array
461 		 * of returned packets.
462 		 */
463 		rx_pkts[nb_rx++] = rxm;
464 	}
465 	rxq->rx_tail = rx_id;
466 
467 	/*
468 	 * If the number of free RX descriptors is greater than the RX free
469 	 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
470 	 * register.
471 	 * Update the RDT with the value of the last processed RX descriptor
472 	 * minus 1, to guarantee that the RDT register is never equal to the
473 	 * RDH register, which creates a "full" ring situation from the
474 	 * hardware point of view...
475 	 */
476 	nb_hold = nb_hold + rxq->nb_rx_hold;
477 	if (nb_hold > rxq->rx_free_thresh) {
478 		PMD_RX_LOG(DEBUG,
479 			"port_id=%u queue_id=%u rx_tail=%u nb_hold=%u nb_rx=%u",
480 			rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
481 		rx_id = (rx_id == 0) ? (rxq->nb_rx_desc - 1) : (rx_id - 1);
482 		IGC_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
483 		nb_hold = 0;
484 	}
485 	rxq->nb_rx_hold = nb_hold;
486 	return nb_rx;
487 }
488 
489 static uint16_t
igc_recv_scattered_pkts(void * rx_queue,struct rte_mbuf ** rx_pkts,uint16_t nb_pkts)490 igc_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
491 			uint16_t nb_pkts)
492 {
493 	struct igc_rx_queue * const rxq = rx_queue;
494 	volatile union igc_adv_rx_desc * const rx_ring = rxq->rx_ring;
495 	struct igc_rx_entry * const sw_ring = rxq->sw_ring;
496 	struct rte_mbuf *first_seg = rxq->pkt_first_seg;
497 	struct rte_mbuf *last_seg = rxq->pkt_last_seg;
498 
499 	uint16_t rx_id = rxq->rx_tail;
500 	uint16_t nb_rx = 0;
501 	uint16_t nb_hold = 0;
502 
503 	while (nb_rx < nb_pkts) {
504 		volatile union igc_adv_rx_desc *rxdp;
505 		struct igc_rx_entry *rxe;
506 		struct rte_mbuf *rxm;
507 		struct rte_mbuf *nmb;
508 		union igc_adv_rx_desc rxd;
509 		uint32_t staterr;
510 		uint16_t data_len;
511 
512 next_desc:
513 		/*
514 		 * The order of operations here is important as the DD status
515 		 * bit must not be read after any other descriptor fields.
516 		 * rx_ring and rxdp are pointing to volatile data so the order
517 		 * of accesses cannot be reordered by the compiler. If they were
518 		 * not volatile, they could be reordered which could lead to
519 		 * using invalid descriptor fields when read from rxd.
520 		 */
521 		rxdp = &rx_ring[rx_id];
522 		staterr = rte_cpu_to_le_32(rxdp->wb.upper.status_error);
523 		if (!(staterr & IGC_RXD_STAT_DD))
524 			break;
525 		rxd = *rxdp;
526 
527 		/*
528 		 * Descriptor done.
529 		 *
530 		 * Allocate a new mbuf to replenish the RX ring descriptor.
531 		 * If the allocation fails:
532 		 *    - arrange for that RX descriptor to be the first one
533 		 *      being parsed the next time the receive function is
534 		 *      invoked [on the same queue].
535 		 *
536 		 *    - Stop parsing the RX ring and return immediately.
537 		 *
538 		 * This policy does not drop the packet received in the RX
539 		 * descriptor for which the allocation of a new mbuf failed.
540 		 * Thus, it allows that packet to be later retrieved if
541 		 * mbuf have been freed in the mean time.
542 		 * As a side effect, holding RX descriptors instead of
543 		 * systematically giving them back to the NIC may lead to
544 		 * RX ring exhaustion situations.
545 		 * However, the NIC can gracefully prevent such situations
546 		 * to happen by sending specific "back-pressure" flow control
547 		 * frames to its peer(s).
548 		 */
549 		PMD_RX_LOG(DEBUG,
550 			"port_id=%u queue_id=%u rx_id=%u staterr=0x%x data_len=%u",
551 			rxq->port_id, rxq->queue_id, rx_id, staterr,
552 			rte_le_to_cpu_16(rxd.wb.upper.length));
553 
554 		nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
555 		if (nmb == NULL) {
556 			unsigned int id;
557 			PMD_RX_LOG(DEBUG,
558 				"RX mbuf alloc failed, port_id=%u queue_id=%u",
559 				rxq->port_id, rxq->queue_id);
560 			id = rxq->port_id;
561 			rte_eth_devices[id].data->rx_mbuf_alloc_failed++;
562 			break;
563 		}
564 
565 		nb_hold++;
566 		rxe = &sw_ring[rx_id];
567 		rx_id++;
568 		if (rx_id >= rxq->nb_rx_desc)
569 			rx_id = 0;
570 
571 		/* Prefetch next mbuf while processing current one. */
572 		rte_igc_prefetch(sw_ring[rx_id].mbuf);
573 
574 		/*
575 		 * When next RX descriptor is on a cache-line boundary,
576 		 * prefetch the next 4 RX descriptors and the next 8 pointers
577 		 * to mbufs.
578 		 */
579 		if ((rx_id & 0x3) == 0) {
580 			rte_igc_prefetch(&rx_ring[rx_id]);
581 			rte_igc_prefetch(&sw_ring[rx_id]);
582 		}
583 
584 		/*
585 		 * Update RX descriptor with the physical address of the new
586 		 * data buffer of the new allocated mbuf.
587 		 */
588 		rxm = rxe->mbuf;
589 		rxe->mbuf = nmb;
590 		rxdp->read.hdr_addr = 0;
591 		rxdp->read.pkt_addr =
592 			rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
593 		rxm->next = NULL;
594 
595 		/*
596 		 * Set data length & data buffer address of mbuf.
597 		 */
598 		rxm->data_off = RTE_PKTMBUF_HEADROOM;
599 		data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
600 		rxm->data_len = data_len;
601 
602 		/*
603 		 * If this is the first buffer of the received packet,
604 		 * set the pointer to the first mbuf of the packet and
605 		 * initialize its context.
606 		 * Otherwise, update the total length and the number of segments
607 		 * of the current scattered packet, and update the pointer to
608 		 * the last mbuf of the current packet.
609 		 */
610 		if (first_seg == NULL) {
611 			first_seg = rxm;
612 			first_seg->pkt_len = data_len;
613 			first_seg->nb_segs = 1;
614 		} else {
615 			first_seg->pkt_len += data_len;
616 			first_seg->nb_segs++;
617 			last_seg->next = rxm;
618 		}
619 
620 		/*
621 		 * If this is not the last buffer of the received packet,
622 		 * update the pointer to the last mbuf of the current scattered
623 		 * packet and continue to parse the RX ring.
624 		 */
625 		if (!(staterr & IGC_RXD_STAT_EOP)) {
626 			last_seg = rxm;
627 			goto next_desc;
628 		}
629 
630 		/*
631 		 * This is the last buffer of the received packet.
632 		 * If the CRC is not stripped by the hardware:
633 		 *   - Subtract the CRC	length from the total packet length.
634 		 *   - If the last buffer only contains the whole CRC or a part
635 		 *     of it, free the mbuf associated to the last buffer.
636 		 *     If part of the CRC is also contained in the previous
637 		 *     mbuf, subtract the length of that CRC part from the
638 		 *     data length of the previous mbuf.
639 		 */
640 		if (unlikely(rxq->crc_len > 0)) {
641 			first_seg->pkt_len -= RTE_ETHER_CRC_LEN;
642 			if (data_len <= RTE_ETHER_CRC_LEN) {
643 				rte_pktmbuf_free_seg(rxm);
644 				first_seg->nb_segs--;
645 				last_seg->data_len = last_seg->data_len -
646 					 (RTE_ETHER_CRC_LEN - data_len);
647 				last_seg->next = NULL;
648 			} else {
649 				rxm->data_len = (uint16_t)
650 					(data_len - RTE_ETHER_CRC_LEN);
651 			}
652 		}
653 
654 		rx_desc_get_pkt_info(rxq, first_seg, &rxd, staterr);
655 
656 		/*
657 		 * Store the mbuf address into the next entry of the array
658 		 * of returned packets.
659 		 */
660 		rx_pkts[nb_rx++] = first_seg;
661 
662 		/* Setup receipt context for a new packet. */
663 		first_seg = NULL;
664 	}
665 	rxq->rx_tail = rx_id;
666 
667 	/*
668 	 * Save receive context.
669 	 */
670 	rxq->pkt_first_seg = first_seg;
671 	rxq->pkt_last_seg = last_seg;
672 
673 	/*
674 	 * If the number of free RX descriptors is greater than the RX free
675 	 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
676 	 * register.
677 	 * Update the RDT with the value of the last processed RX descriptor
678 	 * minus 1, to guarantee that the RDT register is never equal to the
679 	 * RDH register, which creates a "full" ring situation from the
680 	 * hardware point of view...
681 	 */
682 	nb_hold = nb_hold + rxq->nb_rx_hold;
683 	if (nb_hold > rxq->rx_free_thresh) {
684 		PMD_RX_LOG(DEBUG,
685 			"port_id=%u queue_id=%u rx_tail=%u nb_hold=%u nb_rx=%u",
686 			rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
687 		rx_id = (rx_id == 0) ? (rxq->nb_rx_desc - 1) : (rx_id - 1);
688 		IGC_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
689 		nb_hold = 0;
690 	}
691 	rxq->nb_rx_hold = nb_hold;
692 	return nb_rx;
693 }
694 
695 static void
igc_rx_queue_release_mbufs(struct igc_rx_queue * rxq)696 igc_rx_queue_release_mbufs(struct igc_rx_queue *rxq)
697 {
698 	unsigned int i;
699 
700 	if (rxq->sw_ring != NULL) {
701 		for (i = 0; i < rxq->nb_rx_desc; i++) {
702 			if (rxq->sw_ring[i].mbuf != NULL) {
703 				rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
704 				rxq->sw_ring[i].mbuf = NULL;
705 			}
706 		}
707 	}
708 }
709 
710 static void
igc_rx_queue_release(struct igc_rx_queue * rxq)711 igc_rx_queue_release(struct igc_rx_queue *rxq)
712 {
713 	igc_rx_queue_release_mbufs(rxq);
714 	rte_free(rxq->sw_ring);
715 	rte_free(rxq);
716 }
717 
eth_igc_rx_queue_release(void * rxq)718 void eth_igc_rx_queue_release(void *rxq)
719 {
720 	if (rxq)
721 		igc_rx_queue_release(rxq);
722 }
723 
eth_igc_rx_queue_count(struct rte_eth_dev * dev,uint16_t rx_queue_id)724 uint32_t eth_igc_rx_queue_count(struct rte_eth_dev *dev,
725 		uint16_t rx_queue_id)
726 {
727 	/**
728 	 * Check the DD bit of a rx descriptor of each 4 in a group,
729 	 * to avoid checking too frequently and downgrading performance
730 	 * too much.
731 	 */
732 #define IGC_RXQ_SCAN_INTERVAL 4
733 
734 	volatile union igc_adv_rx_desc *rxdp;
735 	struct igc_rx_queue *rxq;
736 	uint16_t desc = 0;
737 
738 	rxq = dev->data->rx_queues[rx_queue_id];
739 	rxdp = &rxq->rx_ring[rxq->rx_tail];
740 
741 	while (desc < rxq->nb_rx_desc - rxq->rx_tail) {
742 		if (unlikely(!(rxdp->wb.upper.status_error &
743 				IGC_RXD_STAT_DD)))
744 			return desc;
745 		desc += IGC_RXQ_SCAN_INTERVAL;
746 		rxdp += IGC_RXQ_SCAN_INTERVAL;
747 	}
748 	rxdp = &rxq->rx_ring[rxq->rx_tail + desc - rxq->nb_rx_desc];
749 
750 	while (desc < rxq->nb_rx_desc &&
751 		(rxdp->wb.upper.status_error & IGC_RXD_STAT_DD)) {
752 		desc += IGC_RXQ_SCAN_INTERVAL;
753 		rxdp += IGC_RXQ_SCAN_INTERVAL;
754 	}
755 
756 	return desc;
757 }
758 
eth_igc_rx_descriptor_done(void * rx_queue,uint16_t offset)759 int eth_igc_rx_descriptor_done(void *rx_queue, uint16_t offset)
760 {
761 	volatile union igc_adv_rx_desc *rxdp;
762 	struct igc_rx_queue *rxq = rx_queue;
763 	uint32_t desc;
764 
765 	if (unlikely(!rxq || offset >= rxq->nb_rx_desc))
766 		return 0;
767 
768 	desc = rxq->rx_tail + offset;
769 	if (desc >= rxq->nb_rx_desc)
770 		desc -= rxq->nb_rx_desc;
771 
772 	rxdp = &rxq->rx_ring[desc];
773 	return !!(rxdp->wb.upper.status_error &
774 			rte_cpu_to_le_32(IGC_RXD_STAT_DD));
775 }
776 
eth_igc_rx_descriptor_status(void * rx_queue,uint16_t offset)777 int eth_igc_rx_descriptor_status(void *rx_queue, uint16_t offset)
778 {
779 	struct igc_rx_queue *rxq = rx_queue;
780 	volatile uint32_t *status;
781 	uint32_t desc;
782 
783 	if (unlikely(!rxq || offset >= rxq->nb_rx_desc))
784 		return -EINVAL;
785 
786 	if (offset >= rxq->nb_rx_desc - rxq->nb_rx_hold)
787 		return RTE_ETH_RX_DESC_UNAVAIL;
788 
789 	desc = rxq->rx_tail + offset;
790 	if (desc >= rxq->nb_rx_desc)
791 		desc -= rxq->nb_rx_desc;
792 
793 	status = &rxq->rx_ring[desc].wb.upper.status_error;
794 	if (*status & rte_cpu_to_le_32(IGC_RXD_STAT_DD))
795 		return RTE_ETH_RX_DESC_DONE;
796 
797 	return RTE_ETH_RX_DESC_AVAIL;
798 }
799 
800 static int
igc_alloc_rx_queue_mbufs(struct igc_rx_queue * rxq)801 igc_alloc_rx_queue_mbufs(struct igc_rx_queue *rxq)
802 {
803 	struct igc_rx_entry *rxe = rxq->sw_ring;
804 	uint64_t dma_addr;
805 	unsigned int i;
806 
807 	/* Initialize software ring entries. */
808 	for (i = 0; i < rxq->nb_rx_desc; i++) {
809 		volatile union igc_adv_rx_desc *rxd;
810 		struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
811 
812 		if (mbuf == NULL) {
813 			PMD_DRV_LOG(ERR, "RX mbuf alloc failed, queue_id=%hu",
814 				rxq->queue_id);
815 			return -ENOMEM;
816 		}
817 		dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
818 		rxd = &rxq->rx_ring[i];
819 		rxd->read.hdr_addr = 0;
820 		rxd->read.pkt_addr = dma_addr;
821 		rxe[i].mbuf = mbuf;
822 	}
823 
824 	return 0;
825 }
826 
827 /*
828  * RSS random key supplied in section 7.1.2.9.3 of the Intel I225 datasheet.
829  * Used as the default key.
830  */
831 static uint8_t default_rss_key[40] = {
832 	0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
833 	0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
834 	0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
835 	0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
836 	0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
837 };
838 
839 void
igc_rss_disable(struct rte_eth_dev * dev)840 igc_rss_disable(struct rte_eth_dev *dev)
841 {
842 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
843 	uint32_t mrqc;
844 
845 	mrqc = IGC_READ_REG(hw, IGC_MRQC);
846 	mrqc &= ~IGC_MRQC_ENABLE_MASK;
847 	IGC_WRITE_REG(hw, IGC_MRQC, mrqc);
848 }
849 
850 void
igc_hw_rss_hash_set(struct igc_hw * hw,struct rte_eth_rss_conf * rss_conf)851 igc_hw_rss_hash_set(struct igc_hw *hw, struct rte_eth_rss_conf *rss_conf)
852 {
853 	uint32_t *hash_key = (uint32_t *)rss_conf->rss_key;
854 	uint32_t mrqc;
855 	uint64_t rss_hf;
856 
857 	if (hash_key != NULL) {
858 		uint8_t i;
859 
860 		/* Fill in RSS hash key */
861 		for (i = 0; i < IGC_HKEY_MAX_INDEX; i++)
862 			IGC_WRITE_REG_LE_VALUE(hw, IGC_RSSRK(i), hash_key[i]);
863 	}
864 
865 	/* Set configured hashing protocols in MRQC register */
866 	rss_hf = rss_conf->rss_hf;
867 	mrqc = IGC_MRQC_ENABLE_RSS_4Q; /* RSS enabled. */
868 	if (rss_hf & ETH_RSS_IPV4)
869 		mrqc |= IGC_MRQC_RSS_FIELD_IPV4;
870 	if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
871 		mrqc |= IGC_MRQC_RSS_FIELD_IPV4_TCP;
872 	if (rss_hf & ETH_RSS_IPV6)
873 		mrqc |= IGC_MRQC_RSS_FIELD_IPV6;
874 	if (rss_hf & ETH_RSS_IPV6_EX)
875 		mrqc |= IGC_MRQC_RSS_FIELD_IPV6_EX;
876 	if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
877 		mrqc |= IGC_MRQC_RSS_FIELD_IPV6_TCP;
878 	if (rss_hf & ETH_RSS_IPV6_TCP_EX)
879 		mrqc |= IGC_MRQC_RSS_FIELD_IPV6_TCP_EX;
880 	if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
881 		mrqc |= IGC_MRQC_RSS_FIELD_IPV4_UDP;
882 	if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
883 		mrqc |= IGC_MRQC_RSS_FIELD_IPV6_UDP;
884 	if (rss_hf & ETH_RSS_IPV6_UDP_EX)
885 		mrqc |= IGC_MRQC_RSS_FIELD_IPV6_UDP_EX;
886 	IGC_WRITE_REG(hw, IGC_MRQC, mrqc);
887 }
888 
889 static void
igc_rss_configure(struct rte_eth_dev * dev)890 igc_rss_configure(struct rte_eth_dev *dev)
891 {
892 	struct rte_eth_rss_conf rss_conf;
893 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
894 	uint16_t i;
895 
896 	/* Fill in redirection table. */
897 	for (i = 0; i < IGC_RSS_RDT_SIZD; i++) {
898 		union igc_rss_reta_reg reta;
899 		uint16_t q_idx, reta_idx;
900 
901 		q_idx = (uint8_t)((dev->data->nb_rx_queues > 1) ?
902 				   i % dev->data->nb_rx_queues : 0);
903 		reta_idx = i % sizeof(reta);
904 		reta.bytes[reta_idx] = q_idx;
905 		if (reta_idx == sizeof(reta) - 1)
906 			IGC_WRITE_REG_LE_VALUE(hw,
907 				IGC_RETA(i / sizeof(reta)), reta.dword);
908 	}
909 
910 	/*
911 	 * Configure the RSS key and the RSS protocols used to compute
912 	 * the RSS hash of input packets.
913 	 */
914 	rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
915 	if (rss_conf.rss_key == NULL)
916 		rss_conf.rss_key = default_rss_key;
917 	igc_hw_rss_hash_set(hw, &rss_conf);
918 }
919 
920 int
igc_del_rss_filter(struct rte_eth_dev * dev)921 igc_del_rss_filter(struct rte_eth_dev *dev)
922 {
923 	struct igc_rss_filter *rss_filter = IGC_DEV_PRIVATE_RSS_FILTER(dev);
924 
925 	if (rss_filter->enable) {
926 		/* recover default RSS configuration */
927 		igc_rss_configure(dev);
928 
929 		/* disable RSS logic and clear filter data */
930 		igc_rss_disable(dev);
931 		memset(rss_filter, 0, sizeof(*rss_filter));
932 		return 0;
933 	}
934 	PMD_DRV_LOG(ERR, "filter not exist!");
935 	return -ENOENT;
936 }
937 
938 /* Initiate the filter structure by the structure of rte_flow_action_rss */
939 void
igc_rss_conf_set(struct igc_rss_filter * out,const struct rte_flow_action_rss * rss)940 igc_rss_conf_set(struct igc_rss_filter *out,
941 		const struct rte_flow_action_rss *rss)
942 {
943 	out->conf.func = rss->func;
944 	out->conf.level = rss->level;
945 	out->conf.types = rss->types;
946 
947 	if (rss->key_len == sizeof(out->key)) {
948 		memcpy(out->key, rss->key, rss->key_len);
949 		out->conf.key = out->key;
950 		out->conf.key_len = rss->key_len;
951 	} else {
952 		out->conf.key = NULL;
953 		out->conf.key_len = 0;
954 	}
955 
956 	if (rss->queue_num <= IGC_RSS_RDT_SIZD) {
957 		memcpy(out->queue, rss->queue,
958 			sizeof(*out->queue) * rss->queue_num);
959 		out->conf.queue = out->queue;
960 		out->conf.queue_num = rss->queue_num;
961 	} else {
962 		out->conf.queue = NULL;
963 		out->conf.queue_num = 0;
964 	}
965 }
966 
967 int
igc_add_rss_filter(struct rte_eth_dev * dev,struct igc_rss_filter * rss)968 igc_add_rss_filter(struct rte_eth_dev *dev, struct igc_rss_filter *rss)
969 {
970 	struct rte_eth_rss_conf rss_conf = {
971 		.rss_key = rss->conf.key_len ?
972 			(void *)(uintptr_t)rss->conf.key : NULL,
973 		.rss_key_len = rss->conf.key_len,
974 		.rss_hf = rss->conf.types,
975 	};
976 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
977 	struct igc_rss_filter *rss_filter = IGC_DEV_PRIVATE_RSS_FILTER(dev);
978 	uint32_t i, j;
979 
980 	/* check RSS type is valid */
981 	if ((rss_conf.rss_hf & IGC_RSS_OFFLOAD_ALL) == 0) {
982 		PMD_DRV_LOG(ERR,
983 			"RSS type(0x%" PRIx64 ") error!, only 0x%" PRIx64
984 			" been supported", rss_conf.rss_hf,
985 			(uint64_t)IGC_RSS_OFFLOAD_ALL);
986 		return -EINVAL;
987 	}
988 
989 	/* check queue count is not zero */
990 	if (!rss->conf.queue_num) {
991 		PMD_DRV_LOG(ERR, "Queue number should not be 0!");
992 		return -EINVAL;
993 	}
994 
995 	/* check queue id is valid */
996 	for (i = 0; i < rss->conf.queue_num; i++)
997 		if (rss->conf.queue[i] >= dev->data->nb_rx_queues) {
998 			PMD_DRV_LOG(ERR, "Queue id %u is invalid!",
999 					rss->conf.queue[i]);
1000 			return -EINVAL;
1001 		}
1002 
1003 	/* only support one filter */
1004 	if (rss_filter->enable) {
1005 		PMD_DRV_LOG(ERR, "Only support one RSS filter!");
1006 		return -ENOTSUP;
1007 	}
1008 	rss_filter->enable = 1;
1009 
1010 	igc_rss_conf_set(rss_filter, &rss->conf);
1011 
1012 	/* Fill in redirection table. */
1013 	for (i = 0, j = 0; i < IGC_RSS_RDT_SIZD; i++, j++) {
1014 		union igc_rss_reta_reg reta;
1015 		uint16_t q_idx, reta_idx;
1016 
1017 		if (j == rss->conf.queue_num)
1018 			j = 0;
1019 		q_idx = rss->conf.queue[j];
1020 		reta_idx = i % sizeof(reta);
1021 		reta.bytes[reta_idx] = q_idx;
1022 		if (reta_idx == sizeof(reta) - 1)
1023 			IGC_WRITE_REG_LE_VALUE(hw,
1024 				IGC_RETA(i / sizeof(reta)), reta.dword);
1025 	}
1026 
1027 	if (rss_conf.rss_key == NULL)
1028 		rss_conf.rss_key = default_rss_key;
1029 	igc_hw_rss_hash_set(hw, &rss_conf);
1030 	return 0;
1031 }
1032 
1033 void
igc_clear_rss_filter(struct rte_eth_dev * dev)1034 igc_clear_rss_filter(struct rte_eth_dev *dev)
1035 {
1036 	struct igc_rss_filter *rss_filter = IGC_DEV_PRIVATE_RSS_FILTER(dev);
1037 
1038 	if (!rss_filter->enable)
1039 		return;
1040 
1041 	/* recover default RSS configuration */
1042 	igc_rss_configure(dev);
1043 
1044 	/* disable RSS logic and clear filter data */
1045 	igc_rss_disable(dev);
1046 	memset(rss_filter, 0, sizeof(*rss_filter));
1047 }
1048 
1049 static int
igc_dev_mq_rx_configure(struct rte_eth_dev * dev)1050 igc_dev_mq_rx_configure(struct rte_eth_dev *dev)
1051 {
1052 	if (RTE_ETH_DEV_SRIOV(dev).active) {
1053 		PMD_DRV_LOG(ERR, "SRIOV unsupported!");
1054 		return -EINVAL;
1055 	}
1056 
1057 	switch (dev->data->dev_conf.rxmode.mq_mode) {
1058 	case ETH_MQ_RX_RSS:
1059 		igc_rss_configure(dev);
1060 		break;
1061 	case ETH_MQ_RX_NONE:
1062 		/*
1063 		 * configure RSS register for following,
1064 		 * then disable the RSS logic
1065 		 */
1066 		igc_rss_configure(dev);
1067 		igc_rss_disable(dev);
1068 		break;
1069 	default:
1070 		PMD_DRV_LOG(ERR, "rx mode(%d) not supported!",
1071 			dev->data->dev_conf.rxmode.mq_mode);
1072 		return -EINVAL;
1073 	}
1074 	return 0;
1075 }
1076 
1077 int
igc_rx_init(struct rte_eth_dev * dev)1078 igc_rx_init(struct rte_eth_dev *dev)
1079 {
1080 	struct igc_rx_queue *rxq;
1081 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1082 	uint64_t offloads = dev->data->dev_conf.rxmode.offloads;
1083 	uint32_t max_rx_pkt_len = dev->data->dev_conf.rxmode.max_rx_pkt_len;
1084 	uint32_t rctl;
1085 	uint32_t rxcsum;
1086 	uint16_t buf_size;
1087 	uint16_t rctl_bsize;
1088 	uint16_t i;
1089 	int ret;
1090 
1091 	dev->rx_pkt_burst = igc_recv_pkts;
1092 
1093 	/*
1094 	 * Make sure receives are disabled while setting
1095 	 * up the descriptor ring.
1096 	 */
1097 	rctl = IGC_READ_REG(hw, IGC_RCTL);
1098 	IGC_WRITE_REG(hw, IGC_RCTL, rctl & ~IGC_RCTL_EN);
1099 
1100 	/* Configure support of jumbo frames, if any. */
1101 	if (offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) {
1102 		rctl |= IGC_RCTL_LPE;
1103 
1104 		/*
1105 		 * Set maximum packet length by default, and might be updated
1106 		 * together with enabling/disabling dual VLAN.
1107 		 */
1108 		IGC_WRITE_REG(hw, IGC_RLPML, max_rx_pkt_len);
1109 	} else {
1110 		rctl &= ~IGC_RCTL_LPE;
1111 	}
1112 
1113 	/* Configure and enable each RX queue. */
1114 	rctl_bsize = 0;
1115 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
1116 		uint64_t bus_addr;
1117 		uint32_t rxdctl;
1118 		uint32_t srrctl;
1119 
1120 		rxq = dev->data->rx_queues[i];
1121 		rxq->flags = 0;
1122 
1123 		/* Allocate buffers for descriptor rings and set up queue */
1124 		ret = igc_alloc_rx_queue_mbufs(rxq);
1125 		if (ret)
1126 			return ret;
1127 
1128 		/*
1129 		 * Reset crc_len in case it was changed after queue setup by a
1130 		 * call to configure
1131 		 */
1132 		rxq->crc_len = (offloads & DEV_RX_OFFLOAD_KEEP_CRC) ?
1133 				RTE_ETHER_CRC_LEN : 0;
1134 
1135 		bus_addr = rxq->rx_ring_phys_addr;
1136 		IGC_WRITE_REG(hw, IGC_RDLEN(rxq->reg_idx),
1137 				rxq->nb_rx_desc *
1138 				sizeof(union igc_adv_rx_desc));
1139 		IGC_WRITE_REG(hw, IGC_RDBAH(rxq->reg_idx),
1140 				(uint32_t)(bus_addr >> 32));
1141 		IGC_WRITE_REG(hw, IGC_RDBAL(rxq->reg_idx),
1142 				(uint32_t)bus_addr);
1143 
1144 		/* set descriptor configuration */
1145 		srrctl = IGC_SRRCTL_DESCTYPE_ADV_ONEBUF;
1146 
1147 		srrctl |= (uint32_t)(RTE_PKTMBUF_HEADROOM / 64) <<
1148 				IGC_SRRCTL_BSIZEHEADER_SHIFT;
1149 		/*
1150 		 * Configure RX buffer size.
1151 		 */
1152 		buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
1153 			RTE_PKTMBUF_HEADROOM);
1154 		if (buf_size >= 1024) {
1155 			/*
1156 			 * Configure the BSIZEPACKET field of the SRRCTL
1157 			 * register of the queue.
1158 			 * Value is in 1 KB resolution, from 1 KB to 16 KB.
1159 			 * If this field is equal to 0b, then RCTL.BSIZE
1160 			 * determines the RX packet buffer size.
1161 			 */
1162 
1163 			srrctl |= ((buf_size >> IGC_SRRCTL_BSIZEPKT_SHIFT) &
1164 				   IGC_SRRCTL_BSIZEPKT_MASK);
1165 			buf_size = (uint16_t)((srrctl &
1166 					IGC_SRRCTL_BSIZEPKT_MASK) <<
1167 					IGC_SRRCTL_BSIZEPKT_SHIFT);
1168 
1169 			/* It adds dual VLAN length for supporting dual VLAN */
1170 			if (max_rx_pkt_len + 2 * VLAN_TAG_SIZE > buf_size)
1171 				dev->data->scattered_rx = 1;
1172 		} else {
1173 			/*
1174 			 * Use BSIZE field of the device RCTL register.
1175 			 */
1176 			if (rctl_bsize == 0 || rctl_bsize > buf_size)
1177 				rctl_bsize = buf_size;
1178 			dev->data->scattered_rx = 1;
1179 		}
1180 
1181 		/* Set if packets are dropped when no descriptors available */
1182 		if (rxq->drop_en)
1183 			srrctl |= IGC_SRRCTL_DROP_EN;
1184 
1185 		IGC_WRITE_REG(hw, IGC_SRRCTL(rxq->reg_idx), srrctl);
1186 
1187 		/* Enable this RX queue. */
1188 		rxdctl = IGC_RXDCTL_QUEUE_ENABLE;
1189 		rxdctl |= ((uint32_t)rxq->pthresh << IGC_RXDCTL_PTHRESH_SHIFT) &
1190 				IGC_RXDCTL_PTHRESH_MSK;
1191 		rxdctl |= ((uint32_t)rxq->hthresh << IGC_RXDCTL_HTHRESH_SHIFT) &
1192 				IGC_RXDCTL_HTHRESH_MSK;
1193 		rxdctl |= ((uint32_t)rxq->wthresh << IGC_RXDCTL_WTHRESH_SHIFT) &
1194 				IGC_RXDCTL_WTHRESH_MSK;
1195 		IGC_WRITE_REG(hw, IGC_RXDCTL(rxq->reg_idx), rxdctl);
1196 	}
1197 
1198 	if (offloads & DEV_RX_OFFLOAD_SCATTER)
1199 		dev->data->scattered_rx = 1;
1200 
1201 	if (dev->data->scattered_rx) {
1202 		PMD_DRV_LOG(DEBUG, "forcing scatter mode");
1203 		dev->rx_pkt_burst = igc_recv_scattered_pkts;
1204 	}
1205 	/*
1206 	 * Setup BSIZE field of RCTL register, if needed.
1207 	 * Buffer sizes >= 1024 are not [supposed to be] setup in the RCTL
1208 	 * register, since the code above configures the SRRCTL register of
1209 	 * the RX queue in such a case.
1210 	 * All configurable sizes are:
1211 	 * 16384: rctl |= (IGC_RCTL_SZ_16384 | IGC_RCTL_BSEX);
1212 	 *  8192: rctl |= (IGC_RCTL_SZ_8192  | IGC_RCTL_BSEX);
1213 	 *  4096: rctl |= (IGC_RCTL_SZ_4096  | IGC_RCTL_BSEX);
1214 	 *  2048: rctl |= IGC_RCTL_SZ_2048;
1215 	 *  1024: rctl |= IGC_RCTL_SZ_1024;
1216 	 *   512: rctl |= IGC_RCTL_SZ_512;
1217 	 *   256: rctl |= IGC_RCTL_SZ_256;
1218 	 */
1219 	if (rctl_bsize > 0) {
1220 		if (rctl_bsize >= 512) /* 512 <= buf_size < 1024 - use 512 */
1221 			rctl |= IGC_RCTL_SZ_512;
1222 		else /* 256 <= buf_size < 512 - use 256 */
1223 			rctl |= IGC_RCTL_SZ_256;
1224 	}
1225 
1226 	/*
1227 	 * Configure RSS if device configured with multiple RX queues.
1228 	 */
1229 	igc_dev_mq_rx_configure(dev);
1230 
1231 	/* Update the rctl since igc_dev_mq_rx_configure may change its value */
1232 	rctl |= IGC_READ_REG(hw, IGC_RCTL);
1233 
1234 	/*
1235 	 * Setup the Checksum Register.
1236 	 * Receive Full-Packet Checksum Offload is mutually exclusive with RSS.
1237 	 */
1238 	rxcsum = IGC_READ_REG(hw, IGC_RXCSUM);
1239 	rxcsum |= IGC_RXCSUM_PCSD;
1240 
1241 	/* Enable both L3/L4 rx checksum offload */
1242 	if (offloads & DEV_RX_OFFLOAD_IPV4_CKSUM)
1243 		rxcsum |= IGC_RXCSUM_IPOFL;
1244 	else
1245 		rxcsum &= ~IGC_RXCSUM_IPOFL;
1246 
1247 	if (offloads &
1248 		(DEV_RX_OFFLOAD_TCP_CKSUM | DEV_RX_OFFLOAD_UDP_CKSUM)) {
1249 		rxcsum |= IGC_RXCSUM_TUOFL;
1250 		offloads |= DEV_RX_OFFLOAD_SCTP_CKSUM;
1251 	} else {
1252 		rxcsum &= ~IGC_RXCSUM_TUOFL;
1253 	}
1254 
1255 	if (offloads & DEV_RX_OFFLOAD_SCTP_CKSUM)
1256 		rxcsum |= IGC_RXCSUM_CRCOFL;
1257 	else
1258 		rxcsum &= ~IGC_RXCSUM_CRCOFL;
1259 
1260 	IGC_WRITE_REG(hw, IGC_RXCSUM, rxcsum);
1261 
1262 	/* Setup the Receive Control Register. */
1263 	if (offloads & DEV_RX_OFFLOAD_KEEP_CRC)
1264 		rctl &= ~IGC_RCTL_SECRC; /* Do not Strip Ethernet CRC. */
1265 	else
1266 		rctl |= IGC_RCTL_SECRC; /* Strip Ethernet CRC. */
1267 
1268 	rctl &= ~IGC_RCTL_MO_MSK;
1269 	rctl &= ~IGC_RCTL_LBM_MSK;
1270 	rctl |= IGC_RCTL_EN | IGC_RCTL_BAM | IGC_RCTL_LBM_NO |
1271 			IGC_RCTL_DPF |
1272 			(hw->mac.mc_filter_type << IGC_RCTL_MO_SHIFT);
1273 
1274 	if (dev->data->dev_conf.lpbk_mode == 1)
1275 		rctl |= IGC_RCTL_LBM_MAC;
1276 
1277 	rctl &= ~(IGC_RCTL_HSEL_MSK | IGC_RCTL_CFIEN | IGC_RCTL_CFI |
1278 			IGC_RCTL_PSP | IGC_RCTL_PMCF);
1279 
1280 	/* Make sure VLAN Filters are off. */
1281 	rctl &= ~IGC_RCTL_VFE;
1282 	/* Don't store bad packets. */
1283 	rctl &= ~IGC_RCTL_SBP;
1284 
1285 	/* Enable Receives. */
1286 	IGC_WRITE_REG(hw, IGC_RCTL, rctl);
1287 
1288 	/*
1289 	 * Setup the HW Rx Head and Tail Descriptor Pointers.
1290 	 * This needs to be done after enable.
1291 	 */
1292 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
1293 		rxq = dev->data->rx_queues[i];
1294 		IGC_WRITE_REG(hw, IGC_RDH(rxq->reg_idx), 0);
1295 		IGC_WRITE_REG(hw, IGC_RDT(rxq->reg_idx),
1296 				rxq->nb_rx_desc - 1);
1297 
1298 		/* strip queue vlan offload */
1299 		if (rxq->offloads & DEV_RX_OFFLOAD_VLAN_STRIP) {
1300 			uint32_t dvmolr;
1301 			dvmolr = IGC_READ_REG(hw, IGC_DVMOLR(rxq->queue_id));
1302 
1303 			/* If vlan been stripped off, the CRC is meaningless. */
1304 			dvmolr |= IGC_DVMOLR_STRVLAN | IGC_DVMOLR_STRCRC;
1305 			IGC_WRITE_REG(hw, IGC_DVMOLR(rxq->reg_idx), dvmolr);
1306 		}
1307 	}
1308 
1309 	return 0;
1310 }
1311 
1312 static void
igc_reset_rx_queue(struct igc_rx_queue * rxq)1313 igc_reset_rx_queue(struct igc_rx_queue *rxq)
1314 {
1315 	static const union igc_adv_rx_desc zeroed_desc = { {0} };
1316 	unsigned int i;
1317 
1318 	/* Zero out HW ring memory */
1319 	for (i = 0; i < rxq->nb_rx_desc; i++)
1320 		rxq->rx_ring[i] = zeroed_desc;
1321 
1322 	rxq->rx_tail = 0;
1323 	rxq->pkt_first_seg = NULL;
1324 	rxq->pkt_last_seg = NULL;
1325 }
1326 
1327 int
eth_igc_rx_queue_setup(struct rte_eth_dev * dev,uint16_t queue_idx,uint16_t nb_desc,unsigned int socket_id,const struct rte_eth_rxconf * rx_conf,struct rte_mempool * mp)1328 eth_igc_rx_queue_setup(struct rte_eth_dev *dev,
1329 			 uint16_t queue_idx,
1330 			 uint16_t nb_desc,
1331 			 unsigned int socket_id,
1332 			 const struct rte_eth_rxconf *rx_conf,
1333 			 struct rte_mempool *mp)
1334 {
1335 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1336 	const struct rte_memzone *rz;
1337 	struct igc_rx_queue *rxq;
1338 	unsigned int size;
1339 
1340 	/*
1341 	 * Validate number of receive descriptors.
1342 	 * It must not exceed hardware maximum, and must be multiple
1343 	 * of IGC_RX_DESCRIPTOR_MULTIPLE.
1344 	 */
1345 	if (nb_desc % IGC_RX_DESCRIPTOR_MULTIPLE != 0 ||
1346 		nb_desc > IGC_MAX_RXD || nb_desc < IGC_MIN_RXD) {
1347 		PMD_DRV_LOG(ERR,
1348 			"RX descriptor must be multiple of %u(cur: %u) and between %u and %u",
1349 			IGC_RX_DESCRIPTOR_MULTIPLE, nb_desc,
1350 			IGC_MIN_RXD, IGC_MAX_RXD);
1351 		return -EINVAL;
1352 	}
1353 
1354 	/* Free memory prior to re-allocation if needed */
1355 	if (dev->data->rx_queues[queue_idx] != NULL) {
1356 		igc_rx_queue_release(dev->data->rx_queues[queue_idx]);
1357 		dev->data->rx_queues[queue_idx] = NULL;
1358 	}
1359 
1360 	/* First allocate the RX queue data structure. */
1361 	rxq = rte_zmalloc("ethdev RX queue", sizeof(struct igc_rx_queue),
1362 			  RTE_CACHE_LINE_SIZE);
1363 	if (rxq == NULL)
1364 		return -ENOMEM;
1365 	rxq->offloads = rx_conf->offloads;
1366 	rxq->mb_pool = mp;
1367 	rxq->nb_rx_desc = nb_desc;
1368 	rxq->pthresh = rx_conf->rx_thresh.pthresh;
1369 	rxq->hthresh = rx_conf->rx_thresh.hthresh;
1370 	rxq->wthresh = rx_conf->rx_thresh.wthresh;
1371 	rxq->drop_en = rx_conf->rx_drop_en;
1372 	rxq->rx_free_thresh = rx_conf->rx_free_thresh;
1373 	rxq->queue_id = queue_idx;
1374 	rxq->reg_idx = queue_idx;
1375 	rxq->port_id = dev->data->port_id;
1376 
1377 	/*
1378 	 *  Allocate RX ring hardware descriptors. A memzone large enough to
1379 	 *  handle the maximum ring size is allocated in order to allow for
1380 	 *  resizing in later calls to the queue setup function.
1381 	 */
1382 	size = sizeof(union igc_adv_rx_desc) * IGC_MAX_RXD;
1383 	rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx, size,
1384 				      IGC_ALIGN, socket_id);
1385 	if (rz == NULL) {
1386 		igc_rx_queue_release(rxq);
1387 		return -ENOMEM;
1388 	}
1389 	rxq->rdt_reg_addr = IGC_PCI_REG_ADDR(hw, IGC_RDT(rxq->reg_idx));
1390 	rxq->rdh_reg_addr = IGC_PCI_REG_ADDR(hw, IGC_RDH(rxq->reg_idx));
1391 	rxq->rx_ring_phys_addr = rz->iova;
1392 	rxq->rx_ring = (union igc_adv_rx_desc *)rz->addr;
1393 
1394 	/* Allocate software ring. */
1395 	rxq->sw_ring = rte_zmalloc("rxq->sw_ring",
1396 				   sizeof(struct igc_rx_entry) * nb_desc,
1397 				   RTE_CACHE_LINE_SIZE);
1398 	if (rxq->sw_ring == NULL) {
1399 		igc_rx_queue_release(rxq);
1400 		return -ENOMEM;
1401 	}
1402 
1403 	PMD_DRV_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%" PRIx64,
1404 		rxq->sw_ring, rxq->rx_ring, rxq->rx_ring_phys_addr);
1405 
1406 	dev->data->rx_queues[queue_idx] = rxq;
1407 	igc_reset_rx_queue(rxq);
1408 
1409 	return 0;
1410 }
1411 
1412 /* prepare packets for transmit */
1413 static uint16_t
eth_igc_prep_pkts(__rte_unused void * tx_queue,struct rte_mbuf ** tx_pkts,uint16_t nb_pkts)1414 eth_igc_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
1415 		uint16_t nb_pkts)
1416 {
1417 	int i, ret;
1418 	struct rte_mbuf *m;
1419 
1420 	for (i = 0; i < nb_pkts; i++) {
1421 		m = tx_pkts[i];
1422 
1423 		/* Check some limitations for TSO in hardware */
1424 		if (m->ol_flags & IGC_TX_OFFLOAD_SEG)
1425 			if (m->tso_segsz > IGC_TSO_MAX_MSS ||
1426 				m->l2_len + m->l3_len + m->l4_len >
1427 				IGC_TSO_MAX_HDRLEN) {
1428 				rte_errno = EINVAL;
1429 				return i;
1430 			}
1431 
1432 		if (m->ol_flags & IGC_TX_OFFLOAD_NOTSUP_MASK) {
1433 			rte_errno = ENOTSUP;
1434 			return i;
1435 		}
1436 
1437 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
1438 		ret = rte_validate_tx_offload(m);
1439 		if (ret != 0) {
1440 			rte_errno = -ret;
1441 			return i;
1442 		}
1443 #endif
1444 		ret = rte_net_intel_cksum_prepare(m);
1445 		if (ret != 0) {
1446 			rte_errno = -ret;
1447 			return i;
1448 		}
1449 	}
1450 
1451 	return i;
1452 }
1453 
1454 /*
1455  *There're some limitations in hardware for TCP segmentation offload. We
1456  *should check whether the parameters are valid.
1457  */
1458 static inline uint64_t
check_tso_para(uint64_t ol_req,union igc_tx_offload ol_para)1459 check_tso_para(uint64_t ol_req, union igc_tx_offload ol_para)
1460 {
1461 	if (!(ol_req & IGC_TX_OFFLOAD_SEG))
1462 		return ol_req;
1463 	if (ol_para.tso_segsz > IGC_TSO_MAX_MSS || ol_para.l2_len +
1464 		ol_para.l3_len + ol_para.l4_len > IGC_TSO_MAX_HDRLEN) {
1465 		ol_req &= ~IGC_TX_OFFLOAD_SEG;
1466 		ol_req |= PKT_TX_TCP_CKSUM;
1467 	}
1468 	return ol_req;
1469 }
1470 
1471 /*
1472  * Check which hardware context can be used. Use the existing match
1473  * or create a new context descriptor.
1474  */
1475 static inline uint32_t
what_advctx_update(struct igc_tx_queue * txq,uint64_t flags,union igc_tx_offload tx_offload)1476 what_advctx_update(struct igc_tx_queue *txq, uint64_t flags,
1477 		union igc_tx_offload tx_offload)
1478 {
1479 	uint32_t curr = txq->ctx_curr;
1480 
1481 	/* If match with the current context */
1482 	if (likely(txq->ctx_cache[curr].flags == flags &&
1483 		txq->ctx_cache[curr].tx_offload.data ==
1484 		(txq->ctx_cache[curr].tx_offload_mask.data &
1485 		tx_offload.data))) {
1486 		return curr;
1487 	}
1488 
1489 	/* Total two context, if match with the second context */
1490 	curr ^= 1;
1491 	if (likely(txq->ctx_cache[curr].flags == flags &&
1492 		txq->ctx_cache[curr].tx_offload.data ==
1493 		(txq->ctx_cache[curr].tx_offload_mask.data &
1494 		tx_offload.data))) {
1495 		txq->ctx_curr = curr;
1496 		return curr;
1497 	}
1498 
1499 	/* Mismatch, create new one */
1500 	return IGC_CTX_NUM;
1501 }
1502 
1503 /*
1504  * This is a separate function, looking for optimization opportunity here
1505  * Rework required to go with the pre-defined values.
1506  */
1507 static inline void
igc_set_xmit_ctx(struct igc_tx_queue * txq,volatile struct igc_adv_tx_context_desc * ctx_txd,uint64_t ol_flags,union igc_tx_offload tx_offload)1508 igc_set_xmit_ctx(struct igc_tx_queue *txq,
1509 		volatile struct igc_adv_tx_context_desc *ctx_txd,
1510 		uint64_t ol_flags, union igc_tx_offload tx_offload)
1511 {
1512 	uint32_t type_tucmd_mlhl;
1513 	uint32_t mss_l4len_idx;
1514 	uint32_t ctx_curr;
1515 	uint32_t vlan_macip_lens;
1516 	union igc_tx_offload tx_offload_mask;
1517 
1518 	/* Use the previous context */
1519 	txq->ctx_curr ^= 1;
1520 	ctx_curr = txq->ctx_curr;
1521 
1522 	tx_offload_mask.data = 0;
1523 	type_tucmd_mlhl = 0;
1524 
1525 	/* Specify which HW CTX to upload. */
1526 	mss_l4len_idx = (ctx_curr << IGC_ADVTXD_IDX_SHIFT);
1527 
1528 	if (ol_flags & PKT_TX_VLAN_PKT)
1529 		tx_offload_mask.vlan_tci = 0xffff;
1530 
1531 	/* check if TCP segmentation required for this packet */
1532 	if (ol_flags & IGC_TX_OFFLOAD_SEG) {
1533 		/* implies IP cksum in IPv4 */
1534 		if (ol_flags & PKT_TX_IP_CKSUM)
1535 			type_tucmd_mlhl = IGC_ADVTXD_TUCMD_IPV4 |
1536 				IGC_ADVTXD_DTYP_CTXT | IGC_ADVTXD_DCMD_DEXT;
1537 		else
1538 			type_tucmd_mlhl = IGC_ADVTXD_TUCMD_IPV6 |
1539 				IGC_ADVTXD_DTYP_CTXT | IGC_ADVTXD_DCMD_DEXT;
1540 
1541 		if (ol_flags & PKT_TX_TCP_SEG)
1542 			type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_TCP;
1543 		else
1544 			type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_UDP;
1545 
1546 		tx_offload_mask.data |= TX_TSO_CMP_MASK;
1547 		mss_l4len_idx |= (uint32_t)tx_offload.tso_segsz <<
1548 				IGC_ADVTXD_MSS_SHIFT;
1549 		mss_l4len_idx |= (uint32_t)tx_offload.l4_len <<
1550 				IGC_ADVTXD_L4LEN_SHIFT;
1551 	} else { /* no TSO, check if hardware checksum is needed */
1552 		if (ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_L4_MASK))
1553 			tx_offload_mask.data |= TX_MACIP_LEN_CMP_MASK;
1554 
1555 		if (ol_flags & PKT_TX_IP_CKSUM)
1556 			type_tucmd_mlhl = IGC_ADVTXD_TUCMD_IPV4;
1557 
1558 		switch (ol_flags & PKT_TX_L4_MASK) {
1559 		case PKT_TX_TCP_CKSUM:
1560 			type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_TCP |
1561 				IGC_ADVTXD_DTYP_CTXT | IGC_ADVTXD_DCMD_DEXT;
1562 			mss_l4len_idx |= (uint32_t)sizeof(struct rte_tcp_hdr)
1563 				<< IGC_ADVTXD_L4LEN_SHIFT;
1564 			break;
1565 		case PKT_TX_UDP_CKSUM:
1566 			type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_UDP |
1567 				IGC_ADVTXD_DTYP_CTXT | IGC_ADVTXD_DCMD_DEXT;
1568 			mss_l4len_idx |= (uint32_t)sizeof(struct rte_udp_hdr)
1569 				<< IGC_ADVTXD_L4LEN_SHIFT;
1570 			break;
1571 		case PKT_TX_SCTP_CKSUM:
1572 			type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_SCTP |
1573 				IGC_ADVTXD_DTYP_CTXT | IGC_ADVTXD_DCMD_DEXT;
1574 			mss_l4len_idx |= (uint32_t)sizeof(struct rte_sctp_hdr)
1575 				<< IGC_ADVTXD_L4LEN_SHIFT;
1576 			break;
1577 		default:
1578 			type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_RSV |
1579 				IGC_ADVTXD_DTYP_CTXT | IGC_ADVTXD_DCMD_DEXT;
1580 			break;
1581 		}
1582 	}
1583 
1584 	txq->ctx_cache[ctx_curr].flags = ol_flags;
1585 	txq->ctx_cache[ctx_curr].tx_offload.data =
1586 		tx_offload_mask.data & tx_offload.data;
1587 	txq->ctx_cache[ctx_curr].tx_offload_mask = tx_offload_mask;
1588 
1589 	ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
1590 	vlan_macip_lens = (uint32_t)tx_offload.data;
1591 	ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
1592 	ctx_txd->mss_l4len_idx = rte_cpu_to_le_32(mss_l4len_idx);
1593 	ctx_txd->u.launch_time = 0;
1594 }
1595 
1596 static inline uint32_t
tx_desc_vlan_flags_to_cmdtype(uint64_t ol_flags)1597 tx_desc_vlan_flags_to_cmdtype(uint64_t ol_flags)
1598 {
1599 	uint32_t cmdtype;
1600 	static uint32_t vlan_cmd[2] = {0, IGC_ADVTXD_DCMD_VLE};
1601 	static uint32_t tso_cmd[2] = {0, IGC_ADVTXD_DCMD_TSE};
1602 	cmdtype = vlan_cmd[(ol_flags & PKT_TX_VLAN_PKT) != 0];
1603 	cmdtype |= tso_cmd[(ol_flags & IGC_TX_OFFLOAD_SEG) != 0];
1604 	return cmdtype;
1605 }
1606 
1607 static inline uint32_t
tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)1608 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
1609 {
1610 	static const uint32_t l4_olinfo[2] = {0, IGC_ADVTXD_POPTS_TXSM};
1611 	static const uint32_t l3_olinfo[2] = {0, IGC_ADVTXD_POPTS_IXSM};
1612 	uint32_t tmp;
1613 
1614 	tmp  = l4_olinfo[(ol_flags & PKT_TX_L4_MASK)  != PKT_TX_L4_NO_CKSUM];
1615 	tmp |= l3_olinfo[(ol_flags & PKT_TX_IP_CKSUM) != 0];
1616 	tmp |= l4_olinfo[(ol_flags & IGC_TX_OFFLOAD_SEG) != 0];
1617 	return tmp;
1618 }
1619 
1620 static uint16_t
igc_xmit_pkts(void * tx_queue,struct rte_mbuf ** tx_pkts,uint16_t nb_pkts)1621 igc_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
1622 {
1623 	struct igc_tx_queue * const txq = tx_queue;
1624 	struct igc_tx_entry * const sw_ring = txq->sw_ring;
1625 	struct igc_tx_entry *txe, *txn;
1626 	volatile union igc_adv_tx_desc * const txr = txq->tx_ring;
1627 	volatile union igc_adv_tx_desc *txd;
1628 	struct rte_mbuf *tx_pkt;
1629 	struct rte_mbuf *m_seg;
1630 	uint64_t buf_dma_addr;
1631 	uint32_t olinfo_status;
1632 	uint32_t cmd_type_len;
1633 	uint32_t pkt_len;
1634 	uint16_t slen;
1635 	uint64_t ol_flags;
1636 	uint16_t tx_end;
1637 	uint16_t tx_id;
1638 	uint16_t tx_last;
1639 	uint16_t nb_tx;
1640 	uint64_t tx_ol_req;
1641 	uint32_t new_ctx = 0;
1642 	union igc_tx_offload tx_offload = {0};
1643 
1644 	tx_id = txq->tx_tail;
1645 	txe = &sw_ring[tx_id];
1646 
1647 	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1648 		tx_pkt = *tx_pkts++;
1649 		pkt_len = tx_pkt->pkt_len;
1650 
1651 		RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);
1652 
1653 		/*
1654 		 * The number of descriptors that must be allocated for a
1655 		 * packet is the number of segments of that packet, plus 1
1656 		 * Context Descriptor for the VLAN Tag Identifier, if any.
1657 		 * Determine the last TX descriptor to allocate in the TX ring
1658 		 * for the packet, starting from the current position (tx_id)
1659 		 * in the ring.
1660 		 */
1661 		tx_last = (uint16_t)(tx_id + tx_pkt->nb_segs - 1);
1662 
1663 		ol_flags = tx_pkt->ol_flags;
1664 		tx_ol_req = ol_flags & IGC_TX_OFFLOAD_MASK;
1665 
1666 		/* If a Context Descriptor need be built . */
1667 		if (tx_ol_req) {
1668 			tx_offload.l2_len = tx_pkt->l2_len;
1669 			tx_offload.l3_len = tx_pkt->l3_len;
1670 			tx_offload.l4_len = tx_pkt->l4_len;
1671 			tx_offload.vlan_tci = tx_pkt->vlan_tci;
1672 			tx_offload.tso_segsz = tx_pkt->tso_segsz;
1673 			tx_ol_req = check_tso_para(tx_ol_req, tx_offload);
1674 
1675 			new_ctx = what_advctx_update(txq, tx_ol_req,
1676 					tx_offload);
1677 			/* Only allocate context descriptor if required*/
1678 			new_ctx = (new_ctx >= IGC_CTX_NUM);
1679 			tx_last = (uint16_t)(tx_last + new_ctx);
1680 		}
1681 		if (tx_last >= txq->nb_tx_desc)
1682 			tx_last = (uint16_t)(tx_last - txq->nb_tx_desc);
1683 
1684 		PMD_TX_LOG(DEBUG,
1685 			"port_id=%u queue_id=%u pktlen=%u tx_first=%u tx_last=%u",
1686 			txq->port_id, txq->queue_id, pkt_len, tx_id, tx_last);
1687 
1688 		/*
1689 		 * Check if there are enough free descriptors in the TX ring
1690 		 * to transmit the next packet.
1691 		 * This operation is based on the two following rules:
1692 		 *
1693 		 *   1- Only check that the last needed TX descriptor can be
1694 		 *      allocated (by construction, if that descriptor is free,
1695 		 *      all intermediate ones are also free).
1696 		 *
1697 		 *      For this purpose, the index of the last TX descriptor
1698 		 *      used for a packet (the "last descriptor" of a packet)
1699 		 *      is recorded in the TX entries (the last one included)
1700 		 *      that are associated with all TX descriptors allocated
1701 		 *      for that packet.
1702 		 *
1703 		 *   2- Avoid to allocate the last free TX descriptor of the
1704 		 *      ring, in order to never set the TDT register with the
1705 		 *      same value stored in parallel by the NIC in the TDH
1706 		 *      register, which makes the TX engine of the NIC enter
1707 		 *      in a deadlock situation.
1708 		 *
1709 		 *      By extension, avoid to allocate a free descriptor that
1710 		 *      belongs to the last set of free descriptors allocated
1711 		 *      to the same packet previously transmitted.
1712 		 */
1713 
1714 		/*
1715 		 * The "last descriptor" of the previously sent packet, if any,
1716 		 * which used the last descriptor to allocate.
1717 		 */
1718 		tx_end = sw_ring[tx_last].last_id;
1719 
1720 		/*
1721 		 * The next descriptor following that "last descriptor" in the
1722 		 * ring.
1723 		 */
1724 		tx_end = sw_ring[tx_end].next_id;
1725 
1726 		/*
1727 		 * The "last descriptor" associated with that next descriptor.
1728 		 */
1729 		tx_end = sw_ring[tx_end].last_id;
1730 
1731 		/*
1732 		 * Check that this descriptor is free.
1733 		 */
1734 		if (!(txr[tx_end].wb.status & IGC_TXD_STAT_DD)) {
1735 			if (nb_tx == 0)
1736 				return 0;
1737 			goto end_of_tx;
1738 		}
1739 
1740 		/*
1741 		 * Set common flags of all TX Data Descriptors.
1742 		 *
1743 		 * The following bits must be set in all Data Descriptors:
1744 		 *   - IGC_ADVTXD_DTYP_DATA
1745 		 *   - IGC_ADVTXD_DCMD_DEXT
1746 		 *
1747 		 * The following bits must be set in the first Data Descriptor
1748 		 * and are ignored in the other ones:
1749 		 *   - IGC_ADVTXD_DCMD_IFCS
1750 		 *   - IGC_ADVTXD_MAC_1588
1751 		 *   - IGC_ADVTXD_DCMD_VLE
1752 		 *
1753 		 * The following bits must only be set in the last Data
1754 		 * Descriptor:
1755 		 *   - IGC_TXD_CMD_EOP
1756 		 *
1757 		 * The following bits can be set in any Data Descriptor, but
1758 		 * are only set in the last Data Descriptor:
1759 		 *   - IGC_TXD_CMD_RS
1760 		 */
1761 		cmd_type_len = txq->txd_type |
1762 			IGC_ADVTXD_DCMD_IFCS | IGC_ADVTXD_DCMD_DEXT;
1763 		if (tx_ol_req & IGC_TX_OFFLOAD_SEG)
1764 			pkt_len -= (tx_pkt->l2_len + tx_pkt->l3_len +
1765 					tx_pkt->l4_len);
1766 		olinfo_status = (pkt_len << IGC_ADVTXD_PAYLEN_SHIFT);
1767 
1768 		/*
1769 		 * Timer 0 should be used to for packet timestamping,
1770 		 * sample the packet timestamp to reg 0
1771 		 */
1772 		if (ol_flags & PKT_TX_IEEE1588_TMST)
1773 			cmd_type_len |= IGC_ADVTXD_MAC_TSTAMP;
1774 
1775 		if (tx_ol_req) {
1776 			/* Setup TX Advanced context descriptor if required */
1777 			if (new_ctx) {
1778 				volatile struct igc_adv_tx_context_desc *
1779 					ctx_txd = (volatile struct
1780 					igc_adv_tx_context_desc *)&txr[tx_id];
1781 
1782 				txn = &sw_ring[txe->next_id];
1783 				RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
1784 
1785 				if (txe->mbuf != NULL) {
1786 					rte_pktmbuf_free_seg(txe->mbuf);
1787 					txe->mbuf = NULL;
1788 				}
1789 
1790 				igc_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
1791 						tx_offload);
1792 
1793 				txe->last_id = tx_last;
1794 				tx_id = txe->next_id;
1795 				txe = txn;
1796 			}
1797 
1798 			/* Setup the TX Advanced Data Descriptor */
1799 			cmd_type_len |=
1800 				tx_desc_vlan_flags_to_cmdtype(tx_ol_req);
1801 			olinfo_status |=
1802 				tx_desc_cksum_flags_to_olinfo(tx_ol_req);
1803 			olinfo_status |= (uint32_t)txq->ctx_curr <<
1804 					IGC_ADVTXD_IDX_SHIFT;
1805 		}
1806 
1807 		m_seg = tx_pkt;
1808 		do {
1809 			txn = &sw_ring[txe->next_id];
1810 			RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
1811 
1812 			txd = &txr[tx_id];
1813 
1814 			if (txe->mbuf != NULL)
1815 				rte_pktmbuf_free_seg(txe->mbuf);
1816 			txe->mbuf = m_seg;
1817 
1818 			/* Set up transmit descriptor */
1819 			slen = (uint16_t)m_seg->data_len;
1820 			buf_dma_addr = rte_mbuf_data_iova(m_seg);
1821 			txd->read.buffer_addr =
1822 				rte_cpu_to_le_64(buf_dma_addr);
1823 			txd->read.cmd_type_len =
1824 				rte_cpu_to_le_32(cmd_type_len | slen);
1825 			txd->read.olinfo_status =
1826 				rte_cpu_to_le_32(olinfo_status);
1827 			txe->last_id = tx_last;
1828 			tx_id = txe->next_id;
1829 			txe = txn;
1830 			m_seg = m_seg->next;
1831 		} while (m_seg != NULL);
1832 
1833 		/*
1834 		 * The last packet data descriptor needs End Of Packet (EOP)
1835 		 * and Report Status (RS).
1836 		 */
1837 		txd->read.cmd_type_len |=
1838 			rte_cpu_to_le_32(IGC_TXD_CMD_EOP | IGC_TXD_CMD_RS);
1839 	}
1840 end_of_tx:
1841 	rte_wmb();
1842 
1843 	/*
1844 	 * Set the Transmit Descriptor Tail (TDT).
1845 	 */
1846 	IGC_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
1847 	PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
1848 		txq->port_id, txq->queue_id, tx_id, nb_tx);
1849 	txq->tx_tail = tx_id;
1850 
1851 	return nb_tx;
1852 }
1853 
eth_igc_tx_descriptor_status(void * tx_queue,uint16_t offset)1854 int eth_igc_tx_descriptor_status(void *tx_queue, uint16_t offset)
1855 {
1856 	struct igc_tx_queue *txq = tx_queue;
1857 	volatile uint32_t *status;
1858 	uint32_t desc;
1859 
1860 	if (unlikely(!txq || offset >= txq->nb_tx_desc))
1861 		return -EINVAL;
1862 
1863 	desc = txq->tx_tail + offset;
1864 	if (desc >= txq->nb_tx_desc)
1865 		desc -= txq->nb_tx_desc;
1866 
1867 	status = &txq->tx_ring[desc].wb.status;
1868 	if (*status & rte_cpu_to_le_32(IGC_TXD_STAT_DD))
1869 		return RTE_ETH_TX_DESC_DONE;
1870 
1871 	return RTE_ETH_TX_DESC_FULL;
1872 }
1873 
1874 static void
igc_tx_queue_release_mbufs(struct igc_tx_queue * txq)1875 igc_tx_queue_release_mbufs(struct igc_tx_queue *txq)
1876 {
1877 	unsigned int i;
1878 
1879 	if (txq->sw_ring != NULL) {
1880 		for (i = 0; i < txq->nb_tx_desc; i++) {
1881 			if (txq->sw_ring[i].mbuf != NULL) {
1882 				rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
1883 				txq->sw_ring[i].mbuf = NULL;
1884 			}
1885 		}
1886 	}
1887 }
1888 
1889 static void
igc_tx_queue_release(struct igc_tx_queue * txq)1890 igc_tx_queue_release(struct igc_tx_queue *txq)
1891 {
1892 	igc_tx_queue_release_mbufs(txq);
1893 	rte_free(txq->sw_ring);
1894 	rte_free(txq);
1895 }
1896 
eth_igc_tx_queue_release(void * txq)1897 void eth_igc_tx_queue_release(void *txq)
1898 {
1899 	if (txq)
1900 		igc_tx_queue_release(txq);
1901 }
1902 
1903 static void
igc_reset_tx_queue_stat(struct igc_tx_queue * txq)1904 igc_reset_tx_queue_stat(struct igc_tx_queue *txq)
1905 {
1906 	txq->tx_head = 0;
1907 	txq->tx_tail = 0;
1908 	txq->ctx_curr = 0;
1909 	memset((void *)&txq->ctx_cache, 0,
1910 		IGC_CTX_NUM * sizeof(struct igc_advctx_info));
1911 }
1912 
1913 static void
igc_reset_tx_queue(struct igc_tx_queue * txq)1914 igc_reset_tx_queue(struct igc_tx_queue *txq)
1915 {
1916 	struct igc_tx_entry *txe = txq->sw_ring;
1917 	uint16_t i, prev;
1918 
1919 	/* Initialize ring entries */
1920 	prev = (uint16_t)(txq->nb_tx_desc - 1);
1921 	for (i = 0; i < txq->nb_tx_desc; i++) {
1922 		volatile union igc_adv_tx_desc *txd = &txq->tx_ring[i];
1923 
1924 		txd->wb.status = IGC_TXD_STAT_DD;
1925 		txe[i].mbuf = NULL;
1926 		txe[i].last_id = i;
1927 		txe[prev].next_id = i;
1928 		prev = i;
1929 	}
1930 
1931 	txq->txd_type = IGC_ADVTXD_DTYP_DATA;
1932 	igc_reset_tx_queue_stat(txq);
1933 }
1934 
1935 /*
1936  * clear all rx/tx queue
1937  */
1938 void
igc_dev_clear_queues(struct rte_eth_dev * dev)1939 igc_dev_clear_queues(struct rte_eth_dev *dev)
1940 {
1941 	uint16_t i;
1942 	struct igc_tx_queue *txq;
1943 	struct igc_rx_queue *rxq;
1944 
1945 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
1946 		txq = dev->data->tx_queues[i];
1947 		if (txq != NULL) {
1948 			igc_tx_queue_release_mbufs(txq);
1949 			igc_reset_tx_queue(txq);
1950 		}
1951 	}
1952 
1953 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
1954 		rxq = dev->data->rx_queues[i];
1955 		if (rxq != NULL) {
1956 			igc_rx_queue_release_mbufs(rxq);
1957 			igc_reset_rx_queue(rxq);
1958 		}
1959 	}
1960 }
1961 
eth_igc_tx_queue_setup(struct rte_eth_dev * dev,uint16_t queue_idx,uint16_t nb_desc,unsigned int socket_id,const struct rte_eth_txconf * tx_conf)1962 int eth_igc_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
1963 		uint16_t nb_desc, unsigned int socket_id,
1964 		const struct rte_eth_txconf *tx_conf)
1965 {
1966 	const struct rte_memzone *tz;
1967 	struct igc_tx_queue *txq;
1968 	struct igc_hw *hw;
1969 	uint32_t size;
1970 
1971 	if (nb_desc % IGC_TX_DESCRIPTOR_MULTIPLE != 0 ||
1972 		nb_desc > IGC_MAX_TXD || nb_desc < IGC_MIN_TXD) {
1973 		PMD_DRV_LOG(ERR,
1974 			"TX-descriptor must be a multiple of %u and between %u and %u, cur: %u",
1975 			IGC_TX_DESCRIPTOR_MULTIPLE,
1976 			IGC_MAX_TXD, IGC_MIN_TXD, nb_desc);
1977 		return -EINVAL;
1978 	}
1979 
1980 	hw = IGC_DEV_PRIVATE_HW(dev);
1981 
1982 	/*
1983 	 * The tx_free_thresh and tx_rs_thresh values are not used in the 2.5G
1984 	 * driver.
1985 	 */
1986 	if (tx_conf->tx_free_thresh != 0)
1987 		PMD_DRV_LOG(INFO,
1988 			"The tx_free_thresh parameter is not used for the 2.5G driver");
1989 	if (tx_conf->tx_rs_thresh != 0)
1990 		PMD_DRV_LOG(INFO,
1991 			"The tx_rs_thresh parameter is not used for the 2.5G driver");
1992 	if (tx_conf->tx_thresh.wthresh == 0)
1993 		PMD_DRV_LOG(INFO,
1994 			"To improve 2.5G driver performance, consider setting the TX WTHRESH value to 4, 8, or 16.");
1995 
1996 	/* Free memory prior to re-allocation if needed */
1997 	if (dev->data->tx_queues[queue_idx] != NULL) {
1998 		igc_tx_queue_release(dev->data->tx_queues[queue_idx]);
1999 		dev->data->tx_queues[queue_idx] = NULL;
2000 	}
2001 
2002 	/* First allocate the tx queue data structure */
2003 	txq = rte_zmalloc("ethdev TX queue", sizeof(struct igc_tx_queue),
2004 						RTE_CACHE_LINE_SIZE);
2005 	if (txq == NULL)
2006 		return -ENOMEM;
2007 
2008 	/*
2009 	 * Allocate TX ring hardware descriptors. A memzone large enough to
2010 	 * handle the maximum ring size is allocated in order to allow for
2011 	 * resizing in later calls to the queue setup function.
2012 	 */
2013 	size = sizeof(union igc_adv_tx_desc) * IGC_MAX_TXD;
2014 	tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx, size,
2015 				      IGC_ALIGN, socket_id);
2016 	if (tz == NULL) {
2017 		igc_tx_queue_release(txq);
2018 		return -ENOMEM;
2019 	}
2020 
2021 	txq->nb_tx_desc = nb_desc;
2022 	txq->pthresh = tx_conf->tx_thresh.pthresh;
2023 	txq->hthresh = tx_conf->tx_thresh.hthresh;
2024 	txq->wthresh = tx_conf->tx_thresh.wthresh;
2025 
2026 	txq->queue_id = queue_idx;
2027 	txq->reg_idx = queue_idx;
2028 	txq->port_id = dev->data->port_id;
2029 
2030 	txq->tdt_reg_addr = IGC_PCI_REG_ADDR(hw, IGC_TDT(txq->reg_idx));
2031 	txq->tx_ring_phys_addr = tz->iova;
2032 
2033 	txq->tx_ring = (union igc_adv_tx_desc *)tz->addr;
2034 	/* Allocate software ring */
2035 	txq->sw_ring = rte_zmalloc("txq->sw_ring",
2036 				   sizeof(struct igc_tx_entry) * nb_desc,
2037 				   RTE_CACHE_LINE_SIZE);
2038 	if (txq->sw_ring == NULL) {
2039 		igc_tx_queue_release(txq);
2040 		return -ENOMEM;
2041 	}
2042 	PMD_DRV_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%" PRIx64,
2043 		txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2044 
2045 	igc_reset_tx_queue(txq);
2046 	dev->tx_pkt_burst = igc_xmit_pkts;
2047 	dev->tx_pkt_prepare = &eth_igc_prep_pkts;
2048 	dev->data->tx_queues[queue_idx] = txq;
2049 	txq->offloads = tx_conf->offloads;
2050 
2051 	return 0;
2052 }
2053 
2054 int
eth_igc_tx_done_cleanup(void * txqueue,uint32_t free_cnt)2055 eth_igc_tx_done_cleanup(void *txqueue, uint32_t free_cnt)
2056 {
2057 	struct igc_tx_queue *txq = txqueue;
2058 	struct igc_tx_entry *sw_ring;
2059 	volatile union igc_adv_tx_desc *txr;
2060 	uint16_t tx_first; /* First segment analyzed. */
2061 	uint16_t tx_id;    /* Current segment being processed. */
2062 	uint16_t tx_last;  /* Last segment in the current packet. */
2063 	uint16_t tx_next;  /* First segment of the next packet. */
2064 	uint32_t count;
2065 
2066 	if (txq == NULL)
2067 		return -ENODEV;
2068 
2069 	count = 0;
2070 	sw_ring = txq->sw_ring;
2071 	txr = txq->tx_ring;
2072 
2073 	/*
2074 	 * tx_tail is the last sent packet on the sw_ring. Goto the end
2075 	 * of that packet (the last segment in the packet chain) and
2076 	 * then the next segment will be the start of the oldest segment
2077 	 * in the sw_ring. This is the first packet that will be
2078 	 * attempted to be freed.
2079 	 */
2080 
2081 	/* Get last segment in most recently added packet. */
2082 	tx_first = sw_ring[txq->tx_tail].last_id;
2083 
2084 	/* Get the next segment, which is the oldest segment in ring. */
2085 	tx_first = sw_ring[tx_first].next_id;
2086 
2087 	/* Set the current index to the first. */
2088 	tx_id = tx_first;
2089 
2090 	/*
2091 	 * Loop through each packet. For each packet, verify that an
2092 	 * mbuf exists and that the last segment is free. If so, free
2093 	 * it and move on.
2094 	 */
2095 	while (1) {
2096 		tx_last = sw_ring[tx_id].last_id;
2097 
2098 		if (sw_ring[tx_last].mbuf) {
2099 			if (!(txr[tx_last].wb.status &
2100 					rte_cpu_to_le_32(IGC_TXD_STAT_DD)))
2101 				break;
2102 
2103 			/* Get the start of the next packet. */
2104 			tx_next = sw_ring[tx_last].next_id;
2105 
2106 			/*
2107 			 * Loop through all segments in a
2108 			 * packet.
2109 			 */
2110 			do {
2111 				rte_pktmbuf_free_seg(sw_ring[tx_id].mbuf);
2112 				sw_ring[tx_id].mbuf = NULL;
2113 				sw_ring[tx_id].last_id = tx_id;
2114 
2115 				/* Move to next segemnt. */
2116 				tx_id = sw_ring[tx_id].next_id;
2117 			} while (tx_id != tx_next);
2118 
2119 			/*
2120 			 * Increment the number of packets
2121 			 * freed.
2122 			 */
2123 			count++;
2124 			if (unlikely(count == free_cnt))
2125 				break;
2126 		} else {
2127 			/*
2128 			 * There are multiple reasons to be here:
2129 			 * 1) All the packets on the ring have been
2130 			 *    freed - tx_id is equal to tx_first
2131 			 *    and some packets have been freed.
2132 			 *    - Done, exit
2133 			 * 2) Interfaces has not sent a rings worth of
2134 			 *    packets yet, so the segment after tail is
2135 			 *    still empty. Or a previous call to this
2136 			 *    function freed some of the segments but
2137 			 *    not all so there is a hole in the list.
2138 			 *    Hopefully this is a rare case.
2139 			 *    - Walk the list and find the next mbuf. If
2140 			 *      there isn't one, then done.
2141 			 */
2142 			if (likely(tx_id == tx_first && count != 0))
2143 				break;
2144 
2145 			/*
2146 			 * Walk the list and find the next mbuf, if any.
2147 			 */
2148 			do {
2149 				/* Move to next segemnt. */
2150 				tx_id = sw_ring[tx_id].next_id;
2151 
2152 				if (sw_ring[tx_id].mbuf)
2153 					break;
2154 
2155 			} while (tx_id != tx_first);
2156 
2157 			/*
2158 			 * Determine why previous loop bailed. If there
2159 			 * is not an mbuf, done.
2160 			 */
2161 			if (sw_ring[tx_id].mbuf == NULL)
2162 				break;
2163 		}
2164 	}
2165 
2166 	return count;
2167 }
2168 
2169 void
igc_tx_init(struct rte_eth_dev * dev)2170 igc_tx_init(struct rte_eth_dev *dev)
2171 {
2172 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2173 	uint32_t tctl;
2174 	uint32_t txdctl;
2175 	uint16_t i;
2176 
2177 	/* Setup the Base and Length of the Tx Descriptor Rings. */
2178 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
2179 		struct igc_tx_queue *txq = dev->data->tx_queues[i];
2180 		uint64_t bus_addr = txq->tx_ring_phys_addr;
2181 
2182 		IGC_WRITE_REG(hw, IGC_TDLEN(txq->reg_idx),
2183 				txq->nb_tx_desc *
2184 				sizeof(union igc_adv_tx_desc));
2185 		IGC_WRITE_REG(hw, IGC_TDBAH(txq->reg_idx),
2186 				(uint32_t)(bus_addr >> 32));
2187 		IGC_WRITE_REG(hw, IGC_TDBAL(txq->reg_idx),
2188 				(uint32_t)bus_addr);
2189 
2190 		/* Setup the HW Tx Head and Tail descriptor pointers. */
2191 		IGC_WRITE_REG(hw, IGC_TDT(txq->reg_idx), 0);
2192 		IGC_WRITE_REG(hw, IGC_TDH(txq->reg_idx), 0);
2193 
2194 		/* Setup Transmit threshold registers. */
2195 		txdctl = ((uint32_t)txq->pthresh << IGC_TXDCTL_PTHRESH_SHIFT) &
2196 				IGC_TXDCTL_PTHRESH_MSK;
2197 		txdctl |= ((uint32_t)txq->hthresh << IGC_TXDCTL_HTHRESH_SHIFT) &
2198 				IGC_TXDCTL_HTHRESH_MSK;
2199 		txdctl |= ((uint32_t)txq->wthresh << IGC_TXDCTL_WTHRESH_SHIFT) &
2200 				IGC_TXDCTL_WTHRESH_MSK;
2201 		txdctl |= IGC_TXDCTL_QUEUE_ENABLE;
2202 		IGC_WRITE_REG(hw, IGC_TXDCTL(txq->reg_idx), txdctl);
2203 	}
2204 
2205 	igc_config_collision_dist(hw);
2206 
2207 	/* Program the Transmit Control Register. */
2208 	tctl = IGC_READ_REG(hw, IGC_TCTL);
2209 	tctl &= ~IGC_TCTL_CT;
2210 	tctl |= (IGC_TCTL_PSP | IGC_TCTL_RTLC | IGC_TCTL_EN |
2211 		 ((uint32_t)IGC_COLLISION_THRESHOLD << IGC_CT_SHIFT));
2212 
2213 	/* This write will effectively turn on the transmit unit. */
2214 	IGC_WRITE_REG(hw, IGC_TCTL, tctl);
2215 }
2216 
2217 void
eth_igc_rxq_info_get(struct rte_eth_dev * dev,uint16_t queue_id,struct rte_eth_rxq_info * qinfo)2218 eth_igc_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
2219 	struct rte_eth_rxq_info *qinfo)
2220 {
2221 	struct igc_rx_queue *rxq;
2222 
2223 	rxq = dev->data->rx_queues[queue_id];
2224 
2225 	qinfo->mp = rxq->mb_pool;
2226 	qinfo->scattered_rx = dev->data->scattered_rx;
2227 	qinfo->nb_desc = rxq->nb_rx_desc;
2228 
2229 	qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
2230 	qinfo->conf.rx_drop_en = rxq->drop_en;
2231 	qinfo->conf.offloads = rxq->offloads;
2232 	qinfo->conf.rx_thresh.hthresh = rxq->hthresh;
2233 	qinfo->conf.rx_thresh.pthresh = rxq->pthresh;
2234 	qinfo->conf.rx_thresh.wthresh = rxq->wthresh;
2235 }
2236 
2237 void
eth_igc_txq_info_get(struct rte_eth_dev * dev,uint16_t queue_id,struct rte_eth_txq_info * qinfo)2238 eth_igc_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
2239 	struct rte_eth_txq_info *qinfo)
2240 {
2241 	struct igc_tx_queue *txq;
2242 
2243 	txq = dev->data->tx_queues[queue_id];
2244 
2245 	qinfo->nb_desc = txq->nb_tx_desc;
2246 
2247 	qinfo->conf.tx_thresh.pthresh = txq->pthresh;
2248 	qinfo->conf.tx_thresh.hthresh = txq->hthresh;
2249 	qinfo->conf.tx_thresh.wthresh = txq->wthresh;
2250 	qinfo->conf.offloads = txq->offloads;
2251 }
2252 
2253 void
eth_igc_vlan_strip_queue_set(struct rte_eth_dev * dev,uint16_t rx_queue_id,int on)2254 eth_igc_vlan_strip_queue_set(struct rte_eth_dev *dev,
2255 			uint16_t rx_queue_id, int on)
2256 {
2257 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2258 	struct igc_rx_queue *rxq = dev->data->rx_queues[rx_queue_id];
2259 	uint32_t reg_val;
2260 
2261 	if (rx_queue_id >= IGC_QUEUE_PAIRS_NUM) {
2262 		PMD_DRV_LOG(ERR, "Queue index(%u) illegal, max is %u",
2263 			rx_queue_id, IGC_QUEUE_PAIRS_NUM - 1);
2264 		return;
2265 	}
2266 
2267 	reg_val = IGC_READ_REG(hw, IGC_DVMOLR(rx_queue_id));
2268 	if (on) {
2269 		/* If vlan been stripped off, the CRC is meaningless. */
2270 		reg_val |= IGC_DVMOLR_STRVLAN | IGC_DVMOLR_STRCRC;
2271 		rxq->offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
2272 	} else {
2273 		reg_val &= ~(IGC_DVMOLR_STRVLAN | IGC_DVMOLR_HIDVLAN |
2274 				IGC_DVMOLR_STRCRC);
2275 		rxq->offloads &= ~DEV_RX_OFFLOAD_VLAN_STRIP;
2276 	}
2277 
2278 	IGC_WRITE_REG(hw, IGC_DVMOLR(rx_queue_id), reg_val);
2279 }
2280