xref: /dpdk/drivers/net/igc/igc_txrx.c (revision 7be78d02)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2020 Intel Corporation
3  */
4 
5 #include <rte_config.h>
6 #include <rte_flow.h>
7 #include <rte_malloc.h>
8 #include <ethdev_driver.h>
9 #include <rte_net.h>
10 
11 #include "igc_logs.h"
12 #include "igc_txrx.h"
13 
14 #ifdef RTE_PMD_USE_PREFETCH
15 #define rte_igc_prefetch(p)		rte_prefetch0(p)
16 #else
17 #define rte_igc_prefetch(p)		do {} while (0)
18 #endif
19 
20 #ifdef RTE_PMD_PACKET_PREFETCH
21 #define rte_packet_prefetch(p)		rte_prefetch1(p)
22 #else
23 #define rte_packet_prefetch(p)		do {} while (0)
24 #endif
25 
26 /* Multicast / Unicast table offset mask. */
27 #define IGC_RCTL_MO_MSK			(3u << IGC_RCTL_MO_SHIFT)
28 
29 /* Loopback mode. */
30 #define IGC_RCTL_LBM_SHIFT		6
31 #define IGC_RCTL_LBM_MSK		(3u << IGC_RCTL_LBM_SHIFT)
32 
33 /* Hash select for MTA */
34 #define IGC_RCTL_HSEL_SHIFT		8
35 #define IGC_RCTL_HSEL_MSK		(3u << IGC_RCTL_HSEL_SHIFT)
36 #define IGC_RCTL_PSP			(1u << 21)
37 
38 /* Receive buffer size for header buffer */
39 #define IGC_SRRCTL_BSIZEHEADER_SHIFT	8
40 
41 /* RX descriptor status and error flags */
42 #define IGC_RXD_STAT_L4CS		(1u << 5)
43 #define IGC_RXD_STAT_VEXT		(1u << 9)
44 #define IGC_RXD_STAT_LLINT		(1u << 11)
45 #define IGC_RXD_STAT_SCRC		(1u << 12)
46 #define IGC_RXD_STAT_SMDT_MASK		(3u << 13)
47 #define IGC_RXD_STAT_MC			(1u << 19)
48 #define IGC_RXD_EXT_ERR_L4E		(1u << 29)
49 #define IGC_RXD_EXT_ERR_IPE		(1u << 30)
50 #define IGC_RXD_EXT_ERR_RXE		(1u << 31)
51 #define IGC_RXD_RSS_TYPE_MASK		0xfu
52 #define IGC_RXD_PCTYPE_MASK		(0x7fu << 4)
53 #define IGC_RXD_ETQF_SHIFT		12
54 #define IGC_RXD_ETQF_MSK		(0xfu << IGC_RXD_ETQF_SHIFT)
55 #define IGC_RXD_VPKT			(1u << 16)
56 
57 /* TXD control bits */
58 #define IGC_TXDCTL_PTHRESH_SHIFT	0
59 #define IGC_TXDCTL_HTHRESH_SHIFT	8
60 #define IGC_TXDCTL_WTHRESH_SHIFT	16
61 #define IGC_TXDCTL_PTHRESH_MSK		(0x1fu << IGC_TXDCTL_PTHRESH_SHIFT)
62 #define IGC_TXDCTL_HTHRESH_MSK		(0x1fu << IGC_TXDCTL_HTHRESH_SHIFT)
63 #define IGC_TXDCTL_WTHRESH_MSK		(0x1fu << IGC_TXDCTL_WTHRESH_SHIFT)
64 
65 /* RXD control bits */
66 #define IGC_RXDCTL_PTHRESH_SHIFT	0
67 #define IGC_RXDCTL_HTHRESH_SHIFT	8
68 #define IGC_RXDCTL_WTHRESH_SHIFT	16
69 #define IGC_RXDCTL_PTHRESH_MSK		(0x1fu << IGC_RXDCTL_PTHRESH_SHIFT)
70 #define IGC_RXDCTL_HTHRESH_MSK		(0x1fu << IGC_RXDCTL_HTHRESH_SHIFT)
71 #define IGC_RXDCTL_WTHRESH_MSK		(0x1fu << IGC_RXDCTL_WTHRESH_SHIFT)
72 
73 #define IGC_TSO_MAX_HDRLEN		512
74 #define IGC_TSO_MAX_MSS			9216
75 
76 /* Bit mask to indicate which bits are required for building the TX context */
77 #define IGC_TX_OFFLOAD_MASK (RTE_MBUF_F_TX_OUTER_IPV4 |	\
78 		RTE_MBUF_F_TX_IPV6 |		\
79 		RTE_MBUF_F_TX_IPV4 |		\
80 		RTE_MBUF_F_TX_VLAN |	\
81 		RTE_MBUF_F_TX_IP_CKSUM |	\
82 		RTE_MBUF_F_TX_L4_MASK |	\
83 		RTE_MBUF_F_TX_TCP_SEG |	\
84 		RTE_MBUF_F_TX_UDP_SEG)
85 
86 #define IGC_TX_OFFLOAD_SEG	(RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_UDP_SEG)
87 
88 #define IGC_ADVTXD_POPTS_TXSM	0x00000200 /* L4 Checksum offload request */
89 #define IGC_ADVTXD_POPTS_IXSM	0x00000100 /* IP Checksum offload request */
90 
91 /* L4 Packet TYPE of Reserved */
92 #define IGC_ADVTXD_TUCMD_L4T_RSV	0x00001800
93 
94 #define IGC_TX_OFFLOAD_NOTSUP_MASK (RTE_MBUF_F_TX_OFFLOAD_MASK ^ IGC_TX_OFFLOAD_MASK)
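/*
 * Illustrative note (added for clarity, not part of the original source):
 * because IGC_TX_OFFLOAD_MASK is a subset of RTE_MBUF_F_TX_OFFLOAD_MASK, the
 * XOR above yields the set of offload flags this driver does NOT support.
 * For example, an mbuf carrying RTE_MBUF_F_TX_OUTER_IP_CKSUM satisfies
 *   (m->ol_flags & IGC_TX_OFFLOAD_NOTSUP_MASK) != 0
 * and is rejected by eth_igc_prep_pkts() below with rte_errno = ENOTSUP.
 */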
95 
96 /**
97  * Structure associated with each descriptor of the RX ring of a RX queue.
98  */
99 struct igc_rx_entry {
100 	struct rte_mbuf *mbuf; /**< mbuf associated with RX descriptor. */
101 };
102 
103 /**
104  * Structure associated with each RX queue.
105  */
106 struct igc_rx_queue {
107 	struct rte_mempool  *mb_pool;   /**< mbuf pool to populate RX ring. */
108 	volatile union igc_adv_rx_desc *rx_ring;
109 	/**< RX ring virtual address. */
110 	uint64_t            rx_ring_phys_addr; /**< RX ring DMA address. */
111 	volatile uint32_t   *rdt_reg_addr; /**< RDT register address. */
112 	volatile uint32_t   *rdh_reg_addr; /**< RDH register address. */
113 	struct igc_rx_entry *sw_ring;   /**< address of RX software ring. */
114 	struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
115 	struct rte_mbuf *pkt_last_seg;  /**< Last segment of current packet. */
116 	uint16_t            nb_rx_desc; /**< number of RX descriptors. */
117 	uint16_t            rx_tail;    /**< current value of RDT register. */
118 	uint16_t            nb_rx_hold; /**< number of held free RX desc. */
119 	uint16_t            rx_free_thresh; /**< max free RX desc to hold. */
120 	uint16_t            queue_id;   /**< RX queue index. */
121 	uint16_t            reg_idx;    /**< RX queue register index. */
122 	uint16_t            port_id;    /**< Device port identifier. */
123 	uint8_t             pthresh;    /**< Prefetch threshold register. */
124 	uint8_t             hthresh;    /**< Host threshold register. */
125 	uint8_t             wthresh;    /**< Write-back threshold register. */
126 	uint8_t             crc_len;    /**< 0 if CRC stripped, 4 otherwise. */
127 	uint8_t             drop_en;	/**< If not 0, set SRRCTL.Drop_En. */
128 	uint32_t            flags;      /**< RX flags. */
129 	uint64_t	    offloads;   /**< offloads of RTE_ETH_RX_OFFLOAD_* */
130 };
131 
132 /** Offload features */
133 union igc_tx_offload {
134 	uint64_t data;
135 	struct {
136 		uint64_t l3_len:9; /**< L3 (IP) Header Length. */
137 		uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
138 		uint64_t vlan_tci:16;
139 		/**< VLAN Tag Control Identifier (CPU order). */
140 		uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
141 		uint64_t tso_segsz:16; /**< TCP TSO segment size. */
142 		/* uint64_t unused:8; */
143 	};
144 };
145 
146 /*
147  * Compare mask for igc_tx_offload.data,
148  * should be in sync with igc_tx_offload layout.
149  */
150 #define TX_MACIP_LEN_CMP_MASK	0x000000000000FFFFULL /**< L2L3 header mask. */
151 #define TX_VLAN_CMP_MASK	0x00000000FFFF0000ULL /**< Vlan mask. */
152 #define TX_TCP_LEN_CMP_MASK	0x000000FF00000000ULL /**< TCP header mask. */
153 #define TX_TSO_MSS_CMP_MASK	0x00FFFF0000000000ULL /**< TSO segsz mask. */
154 /** Mac + IP + TCP + Mss mask. */
155 #define TX_TSO_CMP_MASK	\
156 	(TX_MACIP_LEN_CMP_MASK | TX_TCP_LEN_CMP_MASK | TX_TSO_MSS_CMP_MASK)
157 
158 /**
159  * Structure to check whether a new context needs to be built
160  */
161 struct igc_advctx_info {
162 	uint64_t flags;           /**< ol_flags related to context build. */
163 	/** tx offload: vlan, tso, l2-l3-l4 lengths. */
164 	union igc_tx_offload tx_offload;
165 	/** compare mask for tx offload. */
166 	union igc_tx_offload tx_offload_mask;
167 };
168 
169 /**
170  * Hardware context number
171  */
172 enum {
173 	IGC_CTX_0    = 0, /**< CTX0    */
174 	IGC_CTX_1    = 1, /**< CTX1    */
175 	IGC_CTX_NUM  = 2, /**< CTX_NUM */
176 };
177 
178 /**
179  * Structure associated with each descriptor of the TX ring of a TX queue.
180  */
181 struct igc_tx_entry {
182 	struct rte_mbuf *mbuf; /**< mbuf associated with TX desc, if any. */
183 	uint16_t next_id; /**< Index of next descriptor in ring. */
184 	uint16_t last_id; /**< Index of last scattered descriptor. */
185 };
186 
187 /**
188  * Structure associated with each TX queue.
189  */
190 struct igc_tx_queue {
191 	volatile union igc_adv_tx_desc *tx_ring; /**< TX ring address */
192 	uint64_t               tx_ring_phys_addr; /**< TX ring DMA address. */
193 	struct igc_tx_entry    *sw_ring; /**< virtual address of SW ring. */
194 	volatile uint32_t      *tdt_reg_addr; /**< Address of TDT register. */
195 	uint32_t               txd_type;      /**< Device-specific TXD type */
196 	uint16_t               nb_tx_desc;    /**< number of TX descriptors. */
197 	uint16_t               tx_tail;  /**< Current value of TDT register. */
198 	uint16_t               tx_head;
199 	/**< Index of first used TX descriptor. */
200 	uint16_t               queue_id; /**< TX queue index. */
201 	uint16_t               reg_idx;  /**< TX queue register index. */
202 	uint16_t               port_id;  /**< Device port identifier. */
203 	uint8_t                pthresh;  /**< Prefetch threshold register. */
204 	uint8_t                hthresh;  /**< Host threshold register. */
205 	uint8_t                wthresh;  /**< Write-back threshold register. */
206 	uint8_t                ctx_curr;
207 
208 	/**< Start context position for transmit queue. */
209 	struct igc_advctx_info ctx_cache[IGC_CTX_NUM];
210 	/**< Hardware context history.*/
211 	uint64_t	       offloads; /**< offloads of RTE_ETH_TX_OFFLOAD_* */
212 };
213 
214 static inline uint64_t
215 rx_desc_statuserr_to_pkt_flags(uint32_t statuserr)
216 {
217 	static uint64_t l4_chksum_flags[] = {0, 0,
218 			RTE_MBUF_F_RX_L4_CKSUM_GOOD,
219 			RTE_MBUF_F_RX_L4_CKSUM_BAD};
220 
221 	static uint64_t l3_chksum_flags[] = {0, 0,
222 			RTE_MBUF_F_RX_IP_CKSUM_GOOD,
223 			RTE_MBUF_F_RX_IP_CKSUM_BAD};
224 	uint64_t pkt_flags = 0;
225 	uint32_t tmp;
226 
227 	if (statuserr & IGC_RXD_STAT_VP)
228 		pkt_flags |= RTE_MBUF_F_RX_VLAN_STRIPPED;
229 
230 	tmp = !!(statuserr & (IGC_RXD_STAT_L4CS | IGC_RXD_STAT_UDPCS));
231 	tmp = (tmp << 1) | (uint32_t)!!(statuserr & IGC_RXD_EXT_ERR_L4E);
232 	pkt_flags |= l4_chksum_flags[tmp];
233 
234 	tmp = !!(statuserr & IGC_RXD_STAT_IPCS);
235 	tmp = (tmp << 1) | (uint32_t)!!(statuserr & IGC_RXD_EXT_ERR_IPE);
236 	pkt_flags |= l3_chksum_flags[tmp];
237 
238 	return pkt_flags;
239 }
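/*
 * Worked example (added for clarity, not part of the original source):
 * the two-bit table index is (checksum-checked bit << 1) | error bit.
 *   - L4CS/UDPCS set, L4E clear -> index 2 -> RTE_MBUF_F_RX_L4_CKSUM_GOOD
 *   - L4CS/UDPCS set, L4E set   -> index 3 -> RTE_MBUF_F_RX_L4_CKSUM_BAD
 *   - L4CS/UDPCS clear          -> index 0 or 1 -> no checksum flag reported
 * The IP checksum table works the same way with IPCS and IPE.
 */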
240 
241 #define IGC_PACKET_TYPE_IPV4              0X01
242 #define IGC_PACKET_TYPE_IPV4_TCP          0X11
243 #define IGC_PACKET_TYPE_IPV4_UDP          0X21
244 #define IGC_PACKET_TYPE_IPV4_SCTP         0X41
245 #define IGC_PACKET_TYPE_IPV4_EXT          0X03
246 #define IGC_PACKET_TYPE_IPV4_EXT_SCTP     0X43
247 #define IGC_PACKET_TYPE_IPV6              0X04
248 #define IGC_PACKET_TYPE_IPV6_TCP          0X14
249 #define IGC_PACKET_TYPE_IPV6_UDP          0X24
250 #define IGC_PACKET_TYPE_IPV6_EXT          0X0C
251 #define IGC_PACKET_TYPE_IPV6_EXT_TCP      0X1C
252 #define IGC_PACKET_TYPE_IPV6_EXT_UDP      0X2C
253 #define IGC_PACKET_TYPE_IPV4_IPV6         0X05
254 #define IGC_PACKET_TYPE_IPV4_IPV6_TCP     0X15
255 #define IGC_PACKET_TYPE_IPV4_IPV6_UDP     0X25
256 #define IGC_PACKET_TYPE_IPV4_IPV6_EXT     0X0D
257 #define IGC_PACKET_TYPE_IPV4_IPV6_EXT_TCP 0X1D
258 #define IGC_PACKET_TYPE_IPV4_IPV6_EXT_UDP 0X2D
259 #define IGC_PACKET_TYPE_MAX               0X80
260 #define IGC_PACKET_TYPE_MASK              0X7F
261 #define IGC_PACKET_TYPE_SHIFT             0X04
262 
263 static inline uint32_t
264 rx_desc_pkt_info_to_pkt_type(uint32_t pkt_info)
265 {
266 	static const uint32_t
267 		ptype_table[IGC_PACKET_TYPE_MAX] __rte_cache_aligned = {
268 		[IGC_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
269 			RTE_PTYPE_L3_IPV4,
270 		[IGC_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
271 			RTE_PTYPE_L3_IPV4_EXT,
272 		[IGC_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
273 			RTE_PTYPE_L3_IPV6,
274 		[IGC_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
275 			RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
276 			RTE_PTYPE_INNER_L3_IPV6,
277 		[IGC_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
278 			RTE_PTYPE_L3_IPV6_EXT,
279 		[IGC_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
280 			RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
281 			RTE_PTYPE_INNER_L3_IPV6_EXT,
282 		[IGC_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
283 			RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
284 		[IGC_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
285 			RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
286 		[IGC_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
287 			RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
288 			RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
289 		[IGC_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
290 			RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
291 		[IGC_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
292 			RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
293 			RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
294 		[IGC_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
295 			RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
296 		[IGC_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
297 			RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
298 		[IGC_PACKET_TYPE_IPV4_IPV6_UDP] =  RTE_PTYPE_L2_ETHER |
299 			RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
300 			RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
301 		[IGC_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
302 			RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
303 		[IGC_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
304 			RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
305 			RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
306 		[IGC_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
307 			RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
308 		[IGC_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
309 			RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
310 	};
311 	if (unlikely(pkt_info & IGC_RXDADV_PKTTYPE_ETQF))
312 		return RTE_PTYPE_UNKNOWN;
313 
314 	pkt_info = (pkt_info >> IGC_PACKET_TYPE_SHIFT) & IGC_PACKET_TYPE_MASK;
315 
316 	return ptype_table[pkt_info];
317 }
318 
319 static inline void
320 rx_desc_get_pkt_info(struct igc_rx_queue *rxq, struct rte_mbuf *rxm,
321 		union igc_adv_rx_desc *rxd, uint32_t staterr)
322 {
323 	uint64_t pkt_flags;
324 	uint32_t hlen_type_rss;
325 	uint16_t pkt_info;
326 
327 	/* Prefetch data of first segment, if configured to do so. */
328 	rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
329 
330 	rxm->port = rxq->port_id;
331 	hlen_type_rss = rte_le_to_cpu_32(rxd->wb.lower.lo_dword.data);
332 	rxm->hash.rss = rte_le_to_cpu_32(rxd->wb.lower.hi_dword.rss);
333 	rxm->vlan_tci = rte_le_to_cpu_16(rxd->wb.upper.vlan);
334 
335 	pkt_flags = (hlen_type_rss & IGC_RXD_RSS_TYPE_MASK) ?
336 			RTE_MBUF_F_RX_RSS_HASH : 0;
337 
338 	if (hlen_type_rss & IGC_RXD_VPKT)
339 		pkt_flags |= RTE_MBUF_F_RX_VLAN;
340 
341 	pkt_flags |= rx_desc_statuserr_to_pkt_flags(staterr);
342 
343 	rxm->ol_flags = pkt_flags;
344 	pkt_info = rte_le_to_cpu_16(rxd->wb.lower.lo_dword.hs_rss.pkt_info);
345 	rxm->packet_type = rx_desc_pkt_info_to_pkt_type(pkt_info);
346 }
347 
348 static uint16_t
349 igc_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
350 {
351 	struct igc_rx_queue * const rxq = rx_queue;
352 	volatile union igc_adv_rx_desc * const rx_ring = rxq->rx_ring;
353 	struct igc_rx_entry * const sw_ring = rxq->sw_ring;
354 	uint16_t rx_id = rxq->rx_tail;
355 	uint16_t nb_rx = 0;
356 	uint16_t nb_hold = 0;
357 
358 	while (nb_rx < nb_pkts) {
359 		volatile union igc_adv_rx_desc *rxdp;
360 		struct igc_rx_entry *rxe;
361 		struct rte_mbuf *rxm;
362 		struct rte_mbuf *nmb;
363 		union igc_adv_rx_desc rxd;
364 		uint32_t staterr;
365 		uint16_t data_len;
366 
367 		/*
368 		 * The order of operations here is important as the DD status
369 		 * bit must not be read after any other descriptor fields.
370 		 * rx_ring and rxdp are pointing to volatile data so the order
371 		 * of accesses cannot be reordered by the compiler. If they were
372 		 * not volatile, they could be reordered which could lead to
373 		 * using invalid descriptor fields when read from rxd.
374 		 */
375 		rxdp = &rx_ring[rx_id];
376 		staterr = rte_cpu_to_le_32(rxdp->wb.upper.status_error);
377 		if (!(staterr & IGC_RXD_STAT_DD))
378 			break;
379 		rxd = *rxdp;
380 
381 		/*
382 		 * End of packet.
383 		 *
384 		 * If the IGC_RXD_STAT_EOP flag is not set, the RX packet is
385 		 * likely to be invalid and to be dropped by the various
386 		 * validation checks performed by the network stack.
387 		 *
388 		 * Allocate a new mbuf to replenish the RX ring descriptor.
389 		 * If the allocation fails:
390 		 *    - arrange for that RX descriptor to be the first one
391 		 *      being parsed the next time the receive function is
392 		 *      invoked [on the same queue].
393 		 *
394 		 *    - Stop parsing the RX ring and return immediately.
395 		 *
396 		 * This policy does not drop the packet received in the RX
397 		 * descriptor for which the allocation of a new mbuf failed.
398 		 * Thus, it allows that packet to be later retrieved if
399 		 * mbufs have been freed in the meantime.
400 		 * As a side effect, holding RX descriptors instead of
401 		 * systematically giving them back to the NIC may lead to
402 		 * RX ring exhaustion situations.
403 		 * However, the NIC can gracefully prevent such situations
404 		 * from happening by sending specific "back-pressure" flow control
405 		 * frames to its peer(s).
406 		 */
407 		PMD_RX_LOG(DEBUG,
408 			"port_id=%u queue_id=%u rx_id=%u staterr=0x%x data_len=%u",
409 			rxq->port_id, rxq->queue_id, rx_id, staterr,
410 			rte_le_to_cpu_16(rxd.wb.upper.length));
411 
412 		nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
413 		if (nmb == NULL) {
414 			unsigned int id;
415 			PMD_RX_LOG(DEBUG,
416 				"RX mbuf alloc failed, port_id=%u queue_id=%u",
417 				rxq->port_id, rxq->queue_id);
418 			id = rxq->port_id;
419 			rte_eth_devices[id].data->rx_mbuf_alloc_failed++;
420 			break;
421 		}
422 
423 		nb_hold++;
424 		rxe = &sw_ring[rx_id];
425 		rx_id++;
426 		if (rx_id >= rxq->nb_rx_desc)
427 			rx_id = 0;
428 
429 		/* Prefetch next mbuf while processing current one. */
430 		rte_igc_prefetch(sw_ring[rx_id].mbuf);
431 
432 		/*
433 		 * When next RX descriptor is on a cache-line boundary,
434 		 * prefetch the next 4 RX descriptors and the next 8 pointers
435 		 * to mbufs.
436 		 */
437 		if ((rx_id & 0x3) == 0) {
438 			rte_igc_prefetch(&rx_ring[rx_id]);
439 			rte_igc_prefetch(&sw_ring[rx_id]);
440 		}
441 
442 		/*
443 		 * Update RX descriptor with the physical address of the new
444 		 * data buffer of the new allocated mbuf.
445 		 */
446 		rxm = rxe->mbuf;
447 		rxe->mbuf = nmb;
448 		rxdp->read.hdr_addr = 0;
449 		rxdp->read.pkt_addr =
450 			rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
451 		rxm->next = NULL;
452 
453 		rxm->data_off = RTE_PKTMBUF_HEADROOM;
454 		data_len = rte_le_to_cpu_16(rxd.wb.upper.length) - rxq->crc_len;
455 		rxm->data_len = data_len;
456 		rxm->pkt_len = data_len;
457 		rxm->nb_segs = 1;
458 
459 		rx_desc_get_pkt_info(rxq, rxm, &rxd, staterr);
460 
461 		/*
462 		 * Store the mbuf address into the next entry of the array
463 		 * of returned packets.
464 		 */
465 		rx_pkts[nb_rx++] = rxm;
466 	}
467 	rxq->rx_tail = rx_id;
468 
469 	/*
470 	 * If the number of free RX descriptors is greater than the RX free
471 	 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
472 	 * register.
473 	 * Update the RDT with the value of the last processed RX descriptor
474 	 * minus 1, to guarantee that the RDT register is never equal to the
475 	 * RDH register, which creates a "full" ring situation from the
476 	 * hardware point of view...
477 	 */
478 	nb_hold = nb_hold + rxq->nb_rx_hold;
479 	if (nb_hold > rxq->rx_free_thresh) {
480 		PMD_RX_LOG(DEBUG,
481 			"port_id=%u queue_id=%u rx_tail=%u nb_hold=%u nb_rx=%u",
482 			rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
483 		rx_id = (rx_id == 0) ? (rxq->nb_rx_desc - 1) : (rx_id - 1);
484 		IGC_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
485 		nb_hold = 0;
486 	}
487 	rxq->nb_rx_hold = nb_hold;
488 	return nb_rx;
489 }
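/*
 * Illustrative application-side sketch (an assumption, not part of this
 * driver): the burst function above is reached through the generic
 * rte_eth_rx_burst() API.  The helper name and the burst size of 32 are
 * hypothetical.
 */
static inline void
example_rx_poll(uint16_t port_id, uint16_t queue_id)
{
	struct rte_mbuf *pkts[32];
	uint16_t i, nb;

	/* Dispatches to igc_recv_pkts() or igc_recv_scattered_pkts(). */
	nb = rte_eth_rx_burst(port_id, queue_id, pkts, RTE_DIM(pkts));
	for (i = 0; i < nb; i++)
		rte_pktmbuf_free(pkts[i]); /* a real app would process the packet first */
}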
490 
491 static uint16_t
492 igc_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
493 			uint16_t nb_pkts)
494 {
495 	struct igc_rx_queue * const rxq = rx_queue;
496 	volatile union igc_adv_rx_desc * const rx_ring = rxq->rx_ring;
497 	struct igc_rx_entry * const sw_ring = rxq->sw_ring;
498 	struct rte_mbuf *first_seg = rxq->pkt_first_seg;
499 	struct rte_mbuf *last_seg = rxq->pkt_last_seg;
500 
501 	uint16_t rx_id = rxq->rx_tail;
502 	uint16_t nb_rx = 0;
503 	uint16_t nb_hold = 0;
504 
505 	while (nb_rx < nb_pkts) {
506 		volatile union igc_adv_rx_desc *rxdp;
507 		struct igc_rx_entry *rxe;
508 		struct rte_mbuf *rxm;
509 		struct rte_mbuf *nmb;
510 		union igc_adv_rx_desc rxd;
511 		uint32_t staterr;
512 		uint16_t data_len;
513 
514 next_desc:
515 		/*
516 		 * The order of operations here is important as the DD status
517 		 * bit must not be read after any other descriptor fields.
518 		 * rx_ring and rxdp are pointing to volatile data so the order
519 		 * of accesses cannot be reordered by the compiler. If they were
520 		 * not volatile, they could be reordered which could lead to
521 		 * using invalid descriptor fields when read from rxd.
522 		 */
523 		rxdp = &rx_ring[rx_id];
524 		staterr = rte_cpu_to_le_32(rxdp->wb.upper.status_error);
525 		if (!(staterr & IGC_RXD_STAT_DD))
526 			break;
527 		rxd = *rxdp;
528 
529 		/*
530 		 * Descriptor done.
531 		 *
532 		 * Allocate a new mbuf to replenish the RX ring descriptor.
533 		 * If the allocation fails:
534 		 *    - arrange for that RX descriptor to be the first one
535 		 *      being parsed the next time the receive function is
536 		 *      invoked [on the same queue].
537 		 *
538 		 *    - Stop parsing the RX ring and return immediately.
539 		 *
540 		 * This policy does not drop the packet received in the RX
541 		 * descriptor for which the allocation of a new mbuf failed.
542 		 * Thus, it allows that packet to be later retrieved if
543 		 * mbufs have been freed in the meantime.
544 		 * As a side effect, holding RX descriptors instead of
545 		 * systematically giving them back to the NIC may lead to
546 		 * RX ring exhaustion situations.
547 		 * However, the NIC can gracefully prevent such situations
548 		 * from happening by sending specific "back-pressure" flow control
549 		 * frames to its peer(s).
550 		 */
551 		PMD_RX_LOG(DEBUG,
552 			"port_id=%u queue_id=%u rx_id=%u staterr=0x%x data_len=%u",
553 			rxq->port_id, rxq->queue_id, rx_id, staterr,
554 			rte_le_to_cpu_16(rxd.wb.upper.length));
555 
556 		nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
557 		if (nmb == NULL) {
558 			unsigned int id;
559 			PMD_RX_LOG(DEBUG,
560 				"RX mbuf alloc failed, port_id=%u queue_id=%u",
561 				rxq->port_id, rxq->queue_id);
562 			id = rxq->port_id;
563 			rte_eth_devices[id].data->rx_mbuf_alloc_failed++;
564 			break;
565 		}
566 
567 		nb_hold++;
568 		rxe = &sw_ring[rx_id];
569 		rx_id++;
570 		if (rx_id >= rxq->nb_rx_desc)
571 			rx_id = 0;
572 
573 		/* Prefetch next mbuf while processing current one. */
574 		rte_igc_prefetch(sw_ring[rx_id].mbuf);
575 
576 		/*
577 		 * When next RX descriptor is on a cache-line boundary,
578 		 * prefetch the next 4 RX descriptors and the next 8 pointers
579 		 * to mbufs.
580 		 */
581 		if ((rx_id & 0x3) == 0) {
582 			rte_igc_prefetch(&rx_ring[rx_id]);
583 			rte_igc_prefetch(&sw_ring[rx_id]);
584 		}
585 
586 		/*
587 		 * Update RX descriptor with the physical address of the new
588 		 * data buffer of the new allocated mbuf.
589 		 */
590 		rxm = rxe->mbuf;
591 		rxe->mbuf = nmb;
592 		rxdp->read.hdr_addr = 0;
593 		rxdp->read.pkt_addr =
594 			rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
595 		rxm->next = NULL;
596 
597 		/*
598 		 * Set data length & data buffer address of mbuf.
599 		 */
600 		rxm->data_off = RTE_PKTMBUF_HEADROOM;
601 		data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
602 		rxm->data_len = data_len;
603 
604 		/*
605 		 * If this is the first buffer of the received packet,
606 		 * set the pointer to the first mbuf of the packet and
607 		 * initialize its context.
608 		 * Otherwise, update the total length and the number of segments
609 		 * of the current scattered packet, and update the pointer to
610 		 * the last mbuf of the current packet.
611 		 */
612 		if (first_seg == NULL) {
613 			first_seg = rxm;
614 			first_seg->pkt_len = data_len;
615 			first_seg->nb_segs = 1;
616 		} else {
617 			first_seg->pkt_len += data_len;
618 			first_seg->nb_segs++;
619 			last_seg->next = rxm;
620 		}
621 
622 		/*
623 		 * If this is not the last buffer of the received packet,
624 		 * update the pointer to the last mbuf of the current scattered
625 		 * packet and continue to parse the RX ring.
626 		 */
627 		if (!(staterr & IGC_RXD_STAT_EOP)) {
628 			last_seg = rxm;
629 			goto next_desc;
630 		}
631 
632 		/*
633 		 * This is the last buffer of the received packet.
634 		 * If the CRC is not stripped by the hardware:
635 		 *   - Subtract the CRC	length from the total packet length.
636 		 *   - If the last buffer only contains the whole CRC or a part
637 		 *     of it, free the mbuf associated to the last buffer.
638 		 *     If part of the CRC is also contained in the previous
639 		 *     mbuf, subtract the length of that CRC part from the
640 		 *     data length of the previous mbuf.
641 		 */
642 		if (unlikely(rxq->crc_len > 0)) {
643 			first_seg->pkt_len -= RTE_ETHER_CRC_LEN;
644 			if (data_len <= RTE_ETHER_CRC_LEN) {
645 				rte_pktmbuf_free_seg(rxm);
646 				first_seg->nb_segs--;
647 				last_seg->data_len = last_seg->data_len -
648 					 (RTE_ETHER_CRC_LEN - data_len);
649 				last_seg->next = NULL;
650 			} else {
651 				rxm->data_len = (uint16_t)
652 					(data_len - RTE_ETHER_CRC_LEN);
653 			}
654 		}
655 
656 		rx_desc_get_pkt_info(rxq, first_seg, &rxd, staterr);
657 
658 		/*
659 		 * Store the mbuf address into the next entry of the array
660 		 * of returned packets.
661 		 */
662 		rx_pkts[nb_rx++] = first_seg;
663 
664 		/* Set up the receive context for a new packet. */
665 		first_seg = NULL;
666 	}
667 	rxq->rx_tail = rx_id;
668 
669 	/*
670 	 * Save receive context.
671 	 */
672 	rxq->pkt_first_seg = first_seg;
673 	rxq->pkt_last_seg = last_seg;
674 
675 	/*
676 	 * If the number of free RX descriptors is greater than the RX free
677 	 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
678 	 * register.
679 	 * Update the RDT with the value of the last processed RX descriptor
680 	 * minus 1, to guarantee that the RDT register is never equal to the
681 	 * RDH register, which creates a "full" ring situation from the
682 	 * hardware point of view...
683 	 */
684 	nb_hold = nb_hold + rxq->nb_rx_hold;
685 	if (nb_hold > rxq->rx_free_thresh) {
686 		PMD_RX_LOG(DEBUG,
687 			"port_id=%u queue_id=%u rx_tail=%u nb_hold=%u nb_rx=%u",
688 			rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
689 		rx_id = (rx_id == 0) ? (rxq->nb_rx_desc - 1) : (rx_id - 1);
690 		IGC_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
691 		nb_hold = 0;
692 	}
693 	rxq->nb_rx_hold = nb_hold;
694 	return nb_rx;
695 }
696 
697 static void
698 igc_rx_queue_release_mbufs(struct igc_rx_queue *rxq)
699 {
700 	unsigned int i;
701 
702 	if (rxq->sw_ring != NULL) {
703 		for (i = 0; i < rxq->nb_rx_desc; i++) {
704 			if (rxq->sw_ring[i].mbuf != NULL) {
705 				rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
706 				rxq->sw_ring[i].mbuf = NULL;
707 			}
708 		}
709 	}
710 }
711 
712 static void
713 igc_rx_queue_release(struct igc_rx_queue *rxq)
714 {
715 	igc_rx_queue_release_mbufs(rxq);
716 	rte_free(rxq->sw_ring);
717 	rte_free(rxq);
718 }
719 
720 void eth_igc_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
721 {
722 	if (dev->data->rx_queues[qid])
723 		igc_rx_queue_release(dev->data->rx_queues[qid]);
724 }
725 
726 uint32_t eth_igc_rx_queue_count(void *rx_queue)
727 {
728 	/**
729 	 * Check the DD bit of one RX descriptor in every group of 4,
730 	 * to avoid checking too frequently and degrading performance
731 	 * too much.
732 	 */
733 #define IGC_RXQ_SCAN_INTERVAL 4
734 
735 	volatile union igc_adv_rx_desc *rxdp;
736 	struct igc_rx_queue *rxq;
737 	uint16_t desc = 0;
738 
739 	rxq = rx_queue;
740 	rxdp = &rxq->rx_ring[rxq->rx_tail];
741 
742 	while (desc < rxq->nb_rx_desc - rxq->rx_tail) {
743 		if (unlikely(!(rxdp->wb.upper.status_error &
744 				IGC_RXD_STAT_DD)))
745 			return desc;
746 		desc += IGC_RXQ_SCAN_INTERVAL;
747 		rxdp += IGC_RXQ_SCAN_INTERVAL;
748 	}
749 	rxdp = &rxq->rx_ring[rxq->rx_tail + desc - rxq->nb_rx_desc];
750 
751 	while (desc < rxq->nb_rx_desc &&
752 		(rxdp->wb.upper.status_error & IGC_RXD_STAT_DD)) {
753 		desc += IGC_RXQ_SCAN_INTERVAL;
754 		rxdp += IGC_RXQ_SCAN_INTERVAL;
755 	}
756 
757 	return desc;
758 }
759 
760 int eth_igc_rx_descriptor_status(void *rx_queue, uint16_t offset)
761 {
762 	struct igc_rx_queue *rxq = rx_queue;
763 	volatile uint32_t *status;
764 	uint32_t desc;
765 
766 	if (unlikely(!rxq || offset >= rxq->nb_rx_desc))
767 		return -EINVAL;
768 
769 	if (offset >= rxq->nb_rx_desc - rxq->nb_rx_hold)
770 		return RTE_ETH_RX_DESC_UNAVAIL;
771 
772 	desc = rxq->rx_tail + offset;
773 	if (desc >= rxq->nb_rx_desc)
774 		desc -= rxq->nb_rx_desc;
775 
776 	status = &rxq->rx_ring[desc].wb.upper.status_error;
777 	if (*status & rte_cpu_to_le_32(IGC_RXD_STAT_DD))
778 		return RTE_ETH_RX_DESC_DONE;
779 
780 	return RTE_ETH_RX_DESC_AVAIL;
781 }
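/*
 * Illustrative sketch (an assumption, not part of this driver): applications
 * reach the handler above through rte_eth_rx_descriptor_status().  The helper
 * name is hypothetical.
 */
static inline int
example_half_ring_done(uint16_t port_id, uint16_t queue_id, uint16_t nb_desc)
{
	/* RTE_ETH_RX_DESC_DONE: the descriptor at this offset has been filled. */
	return rte_eth_rx_descriptor_status(port_id, queue_id, nb_desc / 2) ==
		RTE_ETH_RX_DESC_DONE;
}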
782 
783 static int
784 igc_alloc_rx_queue_mbufs(struct igc_rx_queue *rxq)
785 {
786 	struct igc_rx_entry *rxe = rxq->sw_ring;
787 	uint64_t dma_addr;
788 	unsigned int i;
789 
790 	/* Initialize software ring entries. */
791 	for (i = 0; i < rxq->nb_rx_desc; i++) {
792 		volatile union igc_adv_rx_desc *rxd;
793 		struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
794 
795 		if (mbuf == NULL) {
796 			PMD_DRV_LOG(ERR, "RX mbuf alloc failed, queue_id=%hu",
797 				rxq->queue_id);
798 			return -ENOMEM;
799 		}
800 		dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
801 		rxd = &rxq->rx_ring[i];
802 		rxd->read.hdr_addr = 0;
803 		rxd->read.pkt_addr = dma_addr;
804 		rxe[i].mbuf = mbuf;
805 	}
806 
807 	return 0;
808 }
809 
810 /*
811  * RSS random key supplied in section 7.1.2.9.3 of the Intel I225 datasheet.
812  * Used as the default key.
813  */
814 static uint8_t default_rss_key[40] = {
815 	0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
816 	0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
817 	0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
818 	0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
819 	0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
820 };
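/*
 * Illustrative application-side sketch (an assumption, not part of this
 * driver): requesting RSS at configure time.  Leaving rss_key NULL makes
 * igc_rss_configure() fall back to default_rss_key above.  The variable name
 * is hypothetical.
 */
static const struct rte_eth_conf example_rss_port_conf __rte_unused = {
	.rxmode = {
		.mq_mode = RTE_ETH_MQ_RX_RSS,
	},
	.rx_adv_conf = {
		.rss_conf = {
			.rss_key = NULL, /* use the driver default key */
			.rss_hf = RTE_ETH_RSS_IPV4 |
				  RTE_ETH_RSS_NONFRAG_IPV4_TCP |
				  RTE_ETH_RSS_NONFRAG_IPV4_UDP,
		},
	},
};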
821 
822 void
823 igc_rss_disable(struct rte_eth_dev *dev)
824 {
825 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
826 	uint32_t mrqc;
827 
828 	mrqc = IGC_READ_REG(hw, IGC_MRQC);
829 	mrqc &= ~IGC_MRQC_ENABLE_MASK;
830 	IGC_WRITE_REG(hw, IGC_MRQC, mrqc);
831 }
832 
833 void
834 igc_hw_rss_hash_set(struct igc_hw *hw, struct rte_eth_rss_conf *rss_conf)
835 {
836 	uint32_t *hash_key = (uint32_t *)rss_conf->rss_key;
837 	uint32_t mrqc;
838 	uint64_t rss_hf;
839 
840 	if (hash_key != NULL) {
841 		uint8_t i;
842 
843 		/* Fill in RSS hash key */
844 		for (i = 0; i < IGC_HKEY_MAX_INDEX; i++)
845 			IGC_WRITE_REG_LE_VALUE(hw, IGC_RSSRK(i), hash_key[i]);
846 	}
847 
848 	/* Set configured hashing protocols in MRQC register */
849 	rss_hf = rss_conf->rss_hf;
850 	mrqc = IGC_MRQC_ENABLE_RSS_4Q; /* RSS enabled. */
851 	if (rss_hf & RTE_ETH_RSS_IPV4)
852 		mrqc |= IGC_MRQC_RSS_FIELD_IPV4;
853 	if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV4_TCP)
854 		mrqc |= IGC_MRQC_RSS_FIELD_IPV4_TCP;
855 	if (rss_hf & RTE_ETH_RSS_IPV6)
856 		mrqc |= IGC_MRQC_RSS_FIELD_IPV6;
857 	if (rss_hf & RTE_ETH_RSS_IPV6_EX)
858 		mrqc |= IGC_MRQC_RSS_FIELD_IPV6_EX;
859 	if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV6_TCP)
860 		mrqc |= IGC_MRQC_RSS_FIELD_IPV6_TCP;
861 	if (rss_hf & RTE_ETH_RSS_IPV6_TCP_EX)
862 		mrqc |= IGC_MRQC_RSS_FIELD_IPV6_TCP_EX;
863 	if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV4_UDP)
864 		mrqc |= IGC_MRQC_RSS_FIELD_IPV4_UDP;
865 	if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV6_UDP)
866 		mrqc |= IGC_MRQC_RSS_FIELD_IPV6_UDP;
867 	if (rss_hf & RTE_ETH_RSS_IPV6_UDP_EX)
868 		mrqc |= IGC_MRQC_RSS_FIELD_IPV6_UDP_EX;
869 	IGC_WRITE_REG(hw, IGC_MRQC, mrqc);
870 }
871 
872 static void
873 igc_rss_configure(struct rte_eth_dev *dev)
874 {
875 	struct rte_eth_rss_conf rss_conf;
876 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
877 	uint16_t i;
878 
879 	/* Fill in redirection table. */
880 	for (i = 0; i < IGC_RSS_RDT_SIZD; i++) {
881 		union igc_rss_reta_reg reta;
882 		uint16_t q_idx, reta_idx;
883 
884 		q_idx = (uint8_t)((dev->data->nb_rx_queues > 1) ?
885 				   i % dev->data->nb_rx_queues : 0);
886 		reta_idx = i % sizeof(reta);
887 		reta.bytes[reta_idx] = q_idx;
888 		if (reta_idx == sizeof(reta) - 1)
889 			IGC_WRITE_REG_LE_VALUE(hw,
890 				IGC_RETA(i / sizeof(reta)), reta.dword);
891 	}
892 
893 	/*
894 	 * Configure the RSS key and the RSS protocols used to compute
895 	 * the RSS hash of input packets.
896 	 */
897 	rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
898 	if (rss_conf.rss_key == NULL)
899 		rss_conf.rss_key = default_rss_key;
900 	igc_hw_rss_hash_set(hw, &rss_conf);
901 }
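/*
 * Worked example (added for clarity, not part of the original source): each
 * 32-bit RETA register holds sizeof(reta) == 4 one-byte entries, so the loop
 * above performs one register write for every fourth table index.  With
 * three RX queues the table content cycles 0, 1, 2, 0, 1, 2, ..., spreading
 * hash values evenly across the queues.
 */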
902 
903 int
904 igc_del_rss_filter(struct rte_eth_dev *dev)
905 {
906 	struct igc_rss_filter *rss_filter = IGC_DEV_PRIVATE_RSS_FILTER(dev);
907 
908 	if (rss_filter->enable) {
909 		/* recover default RSS configuration */
910 		igc_rss_configure(dev);
911 
912 		/* disable RSS logic and clear filter data */
913 		igc_rss_disable(dev);
914 		memset(rss_filter, 0, sizeof(*rss_filter));
915 		return 0;
916 	}
917 	PMD_DRV_LOG(ERR, "filter does not exist!");
918 	return -ENOENT;
919 }
920 
921 /* Initialize the filter structure from a struct rte_flow_action_rss */
922 void
923 igc_rss_conf_set(struct igc_rss_filter *out,
924 		const struct rte_flow_action_rss *rss)
925 {
926 	out->conf.func = rss->func;
927 	out->conf.level = rss->level;
928 	out->conf.types = rss->types;
929 
930 	if (rss->key_len == sizeof(out->key)) {
931 		memcpy(out->key, rss->key, rss->key_len);
932 		out->conf.key = out->key;
933 		out->conf.key_len = rss->key_len;
934 	} else {
935 		out->conf.key = NULL;
936 		out->conf.key_len = 0;
937 	}
938 
939 	if (rss->queue_num <= IGC_RSS_RDT_SIZD) {
940 		memcpy(out->queue, rss->queue,
941 			sizeof(*out->queue) * rss->queue_num);
942 		out->conf.queue = out->queue;
943 		out->conf.queue_num = rss->queue_num;
944 	} else {
945 		out->conf.queue = NULL;
946 		out->conf.queue_num = 0;
947 	}
948 }
949 
950 int
951 igc_add_rss_filter(struct rte_eth_dev *dev, struct igc_rss_filter *rss)
952 {
953 	struct rte_eth_rss_conf rss_conf = {
954 		.rss_key = rss->conf.key_len ?
955 			(void *)(uintptr_t)rss->conf.key : NULL,
956 		.rss_key_len = rss->conf.key_len,
957 		.rss_hf = rss->conf.types,
958 	};
959 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
960 	struct igc_rss_filter *rss_filter = IGC_DEV_PRIVATE_RSS_FILTER(dev);
961 	uint32_t i, j;
962 
963 	/* check RSS type is valid */
964 	if ((rss_conf.rss_hf & IGC_RSS_OFFLOAD_ALL) == 0) {
965 		PMD_DRV_LOG(ERR,
966 			"RSS type(0x%" PRIx64 ") error!, only 0x%" PRIx64
967 			" been supported", rss_conf.rss_hf,
968 			(uint64_t)IGC_RSS_OFFLOAD_ALL);
969 		return -EINVAL;
970 	}
971 
972 	/* check queue count is not zero */
973 	if (!rss->conf.queue_num) {
974 		PMD_DRV_LOG(ERR, "Queue number should not be 0!");
975 		return -EINVAL;
976 	}
977 
978 	/* check queue id is valid */
979 	for (i = 0; i < rss->conf.queue_num; i++)
980 		if (rss->conf.queue[i] >= dev->data->nb_rx_queues) {
981 			PMD_DRV_LOG(ERR, "Queue id %u is invalid!",
982 					rss->conf.queue[i]);
983 			return -EINVAL;
984 		}
985 
986 	/* only support one filter */
987 	if (rss_filter->enable) {
988 		PMD_DRV_LOG(ERR, "Only support one RSS filter!");
989 		return -ENOTSUP;
990 	}
991 	rss_filter->enable = 1;
992 
993 	igc_rss_conf_set(rss_filter, &rss->conf);
994 
995 	/* Fill in redirection table. */
996 	for (i = 0, j = 0; i < IGC_RSS_RDT_SIZD; i++, j++) {
997 		union igc_rss_reta_reg reta;
998 		uint16_t q_idx, reta_idx;
999 
1000 		if (j == rss->conf.queue_num)
1001 			j = 0;
1002 		q_idx = rss->conf.queue[j];
1003 		reta_idx = i % sizeof(reta);
1004 		reta.bytes[reta_idx] = q_idx;
1005 		if (reta_idx == sizeof(reta) - 1)
1006 			IGC_WRITE_REG_LE_VALUE(hw,
1007 				IGC_RETA(i / sizeof(reta)), reta.dword);
1008 	}
1009 
1010 	if (rss_conf.rss_key == NULL)
1011 		rss_conf.rss_key = default_rss_key;
1012 	igc_hw_rss_hash_set(hw, &rss_conf);
1013 	return 0;
1014 }
1015 
1016 void
1017 igc_clear_rss_filter(struct rte_eth_dev *dev)
1018 {
1019 	struct igc_rss_filter *rss_filter = IGC_DEV_PRIVATE_RSS_FILTER(dev);
1020 
1021 	if (!rss_filter->enable)
1022 		return;
1023 
1024 	/* recover default RSS configuration */
1025 	igc_rss_configure(dev);
1026 
1027 	/* disable RSS logic and clear filter data */
1028 	igc_rss_disable(dev);
1029 	memset(rss_filter, 0, sizeof(*rss_filter));
1030 }
1031 
1032 static int
1033 igc_dev_mq_rx_configure(struct rte_eth_dev *dev)
1034 {
1035 	if (RTE_ETH_DEV_SRIOV(dev).active) {
1036 		PMD_DRV_LOG(ERR, "SRIOV unsupported!");
1037 		return -EINVAL;
1038 	}
1039 
1040 	switch (dev->data->dev_conf.rxmode.mq_mode) {
1041 	case RTE_ETH_MQ_RX_RSS:
1042 		igc_rss_configure(dev);
1043 		break;
1044 	case RTE_ETH_MQ_RX_NONE:
1045 		/*
1046 		 * configure the RSS registers with defaults for later use,
1047 		 * then disable the RSS logic
1048 		 */
1049 		igc_rss_configure(dev);
1050 		igc_rss_disable(dev);
1051 		break;
1052 	default:
1053 		PMD_DRV_LOG(ERR, "rx mode(%d) not supported!",
1054 			dev->data->dev_conf.rxmode.mq_mode);
1055 		return -EINVAL;
1056 	}
1057 	return 0;
1058 }
1059 
1060 int
1061 igc_rx_init(struct rte_eth_dev *dev)
1062 {
1063 	struct igc_rx_queue *rxq;
1064 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1065 	uint64_t offloads = dev->data->dev_conf.rxmode.offloads;
1066 	uint32_t max_rx_pktlen;
1067 	uint32_t rctl;
1068 	uint32_t rxcsum;
1069 	uint16_t buf_size;
1070 	uint16_t rctl_bsize;
1071 	uint16_t i;
1072 	int ret;
1073 
1074 	dev->rx_pkt_burst = igc_recv_pkts;
1075 
1076 	/*
1077 	 * Make sure receives are disabled while setting
1078 	 * up the descriptor ring.
1079 	 */
1080 	rctl = IGC_READ_REG(hw, IGC_RCTL);
1081 	IGC_WRITE_REG(hw, IGC_RCTL, rctl & ~IGC_RCTL_EN);
1082 
1083 	/* Configure support of jumbo frames, if any. */
1084 	if (dev->data->mtu > RTE_ETHER_MTU)
1085 		rctl |= IGC_RCTL_LPE;
1086 	else
1087 		rctl &= ~IGC_RCTL_LPE;
1088 
1089 	max_rx_pktlen = dev->data->mtu + IGC_ETH_OVERHEAD;
1090 	/*
1091 	 * Set maximum packet length by default, and might be updated
1092 	 * together with enabling/disabling dual VLAN.
1093 	 */
1094 	IGC_WRITE_REG(hw, IGC_RLPML, max_rx_pktlen);
1095 
1096 	/* Configure and enable each RX queue. */
1097 	rctl_bsize = 0;
1098 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
1099 		uint64_t bus_addr;
1100 		uint32_t rxdctl;
1101 		uint32_t srrctl;
1102 
1103 		rxq = dev->data->rx_queues[i];
1104 		rxq->flags = 0;
1105 
1106 		/* Allocate buffers for descriptor rings and set up queue */
1107 		ret = igc_alloc_rx_queue_mbufs(rxq);
1108 		if (ret)
1109 			return ret;
1110 
1111 		/*
1112 		 * Reset crc_len in case it was changed after queue setup by a
1113 		 * call to configure
1114 		 */
1115 		rxq->crc_len = (offloads & RTE_ETH_RX_OFFLOAD_KEEP_CRC) ?
1116 				RTE_ETHER_CRC_LEN : 0;
1117 
1118 		bus_addr = rxq->rx_ring_phys_addr;
1119 		IGC_WRITE_REG(hw, IGC_RDLEN(rxq->reg_idx),
1120 				rxq->nb_rx_desc *
1121 				sizeof(union igc_adv_rx_desc));
1122 		IGC_WRITE_REG(hw, IGC_RDBAH(rxq->reg_idx),
1123 				(uint32_t)(bus_addr >> 32));
1124 		IGC_WRITE_REG(hw, IGC_RDBAL(rxq->reg_idx),
1125 				(uint32_t)bus_addr);
1126 
1127 		/* set descriptor configuration */
1128 		srrctl = IGC_SRRCTL_DESCTYPE_ADV_ONEBUF;
1129 
1130 		srrctl |= (uint32_t)(RTE_PKTMBUF_HEADROOM / 64) <<
1131 				IGC_SRRCTL_BSIZEHEADER_SHIFT;
1132 		/*
1133 		 * Configure RX buffer size.
1134 		 */
1135 		buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
1136 			RTE_PKTMBUF_HEADROOM);
1137 		if (buf_size >= 1024) {
1138 			/*
1139 			 * Configure the BSIZEPACKET field of the SRRCTL
1140 			 * register of the queue.
1141 			 * Value is in 1 KB resolution, from 1 KB to 16 KB.
1142 			 * If this field is equal to 0b, then RCTL.BSIZE
1143 			 * determines the RX packet buffer size.
1144 			 */
1145 
1146 			srrctl |= ((buf_size >> IGC_SRRCTL_BSIZEPKT_SHIFT) &
1147 				   IGC_SRRCTL_BSIZEPKT_MASK);
1148 			buf_size = (uint16_t)((srrctl &
1149 					IGC_SRRCTL_BSIZEPKT_MASK) <<
1150 					IGC_SRRCTL_BSIZEPKT_SHIFT);
1151 
1152 			/* max_rx_pktlen already includes the dual VLAN length */
1153 			if (max_rx_pktlen > buf_size)
1154 				dev->data->scattered_rx = 1;
1155 		} else {
1156 			/*
1157 			 * Use BSIZE field of the device RCTL register.
1158 			 */
1159 			if (rctl_bsize == 0 || rctl_bsize > buf_size)
1160 				rctl_bsize = buf_size;
1161 			dev->data->scattered_rx = 1;
1162 		}
1163 
1164 		/* Set if packets are dropped when no descriptors available */
1165 		if (rxq->drop_en)
1166 			srrctl |= IGC_SRRCTL_DROP_EN;
1167 
1168 		IGC_WRITE_REG(hw, IGC_SRRCTL(rxq->reg_idx), srrctl);
1169 
1170 		/* Enable this RX queue. */
1171 		rxdctl = IGC_RXDCTL_QUEUE_ENABLE;
1172 		rxdctl |= ((uint32_t)rxq->pthresh << IGC_RXDCTL_PTHRESH_SHIFT) &
1173 				IGC_RXDCTL_PTHRESH_MSK;
1174 		rxdctl |= ((uint32_t)rxq->hthresh << IGC_RXDCTL_HTHRESH_SHIFT) &
1175 				IGC_RXDCTL_HTHRESH_MSK;
1176 		rxdctl |= ((uint32_t)rxq->wthresh << IGC_RXDCTL_WTHRESH_SHIFT) &
1177 				IGC_RXDCTL_WTHRESH_MSK;
1178 		IGC_WRITE_REG(hw, IGC_RXDCTL(rxq->reg_idx), rxdctl);
1179 	}
1180 
1181 	if (offloads & RTE_ETH_RX_OFFLOAD_SCATTER)
1182 		dev->data->scattered_rx = 1;
1183 
1184 	if (dev->data->scattered_rx) {
1185 		PMD_DRV_LOG(DEBUG, "forcing scatter mode");
1186 		dev->rx_pkt_burst = igc_recv_scattered_pkts;
1187 	}
1188 	/*
1189 	 * Set up the BSIZE field of the RCTL register, if needed.
1190 	 * Buffer sizes >= 1024 are not [supposed to be] set up in the RCTL
1191 	 * register, since the code above configures the SRRCTL register of
1192 	 * the RX queue in such a case.
1193 	 * All configurable sizes are:
1194 	 * 16384: rctl |= (IGC_RCTL_SZ_16384 | IGC_RCTL_BSEX);
1195 	 *  8192: rctl |= (IGC_RCTL_SZ_8192  | IGC_RCTL_BSEX);
1196 	 *  4096: rctl |= (IGC_RCTL_SZ_4096  | IGC_RCTL_BSEX);
1197 	 *  2048: rctl |= IGC_RCTL_SZ_2048;
1198 	 *  1024: rctl |= IGC_RCTL_SZ_1024;
1199 	 *   512: rctl |= IGC_RCTL_SZ_512;
1200 	 *   256: rctl |= IGC_RCTL_SZ_256;
1201 	 */
1202 	if (rctl_bsize > 0) {
1203 		if (rctl_bsize >= 512) /* 512 <= buf_size < 1024 - use 512 */
1204 			rctl |= IGC_RCTL_SZ_512;
1205 		else /* 256 <= buf_size < 512 - use 256 */
1206 			rctl |= IGC_RCTL_SZ_256;
1207 	}
1208 
1209 	/*
1210 	 * Configure RSS if device configured with multiple RX queues.
1211 	 */
1212 	igc_dev_mq_rx_configure(dev);
1213 
1214 	/* Update the rctl since igc_dev_mq_rx_configure may change its value */
1215 	rctl |= IGC_READ_REG(hw, IGC_RCTL);
1216 
1217 	/*
1218 	 * Setup the Checksum Register.
1219 	 * Receive Full-Packet Checksum Offload is mutually exclusive with RSS.
1220 	 */
1221 	rxcsum = IGC_READ_REG(hw, IGC_RXCSUM);
1222 	rxcsum |= IGC_RXCSUM_PCSD;
1223 
1224 	/* Enable both L3/L4 rx checksum offload */
1225 	if (offloads & RTE_ETH_RX_OFFLOAD_IPV4_CKSUM)
1226 		rxcsum |= IGC_RXCSUM_IPOFL;
1227 	else
1228 		rxcsum &= ~IGC_RXCSUM_IPOFL;
1229 
1230 	if (offloads &
1231 		(RTE_ETH_RX_OFFLOAD_TCP_CKSUM | RTE_ETH_RX_OFFLOAD_UDP_CKSUM)) {
1232 		rxcsum |= IGC_RXCSUM_TUOFL;
1233 		offloads |= RTE_ETH_RX_OFFLOAD_SCTP_CKSUM;
1234 	} else {
1235 		rxcsum &= ~IGC_RXCSUM_TUOFL;
1236 	}
1237 
1238 	if (offloads & RTE_ETH_RX_OFFLOAD_SCTP_CKSUM)
1239 		rxcsum |= IGC_RXCSUM_CRCOFL;
1240 	else
1241 		rxcsum &= ~IGC_RXCSUM_CRCOFL;
1242 
1243 	IGC_WRITE_REG(hw, IGC_RXCSUM, rxcsum);
1244 
1245 	/* Setup the Receive Control Register. */
1246 	if (offloads & RTE_ETH_RX_OFFLOAD_KEEP_CRC)
1247 		rctl &= ~IGC_RCTL_SECRC; /* Do not Strip Ethernet CRC. */
1248 	else
1249 		rctl |= IGC_RCTL_SECRC; /* Strip Ethernet CRC. */
1250 
1251 	rctl &= ~IGC_RCTL_MO_MSK;
1252 	rctl &= ~IGC_RCTL_LBM_MSK;
1253 	rctl |= IGC_RCTL_EN | IGC_RCTL_BAM | IGC_RCTL_LBM_NO |
1254 			IGC_RCTL_DPF |
1255 			(hw->mac.mc_filter_type << IGC_RCTL_MO_SHIFT);
1256 
1257 	if (dev->data->dev_conf.lpbk_mode == 1)
1258 		rctl |= IGC_RCTL_LBM_MAC;
1259 
1260 	rctl &= ~(IGC_RCTL_HSEL_MSK | IGC_RCTL_CFIEN | IGC_RCTL_CFI |
1261 			IGC_RCTL_PSP | IGC_RCTL_PMCF);
1262 
1263 	/* Make sure VLAN Filters are off. */
1264 	rctl &= ~IGC_RCTL_VFE;
1265 	/* Don't store bad packets. */
1266 	rctl &= ~IGC_RCTL_SBP;
1267 
1268 	/* Enable Receives. */
1269 	IGC_WRITE_REG(hw, IGC_RCTL, rctl);
1270 
1271 	/*
1272 	 * Setup the HW Rx Head and Tail Descriptor Pointers.
1273 	 * This needs to be done after enable.
1274 	 */
1275 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
1276 		uint32_t dvmolr;
1277 
1278 		rxq = dev->data->rx_queues[i];
1279 		IGC_WRITE_REG(hw, IGC_RDH(rxq->reg_idx), 0);
1280 		IGC_WRITE_REG(hw, IGC_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
1281 
1282 		dvmolr = IGC_READ_REG(hw, IGC_DVMOLR(rxq->reg_idx));
1283 		if (rxq->offloads & RTE_ETH_RX_OFFLOAD_VLAN_STRIP)
1284 			dvmolr |= IGC_DVMOLR_STRVLAN;
1285 		else
1286 			dvmolr &= ~IGC_DVMOLR_STRVLAN;
1287 
1288 		if (offloads & RTE_ETH_RX_OFFLOAD_KEEP_CRC)
1289 			dvmolr &= ~IGC_DVMOLR_STRCRC;
1290 		else
1291 			dvmolr |= IGC_DVMOLR_STRCRC;
1292 
1293 		IGC_WRITE_REG(hw, IGC_DVMOLR(rxq->reg_idx), dvmolr);
1294 	}
1295 
1296 	return 0;
1297 }
1298 
1299 static void
1300 igc_reset_rx_queue(struct igc_rx_queue *rxq)
1301 {
1302 	static const union igc_adv_rx_desc zeroed_desc = { {0} };
1303 	unsigned int i;
1304 
1305 	/* Zero out HW ring memory */
1306 	for (i = 0; i < rxq->nb_rx_desc; i++)
1307 		rxq->rx_ring[i] = zeroed_desc;
1308 
1309 	rxq->rx_tail = 0;
1310 	rxq->pkt_first_seg = NULL;
1311 	rxq->pkt_last_seg = NULL;
1312 }
1313 
1314 int
1315 eth_igc_rx_queue_setup(struct rte_eth_dev *dev,
1316 			 uint16_t queue_idx,
1317 			 uint16_t nb_desc,
1318 			 unsigned int socket_id,
1319 			 const struct rte_eth_rxconf *rx_conf,
1320 			 struct rte_mempool *mp)
1321 {
1322 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1323 	const struct rte_memzone *rz;
1324 	struct igc_rx_queue *rxq;
1325 	unsigned int size;
1326 
1327 	/*
1328 	 * Validate number of receive descriptors.
1329 	 * It must not exceed hardware maximum, and must be multiple
1330 	 * of IGC_RX_DESCRIPTOR_MULTIPLE.
1331 	 */
1332 	if (nb_desc % IGC_RX_DESCRIPTOR_MULTIPLE != 0 ||
1333 		nb_desc > IGC_MAX_RXD || nb_desc < IGC_MIN_RXD) {
1334 		PMD_DRV_LOG(ERR,
1335 			"RX descriptor must be multiple of %u(cur: %u) and between %u and %u",
1336 			IGC_RX_DESCRIPTOR_MULTIPLE, nb_desc,
1337 			IGC_MIN_RXD, IGC_MAX_RXD);
1338 		return -EINVAL;
1339 	}
1340 
1341 	/* Free memory prior to re-allocation if needed */
1342 	if (dev->data->rx_queues[queue_idx] != NULL) {
1343 		igc_rx_queue_release(dev->data->rx_queues[queue_idx]);
1344 		dev->data->rx_queues[queue_idx] = NULL;
1345 	}
1346 
1347 	/* First allocate the RX queue data structure. */
1348 	rxq = rte_zmalloc("ethdev RX queue", sizeof(struct igc_rx_queue),
1349 			  RTE_CACHE_LINE_SIZE);
1350 	if (rxq == NULL)
1351 		return -ENOMEM;
1352 	rxq->offloads = rx_conf->offloads;
1353 	rxq->mb_pool = mp;
1354 	rxq->nb_rx_desc = nb_desc;
1355 	rxq->pthresh = rx_conf->rx_thresh.pthresh;
1356 	rxq->hthresh = rx_conf->rx_thresh.hthresh;
1357 	rxq->wthresh = rx_conf->rx_thresh.wthresh;
1358 	rxq->drop_en = rx_conf->rx_drop_en;
1359 	rxq->rx_free_thresh = rx_conf->rx_free_thresh;
1360 	rxq->queue_id = queue_idx;
1361 	rxq->reg_idx = queue_idx;
1362 	rxq->port_id = dev->data->port_id;
1363 
1364 	/*
1365 	 *  Allocate RX ring hardware descriptors. A memzone large enough to
1366 	 *  handle the maximum ring size is allocated in order to allow for
1367 	 *  resizing in later calls to the queue setup function.
1368 	 */
1369 	size = sizeof(union igc_adv_rx_desc) * IGC_MAX_RXD;
1370 	rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx, size,
1371 				      IGC_ALIGN, socket_id);
1372 	if (rz == NULL) {
1373 		igc_rx_queue_release(rxq);
1374 		return -ENOMEM;
1375 	}
1376 	rxq->rdt_reg_addr = IGC_PCI_REG_ADDR(hw, IGC_RDT(rxq->reg_idx));
1377 	rxq->rdh_reg_addr = IGC_PCI_REG_ADDR(hw, IGC_RDH(rxq->reg_idx));
1378 	rxq->rx_ring_phys_addr = rz->iova;
1379 	rxq->rx_ring = (union igc_adv_rx_desc *)rz->addr;
1380 
1381 	/* Allocate software ring. */
1382 	rxq->sw_ring = rte_zmalloc("rxq->sw_ring",
1383 				   sizeof(struct igc_rx_entry) * nb_desc,
1384 				   RTE_CACHE_LINE_SIZE);
1385 	if (rxq->sw_ring == NULL) {
1386 		igc_rx_queue_release(rxq);
1387 		return -ENOMEM;
1388 	}
1389 
1390 	PMD_DRV_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%" PRIx64,
1391 		rxq->sw_ring, rxq->rx_ring, rxq->rx_ring_phys_addr);
1392 
1393 	dev->data->rx_queues[queue_idx] = rxq;
1394 	igc_reset_rx_queue(rxq);
1395 
1396 	return 0;
1397 }
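/*
 * Illustrative application-side sketch (an assumption, not part of this
 * driver): the setup handler above is reached through
 * rte_eth_rx_queue_setup().  The helper name and the descriptor count of 512
 * are hypothetical; the count must pass the IGC_MIN_RXD/IGC_MAX_RXD/multiple
 * checks performed above.
 */
static inline int
example_setup_rxq(uint16_t port_id, struct rte_mempool *mb_pool)
{
	/* 512 descriptors, default thresholds (rx_conf == NULL). */
	return rte_eth_rx_queue_setup(port_id, 0, 512,
			rte_eth_dev_socket_id(port_id), NULL, mb_pool);
}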
1398 
1399 /* prepare packets for transmit */
1400 static uint16_t
1401 eth_igc_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
1402 		uint16_t nb_pkts)
1403 {
1404 	int i, ret;
1405 	struct rte_mbuf *m;
1406 
1407 	for (i = 0; i < nb_pkts; i++) {
1408 		m = tx_pkts[i];
1409 
1410 		/* Check some limitations for TSO in hardware */
1411 		if (m->ol_flags & IGC_TX_OFFLOAD_SEG)
1412 			if (m->tso_segsz > IGC_TSO_MAX_MSS ||
1413 				m->l2_len + m->l3_len + m->l4_len >
1414 				IGC_TSO_MAX_HDRLEN) {
1415 				rte_errno = EINVAL;
1416 				return i;
1417 			}
1418 
1419 		if (m->ol_flags & IGC_TX_OFFLOAD_NOTSUP_MASK) {
1420 			rte_errno = ENOTSUP;
1421 			return i;
1422 		}
1423 
1424 #ifdef RTE_ETHDEV_DEBUG_TX
1425 		ret = rte_validate_tx_offload(m);
1426 		if (ret != 0) {
1427 			rte_errno = -ret;
1428 			return i;
1429 		}
1430 #endif
1431 		ret = rte_net_intel_cksum_prepare(m);
1432 		if (ret != 0) {
1433 			rte_errno = -ret;
1434 			return i;
1435 		}
1436 	}
1437 
1438 	return i;
1439 }
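/*
 * Illustrative sketch (an assumption, not part of this driver):
 * eth_igc_prep_pkts() is exposed through rte_eth_tx_prepare(), which an
 * application calls before rte_eth_tx_burst() when it uses checksum or TSO
 * offloads.  The helper name is hypothetical.
 */
static inline uint16_t
example_tx_with_prepare(uint16_t port_id, uint16_t queue_id,
		struct rte_mbuf **pkts, uint16_t nb)
{
	uint16_t nb_ok = rte_eth_tx_prepare(port_id, queue_id, pkts, nb);

	/* Packets beyond nb_ok violated an offload constraint; see rte_errno. */
	return rte_eth_tx_burst(port_id, queue_id, pkts, nb_ok);
}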
1440 
1441 /*
1442  * There are some hardware limitations for TCP segmentation offload. We
1443  * should check whether the parameters are valid.
1444  */
1445 static inline uint64_t
1446 check_tso_para(uint64_t ol_req, union igc_tx_offload ol_para)
1447 {
1448 	if (!(ol_req & IGC_TX_OFFLOAD_SEG))
1449 		return ol_req;
1450 	if (ol_para.tso_segsz > IGC_TSO_MAX_MSS || ol_para.l2_len +
1451 		ol_para.l3_len + ol_para.l4_len > IGC_TSO_MAX_HDRLEN) {
1452 		ol_req &= ~IGC_TX_OFFLOAD_SEG;
1453 		ol_req |= RTE_MBUF_F_TX_TCP_CKSUM;
1454 	}
1455 	return ol_req;
1456 }
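/*
 * Worked example (added for clarity, not part of the original source): a
 * request with tso_segsz = 16000 exceeds IGC_TSO_MAX_MSS (9216), so the
 * IGC_TX_OFFLOAD_SEG bits are cleared and the packet falls back to plain
 * RTE_MBUF_F_TX_TCP_CKSUM offload instead of being dropped.
 */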
1457 
1458 /*
1459  * Check which hardware context can be used. Use the existing match
1460  * or create a new context descriptor.
1461  */
1462 static inline uint32_t
1463 what_advctx_update(struct igc_tx_queue *txq, uint64_t flags,
1464 		union igc_tx_offload tx_offload)
1465 {
1466 	uint32_t curr = txq->ctx_curr;
1467 
1468 	/* If match with the current context */
1469 	if (likely(txq->ctx_cache[curr].flags == flags &&
1470 		txq->ctx_cache[curr].tx_offload.data ==
1471 		(txq->ctx_cache[curr].tx_offload_mask.data &
1472 		tx_offload.data))) {
1473 		return curr;
1474 	}
1475 
1476 	/* There are two contexts in total; check whether the second one matches */
1477 	curr ^= 1;
1478 	if (likely(txq->ctx_cache[curr].flags == flags &&
1479 		txq->ctx_cache[curr].tx_offload.data ==
1480 		(txq->ctx_cache[curr].tx_offload_mask.data &
1481 		tx_offload.data))) {
1482 		txq->ctx_curr = curr;
1483 		return curr;
1484 	}
1485 
1486 	/* Mismatch, create new one */
1487 	return IGC_CTX_NUM;
1488 }
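/*
 * Worked example (added for clarity, not part of the original source): with
 * two cached contexts, alternating between two flows, say a TSO flow and a
 * plain L4-checksum flow, keeps hitting the cache, so no new context
 * descriptor has to be written.  A third distinct offload combination returns
 * IGC_CTX_NUM and forces igc_set_xmit_ctx() to build a fresh context.
 */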
1489 
1490 /*
1491  * This is a separate function to leave room for optimization here;
1492  * rework is required to go with the pre-defined values.
1493  */
1494 static inline void
1495 igc_set_xmit_ctx(struct igc_tx_queue *txq,
1496 		volatile struct igc_adv_tx_context_desc *ctx_txd,
1497 		uint64_t ol_flags, union igc_tx_offload tx_offload)
1498 {
1499 	uint32_t type_tucmd_mlhl;
1500 	uint32_t mss_l4len_idx;
1501 	uint32_t ctx_curr;
1502 	uint32_t vlan_macip_lens;
1503 	union igc_tx_offload tx_offload_mask;
1504 
1505 	/* Use the previous context */
1506 	txq->ctx_curr ^= 1;
1507 	ctx_curr = txq->ctx_curr;
1508 
1509 	tx_offload_mask.data = 0;
1510 	type_tucmd_mlhl = 0;
1511 
1512 	/* Specify which HW CTX to upload. */
1513 	mss_l4len_idx = (ctx_curr << IGC_ADVTXD_IDX_SHIFT);
1514 
1515 	if (ol_flags & RTE_MBUF_F_TX_VLAN)
1516 		tx_offload_mask.vlan_tci = 0xffff;
1517 
1518 	/* check if TCP segmentation required for this packet */
1519 	if (ol_flags & IGC_TX_OFFLOAD_SEG) {
1520 		/* implies IP cksum in IPv4 */
1521 		if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM)
1522 			type_tucmd_mlhl = IGC_ADVTXD_TUCMD_IPV4 |
1523 				IGC_ADVTXD_DTYP_CTXT | IGC_ADVTXD_DCMD_DEXT;
1524 		else
1525 			type_tucmd_mlhl = IGC_ADVTXD_TUCMD_IPV6 |
1526 				IGC_ADVTXD_DTYP_CTXT | IGC_ADVTXD_DCMD_DEXT;
1527 
1528 		if (ol_flags & RTE_MBUF_F_TX_TCP_SEG)
1529 			type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_TCP;
1530 		else
1531 			type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_UDP;
1532 
1533 		tx_offload_mask.data |= TX_TSO_CMP_MASK;
1534 		mss_l4len_idx |= (uint32_t)tx_offload.tso_segsz <<
1535 				IGC_ADVTXD_MSS_SHIFT;
1536 		mss_l4len_idx |= (uint32_t)tx_offload.l4_len <<
1537 				IGC_ADVTXD_L4LEN_SHIFT;
1538 	} else { /* no TSO, check if hardware checksum is needed */
1539 		if (ol_flags & (RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_L4_MASK))
1540 			tx_offload_mask.data |= TX_MACIP_LEN_CMP_MASK;
1541 
1542 		if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM)
1543 			type_tucmd_mlhl = IGC_ADVTXD_TUCMD_IPV4;
1544 
1545 		switch (ol_flags & RTE_MBUF_F_TX_L4_MASK) {
1546 		case RTE_MBUF_F_TX_TCP_CKSUM:
1547 			type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_TCP |
1548 				IGC_ADVTXD_DTYP_CTXT | IGC_ADVTXD_DCMD_DEXT;
1549 			mss_l4len_idx |= (uint32_t)sizeof(struct rte_tcp_hdr)
1550 				<< IGC_ADVTXD_L4LEN_SHIFT;
1551 			break;
1552 		case RTE_MBUF_F_TX_UDP_CKSUM:
1553 			type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_UDP |
1554 				IGC_ADVTXD_DTYP_CTXT | IGC_ADVTXD_DCMD_DEXT;
1555 			mss_l4len_idx |= (uint32_t)sizeof(struct rte_udp_hdr)
1556 				<< IGC_ADVTXD_L4LEN_SHIFT;
1557 			break;
1558 		case RTE_MBUF_F_TX_SCTP_CKSUM:
1559 			type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_SCTP |
1560 				IGC_ADVTXD_DTYP_CTXT | IGC_ADVTXD_DCMD_DEXT;
1561 			mss_l4len_idx |= (uint32_t)sizeof(struct rte_sctp_hdr)
1562 				<< IGC_ADVTXD_L4LEN_SHIFT;
1563 			break;
1564 		default:
1565 			type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_RSV |
1566 				IGC_ADVTXD_DTYP_CTXT | IGC_ADVTXD_DCMD_DEXT;
1567 			break;
1568 		}
1569 	}
1570 
1571 	txq->ctx_cache[ctx_curr].flags = ol_flags;
1572 	txq->ctx_cache[ctx_curr].tx_offload.data =
1573 		tx_offload_mask.data & tx_offload.data;
1574 	txq->ctx_cache[ctx_curr].tx_offload_mask = tx_offload_mask;
1575 
1576 	ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
1577 	vlan_macip_lens = (uint32_t)tx_offload.data;
1578 	ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
1579 	ctx_txd->mss_l4len_idx = rte_cpu_to_le_32(mss_l4len_idx);
1580 	ctx_txd->u.launch_time = 0;
1581 }
1582 
1583 static inline uint32_t
1584 tx_desc_vlan_flags_to_cmdtype(uint64_t ol_flags)
1585 {
1586 	uint32_t cmdtype;
1587 	static uint32_t vlan_cmd[2] = {0, IGC_ADVTXD_DCMD_VLE};
1588 	static uint32_t tso_cmd[2] = {0, IGC_ADVTXD_DCMD_TSE};
1589 	cmdtype = vlan_cmd[(ol_flags & RTE_MBUF_F_TX_VLAN) != 0];
1590 	cmdtype |= tso_cmd[(ol_flags & IGC_TX_OFFLOAD_SEG) != 0];
1591 	return cmdtype;
1592 }
1593 
1594 static inline uint32_t
1595 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
1596 {
1597 	static const uint32_t l4_olinfo[2] = {0, IGC_ADVTXD_POPTS_TXSM};
1598 	static const uint32_t l3_olinfo[2] = {0, IGC_ADVTXD_POPTS_IXSM};
1599 	uint32_t tmp;
1600 
1601 	tmp  = l4_olinfo[(ol_flags & RTE_MBUF_F_TX_L4_MASK)  != RTE_MBUF_F_TX_L4_NO_CKSUM];
1602 	tmp |= l3_olinfo[(ol_flags & RTE_MBUF_F_TX_IP_CKSUM) != 0];
1603 	tmp |= l4_olinfo[(ol_flags & IGC_TX_OFFLOAD_SEG) != 0];
1604 	return tmp;
1605 }
1606 
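/*
 * PMD transmit burst function. For each packet: reserve one descriptor per
 * mbuf segment (plus a context descriptor when a new offload context must
 * be set up), fill the advanced data descriptors, mark the packet's last
 * descriptor with EOP/RS, and finally bump the TDT register once for the
 * whole burst.
 */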
1607 static uint16_t
1608 igc_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
1609 {
1610 	struct igc_tx_queue * const txq = tx_queue;
1611 	struct igc_tx_entry * const sw_ring = txq->sw_ring;
1612 	struct igc_tx_entry *txe, *txn;
1613 	volatile union igc_adv_tx_desc * const txr = txq->tx_ring;
1614 	volatile union igc_adv_tx_desc *txd;
1615 	struct rte_mbuf *tx_pkt;
1616 	struct rte_mbuf *m_seg;
1617 	uint64_t buf_dma_addr;
1618 	uint32_t olinfo_status;
1619 	uint32_t cmd_type_len;
1620 	uint32_t pkt_len;
1621 	uint16_t slen;
1622 	uint64_t ol_flags;
1623 	uint16_t tx_end;
1624 	uint16_t tx_id;
1625 	uint16_t tx_last;
1626 	uint16_t nb_tx;
1627 	uint64_t tx_ol_req;
1628 	uint32_t new_ctx = 0;
1629 	union igc_tx_offload tx_offload = {0};
1630 
1631 	tx_id = txq->tx_tail;
1632 	txe = &sw_ring[tx_id];
1633 
1634 	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1635 		tx_pkt = *tx_pkts++;
1636 		pkt_len = tx_pkt->pkt_len;
1637 
1638 		RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);
1639 
1640 		/*
1641 		 * The number of descriptors that must be allocated for a
1642 		 * packet is the number of segments of that packet, plus one
1643 		 * Context Descriptor when a new offload context is required.
1644 		 * Determine the last TX descriptor to allocate in the TX ring
1645 		 * for the packet, starting from the current position (tx_id)
1646 		 * in the ring.
1647 		 */
1648 		tx_last = (uint16_t)(tx_id + tx_pkt->nb_segs - 1);
1649 
1650 		ol_flags = tx_pkt->ol_flags;
1651 		tx_ol_req = ol_flags & IGC_TX_OFFLOAD_MASK;
1652 
1653 		/* Check whether a Context Descriptor needs to be built. */
1654 		if (tx_ol_req) {
1655 			tx_offload.l2_len = tx_pkt->l2_len;
1656 			tx_offload.l3_len = tx_pkt->l3_len;
1657 			tx_offload.l4_len = tx_pkt->l4_len;
1658 			tx_offload.vlan_tci = tx_pkt->vlan_tci;
1659 			tx_offload.tso_segsz = tx_pkt->tso_segsz;
1660 			tx_ol_req = check_tso_para(tx_ol_req, tx_offload);
1661 
1662 			new_ctx = what_advctx_update(txq, tx_ol_req,
1663 					tx_offload);
1664 			/* Only allocate a context descriptor if required */
1665 			new_ctx = (new_ctx >= IGC_CTX_NUM);
1666 			tx_last = (uint16_t)(tx_last + new_ctx);
1667 		}
1668 		if (tx_last >= txq->nb_tx_desc)
1669 			tx_last = (uint16_t)(tx_last - txq->nb_tx_desc);
1670 
1671 		PMD_TX_LOG(DEBUG,
1672 			"port_id=%u queue_id=%u pktlen=%u tx_first=%u tx_last=%u",
1673 			txq->port_id, txq->queue_id, pkt_len, tx_id, tx_last);
1674 
1675 		/*
1676 		 * Check if there are enough free descriptors in the TX ring
1677 		 * to transmit the next packet.
1678 		 * This operation is based on the two following rules:
1679 		 *
1680 		 *   1- Only check that the last needed TX descriptor can be
1681 		 *      allocated (by construction, if that descriptor is free,
1682 		 *      all intermediate ones are also free).
1683 		 *
1684 		 *      For this purpose, the index of the last TX descriptor
1685 		 *      used for a packet (the "last descriptor" of a packet)
1686 		 *      is recorded in the TX entries (the last one included)
1687 		 *      that are associated with all TX descriptors allocated
1688 		 *      for that packet.
1689 		 *
1690 		 *   2- Avoid allocating the last free TX descriptor of the
1691 		 *      ring, in order to never set the TDT register with the
1692 		 *      same value stored in parallel by the NIC in the TDH
1693 		 *      register, which would make the TX engine of the NIC
1694 		 *      enter a deadlock situation.
1695 		 *
1696 		 *      By extension, avoid allocating a free descriptor that
1697 		 *      belongs to the last set of free descriptors allocated
1698 		 *      to the same packet previously transmitted.
1699 		 */
1700 
1701 		/*
1702 		 * The "last descriptor" of the packet, if any, that previously
1703 		 * used the last descriptor we want to allocate (tx_last).
1704 		 */
1705 		tx_end = sw_ring[tx_last].last_id;
1706 
1707 		/*
1708 		 * The next descriptor following that "last descriptor" in the
1709 		 * ring.
1710 		 */
1711 		tx_end = sw_ring[tx_end].next_id;
1712 
1713 		/*
1714 		 * The "last descriptor" associated with that next descriptor.
1715 		 */
1716 		tx_end = sw_ring[tx_end].last_id;
1717 
1718 		/*
1719 		 * Check that this descriptor is free.
1720 		 */
1721 		if (!(txr[tx_end].wb.status & IGC_TXD_STAT_DD)) {
1722 			if (nb_tx == 0)
1723 				return 0;
1724 			goto end_of_tx;
1725 		}
1726 
1727 		/*
1728 		 * Set common flags of all TX Data Descriptors.
1729 		 *
1730 		 * The following bits must be set in all Data Descriptors:
1731 		 *   - IGC_ADVTXD_DTYP_DATA
1732 		 *   - IGC_ADVTXD_DCMD_DEXT
1733 		 *
1734 		 * The following bits must be set in the first Data Descriptor
1735 		 * and are ignored in the other ones:
1736 		 *   - IGC_ADVTXD_DCMD_IFCS
1737 		 *   - IGC_ADVTXD_MAC_1588
1738 		 *   - IGC_ADVTXD_DCMD_VLE
1739 		 *
1740 		 * The following bits must only be set in the last Data
1741 		 * Descriptor:
1742 		 *   - IGC_TXD_CMD_EOP
1743 		 *
1744 		 * The following bits can be set in any Data Descriptor, but
1745 		 * are only set in the last Data Descriptor:
1746 		 *   - IGC_TXD_CMD_RS
1747 		 */
1748 		cmd_type_len = txq->txd_type |
1749 			IGC_ADVTXD_DCMD_IFCS | IGC_ADVTXD_DCMD_DEXT;
1750 		if (tx_ol_req & IGC_TX_OFFLOAD_SEG)
1751 			pkt_len -= (tx_pkt->l2_len + tx_pkt->l3_len +
1752 					tx_pkt->l4_len);
1753 		olinfo_status = (pkt_len << IGC_ADVTXD_PAYLEN_SHIFT);
1754 
1755 		/*
1756 		 * Timer 0 should be used for packet timestamping;
1757 		 * sample the packet timestamp into register 0.
1758 		 */
1759 		if (ol_flags & RTE_MBUF_F_TX_IEEE1588_TMST)
1760 			cmd_type_len |= IGC_ADVTXD_MAC_TSTAMP;
1761 
1762 		if (tx_ol_req) {
1763 			/* Setup TX Advanced context descriptor if required */
1764 			if (new_ctx) {
1765 				volatile struct igc_adv_tx_context_desc *
1766 					ctx_txd = (volatile struct
1767 					igc_adv_tx_context_desc *)&txr[tx_id];
1768 
1769 				txn = &sw_ring[txe->next_id];
1770 				RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
1771 
1772 				if (txe->mbuf != NULL) {
1773 					rte_pktmbuf_free_seg(txe->mbuf);
1774 					txe->mbuf = NULL;
1775 				}
1776 
1777 				igc_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
1778 						tx_offload);
1779 
1780 				txe->last_id = tx_last;
1781 				tx_id = txe->next_id;
1782 				txe = txn;
1783 			}
1784 
1785 			/* Setup the TX Advanced Data Descriptor */
1786 			cmd_type_len |=
1787 				tx_desc_vlan_flags_to_cmdtype(tx_ol_req);
1788 			olinfo_status |=
1789 				tx_desc_cksum_flags_to_olinfo(tx_ol_req);
1790 			olinfo_status |= (uint32_t)txq->ctx_curr <<
1791 					IGC_ADVTXD_IDX_SHIFT;
1792 		}
1793 
1794 		m_seg = tx_pkt;
1795 		do {
1796 			txn = &sw_ring[txe->next_id];
1797 			RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
1798 
1799 			txd = &txr[tx_id];
1800 
1801 			if (txe->mbuf != NULL)
1802 				rte_pktmbuf_free_seg(txe->mbuf);
1803 			txe->mbuf = m_seg;
1804 
1805 			/* Set up transmit descriptor */
1806 			slen = (uint16_t)m_seg->data_len;
1807 			buf_dma_addr = rte_mbuf_data_iova(m_seg);
1808 			txd->read.buffer_addr =
1809 				rte_cpu_to_le_64(buf_dma_addr);
1810 			txd->read.cmd_type_len =
1811 				rte_cpu_to_le_32(cmd_type_len | slen);
1812 			txd->read.olinfo_status =
1813 				rte_cpu_to_le_32(olinfo_status);
1814 			txe->last_id = tx_last;
1815 			tx_id = txe->next_id;
1816 			txe = txn;
1817 			m_seg = m_seg->next;
1818 		} while (m_seg != NULL);
1819 
1820 		/*
1821 		 * The last packet data descriptor needs End Of Packet (EOP)
1822 		 * and Report Status (RS).
1823 		 */
1824 		txd->read.cmd_type_len |=
1825 			rte_cpu_to_le_32(IGC_TXD_CMD_EOP | IGC_TXD_CMD_RS);
1826 	}
1827 end_of_tx:
1828 	rte_wmb();
1829 
1830 	/*
1831 	 * Set the Transmit Descriptor Tail (TDT).
1832 	 */
1833 	IGC_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
1834 	PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
1835 		txq->port_id, txq->queue_id, tx_id, nb_tx);
1836 	txq->tx_tail = tx_id;
1837 
1838 	return nb_tx;
1839 }
1840 
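/*
 * Report the state of the TX descriptor located 'offset' entries past the
 * current tail: DONE if the hardware has written back the DD status bit,
 * otherwise FULL.
 */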
1841 int eth_igc_tx_descriptor_status(void *tx_queue, uint16_t offset)
1842 {
1843 	struct igc_tx_queue *txq = tx_queue;
1844 	volatile uint32_t *status;
1845 	uint32_t desc;
1846 
1847 	if (unlikely(!txq || offset >= txq->nb_tx_desc))
1848 		return -EINVAL;
1849 
1850 	desc = txq->tx_tail + offset;
1851 	if (desc >= txq->nb_tx_desc)
1852 		desc -= txq->nb_tx_desc;
1853 
1854 	status = &txq->tx_ring[desc].wb.status;
1855 	if (*status & rte_cpu_to_le_32(IGC_TXD_STAT_DD))
1856 		return RTE_ETH_TX_DESC_DONE;
1857 
1858 	return RTE_ETH_TX_DESC_FULL;
1859 }
1860 
1861 static void
1862 igc_tx_queue_release_mbufs(struct igc_tx_queue *txq)
1863 {
1864 	unsigned int i;
1865 
1866 	if (txq->sw_ring != NULL) {
1867 		for (i = 0; i < txq->nb_tx_desc; i++) {
1868 			if (txq->sw_ring[i].mbuf != NULL) {
1869 				rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
1870 				txq->sw_ring[i].mbuf = NULL;
1871 			}
1872 		}
1873 	}
1874 }
1875 
1876 static void
1877 igc_tx_queue_release(struct igc_tx_queue *txq)
1878 {
1879 	igc_tx_queue_release_mbufs(txq);
1880 	rte_free(txq->sw_ring);
1881 	rte_free(txq);
1882 }
1883 
1884 void eth_igc_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
1885 {
1886 	if (dev->data->tx_queues[qid])
1887 		igc_tx_queue_release(dev->data->tx_queues[qid]);
1888 }
1889 
1890 static void
1891 igc_reset_tx_queue_stat(struct igc_tx_queue *txq)
1892 {
1893 	txq->tx_head = 0;
1894 	txq->tx_tail = 0;
1895 	txq->ctx_curr = 0;
1896 	memset((void *)&txq->ctx_cache, 0,
1897 		IGC_CTX_NUM * sizeof(struct igc_advctx_info));
1898 }
1899 
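/*
 * Reset a TX queue to its post-init state: link the software ring entries
 * into a circular list, mark every hardware descriptor as done (DD set) so
 * it is seen as free, and clear the context cache and head/tail indexes.
 */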
1900 static void
1901 igc_reset_tx_queue(struct igc_tx_queue *txq)
1902 {
1903 	struct igc_tx_entry *txe = txq->sw_ring;
1904 	uint16_t i, prev;
1905 
1906 	/* Initialize ring entries */
1907 	prev = (uint16_t)(txq->nb_tx_desc - 1);
1908 	for (i = 0; i < txq->nb_tx_desc; i++) {
1909 		volatile union igc_adv_tx_desc *txd = &txq->tx_ring[i];
1910 
1911 		txd->wb.status = IGC_TXD_STAT_DD;
1912 		txe[i].mbuf = NULL;
1913 		txe[i].last_id = i;
1914 		txe[prev].next_id = i;
1915 		prev = i;
1916 	}
1917 
1918 	txq->txd_type = IGC_ADVTXD_DTYP_DATA;
1919 	igc_reset_tx_queue_stat(txq);
1920 }
1921 
1922 /*
1923  * Clear all RX/TX queues.
1924  */
1925 void
1926 igc_dev_clear_queues(struct rte_eth_dev *dev)
1927 {
1928 	uint16_t i;
1929 	struct igc_tx_queue *txq;
1930 	struct igc_rx_queue *rxq;
1931 
1932 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
1933 		txq = dev->data->tx_queues[i];
1934 		if (txq != NULL) {
1935 			igc_tx_queue_release_mbufs(txq);
1936 			igc_reset_tx_queue(txq);
1937 		}
1938 	}
1939 
1940 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
1941 		rxq = dev->data->rx_queues[i];
1942 		if (rxq != NULL) {
1943 			igc_rx_queue_release_mbufs(rxq);
1944 			igc_reset_rx_queue(rxq);
1945 		}
1946 	}
1947 }
1948 
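/*
 * Set up a TX queue: validate the descriptor count, allocate the queue
 * structure, reserve a DMA memzone sized for the maximum ring, allocate the
 * software ring, and install the transmit burst/prepare callbacks.
 */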
1949 int eth_igc_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
1950 		uint16_t nb_desc, unsigned int socket_id,
1951 		const struct rte_eth_txconf *tx_conf)
1952 {
1953 	const struct rte_memzone *tz;
1954 	struct igc_tx_queue *txq;
1955 	struct igc_hw *hw;
1956 	uint32_t size;
1957 
1958 	if (nb_desc % IGC_TX_DESCRIPTOR_MULTIPLE != 0 ||
1959 		nb_desc > IGC_MAX_TXD || nb_desc < IGC_MIN_TXD) {
1960 		PMD_DRV_LOG(ERR,
1961 			"TX descriptors must be a multiple of %u and between %u and %u, cur: %u",
1962 			IGC_TX_DESCRIPTOR_MULTIPLE,
1963 			IGC_MIN_TXD, IGC_MAX_TXD, nb_desc);
1964 		return -EINVAL;
1965 	}
1966 
1967 	hw = IGC_DEV_PRIVATE_HW(dev);
1968 
1969 	/*
1970 	 * The tx_free_thresh and tx_rs_thresh values are not used in the 2.5G
1971 	 * driver.
1972 	 */
1973 	if (tx_conf->tx_free_thresh != 0)
1974 		PMD_DRV_LOG(INFO,
1975 			"The tx_free_thresh parameter is not used for the 2.5G driver");
1976 	if (tx_conf->tx_rs_thresh != 0)
1977 		PMD_DRV_LOG(INFO,
1978 			"The tx_rs_thresh parameter is not used for the 2.5G driver");
1979 	if (tx_conf->tx_thresh.wthresh == 0)
1980 		PMD_DRV_LOG(INFO,
1981 			"To improve 2.5G driver performance, consider setting the TX WTHRESH value to 4, 8, or 16.");
1982 
1983 	/* Free memory prior to re-allocation if needed */
1984 	if (dev->data->tx_queues[queue_idx] != NULL) {
1985 		igc_tx_queue_release(dev->data->tx_queues[queue_idx]);
1986 		dev->data->tx_queues[queue_idx] = NULL;
1987 	}
1988 
1989 	/* First allocate the tx queue data structure */
1990 	txq = rte_zmalloc("ethdev TX queue", sizeof(struct igc_tx_queue),
1991 						RTE_CACHE_LINE_SIZE);
1992 	if (txq == NULL)
1993 		return -ENOMEM;
1994 
1995 	/*
1996 	 * Allocate TX ring hardware descriptors. A memzone large enough to
1997 	 * handle the maximum ring size is allocated in order to allow for
1998 	 * resizing in later calls to the queue setup function.
1999 	 */
2000 	size = sizeof(union igc_adv_tx_desc) * IGC_MAX_TXD;
2001 	tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx, size,
2002 				      IGC_ALIGN, socket_id);
2003 	if (tz == NULL) {
2004 		igc_tx_queue_release(txq);
2005 		return -ENOMEM;
2006 	}
2007 
2008 	txq->nb_tx_desc = nb_desc;
2009 	txq->pthresh = tx_conf->tx_thresh.pthresh;
2010 	txq->hthresh = tx_conf->tx_thresh.hthresh;
2011 	txq->wthresh = tx_conf->tx_thresh.wthresh;
2012 
2013 	txq->queue_id = queue_idx;
2014 	txq->reg_idx = queue_idx;
2015 	txq->port_id = dev->data->port_id;
2016 
2017 	txq->tdt_reg_addr = IGC_PCI_REG_ADDR(hw, IGC_TDT(txq->reg_idx));
2018 	txq->tx_ring_phys_addr = tz->iova;
2019 
2020 	txq->tx_ring = (union igc_adv_tx_desc *)tz->addr;
2021 	/* Allocate software ring */
2022 	txq->sw_ring = rte_zmalloc("txq->sw_ring",
2023 				   sizeof(struct igc_tx_entry) * nb_desc,
2024 				   RTE_CACHE_LINE_SIZE);
2025 	if (txq->sw_ring == NULL) {
2026 		igc_tx_queue_release(txq);
2027 		return -ENOMEM;
2028 	}
2029 	PMD_DRV_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%" PRIx64,
2030 		txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2031 
2032 	igc_reset_tx_queue(txq);
2033 	dev->tx_pkt_burst = igc_xmit_pkts;
2034 	dev->tx_pkt_prepare = &eth_igc_prep_pkts;
2035 	dev->data->tx_queues[queue_idx] = txq;
2036 	txq->offloads = tx_conf->offloads;
2037 
2038 	return 0;
2039 }
2040 
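/*
 * Free the mbufs of packets that the hardware has already transmitted,
 * walking the software ring from the oldest packet. At most free_cnt
 * packets are freed; a free_cnt of 0 effectively means no limit, since the
 * count check can never match. Returns the number of packets freed.
 */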
2041 int
2042 eth_igc_tx_done_cleanup(void *txqueue, uint32_t free_cnt)
2043 {
2044 	struct igc_tx_queue *txq = txqueue;
2045 	struct igc_tx_entry *sw_ring;
2046 	volatile union igc_adv_tx_desc *txr;
2047 	uint16_t tx_first; /* First segment analyzed. */
2048 	uint16_t tx_id;    /* Current segment being processed. */
2049 	uint16_t tx_last;  /* Last segment in the current packet. */
2050 	uint16_t tx_next;  /* First segment of the next packet. */
2051 	uint32_t count;
2052 
2053 	if (txq == NULL)
2054 		return -ENODEV;
2055 
2056 	count = 0;
2057 	sw_ring = txq->sw_ring;
2058 	txr = txq->tx_ring;
2059 
2060 	/*
2061 	 * tx_tail is the last sent packet on the sw_ring. Go to the end
2062 	 * of that packet (the last segment in the packet chain); the
2063 	 * next segment is then the start of the oldest packet in the
2064 	 * sw_ring. This is the first packet that we will attempt to
2065 	 * free.
2066 	 */
2067 
2068 	/* Get last segment in most recently added packet. */
2069 	tx_first = sw_ring[txq->tx_tail].last_id;
2070 
2071 	/* Get the next segment, which is the oldest segment in the ring. */
2072 	tx_first = sw_ring[tx_first].next_id;
2073 
2074 	/* Set the current index to the first. */
2075 	tx_id = tx_first;
2076 
2077 	/*
2078 	 * Loop through each packet. For each packet, verify that an
2079 	 * mbuf exists and that the last segment is free. If so, free
2080 	 * it and move on.
2081 	 */
2082 	while (1) {
2083 		tx_last = sw_ring[tx_id].last_id;
2084 
2085 		if (sw_ring[tx_last].mbuf) {
2086 			if (!(txr[tx_last].wb.status &
2087 					rte_cpu_to_le_32(IGC_TXD_STAT_DD)))
2088 				break;
2089 
2090 			/* Get the start of the next packet. */
2091 			tx_next = sw_ring[tx_last].next_id;
2092 
2093 			/*
2094 			 * Loop through all segments in a
2095 			 * packet.
2096 			 */
2097 			do {
2098 				rte_pktmbuf_free_seg(sw_ring[tx_id].mbuf);
2099 				sw_ring[tx_id].mbuf = NULL;
2100 				sw_ring[tx_id].last_id = tx_id;
2101 
2102 				/* Move to next segment. */
2103 				tx_id = sw_ring[tx_id].next_id;
2104 			} while (tx_id != tx_next);
2105 
2106 			/*
2107 			 * Increment the number of packets
2108 			 * freed.
2109 			 */
2110 			count++;
2111 			if (unlikely(count == free_cnt))
2112 				break;
2113 		} else {
2114 			/*
2115 			 * There are multiple reasons to be here:
2116 			 * 1) All the packets on the ring have been
2117 			 *    freed - tx_id is equal to tx_first
2118 			 *    and some packets have been freed.
2119 			 *    - Done, exit
2120 			 * 2) The interface has not sent a ring's worth of
2121 			 *    packets yet, so the segment after the tail is
2122 			 *    still empty. Or a previous call to this
2123 			 *    function freed some of the segments but
2124 			 *    not all, so there is a hole in the list.
2125 			 *    Hopefully this is a rare case.
2126 			 *    - Walk the list and find the next mbuf. If
2127 			 *      there isn't one, then done.
2128 			 */
2129 			if (likely(tx_id == tx_first && count != 0))
2130 				break;
2131 
2132 			/*
2133 			 * Walk the list and find the next mbuf, if any.
2134 			 */
2135 			do {
2136 				/* Move to next segment. */
2137 				tx_id = sw_ring[tx_id].next_id;
2138 
2139 				if (sw_ring[tx_id].mbuf)
2140 					break;
2141 
2142 			} while (tx_id != tx_first);
2143 
2144 			/*
2145 			 * Determine why previous loop bailed. If there
2146 			 * is not an mbuf, done.
2147 			 */
2148 			if (sw_ring[tx_id].mbuf == NULL)
2149 				break;
2150 		}
2151 	}
2152 
2153 	return count;
2154 }
2155 
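/*
 * Program the hardware TX path at device start: ring base/length and
 * head/tail pointers per queue, prefetch/host/write-back thresholds in
 * TXDCTL, then enable the queues and the transmit unit via TCTL.
 */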
2156 void
2157 igc_tx_init(struct rte_eth_dev *dev)
2158 {
2159 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2160 	uint32_t tctl;
2161 	uint32_t txdctl;
2162 	uint16_t i;
2163 
2164 	/* Setup the Base and Length of the Tx Descriptor Rings. */
2165 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
2166 		struct igc_tx_queue *txq = dev->data->tx_queues[i];
2167 		uint64_t bus_addr = txq->tx_ring_phys_addr;
2168 
2169 		IGC_WRITE_REG(hw, IGC_TDLEN(txq->reg_idx),
2170 				txq->nb_tx_desc *
2171 				sizeof(union igc_adv_tx_desc));
2172 		IGC_WRITE_REG(hw, IGC_TDBAH(txq->reg_idx),
2173 				(uint32_t)(bus_addr >> 32));
2174 		IGC_WRITE_REG(hw, IGC_TDBAL(txq->reg_idx),
2175 				(uint32_t)bus_addr);
2176 
2177 		/* Setup the HW Tx Head and Tail descriptor pointers. */
2178 		IGC_WRITE_REG(hw, IGC_TDT(txq->reg_idx), 0);
2179 		IGC_WRITE_REG(hw, IGC_TDH(txq->reg_idx), 0);
2180 
2181 		/* Setup Transmit threshold registers. */
2182 		txdctl = ((uint32_t)txq->pthresh << IGC_TXDCTL_PTHRESH_SHIFT) &
2183 				IGC_TXDCTL_PTHRESH_MSK;
2184 		txdctl |= ((uint32_t)txq->hthresh << IGC_TXDCTL_HTHRESH_SHIFT) &
2185 				IGC_TXDCTL_HTHRESH_MSK;
2186 		txdctl |= ((uint32_t)txq->wthresh << IGC_TXDCTL_WTHRESH_SHIFT) &
2187 				IGC_TXDCTL_WTHRESH_MSK;
2188 		txdctl |= IGC_TXDCTL_QUEUE_ENABLE;
2189 		IGC_WRITE_REG(hw, IGC_TXDCTL(txq->reg_idx), txdctl);
2190 	}
2191 
2192 	igc_config_collision_dist(hw);
2193 
2194 	/* Program the Transmit Control Register. */
2195 	tctl = IGC_READ_REG(hw, IGC_TCTL);
2196 	tctl &= ~IGC_TCTL_CT;
2197 	tctl |= (IGC_TCTL_PSP | IGC_TCTL_RTLC | IGC_TCTL_EN |
2198 		 ((uint32_t)IGC_COLLISION_THRESHOLD << IGC_CT_SHIFT));
2199 
2200 	/* This write will effectively turn on the transmit unit. */
2201 	IGC_WRITE_REG(hw, IGC_TCTL, tctl);
2202 }
2203 
2204 void
2205 eth_igc_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
2206 	struct rte_eth_rxq_info *qinfo)
2207 {
2208 	struct igc_rx_queue *rxq;
2209 
2210 	rxq = dev->data->rx_queues[queue_id];
2211 
2212 	qinfo->mp = rxq->mb_pool;
2213 	qinfo->scattered_rx = dev->data->scattered_rx;
2214 	qinfo->nb_desc = rxq->nb_rx_desc;
2215 
2216 	qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
2217 	qinfo->conf.rx_drop_en = rxq->drop_en;
2218 	qinfo->conf.offloads = rxq->offloads;
2219 	qinfo->conf.rx_thresh.hthresh = rxq->hthresh;
2220 	qinfo->conf.rx_thresh.pthresh = rxq->pthresh;
2221 	qinfo->conf.rx_thresh.wthresh = rxq->wthresh;
2222 }
2223 
2224 void
2225 eth_igc_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
2226 	struct rte_eth_txq_info *qinfo)
2227 {
2228 	struct igc_tx_queue *txq;
2229 
2230 	txq = dev->data->tx_queues[queue_id];
2231 
2232 	qinfo->nb_desc = txq->nb_tx_desc;
2233 
2234 	qinfo->conf.tx_thresh.pthresh = txq->pthresh;
2235 	qinfo->conf.tx_thresh.hthresh = txq->hthresh;
2236 	qinfo->conf.tx_thresh.wthresh = txq->wthresh;
2237 	qinfo->conf.offloads = txq->offloads;
2238 }
2239 
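/*
 * Toggle VLAN stripping for a single RX queue by updating that queue's
 * DVMOLR register, keeping the queue's offload flags in sync.
 */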
2240 void
2241 eth_igc_vlan_strip_queue_set(struct rte_eth_dev *dev,
2242 			uint16_t rx_queue_id, int on)
2243 {
2244 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2245 	struct igc_rx_queue *rxq = dev->data->rx_queues[rx_queue_id];
2246 	uint32_t reg_val;
2247 
2248 	if (rx_queue_id >= IGC_QUEUE_PAIRS_NUM) {
2249 		PMD_DRV_LOG(ERR, "Queue index (%u) is illegal, max is %u",
2250 			rx_queue_id, IGC_QUEUE_PAIRS_NUM - 1);
2251 		return;
2252 	}
2253 
2254 	reg_val = IGC_READ_REG(hw, IGC_DVMOLR(rx_queue_id));
2255 	if (on) {
2256 		reg_val |= IGC_DVMOLR_STRVLAN;
2257 		rxq->offloads |= RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
2258 	} else {
2259 		reg_val &= ~(IGC_DVMOLR_STRVLAN | IGC_DVMOLR_HIDVLAN);
2260 		rxq->offloads &= ~RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
2261 	}
2262 
2263 	IGC_WRITE_REG(hw, IGC_DVMOLR(rx_queue_id), reg_val);
2264 }
2265