1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2020 Intel Corporation
3 */
4
5 #include <rte_config.h>
6 #include <rte_malloc.h>
7 #include <rte_ethdev_driver.h>
8 #include <rte_net.h>
9
10 #include "igc_logs.h"
11 #include "igc_txrx.h"
12
13 #ifdef RTE_PMD_USE_PREFETCH
14 #define rte_igc_prefetch(p) rte_prefetch0(p)
15 #else
16 #define rte_igc_prefetch(p) do {} while (0)
17 #endif
18
19 #ifdef RTE_PMD_PACKET_PREFETCH
20 #define rte_packet_prefetch(p) rte_prefetch1(p)
21 #else
22 #define rte_packet_prefetch(p) do {} while (0)
23 #endif
24
25 /* Multicast / Unicast table offset mask. */
26 #define IGC_RCTL_MO_MSK (3u << IGC_RCTL_MO_SHIFT)
27
28 /* Loopback mode. */
29 #define IGC_RCTL_LBM_SHIFT 6
30 #define IGC_RCTL_LBM_MSK (3u << IGC_RCTL_LBM_SHIFT)
31
32 /* Hash select for MTA */
33 #define IGC_RCTL_HSEL_SHIFT 8
34 #define IGC_RCTL_HSEL_MSK (3u << IGC_RCTL_HSEL_SHIFT)
35 #define IGC_RCTL_PSP (1u << 21)
36
37 /* Receive buffer size for header buffer */
38 #define IGC_SRRCTL_BSIZEHEADER_SHIFT 8
39
40 /* RX descriptor status and error flags */
41 #define IGC_RXD_STAT_L4CS (1u << 5)
42 #define IGC_RXD_STAT_VEXT (1u << 9)
43 #define IGC_RXD_STAT_LLINT (1u << 11)
44 #define IGC_RXD_STAT_SCRC (1u << 12)
45 #define IGC_RXD_STAT_SMDT_MASK (3u << 13)
46 #define IGC_RXD_STAT_MC (1u << 19)
47 #define IGC_RXD_EXT_ERR_L4E (1u << 29)
48 #define IGC_RXD_EXT_ERR_IPE (1u << 30)
49 #define IGC_RXD_EXT_ERR_RXE (1u << 31)
50 #define IGC_RXD_RSS_TYPE_MASK 0xfu
51 #define IGC_RXD_PCTYPE_MASK (0x7fu << 4)
52 #define IGC_RXD_ETQF_SHIFT 12
53 #define IGC_RXD_ETQF_MSK (0xfu << IGC_RXD_ETQF_SHIFT)
54 #define IGC_RXD_VPKT (1u << 16)
55
56 /* TXD control bits */
57 #define IGC_TXDCTL_PTHRESH_SHIFT 0
58 #define IGC_TXDCTL_HTHRESH_SHIFT 8
59 #define IGC_TXDCTL_WTHRESH_SHIFT 16
60 #define IGC_TXDCTL_PTHRESH_MSK (0x1fu << IGC_TXDCTL_PTHRESH_SHIFT)
61 #define IGC_TXDCTL_HTHRESH_MSK (0x1fu << IGC_TXDCTL_HTHRESH_SHIFT)
62 #define IGC_TXDCTL_WTHRESH_MSK (0x1fu << IGC_TXDCTL_WTHRESH_SHIFT)
63
64 /* RXD control bits */
65 #define IGC_RXDCTL_PTHRESH_SHIFT 0
66 #define IGC_RXDCTL_HTHRESH_SHIFT 8
67 #define IGC_RXDCTL_WTHRESH_SHIFT 16
68 #define IGC_RXDCTL_PTHRESH_MSK (0x1fu << IGC_RXDCTL_PTHRESH_SHIFT)
69 #define IGC_RXDCTL_HTHRESH_MSK (0x1fu << IGC_RXDCTL_HTHRESH_SHIFT)
70 #define IGC_RXDCTL_WTHRESH_MSK (0x1fu << IGC_RXDCTL_WTHRESH_SHIFT)
71
72 #define IGC_TSO_MAX_HDRLEN 512
73 #define IGC_TSO_MAX_MSS 9216
74
75 /* Bit mask to indicate which bits are required for building the TX context */
76 #define IGC_TX_OFFLOAD_MASK ( \
77 PKT_TX_OUTER_IPV4 | \
78 PKT_TX_IPV6 | \
79 PKT_TX_IPV4 | \
80 PKT_TX_VLAN_PKT | \
81 PKT_TX_IP_CKSUM | \
82 PKT_TX_L4_MASK | \
83 PKT_TX_TCP_SEG | \
84 PKT_TX_UDP_SEG)
85
86 #define IGC_TX_OFFLOAD_SEG (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG)
87
88 #define IGC_ADVTXD_POPTS_TXSM 0x00000200 /* L4 Checksum offload request */
89 #define IGC_ADVTXD_POPTS_IXSM 0x00000100 /* IP Checksum offload request */
90
91 /* L4 Packet TYPE of Reserved */
92 #define IGC_ADVTXD_TUCMD_L4T_RSV 0x00001800
93
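/*
 * Any offload flag not covered by IGC_TX_OFFLOAD_MASK is unsupported;
 * the complement below is used in the TX prepare path to reject such
 * packets.
 */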
94 #define IGC_TX_OFFLOAD_NOTSUP_MASK (PKT_TX_OFFLOAD_MASK ^ IGC_TX_OFFLOAD_MASK)
95
96 /**
97 * Structure associated with each descriptor of the RX ring of a RX queue.
98 */
99 struct igc_rx_entry {
100 struct rte_mbuf *mbuf; /**< mbuf associated with RX descriptor. */
101 };
102
103 /**
104 * Structure associated with each RX queue.
105 */
106 struct igc_rx_queue {
107 struct rte_mempool *mb_pool; /**< mbuf pool to populate RX ring. */
108 volatile union igc_adv_rx_desc *rx_ring;
109 /**< RX ring virtual address. */
110 uint64_t rx_ring_phys_addr; /**< RX ring DMA address. */
111 volatile uint32_t *rdt_reg_addr; /**< RDT register address. */
112 volatile uint32_t *rdh_reg_addr; /**< RDH register address. */
113 struct igc_rx_entry *sw_ring; /**< address of RX software ring. */
114 struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
115 struct rte_mbuf *pkt_last_seg; /**< Last segment of current packet. */
116 uint16_t nb_rx_desc; /**< number of RX descriptors. */
117 uint16_t rx_tail; /**< current value of RDT register. */
118 uint16_t nb_rx_hold; /**< number of held free RX desc. */
119 uint16_t rx_free_thresh; /**< max free RX desc to hold. */
120 uint16_t queue_id; /**< RX queue index. */
121 uint16_t reg_idx; /**< RX queue register index. */
122 uint16_t port_id; /**< Device port identifier. */
123 uint8_t pthresh; /**< Prefetch threshold register. */
124 uint8_t hthresh; /**< Host threshold register. */
125 uint8_t wthresh; /**< Write-back threshold register. */
126 uint8_t crc_len; /**< 0 if CRC stripped, 4 otherwise. */
127 uint8_t drop_en; /**< If not 0, set SRRCTL.Drop_En. */
128 uint32_t flags; /**< RX flags. */
129 uint64_t offloads; /**< offloads of DEV_RX_OFFLOAD_* */
130 };
131
132 /** Offload features */
133 union igc_tx_offload {
134 uint64_t data;
135 struct {
136 uint64_t l3_len:9; /**< L3 (IP) Header Length. */
137 uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
138 uint64_t vlan_tci:16;
139 /**< VLAN Tag Control Identifier (CPU order). */
140 uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
141 uint64_t tso_segsz:16; /**< TCP TSO segment size. */
142 /* uint64_t unused:8; */
143 };
144 };
145
146 /*
147 * Compare mask for igc_tx_offload.data,
148 * should be in sync with igc_tx_offload layout.
149 */
150 #define TX_MACIP_LEN_CMP_MASK 0x000000000000FFFFULL /**< L2L3 header mask. */
151 #define TX_VLAN_CMP_MASK 0x00000000FFFF0000ULL /**< Vlan mask. */
152 #define TX_TCP_LEN_CMP_MASK 0x000000FF00000000ULL /**< TCP header mask. */
153 #define TX_TSO_MSS_CMP_MASK 0x00FFFF0000000000ULL /**< TSO segsz mask. */
154 /** Mac + IP + TCP + Mss mask. */
155 #define TX_TSO_CMP_MASK \
156 (TX_MACIP_LEN_CMP_MASK | TX_TCP_LEN_CMP_MASK | TX_TSO_MSS_CMP_MASK)
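/*
 * Note: the compare masks above mirror the igc_tx_offload bit layout:
 * bits 0-15 cover l3_len + l2_len, bits 16-31 the vlan_tci,
 * bits 32-39 the l4_len and bits 40-55 the tso_segsz fields.
 */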
157
158 /**
159 * Structure to check whether a new context needs to be built
160 */
161 struct igc_advctx_info {
162 uint64_t flags; /**< ol_flags related to context build. */
163 /** tx offload: vlan, tso, l2-l3-l4 lengths. */
164 union igc_tx_offload tx_offload;
165 /** compare mask for tx offload. */
166 union igc_tx_offload tx_offload_mask;
167 };
168
169 /**
170 * Hardware context number
171 */
172 enum {
173 IGC_CTX_0 = 0, /**< CTX0 */
174 IGC_CTX_1 = 1, /**< CTX1 */
175 IGC_CTX_NUM = 2, /**< CTX_NUM */
176 };
177
178 /**
179 * Structure associated with each descriptor of the TX ring of a TX queue.
180 */
181 struct igc_tx_entry {
182 struct rte_mbuf *mbuf; /**< mbuf associated with TX desc, if any. */
183 uint16_t next_id; /**< Index of next descriptor in ring. */
184 uint16_t last_id; /**< Index of last scattered descriptor. */
185 };
186
187 /**
188 * Structure associated with each TX queue.
189 */
190 struct igc_tx_queue {
191 volatile union igc_adv_tx_desc *tx_ring; /**< TX ring address */
192 uint64_t tx_ring_phys_addr; /**< TX ring DMA address. */
193 struct igc_tx_entry *sw_ring; /**< virtual address of SW ring. */
194 volatile uint32_t *tdt_reg_addr; /**< Address of TDT register. */
195 uint32_t txd_type; /**< Device-specific TXD type */
196 uint16_t nb_tx_desc; /**< number of TX descriptors. */
197 uint16_t tx_tail; /**< Current value of TDT register. */
198 uint16_t tx_head;
199 /**< Index of first used TX descriptor. */
200 uint16_t queue_id; /**< TX queue index. */
201 uint16_t reg_idx; /**< TX queue register index. */
202 uint16_t port_id; /**< Device port identifier. */
203 uint8_t pthresh; /**< Prefetch threshold register. */
204 uint8_t hthresh; /**< Host threshold register. */
205 uint8_t wthresh; /**< Write-back threshold register. */
206 uint8_t ctx_curr;
207
208 /**< Start context position for transmit queue. */
209 struct igc_advctx_info ctx_cache[IGC_CTX_NUM];
210 /**< Hardware context history.*/
211 uint64_t offloads; /**< offloads of DEV_TX_OFFLOAD_* */
212 };
213
214 static inline uint64_t
215 rx_desc_statuserr_to_pkt_flags(uint32_t statuserr)
216 {
217 static uint64_t l4_chksum_flags[] = {0, 0, PKT_RX_L4_CKSUM_GOOD,
218 PKT_RX_L4_CKSUM_BAD};
219
220 static uint64_t l3_chksum_flags[] = {0, 0, PKT_RX_IP_CKSUM_GOOD,
221 PKT_RX_IP_CKSUM_BAD};
222 uint64_t pkt_flags = 0;
223 uint32_t tmp;
224
225 if (statuserr & IGC_RXD_STAT_VP)
226 pkt_flags |= PKT_RX_VLAN_STRIPPED;
227
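/*
 * Build a 2-bit index into the tables above: bit 1 is set when the
 * hardware performed the checksum, bit 0 when it reported an error.
 * Indexes 0 and 1 (checksum not performed) map to no flag.
 */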
228 tmp = !!(statuserr & (IGC_RXD_STAT_L4CS | IGC_RXD_STAT_UDPCS));
229 tmp = (tmp << 1) | (uint32_t)!!(statuserr & IGC_RXD_EXT_ERR_L4E);
230 pkt_flags |= l4_chksum_flags[tmp];
231
232 tmp = !!(statuserr & IGC_RXD_STAT_IPCS);
233 tmp = (tmp << 1) | (uint32_t)!!(statuserr & IGC_RXD_EXT_ERR_IPE);
234 pkt_flags |= l3_chksum_flags[tmp];
235
236 return pkt_flags;
237 }
238
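/*
 * The packet-type values below appear to encode one bit per protocol:
 * bit 0 IPv4, bit 1 IPv4 extension, bit 2 IPv6, bit 3 IPv6 extension,
 * bit 4 TCP, bit 5 UDP and bit 6 SCTP, so combined types are simple
 * ORs of the basic values.
 */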
239 #define IGC_PACKET_TYPE_IPV4 0X01
240 #define IGC_PACKET_TYPE_IPV4_TCP 0X11
241 #define IGC_PACKET_TYPE_IPV4_UDP 0X21
242 #define IGC_PACKET_TYPE_IPV4_SCTP 0X41
243 #define IGC_PACKET_TYPE_IPV4_EXT 0X03
244 #define IGC_PACKET_TYPE_IPV4_EXT_SCTP 0X43
245 #define IGC_PACKET_TYPE_IPV6 0X04
246 #define IGC_PACKET_TYPE_IPV6_TCP 0X14
247 #define IGC_PACKET_TYPE_IPV6_UDP 0X24
248 #define IGC_PACKET_TYPE_IPV6_EXT 0X0C
249 #define IGC_PACKET_TYPE_IPV6_EXT_TCP 0X1C
250 #define IGC_PACKET_TYPE_IPV6_EXT_UDP 0X2C
251 #define IGC_PACKET_TYPE_IPV4_IPV6 0X05
252 #define IGC_PACKET_TYPE_IPV4_IPV6_TCP 0X15
253 #define IGC_PACKET_TYPE_IPV4_IPV6_UDP 0X25
254 #define IGC_PACKET_TYPE_IPV4_IPV6_EXT 0X0D
255 #define IGC_PACKET_TYPE_IPV4_IPV6_EXT_TCP 0X1D
256 #define IGC_PACKET_TYPE_IPV4_IPV6_EXT_UDP 0X2D
257 #define IGC_PACKET_TYPE_MAX 0X80
258 #define IGC_PACKET_TYPE_MASK 0X7F
259 #define IGC_PACKET_TYPE_SHIFT 0X04
260
261 static inline uint32_t
262 rx_desc_pkt_info_to_pkt_type(uint32_t pkt_info)
263 {
264 static const uint32_t
265 ptype_table[IGC_PACKET_TYPE_MAX] __rte_cache_aligned = {
266 [IGC_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
267 RTE_PTYPE_L3_IPV4,
268 [IGC_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
269 RTE_PTYPE_L3_IPV4_EXT,
270 [IGC_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
271 RTE_PTYPE_L3_IPV6,
272 [IGC_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
273 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
274 RTE_PTYPE_INNER_L3_IPV6,
275 [IGC_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
276 RTE_PTYPE_L3_IPV6_EXT,
277 [IGC_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
278 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
279 RTE_PTYPE_INNER_L3_IPV6_EXT,
280 [IGC_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
281 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
282 [IGC_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
283 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
284 [IGC_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
285 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
286 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
287 [IGC_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
288 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
289 [IGC_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
290 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
291 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
292 [IGC_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
293 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
294 [IGC_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
295 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
296 [IGC_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
297 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
298 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
299 [IGC_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
300 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
301 [IGC_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
302 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
303 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
304 [IGC_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
305 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
306 [IGC_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
307 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
308 };
309 if (unlikely(pkt_info & IGC_RXDADV_PKTTYPE_ETQF))
310 return RTE_PTYPE_UNKNOWN;
311
312 pkt_info = (pkt_info >> IGC_PACKET_TYPE_SHIFT) & IGC_PACKET_TYPE_MASK;
313
314 return ptype_table[pkt_info];
315 }
316
317 static inline void
318 rx_desc_get_pkt_info(struct igc_rx_queue *rxq, struct rte_mbuf *rxm,
319 union igc_adv_rx_desc *rxd, uint32_t staterr)
320 {
321 uint64_t pkt_flags;
322 uint32_t hlen_type_rss;
323 uint16_t pkt_info;
324
325 /* Prefetch data of first segment, if configured to do so. */
326 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
327
328 rxm->port = rxq->port_id;
329 hlen_type_rss = rte_le_to_cpu_32(rxd->wb.lower.lo_dword.data);
330 rxm->hash.rss = rte_le_to_cpu_32(rxd->wb.lower.hi_dword.rss);
331 rxm->vlan_tci = rte_le_to_cpu_16(rxd->wb.upper.vlan);
332
333 pkt_flags = (hlen_type_rss & IGC_RXD_RSS_TYPE_MASK) ?
334 PKT_RX_RSS_HASH : 0;
335
336 if (hlen_type_rss & IGC_RXD_VPKT)
337 pkt_flags |= PKT_RX_VLAN;
338
339 pkt_flags |= rx_desc_statuserr_to_pkt_flags(staterr);
340
341 rxm->ol_flags = pkt_flags;
342 pkt_info = rte_le_to_cpu_16(rxd->wb.lower.lo_dword.hs_rss.pkt_info);
343 rxm->packet_type = rx_desc_pkt_info_to_pkt_type(pkt_info);
344 }
345
346 static uint16_t
347 igc_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
348 {
349 struct igc_rx_queue * const rxq = rx_queue;
350 volatile union igc_adv_rx_desc * const rx_ring = rxq->rx_ring;
351 struct igc_rx_entry * const sw_ring = rxq->sw_ring;
352 uint16_t rx_id = rxq->rx_tail;
353 uint16_t nb_rx = 0;
354 uint16_t nb_hold = 0;
355
356 while (nb_rx < nb_pkts) {
357 volatile union igc_adv_rx_desc *rxdp;
358 struct igc_rx_entry *rxe;
359 struct rte_mbuf *rxm;
360 struct rte_mbuf *nmb;
361 union igc_adv_rx_desc rxd;
362 uint32_t staterr;
363 uint16_t data_len;
364
365 /*
366 * The order of operations here is important as the DD status
367 * bit must not be read after any other descriptor fields.
368 * rx_ring and rxdp are pointing to volatile data so the order
369 * of accesses cannot be reordered by the compiler. If they were
370 * not volatile, they could be reordered which could lead to
371 * using invalid descriptor fields when read from rxd.
372 */
373 rxdp = &rx_ring[rx_id];
374 staterr = rte_cpu_to_le_32(rxdp->wb.upper.status_error);
375 if (!(staterr & IGC_RXD_STAT_DD))
376 break;
377 rxd = *rxdp;
378
379 /*
380 * End of packet.
381 *
382 * If the IGC_RXD_STAT_EOP flag is not set, the RX packet is
383 * likely to be invalid and to be dropped by the various
384 * validation checks performed by the network stack.
385 *
386 * Allocate a new mbuf to replenish the RX ring descriptor.
387 * If the allocation fails:
388 * - arrange for that RX descriptor to be the first one
389 * being parsed the next time the receive function is
390 * invoked [on the same queue].
391 *
392 * - Stop parsing the RX ring and return immediately.
393 *
394 * This policy does not drop the packet received in the RX
395 * descriptor for which the allocation of a new mbuf failed.
396 * Thus, it allows that packet to be later retrieved if
397 * mbufs have been freed in the meantime.
398 * As a side effect, holding RX descriptors instead of
399 * systematically giving them back to the NIC may lead to
400 * RX ring exhaustion situations.
401 * However, the NIC can gracefully prevent such situations
402 * from happening by sending specific "back-pressure" flow
403 * control frames to its peer(s).
404 */
405 PMD_RX_LOG(DEBUG,
406 "port_id=%u queue_id=%u rx_id=%u staterr=0x%x data_len=%u",
407 rxq->port_id, rxq->queue_id, rx_id, staterr,
408 rte_le_to_cpu_16(rxd.wb.upper.length));
409
410 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
411 if (nmb == NULL) {
412 unsigned int id;
413 PMD_RX_LOG(DEBUG,
414 "RX mbuf alloc failed, port_id=%u queue_id=%u",
415 rxq->port_id, rxq->queue_id);
416 id = rxq->port_id;
417 rte_eth_devices[id].data->rx_mbuf_alloc_failed++;
418 break;
419 }
420
421 nb_hold++;
422 rxe = &sw_ring[rx_id];
423 rx_id++;
424 if (rx_id >= rxq->nb_rx_desc)
425 rx_id = 0;
426
427 /* Prefetch next mbuf while processing current one. */
428 rte_igc_prefetch(sw_ring[rx_id].mbuf);
429
430 /*
431 * When next RX descriptor is on a cache-line boundary,
432 * prefetch the next 4 RX descriptors and the next 8 pointers
433 * to mbufs.
434 */
435 if ((rx_id & 0x3) == 0) {
436 rte_igc_prefetch(&rx_ring[rx_id]);
437 rte_igc_prefetch(&sw_ring[rx_id]);
438 }
439
440 /*
441 * Update RX descriptor with the physical address of the new
442 * data buffer of the new allocated mbuf.
443 */
444 rxm = rxe->mbuf;
445 rxe->mbuf = nmb;
446 rxdp->read.hdr_addr = 0;
447 rxdp->read.pkt_addr =
448 rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
449 rxm->next = NULL;
450
451 rxm->data_off = RTE_PKTMBUF_HEADROOM;
452 data_len = rte_le_to_cpu_16(rxd.wb.upper.length) - rxq->crc_len;
453 rxm->data_len = data_len;
454 rxm->pkt_len = data_len;
455 rxm->nb_segs = 1;
456
457 rx_desc_get_pkt_info(rxq, rxm, &rxd, staterr);
458
459 /*
460 * Store the mbuf address into the next entry of the array
461 * of returned packets.
462 */
463 rx_pkts[nb_rx++] = rxm;
464 }
465 rxq->rx_tail = rx_id;
466
467 /*
468 * If the number of free RX descriptors is greater than the RX free
469 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
470 * register.
471 * Update the RDT with the value of the last processed RX descriptor
472 * minus 1, to guarantee that the RDT register is never equal to the
473 * RDH register, which creates a "full" ring situation from the
474 * hardware point of view...
475 */
476 nb_hold = nb_hold + rxq->nb_rx_hold;
477 if (nb_hold > rxq->rx_free_thresh) {
478 PMD_RX_LOG(DEBUG,
479 "port_id=%u queue_id=%u rx_tail=%u nb_hold=%u nb_rx=%u",
480 rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
481 rx_id = (rx_id == 0) ? (rxq->nb_rx_desc - 1) : (rx_id - 1);
482 IGC_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
483 nb_hold = 0;
484 }
485 rxq->nb_rx_hold = nb_hold;
486 return nb_rx;
487 }
488
489 static uint16_t
490 igc_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
491 uint16_t nb_pkts)
492 {
493 struct igc_rx_queue * const rxq = rx_queue;
494 volatile union igc_adv_rx_desc * const rx_ring = rxq->rx_ring;
495 struct igc_rx_entry * const sw_ring = rxq->sw_ring;
496 struct rte_mbuf *first_seg = rxq->pkt_first_seg;
497 struct rte_mbuf *last_seg = rxq->pkt_last_seg;
498
499 uint16_t rx_id = rxq->rx_tail;
500 uint16_t nb_rx = 0;
501 uint16_t nb_hold = 0;
502
503 while (nb_rx < nb_pkts) {
504 volatile union igc_adv_rx_desc *rxdp;
505 struct igc_rx_entry *rxe;
506 struct rte_mbuf *rxm;
507 struct rte_mbuf *nmb;
508 union igc_adv_rx_desc rxd;
509 uint32_t staterr;
510 uint16_t data_len;
511
512 next_desc:
513 /*
514 * The order of operations here is important as the DD status
515 * bit must not be read after any other descriptor fields.
516 * rx_ring and rxdp are pointing to volatile data so the order
517 * of accesses cannot be reordered by the compiler. If they were
518 * not volatile, they could be reordered which could lead to
519 * using invalid descriptor fields when read from rxd.
520 */
521 rxdp = &rx_ring[rx_id];
522 staterr = rte_cpu_to_le_32(rxdp->wb.upper.status_error);
523 if (!(staterr & IGC_RXD_STAT_DD))
524 break;
525 rxd = *rxdp;
526
527 /*
528 * Descriptor done.
529 *
530 * Allocate a new mbuf to replenish the RX ring descriptor.
531 * If the allocation fails:
532 * - arrange for that RX descriptor to be the first one
533 * being parsed the next time the receive function is
534 * invoked [on the same queue].
535 *
536 * - Stop parsing the RX ring and return immediately.
537 *
538 * This policy does not drop the packet received in the RX
539 * descriptor for which the allocation of a new mbuf failed.
540 * Thus, it allows that packet to be later retrieved if
541 * mbufs have been freed in the meantime.
542 * As a side effect, holding RX descriptors instead of
543 * systematically giving them back to the NIC may lead to
544 * RX ring exhaustion situations.
545 * However, the NIC can gracefully prevent such situations
546 * from happening by sending specific "back-pressure" flow
547 * control frames to its peer(s).
548 */
549 PMD_RX_LOG(DEBUG,
550 "port_id=%u queue_id=%u rx_id=%u staterr=0x%x data_len=%u",
551 rxq->port_id, rxq->queue_id, rx_id, staterr,
552 rte_le_to_cpu_16(rxd.wb.upper.length));
553
554 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
555 if (nmb == NULL) {
556 unsigned int id;
557 PMD_RX_LOG(DEBUG,
558 "RX mbuf alloc failed, port_id=%u queue_id=%u",
559 rxq->port_id, rxq->queue_id);
560 id = rxq->port_id;
561 rte_eth_devices[id].data->rx_mbuf_alloc_failed++;
562 break;
563 }
564
565 nb_hold++;
566 rxe = &sw_ring[rx_id];
567 rx_id++;
568 if (rx_id >= rxq->nb_rx_desc)
569 rx_id = 0;
570
571 /* Prefetch next mbuf while processing current one. */
572 rte_igc_prefetch(sw_ring[rx_id].mbuf);
573
574 /*
575 * When next RX descriptor is on a cache-line boundary,
576 * prefetch the next 4 RX descriptors and the next 8 pointers
577 * to mbufs.
578 */
579 if ((rx_id & 0x3) == 0) {
580 rte_igc_prefetch(&rx_ring[rx_id]);
581 rte_igc_prefetch(&sw_ring[rx_id]);
582 }
583
584 /*
585 * Update RX descriptor with the physical address of the new
586 * data buffer of the new allocated mbuf.
587 */
588 rxm = rxe->mbuf;
589 rxe->mbuf = nmb;
590 rxdp->read.hdr_addr = 0;
591 rxdp->read.pkt_addr =
592 rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
593 rxm->next = NULL;
594
595 /*
596 * Set data length & data buffer address of mbuf.
597 */
598 rxm->data_off = RTE_PKTMBUF_HEADROOM;
599 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
600 rxm->data_len = data_len;
601
602 /*
603 * If this is the first buffer of the received packet,
604 * set the pointer to the first mbuf of the packet and
605 * initialize its context.
606 * Otherwise, update the total length and the number of segments
607 * of the current scattered packet, and update the pointer to
608 * the last mbuf of the current packet.
609 */
610 if (first_seg == NULL) {
611 first_seg = rxm;
612 first_seg->pkt_len = data_len;
613 first_seg->nb_segs = 1;
614 } else {
615 first_seg->pkt_len += data_len;
616 first_seg->nb_segs++;
617 last_seg->next = rxm;
618 }
619
620 /*
621 * If this is not the last buffer of the received packet,
622 * update the pointer to the last mbuf of the current scattered
623 * packet and continue to parse the RX ring.
624 */
625 if (!(staterr & IGC_RXD_STAT_EOP)) {
626 last_seg = rxm;
627 goto next_desc;
628 }
629
630 /*
631 * This is the last buffer of the received packet.
632 * If the CRC is not stripped by the hardware:
633 * - Subtract the CRC length from the total packet length.
634 * - If the last buffer only contains the whole CRC or a part
635 * of it, free the mbuf associated to the last buffer.
636 * If part of the CRC is also contained in the previous
637 * mbuf, subtract the length of that CRC part from the
638 * data length of the previous mbuf.
639 */
640 if (unlikely(rxq->crc_len > 0)) {
641 first_seg->pkt_len -= RTE_ETHER_CRC_LEN;
642 if (data_len <= RTE_ETHER_CRC_LEN) {
643 rte_pktmbuf_free_seg(rxm);
644 first_seg->nb_segs--;
645 last_seg->data_len = last_seg->data_len -
646 (RTE_ETHER_CRC_LEN - data_len);
647 last_seg->next = NULL;
648 } else {
649 rxm->data_len = (uint16_t)
650 (data_len - RTE_ETHER_CRC_LEN);
651 }
652 }
653
654 rx_desc_get_pkt_info(rxq, first_seg, &rxd, staterr);
655
656 /*
657 * Store the mbuf address into the next entry of the array
658 * of returned packets.
659 */
660 rx_pkts[nb_rx++] = first_seg;
661
662 /* Setup receipt context for a new packet. */
663 first_seg = NULL;
664 }
665 rxq->rx_tail = rx_id;
666
667 /*
668 * Save receive context.
669 */
670 rxq->pkt_first_seg = first_seg;
671 rxq->pkt_last_seg = last_seg;
672
673 /*
674 * If the number of free RX descriptors is greater than the RX free
675 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
676 * register.
677 * Update the RDT with the value of the last processed RX descriptor
678 * minus 1, to guarantee that the RDT register is never equal to the
679 * RDH register, which creates a "full" ring situation from the
680 * hardware point of view...
681 */
682 nb_hold = nb_hold + rxq->nb_rx_hold;
683 if (nb_hold > rxq->rx_free_thresh) {
684 PMD_RX_LOG(DEBUG,
685 "port_id=%u queue_id=%u rx_tail=%u nb_hold=%u nb_rx=%u",
686 rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
687 rx_id = (rx_id == 0) ? (rxq->nb_rx_desc - 1) : (rx_id - 1);
688 IGC_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
689 nb_hold = 0;
690 }
691 rxq->nb_rx_hold = nb_hold;
692 return nb_rx;
693 }
694
695 static void
696 igc_rx_queue_release_mbufs(struct igc_rx_queue *rxq)
697 {
698 unsigned int i;
699
700 if (rxq->sw_ring != NULL) {
701 for (i = 0; i < rxq->nb_rx_desc; i++) {
702 if (rxq->sw_ring[i].mbuf != NULL) {
703 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
704 rxq->sw_ring[i].mbuf = NULL;
705 }
706 }
707 }
708 }
709
710 static void
711 igc_rx_queue_release(struct igc_rx_queue *rxq)
712 {
713 igc_rx_queue_release_mbufs(rxq);
714 rte_free(rxq->sw_ring);
715 rte_free(rxq);
716 }
717
718 void eth_igc_rx_queue_release(void *rxq)
719 {
720 if (rxq)
721 igc_rx_queue_release(rxq);
722 }
723
724 uint32_t eth_igc_rx_queue_count(struct rte_eth_dev *dev,
725 uint16_t rx_queue_id)
726 {
727 /**
728 * Check the DD bit of one RX descriptor in every group of 4,
729 * to avoid checking too frequently and degrading performance
730 * too much.
731 */
732 #define IGC_RXQ_SCAN_INTERVAL 4
733
734 volatile union igc_adv_rx_desc *rxdp;
735 struct igc_rx_queue *rxq;
736 uint16_t desc = 0;
737
738 rxq = dev->data->rx_queues[rx_queue_id];
739 rxdp = &rxq->rx_ring[rxq->rx_tail];
740
741 while (desc < rxq->nb_rx_desc - rxq->rx_tail) {
742 if (unlikely(!(rxdp->wb.upper.status_error &
743 IGC_RXD_STAT_DD)))
744 return desc;
745 desc += IGC_RXQ_SCAN_INTERVAL;
746 rxdp += IGC_RXQ_SCAN_INTERVAL;
747 }
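/* Wrap around to the beginning of the ring and keep scanning. */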
748 rxdp = &rxq->rx_ring[rxq->rx_tail + desc - rxq->nb_rx_desc];
749
750 while (desc < rxq->nb_rx_desc &&
751 (rxdp->wb.upper.status_error & IGC_RXD_STAT_DD)) {
752 desc += IGC_RXQ_SCAN_INTERVAL;
753 rxdp += IGC_RXQ_SCAN_INTERVAL;
754 }
755
756 return desc;
757 }
758
759 int eth_igc_rx_descriptor_done(void *rx_queue, uint16_t offset)
760 {
761 volatile union igc_adv_rx_desc *rxdp;
762 struct igc_rx_queue *rxq = rx_queue;
763 uint32_t desc;
764
765 if (unlikely(!rxq || offset >= rxq->nb_rx_desc))
766 return 0;
767
768 desc = rxq->rx_tail + offset;
769 if (desc >= rxq->nb_rx_desc)
770 desc -= rxq->nb_rx_desc;
771
772 rxdp = &rxq->rx_ring[desc];
773 return !!(rxdp->wb.upper.status_error &
774 rte_cpu_to_le_32(IGC_RXD_STAT_DD));
775 }
776
777 int eth_igc_rx_descriptor_status(void *rx_queue, uint16_t offset)
778 {
779 struct igc_rx_queue *rxq = rx_queue;
780 volatile uint32_t *status;
781 uint32_t desc;
782
783 if (unlikely(!rxq || offset >= rxq->nb_rx_desc))
784 return -EINVAL;
785
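/*
 * Descriptors still held by the driver (nb_rx_hold) have not been
 * returned to the hardware yet, so report them as unavailable.
 */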
786 if (offset >= rxq->nb_rx_desc - rxq->nb_rx_hold)
787 return RTE_ETH_RX_DESC_UNAVAIL;
788
789 desc = rxq->rx_tail + offset;
790 if (desc >= rxq->nb_rx_desc)
791 desc -= rxq->nb_rx_desc;
792
793 status = &rxq->rx_ring[desc].wb.upper.status_error;
794 if (*status & rte_cpu_to_le_32(IGC_RXD_STAT_DD))
795 return RTE_ETH_RX_DESC_DONE;
796
797 return RTE_ETH_RX_DESC_AVAIL;
798 }
799
800 static int
801 igc_alloc_rx_queue_mbufs(struct igc_rx_queue *rxq)
802 {
803 struct igc_rx_entry *rxe = rxq->sw_ring;
804 uint64_t dma_addr;
805 unsigned int i;
806
807 /* Initialize software ring entries. */
808 for (i = 0; i < rxq->nb_rx_desc; i++) {
809 volatile union igc_adv_rx_desc *rxd;
810 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
811
812 if (mbuf == NULL) {
813 PMD_DRV_LOG(ERR, "RX mbuf alloc failed, queue_id=%hu",
814 rxq->queue_id);
815 return -ENOMEM;
816 }
817 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
818 rxd = &rxq->rx_ring[i];
819 rxd->read.hdr_addr = 0;
820 rxd->read.pkt_addr = dma_addr;
821 rxe[i].mbuf = mbuf;
822 }
823
824 return 0;
825 }
826
827 /*
828 * RSS random key supplied in section 7.1.2.9.3 of the Intel I225 datasheet.
829 * Used as the default key.
830 */
831 static uint8_t default_rss_key[40] = {
832 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
833 0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
834 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
835 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
836 0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
837 };
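/*
 * The 40-byte key presumably fills the IGC_HKEY_MAX_INDEX 32-bit RSSRK
 * registers programmed in igc_hw_rss_hash_set() below.
 */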
838
839 void
840 igc_rss_disable(struct rte_eth_dev *dev)
841 {
842 struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
843 uint32_t mrqc;
844
845 mrqc = IGC_READ_REG(hw, IGC_MRQC);
846 mrqc &= ~IGC_MRQC_ENABLE_MASK;
847 IGC_WRITE_REG(hw, IGC_MRQC, mrqc);
848 }
849
850 void
851 igc_hw_rss_hash_set(struct igc_hw *hw, struct rte_eth_rss_conf *rss_conf)
852 {
853 uint32_t *hash_key = (uint32_t *)rss_conf->rss_key;
854 uint32_t mrqc;
855 uint64_t rss_hf;
856
857 if (hash_key != NULL) {
858 uint8_t i;
859
860 /* Fill in RSS hash key */
861 for (i = 0; i < IGC_HKEY_MAX_INDEX; i++)
862 IGC_WRITE_REG_LE_VALUE(hw, IGC_RSSRK(i), hash_key[i]);
863 }
864
865 /* Set configured hashing protocols in MRQC register */
866 rss_hf = rss_conf->rss_hf;
867 mrqc = IGC_MRQC_ENABLE_RSS_4Q; /* RSS enabled. */
868 if (rss_hf & ETH_RSS_IPV4)
869 mrqc |= IGC_MRQC_RSS_FIELD_IPV4;
870 if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
871 mrqc |= IGC_MRQC_RSS_FIELD_IPV4_TCP;
872 if (rss_hf & ETH_RSS_IPV6)
873 mrqc |= IGC_MRQC_RSS_FIELD_IPV6;
874 if (rss_hf & ETH_RSS_IPV6_EX)
875 mrqc |= IGC_MRQC_RSS_FIELD_IPV6_EX;
876 if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
877 mrqc |= IGC_MRQC_RSS_FIELD_IPV6_TCP;
878 if (rss_hf & ETH_RSS_IPV6_TCP_EX)
879 mrqc |= IGC_MRQC_RSS_FIELD_IPV6_TCP_EX;
880 if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
881 mrqc |= IGC_MRQC_RSS_FIELD_IPV4_UDP;
882 if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
883 mrqc |= IGC_MRQC_RSS_FIELD_IPV6_UDP;
884 if (rss_hf & ETH_RSS_IPV6_UDP_EX)
885 mrqc |= IGC_MRQC_RSS_FIELD_IPV6_UDP_EX;
886 IGC_WRITE_REG(hw, IGC_MRQC, mrqc);
887 }
888
889 static void
890 igc_rss_configure(struct rte_eth_dev *dev)
891 {
892 struct rte_eth_rss_conf rss_conf;
893 struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
894 uint16_t i;
895
896 /* Fill in redirection table. */
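/*
 * Each RETA register holds sizeof(reta) one-byte queue entries
 * (presumably four) and is written once all of its bytes are filled.
 */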
897 for (i = 0; i < IGC_RSS_RDT_SIZD; i++) {
898 union igc_rss_reta_reg reta;
899 uint16_t q_idx, reta_idx;
900
901 q_idx = (uint8_t)((dev->data->nb_rx_queues > 1) ?
902 i % dev->data->nb_rx_queues : 0);
903 reta_idx = i % sizeof(reta);
904 reta.bytes[reta_idx] = q_idx;
905 if (reta_idx == sizeof(reta) - 1)
906 IGC_WRITE_REG_LE_VALUE(hw,
907 IGC_RETA(i / sizeof(reta)), reta.dword);
908 }
909
910 /*
911 * Configure the RSS key and the RSS protocols used to compute
912 * the RSS hash of input packets.
913 */
914 rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
915 if (rss_conf.rss_key == NULL)
916 rss_conf.rss_key = default_rss_key;
917 igc_hw_rss_hash_set(hw, &rss_conf);
918 }
919
920 int
921 igc_del_rss_filter(struct rte_eth_dev *dev)
922 {
923 struct igc_rss_filter *rss_filter = IGC_DEV_PRIVATE_RSS_FILTER(dev);
924
925 if (rss_filter->enable) {
926 /* recover default RSS configuration */
927 igc_rss_configure(dev);
928
929 /* disable RSS logic and clear filter data */
930 igc_rss_disable(dev);
931 memset(rss_filter, 0, sizeof(*rss_filter));
932 return 0;
933 }
934 PMD_DRV_LOG(ERR, "filter does not exist!");
935 return -ENOENT;
936 }
937
938 /* Initialize the filter structure from a rte_flow_action_rss structure */
939 void
940 igc_rss_conf_set(struct igc_rss_filter *out,
941 const struct rte_flow_action_rss *rss)
942 {
943 out->conf.func = rss->func;
944 out->conf.level = rss->level;
945 out->conf.types = rss->types;
946
947 if (rss->key_len == sizeof(out->key)) {
948 memcpy(out->key, rss->key, rss->key_len);
949 out->conf.key = out->key;
950 out->conf.key_len = rss->key_len;
951 } else {
952 out->conf.key = NULL;
953 out->conf.key_len = 0;
954 }
955
956 if (rss->queue_num <= IGC_RSS_RDT_SIZD) {
957 memcpy(out->queue, rss->queue,
958 sizeof(*out->queue) * rss->queue_num);
959 out->conf.queue = out->queue;
960 out->conf.queue_num = rss->queue_num;
961 } else {
962 out->conf.queue = NULL;
963 out->conf.queue_num = 0;
964 }
965 }
966
967 int
968 igc_add_rss_filter(struct rte_eth_dev *dev, struct igc_rss_filter *rss)
969 {
970 struct rte_eth_rss_conf rss_conf = {
971 .rss_key = rss->conf.key_len ?
972 (void *)(uintptr_t)rss->conf.key : NULL,
973 .rss_key_len = rss->conf.key_len,
974 .rss_hf = rss->conf.types,
975 };
976 struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
977 struct igc_rss_filter *rss_filter = IGC_DEV_PRIVATE_RSS_FILTER(dev);
978 uint32_t i, j;
979
980 /* check RSS type is valid */
981 if ((rss_conf.rss_hf & IGC_RSS_OFFLOAD_ALL) == 0) {
982 PMD_DRV_LOG(ERR,
983 "RSS type(0x%" PRIx64 ") error!, only 0x%" PRIx64
984 " been supported", rss_conf.rss_hf,
985 (uint64_t)IGC_RSS_OFFLOAD_ALL);
986 return -EINVAL;
987 }
988
989 /* check queue count is not zero */
990 if (!rss->conf.queue_num) {
991 PMD_DRV_LOG(ERR, "Queue number should not be 0!");
992 return -EINVAL;
993 }
994
995 /* check queue id is valid */
996 for (i = 0; i < rss->conf.queue_num; i++)
997 if (rss->conf.queue[i] >= dev->data->nb_rx_queues) {
998 PMD_DRV_LOG(ERR, "Queue id %u is invalid!",
999 rss->conf.queue[i]);
1000 return -EINVAL;
1001 }
1002
1003 /* only support one filter */
1004 if (rss_filter->enable) {
1005 PMD_DRV_LOG(ERR, "Only one RSS filter is supported!");
1006 return -ENOTSUP;
1007 }
1008 rss_filter->enable = 1;
1009
1010 igc_rss_conf_set(rss_filter, &rss->conf);
1011
1012 /* Fill in redirection table. */
1013 for (i = 0, j = 0; i < IGC_RSS_RDT_SIZD; i++, j++) {
1014 union igc_rss_reta_reg reta;
1015 uint16_t q_idx, reta_idx;
1016
1017 if (j == rss->conf.queue_num)
1018 j = 0;
1019 q_idx = rss->conf.queue[j];
1020 reta_idx = i % sizeof(reta);
1021 reta.bytes[reta_idx] = q_idx;
1022 if (reta_idx == sizeof(reta) - 1)
1023 IGC_WRITE_REG_LE_VALUE(hw,
1024 IGC_RETA(i / sizeof(reta)), reta.dword);
1025 }
1026
1027 if (rss_conf.rss_key == NULL)
1028 rss_conf.rss_key = default_rss_key;
1029 igc_hw_rss_hash_set(hw, &rss_conf);
1030 return 0;
1031 }
1032
1033 void
1034 igc_clear_rss_filter(struct rte_eth_dev *dev)
1035 {
1036 struct igc_rss_filter *rss_filter = IGC_DEV_PRIVATE_RSS_FILTER(dev);
1037
1038 if (!rss_filter->enable)
1039 return;
1040
1041 /* recover default RSS configuration */
1042 igc_rss_configure(dev);
1043
1044 /* disable RSS logic and clear filter data */
1045 igc_rss_disable(dev);
1046 memset(rss_filter, 0, sizeof(*rss_filter));
1047 }
1048
1049 static int
1050 igc_dev_mq_rx_configure(struct rte_eth_dev *dev)
1051 {
1052 if (RTE_ETH_DEV_SRIOV(dev).active) {
1053 PMD_DRV_LOG(ERR, "SRIOV unsupported!");
1054 return -EINVAL;
1055 }
1056
1057 switch (dev->data->dev_conf.rxmode.mq_mode) {
1058 case ETH_MQ_RX_RSS:
1059 igc_rss_configure(dev);
1060 break;
1061 case ETH_MQ_RX_NONE:
1062 /*
1063 * configure the RSS registers with the default settings first,
1064 * then disable the RSS logic
1065 */
1066 igc_rss_configure(dev);
1067 igc_rss_disable(dev);
1068 break;
1069 default:
1070 PMD_DRV_LOG(ERR, "rx mode(%d) not supported!",
1071 dev->data->dev_conf.rxmode.mq_mode);
1072 return -EINVAL;
1073 }
1074 return 0;
1075 }
1076
1077 int
1078 igc_rx_init(struct rte_eth_dev *dev)
1079 {
1080 struct igc_rx_queue *rxq;
1081 struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1082 uint64_t offloads = dev->data->dev_conf.rxmode.offloads;
1083 uint32_t max_rx_pkt_len = dev->data->dev_conf.rxmode.max_rx_pkt_len;
1084 uint32_t rctl;
1085 uint32_t rxcsum;
1086 uint16_t buf_size;
1087 uint16_t rctl_bsize;
1088 uint16_t i;
1089 int ret;
1090
1091 dev->rx_pkt_burst = igc_recv_pkts;
1092
1093 /*
1094 * Make sure receives are disabled while setting
1095 * up the descriptor ring.
1096 */
1097 rctl = IGC_READ_REG(hw, IGC_RCTL);
1098 IGC_WRITE_REG(hw, IGC_RCTL, rctl & ~IGC_RCTL_EN);
1099
1100 /* Configure support of jumbo frames, if any. */
1101 if (offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) {
1102 rctl |= IGC_RCTL_LPE;
1103
1104 /*
1105 * Set the maximum packet length by default; it may be updated
1106 * later together with enabling or disabling dual VLAN.
1107 */
1108 IGC_WRITE_REG(hw, IGC_RLPML, max_rx_pkt_len);
1109 } else {
1110 rctl &= ~IGC_RCTL_LPE;
1111 }
1112
1113 /* Configure and enable each RX queue. */
1114 rctl_bsize = 0;
1115 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1116 uint64_t bus_addr;
1117 uint32_t rxdctl;
1118 uint32_t srrctl;
1119
1120 rxq = dev->data->rx_queues[i];
1121 rxq->flags = 0;
1122
1123 /* Allocate buffers for descriptor rings and set up queue */
1124 ret = igc_alloc_rx_queue_mbufs(rxq);
1125 if (ret)
1126 return ret;
1127
1128 /*
1129 * Reset crc_len in case it was changed after queue setup by a
1130 * call to configure
1131 */
1132 rxq->crc_len = (offloads & DEV_RX_OFFLOAD_KEEP_CRC) ?
1133 RTE_ETHER_CRC_LEN : 0;
1134
1135 bus_addr = rxq->rx_ring_phys_addr;
1136 IGC_WRITE_REG(hw, IGC_RDLEN(rxq->reg_idx),
1137 rxq->nb_rx_desc *
1138 sizeof(union igc_adv_rx_desc));
1139 IGC_WRITE_REG(hw, IGC_RDBAH(rxq->reg_idx),
1140 (uint32_t)(bus_addr >> 32));
1141 IGC_WRITE_REG(hw, IGC_RDBAL(rxq->reg_idx),
1142 (uint32_t)bus_addr);
1143
1144 /* set descriptor configuration */
1145 srrctl = IGC_SRRCTL_DESCTYPE_ADV_ONEBUF;
1146
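/*
 * The header buffer size field is presumably expressed in 64-byte
 * units, hence RTE_PKTMBUF_HEADROOM / 64.
 */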
1147 srrctl |= (uint32_t)(RTE_PKTMBUF_HEADROOM / 64) <<
1148 IGC_SRRCTL_BSIZEHEADER_SHIFT;
1149 /*
1150 * Configure RX buffer size.
1151 */
1152 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
1153 RTE_PKTMBUF_HEADROOM);
1154 if (buf_size >= 1024) {
1155 /*
1156 * Configure the BSIZEPACKET field of the SRRCTL
1157 * register of the queue.
1158 * Value is in 1 KB resolution, from 1 KB to 16 KB.
1159 * If this field is equal to 0b, then RCTL.BSIZE
1160 * determines the RX packet buffer size.
1161 */
1162
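/*
 * Program BSIZEPACKET (1 KB resolution, per the comment above) and
 * recompute the effective buffer size the hardware will use after
 * rounding down.
 */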
1163 srrctl |= ((buf_size >> IGC_SRRCTL_BSIZEPKT_SHIFT) &
1164 IGC_SRRCTL_BSIZEPKT_MASK);
1165 buf_size = (uint16_t)((srrctl &
1166 IGC_SRRCTL_BSIZEPKT_MASK) <<
1167 IGC_SRRCTL_BSIZEPKT_SHIFT);
1168
1169 /* Add dual VLAN tag length to support dual VLAN (QinQ) frames */
1170 if (max_rx_pkt_len + 2 * VLAN_TAG_SIZE > buf_size)
1171 dev->data->scattered_rx = 1;
1172 } else {
1173 /*
1174 * Use BSIZE field of the device RCTL register.
1175 */
1176 if (rctl_bsize == 0 || rctl_bsize > buf_size)
1177 rctl_bsize = buf_size;
1178 dev->data->scattered_rx = 1;
1179 }
1180
1181 /* Set if packets are dropped when no descriptors available */
1182 if (rxq->drop_en)
1183 srrctl |= IGC_SRRCTL_DROP_EN;
1184
1185 IGC_WRITE_REG(hw, IGC_SRRCTL(rxq->reg_idx), srrctl);
1186
1187 /* Enable this RX queue. */
1188 rxdctl = IGC_RXDCTL_QUEUE_ENABLE;
1189 rxdctl |= ((uint32_t)rxq->pthresh << IGC_RXDCTL_PTHRESH_SHIFT) &
1190 IGC_RXDCTL_PTHRESH_MSK;
1191 rxdctl |= ((uint32_t)rxq->hthresh << IGC_RXDCTL_HTHRESH_SHIFT) &
1192 IGC_RXDCTL_HTHRESH_MSK;
1193 rxdctl |= ((uint32_t)rxq->wthresh << IGC_RXDCTL_WTHRESH_SHIFT) &
1194 IGC_RXDCTL_WTHRESH_MSK;
1195 IGC_WRITE_REG(hw, IGC_RXDCTL(rxq->reg_idx), rxdctl);
1196 }
1197
1198 if (offloads & DEV_RX_OFFLOAD_SCATTER)
1199 dev->data->scattered_rx = 1;
1200
1201 if (dev->data->scattered_rx) {
1202 PMD_DRV_LOG(DEBUG, "forcing scatter mode");
1203 dev->rx_pkt_burst = igc_recv_scattered_pkts;
1204 }
1205 /*
1206 * Setup BSIZE field of RCTL register, if needed.
1207 * Buffer sizes >= 1024 are not [supposed to be] setup in the RCTL
1208 * register, since the code above configures the SRRCTL register of
1209 * the RX queue in such a case.
1210 * All configurable sizes are:
1211 * 16384: rctl |= (IGC_RCTL_SZ_16384 | IGC_RCTL_BSEX);
1212 * 8192: rctl |= (IGC_RCTL_SZ_8192 | IGC_RCTL_BSEX);
1213 * 4096: rctl |= (IGC_RCTL_SZ_4096 | IGC_RCTL_BSEX);
1214 * 2048: rctl |= IGC_RCTL_SZ_2048;
1215 * 1024: rctl |= IGC_RCTL_SZ_1024;
1216 * 512: rctl |= IGC_RCTL_SZ_512;
1217 * 256: rctl |= IGC_RCTL_SZ_256;
1218 */
1219 if (rctl_bsize > 0) {
1220 if (rctl_bsize >= 512) /* 512 <= buf_size < 1024 - use 512 */
1221 rctl |= IGC_RCTL_SZ_512;
1222 else /* 256 <= buf_size < 512 - use 256 */
1223 rctl |= IGC_RCTL_SZ_256;
1224 }
1225
1226 /*
1227 * Configure RSS if device configured with multiple RX queues.
1228 */
1229 igc_dev_mq_rx_configure(dev);
1230
1231 /* Update the rctl since igc_dev_mq_rx_configure may change its value */
1232 rctl |= IGC_READ_REG(hw, IGC_RCTL);
1233
1234 /*
1235 * Setup the Checksum Register.
1236 * Receive Full-Packet Checksum Offload is mutually exclusive with RSS.
1237 */
1238 rxcsum = IGC_READ_REG(hw, IGC_RXCSUM);
1239 rxcsum |= IGC_RXCSUM_PCSD;
1240
1241 /* Enable both L3/L4 rx checksum offload */
1242 if (offloads & DEV_RX_OFFLOAD_IPV4_CKSUM)
1243 rxcsum |= IGC_RXCSUM_IPOFL;
1244 else
1245 rxcsum &= ~IGC_RXCSUM_IPOFL;
1246
1247 if (offloads &
1248 (DEV_RX_OFFLOAD_TCP_CKSUM | DEV_RX_OFFLOAD_UDP_CKSUM)) {
1249 rxcsum |= IGC_RXCSUM_TUOFL;
1250 offloads |= DEV_RX_OFFLOAD_SCTP_CKSUM;
1251 } else {
1252 rxcsum &= ~IGC_RXCSUM_TUOFL;
1253 }
1254
1255 if (offloads & DEV_RX_OFFLOAD_SCTP_CKSUM)
1256 rxcsum |= IGC_RXCSUM_CRCOFL;
1257 else
1258 rxcsum &= ~IGC_RXCSUM_CRCOFL;
1259
1260 IGC_WRITE_REG(hw, IGC_RXCSUM, rxcsum);
1261
1262 /* Setup the Receive Control Register. */
1263 if (offloads & DEV_RX_OFFLOAD_KEEP_CRC)
1264 rctl &= ~IGC_RCTL_SECRC; /* Do not Strip Ethernet CRC. */
1265 else
1266 rctl |= IGC_RCTL_SECRC; /* Strip Ethernet CRC. */
1267
1268 rctl &= ~IGC_RCTL_MO_MSK;
1269 rctl &= ~IGC_RCTL_LBM_MSK;
1270 rctl |= IGC_RCTL_EN | IGC_RCTL_BAM | IGC_RCTL_LBM_NO |
1271 IGC_RCTL_DPF |
1272 (hw->mac.mc_filter_type << IGC_RCTL_MO_SHIFT);
1273
1274 if (dev->data->dev_conf.lpbk_mode == 1)
1275 rctl |= IGC_RCTL_LBM_MAC;
1276
1277 rctl &= ~(IGC_RCTL_HSEL_MSK | IGC_RCTL_CFIEN | IGC_RCTL_CFI |
1278 IGC_RCTL_PSP | IGC_RCTL_PMCF);
1279
1280 /* Make sure VLAN Filters are off. */
1281 rctl &= ~IGC_RCTL_VFE;
1282 /* Don't store bad packets. */
1283 rctl &= ~IGC_RCTL_SBP;
1284
1285 /* Enable Receives. */
1286 IGC_WRITE_REG(hw, IGC_RCTL, rctl);
1287
1288 /*
1289 * Setup the HW Rx Head and Tail Descriptor Pointers.
1290 * This needs to be done after enable.
1291 */
1292 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1293 rxq = dev->data->rx_queues[i];
1294 IGC_WRITE_REG(hw, IGC_RDH(rxq->reg_idx), 0);
1295 IGC_WRITE_REG(hw, IGC_RDT(rxq->reg_idx),
1296 rxq->nb_rx_desc - 1);
1297
1298 /* Enable per-queue VLAN strip offload, if requested */
1299 if (rxq->offloads & DEV_RX_OFFLOAD_VLAN_STRIP) {
1300 uint32_t dvmolr;
1301 dvmolr = IGC_READ_REG(hw, IGC_DVMOLR(rxq->reg_idx));
1302
1303 /* If the VLAN has been stripped off, the CRC is meaningless. */
1304 dvmolr |= IGC_DVMOLR_STRVLAN | IGC_DVMOLR_STRCRC;
1305 IGC_WRITE_REG(hw, IGC_DVMOLR(rxq->reg_idx), dvmolr);
1306 }
1307 }
1308
1309 return 0;
1310 }
1311
1312 static void
1313 igc_reset_rx_queue(struct igc_rx_queue *rxq)
1314 {
1315 static const union igc_adv_rx_desc zeroed_desc = { {0} };
1316 unsigned int i;
1317
1318 /* Zero out HW ring memory */
1319 for (i = 0; i < rxq->nb_rx_desc; i++)
1320 rxq->rx_ring[i] = zeroed_desc;
1321
1322 rxq->rx_tail = 0;
1323 rxq->pkt_first_seg = NULL;
1324 rxq->pkt_last_seg = NULL;
1325 }
1326
1327 int
1328 eth_igc_rx_queue_setup(struct rte_eth_dev *dev,
1329 uint16_t queue_idx,
1330 uint16_t nb_desc,
1331 unsigned int socket_id,
1332 const struct rte_eth_rxconf *rx_conf,
1333 struct rte_mempool *mp)
1334 {
1335 struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1336 const struct rte_memzone *rz;
1337 struct igc_rx_queue *rxq;
1338 unsigned int size;
1339
1340 /*
1341 * Validate number of receive descriptors.
1342 * It must not exceed hardware maximum, and must be multiple
1343 * of IGC_RX_DESCRIPTOR_MULTIPLE.
1344 */
1345 if (nb_desc % IGC_RX_DESCRIPTOR_MULTIPLE != 0 ||
1346 nb_desc > IGC_MAX_RXD || nb_desc < IGC_MIN_RXD) {
1347 PMD_DRV_LOG(ERR,
1348 "RX descriptor must be multiple of %u(cur: %u) and between %u and %u",
1349 IGC_RX_DESCRIPTOR_MULTIPLE, nb_desc,
1350 IGC_MIN_RXD, IGC_MAX_RXD);
1351 return -EINVAL;
1352 }
1353
1354 /* Free memory prior to re-allocation if needed */
1355 if (dev->data->rx_queues[queue_idx] != NULL) {
1356 igc_rx_queue_release(dev->data->rx_queues[queue_idx]);
1357 dev->data->rx_queues[queue_idx] = NULL;
1358 }
1359
1360 /* First allocate the RX queue data structure. */
1361 rxq = rte_zmalloc("ethdev RX queue", sizeof(struct igc_rx_queue),
1362 RTE_CACHE_LINE_SIZE);
1363 if (rxq == NULL)
1364 return -ENOMEM;
1365 rxq->offloads = rx_conf->offloads;
1366 rxq->mb_pool = mp;
1367 rxq->nb_rx_desc = nb_desc;
1368 rxq->pthresh = rx_conf->rx_thresh.pthresh;
1369 rxq->hthresh = rx_conf->rx_thresh.hthresh;
1370 rxq->wthresh = rx_conf->rx_thresh.wthresh;
1371 rxq->drop_en = rx_conf->rx_drop_en;
1372 rxq->rx_free_thresh = rx_conf->rx_free_thresh;
1373 rxq->queue_id = queue_idx;
1374 rxq->reg_idx = queue_idx;
1375 rxq->port_id = dev->data->port_id;
1376
1377 /*
1378 * Allocate RX ring hardware descriptors. A memzone large enough to
1379 * handle the maximum ring size is allocated in order to allow for
1380 * resizing in later calls to the queue setup function.
1381 */
1382 size = sizeof(union igc_adv_rx_desc) * IGC_MAX_RXD;
1383 rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx, size,
1384 IGC_ALIGN, socket_id);
1385 if (rz == NULL) {
1386 igc_rx_queue_release(rxq);
1387 return -ENOMEM;
1388 }
1389 rxq->rdt_reg_addr = IGC_PCI_REG_ADDR(hw, IGC_RDT(rxq->reg_idx));
1390 rxq->rdh_reg_addr = IGC_PCI_REG_ADDR(hw, IGC_RDH(rxq->reg_idx));
1391 rxq->rx_ring_phys_addr = rz->iova;
1392 rxq->rx_ring = (union igc_adv_rx_desc *)rz->addr;
1393
1394 /* Allocate software ring. */
1395 rxq->sw_ring = rte_zmalloc("rxq->sw_ring",
1396 sizeof(struct igc_rx_entry) * nb_desc,
1397 RTE_CACHE_LINE_SIZE);
1398 if (rxq->sw_ring == NULL) {
1399 igc_rx_queue_release(rxq);
1400 return -ENOMEM;
1401 }
1402
1403 PMD_DRV_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%" PRIx64,
1404 rxq->sw_ring, rxq->rx_ring, rxq->rx_ring_phys_addr);
1405
1406 dev->data->rx_queues[queue_idx] = rxq;
1407 igc_reset_rx_queue(rxq);
1408
1409 return 0;
1410 }
1411
1412 /* prepare packets for transmit */
1413 static uint16_t
1414 eth_igc_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
1415 uint16_t nb_pkts)
1416 {
1417 int i, ret;
1418 struct rte_mbuf *m;
1419
1420 for (i = 0; i < nb_pkts; i++) {
1421 m = tx_pkts[i];
1422
1423 /* Check some limitations for TSO in hardware */
1424 if (m->ol_flags & IGC_TX_OFFLOAD_SEG)
1425 if (m->tso_segsz > IGC_TSO_MAX_MSS ||
1426 m->l2_len + m->l3_len + m->l4_len >
1427 IGC_TSO_MAX_HDRLEN) {
1428 rte_errno = EINVAL;
1429 return i;
1430 }
1431
1432 if (m->ol_flags & IGC_TX_OFFLOAD_NOTSUP_MASK) {
1433 rte_errno = ENOTSUP;
1434 return i;
1435 }
1436
1437 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
1438 ret = rte_validate_tx_offload(m);
1439 if (ret != 0) {
1440 rte_errno = -ret;
1441 return i;
1442 }
1443 #endif
1444 ret = rte_net_intel_cksum_prepare(m);
1445 if (ret != 0) {
1446 rte_errno = -ret;
1447 return i;
1448 }
1449 }
1450
1451 return i;
1452 }
1453
1454 /*
1455 * There are some hardware limitations for TCP segmentation offload.
1456 * Check whether the parameters are valid.
1457 */
1458 static inline uint64_t
1459 check_tso_para(uint64_t ol_req, union igc_tx_offload ol_para)
1460 {
1461 if (!(ol_req & IGC_TX_OFFLOAD_SEG))
1462 return ol_req;
1463 if (ol_para.tso_segsz > IGC_TSO_MAX_MSS || ol_para.l2_len +
1464 ol_para.l3_len + ol_para.l4_len > IGC_TSO_MAX_HDRLEN) {
1465 ol_req &= ~IGC_TX_OFFLOAD_SEG;
1466 ol_req |= PKT_TX_TCP_CKSUM;
1467 }
1468 return ol_req;
1469 }
1470
1471 /*
1472 * Check which hardware context can be used. Use the existing match
1473 * or create a new context descriptor.
1474 */
1475 static inline uint32_t
1476 what_advctx_update(struct igc_tx_queue *txq, uint64_t flags,
1477 union igc_tx_offload tx_offload)
1478 {
1479 uint32_t curr = txq->ctx_curr;
1480
1481 /* Check for a match with the current context */
1482 if (likely(txq->ctx_cache[curr].flags == flags &&
1483 txq->ctx_cache[curr].tx_offload.data ==
1484 (txq->ctx_cache[curr].tx_offload_mask.data &
1485 tx_offload.data))) {
1486 return curr;
1487 }
1488
1489 /* There are two contexts in total; check for a match with the second one */
1490 curr ^= 1;
1491 if (likely(txq->ctx_cache[curr].flags == flags &&
1492 txq->ctx_cache[curr].tx_offload.data ==
1493 (txq->ctx_cache[curr].tx_offload_mask.data &
1494 tx_offload.data))) {
1495 txq->ctx_curr = curr;
1496 return curr;
1497 }
1498
1499 /* Mismatch, create new one */
1500 return IGC_CTX_NUM;
1501 }
1502
1503 /*
1504 * This is a separate function to allow for optimization opportunities here.
1505 * Rework would be required to go with pre-defined values.
1506 */
1507 static inline void
1508 igc_set_xmit_ctx(struct igc_tx_queue *txq,
1509 volatile struct igc_adv_tx_context_desc *ctx_txd,
1510 uint64_t ol_flags, union igc_tx_offload tx_offload)
1511 {
1512 uint32_t type_tucmd_mlhl;
1513 uint32_t mss_l4len_idx;
1514 uint32_t ctx_curr;
1515 uint32_t vlan_macip_lens;
1516 union igc_tx_offload tx_offload_mask;
1517
1518 /* Switch to the other (least recently used) context slot */
1519 txq->ctx_curr ^= 1;
1520 ctx_curr = txq->ctx_curr;
1521
1522 tx_offload_mask.data = 0;
1523 type_tucmd_mlhl = 0;
1524
1525 /* Specify which HW CTX to upload. */
1526 mss_l4len_idx = (ctx_curr << IGC_ADVTXD_IDX_SHIFT);
1527
1528 if (ol_flags & PKT_TX_VLAN_PKT)
1529 tx_offload_mask.vlan_tci = 0xffff;
1530
1531 /* check if TCP segmentation required for this packet */
1532 if (ol_flags & IGC_TX_OFFLOAD_SEG) {
1533 /* implies IP cksum in IPv4 */
1534 if (ol_flags & PKT_TX_IP_CKSUM)
1535 type_tucmd_mlhl = IGC_ADVTXD_TUCMD_IPV4 |
1536 IGC_ADVTXD_DTYP_CTXT | IGC_ADVTXD_DCMD_DEXT;
1537 else
1538 type_tucmd_mlhl = IGC_ADVTXD_TUCMD_IPV6 |
1539 IGC_ADVTXD_DTYP_CTXT | IGC_ADVTXD_DCMD_DEXT;
1540
1541 if (ol_flags & PKT_TX_TCP_SEG)
1542 type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_TCP;
1543 else
1544 type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_UDP;
1545
1546 tx_offload_mask.data |= TX_TSO_CMP_MASK;
1547 mss_l4len_idx |= (uint32_t)tx_offload.tso_segsz <<
1548 IGC_ADVTXD_MSS_SHIFT;
1549 mss_l4len_idx |= (uint32_t)tx_offload.l4_len <<
1550 IGC_ADVTXD_L4LEN_SHIFT;
1551 } else { /* no TSO, check if hardware checksum is needed */
1552 if (ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_L4_MASK))
1553 tx_offload_mask.data |= TX_MACIP_LEN_CMP_MASK;
1554
1555 if (ol_flags & PKT_TX_IP_CKSUM)
1556 type_tucmd_mlhl = IGC_ADVTXD_TUCMD_IPV4;
1557
1558 switch (ol_flags & PKT_TX_L4_MASK) {
1559 case PKT_TX_TCP_CKSUM:
1560 type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_TCP |
1561 IGC_ADVTXD_DTYP_CTXT | IGC_ADVTXD_DCMD_DEXT;
1562 mss_l4len_idx |= (uint32_t)sizeof(struct rte_tcp_hdr)
1563 << IGC_ADVTXD_L4LEN_SHIFT;
1564 break;
1565 case PKT_TX_UDP_CKSUM:
1566 type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_UDP |
1567 IGC_ADVTXD_DTYP_CTXT | IGC_ADVTXD_DCMD_DEXT;
1568 mss_l4len_idx |= (uint32_t)sizeof(struct rte_udp_hdr)
1569 << IGC_ADVTXD_L4LEN_SHIFT;
1570 break;
1571 case PKT_TX_SCTP_CKSUM:
1572 type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_SCTP |
1573 IGC_ADVTXD_DTYP_CTXT | IGC_ADVTXD_DCMD_DEXT;
1574 mss_l4len_idx |= (uint32_t)sizeof(struct rte_sctp_hdr)
1575 << IGC_ADVTXD_L4LEN_SHIFT;
1576 break;
1577 default:
1578 type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_RSV |
1579 IGC_ADVTXD_DTYP_CTXT | IGC_ADVTXD_DCMD_DEXT;
1580 break;
1581 }
1582 }
1583
1584 txq->ctx_cache[ctx_curr].flags = ol_flags;
1585 txq->ctx_cache[ctx_curr].tx_offload.data =
1586 tx_offload_mask.data & tx_offload.data;
1587 txq->ctx_cache[ctx_curr].tx_offload_mask = tx_offload_mask;
1588
1589 ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
1590 vlan_macip_lens = (uint32_t)tx_offload.data;
1591 ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
1592 ctx_txd->mss_l4len_idx = rte_cpu_to_le_32(mss_l4len_idx);
1593 ctx_txd->u.launch_time = 0;
1594 }
1595
1596 static inline uint32_t
1597 tx_desc_vlan_flags_to_cmdtype(uint64_t ol_flags)
1598 {
1599 uint32_t cmdtype;
1600 static uint32_t vlan_cmd[2] = {0, IGC_ADVTXD_DCMD_VLE};
1601 static uint32_t tso_cmd[2] = {0, IGC_ADVTXD_DCMD_TSE};
1602 cmdtype = vlan_cmd[(ol_flags & PKT_TX_VLAN_PKT) != 0];
1603 cmdtype |= tso_cmd[(ol_flags & IGC_TX_OFFLOAD_SEG) != 0];
1604 return cmdtype;
1605 }
1606
1607 static inline uint32_t
1608 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
1609 {
1610 static const uint32_t l4_olinfo[2] = {0, IGC_ADVTXD_POPTS_TXSM};
1611 static const uint32_t l3_olinfo[2] = {0, IGC_ADVTXD_POPTS_IXSM};
1612 uint32_t tmp;
1613
1614 tmp = l4_olinfo[(ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM];
1615 tmp |= l3_olinfo[(ol_flags & PKT_TX_IP_CKSUM) != 0];
1616 tmp |= l4_olinfo[(ol_flags & IGC_TX_OFFLOAD_SEG) != 0];
1617 return tmp;
1618 }
1619
1620 static uint16_t
1621 igc_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
1622 {
1623 struct igc_tx_queue * const txq = tx_queue;
1624 struct igc_tx_entry * const sw_ring = txq->sw_ring;
1625 struct igc_tx_entry *txe, *txn;
1626 volatile union igc_adv_tx_desc * const txr = txq->tx_ring;
1627 volatile union igc_adv_tx_desc *txd;
1628 struct rte_mbuf *tx_pkt;
1629 struct rte_mbuf *m_seg;
1630 uint64_t buf_dma_addr;
1631 uint32_t olinfo_status;
1632 uint32_t cmd_type_len;
1633 uint32_t pkt_len;
1634 uint16_t slen;
1635 uint64_t ol_flags;
1636 uint16_t tx_end;
1637 uint16_t tx_id;
1638 uint16_t tx_last;
1639 uint16_t nb_tx;
1640 uint64_t tx_ol_req;
1641 uint32_t new_ctx = 0;
1642 union igc_tx_offload tx_offload = {0};
1643
1644 tx_id = txq->tx_tail;
1645 txe = &sw_ring[tx_id];
1646
1647 for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1648 tx_pkt = *tx_pkts++;
1649 pkt_len = tx_pkt->pkt_len;
1650
1651 RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);
1652
1653 /*
1654 * The number of descriptors that must be allocated for a
1655 * packet is the number of segments of that packet, plus 1
1656 * Context Descriptor for the VLAN Tag Identifier, if any.
1657 * Determine the last TX descriptor to allocate in the TX ring
1658 * for the packet, starting from the current position (tx_id)
1659 * in the ring.
1660 */
1661 tx_last = (uint16_t)(tx_id + tx_pkt->nb_segs - 1);
1662
1663 ol_flags = tx_pkt->ol_flags;
1664 tx_ol_req = ol_flags & IGC_TX_OFFLOAD_MASK;
1665
1666 /* Check whether a context descriptor needs to be built. */
1667 if (tx_ol_req) {
1668 tx_offload.l2_len = tx_pkt->l2_len;
1669 tx_offload.l3_len = tx_pkt->l3_len;
1670 tx_offload.l4_len = tx_pkt->l4_len;
1671 tx_offload.vlan_tci = tx_pkt->vlan_tci;
1672 tx_offload.tso_segsz = tx_pkt->tso_segsz;
1673 tx_ol_req = check_tso_para(tx_ol_req, tx_offload);
1674
1675 new_ctx = what_advctx_update(txq, tx_ol_req,
1676 tx_offload);
1677 /* Only allocate a context descriptor if required */
1678 new_ctx = (new_ctx >= IGC_CTX_NUM);
1679 tx_last = (uint16_t)(tx_last + new_ctx);
1680 }
1681 if (tx_last >= txq->nb_tx_desc)
1682 tx_last = (uint16_t)(tx_last - txq->nb_tx_desc);
1683
1684 PMD_TX_LOG(DEBUG,
1685 "port_id=%u queue_id=%u pktlen=%u tx_first=%u tx_last=%u",
1686 txq->port_id, txq->queue_id, pkt_len, tx_id, tx_last);
1687
1688 /*
1689 * Check if there are enough free descriptors in the TX ring
1690 * to transmit the next packet.
1691 * This operation is based on the two following rules:
1692 *
1693 * 1- Only check that the last needed TX descriptor can be
1694 * allocated (by construction, if that descriptor is free,
1695 * all intermediate ones are also free).
1696 *
1697 * For this purpose, the index of the last TX descriptor
1698 * used for a packet (the "last descriptor" of a packet)
1699 * is recorded in the TX entries (the last one included)
1700 * that are associated with all TX descriptors allocated
1701 * for that packet.
1702 *
1703 * 2- Avoid allocating the last free TX descriptor of the
1704 * ring, so that the TDT register is never set to the
1705 * same value stored in parallel by the NIC in the TDH
1706 * register, which would make the TX engine of the NIC
1707 * enter a deadlock situation.
1708 *
1709 * By extension, avoid allocating a free descriptor that
1710 * belongs to the last set of free descriptors allocated
1711 * to the same packet previously transmitted.
1712 */
1713
1714 /*
1715 * The "last descriptor" of the previously sent packet, if any,
1716 * that used the descriptor we now want as our last one (tx_last).
1717 */
1718 tx_end = sw_ring[tx_last].last_id;
1719
1720 /*
1721 * The next descriptor following that "last descriptor" in the
1722 * ring.
1723 */
1724 tx_end = sw_ring[tx_end].next_id;
1725
1726 /*
1727 * The "last descriptor" associated with that next descriptor.
1728 */
1729 tx_end = sw_ring[tx_end].last_id;
1730
1731 /*
1732 * Check that this descriptor is free.
1733 */
1734 if (!(txr[tx_end].wb.status & IGC_TXD_STAT_DD)) {
1735 if (nb_tx == 0)
1736 return 0;
1737 goto end_of_tx;
1738 }
1739
1740 /*
1741 * Set common flags of all TX Data Descriptors.
1742 *
1743 * The following bits must be set in all Data Descriptors:
1744 * - IGC_ADVTXD_DTYP_DATA
1745 * - IGC_ADVTXD_DCMD_DEXT
1746 *
1747 * The following bits must be set in the first Data Descriptor
1748 * and are ignored in the other ones:
1749 * - IGC_ADVTXD_DCMD_IFCS
1750 * - IGC_ADVTXD_MAC_1588
1751 * - IGC_ADVTXD_DCMD_VLE
1752 *
1753 * The following bits must only be set in the last Data
1754 * Descriptor:
1755 * - IGC_TXD_CMD_EOP
1756 *
1757 * The following bits can be set in any Data Descriptor, but
1758 * are only set in the last Data Descriptor:
1759 * - IGC_TXD_CMD_RS
1760 */
1761 cmd_type_len = txq->txd_type |
1762 IGC_ADVTXD_DCMD_IFCS | IGC_ADVTXD_DCMD_DEXT;
1763 if (tx_ol_req & IGC_TX_OFFLOAD_SEG)
1764 pkt_len -= (tx_pkt->l2_len + tx_pkt->l3_len +
1765 tx_pkt->l4_len);
1766 olinfo_status = (pkt_len << IGC_ADVTXD_PAYLEN_SHIFT);
1767
1768 /*
1769 * Timer 0 should be used for packet timestamping;
1770 * sample the packet timestamp into register 0.
1771 */
1772 if (ol_flags & PKT_TX_IEEE1588_TMST)
1773 cmd_type_len |= IGC_ADVTXD_MAC_TSTAMP;
1774
1775 if (tx_ol_req) {
1776 /* Setup TX Advanced context descriptor if required */
1777 if (new_ctx) {
1778 volatile struct igc_adv_tx_context_desc *
1779 ctx_txd = (volatile struct
1780 igc_adv_tx_context_desc *)&txr[tx_id];
1781
1782 txn = &sw_ring[txe->next_id];
1783 RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
1784
1785 if (txe->mbuf != NULL) {
1786 rte_pktmbuf_free_seg(txe->mbuf);
1787 txe->mbuf = NULL;
1788 }
1789
1790 igc_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
1791 tx_offload);
1792
1793 txe->last_id = tx_last;
1794 tx_id = txe->next_id;
1795 txe = txn;
1796 }
1797
1798 /* Setup the TX Advanced Data Descriptor */
1799 cmd_type_len |=
1800 tx_desc_vlan_flags_to_cmdtype(tx_ol_req);
1801 olinfo_status |=
1802 tx_desc_cksum_flags_to_olinfo(tx_ol_req);
1803 olinfo_status |= (uint32_t)txq->ctx_curr <<
1804 IGC_ADVTXD_IDX_SHIFT;
1805 }
1806
1807 m_seg = tx_pkt;
1808 do {
1809 txn = &sw_ring[txe->next_id];
1810 RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
1811
1812 txd = &txr[tx_id];
1813
1814 if (txe->mbuf != NULL)
1815 rte_pktmbuf_free_seg(txe->mbuf);
1816 txe->mbuf = m_seg;
1817
1818 /* Set up transmit descriptor */
1819 slen = (uint16_t)m_seg->data_len;
1820 buf_dma_addr = rte_mbuf_data_iova(m_seg);
1821 txd->read.buffer_addr =
1822 rte_cpu_to_le_64(buf_dma_addr);
1823 txd->read.cmd_type_len =
1824 rte_cpu_to_le_32(cmd_type_len | slen);
1825 txd->read.olinfo_status =
1826 rte_cpu_to_le_32(olinfo_status);
1827 txe->last_id = tx_last;
1828 tx_id = txe->next_id;
1829 txe = txn;
1830 m_seg = m_seg->next;
1831 } while (m_seg != NULL);
1832
1833 /*
1834 * The last packet data descriptor needs End Of Packet (EOP)
1835 * and Report Status (RS).
1836 */
1837 txd->read.cmd_type_len |=
1838 rte_cpu_to_le_32(IGC_TXD_CMD_EOP | IGC_TXD_CMD_RS);
1839 }
1840 end_of_tx:
1841 rte_wmb();
1842
1843 /*
1844 * Set the Transmit Descriptor Tail (TDT).
1845 */
1846 IGC_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
1847 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
1848 txq->port_id, txq->queue_id, tx_id, nb_tx);
1849 txq->tx_tail = tx_id;
1850
1851 return nb_tx;
1852 }
1853
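/*
 * Return the state of the TX descriptor "offset" entries past the
 * current tail: RTE_ETH_TX_DESC_DONE if the hardware has written back
 * the DD bit, RTE_ETH_TX_DESC_FULL otherwise. This is the PMD handler
 * behind the rte_eth_tx_descriptor_status() ethdev API.
 */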
1854 int eth_igc_tx_descriptor_status(void *tx_queue, uint16_t offset)
1855 {
1856 struct igc_tx_queue *txq = tx_queue;
1857 volatile uint32_t *status;
1858 uint32_t desc;
1859
1860 if (unlikely(!txq || offset >= txq->nb_tx_desc))
1861 return -EINVAL;
1862
1863 desc = txq->tx_tail + offset;
1864 if (desc >= txq->nb_tx_desc)
1865 desc -= txq->nb_tx_desc;
1866
1867 status = &txq->tx_ring[desc].wb.status;
1868 if (*status & rte_cpu_to_le_32(IGC_TXD_STAT_DD))
1869 return RTE_ETH_TX_DESC_DONE;
1870
1871 return RTE_ETH_TX_DESC_FULL;
1872 }
1873
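/* Free every mbuf still attached to the software ring of a TX queue. */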
1874 static void
1875 igc_tx_queue_release_mbufs(struct igc_tx_queue *txq)
1876 {
1877 unsigned int i;
1878
1879 if (txq->sw_ring != NULL) {
1880 for (i = 0; i < txq->nb_tx_desc; i++) {
1881 if (txq->sw_ring[i].mbuf != NULL) {
1882 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
1883 txq->sw_ring[i].mbuf = NULL;
1884 }
1885 }
1886 }
1887 }
1888
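/* Release the mbufs, the software ring, and the queue structure itself. */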
1889 static void
1890 igc_tx_queue_release(struct igc_tx_queue *txq)
1891 {
1892 igc_tx_queue_release_mbufs(txq);
1893 rte_free(txq->sw_ring);
1894 rte_free(txq);
1895 }
1896
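/* ethdev TX queue release hook; tolerates a NULL queue pointer. */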
1897 void eth_igc_tx_queue_release(void *txq)
1898 {
1899 if (txq)
1900 igc_tx_queue_release(txq);
1901 }
1902
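/* Reset the queue's head/tail indexes and clear the context cache. */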
1903 static void
1904 igc_reset_tx_queue_stat(struct igc_tx_queue *txq)
1905 {
1906 txq->tx_head = 0;
1907 txq->tx_tail = 0;
1908 txq->ctx_curr = 0;
1909 memset((void *)&txq->ctx_cache, 0,
1910 IGC_CTX_NUM * sizeof(struct igc_advctx_info));
1911 }
1912
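/*
 * Re-initialize the descriptor ring and software ring to a pristine
 * state: every descriptor is marked DD (i.e. free) and the sw_ring
 * entries are linked into a circular list via next_id/last_id.
 */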
1913 static void
1914 igc_reset_tx_queue(struct igc_tx_queue *txq)
1915 {
1916 struct igc_tx_entry *txe = txq->sw_ring;
1917 uint16_t i, prev;
1918
1919 /* Initialize ring entries */
1920 prev = (uint16_t)(txq->nb_tx_desc - 1);
1921 for (i = 0; i < txq->nb_tx_desc; i++) {
1922 volatile union igc_adv_tx_desc *txd = &txq->tx_ring[i];
1923
1924 txd->wb.status = IGC_TXD_STAT_DD;
1925 txe[i].mbuf = NULL;
1926 txe[i].last_id = i;
1927 txe[prev].next_id = i;
1928 prev = i;
1929 }
1930
1931 txq->txd_type = IGC_ADVTXD_DTYP_DATA;
1932 igc_reset_tx_queue_stat(txq);
1933 }
1934
1935 /*
1936 * Clear all RX and TX queues.
1937 */
1938 void
1939 igc_dev_clear_queues(struct rte_eth_dev *dev)
1940 {
1941 uint16_t i;
1942 struct igc_tx_queue *txq;
1943 struct igc_rx_queue *rxq;
1944
1945 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1946 txq = dev->data->tx_queues[i];
1947 if (txq != NULL) {
1948 igc_tx_queue_release_mbufs(txq);
1949 igc_reset_tx_queue(txq);
1950 }
1951 }
1952
1953 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1954 rxq = dev->data->rx_queues[i];
1955 if (rxq != NULL) {
1956 igc_rx_queue_release_mbufs(rxq);
1957 igc_reset_rx_queue(rxq);
1958 }
1959 }
1960 }
1961
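/*
 * Set up a TX queue: validate the descriptor count, allocate the queue
 * structure, the hardware descriptor ring (sized for IGC_MAX_TXD so the
 * queue can later be re-configured without re-reserving the memzone)
 * and the software ring, then install the burst and prepare callbacks.
 *
 * Applications reach this through the generic ethdev API, e.g.
 * (illustrative call, parameter names are placeholders):
 * rte_eth_tx_queue_setup(port_id, queue_idx, nb_desc, socket_id, &txconf);
 */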
1962 int eth_igc_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
1963 uint16_t nb_desc, unsigned int socket_id,
1964 const struct rte_eth_txconf *tx_conf)
1965 {
1966 const struct rte_memzone *tz;
1967 struct igc_tx_queue *txq;
1968 struct igc_hw *hw;
1969 uint32_t size;
1970
1971 if (nb_desc % IGC_TX_DESCRIPTOR_MULTIPLE != 0 ||
1972 nb_desc > IGC_MAX_TXD || nb_desc < IGC_MIN_TXD) {
1973 PMD_DRV_LOG(ERR,
1974 "TX-descriptor must be a multiple of %u and between %u and %u, cur: %u",
1975 IGC_TX_DESCRIPTOR_MULTIPLE,
1976 IGC_MIN_TXD, IGC_MAX_TXD, nb_desc);
1977 return -EINVAL;
1978 }
1979
1980 hw = IGC_DEV_PRIVATE_HW(dev);
1981
1982 /*
1983 * The tx_free_thresh and tx_rs_thresh values are not used in the 2.5G
1984 * driver.
1985 */
1986 if (tx_conf->tx_free_thresh != 0)
1987 PMD_DRV_LOG(INFO,
1988 "The tx_free_thresh parameter is not used for the 2.5G driver");
1989 if (tx_conf->tx_rs_thresh != 0)
1990 PMD_DRV_LOG(INFO,
1991 "The tx_rs_thresh parameter is not used for the 2.5G driver");
1992 if (tx_conf->tx_thresh.wthresh == 0)
1993 PMD_DRV_LOG(INFO,
1994 "To improve 2.5G driver performance, consider setting the TX WTHRESH value to 4, 8, or 16.");
1995
1996 /* Free memory prior to re-allocation if needed */
1997 if (dev->data->tx_queues[queue_idx] != NULL) {
1998 igc_tx_queue_release(dev->data->tx_queues[queue_idx]);
1999 dev->data->tx_queues[queue_idx] = NULL;
2000 }
2001
2002 /* First allocate the tx queue data structure */
2003 txq = rte_zmalloc("ethdev TX queue", sizeof(struct igc_tx_queue),
2004 RTE_CACHE_LINE_SIZE);
2005 if (txq == NULL)
2006 return -ENOMEM;
2007
2008 /*
2009 * Allocate TX ring hardware descriptors. A memzone large enough to
2010 * handle the maximum ring size is allocated in order to allow for
2011 * resizing in later calls to the queue setup function.
2012 */
2013 size = sizeof(union igc_adv_tx_desc) * IGC_MAX_TXD;
2014 tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx, size,
2015 IGC_ALIGN, socket_id);
2016 if (tz == NULL) {
2017 igc_tx_queue_release(txq);
2018 return -ENOMEM;
2019 }
2020
2021 txq->nb_tx_desc = nb_desc;
2022 txq->pthresh = tx_conf->tx_thresh.pthresh;
2023 txq->hthresh = tx_conf->tx_thresh.hthresh;
2024 txq->wthresh = tx_conf->tx_thresh.wthresh;
2025
2026 txq->queue_id = queue_idx;
2027 txq->reg_idx = queue_idx;
2028 txq->port_id = dev->data->port_id;
2029
2030 txq->tdt_reg_addr = IGC_PCI_REG_ADDR(hw, IGC_TDT(txq->reg_idx));
2031 txq->tx_ring_phys_addr = tz->iova;
2032
2033 txq->tx_ring = (union igc_adv_tx_desc *)tz->addr;
2034 /* Allocate software ring */
2035 txq->sw_ring = rte_zmalloc("txq->sw_ring",
2036 sizeof(struct igc_tx_entry) * nb_desc,
2037 RTE_CACHE_LINE_SIZE);
2038 if (txq->sw_ring == NULL) {
2039 igc_tx_queue_release(txq);
2040 return -ENOMEM;
2041 }
2042 PMD_DRV_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%" PRIx64,
2043 txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2044
2045 igc_reset_tx_queue(txq);
2046 dev->tx_pkt_burst = igc_xmit_pkts;
2047 dev->tx_pkt_prepare = &eth_igc_prep_pkts;
2048 dev->data->tx_queues[queue_idx] = txq;
2049 txq->offloads = tx_conf->offloads;
2050
2051 return 0;
2052 }
2053
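/*
 * Free the mbufs of already transmitted packets, walking the software
 * ring from the oldest packet towards the tail and stopping at the
 * first packet whose last descriptor the hardware has not yet marked
 * done (DD). At most "free_cnt" packets are freed (0 effectively means
 * no limit). Returns the number of packets freed; this is the handler
 * behind rte_eth_tx_done_cleanup().
 */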
2054 int
2055 eth_igc_tx_done_cleanup(void *txqueue, uint32_t free_cnt)
2056 {
2057 struct igc_tx_queue *txq = txqueue;
2058 struct igc_tx_entry *sw_ring;
2059 volatile union igc_adv_tx_desc *txr;
2060 uint16_t tx_first; /* First segment analyzed. */
2061 uint16_t tx_id; /* Current segment being processed. */
2062 uint16_t tx_last; /* Last segment in the current packet. */
2063 uint16_t tx_next; /* First segment of the next packet. */
2064 uint32_t count;
2065
2066 if (txq == NULL)
2067 return -ENODEV;
2068
2069 count = 0;
2070 sw_ring = txq->sw_ring;
2071 txr = txq->tx_ring;
2072
2073 /*
2074 * tx_tail is the last sent packet on the sw_ring. Go to the end
2075 * of that packet (the last segment in the packet chain) and
2076 * the next segment will then be the start of the oldest packet
2077 * in the sw_ring. This is the first packet we will attempt
2078 * to free.
2079 */
2080
2081 /* Get last segment in most recently added packet. */
2082 tx_first = sw_ring[txq->tx_tail].last_id;
2083
2084 /* Get the next segment, which is the oldest segment in ring. */
2085 tx_first = sw_ring[tx_first].next_id;
2086
2087 /* Set the current index to the first. */
2088 tx_id = tx_first;
2089
2090 /*
2091 * Loop through each packet. For each packet, verify that an
2092 * mbuf exists and that the last segment is free. If so, free
2093 * it and move on.
2094 */
2095 while (1) {
2096 tx_last = sw_ring[tx_id].last_id;
2097
2098 if (sw_ring[tx_last].mbuf) {
2099 if (!(txr[tx_last].wb.status &
2100 rte_cpu_to_le_32(IGC_TXD_STAT_DD)))
2101 break;
2102
2103 /* Get the start of the next packet. */
2104 tx_next = sw_ring[tx_last].next_id;
2105
2106 /*
2107 * Loop through all segments in a
2108 * packet.
2109 */
2110 do {
2111 rte_pktmbuf_free_seg(sw_ring[tx_id].mbuf);
2112 sw_ring[tx_id].mbuf = NULL;
2113 sw_ring[tx_id].last_id = tx_id;
2114
2115 /* Move to the next segment. */
2116 tx_id = sw_ring[tx_id].next_id;
2117 } while (tx_id != tx_next);
2118
2119 /*
2120 * Increment the number of packets
2121 * freed.
2122 */
2123 count++;
2124 if (unlikely(count == free_cnt))
2125 break;
2126 } else {
2127 /*
2128 * There are multiple reasons to be here:
2129 * 1) All the packets on the ring have been
2130 * freed - tx_id is equal to tx_first
2131 * and some packets have been freed.
2132 * - Done, exit
2133 * 2) The interface has not sent a ring's worth of
2134 * packets yet, so the segment after tail is
2135 * still empty. Or a previous call to this
2136 * function freed some of the segments but
2137 * not all, so there is a hole in the list.
2138 * Hopefully this is a rare case.
2139 * - Walk the list and find the next mbuf. If
2140 * there isn't one, then done.
2141 */
2142 if (likely(tx_id == tx_first && count != 0))
2143 break;
2144
2145 /*
2146 * Walk the list and find the next mbuf, if any.
2147 */
2148 do {
2149 /* Move to the next segment. */
2150 tx_id = sw_ring[tx_id].next_id;
2151
2152 if (sw_ring[tx_id].mbuf)
2153 break;
2154
2155 } while (tx_id != tx_first);
2156
2157 /*
2158 * Determine why the previous loop bailed. If there
2159 * is no mbuf, we are done.
2160 */
2161 if (sw_ring[tx_id].mbuf == NULL)
2162 break;
2163 }
2164 }
2165
2166 return count;
2167 }
2168
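/*
 * Program the hardware for transmission: for every configured TX queue
 * write the ring base/length registers, reset head and tail, set the
 * prefetch/host/write-back thresholds, enable the queue, and finally
 * enable the transmit unit through the TCTL register.
 */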
2169 void
2170 igc_tx_init(struct rte_eth_dev *dev)
2171 {
2172 struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2173 uint32_t tctl;
2174 uint32_t txdctl;
2175 uint16_t i;
2176
2177 /* Setup the Base and Length of the Tx Descriptor Rings. */
2178 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2179 struct igc_tx_queue *txq = dev->data->tx_queues[i];
2180 uint64_t bus_addr = txq->tx_ring_phys_addr;
2181
2182 IGC_WRITE_REG(hw, IGC_TDLEN(txq->reg_idx),
2183 txq->nb_tx_desc *
2184 sizeof(union igc_adv_tx_desc));
2185 IGC_WRITE_REG(hw, IGC_TDBAH(txq->reg_idx),
2186 (uint32_t)(bus_addr >> 32));
2187 IGC_WRITE_REG(hw, IGC_TDBAL(txq->reg_idx),
2188 (uint32_t)bus_addr);
2189
2190 /* Setup the HW Tx Head and Tail descriptor pointers. */
2191 IGC_WRITE_REG(hw, IGC_TDT(txq->reg_idx), 0);
2192 IGC_WRITE_REG(hw, IGC_TDH(txq->reg_idx), 0);
2193
2194 /* Setup Transmit threshold registers. */
2195 txdctl = ((uint32_t)txq->pthresh << IGC_TXDCTL_PTHRESH_SHIFT) &
2196 IGC_TXDCTL_PTHRESH_MSK;
2197 txdctl |= ((uint32_t)txq->hthresh << IGC_TXDCTL_HTHRESH_SHIFT) &
2198 IGC_TXDCTL_HTHRESH_MSK;
2199 txdctl |= ((uint32_t)txq->wthresh << IGC_TXDCTL_WTHRESH_SHIFT) &
2200 IGC_TXDCTL_WTHRESH_MSK;
2201 txdctl |= IGC_TXDCTL_QUEUE_ENABLE;
2202 IGC_WRITE_REG(hw, IGC_TXDCTL(txq->reg_idx), txdctl);
2203 }
2204
2205 igc_config_collision_dist(hw);
2206
2207 /* Program the Transmit Control Register. */
2208 tctl = IGC_READ_REG(hw, IGC_TCTL);
2209 tctl &= ~IGC_TCTL_CT;
2210 tctl |= (IGC_TCTL_PSP | IGC_TCTL_RTLC | IGC_TCTL_EN |
2211 ((uint32_t)IGC_COLLISION_THRESHOLD << IGC_CT_SHIFT));
2212
2213 /* This write will effectively turn on the transmit unit. */
2214 IGC_WRITE_REG(hw, IGC_TCTL, tctl);
2215 }
2216
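/* Fill in rte_eth_rxq_info for a RX queue, for rte_eth_rx_queue_info_get(). */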
2217 void
2218 eth_igc_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
2219 struct rte_eth_rxq_info *qinfo)
2220 {
2221 struct igc_rx_queue *rxq;
2222
2223 rxq = dev->data->rx_queues[queue_id];
2224
2225 qinfo->mp = rxq->mb_pool;
2226 qinfo->scattered_rx = dev->data->scattered_rx;
2227 qinfo->nb_desc = rxq->nb_rx_desc;
2228
2229 qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
2230 qinfo->conf.rx_drop_en = rxq->drop_en;
2231 qinfo->conf.offloads = rxq->offloads;
2232 qinfo->conf.rx_thresh.hthresh = rxq->hthresh;
2233 qinfo->conf.rx_thresh.pthresh = rxq->pthresh;
2234 qinfo->conf.rx_thresh.wthresh = rxq->wthresh;
2235 }
2236
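/* Fill in rte_eth_txq_info for a TX queue, for rte_eth_tx_queue_info_get(). */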
2237 void
2238 eth_igc_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
2239 struct rte_eth_txq_info *qinfo)
2240 {
2241 struct igc_tx_queue *txq;
2242
2243 txq = dev->data->tx_queues[queue_id];
2244
2245 qinfo->nb_desc = txq->nb_tx_desc;
2246
2247 qinfo->conf.tx_thresh.pthresh = txq->pthresh;
2248 qinfo->conf.tx_thresh.hthresh = txq->hthresh;
2249 qinfo->conf.tx_thresh.wthresh = txq->wthresh;
2250 qinfo->conf.offloads = txq->offloads;
2251 }
2252
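/*
 * Enable or disable VLAN stripping on a single RX queue by updating the
 * per-queue DVMOLR register, keeping the queue's offload flags in sync.
 * Typically reached through rte_eth_dev_set_vlan_strip_on_queue();
 * out-of-range queue indexes are rejected with an error log.
 */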
2253 void
2254 eth_igc_vlan_strip_queue_set(struct rte_eth_dev *dev,
2255 uint16_t rx_queue_id, int on)
2256 {
2257 struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2258 struct igc_rx_queue *rxq = dev->data->rx_queues[rx_queue_id];
2259 uint32_t reg_val;
2260
2261 if (rx_queue_id >= IGC_QUEUE_PAIRS_NUM) {
2262 PMD_DRV_LOG(ERR, "Queue index (%u) is illegal, max is %u",
2263 rx_queue_id, IGC_QUEUE_PAIRS_NUM - 1);
2264 return;
2265 }
2266
2267 reg_val = IGC_READ_REG(hw, IGC_DVMOLR(rx_queue_id));
2268 if (on) {
2269 /* If the VLAN has been stripped off, the CRC is meaningless. */
2270 reg_val |= IGC_DVMOLR_STRVLAN | IGC_DVMOLR_STRCRC;
2271 rxq->offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
2272 } else {
2273 reg_val &= ~(IGC_DVMOLR_STRVLAN | IGC_DVMOLR_HIDVLAN |
2274 IGC_DVMOLR_STRCRC);
2275 rxq->offloads &= ~DEV_RX_OFFLOAD_VLAN_STRIP;
2276 }
2277
2278 IGC_WRITE_REG(hw, IGC_DVMOLR(rx_queue_id), reg_val);
2279 }
2280