xref: /dpdk/drivers/net/igc/igc_ethdev.c (revision ef156f4e)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2019-2020 Intel Corporation
3  */
4 
5 #include <stdint.h>
6 #include <string.h>
7 
8 #include <rte_string_fns.h>
9 #include <rte_pci.h>
10 #include <rte_bus_pci.h>
11 #include <ethdev_driver.h>
12 #include <ethdev_pci.h>
13 #include <rte_malloc.h>
14 #include <rte_alarm.h>
15 
16 #include "igc_logs.h"
17 #include "igc_txrx.h"
18 #include "igc_filter.h"
19 #include "igc_flow.h"
20 
21 #define IGC_INTEL_VENDOR_ID		0x8086
22 
23 /*
24  * The overhead from MTU to max frame size.
25  * The VLAN tag is considered, so its size is counted as well.
26  */
27 #define IGC_ETH_OVERHEAD		(RTE_ETHER_HDR_LEN + \
28 					RTE_ETHER_CRC_LEN + VLAN_TAG_SIZE)
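/*
 * For example, with RTE_ETHER_HDR_LEN = 14, RTE_ETHER_CRC_LEN = 4 and
 * VLAN_TAG_SIZE = 4 (the usual values), the overhead is 22 bytes, so a
 * 1500-byte MTU corresponds to a 1522-byte max frame size.
 */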
29 
30 #define IGC_FC_PAUSE_TIME		0x0680
31 #define IGC_LINK_UPDATE_CHECK_TIMEOUT	90  /* 9s */
32 #define IGC_LINK_UPDATE_CHECK_INTERVAL	100 /* ms */
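/* 90 checks spaced 100 ms apart give the ~9 s wait-to-complete budget. */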
33 
34 #define IGC_MISC_VEC_ID			RTE_INTR_VEC_ZERO_OFFSET
35 #define IGC_RX_VEC_START		RTE_INTR_VEC_RXTX_OFFSET
36 #define IGC_MSIX_OTHER_INTR_VEC		0   /* MSI-X other interrupt vector */
37 #define IGC_FLAG_NEED_LINK_UPDATE	(1u << 0)	/* need to update link */
38 
39 #define IGC_DEFAULT_RX_FREE_THRESH	32
40 
41 #define IGC_DEFAULT_RX_PTHRESH		8
42 #define IGC_DEFAULT_RX_HTHRESH		8
43 #define IGC_DEFAULT_RX_WTHRESH		4
44 
45 #define IGC_DEFAULT_TX_PTHRESH		8
46 #define IGC_DEFAULT_TX_HTHRESH		1
47 #define IGC_DEFAULT_TX_WTHRESH		16
48 
49 /* MSI-X other interrupt vector */
50 #define IGC_MSIX_OTHER_INTR_VEC		0
51 
52 /* External VLAN Enable bit mask */
53 #define IGC_CTRL_EXT_EXT_VLAN		(1u << 26)
54 
55 /* Speed select */
56 #define IGC_CTRL_SPEED_MASK		(7u << 8)
57 #define IGC_CTRL_SPEED_2500		(6u << 8)
58 
59 /* External VLAN Ether Type bit mask and shift */
60 #define IGC_VET_EXT			0xFFFF0000
61 #define IGC_VET_EXT_SHIFT		16
62 
63 /* Force EEE Auto-negotiation */
64 #define IGC_EEER_EEE_FRC_AN		(1u << 28)
65 
66 /* Per Queue Good Packets Received Count */
67 #define IGC_PQGPRC(idx)		(0x10010 + 0x100 * (idx))
68 /* Per Queue Good Octets Received Count */
69 #define IGC_PQGORC(idx)		(0x10018 + 0x100 * (idx))
70 /* Per Queue Good Octets Transmitted Count */
71 #define IGC_PQGOTC(idx)		(0x10034 + 0x100 * (idx))
72 /* Per Queue Multicast Packets Received Count */
73 #define IGC_PQMPRC(idx)		(0x10038 + 0x100 * (idx))
74 /* Transmit Queue Drop Packet Count */
75 #define IGC_TQDPC(idx)		(0xe030 + 0x40 * (idx))
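/*
 * The per-queue statistics registers are laid out at fixed strides, e.g.
 * IGC_PQGPRC(2) = 0x10010 + 2 * 0x100 = 0x10210 and
 * IGC_TQDPC(2) = 0xe030 + 2 * 0x40 = 0xe0b0.
 */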
76 
77 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
78 #define U32_0_IN_U64		0	/* lower bytes of u64 */
79 #define U32_1_IN_U64		1	/* higher bytes of u64 */
80 #else
81 #define U32_0_IN_U64		1
82 #define U32_1_IN_U64		0
83 #endif
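/*
 * These indexes select the 32-bit halves of a 64-bit counter when it is
 * viewed as u32 dword[2] (see igc_read_queue_stats_register() below):
 * dword[U32_0_IN_U64] is always the low half and dword[U32_1_IN_U64] the
 * high half, regardless of host byte order.
 */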
84 
85 #define IGC_ALARM_INTERVAL	8000000u
86 /* us; some per-queue registers wrap around back to 0 after about 13.6 s. */
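/*
 * E.g. a 32-bit byte counter at 2.5 Gbps (~312.5 MB/s) wraps after about
 * 2^32 / 312.5e6 ~= 13.7 s, so polling every 8 s is frequent enough to
 * catch every wrap (line rate assumed for illustration).
 */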
87 
88 static const struct rte_eth_desc_lim rx_desc_lim = {
89 	.nb_max = IGC_MAX_RXD,
90 	.nb_min = IGC_MIN_RXD,
91 	.nb_align = IGC_RXD_ALIGN,
92 };
93 
94 static const struct rte_eth_desc_lim tx_desc_lim = {
95 	.nb_max = IGC_MAX_TXD,
96 	.nb_min = IGC_MIN_TXD,
97 	.nb_align = IGC_TXD_ALIGN,
98 	.nb_seg_max = IGC_TX_MAX_SEG,
99 	.nb_mtu_seg_max = IGC_TX_MAX_MTU_SEG,
100 };
101 
102 static const struct rte_pci_id pci_id_igc_map[] = {
103 	{ RTE_PCI_DEVICE(IGC_INTEL_VENDOR_ID, IGC_DEV_ID_I225_LM) },
104 	{ RTE_PCI_DEVICE(IGC_INTEL_VENDOR_ID, IGC_DEV_ID_I225_V)  },
105 	{ RTE_PCI_DEVICE(IGC_INTEL_VENDOR_ID, IGC_DEV_ID_I225_I)  },
106 	{ RTE_PCI_DEVICE(IGC_INTEL_VENDOR_ID, IGC_DEV_ID_I225_K)  },
107 	{ .vendor_id = 0, /* sentinel */ },
108 };
109 
110 /* store statistics names and their offsets in the stats structure */
111 struct rte_igc_xstats_name_off {
112 	char name[RTE_ETH_XSTATS_NAME_SIZE];
113 	unsigned int offset;
114 };
115 
116 static const struct rte_igc_xstats_name_off rte_igc_stats_strings[] = {
117 	{"rx_crc_errors", offsetof(struct igc_hw_stats, crcerrs)},
118 	{"rx_align_errors", offsetof(struct igc_hw_stats, algnerrc)},
119 	{"rx_errors", offsetof(struct igc_hw_stats, rxerrc)},
120 	{"rx_missed_packets", offsetof(struct igc_hw_stats, mpc)},
121 	{"tx_single_collision_packets", offsetof(struct igc_hw_stats, scc)},
122 	{"tx_multiple_collision_packets", offsetof(struct igc_hw_stats, mcc)},
123 	{"tx_excessive_collision_packets", offsetof(struct igc_hw_stats,
124 		ecol)},
125 	{"tx_late_collisions", offsetof(struct igc_hw_stats, latecol)},
126 	{"tx_total_collisions", offsetof(struct igc_hw_stats, colc)},
127 	{"tx_deferred_packets", offsetof(struct igc_hw_stats, dc)},
128 	{"tx_no_carrier_sense_packets", offsetof(struct igc_hw_stats, tncrs)},
129 	{"tx_discarded_packets", offsetof(struct igc_hw_stats, htdpmc)},
130 	{"rx_length_errors", offsetof(struct igc_hw_stats, rlec)},
131 	{"rx_xon_packets", offsetof(struct igc_hw_stats, xonrxc)},
132 	{"tx_xon_packets", offsetof(struct igc_hw_stats, xontxc)},
133 	{"rx_xoff_packets", offsetof(struct igc_hw_stats, xoffrxc)},
134 	{"tx_xoff_packets", offsetof(struct igc_hw_stats, xofftxc)},
135 	{"rx_flow_control_unsupported_packets", offsetof(struct igc_hw_stats,
136 		fcruc)},
137 	{"rx_size_64_packets", offsetof(struct igc_hw_stats, prc64)},
138 	{"rx_size_65_to_127_packets", offsetof(struct igc_hw_stats, prc127)},
139 	{"rx_size_128_to_255_packets", offsetof(struct igc_hw_stats, prc255)},
140 	{"rx_size_256_to_511_packets", offsetof(struct igc_hw_stats, prc511)},
141 	{"rx_size_512_to_1023_packets", offsetof(struct igc_hw_stats,
142 		prc1023)},
143 	{"rx_size_1024_to_max_packets", offsetof(struct igc_hw_stats,
144 		prc1522)},
145 	{"rx_broadcast_packets", offsetof(struct igc_hw_stats, bprc)},
146 	{"rx_multicast_packets", offsetof(struct igc_hw_stats, mprc)},
147 	{"rx_undersize_errors", offsetof(struct igc_hw_stats, ruc)},
148 	{"rx_fragment_errors", offsetof(struct igc_hw_stats, rfc)},
149 	{"rx_oversize_errors", offsetof(struct igc_hw_stats, roc)},
150 	{"rx_jabber_errors", offsetof(struct igc_hw_stats, rjc)},
151 	{"rx_no_buffers", offsetof(struct igc_hw_stats, rnbc)},
152 	{"rx_management_packets", offsetof(struct igc_hw_stats, mgprc)},
153 	{"rx_management_dropped", offsetof(struct igc_hw_stats, mgpdc)},
154 	{"tx_management_packets", offsetof(struct igc_hw_stats, mgptc)},
155 	{"rx_total_packets", offsetof(struct igc_hw_stats, tpr)},
156 	{"tx_total_packets", offsetof(struct igc_hw_stats, tpt)},
157 	{"rx_total_bytes", offsetof(struct igc_hw_stats, tor)},
158 	{"tx_total_bytes", offsetof(struct igc_hw_stats, tot)},
159 	{"tx_size_64_packets", offsetof(struct igc_hw_stats, ptc64)},
160 	{"tx_size_65_to_127_packets", offsetof(struct igc_hw_stats, ptc127)},
161 	{"tx_size_128_to_255_packets", offsetof(struct igc_hw_stats, ptc255)},
162 	{"tx_size_256_to_511_packets", offsetof(struct igc_hw_stats, ptc511)},
163 	{"tx_size_512_to_1023_packets", offsetof(struct igc_hw_stats,
164 		ptc1023)},
165 	{"tx_size_1023_to_max_packets", offsetof(struct igc_hw_stats,
166 		ptc1522)},
167 	{"tx_multicast_packets", offsetof(struct igc_hw_stats, mptc)},
168 	{"tx_broadcast_packets", offsetof(struct igc_hw_stats, bptc)},
169 	{"tx_tso_packets", offsetof(struct igc_hw_stats, tsctc)},
170 	{"rx_sent_to_host_packets", offsetof(struct igc_hw_stats, rpthc)},
171 	{"tx_sent_by_host_packets", offsetof(struct igc_hw_stats, hgptc)},
172 	{"interrupt_assert_count", offsetof(struct igc_hw_stats, iac)},
173 	{"rx_descriptor_lower_threshold",
174 		offsetof(struct igc_hw_stats, icrxdmtc)},
175 };
176 
177 #define IGC_NB_XSTATS (sizeof(rte_igc_stats_strings) / \
178 		sizeof(rte_igc_stats_strings[0]))
179 
180 static int eth_igc_configure(struct rte_eth_dev *dev);
181 static int eth_igc_link_update(struct rte_eth_dev *dev, int wait_to_complete);
182 static int eth_igc_stop(struct rte_eth_dev *dev);
183 static int eth_igc_start(struct rte_eth_dev *dev);
184 static int eth_igc_set_link_up(struct rte_eth_dev *dev);
185 static int eth_igc_set_link_down(struct rte_eth_dev *dev);
186 static int eth_igc_close(struct rte_eth_dev *dev);
187 static int eth_igc_reset(struct rte_eth_dev *dev);
188 static int eth_igc_promiscuous_enable(struct rte_eth_dev *dev);
189 static int eth_igc_promiscuous_disable(struct rte_eth_dev *dev);
190 static int eth_igc_fw_version_get(struct rte_eth_dev *dev,
191 				char *fw_version, size_t fw_size);
192 static int eth_igc_infos_get(struct rte_eth_dev *dev,
193 			struct rte_eth_dev_info *dev_info);
194 static int eth_igc_led_on(struct rte_eth_dev *dev);
195 static int eth_igc_led_off(struct rte_eth_dev *dev);
196 static const uint32_t *eth_igc_supported_ptypes_get(struct rte_eth_dev *dev);
197 static int eth_igc_rar_set(struct rte_eth_dev *dev,
198 		struct rte_ether_addr *mac_addr, uint32_t index, uint32_t pool);
199 static void eth_igc_rar_clear(struct rte_eth_dev *dev, uint32_t index);
200 static int eth_igc_default_mac_addr_set(struct rte_eth_dev *dev,
201 			struct rte_ether_addr *addr);
202 static int eth_igc_set_mc_addr_list(struct rte_eth_dev *dev,
203 			 struct rte_ether_addr *mc_addr_set,
204 			 uint32_t nb_mc_addr);
205 static int eth_igc_allmulticast_enable(struct rte_eth_dev *dev);
206 static int eth_igc_allmulticast_disable(struct rte_eth_dev *dev);
207 static int eth_igc_mtu_set(struct rte_eth_dev *dev, uint16_t mtu);
208 static int eth_igc_stats_get(struct rte_eth_dev *dev,
209 			struct rte_eth_stats *rte_stats);
210 static int eth_igc_xstats_get(struct rte_eth_dev *dev,
211 			struct rte_eth_xstat *xstats, unsigned int n);
212 static int eth_igc_xstats_get_by_id(struct rte_eth_dev *dev,
213 				const uint64_t *ids,
214 				uint64_t *values, unsigned int n);
215 static int eth_igc_xstats_get_names(struct rte_eth_dev *dev,
216 				struct rte_eth_xstat_name *xstats_names,
217 				unsigned int size);
218 static int eth_igc_xstats_get_names_by_id(struct rte_eth_dev *dev,
219 		struct rte_eth_xstat_name *xstats_names, const uint64_t *ids,
220 		unsigned int limit);
221 static int eth_igc_xstats_reset(struct rte_eth_dev *dev);
222 static int
223 eth_igc_queue_stats_mapping_set(struct rte_eth_dev *dev,
224 	uint16_t queue_id, uint8_t stat_idx, uint8_t is_rx);
225 static int
226 eth_igc_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id);
227 static int
228 eth_igc_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id);
229 static int
230 eth_igc_flow_ctrl_get(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf);
231 static int
232 eth_igc_flow_ctrl_set(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf);
233 static int eth_igc_rss_reta_update(struct rte_eth_dev *dev,
234 			struct rte_eth_rss_reta_entry64 *reta_conf,
235 			uint16_t reta_size);
236 static int eth_igc_rss_reta_query(struct rte_eth_dev *dev,
237 		       struct rte_eth_rss_reta_entry64 *reta_conf,
238 		       uint16_t reta_size);
239 static int eth_igc_rss_hash_update(struct rte_eth_dev *dev,
240 			struct rte_eth_rss_conf *rss_conf);
241 static int eth_igc_rss_hash_conf_get(struct rte_eth_dev *dev,
242 			struct rte_eth_rss_conf *rss_conf);
243 static int
244 eth_igc_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on);
245 static int eth_igc_vlan_offload_set(struct rte_eth_dev *dev, int mask);
246 static int eth_igc_vlan_tpid_set(struct rte_eth_dev *dev,
247 		      enum rte_vlan_type vlan_type, uint16_t tpid);
248 
249 static const struct eth_dev_ops eth_igc_ops = {
250 	.dev_configure		= eth_igc_configure,
251 	.link_update		= eth_igc_link_update,
252 	.dev_stop		= eth_igc_stop,
253 	.dev_start		= eth_igc_start,
254 	.dev_close		= eth_igc_close,
255 	.dev_reset		= eth_igc_reset,
256 	.dev_set_link_up	= eth_igc_set_link_up,
257 	.dev_set_link_down	= eth_igc_set_link_down,
258 	.promiscuous_enable	= eth_igc_promiscuous_enable,
259 	.promiscuous_disable	= eth_igc_promiscuous_disable,
260 	.allmulticast_enable	= eth_igc_allmulticast_enable,
261 	.allmulticast_disable	= eth_igc_allmulticast_disable,
262 	.fw_version_get		= eth_igc_fw_version_get,
263 	.dev_infos_get		= eth_igc_infos_get,
264 	.dev_led_on		= eth_igc_led_on,
265 	.dev_led_off		= eth_igc_led_off,
266 	.dev_supported_ptypes_get = eth_igc_supported_ptypes_get,
267 	.mtu_set		= eth_igc_mtu_set,
268 	.mac_addr_add		= eth_igc_rar_set,
269 	.mac_addr_remove	= eth_igc_rar_clear,
270 	.mac_addr_set		= eth_igc_default_mac_addr_set,
271 	.set_mc_addr_list	= eth_igc_set_mc_addr_list,
272 
273 	.rx_queue_setup		= eth_igc_rx_queue_setup,
274 	.rx_queue_release	= eth_igc_rx_queue_release,
275 	.tx_queue_setup		= eth_igc_tx_queue_setup,
276 	.tx_queue_release	= eth_igc_tx_queue_release,
277 	.tx_done_cleanup	= eth_igc_tx_done_cleanup,
278 	.rxq_info_get		= eth_igc_rxq_info_get,
279 	.txq_info_get		= eth_igc_txq_info_get,
280 	.stats_get		= eth_igc_stats_get,
281 	.xstats_get		= eth_igc_xstats_get,
282 	.xstats_get_by_id	= eth_igc_xstats_get_by_id,
283 	.xstats_get_names_by_id	= eth_igc_xstats_get_names_by_id,
284 	.xstats_get_names	= eth_igc_xstats_get_names,
285 	.stats_reset		= eth_igc_xstats_reset,
286 	.xstats_reset		= eth_igc_xstats_reset,
287 	.queue_stats_mapping_set = eth_igc_queue_stats_mapping_set,
288 	.rx_queue_intr_enable	= eth_igc_rx_queue_intr_enable,
289 	.rx_queue_intr_disable	= eth_igc_rx_queue_intr_disable,
290 	.flow_ctrl_get		= eth_igc_flow_ctrl_get,
291 	.flow_ctrl_set		= eth_igc_flow_ctrl_set,
292 	.reta_update		= eth_igc_rss_reta_update,
293 	.reta_query		= eth_igc_rss_reta_query,
294 	.rss_hash_update	= eth_igc_rss_hash_update,
295 	.rss_hash_conf_get	= eth_igc_rss_hash_conf_get,
296 	.vlan_filter_set	= eth_igc_vlan_filter_set,
297 	.vlan_offload_set	= eth_igc_vlan_offload_set,
298 	.vlan_tpid_set		= eth_igc_vlan_tpid_set,
299 	.vlan_strip_queue_set	= eth_igc_vlan_strip_queue_set,
300 	.flow_ops_get		= eth_igc_flow_ops_get,
301 };
302 
303 /*
304  * multiple queue mode checking
305  */
306 static int
307 igc_check_mq_mode(struct rte_eth_dev *dev)
308 {
309 	enum rte_eth_rx_mq_mode rx_mq_mode = dev->data->dev_conf.rxmode.mq_mode;
310 	enum rte_eth_tx_mq_mode tx_mq_mode = dev->data->dev_conf.txmode.mq_mode;
311 
312 	if (RTE_ETH_DEV_SRIOV(dev).active != 0) {
313 		PMD_INIT_LOG(ERR, "SRIOV is not supported.");
314 		return -EINVAL;
315 	}
316 
317 	if (rx_mq_mode != ETH_MQ_RX_NONE &&
318 		rx_mq_mode != ETH_MQ_RX_RSS) {
319 		/* RSS together with VMDq not supported */
320 		PMD_INIT_LOG(ERR, "RX mode %d is not supported.",
321 				rx_mq_mode);
322 		return -EINVAL;
323 	}
324 
325 	/* To not break software that sets an invalid mode, only display a
326 	 * warning if an invalid mode is used.
327 	 */
328 	if (tx_mq_mode != ETH_MQ_TX_NONE)
329 		PMD_INIT_LOG(WARNING,
330 			"TX mode %d is not supported. Due to meaningless in this driver, just ignore",
331 			tx_mq_mode);
332 
333 	return 0;
334 }
335 
336 static int
337 eth_igc_configure(struct rte_eth_dev *dev)
338 {
339 	struct igc_interrupt *intr = IGC_DEV_PRIVATE_INTR(dev);
340 	int ret;
341 
342 	PMD_INIT_FUNC_TRACE();
343 
344 	if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG)
345 		dev->data->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_RSS_HASH;
346 
347 	ret  = igc_check_mq_mode(dev);
348 	if (ret != 0)
349 		return ret;
350 
351 	intr->flags |= IGC_FLAG_NEED_LINK_UPDATE;
352 	return 0;
353 }
354 
355 static int
356 eth_igc_set_link_up(struct rte_eth_dev *dev)
357 {
358 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
359 
360 	if (hw->phy.media_type == igc_media_type_copper)
361 		igc_power_up_phy(hw);
362 	else
363 		igc_power_up_fiber_serdes_link(hw);
364 	return 0;
365 }
366 
367 static int
368 eth_igc_set_link_down(struct rte_eth_dev *dev)
369 {
370 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
371 
372 	if (hw->phy.media_type == igc_media_type_copper)
373 		igc_power_down_phy(hw);
374 	else
375 		igc_shutdown_fiber_serdes_link(hw);
376 	return 0;
377 }
378 
379 /*
380  * disable other interrupt
381  */
382 static void
383 igc_intr_other_disable(struct rte_eth_dev *dev)
384 {
385 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
386 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
387 	struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
388 
389 	if (rte_intr_allow_others(intr_handle) &&
390 		dev->data->dev_conf.intr_conf.lsc) {
391 		IGC_WRITE_REG(hw, IGC_EIMC, 1u << IGC_MSIX_OTHER_INTR_VEC);
392 	}
393 
394 	IGC_WRITE_REG(hw, IGC_IMC, ~0);
395 	IGC_WRITE_FLUSH(hw);
396 }
397 
398 /*
399  * enable other interrupt
400  */
401 static inline void
402 igc_intr_other_enable(struct rte_eth_dev *dev)
403 {
404 	struct igc_interrupt *intr = IGC_DEV_PRIVATE_INTR(dev);
405 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
406 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
407 	struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
408 
409 	if (rte_intr_allow_others(intr_handle) &&
410 		dev->data->dev_conf.intr_conf.lsc) {
411 		IGC_WRITE_REG(hw, IGC_EIMS, 1u << IGC_MSIX_OTHER_INTR_VEC);
412 	}
413 
414 	IGC_WRITE_REG(hw, IGC_IMS, intr->mask);
415 	IGC_WRITE_FLUSH(hw);
416 }
417 
418 /*
419  * It reads ICR and gets interrupt causes, check it and set a bit flag
420  * to update link status.
421  */
422 static void
423 eth_igc_interrupt_get_status(struct rte_eth_dev *dev)
424 {
425 	uint32_t icr;
426 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
427 	struct igc_interrupt *intr = IGC_DEV_PRIVATE_INTR(dev);
428 
429 	/* read-on-clear nic registers here */
430 	icr = IGC_READ_REG(hw, IGC_ICR);
431 
432 	intr->flags = 0;
433 	if (icr & IGC_ICR_LSC)
434 		intr->flags |= IGC_FLAG_NEED_LINK_UPDATE;
435 }
436 
437 /* return 0 means link status changed, -1 means not changed */
438 static int
439 eth_igc_link_update(struct rte_eth_dev *dev, int wait_to_complete)
440 {
441 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
442 	struct rte_eth_link link;
443 	int link_check, count;
444 
445 	link_check = 0;
446 	hw->mac.get_link_status = 1;
447 
448 	/* possible wait-to-complete in up to 9 seconds */
449 	for (count = 0; count < IGC_LINK_UPDATE_CHECK_TIMEOUT; count++) {
450 		/* Read the real link status */
451 		switch (hw->phy.media_type) {
452 		case igc_media_type_copper:
453 			/* Do the work to read phy */
454 			igc_check_for_link(hw);
455 			link_check = !hw->mac.get_link_status;
456 			break;
457 
458 		case igc_media_type_fiber:
459 			igc_check_for_link(hw);
460 			link_check = (IGC_READ_REG(hw, IGC_STATUS) &
461 				      IGC_STATUS_LU);
462 			break;
463 
464 		case igc_media_type_internal_serdes:
465 			igc_check_for_link(hw);
466 			link_check = hw->mac.serdes_has_link;
467 			break;
468 
469 		default:
470 			break;
471 		}
472 		if (link_check || wait_to_complete == 0)
473 			break;
474 		rte_delay_ms(IGC_LINK_UPDATE_CHECK_INTERVAL);
475 	}
476 	memset(&link, 0, sizeof(link));
477 
478 	/* Now we check if a transition has happened */
479 	if (link_check) {
480 		uint16_t duplex, speed;
481 		hw->mac.ops.get_link_up_info(hw, &speed, &duplex);
482 		link.link_duplex = (duplex == FULL_DUPLEX) ?
483 				ETH_LINK_FULL_DUPLEX :
484 				ETH_LINK_HALF_DUPLEX;
485 		link.link_speed = speed;
486 		link.link_status = ETH_LINK_UP;
487 		link.link_autoneg = !(dev->data->dev_conf.link_speeds &
488 				ETH_LINK_SPEED_FIXED);
489 
490 		if (speed == SPEED_2500) {
491 			uint32_t tipg = IGC_READ_REG(hw, IGC_TIPG);
492 			if ((tipg & IGC_TIPG_IPGT_MASK) != 0x0b) {
493 				tipg &= ~IGC_TIPG_IPGT_MASK;
494 				tipg |= 0x0b;
495 				IGC_WRITE_REG(hw, IGC_TIPG, tipg);
496 			}
497 		}
498 	} else {
499 		link.link_speed = 0;
500 		link.link_duplex = ETH_LINK_HALF_DUPLEX;
501 		link.link_status = ETH_LINK_DOWN;
502 		link.link_autoneg = ETH_LINK_FIXED;
503 	}
504 
505 	return rte_eth_linkstatus_set(dev, &link);
506 }
507 
508 /*
509  * It executes link_update after knowing an interrupt is present.
510  */
511 static void
512 eth_igc_interrupt_action(struct rte_eth_dev *dev)
513 {
514 	struct igc_interrupt *intr = IGC_DEV_PRIVATE_INTR(dev);
515 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
516 	struct rte_eth_link link;
517 	int ret;
518 
519 	if (intr->flags & IGC_FLAG_NEED_LINK_UPDATE) {
520 		intr->flags &= ~IGC_FLAG_NEED_LINK_UPDATE;
521 
522 		/* set get_link_status to check register later */
523 		ret = eth_igc_link_update(dev, 0);
524 
525 		/* check if link has changed */
526 		if (ret < 0)
527 			return;
528 
529 		rte_eth_linkstatus_get(dev, &link);
530 		if (link.link_status)
531 			PMD_DRV_LOG(INFO,
532 				" Port %d: Link Up - speed %u Mbps - %s",
533 				dev->data->port_id,
534 				(unsigned int)link.link_speed,
535 				link.link_duplex == ETH_LINK_FULL_DUPLEX ?
536 				"full-duplex" : "half-duplex");
537 		else
538 			PMD_DRV_LOG(INFO, " Port %d: Link Down",
539 				dev->data->port_id);
540 
541 		PMD_DRV_LOG(DEBUG, "PCI Address: " PCI_PRI_FMT,
542 				pci_dev->addr.domain,
543 				pci_dev->addr.bus,
544 				pci_dev->addr.devid,
545 				pci_dev->addr.function);
546 		rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
547 	}
548 }
549 
550 /*
551  * Interrupt handler which shall be registered in advance.
552  *
553  * @param param
554  *  The address of the parameter (struct rte_eth_dev *) registered
555  *  beforehand, i.e. the port this handler serves.
556  *
557  */
558 static void
559 eth_igc_interrupt_handler(void *param)
560 {
561 	struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
562 
563 	eth_igc_interrupt_get_status(dev);
564 	eth_igc_interrupt_action(dev);
565 }
566 
567 static void igc_read_queue_stats_register(struct rte_eth_dev *dev);
568 
569 /*
570  * Update the queue statistics every IGC_ALARM_INTERVAL microseconds.
571  * @param
572  *  The address of parameter (struct rte_eth_dev *) registered before.
573  */
574 static void
575 igc_update_queue_stats_handler(void *param)
576 {
577 	struct rte_eth_dev *dev = param;
578 	igc_read_queue_stats_register(dev);
579 	rte_eal_alarm_set(IGC_ALARM_INTERVAL,
580 			igc_update_queue_stats_handler, dev);
581 }
582 
583 /*
584  * rx,tx enable/disable
585  */
586 static void
587 eth_igc_rxtx_control(struct rte_eth_dev *dev, bool enable)
588 {
589 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
590 	uint32_t tctl, rctl;
591 
592 	tctl = IGC_READ_REG(hw, IGC_TCTL);
593 	rctl = IGC_READ_REG(hw, IGC_RCTL);
594 
595 	if (enable) {
596 		/* enable Tx/Rx */
597 		tctl |= IGC_TCTL_EN;
598 		rctl |= IGC_RCTL_EN;
599 	} else {
600 		/* disable Tx/Rx */
601 		tctl &= ~IGC_TCTL_EN;
602 		rctl &= ~IGC_RCTL_EN;
603 	}
604 	IGC_WRITE_REG(hw, IGC_TCTL, tctl);
605 	IGC_WRITE_REG(hw, IGC_RCTL, rctl);
606 	IGC_WRITE_FLUSH(hw);
607 }
608 
609 /*
610  *  This routine disables all traffic on the adapter by issuing a
611  *  global reset on the MAC.
612  */
613 static int
614 eth_igc_stop(struct rte_eth_dev *dev)
615 {
616 	struct igc_adapter *adapter = IGC_DEV_PRIVATE(dev);
617 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
618 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
619 	struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
620 	struct rte_eth_link link;
621 
622 	dev->data->dev_started = 0;
623 	adapter->stopped = 1;
624 
625 	/* disable receive and transmit */
626 	eth_igc_rxtx_control(dev, false);
627 
628 	/* disable all MSI-X interrupts */
629 	IGC_WRITE_REG(hw, IGC_EIMC, 0x1f);
630 	IGC_WRITE_FLUSH(hw);
631 
632 	/* clear all MSI-X interrupts */
633 	IGC_WRITE_REG(hw, IGC_EICR, 0x1f);
634 
635 	igc_intr_other_disable(dev);
636 
637 	rte_eal_alarm_cancel(igc_update_queue_stats_handler, dev);
638 
639 	/* disable intr eventfd mapping */
640 	rte_intr_disable(intr_handle);
641 
642 	igc_reset_hw(hw);
643 
644 	/* disable all wake up */
645 	IGC_WRITE_REG(hw, IGC_WUC, 0);
646 
647 	/* disable checking EEE operation in MAC loopback mode */
648 	igc_read_reg_check_clear_bits(hw, IGC_EEER, IGC_EEER_EEE_FRC_AN);
649 
650 	/* Set bit for Go Link disconnect */
651 	igc_read_reg_check_set_bits(hw, IGC_82580_PHY_POWER_MGMT,
652 			IGC_82580_PM_GO_LINKD);
653 
654 	/* Power down the phy. Needed to make the link go Down */
655 	eth_igc_set_link_down(dev);
656 
657 	igc_dev_clear_queues(dev);
658 
659 	/* clear the recorded link status */
660 	memset(&link, 0, sizeof(link));
661 	rte_eth_linkstatus_set(dev, &link);
662 
663 	if (!rte_intr_allow_others(intr_handle))
664 		/* resume to the default handler */
665 		rte_intr_callback_register(intr_handle,
666 					   eth_igc_interrupt_handler,
667 					   (void *)dev);
668 
669 	/* Clean datapath event and queue/vec mapping */
670 	rte_intr_efd_disable(intr_handle);
671 	if (intr_handle->intr_vec != NULL) {
672 		rte_free(intr_handle->intr_vec);
673 		intr_handle->intr_vec = NULL;
674 	}
675 
676 	return 0;
677 }
678 
679 /*
680  * write interrupt vector allocation register
681  * @hw
682  *  board private structure
683  * @queue_index
684  *  queue index, valid 0,1,2,3
685  * @tx
686  *  tx:1, rx:0
687  * @msix_vector
688  *  msix-vector, valid 0,1,2,3,4
689  */
690 static void
691 igc_write_ivar(struct igc_hw *hw, uint8_t queue_index,
692 		bool tx, uint8_t msix_vector)
693 {
694 	uint8_t offset = 0;
695 	uint8_t reg_index = queue_index >> 1;
696 	uint32_t val;
697 
698 	/*
699 	 * IVAR(0)
700 	 * bit31...24	bit23...16	bit15...8	bit7...0
701 	 * TX1		RX1		TX0		RX0
702 	 *
703 	 * IVAR(1)
704 	 * bit31...24	bit23...16	bit15...8	bit7...0
705 	 * TX3		RX3		TX2		RX2
706 	 */
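	/*
	 * Example: rx queue 2 with msix_vector 3 gives reg_index = 1 and
	 * offset = 0, so bits 7..0 of IVAR(1) (the RX2 field above) are
	 * written with (3 | IGC_IVAR_VALID).
	 */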
707 
708 	if (tx)
709 		offset = 8;
710 
711 	if (queue_index & 1)
712 		offset += 16;
713 
714 	val = IGC_READ_REG_ARRAY(hw, IGC_IVAR0, reg_index);
715 
716 	/* clear bits */
717 	val &= ~((uint32_t)0xFF << offset);
718 
719 	/* write vector and valid bit */
720 	val |= (uint32_t)(msix_vector | IGC_IVAR_VALID) << offset;
721 
722 	IGC_WRITE_REG_ARRAY(hw, IGC_IVAR0, reg_index, val);
723 }
724 
725 /* Sets up the hardware to generate MSI-X interrupts properly
726  * @dev
727  *  Pointer to struct rte_eth_dev
728  */
729 static void
730 igc_configure_msix_intr(struct rte_eth_dev *dev)
731 {
732 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
733 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
734 	struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
735 
736 	uint32_t intr_mask;
737 	uint32_t vec = IGC_MISC_VEC_ID;
738 	uint32_t base = IGC_MISC_VEC_ID;
739 	uint32_t misc_shift = 0;
740 	int i;
741 
742 	/* won't configure msix register if no mapping is done
743 	 * between intr vector and event fd
744 	 */
745 	if (!rte_intr_dp_is_en(intr_handle))
746 		return;
747 
748 	if (rte_intr_allow_others(intr_handle)) {
749 		base = IGC_RX_VEC_START;
750 		vec = base;
751 		misc_shift = 1;
752 	}
753 
754 	/* turn on MSI-X capability first */
755 	IGC_WRITE_REG(hw, IGC_GPIE, IGC_GPIE_MSIX_MODE |
756 				IGC_GPIE_PBA | IGC_GPIE_EIAME |
757 				IGC_GPIE_NSICR);
758 	intr_mask = RTE_LEN2MASK(intr_handle->nb_efd, uint32_t) <<
759 		misc_shift;
760 
761 	if (dev->data->dev_conf.intr_conf.lsc)
762 		intr_mask |= (1u << IGC_MSIX_OTHER_INTR_VEC);
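	/*
	 * E.g. with 4 rx queues (nb_efd = 4) and misc_shift = 1, intr_mask
	 * becomes 0x1e; if LSC is enabled, the other-cause vector bit is
	 * added, yielding 0x1f, which matches the EIMC/EICR writes used
	 * elsewhere in this file.
	 */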
763 
764 	/* enable msix auto-clear */
765 	igc_read_reg_check_set_bits(hw, IGC_EIAC, intr_mask);
766 
767 	/* set other cause interrupt vector */
768 	igc_read_reg_check_set_bits(hw, IGC_IVAR_MISC,
769 		(uint32_t)(IGC_MSIX_OTHER_INTR_VEC | IGC_IVAR_VALID) << 8);
770 
771 	/* enable auto-mask */
772 	igc_read_reg_check_set_bits(hw, IGC_EIAM, intr_mask);
773 
774 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
775 		igc_write_ivar(hw, i, 0, vec);
776 		intr_handle->intr_vec[i] = vec;
777 		if (vec < base + intr_handle->nb_efd - 1)
778 			vec++;
779 	}
780 
781 	IGC_WRITE_FLUSH(hw);
782 }
783 
784 /**
785  * It sets or clears the link status change (LSC) bit in the interrupt mask.
786  *
787  * @dev
788  *  Pointer to struct rte_eth_dev.
789  * @on
790  *  Enable or Disable
791  */
792 static void
793 igc_lsc_interrupt_setup(struct rte_eth_dev *dev, uint8_t on)
794 {
795 	struct igc_interrupt *intr = IGC_DEV_PRIVATE_INTR(dev);
796 
797 	if (on)
798 		intr->mask |= IGC_ICR_LSC;
799 	else
800 		intr->mask &= ~IGC_ICR_LSC;
801 }
802 
803 /*
804  * It enables the interrupt.
805  * It will be called only once, during NIC initialization.
806  */
807 static void
808 igc_rxq_interrupt_setup(struct rte_eth_dev *dev)
809 {
810 	uint32_t mask;
811 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
812 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
813 	struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
814 	int misc_shift = rte_intr_allow_others(intr_handle) ? 1 : 0;
815 
816 	/* won't configure msix register if no mapping is done
817 	 * between intr vector and event fd
818 	 */
819 	if (!rte_intr_dp_is_en(intr_handle))
820 		return;
821 
822 	mask = RTE_LEN2MASK(intr_handle->nb_efd, uint32_t) << misc_shift;
823 	IGC_WRITE_REG(hw, IGC_EIMS, mask);
824 }
825 
826 /*
827  *  Get hardware rx-buffer size.
828  */
829 static inline int
830 igc_get_rx_buffer_size(struct igc_hw *hw)
831 {
832 	return (IGC_READ_REG(hw, IGC_RXPBS) & 0x3f) << 10;
833 }
834 
835 /*
836  * igc_hw_control_acquire sets CTRL_EXT:DRV_LOAD bit.
837  * For ASF and Pass Through versions of f/w this means
838  * that the driver is loaded.
839  */
840 static void
841 igc_hw_control_acquire(struct igc_hw *hw)
842 {
843 	uint32_t ctrl_ext;
844 
845 	/* Let firmware know the driver has taken over */
846 	ctrl_ext = IGC_READ_REG(hw, IGC_CTRL_EXT);
847 	IGC_WRITE_REG(hw, IGC_CTRL_EXT, ctrl_ext | IGC_CTRL_EXT_DRV_LOAD);
848 }
849 
850 /*
851  * igc_hw_control_release resets CTRL_EXT:DRV_LOAD bit.
852  * For ASF and Pass Through versions of f/w this means that the
853  * driver is no longer loaded.
854  */
855 static void
856 igc_hw_control_release(struct igc_hw *hw)
857 {
858 	uint32_t ctrl_ext;
859 
860 	/* Let firmware take over control of h/w */
861 	ctrl_ext = IGC_READ_REG(hw, IGC_CTRL_EXT);
862 	IGC_WRITE_REG(hw, IGC_CTRL_EXT,
863 			ctrl_ext & ~IGC_CTRL_EXT_DRV_LOAD);
864 }
865 
866 static int
867 igc_hardware_init(struct igc_hw *hw)
868 {
869 	uint32_t rx_buf_size;
870 	int diag;
871 
872 	/* Let the firmware know the OS is in control */
873 	igc_hw_control_acquire(hw);
874 
875 	/* Issue a global reset */
876 	igc_reset_hw(hw);
877 
878 	/* disable all wake up */
879 	IGC_WRITE_REG(hw, IGC_WUC, 0);
880 
881 	/*
882 	 * Hardware flow control
883 	 * - High water mark should allow for at least two standard size (1518)
884 	 *   frames to be received after sending an XOFF.
885 	 * - Low water mark works best when it is very near the high water mark.
886 	 *   This allows the receiver to restart by sending XON when it has
887 	 *   drained a bit. Here we use an arbitrary value of 1500 which will
888 	 *   restart after one full frame is pulled from the buffer. There
889 	 *   could be several smaller frames in the buffer and if so they will
890 	 *   not trigger the XON until their total number reduces the buffer
891 	 *   by 1500.
892 	 */
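	/*
	 * For example, assuming a 32 KB Rx packet buffer (rx_buf_size =
	 * 32768), high_water = 32768 - 2 * 1518 = 29732 bytes and
	 * low_water = 28232 bytes.
	 */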
893 	rx_buf_size = igc_get_rx_buffer_size(hw);
894 	hw->fc.high_water = rx_buf_size - (RTE_ETHER_MAX_LEN * 2);
895 	hw->fc.low_water = hw->fc.high_water - 1500;
896 	hw->fc.pause_time = IGC_FC_PAUSE_TIME;
897 	hw->fc.send_xon = 1;
898 	hw->fc.requested_mode = igc_fc_full;
899 
900 	diag = igc_init_hw(hw);
901 	if (diag < 0)
902 		return diag;
903 
904 	igc_get_phy_info(hw);
905 	igc_check_for_link(hw);
906 
907 	return 0;
908 }
909 
910 static int
911 eth_igc_start(struct rte_eth_dev *dev)
912 {
913 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
914 	struct igc_adapter *adapter = IGC_DEV_PRIVATE(dev);
915 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
916 	struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
917 	uint32_t *speeds;
918 	int ret;
919 
920 	PMD_INIT_FUNC_TRACE();
921 
922 	/* disable all MSI-X interrupts */
923 	IGC_WRITE_REG(hw, IGC_EIMC, 0x1f);
924 	IGC_WRITE_FLUSH(hw);
925 
926 	/* clear all MSI-X interrupts */
927 	IGC_WRITE_REG(hw, IGC_EICR, 0x1f);
928 
929 	/* disable uio/vfio intr/eventfd mapping */
930 	if (!adapter->stopped)
931 		rte_intr_disable(intr_handle);
932 
933 	/* Power up the phy. Needed to make the link go Up */
934 	eth_igc_set_link_up(dev);
935 
936 	/* Put the address into the Receive Address Array */
937 	igc_rar_set(hw, hw->mac.addr, 0);
938 
939 	/* Initialize the hardware */
940 	if (igc_hardware_init(hw)) {
941 		PMD_DRV_LOG(ERR, "Unable to initialize the hardware");
942 		return -EIO;
943 	}
944 	adapter->stopped = 0;
945 
946 	/* check and configure queue intr-vector mapping */
947 	if (rte_intr_cap_multiple(intr_handle) &&
948 		dev->data->dev_conf.intr_conf.rxq) {
949 		uint32_t intr_vector = dev->data->nb_rx_queues;
950 		if (rte_intr_efd_enable(intr_handle, intr_vector))
951 			return -1;
952 	}
953 
954 	if (rte_intr_dp_is_en(intr_handle) && !intr_handle->intr_vec) {
955 		intr_handle->intr_vec = rte_zmalloc("intr_vec",
956 			dev->data->nb_rx_queues * sizeof(int), 0);
957 		if (intr_handle->intr_vec == NULL) {
958 			PMD_DRV_LOG(ERR,
959 				"Failed to allocate %d rx_queues intr_vec",
960 				dev->data->nb_rx_queues);
961 			return -ENOMEM;
962 		}
963 	}
964 
965 	/* configure msix for rx interrupt */
966 	igc_configure_msix_intr(dev);
967 
968 	igc_tx_init(dev);
969 
970 	/* This can fail when allocating mbufs for descriptor rings */
971 	ret = igc_rx_init(dev);
972 	if (ret) {
973 		PMD_DRV_LOG(ERR, "Unable to initialize RX hardware");
974 		igc_dev_clear_queues(dev);
975 		return ret;
976 	}
977 
978 	igc_clear_hw_cntrs_base_generic(hw);
979 
980 	/* VLAN Offload Settings */
981 	eth_igc_vlan_offload_set(dev,
982 		ETH_VLAN_STRIP_MASK | ETH_VLAN_FILTER_MASK |
983 		ETH_VLAN_EXTEND_MASK);
984 
985 	/* Setup link speed and duplex */
986 	speeds = &dev->data->dev_conf.link_speeds;
987 	if (*speeds == ETH_LINK_SPEED_AUTONEG) {
988 		hw->phy.autoneg_advertised = IGC_ALL_SPEED_DUPLEX_2500;
989 		hw->mac.autoneg = 1;
990 	} else {
991 		int num_speeds = 0;
992 		bool autoneg = (*speeds & ETH_LINK_SPEED_FIXED) == 0;
993 
994 		/* Reset */
995 		hw->phy.autoneg_advertised = 0;
996 
997 		if (*speeds & ~(ETH_LINK_SPEED_10M_HD | ETH_LINK_SPEED_10M |
998 				ETH_LINK_SPEED_100M_HD | ETH_LINK_SPEED_100M |
999 				ETH_LINK_SPEED_1G | ETH_LINK_SPEED_2_5G |
1000 				ETH_LINK_SPEED_FIXED)) {
1001 			num_speeds = -1;
1002 			goto error_invalid_config;
1003 		}
1004 		if (*speeds & ETH_LINK_SPEED_10M_HD) {
1005 			hw->phy.autoneg_advertised |= ADVERTISE_10_HALF;
1006 			num_speeds++;
1007 		}
1008 		if (*speeds & ETH_LINK_SPEED_10M) {
1009 			hw->phy.autoneg_advertised |= ADVERTISE_10_FULL;
1010 			num_speeds++;
1011 		}
1012 		if (*speeds & ETH_LINK_SPEED_100M_HD) {
1013 			hw->phy.autoneg_advertised |= ADVERTISE_100_HALF;
1014 			num_speeds++;
1015 		}
1016 		if (*speeds & ETH_LINK_SPEED_100M) {
1017 			hw->phy.autoneg_advertised |= ADVERTISE_100_FULL;
1018 			num_speeds++;
1019 		}
1020 		if (*speeds & ETH_LINK_SPEED_1G) {
1021 			hw->phy.autoneg_advertised |= ADVERTISE_1000_FULL;
1022 			num_speeds++;
1023 		}
1024 		if (*speeds & ETH_LINK_SPEED_2_5G) {
1025 			hw->phy.autoneg_advertised |= ADVERTISE_2500_FULL;
1026 			num_speeds++;
1027 		}
1028 		if (num_speeds == 0 || (!autoneg && num_speeds > 1))
1029 			goto error_invalid_config;
1030 
1031 		/* Set/reset the mac.autoneg based on the link speed,
1032 		 * fixed or not
1033 		 */
1034 		if (!autoneg) {
1035 			hw->mac.autoneg = 0;
1036 			hw->mac.forced_speed_duplex =
1037 					hw->phy.autoneg_advertised;
1038 		} else {
1039 			hw->mac.autoneg = 1;
1040 		}
1041 	}
1042 
1043 	igc_setup_link(hw);
1044 
1045 	if (rte_intr_allow_others(intr_handle)) {
1046 		/* check if lsc interrupt is enabled */
1047 		if (dev->data->dev_conf.intr_conf.lsc)
1048 			igc_lsc_interrupt_setup(dev, 1);
1049 		else
1050 			igc_lsc_interrupt_setup(dev, 0);
1051 	} else {
1052 		rte_intr_callback_unregister(intr_handle,
1053 					     eth_igc_interrupt_handler,
1054 					     (void *)dev);
1055 		if (dev->data->dev_conf.intr_conf.lsc)
1056 			PMD_DRV_LOG(INFO,
1057 				"LSC won't enable because of no intr multiplex");
1058 	}
1059 
1060 	/* enable uio/vfio intr/eventfd mapping */
1061 	rte_intr_enable(intr_handle);
1062 
1063 	rte_eal_alarm_set(IGC_ALARM_INTERVAL,
1064 			igc_update_queue_stats_handler, dev);
1065 
1066 	/* check if rxq interrupt is enabled */
1067 	if (dev->data->dev_conf.intr_conf.rxq &&
1068 			rte_intr_dp_is_en(intr_handle))
1069 		igc_rxq_interrupt_setup(dev);
1070 
1071 	/* resume enabled intr since hw reset */
1072 	igc_intr_other_enable(dev);
1073 
1074 	eth_igc_rxtx_control(dev, true);
1075 	eth_igc_link_update(dev, 0);
1076 
1077 	/* configure MAC-loopback mode */
1078 	if (dev->data->dev_conf.lpbk_mode == 1) {
1079 		uint32_t reg_val;
1080 
1081 		reg_val = IGC_READ_REG(hw, IGC_CTRL);
1082 		reg_val &= ~IGC_CTRL_SPEED_MASK;
1083 		reg_val |= IGC_CTRL_SLU | IGC_CTRL_FRCSPD |
1084 			IGC_CTRL_FRCDPX | IGC_CTRL_FD | IGC_CTRL_SPEED_2500;
1085 		IGC_WRITE_REG(hw, IGC_CTRL, reg_val);
1086 
1087 		igc_read_reg_check_set_bits(hw, IGC_EEER, IGC_EEER_EEE_FRC_AN);
1088 	}
1089 
1090 	return 0;
1091 
1092 error_invalid_config:
1093 	PMD_DRV_LOG(ERR, "Invalid advertised speeds (%u) for port %u",
1094 		     dev->data->dev_conf.link_speeds, dev->data->port_id);
1095 	igc_dev_clear_queues(dev);
1096 	return -EINVAL;
1097 }
1098 
1099 static int
1100 igc_reset_swfw_lock(struct igc_hw *hw)
1101 {
1102 	int ret_val;
1103 
1104 	/*
1105 	 * Do mac ops initialization manually here, since we will need
1106 	 * some function pointers set by this call.
1107 	 */
1108 	ret_val = igc_init_mac_params(hw);
1109 	if (ret_val)
1110 		return ret_val;
1111 
1112 	/*
1113 	 * SMBI lock should not fail in this early stage. If this is the case,
1114 	 * it is due to an improper exit of the application.
1115 	 * So force the release of the faulty lock.
1116 	 */
1117 	if (igc_get_hw_semaphore_generic(hw) < 0)
1118 		PMD_DRV_LOG(DEBUG, "SMBI lock released");
1119 
1120 	igc_put_hw_semaphore_generic(hw);
1121 
1122 	if (hw->mac.ops.acquire_swfw_sync != NULL) {
1123 		uint16_t mask;
1124 
1125 		/*
1126 		 * Phy lock should not fail in this early stage.
1127 		 * If this is the case, it is due to an improper exit of the
1128 		 * application. So force the release of the faulty lock.
1129 		 */
1130 		mask = IGC_SWFW_PHY0_SM;
1131 		if (hw->mac.ops.acquire_swfw_sync(hw, mask) < 0) {
1132 			PMD_DRV_LOG(DEBUG, "SWFW phy%d lock released",
1133 				    hw->bus.func);
1134 		}
1135 		hw->mac.ops.release_swfw_sync(hw, mask);
1136 
1137 		/*
1138 		 * This one is more tricky since it is common to all ports; but
1139 		 * swfw_sync retries last long enough (1s) to be almost sure
1140 		 * that if the lock cannot be taken it is due to an improper lock
1141 		 * of the semaphore.
1142 		 */
1143 		mask = IGC_SWFW_EEP_SM;
1144 		if (hw->mac.ops.acquire_swfw_sync(hw, mask) < 0)
1145 			PMD_DRV_LOG(DEBUG, "SWFW common locks released");
1146 
1147 		hw->mac.ops.release_swfw_sync(hw, mask);
1148 	}
1149 
1150 	return IGC_SUCCESS;
1151 }
1152 
1153 /*
1154  * free all rx/tx queues.
1155  */
1156 static void
1157 igc_dev_free_queues(struct rte_eth_dev *dev)
1158 {
1159 	uint16_t i;
1160 
1161 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
1162 		eth_igc_rx_queue_release(dev->data->rx_queues[i]);
1163 		dev->data->rx_queues[i] = NULL;
1164 	}
1165 	dev->data->nb_rx_queues = 0;
1166 
1167 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
1168 		eth_igc_tx_queue_release(dev->data->tx_queues[i]);
1169 		dev->data->tx_queues[i] = NULL;
1170 	}
1171 	dev->data->nb_tx_queues = 0;
1172 }
1173 
1174 static int
1175 eth_igc_close(struct rte_eth_dev *dev)
1176 {
1177 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
1178 	struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
1179 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1180 	struct igc_adapter *adapter = IGC_DEV_PRIVATE(dev);
1181 	int retry = 0;
1182 	int ret = 0;
1183 
1184 	PMD_INIT_FUNC_TRACE();
1185 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1186 		return 0;
1187 
1188 	if (!adapter->stopped)
1189 		ret = eth_igc_stop(dev);
1190 
1191 	igc_flow_flush(dev, NULL);
1192 	igc_clear_all_filter(dev);
1193 
1194 	igc_intr_other_disable(dev);
1195 	do {
1196 		int ret = rte_intr_callback_unregister(intr_handle,
1197 				eth_igc_interrupt_handler, dev);
1198 		if (ret >= 0 || ret == -ENOENT || ret == -EINVAL)
1199 			break;
1200 
1201 		PMD_DRV_LOG(ERR, "intr callback unregister failed: %d", ret);
1202 		DELAY(200 * 1000); /* delay 200ms */
1203 	} while (retry++ < 5);
1204 
1205 	igc_phy_hw_reset(hw);
1206 	igc_hw_control_release(hw);
1207 	igc_dev_free_queues(dev);
1208 
1209 	/* Reset any pending lock */
1210 	igc_reset_swfw_lock(hw);
1211 
1212 	return ret;
1213 }
1214 
1215 static void
1216 igc_identify_hardware(struct rte_eth_dev *dev, struct rte_pci_device *pci_dev)
1217 {
1218 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1219 
1220 	hw->vendor_id = pci_dev->id.vendor_id;
1221 	hw->device_id = pci_dev->id.device_id;
1222 	hw->subsystem_vendor_id = pci_dev->id.subsystem_vendor_id;
1223 	hw->subsystem_device_id = pci_dev->id.subsystem_device_id;
1224 }
1225 
1226 static int
1227 eth_igc_dev_init(struct rte_eth_dev *dev)
1228 {
1229 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
1230 	struct igc_adapter *igc = IGC_DEV_PRIVATE(dev);
1231 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1232 	int i, error = 0;
1233 
1234 	PMD_INIT_FUNC_TRACE();
1235 	dev->dev_ops = &eth_igc_ops;
1236 	dev->rx_descriptor_done	= eth_igc_rx_descriptor_done;
1237 	dev->rx_queue_count = eth_igc_rx_queue_count;
1238 	dev->rx_descriptor_status = eth_igc_rx_descriptor_status;
1239 	dev->tx_descriptor_status = eth_igc_tx_descriptor_status;
1240 
1241 	/*
1242 	 * for secondary processes, we don't initialize any further as primary
1243 	 * has already done this work. Only check we don't need a different
1244 	 * RX function.
1245 	 */
1246 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1247 		return 0;
1248 
1249 	rte_eth_copy_pci_info(dev, pci_dev);
1250 	dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
1251 
1252 	hw->back = pci_dev;
1253 	hw->hw_addr = (void *)pci_dev->mem_resource[0].addr;
1254 
1255 	igc_identify_hardware(dev, pci_dev);
1256 	if (igc_setup_init_funcs(hw, false) != IGC_SUCCESS) {
1257 		error = -EIO;
1258 		goto err_late;
1259 	}
1260 
1261 	igc_get_bus_info(hw);
1262 
1263 	/* Reset any pending lock */
1264 	if (igc_reset_swfw_lock(hw) != IGC_SUCCESS) {
1265 		error = -EIO;
1266 		goto err_late;
1267 	}
1268 
1269 	/* Finish initialization */
1270 	if (igc_setup_init_funcs(hw, true) != IGC_SUCCESS) {
1271 		error = -EIO;
1272 		goto err_late;
1273 	}
1274 
1275 	hw->mac.autoneg = 1;
1276 	hw->phy.autoneg_wait_to_complete = 0;
1277 	hw->phy.autoneg_advertised = IGC_ALL_SPEED_DUPLEX_2500;
1278 
1279 	/* Copper options */
1280 	if (hw->phy.media_type == igc_media_type_copper) {
1281 		hw->phy.mdix = 0; /* AUTO_ALL_MODES */
1282 		hw->phy.disable_polarity_correction = 0;
1283 		hw->phy.ms_type = igc_ms_hw_default;
1284 	}
1285 
1286 	/*
1287 	 * Start from a known state; this is important for reading the NVM
1288 	 * and MAC address from it.
1289 	 */
1290 	igc_reset_hw(hw);
1291 
1292 	/* Make sure we have a good EEPROM before we read from it */
1293 	if (igc_validate_nvm_checksum(hw) < 0) {
1294 		/*
1295 		 * Some PCI-E parts fail the first check due to
1296 		 * the link being in sleep state. Call it again;
1297 		 * if it fails a second time, it's a real issue.
1298 		 */
1299 		if (igc_validate_nvm_checksum(hw) < 0) {
1300 			PMD_INIT_LOG(ERR, "EEPROM checksum invalid");
1301 			error = -EIO;
1302 			goto err_late;
1303 		}
1304 	}
1305 
1306 	/* Read the permanent MAC address out of the EEPROM */
1307 	if (igc_read_mac_addr(hw) != 0) {
1308 		PMD_INIT_LOG(ERR, "EEPROM error while reading MAC address");
1309 		error = -EIO;
1310 		goto err_late;
1311 	}
1312 
1313 	/* Allocate memory for storing MAC addresses */
1314 	dev->data->mac_addrs = rte_zmalloc("igc",
1315 		RTE_ETHER_ADDR_LEN * hw->mac.rar_entry_count, 0);
1316 	if (dev->data->mac_addrs == NULL) {
1317 		PMD_INIT_LOG(ERR, "Failed to allocate %d bytes for storing MAC",
1318 				RTE_ETHER_ADDR_LEN * hw->mac.rar_entry_count);
1319 		error = -ENOMEM;
1320 		goto err_late;
1321 	}
1322 
1323 	/* Copy the permanent MAC address */
1324 	rte_ether_addr_copy((struct rte_ether_addr *)hw->mac.addr,
1325 			&dev->data->mac_addrs[0]);
1326 
1327 	/* Now initialize the hardware */
1328 	if (igc_hardware_init(hw) != 0) {
1329 		PMD_INIT_LOG(ERR, "Hardware initialization failed");
1330 		rte_free(dev->data->mac_addrs);
1331 		dev->data->mac_addrs = NULL;
1332 		error = -ENODEV;
1333 		goto err_late;
1334 	}
1335 
1336 	hw->mac.get_link_status = 1;
1337 	igc->stopped = 0;
1338 
1339 	/* Indicate SOL/IDER usage */
1340 	if (igc_check_reset_block(hw) < 0)
1341 		PMD_INIT_LOG(ERR,
1342 			"PHY reset is blocked due to SOL/IDER session.");
1343 
1344 	PMD_INIT_LOG(DEBUG, "port_id %d vendorID=0x%x deviceID=0x%x",
1345 			dev->data->port_id, pci_dev->id.vendor_id,
1346 			pci_dev->id.device_id);
1347 
1348 	rte_intr_callback_register(&pci_dev->intr_handle,
1349 			eth_igc_interrupt_handler, (void *)dev);
1350 
1351 	/* enable uio/vfio intr/eventfd mapping */
1352 	rte_intr_enable(&pci_dev->intr_handle);
1353 
1354 	/* enable support intr */
1355 	igc_intr_other_enable(dev);
1356 
1357 	/* initialize the per-queue statistics mapping */
1358 	for (i = 0; i < IGC_QUEUE_PAIRS_NUM; i++) {
1359 		igc->txq_stats_map[i] = -1;
1360 		igc->rxq_stats_map[i] = -1;
1361 	}
1362 
1363 	igc_flow_init(dev);
1364 	igc_clear_all_filter(dev);
1365 	return 0;
1366 
1367 err_late:
1368 	igc_hw_control_release(hw);
1369 	return error;
1370 }
1371 
1372 static int
1373 eth_igc_dev_uninit(__rte_unused struct rte_eth_dev *eth_dev)
1374 {
1375 	PMD_INIT_FUNC_TRACE();
1376 	eth_igc_close(eth_dev);
1377 	return 0;
1378 }
1379 
1380 static int
1381 eth_igc_reset(struct rte_eth_dev *dev)
1382 {
1383 	int ret;
1384 
1385 	PMD_INIT_FUNC_TRACE();
1386 
1387 	ret = eth_igc_dev_uninit(dev);
1388 	if (ret)
1389 		return ret;
1390 
1391 	return eth_igc_dev_init(dev);
1392 }
1393 
1394 static int
1395 eth_igc_promiscuous_enable(struct rte_eth_dev *dev)
1396 {
1397 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1398 	uint32_t rctl;
1399 
1400 	rctl = IGC_READ_REG(hw, IGC_RCTL);
1401 	rctl |= (IGC_RCTL_UPE | IGC_RCTL_MPE);
1402 	IGC_WRITE_REG(hw, IGC_RCTL, rctl);
1403 	return 0;
1404 }
1405 
1406 static int
1407 eth_igc_promiscuous_disable(struct rte_eth_dev *dev)
1408 {
1409 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1410 	uint32_t rctl;
1411 
1412 	rctl = IGC_READ_REG(hw, IGC_RCTL);
1413 	rctl &= (~IGC_RCTL_UPE);
1414 	if (dev->data->all_multicast == 1)
1415 		rctl |= IGC_RCTL_MPE;
1416 	else
1417 		rctl &= (~IGC_RCTL_MPE);
1418 	IGC_WRITE_REG(hw, IGC_RCTL, rctl);
1419 	return 0;
1420 }
1421 
1422 static int
1423 eth_igc_allmulticast_enable(struct rte_eth_dev *dev)
1424 {
1425 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1426 	uint32_t rctl;
1427 
1428 	rctl = IGC_READ_REG(hw, IGC_RCTL);
1429 	rctl |= IGC_RCTL_MPE;
1430 	IGC_WRITE_REG(hw, IGC_RCTL, rctl);
1431 	return 0;
1432 }
1433 
1434 static int
1435 eth_igc_allmulticast_disable(struct rte_eth_dev *dev)
1436 {
1437 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1438 	uint32_t rctl;
1439 
1440 	if (dev->data->promiscuous == 1)
1441 		return 0;	/* must remain in all_multicast mode */
1442 
1443 	rctl = IGC_READ_REG(hw, IGC_RCTL);
1444 	rctl &= (~IGC_RCTL_MPE);
1445 	IGC_WRITE_REG(hw, IGC_RCTL, rctl);
1446 	return 0;
1447 }
1448 
1449 static int
1450 eth_igc_fw_version_get(struct rte_eth_dev *dev, char *fw_version,
1451 		       size_t fw_size)
1452 {
1453 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1454 	struct igc_fw_version fw;
1455 	int ret;
1456 
1457 	igc_get_fw_version(hw, &fw);
1458 
1459 	/* if option rom is valid, display its version too */
1460 	if (fw.or_valid) {
1461 		ret = snprintf(fw_version, fw_size,
1462 			 "%d.%d, 0x%08x, %d.%d.%d",
1463 			 fw.eep_major, fw.eep_minor, fw.etrack_id,
1464 			 fw.or_major, fw.or_build, fw.or_patch);
1465 	/* no option rom */
1466 	} else {
1467 		if (fw.etrack_id != 0x0000) {
1468 			ret = snprintf(fw_version, fw_size,
1469 				 "%d.%d, 0x%08x",
1470 				 fw.eep_major, fw.eep_minor,
1471 				 fw.etrack_id);
1472 		} else {
1473 			ret = snprintf(fw_version, fw_size,
1474 				 "%d.%d.%d",
1475 				 fw.eep_major, fw.eep_minor,
1476 				 fw.eep_build);
1477 		}
1478 	}
1479 
1480 	ret += 1; /* add the size of '\0' */
1481 	if (fw_size < (u32)ret)
1482 		return ret;
1483 	else
1484 		return 0;
1485 }
1486 
1487 static int
1488 eth_igc_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
1489 {
1490 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1491 
1492 	dev_info->min_rx_bufsize = 256; /* See BSIZE field of RCTL register. */
1493 	dev_info->max_rx_pktlen = MAX_RX_JUMBO_FRAME_SIZE;
1494 	dev_info->max_mac_addrs = hw->mac.rar_entry_count;
1495 	dev_info->rx_offload_capa = IGC_RX_OFFLOAD_ALL;
1496 	dev_info->tx_offload_capa = IGC_TX_OFFLOAD_ALL;
1497 	dev_info->rx_queue_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP;
1498 
1499 	dev_info->max_rx_queues = IGC_QUEUE_PAIRS_NUM;
1500 	dev_info->max_tx_queues = IGC_QUEUE_PAIRS_NUM;
1501 	dev_info->max_vmdq_pools = 0;
1502 
1503 	dev_info->hash_key_size = IGC_HKEY_MAX_INDEX * sizeof(uint32_t);
1504 	dev_info->reta_size = ETH_RSS_RETA_SIZE_128;
1505 	dev_info->flow_type_rss_offloads = IGC_RSS_OFFLOAD_ALL;
1506 
1507 	dev_info->default_rxconf = (struct rte_eth_rxconf) {
1508 		.rx_thresh = {
1509 			.pthresh = IGC_DEFAULT_RX_PTHRESH,
1510 			.hthresh = IGC_DEFAULT_RX_HTHRESH,
1511 			.wthresh = IGC_DEFAULT_RX_WTHRESH,
1512 		},
1513 		.rx_free_thresh = IGC_DEFAULT_RX_FREE_THRESH,
1514 		.rx_drop_en = 0,
1515 		.offloads = 0,
1516 	};
1517 
1518 	dev_info->default_txconf = (struct rte_eth_txconf) {
1519 		.tx_thresh = {
1520 			.pthresh = IGC_DEFAULT_TX_PTHRESH,
1521 			.hthresh = IGC_DEFAULT_TX_HTHRESH,
1522 			.wthresh = IGC_DEFAULT_TX_WTHRESH,
1523 		},
1524 		.offloads = 0,
1525 	};
1526 
1527 	dev_info->rx_desc_lim = rx_desc_lim;
1528 	dev_info->tx_desc_lim = tx_desc_lim;
1529 
1530 	dev_info->speed_capa = ETH_LINK_SPEED_10M_HD | ETH_LINK_SPEED_10M |
1531 			ETH_LINK_SPEED_100M_HD | ETH_LINK_SPEED_100M |
1532 			ETH_LINK_SPEED_1G | ETH_LINK_SPEED_2_5G;
1533 
1534 	dev_info->max_mtu = dev_info->max_rx_pktlen - IGC_ETH_OVERHEAD;
1535 	dev_info->min_mtu = RTE_ETHER_MIN_MTU;
1536 	return 0;
1537 }
1538 
1539 static int
1540 eth_igc_led_on(struct rte_eth_dev *dev)
1541 {
1542 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1543 
1544 	return igc_led_on(hw) == IGC_SUCCESS ? 0 : -ENOTSUP;
1545 }
1546 
1547 static int
1548 eth_igc_led_off(struct rte_eth_dev *dev)
1549 {
1550 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1551 
1552 	return igc_led_off(hw) == IGC_SUCCESS ? 0 : -ENOTSUP;
1553 }
1554 
1555 static const uint32_t *
1556 eth_igc_supported_ptypes_get(__rte_unused struct rte_eth_dev *dev)
1557 {
1558 	static const uint32_t ptypes[] = {
1559 		/* refers to rx_desc_pkt_info_to_pkt_type() */
1560 		RTE_PTYPE_L2_ETHER,
1561 		RTE_PTYPE_L3_IPV4,
1562 		RTE_PTYPE_L3_IPV4_EXT,
1563 		RTE_PTYPE_L3_IPV6,
1564 		RTE_PTYPE_L3_IPV6_EXT,
1565 		RTE_PTYPE_L4_TCP,
1566 		RTE_PTYPE_L4_UDP,
1567 		RTE_PTYPE_L4_SCTP,
1568 		RTE_PTYPE_TUNNEL_IP,
1569 		RTE_PTYPE_INNER_L3_IPV6,
1570 		RTE_PTYPE_INNER_L3_IPV6_EXT,
1571 		RTE_PTYPE_INNER_L4_TCP,
1572 		RTE_PTYPE_INNER_L4_UDP,
1573 		RTE_PTYPE_UNKNOWN
1574 	};
1575 
1576 	return ptypes;
1577 }
1578 
1579 static int
1580 eth_igc_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
1581 {
1582 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1583 	uint32_t frame_size = mtu + IGC_ETH_OVERHEAD;
1584 	uint32_t rctl;
1585 
1586 	/* if extended VLAN has been enabled */
1587 	if (IGC_READ_REG(hw, IGC_CTRL_EXT) & IGC_CTRL_EXT_EXT_VLAN)
1588 		frame_size += VLAN_TAG_SIZE;
1589 
1590 	/* check that mtu is within the allowed range */
1591 	if (mtu < RTE_ETHER_MIN_MTU ||
1592 		frame_size > MAX_RX_JUMBO_FRAME_SIZE)
1593 		return -EINVAL;
1594 
1595 	/*
1596 	 * If device is started, refuse mtu that requires the support of
1597 	 * scattered packets when this feature has not been enabled before.
1598 	 */
1599 	if (dev->data->dev_started && !dev->data->scattered_rx &&
1600 	    frame_size > dev->data->min_rx_buf_size - RTE_PKTMBUF_HEADROOM) {
1601 		PMD_INIT_LOG(ERR, "Stop port first.");
1602 		return -EINVAL;
1603 	}
1604 
1605 	rctl = IGC_READ_REG(hw, IGC_RCTL);
1606 
1607 	/* switch to jumbo mode if needed */
1608 	if (mtu > RTE_ETHER_MTU) {
1609 		dev->data->dev_conf.rxmode.offloads |=
1610 			DEV_RX_OFFLOAD_JUMBO_FRAME;
1611 		rctl |= IGC_RCTL_LPE;
1612 	} else {
1613 		dev->data->dev_conf.rxmode.offloads &=
1614 			~DEV_RX_OFFLOAD_JUMBO_FRAME;
1615 		rctl &= ~IGC_RCTL_LPE;
1616 	}
1617 	IGC_WRITE_REG(hw, IGC_RCTL, rctl);
1618 
1619 	/* update max frame size */
1620 	dev->data->dev_conf.rxmode.max_rx_pkt_len = frame_size;
1621 
1622 	IGC_WRITE_REG(hw, IGC_RLPML,
1623 			dev->data->dev_conf.rxmode.max_rx_pkt_len);
1624 
1625 	return 0;
1626 }
1627 
1628 static int
1629 eth_igc_rar_set(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr,
1630 		uint32_t index, uint32_t pool)
1631 {
1632 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1633 
1634 	igc_rar_set(hw, mac_addr->addr_bytes, index);
1635 	RTE_SET_USED(pool);
1636 	return 0;
1637 }
1638 
1639 static void
1640 eth_igc_rar_clear(struct rte_eth_dev *dev, uint32_t index)
1641 {
1642 	uint8_t addr[RTE_ETHER_ADDR_LEN];
1643 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1644 
1645 	memset(addr, 0, sizeof(addr));
1646 	igc_rar_set(hw, addr, index);
1647 }
1648 
1649 static int
1650 eth_igc_default_mac_addr_set(struct rte_eth_dev *dev,
1651 			struct rte_ether_addr *addr)
1652 {
1653 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1654 	igc_rar_set(hw, addr->addr_bytes, 0);
1655 	return 0;
1656 }
1657 
1658 static int
1659 eth_igc_set_mc_addr_list(struct rte_eth_dev *dev,
1660 			 struct rte_ether_addr *mc_addr_set,
1661 			 uint32_t nb_mc_addr)
1662 {
1663 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1664 	igc_update_mc_addr_list(hw, (u8 *)mc_addr_set, nb_mc_addr);
1665 	return 0;
1666 }
1667 
1668 /*
1669  * Read hardware registers
1670  */
1671 static void
1672 igc_read_stats_registers(struct igc_hw *hw, struct igc_hw_stats *stats)
1673 {
1674 	int pause_frames;
1675 
1676 	uint64_t old_gprc  = stats->gprc;
1677 	uint64_t old_gptc  = stats->gptc;
1678 	uint64_t old_tpr   = stats->tpr;
1679 	uint64_t old_tpt   = stats->tpt;
1680 	uint64_t old_rpthc = stats->rpthc;
1681 	uint64_t old_hgptc = stats->hgptc;
1682 
1683 	stats->crcerrs += IGC_READ_REG(hw, IGC_CRCERRS);
1684 	stats->algnerrc += IGC_READ_REG(hw, IGC_ALGNERRC);
1685 	stats->rxerrc += IGC_READ_REG(hw, IGC_RXERRC);
1686 	stats->mpc += IGC_READ_REG(hw, IGC_MPC);
1687 	stats->scc += IGC_READ_REG(hw, IGC_SCC);
1688 	stats->ecol += IGC_READ_REG(hw, IGC_ECOL);
1689 
1690 	stats->mcc += IGC_READ_REG(hw, IGC_MCC);
1691 	stats->latecol += IGC_READ_REG(hw, IGC_LATECOL);
1692 	stats->colc += IGC_READ_REG(hw, IGC_COLC);
1693 
1694 	stats->dc += IGC_READ_REG(hw, IGC_DC);
1695 	stats->tncrs += IGC_READ_REG(hw, IGC_TNCRS);
1696 	stats->htdpmc += IGC_READ_REG(hw, IGC_HTDPMC);
1697 	stats->rlec += IGC_READ_REG(hw, IGC_RLEC);
1698 	stats->xonrxc += IGC_READ_REG(hw, IGC_XONRXC);
1699 	stats->xontxc += IGC_READ_REG(hw, IGC_XONTXC);
1700 
1701 	/*
1702 	 * For watchdog management we need to know if we have been
1703 	 * paused during the last interval, so capture that here.
1704 	 */
1705 	pause_frames = IGC_READ_REG(hw, IGC_XOFFRXC);
1706 	stats->xoffrxc += pause_frames;
1707 	stats->xofftxc += IGC_READ_REG(hw, IGC_XOFFTXC);
1708 	stats->fcruc += IGC_READ_REG(hw, IGC_FCRUC);
1709 	stats->prc64 += IGC_READ_REG(hw, IGC_PRC64);
1710 	stats->prc127 += IGC_READ_REG(hw, IGC_PRC127);
1711 	stats->prc255 += IGC_READ_REG(hw, IGC_PRC255);
1712 	stats->prc511 += IGC_READ_REG(hw, IGC_PRC511);
1713 	stats->prc1023 += IGC_READ_REG(hw, IGC_PRC1023);
1714 	stats->prc1522 += IGC_READ_REG(hw, IGC_PRC1522);
1715 	stats->gprc += IGC_READ_REG(hw, IGC_GPRC);
1716 	stats->bprc += IGC_READ_REG(hw, IGC_BPRC);
1717 	stats->mprc += IGC_READ_REG(hw, IGC_MPRC);
1718 	stats->gptc += IGC_READ_REG(hw, IGC_GPTC);
1719 
1720 	/* For the 64-bit byte counters the low dword must be read first. */
1721 	/* Both registers clear on the read of the high dword */
1722 
1723 	/* Workaround CRC bytes included in size, take away 4 bytes/packet */
1724 	stats->gorc += IGC_READ_REG(hw, IGC_GORCL);
1725 	stats->gorc += ((uint64_t)IGC_READ_REG(hw, IGC_GORCH) << 32);
1726 	stats->gorc -= (stats->gprc - old_gprc) * RTE_ETHER_CRC_LEN;
1727 	stats->gotc += IGC_READ_REG(hw, IGC_GOTCL);
1728 	stats->gotc += ((uint64_t)IGC_READ_REG(hw, IGC_GOTCH) << 32);
1729 	stats->gotc -= (stats->gptc - old_gptc) * RTE_ETHER_CRC_LEN;
1730 
1731 	stats->rnbc += IGC_READ_REG(hw, IGC_RNBC);
1732 	stats->ruc += IGC_READ_REG(hw, IGC_RUC);
1733 	stats->rfc += IGC_READ_REG(hw, IGC_RFC);
1734 	stats->roc += IGC_READ_REG(hw, IGC_ROC);
1735 	stats->rjc += IGC_READ_REG(hw, IGC_RJC);
1736 
1737 	stats->mgprc += IGC_READ_REG(hw, IGC_MGTPRC);
1738 	stats->mgpdc += IGC_READ_REG(hw, IGC_MGTPDC);
1739 	stats->mgptc += IGC_READ_REG(hw, IGC_MGTPTC);
1740 	stats->b2ospc += IGC_READ_REG(hw, IGC_B2OSPC);
1741 	stats->b2ogprc += IGC_READ_REG(hw, IGC_B2OGPRC);
1742 	stats->o2bgptc += IGC_READ_REG(hw, IGC_O2BGPTC);
1743 	stats->o2bspc += IGC_READ_REG(hw, IGC_O2BSPC);
1744 
1745 	stats->tpr += IGC_READ_REG(hw, IGC_TPR);
1746 	stats->tpt += IGC_READ_REG(hw, IGC_TPT);
1747 
1748 	stats->tor += IGC_READ_REG(hw, IGC_TORL);
1749 	stats->tor += ((uint64_t)IGC_READ_REG(hw, IGC_TORH) << 32);
1750 	stats->tor -= (stats->tpr - old_tpr) * RTE_ETHER_CRC_LEN;
1751 	stats->tot += IGC_READ_REG(hw, IGC_TOTL);
1752 	stats->tot += ((uint64_t)IGC_READ_REG(hw, IGC_TOTH) << 32);
1753 	stats->tot -= (stats->tpt - old_tpt) * RTE_ETHER_CRC_LEN;
1754 
1755 	stats->ptc64 += IGC_READ_REG(hw, IGC_PTC64);
1756 	stats->ptc127 += IGC_READ_REG(hw, IGC_PTC127);
1757 	stats->ptc255 += IGC_READ_REG(hw, IGC_PTC255);
1758 	stats->ptc511 += IGC_READ_REG(hw, IGC_PTC511);
1759 	stats->ptc1023 += IGC_READ_REG(hw, IGC_PTC1023);
1760 	stats->ptc1522 += IGC_READ_REG(hw, IGC_PTC1522);
1761 	stats->mptc += IGC_READ_REG(hw, IGC_MPTC);
1762 	stats->bptc += IGC_READ_REG(hw, IGC_BPTC);
1763 	stats->tsctc += IGC_READ_REG(hw, IGC_TSCTC);
1764 
1765 	stats->iac += IGC_READ_REG(hw, IGC_IAC);
1766 	stats->rpthc += IGC_READ_REG(hw, IGC_RPTHC);
1767 	stats->hgptc += IGC_READ_REG(hw, IGC_HGPTC);
1768 	stats->icrxdmtc += IGC_READ_REG(hw, IGC_ICRXDMTC);
1769 
1770 	/* Host to Card Statistics */
1771 	stats->hgorc += IGC_READ_REG(hw, IGC_HGORCL);
1772 	stats->hgorc += ((uint64_t)IGC_READ_REG(hw, IGC_HGORCH) << 32);
1773 	stats->hgorc -= (stats->rpthc - old_rpthc) * RTE_ETHER_CRC_LEN;
1774 	stats->hgotc += IGC_READ_REG(hw, IGC_HGOTCL);
1775 	stats->hgotc += ((uint64_t)IGC_READ_REG(hw, IGC_HGOTCH) << 32);
1776 	stats->hgotc -= (stats->hgptc - old_hgptc) * RTE_ETHER_CRC_LEN;
1777 	stats->lenerrs += IGC_READ_REG(hw, IGC_LENERRS);
1778 }
1779 
1780 /*
1781  * Write 0 to all queue status registers
1782  */
1783 static void
1784 igc_reset_queue_stats_register(struct igc_hw *hw)
1785 {
1786 	int i;
1787 
1788 	for (i = 0; i < IGC_QUEUE_PAIRS_NUM; i++) {
1789 		IGC_WRITE_REG(hw, IGC_PQGPRC(i), 0);
1790 		IGC_WRITE_REG(hw, IGC_PQGPTC(i), 0);
1791 		IGC_WRITE_REG(hw, IGC_PQGORC(i), 0);
1792 		IGC_WRITE_REG(hw, IGC_PQGOTC(i), 0);
1793 		IGC_WRITE_REG(hw, IGC_PQMPRC(i), 0);
1794 		IGC_WRITE_REG(hw, IGC_RQDPC(i), 0);
1795 		IGC_WRITE_REG(hw, IGC_TQDPC(i), 0);
1796 	}
1797 }
1798 
1799 /*
1800  * Read all hardware queue status registers
1801  */
1802 static void
1803 igc_read_queue_stats_register(struct rte_eth_dev *dev)
1804 {
1805 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1806 	struct igc_hw_queue_stats *queue_stats =
1807 				IGC_DEV_PRIVATE_QUEUE_STATS(dev);
1808 	int i;
1809 
1810 	/*
1811 	 * These per-queue registers are not cleared on read. Each one wraps
1812 	 * around to 0x00000000 on the increment after reaching 0xFFFFFFFF and
1813 	 * then continues counting normally.
1814 	 */
1815 	for (i = 0; i < IGC_QUEUE_PAIRS_NUM; i++) {
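		/*
		 * The union overlays the 64-bit software counter with its two
		 * 32-bit halves; U32_0_IN_U64/U32_1_IN_U64 select the low/high
		 * half in an endianness-independent way.
		 */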
1816 		union {
1817 			u64 ddword;
1818 			u32 dword[2];
1819 		} value;
1820 		u32 tmp;
1821 
1822 		/*
1823 		 * Read the register first. If the new value is smaller than the
1824 		 * low 32 bits of the previous total, the register has wrapped
1825 		 * around, so increment the high 32 bits and replace the low 32
1826 		 * bits with the new value.
1827 		 */
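		/*
		 * Illustrative example: if the stored total is 0x1fffffff0 and
		 * the register now reads 0x10, the low dword went backwards, so
		 * the high dword is bumped to 2 and the total becomes
		 * 0x200000010.
		 */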
1828 		tmp = IGC_READ_REG(hw, IGC_PQGPRC(i));
1829 		value.ddword = queue_stats->pqgprc[i];
1830 		if (value.dword[U32_0_IN_U64] > tmp)
1831 			value.dword[U32_1_IN_U64]++;
1832 		value.dword[U32_0_IN_U64] = tmp;
1833 		queue_stats->pqgprc[i] = value.ddword;
1834 
1835 		tmp = IGC_READ_REG(hw, IGC_PQGPTC(i));
1836 		value.ddword = queue_stats->pqgptc[i];
1837 		if (value.dword[U32_0_IN_U64] > tmp)
1838 			value.dword[U32_1_IN_U64]++;
1839 		value.dword[U32_0_IN_U64] = tmp;
1840 		queue_stats->pqgptc[i] = value.ddword;
1841 
1842 		tmp = IGC_READ_REG(hw, IGC_PQGORC(i));
1843 		value.ddword = queue_stats->pqgorc[i];
1844 		if (value.dword[U32_0_IN_U64] > tmp)
1845 			value.dword[U32_1_IN_U64]++;
1846 		value.dword[U32_0_IN_U64] = tmp;
1847 		queue_stats->pqgorc[i] = value.ddword;
1848 
1849 		tmp = IGC_READ_REG(hw, IGC_PQGOTC(i));
1850 		value.ddword = queue_stats->pqgotc[i];
1851 		if (value.dword[U32_0_IN_U64] > tmp)
1852 			value.dword[U32_1_IN_U64]++;
1853 		value.dword[U32_0_IN_U64] = tmp;
1854 		queue_stats->pqgotc[i] = value.ddword;
1855 
1856 		tmp = IGC_READ_REG(hw, IGC_PQMPRC(i));
1857 		value.ddword = queue_stats->pqmprc[i];
1858 		if (value.dword[U32_0_IN_U64] > tmp)
1859 			value.dword[U32_1_IN_U64]++;
1860 		value.dword[U32_0_IN_U64] = tmp;
1861 		queue_stats->pqmprc[i] = value.ddword;
1862 
1863 		tmp = IGC_READ_REG(hw, IGC_RQDPC(i));
1864 		value.ddword = queue_stats->rqdpc[i];
1865 		if (value.dword[U32_0_IN_U64] > tmp)
1866 			value.dword[U32_1_IN_U64]++;
1867 		value.dword[U32_0_IN_U64] = tmp;
1868 		queue_stats->rqdpc[i] = value.ddword;
1869 
1870 		tmp = IGC_READ_REG(hw, IGC_TQDPC(i));
1871 		value.ddword = queue_stats->tqdpc[i];
1872 		if (value.dword[U32_0_IN_U64] > tmp)
1873 			value.dword[U32_1_IN_U64]++;
1874 		value.dword[U32_0_IN_U64] = tmp;
1875 		queue_stats->tqdpc[i] = value.ddword;
1876 	}
1877 }
1878 
1879 static int
1880 eth_igc_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *rte_stats)
1881 {
1882 	struct igc_adapter *igc = IGC_DEV_PRIVATE(dev);
1883 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1884 	struct igc_hw_stats *stats = IGC_DEV_PRIVATE_STATS(dev);
1885 	struct igc_hw_queue_stats *queue_stats =
1886 			IGC_DEV_PRIVATE_QUEUE_STATS(dev);
1887 	int i;
1888 
1889 	/*
1890 	 * Cancel the stats alarm handler since it also reads the queue statistics registers
1891 	 */
1892 	rte_eal_alarm_cancel(igc_update_queue_stats_handler, dev);
1893 
1894 	/* Read the statistics registers */
1895 	igc_read_queue_stats_register(dev);
1896 	igc_read_stats_registers(hw, stats);
1897 
1898 	if (rte_stats == NULL) {
1899 		/* Restart the queue stats alarm handler */
1900 		rte_eal_alarm_set(IGC_ALARM_INTERVAL,
1901 				igc_update_queue_stats_handler, dev);
1902 		return -EINVAL;
1903 	}
1904 
1905 	/* Rx Errors */
1906 	rte_stats->imissed = stats->mpc;
1907 	rte_stats->ierrors = stats->crcerrs + stats->rlec +
1908 			stats->rxerrc + stats->algnerrc;
1909 
1910 	/* Tx Errors */
1911 	rte_stats->oerrors = stats->ecol + stats->latecol;
1912 
1913 	rte_stats->ipackets = stats->gprc;
1914 	rte_stats->opackets = stats->gptc;
1915 	rte_stats->ibytes   = stats->gorc;
1916 	rte_stats->obytes   = stats->gotc;
1917 
1918 	/* Get per-queue statistics */
1919 	for (i = 0; i < IGC_QUEUE_PAIRS_NUM; i++) {
1920 		/* Get TX queue statistics */
1921 		int map_id = igc->txq_stats_map[i];
1922 		if (map_id >= 0) {
1923 			rte_stats->q_opackets[map_id] += queue_stats->pqgptc[i];
1924 			rte_stats->q_obytes[map_id] += queue_stats->pqgotc[i];
1925 		}
1926 		/* Get RX queue statistics */
1927 		map_id = igc->rxq_stats_map[i];
1928 		if (map_id >= 0) {
1929 			rte_stats->q_ipackets[map_id] += queue_stats->pqgprc[i];
1930 			rte_stats->q_ibytes[map_id] += queue_stats->pqgorc[i];
1931 			rte_stats->q_errors[map_id] += queue_stats->rqdpc[i];
1932 		}
1933 	}
1934 
1935 	/* Restart the queue stats alarm handler */
1936 	rte_eal_alarm_set(IGC_ALARM_INTERVAL,
1937 			igc_update_queue_stats_handler, dev);
1938 	return 0;
1939 }
1940 
1941 static int
1942 eth_igc_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
1943 		   unsigned int n)
1944 {
1945 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1946 	struct igc_hw_stats *hw_stats =
1947 			IGC_DEV_PRIVATE_STATS(dev);
1948 	unsigned int i;
1949 
1950 	igc_read_stats_registers(hw, hw_stats);
1951 
1952 	if (n < IGC_NB_XSTATS)
1953 		return IGC_NB_XSTATS;
1954 
1955 	/* If this is a reset, xstats is NULL and we have already cleared the
1956 	 * registers by reading them.
1957 	 */
1958 	if (!xstats)
1959 		return 0;
1960 
1961 	/* Extended stats */
1962 	for (i = 0; i < IGC_NB_XSTATS; i++) {
1963 		xstats[i].id = i;
1964 		xstats[i].value = *(uint64_t *)(((char *)hw_stats) +
1965 			rte_igc_stats_strings[i].offset);
1966 	}
1967 
1968 	return IGC_NB_XSTATS;
1969 }
1970 
1971 static int
1972 eth_igc_xstats_reset(struct rte_eth_dev *dev)
1973 {
1974 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1975 	struct igc_hw_stats *hw_stats = IGC_DEV_PRIVATE_STATS(dev);
1976 	struct igc_hw_queue_stats *queue_stats =
1977 			IGC_DEV_PRIVATE_QUEUE_STATS(dev);
1978 
1979 	/* Cancel the queue stats alarm handler to avoid conflicting register reads */
1980 	rte_eal_alarm_cancel(igc_update_queue_stats_handler, dev);
1981 
1982 	/* HW registers are cleared on read */
1983 	igc_reset_queue_stats_register(hw);
1984 	igc_read_stats_registers(hw, hw_stats);
1985 
1986 	/* Reset software totals */
1987 	memset(hw_stats, 0, sizeof(*hw_stats));
1988 	memset(queue_stats, 0, sizeof(*queue_stats));
1989 
1990 	/* Restart the queue stats alarm handler */
1991 	rte_eal_alarm_set(IGC_ALARM_INTERVAL, igc_update_queue_stats_handler,
1992 			dev);
1993 
1994 	return 0;
1995 }
1996 
1997 static int
1998 eth_igc_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
1999 	struct rte_eth_xstat_name *xstats_names, unsigned int size)
2000 {
2001 	unsigned int i;
2002 
2003 	if (xstats_names == NULL)
2004 		return IGC_NB_XSTATS;
2005 
2006 	if (size < IGC_NB_XSTATS) {
2007 		PMD_DRV_LOG(ERR, "not enough buffers!");
2008 		return IGC_NB_XSTATS;
2009 	}
2010 
2011 	for (i = 0; i < IGC_NB_XSTATS; i++)
2012 		strlcpy(xstats_names[i].name, rte_igc_stats_strings[i].name,
2013 			sizeof(xstats_names[i].name));
2014 
2015 	return IGC_NB_XSTATS;
2016 }
2017 
2018 static int
2019 eth_igc_xstats_get_names_by_id(struct rte_eth_dev *dev,
2020 		struct rte_eth_xstat_name *xstats_names, const uint64_t *ids,
2021 		unsigned int limit)
2022 {
2023 	unsigned int i;
2024 
2025 	if (!ids)
2026 		return eth_igc_xstats_get_names(dev, xstats_names, limit);
2027 
2028 	for (i = 0; i < limit; i++) {
2029 		if (ids[i] >= IGC_NB_XSTATS) {
2030 			PMD_DRV_LOG(ERR, "id value isn't valid");
2031 			return -EINVAL;
2032 		}
2033 		strlcpy(xstats_names[i].name,
2034 			rte_igc_stats_strings[ids[i]].name,
2035 			sizeof(xstats_names[i].name));
2036 	}
2037 	return limit;
2038 }
2039 
2040 static int
2041 eth_igc_xstats_get_by_id(struct rte_eth_dev *dev, const uint64_t *ids,
2042 		uint64_t *values, unsigned int n)
2043 {
2044 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2045 	struct igc_hw_stats *hw_stats = IGC_DEV_PRIVATE_STATS(dev);
2046 	unsigned int i;
2047 
2048 	igc_read_stats_registers(hw, hw_stats);
2049 
2050 	if (!ids) {
2051 		if (n < IGC_NB_XSTATS)
2052 			return IGC_NB_XSTATS;
2053 
2054 		/* If this is a reset, values is NULL and we have already cleared the
2055 		 * registers by reading them.
2056 		 */
2057 		if (!values)
2058 			return 0;
2059 
2060 		/* Extended stats */
2061 		for (i = 0; i < IGC_NB_XSTATS; i++)
2062 			values[i] = *(uint64_t *)(((char *)hw_stats) +
2063 					rte_igc_stats_strings[i].offset);
2064 
2065 		return IGC_NB_XSTATS;
2066 
2067 	} else {
2068 		for (i = 0; i < n; i++) {
2069 			if (ids[i] >= IGC_NB_XSTATS) {
2070 				PMD_DRV_LOG(ERR, "id value isn't valid");
2071 				return -EINVAL;
2072 			}
2073 			values[i] = *(uint64_t *)(((char *)hw_stats) +
2074 					rte_igc_stats_strings[ids[i]].offset);
2075 		}
2076 		return n;
2077 	}
2078 }
2079 
2080 static int
2081 eth_igc_queue_stats_mapping_set(struct rte_eth_dev *dev,
2082 		uint16_t queue_id, uint8_t stat_idx, uint8_t is_rx)
2083 {
2084 	struct igc_adapter *igc = IGC_DEV_PRIVATE(dev);
2085 
2086 	/* check that the queue id is valid */
2087 	if (queue_id >= IGC_QUEUE_PAIRS_NUM) {
2088 		PMD_DRV_LOG(ERR, "queue id(%u) error, max is %u",
2089 			queue_id, IGC_QUEUE_PAIRS_NUM - 1);
2090 		return -EINVAL;
2091 	}
2092 
2093 	/* store the queue-to-stats-counter mapping */
2094 	if (is_rx)
2095 		igc->rxq_stats_map[queue_id] = stat_idx;
2096 	else
2097 		igc->txq_stats_map[queue_id] = stat_idx;
2098 
2099 	return 0;
2100 }
2101 
2102 static int
2103 eth_igc_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
2104 {
2105 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2106 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
2107 	struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
2108 	uint32_t vec = IGC_MISC_VEC_ID;
2109 
2110 	if (rte_intr_allow_others(intr_handle))
2111 		vec = IGC_RX_VEC_START;
2112 
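	/*
	 * Each RX queue owns one bit in EIMS/EIMC; when other (non-queue)
	 * interrupts share the MSI-X table, queue vectors start at
	 * IGC_RX_VEC_START, so the queue's bit index is queue_id + vec.
	 */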
2113 	uint32_t mask = 1u << (queue_id + vec);
2114 
2115 	IGC_WRITE_REG(hw, IGC_EIMC, mask);
2116 	IGC_WRITE_FLUSH(hw);
2117 
2118 	return 0;
2119 }
2120 
2121 static int
2122 eth_igc_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
2123 {
2124 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2125 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
2126 	struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
2127 	uint32_t vec = IGC_MISC_VEC_ID;
2128 
2129 	if (rte_intr_allow_others(intr_handle))
2130 		vec = IGC_RX_VEC_START;
2131 
2132 	uint32_t mask = 1u << (queue_id + vec);
2133 
2134 	IGC_WRITE_REG(hw, IGC_EIMS, mask);
2135 	IGC_WRITE_FLUSH(hw);
2136 
2137 	rte_intr_enable(intr_handle);
2138 
2139 	return 0;
2140 }
2141 
2142 static int
2143 eth_igc_flow_ctrl_get(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
2144 {
2145 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2146 	uint32_t ctrl;
2147 	int tx_pause;
2148 	int rx_pause;
2149 
2150 	fc_conf->pause_time = hw->fc.pause_time;
2151 	fc_conf->high_water = hw->fc.high_water;
2152 	fc_conf->low_water = hw->fc.low_water;
2153 	fc_conf->send_xon = hw->fc.send_xon;
2154 	fc_conf->autoneg = hw->mac.autoneg;
2155 
2156 	/*
2157 	 * Return rx_pause and tx_pause status according to actual setting of
2158 	 * the TFCE and RFCE bits in the CTRL register.
2159 	 */
2160 	ctrl = IGC_READ_REG(hw, IGC_CTRL);
2161 	if (ctrl & IGC_CTRL_TFCE)
2162 		tx_pause = 1;
2163 	else
2164 		tx_pause = 0;
2165 
2166 	if (ctrl & IGC_CTRL_RFCE)
2167 		rx_pause = 1;
2168 	else
2169 		rx_pause = 0;
2170 
2171 	if (rx_pause && tx_pause)
2172 		fc_conf->mode = RTE_FC_FULL;
2173 	else if (rx_pause)
2174 		fc_conf->mode = RTE_FC_RX_PAUSE;
2175 	else if (tx_pause)
2176 		fc_conf->mode = RTE_FC_TX_PAUSE;
2177 	else
2178 		fc_conf->mode = RTE_FC_NONE;
2179 
2180 	return 0;
2181 }
2182 
2183 static int
2184 eth_igc_flow_ctrl_set(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
2185 {
2186 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2187 	uint32_t rx_buf_size;
2188 	uint32_t max_high_water;
2189 	uint32_t rctl;
2190 	int err;
2191 
2192 	if (fc_conf->autoneg != hw->mac.autoneg)
2193 		return -ENOTSUP;
2194 
2195 	rx_buf_size = igc_get_rx_buffer_size(hw);
2196 	PMD_DRV_LOG(DEBUG, "Rx packet buffer size = 0x%x", rx_buf_size);
2197 
2198 	/* Reserve at least one full-size Ethernet frame for the high watermark */
2199 	max_high_water = rx_buf_size - RTE_ETHER_MAX_LEN;
2200 	if (fc_conf->high_water > max_high_water ||
2201 		fc_conf->high_water < fc_conf->low_water) {
2202 		PMD_DRV_LOG(ERR,
2203 			"Incorrect high(%u)/low(%u) water value, max is %u",
2204 			fc_conf->high_water, fc_conf->low_water,
2205 			max_high_water);
2206 		return -EINVAL;
2207 	}
2208 
2209 	switch (fc_conf->mode) {
2210 	case RTE_FC_NONE:
2211 		hw->fc.requested_mode = igc_fc_none;
2212 		break;
2213 	case RTE_FC_RX_PAUSE:
2214 		hw->fc.requested_mode = igc_fc_rx_pause;
2215 		break;
2216 	case RTE_FC_TX_PAUSE:
2217 		hw->fc.requested_mode = igc_fc_tx_pause;
2218 		break;
2219 	case RTE_FC_FULL:
2220 		hw->fc.requested_mode = igc_fc_full;
2221 		break;
2222 	default:
2223 		PMD_DRV_LOG(ERR, "unsupported fc mode: %u", fc_conf->mode);
2224 		return -EINVAL;
2225 	}
2226 
2227 	hw->fc.pause_time     = fc_conf->pause_time;
2228 	hw->fc.high_water     = fc_conf->high_water;
2229 	hw->fc.low_water      = fc_conf->low_water;
2230 	hw->fc.send_xon	      = fc_conf->send_xon;
2231 
2232 	err = igc_setup_link_generic(hw);
2233 	if (err == IGC_SUCCESS) {
2234 		/*
2235 		 * Check whether MAC control frames should be forwarded - the
2236 		 * link setup code doesn't do this, so write the RCTL register
2237 		 * ourselves.
2238 		 */
2239 		rctl = IGC_READ_REG(hw, IGC_RCTL);
2240 
2241 		/* set or clear the RCTL.PMCF bit depending on configuration */
2242 		if (fc_conf->mac_ctrl_frame_fwd != 0)
2243 			rctl |= IGC_RCTL_PMCF;
2244 		else
2245 			rctl &= ~IGC_RCTL_PMCF;
2246 
2247 		IGC_WRITE_REG(hw, IGC_RCTL, rctl);
2248 		IGC_WRITE_FLUSH(hw);
2249 
2250 		return 0;
2251 	}
2252 
2253 	PMD_DRV_LOG(ERR, "igc_setup_link_generic = 0x%x", err);
2254 	return -EIO;
2255 }
2256 
2257 static int
2258 eth_igc_rss_reta_update(struct rte_eth_dev *dev,
2259 			struct rte_eth_rss_reta_entry64 *reta_conf,
2260 			uint16_t reta_size)
2261 {
2262 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2263 	uint16_t i;
2264 
2265 	if (reta_size != ETH_RSS_RETA_SIZE_128) {
2266 		PMD_DRV_LOG(ERR,
2267 			"The size of the RSS redirection table configured (%d) doesn't match the number the hardware can support (%d)",
2268 			reta_size, ETH_RSS_RETA_SIZE_128);
2269 		return -EINVAL;
2270 	}
2271 
2272 	RTE_BUILD_BUG_ON(ETH_RSS_RETA_SIZE_128 % IGC_RSS_RDT_REG_SIZE);
2273 
2274 	/* set redirection table */
2275 	for (i = 0; i < ETH_RSS_RETA_SIZE_128; i += IGC_RSS_RDT_REG_SIZE) {
2276 		union igc_rss_reta_reg reta, reg;
2277 		uint16_t idx, shift;
2278 		uint8_t j, mask;
2279 
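		/*
		 * Each reta_conf[] entry describes RTE_RETA_GROUP_SIZE table
		 * entries: idx/shift locate this register's entries within the
		 * group, and mask selects which of them the caller updates.
		 */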
2280 		idx = i / RTE_RETA_GROUP_SIZE;
2281 		shift = i % RTE_RETA_GROUP_SIZE;
2282 		mask = (uint8_t)((reta_conf[idx].mask >> shift) &
2283 				IGC_RSS_RDT_REG_SIZE_MASK);
2284 
2285 		/* if no need to update the register */
2286 		if (!mask ||
2287 		    shift > (RTE_RETA_GROUP_SIZE - IGC_RSS_RDT_REG_SIZE))
2288 			continue;
2289 
2290 		/* if only some entries change, read the current register value first */
2291 		if (mask == IGC_RSS_RDT_REG_SIZE_MASK)
2292 			reg.dword = 0;
2293 		else
2294 			reg.dword = IGC_READ_REG_LE_VALUE(hw,
2295 					IGC_RETA(i / IGC_RSS_RDT_REG_SIZE));
2296 
2297 		/* update the register */
2298 		RTE_BUILD_BUG_ON(sizeof(reta.bytes) != IGC_RSS_RDT_REG_SIZE);
2299 		for (j = 0; j < IGC_RSS_RDT_REG_SIZE; j++) {
2300 			if (mask & (1u << j))
2301 				reta.bytes[j] =
2302 					(uint8_t)reta_conf[idx].reta[shift + j];
2303 			else
2304 				reta.bytes[j] = reg.bytes[j];
2305 		}
2306 		IGC_WRITE_REG_LE_VALUE(hw,
2307 			IGC_RETA(i / IGC_RSS_RDT_REG_SIZE), reta.dword);
2308 	}
2309 
2310 	return 0;
2311 }
2312 
2313 static int
2314 eth_igc_rss_reta_query(struct rte_eth_dev *dev,
2315 		       struct rte_eth_rss_reta_entry64 *reta_conf,
2316 		       uint16_t reta_size)
2317 {
2318 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2319 	uint16_t i;
2320 
2321 	if (reta_size != ETH_RSS_RETA_SIZE_128) {
2322 		PMD_DRV_LOG(ERR,
2323 			"The size of the RSS redirection table configured (%d) doesn't match the number the hardware can support (%d)",
2324 			reta_size, ETH_RSS_RETA_SIZE_128);
2325 		return -EINVAL;
2326 	}
2327 
2328 	RTE_BUILD_BUG_ON(ETH_RSS_RETA_SIZE_128 % IGC_RSS_RDT_REG_SIZE);
2329 
2330 	/* read redirection table */
2331 	for (i = 0; i < ETH_RSS_RETA_SIZE_128; i += IGC_RSS_RDT_REG_SIZE) {
2332 		union igc_rss_reta_reg reta;
2333 		uint16_t idx, shift;
2334 		uint8_t j, mask;
2335 
2336 		idx = i / RTE_RETA_GROUP_SIZE;
2337 		shift = i % RTE_RETA_GROUP_SIZE;
2338 		mask = (uint8_t)((reta_conf[idx].mask >> shift) &
2339 				IGC_RSS_RDT_REG_SIZE_MASK);
2340 
2341 		/* if no need to read register */
2342 		if (!mask ||
2343 		    shift > (RTE_RETA_GROUP_SIZE - IGC_RSS_RDT_REG_SIZE))
2344 			continue;
2345 
2346 		/* read register and get the queue index */
2347 		RTE_BUILD_BUG_ON(sizeof(reta.bytes) != IGC_RSS_RDT_REG_SIZE);
2348 		reta.dword = IGC_READ_REG_LE_VALUE(hw,
2349 				IGC_RETA(i / IGC_RSS_RDT_REG_SIZE));
2350 		for (j = 0; j < IGC_RSS_RDT_REG_SIZE; j++) {
2351 			if (mask & (1u << j))
2352 				reta_conf[idx].reta[shift + j] = reta.bytes[j];
2353 		}
2354 	}
2355 
2356 	return 0;
2357 }
2358 
2359 static int
2360 eth_igc_rss_hash_update(struct rte_eth_dev *dev,
2361 			struct rte_eth_rss_conf *rss_conf)
2362 {
2363 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2364 	igc_hw_rss_hash_set(hw, rss_conf);
2365 	return 0;
2366 }
2367 
2368 static int
2369 eth_igc_rss_hash_conf_get(struct rte_eth_dev *dev,
2370 			struct rte_eth_rss_conf *rss_conf)
2371 {
2372 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2373 	uint32_t *hash_key = (uint32_t *)rss_conf->rss_key;
2374 	uint32_t mrqc;
2375 	uint64_t rss_hf;
2376 
2377 	if (hash_key != NULL) {
2378 		int i;
2379 
2380 		/* the key length must match the hardware hash key size */
2381 		if (rss_conf->rss_key_len != IGC_HKEY_SIZE) {
2382 			PMD_DRV_LOG(ERR,
2383 				"RSS hash key size %u in parameter doesn't match the hardware hash key size %u",
2384 				rss_conf->rss_key_len, IGC_HKEY_SIZE);
2385 			return -EINVAL;
2386 		}
2387 
2388 		/* read RSS key from register */
2389 		for (i = 0; i < IGC_HKEY_MAX_INDEX; i++)
2390 			hash_key[i] = IGC_READ_REG_LE_VALUE(hw, IGC_RSSRK(i));
2391 	}
2392 
2393 	/* get RSS functions configured in MRQC register */
2394 	mrqc = IGC_READ_REG(hw, IGC_MRQC);
2395 	if ((mrqc & IGC_MRQC_ENABLE_RSS_4Q) == 0)
2396 		return 0;
2397 
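	/* translate the enabled MRQC field bits into ethdev rss_hf flags */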
2398 	rss_hf = 0;
2399 	if (mrqc & IGC_MRQC_RSS_FIELD_IPV4)
2400 		rss_hf |= ETH_RSS_IPV4;
2401 	if (mrqc & IGC_MRQC_RSS_FIELD_IPV4_TCP)
2402 		rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
2403 	if (mrqc & IGC_MRQC_RSS_FIELD_IPV6)
2404 		rss_hf |= ETH_RSS_IPV6;
2405 	if (mrqc & IGC_MRQC_RSS_FIELD_IPV6_EX)
2406 		rss_hf |= ETH_RSS_IPV6_EX;
2407 	if (mrqc & IGC_MRQC_RSS_FIELD_IPV6_TCP)
2408 		rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
2409 	if (mrqc & IGC_MRQC_RSS_FIELD_IPV6_TCP_EX)
2410 		rss_hf |= ETH_RSS_IPV6_TCP_EX;
2411 	if (mrqc & IGC_MRQC_RSS_FIELD_IPV4_UDP)
2412 		rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
2413 	if (mrqc & IGC_MRQC_RSS_FIELD_IPV6_UDP)
2414 		rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
2415 	if (mrqc & IGC_MRQC_RSS_FIELD_IPV6_UDP_EX)
2416 		rss_hf |= ETH_RSS_IPV6_UDP_EX;
2417 
2418 	rss_conf->rss_hf |= rss_hf;
2419 	return 0;
2420 }
2421 
2422 static int
2423 eth_igc_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2424 {
2425 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2426 	struct igc_vfta *shadow_vfta = IGC_DEV_PRIVATE_VFTA(dev);
2427 	uint32_t vfta;
2428 	uint32_t vid_idx;
2429 	uint32_t vid_bit;
2430 
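	/*
	 * The VFTA is a bitmap with one bit per VLAN ID: vid_idx selects the
	 * 32-bit table entry and vid_bit the bit within it.
	 */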
2431 	vid_idx = (vlan_id >> IGC_VFTA_ENTRY_SHIFT) & IGC_VFTA_ENTRY_MASK;
2432 	vid_bit = 1u << (vlan_id & IGC_VFTA_ENTRY_BIT_SHIFT_MASK);
2433 	vfta = shadow_vfta->vfta[vid_idx];
2434 	if (on)
2435 		vfta |= vid_bit;
2436 	else
2437 		vfta &= ~vid_bit;
2438 	IGC_WRITE_REG_ARRAY(hw, IGC_VFTA, vid_idx, vfta);
2439 
2440 	/* update local VFTA copy */
2441 	shadow_vfta->vfta[vid_idx] = vfta;
2442 
2443 	return 0;
2444 }
2445 
2446 static void
2447 igc_vlan_hw_filter_disable(struct rte_eth_dev *dev)
2448 {
2449 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2450 	igc_read_reg_check_clear_bits(hw, IGC_RCTL,
2451 			IGC_RCTL_CFIEN | IGC_RCTL_VFE);
2452 }
2453 
2454 static void
2455 igc_vlan_hw_filter_enable(struct rte_eth_dev *dev)
2456 {
2457 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2458 	struct igc_vfta *shadow_vfta = IGC_DEV_PRIVATE_VFTA(dev);
2459 	uint32_t reg_val;
2460 	int i;
2461 
2462 	/* Filter Table Enable, CFI not used for packet acceptance */
2463 	reg_val = IGC_READ_REG(hw, IGC_RCTL);
2464 	reg_val &= ~IGC_RCTL_CFIEN;
2465 	reg_val |= IGC_RCTL_VFE;
2466 	IGC_WRITE_REG(hw, IGC_RCTL, reg_val);
2467 
2468 	/* restore VFTA table */
2469 	for (i = 0; i < IGC_VFTA_SIZE; i++)
2470 		IGC_WRITE_REG_ARRAY(hw, IGC_VFTA, i, shadow_vfta->vfta[i]);
2471 }
2472 
2473 static void
2474 igc_vlan_hw_strip_disable(struct rte_eth_dev *dev)
2475 {
2476 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2477 
2478 	igc_read_reg_check_clear_bits(hw, IGC_CTRL, IGC_CTRL_VME);
2479 }
2480 
2481 static void
2482 igc_vlan_hw_strip_enable(struct rte_eth_dev *dev)
2483 {
2484 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2485 
2486 	igc_read_reg_check_set_bits(hw, IGC_CTRL, IGC_CTRL_VME);
2487 }
2488 
2489 static int
2490 igc_vlan_hw_extend_disable(struct rte_eth_dev *dev)
2491 {
2492 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2493 	uint32_t ctrl_ext;
2494 
2495 	ctrl_ext = IGC_READ_REG(hw, IGC_CTRL_EXT);
2496 
2497 	/* if extended VLAN hasn't been enabled, there is nothing to disable */
2498 	if ((ctrl_ext & IGC_CTRL_EXT_EXT_VLAN) == 0)
2499 		return 0;
2500 
2501 	if ((dev->data->dev_conf.rxmode.offloads &
2502 			DEV_RX_OFFLOAD_JUMBO_FRAME) == 0)
2503 		goto write_ext_vlan;
2504 
2505 	/* Update the maximum packet length: the outer VLAN tag no longer needs room */
2506 	if (dev->data->dev_conf.rxmode.max_rx_pkt_len <
2507 		RTE_ETHER_MIN_MTU + VLAN_TAG_SIZE) {
2508 		PMD_DRV_LOG(ERR, "Maximum packet length %u error, min is %u",
2509 			dev->data->dev_conf.rxmode.max_rx_pkt_len,
2510 			VLAN_TAG_SIZE + RTE_ETHER_MIN_MTU);
2511 		return -EINVAL;
2512 	}
2513 	dev->data->dev_conf.rxmode.max_rx_pkt_len -= VLAN_TAG_SIZE;
2514 	IGC_WRITE_REG(hw, IGC_RLPML,
2515 		dev->data->dev_conf.rxmode.max_rx_pkt_len);
2516 
2517 write_ext_vlan:
2518 	IGC_WRITE_REG(hw, IGC_CTRL_EXT, ctrl_ext & ~IGC_CTRL_EXT_EXT_VLAN);
2519 	return 0;
2520 }
2521 
2522 static int
2523 igc_vlan_hw_extend_enable(struct rte_eth_dev *dev)
2524 {
2525 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2526 	uint32_t ctrl_ext;
2527 
2528 	ctrl_ext = IGC_READ_REG(hw, IGC_CTRL_EXT);
2529 
2530 	/* if extended VLAN is already enabled, there is nothing to do */
2531 	if (ctrl_ext & IGC_CTRL_EXT_EXT_VLAN)
2532 		return 0;
2533 
2534 	if ((dev->data->dev_conf.rxmode.offloads &
2535 			DEV_RX_OFFLOAD_JUMBO_FRAME) == 0)
2536 		goto write_ext_vlan;
2537 
2538 	/* Update the maximum packet length to make room for the outer VLAN tag */
2539 	if (dev->data->dev_conf.rxmode.max_rx_pkt_len >
2540 		MAX_RX_JUMBO_FRAME_SIZE - VLAN_TAG_SIZE) {
2541 		PMD_DRV_LOG(ERR, "Maximum packet length %u error, max is %u",
2542 			dev->data->dev_conf.rxmode.max_rx_pkt_len +
2543 			VLAN_TAG_SIZE, MAX_RX_JUMBO_FRAME_SIZE);
2544 		return -EINVAL;
2545 	}
2546 	dev->data->dev_conf.rxmode.max_rx_pkt_len += VLAN_TAG_SIZE;
2547 	IGC_WRITE_REG(hw, IGC_RLPML,
2548 		dev->data->dev_conf.rxmode.max_rx_pkt_len);
2549 
2550 write_ext_vlan:
2551 	IGC_WRITE_REG(hw, IGC_CTRL_EXT, ctrl_ext | IGC_CTRL_EXT_EXT_VLAN);
2552 	return 0;
2553 }
2554 
2555 static int
2556 eth_igc_vlan_offload_set(struct rte_eth_dev *dev, int mask)
2557 {
2558 	struct rte_eth_rxmode *rxmode;
2559 
2560 	rxmode = &dev->data->dev_conf.rxmode;
2561 	if (mask & ETH_VLAN_STRIP_MASK) {
2562 		if (rxmode->offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
2563 			igc_vlan_hw_strip_enable(dev);
2564 		else
2565 			igc_vlan_hw_strip_disable(dev);
2566 	}
2567 
2568 	if (mask & ETH_VLAN_FILTER_MASK) {
2569 		if (rxmode->offloads & DEV_RX_OFFLOAD_VLAN_FILTER)
2570 			igc_vlan_hw_filter_enable(dev);
2571 		else
2572 			igc_vlan_hw_filter_disable(dev);
2573 	}
2574 
2575 	if (mask & ETH_VLAN_EXTEND_MASK) {
2576 		if (rxmode->offloads & DEV_RX_OFFLOAD_VLAN_EXTEND)
2577 			return igc_vlan_hw_extend_enable(dev);
2578 		else
2579 			return igc_vlan_hw_extend_disable(dev);
2580 	}
2581 
2582 	return 0;
2583 }
2584 
2585 static int
2586 eth_igc_vlan_tpid_set(struct rte_eth_dev *dev,
2587 		      enum rte_vlan_type vlan_type,
2588 		      uint16_t tpid)
2589 {
2590 	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2591 	uint32_t reg_val;
2592 
2593 	/* only the outer TPID of double VLAN can be configured */
2594 	if (vlan_type == ETH_VLAN_TYPE_OUTER) {
2595 		reg_val = IGC_READ_REG(hw, IGC_VET);
2596 		reg_val = (reg_val & (~IGC_VET_EXT)) |
2597 			((uint32_t)tpid << IGC_VET_EXT_SHIFT);
2598 		IGC_WRITE_REG(hw, IGC_VET, reg_val);
2599 
2600 		return 0;
2601 	}
2602 
2603 	/* all other TPID values are read-only */
2604 	PMD_DRV_LOG(ERR, "Not supported");
2605 	return -ENOTSUP;
2606 }
2607 
2608 static int
2609 eth_igc_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
2610 	struct rte_pci_device *pci_dev)
2611 {
2612 	PMD_INIT_FUNC_TRACE();
2613 	return rte_eth_dev_pci_generic_probe(pci_dev,
2614 		sizeof(struct igc_adapter), eth_igc_dev_init);
2615 }
2616 
2617 static int
2618 eth_igc_pci_remove(struct rte_pci_device *pci_dev)
2619 {
2620 	PMD_INIT_FUNC_TRACE();
2621 	return rte_eth_dev_pci_generic_remove(pci_dev, eth_igc_dev_uninit);
2622 }
2623 
2624 static struct rte_pci_driver rte_igc_pmd = {
2625 	.id_table = pci_id_igc_map,
2626 	.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
2627 	.probe = eth_igc_pci_probe,
2628 	.remove = eth_igc_pci_remove,
2629 };
2630 
2631 RTE_PMD_REGISTER_PCI(net_igc, rte_igc_pmd);
2632 RTE_PMD_REGISTER_PCI_TABLE(net_igc, pci_id_igc_map);
2633 RTE_PMD_REGISTER_KMOD_DEP(net_igc, "* igb_uio | uio_pci_generic | vfio-pci");
2634