xref: /dpdk/drivers/net/bonding/rte_eth_bond_pmd.c (revision 1c5c6cd8)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4 #include <stdlib.h>
5 #include <stdbool.h>
6 #include <netinet/in.h>
7 
8 #include <rte_mbuf.h>
9 #include <rte_malloc.h>
10 #include <ethdev_driver.h>
11 #include <ethdev_vdev.h>
12 #include <rte_tcp.h>
13 #include <rte_udp.h>
14 #include <rte_ip.h>
15 #include <rte_ip_frag.h>
16 #include <rte_devargs.h>
17 #include <rte_kvargs.h>
18 #include <rte_bus_vdev.h>
19 #include <rte_alarm.h>
20 #include <rte_cycles.h>
21 #include <rte_string_fns.h>
22 
23 #include "rte_eth_bond.h"
24 #include "eth_bond_private.h"
25 #include "eth_bond_8023ad_private.h"
26 
27 #define REORDER_PERIOD_MS 10
28 #define DEFAULT_POLLING_INTERVAL_10_MS (10)
29 #define BOND_MAX_MAC_ADDRS 16
30 
31 #define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)
32 
33 /* Table for statistics in mode 5 TLB */
34 static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
35 
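/*
 * Return the byte offset of any VLAN/QinQ headers that follow the Ethernet
 * header and update *proto to the encapsulated EtherType.
 */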
36 static inline size_t
37 get_vlan_offset(struct rte_ether_hdr *eth_hdr, uint16_t *proto)
38 {
39 	size_t vlan_offset = 0;
40 
41 	if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto ||
42 		rte_cpu_to_be_16(RTE_ETHER_TYPE_QINQ) == *proto) {
43 		struct rte_vlan_hdr *vlan_hdr =
44 			(struct rte_vlan_hdr *)(eth_hdr + 1);
45 
46 		vlan_offset = sizeof(struct rte_vlan_hdr);
47 		*proto = vlan_hdr->eth_proto;
48 
49 		if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto) {
50 			vlan_hdr = vlan_hdr + 1;
51 			*proto = vlan_hdr->eth_proto;
52 			vlan_offset += sizeof(struct rte_vlan_hdr);
53 		}
54 	}
55 	return vlan_offset;
56 }
57 
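/*
 * Mode 0 (round-robin) receive: poll each active slave in turn, starting at
 * the queue's saved position, until nb_pkts mbufs are gathered or every
 * slave has been polled once.
 */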
58 static uint16_t
59 bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
60 {
61 	struct bond_dev_private *internals;
62 
63 	uint16_t num_rx_total = 0;
64 	uint16_t slave_count;
65 	uint16_t active_slave;
66 	int i;
67 
68 	/* Cast to structure containing the bonded device's port id and queue id */
69 	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
70 	internals = bd_rx_q->dev_private;
71 	slave_count = internals->active_slave_count;
72 	active_slave = bd_rx_q->active_slave;
73 
74 	for (i = 0; i < slave_count && nb_pkts; i++) {
75 		uint16_t num_rx_slave;
76 
77 		/* Offset of pointer to *bufs increases as packets are received
78 		 * from other slaves */
79 		num_rx_slave =
80 			rte_eth_rx_burst(internals->active_slaves[active_slave],
81 					 bd_rx_q->queue_id,
82 					 bufs + num_rx_total, nb_pkts);
83 		num_rx_total += num_rx_slave;
84 		nb_pkts -= num_rx_slave;
85 		if (++active_slave == slave_count)
86 			active_slave = 0;
87 	}
88 
89 	if (++bd_rx_q->active_slave >= slave_count)
90 		bd_rx_q->active_slave = 0;
91 	return num_rx_total;
92 }
93 
94 static uint16_t
95 bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
96 		uint16_t nb_pkts)
97 {
98 	struct bond_dev_private *internals;
99 
100 	/* Cast to structure containing the bonded device's port id and queue id */
101 	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
102 
103 	internals = bd_rx_q->dev_private;
104 
105 	return rte_eth_rx_burst(internals->current_primary_port,
106 			bd_rx_q->queue_id, bufs, nb_pkts);
107 }
108 
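/*
 * Return non-zero when the frame is an untagged slow-protocol packet (LACP or
 * marker PDU) that must be handled by the mode 4 state machine.
 */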
109 static inline uint8_t
110 is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
111 {
112 	const uint16_t ether_type_slow_be =
113 		rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW);
114 
115 	return !((mbuf->ol_flags & RTE_MBUF_F_RX_VLAN) ? mbuf->vlan_tci : 0) &&
116 		(ethertype == ether_type_slow_be &&
117 		(subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
118 }
119 
120 /*****************************************************************************
121  * Flow director's setup for mode 4 optimization
122  */
123 
124 static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
125 	.dst.addr_bytes = { 0 },
126 	.src.addr_bytes = { 0 },
127 	.type = RTE_BE16(RTE_ETHER_TYPE_SLOW),
128 };
129 
130 static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
131 	.dst.addr_bytes = { 0 },
132 	.src.addr_bytes = { 0 },
133 	.type = 0xFFFF,
134 };
135 
136 static struct rte_flow_item flow_item_8023ad[] = {
137 	{
138 		.type = RTE_FLOW_ITEM_TYPE_ETH,
139 		.spec = &flow_item_eth_type_8023ad,
140 		.last = NULL,
141 		.mask = &flow_item_eth_mask_type_8023ad,
142 	},
143 	{
144 		.type = RTE_FLOW_ITEM_TYPE_END,
145 		.spec = NULL,
146 		.last = NULL,
147 		.mask = NULL,
148 	}
149 };
150 
151 const struct rte_flow_attr flow_attr_8023ad = {
152 	.group = 0,
153 	.priority = 0,
154 	.ingress = 1,
155 	.egress = 0,
156 	.reserved = 0,
157 };
158 
159 int
160 bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
161 		uint16_t slave_port) {
162 	struct rte_eth_dev_info slave_info;
163 	struct rte_flow_error error;
164 	struct bond_dev_private *internals = bond_dev->data->dev_private;
165 
166 	const struct rte_flow_action_queue lacp_queue_conf = {
167 		.index = 0,
168 	};
169 
170 	const struct rte_flow_action actions[] = {
171 		{
172 			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
173 			.conf = &lacp_queue_conf
174 		},
175 		{
176 			.type = RTE_FLOW_ACTION_TYPE_END,
177 		}
178 	};
179 
180 	int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
181 			flow_item_8023ad, actions, &error);
182 	if (ret < 0) {
183 		RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
184 				__func__, error.message, slave_port,
185 				internals->mode4.dedicated_queues.rx_qid);
186 		return -1;
187 	}
188 
189 	ret = rte_eth_dev_info_get(slave_port, &slave_info);
190 	if (ret != 0) {
191 		RTE_BOND_LOG(ERR,
192 			"%s: Error during getting device (port %u) info: %s\n",
193 			__func__, slave_port, strerror(-ret));
194 
195 		return ret;
196 	}
197 
198 	if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
199 			slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
200 		RTE_BOND_LOG(ERR,
201 			"%s: Slave %d capabilities doesn't allow to allocate additional queues",
202 			__func__, slave_port);
203 		return -1;
204 	}
205 
206 	return 0;
207 }
208 
209 int
210 bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
211 	struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
212 	struct bond_dev_private *internals = bond_dev->data->dev_private;
213 	struct rte_eth_dev_info bond_info;
214 	uint16_t idx;
215 	int ret;
216 
217 	/* Verify that all slaves in the bonding device support flow director */
218 	if (internals->slave_count > 0) {
219 		ret = rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);
220 		if (ret != 0) {
221 			RTE_BOND_LOG(ERR,
222 				"%s: Error during getting device (port %u) info: %s\n",
223 				__func__, bond_dev->data->port_id,
224 				strerror(-ret));
225 
226 			return ret;
227 		}
228 
229 		internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
230 		internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;
231 
232 		for (idx = 0; idx < internals->slave_count; idx++) {
233 			if (bond_ethdev_8023ad_flow_verify(bond_dev,
234 					internals->slaves[idx].port_id) != 0)
235 				return -1;
236 		}
237 	}
238 
239 	return 0;
240 }
241 
242 int
243 bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {
244 
245 	struct rte_flow_error error;
246 	struct bond_dev_private *internals = bond_dev->data->dev_private;
247 	struct rte_flow_action_queue lacp_queue_conf = {
248 		.index = internals->mode4.dedicated_queues.rx_qid,
249 	};
250 
251 	const struct rte_flow_action actions[] = {
252 		{
253 			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
254 			.conf = &lacp_queue_conf
255 		},
256 		{
257 			.type = RTE_FLOW_ACTION_TYPE_END,
258 		}
259 	};
260 
261 	internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
262 			&flow_attr_8023ad, flow_item_8023ad, actions, &error);
263 	if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
264 		RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
265 				"(slave_port=%d queue_id=%d)",
266 				error.message, slave_port,
267 				internals->mode4.dedicated_queues.rx_qid);
268 		return -1;
269 	}
270 
271 	return 0;
272 }
273 
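/*
 * Common mode 4 receive path: poll the active slaves round-robin and filter
 * the burst. Slow-protocol frames are handed to the mode 4 state machine when
 * no dedicated rx queue is used, and frames from non-collecting slaves or not
 * addressed to the bonding port (when not in promiscuous/allmulti mode) are
 * freed; the remaining packets are returned to the caller.
 */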
274 static inline uint16_t
275 rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts,
276 		bool dedicated_rxq)
277 {
278 	/* Cast to structure containing the bonded device's port id and queue id */
279 	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
280 	struct bond_dev_private *internals = bd_rx_q->dev_private;
281 	struct rte_eth_dev *bonded_eth_dev =
282 					&rte_eth_devices[internals->port_id];
283 	struct rte_ether_addr *bond_mac = bonded_eth_dev->data->mac_addrs;
284 	struct rte_ether_hdr *hdr;
285 
286 	const uint16_t ether_type_slow_be =
287 		rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW);
288 	uint16_t num_rx_total = 0;	/* Total number of received packets */
289 	uint16_t slaves[RTE_MAX_ETHPORTS];
290 	uint16_t slave_count, idx;
291 
292 	uint8_t collecting;  /* current slave collecting status */
293 	const uint8_t promisc = rte_eth_promiscuous_get(internals->port_id);
294 	const uint8_t allmulti = rte_eth_allmulticast_get(internals->port_id);
295 	uint8_t subtype;
296 	uint16_t i;
297 	uint16_t j;
298 	uint16_t k;
299 
300 	/* Copy slave list to protect against slave up/down changes during rx
301 	 * bursting */
302 	slave_count = internals->active_slave_count;
303 	memcpy(slaves, internals->active_slaves,
304 			sizeof(internals->active_slaves[0]) * slave_count);
305 
306 	idx = bd_rx_q->active_slave;
307 	if (idx >= slave_count) {
308 		bd_rx_q->active_slave = 0;
309 		idx = 0;
310 	}
311 	for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
312 		j = num_rx_total;
313 		collecting = ACTOR_STATE(&bond_mode_8023ad_ports[slaves[idx]],
314 					 COLLECTING);
315 
316 		/* Read packets from this slave */
317 		num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
318 				&bufs[num_rx_total], nb_pkts - num_rx_total);
319 
320 		for (k = j; k < 2 && k < num_rx_total; k++)
321 			rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));
322 
323 		/* Handle slow protocol packets. */
324 		while (j < num_rx_total) {
325 			if (j + 3 < num_rx_total)
326 				rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));
327 
328 			hdr = rte_pktmbuf_mtod(bufs[j], struct rte_ether_hdr *);
329 			subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;
330 
331 			/* Remove packet from array if:
332 			 * - it is a slow packet but no dedicated rxq is present,
333 			 * - slave is not in collecting state,
334 			 * - bonding interface is not in promiscuous mode:
335 			 *   - packet is unicast and address does not match,
336 			 *   - packet is multicast and bonding interface
337 			 *     is not in allmulti,
338 			 */
339 			if (unlikely(
340 				(!dedicated_rxq &&
341 				 is_lacp_packets(hdr->ether_type, subtype,
342 						 bufs[j])) ||
343 				!collecting ||
344 				(!promisc &&
345 				 ((rte_is_unicast_ether_addr(&hdr->dst_addr) &&
346 				   !rte_is_same_ether_addr(bond_mac,
347 						       &hdr->dst_addr)) ||
348 				  (!allmulti &&
349 				   rte_is_multicast_ether_addr(&hdr->dst_addr)))))) {
350 
351 				if (hdr->ether_type == ether_type_slow_be) {
352 					bond_mode_8023ad_handle_slow_pkt(
353 					    internals, slaves[idx], bufs[j]);
354 				} else
355 					rte_pktmbuf_free(bufs[j]);
356 
357 				/* Packet is managed by mode 4 or dropped, shift the array */
358 				num_rx_total--;
359 				if (j < num_rx_total) {
360 					memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
361 						(num_rx_total - j));
362 				}
363 			} else
364 				j++;
365 		}
366 		if (unlikely(++idx == slave_count))
367 			idx = 0;
368 	}
369 
370 	if (++bd_rx_q->active_slave >= slave_count)
371 		bd_rx_q->active_slave = 0;
372 
373 	return num_rx_total;
374 }
375 
376 static uint16_t
377 bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
378 		uint16_t nb_pkts)
379 {
380 	return rx_burst_8023ad(queue, bufs, nb_pkts, false);
381 }
382 
383 static uint16_t
384 bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
385 		uint16_t nb_pkts)
386 {
387 	return rx_burst_8023ad(queue, bufs, nb_pkts, true);
388 }
389 
390 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
391 uint32_t burstnumberRX;
392 uint32_t burstnumberTX;
393 
394 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
395 
396 static void
397 arp_op_name(uint16_t arp_op, char *buf, size_t buf_len)
398 {
399 	switch (arp_op) {
400 	case RTE_ARP_OP_REQUEST:
401 		strlcpy(buf, "ARP Request", buf_len);
402 		return;
403 	case RTE_ARP_OP_REPLY:
404 		strlcpy(buf, "ARP Reply", buf_len);
405 		return;
406 	case RTE_ARP_OP_REVREQUEST:
407 		strlcpy(buf, "Reverse ARP Request", buf_len);
408 		return;
409 	case RTE_ARP_OP_REVREPLY:
410 		strlcpy(buf, "Reverse ARP Reply", buf_len);
411 		return;
412 	case RTE_ARP_OP_INVREQUEST:
413 		strlcpy(buf, "Peer Identify Request", buf_len);
414 		return;
415 	case RTE_ARP_OP_INVREPLY:
416 		strlcpy(buf, "Peer Identify Reply", buf_len);
417 		return;
418 	default:
419 		break;
420 	}
421 	strlcpy(buf, "Unknown", buf_len);
422 	return;
423 }
424 #endif
425 #define MaxIPv4String	16
426 static void
427 ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
428 {
429 	uint32_t ipv4_addr;
430 
431 	ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
432 	snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
433 		(ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
434 		ipv4_addr & 0xFF);
435 }
436 
437 #define MAX_CLIENTS_NUMBER	128
438 uint8_t active_clients;
439 struct client_stats_t {
440 	uint16_t port;
441 	uint32_t ipv4_addr;
442 	uint32_t ipv4_rx_packets;
443 	uint32_t ipv4_tx_packets;
444 };
445 struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
446 
447 static void
448 update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
449 {
450 	int i = 0;
451 
452 	for (; i < MAX_CLIENTS_NUMBER; i++)	{
453 		if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port))	{
454 			/* Just update the RX or TX packet count for this client */
455 			if (TXorRXindicator == &burstnumberRX)
456 				client_stats[i].ipv4_rx_packets++;
457 			else
458 				client_stats[i].ipv4_tx_packets++;
459 			return;
460 		}
461 	}
462 	/* We have a new client. Insert it into the table and increment its stats */
463 	if (TXorRXindicator == &burstnumberRX)
464 		client_stats[active_clients].ipv4_rx_packets++;
465 	else
466 		client_stats[active_clients].ipv4_tx_packets++;
467 	client_stats[active_clients].ipv4_addr = addr;
468 	client_stats[active_clients].port = port;
469 	active_clients++;
470 
471 }
472 
473 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
474 #define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
475 	rte_log(RTE_LOG_DEBUG, bond_logtype,				\
476 		"%s port:%d SrcMAC:" RTE_ETHER_ADDR_PRT_FMT " SrcIP:%s " \
477 		"DstMAC:" RTE_ETHER_ADDR_PRT_FMT " DstIP:%s %s %d\n", \
478 		info,							\
479 		port,							\
480 		RTE_ETHER_ADDR_BYTES(&eth_h->src_addr),                  \
481 		src_ip,							\
482 		RTE_ETHER_ADDR_BYTES(&eth_h->dst_addr),                  \
483 		dst_ip,							\
484 		arp_op, ++burstnumber)
485 #endif
486 
487 static void
488 mode6_debug(const char __rte_unused *info,
489 	struct rte_ether_hdr *eth_h, uint16_t port,
490 	uint32_t __rte_unused *burstnumber)
491 {
492 	struct rte_ipv4_hdr *ipv4_h;
493 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
494 	struct rte_arp_hdr *arp_h;
495 	char dst_ip[16];
496 	char ArpOp[24];
497 	char buf[16];
498 #endif
499 	char src_ip[16];
500 
501 	uint16_t ether_type = eth_h->ether_type;
502 	uint16_t offset = get_vlan_offset(eth_h, &ether_type);
503 
504 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
505 	strlcpy(buf, info, 16);
506 #endif
507 
508 	if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) {
509 		ipv4_h = (struct rte_ipv4_hdr *)((char *)(eth_h + 1) + offset);
510 		ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
511 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
512 		ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
513 		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
514 #endif
515 		update_client_stats(ipv4_h->src_addr, port, burstnumber);
516 	}
517 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
518 	else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
519 		arp_h = (struct rte_arp_hdr *)((char *)(eth_h + 1) + offset);
520 		ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
521 		ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
522 		arp_op_name(rte_be_to_cpu_16(arp_h->arp_opcode),
523 				ArpOp, sizeof(ArpOp));
524 		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
525 	}
526 #endif
527 }
528 #endif
529 
530 static uint16_t
531 bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
532 {
533 	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
534 	struct bond_dev_private *internals = bd_rx_q->dev_private;
535 	struct rte_ether_hdr *eth_h;
536 	uint16_t ether_type, offset;
537 	uint16_t nb_recv_pkts;
538 	int i;
539 
540 	nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);
541 
542 	for (i = 0; i < nb_recv_pkts; i++) {
543 		eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
544 		ether_type = eth_h->ether_type;
545 		offset = get_vlan_offset(eth_h, &ether_type);
546 
547 		if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
548 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
549 			mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
550 #endif
551 			bond_mode_alb_arp_recv(eth_h, offset, internals);
552 		}
553 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
554 		else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4))
555 			mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
556 #endif
557 	}
558 
559 	return nb_recv_pkts;
560 }
561 
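/*
 * Mode 0 (round-robin) transmit: spread the burst across the active slaves
 * packet by packet; packets a slave fails to send are moved to the tail of
 * bufs so the caller can retry them.
 */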
562 static uint16_t
563 bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
564 		uint16_t nb_pkts)
565 {
566 	struct bond_dev_private *internals;
567 	struct bond_tx_queue *bd_tx_q;
568 
569 	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
570 	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
571 
572 	uint16_t num_of_slaves;
573 	uint16_t slaves[RTE_MAX_ETHPORTS];
574 
575 	uint16_t num_tx_total = 0, num_tx_slave;
576 
577 	static int slave_idx = 0;
578 	int i, cslave_idx = 0, tx_fail_total = 0;
579 
580 	bd_tx_q = (struct bond_tx_queue *)queue;
581 	internals = bd_tx_q->dev_private;
582 
583 	/* Copy slave list to protect against slave up/down changes during tx
584 	 * bursting */
585 	num_of_slaves = internals->active_slave_count;
586 	memcpy(slaves, internals->active_slaves,
587 			sizeof(internals->active_slaves[0]) * num_of_slaves);
588 
589 	if (num_of_slaves < 1)
590 		return num_tx_total;
591 
592 	/* Populate per-slave mbuf arrays with the packets to be sent on each slave */
593 	for (i = 0; i < nb_pkts; i++) {
594 		cslave_idx = (slave_idx + i) % num_of_slaves;
595 		slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
596 	}
597 
598 	/* increment current slave index so the next call to tx burst starts on the
599 	 * next slave */
600 	slave_idx = ++cslave_idx;
601 
602 	/* Send packet burst on each slave device */
603 	for (i = 0; i < num_of_slaves; i++) {
604 		if (slave_nb_pkts[i] > 0) {
605 			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
606 					slave_bufs[i], slave_nb_pkts[i]);
607 
608 			/* if tx burst fails move packets to end of bufs */
609 			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
610 				int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;
611 
612 				tx_fail_total += tx_fail_slave;
613 
614 				memcpy(&bufs[nb_pkts - tx_fail_total],
615 				       &slave_bufs[i][num_tx_slave],
616 				       tx_fail_slave * sizeof(bufs[0]));
617 			}
618 			num_tx_total += num_tx_slave;
619 		}
620 	}
621 
622 	return num_tx_total;
623 }
624 
625 static uint16_t
626 bond_ethdev_tx_burst_active_backup(void *queue,
627 		struct rte_mbuf **bufs, uint16_t nb_pkts)
628 {
629 	struct bond_dev_private *internals;
630 	struct bond_tx_queue *bd_tx_q;
631 
632 	bd_tx_q = (struct bond_tx_queue *)queue;
633 	internals = bd_tx_q->dev_private;
634 
635 	if (internals->active_slave_count < 1)
636 		return 0;
637 
638 	return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
639 			bufs, nb_pkts);
640 }
641 
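/* XOR-fold the source and destination MAC addresses into a 16-bit value. */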
642 static inline uint16_t
643 ether_hash(struct rte_ether_hdr *eth_hdr)
644 {
645 	unaligned_uint16_t *word_src_addr =
646 		(unaligned_uint16_t *)eth_hdr->src_addr.addr_bytes;
647 	unaligned_uint16_t *word_dst_addr =
648 		(unaligned_uint16_t *)eth_hdr->dst_addr.addr_bytes;
649 
650 	return (word_src_addr[0] ^ word_dst_addr[0]) ^
651 			(word_src_addr[1] ^ word_dst_addr[1]) ^
652 			(word_src_addr[2] ^ word_dst_addr[2]);
653 }
654 
655 static inline uint32_t
656 ipv4_hash(struct rte_ipv4_hdr *ipv4_hdr)
657 {
658 	return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
659 }
660 
661 static inline uint32_t
662 ipv6_hash(struct rte_ipv6_hdr *ipv6_hdr)
663 {
664 	unaligned_uint32_t *word_src_addr =
665 		(unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
666 	unaligned_uint32_t *word_dst_addr =
667 		(unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);
668 
669 	return (word_src_addr[0] ^ word_dst_addr[0]) ^
670 			(word_src_addr[1] ^ word_dst_addr[1]) ^
671 			(word_src_addr[2] ^ word_dst_addr[2]) ^
672 			(word_src_addr[3] ^ word_dst_addr[3]);
673 }
674 
675 
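/*
 * Transmit hashing policies: the three helpers below map each mbuf in a burst
 * to an output slave index using an L2 (MAC), L2+L3 or L3+L4 header hash
 * respectively.
 */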
676 void
677 burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
678 		uint16_t slave_count, uint16_t *slaves)
679 {
680 	struct rte_ether_hdr *eth_hdr;
681 	uint32_t hash;
682 	int i;
683 
684 	for (i = 0; i < nb_pkts; i++) {
685 		eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
686 
687 		hash = ether_hash(eth_hdr);
688 
689 		slaves[i] = (hash ^= hash >> 8) % slave_count;
690 	}
691 }
692 
693 void
694 burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
695 		uint16_t slave_count, uint16_t *slaves)
696 {
697 	uint16_t i;
698 	struct rte_ether_hdr *eth_hdr;
699 	uint16_t proto;
700 	size_t vlan_offset;
701 	uint32_t hash, l3hash;
702 
703 	for (i = 0; i < nb_pkts; i++) {
704 		eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
705 		l3hash = 0;
706 
707 		proto = eth_hdr->ether_type;
708 		hash = ether_hash(eth_hdr);
709 
710 		vlan_offset = get_vlan_offset(eth_hdr, &proto);
711 
712 		if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) == proto) {
713 			struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *)
714 					((char *)(eth_hdr + 1) + vlan_offset);
715 			l3hash = ipv4_hash(ipv4_hdr);
716 
717 		} else if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) == proto) {
718 			struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *)
719 					((char *)(eth_hdr + 1) + vlan_offset);
720 			l3hash = ipv6_hash(ipv6_hdr);
721 		}
722 
723 		hash = hash ^ l3hash;
724 		hash ^= hash >> 16;
725 		hash ^= hash >> 8;
726 
727 		slaves[i] = hash % slave_count;
728 	}
729 }
730 
731 void
732 burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
733 		uint16_t slave_count, uint16_t *slaves)
734 {
735 	struct rte_ether_hdr *eth_hdr;
736 	uint16_t proto;
737 	size_t vlan_offset;
738 	int i;
739 
740 	struct rte_udp_hdr *udp_hdr;
741 	struct rte_tcp_hdr *tcp_hdr;
742 	uint32_t hash, l3hash, l4hash;
743 
744 	for (i = 0; i < nb_pkts; i++) {
745 		eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
746 		size_t pkt_end = (size_t)eth_hdr + rte_pktmbuf_data_len(buf[i]);
747 		proto = eth_hdr->ether_type;
748 		vlan_offset = get_vlan_offset(eth_hdr, &proto);
749 		l3hash = 0;
750 		l4hash = 0;
751 
752 		if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) == proto) {
753 			struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *)
754 					((char *)(eth_hdr + 1) + vlan_offset);
755 			size_t ip_hdr_offset;
756 
757 			l3hash = ipv4_hash(ipv4_hdr);
758 
759 			/* there is no L4 header in fragmented packet */
760 			if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
761 								== 0)) {
762 				ip_hdr_offset = (ipv4_hdr->version_ihl
763 					& RTE_IPV4_HDR_IHL_MASK) *
764 					RTE_IPV4_IHL_MULTIPLIER;
765 
766 				if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
767 					tcp_hdr = (struct rte_tcp_hdr *)
768 						((char *)ipv4_hdr +
769 							ip_hdr_offset);
770 					if ((size_t)tcp_hdr + sizeof(*tcp_hdr)
771 							< pkt_end)
772 						l4hash = HASH_L4_PORTS(tcp_hdr);
773 				} else if (ipv4_hdr->next_proto_id ==
774 								IPPROTO_UDP) {
775 					udp_hdr = (struct rte_udp_hdr *)
776 						((char *)ipv4_hdr +
777 							ip_hdr_offset);
778 					if ((size_t)udp_hdr + sizeof(*udp_hdr)
779 							< pkt_end)
780 						l4hash = HASH_L4_PORTS(udp_hdr);
781 				}
782 			}
783 		} else if  (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) == proto) {
784 			struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *)
785 					((char *)(eth_hdr + 1) + vlan_offset);
786 			l3hash = ipv6_hash(ipv6_hdr);
787 
788 			if (ipv6_hdr->proto == IPPROTO_TCP) {
789 				tcp_hdr = (struct rte_tcp_hdr *)(ipv6_hdr + 1);
790 				l4hash = HASH_L4_PORTS(tcp_hdr);
791 			} else if (ipv6_hdr->proto == IPPROTO_UDP) {
792 				udp_hdr = (struct rte_udp_hdr *)(ipv6_hdr + 1);
793 				l4hash = HASH_L4_PORTS(udp_hdr);
794 			}
795 		}
796 
797 		hash = l3hash ^ l4hash;
798 		hash ^= hash >> 16;
799 		hash ^= hash >> 8;
800 
801 		slaves[i] = hash % slave_count;
802 	}
803 }
804 
805 struct bwg_slave {
806 	uint64_t bwg_left_int;
807 	uint64_t bwg_left_remainder;
808 	uint16_t slave;
809 };
810 
811 void
812 bond_tlb_activate_slave(struct bond_dev_private *internals) {
813 	int i;
814 
815 	for (i = 0; i < internals->active_slave_count; i++) {
816 		tlb_last_obytets[internals->active_slaves[i]] = 0;
817 	}
818 }
819 
820 static int
821 bandwidth_cmp(const void *a, const void *b)
822 {
823 	const struct bwg_slave *bwg_a = a;
824 	const struct bwg_slave *bwg_b = b;
825 	int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
826 	int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
827 			(int64_t)bwg_a->bwg_left_remainder;
828 	if (diff > 0)
829 		return 1;
830 	else if (diff < 0)
831 		return -1;
832 	else if (diff2 > 0)
833 		return 1;
834 	else if (diff2 < 0)
835 		return -1;
836 	else
837 		return 0;
838 }
839 
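/*
 * Estimate the fraction of the slave's link capacity left unused in the
 * current reorder window, based on the bytes transmitted since the last
 * stats snapshot.
 */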
840 static void
841 bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
842 		struct bwg_slave *bwg_slave)
843 {
844 	struct rte_eth_link link_status;
845 	int ret;
846 
847 	ret = rte_eth_link_get_nowait(port_id, &link_status);
848 	if (ret < 0) {
849 		RTE_BOND_LOG(ERR, "Slave (port %u) link get failed: %s",
850 			     port_id, rte_strerror(-ret));
851 		return;
852 	}
853 	uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
854 	if (link_bwg == 0)
855 		return;
856 	link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
857 	bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
858 	bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
859 }
860 
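/*
 * Periodic alarm callback for TLB mode: rank the active slaves by remaining
 * bandwidth, refresh the transmit order, then re-arm the alarm.
 */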
861 static void
862 bond_ethdev_update_tlb_slave_cb(void *arg)
863 {
864 	struct bond_dev_private *internals = arg;
865 	struct rte_eth_stats slave_stats;
866 	struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
867 	uint16_t slave_count;
868 	uint64_t tx_bytes;
869 
870 	uint8_t update_stats = 0;
871 	uint16_t slave_id;
872 	uint16_t i;
873 
874 	internals->slave_update_idx++;
875 
876 
877 	if (internals->slave_update_idx >= REORDER_PERIOD_MS)
878 		update_stats = 1;
879 
880 	for (i = 0; i < internals->active_slave_count; i++) {
881 		slave_id = internals->active_slaves[i];
882 		rte_eth_stats_get(slave_id, &slave_stats);
883 		tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
884 		bandwidth_left(slave_id, tx_bytes,
885 				internals->slave_update_idx, &bwg_array[i]);
886 		bwg_array[i].slave = slave_id;
887 
888 		if (update_stats) {
889 			tlb_last_obytets[slave_id] = slave_stats.obytes;
890 		}
891 	}
892 
893 	if (update_stats == 1)
894 		internals->slave_update_idx = 0;
895 
896 	slave_count = i;
897 	qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
898 	for (i = 0; i < slave_count; i++)
899 		internals->tlb_slaves_order[i] = bwg_array[i].slave;
900 
901 	rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
902 			(struct bond_dev_private *)internals);
903 }
904 
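/*
 * Mode 5 (TLB) transmit: walk the slaves in bandwidth order, rewriting the
 * source MAC of packets that carry the primary slave's address so they match
 * the slave actually used for transmission.
 */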
905 static uint16_t
906 bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
907 {
908 	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
909 	struct bond_dev_private *internals = bd_tx_q->dev_private;
910 
911 	struct rte_eth_dev *primary_port =
912 			&rte_eth_devices[internals->primary_port];
913 	uint16_t num_tx_total = 0;
914 	uint16_t i, j;
915 
916 	uint16_t num_of_slaves = internals->active_slave_count;
917 	uint16_t slaves[RTE_MAX_ETHPORTS];
918 
919 	struct rte_ether_hdr *ether_hdr;
920 	struct rte_ether_addr primary_slave_addr;
921 	struct rte_ether_addr active_slave_addr;
922 
923 	if (num_of_slaves < 1)
924 		return num_tx_total;
925 
926 	memcpy(slaves, internals->tlb_slaves_order,
927 				sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);
928 
929 
930 	rte_ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);
931 
932 	if (nb_pkts > 3) {
933 		for (i = 0; i < 3; i++)
934 			rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
935 	}
936 
937 	for (i = 0; i < num_of_slaves; i++) {
938 		rte_eth_macaddr_get(slaves[i], &active_slave_addr);
939 		for (j = num_tx_total; j < nb_pkts; j++) {
940 			if (j + 3 < nb_pkts)
941 				rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));
942 
943 			ether_hdr = rte_pktmbuf_mtod(bufs[j],
944 						struct rte_ether_hdr *);
945 			if (rte_is_same_ether_addr(&ether_hdr->src_addr,
946 							&primary_slave_addr))
947 				rte_ether_addr_copy(&active_slave_addr,
948 						&ether_hdr->src_addr);
949 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
950 					mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
951 #endif
952 		}
953 
954 		num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
955 				bufs + num_tx_total, nb_pkts - num_tx_total);
956 
957 		if (num_tx_total == nb_pkts)
958 			break;
959 	}
960 
961 	return num_tx_total;
962 }
963 
964 void
965 bond_tlb_disable(struct bond_dev_private *internals)
966 {
967 	rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
968 }
969 
970 void
971 bond_tlb_enable(struct bond_dev_private *internals)
972 {
973 	bond_ethdev_update_tlb_slave_cb(internals);
974 }
975 
976 static uint16_t
977 bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
978 {
979 	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
980 	struct bond_dev_private *internals = bd_tx_q->dev_private;
981 
982 	struct rte_ether_hdr *eth_h;
983 	uint16_t ether_type, offset;
984 
985 	struct client_data *client_info;
986 
987 	/*
988 	 * We create transmit buffers for every slave and one additional to send
989 	 * through tlb. In the worst case every packet will be sent on one port.
990 	 */
991 	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
992 	uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };
993 
994 	/*
995 	 * We create separate transmit buffers for update packets as they won't
996 	 * be counted in num_tx_total.
997 	 */
998 	struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
999 	uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };
1000 
1001 	struct rte_mbuf *upd_pkt;
1002 	size_t pkt_size;
1003 
1004 	uint16_t num_send, num_not_send = 0;
1005 	uint16_t num_tx_total = 0;
1006 	uint16_t slave_idx;
1007 
1008 	int i, j;
1009 
1010 	/* Search tx buffer for ARP packets and forward them to alb */
1011 	for (i = 0; i < nb_pkts; i++) {
1012 		eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
1013 		ether_type = eth_h->ether_type;
1014 		offset = get_vlan_offset(eth_h, &ether_type);
1015 
1016 		if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
1017 			slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);
1018 
1019 			/* Change src mac in eth header */
1020 			rte_eth_macaddr_get(slave_idx, &eth_h->src_addr);
1021 
1022 			/* Add packet to slave tx buffer */
1023 			slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
1024 			slave_bufs_pkts[slave_idx]++;
1025 		} else {
1026 			/* If packet is not ARP, send it with TLB policy */
1027 			slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
1028 					bufs[i];
1029 			slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
1030 		}
1031 	}
1032 
1033 	/* Update connected client ARP tables */
1034 	if (internals->mode6.ntt) {
1035 		for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
1036 			client_info = &internals->mode6.client_table[i];
1037 
1038 			if (client_info->in_use) {
1039 				/* Allocate new packet to send ARP update on current slave */
1040 				upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
1041 				if (upd_pkt == NULL) {
1042 					RTE_BOND_LOG(ERR,
1043 						     "Failed to allocate ARP packet from pool");
1044 					continue;
1045 				}
1046 				pkt_size = sizeof(struct rte_ether_hdr) +
1047 					sizeof(struct rte_arp_hdr) +
1048 					client_info->vlan_count *
1049 					sizeof(struct rte_vlan_hdr);
1050 				upd_pkt->data_len = pkt_size;
1051 				upd_pkt->pkt_len = pkt_size;
1052 
1053 				slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
1054 						internals);
1055 
1056 				/* Add packet to update tx buffer */
1057 				update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
1058 				update_bufs_pkts[slave_idx]++;
1059 			}
1060 		}
1061 		internals->mode6.ntt = 0;
1062 	}
1063 
1064 	/* Send ARP packets on proper slaves */
1065 	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1066 		if (slave_bufs_pkts[i] > 0) {
1067 			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
1068 					slave_bufs[i], slave_bufs_pkts[i]);
1069 			for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
1070 				bufs[nb_pkts - 1 - num_not_send - j] =
1071 						slave_bufs[i][nb_pkts - 1 - j];
1072 			}
1073 
1074 			num_tx_total += num_send;
1075 			num_not_send += slave_bufs_pkts[i] - num_send;
1076 
1077 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1078 	/* Print TX stats including update packets */
1079 			for (j = 0; j < slave_bufs_pkts[i]; j++) {
1080 				eth_h = rte_pktmbuf_mtod(slave_bufs[i][j],
1081 							struct rte_ether_hdr *);
1082 				mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
1083 			}
1084 #endif
1085 		}
1086 	}
1087 
1088 	/* Send update packets on proper slaves */
1089 	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1090 		if (update_bufs_pkts[i] > 0) {
1091 			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
1092 					update_bufs_pkts[i]);
1093 			for (j = num_send; j < update_bufs_pkts[i]; j++) {
1094 				rte_pktmbuf_free(update_bufs[i][j]);
1095 			}
1096 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1097 			for (j = 0; j < update_bufs_pkts[i]; j++) {
1098 				eth_h = rte_pktmbuf_mtod(update_bufs[i][j],
1099 							struct rte_ether_hdr *);
1100 				mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
1101 			}
1102 #endif
1103 		}
1104 	}
1105 
1106 	/* Send non-ARP packets using tlb policy */
1107 	if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
1108 		num_send = bond_ethdev_tx_burst_tlb(queue,
1109 				slave_bufs[RTE_MAX_ETHPORTS],
1110 				slave_bufs_pkts[RTE_MAX_ETHPORTS]);
1111 
1112 		for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
1113 			bufs[nb_pkts - 1 - num_not_send - j] =
1114 					slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
1115 		}
1116 
1117 		num_tx_total += num_send;
1118 	}
1119 
1120 	return num_tx_total;
1121 }
1122 
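/*
 * Hash each packet to one of the supplied slaves and transmit the resulting
 * per-slave bursts; untransmitted packets are compacted to the tail of bufs.
 */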
1123 static inline uint16_t
1124 tx_burst_balance(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs,
1125 		 uint16_t *slave_port_ids, uint16_t slave_count)
1126 {
1127 	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1128 	struct bond_dev_private *internals = bd_tx_q->dev_private;
1129 
1130 	/* Array to sort mbufs for transmission on each slave into */
1131 	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1132 	/* Number of mbufs for transmission on each slave */
1133 	uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1134 	/* Mapping array generated by hash function to map mbufs to slaves */
1135 	uint16_t bufs_slave_port_idxs[nb_bufs];
1136 
1137 	uint16_t slave_tx_count;
1138 	uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1139 
1140 	uint16_t i;
1141 
1142 	/*
1143 	 * Populate per-slave mbuf arrays with the packets to be sent on each slave,
1144 	 * selecting the output slave with a hash based on the xmit policy
1145 	 */
1146 	internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
1147 			bufs_slave_port_idxs);
1148 
1149 	for (i = 0; i < nb_bufs; i++) {
1150 		/* Populate slave mbuf arrays with mbufs for that slave. */
1151 		uint16_t slave_idx = bufs_slave_port_idxs[i];
1152 
1153 		slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
1154 	}
1155 
1156 	/* Send packet burst on each slave device */
1157 	for (i = 0; i < slave_count; i++) {
1158 		if (slave_nb_bufs[i] == 0)
1159 			continue;
1160 
1161 		slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1162 				bd_tx_q->queue_id, slave_bufs[i],
1163 				slave_nb_bufs[i]);
1164 
1165 		total_tx_count += slave_tx_count;
1166 
1167 		/* If tx burst fails move packets to end of bufs */
1168 		if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1169 			int slave_tx_fail_count = slave_nb_bufs[i] -
1170 					slave_tx_count;
1171 			total_tx_fail_count += slave_tx_fail_count;
1172 			memcpy(&bufs[nb_bufs - total_tx_fail_count],
1173 			       &slave_bufs[i][slave_tx_count],
1174 			       slave_tx_fail_count * sizeof(bufs[0]));
1175 		}
1176 	}
1177 
1178 	return total_tx_count;
1179 }
1180 
1181 static uint16_t
1182 bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
1183 		uint16_t nb_bufs)
1184 {
1185 	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1186 	struct bond_dev_private *internals = bd_tx_q->dev_private;
1187 
1188 	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1189 	uint16_t slave_count;
1190 
1191 	if (unlikely(nb_bufs == 0))
1192 		return 0;
1193 
1194 	/* Copy slave list to protect against slave up/down changes during tx
1195 	 * bursting
1196 	 */
1197 	slave_count = internals->active_slave_count;
1198 	if (unlikely(slave_count < 1))
1199 		return 0;
1200 
1201 	memcpy(slave_port_ids, internals->active_slaves,
1202 			sizeof(slave_port_ids[0]) * slave_count);
1203 	return tx_burst_balance(queue, bufs, nb_bufs, slave_port_ids,
1204 				slave_count);
1205 }
1206 
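/*
 * Common mode 4 transmit path: first drain any pending LACP control frames
 * from each slave's tx_ring (skipped when a dedicated control queue is used),
 * then balance the data burst across the slaves in DISTRIBUTING state.
 */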
1207 static inline uint16_t
1208 tx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs,
1209 		bool dedicated_txq)
1210 {
1211 	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1212 	struct bond_dev_private *internals = bd_tx_q->dev_private;
1213 
1214 	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1215 	uint16_t slave_count;
1216 
1217 	uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
1218 	uint16_t dist_slave_count;
1219 
1220 	uint16_t slave_tx_count;
1221 
1222 	uint16_t i;
1223 
1224 	/* Copy slave list to protect against slave up/down changes during tx
1225 	 * bursting */
1226 	slave_count = internals->active_slave_count;
1227 	if (unlikely(slave_count < 1))
1228 		return 0;
1229 
1230 	memcpy(slave_port_ids, internals->active_slaves,
1231 			sizeof(slave_port_ids[0]) * slave_count);
1232 
1233 	if (dedicated_txq)
1234 		goto skip_tx_ring;
1235 
1236 	/* Check for LACP control packets and send if available */
1237 	for (i = 0; i < slave_count; i++) {
1238 		struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
1239 		struct rte_mbuf *ctrl_pkt = NULL;
1240 
1241 		if (likely(rte_ring_empty(port->tx_ring)))
1242 			continue;
1243 
1244 		if (rte_ring_dequeue(port->tx_ring,
1245 				     (void **)&ctrl_pkt) != -ENOENT) {
1246 			slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1247 					bd_tx_q->queue_id, &ctrl_pkt, 1);
1248 			/*
1249 			 * re-enqueue LAG control plane packets to buffering
1250 			 * ring if transmission fails so the packet isn't lost.
1251 			 */
1252 			if (slave_tx_count != 1)
1253 				rte_ring_enqueue(port->tx_ring,	ctrl_pkt);
1254 		}
1255 	}
1256 
1257 skip_tx_ring:
1258 	if (unlikely(nb_bufs == 0))
1259 		return 0;
1260 
1261 	dist_slave_count = 0;
1262 	for (i = 0; i < slave_count; i++) {
1263 		struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
1264 
1265 		if (ACTOR_STATE(port, DISTRIBUTING))
1266 			dist_slave_port_ids[dist_slave_count++] =
1267 					slave_port_ids[i];
1268 	}
1269 
1270 	if (unlikely(dist_slave_count < 1))
1271 		return 0;
1272 
1273 	return tx_burst_balance(queue, bufs, nb_bufs, dist_slave_port_ids,
1274 				dist_slave_count);
1275 }
1276 
1277 static uint16_t
1278 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
1279 		uint16_t nb_bufs)
1280 {
1281 	return tx_burst_8023ad(queue, bufs, nb_bufs, false);
1282 }
1283 
1284 static uint16_t
1285 bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
1286 		uint16_t nb_bufs)
1287 {
1288 	return tx_burst_8023ad(queue, bufs, nb_bufs, true);
1289 }
1290 
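/*
 * Mode 3 (broadcast) transmit: bump each mbuf's refcount and send the whole
 * burst on every active slave; the return value reflects the most successful
 * slave, and surplus references held for the other slaves are freed on failure.
 */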
1291 static uint16_t
1292 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1293 		uint16_t nb_pkts)
1294 {
1295 	struct bond_dev_private *internals;
1296 	struct bond_tx_queue *bd_tx_q;
1297 
1298 	uint16_t slaves[RTE_MAX_ETHPORTS];
1299 	uint8_t tx_failed_flag = 0;
1300 	uint16_t num_of_slaves;
1301 
1302 	uint16_t max_nb_of_tx_pkts = 0;
1303 
1304 	int slave_tx_total[RTE_MAX_ETHPORTS];
1305 	int i, most_successful_tx_slave = -1;
1306 
1307 	bd_tx_q = (struct bond_tx_queue *)queue;
1308 	internals = bd_tx_q->dev_private;
1309 
1310 	/* Copy slave list to protect against slave up/down changes during tx
1311 	 * bursting */
1312 	num_of_slaves = internals->active_slave_count;
1313 	memcpy(slaves, internals->active_slaves,
1314 			sizeof(internals->active_slaves[0]) * num_of_slaves);
1315 
1316 	if (num_of_slaves < 1)
1317 		return 0;
1318 
1319 	/* Increment reference count on mbufs */
1320 	for (i = 0; i < nb_pkts; i++)
1321 		rte_pktmbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1322 
1323 	/* Transmit burst on each active slave */
1324 	for (i = 0; i < num_of_slaves; i++) {
1325 		slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1326 					bufs, nb_pkts);
1327 
1328 		if (unlikely(slave_tx_total[i] < nb_pkts))
1329 			tx_failed_flag = 1;
1330 
1331 		/* record the value and slave index for the slave which transmits the
1332 		 * maximum number of packets */
1333 		if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1334 			max_nb_of_tx_pkts = slave_tx_total[i];
1335 			most_successful_tx_slave = i;
1336 		}
1337 	}
1338 
1339 	/* if slaves fail to transmit packets from burst, the calling application
1340 	 * is not expected to know about multiple references to packets so we must
1341 	 * handle failures of all packets except those of the most successful slave
1342 	 */
1343 	if (unlikely(tx_failed_flag))
1344 		for (i = 0; i < num_of_slaves; i++)
1345 			if (i != most_successful_tx_slave)
1346 				while (slave_tx_total[i] < nb_pkts)
1347 					rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1348 
1349 	return max_nb_of_tx_pkts;
1350 }
1351 
1352 static void
1353 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1354 {
1355 	struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1356 
1357 	if (bond_ctx->mode == BONDING_MODE_8023AD) {
1358 		/**
1359 		 * If in mode 4 then save the link properties of the first
1360 		 * slave; all subsequent slaves must match these properties
1361 		 */
1362 		struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1363 
1364 		bond_link->link_autoneg = slave_link->link_autoneg;
1365 		bond_link->link_duplex = slave_link->link_duplex;
1366 		bond_link->link_speed = slave_link->link_speed;
1367 	} else {
1368 		/**
1369 		 * In any other mode the link properties are set to default
1370 		 * values of AUTONEG/DUPLEX
1371 		 */
1372 		ethdev->data->dev_link.link_autoneg = RTE_ETH_LINK_AUTONEG;
1373 		ethdev->data->dev_link.link_duplex = RTE_ETH_LINK_FULL_DUPLEX;
1374 	}
1375 }
1376 
1377 static int
1378 link_properties_valid(struct rte_eth_dev *ethdev,
1379 		struct rte_eth_link *slave_link)
1380 {
1381 	struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1382 
1383 	if (bond_ctx->mode == BONDING_MODE_8023AD) {
1384 		struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1385 
1386 		if (bond_link->link_duplex != slave_link->link_duplex ||
1387 			bond_link->link_autoneg != slave_link->link_autoneg ||
1388 			bond_link->link_speed != slave_link->link_speed)
1389 			return -1;
1390 	}
1391 
1392 	return 0;
1393 }
1394 
1395 int
1396 mac_address_get(struct rte_eth_dev *eth_dev,
1397 		struct rte_ether_addr *dst_mac_addr)
1398 {
1399 	struct rte_ether_addr *mac_addr;
1400 
1401 	if (eth_dev == NULL) {
1402 		RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1403 		return -1;
1404 	}
1405 
1406 	if (dst_mac_addr == NULL) {
1407 		RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1408 		return -1;
1409 	}
1410 
1411 	mac_addr = eth_dev->data->mac_addrs;
1412 
1413 	rte_ether_addr_copy(mac_addr, dst_mac_addr);
1414 	return 0;
1415 }
1416 
1417 int
1418 mac_address_set(struct rte_eth_dev *eth_dev,
1419 		struct rte_ether_addr *new_mac_addr)
1420 {
1421 	struct rte_ether_addr *mac_addr;
1422 
1423 	if (eth_dev == NULL) {
1424 		RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1425 		return -1;
1426 	}
1427 
1428 	if (new_mac_addr == NULL) {
1429 		RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1430 		return -1;
1431 	}
1432 
1433 	mac_addr = eth_dev->data->mac_addrs;
1434 
1435 	/* If the new MAC is different from the current MAC then update it */
1436 	if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1437 		memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1438 
1439 	return 0;
1440 }
1441 
1442 static const struct rte_ether_addr null_mac_addr;
1443 
1444 /*
1445  * Add additional MAC addresses to the slave
1446  */
1447 int
1448 slave_add_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1449 		uint16_t slave_port_id)
1450 {
1451 	int i, ret;
1452 	struct rte_ether_addr *mac_addr;
1453 
1454 	for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1455 		mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1456 		if (rte_is_same_ether_addr(mac_addr, &null_mac_addr))
1457 			break;
1458 
1459 		ret = rte_eth_dev_mac_addr_add(slave_port_id, mac_addr, 0);
1460 		if (ret < 0) {
1461 			/* rollback */
1462 			for (i--; i > 0; i--)
1463 				rte_eth_dev_mac_addr_remove(slave_port_id,
1464 					&bonded_eth_dev->data->mac_addrs[i]);
1465 			return ret;
1466 		}
1467 	}
1468 
1469 	return 0;
1470 }
1471 
1472 /*
1473  * Remove additional MAC addresses from the slave
1474  */
1475 int
1476 slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1477 		uint16_t slave_port_id)
1478 {
1479 	int i, rc, ret;
1480 	struct rte_ether_addr *mac_addr;
1481 
1482 	rc = 0;
1483 	for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1484 		mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1485 		if (rte_is_same_ether_addr(mac_addr, &null_mac_addr))
1486 			break;
1487 
1488 		ret = rte_eth_dev_mac_addr_remove(slave_port_id, mac_addr);
1489 		/* save only the first error */
1490 		if (ret < 0 && rc == 0)
1491 			rc = ret;
1492 	}
1493 
1494 	return rc;
1495 }
1496 
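/*
 * Push the appropriate MAC address to every slave: the bonding MAC in the
 * modes that share it (round robin, balance, broadcast), the 802.3ad update
 * in mode 4, and otherwise the bonding MAC on the primary while each
 * remaining slave keeps its persisted address.
 */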
1497 int
1498 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1499 {
1500 	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1501 	bool set;
1502 	int i;
1503 
1504 	/* Update slave devices MAC addresses */
1505 	if (internals->slave_count < 1)
1506 		return -1;
1507 
1508 	switch (internals->mode) {
1509 	case BONDING_MODE_ROUND_ROBIN:
1510 	case BONDING_MODE_BALANCE:
1511 	case BONDING_MODE_BROADCAST:
1512 		for (i = 0; i < internals->slave_count; i++) {
1513 			if (rte_eth_dev_default_mac_addr_set(
1514 					internals->slaves[i].port_id,
1515 					bonded_eth_dev->data->mac_addrs)) {
1516 				RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1517 						internals->slaves[i].port_id);
1518 				return -1;
1519 			}
1520 		}
1521 		break;
1522 	case BONDING_MODE_8023AD:
1523 		bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1524 		break;
1525 	case BONDING_MODE_ACTIVE_BACKUP:
1526 	case BONDING_MODE_TLB:
1527 	case BONDING_MODE_ALB:
1528 	default:
1529 		set = true;
1530 		for (i = 0; i < internals->slave_count; i++) {
1531 			if (internals->slaves[i].port_id ==
1532 					internals->current_primary_port) {
1533 				if (rte_eth_dev_default_mac_addr_set(
1534 						internals->current_primary_port,
1535 						bonded_eth_dev->data->mac_addrs)) {
1536 					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1537 							internals->current_primary_port);
1538 					set = false;
1539 				}
1540 			} else {
1541 				if (rte_eth_dev_default_mac_addr_set(
1542 						internals->slaves[i].port_id,
1543 						&internals->slaves[i].persisted_mac_addr)) {
1544 					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1545 							internals->slaves[i].port_id);
1546 				}
1547 			}
1548 		}
1549 		if (!set)
1550 			return -1;
1551 	}
1552 
1553 	return 0;
1554 }
1555 
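/*
 * Select the rx/tx burst handlers for the requested bonding mode and enable
 * any mode-specific machinery (802.3ad, ALB).
 */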
1556 int
1557 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, uint8_t mode)
1558 {
1559 	struct bond_dev_private *internals;
1560 
1561 	internals = eth_dev->data->dev_private;
1562 
1563 	switch (mode) {
1564 	case BONDING_MODE_ROUND_ROBIN:
1565 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1566 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1567 		break;
1568 	case BONDING_MODE_ACTIVE_BACKUP:
1569 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1570 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1571 		break;
1572 	case BONDING_MODE_BALANCE:
1573 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1574 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1575 		break;
1576 	case BONDING_MODE_BROADCAST:
1577 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1578 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1579 		break;
1580 	case BONDING_MODE_8023AD:
1581 		if (bond_mode_8023ad_enable(eth_dev) != 0)
1582 			return -1;
1583 
1584 		if (internals->mode4.dedicated_queues.enabled == 0) {
1585 			eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1586 			eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1587 			RTE_BOND_LOG(WARNING,
1588 				"Using mode 4, it is necessary to do TX burst "
1589 				"and RX burst at least every 100ms.");
1590 		} else {
1591 			/* Use flow director's optimization */
1592 			eth_dev->rx_pkt_burst =
1593 					bond_ethdev_rx_burst_8023ad_fast_queue;
1594 			eth_dev->tx_pkt_burst =
1595 					bond_ethdev_tx_burst_8023ad_fast_queue;
1596 		}
1597 		break;
1598 	case BONDING_MODE_TLB:
1599 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1600 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1601 		break;
1602 	case BONDING_MODE_ALB:
1603 		if (bond_mode_alb_enable(eth_dev) != 0)
1604 			return -1;
1605 
1606 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1607 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1608 		break;
1609 	default:
1610 		return -1;
1611 	}
1612 
1613 	internals->mode = mode;
1614 
1615 	return 0;
1616 }
1617 
1618 
1619 static int
1620 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1621 		struct rte_eth_dev *slave_eth_dev)
1622 {
1623 	int errval = 0;
1624 	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1625 	struct port *port = &bond_mode_8023ad_ports[slave_eth_dev->data->port_id];
1626 
1627 	if (port->slow_pool == NULL) {
1628 		char mem_name[256];
1629 		int slave_id = slave_eth_dev->data->port_id;
1630 
1631 		snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1632 				slave_id);
1633 		port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1634 			250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1635 			slave_eth_dev->data->numa_node);
1636 
1637 		/* Any memory allocation failure in initialization is critical because
1638 		 * resources can't be freed, so reinitialization is impossible. */
1639 		if (port->slow_pool == NULL) {
1640 			rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1641 				slave_id, mem_name, rte_strerror(rte_errno));
1642 		}
1643 	}
1644 
1645 	if (internals->mode4.dedicated_queues.enabled == 1) {
1646 		/* Configure slow Rx queue */
1647 
1648 		errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1649 				internals->mode4.dedicated_queues.rx_qid, 128,
1650 				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1651 				NULL, port->slow_pool);
1652 		if (errval != 0) {
1653 			RTE_BOND_LOG(ERR,
1654 					"rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1655 					slave_eth_dev->data->port_id,
1656 					internals->mode4.dedicated_queues.rx_qid,
1657 					errval);
1658 			return errval;
1659 		}
1660 
1661 		errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1662 				internals->mode4.dedicated_queues.tx_qid, 512,
1663 				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1664 				NULL);
1665 		if (errval != 0) {
1666 			RTE_BOND_LOG(ERR,
1667 				"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1668 				slave_eth_dev->data->port_id,
1669 				internals->mode4.dedicated_queues.tx_qid,
1670 				errval);
1671 			return errval;
1672 		}
1673 	}
1674 	return 0;
1675 }
1676 
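/* Stop the slave and align its configuration (LSC interrupt, RSS, MTU and
 * offloads) with the bonded device, then reconfigure it with the bonded
 * device's queue counts (plus the dedicated mode 4 queues if enabled).
 */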
1677 int
1678 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1679 		struct rte_eth_dev *slave_eth_dev)
1680 {
1681 	uint16_t nb_rx_queues;
1682 	uint16_t nb_tx_queues;
1683 
1684 	int errval;
1685 
1686 	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1687 
1688 	/* Stop slave */
1689 	errval = rte_eth_dev_stop(slave_eth_dev->data->port_id);
1690 	if (errval != 0)
1691 		RTE_BOND_LOG(ERR, "rte_eth_dev_stop: port %u, err (%d)",
1692 			     slave_eth_dev->data->port_id, errval);
1693 
1694 	/* Enable interrupts on slave device if supported */
1695 	if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1696 		slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1697 
1698 	/* If RSS is enabled for bonding, try to enable it for slaves  */
1699 	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) {
1700 		/* rss_key won't be empty if RSS is configured in bonded dev */
1701 		slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1702 					internals->rss_key_len;
1703 		slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1704 					internals->rss_key;
1705 
1706 		slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1707 				bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1708 		slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1709 				bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1710 	}
1711 
1712 	slave_eth_dev->data->dev_conf.rxmode.mtu =
1713 			bonded_eth_dev->data->dev_conf.rxmode.mtu;
1714 
1715 	slave_eth_dev->data->dev_conf.txmode.offloads |=
1716 		bonded_eth_dev->data->dev_conf.txmode.offloads;
1717 
1718 	slave_eth_dev->data->dev_conf.txmode.offloads &=
1719 		(bonded_eth_dev->data->dev_conf.txmode.offloads |
1720 		~internals->tx_offload_capa);
1721 
1722 	slave_eth_dev->data->dev_conf.rxmode.offloads |=
1723 		bonded_eth_dev->data->dev_conf.rxmode.offloads;
1724 
1725 	slave_eth_dev->data->dev_conf.rxmode.offloads &=
1726 		(bonded_eth_dev->data->dev_conf.rxmode.offloads |
1727 		~internals->rx_offload_capa);
1728 
1729 
1730 	nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1731 	nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1732 
1733 	if (internals->mode == BONDING_MODE_8023AD) {
1734 		if (internals->mode4.dedicated_queues.enabled == 1) {
1735 			nb_rx_queues++;
1736 			nb_tx_queues++;
1737 		}
1738 	}
1739 
1740 	/* Configure device */
1741 	errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1742 			nb_rx_queues, nb_tx_queues,
1743 			&(slave_eth_dev->data->dev_conf));
1744 	if (errval != 0) {
1745 		RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
1746 				slave_eth_dev->data->port_id, errval);
1747 		return errval;
1748 	}
1749 
1750 	errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
1751 				     bonded_eth_dev->data->mtu);
1752 	if (errval != 0 && errval != -ENOTSUP) {
1753 		RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
1754 				slave_eth_dev->data->port_id, errval);
1755 		return errval;
1756 	}
1757 	return 0;
1758 }
1759 
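/* Set up the slave's Rx/Tx queues to mirror the bonded device's queues,
 * install the mode 4 slow-path flow rule when dedicated queues are enabled,
 * then start the slave and synchronize RSS RETA and initial link status.
 */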
1760 int
1761 slave_start(struct rte_eth_dev *bonded_eth_dev,
1762 		struct rte_eth_dev *slave_eth_dev)
1763 {
1764 	int errval = 0;
1765 	struct bond_rx_queue *bd_rx_q;
1766 	struct bond_tx_queue *bd_tx_q;
1767 	uint16_t q_id;
1768 	struct rte_flow_error flow_error;
1769 	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1770 
1771 	/* Setup Rx Queues */
1772 	for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1773 		bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1774 
1775 		errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1776 				bd_rx_q->nb_rx_desc,
1777 				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1778 				&(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1779 		if (errval != 0) {
1780 			RTE_BOND_LOG(ERR,
1781 					"rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1782 					slave_eth_dev->data->port_id, q_id, errval);
1783 			return errval;
1784 		}
1785 	}
1786 
1787 	/* Setup Tx Queues */
1788 	for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1789 		bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1790 
1791 		errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1792 				bd_tx_q->nb_tx_desc,
1793 				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1794 				&bd_tx_q->tx_conf);
1795 		if (errval != 0) {
1796 			RTE_BOND_LOG(ERR,
1797 				"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1798 				slave_eth_dev->data->port_id, q_id, errval);
1799 			return errval;
1800 		}
1801 	}
1802 
1803 	if (internals->mode == BONDING_MODE_8023AD &&
1804 			internals->mode4.dedicated_queues.enabled == 1) {
1805 		errval = slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev);
1806 		if (errval != 0)
1807 			return errval;
1808 
1809 		errval = bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1810 				slave_eth_dev->data->port_id);
1811 		if (errval != 0) {
1812 			RTE_BOND_LOG(ERR,
1813 				"bond_ethdev_8023ad_flow_verify: port=%d, err (%d)",
1814 				slave_eth_dev->data->port_id, errval);
1815 			return errval;
1816 		}
1817 
1818 		if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL) {
1819 			errval = rte_flow_destroy(slave_eth_dev->data->port_id,
1820 					internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1821 					&flow_error);
1822 			if (errval != 0)
1823 				RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_destroy: port=%d, err (%d)", slave_eth_dev->data->port_id, errval);
1824 		}
1825 
1826 		errval = bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1827 				slave_eth_dev->data->port_id);
1828 		if (errval != 0) {
1829 			RTE_BOND_LOG(ERR,
1830 				"bond_ethdev_8023ad_flow_set: port=%d, err (%d)",
1831 				slave_eth_dev->data->port_id, errval);
1832 			return errval;
1833 		}
1834 	}
1835 
1836 	/* Start device */
1837 	errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1838 	if (errval != 0) {
1839 		RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1840 				slave_eth_dev->data->port_id, errval);
1841 		return -1;
1842 	}
1843 
1844 	/* If RSS is enabled for bonding, synchronize RETA */
1845 	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS) {
1846 		int i;
1847 		struct bond_dev_private *internals;
1848 
1849 		internals = bonded_eth_dev->data->dev_private;
1850 
1851 		for (i = 0; i < internals->slave_count; i++) {
1852 			if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1853 				errval = rte_eth_dev_rss_reta_update(
1854 						slave_eth_dev->data->port_id,
1855 						&internals->reta_conf[0],
1856 						internals->slaves[i].reta_size);
1857 				if (errval != 0) {
1858 					RTE_BOND_LOG(WARNING,
1859 						     "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1860 						     " RSS Configuration for bonding may be inconsistent.",
1861 						     slave_eth_dev->data->port_id, errval);
1862 				}
1863 				break;
1864 			}
1865 		}
1866 	}
1867 
1868 	/* If lsc interrupt is set, check initial slave's link status */
1869 	if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1870 		slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1871 		bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1872 			RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1873 			NULL);
1874 	}
1875 
1876 	return 0;
1877 }
1878 
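/* Remove the slave from the internal slave table (and from every flow's
 * per-slave array), then reset the slave ethdev so that it is reconfigured
 * before its next use.
 */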
1879 void
1880 slave_remove(struct bond_dev_private *internals,
1881 		struct rte_eth_dev *slave_eth_dev)
1882 {
1883 	uint16_t i;
1884 
1885 	for (i = 0; i < internals->slave_count; i++)
1886 		if (internals->slaves[i].port_id ==
1887 				slave_eth_dev->data->port_id)
1888 			break;
1889 
1890 	if (i < (internals->slave_count - 1)) {
1891 		struct rte_flow *flow;
1892 
1893 		memmove(&internals->slaves[i], &internals->slaves[i + 1],
1894 				sizeof(internals->slaves[0]) *
1895 				(internals->slave_count - i - 1));
1896 		TAILQ_FOREACH(flow, &internals->flow_list, next) {
1897 			memmove(&flow->flows[i], &flow->flows[i + 1],
1898 				sizeof(flow->flows[0]) *
1899 				(internals->slave_count - i - 1));
1900 			flow->flows[internals->slave_count - 1] = NULL;
1901 		}
1902 	}
1903 
1904 	internals->slave_count--;
1905 
1906 	/* force reconfiguration of slave interfaces */
1907 	rte_eth_dev_internal_reset(slave_eth_dev);
1908 }
1909 
1910 static void
1911 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1912 
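/* Record a new slave in the internal slave table: its port id, whether its
 * link status must be polled, and the MAC address it had before bonding.
 */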
1913 void
1914 slave_add(struct bond_dev_private *internals,
1915 		struct rte_eth_dev *slave_eth_dev)
1916 {
1917 	struct bond_slave_details *slave_details =
1918 			&internals->slaves[internals->slave_count];
1919 
1920 	slave_details->port_id = slave_eth_dev->data->port_id;
1921 	slave_details->last_link_status = 0;
1922 
1923 	/* Mark slave devices that don't support interrupts so we can
1924 	 * compensate when we start the bond
1925 	 */
1926 	if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1927 		slave_details->link_status_poll_enabled = 1;
1928 	}
1929 
1930 	slave_details->link_status_wait_to_complete = 0;
1931 	/* save the slave's current MAC address so it can be restored later */
1932 	memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1933 			sizeof(struct rte_ether_addr));
1934 }
1935 
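/* Update the current primary port: accept the proposed port immediately if
 * there are no active slaves, otherwise only if it is an active slave.
 */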
1936 void
1937 bond_ethdev_primary_set(struct bond_dev_private *internals,
1938 		uint16_t slave_port_id)
1939 {
1940 	int i;
1941 
1942 	if (internals->active_slave_count < 1)
1943 		internals->current_primary_port = slave_port_id;
1944 	else
1945 		/* Search bonded device slave ports for new proposed primary port */
1946 		for (i = 0; i < internals->active_slave_count; i++) {
1947 			if (internals->active_slaves[i] == slave_port_id)
1948 				internals->current_primary_port = slave_port_id;
1949 		}
1950 }
1951 
1952 static int
1953 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
1954 
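/* Start the bonded device: derive the MAC address from the primary slave if
 * the user did not set one, reconfigure and start every slave, enable link
 * status polling when needed and start the per-mode machinery (802.3ad state
 * machines, TLB/ALB balancing).
 */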
1955 static int
1956 bond_ethdev_start(struct rte_eth_dev *eth_dev)
1957 {
1958 	struct bond_dev_private *internals;
1959 	int i;
1960 
1961 	/* slave eth dev will be started by bonded device */
1962 	if (check_for_bonded_ethdev(eth_dev)) {
1963 		RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
1964 				eth_dev->data->port_id);
1965 		return -1;
1966 	}
1967 
1968 	eth_dev->data->dev_link.link_status = RTE_ETH_LINK_DOWN;
1969 	eth_dev->data->dev_started = 1;
1970 
1971 	internals = eth_dev->data->dev_private;
1972 
1973 	if (internals->slave_count == 0) {
1974 		RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
1975 		goto out_err;
1976 	}
1977 
1978 	if (internals->user_defined_mac == 0) {
1979 		struct rte_ether_addr *new_mac_addr = NULL;
1980 
1981 		for (i = 0; i < internals->slave_count; i++)
1982 			if (internals->slaves[i].port_id == internals->primary_port)
1983 				new_mac_addr = &internals->slaves[i].persisted_mac_addr;
1984 
1985 		if (new_mac_addr == NULL)
1986 			goto out_err;
1987 
1988 		if (mac_address_set(eth_dev, new_mac_addr) != 0) {
1989 			RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
1990 					eth_dev->data->port_id);
1991 			goto out_err;
1992 		}
1993 	}
1994 
1995 	if (internals->mode == BONDING_MODE_8023AD) {
1996 		if (internals->mode4.dedicated_queues.enabled == 1) {
1997 			internals->mode4.dedicated_queues.rx_qid =
1998 					eth_dev->data->nb_rx_queues;
1999 			internals->mode4.dedicated_queues.tx_qid =
2000 					eth_dev->data->nb_tx_queues;
2001 		}
2002 	}
2003 
2004 
2005 	/* Reconfigure each slave device if starting bonded device */
2006 	for (i = 0; i < internals->slave_count; i++) {
2007 		struct rte_eth_dev *slave_ethdev =
2008 				&(rte_eth_devices[internals->slaves[i].port_id]);
2009 		if (slave_configure(eth_dev, slave_ethdev) != 0) {
2010 			RTE_BOND_LOG(ERR,
2011 				"bonded port (%d) failed to reconfigure slave device (%d)",
2012 				eth_dev->data->port_id,
2013 				internals->slaves[i].port_id);
2014 			goto out_err;
2015 		}
2016 		if (slave_start(eth_dev, slave_ethdev) != 0) {
2017 			RTE_BOND_LOG(ERR,
2018 				"bonded port (%d) failed to start slave device (%d)",
2019 				eth_dev->data->port_id,
2020 				internals->slaves[i].port_id);
2021 			goto out_err;
2022 		}
2023 		/* We will need to poll for link status if any slave doesn't
2024 		 * support interrupts
2025 		 */
2026 		if (internals->slaves[i].link_status_poll_enabled)
2027 			internals->link_status_polling_enabled = 1;
2028 	}
2029 
2030 	/* start polling if needed */
2031 	if (internals->link_status_polling_enabled) {
2032 		rte_eal_alarm_set(
2033 			internals->link_status_polling_interval_ms * 1000,
2034 			bond_ethdev_slave_link_status_change_monitor,
2035 			(void *)&rte_eth_devices[internals->port_id]);
2036 	}
2037 
2038 	/* Update all slave devices' MACs */
2039 	if (mac_address_slaves_update(eth_dev) != 0)
2040 		goto out_err;
2041 
2042 	if (internals->user_defined_primary_port)
2043 		bond_ethdev_primary_set(internals, internals->primary_port);
2044 
2045 	if (internals->mode == BONDING_MODE_8023AD)
2046 		bond_mode_8023ad_start(eth_dev);
2047 
2048 	if (internals->mode == BONDING_MODE_TLB ||
2049 			internals->mode == BONDING_MODE_ALB)
2050 		bond_tlb_enable(internals);
2051 
2052 	return 0;
2053 
2054 out_err:
2055 	eth_dev->data->dev_started = 0;
2056 	return -1;
2057 }
2058 
2059 static void
2060 bond_ethdev_free_queues(struct rte_eth_dev *dev)
2061 {
2062 	uint16_t i;
2063 
2064 	if (dev->data->rx_queues != NULL) {
2065 		for (i = 0; i < dev->data->nb_rx_queues; i++) {
2066 			rte_free(dev->data->rx_queues[i]);
2067 			dev->data->rx_queues[i] = NULL;
2068 		}
2069 		dev->data->nb_rx_queues = 0;
2070 	}
2071 
2072 	if (dev->data->tx_queues != NULL) {
2073 		for (i = 0; i < dev->data->nb_tx_queues; i++) {
2074 			rte_free(dev->data->tx_queues[i]);
2075 			dev->data->tx_queues[i] = NULL;
2076 		}
2077 		dev->data->nb_tx_queues = 0;
2078 	}
2079 }
2080 
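/* Stop the bonded device: halt the mode-specific machinery, drain the mode 4
 * control rings, stop every slave and deactivate the active ones.
 */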
2081 int
2082 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2083 {
2084 	struct bond_dev_private *internals = eth_dev->data->dev_private;
2085 	uint16_t i;
2086 	int ret;
2087 
2088 	if (internals->mode == BONDING_MODE_8023AD) {
2089 		struct port *port;
2090 		void *pkt = NULL;
2091 
2092 		bond_mode_8023ad_stop(eth_dev);
2093 
2094 		/* Discard all messages to/from mode 4 state machines */
2095 		for (i = 0; i < internals->active_slave_count; i++) {
2096 			port = &bond_mode_8023ad_ports[internals->active_slaves[i]];
2097 
2098 			RTE_ASSERT(port->rx_ring != NULL);
2099 			while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2100 				rte_pktmbuf_free(pkt);
2101 
2102 			RTE_ASSERT(port->tx_ring != NULL);
2103 			while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2104 				rte_pktmbuf_free(pkt);
2105 		}
2106 	}
2107 
2108 	if (internals->mode == BONDING_MODE_TLB ||
2109 			internals->mode == BONDING_MODE_ALB) {
2110 		bond_tlb_disable(internals);
2111 		for (i = 0; i < internals->active_slave_count; i++)
2112 			tlb_last_obytets[internals->active_slaves[i]] = 0;
2113 	}
2114 
2115 	eth_dev->data->dev_link.link_status = RTE_ETH_LINK_DOWN;
2116 	eth_dev->data->dev_started = 0;
2117 
2118 	internals->link_status_polling_enabled = 0;
2119 	for (i = 0; i < internals->slave_count; i++) {
2120 		uint16_t slave_id = internals->slaves[i].port_id;
2121 
2122 		internals->slaves[i].last_link_status = 0;
2123 		ret = rte_eth_dev_stop(slave_id);
2124 		if (ret != 0) {
2125 			RTE_BOND_LOG(ERR, "Failed to stop device on port %u",
2126 				     slave_id);
2127 			return ret;
2128 		}
2129 
2130 		/* active slaves need to be deactivated. */
2131 		if (find_slave_by_id(internals->active_slaves,
2132 				internals->active_slave_count, slave_id) !=
2133 					internals->active_slave_count)
2134 			deactivate_slave(eth_dev, slave_id);
2135 	}
2136 
2137 	return 0;
2138 }
2139 
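/* Close the bonded device: stop and remove every slave, flush flow rules,
 * free the queues and release the VLAN filter bitmap and other private
 * resources.
 */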
2140 int
2141 bond_ethdev_close(struct rte_eth_dev *dev)
2142 {
2143 	struct bond_dev_private *internals = dev->data->dev_private;
2144 	uint16_t bond_port_id = internals->port_id;
2145 	int skipped = 0;
2146 	struct rte_flow_error ferror;
2147 
2148 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
2149 		return 0;
2150 
2151 	RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name);
2152 	while (internals->slave_count != skipped) {
2153 		uint16_t port_id = internals->slaves[skipped].port_id;
2154 
2155 		if (rte_eth_dev_stop(port_id) != 0) {
2156 			RTE_BOND_LOG(ERR, "Failed to stop device on port %u",
2157 				     port_id);
2158 			skipped++;
2159 			continue;
2160 		}
2161 
2162 		if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2163 			RTE_BOND_LOG(ERR,
2164 				     "Failed to remove port %d from bonded device %s",
2165 				     port_id, dev->device->name);
2166 			skipped++;
2167 		}
2168 	}
2169 	bond_flow_ops.flush(dev, &ferror);
2170 	bond_ethdev_free_queues(dev);
2171 	rte_bitmap_reset(internals->vlan_filter_bmp);
2172 	rte_bitmap_free(internals->vlan_filter_bmp);
2173 	rte_free(internals->vlan_filter_bmpmem);
2174 
2175 	/* Try to release the mempool used in mode 6. If the bonded
2176 	 * device is not in mode 6, freeing a NULL pointer is not a problem.
2177 	 */
2178 	rte_mempool_free(internals->mode6.mempool);
2179 
2180 	rte_kvargs_free(internals->kvlist);
2181 
2182 	return 0;
2183 }
2184 
2185 /* forward declaration */
2186 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2187 
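/* Report device capabilities for the bonded port. Queue and descriptor
 * limits are the minimum values across all slaves; one Rx/Tx queue pair is
 * reserved when mode 4 dedicated queues are enabled.
 */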
2188 static int
2189 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2190 {
2191 	struct bond_dev_private *internals = dev->data->dev_private;
2192 	struct bond_slave_details slave;
2193 	int ret;
2194 
2195 	uint16_t max_nb_rx_queues = UINT16_MAX;
2196 	uint16_t max_nb_tx_queues = UINT16_MAX;
2197 	uint16_t max_rx_desc_lim = UINT16_MAX;
2198 	uint16_t max_tx_desc_lim = UINT16_MAX;
2199 
2200 	dev_info->max_mac_addrs = BOND_MAX_MAC_ADDRS;
2201 
2202 	dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2203 			internals->candidate_max_rx_pktlen :
2204 			RTE_ETHER_MAX_JUMBO_FRAME_LEN;
2205 
2206 	/* The maximum number of tx/rx queues that the bonded device can support
2207 	 * is the minimum value across the bonded slaves, as all slaves must be capable
2208 	 * of supporting the same number of tx/rx queues.
2209 	 */
2210 	if (internals->slave_count > 0) {
2211 		struct rte_eth_dev_info slave_info;
2212 		uint16_t idx;
2213 
2214 		for (idx = 0; idx < internals->slave_count; idx++) {
2215 			slave = internals->slaves[idx];
2216 			ret = rte_eth_dev_info_get(slave.port_id, &slave_info);
2217 			if (ret != 0) {
2218 				RTE_BOND_LOG(ERR,
2219 					"%s: Error during getting device (port %u) info: %s\n",
2220 					__func__,
2221 					slave.port_id,
2222 					strerror(-ret));
2223 
2224 				return ret;
2225 			}
2226 
2227 			if (slave_info.max_rx_queues < max_nb_rx_queues)
2228 				max_nb_rx_queues = slave_info.max_rx_queues;
2229 
2230 			if (slave_info.max_tx_queues < max_nb_tx_queues)
2231 				max_nb_tx_queues = slave_info.max_tx_queues;
2232 
2233 			if (slave_info.rx_desc_lim.nb_max < max_rx_desc_lim)
2234 				max_rx_desc_lim = slave_info.rx_desc_lim.nb_max;
2235 
2236 			if (slave_info.tx_desc_lim.nb_max < max_tx_desc_lim)
2237 				max_tx_desc_lim = slave_info.tx_desc_lim.nb_max;
2238 		}
2239 	}
2240 
2241 	dev_info->max_rx_queues = max_nb_rx_queues;
2242 	dev_info->max_tx_queues = max_nb_tx_queues;
2243 
2244 	memcpy(&dev_info->default_rxconf, &internals->default_rxconf,
2245 	       sizeof(dev_info->default_rxconf));
2246 	memcpy(&dev_info->default_txconf, &internals->default_txconf,
2247 	       sizeof(dev_info->default_txconf));
2248 
2249 	dev_info->rx_desc_lim.nb_max = max_rx_desc_lim;
2250 	dev_info->tx_desc_lim.nb_max = max_tx_desc_lim;
2251 
2252 	/**
2253 	 * If dedicated hw queues are enabled for the link bonding device in LACP mode
2254 	 * then we need to reduce the maximum number of data path queues by 1.
2255 	 */
2256 	if (internals->mode == BONDING_MODE_8023AD &&
2257 		internals->mode4.dedicated_queues.enabled == 1) {
2258 		dev_info->max_rx_queues--;
2259 		dev_info->max_tx_queues--;
2260 	}
2261 
2262 	dev_info->min_rx_bufsize = 0;
2263 
2264 	dev_info->rx_offload_capa = internals->rx_offload_capa;
2265 	dev_info->tx_offload_capa = internals->tx_offload_capa;
2266 	dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
2267 	dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
2268 	dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2269 
2270 	dev_info->reta_size = internals->reta_size;
2271 	dev_info->hash_key_size = internals->rss_key_len;
2272 
2273 	return 0;
2274 }
2275 
2276 static int
2277 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2278 {
2279 	int res;
2280 	uint16_t i;
2281 	struct bond_dev_private *internals = dev->data->dev_private;
2282 
2283 	/* don't do this while a slave is being added */
2284 	rte_spinlock_lock(&internals->lock);
2285 
2286 	if (on)
2287 		rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2288 	else
2289 		rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2290 
2291 	for (i = 0; i < internals->slave_count; i++) {
2292 		uint16_t port_id = internals->slaves[i].port_id;
2293 
2294 		res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2295 		if (res == -ENOTSUP)
2296 			RTE_BOND_LOG(WARNING,
2297 				     "Setting VLAN filter on slave port %u not supported.",
2298 				     port_id);
2299 	}
2300 
2301 	rte_spinlock_unlock(&internals->lock);
2302 	return 0;
2303 }
2304 
2305 static int
2306 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2307 		uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2308 		const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2309 {
2310 	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2311 			rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2312 					0, dev->data->numa_node);
2313 	if (bd_rx_q == NULL)
2314 		return -1;
2315 
2316 	bd_rx_q->queue_id = rx_queue_id;
2317 	bd_rx_q->dev_private = dev->data->dev_private;
2318 
2319 	bd_rx_q->nb_rx_desc = nb_rx_desc;
2320 
2321 	memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2322 	bd_rx_q->mb_pool = mb_pool;
2323 
2324 	dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2325 
2326 	return 0;
2327 }
2328 
2329 static int
2330 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2331 		uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2332 		const struct rte_eth_txconf *tx_conf)
2333 {
2334 	struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
2335 			rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2336 					0, dev->data->numa_node);
2337 
2338 	if (bd_tx_q == NULL)
2339 		return -1;
2340 
2341 	bd_tx_q->queue_id = tx_queue_id;
2342 	bd_tx_q->dev_private = dev->data->dev_private;
2343 
2344 	bd_tx_q->nb_tx_desc = nb_tx_desc;
2345 	memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2346 
2347 	dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2348 
2349 	return 0;
2350 }
2351 
2352 static void
2353 bond_ethdev_rx_queue_release(struct rte_eth_dev *dev, uint16_t queue_id)
2354 {
2355 	void *queue = dev->data->rx_queues[queue_id];
2356 
2357 	if (queue == NULL)
2358 		return;
2359 
2360 	rte_free(queue);
2361 }
2362 
2363 static void
2364 bond_ethdev_tx_queue_release(struct rte_eth_dev *dev, uint16_t queue_id)
2365 {
2366 	void *queue = dev->data->tx_queues[queue_id];
2367 
2368 	if (queue == NULL)
2369 		return;
2370 
2371 	rte_free(queue);
2372 }
2373 
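/* Alarm callback that polls the link status of slaves which do not support
 * link status change interrupts and injects LSC events on any change. It
 * re-arms itself while at least one slave still needs polling.
 */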
2374 static void
2375 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2376 {
2377 	struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2378 	struct bond_dev_private *internals;
2379 
2380 	/* Default value for polling slave found is true as we don't want to
2381 	 * disable the polling thread if we cannot get the lock */
2382 	int i, polling_slave_found = 1;
2383 
2384 	if (cb_arg == NULL)
2385 		return;
2386 
2387 	bonded_ethdev = cb_arg;
2388 	internals = bonded_ethdev->data->dev_private;
2389 
2390 	if (!bonded_ethdev->data->dev_started ||
2391 		!internals->link_status_polling_enabled)
2392 		return;
2393 
2394 	/* If device is currently being configured then don't check slaves link
2395 	 * status, wait until next period */
2396 	if (rte_spinlock_trylock(&internals->lock)) {
2397 		if (internals->slave_count > 0)
2398 			polling_slave_found = 0;
2399 
2400 		for (i = 0; i < internals->slave_count; i++) {
2401 			if (!internals->slaves[i].link_status_poll_enabled)
2402 				continue;
2403 
2404 			slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2405 			polling_slave_found = 1;
2406 
2407 			/* Update slave link status */
2408 			(*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2409 					internals->slaves[i].link_status_wait_to_complete);
2410 
2411 			/* if link status has changed since last checked then call lsc
2412 			 * event callback */
2413 			if (slave_ethdev->data->dev_link.link_status !=
2414 					internals->slaves[i].last_link_status) {
2415 				internals->slaves[i].last_link_status =
2416 						slave_ethdev->data->dev_link.link_status;
2417 
2418 				bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2419 						RTE_ETH_EVENT_INTR_LSC,
2420 						&bonded_ethdev->data->port_id,
2421 						NULL);
2422 			}
2423 		}
2424 		rte_spinlock_unlock(&internals->lock);
2425 	}
2426 
2427 	if (polling_slave_found)
2428 		/* Set alarm to continue monitoring link status of slave ethdevs */
2429 		rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2430 				bond_ethdev_slave_link_status_change_monitor, cb_arg);
2431 }
2432 
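/* Aggregate the bonded device's link status and speed from its active
 * slaves: minimum speed in broadcast mode, the primary slave's speed in
 * active-backup mode and the sum of slave speeds in the remaining modes.
 */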
2433 static int
2434 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2435 {
2436 	int (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2437 
2438 	struct bond_dev_private *bond_ctx;
2439 	struct rte_eth_link slave_link;
2440 
2441 	bool one_link_update_succeeded;
2442 	uint32_t idx;
2443 	int ret;
2444 
2445 	bond_ctx = ethdev->data->dev_private;
2446 
2447 	ethdev->data->dev_link.link_speed = RTE_ETH_SPEED_NUM_NONE;
2448 
2449 	if (ethdev->data->dev_started == 0 ||
2450 			bond_ctx->active_slave_count == 0) {
2451 		ethdev->data->dev_link.link_status = RTE_ETH_LINK_DOWN;
2452 		return 0;
2453 	}
2454 
2455 	ethdev->data->dev_link.link_status = RTE_ETH_LINK_UP;
2456 
2457 	if (wait_to_complete)
2458 		link_update = rte_eth_link_get;
2459 	else
2460 		link_update = rte_eth_link_get_nowait;
2461 
2462 	switch (bond_ctx->mode) {
2463 	case BONDING_MODE_BROADCAST:
2464 		/**
2465 		 * Setting link speed to UINT32_MAX to ensure we pick up the
2466 		 * value of the first active slave
2467 		 */
2468 		ethdev->data->dev_link.link_speed = UINT32_MAX;
2469 
2470 		/**
2471 		 * link speed is the minimum value of all the slaves' link speeds, as
2472 		 * packet loss will occur on this slave if transmission at rates
2473 		 * greater than this is attempted
2474 		 */
2475 		for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2476 			ret = link_update(bond_ctx->active_slaves[idx],
2477 					  &slave_link);
2478 			if (ret < 0) {
2479 				ethdev->data->dev_link.link_speed =
2480 					RTE_ETH_SPEED_NUM_NONE;
2481 				RTE_BOND_LOG(ERR,
2482 					"Slave (port %u) link get failed: %s",
2483 					bond_ctx->active_slaves[idx],
2484 					rte_strerror(-ret));
2485 				return 0;
2486 			}
2487 
2488 			if (slave_link.link_speed <
2489 					ethdev->data->dev_link.link_speed)
2490 				ethdev->data->dev_link.link_speed =
2491 						slave_link.link_speed;
2492 		}
2493 		break;
2494 	case BONDING_MODE_ACTIVE_BACKUP:
2495 		/* Current primary slave */
2496 		ret = link_update(bond_ctx->current_primary_port, &slave_link);
2497 		if (ret < 0) {
2498 			RTE_BOND_LOG(ERR, "Slave (port %u) link get failed: %s",
2499 				bond_ctx->current_primary_port,
2500 				rte_strerror(-ret));
2501 			return 0;
2502 		}
2503 
2504 		ethdev->data->dev_link.link_speed = slave_link.link_speed;
2505 		break;
2506 	case BONDING_MODE_8023AD:
2507 		ethdev->data->dev_link.link_autoneg =
2508 				bond_ctx->mode4.slave_link.link_autoneg;
2509 		ethdev->data->dev_link.link_duplex =
2510 				bond_ctx->mode4.slave_link.link_duplex;
2511 		/* fall through */
2512 		/* to update link speed */
2513 	case BONDING_MODE_ROUND_ROBIN:
2514 	case BONDING_MODE_BALANCE:
2515 	case BONDING_MODE_TLB:
2516 	case BONDING_MODE_ALB:
2517 	default:
2518 		/**
2519 		 * In these modes the maximum theoretical link speed is the sum
2520 		 * of all the slaves' link speeds
2521 		 */
2522 		ethdev->data->dev_link.link_speed = RTE_ETH_SPEED_NUM_NONE;
2523 		one_link_update_succeeded = false;
2524 
2525 		for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2526 			ret = link_update(bond_ctx->active_slaves[idx],
2527 					&slave_link);
2528 			if (ret < 0) {
2529 				RTE_BOND_LOG(ERR,
2530 					"Slave (port %u) link get failed: %s",
2531 					bond_ctx->active_slaves[idx],
2532 					rte_strerror(-ret));
2533 				continue;
2534 			}
2535 
2536 			one_link_update_succeeded = true;
2537 			ethdev->data->dev_link.link_speed +=
2538 					slave_link.link_speed;
2539 		}
2540 
2541 		if (!one_link_update_succeeded) {
2542 			RTE_BOND_LOG(ERR, "All slaves link get failed");
2543 			return 0;
2544 		}
2545 	}
2546 
2547 
2548 	return 0;
2549 }
2550 
2551 
2552 static int
2553 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2554 {
2555 	struct bond_dev_private *internals = dev->data->dev_private;
2556 	struct rte_eth_stats slave_stats;
2557 	int i, j;
2558 
2559 	for (i = 0; i < internals->slave_count; i++) {
2560 		rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2561 
2562 		stats->ipackets += slave_stats.ipackets;
2563 		stats->opackets += slave_stats.opackets;
2564 		stats->ibytes += slave_stats.ibytes;
2565 		stats->obytes += slave_stats.obytes;
2566 		stats->imissed += slave_stats.imissed;
2567 		stats->ierrors += slave_stats.ierrors;
2568 		stats->oerrors += slave_stats.oerrors;
2569 		stats->rx_nombuf += slave_stats.rx_nombuf;
2570 
2571 		for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2572 			stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2573 			stats->q_opackets[j] += slave_stats.q_opackets[j];
2574 			stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2575 			stats->q_obytes[j] += slave_stats.q_obytes[j];
2576 			stats->q_errors[j] += slave_stats.q_errors[j];
2577 		}
2578 
2579 	}
2580 
2581 	return 0;
2582 }
2583 
2584 static int
2585 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2586 {
2587 	struct bond_dev_private *internals = dev->data->dev_private;
2588 	int i;
2589 	int err;
2590 	int ret;
2591 
2592 	for (i = 0, err = 0; i < internals->slave_count; i++) {
2593 		ret = rte_eth_stats_reset(internals->slaves[i].port_id);
2594 		if (ret != 0)
2595 			err = ret;
2596 	}
2597 
2598 	return err;
2599 }
2600 
2601 static int
2602 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2603 {
2604 	struct bond_dev_private *internals = eth_dev->data->dev_private;
2605 	int i;
2606 	int ret = 0;
2607 	uint16_t port_id;
2608 
2609 	switch (internals->mode) {
2610 	/* Promiscuous mode is propagated to all slaves */
2611 	case BONDING_MODE_ROUND_ROBIN:
2612 	case BONDING_MODE_BALANCE:
2613 	case BONDING_MODE_BROADCAST:
2614 	case BONDING_MODE_8023AD: {
2615 		unsigned int slave_ok = 0;
2616 
2617 		for (i = 0; i < internals->slave_count; i++) {
2618 			port_id = internals->slaves[i].port_id;
2619 
2620 			ret = rte_eth_promiscuous_enable(port_id);
2621 			if (ret != 0)
2622 				RTE_BOND_LOG(ERR,
2623 					"Failed to enable promiscuous mode for port %u: %s",
2624 					port_id, rte_strerror(-ret));
2625 			else
2626 				slave_ok++;
2627 		}
2628 		/*
2629 		 * Report success if operation is successful on at least
2630 		 * one slave. Otherwise return the last error code.
2631 		 */
2632 		if (slave_ok > 0)
2633 			ret = 0;
2634 		break;
2635 	}
2636 	/* Promiscuous mode is propagated only to primary slave */
2637 	case BONDING_MODE_ACTIVE_BACKUP:
2638 	case BONDING_MODE_TLB:
2639 	case BONDING_MODE_ALB:
2640 	default:
2641 		/* Do not touch promisc when there cannot be primary ports */
2642 		if (internals->slave_count == 0)
2643 			break;
2644 		port_id = internals->current_primary_port;
2645 		ret = rte_eth_promiscuous_enable(port_id);
2646 		if (ret != 0)
2647 			RTE_BOND_LOG(ERR,
2648 				"Failed to enable promiscuous mode for port %u: %s",
2649 				port_id, rte_strerror(-ret));
2650 	}
2651 
2652 	return ret;
2653 }
2654 
2655 static int
2656 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2657 {
2658 	struct bond_dev_private *internals = dev->data->dev_private;
2659 	int i;
2660 	int ret = 0;
2661 	uint16_t port_id;
2662 
2663 	switch (internals->mode) {
2664 	/* Promiscuous mode is propagated to all slaves */
2665 	case BONDING_MODE_ROUND_ROBIN:
2666 	case BONDING_MODE_BALANCE:
2667 	case BONDING_MODE_BROADCAST:
2668 	case BONDING_MODE_8023AD: {
2669 		unsigned int slave_ok = 0;
2670 
2671 		for (i = 0; i < internals->slave_count; i++) {
2672 			port_id = internals->slaves[i].port_id;
2673 
2674 			if (internals->mode == BONDING_MODE_8023AD &&
2675 			    bond_mode_8023ad_ports[port_id].forced_rx_flags ==
2676 					BOND_8023AD_FORCED_PROMISC) {
2677 				slave_ok++;
2678 				continue;
2679 			}
2680 			ret = rte_eth_promiscuous_disable(port_id);
2681 			if (ret != 0)
2682 				RTE_BOND_LOG(ERR,
2683 					"Failed to disable promiscuous mode for port %u: %s",
2684 					port_id, rte_strerror(-ret));
2685 			else
2686 				slave_ok++;
2687 		}
2688 		/*
2689 		 * Report success if operation is successful on at least
2690 		 * one slave. Otherwise return the last error code.
2691 		 */
2692 		if (slave_ok > 0)
2693 			ret = 0;
2694 		break;
2695 	}
2696 	/* Promiscuous mode is propagated only to primary slave */
2697 	case BONDING_MODE_ACTIVE_BACKUP:
2698 	case BONDING_MODE_TLB:
2699 	case BONDING_MODE_ALB:
2700 	default:
2701 		/* Do not touch promisc when there cannot be primary ports */
2702 		if (internals->slave_count == 0)
2703 			break;
2704 		port_id = internals->current_primary_port;
2705 		ret = rte_eth_promiscuous_disable(port_id);
2706 		if (ret != 0)
2707 			RTE_BOND_LOG(ERR,
2708 				"Failed to disable promiscuous mode for port %u: %s",
2709 				port_id, rte_strerror(-ret));
2710 	}
2711 
2712 	return ret;
2713 }
2714 
2715 static int
2716 bond_ethdev_promiscuous_update(struct rte_eth_dev *dev)
2717 {
2718 	struct bond_dev_private *internals = dev->data->dev_private;
2719 	uint16_t port_id = internals->current_primary_port;
2720 
2721 	switch (internals->mode) {
2722 	case BONDING_MODE_ROUND_ROBIN:
2723 	case BONDING_MODE_BALANCE:
2724 	case BONDING_MODE_BROADCAST:
2725 	case BONDING_MODE_8023AD:
2726 		/* As promiscuous mode is propagated to all slaves for these
2727 		 * modes, there is no need to update the bonding device.
2728 		 */
2729 		break;
2730 	case BONDING_MODE_ACTIVE_BACKUP:
2731 	case BONDING_MODE_TLB:
2732 	case BONDING_MODE_ALB:
2733 	default:
2734 		/* Promiscuous mode is propagated only to the primary slave in
2735 		 * these modes. On an active/standby switchover, promiscuous
2736 		 * mode on the new primary slave should be set according to the
2737 		 * bonding device.
2738 		 */
2739 		if (rte_eth_promiscuous_get(internals->port_id) == 1)
2740 			rte_eth_promiscuous_enable(port_id);
2741 		else
2742 			rte_eth_promiscuous_disable(port_id);
2743 	}
2744 
2745 	return 0;
2746 }
2747 
2748 static int
2749 bond_ethdev_allmulticast_enable(struct rte_eth_dev *eth_dev)
2750 {
2751 	struct bond_dev_private *internals = eth_dev->data->dev_private;
2752 	int i;
2753 	int ret = 0;
2754 	uint16_t port_id;
2755 
2756 	switch (internals->mode) {
2757 	/* allmulti mode is propagated to all slaves */
2758 	case BONDING_MODE_ROUND_ROBIN:
2759 	case BONDING_MODE_BALANCE:
2760 	case BONDING_MODE_BROADCAST:
2761 	case BONDING_MODE_8023AD: {
2762 		unsigned int slave_ok = 0;
2763 
2764 		for (i = 0; i < internals->slave_count; i++) {
2765 			port_id = internals->slaves[i].port_id;
2766 
2767 			ret = rte_eth_allmulticast_enable(port_id);
2768 			if (ret != 0)
2769 				RTE_BOND_LOG(ERR,
2770 					"Failed to enable allmulti mode for port %u: %s",
2771 					port_id, rte_strerror(-ret));
2772 			else
2773 				slave_ok++;
2774 		}
2775 		/*
2776 		 * Report success if operation is successful on at least
2777 		 * one slave. Otherwise return the last error code.
2778 		 */
2779 		if (slave_ok > 0)
2780 			ret = 0;
2781 		break;
2782 	}
2783 	/* allmulti mode is propagated only to primary slave */
2784 	case BONDING_MODE_ACTIVE_BACKUP:
2785 	case BONDING_MODE_TLB:
2786 	case BONDING_MODE_ALB:
2787 	default:
2788 		/* Do not touch allmulti when there cannot be primary ports */
2789 		if (internals->slave_count == 0)
2790 			break;
2791 		port_id = internals->current_primary_port;
2792 		ret = rte_eth_allmulticast_enable(port_id);
2793 		if (ret != 0)
2794 			RTE_BOND_LOG(ERR,
2795 				"Failed to enable allmulti mode for port %u: %s",
2796 				port_id, rte_strerror(-ret));
2797 	}
2798 
2799 	return ret;
2800 }
2801 
2802 static int
2803 bond_ethdev_allmulticast_disable(struct rte_eth_dev *eth_dev)
2804 {
2805 	struct bond_dev_private *internals = eth_dev->data->dev_private;
2806 	int i;
2807 	int ret = 0;
2808 	uint16_t port_id;
2809 
2810 	switch (internals->mode) {
2811 	/* allmulti mode is propagated to all slaves */
2812 	case BONDING_MODE_ROUND_ROBIN:
2813 	case BONDING_MODE_BALANCE:
2814 	case BONDING_MODE_BROADCAST:
2815 	case BONDING_MODE_8023AD: {
2816 		unsigned int slave_ok = 0;
2817 
2818 		for (i = 0; i < internals->slave_count; i++) {
2819 			uint16_t port_id = internals->slaves[i].port_id;
2820 
2821 			if (internals->mode == BONDING_MODE_8023AD &&
2822 			    bond_mode_8023ad_ports[port_id].forced_rx_flags ==
2823 					BOND_8023AD_FORCED_ALLMULTI)
2824 				continue;
2825 
2826 			ret = rte_eth_allmulticast_disable(port_id);
2827 			if (ret != 0)
2828 				RTE_BOND_LOG(ERR,
2829 					"Failed to disable allmulti mode for port %u: %s",
2830 					port_id, rte_strerror(-ret));
2831 			else
2832 				slave_ok++;
2833 		}
2834 		/*
2835 		 * Report success if operation is successful on at least
2836 		 * one slave. Otherwise return the last error code.
2837 		 */
2838 		if (slave_ok > 0)
2839 			ret = 0;
2840 		break;
2841 	}
2842 	/* allmulti mode is propagated only to primary slave */
2843 	case BONDING_MODE_ACTIVE_BACKUP:
2844 	case BONDING_MODE_TLB:
2845 	case BONDING_MODE_ALB:
2846 	default:
2847 		/* Do not touch allmulti when there cannot be primary ports */
2848 		if (internals->slave_count == 0)
2849 			break;
2850 		port_id = internals->current_primary_port;
2851 		ret = rte_eth_allmulticast_disable(port_id);
2852 		if (ret != 0)
2853 			RTE_BOND_LOG(ERR,
2854 				"Failed to disable allmulti mode for port %u: %s",
2855 				port_id, rte_strerror(-ret));
2856 	}
2857 
2858 	return ret;
2859 }
2860 
2861 static int
2862 bond_ethdev_allmulticast_update(struct rte_eth_dev *dev)
2863 {
2864 	struct bond_dev_private *internals = dev->data->dev_private;
2865 	uint16_t port_id = internals->current_primary_port;
2866 
2867 	switch (internals->mode) {
2868 	case BONDING_MODE_ROUND_ROBIN:
2869 	case BONDING_MODE_BALANCE:
2870 	case BONDING_MODE_BROADCAST:
2871 	case BONDING_MODE_8023AD:
2872 		/* As allmulticast mode is propagated to all slaves for these
2873 		 * modes, there is no need to update the bonding device.
2874 		 */
2875 		break;
2876 	case BONDING_MODE_ACTIVE_BACKUP:
2877 	case BONDING_MODE_TLB:
2878 	case BONDING_MODE_ALB:
2879 	default:
2880 		/* Allmulticast mode is propagated only to the primary slave in
2881 		 * these modes. On an active/standby switchover, allmulticast
2882 		 * mode on the new primary slave should be set according to the
2883 		 * bonding device.
2884 		 */
2885 		if (rte_eth_allmulticast_get(internals->port_id) == 1)
2886 			rte_eth_allmulticast_enable(port_id);
2887 		else
2888 			rte_eth_allmulticast_disable(port_id);
2889 	}
2890 
2891 	return 0;
2892 }
2893 
2894 static void
2895 bond_ethdev_delayed_lsc_propagation(void *arg)
2896 {
2897 	if (arg == NULL)
2898 		return;
2899 
2900 	rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2901 			RTE_ETH_EVENT_INTR_LSC, NULL);
2902 }
2903 
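/* Handle a link status change event from a slave: activate or deactivate
 * the slave, update the primary port and the bonded link properties, and
 * propagate the LSC event (optionally after the configured up/down delay).
 */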
2904 int
2905 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2906 		void *param, void *ret_param __rte_unused)
2907 {
2908 	struct rte_eth_dev *bonded_eth_dev;
2909 	struct bond_dev_private *internals;
2910 	struct rte_eth_link link;
2911 	int rc = -1;
2912 	int ret;
2913 
2914 	uint8_t lsc_flag = 0;
2915 	int valid_slave = 0;
2916 	uint16_t active_pos;
2917 	uint16_t i;
2918 
2919 	if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2920 		return rc;
2921 
2922 	bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2923 
2924 	if (check_for_bonded_ethdev(bonded_eth_dev))
2925 		return rc;
2926 
2927 	internals = bonded_eth_dev->data->dev_private;
2928 
2929 	/* If the device isn't started don't handle interrupts */
2930 	if (!bonded_eth_dev->data->dev_started)
2931 		return rc;
2932 
2933 	/* verify that port_id is a valid slave of bonded port */
2934 	for (i = 0; i < internals->slave_count; i++) {
2935 		if (internals->slaves[i].port_id == port_id) {
2936 			valid_slave = 1;
2937 			break;
2938 		}
2939 	}
2940 
2941 	if (!valid_slave)
2942 		return rc;
2943 
2944 	/* Synchronize lsc callback parallel calls either by real link event
2945 	 * from the slave PMDs or by the bonding PMD itself.
2946 	 */
2947 	rte_spinlock_lock(&internals->lsc_lock);
2948 
2949 	/* Search for port in active port list */
2950 	active_pos = find_slave_by_id(internals->active_slaves,
2951 			internals->active_slave_count, port_id);
2952 
2953 	ret = rte_eth_link_get_nowait(port_id, &link);
2954 	if (ret < 0)
2955 		RTE_BOND_LOG(ERR, "Slave (port %u) link get failed", port_id);
2956 
2957 	if (ret == 0 && link.link_status) {
2958 		if (active_pos < internals->active_slave_count)
2959 			goto link_update;
2960 
2961 		/* check link state properties if bonded link is up*/
2962 		if (bonded_eth_dev->data->dev_link.link_status == RTE_ETH_LINK_UP) {
2963 			if (link_properties_valid(bonded_eth_dev, &link) != 0)
2964 				RTE_BOND_LOG(ERR, "Invalid link properties "
2965 					     "for slave %d in bonding mode %d",
2966 					     port_id, internals->mode);
2967 		} else {
2968 			/* inherit slave link properties */
2969 			link_properties_set(bonded_eth_dev, &link);
2970 		}
2971 
2972 		/* If no active slave ports then set this port to be
2973 		 * the primary port.
2974 		 */
2975 		if (internals->active_slave_count < 1) {
2976 			/* If first active slave, then change link status */
2977 			bonded_eth_dev->data->dev_link.link_status =
2978 								RTE_ETH_LINK_UP;
2979 			internals->current_primary_port = port_id;
2980 			lsc_flag = 1;
2981 
2982 			mac_address_slaves_update(bonded_eth_dev);
2983 			bond_ethdev_promiscuous_update(bonded_eth_dev);
2984 			bond_ethdev_allmulticast_update(bonded_eth_dev);
2985 		}
2986 
2987 		activate_slave(bonded_eth_dev, port_id);
2988 
2989 		/* If the user has defined the primary port then default to
2990 		 * using it.
2991 		 */
2992 		if (internals->user_defined_primary_port &&
2993 				internals->primary_port == port_id)
2994 			bond_ethdev_primary_set(internals, port_id);
2995 	} else {
2996 		if (active_pos == internals->active_slave_count)
2997 			goto link_update;
2998 
2999 		/* Remove from active slave list */
3000 		deactivate_slave(bonded_eth_dev, port_id);
3001 
3002 		if (internals->active_slave_count < 1)
3003 			lsc_flag = 1;
3004 
3005 		/* Update primary id: take the first active slave from the list or,
3006 		 * if none is available, fall back to the configured primary port */
3007 		if (port_id == internals->current_primary_port) {
3008 			if (internals->active_slave_count > 0)
3009 				bond_ethdev_primary_set(internals,
3010 						internals->active_slaves[0]);
3011 			else
3012 				internals->current_primary_port = internals->primary_port;
3013 			mac_address_slaves_update(bonded_eth_dev);
3014 			bond_ethdev_promiscuous_update(bonded_eth_dev);
3015 			bond_ethdev_allmulticast_update(bonded_eth_dev);
3016 		}
3017 	}
3018 
3019 link_update:
3020 	/**
3021 	 * Update bonded device link properties after any change to active
3022 	 * slaves
3023 	 */
3024 	bond_ethdev_link_update(bonded_eth_dev, 0);
3025 
3026 	if (lsc_flag) {
3027 		/* Cancel any possible outstanding interrupts if delays are enabled */
3028 		if (internals->link_up_delay_ms > 0 ||
3029 			internals->link_down_delay_ms > 0)
3030 			rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
3031 					bonded_eth_dev);
3032 
3033 		if (bonded_eth_dev->data->dev_link.link_status) {
3034 			if (internals->link_up_delay_ms > 0)
3035 				rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
3036 						bond_ethdev_delayed_lsc_propagation,
3037 						(void *)bonded_eth_dev);
3038 			else
3039 				rte_eth_dev_callback_process(bonded_eth_dev,
3040 						RTE_ETH_EVENT_INTR_LSC,
3041 						NULL);
3042 
3043 		} else {
3044 			if (internals->link_down_delay_ms > 0)
3045 				rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
3046 						bond_ethdev_delayed_lsc_propagation,
3047 						(void *)bonded_eth_dev);
3048 			else
3049 				rte_eth_dev_callback_process(bonded_eth_dev,
3050 						RTE_ETH_EVENT_INTR_LSC,
3051 						NULL);
3052 		}
3053 	}
3054 
3055 	rte_spinlock_unlock(&internals->lsc_lock);
3056 
3057 	return rc;
3058 }
3059 
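/* Store the new RETA table in the bonded device's private data and
 * propagate it to every slave using the slave's own RETA size.
 */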
3060 static int
3061 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
3062 		struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
3063 {
3064 	unsigned i, j;
3065 	int result = 0;
3066 	int slave_reta_size;
3067 	unsigned reta_count;
3068 	struct bond_dev_private *internals = dev->data->dev_private;
3069 
3070 	if (reta_size != internals->reta_size)
3071 		return -EINVAL;
3072 
3073 	 /* Copy RETA table */
3074 	reta_count = (reta_size + RTE_ETH_RETA_GROUP_SIZE - 1) /
3075 			RTE_ETH_RETA_GROUP_SIZE;
3076 
3077 	for (i = 0; i < reta_count; i++) {
3078 		internals->reta_conf[i].mask = reta_conf[i].mask;
3079 		for (j = 0; j < RTE_ETH_RETA_GROUP_SIZE; j++)
3080 			if ((reta_conf[i].mask >> j) & 0x01)
3081 				internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
3082 	}
3083 
3084 	/* Fill rest of array */
3085 	for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
3086 		memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
3087 				sizeof(internals->reta_conf[0]) * reta_count);
3088 
3089 	/* Propagate RETA over slaves */
3090 	for (i = 0; i < internals->slave_count; i++) {
3091 		slave_reta_size = internals->slaves[i].reta_size;
3092 		result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
3093 				&internals->reta_conf[0], slave_reta_size);
3094 		if (result < 0)
3095 			return result;
3096 	}
3097 
3098 	return 0;
3099 }
3100 
3101 static int
3102 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
3103 		struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
3104 {
3105 	int i, j;
3106 	struct bond_dev_private *internals = dev->data->dev_private;
3107 
3108 	if (reta_size != internals->reta_size)
3109 		return -EINVAL;
3110 
3111 	 /* Copy RETA table */
3112 	for (i = 0; i < reta_size / RTE_ETH_RETA_GROUP_SIZE; i++)
3113 		for (j = 0; j < RTE_ETH_RETA_GROUP_SIZE; j++)
3114 			if ((reta_conf[i].mask >> j) & 0x01)
3115 				reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
3116 
3117 	return 0;
3118 }
3119 
3120 static int
3121 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
3122 		struct rte_eth_rss_conf *rss_conf)
3123 {
3124 	int i, result = 0;
3125 	struct bond_dev_private *internals = dev->data->dev_private;
3126 	struct rte_eth_rss_conf bond_rss_conf;
3127 
3128 	memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
3129 
3130 	bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
3131 
3132 	if (bond_rss_conf.rss_hf != 0)
3133 		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
3134 
3135 	if (bond_rss_conf.rss_key) {
3136 		if (bond_rss_conf.rss_key_len < internals->rss_key_len)
3137 			return -EINVAL;
3138 		else if (bond_rss_conf.rss_key_len > internals->rss_key_len)
3139 			RTE_BOND_LOG(WARNING, "rss_key will be truncated");
3140 
3141 		memcpy(internals->rss_key, bond_rss_conf.rss_key,
3142 				internals->rss_key_len);
3143 		bond_rss_conf.rss_key_len = internals->rss_key_len;
3144 	}
3145 
3146 	for (i = 0; i < internals->slave_count; i++) {
3147 		result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
3148 				&bond_rss_conf);
3149 		if (result < 0)
3150 			return result;
3151 	}
3152 
3153 	return 0;
3154 }
3155 
3156 static int
3157 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
3158 		struct rte_eth_rss_conf *rss_conf)
3159 {
3160 	struct bond_dev_private *internals = dev->data->dev_private;
3161 
3162 	rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
3163 	rss_conf->rss_key_len = internals->rss_key_len;
3164 	if (rss_conf->rss_key)
3165 		memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
3166 
3167 	return 0;
3168 }
3169 
3170 static int
3171 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
3172 {
3173 	struct rte_eth_dev *slave_eth_dev;
3174 	struct bond_dev_private *internals = dev->data->dev_private;
3175 	int ret, i;
3176 
3177 	rte_spinlock_lock(&internals->lock);
3178 
3179 	for (i = 0; i < internals->slave_count; i++) {
3180 		slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
3181 		if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
3182 			rte_spinlock_unlock(&internals->lock);
3183 			return -ENOTSUP;
3184 		}
3185 	}
3186 	for (i = 0; i < internals->slave_count; i++) {
3187 		ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
3188 		if (ret < 0) {
3189 			rte_spinlock_unlock(&internals->lock);
3190 			return ret;
3191 		}
3192 	}
3193 
3194 	rte_spinlock_unlock(&internals->lock);
3195 	return 0;
3196 }
3197 
3198 static int
3199 bond_ethdev_mac_address_set(struct rte_eth_dev *dev,
3200 			struct rte_ether_addr *addr)
3201 {
3202 	if (mac_address_set(dev, addr)) {
3203 		RTE_BOND_LOG(ERR, "Failed to update MAC address");
3204 		return -EINVAL;
3205 	}
3206 
3207 	return 0;
3208 }
3209 
3210 static int
3211 bond_flow_ops_get(struct rte_eth_dev *dev __rte_unused,
3212 		  const struct rte_flow_ops **ops)
3213 {
3214 	*ops = &bond_flow_ops;
3215 	return 0;
3216 }
3217 
3218 static int
3219 bond_ethdev_mac_addr_add(struct rte_eth_dev *dev,
3220 			struct rte_ether_addr *mac_addr,
3221 			__rte_unused uint32_t index, uint32_t vmdq)
3222 {
3223 	struct rte_eth_dev *slave_eth_dev;
3224 	struct bond_dev_private *internals = dev->data->dev_private;
3225 	int ret, i;
3226 
3227 	rte_spinlock_lock(&internals->lock);
3228 
3229 	for (i = 0; i < internals->slave_count; i++) {
3230 		slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
3231 		if (*slave_eth_dev->dev_ops->mac_addr_add == NULL ||
3232 			 *slave_eth_dev->dev_ops->mac_addr_remove == NULL) {
3233 			ret = -ENOTSUP;
3234 			goto end;
3235 		}
3236 	}
3237 
3238 	for (i = 0; i < internals->slave_count; i++) {
3239 		ret = rte_eth_dev_mac_addr_add(internals->slaves[i].port_id,
3240 				mac_addr, vmdq);
3241 		if (ret < 0) {
3242 			/* rollback */
3243 			for (i--; i >= 0; i--)
3244 				rte_eth_dev_mac_addr_remove(
3245 					internals->slaves[i].port_id, mac_addr);
3246 			goto end;
3247 		}
3248 	}
3249 
3250 	ret = 0;
3251 end:
3252 	rte_spinlock_unlock(&internals->lock);
3253 	return ret;
3254 }
3255 
3256 static void
3257 bond_ethdev_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
3258 {
3259 	struct rte_eth_dev *slave_eth_dev;
3260 	struct bond_dev_private *internals = dev->data->dev_private;
3261 	int i;
3262 
3263 	rte_spinlock_lock(&internals->lock);
3264 
3265 	for (i = 0; i < internals->slave_count; i++) {
3266 		slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
3267 		if (*slave_eth_dev->dev_ops->mac_addr_remove == NULL)
3268 			goto end;
3269 	}
3270 
3271 	struct rte_ether_addr *mac_addr = &dev->data->mac_addrs[index];
3272 
3273 	for (i = 0; i < internals->slave_count; i++)
3274 		rte_eth_dev_mac_addr_remove(internals->slaves[i].port_id,
3275 				mac_addr);
3276 
3277 end:
3278 	rte_spinlock_unlock(&internals->lock);
3279 }
3280 
3281 const struct eth_dev_ops default_dev_ops = {
3282 	.dev_start            = bond_ethdev_start,
3283 	.dev_stop             = bond_ethdev_stop,
3284 	.dev_close            = bond_ethdev_close,
3285 	.dev_configure        = bond_ethdev_configure,
3286 	.dev_infos_get        = bond_ethdev_info,
3287 	.vlan_filter_set      = bond_ethdev_vlan_filter_set,
3288 	.rx_queue_setup       = bond_ethdev_rx_queue_setup,
3289 	.tx_queue_setup       = bond_ethdev_tx_queue_setup,
3290 	.rx_queue_release     = bond_ethdev_rx_queue_release,
3291 	.tx_queue_release     = bond_ethdev_tx_queue_release,
3292 	.link_update          = bond_ethdev_link_update,
3293 	.stats_get            = bond_ethdev_stats_get,
3294 	.stats_reset          = bond_ethdev_stats_reset,
3295 	.promiscuous_enable   = bond_ethdev_promiscuous_enable,
3296 	.promiscuous_disable  = bond_ethdev_promiscuous_disable,
3297 	.allmulticast_enable  = bond_ethdev_allmulticast_enable,
3298 	.allmulticast_disable = bond_ethdev_allmulticast_disable,
3299 	.reta_update          = bond_ethdev_rss_reta_update,
3300 	.reta_query           = bond_ethdev_rss_reta_query,
3301 	.rss_hash_update      = bond_ethdev_rss_hash_update,
3302 	.rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get,
3303 	.mtu_set              = bond_ethdev_mtu_set,
3304 	.mac_addr_set         = bond_ethdev_mac_address_set,
3305 	.mac_addr_add         = bond_ethdev_mac_addr_add,
3306 	.mac_addr_remove      = bond_ethdev_mac_addr_remove,
3307 	.flow_ops_get         = bond_flow_ops_get
3308 };
3309 
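/* Allocate and initialize a bonded ethdev for the vdev: reserve the ethdev
 * entry, set default private data and device operations, apply the requested
 * bonding mode and allocate the VLAN filter bitmap.
 */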
3310 static int
3311 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
3312 {
3313 	const char *name = rte_vdev_device_name(dev);
3314 	uint8_t socket_id = dev->device.numa_node;
3315 	struct bond_dev_private *internals = NULL;
3316 	struct rte_eth_dev *eth_dev = NULL;
3317 	uint32_t vlan_filter_bmp_size;
3318 
3319 	/* now do all data allocation - for the eth_dev structure
3320 	 * and the internal (private) data
3321 	 */
3322 
3323 	/* reserve an ethdev entry */
3324 	eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
3325 	if (eth_dev == NULL) {
3326 		RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
3327 		goto err;
3328 	}
3329 
3330 	internals = eth_dev->data->dev_private;
3331 	eth_dev->data->nb_rx_queues = (uint16_t)1;
3332 	eth_dev->data->nb_tx_queues = (uint16_t)1;
3333 
3334 	/* Allocate memory for storing MAC addresses */
3335 	eth_dev->data->mac_addrs = rte_zmalloc_socket(name, RTE_ETHER_ADDR_LEN *
3336 			BOND_MAX_MAC_ADDRS, 0, socket_id);
3337 	if (eth_dev->data->mac_addrs == NULL) {
3338 		RTE_BOND_LOG(ERR,
3339 			     "Failed to allocate %u bytes needed to store MAC addresses",
3340 			     RTE_ETHER_ADDR_LEN * BOND_MAX_MAC_ADDRS);
3341 		goto err;
3342 	}
3343 
3344 	eth_dev->dev_ops = &default_dev_ops;
3345 	eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC |
3346 					RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
3347 
3348 	rte_spinlock_init(&internals->lock);
3349 	rte_spinlock_init(&internals->lsc_lock);
3350 
3351 	internals->port_id = eth_dev->data->port_id;
3352 	internals->mode = BONDING_MODE_INVALID;
3353 	internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
3354 	internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
3355 	internals->burst_xmit_hash = burst_xmit_l2_hash;
3356 	internals->user_defined_mac = 0;
3357 
3358 	internals->link_status_polling_enabled = 0;
3359 
3360 	internals->link_status_polling_interval_ms =
3361 		DEFAULT_POLLING_INTERVAL_10_MS;
3362 	internals->link_down_delay_ms = 0;
3363 	internals->link_up_delay_ms = 0;
3364 
3365 	internals->slave_count = 0;
3366 	internals->active_slave_count = 0;
3367 	internals->rx_offload_capa = 0;
3368 	internals->tx_offload_capa = 0;
3369 	internals->rx_queue_offload_capa = 0;
3370 	internals->tx_queue_offload_capa = 0;
3371 	internals->candidate_max_rx_pktlen = 0;
3372 	internals->max_rx_pktlen = 0;
3373 
3374 	/* Initially allow any RSS offload type to be chosen */
3375 	internals->flow_type_rss_offloads = RTE_ETH_RSS_PROTO_MASK;
3376 
3377 	memset(&internals->default_rxconf, 0,
3378 	       sizeof(internals->default_rxconf));
3379 	memset(&internals->default_txconf, 0,
3380 	       sizeof(internals->default_txconf));
3381 
3382 	memset(&internals->rx_desc_lim, 0, sizeof(internals->rx_desc_lim));
3383 	memset(&internals->tx_desc_lim, 0, sizeof(internals->tx_desc_lim));
3384 
3385 	memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
3386 	memset(internals->slaves, 0, sizeof(internals->slaves));
3387 
3388 	TAILQ_INIT(&internals->flow_list);
3389 	internals->flow_isolated_valid = 0;
3390 
3391 	/* Set mode 4 default configuration */
3392 	bond_mode_8023ad_setup(eth_dev, NULL);
3393 	if (bond_ethdev_mode_set(eth_dev, mode)) {
3394 		RTE_BOND_LOG(ERR, "Failed to set bonded device %u mode to %u",
3395 				 eth_dev->data->port_id, mode);
3396 		goto err;
3397 	}
3398 
3399 	vlan_filter_bmp_size =
3400 		rte_bitmap_get_memory_footprint(RTE_ETHER_MAX_VLAN_ID + 1);
3401 	internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
3402 						   RTE_CACHE_LINE_SIZE);
3403 	if (internals->vlan_filter_bmpmem == NULL) {
3404 		RTE_BOND_LOG(ERR,
3405 			     "Failed to allocate vlan bitmap for bonded device %u",
3406 			     eth_dev->data->port_id);
3407 		goto err;
3408 	}
3409 
3410 	internals->vlan_filter_bmp = rte_bitmap_init(RTE_ETHER_MAX_VLAN_ID + 1,
3411 			internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
3412 	if (internals->vlan_filter_bmp == NULL) {
3413 		RTE_BOND_LOG(ERR,
3414 			     "Failed to init vlan bitmap for bonded device %u",
3415 			     eth_dev->data->port_id);
3416 		rte_free(internals->vlan_filter_bmpmem);
3417 		goto err;
3418 	}
3419 
3420 	return eth_dev->data->port_id;
3421 
3422 err:
3423 	rte_free(internals);
3424 	if (eth_dev != NULL)
3425 		eth_dev->data->dev_private = NULL;
3426 	rte_eth_dev_release_port(eth_dev);
3427 	return -1;
3428 }
3429 
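/*
 * Illustrative sketch (not part of the driver): besides the --vdev devargs
 * path handled by bond_probe() below, bond_alloc() is also reached
 * programmatically through the bonding API declared in rte_eth_bond.h.
 * bond_port, slave_port_id and the device name are placeholder values.
 *
 *	int bond_port = rte_eth_bond_create("net_bonding0",
 *			BONDING_MODE_ACTIVE_BACKUP, rte_socket_id());
 *	if (bond_port < 0)
 *		return bond_port;
 *
 *	rte_eth_bond_slave_add(bond_port, slave_port_id);
 *	rte_eth_bond_primary_set(bond_port, slave_port_id);
 */
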
3430 static int
bond_probe(struct rte_vdev_device * dev)3431 bond_probe(struct rte_vdev_device *dev)
3432 {
3433 	const char *name;
3434 	struct bond_dev_private *internals;
3435 	struct rte_kvargs *kvlist;
3436 	uint8_t bonding_mode;
3437 	int arg_count, port_id;
3438 	int socket_id;
3439 	uint8_t agg_mode;
3440 	struct rte_eth_dev *eth_dev;
3441 
3442 	if (!dev)
3443 		return -EINVAL;
3444 
3445 	name = rte_vdev_device_name(dev);
3446 	RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name);
3447 
3448 	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
3449 		eth_dev = rte_eth_dev_attach_secondary(name);
3450 		if (!eth_dev) {
3451 			RTE_BOND_LOG(ERR, "Failed to probe %s", name);
3452 			return -1;
3453 		}
3454 		/* TODO: request info from primary to set up Rx and Tx */
3455 		eth_dev->dev_ops = &default_dev_ops;
3456 		eth_dev->device = &dev->device;
3457 		rte_eth_dev_probing_finish(eth_dev);
3458 		return 0;
3459 	}
3460 
3461 	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3462 		pmd_bond_init_valid_arguments);
3463 	if (kvlist == NULL) {
3464 		RTE_BOND_LOG(ERR, "Invalid args in %s", rte_vdev_device_args(dev));
3465 		return -1;
3466 	}
3467 
3468 	/* Parse link bonding mode */
3469 	if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3470 		if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3471 				&bond_ethdev_parse_slave_mode_kvarg,
3472 				&bonding_mode) != 0) {
3473 			RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s",
3474 					name);
3475 			goto parse_error;
3476 		}
3477 	} else {
3478 		RTE_BOND_LOG(ERR, "Mode must be specified exactly once for bonded "
3479 				"device %s", name);
3480 		goto parse_error;
3481 	}
3482 
3483 	/* Parse socket id to create bonding device on */
3484 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3485 	if (arg_count == 1) {
3486 		if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3487 				&bond_ethdev_parse_socket_id_kvarg, &socket_id)
3488 				!= 0) {
3489 			RTE_BOND_LOG(ERR, "Invalid socket id specified for "
3490 					"bonded device %s", name);
3491 			goto parse_error;
3492 		}
3493 	} else if (arg_count > 1) {
3494 		RTE_BOND_LOG(ERR, "Socket id can be specified only once for "
3495 				"bonded device %s", name);
3496 		goto parse_error;
3497 	} else {
3498 		socket_id = rte_socket_id();
3499 	}
3500 
3501 	dev->device.numa_node = socket_id;
3502 
3503 	/* Create link bonding eth device */
3504 	port_id = bond_alloc(dev, bonding_mode);
3505 	if (port_id < 0) {
3506 		RTE_BOND_LOG(ERR, "Failed to create bonded device %s in mode %u on "
3507 				"socket %u.", name, bonding_mode, socket_id);
3508 		goto parse_error;
3509 	}
3510 	internals = rte_eth_devices[port_id].data->dev_private;
3511 	internals->kvlist = kvlist;
3512 
3513 	if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3514 		if (rte_kvargs_process(kvlist,
3515 				PMD_BOND_AGG_MODE_KVARG,
3516 				&bond_ethdev_parse_slave_agg_mode_kvarg,
3517 				&agg_mode) != 0) {
3518 			RTE_BOND_LOG(ERR,
3519 					"Failed to parse agg selection mode for bonded device %s",
3520 					name);
3521 			goto parse_error;
3522 		}
3523 
3524 		if (internals->mode == BONDING_MODE_8023AD)
3525 			internals->mode4.agg_selection = agg_mode;
3526 	} else {
3527 		internals->mode4.agg_selection = AGG_STABLE;
3528 	}
3529 
3530 	rte_eth_dev_probing_finish(&rte_eth_devices[port_id]);
3531 	RTE_BOND_LOG(INFO, "Created bonded device %s on port %d in mode %u on "
3532 			"socket %u.", name, port_id, bonding_mode, socket_id);
3533 	return 0;
3534 
3535 parse_error:
3536 	rte_kvargs_free(kvlist);
3537 
3538 	return -1;
3539 }
3540 
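/*
 * Illustrative sketch (not part of the driver): the probe path above can also
 * be exercised at run time by hot-plugging a bonding vdev with devargs; the
 * device name, mode and slave PCI address below are placeholder values.
 *
 *	if (rte_eal_hotplug_add("vdev", "net_bonding0",
 *			"mode=4,agg_mode=stable,slave=0000:05:00.0") != 0)
 *		rte_exit(EXIT_FAILURE, "failed to attach bonding vdev\n");
 */
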
3541 static int
bond_remove(struct rte_vdev_device * dev)3542 bond_remove(struct rte_vdev_device *dev)
3543 {
3544 	struct rte_eth_dev *eth_dev;
3545 	struct bond_dev_private *internals;
3546 	const char *name;
3547 	int ret = 0;
3548 
3549 	if (!dev)
3550 		return -EINVAL;
3551 
3552 	name = rte_vdev_device_name(dev);
3553 	RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name);
3554 
3555 	/* find an ethdev entry */
3556 	eth_dev = rte_eth_dev_allocated(name);
3557 	if (eth_dev == NULL)
3558 		return 0; /* port already released */
3559 
3560 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
3561 		return rte_eth_dev_release_port(eth_dev);
3562 
3563 	RTE_ASSERT(eth_dev->device == &dev->device);
3564 
3565 	internals = eth_dev->data->dev_private;
3566 	if (internals->slave_count != 0)
3567 		return -EBUSY;
3568 
3569 	if (eth_dev->data->dev_started == 1) {
3570 		ret = bond_ethdev_stop(eth_dev);
3571 		bond_ethdev_close(eth_dev);
3572 	}
3573 	rte_eth_dev_release_port(eth_dev);
3574 
3575 	return ret;
3576 }
3577 
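/*
 * Illustrative sketch (not part of the driver): bond_remove() refuses to tear
 * the device down while slaves are still attached (-EBUSY), so an application
 * detaches the slaves first. bond_port and slave_port_id are placeholders.
 *
 *	rte_eth_dev_stop(bond_port);
 *	rte_eth_bond_slave_remove(bond_port, slave_port_id);
 *	rte_eal_hotplug_remove("vdev", "net_bonding0");
 */
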
3578 /* This part resolves the slave port ids after all the other pdevs and vdevs
3579  * have been allocated. */
3580 static int
bond_ethdev_configure(struct rte_eth_dev * dev)3581 bond_ethdev_configure(struct rte_eth_dev *dev)
3582 {
3583 	const char *name = dev->device->name;
3584 	struct bond_dev_private *internals = dev->data->dev_private;
3585 	struct rte_kvargs *kvlist = internals->kvlist;
3586 	int arg_count;
3587 	uint16_t port_id = dev - rte_eth_devices;
3588 	uint8_t agg_mode;
3589 
3590 	static const uint8_t default_rss_key[40] = {
3591 		0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3592 		0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3593 		0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3594 		0xBE, 0xAC, 0x01, 0xFA
3595 	};
3596 
3597 	unsigned i, j;
3598 
3599 	/*
3600 	 * If RSS is enabled, fill the redirection table with default values
3601 	 * and set the key to the value specified in the port RSS configuration.
3602 	 * Fall back to the default RSS key if no key is specified.
3603 	 */
3604 	if (dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS) {
3605 		struct rte_eth_rss_conf *rss_conf =
3606 			&dev->data->dev_conf.rx_adv_conf.rss_conf;
3607 
3608 		if (internals->rss_key_len == 0) {
3609 			internals->rss_key_len = sizeof(default_rss_key);
3610 		}
3611 
3612 		if (rss_conf->rss_key != NULL) {
3613 			if (internals->rss_key_len > rss_conf->rss_key_len) {
3614 				RTE_BOND_LOG(ERR, "Invalid RSS key length (%u)",
3615 						rss_conf->rss_key_len);
3616 				return -EINVAL;
3617 			}
3618 
3619 			memcpy(internals->rss_key, rss_conf->rss_key,
3620 			       internals->rss_key_len);
3621 		} else {
3622 			if (internals->rss_key_len > sizeof(default_rss_key)) {
3623 				/*
3624 				 * If the required RSS key (e.g. a standard key
3625 				 * plus an extended hash key) is longer than the
3626 				 * default key, generate a fresh random key of
3627 				 * the required length instead.
3628 				 */
3629 				for (i = 0; i < internals->rss_key_len; i++)
3630 					internals->rss_key[i] = (uint8_t)rte_rand();
3631 			} else {
3632 				memcpy(internals->rss_key, default_rss_key,
3633 					internals->rss_key_len);
3634 			}
3635 		}
3636 
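		/*
		 * Spread the default redirection table round-robin across the
		 * configured Rx queues; applications can override it later via
		 * the reta_update op.
		 */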
3637 		for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3638 			internals->reta_conf[i].mask = ~0LL;
3639 			for (j = 0; j < RTE_ETH_RETA_GROUP_SIZE; j++)
3640 				internals->reta_conf[i].reta[j] =
3641 						(i * RTE_ETH_RETA_GROUP_SIZE + j) %
3642 						dev->data->nb_rx_queues;
3643 		}
3644 	}
3645 
3646 	/* set the max_rx_pktlen */
3647 	internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3648 
3649 	/*
3650 	 * if no kvlist, it means that this bonded device has been created
3651 	 * through the bonding api.
3652 	 */
3653 	if (!kvlist)
3654 		return 0;
3655 
3656 	/* Parse MAC address for bonded device */
3657 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3658 	if (arg_count == 1) {
3659 		struct rte_ether_addr bond_mac;
3660 
3661 		if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3662 				       &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3663 			RTE_BOND_LOG(INFO, "Invalid mac address for bonded device %s",
3664 				     name);
3665 			return -1;
3666 		}
3667 
3668 		/* Set MAC address */
3669 		if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3670 			RTE_BOND_LOG(ERR,
3671 				     "Failed to set mac address on bonded device %s",
3672 				     name);
3673 			return -1;
3674 		}
3675 	} else if (arg_count > 1) {
3676 		RTE_BOND_LOG(ERR,
3677 			     "MAC address can be specified only once for bonded device %s",
3678 			     name);
3679 		return -1;
3680 	}
3681 
3682 	/* Parse/set balance mode transmit policy */
3683 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3684 	if (arg_count == 1) {
3685 		uint8_t xmit_policy;
3686 
3687 		if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3688 				       &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3689 		    0) {
3690 			RTE_BOND_LOG(INFO,
3691 				     "Invalid xmit policy specified for bonded device %s",
3692 				     name);
3693 			return -1;
3694 		}
3695 
3696 		/* Set balance mode transmit policy */
3697 		if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3698 			RTE_BOND_LOG(ERR,
3699 				     "Failed to set balance xmit policy on bonded device %s",
3700 				     name);
3701 			return -1;
3702 		}
3703 	} else if (arg_count > 1) {
3704 		RTE_BOND_LOG(ERR,
3705 			     "Transmit policy can be specified only once for bonded device %s",
3706 			     name);
3707 		return -1;
3708 	}
3709 
3710 	if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3711 		if (rte_kvargs_process(kvlist,
3712 				       PMD_BOND_AGG_MODE_KVARG,
3713 				       &bond_ethdev_parse_slave_agg_mode_kvarg,
3714 				       &agg_mode) != 0) {
3715 			RTE_BOND_LOG(ERR,
3716 				     "Failed to parse agg selection mode for bonded device %s",
3717 				     name);
			return -1;
3718 		}
3719 		if (internals->mode == BONDING_MODE_8023AD) {
3720 			int ret = rte_eth_bond_8023ad_agg_selection_set(port_id,
3721 					agg_mode);
3722 			if (ret < 0) {
3723 				RTE_BOND_LOG(ERR,
3724 					"Invalid args for agg selection set for bonded device %s",
3725 					name);
3726 				return -1;
3727 			}
3728 		}
3729 	}
3730 
3731 	/* Parse/add slave ports to bonded device */
3732 	if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3733 		struct bond_ethdev_slave_ports slave_ports;
3734 		unsigned i;
3735 
3736 		memset(&slave_ports, 0, sizeof(slave_ports));
3737 
3738 		if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3739 				       &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3740 			RTE_BOND_LOG(ERR,
3741 				     "Failed to parse slave ports for bonded device %s",
3742 				     name);
3743 			return -1;
3744 		}
3745 
3746 		for (i = 0; i < slave_ports.slave_count; i++) {
3747 			if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3748 				RTE_BOND_LOG(ERR,
3749 					     "Failed to add port %d as slave to bonded device %s",
3750 					     slave_ports.slaves[i], name);
3751 			}
3752 		}
3753 
3754 	} else {
3755 		RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name);
3756 		return -1;
3757 	}
3758 
3759 	/* Parse/set primary slave port id */
3760 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3761 	if (arg_count == 1) {
3762 		uint16_t primary_slave_port_id;
3763 
3764 		if (rte_kvargs_process(kvlist,
3765 				       PMD_BOND_PRIMARY_SLAVE_KVARG,
3766 				       &bond_ethdev_parse_primary_slave_port_id_kvarg,
3767 				       &primary_slave_port_id) < 0) {
3768 			RTE_BOND_LOG(INFO,
3769 				     "Invalid primary slave port id specified for bonded device %s",
3770 				     name);
3771 			return -1;
3772 		}
3773 
3774 		/* Set primary slave port id */
3775 		if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3776 		    != 0) {
3777 			RTE_BOND_LOG(ERR,
3778 				     "Failed to set primary slave port %d on bonded device %s",
3779 				     primary_slave_port_id, name);
3780 			return -1;
3781 		}
3782 	} else if (arg_count > 1) {
3783 		RTE_BOND_LOG(INFO,
3784 			     "Primary slave can be specified only once for bonded device %s",
3785 			     name);
3786 		return -1;
3787 	}
3788 
3789 	/* Parse link status monitor polling interval */
3790 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3791 	if (arg_count == 1) {
3792 		uint32_t lsc_poll_interval_ms;
3793 
3794 		if (rte_kvargs_process(kvlist,
3795 				       PMD_BOND_LSC_POLL_PERIOD_KVARG,
3796 				       &bond_ethdev_parse_time_ms_kvarg,
3797 				       &lsc_poll_interval_ms) < 0) {
3798 			RTE_BOND_LOG(INFO,
3799 				     "Invalid lsc polling interval value specified for bonded"
3800 				     " device %s", name);
3801 			return -1;
3802 		}
3803 
3804 		if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3805 		    != 0) {
3806 			RTE_BOND_LOG(ERR,
3807 				     "Failed to set lsc monitor polling interval (%u ms) on bonded device %s",
3808 				     lsc_poll_interval_ms, name);
3809 			return -1;
3810 		}
3811 	} else if (arg_count > 1) {
3812 		RTE_BOND_LOG(INFO,
3813 			     "LSC polling interval can be specified only once for bonded"
3814 			     " device %s", name);
3815 		return -1;
3816 	}
3817 
3818 	/* Parse link up interrupt propagation delay */
3819 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3820 	if (arg_count == 1) {
3821 		uint32_t link_up_delay_ms;
3822 
3823 		if (rte_kvargs_process(kvlist,
3824 				       PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3825 				       &bond_ethdev_parse_time_ms_kvarg,
3826 				       &link_up_delay_ms) < 0) {
3827 			RTE_BOND_LOG(INFO,
3828 				     "Invalid link up propagation delay value specified for"
3829 				     " bonded device %s", name);
3830 			return -1;
3831 		}
3832 
3833 		/* Set link up propagation delay */
3834 		if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3835 		    != 0) {
3836 			RTE_BOND_LOG(ERR,
3837 				     "Failed to set link up propagation delay (%u ms) on bonded"
3838 				     " device %s", link_up_delay_ms, name);
3839 			return -1;
3840 		}
3841 	} else if (arg_count > 1) {
3842 		RTE_BOND_LOG(INFO,
3843 			     "Link up propagation delay can be specified only once for"
3844 			     " bonded device %s", name);
3845 		return -1;
3846 	}
3847 
3848 	/* Parse link down interrupt propagation delay */
3849 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3850 	if (arg_count == 1) {
3851 		uint32_t link_down_delay_ms;
3852 
3853 		if (rte_kvargs_process(kvlist,
3854 				       PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3855 				       &bond_ethdev_parse_time_ms_kvarg,
3856 				       &link_down_delay_ms) < 0) {
3857 			RTE_BOND_LOG(INFO,
3858 				     "Invalid link down propagation delay value specified for"
3859 				     " bonded device %s", name);
3860 			return -1;
3861 		}
3862 
3863 		/* Set link down propagation delay */
3864 		if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3865 		    != 0) {
3866 			RTE_BOND_LOG(ERR,
3867 				     "Failed to set link down propagation delay (%u ms) on bonded device %s",
3868 				     link_down_delay_ms, name);
3869 			return -1;
3870 		}
3871 	} else if (arg_count > 1) {
3872 		RTE_BOND_LOG(INFO,
3873 			     "Link down propagation delay can be specified only once for bonded device %s",
3874 			     name);
3875 		return -1;
3876 	}
3877 
3878 	/* configure slaves so we can pass mtu setting */
3879 	for (i = 0; i < internals->slave_count; i++) {
3880 		struct rte_eth_dev *slave_ethdev =
3881 				&(rte_eth_devices[internals->slaves[i].port_id]);
3882 		if (slave_configure(dev, slave_ethdev) != 0) {
3883 			RTE_BOND_LOG(ERR,
3884 				"bonded port (%d) failed to configure slave device (%d)",
3885 				dev->data->port_id,
3886 				internals->slaves[i].port_id);
3887 			return -1;
3888 		}
3889 	}
3890 	return 0;
3891 }
3892 
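/*
 * Illustrative sketch (not part of the driver): a minimal application-side
 * sequence that exercises the RSS defaults filled in above. bond_port,
 * mbuf_pool, the queue count and descriptor numbers are placeholder values.
 *
 *	struct rte_eth_conf conf = {
 *		.rxmode = { .mq_mode = RTE_ETH_MQ_RX_RSS },
 *		.rx_adv_conf.rss_conf = { .rss_key = NULL, .rss_hf = RTE_ETH_RSS_IP },
 *	};
 *	uint16_t q;
 *
 *	rte_eth_dev_configure(bond_port, 4, 4, &conf);
 *	for (q = 0; q < 4; q++) {
 *		rte_eth_rx_queue_setup(bond_port, q, 512,
 *				rte_eth_dev_socket_id(bond_port), NULL, mbuf_pool);
 *		rte_eth_tx_queue_setup(bond_port, q, 512,
 *				rte_eth_dev_socket_id(bond_port), NULL);
 *	}
 *	rte_eth_dev_start(bond_port);
 */
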
3893 struct rte_vdev_driver pmd_bond_drv = {
3894 	.probe = bond_probe,
3895 	.remove = bond_remove,
3896 };
3897 
3898 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3899 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3900 
3901 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3902 	"slave=<ifc> "
3903 	"primary=<ifc> "
3904 	"mode=[0-6] "
3905 	"xmit_policy=[l2 | l23 | l34] "
3906 	"agg_mode=[count | stable | bandwidth] "
3907 	"socket_id=<int> "
3908 	"mac=<mac addr> "
3909 	"lsc_poll_period_ms=<int> "
3910 	"up_delay=<int> "
3911 	"down_delay=<int>");
3912 
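/*
 * Example devargs (illustrative): create a mode-4 bonded device over two
 * physical ports at EAL init time; the PCI addresses are placeholders.
 *
 *	dpdk-testpmd -l 0-3 -n 4 \
 *		--vdev 'net_bonding0,mode=4,agg_mode=stable,slave=0000:01:00.0,slave=0000:01:00.1' \
 *		-- -i
 */
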
3913 /* We can't use RTE_LOG_REGISTER_DEFAULT because of the forced name for
3914  * this library, see meson.build.
3915  */
3916 RTE_LOG_REGISTER(bond_logtype, pmd.net.bonding, NOTICE);
3917