1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4 #include <stdlib.h>
5 #include <stdbool.h>
6 #include <netinet/in.h>
7 
8 #include <rte_mbuf.h>
9 #include <rte_malloc.h>
10 #include <rte_ethdev_driver.h>
11 #include <rte_ethdev_vdev.h>
12 #include <rte_tcp.h>
13 #include <rte_udp.h>
14 #include <rte_ip.h>
15 #include <rte_ip_frag.h>
16 #include <rte_devargs.h>
17 #include <rte_kvargs.h>
18 #include <rte_bus_vdev.h>
19 #include <rte_alarm.h>
20 #include <rte_cycles.h>
21 #include <rte_string_fns.h>
22 
23 #include "rte_eth_bond.h"
24 #include "eth_bond_private.h"
25 #include "eth_bond_8023ad_private.h"
26 
27 #define REORDER_PERIOD_MS 10
28 #define DEFAULT_POLLING_INTERVAL_10_MS (10)
29 #define BOND_MAX_MAC_ADDRS 16
30 
31 #define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)
32 
33 /* Table for statistics in mode 5 TLB */
34 static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
35 
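/*
 * Return the byte offset of any VLAN/QinQ tags that follow the Ethernet
 * header and update *proto to the EtherType found after those tags, so
 * callers can locate the L3 header regardless of tagging.
 */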
36 static inline size_t
37 get_vlan_offset(struct rte_ether_hdr *eth_hdr, uint16_t *proto)
38 {
39 	size_t vlan_offset = 0;
40 
41 	if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto ||
42 		rte_cpu_to_be_16(RTE_ETHER_TYPE_QINQ) == *proto) {
43 		struct rte_vlan_hdr *vlan_hdr =
44 			(struct rte_vlan_hdr *)(eth_hdr + 1);
45 
46 		vlan_offset = sizeof(struct rte_vlan_hdr);
47 		*proto = vlan_hdr->eth_proto;
48 
49 		if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto) {
50 			vlan_hdr = vlan_hdr + 1;
51 			*proto = vlan_hdr->eth_proto;
52 			vlan_offset += sizeof(struct rte_vlan_hdr);
53 		}
54 	}
55 	return vlan_offset;
56 }
57 
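/*
 * Round-robin Rx: poll each active slave in turn, starting from the
 * per-queue rotating index, until nb_pkts mbufs have been gathered or
 * every slave has been polled once.
 */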
58 static uint16_t
59 bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
60 {
61 	struct bond_dev_private *internals;
62 
63 	uint16_t num_rx_total = 0;
64 	uint16_t slave_count;
65 	uint16_t active_slave;
66 	int i;
67 
68 	/* Cast to structure containing the bonded device's port id and queue id */
69 	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
70 	internals = bd_rx_q->dev_private;
71 	slave_count = internals->active_slave_count;
72 	active_slave = bd_rx_q->active_slave;
73 
74 	for (i = 0; i < slave_count && nb_pkts; i++) {
75 		uint16_t num_rx_slave;
76 
77 		/* Offset of pointer to *bufs increases as packets are received
78 		 * from other slaves */
79 		num_rx_slave =
80 			rte_eth_rx_burst(internals->active_slaves[active_slave],
81 					 bd_rx_q->queue_id,
82 					 bufs + num_rx_total, nb_pkts);
83 		num_rx_total += num_rx_slave;
84 		nb_pkts -= num_rx_slave;
85 		if (++active_slave == slave_count)
86 			active_slave = 0;
87 	}
88 
89 	if (++bd_rx_q->active_slave >= slave_count)
90 		bd_rx_q->active_slave = 0;
91 	return num_rx_total;
92 }
93 
94 static uint16_t
95 bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
96 		uint16_t nb_pkts)
97 {
98 	struct bond_dev_private *internals;
99 
100 	/* Cast to structure containing the bonded device's port id and queue id */
101 	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
102 
103 	internals = bd_rx_q->dev_private;
104 
105 	return rte_eth_rx_burst(internals->current_primary_port,
106 			bd_rx_q->queue_id, bufs, nb_pkts);
107 }
108 
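/*
 * A frame counts as LACP/marker control traffic when it carries no VLAN
 * tag, its EtherType is the slow-protocols type and the slow protocol
 * subtype is either LACP or marker.
 */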
109 static inline uint8_t
110 is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
111 {
112 	const uint16_t ether_type_slow_be =
113 		rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW);
114 
115 	return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
116 		(ethertype == ether_type_slow_be &&
117 		(subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
118 }
119 
120 /*****************************************************************************
121  * Flow director's setup for mode 4 optimization
122  */
123 
124 static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
125 	.dst.addr_bytes = { 0 },
126 	.src.addr_bytes = { 0 },
127 	.type = RTE_BE16(RTE_ETHER_TYPE_SLOW),
128 };
129 
130 static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
131 	.dst.addr_bytes = { 0 },
132 	.src.addr_bytes = { 0 },
133 	.type = 0xFFFF,
134 };
135 
136 static struct rte_flow_item flow_item_8023ad[] = {
137 	{
138 		.type = RTE_FLOW_ITEM_TYPE_ETH,
139 		.spec = &flow_item_eth_type_8023ad,
140 		.last = NULL,
141 		.mask = &flow_item_eth_mask_type_8023ad,
142 	},
143 	{
144 		.type = RTE_FLOW_ITEM_TYPE_END,
145 		.spec = NULL,
146 		.last = NULL,
147 		.mask = NULL,
148 	}
149 };
150 
151 const struct rte_flow_attr flow_attr_8023ad = {
152 	.group = 0,
153 	.priority = 0,
154 	.ingress = 1,
155 	.egress = 0,
156 	.reserved = 0,
157 };
158 
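/*
 * Verify that a slave can take the mode 4 dedicated control queue: the
 * flow rule steering slow frames must validate on the slave, and the
 * slave must support at least as many Rx/Tx queues as the bonded device.
 */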
159 int
160 bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
161 		uint16_t slave_port) {
162 	struct rte_eth_dev_info slave_info;
163 	struct rte_flow_error error;
164 	struct bond_dev_private *internals = bond_dev->data->dev_private;
165 
166 	const struct rte_flow_action_queue lacp_queue_conf = {
167 		.index = 0,
168 	};
169 
170 	const struct rte_flow_action actions[] = {
171 		{
172 			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
173 			.conf = &lacp_queue_conf
174 		},
175 		{
176 			.type = RTE_FLOW_ACTION_TYPE_END,
177 		}
178 	};
179 
180 	int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
181 			flow_item_8023ad, actions, &error);
182 	if (ret < 0) {
183 		RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
184 				__func__, error.message, slave_port,
185 				internals->mode4.dedicated_queues.rx_qid);
186 		return -1;
187 	}
188 
189 	ret = rte_eth_dev_info_get(slave_port, &slave_info);
190 	if (ret != 0) {
191 		RTE_BOND_LOG(ERR,
192 			"%s: Error getting device (port %u) info: %s\n",
193 			__func__, slave_port, strerror(-ret));
194 
195 		return ret;
196 	}
197 
198 	if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
199 			slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
200 		RTE_BOND_LOG(ERR,
201 			"%s: Slave %d capabilities don't allow allocating additional queues",
202 			__func__, slave_port);
203 		return -1;
204 	}
205 
206 	return 0;
207 }
208 
209 int
210 bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
211 	struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
212 	struct bond_dev_private *internals = bond_dev->data->dev_private;
213 	struct rte_eth_dev_info bond_info;
214 	uint16_t idx;
215 	int ret;
216 
217 	/* Verify that all slaves in the bonding device support flow director */
218 	if (internals->slave_count > 0) {
219 		ret = rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);
220 		if (ret != 0) {
221 			RTE_BOND_LOG(ERR,
222 				"%s: Error getting device (port %u) info: %s\n",
223 				__func__, bond_dev->data->port_id,
224 				strerror(-ret));
225 
226 			return ret;
227 		}
228 
229 		internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
230 		internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;
231 
232 		for (idx = 0; idx < internals->slave_count; idx++) {
233 			if (bond_ethdev_8023ad_flow_verify(bond_dev,
234 					internals->slaves[idx].port_id) != 0)
235 				return -1;
236 		}
237 	}
238 
239 	return 0;
240 }
241 
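/*
 * Install the rte_flow rule on a slave that steers IEEE 802.3ad slow
 * frames (EtherType 0x8809) into the dedicated Rx queue, so LACP control
 * traffic bypasses the data-path Rx burst.
 */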
242 int
243 bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {
244 
245 	struct rte_flow_error error;
246 	struct bond_dev_private *internals = bond_dev->data->dev_private;
247 	struct rte_flow_action_queue lacp_queue_conf = {
248 		.index = internals->mode4.dedicated_queues.rx_qid,
249 	};
250 
251 	const struct rte_flow_action actions[] = {
252 		{
253 			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
254 			.conf = &lacp_queue_conf
255 		},
256 		{
257 			.type = RTE_FLOW_ACTION_TYPE_END,
258 		}
259 	};
260 
261 	internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
262 			&flow_attr_8023ad, flow_item_8023ad, actions, &error);
263 	if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
264 		RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
265 				"(slave_port=%d queue_id=%d)",
266 				error.message, slave_port,
267 				internals->mode4.dedicated_queues.rx_qid);
268 		return -1;
269 	}
270 
271 	return 0;
272 }
273 
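/*
 * Mode 4 Rx: poll the active slaves round-robin and filter the result.
 * Slow frames are handed to the 802.3ad state machine, and packets that
 * should not be delivered (slave not collecting, or destination address
 * not matching while promiscuous/allmulti are off) are dropped by
 * shifting them out of the returned array.
 */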
274 static inline uint16_t
275 rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts,
276 		bool dedicated_rxq)
277 {
278 	/* Cast to structure containing the bonded device's port id and queue id */
279 	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
280 	struct bond_dev_private *internals = bd_rx_q->dev_private;
281 	struct rte_eth_dev *bonded_eth_dev =
282 					&rte_eth_devices[internals->port_id];
283 	struct rte_ether_addr *bond_mac = bonded_eth_dev->data->mac_addrs;
284 	struct rte_ether_hdr *hdr;
285 
286 	const uint16_t ether_type_slow_be =
287 		rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW);
288 	uint16_t num_rx_total = 0;	/* Total number of received packets */
289 	uint16_t slaves[RTE_MAX_ETHPORTS];
290 	uint16_t slave_count, idx;
291 
292 	uint8_t collecting;  /* current slave collecting status */
293 	const uint8_t promisc = rte_eth_promiscuous_get(internals->port_id);
294 	const uint8_t allmulti = rte_eth_allmulticast_get(internals->port_id);
295 	uint8_t subtype;
296 	uint16_t i;
297 	uint16_t j;
298 	uint16_t k;
299 
300 	/* Copy slave list to protect against slave up/down changes during rx
301 	 * bursting */
302 	slave_count = internals->active_slave_count;
303 	memcpy(slaves, internals->active_slaves,
304 			sizeof(internals->active_slaves[0]) * slave_count);
305 
306 	idx = bd_rx_q->active_slave;
307 	if (idx >= slave_count) {
308 		bd_rx_q->active_slave = 0;
309 		idx = 0;
310 	}
311 	for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
312 		j = num_rx_total;
313 		collecting = ACTOR_STATE(&bond_mode_8023ad_ports[slaves[idx]],
314 					 COLLECTING);
315 
316 		/* Read packets from this slave */
317 		num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
318 				&bufs[num_rx_total], nb_pkts - num_rx_total);
319 
320 		for (k = j; k < 2 && k < num_rx_total; k++)
321 			rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));
322 
323 		/* Handle slow protocol packets. */
324 		while (j < num_rx_total) {
325 			if (j + 3 < num_rx_total)
326 				rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));
327 
328 			hdr = rte_pktmbuf_mtod(bufs[j], struct rte_ether_hdr *);
329 			subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;
330 
331 			/* Remove packet from array if:
332 			 * - it is a slow packet and no dedicated Rx queue is present,
333 			 * - slave is not in collecting state,
334 			 * - bonding interface is not in promiscuous mode:
335 			 *   - packet is unicast and address does not match,
336 			 *   - packet is multicast and bonding interface
337 			 *     is not in allmulti,
338 			 */
339 			if (unlikely(
340 				(!dedicated_rxq &&
341 				 is_lacp_packets(hdr->ether_type, subtype,
342 						 bufs[j])) ||
343 				!collecting ||
344 				(!promisc &&
345 				 ((rte_is_unicast_ether_addr(&hdr->d_addr) &&
346 				   !rte_is_same_ether_addr(bond_mac,
347 						       &hdr->d_addr)) ||
348 				  (!allmulti &&
349 				   rte_is_multicast_ether_addr(&hdr->d_addr)))))) {
350 
351 				if (hdr->ether_type == ether_type_slow_be) {
352 					bond_mode_8023ad_handle_slow_pkt(
353 					    internals, slaves[idx], bufs[j]);
354 				} else
355 					rte_pktmbuf_free(bufs[j]);
356 
357 				/* Packet is managed by mode 4 or dropped, shift the array */
358 				num_rx_total--;
359 				if (j < num_rx_total) {
360 					memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
361 						(num_rx_total - j));
362 				}
363 			} else
364 				j++;
365 		}
366 		if (unlikely(++idx == slave_count))
367 			idx = 0;
368 	}
369 
370 	if (++bd_rx_q->active_slave >= slave_count)
371 		bd_rx_q->active_slave = 0;
372 
373 	return num_rx_total;
374 }
375 
376 static uint16_t
377 bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
378 		uint16_t nb_pkts)
379 {
380 	return rx_burst_8023ad(queue, bufs, nb_pkts, false);
381 }
382 
383 static uint16_t
384 bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
385 		uint16_t nb_pkts)
386 {
387 	return rx_burst_8023ad(queue, bufs, nb_pkts, true);
388 }
389 
390 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
391 uint32_t burstnumberRX;
392 uint32_t burstnumberTX;
393 
394 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
395 
396 static void
397 arp_op_name(uint16_t arp_op, char *buf, size_t buf_len)
398 {
399 	switch (arp_op) {
400 	case RTE_ARP_OP_REQUEST:
401 		strlcpy(buf, "ARP Request", buf_len);
402 		return;
403 	case RTE_ARP_OP_REPLY:
404 		strlcpy(buf, "ARP Reply", buf_len);
405 		return;
406 	case RTE_ARP_OP_REVREQUEST:
407 		strlcpy(buf, "Reverse ARP Request", buf_len);
408 		return;
409 	case RTE_ARP_OP_REVREPLY:
410 		strlcpy(buf, "Reverse ARP Reply", buf_len);
411 		return;
412 	case RTE_ARP_OP_INVREQUEST:
413 		strlcpy(buf, "Peer Identify Request", buf_len);
414 		return;
415 	case RTE_ARP_OP_INVREPLY:
416 		strlcpy(buf, "Peer Identify Reply", buf_len);
417 		return;
418 	default:
419 		break;
420 	}
421 	strlcpy(buf, "Unknown", buf_len);
422 	return;
423 }
424 #endif
425 #define MaxIPv4String	16
426 static void
427 ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
428 {
429 	uint32_t ipv4_addr;
430 
431 	ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
432 	snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
433 		(ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
434 		ipv4_addr & 0xFF);
435 }
436 
437 #define MAX_CLIENTS_NUMBER	128
438 uint8_t active_clients;
439 struct client_stats_t {
440 	uint16_t port;
441 	uint32_t ipv4_addr;
442 	uint32_t ipv4_rx_packets;
443 	uint32_t ipv4_tx_packets;
444 };
445 struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
446 
447 static void
448 update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
449 {
450 	int i = 0;
451 
452 	for (; i < MAX_CLIENTS_NUMBER; i++)	{
453 		if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port))	{
454 			/* Existing client: update its RX or TX packet count */
455 			if (TXorRXindicator == &burstnumberRX)
456 				client_stats[i].ipv4_rx_packets++;
457 			else
458 				client_stats[i].ipv4_tx_packets++;
459 			return;
460 		}
461 	}
462 	/* We have a new client. Insert it into the table and update its stats */
463 	if (TXorRXindicator == &burstnumberRX)
464 		client_stats[active_clients].ipv4_rx_packets++;
465 	else
466 		client_stats[active_clients].ipv4_tx_packets++;
467 	client_stats[active_clients].ipv4_addr = addr;
468 	client_stats[active_clients].port = port;
469 	active_clients++;
470 
471 }
472 
473 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
474 #define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
475 	rte_log(RTE_LOG_DEBUG, bond_logtype,				\
476 		"%s port:%d SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X SrcIP:%s " \
477 		"DstMAC:%02X:%02X:%02X:%02X:%02X:%02X DstIP:%s %s %d\n", \
478 		info,							\
479 		port,							\
480 		eth_h->s_addr.addr_bytes[0], eth_h->s_addr.addr_bytes[1], \
481 		eth_h->s_addr.addr_bytes[2], eth_h->s_addr.addr_bytes[3], \
482 		eth_h->s_addr.addr_bytes[4], eth_h->s_addr.addr_bytes[5], \
483 		src_ip,							\
484 		eth_h->d_addr.addr_bytes[0], eth_h->d_addr.addr_bytes[1], \
485 		eth_h->d_addr.addr_bytes[2], eth_h->d_addr.addr_bytes[3], \
486 		eth_h->d_addr.addr_bytes[4], eth_h->d_addr.addr_bytes[5], \
487 		dst_ip,							\
488 		arp_op, ++burstnumber)
489 #endif
490 
491 static void
492 mode6_debug(const char __rte_unused *info,
493 	struct rte_ether_hdr *eth_h, uint16_t port,
494 	uint32_t __rte_unused *burstnumber)
495 {
496 	struct rte_ipv4_hdr *ipv4_h;
497 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
498 	struct rte_arp_hdr *arp_h;
499 	char dst_ip[16];
500 	char ArpOp[24];
501 	char buf[16];
502 #endif
503 	char src_ip[16];
504 
505 	uint16_t ether_type = eth_h->ether_type;
506 	uint16_t offset = get_vlan_offset(eth_h, &ether_type);
507 
508 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
509 	strlcpy(buf, info, 16);
510 #endif
511 
512 	if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) {
513 		ipv4_h = (struct rte_ipv4_hdr *)((char *)(eth_h + 1) + offset);
514 		ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
515 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
516 		ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
517 		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
518 #endif
519 		update_client_stats(ipv4_h->src_addr, port, burstnumber);
520 	}
521 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
522 	else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
523 		arp_h = (struct rte_arp_hdr *)((char *)(eth_h + 1) + offset);
524 		ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
525 		ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
526 		arp_op_name(rte_be_to_cpu_16(arp_h->arp_opcode),
527 				ArpOp, sizeof(ArpOp));
528 		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
529 	}
530 #endif
531 }
532 #endif
533 
534 static uint16_t
535 bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
536 {
537 	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
538 	struct bond_dev_private *internals = bd_rx_q->dev_private;
539 	struct rte_ether_hdr *eth_h;
540 	uint16_t ether_type, offset;
541 	uint16_t nb_recv_pkts;
542 	int i;
543 
544 	nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);
545 
546 	for (i = 0; i < nb_recv_pkts; i++) {
547 		eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
548 		ether_type = eth_h->ether_type;
549 		offset = get_vlan_offset(eth_h, &ether_type);
550 
551 		if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
552 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
553 			mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
554 #endif
555 			bond_mode_alb_arp_recv(eth_h, offset, internals);
556 		}
557 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
558 		else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4))
559 			mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
560 #endif
561 	}
562 
563 	return nb_recv_pkts;
564 }
565 
566 static uint16_t
567 bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
568 		uint16_t nb_pkts)
569 {
570 	struct bond_dev_private *internals;
571 	struct bond_tx_queue *bd_tx_q;
572 
573 	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
574 	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
575 
576 	uint16_t num_of_slaves;
577 	uint16_t slaves[RTE_MAX_ETHPORTS];
578 
579 	uint16_t num_tx_total = 0, num_tx_slave;
580 
581 	static int slave_idx = 0;
582 	int i, cslave_idx = 0, tx_fail_total = 0;
583 
584 	bd_tx_q = (struct bond_tx_queue *)queue;
585 	internals = bd_tx_q->dev_private;
586 
587 	/* Copy slave list to protect against slave up/down changes during tx
588 	 * bursting */
589 	num_of_slaves = internals->active_slave_count;
590 	memcpy(slaves, internals->active_slaves,
591 			sizeof(internals->active_slaves[0]) * num_of_slaves);
592 
593 	if (num_of_slaves < 1)
594 		return num_tx_total;
595 
596 	/* Distribute the packets round-robin into the per-slave mbuf arrays */
597 	for (i = 0; i < nb_pkts; i++) {
598 		cslave_idx = (slave_idx + i) % num_of_slaves;
599 		slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
600 	}
601 
602 	/* increment current slave index so the next call to tx burst starts on the
603 	 * next slave */
604 	slave_idx = ++cslave_idx;
605 
606 	/* Send packet burst on each slave device */
607 	for (i = 0; i < num_of_slaves; i++) {
608 		if (slave_nb_pkts[i] > 0) {
609 			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
610 					slave_bufs[i], slave_nb_pkts[i]);
611 
612 			/* if tx burst fails move packets to end of bufs */
613 			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
614 				int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;
615 
616 				tx_fail_total += tx_fail_slave;
617 
618 				memcpy(&bufs[nb_pkts - tx_fail_total],
619 				       &slave_bufs[i][num_tx_slave],
620 				       tx_fail_slave * sizeof(bufs[0]));
621 			}
622 			num_tx_total += num_tx_slave;
623 		}
624 	}
625 
626 	return num_tx_total;
627 }
628 
629 static uint16_t
630 bond_ethdev_tx_burst_active_backup(void *queue,
631 		struct rte_mbuf **bufs, uint16_t nb_pkts)
632 {
633 	struct bond_dev_private *internals;
634 	struct bond_tx_queue *bd_tx_q;
635 
636 	bd_tx_q = (struct bond_tx_queue *)queue;
637 	internals = bd_tx_q->dev_private;
638 
639 	if (internals->active_slave_count < 1)
640 		return 0;
641 
642 	return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
643 			bufs, nb_pkts);
644 }
645 
646 static inline uint16_t
647 ether_hash(struct rte_ether_hdr *eth_hdr)
648 {
649 	unaligned_uint16_t *word_src_addr =
650 		(unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
651 	unaligned_uint16_t *word_dst_addr =
652 		(unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;
653 
654 	return (word_src_addr[0] ^ word_dst_addr[0]) ^
655 			(word_src_addr[1] ^ word_dst_addr[1]) ^
656 			(word_src_addr[2] ^ word_dst_addr[2]);
657 }
658 
659 static inline uint32_t
660 ipv4_hash(struct rte_ipv4_hdr *ipv4_hdr)
661 {
662 	return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
663 }
664 
665 static inline uint32_t
666 ipv6_hash(struct rte_ipv6_hdr *ipv6_hdr)
667 {
668 	unaligned_uint32_t *word_src_addr =
669 		(unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
670 	unaligned_uint32_t *word_dst_addr =
671 		(unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);
672 
673 	return (word_src_addr[0] ^ word_dst_addr[0]) ^
674 			(word_src_addr[1] ^ word_dst_addr[1]) ^
675 			(word_src_addr[2] ^ word_dst_addr[2]) ^
676 			(word_src_addr[3] ^ word_dst_addr[3]);
677 }
678 
679 
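/*
 * Layer 2 transmit hash: XOR the 16-bit words of the source and
 * destination MAC addresses, then fold and reduce modulo the slave
 * count to pick the output slave for each packet, i.e.
 * hash ^= hash >> 8; slave = hash % slave_count.
 */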
680 void
681 burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
682 		uint16_t slave_count, uint16_t *slaves)
683 {
684 	struct rte_ether_hdr *eth_hdr;
685 	uint32_t hash;
686 	int i;
687 
688 	for (i = 0; i < nb_pkts; i++) {
689 		eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
690 
691 		hash = ether_hash(eth_hdr);
692 
693 		slaves[i] = (hash ^= hash >> 8) % slave_count;
694 	}
695 }
696 
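/*
 * Layer 2+3 transmit hash: combine the MAC-address hash with a hash of
 * the IPv4 or IPv6 source/destination addresses (skipping any VLAN
 * tags), then fold and reduce modulo the slave count.
 */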
697 void
698 burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
699 		uint16_t slave_count, uint16_t *slaves)
700 {
701 	uint16_t i;
702 	struct rte_ether_hdr *eth_hdr;
703 	uint16_t proto;
704 	size_t vlan_offset;
705 	uint32_t hash, l3hash;
706 
707 	for (i = 0; i < nb_pkts; i++) {
708 		eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
709 		l3hash = 0;
710 
711 		proto = eth_hdr->ether_type;
712 		hash = ether_hash(eth_hdr);
713 
714 		vlan_offset = get_vlan_offset(eth_hdr, &proto);
715 
716 		if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) == proto) {
717 			struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *)
718 					((char *)(eth_hdr + 1) + vlan_offset);
719 			l3hash = ipv4_hash(ipv4_hdr);
720 
721 		} else if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) == proto) {
722 			struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *)
723 					((char *)(eth_hdr + 1) + vlan_offset);
724 			l3hash = ipv6_hash(ipv6_hdr);
725 		}
726 
727 		hash = hash ^ l3hash;
728 		hash ^= hash >> 16;
729 		hash ^= hash >> 8;
730 
731 		slaves[i] = hash % slave_count;
732 	}
733 }
734 
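/*
 * Layer 3+4 transmit hash: hash the IPv4/IPv6 addresses and XOR in the
 * TCP/UDP port pair when an L4 header is available; fragmented IPv4
 * packets are hashed on addresses only so all fragments of a flow stay
 * on the same slave.
 */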
735 void
736 burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
737 		uint16_t slave_count, uint16_t *slaves)
738 {
739 	struct rte_ether_hdr *eth_hdr;
740 	uint16_t proto;
741 	size_t vlan_offset;
742 	int i;
743 
744 	struct rte_udp_hdr *udp_hdr;
745 	struct rte_tcp_hdr *tcp_hdr;
746 	uint32_t hash, l3hash, l4hash;
747 
748 	for (i = 0; i < nb_pkts; i++) {
749 		eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
750 		size_t pkt_end = (size_t)eth_hdr + rte_pktmbuf_data_len(buf[i]);
751 		proto = eth_hdr->ether_type;
752 		vlan_offset = get_vlan_offset(eth_hdr, &proto);
753 		l3hash = 0;
754 		l4hash = 0;
755 
756 		if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) == proto) {
757 			struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *)
758 					((char *)(eth_hdr + 1) + vlan_offset);
759 			size_t ip_hdr_offset;
760 
761 			l3hash = ipv4_hash(ipv4_hdr);
762 
763 			/* there is no L4 header in a fragmented packet */
764 			if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
765 								== 0)) {
766 				ip_hdr_offset = (ipv4_hdr->version_ihl
767 					& RTE_IPV4_HDR_IHL_MASK) *
768 					RTE_IPV4_IHL_MULTIPLIER;
769 
770 				if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
771 					tcp_hdr = (struct rte_tcp_hdr *)
772 						((char *)ipv4_hdr +
773 							ip_hdr_offset);
774 					if ((size_t)tcp_hdr + sizeof(*tcp_hdr)
775 							< pkt_end)
776 						l4hash = HASH_L4_PORTS(tcp_hdr);
777 				} else if (ipv4_hdr->next_proto_id ==
778 								IPPROTO_UDP) {
779 					udp_hdr = (struct rte_udp_hdr *)
780 						((char *)ipv4_hdr +
781 							ip_hdr_offset);
782 					if ((size_t)udp_hdr + sizeof(*udp_hdr)
783 							< pkt_end)
784 						l4hash = HASH_L4_PORTS(udp_hdr);
785 				}
786 			}
787 		} else if  (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) == proto) {
788 			struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *)
789 					((char *)(eth_hdr + 1) + vlan_offset);
790 			l3hash = ipv6_hash(ipv6_hdr);
791 
792 			if (ipv6_hdr->proto == IPPROTO_TCP) {
793 				tcp_hdr = (struct rte_tcp_hdr *)(ipv6_hdr + 1);
794 				l4hash = HASH_L4_PORTS(tcp_hdr);
795 			} else if (ipv6_hdr->proto == IPPROTO_UDP) {
796 				udp_hdr = (struct rte_udp_hdr *)(ipv6_hdr + 1);
797 				l4hash = HASH_L4_PORTS(udp_hdr);
798 			}
799 		}
800 
801 		hash = l3hash ^ l4hash;
802 		hash ^= hash >> 16;
803 		hash ^= hash >> 8;
804 
805 		slaves[i] = hash % slave_count;
806 	}
807 }
808 
809 struct bwg_slave {
810 	uint64_t bwg_left_int;
811 	uint64_t bwg_left_remainder;
812 	uint16_t slave;
813 };
814 
815 void
816 bond_tlb_activate_slave(struct bond_dev_private *internals) {
817 	int i;
818 
819 	for (i = 0; i < internals->active_slave_count; i++) {
820 		tlb_last_obytets[internals->active_slaves[i]] = 0;
821 	}
822 }
823 
824 static int
825 bandwidth_cmp(const void *a, const void *b)
826 {
827 	const struct bwg_slave *bwg_a = a;
828 	const struct bwg_slave *bwg_b = b;
829 	int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
830 	int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
831 			(int64_t)bwg_a->bwg_left_remainder;
832 	if (diff > 0)
833 		return 1;
834 	else if (diff < 0)
835 		return -1;
836 	else if (diff2 > 0)
837 		return 1;
838 	else if (diff2 < 0)
839 		return -1;
840 	else
841 		return 0;
842 }
843 
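/*
 * Estimate how much of a slave's link bandwidth is still unused in the
 * current reorder window: link_bwg is roughly the link capacity in bytes
 * scaled by the elapsed update periods, and the headroom left after
 * subtracting the observed load is kept as an integer quotient/remainder
 * pair so slaves can be sorted without floating point.
 */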
844 static void
845 bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
846 		struct bwg_slave *bwg_slave)
847 {
848 	struct rte_eth_link link_status;
849 	int ret;
850 
851 	ret = rte_eth_link_get_nowait(port_id, &link_status);
852 	if (ret < 0) {
853 		RTE_BOND_LOG(ERR, "Slave (port %u) link get failed: %s",
854 			     port_id, rte_strerror(-ret));
855 		return;
856 	}
857 	uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
858 	if (link_bwg == 0)
859 		return;
860 	link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
861 	bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
862 	bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
863 }
864 
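/*
 * Alarm callback run every REORDER_PERIOD_MS: sample each active slave's
 * Tx byte counter, compute its remaining bandwidth and re-sort
 * tlb_slaves_order so the TLB Tx path prefers the least loaded slaves.
 * The callback re-arms itself with rte_eal_alarm_set().
 */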
865 static void
866 bond_ethdev_update_tlb_slave_cb(void *arg)
867 {
868 	struct bond_dev_private *internals = arg;
869 	struct rte_eth_stats slave_stats;
870 	struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
871 	uint16_t slave_count;
872 	uint64_t tx_bytes;
873 
874 	uint8_t update_stats = 0;
875 	uint16_t slave_id;
876 	uint16_t i;
877 
878 	internals->slave_update_idx++;
879 
880 
881 	if (internals->slave_update_idx >= REORDER_PERIOD_MS)
882 		update_stats = 1;
883 
884 	for (i = 0; i < internals->active_slave_count; i++) {
885 		slave_id = internals->active_slaves[i];
886 		rte_eth_stats_get(slave_id, &slave_stats);
887 		tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
888 		bandwidth_left(slave_id, tx_bytes,
889 				internals->slave_update_idx, &bwg_array[i]);
890 		bwg_array[i].slave = slave_id;
891 
892 		if (update_stats) {
893 			tlb_last_obytets[slave_id] = slave_stats.obytes;
894 		}
895 	}
896 
897 	if (update_stats == 1)
898 		internals->slave_update_idx = 0;
899 
900 	slave_count = i;
901 	qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
902 	for (i = 0; i < slave_count; i++)
903 		internals->tlb_slaves_order[i] = bwg_array[i].slave;
904 
905 	rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
906 			(struct bond_dev_private *)internals);
907 }
908 
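/*
 * TLB (mode 5) Tx: walk the slaves in tlb_slaves_order and send the
 * remaining packets on each one. Packets whose source MAC matches the
 * primary slave's address are rewritten to the transmitting slave's own
 * MAC so the attached switch can learn a distinct source MAC per slave.
 */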
909 static uint16_t
910 bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
911 {
912 	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
913 	struct bond_dev_private *internals = bd_tx_q->dev_private;
914 
915 	struct rte_eth_dev *primary_port =
916 			&rte_eth_devices[internals->primary_port];
917 	uint16_t num_tx_total = 0;
918 	uint16_t i, j;
919 
920 	uint16_t num_of_slaves = internals->active_slave_count;
921 	uint16_t slaves[RTE_MAX_ETHPORTS];
922 
923 	struct rte_ether_hdr *ether_hdr;
924 	struct rte_ether_addr primary_slave_addr;
925 	struct rte_ether_addr active_slave_addr;
926 
927 	if (num_of_slaves < 1)
928 		return num_tx_total;
929 
930 	memcpy(slaves, internals->tlb_slaves_order,
931 				sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);
932 
933 
934 	rte_ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);
935 
936 	if (nb_pkts > 3) {
937 		for (i = 0; i < 3; i++)
938 			rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
939 	}
940 
941 	for (i = 0; i < num_of_slaves; i++) {
942 		rte_eth_macaddr_get(slaves[i], &active_slave_addr);
943 		for (j = num_tx_total; j < nb_pkts; j++) {
944 			if (j + 3 < nb_pkts)
945 				rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));
946 
947 			ether_hdr = rte_pktmbuf_mtod(bufs[j],
948 						struct rte_ether_hdr *);
949 			if (rte_is_same_ether_addr(&ether_hdr->s_addr,
950 							&primary_slave_addr))
951 				rte_ether_addr_copy(&active_slave_addr,
952 						&ether_hdr->s_addr);
953 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
954 					mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
955 #endif
956 		}
957 
958 		num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
959 				bufs + num_tx_total, nb_pkts - num_tx_total);
960 
961 		if (num_tx_total == nb_pkts)
962 			break;
963 	}
964 
965 	return num_tx_total;
966 }
967 
968 void
969 bond_tlb_disable(struct bond_dev_private *internals)
970 {
971 	rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
972 }
973 
974 void
975 bond_tlb_enable(struct bond_dev_private *internals)
976 {
977 	bond_ethdev_update_tlb_slave_cb(internals);
978 }
979 
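/*
 * ALB (mode 6) Tx: ARP packets are assigned to slaves by the ALB client
 * table (with their source MAC rewritten to the chosen slave), generated
 * ARP update packets are sent when the client table is marked for
 * transmission, and all non-ARP traffic falls back to the TLB policy.
 */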
980 static uint16_t
981 bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
982 {
983 	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
984 	struct bond_dev_private *internals = bd_tx_q->dev_private;
985 
986 	struct rte_ether_hdr *eth_h;
987 	uint16_t ether_type, offset;
988 
989 	struct client_data *client_info;
990 
991 	/*
992 	 * We create transmit buffers for every slave plus one extra used for the
993 	 * TLB path. In the worst case every packet will be sent on one port.
994 	 */
995 	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
996 	uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };
997 
998 	/*
999 	 * We create separate transmit buffers for update packets as they won't
1000 	 * be counted in num_tx_total.
1001 	 */
1002 	struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
1003 	uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };
1004 
1005 	struct rte_mbuf *upd_pkt;
1006 	size_t pkt_size;
1007 
1008 	uint16_t num_send, num_not_send = 0;
1009 	uint16_t num_tx_total = 0;
1010 	uint16_t slave_idx;
1011 
1012 	int i, j;
1013 
1014 	/* Search the tx burst for ARP packets and hand them to the ALB logic */
1015 	for (i = 0; i < nb_pkts; i++) {
1016 		eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
1017 		ether_type = eth_h->ether_type;
1018 		offset = get_vlan_offset(eth_h, &ether_type);
1019 
1020 		if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
1021 			slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);
1022 
1023 			/* Change src mac in eth header */
1024 			rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);
1025 
1026 			/* Add packet to slave tx buffer */
1027 			slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
1028 			slave_bufs_pkts[slave_idx]++;
1029 		} else {
1030 			/* If packet is not ARP, send it with TLB policy */
1031 			slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
1032 					bufs[i];
1033 			slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
1034 		}
1035 	}
1036 
1037 	/* Update connected client ARP tables */
1038 	if (internals->mode6.ntt) {
1039 		for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
1040 			client_info = &internals->mode6.client_table[i];
1041 
1042 			if (client_info->in_use) {
1043 				/* Allocate new packet to send ARP update on current slave */
1044 				upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
1045 				if (upd_pkt == NULL) {
1046 					RTE_BOND_LOG(ERR,
1047 						     "Failed to allocate ARP packet from pool");
1048 					continue;
1049 				}
1050 				pkt_size = sizeof(struct rte_ether_hdr) +
1051 					sizeof(struct rte_arp_hdr) +
1052 					client_info->vlan_count *
1053 					sizeof(struct rte_vlan_hdr);
1054 				upd_pkt->data_len = pkt_size;
1055 				upd_pkt->pkt_len = pkt_size;
1056 
1057 				slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
1058 						internals);
1059 
1060 				/* Add packet to update tx buffer */
1061 				update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
1062 				update_bufs_pkts[slave_idx]++;
1063 			}
1064 		}
1065 		internals->mode6.ntt = 0;
1066 	}
1067 
1068 	/* Send ARP packets on proper slaves */
1069 	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1070 		if (slave_bufs_pkts[i] > 0) {
1071 			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
1072 					slave_bufs[i], slave_bufs_pkts[i]);
1073 			for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
1074 				bufs[nb_pkts - 1 - num_not_send - j] =
1075 						slave_bufs[i][nb_pkts - 1 - j];
1076 			}
1077 
1078 			num_tx_total += num_send;
1079 			num_not_send += slave_bufs_pkts[i] - num_send;
1080 
1081 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1082 	/* Print TX stats including update packets */
1083 			for (j = 0; j < slave_bufs_pkts[i]; j++) {
1084 				eth_h = rte_pktmbuf_mtod(slave_bufs[i][j],
1085 							struct rte_ether_hdr *);
1086 				mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
1087 			}
1088 #endif
1089 		}
1090 	}
1091 
1092 	/* Send update packets on proper slaves */
1093 	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1094 		if (update_bufs_pkts[i] > 0) {
1095 			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
1096 					update_bufs_pkts[i]);
1097 			for (j = num_send; j < update_bufs_pkts[i]; j++) {
1098 				rte_pktmbuf_free(update_bufs[i][j]);
1099 			}
1100 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1101 			for (j = 0; j < update_bufs_pkts[i]; j++) {
1102 				eth_h = rte_pktmbuf_mtod(update_bufs[i][j],
1103 							struct rte_ether_hdr *);
1104 				mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
1105 			}
1106 #endif
1107 		}
1108 	}
1109 
1110 	/* Send non-ARP packets using tlb policy */
1111 	if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
1112 		num_send = bond_ethdev_tx_burst_tlb(queue,
1113 				slave_bufs[RTE_MAX_ETHPORTS],
1114 				slave_bufs_pkts[RTE_MAX_ETHPORTS]);
1115 
1116 		for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
1117 			bufs[nb_pkts - 1 - num_not_send - j] =
1118 					slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
1119 		}
1120 
1121 		num_tx_total += num_send;
1122 	}
1123 
1124 	return num_tx_total;
1125 }
1126 
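/*
 * Common balance Tx helper: hash every packet with the configured xmit
 * policy, group the packets per selected slave and transmit each group.
 * Packets that a slave fails to send are moved to the tail of bufs so
 * the caller can retry or free exactly the untransmitted mbufs.
 */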
1127 static inline uint16_t
1128 tx_burst_balance(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs,
1129 		 uint16_t *slave_port_ids, uint16_t slave_count)
1130 {
1131 	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1132 	struct bond_dev_private *internals = bd_tx_q->dev_private;
1133 
1134 	/* Array to sort mbufs for transmission on each slave into */
1135 	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1136 	/* Number of mbufs for transmission on each slave */
1137 	uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1138 	/* Mapping array generated by hash function to map mbufs to slaves */
1139 	uint16_t bufs_slave_port_idxs[nb_bufs];
1140 
1141 	uint16_t slave_tx_count;
1142 	uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1143 
1144 	uint16_t i;
1145 
1146 	/*
1147 	 * Populate the per-slave mbuf arrays with the packets to be sent on each
1148 	 * slave, selecting the output slave with the hash of the xmit policy
1149 	 */
1150 	internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
1151 			bufs_slave_port_idxs);
1152 
1153 	for (i = 0; i < nb_bufs; i++) {
1154 		/* Populate slave mbuf arrays with mbufs for that slave. */
1155 		uint16_t slave_idx = bufs_slave_port_idxs[i];
1156 
1157 		slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
1158 	}
1159 
1160 	/* Send packet burst on each slave device */
1161 	for (i = 0; i < slave_count; i++) {
1162 		if (slave_nb_bufs[i] == 0)
1163 			continue;
1164 
1165 		slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1166 				bd_tx_q->queue_id, slave_bufs[i],
1167 				slave_nb_bufs[i]);
1168 
1169 		total_tx_count += slave_tx_count;
1170 
1171 		/* If tx burst fails move packets to end of bufs */
1172 		if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1173 			int slave_tx_fail_count = slave_nb_bufs[i] -
1174 					slave_tx_count;
1175 			total_tx_fail_count += slave_tx_fail_count;
1176 			memcpy(&bufs[nb_bufs - total_tx_fail_count],
1177 			       &slave_bufs[i][slave_tx_count],
1178 			       slave_tx_fail_count * sizeof(bufs[0]));
1179 		}
1180 	}
1181 
1182 	return total_tx_count;
1183 }
1184 
1185 static uint16_t
1186 bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
1187 		uint16_t nb_bufs)
1188 {
1189 	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1190 	struct bond_dev_private *internals = bd_tx_q->dev_private;
1191 
1192 	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1193 	uint16_t slave_count;
1194 
1195 	if (unlikely(nb_bufs == 0))
1196 		return 0;
1197 
1198 	/* Copy slave list to protect against slave up/down changes during tx
1199 	 * bursting
1200 	 */
1201 	slave_count = internals->active_slave_count;
1202 	if (unlikely(slave_count < 1))
1203 		return 0;
1204 
1205 	memcpy(slave_port_ids, internals->active_slaves,
1206 			sizeof(slave_port_ids[0]) * slave_count);
1207 	return tx_burst_balance(queue, bufs, nb_bufs, slave_port_ids,
1208 				slave_count);
1209 }
1210 
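/*
 * Mode 4 Tx: first drain any pending LACP control packets from each
 * slave's tx_ring (unless a dedicated control queue is in use), then
 * distribute the data packets with tx_burst_balance() across the slaves
 * that are currently in the DISTRIBUTING state.
 */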
1211 static inline uint16_t
1212 tx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs,
1213 		bool dedicated_txq)
1214 {
1215 	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1216 	struct bond_dev_private *internals = bd_tx_q->dev_private;
1217 
1218 	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1219 	uint16_t slave_count;
1220 
1221 	uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
1222 	uint16_t dist_slave_count;
1223 
1224 	uint16_t slave_tx_count;
1225 
1226 	uint16_t i;
1227 
1228 	/* Copy slave list to protect against slave up/down changes during tx
1229 	 * bursting */
1230 	slave_count = internals->active_slave_count;
1231 	if (unlikely(slave_count < 1))
1232 		return 0;
1233 
1234 	memcpy(slave_port_ids, internals->active_slaves,
1235 			sizeof(slave_port_ids[0]) * slave_count);
1236 
1237 	if (dedicated_txq)
1238 		goto skip_tx_ring;
1239 
1240 	/* Check for LACP control packets and send if available */
1241 	for (i = 0; i < slave_count; i++) {
1242 		struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
1243 		struct rte_mbuf *ctrl_pkt = NULL;
1244 
1245 		if (likely(rte_ring_empty(port->tx_ring)))
1246 			continue;
1247 
1248 		if (rte_ring_dequeue(port->tx_ring,
1249 				     (void **)&ctrl_pkt) != -ENOENT) {
1250 			slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1251 					bd_tx_q->queue_id, &ctrl_pkt, 1);
1252 			/*
1253 			 * re-enqueue LAG control plane packets to buffering
1254 			 * ring if transmission fails so the packet isn't lost.
1255 			 */
1256 			if (slave_tx_count != 1)
1257 				rte_ring_enqueue(port->tx_ring,	ctrl_pkt);
1258 		}
1259 	}
1260 
1261 skip_tx_ring:
1262 	if (unlikely(nb_bufs == 0))
1263 		return 0;
1264 
1265 	dist_slave_count = 0;
1266 	for (i = 0; i < slave_count; i++) {
1267 		struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
1268 
1269 		if (ACTOR_STATE(port, DISTRIBUTING))
1270 			dist_slave_port_ids[dist_slave_count++] =
1271 					slave_port_ids[i];
1272 	}
1273 
1274 	if (unlikely(dist_slave_count < 1))
1275 		return 0;
1276 
1277 	return tx_burst_balance(queue, bufs, nb_bufs, dist_slave_port_ids,
1278 				dist_slave_count);
1279 }
1280 
1281 static uint16_t
1282 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
1283 		uint16_t nb_bufs)
1284 {
1285 	return tx_burst_8023ad(queue, bufs, nb_bufs, false);
1286 }
1287 
1288 static uint16_t
1289 bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
1290 		uint16_t nb_bufs)
1291 {
1292 	return tx_burst_8023ad(queue, bufs, nb_bufs, true);
1293 }
1294 
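/*
 * Broadcast Tx: every packet is sent on every active slave. The mbuf
 * reference count is bumped by (slave count - 1) up front; if some
 * slaves transmit fewer packets than the most successful one, the extra
 * references held for the failed packets are released here so the caller
 * only has to deal with the packets counted in the return value.
 */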
1295 static uint16_t
1296 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1297 		uint16_t nb_pkts)
1298 {
1299 	struct bond_dev_private *internals;
1300 	struct bond_tx_queue *bd_tx_q;
1301 
1302 	uint16_t slaves[RTE_MAX_ETHPORTS];
1303 	uint8_t tx_failed_flag = 0;
1304 	uint16_t num_of_slaves;
1305 
1306 	uint16_t max_nb_of_tx_pkts = 0;
1307 
1308 	int slave_tx_total[RTE_MAX_ETHPORTS];
1309 	int i, most_successful_tx_slave = -1;
1310 
1311 	bd_tx_q = (struct bond_tx_queue *)queue;
1312 	internals = bd_tx_q->dev_private;
1313 
1314 	/* Copy slave list to protect against slave up/down changes during tx
1315 	 * bursting */
1316 	num_of_slaves = internals->active_slave_count;
1317 	memcpy(slaves, internals->active_slaves,
1318 			sizeof(internals->active_slaves[0]) * num_of_slaves);
1319 
1320 	if (num_of_slaves < 1)
1321 		return 0;
1322 
1323 	/* Increment reference count on mbufs */
1324 	for (i = 0; i < nb_pkts; i++)
1325 		rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1326 
1327 	/* Transmit burst on each active slave */
1328 	for (i = 0; i < num_of_slaves; i++) {
1329 		slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1330 					bufs, nb_pkts);
1331 
1332 		if (unlikely(slave_tx_total[i] < nb_pkts))
1333 			tx_failed_flag = 1;
1334 
1335 		/* record the value and slave index for the slave which transmits the
1336 		 * maximum number of packets */
1337 		if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1338 			max_nb_of_tx_pkts = slave_tx_total[i];
1339 			most_successful_tx_slave = i;
1340 		}
1341 	}
1342 
1343 	/* if slaves fail to transmit packets from burst, the calling application
1344 	 * is not expected to know about multiple references to packets so we must
1345 	 * handle failures of all packets except those of the most successful slave
1346 	 */
1347 	if (unlikely(tx_failed_flag))
1348 		for (i = 0; i < num_of_slaves; i++)
1349 			if (i != most_successful_tx_slave)
1350 				while (slave_tx_total[i] < nb_pkts)
1351 					rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1352 
1353 	return max_nb_of_tx_pkts;
1354 }
1355 
1356 static void
1357 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1358 {
1359 	struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1360 
1361 	if (bond_ctx->mode == BONDING_MODE_8023AD) {
1362 		/**
1363 		 * In mode 4, save the link properties of the first slave;
1364 		 * all subsequent slaves must match these properties
1365 		 */
1366 		struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1367 
1368 		bond_link->link_autoneg = slave_link->link_autoneg;
1369 		bond_link->link_duplex = slave_link->link_duplex;
1370 		bond_link->link_speed = slave_link->link_speed;
1371 	} else {
1372 		/**
1373 		 * In any other mode the link properties are set to default
1374 		 * values of AUTONEG/DUPLEX
1375 		 */
1376 		ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1377 		ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1378 	}
1379 }
1380 
1381 static int
1382 link_properties_valid(struct rte_eth_dev *ethdev,
1383 		struct rte_eth_link *slave_link)
1384 {
1385 	struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1386 
1387 	if (bond_ctx->mode == BONDING_MODE_8023AD) {
1388 		struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1389 
1390 		if (bond_link->link_duplex != slave_link->link_duplex ||
1391 			bond_link->link_autoneg != slave_link->link_autoneg ||
1392 			bond_link->link_speed != slave_link->link_speed)
1393 			return -1;
1394 	}
1395 
1396 	return 0;
1397 }
1398 
1399 int
1400 mac_address_get(struct rte_eth_dev *eth_dev,
1401 		struct rte_ether_addr *dst_mac_addr)
1402 {
1403 	struct rte_ether_addr *mac_addr;
1404 
1405 	if (eth_dev == NULL) {
1406 		RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1407 		return -1;
1408 	}
1409 
1410 	if (dst_mac_addr == NULL) {
1411 		RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1412 		return -1;
1413 	}
1414 
1415 	mac_addr = eth_dev->data->mac_addrs;
1416 
1417 	rte_ether_addr_copy(mac_addr, dst_mac_addr);
1418 	return 0;
1419 }
1420 
1421 int
1422 mac_address_set(struct rte_eth_dev *eth_dev,
1423 		struct rte_ether_addr *new_mac_addr)
1424 {
1425 	struct rte_ether_addr *mac_addr;
1426 
1427 	if (eth_dev == NULL) {
1428 		RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1429 		return -1;
1430 	}
1431 
1432 	if (new_mac_addr == NULL) {
1433 		RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1434 		return -1;
1435 	}
1436 
1437 	mac_addr = eth_dev->data->mac_addrs;
1438 
1439 	/* If the new MAC is different from the current MAC then update it */
1440 	if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1441 		memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1442 
1443 	return 0;
1444 }
1445 
1446 static const struct rte_ether_addr null_mac_addr;
1447 
1448 /*
1449  * Add additional MAC addresses to the slave
1450  */
1451 int
1452 slave_add_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1453 		uint16_t slave_port_id)
1454 {
1455 	int i, ret;
1456 	struct rte_ether_addr *mac_addr;
1457 
1458 	for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1459 		mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1460 		if (rte_is_same_ether_addr(mac_addr, &null_mac_addr))
1461 			break;
1462 
1463 		ret = rte_eth_dev_mac_addr_add(slave_port_id, mac_addr, 0);
1464 		if (ret < 0) {
1465 			/* rollback */
1466 			for (i--; i > 0; i--)
1467 				rte_eth_dev_mac_addr_remove(slave_port_id,
1468 					&bonded_eth_dev->data->mac_addrs[i]);
1469 			return ret;
1470 		}
1471 	}
1472 
1473 	return 0;
1474 }
1475 
1476 /*
1477  * Remove additional MAC addresses from the slave
1478  */
1479 int
1480 slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1481 		uint16_t slave_port_id)
1482 {
1483 	int i, rc, ret;
1484 	struct rte_ether_addr *mac_addr;
1485 
1486 	rc = 0;
1487 	for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1488 		mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1489 		if (rte_is_same_ether_addr(mac_addr, &null_mac_addr))
1490 			break;
1491 
1492 		ret = rte_eth_dev_mac_addr_remove(slave_port_id, mac_addr);
1493 		/* save only the first error */
1494 		if (ret < 0 && rc == 0)
1495 			rc = ret;
1496 	}
1497 
1498 	return rc;
1499 }
1500 
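/*
 * Push MAC addresses to the slaves according to the bonding mode: in
 * round-robin, balance and broadcast every slave gets the bonded MAC; in
 * mode 4 the 802.3ad code handles it; in active-backup, TLB and ALB only
 * the current primary uses the bonded MAC while the other slaves keep
 * their original (persisted) addresses.
 */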
1501 int
1502 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1503 {
1504 	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1505 	bool set;
1506 	int i;
1507 
1508 	/* Update slave devices MAC addresses */
1509 	if (internals->slave_count < 1)
1510 		return -1;
1511 
1512 	switch (internals->mode) {
1513 	case BONDING_MODE_ROUND_ROBIN:
1514 	case BONDING_MODE_BALANCE:
1515 	case BONDING_MODE_BROADCAST:
1516 		for (i = 0; i < internals->slave_count; i++) {
1517 			if (rte_eth_dev_default_mac_addr_set(
1518 					internals->slaves[i].port_id,
1519 					bonded_eth_dev->data->mac_addrs)) {
1520 				RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1521 						internals->slaves[i].port_id);
1522 				return -1;
1523 			}
1524 		}
1525 		break;
1526 	case BONDING_MODE_8023AD:
1527 		bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1528 		break;
1529 	case BONDING_MODE_ACTIVE_BACKUP:
1530 	case BONDING_MODE_TLB:
1531 	case BONDING_MODE_ALB:
1532 	default:
1533 		set = true;
1534 		for (i = 0; i < internals->slave_count; i++) {
1535 			if (internals->slaves[i].port_id ==
1536 					internals->current_primary_port) {
1537 				if (rte_eth_dev_default_mac_addr_set(
1538 						internals->current_primary_port,
1539 						bonded_eth_dev->data->mac_addrs)) {
1540 					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1541 							internals->current_primary_port);
1542 					set = false;
1543 				}
1544 			} else {
1545 				if (rte_eth_dev_default_mac_addr_set(
1546 						internals->slaves[i].port_id,
1547 						&internals->slaves[i].persisted_mac_addr)) {
1548 					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1549 							internals->slaves[i].port_id);
1550 				}
1551 			}
1552 		}
1553 		if (!set)
1554 			return -1;
1555 	}
1556 
1557 	return 0;
1558 }
1559 
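/*
 * Select the Rx/Tx burst handlers for the requested bonding mode. For
 * mode 4 and ALB the mode-specific state is initialised first; mode 4
 * additionally switches to the "fast queue" handlers when dedicated
 * control queues are enabled.
 */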
1560 int
1561 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1562 {
1563 	struct bond_dev_private *internals;
1564 
1565 	internals = eth_dev->data->dev_private;
1566 
1567 	switch (mode) {
1568 	case BONDING_MODE_ROUND_ROBIN:
1569 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1570 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1571 		break;
1572 	case BONDING_MODE_ACTIVE_BACKUP:
1573 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1574 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1575 		break;
1576 	case BONDING_MODE_BALANCE:
1577 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1578 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1579 		break;
1580 	case BONDING_MODE_BROADCAST:
1581 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1582 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1583 		break;
1584 	case BONDING_MODE_8023AD:
1585 		if (bond_mode_8023ad_enable(eth_dev) != 0)
1586 			return -1;
1587 
1588 		if (internals->mode4.dedicated_queues.enabled == 0) {
1589 			eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1590 			eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1591 			RTE_BOND_LOG(WARNING,
1592 				"Using mode 4, it is necessary to do TX burst "
1593 				"and RX burst at least every 100ms.");
1594 		} else {
1595 			/* Use flow director's optimization */
1596 			eth_dev->rx_pkt_burst =
1597 					bond_ethdev_rx_burst_8023ad_fast_queue;
1598 			eth_dev->tx_pkt_burst =
1599 					bond_ethdev_tx_burst_8023ad_fast_queue;
1600 		}
1601 		break;
1602 	case BONDING_MODE_TLB:
1603 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1604 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1605 		break;
1606 	case BONDING_MODE_ALB:
1607 		if (bond_mode_alb_enable(eth_dev) != 0)
1608 			return -1;
1609 
1610 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1611 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1612 		break;
1613 	default:
1614 		return -1;
1615 	}
1616 
1617 	internals->mode = mode;
1618 
1619 	return 0;
1620 }
1621 
1622 
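/*
 * Create the per-slave mempool for slow (LACP) frames on first use and,
 * when dedicated control queues are enabled, set up the extra Rx/Tx
 * queue pair reserved for 802.3ad control traffic on the slave.
 */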
1623 static int
1624 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1625 		struct rte_eth_dev *slave_eth_dev)
1626 {
1627 	int errval = 0;
1628 	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1629 	struct port *port = &bond_mode_8023ad_ports[slave_eth_dev->data->port_id];
1630 
1631 	if (port->slow_pool == NULL) {
1632 		char mem_name[256];
1633 		int slave_id = slave_eth_dev->data->port_id;
1634 
1635 		snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1636 				slave_id);
1637 		port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1638 			250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1639 			slave_eth_dev->data->numa_node);
1640 
1641 		/* Any memory allocation failure in initialization is critical because
1642 		 * resources can't be freed, so reinitialization is impossible. */
1643 		if (port->slow_pool == NULL) {
1644 			rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1645 				slave_id, mem_name, rte_strerror(rte_errno));
1646 		}
1647 	}
1648 
1649 	if (internals->mode4.dedicated_queues.enabled == 1) {
1650 		/* Configure slow Rx queue */
1651 
1652 		errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1653 				internals->mode4.dedicated_queues.rx_qid, 128,
1654 				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1655 				NULL, port->slow_pool);
1656 		if (errval != 0) {
1657 			RTE_BOND_LOG(ERR,
1658 					"rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1659 					slave_eth_dev->data->port_id,
1660 					internals->mode4.dedicated_queues.rx_qid,
1661 					errval);
1662 			return errval;
1663 		}
1664 
1665 		errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1666 				internals->mode4.dedicated_queues.tx_qid, 512,
1667 				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1668 				NULL);
1669 		if (errval != 0) {
1670 			RTE_BOND_LOG(ERR,
1671 				"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1672 				slave_eth_dev->data->port_id,
1673 				internals->mode4.dedicated_queues.tx_qid,
1674 				errval);
1675 			return errval;
1676 		}
1677 	}
1678 	return 0;
1679 }
1680 
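/*
 * Configure a slave so it mirrors the bonded device: stop it, propagate the
 * bond's RSS, VLAN-filter and MTU settings, set up its Rx/Tx queues (plus the
 * dedicated LACP queues and flow rules in mode 4), restart it, resync the
 * RETA and report its initial link state if LSC interrupts are available.
 */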
1681 int
1682 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1683 		struct rte_eth_dev *slave_eth_dev)
1684 {
1685 	struct bond_rx_queue *bd_rx_q;
1686 	struct bond_tx_queue *bd_tx_q;
1687 	uint16_t nb_rx_queues;
1688 	uint16_t nb_tx_queues;
1689 
1690 	int errval;
1691 	uint16_t q_id;
1692 	struct rte_flow_error flow_error;
1693 
1694 	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1695 
1696 	/* Stop slave */
1697 	errval = rte_eth_dev_stop(slave_eth_dev->data->port_id);
1698 	if (errval != 0)
1699 		RTE_BOND_LOG(ERR, "rte_eth_dev_stop: port %u, err (%d)",
1700 			     slave_eth_dev->data->port_id, errval);
1701 
1702 	/* Enable interrupts on slave device if supported */
1703 	if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1704 		slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1705 
1706 	/* If RSS is enabled for bonding, try to enable it for slaves  */
1707 	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1708 		if (internals->rss_key_len != 0) {
1709 			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1710 					internals->rss_key_len;
1711 			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1712 					internals->rss_key;
1713 		} else {
1714 			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1715 		}
1716 
1717 		slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1718 				bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1719 		slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1720 				bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1721 	}
1722 
1723 	if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
1724 			DEV_RX_OFFLOAD_VLAN_FILTER)
1725 		slave_eth_dev->data->dev_conf.rxmode.offloads |=
1726 				DEV_RX_OFFLOAD_VLAN_FILTER;
1727 	else
1728 		slave_eth_dev->data->dev_conf.rxmode.offloads &=
1729 				~DEV_RX_OFFLOAD_VLAN_FILTER;
1730 
1731 	nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1732 	nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1733 
1734 	if (internals->mode == BONDING_MODE_8023AD) {
1735 		if (internals->mode4.dedicated_queues.enabled == 1) {
1736 			nb_rx_queues++;
1737 			nb_tx_queues++;
1738 		}
1739 	}
1740 
1741 	errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
1742 				     bonded_eth_dev->data->mtu);
1743 	if (errval != 0 && errval != -ENOTSUP) {
1744 		RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
1745 				slave_eth_dev->data->port_id, errval);
1746 		return errval;
1747 	}
1748 
1749 	/* Configure device */
1750 	errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1751 			nb_rx_queues, nb_tx_queues,
1752 			&(slave_eth_dev->data->dev_conf));
1753 	if (errval != 0) {
1754 		RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
1755 				slave_eth_dev->data->port_id, errval);
1756 		return errval;
1757 	}
1758 
1759 	/* Setup Rx Queues */
1760 	for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1761 		bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1762 
1763 		errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1764 				bd_rx_q->nb_rx_desc,
1765 				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1766 				&(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1767 		if (errval != 0) {
1768 			RTE_BOND_LOG(ERR,
1769 					"rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1770 					slave_eth_dev->data->port_id, q_id, errval);
1771 			return errval;
1772 		}
1773 	}
1774 
1775 	/* Setup Tx Queues */
1776 	for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1777 		bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1778 
1779 		errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1780 				bd_tx_q->nb_tx_desc,
1781 				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1782 				&bd_tx_q->tx_conf);
1783 		if (errval != 0) {
1784 			RTE_BOND_LOG(ERR,
1785 				"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1786 				slave_eth_dev->data->port_id, q_id, errval);
1787 			return errval;
1788 		}
1789 	}
1790 
1791 	if (internals->mode == BONDING_MODE_8023AD &&
1792 			internals->mode4.dedicated_queues.enabled == 1) {
1793 		errval = slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev);
1794 		if (errval != 0)
1795 			return errval;
1796 
1797 		if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1798 				slave_eth_dev->data->port_id) != 0) {
1799 			RTE_BOND_LOG(ERR,
1800 				"bond_ethdev_8023ad_flow_verify failed: port=%d",
1801 				slave_eth_dev->data->port_id);
1802 			return -1;
1803 		}
1804 
1805 		if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1806 			rte_flow_destroy(slave_eth_dev->data->port_id,
1807 					internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1808 					&flow_error);
1809 
1810 		bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1811 				slave_eth_dev->data->port_id);
1812 	}
1813 
1814 	/* Start device */
1815 	errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1816 	if (errval != 0) {
1817 		RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1818 				slave_eth_dev->data->port_id, errval);
1819 		return -1;
1820 	}
1821 
1822 	/* If RSS is enabled for bonding, synchronize RETA */
1823 	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1824 		int i;
1825 		struct bond_dev_private *internals;
1826 
1827 		internals = bonded_eth_dev->data->dev_private;
1828 
1829 		for (i = 0; i < internals->slave_count; i++) {
1830 			if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1831 				errval = rte_eth_dev_rss_reta_update(
1832 						slave_eth_dev->data->port_id,
1833 						&internals->reta_conf[0],
1834 						internals->slaves[i].reta_size);
1835 				if (errval != 0) {
1836 					RTE_BOND_LOG(WARNING,
1837 						     "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1838 						     " RSS Configuration for bonding may be inconsistent.",
1839 						     slave_eth_dev->data->port_id, errval);
1840 				}
1841 				break;
1842 			}
1843 		}
1844 	}
1845 
1846 	/* If lsc interrupt is set, check initial slave's link status */
1847 	if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1848 		slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1849 		bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1850 			RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1851 			NULL);
1852 	}
1853 
1854 	return 0;
1855 }
1856 
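/*
 * Remove a slave from the bonded device's slave table (and from every flow's
 * per-slave array), then reset the slave's ethdev so that it has to be fully
 * reconfigured before it can be used again.
 */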
1857 void
1858 slave_remove(struct bond_dev_private *internals,
1859 		struct rte_eth_dev *slave_eth_dev)
1860 {
1861 	uint16_t i;
1862 
1863 	for (i = 0; i < internals->slave_count; i++)
1864 		if (internals->slaves[i].port_id ==
1865 				slave_eth_dev->data->port_id)
1866 			break;
1867 
1868 	if (i < (internals->slave_count - 1)) {
1869 		struct rte_flow *flow;
1870 
1871 		memmove(&internals->slaves[i], &internals->slaves[i + 1],
1872 				sizeof(internals->slaves[0]) *
1873 				(internals->slave_count - i - 1));
1874 		TAILQ_FOREACH(flow, &internals->flow_list, next) {
1875 			memmove(&flow->flows[i], &flow->flows[i + 1],
1876 				sizeof(flow->flows[0]) *
1877 				(internals->slave_count - i - 1));
1878 			flow->flows[internals->slave_count - 1] = NULL;
1879 		}
1880 	}
1881 
1882 	internals->slave_count--;
1883 
1884 	/* force reconfiguration of slave interfaces */
1885 	rte_eth_dev_internal_reset(slave_eth_dev);
1886 }
1887 
1888 static void
1889 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1890 
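/*
 * Record a new slave in the bonded device's slave table: store its port id,
 * enable link-status polling if the slave cannot signal LSC interrupts and
 * save its current MAC address so it can be restored later.
 */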
1891 void
1892 slave_add(struct bond_dev_private *internals,
1893 		struct rte_eth_dev *slave_eth_dev)
1894 {
1895 	struct bond_slave_details *slave_details =
1896 			&internals->slaves[internals->slave_count];
1897 
1898 	slave_details->port_id = slave_eth_dev->data->port_id;
1899 	slave_details->last_link_status = 0;
1900 
1901 	/* Mark slave devices that don't support interrupts so we can
1902 	 * compensate when we start the bond
1903 	 */
1904 	if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1905 		slave_details->link_status_poll_enabled = 1;
1906 	}
1907 
1908 	slave_details->link_status_wait_to_complete = 0;
1909 	/* save the slave's current MAC address so it can be restored later */
1910 	memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1911 			sizeof(struct rte_ether_addr));
1912 }
1913 
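/*
 * Set the current primary port. The proposed slave is accepted directly when
 * there are no active slaves; otherwise it only becomes primary if it is
 * found in the active slave list.
 */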
1914 void
1915 bond_ethdev_primary_set(struct bond_dev_private *internals,
1916 		uint16_t slave_port_id)
1917 {
1918 	int i;
1919 
1920 	if (internals->active_slave_count < 1)
1921 		internals->current_primary_port = slave_port_id;
1922 	else
1923 		/* Search bonded device slave ports for new proposed primary port */
1924 		for (i = 0; i < internals->active_slave_count; i++) {
1925 			if (internals->active_slaves[i] == slave_port_id)
1926 				internals->current_primary_port = slave_port_id;
1927 		}
1928 }
1929 
1930 static int
1931 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
1932 
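/*
 * dev_start handler for the bonded device: apply the bond MAC (taken from the
 * primary slave unless user defined), reserve the dedicated mode 4 queue ids,
 * reconfigure and start every slave, arm link-status polling when needed and
 * kick off the mode-specific (802.3ad, TLB/ALB) machinery.
 */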
1933 static int
1934 bond_ethdev_start(struct rte_eth_dev *eth_dev)
1935 {
1936 	struct bond_dev_private *internals;
1937 	int i;
1938 
1939 	/* slave eth dev will be started by bonded device */
1940 	if (check_for_bonded_ethdev(eth_dev)) {
1941 		RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
1942 				eth_dev->data->port_id);
1943 		return -1;
1944 	}
1945 
1946 	eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
1947 	eth_dev->data->dev_started = 1;
1948 
1949 	internals = eth_dev->data->dev_private;
1950 
1951 	if (internals->slave_count == 0) {
1952 		RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
1953 		goto out_err;
1954 	}
1955 
1956 	if (internals->user_defined_mac == 0) {
1957 		struct rte_ether_addr *new_mac_addr = NULL;
1958 
1959 		for (i = 0; i < internals->slave_count; i++)
1960 			if (internals->slaves[i].port_id == internals->primary_port)
1961 				new_mac_addr = &internals->slaves[i].persisted_mac_addr;
1962 
1963 		if (new_mac_addr == NULL)
1964 			goto out_err;
1965 
1966 		if (mac_address_set(eth_dev, new_mac_addr) != 0) {
1967 			RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
1968 					eth_dev->data->port_id);
1969 			goto out_err;
1970 		}
1971 	}
1972 
1973 	if (internals->mode == BONDING_MODE_8023AD) {
1974 		if (internals->mode4.dedicated_queues.enabled == 1) {
1975 			internals->mode4.dedicated_queues.rx_qid =
1976 					eth_dev->data->nb_rx_queues;
1977 			internals->mode4.dedicated_queues.tx_qid =
1978 					eth_dev->data->nb_tx_queues;
1979 		}
1980 	}
1981 
1982 
1983 	/* Reconfigure each slave device if starting bonded device */
1984 	for (i = 0; i < internals->slave_count; i++) {
1985 		struct rte_eth_dev *slave_ethdev =
1986 				&(rte_eth_devices[internals->slaves[i].port_id]);
1987 		if (slave_configure(eth_dev, slave_ethdev) != 0) {
1988 			RTE_BOND_LOG(ERR,
1989 				"bonded port (%d) failed to reconfigure slave device (%d)",
1990 				eth_dev->data->port_id,
1991 				internals->slaves[i].port_id);
1992 			goto out_err;
1993 		}
1994 		/* We will need to poll for link status if any slave doesn't
1995 		 * support interrupts
1996 		 */
1997 		if (internals->slaves[i].link_status_poll_enabled)
1998 			internals->link_status_polling_enabled = 1;
1999 	}
2000 
2001 	/* start polling if needed */
2002 	if (internals->link_status_polling_enabled) {
2003 		rte_eal_alarm_set(
2004 			internals->link_status_polling_interval_ms * 1000,
2005 			bond_ethdev_slave_link_status_change_monitor,
2006 			(void *)&rte_eth_devices[internals->port_id]);
2007 	}
2008 
2009 	/* Update all slave devices' MACs */
2010 	if (mac_address_slaves_update(eth_dev) != 0)
2011 		goto out_err;
2012 
2013 	if (internals->user_defined_primary_port)
2014 		bond_ethdev_primary_set(internals, internals->primary_port);
2015 
2016 	if (internals->mode == BONDING_MODE_8023AD)
2017 		bond_mode_8023ad_start(eth_dev);
2018 
2019 	if (internals->mode == BONDING_MODE_TLB ||
2020 			internals->mode == BONDING_MODE_ALB)
2021 		bond_tlb_enable(internals);
2022 
2023 	return 0;
2024 
2025 out_err:
2026 	eth_dev->data->dev_started = 0;
2027 	return -1;
2028 }
2029 
2030 static void
2031 bond_ethdev_free_queues(struct rte_eth_dev *dev)
2032 {
2033 	uint16_t i;
2034 
2035 	if (dev->data->rx_queues != NULL) {
2036 		for (i = 0; i < dev->data->nb_rx_queues; i++) {
2037 			rte_free(dev->data->rx_queues[i]);
2038 			dev->data->rx_queues[i] = NULL;
2039 		}
2040 		dev->data->nb_rx_queues = 0;
2041 	}
2042 
2043 	if (dev->data->tx_queues != NULL) {
2044 		for (i = 0; i < dev->data->nb_tx_queues; i++) {
2045 			rte_free(dev->data->tx_queues[i]);
2046 			dev->data->tx_queues[i] = NULL;
2047 		}
2048 		dev->data->nb_tx_queues = 0;
2049 	}
2050 }
2051 
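/*
 * dev_stop handler: stop the mode-specific machinery (draining the mode 4
 * control rings, disabling TLB/ALB), mark the bonded link down, then stop
 * and deactivate every currently active slave.
 */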
2052 int
2053 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2054 {
2055 	struct bond_dev_private *internals = eth_dev->data->dev_private;
2056 	uint16_t i;
2057 	int ret;
2058 
2059 	if (internals->mode == BONDING_MODE_8023AD) {
2060 		struct port *port;
2061 		void *pkt = NULL;
2062 
2063 		bond_mode_8023ad_stop(eth_dev);
2064 
2065 		/* Discard all messages to/from mode 4 state machines */
2066 		for (i = 0; i < internals->active_slave_count; i++) {
2067 			port = &bond_mode_8023ad_ports[internals->active_slaves[i]];
2068 
2069 			RTE_ASSERT(port->rx_ring != NULL);
2070 			while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2071 				rte_pktmbuf_free(pkt);
2072 
2073 			RTE_ASSERT(port->tx_ring != NULL);
2074 			while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2075 				rte_pktmbuf_free(pkt);
2076 		}
2077 	}
2078 
2079 	if (internals->mode == BONDING_MODE_TLB ||
2080 			internals->mode == BONDING_MODE_ALB) {
2081 		bond_tlb_disable(internals);
2082 		for (i = 0; i < internals->active_slave_count; i++)
2083 			tlb_last_obytets[internals->active_slaves[i]] = 0;
2084 	}
2085 
2086 	eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2087 	eth_dev->data->dev_started = 0;
2088 
2089 	internals->link_status_polling_enabled = 0;
2090 	for (i = 0; i < internals->slave_count; i++) {
2091 		uint16_t slave_id = internals->slaves[i].port_id;
2092 		if (find_slave_by_id(internals->active_slaves,
2093 				internals->active_slave_count, slave_id) !=
2094 						internals->active_slave_count) {
2095 			internals->slaves[i].last_link_status = 0;
2096 			ret = rte_eth_dev_stop(slave_id);
2097 			if (ret != 0) {
2098 				RTE_BOND_LOG(ERR, "Failed to stop device on port %u",
2099 					     slave_id);
2100 				return ret;
2101 			}
2102 			deactivate_slave(eth_dev, slave_id);
2103 		}
2104 	}
2105 
2106 	return 0;
2107 }
2108 
2109 int
2110 bond_ethdev_close(struct rte_eth_dev *dev)
2111 {
2112 	struct bond_dev_private *internals = dev->data->dev_private;
2113 	uint16_t bond_port_id = internals->port_id;
2114 	int skipped = 0;
2115 	struct rte_flow_error ferror;
2116 
2117 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
2118 		return 0;
2119 
2120 	RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name);
2121 	while (internals->slave_count != skipped) {
2122 		uint16_t port_id = internals->slaves[skipped].port_id;
2123 
2124 		if (rte_eth_dev_stop(port_id) != 0) {
2125 			RTE_BOND_LOG(ERR, "Failed to stop device on port %u",
2126 				     port_id);
2127 			skipped++;
2128 		}
2129 
2130 		if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2131 			RTE_BOND_LOG(ERR,
2132 				     "Failed to remove port %d from bonded device %s",
2133 				     port_id, dev->device->name);
2134 			skipped++;
2135 		}
2136 	}
2137 	bond_flow_ops.flush(dev, &ferror);
2138 	bond_ethdev_free_queues(dev);
2139 	rte_bitmap_reset(internals->vlan_filter_bmp);
2140 	rte_bitmap_free(internals->vlan_filter_bmp);
2141 	rte_free(internals->vlan_filter_bmpmem);
2142 
2143 	/* Try to release the mempool used in mode 6 (ALB). If the bonded
2144 	 * device is not in mode 6, freeing a NULL pointer is not a problem.
2145 	 */
2146 	rte_mempool_free(internals->mode6.mempool);
2147 
2148 	return 0;
2149 }
2150 
2151 /* forward declaration */
2152 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2153 
2154 static int
2155 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2156 {
2157 	struct bond_dev_private *internals = dev->data->dev_private;
2158 	struct bond_slave_details slave;
2159 	int ret;
2160 
2161 	uint16_t max_nb_rx_queues = UINT16_MAX;
2162 	uint16_t max_nb_tx_queues = UINT16_MAX;
2163 	uint16_t max_rx_desc_lim = UINT16_MAX;
2164 	uint16_t max_tx_desc_lim = UINT16_MAX;
2165 
2166 	dev_info->max_mac_addrs = BOND_MAX_MAC_ADDRS;
2167 
2168 	dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2169 			internals->candidate_max_rx_pktlen :
2170 			RTE_ETHER_MAX_JUMBO_FRAME_LEN;
2171 
2172 	/* The max number of tx/rx queues that the bonded device can support is
2173 	 * the minimum value across the bonded slaves, as all slaves must be
2174 	 * capable of supporting the same number of tx/rx queues.
2175 	 */
2176 	if (internals->slave_count > 0) {
2177 		struct rte_eth_dev_info slave_info;
2178 		uint16_t idx;
2179 
2180 		for (idx = 0; idx < internals->slave_count; idx++) {
2181 			slave = internals->slaves[idx];
2182 			ret = rte_eth_dev_info_get(slave.port_id, &slave_info);
2183 			if (ret != 0) {
2184 				RTE_BOND_LOG(ERR,
2185 					"%s: Error during getting device (port %u) info: %s\n",
2186 					__func__,
2187 					slave.port_id,
2188 					strerror(-ret));
2189 
2190 				return ret;
2191 			}
2192 
2193 			if (slave_info.max_rx_queues < max_nb_rx_queues)
2194 				max_nb_rx_queues = slave_info.max_rx_queues;
2195 
2196 			if (slave_info.max_tx_queues < max_nb_tx_queues)
2197 				max_nb_tx_queues = slave_info.max_tx_queues;
2198 
2199 			if (slave_info.rx_desc_lim.nb_max < max_rx_desc_lim)
2200 				max_rx_desc_lim = slave_info.rx_desc_lim.nb_max;
2201 
2202 			if (slave_info.tx_desc_lim.nb_max < max_tx_desc_lim)
2203 				max_tx_desc_lim = slave_info.tx_desc_lim.nb_max;
2204 		}
2205 	}
2206 
2207 	dev_info->max_rx_queues = max_nb_rx_queues;
2208 	dev_info->max_tx_queues = max_nb_tx_queues;
2209 
2210 	memcpy(&dev_info->default_rxconf, &internals->default_rxconf,
2211 	       sizeof(dev_info->default_rxconf));
2212 	memcpy(&dev_info->default_txconf, &internals->default_txconf,
2213 	       sizeof(dev_info->default_txconf));
2214 
2215 	dev_info->rx_desc_lim.nb_max = max_rx_desc_lim;
2216 	dev_info->tx_desc_lim.nb_max = max_tx_desc_lim;
2217 
2218 	/**
2219 	 * If dedicated hw queues are enabled for the link bonding device in LACP
2220 	 * mode then the maximum number of data path queues must be reduced by 1.
2221 	 */
2222 	if (internals->mode == BONDING_MODE_8023AD &&
2223 		internals->mode4.dedicated_queues.enabled == 1) {
2224 		dev_info->max_rx_queues--;
2225 		dev_info->max_tx_queues--;
2226 	}
2227 
2228 	dev_info->min_rx_bufsize = 0;
2229 
2230 	dev_info->rx_offload_capa = internals->rx_offload_capa;
2231 	dev_info->tx_offload_capa = internals->tx_offload_capa;
2232 	dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
2233 	dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
2234 	dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2235 
2236 	dev_info->reta_size = internals->reta_size;
2237 
2238 	return 0;
2239 }
2240 
2241 static int
2242 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2243 {
2244 	int res;
2245 	uint16_t i;
2246 	struct bond_dev_private *internals = dev->data->dev_private;
2247 
2248 	/* don't do this while a slave is being added */
2249 	rte_spinlock_lock(&internals->lock);
2250 
2251 	if (on)
2252 		rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2253 	else
2254 		rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2255 
2256 	for (i = 0; i < internals->slave_count; i++) {
2257 		uint16_t port_id = internals->slaves[i].port_id;
2258 
2259 		res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2260 		if (res == -ENOTSUP)
2261 			RTE_BOND_LOG(WARNING,
2262 				     "Setting VLAN filter on slave port %u not supported.",
2263 				     port_id);
2264 	}
2265 
2266 	rte_spinlock_unlock(&internals->lock);
2267 	return 0;
2268 }
2269 
2270 static int
2271 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2272 		uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2273 		const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2274 {
2275 	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2276 			rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2277 					0, dev->data->numa_node);
2278 	if (bd_rx_q == NULL)
2279 		return -1;
2280 
2281 	bd_rx_q->queue_id = rx_queue_id;
2282 	bd_rx_q->dev_private = dev->data->dev_private;
2283 
2284 	bd_rx_q->nb_rx_desc = nb_rx_desc;
2285 
2286 	memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2287 	bd_rx_q->mb_pool = mb_pool;
2288 
2289 	dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2290 
2291 	return 0;
2292 }
2293 
2294 static int
2295 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2296 		uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2297 		const struct rte_eth_txconf *tx_conf)
2298 {
2299 	struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
2300 			rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2301 					0, dev->data->numa_node);
2302 
2303 	if (bd_tx_q == NULL)
2304 		return -1;
2305 
2306 	bd_tx_q->queue_id = tx_queue_id;
2307 	bd_tx_q->dev_private = dev->data->dev_private;
2308 
2309 	bd_tx_q->nb_tx_desc = nb_tx_desc;
2310 	memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2311 
2312 	dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2313 
2314 	return 0;
2315 }
2316 
2317 static void
2318 bond_ethdev_rx_queue_release(void *queue)
2319 {
2320 	if (queue == NULL)
2321 		return;
2322 
2323 	rte_free(queue);
2324 }
2325 
2326 static void
2327 bond_ethdev_tx_queue_release(void *queue)
2328 {
2329 	if (queue == NULL)
2330 		return;
2331 
2332 	rte_free(queue);
2333 }
2334 
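/*
 * Alarm callback that polls the link status of slaves which cannot generate
 * LSC interrupts, forwards any change through the LSC event callback and
 * re-arms itself while such slaves are still present.
 */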
2335 static void
2336 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2337 {
2338 	struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2339 	struct bond_dev_private *internals;
2340 
2341 	/* Default value for polling slave found is true as we don't want to
2342 	 * disable the polling thread if we cannot get the lock */
2343 	int i, polling_slave_found = 1;
2344 
2345 	if (cb_arg == NULL)
2346 		return;
2347 
2348 	bonded_ethdev = cb_arg;
2349 	internals = bonded_ethdev->data->dev_private;
2350 
2351 	if (!bonded_ethdev->data->dev_started ||
2352 		!internals->link_status_polling_enabled)
2353 		return;
2354 
2355 	/* If the device is currently being configured then don't check the
2356 	 * slaves' link status; wait until the next period */
2357 	if (rte_spinlock_trylock(&internals->lock)) {
2358 		if (internals->slave_count > 0)
2359 			polling_slave_found = 0;
2360 
2361 		for (i = 0; i < internals->slave_count; i++) {
2362 			if (!internals->slaves[i].link_status_poll_enabled)
2363 				continue;
2364 
2365 			slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2366 			polling_slave_found = 1;
2367 
2368 			/* Update slave link status */
2369 			(*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2370 					internals->slaves[i].link_status_wait_to_complete);
2371 
2372 			/* if link status has changed since last checked then call lsc
2373 			 * event callback */
2374 			if (slave_ethdev->data->dev_link.link_status !=
2375 					internals->slaves[i].last_link_status) {
2376 				internals->slaves[i].last_link_status =
2377 						slave_ethdev->data->dev_link.link_status;
2378 
2379 				bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2380 						RTE_ETH_EVENT_INTR_LSC,
2381 						&bonded_ethdev->data->port_id,
2382 						NULL);
2383 			}
2384 		}
2385 		rte_spinlock_unlock(&internals->lock);
2386 	}
2387 
2388 	if (polling_slave_found)
2389 		/* Set alarm to continue monitoring link status of slave ethdev's */
2390 		rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2391 				bond_ethdev_slave_link_status_change_monitor, cb_arg);
2392 }
2393 
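/*
 * Report the bonded device's link. The reported speed depends on the mode:
 * the minimum slave speed for broadcast, the primary slave's speed for
 * active backup, and the sum of all active slaves' speeds otherwise (mode 4
 * additionally inherits duplex/autoneg from its aggregated link).
 */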
2394 static int
2395 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2396 {
2397 	int (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2398 
2399 	struct bond_dev_private *bond_ctx;
2400 	struct rte_eth_link slave_link;
2401 
2402 	bool one_link_update_succeeded;
2403 	uint32_t idx;
2404 	int ret;
2405 
2406 	bond_ctx = ethdev->data->dev_private;
2407 
2408 	ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2409 
2410 	if (ethdev->data->dev_started == 0 ||
2411 			bond_ctx->active_slave_count == 0) {
2412 		ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2413 		return 0;
2414 	}
2415 
2416 	ethdev->data->dev_link.link_status = ETH_LINK_UP;
2417 
2418 	if (wait_to_complete)
2419 		link_update = rte_eth_link_get;
2420 	else
2421 		link_update = rte_eth_link_get_nowait;
2422 
2423 	switch (bond_ctx->mode) {
2424 	case BONDING_MODE_BROADCAST:
2425 		/**
2426 		 * Setting link speed to UINT32_MAX to ensure we pick up the
2427 		 * value of the first active slave
2428 		 */
2429 		ethdev->data->dev_link.link_speed = UINT32_MAX;
2430 
2431 		/**
2432 		 * The link speed is the minimum of all the slaves' link speeds,
2433 		 * as packet loss will occur on the slowest slave if transmission
2434 		 * at a higher rate is attempted
2435 		 */
2436 		for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2437 			ret = link_update(bond_ctx->active_slaves[idx],
2438 					  &slave_link);
2439 			if (ret < 0) {
2440 				ethdev->data->dev_link.link_speed =
2441 					ETH_SPEED_NUM_NONE;
2442 				RTE_BOND_LOG(ERR,
2443 					"Slave (port %u) link get failed: %s",
2444 					bond_ctx->active_slaves[idx],
2445 					rte_strerror(-ret));
2446 				return 0;
2447 			}
2448 
2449 			if (slave_link.link_speed <
2450 					ethdev->data->dev_link.link_speed)
2451 				ethdev->data->dev_link.link_speed =
2452 						slave_link.link_speed;
2453 		}
2454 		break;
2455 	case BONDING_MODE_ACTIVE_BACKUP:
2456 		/* Current primary slave */
2457 		ret = link_update(bond_ctx->current_primary_port, &slave_link);
2458 		if (ret < 0) {
2459 			RTE_BOND_LOG(ERR, "Slave (port %u) link get failed: %s",
2460 				bond_ctx->current_primary_port,
2461 				rte_strerror(-ret));
2462 			return 0;
2463 		}
2464 
2465 		ethdev->data->dev_link.link_speed = slave_link.link_speed;
2466 		break;
2467 	case BONDING_MODE_8023AD:
2468 		ethdev->data->dev_link.link_autoneg =
2469 				bond_ctx->mode4.slave_link.link_autoneg;
2470 		ethdev->data->dev_link.link_duplex =
2471 				bond_ctx->mode4.slave_link.link_duplex;
2472 		/* fall through */
2473 		/* to update link speed */
2474 	case BONDING_MODE_ROUND_ROBIN:
2475 	case BONDING_MODE_BALANCE:
2476 	case BONDING_MODE_TLB:
2477 	case BONDING_MODE_ALB:
2478 	default:
2479 		/**
2480 		 * In these modes the maximum theoretical link speed is the sum
2481 		 * of all the slaves' link speeds
2482 		 */
2483 		ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2484 		one_link_update_succeeded = false;
2485 
2486 		for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2487 			ret = link_update(bond_ctx->active_slaves[idx],
2488 					&slave_link);
2489 			if (ret < 0) {
2490 				RTE_BOND_LOG(ERR,
2491 					"Slave (port %u) link get failed: %s",
2492 					bond_ctx->active_slaves[idx],
2493 					rte_strerror(-ret));
2494 				continue;
2495 			}
2496 
2497 			one_link_update_succeeded = true;
2498 			ethdev->data->dev_link.link_speed +=
2499 					slave_link.link_speed;
2500 		}
2501 
2502 		if (!one_link_update_succeeded) {
2503 			RTE_BOND_LOG(ERR, "All slaves link get failed");
2504 			return 0;
2505 		}
2506 	}
2507 
2508 
2509 	return 0;
2510 }
2511 
2512 
2513 static int
2514 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2515 {
2516 	struct bond_dev_private *internals = dev->data->dev_private;
2517 	struct rte_eth_stats slave_stats;
2518 	int i, j;
2519 
2520 	for (i = 0; i < internals->slave_count; i++) {
2521 		rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2522 
2523 		stats->ipackets += slave_stats.ipackets;
2524 		stats->opackets += slave_stats.opackets;
2525 		stats->ibytes += slave_stats.ibytes;
2526 		stats->obytes += slave_stats.obytes;
2527 		stats->imissed += slave_stats.imissed;
2528 		stats->ierrors += slave_stats.ierrors;
2529 		stats->oerrors += slave_stats.oerrors;
2530 		stats->rx_nombuf += slave_stats.rx_nombuf;
2531 
2532 		for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2533 			stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2534 			stats->q_opackets[j] += slave_stats.q_opackets[j];
2535 			stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2536 			stats->q_obytes[j] += slave_stats.q_obytes[j];
2537 			stats->q_errors[j] += slave_stats.q_errors[j];
2538 		}
2539 
2540 	}
2541 
2542 	return 0;
2543 }
2544 
2545 static int
2546 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2547 {
2548 	struct bond_dev_private *internals = dev->data->dev_private;
2549 	int i;
2550 	int err;
2551 	int ret;
2552 
2553 	for (i = 0, err = 0; i < internals->slave_count; i++) {
2554 		ret = rte_eth_stats_reset(internals->slaves[i].port_id);
2555 		if (ret != 0)
2556 			err = ret;
2557 	}
2558 
2559 	return err;
2560 }
2561 
2562 static int
2563 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2564 {
2565 	struct bond_dev_private *internals = eth_dev->data->dev_private;
2566 	int i;
2567 	int ret = 0;
2568 	uint16_t port_id;
2569 
2570 	switch (internals->mode) {
2571 	/* Promiscuous mode is propagated to all slaves */
2572 	case BONDING_MODE_ROUND_ROBIN:
2573 	case BONDING_MODE_BALANCE:
2574 	case BONDING_MODE_BROADCAST:
2575 	case BONDING_MODE_8023AD: {
2576 		unsigned int slave_ok = 0;
2577 
2578 		for (i = 0; i < internals->slave_count; i++) {
2579 			port_id = internals->slaves[i].port_id;
2580 
2581 			ret = rte_eth_promiscuous_enable(port_id);
2582 			if (ret != 0)
2583 				RTE_BOND_LOG(ERR,
2584 					"Failed to enable promiscuous mode for port %u: %s",
2585 					port_id, rte_strerror(-ret));
2586 			else
2587 				slave_ok++;
2588 		}
2589 		/*
2590 		 * Report success if the operation succeeded on at least
2591 		 * one slave. Otherwise return the last error code.
2592 		 */
2593 		if (slave_ok > 0)
2594 			ret = 0;
2595 		break;
2596 	}
2597 	/* Promiscuous mode is propagated only to primary slave */
2598 	case BONDING_MODE_ACTIVE_BACKUP:
2599 	case BONDING_MODE_TLB:
2600 	case BONDING_MODE_ALB:
2601 	default:
2602 		/* Do not touch promisc when there cannot be primary ports */
2603 		if (internals->slave_count == 0)
2604 			break;
2605 		port_id = internals->current_primary_port;
2606 		ret = rte_eth_promiscuous_enable(port_id);
2607 		if (ret != 0)
2608 			RTE_BOND_LOG(ERR,
2609 				"Failed to enable promiscuous mode for port %u: %s",
2610 				port_id, rte_strerror(-ret));
2611 	}
2612 
2613 	return ret;
2614 }
2615 
2616 static int
2617 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2618 {
2619 	struct bond_dev_private *internals = dev->data->dev_private;
2620 	int i;
2621 	int ret = 0;
2622 	uint16_t port_id;
2623 
2624 	switch (internals->mode) {
2625 	/* Promiscuous mode is propagated to all slaves */
2626 	case BONDING_MODE_ROUND_ROBIN:
2627 	case BONDING_MODE_BALANCE:
2628 	case BONDING_MODE_BROADCAST:
2629 	case BONDING_MODE_8023AD: {
2630 		unsigned int slave_ok = 0;
2631 
2632 		for (i = 0; i < internals->slave_count; i++) {
2633 			port_id = internals->slaves[i].port_id;
2634 
2635 			if (internals->mode == BONDING_MODE_8023AD &&
2636 			    bond_mode_8023ad_ports[port_id].forced_rx_flags ==
2637 					BOND_8023AD_FORCED_PROMISC) {
2638 				slave_ok++;
2639 				continue;
2640 			}
2641 			ret = rte_eth_promiscuous_disable(port_id);
2642 			if (ret != 0)
2643 				RTE_BOND_LOG(ERR,
2644 					"Failed to disable promiscuous mode for port %u: %s",
2645 					port_id, rte_strerror(-ret));
2646 			else
2647 				slave_ok++;
2648 		}
2649 		/*
2650 		 * Report success if the operation succeeded on at least
2651 		 * one slave. Otherwise return the last error code.
2652 		 */
2653 		if (slave_ok > 0)
2654 			ret = 0;
2655 		break;
2656 	}
2657 	/* Promiscuous mode is propagated only to primary slave */
2658 	case BONDING_MODE_ACTIVE_BACKUP:
2659 	case BONDING_MODE_TLB:
2660 	case BONDING_MODE_ALB:
2661 	default:
2662 		/* Do not touch promisc when there cannot be primary ports */
2663 		if (internals->slave_count == 0)
2664 			break;
2665 		port_id = internals->current_primary_port;
2666 		ret = rte_eth_promiscuous_disable(port_id);
2667 		if (ret != 0)
2668 			RTE_BOND_LOG(ERR,
2669 				"Failed to disable promiscuous mode for port %u: %s",
2670 				port_id, rte_strerror(-ret));
2671 	}
2672 
2673 	return ret;
2674 }
2675 
2676 static int
2677 bond_ethdev_allmulticast_enable(struct rte_eth_dev *eth_dev)
2678 {
2679 	struct bond_dev_private *internals = eth_dev->data->dev_private;
2680 	int i;
2681 	int ret = 0;
2682 	uint16_t port_id;
2683 
2684 	switch (internals->mode) {
2685 	/* allmulti mode is propagated to all slaves */
2686 	case BONDING_MODE_ROUND_ROBIN:
2687 	case BONDING_MODE_BALANCE:
2688 	case BONDING_MODE_BROADCAST:
2689 	case BONDING_MODE_8023AD: {
2690 		unsigned int slave_ok = 0;
2691 
2692 		for (i = 0; i < internals->slave_count; i++) {
2693 			port_id = internals->slaves[i].port_id;
2694 
2695 			ret = rte_eth_allmulticast_enable(port_id);
2696 			if (ret != 0)
2697 				RTE_BOND_LOG(ERR,
2698 					"Failed to enable allmulti mode for port %u: %s",
2699 					port_id, rte_strerror(-ret));
2700 			else
2701 				slave_ok++;
2702 		}
2703 		/*
2704 		 * Report success if the operation succeeded on at least
2705 		 * one slave. Otherwise return the last error code.
2706 		 */
2707 		if (slave_ok > 0)
2708 			ret = 0;
2709 		break;
2710 	}
2711 	/* allmulti mode is propagated only to primary slave */
2712 	case BONDING_MODE_ACTIVE_BACKUP:
2713 	case BONDING_MODE_TLB:
2714 	case BONDING_MODE_ALB:
2715 	default:
2716 		/* Do not touch allmulti when there cannot be primary ports */
2717 		if (internals->slave_count == 0)
2718 			break;
2719 		port_id = internals->current_primary_port;
2720 		ret = rte_eth_allmulticast_enable(port_id);
2721 		if (ret != 0)
2722 			RTE_BOND_LOG(ERR,
2723 				"Failed to enable allmulti mode for port %u: %s",
2724 				port_id, rte_strerror(-ret));
2725 	}
2726 
2727 	return ret;
2728 }
2729 
2730 static int
2731 bond_ethdev_allmulticast_disable(struct rte_eth_dev *eth_dev)
2732 {
2733 	struct bond_dev_private *internals = eth_dev->data->dev_private;
2734 	int i;
2735 	int ret = 0;
2736 	uint16_t port_id;
2737 
2738 	switch (internals->mode) {
2739 	/* allmulti mode is propagated to all slaves */
2740 	case BONDING_MODE_ROUND_ROBIN:
2741 	case BONDING_MODE_BALANCE:
2742 	case BONDING_MODE_BROADCAST:
2743 	case BONDING_MODE_8023AD: {
2744 		unsigned int slave_ok = 0;
2745 
2746 		for (i = 0; i < internals->slave_count; i++) {
2747 			uint16_t port_id = internals->slaves[i].port_id;
2748 
2749 			if (internals->mode == BONDING_MODE_8023AD &&
2750 			    bond_mode_8023ad_ports[port_id].forced_rx_flags ==
2751 					BOND_8023AD_FORCED_ALLMULTI)
2752 				continue;
2753 
2754 			ret = rte_eth_allmulticast_disable(port_id);
2755 			if (ret != 0)
2756 				RTE_BOND_LOG(ERR,
2757 					"Failed to disable allmulti mode for port %u: %s",
2758 					port_id, rte_strerror(-ret));
2759 			else
2760 				slave_ok++;
2761 		}
2762 		/*
2763 		 * Report success if the operation succeeded on at least
2764 		 * one slave. Otherwise return the last error code.
2765 		 */
2766 		if (slave_ok > 0)
2767 			ret = 0;
2768 		break;
2769 	}
2770 	/* allmulti mode is propagated only to primary slave */
2771 	case BONDING_MODE_ACTIVE_BACKUP:
2772 	case BONDING_MODE_TLB:
2773 	case BONDING_MODE_ALB:
2774 	default:
2775 		/* Do not touch allmulti when there cannot be primary ports */
2776 		if (internals->slave_count == 0)
2777 			break;
2778 		port_id = internals->current_primary_port;
2779 		ret = rte_eth_allmulticast_disable(port_id);
2780 		if (ret != 0)
2781 			RTE_BOND_LOG(ERR,
2782 				"Failed to disable allmulti mode for port %u: %s",
2783 				port_id, rte_strerror(-ret));
2784 	}
2785 
2786 	return ret;
2787 }
2788 
2789 static void
2790 bond_ethdev_delayed_lsc_propagation(void *arg)
2791 {
2792 	if (arg == NULL)
2793 		return;
2794 
2795 	rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2796 			RTE_ETH_EVENT_INTR_LSC, NULL);
2797 }
2798 
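/*
 * Handle a link-state change reported for a slave: activate or deactivate it,
 * update the primary port and slave MAC addresses as required, recompute the
 * bonded link and propagate the event to the application, optionally delayed
 * by the configured link up/down delays.
 */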
2799 int
2800 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2801 		void *param, void *ret_param __rte_unused)
2802 {
2803 	struct rte_eth_dev *bonded_eth_dev;
2804 	struct bond_dev_private *internals;
2805 	struct rte_eth_link link;
2806 	int rc = -1;
2807 	int ret;
2808 
2809 	uint8_t lsc_flag = 0;
2810 	int valid_slave = 0;
2811 	uint16_t active_pos;
2812 	uint16_t i;
2813 
2814 	if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2815 		return rc;
2816 
2817 	bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2818 
2819 	if (check_for_bonded_ethdev(bonded_eth_dev))
2820 		return rc;
2821 
2822 	internals = bonded_eth_dev->data->dev_private;
2823 
2824 	/* If the device isn't started don't handle interrupts */
2825 	if (!bonded_eth_dev->data->dev_started)
2826 		return rc;
2827 
2828 	/* verify that port_id is a valid slave of bonded port */
2829 	for (i = 0; i < internals->slave_count; i++) {
2830 		if (internals->slaves[i].port_id == port_id) {
2831 			valid_slave = 1;
2832 			break;
2833 		}
2834 	}
2835 
2836 	if (!valid_slave)
2837 		return rc;
2838 
2839 	/* Synchronize lsc callback parallel calls either by real link event
2840 	 * from the slaves PMDs or by the bonding PMD itself.
2841 	 */
2842 	rte_spinlock_lock(&internals->lsc_lock);
2843 
2844 	/* Search for port in active port list */
2845 	active_pos = find_slave_by_id(internals->active_slaves,
2846 			internals->active_slave_count, port_id);
2847 
2848 	ret = rte_eth_link_get_nowait(port_id, &link);
2849 	if (ret < 0)
2850 		RTE_BOND_LOG(ERR, "Slave (port %u) link get failed", port_id);
2851 
2852 	if (ret == 0 && link.link_status) {
2853 		if (active_pos < internals->active_slave_count)
2854 			goto link_update;
2855 
2856 		/* check link state properties if bonded link is up*/
2857 		if (bonded_eth_dev->data->dev_link.link_status == ETH_LINK_UP) {
2858 			if (link_properties_valid(bonded_eth_dev, &link) != 0)
2859 				RTE_BOND_LOG(ERR, "Invalid link properties "
2860 					     "for slave %d in bonding mode %d",
2861 					     port_id, internals->mode);
2862 		} else {
2863 			/* inherit slave link properties */
2864 			link_properties_set(bonded_eth_dev, &link);
2865 		}
2866 
2867 		/* If no active slave ports then set this port to be
2868 		 * the primary port.
2869 		 */
2870 		if (internals->active_slave_count < 1) {
2871 			/* If first active slave, then change link status */
2872 			bonded_eth_dev->data->dev_link.link_status =
2873 								ETH_LINK_UP;
2874 			internals->current_primary_port = port_id;
2875 			lsc_flag = 1;
2876 
2877 			mac_address_slaves_update(bonded_eth_dev);
2878 		}
2879 
2880 		activate_slave(bonded_eth_dev, port_id);
2881 
2882 		/* If the user has defined the primary port then default to
2883 		 * using it.
2884 		 */
2885 		if (internals->user_defined_primary_port &&
2886 				internals->primary_port == port_id)
2887 			bond_ethdev_primary_set(internals, port_id);
2888 	} else {
2889 		if (active_pos == internals->active_slave_count)
2890 			goto link_update;
2891 
2892 		/* Remove from active slave list */
2893 		deactivate_slave(bonded_eth_dev, port_id);
2894 
2895 		if (internals->active_slave_count < 1)
2896 			lsc_flag = 1;
2897 
2898 		/* Update primary id: take the first active slave from the list, or
2899 		 * fall back to the configured primary port if none is available */
2900 		if (port_id == internals->current_primary_port) {
2901 			if (internals->active_slave_count > 0)
2902 				bond_ethdev_primary_set(internals,
2903 						internals->active_slaves[0]);
2904 			else
2905 				internals->current_primary_port = internals->primary_port;
2906 			mac_address_slaves_update(bonded_eth_dev);
2907 		}
2908 	}
2909 
2910 link_update:
2911 	/**
2912 	 * Update bonded device link properties after any change to active
2913 	 * slaves
2914 	 */
2915 	bond_ethdev_link_update(bonded_eth_dev, 0);
2916 
2917 	if (lsc_flag) {
2918 		/* Cancel any possible outstanding interrupts if delays are enabled */
2919 		if (internals->link_up_delay_ms > 0 ||
2920 			internals->link_down_delay_ms > 0)
2921 			rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2922 					bonded_eth_dev);
2923 
2924 		if (bonded_eth_dev->data->dev_link.link_status) {
2925 			if (internals->link_up_delay_ms > 0)
2926 				rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2927 						bond_ethdev_delayed_lsc_propagation,
2928 						(void *)bonded_eth_dev);
2929 			else
2930 				rte_eth_dev_callback_process(bonded_eth_dev,
2931 						RTE_ETH_EVENT_INTR_LSC,
2932 						NULL);
2933 
2934 		} else {
2935 			if (internals->link_down_delay_ms > 0)
2936 				rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2937 						bond_ethdev_delayed_lsc_propagation,
2938 						(void *)bonded_eth_dev);
2939 			else
2940 				rte_eth_dev_callback_process(bonded_eth_dev,
2941 						RTE_ETH_EVENT_INTR_LSC,
2942 						NULL);
2943 		}
2944 	}
2945 
2946 	rte_spinlock_unlock(&internals->lsc_lock);
2947 
2948 	return rc;
2949 }
2950 
2951 static int
2952 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2953 		struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2954 {
2955 	unsigned i, j;
2956 	int result = 0;
2957 	int slave_reta_size;
2958 	unsigned reta_count;
2959 	struct bond_dev_private *internals = dev->data->dev_private;
2960 
2961 	if (reta_size != internals->reta_size)
2962 		return -EINVAL;
2963 
2964 	 /* Copy RETA table */
2965 	reta_count = (reta_size + RTE_RETA_GROUP_SIZE - 1) /
2966 			RTE_RETA_GROUP_SIZE;
2967 
2968 	for (i = 0; i < reta_count; i++) {
2969 		internals->reta_conf[i].mask = reta_conf[i].mask;
2970 		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2971 			if ((reta_conf[i].mask >> j) & 0x01)
2972 				internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2973 	}
2974 
2975 	/* Fill rest of array */
2976 	for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2977 		memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2978 				sizeof(internals->reta_conf[0]) * reta_count);
2979 
2980 	/* Propagate RETA over slaves */
2981 	for (i = 0; i < internals->slave_count; i++) {
2982 		slave_reta_size = internals->slaves[i].reta_size;
2983 		result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2984 				&internals->reta_conf[0], slave_reta_size);
2985 		if (result < 0)
2986 			return result;
2987 	}
2988 
2989 	return 0;
2990 }
2991 
2992 static int
2993 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2994 		struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2995 {
2996 	int i, j;
2997 	struct bond_dev_private *internals = dev->data->dev_private;
2998 
2999 	if (reta_size != internals->reta_size)
3000 		return -EINVAL;
3001 
3002 	 /* Copy RETA table */
3003 	for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
3004 		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3005 			if ((reta_conf[i].mask >> j) & 0x01)
3006 				reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
3007 
3008 	return 0;
3009 }
3010 
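/*
 * Update the RSS hash configuration: mask the requested hash types against
 * what the bond supports, remember the key, and apply the resulting
 * configuration to every slave.
 */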
3011 static int
3012 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
3013 		struct rte_eth_rss_conf *rss_conf)
3014 {
3015 	int i, result = 0;
3016 	struct bond_dev_private *internals = dev->data->dev_private;
3017 	struct rte_eth_rss_conf bond_rss_conf;
3018 
3019 	memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
3020 
3021 	bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
3022 
3023 	if (bond_rss_conf.rss_hf != 0)
3024 		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
3025 
3026 	if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
3027 			sizeof(internals->rss_key)) {
3028 		if (bond_rss_conf.rss_key_len == 0)
3029 			bond_rss_conf.rss_key_len = 40;
3030 		internals->rss_key_len = bond_rss_conf.rss_key_len;
3031 		memcpy(internals->rss_key, bond_rss_conf.rss_key,
3032 				internals->rss_key_len);
3033 	}
3034 
3035 	for (i = 0; i < internals->slave_count; i++) {
3036 		result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
3037 				&bond_rss_conf);
3038 		if (result < 0)
3039 			return result;
3040 	}
3041 
3042 	return 0;
3043 }
3044 
3045 static int
3046 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
3047 		struct rte_eth_rss_conf *rss_conf)
3048 {
3049 	struct bond_dev_private *internals = dev->data->dev_private;
3050 
3051 	rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
3052 	rss_conf->rss_key_len = internals->rss_key_len;
3053 	if (rss_conf->rss_key)
3054 		memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
3055 
3056 	return 0;
3057 }
3058 
3059 static int
3060 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
3061 {
3062 	struct rte_eth_dev *slave_eth_dev;
3063 	struct bond_dev_private *internals = dev->data->dev_private;
3064 	int ret, i;
3065 
3066 	rte_spinlock_lock(&internals->lock);
3067 
3068 	for (i = 0; i < internals->slave_count; i++) {
3069 		slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
3070 		if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
3071 			rte_spinlock_unlock(&internals->lock);
3072 			return -ENOTSUP;
3073 		}
3074 	}
3075 	for (i = 0; i < internals->slave_count; i++) {
3076 		ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
3077 		if (ret < 0) {
3078 			rte_spinlock_unlock(&internals->lock);
3079 			return ret;
3080 		}
3081 	}
3082 
3083 	rte_spinlock_unlock(&internals->lock);
3084 	return 0;
3085 }
3086 
3087 static int
3088 bond_ethdev_mac_address_set(struct rte_eth_dev *dev,
3089 			struct rte_ether_addr *addr)
3090 {
3091 	if (mac_address_set(dev, addr)) {
3092 		RTE_BOND_LOG(ERR, "Failed to update MAC address");
3093 		return -EINVAL;
3094 	}
3095 
3096 	return 0;
3097 }
3098 
3099 static int
3100 bond_filter_ctrl(struct rte_eth_dev *dev __rte_unused,
3101 		 enum rte_filter_type type, enum rte_filter_op op, void *arg)
3102 {
3103 	if (type == RTE_ETH_FILTER_GENERIC && op == RTE_ETH_FILTER_GET) {
3104 		*(const void **)arg = &bond_flow_ops;
3105 		return 0;
3106 	}
3107 	return -ENOTSUP;
3108 }
3109 
3110 static int
3111 bond_ethdev_mac_addr_add(struct rte_eth_dev *dev,
3112 			struct rte_ether_addr *mac_addr,
3113 			__rte_unused uint32_t index, uint32_t vmdq)
3114 {
3115 	struct rte_eth_dev *slave_eth_dev;
3116 	struct bond_dev_private *internals = dev->data->dev_private;
3117 	int ret, i;
3118 
3119 	rte_spinlock_lock(&internals->lock);
3120 
3121 	for (i = 0; i < internals->slave_count; i++) {
3122 		slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
3123 		if (*slave_eth_dev->dev_ops->mac_addr_add == NULL ||
3124 			 *slave_eth_dev->dev_ops->mac_addr_remove == NULL) {
3125 			ret = -ENOTSUP;
3126 			goto end;
3127 		}
3128 	}
3129 
3130 	for (i = 0; i < internals->slave_count; i++) {
3131 		ret = rte_eth_dev_mac_addr_add(internals->slaves[i].port_id,
3132 				mac_addr, vmdq);
3133 		if (ret < 0) {
3134 			/* rollback */
3135 			for (i--; i >= 0; i--)
3136 				rte_eth_dev_mac_addr_remove(
3137 					internals->slaves[i].port_id, mac_addr);
3138 			goto end;
3139 		}
3140 	}
3141 
3142 	ret = 0;
3143 end:
3144 	rte_spinlock_unlock(&internals->lock);
3145 	return ret;
3146 }
3147 
3148 static void
3149 bond_ethdev_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
3150 {
3151 	struct rte_eth_dev *slave_eth_dev;
3152 	struct bond_dev_private *internals = dev->data->dev_private;
3153 	int i;
3154 
3155 	rte_spinlock_lock(&internals->lock);
3156 
3157 	for (i = 0; i < internals->slave_count; i++) {
3158 		slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
3159 		if (*slave_eth_dev->dev_ops->mac_addr_remove == NULL)
3160 			goto end;
3161 	}
3162 
3163 	struct rte_ether_addr *mac_addr = &dev->data->mac_addrs[index];
3164 
3165 	for (i = 0; i < internals->slave_count; i++)
3166 		rte_eth_dev_mac_addr_remove(internals->slaves[i].port_id,
3167 				mac_addr);
3168 
3169 end:
3170 	rte_spinlock_unlock(&internals->lock);
3171 }
3172 
3173 const struct eth_dev_ops default_dev_ops = {
3174 	.dev_start            = bond_ethdev_start,
3175 	.dev_stop             = bond_ethdev_stop,
3176 	.dev_close            = bond_ethdev_close,
3177 	.dev_configure        = bond_ethdev_configure,
3178 	.dev_infos_get        = bond_ethdev_info,
3179 	.vlan_filter_set      = bond_ethdev_vlan_filter_set,
3180 	.rx_queue_setup       = bond_ethdev_rx_queue_setup,
3181 	.tx_queue_setup       = bond_ethdev_tx_queue_setup,
3182 	.rx_queue_release     = bond_ethdev_rx_queue_release,
3183 	.tx_queue_release     = bond_ethdev_tx_queue_release,
3184 	.link_update          = bond_ethdev_link_update,
3185 	.stats_get            = bond_ethdev_stats_get,
3186 	.stats_reset          = bond_ethdev_stats_reset,
3187 	.promiscuous_enable   = bond_ethdev_promiscuous_enable,
3188 	.promiscuous_disable  = bond_ethdev_promiscuous_disable,
3189 	.allmulticast_enable  = bond_ethdev_allmulticast_enable,
3190 	.allmulticast_disable = bond_ethdev_allmulticast_disable,
3191 	.reta_update          = bond_ethdev_rss_reta_update,
3192 	.reta_query           = bond_ethdev_rss_reta_query,
3193 	.rss_hash_update      = bond_ethdev_rss_hash_update,
3194 	.rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get,
3195 	.mtu_set              = bond_ethdev_mtu_set,
3196 	.mac_addr_set         = bond_ethdev_mac_address_set,
3197 	.mac_addr_add         = bond_ethdev_mac_addr_add,
3198 	.mac_addr_remove      = bond_ethdev_mac_addr_remove,
3199 	.filter_ctrl          = bond_filter_ctrl
3200 };
3201 
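/*
 * Allocate and initialise the bonded ethdev and its private data: set the
 * default configuration, apply the requested bonding mode and create the
 * VLAN filter bitmap. Returns the new port id on success or -1 on failure.
 */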
3202 static int
3203 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
3204 {
3205 	const char *name = rte_vdev_device_name(dev);
3206 	uint8_t socket_id = dev->device.numa_node;
3207 	struct bond_dev_private *internals = NULL;
3208 	struct rte_eth_dev *eth_dev = NULL;
3209 	uint32_t vlan_filter_bmp_size;
3210 
3211 	/* now do all data allocation - for the eth_dev structure
3212 	 * and internal (private) data
3213 	 */
3214 
3215 	/* reserve an ethdev entry */
3216 	eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
3217 	if (eth_dev == NULL) {
3218 		RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
3219 		goto err;
3220 	}
3221 
3222 	internals = eth_dev->data->dev_private;
3223 	eth_dev->data->nb_rx_queues = (uint16_t)1;
3224 	eth_dev->data->nb_tx_queues = (uint16_t)1;
3225 
3226 	/* Allocate memory for storing MAC addresses */
3227 	eth_dev->data->mac_addrs = rte_zmalloc_socket(name, RTE_ETHER_ADDR_LEN *
3228 			BOND_MAX_MAC_ADDRS, 0, socket_id);
3229 	if (eth_dev->data->mac_addrs == NULL) {
3230 		RTE_BOND_LOG(ERR,
3231 			     "Failed to allocate %u bytes needed to store MAC addresses",
3232 			     RTE_ETHER_ADDR_LEN * BOND_MAX_MAC_ADDRS);
3233 		goto err;
3234 	}
3235 
3236 	eth_dev->dev_ops = &default_dev_ops;
3237 	eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC |
3238 					RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
3239 
3240 	rte_spinlock_init(&internals->lock);
3241 	rte_spinlock_init(&internals->lsc_lock);
3242 
3243 	internals->port_id = eth_dev->data->port_id;
3244 	internals->mode = BONDING_MODE_INVALID;
3245 	internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
3246 	internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
3247 	internals->burst_xmit_hash = burst_xmit_l2_hash;
3248 	internals->user_defined_mac = 0;
3249 
3250 	internals->link_status_polling_enabled = 0;
3251 
3252 	internals->link_status_polling_interval_ms =
3253 		DEFAULT_POLLING_INTERVAL_10_MS;
3254 	internals->link_down_delay_ms = 0;
3255 	internals->link_up_delay_ms = 0;
3256 
3257 	internals->slave_count = 0;
3258 	internals->active_slave_count = 0;
3259 	internals->rx_offload_capa = 0;
3260 	internals->tx_offload_capa = 0;
3261 	internals->rx_queue_offload_capa = 0;
3262 	internals->tx_queue_offload_capa = 0;
3263 	internals->candidate_max_rx_pktlen = 0;
3264 	internals->max_rx_pktlen = 0;
3265 
3266 	/* Initially allow to choose any offload type */
3267 	internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
3268 
3269 	memset(&internals->default_rxconf, 0,
3270 	       sizeof(internals->default_rxconf));
3271 	memset(&internals->default_txconf, 0,
3272 	       sizeof(internals->default_txconf));
3273 
3274 	memset(&internals->rx_desc_lim, 0, sizeof(internals->rx_desc_lim));
3275 	memset(&internals->tx_desc_lim, 0, sizeof(internals->tx_desc_lim));
3276 
3277 	memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
3278 	memset(internals->slaves, 0, sizeof(internals->slaves));
3279 
3280 	TAILQ_INIT(&internals->flow_list);
3281 	internals->flow_isolated_valid = 0;
3282 
3283 	/* Set mode 4 default configuration */
3284 	bond_mode_8023ad_setup(eth_dev, NULL);
3285 	if (bond_ethdev_mode_set(eth_dev, mode)) {
3286 		RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
3287 				 eth_dev->data->port_id, mode);
3288 		goto err;
3289 	}
3290 
3291 	vlan_filter_bmp_size =
3292 		rte_bitmap_get_memory_footprint(RTE_ETHER_MAX_VLAN_ID + 1);
3293 	internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
3294 						   RTE_CACHE_LINE_SIZE);
3295 	if (internals->vlan_filter_bmpmem == NULL) {
3296 		RTE_BOND_LOG(ERR,
3297 			     "Failed to allocate vlan bitmap for bonded device %u",
3298 			     eth_dev->data->port_id);
3299 		goto err;
3300 	}
3301 
3302 	internals->vlan_filter_bmp = rte_bitmap_init(RTE_ETHER_MAX_VLAN_ID + 1,
3303 			internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
3304 	if (internals->vlan_filter_bmp == NULL) {
3305 		RTE_BOND_LOG(ERR,
3306 			     "Failed to init vlan bitmap for bonded device %u",
3307 			     eth_dev->data->port_id);
3308 		rte_free(internals->vlan_filter_bmpmem);
3309 		goto err;
3310 	}
3311 
3312 	return eth_dev->data->port_id;
3313 
3314 err:
3315 	rte_free(internals);
3316 	if (eth_dev != NULL)
3317 		eth_dev->data->dev_private = NULL;
3318 	rte_eth_dev_release_port(eth_dev);
3319 	return -1;
3320 }
3321 
3322 static int
3323 bond_probe(struct rte_vdev_device *dev)
3324 {
3325 	const char *name;
3326 	struct bond_dev_private *internals;
3327 	struct rte_kvargs *kvlist;
3328 	uint8_t bonding_mode, socket_id;
3329 	int arg_count, port_id;
3330 	uint8_t agg_mode;
3331 	struct rte_eth_dev *eth_dev;
3332 
3333 	if (!dev)
3334 		return -EINVAL;
3335 
3336 	name = rte_vdev_device_name(dev);
3337 	RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name);
3338 
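	/*
	 * A secondary process only attaches to the ethdev already created by the
	 * primary process; no new bonded device is allocated here.
	 */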
3339 	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
3340 		eth_dev = rte_eth_dev_attach_secondary(name);
3341 		if (!eth_dev) {
3342 			RTE_BOND_LOG(ERR, "Failed to probe %s", name);
3343 			return -1;
3344 		}
3345 		/* TODO: request info from primary to set up Rx and Tx */
3346 		eth_dev->dev_ops = &default_dev_ops;
3347 		eth_dev->device = &dev->device;
3348 		rte_eth_dev_probing_finish(eth_dev);
3349 		return 0;
3350 	}
3351 
3352 	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3353 		pmd_bond_init_valid_arguments);
3354 	if (kvlist == NULL)
3355 		return -1;
3356 
3357 	/* Parse link bonding mode */
3358 	if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3359 		if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3360 				&bond_ethdev_parse_slave_mode_kvarg,
3361 				&bonding_mode) != 0) {
3362 			RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s",
3363 					name);
3364 			goto parse_error;
3365 		}
3366 	} else {
3367 		RTE_BOND_LOG(ERR, "Mode must be specified exactly once for bonded "
3368 				"device %s", name);
3369 		goto parse_error;
3370 	}
3371 
3372 	/* Parse socket id to create bonding device on */
3373 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3374 	if (arg_count == 1) {
3375 		if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3376 				&bond_ethdev_parse_socket_id_kvarg, &socket_id)
3377 				!= 0) {
3378 			RTE_BOND_LOG(ERR, "Invalid socket Id specified for "
3379 					"bonded device %s", name);
3380 			goto parse_error;
3381 		}
3382 	} else if (arg_count > 1) {
3383 		RTE_BOND_LOG(ERR, "Socket Id can be specified only once for "
3384 				"bonded device %s", name);
3385 		goto parse_error;
3386 	} else {
3387 		socket_id = rte_socket_id();
3388 	}
3389 
3390 	dev->device.numa_node = socket_id;
3391 
3392 	/* Create link bonding eth device */
3393 	port_id = bond_alloc(dev, bonding_mode);
3394 	if (port_id < 0) {
3395 		RTE_BOND_LOG(ERR, "Failed to create bonded device %s in mode %u on "
3396 				"socket %u.", name, bonding_mode, socket_id);
3397 		goto parse_error;
3398 	}
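	/*
	 * Keep the parsed kvargs: slave, primary and the remaining arguments are
	 * applied later in bond_ethdev_configure(), once all devices are probed.
	 */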
3399 	internals = rte_eth_devices[port_id].data->dev_private;
3400 	internals->kvlist = kvlist;
3401 
3402 	if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3403 		if (rte_kvargs_process(kvlist,
3404 				PMD_BOND_AGG_MODE_KVARG,
3405 				&bond_ethdev_parse_slave_agg_mode_kvarg,
3406 				&agg_mode) != 0) {
3407 			RTE_BOND_LOG(ERR,
3408 					"Failed to parse agg selection mode for bonded device %s",
3409 					name);
3410 			goto parse_error;
3411 		}
3412 
3413 		if (internals->mode == BONDING_MODE_8023AD)
3414 			internals->mode4.agg_selection = agg_mode;
3415 	} else {
3416 		internals->mode4.agg_selection = AGG_STABLE;
3417 	}
3418 
3419 	rte_eth_dev_probing_finish(&rte_eth_devices[port_id]);
3420 	RTE_BOND_LOG(INFO, "Created bonded device %s on port %d in mode %u on "
3421 			"socket %u.", name, port_id, bonding_mode, socket_id);
3422 	return 0;
3423 
3424 parse_error:
3425 	rte_kvargs_free(kvlist);
3426 
3427 	return -1;
3428 }
3429 
3430 static int
3431 bond_remove(struct rte_vdev_device *dev)
3432 {
3433 	struct rte_eth_dev *eth_dev;
3434 	struct bond_dev_private *internals;
3435 	const char *name;
3436 	int ret = 0;
3437 
3438 	if (!dev)
3439 		return -EINVAL;
3440 
3441 	name = rte_vdev_device_name(dev);
3442 	RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name);
3443 
3444 	/* find an ethdev entry */
3445 	eth_dev = rte_eth_dev_allocated(name);
3446 	if (eth_dev == NULL)
3447 		return 0; /* port already released */
3448 
3449 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
3450 		return rte_eth_dev_release_port(eth_dev);
3451 
3452 	RTE_ASSERT(eth_dev->device == &dev->device);
3453 
3454 	internals = eth_dev->data->dev_private;
3455 	if (internals->slave_count != 0)
3456 		return -EBUSY;
3457 
3458 	if (eth_dev->data->dev_started == 1) {
3459 		ret = bond_ethdev_stop(eth_dev);
3460 		bond_ethdev_close(eth_dev);
3461 	}
3462 	rte_eth_dev_release_port(eth_dev);
3463 
3464 	return ret;
3465 }
3466 
3467 /* This part resolves the slave port ids after all the other pdevs and vdevs
3468  * have been allocated. */
3469 static int
3470 bond_ethdev_configure(struct rte_eth_dev *dev)
3471 {
3472 	const char *name = dev->device->name;
3473 	struct bond_dev_private *internals = dev->data->dev_private;
3474 	struct rte_kvargs *kvlist = internals->kvlist;
3475 	int arg_count;
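	/* The port id is the device's offset within the global rte_eth_devices array. */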
3476 	uint16_t port_id = dev - rte_eth_devices;
3477 	uint8_t agg_mode;
3478 
3479 	static const uint8_t default_rss_key[40] = {
3480 		0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3481 		0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3482 		0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3483 		0xBE, 0xAC, 0x01, 0xFA
3484 	};
3485 
3486 	unsigned i, j;
3487 
3488 	/*
3489 	 * If RSS is enabled, fill table with default values and
3490 	 * set the key to the value specified in the port RSS configuration.
3491 	 * Fall back to default RSS key if the key is not specified
3492 	 */
3493 	if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3494 		if (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key != NULL) {
3495 			internals->rss_key_len =
3496 				dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
3497 			memcpy(internals->rss_key,
3498 			       dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key,
3499 			       internals->rss_key_len);
3500 		} else {
3501 			internals->rss_key_len = sizeof(default_rss_key);
3502 			memcpy(internals->rss_key, default_rss_key,
3503 			       internals->rss_key_len);
3504 		}
3505 
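		/*
		 * Distribute the RSS redirection table entries round-robin across
		 * the configured Rx queues.
		 */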
3506 		for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3507 			internals->reta_conf[i].mask = ~0LL;
3508 			for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3509 				internals->reta_conf[i].reta[j] =
3510 						(i * RTE_RETA_GROUP_SIZE + j) %
3511 						dev->data->nb_rx_queues;
3512 		}
3513 	}
3514 
3515 	/* set the max_rx_pktlen */
3516 	internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3517 
3518 	/*
3519 	 * if no kvlist, it means that this bonded device has been created
3520 	 * through the bonding api.
3521 	 */
3522 	if (!kvlist)
3523 		return 0;
3524 
3525 	/* Parse MAC address for bonded device */
3526 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3527 	if (arg_count == 1) {
3528 		struct rte_ether_addr bond_mac;
3529 
3530 		if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3531 				       &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3532 			RTE_BOND_LOG(INFO, "Invalid mac address for bonded device %s",
3533 				     name);
3534 			return -1;
3535 		}
3536 
3537 		/* Set MAC address */
3538 		if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3539 			RTE_BOND_LOG(ERR,
3540 				     "Failed to set mac address on bonded device %s",
3541 				     name);
3542 			return -1;
3543 		}
3544 	} else if (arg_count > 1) {
3545 		RTE_BOND_LOG(ERR,
3546 			     "MAC address can be specified only once for bonded device %s",
3547 			     name);
3548 		return -1;
3549 	}
3550 
3551 	/* Parse/set balance mode transmit policy */
3552 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3553 	if (arg_count == 1) {
3554 		uint8_t xmit_policy;
3555 
3556 		if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3557 				       &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3558 		    0) {
3559 			RTE_BOND_LOG(INFO,
3560 				     "Invalid xmit policy specified for bonded device %s",
3561 				     name);
3562 			return -1;
3563 		}
3564 
3565 		/* Set balance mode transmit policy */
3566 		if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3567 			RTE_BOND_LOG(ERR,
3568 				     "Failed to set balance xmit policy on bonded device %s",
3569 				     name);
3570 			return -1;
3571 		}
3572 	} else if (arg_count > 1) {
3573 		RTE_BOND_LOG(ERR,
3574 			     "Transmit policy can be specified only once for bonded device %s",
3575 			     name);
3576 		return -1;
3577 	}
3578 
3579 	if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3580 		if (rte_kvargs_process(kvlist,
3581 				       PMD_BOND_AGG_MODE_KVARG,
3582 				       &bond_ethdev_parse_slave_agg_mode_kvarg,
3583 				       &agg_mode) != 0) {
3584 			RTE_BOND_LOG(ERR,
3585 				     "Failed to parse agg selection mode for bonded device %s",
3586 				     name);
			return -1;
3587 		}
3588 		if (internals->mode == BONDING_MODE_8023AD) {
3589 			int ret = rte_eth_bond_8023ad_agg_selection_set(port_id,
3590 					agg_mode);
3591 			if (ret < 0) {
3592 				RTE_BOND_LOG(ERR,
3593 					"Invalid args for agg selection set for bonded device %s",
3594 					name);
3595 				return -1;
3596 			}
3597 		}
3598 	}
3599 
3600 	/* Parse/add slave ports to bonded device */
3601 	if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3602 		struct bond_ethdev_slave_ports slave_ports;
3603 		unsigned i;
3604 
3605 		memset(&slave_ports, 0, sizeof(slave_ports));
3606 
3607 		if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3608 				       &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3609 			RTE_BOND_LOG(ERR,
3610 				     "Failed to parse slave ports for bonded device %s",
3611 				     name);
3612 			return -1;
3613 		}
3614 
3615 		for (i = 0; i < slave_ports.slave_count; i++) {
3616 			if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3617 				RTE_BOND_LOG(ERR,
3618 					     "Failed to add port %d as slave to bonded device %s",
3619 					     slave_ports.slaves[i], name);
3620 			}
3621 		}
3622 
3623 	} else {
3624 		RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name);
3625 		return -1;
3626 	}
3627 
3628 	/* Parse/set primary slave port id*/
3629 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3630 	if (arg_count == 1) {
3631 		uint16_t primary_slave_port_id;
3632 
3633 		if (rte_kvargs_process(kvlist,
3634 				       PMD_BOND_PRIMARY_SLAVE_KVARG,
3635 				       &bond_ethdev_parse_primary_slave_port_id_kvarg,
3636 				       &primary_slave_port_id) < 0) {
3637 			RTE_BOND_LOG(INFO,
3638 				     "Invalid primary slave port id specified for bonded device %s",
3639 				     name);
3640 			return -1;
3641 		}
3642 
3643 		/* Set the primary slave port id */
3644 		if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3645 		    != 0) {
3646 			RTE_BOND_LOG(ERR,
3647 				     "Failed to set primary slave port %d on bonded device %s",
3648 				     primary_slave_port_id, name);
3649 			return -1;
3650 		}
3651 	} else if (arg_count > 1) {
3652 		RTE_BOND_LOG(INFO,
3653 			     "Primary slave can be specified only once for bonded device %s",
3654 			     name);
3655 		return -1;
3656 	}
3657 
3658 	/* Parse link status monitor polling interval */
3659 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3660 	if (arg_count == 1) {
3661 		uint32_t lsc_poll_interval_ms;
3662 
3663 		if (rte_kvargs_process(kvlist,
3664 				       PMD_BOND_LSC_POLL_PERIOD_KVARG,
3665 				       &bond_ethdev_parse_time_ms_kvarg,
3666 				       &lsc_poll_interval_ms) < 0) {
3667 			RTE_BOND_LOG(INFO,
3668 				     "Invalid lsc polling interval value specified for bonded"
3669 				     " device %s", name);
3670 			return -1;
3671 		}
3672 
3673 		if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3674 		    != 0) {
3675 			RTE_BOND_LOG(ERR,
3676 				     "Failed to set lsc monitor polling interval (%u ms) on bonded device %s",
3677 				     lsc_poll_interval_ms, name);
3678 			return -1;
3679 		}
3680 	} else if (arg_count > 1) {
3681 		RTE_BOND_LOG(INFO,
3682 			     "LSC polling interval can be specified only once for bonded"
3683 			     " device %s", name);
3684 		return -1;
3685 	}
3686 
3687 	/* Parse link up interrupt propagation delay */
3688 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3689 	if (arg_count == 1) {
3690 		uint32_t link_up_delay_ms;
3691 
3692 		if (rte_kvargs_process(kvlist,
3693 				       PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3694 				       &bond_ethdev_parse_time_ms_kvarg,
3695 				       &link_up_delay_ms) < 0) {
3696 			RTE_BOND_LOG(INFO,
3697 				     "Invalid link up propagation delay value specified for"
3698 				     " bonded device %s", name);
3699 			return -1;
3700 		}
3701 
3702 		/* Set link up propagation delay */
3703 		if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3704 		    != 0) {
3705 			RTE_BOND_LOG(ERR,
3706 				     "Failed to set link up propagation delay (%u ms) on bonded"
3707 				     " device %s", link_up_delay_ms, name);
3708 			return -1;
3709 		}
3710 	} else if (arg_count > 1) {
3711 		RTE_BOND_LOG(INFO,
3712 			     "Link up propagation delay can be specified only once for"
3713 			     " bonded device %s", name);
3714 		return -1;
3715 	}
3716 
3717 	/* Parse link down interrupt propagation delay */
3718 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3719 	if (arg_count == 1) {
3720 		uint32_t link_down_delay_ms;
3721 
3722 		if (rte_kvargs_process(kvlist,
3723 				       PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3724 				       &bond_ethdev_parse_time_ms_kvarg,
3725 				       &link_down_delay_ms) < 0) {
3726 			RTE_BOND_LOG(INFO,
3727 				     "Invalid link down propagation delay value specified for"
3728 				     " bonded device %s", name);
3729 			return -1;
3730 		}
3731 
3732 		/* Set link down propagation delay */
3733 		if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3734 		    != 0) {
3735 			RTE_BOND_LOG(ERR,
3736 				     "Failed to set link down propagation delay (%u ms) on bonded device %s",
3737 				     link_down_delay_ms, name);
3738 			return -1;
3739 		}
3740 	} else if (arg_count > 1) {
3741 		RTE_BOND_LOG(INFO,
3742 			     "Link down propagation delay can be specified only once for bonded device %s",
3743 			     name);
3744 		return -1;
3745 	}
3746 
3747 	return 0;
3748 }
3749 
3750 struct rte_vdev_driver pmd_bond_drv = {
3751 	.probe = bond_probe,
3752 	.remove = bond_remove,
3753 };
3754 
3755 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3756 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3757 
3758 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3759 	"slave=<ifc> "
3760 	"primary=<ifc> "
3761 	"mode=[0-6] "
3762 	"xmit_policy=[l2 | l23 | l34] "
3763 	"agg_mode=[count | stable | bandwidth] "
3764 	"socket_id=<int> "
3765 	"mac=<mac addr> "
3766 	"lsc_poll_period_ms=<int> "
3767 	"up_delay=<int> "
3768 	"down_delay=<int>");
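/*
 * Illustrative only: one way to instantiate a bonded device from these kvargs
 * on the EAL command line (the PCI addresses below are placeholders):
 *
 *   --vdev 'net_bonding0,mode=1,slave=0000:01:00.0,slave=0000:01:00.1,primary=0000:01:00.0'
 */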
3769 
3770 RTE_LOG_REGISTER(bond_logtype, pmd.net.bond, NOTICE);
3771