1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2017 Intel Corporation
3 */
4 #include <stdlib.h>
5 #include <stdbool.h>
6 #include <netinet/in.h>
7
8 #include <rte_mbuf.h>
9 #include <rte_malloc.h>
10 #include <ethdev_driver.h>
11 #include <ethdev_vdev.h>
12 #include <rte_tcp.h>
13 #include <rte_udp.h>
14 #include <rte_ip.h>
15 #include <rte_ip_frag.h>
16 #include <rte_devargs.h>
17 #include <rte_kvargs.h>
18 #include <rte_bus_vdev.h>
19 #include <rte_alarm.h>
20 #include <rte_cycles.h>
21 #include <rte_string_fns.h>
22
23 #include "rte_eth_bond.h"
24 #include "eth_bond_private.h"
25 #include "eth_bond_8023ad_private.h"
26
27 #define REORDER_PERIOD_MS 10
28 #define DEFAULT_POLLING_INTERVAL_10_MS (10)
29 #define BOND_MAX_MAC_ADDRS 16
30
31 #define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)
32
33 /* Table for statistics in mode 5 TLB */
34 static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
35
36 static inline size_t
37 get_vlan_offset(struct rte_ether_hdr *eth_hdr, uint16_t *proto)
38 {
39 size_t vlan_offset = 0;
40
41 if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto ||
42 rte_cpu_to_be_16(RTE_ETHER_TYPE_QINQ) == *proto) {
43 struct rte_vlan_hdr *vlan_hdr =
44 (struct rte_vlan_hdr *)(eth_hdr + 1);
45
46 vlan_offset = sizeof(struct rte_vlan_hdr);
47 *proto = vlan_hdr->eth_proto;
48
49 if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto) {
50 vlan_hdr = vlan_hdr + 1;
51 *proto = vlan_hdr->eth_proto;
52 vlan_offset += sizeof(struct rte_vlan_hdr);
53 }
54 }
55 return vlan_offset;
56 }
57
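/*
 * RX burst used by the round-robin style modes: poll each active slave in
 * turn, starting from the slave recorded in the queue, until nb_pkts packets
 * have been gathered or all active slaves have been polled; the starting
 * slave is advanced by one on every call to spread the load across slaves.
 */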
58 static uint16_t
59 bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
60 {
61 struct bond_dev_private *internals;
62
63 uint16_t num_rx_total = 0;
64 uint16_t slave_count;
65 uint16_t active_slave;
66 int i;
67
68 /* Cast to structure, containing bonded device's port id and queue id */
69 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
70 internals = bd_rx_q->dev_private;
71 slave_count = internals->active_slave_count;
72 active_slave = bd_rx_q->active_slave;
73
74 for (i = 0; i < slave_count && nb_pkts; i++) {
75 uint16_t num_rx_slave;
76
77 /* Offset of pointer to *bufs increases as packets are received
78 * from other slaves */
79 num_rx_slave =
80 rte_eth_rx_burst(internals->active_slaves[active_slave],
81 bd_rx_q->queue_id,
82 bufs + num_rx_total, nb_pkts);
83 num_rx_total += num_rx_slave;
84 nb_pkts -= num_rx_slave;
85 if (++active_slave == slave_count)
86 active_slave = 0;
87 }
88
89 if (++bd_rx_q->active_slave >= slave_count)
90 bd_rx_q->active_slave = 0;
91 return num_rx_total;
92 }
93
94 static uint16_t
95 bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
96 uint16_t nb_pkts)
97 {
98 struct bond_dev_private *internals;
99
100 /* Cast to structure, containing bonded device's port id and queue id */
101 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
102
103 internals = bd_rx_q->dev_private;
104
105 return rte_eth_rx_burst(internals->current_primary_port,
106 bd_rx_q->queue_id, bufs, nb_pkts);
107 }
108
109 static inline uint8_t
110 is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
111 {
112 const uint16_t ether_type_slow_be =
113 rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW);
114
115 return !((mbuf->ol_flags & RTE_MBUF_F_RX_VLAN) ? mbuf->vlan_tci : 0) &&
116 (ethertype == ether_type_slow_be &&
117 (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
118 }
119
120 /*****************************************************************************
121 * Flow director's setup for mode 4 optimization
122 */
123
124 static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
125 .dst.addr_bytes = { 0 },
126 .src.addr_bytes = { 0 },
127 .type = RTE_BE16(RTE_ETHER_TYPE_SLOW),
128 };
129
130 static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
131 .dst.addr_bytes = { 0 },
132 .src.addr_bytes = { 0 },
133 .type = 0xFFFF,
134 };
135
136 static struct rte_flow_item flow_item_8023ad[] = {
137 {
138 .type = RTE_FLOW_ITEM_TYPE_ETH,
139 .spec = &flow_item_eth_type_8023ad,
140 .last = NULL,
141 .mask = &flow_item_eth_mask_type_8023ad,
142 },
143 {
144 .type = RTE_FLOW_ITEM_TYPE_END,
145 .spec = NULL,
146 .last = NULL,
147 .mask = NULL,
148 }
149 };
150
151 const struct rte_flow_attr flow_attr_8023ad = {
152 .group = 0,
153 .priority = 0,
154 .ingress = 1,
155 .egress = 0,
156 .reserved = 0,
157 };
158
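/*
 * Check that a flow rule redirecting slow protocol frames to a queue can be
 * created on the given slave and that the slave has enough RX/TX queues to
 * host the extra dedicated queues used by mode 4.
 */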
159 int
160 bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
161 uint16_t slave_port) {
162 struct rte_eth_dev_info slave_info;
163 struct rte_flow_error error;
164 struct bond_dev_private *internals = bond_dev->data->dev_private;
165
166 const struct rte_flow_action_queue lacp_queue_conf = {
167 .index = 0,
168 };
169
170 const struct rte_flow_action actions[] = {
171 {
172 .type = RTE_FLOW_ACTION_TYPE_QUEUE,
173 .conf = &lacp_queue_conf
174 },
175 {
176 .type = RTE_FLOW_ACTION_TYPE_END,
177 }
178 };
179
180 int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
181 flow_item_8023ad, actions, &error);
182 if (ret < 0) {
183 RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
184 __func__, error.message, slave_port,
185 internals->mode4.dedicated_queues.rx_qid);
186 return -1;
187 }
188
189 ret = rte_eth_dev_info_get(slave_port, &slave_info);
190 if (ret != 0) {
191 RTE_BOND_LOG(ERR,
192 "%s: Error during getting device (port %u) info: %s\n",
193 __func__, slave_port, strerror(-ret));
194
195 return ret;
196 }
197
198 if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
199 slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
200 RTE_BOND_LOG(ERR,
201 "%s: Slave %d capabilities doesn't allow to allocate additional queues",
202 __func__, slave_port);
203 return -1;
204 }
205
206 return 0;
207 }
208
209 int
210 bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
211 struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
212 struct bond_dev_private *internals = bond_dev->data->dev_private;
213 struct rte_eth_dev_info bond_info;
214 uint16_t idx;
215 int ret;
216
217 /* Verify that all slaves in the bonding device support flow director */
218 if (internals->slave_count > 0) {
219 ret = rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);
220 if (ret != 0) {
221 RTE_BOND_LOG(ERR,
222 "%s: Error during getting device (port %u) info: %s\n",
223 __func__, bond_dev->data->port_id,
224 strerror(-ret));
225
226 return ret;
227 }
228
229 internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
230 internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;
231
232 for (idx = 0; idx < internals->slave_count; idx++) {
233 if (bond_ethdev_8023ad_flow_verify(bond_dev,
234 internals->slaves[idx].port_id) != 0)
235 return -1;
236 }
237 }
238
239 return 0;
240 }
241
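/*
 * Install the flow rule that steers IEEE 802.3 slow protocol (LACP) frames
 * received on the slave into its dedicated RX queue.
 */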
242 int
243 bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {
244
245 struct rte_flow_error error;
246 struct bond_dev_private *internals = bond_dev->data->dev_private;
247 struct rte_flow_action_queue lacp_queue_conf = {
248 .index = internals->mode4.dedicated_queues.rx_qid,
249 };
250
251 const struct rte_flow_action actions[] = {
252 {
253 .type = RTE_FLOW_ACTION_TYPE_QUEUE,
254 .conf = &lacp_queue_conf
255 },
256 {
257 .type = RTE_FLOW_ACTION_TYPE_END,
258 }
259 };
260
261 internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
262 &flow_attr_8023ad, flow_item_8023ad, actions, &error);
263 if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
264 RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
265 "(slave_port=%d queue_id=%d)",
266 error.message, slave_port,
267 internals->mode4.dedicated_queues.rx_qid);
268 return -1;
269 }
270
271 return 0;
272 }
273
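/*
 * Common RX path for mode 4 (802.3AD). Slaves are polled round-robin; LACP
 * and marker frames are handed to the mode 4 state machine (unless a
 * dedicated RX queue already filters them in hardware), and frames from
 * slaves that are not collecting, or not addressed to the bonded interface
 * when promiscuous/allmulti is off, are dropped and removed from the array.
 */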
274 static inline uint16_t
275 rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts,
276 bool dedicated_rxq)
277 {
278 /* Cast to structure, containing bonded device's port id and queue id */
279 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
280 struct bond_dev_private *internals = bd_rx_q->dev_private;
281 struct rte_eth_dev *bonded_eth_dev =
282 &rte_eth_devices[internals->port_id];
283 struct rte_ether_addr *bond_mac = bonded_eth_dev->data->mac_addrs;
284 struct rte_ether_hdr *hdr;
285
286 const uint16_t ether_type_slow_be =
287 rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW);
288 uint16_t num_rx_total = 0; /* Total number of received packets */
289 uint16_t slaves[RTE_MAX_ETHPORTS];
290 uint16_t slave_count, idx;
291
292 uint8_t collecting; /* current slave collecting status */
293 const uint8_t promisc = rte_eth_promiscuous_get(internals->port_id);
294 const uint8_t allmulti = rte_eth_allmulticast_get(internals->port_id);
295 uint8_t subtype;
296 uint16_t i;
297 uint16_t j;
298 uint16_t k;
299
300 /* Copy slave list to protect against slave up/down changes during rx
301 * bursting */
302 slave_count = internals->active_slave_count;
303 memcpy(slaves, internals->active_slaves,
304 sizeof(internals->active_slaves[0]) * slave_count);
305
306 idx = bd_rx_q->active_slave;
307 if (idx >= slave_count) {
308 bd_rx_q->active_slave = 0;
309 idx = 0;
310 }
311 for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
312 j = num_rx_total;
313 collecting = ACTOR_STATE(&bond_mode_8023ad_ports[slaves[idx]],
314 COLLECTING);
315
316 /* Read packets from this slave */
317 num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
318 &bufs[num_rx_total], nb_pkts - num_rx_total);
319
320 for (k = j; k < 2 && k < num_rx_total; k++)
321 rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));
322
323 /* Handle slow protocol packets. */
324 while (j < num_rx_total) {
325 if (j + 3 < num_rx_total)
326 rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));
327
328 hdr = rte_pktmbuf_mtod(bufs[j], struct rte_ether_hdr *);
329 subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;
330
331 /* Remove packet from array if:
332 * - it is slow packet but no dedicated rxq is present,
333 * - slave is not in collecting state,
334 * - bonding interface is not in promiscuous mode:
335 * - packet is unicast and address does not match,
336 * - packet is multicast and bonding interface
337 * is not in allmulti,
338 */
339 if (unlikely(
340 (!dedicated_rxq &&
341 is_lacp_packets(hdr->ether_type, subtype,
342 bufs[j])) ||
343 !collecting ||
344 (!promisc &&
345 ((rte_is_unicast_ether_addr(&hdr->dst_addr) &&
346 !rte_is_same_ether_addr(bond_mac,
347 &hdr->dst_addr)) ||
348 (!allmulti &&
349 rte_is_multicast_ether_addr(&hdr->dst_addr)))))) {
350
351 if (hdr->ether_type == ether_type_slow_be) {
352 bond_mode_8023ad_handle_slow_pkt(
353 internals, slaves[idx], bufs[j]);
354 } else
355 rte_pktmbuf_free(bufs[j]);
356
357 /* Packet is managed by mode 4 or dropped, shift the array */
358 num_rx_total--;
359 if (j < num_rx_total) {
360 memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
361 (num_rx_total - j));
362 }
363 } else
364 j++;
365 }
366 if (unlikely(++idx == slave_count))
367 idx = 0;
368 }
369
370 if (++bd_rx_q->active_slave >= slave_count)
371 bd_rx_q->active_slave = 0;
372
373 return num_rx_total;
374 }
375
376 static uint16_t
377 bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
378 uint16_t nb_pkts)
379 {
380 return rx_burst_8023ad(queue, bufs, nb_pkts, false);
381 }
382
383 static uint16_t
384 bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
385 uint16_t nb_pkts)
386 {
387 return rx_burst_8023ad(queue, bufs, nb_pkts, true);
388 }
389
390 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
391 uint32_t burstnumberRX;
392 uint32_t burstnumberTX;
393
394 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
395
396 static void
397 arp_op_name(uint16_t arp_op, char *buf, size_t buf_len)
398 {
399 switch (arp_op) {
400 case RTE_ARP_OP_REQUEST:
401 strlcpy(buf, "ARP Request", buf_len);
402 return;
403 case RTE_ARP_OP_REPLY:
404 strlcpy(buf, "ARP Reply", buf_len);
405 return;
406 case RTE_ARP_OP_REVREQUEST:
407 strlcpy(buf, "Reverse ARP Request", buf_len);
408 return;
409 case RTE_ARP_OP_REVREPLY:
410 strlcpy(buf, "Reverse ARP Reply", buf_len);
411 return;
412 case RTE_ARP_OP_INVREQUEST:
413 strlcpy(buf, "Peer Identify Request", buf_len);
414 return;
415 case RTE_ARP_OP_INVREPLY:
416 strlcpy(buf, "Peer Identify Reply", buf_len);
417 return;
418 default:
419 break;
420 }
421 strlcpy(buf, "Unknown", buf_len);
422 return;
423 }
424 #endif
425 #define MaxIPv4String 16
426 static void
427 ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
428 {
429 uint32_t ipv4_addr;
430
431 ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
432 snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
433 (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
434 ipv4_addr & 0xFF);
435 }
436
437 #define MAX_CLIENTS_NUMBER 128
438 uint8_t active_clients;
439 struct client_stats_t {
440 uint16_t port;
441 uint32_t ipv4_addr;
442 uint32_t ipv4_rx_packets;
443 uint32_t ipv4_tx_packets;
444 };
445 struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
446
447 static void
448 update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
449 {
450 int i = 0;
451
452 for (; i < MAX_CLIENTS_NUMBER; i++) {
453 if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
454 /* Just update RX packets number for this client */
455 if (TXorRXindicator == &burstnumberRX)
456 client_stats[i].ipv4_rx_packets++;
457 else
458 client_stats[i].ipv4_tx_packets++;
459 return;
460 }
461 }
462 /* We have a new client: insert it into the table and increment its stats */
463 if (TXorRXindicator == &burstnumberRX)
464 client_stats[active_clients].ipv4_rx_packets++;
465 else
466 client_stats[active_clients].ipv4_tx_packets++;
467 client_stats[active_clients].ipv4_addr = addr;
468 client_stats[active_clients].port = port;
469 active_clients++;
470
471 }
472
473 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
474 #define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
475 rte_log(RTE_LOG_DEBUG, bond_logtype, \
476 "%s port:%d SrcMAC:" RTE_ETHER_ADDR_PRT_FMT " SrcIP:%s " \
477 "DstMAC:" RTE_ETHER_ADDR_PRT_FMT " DstIP:%s %s %d\n", \
478 info, \
479 port, \
480 RTE_ETHER_ADDR_BYTES(&eth_h->src_addr), \
481 src_ip, \
482 RTE_ETHER_ADDR_BYTES(&eth_h->dst_addr), \
483 dst_ip, \
484 arp_op, ++burstnumber)
485 #endif
486
487 static void
488 mode6_debug(const char __rte_unused *info,
489 struct rte_ether_hdr *eth_h, uint16_t port,
490 uint32_t __rte_unused *burstnumber)
491 {
492 struct rte_ipv4_hdr *ipv4_h;
493 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
494 struct rte_arp_hdr *arp_h;
495 char dst_ip[16];
496 char ArpOp[24];
497 char buf[16];
498 #endif
499 char src_ip[16];
500
501 uint16_t ether_type = eth_h->ether_type;
502 uint16_t offset = get_vlan_offset(eth_h, &ether_type);
503
504 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
505 strlcpy(buf, info, 16);
506 #endif
507
508 if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) {
509 ipv4_h = (struct rte_ipv4_hdr *)((char *)(eth_h + 1) + offset);
510 ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
511 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
512 ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
513 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
514 #endif
515 update_client_stats(ipv4_h->src_addr, port, burstnumber);
516 }
517 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
518 else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
519 arp_h = (struct rte_arp_hdr *)((char *)(eth_h + 1) + offset);
520 ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
521 ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
522 arp_op_name(rte_be_to_cpu_16(arp_h->arp_opcode),
523 ArpOp, sizeof(ArpOp));
524 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
525 }
526 #endif
527 }
528 #endif
529
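/*
 * RX burst for mode 6 (ALB): receive with the round-robin RX path, then pass
 * any ARP frames to the ALB logic so the client table can be updated.
 */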
530 static uint16_t
531 bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
532 {
533 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
534 struct bond_dev_private *internals = bd_rx_q->dev_private;
535 struct rte_ether_hdr *eth_h;
536 uint16_t ether_type, offset;
537 uint16_t nb_recv_pkts;
538 int i;
539
540 nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);
541
542 for (i = 0; i < nb_recv_pkts; i++) {
543 eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
544 ether_type = eth_h->ether_type;
545 offset = get_vlan_offset(eth_h, &ether_type);
546
547 if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
548 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
549 mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
550 #endif
551 bond_mode_alb_arp_recv(eth_h, offset, internals);
552 }
553 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
554 else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4))
555 mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
556 #endif
557 }
558
559 return nb_recv_pkts;
560 }
561
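/*
 * TX burst for mode 0 (round-robin): distribute the packets across the active
 * slaves in round-robin order and burst each per-slave array; packets that a
 * slave fails to send are moved to the end of bufs so the caller can retry.
 */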
562 static uint16_t
563 bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
564 uint16_t nb_pkts)
565 {
566 struct bond_dev_private *internals;
567 struct bond_tx_queue *bd_tx_q;
568
569 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
570 uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
571
572 uint16_t num_of_slaves;
573 uint16_t slaves[RTE_MAX_ETHPORTS];
574
575 uint16_t num_tx_total = 0, num_tx_slave;
576
577 static int slave_idx = 0;
578 int i, cslave_idx = 0, tx_fail_total = 0;
579
580 bd_tx_q = (struct bond_tx_queue *)queue;
581 internals = bd_tx_q->dev_private;
582
583 /* Copy slave list to protect against slave up/down changes during tx
584 * bursting */
585 num_of_slaves = internals->active_slave_count;
586 memcpy(slaves, internals->active_slaves,
587 sizeof(internals->active_slaves[0]) * num_of_slaves);
588
589 if (num_of_slaves < 1)
590 return num_tx_total;
591
592 /* Populate the per-slave mbuf arrays with the packets to be sent on each slave */
593 for (i = 0; i < nb_pkts; i++) {
594 cslave_idx = (slave_idx + i) % num_of_slaves;
595 slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
596 }
597
598 /* increment current slave index so the next call to tx burst starts on the
599 * next slave */
600 slave_idx = ++cslave_idx;
601
602 /* Send packet burst on each slave device */
603 for (i = 0; i < num_of_slaves; i++) {
604 if (slave_nb_pkts[i] > 0) {
605 num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
606 slave_bufs[i], slave_nb_pkts[i]);
607
608 /* if tx burst fails move packets to end of bufs */
609 if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
610 int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;
611
612 tx_fail_total += tx_fail_slave;
613
614 memcpy(&bufs[nb_pkts - tx_fail_total],
615 &slave_bufs[i][num_tx_slave],
616 tx_fail_slave * sizeof(bufs[0]));
617 }
618 num_tx_total += num_tx_slave;
619 }
620 }
621
622 return num_tx_total;
623 }
624
625 static uint16_t
626 bond_ethdev_tx_burst_active_backup(void *queue,
627 struct rte_mbuf **bufs, uint16_t nb_pkts)
628 {
629 struct bond_dev_private *internals;
630 struct bond_tx_queue *bd_tx_q;
631
632 bd_tx_q = (struct bond_tx_queue *)queue;
633 internals = bd_tx_q->dev_private;
634
635 if (internals->active_slave_count < 1)
636 return 0;
637
638 return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
639 bufs, nb_pkts);
640 }
641
642 static inline uint16_t
643 ether_hash(struct rte_ether_hdr *eth_hdr)
644 {
645 unaligned_uint16_t *word_src_addr =
646 (unaligned_uint16_t *)eth_hdr->src_addr.addr_bytes;
647 unaligned_uint16_t *word_dst_addr =
648 (unaligned_uint16_t *)eth_hdr->dst_addr.addr_bytes;
649
650 return (word_src_addr[0] ^ word_dst_addr[0]) ^
651 (word_src_addr[1] ^ word_dst_addr[1]) ^
652 (word_src_addr[2] ^ word_dst_addr[2]);
653 }
654
655 static inline uint32_t
656 ipv4_hash(struct rte_ipv4_hdr *ipv4_hdr)
657 {
658 return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
659 }
660
661 static inline uint32_t
662 ipv6_hash(struct rte_ipv6_hdr *ipv6_hdr)
663 {
664 unaligned_uint32_t *word_src_addr =
665 (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
666 unaligned_uint32_t *word_dst_addr =
667 (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);
668
669 return (word_src_addr[0] ^ word_dst_addr[0]) ^
670 (word_src_addr[1] ^ word_dst_addr[1]) ^
671 (word_src_addr[2] ^ word_dst_addr[2]) ^
672 (word_src_addr[3] ^ word_dst_addr[3]);
673 }
674
675
676 void
677 burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
678 uint16_t slave_count, uint16_t *slaves)
679 {
680 struct rte_ether_hdr *eth_hdr;
681 uint32_t hash;
682 int i;
683
684 for (i = 0; i < nb_pkts; i++) {
685 eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
686
687 hash = ether_hash(eth_hdr);
688
689 slaves[i] = (hash ^= hash >> 8) % slave_count;
690 }
691 }
692
693 void
694 burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
695 uint16_t slave_count, uint16_t *slaves)
696 {
697 uint16_t i;
698 struct rte_ether_hdr *eth_hdr;
699 uint16_t proto;
700 size_t vlan_offset;
701 uint32_t hash, l3hash;
702
703 for (i = 0; i < nb_pkts; i++) {
704 eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
705 l3hash = 0;
706
707 proto = eth_hdr->ether_type;
708 hash = ether_hash(eth_hdr);
709
710 vlan_offset = get_vlan_offset(eth_hdr, &proto);
711
712 if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) == proto) {
713 struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *)
714 ((char *)(eth_hdr + 1) + vlan_offset);
715 l3hash = ipv4_hash(ipv4_hdr);
716
717 } else if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) == proto) {
718 struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *)
719 ((char *)(eth_hdr + 1) + vlan_offset);
720 l3hash = ipv6_hash(ipv6_hdr);
721 }
722
723 hash = hash ^ l3hash;
724 hash ^= hash >> 16;
725 hash ^= hash >> 8;
726
727 slaves[i] = hash % slave_count;
728 }
729 }
730
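/*
 * Layer 3+4 transmit policy: hash the IPv4/IPv6 addresses together with the
 * TCP/UDP ports (when the packet is not an IPv4 fragment and the L4 header
 * fits inside the first segment) to pick an output slave for each packet.
 */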
731 void
732 burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
733 uint16_t slave_count, uint16_t *slaves)
734 {
735 struct rte_ether_hdr *eth_hdr;
736 uint16_t proto;
737 size_t vlan_offset;
738 int i;
739
740 struct rte_udp_hdr *udp_hdr;
741 struct rte_tcp_hdr *tcp_hdr;
742 uint32_t hash, l3hash, l4hash;
743
744 for (i = 0; i < nb_pkts; i++) {
745 eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
746 size_t pkt_end = (size_t)eth_hdr + rte_pktmbuf_data_len(buf[i]);
747 proto = eth_hdr->ether_type;
748 vlan_offset = get_vlan_offset(eth_hdr, &proto);
749 l3hash = 0;
750 l4hash = 0;
751
752 if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) == proto) {
753 struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *)
754 ((char *)(eth_hdr + 1) + vlan_offset);
755 size_t ip_hdr_offset;
756
757 l3hash = ipv4_hash(ipv4_hdr);
758
759 /* there is no L4 header in fragmented packet */
760 if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
761 == 0)) {
762 ip_hdr_offset = (ipv4_hdr->version_ihl
763 & RTE_IPV4_HDR_IHL_MASK) *
764 RTE_IPV4_IHL_MULTIPLIER;
765
766 if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
767 tcp_hdr = (struct rte_tcp_hdr *)
768 ((char *)ipv4_hdr +
769 ip_hdr_offset);
770 if ((size_t)tcp_hdr + sizeof(*tcp_hdr)
771 < pkt_end)
772 l4hash = HASH_L4_PORTS(tcp_hdr);
773 } else if (ipv4_hdr->next_proto_id ==
774 IPPROTO_UDP) {
775 udp_hdr = (struct rte_udp_hdr *)
776 ((char *)ipv4_hdr +
777 ip_hdr_offset);
778 if ((size_t)udp_hdr + sizeof(*udp_hdr)
779 < pkt_end)
780 l4hash = HASH_L4_PORTS(udp_hdr);
781 }
782 }
783 } else if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) == proto) {
784 struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *)
785 ((char *)(eth_hdr + 1) + vlan_offset);
786 l3hash = ipv6_hash(ipv6_hdr);
787
788 if (ipv6_hdr->proto == IPPROTO_TCP) {
789 tcp_hdr = (struct rte_tcp_hdr *)(ipv6_hdr + 1);
790 l4hash = HASH_L4_PORTS(tcp_hdr);
791 } else if (ipv6_hdr->proto == IPPROTO_UDP) {
792 udp_hdr = (struct rte_udp_hdr *)(ipv6_hdr + 1);
793 l4hash = HASH_L4_PORTS(udp_hdr);
794 }
795 }
796
797 hash = l3hash ^ l4hash;
798 hash ^= hash >> 16;
799 hash ^= hash >> 8;
800
801 slaves[i] = hash % slave_count;
802 }
803 }
804
805 struct bwg_slave {
806 uint64_t bwg_left_int;
807 uint64_t bwg_left_remainder;
808 uint16_t slave;
809 };
810
811 void
812 bond_tlb_activate_slave(struct bond_dev_private *internals) {
813 int i;
814
815 for (i = 0; i < internals->active_slave_count; i++) {
816 tlb_last_obytets[internals->active_slaves[i]] = 0;
817 }
818 }
819
820 static int
821 bandwidth_cmp(const void *a, const void *b)
822 {
823 const struct bwg_slave *bwg_a = a;
824 const struct bwg_slave *bwg_b = b;
825 int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
826 int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
827 (int64_t)bwg_a->bwg_left_remainder;
828 if (diff > 0)
829 return 1;
830 else if (diff < 0)
831 return -1;
832 else if (diff2 > 0)
833 return 1;
834 else if (diff2 < 0)
835 return -1;
836 else
837 return 0;
838 }
839
840 static void
841 bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
842 struct bwg_slave *bwg_slave)
843 {
844 struct rte_eth_link link_status;
845 int ret;
846
847 ret = rte_eth_link_get_nowait(port_id, &link_status);
848 if (ret < 0) {
849 RTE_BOND_LOG(ERR, "Slave (port %u) link get failed: %s",
850 port_id, rte_strerror(-ret));
851 return;
852 }
853 uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
854 if (link_bwg == 0)
855 return;
856 link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
857 bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
858 bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
859 }
860
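/*
 * Alarm callback for mode 5 (TLB): sample the bytes transmitted by each
 * active slave, estimate the bandwidth each one has left and re-sort the
 * tlb_slaves_order array accordingly; the callback re-arms itself every
 * REORDER_PERIOD_MS milliseconds.
 */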
861 static void
862 bond_ethdev_update_tlb_slave_cb(void *arg)
863 {
864 struct bond_dev_private *internals = arg;
865 struct rte_eth_stats slave_stats;
866 struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
867 uint16_t slave_count;
868 uint64_t tx_bytes;
869
870 uint8_t update_stats = 0;
871 uint16_t slave_id;
872 uint16_t i;
873
874 internals->slave_update_idx++;
875
876
877 if (internals->slave_update_idx >= REORDER_PERIOD_MS)
878 update_stats = 1;
879
880 for (i = 0; i < internals->active_slave_count; i++) {
881 slave_id = internals->active_slaves[i];
882 rte_eth_stats_get(slave_id, &slave_stats);
883 tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
884 bandwidth_left(slave_id, tx_bytes,
885 internals->slave_update_idx, &bwg_array[i]);
886 bwg_array[i].slave = slave_id;
887
888 if (update_stats) {
889 tlb_last_obytets[slave_id] = slave_stats.obytes;
890 }
891 }
892
893 if (update_stats == 1)
894 internals->slave_update_idx = 0;
895
896 slave_count = i;
897 qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
898 for (i = 0; i < slave_count; i++)
899 internals->tlb_slaves_order[i] = bwg_array[i].slave;
900
901 rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
902 (struct bond_dev_private *)internals);
903 }
904
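/*
 * TX burst for mode 5 (TLB): try the slaves in the order computed by the
 * bandwidth callback, rewriting the source MAC of packets that carry the
 * primary slave's address to the address of the transmitting slave.
 */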
905 static uint16_t
906 bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
907 {
908 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
909 struct bond_dev_private *internals = bd_tx_q->dev_private;
910
911 struct rte_eth_dev *primary_port =
912 &rte_eth_devices[internals->primary_port];
913 uint16_t num_tx_total = 0;
914 uint16_t i, j;
915
916 uint16_t num_of_slaves = internals->active_slave_count;
917 uint16_t slaves[RTE_MAX_ETHPORTS];
918
919 struct rte_ether_hdr *ether_hdr;
920 struct rte_ether_addr primary_slave_addr;
921 struct rte_ether_addr active_slave_addr;
922
923 if (num_of_slaves < 1)
924 return num_tx_total;
925
926 memcpy(slaves, internals->tlb_slaves_order,
927 sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);
928
929
930 rte_ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);
931
932 if (nb_pkts > 3) {
933 for (i = 0; i < 3; i++)
934 rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
935 }
936
937 for (i = 0; i < num_of_slaves; i++) {
938 rte_eth_macaddr_get(slaves[i], &active_slave_addr);
939 for (j = num_tx_total; j < nb_pkts; j++) {
940 if (j + 3 < nb_pkts)
941 rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));
942
943 ether_hdr = rte_pktmbuf_mtod(bufs[j],
944 struct rte_ether_hdr *);
945 if (rte_is_same_ether_addr(&ether_hdr->src_addr,
946 &primary_slave_addr))
947 rte_ether_addr_copy(&active_slave_addr,
948 &ether_hdr->src_addr);
949 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
950 mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
951 #endif
952 }
953
954 num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
955 bufs + num_tx_total, nb_pkts - num_tx_total);
956
957 if (num_tx_total == nb_pkts)
958 break;
959 }
960
961 return num_tx_total;
962 }
963
964 void
965 bond_tlb_disable(struct bond_dev_private *internals)
966 {
967 rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
968 }
969
970 void
971 bond_tlb_enable(struct bond_dev_private *internals)
972 {
973 bond_ethdev_update_tlb_slave_cb(internals);
974 }
975
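/*
 * TX burst for mode 6 (ALB): ARP packets are assigned to slaves by the ALB
 * client table (with the source MAC rewritten to that slave), ARP update
 * packets are generated when the client table has changed, and all other
 * traffic is sent with the TLB policy.
 */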
976 static uint16_t
977 bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
978 {
979 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
980 struct bond_dev_private *internals = bd_tx_q->dev_private;
981
982 struct rte_ether_hdr *eth_h;
983 uint16_t ether_type, offset;
984
985 struct client_data *client_info;
986
987 /*
988 * We create transmit buffers for every slave and one additional buffer for
989 * packets sent through TLB. In the worst case every packet is sent on one port.
990 */
991 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
992 uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };
993
994 /*
995 * We create separate transmit buffers for update packets as they won't
996 * be counted in num_tx_total.
997 */
998 struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
999 uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };
1000
1001 struct rte_mbuf *upd_pkt;
1002 size_t pkt_size;
1003
1004 uint16_t num_send, num_not_send = 0;
1005 uint16_t num_tx_total = 0;
1006 uint16_t slave_idx;
1007
1008 int i, j;
1009
1010 /* Search tx buffer for ARP packets and forward them to alb */
1011 for (i = 0; i < nb_pkts; i++) {
1012 eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
1013 ether_type = eth_h->ether_type;
1014 offset = get_vlan_offset(eth_h, &ether_type);
1015
1016 if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
1017 slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);
1018
1019 /* Change src mac in eth header */
1020 rte_eth_macaddr_get(slave_idx, &eth_h->src_addr);
1021
1022 /* Add packet to slave tx buffer */
1023 slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
1024 slave_bufs_pkts[slave_idx]++;
1025 } else {
1026 /* If packet is not ARP, send it with TLB policy */
1027 slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
1028 bufs[i];
1029 slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
1030 }
1031 }
1032
1033 /* Update connected client ARP tables */
1034 if (internals->mode6.ntt) {
1035 for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
1036 client_info = &internals->mode6.client_table[i];
1037
1038 if (client_info->in_use) {
1039 /* Allocate new packet to send ARP update on current slave */
1040 upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
1041 if (upd_pkt == NULL) {
1042 RTE_BOND_LOG(ERR,
1043 "Failed to allocate ARP packet from pool");
1044 continue;
1045 }
1046 pkt_size = sizeof(struct rte_ether_hdr) +
1047 sizeof(struct rte_arp_hdr) +
1048 client_info->vlan_count *
1049 sizeof(struct rte_vlan_hdr);
1050 upd_pkt->data_len = pkt_size;
1051 upd_pkt->pkt_len = pkt_size;
1052
1053 slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
1054 internals);
1055
1056 /* Add packet to update tx buffer */
1057 update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
1058 update_bufs_pkts[slave_idx]++;
1059 }
1060 }
1061 internals->mode6.ntt = 0;
1062 }
1063
1064 /* Send ARP packets on proper slaves */
1065 for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1066 if (slave_bufs_pkts[i] > 0) {
1067 num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
1068 slave_bufs[i], slave_bufs_pkts[i]);
1069 for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
1070 bufs[nb_pkts - 1 - num_not_send - j] =
1071 slave_bufs[i][nb_pkts - 1 - j];
1072 }
1073
1074 num_tx_total += num_send;
1075 num_not_send += slave_bufs_pkts[i] - num_send;
1076
1077 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1078 /* Print TX stats including update packets */
1079 for (j = 0; j < slave_bufs_pkts[i]; j++) {
1080 eth_h = rte_pktmbuf_mtod(slave_bufs[i][j],
1081 struct rte_ether_hdr *);
1082 mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
1083 }
1084 #endif
1085 }
1086 }
1087
1088 /* Send update packets on proper slaves */
1089 for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1090 if (update_bufs_pkts[i] > 0) {
1091 num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
1092 update_bufs_pkts[i]);
1093 for (j = num_send; j < update_bufs_pkts[i]; j++) {
1094 rte_pktmbuf_free(update_bufs[i][j]);
1095 }
1096 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1097 for (j = 0; j < update_bufs_pkts[i]; j++) {
1098 eth_h = rte_pktmbuf_mtod(update_bufs[i][j],
1099 struct rte_ether_hdr *);
1100 mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
1101 }
1102 #endif
1103 }
1104 }
1105
1106 /* Send non-ARP packets using tlb policy */
1107 if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
1108 num_send = bond_ethdev_tx_burst_tlb(queue,
1109 slave_bufs[RTE_MAX_ETHPORTS],
1110 slave_bufs_pkts[RTE_MAX_ETHPORTS]);
1111
1112 for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
1113 bufs[nb_pkts - 1 - num_not_send - j] =
1114 slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
1115 }
1116
1117 num_tx_total += num_send;
1118 }
1119
1120 return num_tx_total;
1121 }
1122
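/*
 * Common balance TX path: map each packet to a slave with the configured
 * transmit hash policy, burst the per-slave arrays and move any packets that
 * could not be sent to the end of bufs so the caller can retry them.
 */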
1123 static inline uint16_t
1124 tx_burst_balance(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs,
1125 uint16_t *slave_port_ids, uint16_t slave_count)
1126 {
1127 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1128 struct bond_dev_private *internals = bd_tx_q->dev_private;
1129
1130 /* Array to sort mbufs for transmission on each slave into */
1131 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1132 /* Number of mbufs for transmission on each slave */
1133 uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1134 /* Mapping array generated by hash function to map mbufs to slaves */
1135 uint16_t bufs_slave_port_idxs[nb_bufs];
1136
1137 uint16_t slave_tx_count;
1138 uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1139
1140 uint16_t i;
1141
1142 /*
1143 * Populate the per-slave mbuf arrays with the packets to be sent on each
1144 * slave, selecting the output slave with a hash based on the xmit policy
1145 */
1146 internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
1147 bufs_slave_port_idxs);
1148
1149 for (i = 0; i < nb_bufs; i++) {
1150 /* Populate slave mbuf arrays with mbufs for that slave. */
1151 uint16_t slave_idx = bufs_slave_port_idxs[i];
1152
1153 slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
1154 }
1155
1156 /* Send packet burst on each slave device */
1157 for (i = 0; i < slave_count; i++) {
1158 if (slave_nb_bufs[i] == 0)
1159 continue;
1160
1161 slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1162 bd_tx_q->queue_id, slave_bufs[i],
1163 slave_nb_bufs[i]);
1164
1165 total_tx_count += slave_tx_count;
1166
1167 /* If tx burst fails move packets to end of bufs */
1168 if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1169 int slave_tx_fail_count = slave_nb_bufs[i] -
1170 slave_tx_count;
1171 total_tx_fail_count += slave_tx_fail_count;
1172 memcpy(&bufs[nb_bufs - total_tx_fail_count],
1173 &slave_bufs[i][slave_tx_count],
1174 slave_tx_fail_count * sizeof(bufs[0]));
1175 }
1176 }
1177
1178 return total_tx_count;
1179 }
1180
1181 static uint16_t
1182 bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
1183 uint16_t nb_bufs)
1184 {
1185 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1186 struct bond_dev_private *internals = bd_tx_q->dev_private;
1187
1188 uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1189 uint16_t slave_count;
1190
1191 if (unlikely(nb_bufs == 0))
1192 return 0;
1193
1194 /* Copy slave list to protect against slave up/down changes during tx
1195 * bursting
1196 */
1197 slave_count = internals->active_slave_count;
1198 if (unlikely(slave_count < 1))
1199 return 0;
1200
1201 memcpy(slave_port_ids, internals->active_slaves,
1202 sizeof(slave_port_ids[0]) * slave_count);
1203 return tx_burst_balance(queue, bufs, nb_bufs, slave_port_ids,
1204 slave_count);
1205 }
1206
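/*
 * Common TX path for mode 4 (802.3AD): first drain any pending LACP control
 * packets from each slave's tx_ring (skipped when a dedicated TX queue is
 * used), then balance the data packets across the slaves that are currently
 * in the DISTRIBUTING state.
 */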
1207 static inline uint16_t
1208 tx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs,
1209 bool dedicated_txq)
1210 {
1211 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1212 struct bond_dev_private *internals = bd_tx_q->dev_private;
1213
1214 uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1215 uint16_t slave_count;
1216
1217 uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
1218 uint16_t dist_slave_count;
1219
1220 uint16_t slave_tx_count;
1221
1222 uint16_t i;
1223
1224 /* Copy slave list to protect against slave up/down changes during tx
1225 * bursting */
1226 slave_count = internals->active_slave_count;
1227 if (unlikely(slave_count < 1))
1228 return 0;
1229
1230 memcpy(slave_port_ids, internals->active_slaves,
1231 sizeof(slave_port_ids[0]) * slave_count);
1232
1233 if (dedicated_txq)
1234 goto skip_tx_ring;
1235
1236 /* Check for LACP control packets and send if available */
1237 for (i = 0; i < slave_count; i++) {
1238 struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
1239 struct rte_mbuf *ctrl_pkt = NULL;
1240
1241 if (likely(rte_ring_empty(port->tx_ring)))
1242 continue;
1243
1244 if (rte_ring_dequeue(port->tx_ring,
1245 (void **)&ctrl_pkt) != -ENOENT) {
1246 slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1247 bd_tx_q->queue_id, &ctrl_pkt, 1);
1248 /*
1249 * re-enqueue LAG control plane packets to buffering
1250 * ring if transmission fails so the packet isn't lost.
1251 */
1252 if (slave_tx_count != 1)
1253 rte_ring_enqueue(port->tx_ring, ctrl_pkt);
1254 }
1255 }
1256
1257 skip_tx_ring:
1258 if (unlikely(nb_bufs == 0))
1259 return 0;
1260
1261 dist_slave_count = 0;
1262 for (i = 0; i < slave_count; i++) {
1263 struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
1264
1265 if (ACTOR_STATE(port, DISTRIBUTING))
1266 dist_slave_port_ids[dist_slave_count++] =
1267 slave_port_ids[i];
1268 }
1269
1270 if (unlikely(dist_slave_count < 1))
1271 return 0;
1272
1273 return tx_burst_balance(queue, bufs, nb_bufs, dist_slave_port_ids,
1274 dist_slave_count);
1275 }
1276
1277 static uint16_t
1278 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
1279 uint16_t nb_bufs)
1280 {
1281 return tx_burst_8023ad(queue, bufs, nb_bufs, false);
1282 }
1283
1284 static uint16_t
1285 bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
1286 uint16_t nb_bufs)
1287 {
1288 return tx_burst_8023ad(queue, bufs, nb_bufs, true);
1289 }
1290
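/*
 * TX burst for broadcast mode: bump the reference count of every mbuf and
 * transmit the whole burst on each active slave; if some slaves fail, free
 * the unsent copies on every slave except the most successful one, whose
 * transmit count is returned to the caller.
 */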
1291 static uint16_t
1292 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1293 uint16_t nb_pkts)
1294 {
1295 struct bond_dev_private *internals;
1296 struct bond_tx_queue *bd_tx_q;
1297
1298 uint16_t slaves[RTE_MAX_ETHPORTS];
1299 uint8_t tx_failed_flag = 0;
1300 uint16_t num_of_slaves;
1301
1302 uint16_t max_nb_of_tx_pkts = 0;
1303
1304 int slave_tx_total[RTE_MAX_ETHPORTS];
1305 int i, most_successful_tx_slave = -1;
1306
1307 bd_tx_q = (struct bond_tx_queue *)queue;
1308 internals = bd_tx_q->dev_private;
1309
1310 /* Copy slave list to protect against slave up/down changes during tx
1311 * bursting */
1312 num_of_slaves = internals->active_slave_count;
1313 memcpy(slaves, internals->active_slaves,
1314 sizeof(internals->active_slaves[0]) * num_of_slaves);
1315
1316 if (num_of_slaves < 1)
1317 return 0;
1318
1319 /* Increment reference count on mbufs */
1320 for (i = 0; i < nb_pkts; i++)
1321 rte_pktmbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1322
1323 /* Transmit burst on each active slave */
1324 for (i = 0; i < num_of_slaves; i++) {
1325 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1326 bufs, nb_pkts);
1327
1328 if (unlikely(slave_tx_total[i] < nb_pkts))
1329 tx_failed_flag = 1;
1330
1331 /* record the value and slave index for the slave which transmits the
1332 * maximum number of packets */
1333 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1334 max_nb_of_tx_pkts = slave_tx_total[i];
1335 most_successful_tx_slave = i;
1336 }
1337 }
1338
1339 /* if slaves fail to transmit packets from burst, the calling application
1340 * is not expected to know about multiple references to packets so we must
1341 * handle failures of all packets except those of the most successful slave
1342 */
1343 if (unlikely(tx_failed_flag))
1344 for (i = 0; i < num_of_slaves; i++)
1345 if (i != most_successful_tx_slave)
1346 while (slave_tx_total[i] < nb_pkts)
1347 rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1348
1349 return max_nb_of_tx_pkts;
1350 }
1351
1352 static void
1353 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1354 {
1355 struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1356
1357 if (bond_ctx->mode == BONDING_MODE_8023AD) {
1358 /**
1359 * If in mode 4 then save the link properties of the first
1360 * slave, all subsequent slaves must match these properties
1361 */
1362 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1363
1364 bond_link->link_autoneg = slave_link->link_autoneg;
1365 bond_link->link_duplex = slave_link->link_duplex;
1366 bond_link->link_speed = slave_link->link_speed;
1367 } else {
1368 /**
1369 * In any other mode the link properties are set to default
1370 * values of AUTONEG/DUPLEX
1371 */
1372 ethdev->data->dev_link.link_autoneg = RTE_ETH_LINK_AUTONEG;
1373 ethdev->data->dev_link.link_duplex = RTE_ETH_LINK_FULL_DUPLEX;
1374 }
1375 }
1376
1377 static int
1378 link_properties_valid(struct rte_eth_dev *ethdev,
1379 struct rte_eth_link *slave_link)
1380 {
1381 struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1382
1383 if (bond_ctx->mode == BONDING_MODE_8023AD) {
1384 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1385
1386 if (bond_link->link_duplex != slave_link->link_duplex ||
1387 bond_link->link_autoneg != slave_link->link_autoneg ||
1388 bond_link->link_speed != slave_link->link_speed)
1389 return -1;
1390 }
1391
1392 return 0;
1393 }
1394
1395 int
1396 mac_address_get(struct rte_eth_dev *eth_dev,
1397 struct rte_ether_addr *dst_mac_addr)
1398 {
1399 struct rte_ether_addr *mac_addr;
1400
1401 if (eth_dev == NULL) {
1402 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1403 return -1;
1404 }
1405
1406 if (dst_mac_addr == NULL) {
1407 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1408 return -1;
1409 }
1410
1411 mac_addr = eth_dev->data->mac_addrs;
1412
1413 rte_ether_addr_copy(mac_addr, dst_mac_addr);
1414 return 0;
1415 }
1416
1417 int
1418 mac_address_set(struct rte_eth_dev *eth_dev,
1419 struct rte_ether_addr *new_mac_addr)
1420 {
1421 struct rte_ether_addr *mac_addr;
1422
1423 if (eth_dev == NULL) {
1424 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1425 return -1;
1426 }
1427
1428 if (new_mac_addr == NULL) {
1429 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1430 return -1;
1431 }
1432
1433 mac_addr = eth_dev->data->mac_addrs;
1434
1435 /* If new MAC is different to current MAC then update */
1436 if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1437 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1438
1439 return 0;
1440 }
1441
1442 static const struct rte_ether_addr null_mac_addr;
1443
1444 /*
1445 * Add additional MAC addresses to the slave
1446 */
1447 int
1448 slave_add_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1449 uint16_t slave_port_id)
1450 {
1451 int i, ret;
1452 struct rte_ether_addr *mac_addr;
1453
1454 for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1455 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1456 if (rte_is_same_ether_addr(mac_addr, &null_mac_addr))
1457 break;
1458
1459 ret = rte_eth_dev_mac_addr_add(slave_port_id, mac_addr, 0);
1460 if (ret < 0) {
1461 /* rollback */
1462 for (i--; i > 0; i--)
1463 rte_eth_dev_mac_addr_remove(slave_port_id,
1464 &bonded_eth_dev->data->mac_addrs[i]);
1465 return ret;
1466 }
1467 }
1468
1469 return 0;
1470 }
1471
1472 /*
1473 * Remove additional MAC addresses from the slave
1474 */
1475 int
1476 slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1477 uint16_t slave_port_id)
1478 {
1479 int i, rc, ret;
1480 struct rte_ether_addr *mac_addr;
1481
1482 rc = 0;
1483 for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1484 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1485 if (rte_is_same_ether_addr(mac_addr, &null_mac_addr))
1486 break;
1487
1488 ret = rte_eth_dev_mac_addr_remove(slave_port_id, mac_addr);
1489 /* save only the first error */
1490 if (ret < 0 && rc == 0)
1491 rc = ret;
1492 }
1493
1494 return rc;
1495 }
1496
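/*
 * Push MAC addresses down to the slaves according to the bonding mode: in
 * round-robin, balance and broadcast modes every slave gets the bonded MAC,
 * mode 4 delegates to its own handler, and in active-backup/TLB/ALB only the
 * primary gets the bonded MAC while the other slaves keep their persisted
 * addresses.
 */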
1497 int
1498 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1499 {
1500 struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1501 bool set;
1502 int i;
1503
1504 /* Update slave devices MAC addresses */
1505 if (internals->slave_count < 1)
1506 return -1;
1507
1508 switch (internals->mode) {
1509 case BONDING_MODE_ROUND_ROBIN:
1510 case BONDING_MODE_BALANCE:
1511 case BONDING_MODE_BROADCAST:
1512 for (i = 0; i < internals->slave_count; i++) {
1513 if (rte_eth_dev_default_mac_addr_set(
1514 internals->slaves[i].port_id,
1515 bonded_eth_dev->data->mac_addrs)) {
1516 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1517 internals->slaves[i].port_id);
1518 return -1;
1519 }
1520 }
1521 break;
1522 case BONDING_MODE_8023AD:
1523 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1524 break;
1525 case BONDING_MODE_ACTIVE_BACKUP:
1526 case BONDING_MODE_TLB:
1527 case BONDING_MODE_ALB:
1528 default:
1529 set = true;
1530 for (i = 0; i < internals->slave_count; i++) {
1531 if (internals->slaves[i].port_id ==
1532 internals->current_primary_port) {
1533 if (rte_eth_dev_default_mac_addr_set(
1534 internals->current_primary_port,
1535 bonded_eth_dev->data->mac_addrs)) {
1536 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1537 internals->current_primary_port);
1538 set = false;
1539 }
1540 } else {
1541 if (rte_eth_dev_default_mac_addr_set(
1542 internals->slaves[i].port_id,
1543 &internals->slaves[i].persisted_mac_addr)) {
1544 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1545 internals->slaves[i].port_id);
1546 }
1547 }
1548 }
1549 if (!set)
1550 return -1;
1551 }
1552
1553 return 0;
1554 }
1555
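/*
 * Select the RX and TX burst handlers that implement the requested bonding
 * mode; mode 4 additionally chooses between the software slow-packet path
 * and the dedicated-queue (flow director) path.
 */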
1556 int
1557 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, uint8_t mode)
1558 {
1559 struct bond_dev_private *internals;
1560
1561 internals = eth_dev->data->dev_private;
1562
1563 switch (mode) {
1564 case BONDING_MODE_ROUND_ROBIN:
1565 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1566 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1567 break;
1568 case BONDING_MODE_ACTIVE_BACKUP:
1569 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1570 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1571 break;
1572 case BONDING_MODE_BALANCE:
1573 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1574 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1575 break;
1576 case BONDING_MODE_BROADCAST:
1577 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1578 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1579 break;
1580 case BONDING_MODE_8023AD:
1581 if (bond_mode_8023ad_enable(eth_dev) != 0)
1582 return -1;
1583
1584 if (internals->mode4.dedicated_queues.enabled == 0) {
1585 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1586 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1587 RTE_BOND_LOG(WARNING,
1588 "Using mode 4, it is necessary to do TX burst "
1589 "and RX burst at least every 100ms.");
1590 } else {
1591 /* Use flow director's optimization */
1592 eth_dev->rx_pkt_burst =
1593 bond_ethdev_rx_burst_8023ad_fast_queue;
1594 eth_dev->tx_pkt_burst =
1595 bond_ethdev_tx_burst_8023ad_fast_queue;
1596 }
1597 break;
1598 case BONDING_MODE_TLB:
1599 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1600 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1601 break;
1602 case BONDING_MODE_ALB:
1603 if (bond_mode_alb_enable(eth_dev) != 0)
1604 return -1;
1605
1606 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1607 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1608 break;
1609 default:
1610 return -1;
1611 }
1612
1613 internals->mode = mode;
1614
1615 return 0;
1616 }
1617
1618
1619 static int
1620 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1621 struct rte_eth_dev *slave_eth_dev)
1622 {
1623 int errval = 0;
1624 struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1625 struct port *port = &bond_mode_8023ad_ports[slave_eth_dev->data->port_id];
1626
1627 if (port->slow_pool == NULL) {
1628 char mem_name[256];
1629 int slave_id = slave_eth_dev->data->port_id;
1630
1631 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1632 slave_id);
1633 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1634 250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1635 slave_eth_dev->data->numa_node);
1636
1637 /* Any memory allocation failure in initialization is critical because
1638 * resources can't be freed, so reinitialization is impossible. */
1639 if (port->slow_pool == NULL) {
1640 rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1641 slave_id, mem_name, rte_strerror(rte_errno));
1642 }
1643 }
1644
1645 if (internals->mode4.dedicated_queues.enabled == 1) {
1646 /* Configure slow Rx queue */
1647
1648 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1649 internals->mode4.dedicated_queues.rx_qid, 128,
1650 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1651 NULL, port->slow_pool);
1652 if (errval != 0) {
1653 RTE_BOND_LOG(ERR,
1654 "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1655 slave_eth_dev->data->port_id,
1656 internals->mode4.dedicated_queues.rx_qid,
1657 errval);
1658 return errval;
1659 }
1660
1661 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1662 internals->mode4.dedicated_queues.tx_qid, 512,
1663 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1664 NULL);
1665 if (errval != 0) {
1666 RTE_BOND_LOG(ERR,
1667 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1668 slave_eth_dev->data->port_id,
1669 internals->mode4.dedicated_queues.tx_qid,
1670 errval);
1671 return errval;
1672 }
1673 }
1674 return 0;
1675 }
1676
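/*
 * Propagate the bonded device's configuration (RSS, MTU, offloads and queue
 * counts, plus the extra mode 4 dedicated queues when enabled) to a slave
 * and reconfigure it with rte_eth_dev_configure().
 */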
1677 int
1678 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1679 struct rte_eth_dev *slave_eth_dev)
1680 {
1681 uint16_t nb_rx_queues;
1682 uint16_t nb_tx_queues;
1683
1684 int errval;
1685
1686 struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1687
1688 /* Stop slave */
1689 errval = rte_eth_dev_stop(slave_eth_dev->data->port_id);
1690 if (errval != 0)
1691 RTE_BOND_LOG(ERR, "rte_eth_dev_stop: port %u, err (%d)",
1692 slave_eth_dev->data->port_id, errval);
1693
1694 /* Enable interrupts on slave device if supported */
1695 if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1696 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1697
1698 /* If RSS is enabled for bonding, try to enable it for slaves */
1699 if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) {
1700 /* rss_key won't be empty if RSS is configured in bonded dev */
1701 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1702 internals->rss_key_len;
1703 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1704 internals->rss_key;
1705
1706 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1707 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1708 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1709 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1710 }
1711
1712 slave_eth_dev->data->dev_conf.rxmode.mtu =
1713 bonded_eth_dev->data->dev_conf.rxmode.mtu;
1714
1715 slave_eth_dev->data->dev_conf.txmode.offloads |=
1716 bonded_eth_dev->data->dev_conf.txmode.offloads;
1717
1718 slave_eth_dev->data->dev_conf.txmode.offloads &=
1719 (bonded_eth_dev->data->dev_conf.txmode.offloads |
1720 ~internals->tx_offload_capa);
1721
1722 slave_eth_dev->data->dev_conf.rxmode.offloads |=
1723 bonded_eth_dev->data->dev_conf.rxmode.offloads;
1724
1725 slave_eth_dev->data->dev_conf.rxmode.offloads &=
1726 (bonded_eth_dev->data->dev_conf.rxmode.offloads |
1727 ~internals->rx_offload_capa);
1728
1729
1730 nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1731 nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1732
1733 if (internals->mode == BONDING_MODE_8023AD) {
1734 if (internals->mode4.dedicated_queues.enabled == 1) {
1735 nb_rx_queues++;
1736 nb_tx_queues++;
1737 }
1738 }
1739
1740 /* Configure device */
1741 errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1742 nb_rx_queues, nb_tx_queues,
1743 &(slave_eth_dev->data->dev_conf));
1744 if (errval != 0) {
1745 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
1746 slave_eth_dev->data->port_id, errval);
1747 return errval;
1748 }
1749
1750 errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
1751 bonded_eth_dev->data->mtu);
1752 if (errval != 0 && errval != -ENOTSUP) {
1753 RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
1754 slave_eth_dev->data->port_id, errval);
1755 return errval;
1756 }
1757 return 0;
1758 }
1759
1760 int
1761 slave_start(struct rte_eth_dev *bonded_eth_dev,
1762 struct rte_eth_dev *slave_eth_dev)
1763 {
1764 int errval = 0;
1765 struct bond_rx_queue *bd_rx_q;
1766 struct bond_tx_queue *bd_tx_q;
1767 uint16_t q_id;
1768 struct rte_flow_error flow_error;
1769 struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1770
1771 /* Setup Rx Queues */
1772 for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1773 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1774
1775 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1776 bd_rx_q->nb_rx_desc,
1777 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1778 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1779 if (errval != 0) {
1780 RTE_BOND_LOG(ERR,
1781 "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1782 slave_eth_dev->data->port_id, q_id, errval);
1783 return errval;
1784 }
1785 }
1786
1787 /* Setup Tx Queues */
1788 for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1789 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1790
1791 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1792 bd_tx_q->nb_tx_desc,
1793 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1794 &bd_tx_q->tx_conf);
1795 if (errval != 0) {
1796 RTE_BOND_LOG(ERR,
1797 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1798 slave_eth_dev->data->port_id, q_id, errval);
1799 return errval;
1800 }
1801 }
1802
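	/*
	 * With dedicated queues enabled in 802.3AD mode, set up the slow
	 * (LACP) queue on the slave and install an rte_flow rule that steers
	 * LACP frames to it, replacing any rule left over from a previous start.
	 */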
1803 if (internals->mode == BONDING_MODE_8023AD &&
1804 internals->mode4.dedicated_queues.enabled == 1) {
1805 		errval = slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev);
1806 		if (errval != 0)
1807 			return errval;
1808
1809 errval = bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1810 slave_eth_dev->data->port_id);
1811 if (errval != 0) {
1812 RTE_BOND_LOG(ERR,
1813 "bond_ethdev_8023ad_flow_verify: port=%d, err (%d)",
1814 slave_eth_dev->data->port_id, errval);
1815 return errval;
1816 }
1817
1818 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL) {
1819 errval = rte_flow_destroy(slave_eth_dev->data->port_id,
1820 internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1821 &flow_error);
1822 RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_destroy: port=%d, err (%d)",
1823 slave_eth_dev->data->port_id, errval);
1824 }
1825
1826 errval = bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1827 slave_eth_dev->data->port_id);
1828 if (errval != 0) {
1829 RTE_BOND_LOG(ERR,
1830 "bond_ethdev_8023ad_flow_set: port=%d, err (%d)",
1831 slave_eth_dev->data->port_id, errval);
1832 return errval;
1833 }
1834 }
1835
1836 /* Start device */
1837 errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1838 if (errval != 0) {
1839 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1840 slave_eth_dev->data->port_id, errval);
1841 return -1;
1842 }
1843
1844 /* If RSS is enabled for bonding, synchronize RETA */
1845 if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS) {
1846 int i;
1847 struct bond_dev_private *internals;
1848
1849 internals = bonded_eth_dev->data->dev_private;
1850
1851 for (i = 0; i < internals->slave_count; i++) {
1852 if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1853 errval = rte_eth_dev_rss_reta_update(
1854 slave_eth_dev->data->port_id,
1855 &internals->reta_conf[0],
1856 internals->slaves[i].reta_size);
1857 if (errval != 0) {
1858 RTE_BOND_LOG(WARNING,
1859 "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1860 " RSS Configuration for bonding may be inconsistent.",
1861 slave_eth_dev->data->port_id, errval);
1862 }
1863 break;
1864 }
1865 }
1866 }
1867
1868 /* If lsc interrupt is set, check initial slave's link status */
1869 if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1870 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1871 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1872 RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1873 NULL);
1874 }
1875
1876 return 0;
1877 }
1878
1879 void
1880 slave_remove(struct bond_dev_private *internals,
1881 struct rte_eth_dev *slave_eth_dev)
1882 {
1883 uint16_t i;
1884
1885 for (i = 0; i < internals->slave_count; i++)
1886 if (internals->slaves[i].port_id ==
1887 slave_eth_dev->data->port_id)
1888 break;
1889
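	/*
	 * If the removed slave is not the last entry, compact the slave array
	 * and the per-flow slave handle arrays over its slot.
	 */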
1890 if (i < (internals->slave_count - 1)) {
1891 struct rte_flow *flow;
1892
1893 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1894 sizeof(internals->slaves[0]) *
1895 (internals->slave_count - i - 1));
1896 TAILQ_FOREACH(flow, &internals->flow_list, next) {
1897 memmove(&flow->flows[i], &flow->flows[i + 1],
1898 sizeof(flow->flows[0]) *
1899 (internals->slave_count - i - 1));
1900 flow->flows[internals->slave_count - 1] = NULL;
1901 }
1902 }
1903
1904 internals->slave_count--;
1905
1906 /* force reconfiguration of slave interfaces */
1907 rte_eth_dev_internal_reset(slave_eth_dev);
1908 }
1909
1910 static void
1911 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1912
1913 void
1914 slave_add(struct bond_dev_private *internals,
1915 struct rte_eth_dev *slave_eth_dev)
1916 {
1917 struct bond_slave_details *slave_details =
1918 &internals->slaves[internals->slave_count];
1919
1920 slave_details->port_id = slave_eth_dev->data->port_id;
1921 slave_details->last_link_status = 0;
1922
1923 /* Mark slave devices that don't support interrupts so we can
1924 * compensate when we start the bond
1925 */
1926 if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1927 slave_details->link_status_poll_enabled = 1;
1928 }
1929
1930 slave_details->link_status_wait_to_complete = 0;
1931 	/* Store the slave's current MAC address for later use by the bonding device */
1932 memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1933 sizeof(struct rte_ether_addr));
1934 }
1935
1936 void
1937 bond_ethdev_primary_set(struct bond_dev_private *internals,
1938 uint16_t slave_port_id)
1939 {
1940 int i;
1941
1942 if (internals->active_slave_count < 1)
1943 internals->current_primary_port = slave_port_id;
1944 else
1945 /* Search bonded device slave ports for new proposed primary port */
1946 for (i = 0; i < internals->active_slave_count; i++) {
1947 if (internals->active_slaves[i] == slave_port_id)
1948 internals->current_primary_port = slave_port_id;
1949 }
1950 }
1951
1952 static int
1953 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
1954
1955 static int
1956 bond_ethdev_start(struct rte_eth_dev *eth_dev)
1957 {
1958 struct bond_dev_private *internals;
1959 int i;
1960
1961 /* slave eth dev will be started by bonded device */
1962 if (check_for_bonded_ethdev(eth_dev)) {
1963 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
1964 eth_dev->data->port_id);
1965 return -1;
1966 }
1967
1968 eth_dev->data->dev_link.link_status = RTE_ETH_LINK_DOWN;
1969 eth_dev->data->dev_started = 1;
1970
1971 internals = eth_dev->data->dev_private;
1972
1973 if (internals->slave_count == 0) {
1974 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
1975 goto out_err;
1976 }
1977
1978 if (internals->user_defined_mac == 0) {
1979 struct rte_ether_addr *new_mac_addr = NULL;
1980
1981 for (i = 0; i < internals->slave_count; i++)
1982 if (internals->slaves[i].port_id == internals->primary_port)
1983 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
1984
1985 if (new_mac_addr == NULL)
1986 goto out_err;
1987
1988 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
1989 RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
1990 eth_dev->data->port_id);
1991 goto out_err;
1992 }
1993 }
1994
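	/*
	 * When dedicated queues are enabled in 802.3AD mode, the LACP control
	 * queue uses the first queue index after the application's data queues.
	 */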
1995 if (internals->mode == BONDING_MODE_8023AD) {
1996 if (internals->mode4.dedicated_queues.enabled == 1) {
1997 internals->mode4.dedicated_queues.rx_qid =
1998 eth_dev->data->nb_rx_queues;
1999 internals->mode4.dedicated_queues.tx_qid =
2000 eth_dev->data->nb_tx_queues;
2001 }
2002 }
2003
2004
2005 /* Reconfigure each slave device if starting bonded device */
2006 for (i = 0; i < internals->slave_count; i++) {
2007 struct rte_eth_dev *slave_ethdev =
2008 &(rte_eth_devices[internals->slaves[i].port_id]);
2009 if (slave_configure(eth_dev, slave_ethdev) != 0) {
2010 RTE_BOND_LOG(ERR,
2011 "bonded port (%d) failed to reconfigure slave device (%d)",
2012 eth_dev->data->port_id,
2013 internals->slaves[i].port_id);
2014 goto out_err;
2015 }
2016 if (slave_start(eth_dev, slave_ethdev) != 0) {
2017 RTE_BOND_LOG(ERR,
2018 "bonded port (%d) failed to start slave device (%d)",
2019 eth_dev->data->port_id,
2020 internals->slaves[i].port_id);
2021 goto out_err;
2022 }
2023 /* We will need to poll for link status if any slave doesn't
2024 * support interrupts
2025 */
2026 if (internals->slaves[i].link_status_poll_enabled)
2027 internals->link_status_polling_enabled = 1;
2028 }
2029
2030 /* start polling if needed */
2031 if (internals->link_status_polling_enabled) {
2032 rte_eal_alarm_set(
2033 internals->link_status_polling_interval_ms * 1000,
2034 bond_ethdev_slave_link_status_change_monitor,
2035 (void *)&rte_eth_devices[internals->port_id]);
2036 }
2037
2038 /* Update all slave devices MACs*/
2039 if (mac_address_slaves_update(eth_dev) != 0)
2040 goto out_err;
2041
2042 if (internals->user_defined_primary_port)
2043 bond_ethdev_primary_set(internals, internals->primary_port);
2044
2045 if (internals->mode == BONDING_MODE_8023AD)
2046 bond_mode_8023ad_start(eth_dev);
2047
2048 if (internals->mode == BONDING_MODE_TLB ||
2049 internals->mode == BONDING_MODE_ALB)
2050 bond_tlb_enable(internals);
2051
2052 return 0;
2053
2054 out_err:
2055 eth_dev->data->dev_started = 0;
2056 return -1;
2057 }
2058
2059 static void
2060 bond_ethdev_free_queues(struct rte_eth_dev *dev)
2061 {
2062 uint16_t i;
2063
2064 if (dev->data->rx_queues != NULL) {
2065 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2066 rte_free(dev->data->rx_queues[i]);
2067 dev->data->rx_queues[i] = NULL;
2068 }
2069 dev->data->nb_rx_queues = 0;
2070 }
2071
2072 if (dev->data->tx_queues != NULL) {
2073 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2074 rte_free(dev->data->tx_queues[i]);
2075 dev->data->tx_queues[i] = NULL;
2076 }
2077 dev->data->nb_tx_queues = 0;
2078 }
2079 }
2080
2081 int
2082 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2083 {
2084 struct bond_dev_private *internals = eth_dev->data->dev_private;
2085 uint16_t i;
2086 int ret;
2087
2088 if (internals->mode == BONDING_MODE_8023AD) {
2089 struct port *port;
2090 void *pkt = NULL;
2091
2092 bond_mode_8023ad_stop(eth_dev);
2093
2094 /* Discard all messages to/from mode 4 state machines */
2095 for (i = 0; i < internals->active_slave_count; i++) {
2096 port = &bond_mode_8023ad_ports[internals->active_slaves[i]];
2097
2098 RTE_ASSERT(port->rx_ring != NULL);
2099 while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2100 rte_pktmbuf_free(pkt);
2101
2102 RTE_ASSERT(port->tx_ring != NULL);
2103 while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2104 rte_pktmbuf_free(pkt);
2105 }
2106 }
2107
2108 if (internals->mode == BONDING_MODE_TLB ||
2109 internals->mode == BONDING_MODE_ALB) {
2110 bond_tlb_disable(internals);
2111 for (i = 0; i < internals->active_slave_count; i++)
2112 tlb_last_obytets[internals->active_slaves[i]] = 0;
2113 }
2114
2115 eth_dev->data->dev_link.link_status = RTE_ETH_LINK_DOWN;
2116 eth_dev->data->dev_started = 0;
2117
2118 internals->link_status_polling_enabled = 0;
2119 for (i = 0; i < internals->slave_count; i++) {
2120 uint16_t slave_id = internals->slaves[i].port_id;
2121
2122 internals->slaves[i].last_link_status = 0;
2123 ret = rte_eth_dev_stop(slave_id);
2124 if (ret != 0) {
2125 RTE_BOND_LOG(ERR, "Failed to stop device on port %u",
2126 slave_id);
2127 return ret;
2128 }
2129
2130 /* active slaves need to be deactivated. */
2131 if (find_slave_by_id(internals->active_slaves,
2132 internals->active_slave_count, slave_id) !=
2133 internals->active_slave_count)
2134 deactivate_slave(eth_dev, slave_id);
2135 }
2136
2137 return 0;
2138 }
2139
2140 int
2141 bond_ethdev_close(struct rte_eth_dev *dev)
2142 {
2143 struct bond_dev_private *internals = dev->data->dev_private;
2144 uint16_t bond_port_id = internals->port_id;
2145 int skipped = 0;
2146 struct rte_flow_error ferror;
2147
2148 if (rte_eal_process_type() != RTE_PROC_PRIMARY)
2149 return 0;
2150
2151 RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name);
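	/*
	 * Remove each slave in turn; a slave that fails to stop or to detach is
	 * counted as skipped so the loop still terminates with the remaining slaves.
	 */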
2152 while (internals->slave_count != skipped) {
2153 uint16_t port_id = internals->slaves[skipped].port_id;
2154
2155 if (rte_eth_dev_stop(port_id) != 0) {
2156 RTE_BOND_LOG(ERR, "Failed to stop device on port %u",
2157 port_id);
2158 skipped++;
2159 continue;
2160 }
2161
2162 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2163 RTE_BOND_LOG(ERR,
2164 "Failed to remove port %d from bonded device %s",
2165 port_id, dev->device->name);
2166 skipped++;
2167 }
2168 }
2169 bond_flow_ops.flush(dev, &ferror);
2170 bond_ethdev_free_queues(dev);
2171 rte_bitmap_reset(internals->vlan_filter_bmp);
2172 rte_bitmap_free(internals->vlan_filter_bmp);
2173 rte_free(internals->vlan_filter_bmpmem);
2174
2175 	/* Try to release the mempool used in mode 6. If the bonded
2176 	 * device is not in mode 6, freeing NULL is not a problem.
2177 	 */
2178 rte_mempool_free(internals->mode6.mempool);
2179
2180 rte_kvargs_free(internals->kvlist);
2181
2182 return 0;
2183 }
2184
2185 /* forward declaration */
2186 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2187
2188 static int
2189 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2190 {
2191 struct bond_dev_private *internals = dev->data->dev_private;
2192 struct bond_slave_details slave;
2193 int ret;
2194
2195 uint16_t max_nb_rx_queues = UINT16_MAX;
2196 uint16_t max_nb_tx_queues = UINT16_MAX;
2197 uint16_t max_rx_desc_lim = UINT16_MAX;
2198 uint16_t max_tx_desc_lim = UINT16_MAX;
2199
2200 dev_info->max_mac_addrs = BOND_MAX_MAC_ADDRS;
2201
2202 dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2203 internals->candidate_max_rx_pktlen :
2204 RTE_ETHER_MAX_JUMBO_FRAME_LEN;
2205
2206 	/* The maximum number of tx/rx queues that the bonded device can
2207 	 * support is the minimum across its slaves, as all slaves must be
2208 	 * capable of supporting the same number of tx/rx queues.
2209 	 */
2210 if (internals->slave_count > 0) {
2211 struct rte_eth_dev_info slave_info;
2212 uint16_t idx;
2213
2214 for (idx = 0; idx < internals->slave_count; idx++) {
2215 slave = internals->slaves[idx];
2216 ret = rte_eth_dev_info_get(slave.port_id, &slave_info);
2217 if (ret != 0) {
2218 RTE_BOND_LOG(ERR,
2219 "%s: Error during getting device (port %u) info: %s\n",
2220 __func__,
2221 slave.port_id,
2222 strerror(-ret));
2223
2224 return ret;
2225 }
2226
2227 if (slave_info.max_rx_queues < max_nb_rx_queues)
2228 max_nb_rx_queues = slave_info.max_rx_queues;
2229
2230 if (slave_info.max_tx_queues < max_nb_tx_queues)
2231 max_nb_tx_queues = slave_info.max_tx_queues;
2232
2233 if (slave_info.rx_desc_lim.nb_max < max_rx_desc_lim)
2234 max_rx_desc_lim = slave_info.rx_desc_lim.nb_max;
2235
2236 if (slave_info.tx_desc_lim.nb_max < max_tx_desc_lim)
2237 max_tx_desc_lim = slave_info.tx_desc_lim.nb_max;
2238 }
2239 }
2240
2241 dev_info->max_rx_queues = max_nb_rx_queues;
2242 dev_info->max_tx_queues = max_nb_tx_queues;
2243
2244 memcpy(&dev_info->default_rxconf, &internals->default_rxconf,
2245 sizeof(dev_info->default_rxconf));
2246 memcpy(&dev_info->default_txconf, &internals->default_txconf,
2247 sizeof(dev_info->default_txconf));
2248
2249 dev_info->rx_desc_lim.nb_max = max_rx_desc_lim;
2250 dev_info->tx_desc_lim.nb_max = max_tx_desc_lim;
2251
2252 /**
2253 * If dedicated hw queues enabled for link bonding device in LACP mode
2254 * then we need to reduce the maximum number of data path queues by 1.
2255 */
2256 if (internals->mode == BONDING_MODE_8023AD &&
2257 internals->mode4.dedicated_queues.enabled == 1) {
2258 dev_info->max_rx_queues--;
2259 dev_info->max_tx_queues--;
2260 }
2261
2262 dev_info->min_rx_bufsize = 0;
2263
2264 dev_info->rx_offload_capa = internals->rx_offload_capa;
2265 dev_info->tx_offload_capa = internals->tx_offload_capa;
2266 dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
2267 dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
2268 dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2269
2270 dev_info->reta_size = internals->reta_size;
2271 dev_info->hash_key_size = internals->rss_key_len;
2272
2273 return 0;
2274 }
2275
2276 static int
2277 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2278 {
2279 int res;
2280 uint16_t i;
2281 struct bond_dev_private *internals = dev->data->dev_private;
2282
2283 /* don't do this while a slave is being added */
2284 rte_spinlock_lock(&internals->lock);
2285
2286 if (on)
2287 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2288 else
2289 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2290
2291 for (i = 0; i < internals->slave_count; i++) {
2292 uint16_t port_id = internals->slaves[i].port_id;
2293
2294 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2295 if (res == ENOTSUP)
2296 RTE_BOND_LOG(WARNING,
2297 "Setting VLAN filter on slave port %u not supported.",
2298 port_id);
2299 }
2300
2301 rte_spinlock_unlock(&internals->lock);
2302 return 0;
2303 }
2304
2305 static int
2306 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2307 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2308 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2309 {
2310 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2311 rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2312 0, dev->data->numa_node);
2313 if (bd_rx_q == NULL)
2314 return -1;
2315
2316 bd_rx_q->queue_id = rx_queue_id;
2317 bd_rx_q->dev_private = dev->data->dev_private;
2318
2319 bd_rx_q->nb_rx_desc = nb_rx_desc;
2320
2321 memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2322 bd_rx_q->mb_pool = mb_pool;
2323
2324 dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2325
2326 return 0;
2327 }
2328
2329 static int
2330 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2331 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2332 const struct rte_eth_txconf *tx_conf)
2333 {
2334 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)
2335 rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2336 0, dev->data->numa_node);
2337
2338 if (bd_tx_q == NULL)
2339 return -1;
2340
2341 bd_tx_q->queue_id = tx_queue_id;
2342 bd_tx_q->dev_private = dev->data->dev_private;
2343
2344 bd_tx_q->nb_tx_desc = nb_tx_desc;
2345 memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2346
2347 dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2348
2349 return 0;
2350 }
2351
2352 static void
2353 bond_ethdev_rx_queue_release(struct rte_eth_dev *dev, uint16_t queue_id)
2354 {
2355 void *queue = dev->data->rx_queues[queue_id];
2356
2357 if (queue == NULL)
2358 return;
2359
2360 rte_free(queue);
2361 }
2362
2363 static void
2364 bond_ethdev_tx_queue_release(struct rte_eth_dev *dev, uint16_t queue_id)
2365 {
2366 void *queue = dev->data->tx_queues[queue_id];
2367
2368 if (queue == NULL)
2369 return;
2370
2371 rte_free(queue);
2372 }
2373
2374 static void
2375 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2376 {
2377 struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2378 struct bond_dev_private *internals;
2379
2380 /* Default value for polling slave found is true as we don't want to
2381 * disable the polling thread if we cannot get the lock */
2382 int i, polling_slave_found = 1;
2383
2384 if (cb_arg == NULL)
2385 return;
2386
2387 bonded_ethdev = cb_arg;
2388 internals = bonded_ethdev->data->dev_private;
2389
2390 if (!bonded_ethdev->data->dev_started ||
2391 !internals->link_status_polling_enabled)
2392 return;
2393
2394 	/* If the device is currently being configured then don't check the
2395 	 * slaves' link status; wait until the next period */
2396 if (rte_spinlock_trylock(&internals->lock)) {
2397 if (internals->slave_count > 0)
2398 polling_slave_found = 0;
2399
2400 for (i = 0; i < internals->slave_count; i++) {
2401 if (!internals->slaves[i].link_status_poll_enabled)
2402 continue;
2403
2404 slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2405 polling_slave_found = 1;
2406
2407 /* Update slave link status */
2408 (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2409 internals->slaves[i].link_status_wait_to_complete);
2410
2411 /* if link status has changed since last checked then call lsc
2412 * event callback */
2413 if (slave_ethdev->data->dev_link.link_status !=
2414 internals->slaves[i].last_link_status) {
2415 internals->slaves[i].last_link_status =
2416 slave_ethdev->data->dev_link.link_status;
2417
2418 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2419 RTE_ETH_EVENT_INTR_LSC,
2420 &bonded_ethdev->data->port_id,
2421 NULL);
2422 }
2423 }
2424 rte_spinlock_unlock(&internals->lock);
2425 }
2426
2427 if (polling_slave_found)
2428 /* Set alarm to continue monitoring link status of slave ethdev's */
2429 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2430 bond_ethdev_slave_link_status_change_monitor, cb_arg);
2431 }
2432
2433 static int
2434 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2435 {
2436 int (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2437
2438 struct bond_dev_private *bond_ctx;
2439 struct rte_eth_link slave_link;
2440
2441 bool one_link_update_succeeded;
2442 uint32_t idx;
2443 int ret;
2444
2445 bond_ctx = ethdev->data->dev_private;
2446
2447 ethdev->data->dev_link.link_speed = RTE_ETH_SPEED_NUM_NONE;
2448
2449 if (ethdev->data->dev_started == 0 ||
2450 bond_ctx->active_slave_count == 0) {
2451 ethdev->data->dev_link.link_status = RTE_ETH_LINK_DOWN;
2452 return 0;
2453 }
2454
2455 ethdev->data->dev_link.link_status = RTE_ETH_LINK_UP;
2456
2457 if (wait_to_complete)
2458 link_update = rte_eth_link_get;
2459 else
2460 link_update = rte_eth_link_get_nowait;
2461
2462 switch (bond_ctx->mode) {
2463 case BONDING_MODE_BROADCAST:
2464 /**
2465 * Setting link speed to UINT32_MAX to ensure we pick up the
2466 * value of the first active slave
2467 */
2468 ethdev->data->dev_link.link_speed = UINT32_MAX;
2469
2470 		/**
2471 		 * The link speed is the minimum of all the slaves' link speeds,
2472 		 * since packet loss would occur on a slave if transmission were
2473 		 * attempted at a rate greater than its own link speed.
2474 		 */
2475 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2476 ret = link_update(bond_ctx->active_slaves[idx],
2477 &slave_link);
2478 if (ret < 0) {
2479 ethdev->data->dev_link.link_speed =
2480 RTE_ETH_SPEED_NUM_NONE;
2481 RTE_BOND_LOG(ERR,
2482 "Slave (port %u) link get failed: %s",
2483 bond_ctx->active_slaves[idx],
2484 rte_strerror(-ret));
2485 return 0;
2486 }
2487
2488 if (slave_link.link_speed <
2489 ethdev->data->dev_link.link_speed)
2490 ethdev->data->dev_link.link_speed =
2491 slave_link.link_speed;
2492 }
2493 break;
2494 case BONDING_MODE_ACTIVE_BACKUP:
2495 /* Current primary slave */
2496 ret = link_update(bond_ctx->current_primary_port, &slave_link);
2497 if (ret < 0) {
2498 RTE_BOND_LOG(ERR, "Slave (port %u) link get failed: %s",
2499 bond_ctx->current_primary_port,
2500 rte_strerror(-ret));
2501 return 0;
2502 }
2503
2504 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2505 break;
2506 case BONDING_MODE_8023AD:
2507 ethdev->data->dev_link.link_autoneg =
2508 bond_ctx->mode4.slave_link.link_autoneg;
2509 ethdev->data->dev_link.link_duplex =
2510 bond_ctx->mode4.slave_link.link_duplex;
2511 /* fall through */
2512 /* to update link speed */
2513 case BONDING_MODE_ROUND_ROBIN:
2514 case BONDING_MODE_BALANCE:
2515 case BONDING_MODE_TLB:
2516 case BONDING_MODE_ALB:
2517 default:
2518 		/**
2519 		 * In these modes the maximum theoretical link speed is the sum
2520 		 * of all the slaves' link speeds
2521 		 */
2522 ethdev->data->dev_link.link_speed = RTE_ETH_SPEED_NUM_NONE;
2523 one_link_update_succeeded = false;
2524
2525 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2526 ret = link_update(bond_ctx->active_slaves[idx],
2527 &slave_link);
2528 if (ret < 0) {
2529 RTE_BOND_LOG(ERR,
2530 "Slave (port %u) link get failed: %s",
2531 bond_ctx->active_slaves[idx],
2532 rte_strerror(-ret));
2533 continue;
2534 }
2535
2536 one_link_update_succeeded = true;
2537 ethdev->data->dev_link.link_speed +=
2538 slave_link.link_speed;
2539 }
2540
2541 if (!one_link_update_succeeded) {
2542 RTE_BOND_LOG(ERR, "All slaves link get failed");
2543 return 0;
2544 }
2545 }
2546
2547
2548 return 0;
2549 }
2550
2551
2552 static int
2553 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2554 {
2555 struct bond_dev_private *internals = dev->data->dev_private;
2556 struct rte_eth_stats slave_stats;
2557 int i, j;
2558
2559 for (i = 0; i < internals->slave_count; i++) {
2560 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2561
2562 stats->ipackets += slave_stats.ipackets;
2563 stats->opackets += slave_stats.opackets;
2564 stats->ibytes += slave_stats.ibytes;
2565 stats->obytes += slave_stats.obytes;
2566 stats->imissed += slave_stats.imissed;
2567 stats->ierrors += slave_stats.ierrors;
2568 stats->oerrors += slave_stats.oerrors;
2569 stats->rx_nombuf += slave_stats.rx_nombuf;
2570
2571 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2572 stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2573 stats->q_opackets[j] += slave_stats.q_opackets[j];
2574 stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2575 stats->q_obytes[j] += slave_stats.q_obytes[j];
2576 stats->q_errors[j] += slave_stats.q_errors[j];
2577 }
2578
2579 }
2580
2581 return 0;
2582 }
2583
2584 static int
2585 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2586 {
2587 struct bond_dev_private *internals = dev->data->dev_private;
2588 int i;
2589 int err;
2590 int ret;
2591
2592 for (i = 0, err = 0; i < internals->slave_count; i++) {
2593 ret = rte_eth_stats_reset(internals->slaves[i].port_id);
2594 if (ret != 0)
2595 err = ret;
2596 }
2597
2598 return err;
2599 }
2600
2601 static int
2602 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2603 {
2604 struct bond_dev_private *internals = eth_dev->data->dev_private;
2605 int i;
2606 int ret = 0;
2607 uint16_t port_id;
2608
2609 switch (internals->mode) {
2610 /* Promiscuous mode is propagated to all slaves */
2611 case BONDING_MODE_ROUND_ROBIN:
2612 case BONDING_MODE_BALANCE:
2613 case BONDING_MODE_BROADCAST:
2614 case BONDING_MODE_8023AD: {
2615 unsigned int slave_ok = 0;
2616
2617 for (i = 0; i < internals->slave_count; i++) {
2618 port_id = internals->slaves[i].port_id;
2619
2620 ret = rte_eth_promiscuous_enable(port_id);
2621 if (ret != 0)
2622 RTE_BOND_LOG(ERR,
2623 "Failed to enable promiscuous mode for port %u: %s",
2624 port_id, rte_strerror(-ret));
2625 else
2626 slave_ok++;
2627 }
2628 		/*
2629 		 * Report success if the operation succeeded on at least
2630 		 * one slave. Otherwise return the last error code.
2631 		 */
2632 if (slave_ok > 0)
2633 ret = 0;
2634 break;
2635 }
2636 /* Promiscuous mode is propagated only to primary slave */
2637 case BONDING_MODE_ACTIVE_BACKUP:
2638 case BONDING_MODE_TLB:
2639 case BONDING_MODE_ALB:
2640 default:
2641 /* Do not touch promisc when there cannot be primary ports */
2642 if (internals->slave_count == 0)
2643 break;
2644 port_id = internals->current_primary_port;
2645 ret = rte_eth_promiscuous_enable(port_id);
2646 if (ret != 0)
2647 RTE_BOND_LOG(ERR,
2648 "Failed to enable promiscuous mode for port %u: %s",
2649 port_id, rte_strerror(-ret));
2650 }
2651
2652 return ret;
2653 }
2654
2655 static int
2656 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2657 {
2658 struct bond_dev_private *internals = dev->data->dev_private;
2659 int i;
2660 int ret = 0;
2661 uint16_t port_id;
2662
2663 switch (internals->mode) {
2664 /* Promiscuous mode is propagated to all slaves */
2665 case BONDING_MODE_ROUND_ROBIN:
2666 case BONDING_MODE_BALANCE:
2667 case BONDING_MODE_BROADCAST:
2668 case BONDING_MODE_8023AD: {
2669 unsigned int slave_ok = 0;
2670
2671 for (i = 0; i < internals->slave_count; i++) {
2672 port_id = internals->slaves[i].port_id;
2673
2674 if (internals->mode == BONDING_MODE_8023AD &&
2675 bond_mode_8023ad_ports[port_id].forced_rx_flags ==
2676 BOND_8023AD_FORCED_PROMISC) {
2677 slave_ok++;
2678 continue;
2679 }
2680 ret = rte_eth_promiscuous_disable(port_id);
2681 if (ret != 0)
2682 RTE_BOND_LOG(ERR,
2683 "Failed to disable promiscuous mode for port %u: %s",
2684 port_id, rte_strerror(-ret));
2685 else
2686 slave_ok++;
2687 }
2688 		/*
2689 		 * Report success if the operation succeeded on at least
2690 		 * one slave. Otherwise return the last error code.
2691 		 */
2692 if (slave_ok > 0)
2693 ret = 0;
2694 break;
2695 }
2696 /* Promiscuous mode is propagated only to primary slave */
2697 case BONDING_MODE_ACTIVE_BACKUP:
2698 case BONDING_MODE_TLB:
2699 case BONDING_MODE_ALB:
2700 default:
2701 /* Do not touch promisc when there cannot be primary ports */
2702 if (internals->slave_count == 0)
2703 break;
2704 port_id = internals->current_primary_port;
2705 ret = rte_eth_promiscuous_disable(port_id);
2706 if (ret != 0)
2707 RTE_BOND_LOG(ERR,
2708 "Failed to disable promiscuous mode for port %u: %s",
2709 port_id, rte_strerror(-ret));
2710 }
2711
2712 return ret;
2713 }
2714
2715 static int
2716 bond_ethdev_promiscuous_update(struct rte_eth_dev *dev)
2717 {
2718 struct bond_dev_private *internals = dev->data->dev_private;
2719 uint16_t port_id = internals->current_primary_port;
2720
2721 switch (internals->mode) {
2722 case BONDING_MODE_ROUND_ROBIN:
2723 case BONDING_MODE_BALANCE:
2724 case BONDING_MODE_BROADCAST:
2725 case BONDING_MODE_8023AD:
2726 		/* Promiscuous mode is propagated to all slaves in these
2727 		 * modes, so there is nothing to update for the bonding device.
2728 		 */
2729 break;
2730 case BONDING_MODE_ACTIVE_BACKUP:
2731 case BONDING_MODE_TLB:
2732 case BONDING_MODE_ALB:
2733 default:
2734 		/* In these modes promiscuous mode is propagated only to the
2735 		 * primary slave, so on an active/standby switchover the new
2736 		 * primary slave must be updated to match the bonding device's
2737 		 * promiscuous setting.
2738 		 */
2739 if (rte_eth_promiscuous_get(internals->port_id) == 1)
2740 rte_eth_promiscuous_enable(port_id);
2741 else
2742 rte_eth_promiscuous_disable(port_id);
2743 }
2744
2745 return 0;
2746 }
2747
2748 static int
2749 bond_ethdev_allmulticast_enable(struct rte_eth_dev *eth_dev)
2750 {
2751 struct bond_dev_private *internals = eth_dev->data->dev_private;
2752 int i;
2753 int ret = 0;
2754 uint16_t port_id;
2755
2756 switch (internals->mode) {
2757 /* allmulti mode is propagated to all slaves */
2758 case BONDING_MODE_ROUND_ROBIN:
2759 case BONDING_MODE_BALANCE:
2760 case BONDING_MODE_BROADCAST:
2761 case BONDING_MODE_8023AD: {
2762 unsigned int slave_ok = 0;
2763
2764 for (i = 0; i < internals->slave_count; i++) {
2765 port_id = internals->slaves[i].port_id;
2766
2767 ret = rte_eth_allmulticast_enable(port_id);
2768 if (ret != 0)
2769 RTE_BOND_LOG(ERR,
2770 "Failed to enable allmulti mode for port %u: %s",
2771 port_id, rte_strerror(-ret));
2772 else
2773 slave_ok++;
2774 }
2775 		/*
2776 		 * Report success if the operation succeeded on at least
2777 		 * one slave. Otherwise return the last error code.
2778 		 */
2779 if (slave_ok > 0)
2780 ret = 0;
2781 break;
2782 }
2783 /* allmulti mode is propagated only to primary slave */
2784 case BONDING_MODE_ACTIVE_BACKUP:
2785 case BONDING_MODE_TLB:
2786 case BONDING_MODE_ALB:
2787 default:
2788 /* Do not touch allmulti when there cannot be primary ports */
2789 if (internals->slave_count == 0)
2790 break;
2791 port_id = internals->current_primary_port;
2792 ret = rte_eth_allmulticast_enable(port_id);
2793 if (ret != 0)
2794 RTE_BOND_LOG(ERR,
2795 "Failed to enable allmulti mode for port %u: %s",
2796 port_id, rte_strerror(-ret));
2797 }
2798
2799 return ret;
2800 }
2801
2802 static int
2803 bond_ethdev_allmulticast_disable(struct rte_eth_dev *eth_dev)
2804 {
2805 struct bond_dev_private *internals = eth_dev->data->dev_private;
2806 int i;
2807 int ret = 0;
2808 uint16_t port_id;
2809
2810 switch (internals->mode) {
2811 /* allmulti mode is propagated to all slaves */
2812 case BONDING_MODE_ROUND_ROBIN:
2813 case BONDING_MODE_BALANCE:
2814 case BONDING_MODE_BROADCAST:
2815 case BONDING_MODE_8023AD: {
2816 unsigned int slave_ok = 0;
2817
2818 for (i = 0; i < internals->slave_count; i++) {
2819 uint16_t port_id = internals->slaves[i].port_id;
2820
2821 if (internals->mode == BONDING_MODE_8023AD &&
2822 bond_mode_8023ad_ports[port_id].forced_rx_flags ==
2823 BOND_8023AD_FORCED_ALLMULTI)
2824 continue;
2825
2826 ret = rte_eth_allmulticast_disable(port_id);
2827 if (ret != 0)
2828 RTE_BOND_LOG(ERR,
2829 "Failed to disable allmulti mode for port %u: %s",
2830 port_id, rte_strerror(-ret));
2831 else
2832 slave_ok++;
2833 }
2834 		/*
2835 		 * Report success if the operation succeeded on at least
2836 		 * one slave. Otherwise return the last error code.
2837 		 */
2838 if (slave_ok > 0)
2839 ret = 0;
2840 break;
2841 }
2842 /* allmulti mode is propagated only to primary slave */
2843 case BONDING_MODE_ACTIVE_BACKUP:
2844 case BONDING_MODE_TLB:
2845 case BONDING_MODE_ALB:
2846 default:
2847 /* Do not touch allmulti when there cannot be primary ports */
2848 if (internals->slave_count == 0)
2849 break;
2850 port_id = internals->current_primary_port;
2851 ret = rte_eth_allmulticast_disable(port_id);
2852 if (ret != 0)
2853 RTE_BOND_LOG(ERR,
2854 "Failed to disable allmulti mode for port %u: %s",
2855 port_id, rte_strerror(-ret));
2856 }
2857
2858 return ret;
2859 }
2860
2861 static int
2862 bond_ethdev_allmulticast_update(struct rte_eth_dev *dev)
2863 {
2864 struct bond_dev_private *internals = dev->data->dev_private;
2865 uint16_t port_id = internals->current_primary_port;
2866
2867 switch (internals->mode) {
2868 case BONDING_MODE_ROUND_ROBIN:
2869 case BONDING_MODE_BALANCE:
2870 case BONDING_MODE_BROADCAST:
2871 case BONDING_MODE_8023AD:
2872 		/* Allmulticast mode is propagated to all slaves in these
2873 		 * modes, so there is nothing to update for the bonding device.
2874 		 */
2875 break;
2876 case BONDING_MODE_ACTIVE_BACKUP:
2877 case BONDING_MODE_TLB:
2878 case BONDING_MODE_ALB:
2879 default:
2880 		/* In these modes allmulticast mode is propagated only to the
2881 		 * primary slave, so on an active/standby switchover the new
2882 		 * primary slave must be updated to match the bonding device's
2883 		 * allmulticast setting.
2884 		 */
2885 if (rte_eth_allmulticast_get(internals->port_id) == 1)
2886 rte_eth_allmulticast_enable(port_id);
2887 else
2888 rte_eth_allmulticast_disable(port_id);
2889 }
2890
2891 return 0;
2892 }
2893
2894 static void
2895 bond_ethdev_delayed_lsc_propagation(void *arg)
2896 {
2897 if (arg == NULL)
2898 return;
2899
2900 rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2901 RTE_ETH_EVENT_INTR_LSC, NULL);
2902 }
2903
2904 int
2905 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2906 void *param, void *ret_param __rte_unused)
2907 {
2908 struct rte_eth_dev *bonded_eth_dev;
2909 struct bond_dev_private *internals;
2910 struct rte_eth_link link;
2911 int rc = -1;
2912 int ret;
2913
2914 uint8_t lsc_flag = 0;
2915 int valid_slave = 0;
2916 uint16_t active_pos;
2917 uint16_t i;
2918
2919 if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2920 return rc;
2921
2922 bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2923
2924 if (check_for_bonded_ethdev(bonded_eth_dev))
2925 return rc;
2926
2927 internals = bonded_eth_dev->data->dev_private;
2928
2929 /* If the device isn't started don't handle interrupts */
2930 if (!bonded_eth_dev->data->dev_started)
2931 return rc;
2932
2933 /* verify that port_id is a valid slave of bonded port */
2934 for (i = 0; i < internals->slave_count; i++) {
2935 if (internals->slaves[i].port_id == port_id) {
2936 valid_slave = 1;
2937 break;
2938 }
2939 }
2940
2941 if (!valid_slave)
2942 return rc;
2943
2944 /* Synchronize lsc callback parallel calls either by real link event
2945 * from the slaves PMDs or by the bonding PMD itself.
2946 */
2947 rte_spinlock_lock(&internals->lsc_lock);
2948
2949 /* Search for port in active port list */
2950 active_pos = find_slave_by_id(internals->active_slaves,
2951 internals->active_slave_count, port_id);
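	/* active_pos == active_slave_count means the port is not currently in the
	 * active slave list */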
2952
2953 ret = rte_eth_link_get_nowait(port_id, &link);
2954 if (ret < 0)
2955 RTE_BOND_LOG(ERR, "Slave (port %u) link get failed", port_id);
2956
2957 if (ret == 0 && link.link_status) {
2958 if (active_pos < internals->active_slave_count)
2959 goto link_update;
2960
2961 /* check link state properties if bonded link is up*/
2962 if (bonded_eth_dev->data->dev_link.link_status == RTE_ETH_LINK_UP) {
2963 if (link_properties_valid(bonded_eth_dev, &link) != 0)
2964 RTE_BOND_LOG(ERR, "Invalid link properties "
2965 "for slave %d in bonding mode %d",
2966 port_id, internals->mode);
2967 } else {
2968 /* inherit slave link properties */
2969 link_properties_set(bonded_eth_dev, &link);
2970 }
2971
2972 /* If no active slave ports then set this port to be
2973 * the primary port.
2974 */
2975 if (internals->active_slave_count < 1) {
2976 /* If first active slave, then change link status */
2977 bonded_eth_dev->data->dev_link.link_status =
2978 RTE_ETH_LINK_UP;
2979 internals->current_primary_port = port_id;
2980 lsc_flag = 1;
2981
2982 mac_address_slaves_update(bonded_eth_dev);
2983 bond_ethdev_promiscuous_update(bonded_eth_dev);
2984 bond_ethdev_allmulticast_update(bonded_eth_dev);
2985 }
2986
2987 activate_slave(bonded_eth_dev, port_id);
2988
2989 /* If the user has defined the primary port then default to
2990 * using it.
2991 */
2992 if (internals->user_defined_primary_port &&
2993 internals->primary_port == port_id)
2994 bond_ethdev_primary_set(internals, port_id);
2995 } else {
2996 if (active_pos == internals->active_slave_count)
2997 goto link_update;
2998
2999 /* Remove from active slave list */
3000 deactivate_slave(bonded_eth_dev, port_id);
3001
3002 if (internals->active_slave_count < 1)
3003 lsc_flag = 1;
3004
3005 		/* Update the primary id: take the first active slave from the list,
3006 		 * or fall back to the configured primary port if none is available */
3007 if (port_id == internals->current_primary_port) {
3008 if (internals->active_slave_count > 0)
3009 bond_ethdev_primary_set(internals,
3010 internals->active_slaves[0]);
3011 else
3012 internals->current_primary_port = internals->primary_port;
3013 mac_address_slaves_update(bonded_eth_dev);
3014 bond_ethdev_promiscuous_update(bonded_eth_dev);
3015 bond_ethdev_allmulticast_update(bonded_eth_dev);
3016 }
3017 }
3018
3019 link_update:
3020 /**
3021 * Update bonded device link properties after any change to active
3022 * slaves
3023 */
3024 bond_ethdev_link_update(bonded_eth_dev, 0);
3025
3026 if (lsc_flag) {
3027 /* Cancel any possible outstanding interrupts if delays are enabled */
3028 if (internals->link_up_delay_ms > 0 ||
3029 internals->link_down_delay_ms > 0)
3030 rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
3031 bonded_eth_dev);
3032
3033 if (bonded_eth_dev->data->dev_link.link_status) {
3034 if (internals->link_up_delay_ms > 0)
3035 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
3036 bond_ethdev_delayed_lsc_propagation,
3037 (void *)bonded_eth_dev);
3038 else
3039 rte_eth_dev_callback_process(bonded_eth_dev,
3040 RTE_ETH_EVENT_INTR_LSC,
3041 NULL);
3042
3043 } else {
3044 if (internals->link_down_delay_ms > 0)
3045 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
3046 bond_ethdev_delayed_lsc_propagation,
3047 (void *)bonded_eth_dev);
3048 else
3049 rte_eth_dev_callback_process(bonded_eth_dev,
3050 RTE_ETH_EVENT_INTR_LSC,
3051 NULL);
3052 }
3053 }
3054
3055 rte_spinlock_unlock(&internals->lsc_lock);
3056
3057 return rc;
3058 }
3059
3060 static int
3061 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
3062 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
3063 {
3064 unsigned i, j;
3065 int result = 0;
3066 int slave_reta_size;
3067 unsigned reta_count;
3068 struct bond_dev_private *internals = dev->data->dev_private;
3069
3070 if (reta_size != internals->reta_size)
3071 return -EINVAL;
3072
3073 /* Copy RETA table */
3074 reta_count = (reta_size + RTE_ETH_RETA_GROUP_SIZE - 1) /
3075 RTE_ETH_RETA_GROUP_SIZE;
3076
3077 for (i = 0; i < reta_count; i++) {
3078 internals->reta_conf[i].mask = reta_conf[i].mask;
3079 for (j = 0; j < RTE_ETH_RETA_GROUP_SIZE; j++)
3080 if ((reta_conf[i].mask >> j) & 0x01)
3081 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
3082 }
3083
3084 /* Fill rest of array */
3085 for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
3086 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
3087 sizeof(internals->reta_conf[0]) * reta_count);
3088
3089 /* Propagate RETA over slaves */
3090 for (i = 0; i < internals->slave_count; i++) {
3091 slave_reta_size = internals->slaves[i].reta_size;
3092 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
3093 &internals->reta_conf[0], slave_reta_size);
3094 if (result < 0)
3095 return result;
3096 }
3097
3098 return 0;
3099 }
3100
3101 static int
3102 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
3103 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
3104 {
3105 int i, j;
3106 struct bond_dev_private *internals = dev->data->dev_private;
3107
3108 if (reta_size != internals->reta_size)
3109 return -EINVAL;
3110
3111 /* Copy RETA table */
3112 for (i = 0; i < reta_size / RTE_ETH_RETA_GROUP_SIZE; i++)
3113 for (j = 0; j < RTE_ETH_RETA_GROUP_SIZE; j++)
3114 if ((reta_conf[i].mask >> j) & 0x01)
3115 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
3116
3117 return 0;
3118 }
3119
3120 static int
3121 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
3122 struct rte_eth_rss_conf *rss_conf)
3123 {
3124 int i, result = 0;
3125 struct bond_dev_private *internals = dev->data->dev_private;
3126 struct rte_eth_rss_conf bond_rss_conf;
3127
3128 memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
3129
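	/* Restrict the requested hash functions to those the bonded device supports */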
3130 bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
3131
3132 if (bond_rss_conf.rss_hf != 0)
3133 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
3134
3135 if (bond_rss_conf.rss_key) {
3136 if (bond_rss_conf.rss_key_len < internals->rss_key_len)
3137 return -EINVAL;
3138 else if (bond_rss_conf.rss_key_len > internals->rss_key_len)
3139 RTE_BOND_LOG(WARNING, "rss_key will be truncated");
3140
3141 memcpy(internals->rss_key, bond_rss_conf.rss_key,
3142 internals->rss_key_len);
3143 bond_rss_conf.rss_key_len = internals->rss_key_len;
3144 }
3145
3146 for (i = 0; i < internals->slave_count; i++) {
3147 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
3148 &bond_rss_conf);
3149 if (result < 0)
3150 return result;
3151 }
3152
3153 return 0;
3154 }
3155
3156 static int
3157 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
3158 struct rte_eth_rss_conf *rss_conf)
3159 {
3160 struct bond_dev_private *internals = dev->data->dev_private;
3161
3162 rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
3163 rss_conf->rss_key_len = internals->rss_key_len;
3164 if (rss_conf->rss_key)
3165 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
3166
3167 return 0;
3168 }
3169
3170 static int
3171 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
3172 {
3173 struct rte_eth_dev *slave_eth_dev;
3174 struct bond_dev_private *internals = dev->data->dev_private;
3175 int ret, i;
3176
3177 rte_spinlock_lock(&internals->lock);
3178
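	/* First check that every slave supports mtu_set; only then apply the new
	 * MTU to all of them */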
3179 for (i = 0; i < internals->slave_count; i++) {
3180 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
3181 if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
3182 rte_spinlock_unlock(&internals->lock);
3183 return -ENOTSUP;
3184 }
3185 }
3186 for (i = 0; i < internals->slave_count; i++) {
3187 ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
3188 if (ret < 0) {
3189 rte_spinlock_unlock(&internals->lock);
3190 return ret;
3191 }
3192 }
3193
3194 rte_spinlock_unlock(&internals->lock);
3195 return 0;
3196 }
3197
3198 static int
3199 bond_ethdev_mac_address_set(struct rte_eth_dev *dev,
3200 struct rte_ether_addr *addr)
3201 {
3202 if (mac_address_set(dev, addr)) {
3203 RTE_BOND_LOG(ERR, "Failed to update MAC address");
3204 return -EINVAL;
3205 }
3206
3207 return 0;
3208 }
3209
3210 static int
3211 bond_flow_ops_get(struct rte_eth_dev *dev __rte_unused,
3212 const struct rte_flow_ops **ops)
3213 {
3214 *ops = &bond_flow_ops;
3215 return 0;
3216 }
3217
3218 static int
3219 bond_ethdev_mac_addr_add(struct rte_eth_dev *dev,
3220 struct rte_ether_addr *mac_addr,
3221 __rte_unused uint32_t index, uint32_t vmdq)
3222 {
3223 struct rte_eth_dev *slave_eth_dev;
3224 struct bond_dev_private *internals = dev->data->dev_private;
3225 int ret, i;
3226
3227 rte_spinlock_lock(&internals->lock);
3228
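	/* Check that every slave supports MAC address add/remove before modifying
	 * any of them */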
3229 for (i = 0; i < internals->slave_count; i++) {
3230 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
3231 if (*slave_eth_dev->dev_ops->mac_addr_add == NULL ||
3232 *slave_eth_dev->dev_ops->mac_addr_remove == NULL) {
3233 ret = -ENOTSUP;
3234 goto end;
3235 }
3236 }
3237
3238 for (i = 0; i < internals->slave_count; i++) {
3239 ret = rte_eth_dev_mac_addr_add(internals->slaves[i].port_id,
3240 mac_addr, vmdq);
3241 if (ret < 0) {
3242 /* rollback */
3243 for (i--; i >= 0; i--)
3244 rte_eth_dev_mac_addr_remove(
3245 internals->slaves[i].port_id, mac_addr);
3246 goto end;
3247 }
3248 }
3249
3250 ret = 0;
3251 end:
3252 rte_spinlock_unlock(&internals->lock);
3253 return ret;
3254 }
3255
3256 static void
3257 bond_ethdev_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
3258 {
3259 struct rte_eth_dev *slave_eth_dev;
3260 struct bond_dev_private *internals = dev->data->dev_private;
3261 int i;
3262
3263 rte_spinlock_lock(&internals->lock);
3264
3265 for (i = 0; i < internals->slave_count; i++) {
3266 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
3267 if (*slave_eth_dev->dev_ops->mac_addr_remove == NULL)
3268 goto end;
3269 }
3270
3271 struct rte_ether_addr *mac_addr = &dev->data->mac_addrs[index];
3272
3273 for (i = 0; i < internals->slave_count; i++)
3274 rte_eth_dev_mac_addr_remove(internals->slaves[i].port_id,
3275 mac_addr);
3276
3277 end:
3278 rte_spinlock_unlock(&internals->lock);
3279 }
3280
3281 const struct eth_dev_ops default_dev_ops = {
3282 .dev_start = bond_ethdev_start,
3283 .dev_stop = bond_ethdev_stop,
3284 .dev_close = bond_ethdev_close,
3285 .dev_configure = bond_ethdev_configure,
3286 .dev_infos_get = bond_ethdev_info,
3287 .vlan_filter_set = bond_ethdev_vlan_filter_set,
3288 .rx_queue_setup = bond_ethdev_rx_queue_setup,
3289 .tx_queue_setup = bond_ethdev_tx_queue_setup,
3290 .rx_queue_release = bond_ethdev_rx_queue_release,
3291 .tx_queue_release = bond_ethdev_tx_queue_release,
3292 .link_update = bond_ethdev_link_update,
3293 .stats_get = bond_ethdev_stats_get,
3294 .stats_reset = bond_ethdev_stats_reset,
3295 .promiscuous_enable = bond_ethdev_promiscuous_enable,
3296 .promiscuous_disable = bond_ethdev_promiscuous_disable,
3297 .allmulticast_enable = bond_ethdev_allmulticast_enable,
3298 .allmulticast_disable = bond_ethdev_allmulticast_disable,
3299 .reta_update = bond_ethdev_rss_reta_update,
3300 .reta_query = bond_ethdev_rss_reta_query,
3301 .rss_hash_update = bond_ethdev_rss_hash_update,
3302 .rss_hash_conf_get = bond_ethdev_rss_hash_conf_get,
3303 .mtu_set = bond_ethdev_mtu_set,
3304 .mac_addr_set = bond_ethdev_mac_address_set,
3305 .mac_addr_add = bond_ethdev_mac_addr_add,
3306 .mac_addr_remove = bond_ethdev_mac_addr_remove,
3307 .flow_ops_get = bond_flow_ops_get
3308 };
3309
3310 static int
3311 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
3312 {
3313 const char *name = rte_vdev_device_name(dev);
3314 uint8_t socket_id = dev->device.numa_node;
3315 struct bond_dev_private *internals = NULL;
3316 struct rte_eth_dev *eth_dev = NULL;
3317 uint32_t vlan_filter_bmp_size;
3318
3319 /* now do all data allocation - for eth_dev structure, dummy pci driver
3320 * and internal (private) data
3321 */
3322
3323 /* reserve an ethdev entry */
3324 eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
3325 if (eth_dev == NULL) {
3326 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
3327 goto err;
3328 }
3329
3330 internals = eth_dev->data->dev_private;
3331 eth_dev->data->nb_rx_queues = (uint16_t)1;
3332 eth_dev->data->nb_tx_queues = (uint16_t)1;
3333
3334 /* Allocate memory for storing MAC addresses */
3335 eth_dev->data->mac_addrs = rte_zmalloc_socket(name, RTE_ETHER_ADDR_LEN *
3336 BOND_MAX_MAC_ADDRS, 0, socket_id);
3337 if (eth_dev->data->mac_addrs == NULL) {
3338 RTE_BOND_LOG(ERR,
3339 "Failed to allocate %u bytes needed to store MAC addresses",
3340 RTE_ETHER_ADDR_LEN * BOND_MAX_MAC_ADDRS);
3341 goto err;
3342 }
3343
3344 eth_dev->dev_ops = &default_dev_ops;
3345 eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC |
3346 RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
3347
3348 rte_spinlock_init(&internals->lock);
3349 rte_spinlock_init(&internals->lsc_lock);
3350
3351 internals->port_id = eth_dev->data->port_id;
3352 internals->mode = BONDING_MODE_INVALID;
3353 internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
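	/* An out-of-range port id indicates that no primary slave has been selected yet */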
3354 internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
3355 internals->burst_xmit_hash = burst_xmit_l2_hash;
3356 internals->user_defined_mac = 0;
3357
3358 internals->link_status_polling_enabled = 0;
3359
3360 internals->link_status_polling_interval_ms =
3361 DEFAULT_POLLING_INTERVAL_10_MS;
3362 internals->link_down_delay_ms = 0;
3363 internals->link_up_delay_ms = 0;
3364
3365 internals->slave_count = 0;
3366 internals->active_slave_count = 0;
3367 internals->rx_offload_capa = 0;
3368 internals->tx_offload_capa = 0;
3369 internals->rx_queue_offload_capa = 0;
3370 internals->tx_queue_offload_capa = 0;
3371 internals->candidate_max_rx_pktlen = 0;
3372 internals->max_rx_pktlen = 0;
3373
3374 /* Initially allow to choose any offload type */
3375 internals->flow_type_rss_offloads = RTE_ETH_RSS_PROTO_MASK;
3376
3377 memset(&internals->default_rxconf, 0,
3378 sizeof(internals->default_rxconf));
3379 memset(&internals->default_txconf, 0,
3380 sizeof(internals->default_txconf));
3381
3382 memset(&internals->rx_desc_lim, 0, sizeof(internals->rx_desc_lim));
3383 memset(&internals->tx_desc_lim, 0, sizeof(internals->tx_desc_lim));
3384
3385 memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
3386 memset(internals->slaves, 0, sizeof(internals->slaves));
3387
3388 TAILQ_INIT(&internals->flow_list);
3389 internals->flow_isolated_valid = 0;
3390
3391 /* Set mode 4 default configuration */
3392 bond_mode_8023ad_setup(eth_dev, NULL);
3393 if (bond_ethdev_mode_set(eth_dev, mode)) {
3394 RTE_BOND_LOG(ERR, "Failed to set bonded device %u mode to %u",
3395 eth_dev->data->port_id, mode);
3396 goto err;
3397 }
3398
3399 vlan_filter_bmp_size =
3400 rte_bitmap_get_memory_footprint(RTE_ETHER_MAX_VLAN_ID + 1);
3401 internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
3402 RTE_CACHE_LINE_SIZE);
3403 if (internals->vlan_filter_bmpmem == NULL) {
3404 RTE_BOND_LOG(ERR,
3405 "Failed to allocate vlan bitmap for bonded device %u",
3406 eth_dev->data->port_id);
3407 goto err;
3408 }
3409
3410 internals->vlan_filter_bmp = rte_bitmap_init(RTE_ETHER_MAX_VLAN_ID + 1,
3411 internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
3412 if (internals->vlan_filter_bmp == NULL) {
3413 RTE_BOND_LOG(ERR,
3414 "Failed to init vlan bitmap for bonded device %u",
3415 eth_dev->data->port_id);
3416 rte_free(internals->vlan_filter_bmpmem);
3417 goto err;
3418 }
3419
3420 return eth_dev->data->port_id;
3421
3422 err:
3423 rte_free(internals);
3424 if (eth_dev != NULL)
3425 eth_dev->data->dev_private = NULL;
3426 rte_eth_dev_release_port(eth_dev);
3427 return -1;
3428 }
3429
3430 static int
3431 bond_probe(struct rte_vdev_device *dev)
3432 {
3433 const char *name;
3434 struct bond_dev_private *internals;
3435 struct rte_kvargs *kvlist;
3436 uint8_t bonding_mode;
3437 int arg_count, port_id;
3438 int socket_id;
3439 uint8_t agg_mode;
3440 struct rte_eth_dev *eth_dev;
3441
3442 if (!dev)
3443 return -EINVAL;
3444
3445 name = rte_vdev_device_name(dev);
3446 RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name);
3447
3448 if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
3449 eth_dev = rte_eth_dev_attach_secondary(name);
3450 if (!eth_dev) {
3451 RTE_BOND_LOG(ERR, "Failed to probe %s", name);
3452 return -1;
3453 }
3454 /* TODO: request info from primary to set up Rx and Tx */
3455 eth_dev->dev_ops = &default_dev_ops;
3456 eth_dev->device = &dev->device;
3457 rte_eth_dev_probing_finish(eth_dev);
3458 return 0;
3459 }
3460
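	/*
	 * For illustration only: a bonded vdev is typically created with devargs
	 * such as "net_bonding0,mode=1,slave=0000:00:04.0,socket_id=0"; the
	 * kvargs parsed below correspond to those keys.
	 */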
3461 kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3462 pmd_bond_init_valid_arguments);
3463 if (kvlist == NULL) {
3464 RTE_BOND_LOG(ERR, "Invalid args in %s", rte_vdev_device_args(dev));
3465 return -1;
3466 }
3467
3468 /* Parse link bonding mode */
3469 if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3470 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3471 &bond_ethdev_parse_slave_mode_kvarg,
3472 &bonding_mode) != 0) {
3473 RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s",
3474 name);
3475 goto parse_error;
3476 }
3477 } else {
3478 RTE_BOND_LOG(ERR, "Mode must be specified only once for bonded "
3479 "device %s", name);
3480 goto parse_error;
3481 }
3482
3483 /* Parse socket id to create bonding device on */
3484 arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3485 if (arg_count == 1) {
3486 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3487 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
3488 != 0) {
3489 RTE_BOND_LOG(ERR, "Invalid socket Id specified for "
3490 "bonded device %s", name);
3491 goto parse_error;
3492 }
3493 } else if (arg_count > 1) {
3494 RTE_BOND_LOG(ERR, "Socket Id can be specified only once for "
3495 "bonded device %s", name);
3496 goto parse_error;
3497 } else {
3498 socket_id = rte_socket_id();
3499 }
3500
3501 dev->device.numa_node = socket_id;
3502
3503 /* Create link bonding eth device */
3504 port_id = bond_alloc(dev, bonding_mode);
3505 if (port_id < 0) {
3506 RTE_BOND_LOG(ERR, "Failed to create bonded device %s in mode %u on "
3507 "socket %u.", name, bonding_mode, socket_id);
3508 goto parse_error;
3509 }
3510 internals = rte_eth_devices[port_id].data->dev_private;
3511 internals->kvlist = kvlist;
3512
3513 if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3514 if (rte_kvargs_process(kvlist,
3515 PMD_BOND_AGG_MODE_KVARG,
3516 &bond_ethdev_parse_slave_agg_mode_kvarg,
3517 &agg_mode) != 0) {
3518 RTE_BOND_LOG(ERR,
3519 "Failed to parse agg selection mode for bonded device %s",
3520 name);
3521 goto parse_error;
3522 }
3523
3524 if (internals->mode == BONDING_MODE_8023AD)
3525 internals->mode4.agg_selection = agg_mode;
3526 } else {
3527 internals->mode4.agg_selection = AGG_STABLE;
3528 }
3529
3530 rte_eth_dev_probing_finish(&rte_eth_devices[port_id]);
3531 RTE_BOND_LOG(INFO, "Created bonded device %s on port %d in mode %u on "
3532 "socket %u.", name, port_id, bonding_mode, socket_id);
3533 return 0;
3534
3535 parse_error:
3536 rte_kvargs_free(kvlist);
3537
3538 return -1;
3539 }
3540
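/*
 * Remove handler for the bonding vdev. Secondary processes only release
 * their local copy of the port; the primary refuses to remove a bonded
 * device that still has slaves attached and otherwise stops, closes and
 * releases the port.
 */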
3541 static int
3542 bond_remove(struct rte_vdev_device *dev)
3543 {
3544 struct rte_eth_dev *eth_dev;
3545 struct bond_dev_private *internals;
3546 const char *name;
3547 int ret = 0;
3548
3549 if (!dev)
3550 return -EINVAL;
3551
3552 name = rte_vdev_device_name(dev);
3553 RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name);
3554
3555 /* find an ethdev entry */
3556 eth_dev = rte_eth_dev_allocated(name);
3557 if (eth_dev == NULL)
3558 return 0; /* port already released */
3559
3560 if (rte_eal_process_type() != RTE_PROC_PRIMARY)
3561 return rte_eth_dev_release_port(eth_dev);
3562
3563 RTE_ASSERT(eth_dev->device == &dev->device);
3564
3565 internals = eth_dev->data->dev_private;
3566 if (internals->slave_count != 0)
3567 return -EBUSY;
3568
3569 if (eth_dev->data->dev_started == 1) {
3570 ret = bond_ethdev_stop(eth_dev);
3571 bond_ethdev_close(eth_dev);
3572 }
3573 rte_eth_dev_release_port(eth_dev);
3574
3575 return ret;
3576 }
3577
3578 /* This part resolves the slave port ids once all the other pdevs and vdevs
3579 * have been allocated. */
3580 static int
3581 bond_ethdev_configure(struct rte_eth_dev *dev)
3582 {
3583 const char *name = dev->device->name;
3584 struct bond_dev_private *internals = dev->data->dev_private;
3585 struct rte_kvargs *kvlist = internals->kvlist;
3586 int arg_count;
3587 uint16_t port_id = dev - rte_eth_devices;
3588 uint8_t agg_mode;
3589
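	/*
	 * Fallback 40-byte RSS hash key, used when RSS is enabled but the
	 * application does not supply a key of its own.
	 */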
3590 static const uint8_t default_rss_key[40] = {
3591 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3592 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3593 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3594 0xBE, 0xAC, 0x01, 0xFA
3595 };
3596
3597 unsigned i, j;
3598
3599 /*
3600 * If RSS is enabled, fill table with default values and
3601 * set key to the value specified in port RSS configuration.
3602 * Fall back to default RSS key if the key is not specified
3603 */
3604 if (dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS) {
3605 struct rte_eth_rss_conf *rss_conf =
3606 &dev->data->dev_conf.rx_adv_conf.rss_conf;
3607
3608 if (internals->rss_key_len == 0) {
3609 internals->rss_key_len = sizeof(default_rss_key);
3610 }
3611
3612 if (rss_conf->rss_key != NULL) {
3613 if (internals->rss_key_len > rss_conf->rss_key_len) {
3614 RTE_BOND_LOG(ERR, "Invalid rss key length(%u)",
3615 rss_conf->rss_key_len);
3616 return -EINVAL;
3617 }
3618
3619 memcpy(internals->rss_key, rss_conf->rss_key,
3620 internals->rss_key_len);
3621 } else {
3622 if (internals->rss_key_len > sizeof(default_rss_key)) {
3623 /*
3624 * If the rss_key includes both the standard_rss_key and an
3625 * extended_hash_key, the RSS key length is larger than the
3626 * default key length, so a new hash key of the requested
3627 * length has to be generated.
3628 */
3629 for (i = 0; i < internals->rss_key_len; i++)
3630 internals->rss_key[i] = (uint8_t)rte_rand();
3631 } else {
3632 memcpy(internals->rss_key, default_rss_key,
3633 internals->rss_key_len);
3634 }
3635 }
3636
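		/*
		 * Spread the redirection table entries round-robin across the
		 * configured Rx queues.
		 */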
3637 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3638 internals->reta_conf[i].mask = ~0LL;
3639 for (j = 0; j < RTE_ETH_RETA_GROUP_SIZE; j++)
3640 internals->reta_conf[i].reta[j] =
3641 (i * RTE_ETH_RETA_GROUP_SIZE + j) %
3642 dev->data->nb_rx_queues;
3643 }
3644 }
3645
3646 /* set the max_rx_pktlen */
3647 internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3648
3649 /*
3650 * If there is no kvlist, this bonded device was created through the
3651 * bonding API, so there are no device arguments to parse.
3652 */
3653 if (!kvlist)
3654 return 0;
3655
3656 /* Parse MAC address for bonded device */
3657 arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3658 if (arg_count == 1) {
3659 struct rte_ether_addr bond_mac;
3660
3661 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3662 &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3663 RTE_BOND_LOG(INFO, "Invalid mac address for bonded device %s",
3664 name);
3665 return -1;
3666 }
3667
3668 /* Set MAC address */
3669 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3670 RTE_BOND_LOG(ERR,
3671 "Failed to set mac address on bonded device %s",
3672 name);
3673 return -1;
3674 }
3675 } else if (arg_count > 1) {
3676 RTE_BOND_LOG(ERR,
3677 "MAC address can be specified only once for bonded device %s",
3678 name);
3679 return -1;
3680 }
3681
3682 /* Parse/set balance mode transmit policy */
3683 arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3684 if (arg_count == 1) {
3685 uint8_t xmit_policy;
3686
3687 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3688 &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3689 0) {
3690 RTE_BOND_LOG(INFO,
3691 "Invalid xmit policy specified for bonded device %s",
3692 name);
3693 return -1;
3694 }
3695
3696 /* Set balance mode transmit policy */
3697 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3698 RTE_BOND_LOG(ERR,
3699 "Failed to set balance xmit policy on bonded device %s",
3700 name);
3701 return -1;
3702 }
3703 } else if (arg_count > 1) {
3704 RTE_BOND_LOG(ERR,
3705 "Transmit policy can be specified only once for bonded device %s",
3706 name);
3707 return -1;
3708 }
3709
3710 if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3711 if (rte_kvargs_process(kvlist,
3712 PMD_BOND_AGG_MODE_KVARG,
3713 &bond_ethdev_parse_slave_agg_mode_kvarg,
3714 &agg_mode) != 0) {
3715 RTE_BOND_LOG(ERR,
3716 "Failed to parse agg selection mode for bonded device %s",
3717 name);
			return -1;
3718 }
3719 if (internals->mode == BONDING_MODE_8023AD) {
3720 int ret = rte_eth_bond_8023ad_agg_selection_set(port_id,
3721 agg_mode);
3722 if (ret < 0) {
3723 RTE_BOND_LOG(ERR,
3724 "Invalid args for agg selection set for bonded device %s",
3725 name);
3726 return -1;
3727 }
3728 }
3729 }
3730
3731 /* Parse/add slave ports to bonded device */
3732 if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3733 struct bond_ethdev_slave_ports slave_ports;
3734 unsigned i;
3735
3736 memset(&slave_ports, 0, sizeof(slave_ports));
3737
3738 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3739 &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3740 RTE_BOND_LOG(ERR,
3741 "Failed to parse slave ports for bonded device %s",
3742 name);
3743 return -1;
3744 }
3745
3746 for (i = 0; i < slave_ports.slave_count; i++) {
3747 if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3748 RTE_BOND_LOG(ERR,
3749 "Failed to add port %d as slave to bonded device %s",
3750 slave_ports.slaves[i], name);
3751 }
3752 }
3753
3754 } else {
3755 RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name);
3756 return -1;
3757 }
3758
3759 /* Parse/set primary slave port id */
3760 arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3761 if (arg_count == 1) {
3762 uint16_t primary_slave_port_id;
3763
3764 if (rte_kvargs_process(kvlist,
3765 PMD_BOND_PRIMARY_SLAVE_KVARG,
3766 &bond_ethdev_parse_primary_slave_port_id_kvarg,
3767 &primary_slave_port_id) < 0) {
3768 RTE_BOND_LOG(INFO,
3769 "Invalid primary slave port id specified for bonded device %s",
3770 name);
3771 return -1;
3772 }
3773
3774 /* Set primary slave port id */
3775 if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3776 != 0) {
3777 RTE_BOND_LOG(ERR,
3778 "Failed to set primary slave port %d on bonded device %s",
3779 primary_slave_port_id, name);
3780 return -1;
3781 }
3782 } else if (arg_count > 1) {
3783 RTE_BOND_LOG(INFO,
3784 "Primary slave can be specified only once for bonded device %s",
3785 name);
3786 return -1;
3787 }
3788
3789 /* Parse link status monitor polling interval */
3790 arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3791 if (arg_count == 1) {
3792 uint32_t lsc_poll_interval_ms;
3793
3794 if (rte_kvargs_process(kvlist,
3795 PMD_BOND_LSC_POLL_PERIOD_KVARG,
3796 &bond_ethdev_parse_time_ms_kvarg,
3797 &lsc_poll_interval_ms) < 0) {
3798 RTE_BOND_LOG(INFO,
3799 "Invalid lsc polling interval value specified for bonded"
3800 " device %s", name);
3801 return -1;
3802 }
3803
3804 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3805 != 0) {
3806 RTE_BOND_LOG(ERR,
3807 "Failed to set lsc monitor polling interval (%u ms) on bonded device %s",
3808 lsc_poll_interval_ms, name);
3809 return -1;
3810 }
3811 } else if (arg_count > 1) {
3812 RTE_BOND_LOG(INFO,
3813 "LSC polling interval can be specified only once for bonded"
3814 " device %s", name);
3815 return -1;
3816 }
3817
3818 /* Parse link up interrupt propagation delay */
3819 arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3820 if (arg_count == 1) {
3821 uint32_t link_up_delay_ms;
3822
3823 if (rte_kvargs_process(kvlist,
3824 PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3825 &bond_ethdev_parse_time_ms_kvarg,
3826 &link_up_delay_ms) < 0) {
3827 RTE_BOND_LOG(INFO,
3828 "Invalid link up propagation delay value specified for"
3829 " bonded device %s", name);
3830 return -1;
3831 }
3832
3833 /* Set link up propagation delay */
3834 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3835 != 0) {
3836 RTE_BOND_LOG(ERR,
3837 "Failed to set link up propagation delay (%u ms) on bonded"
3838 " device %s", link_up_delay_ms, name);
3839 return -1;
3840 }
3841 } else if (arg_count > 1) {
3842 RTE_BOND_LOG(INFO,
3843 "Link up propagation delay can be specified only once for"
3844 " bonded device %s", name);
3845 return -1;
3846 }
3847
3848 /* Parse link down interrupt propagation delay */
3849 arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3850 if (arg_count == 1) {
3851 uint32_t link_down_delay_ms;
3852
3853 if (rte_kvargs_process(kvlist,
3854 PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3855 &bond_ethdev_parse_time_ms_kvarg,
3856 &link_down_delay_ms) < 0) {
3857 RTE_BOND_LOG(INFO,
3858 "Invalid link down propagation delay value specified for"
3859 " bonded device %s", name);
3860 return -1;
3861 }
3862
3863 /* Set link down propagation delay */
3864 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3865 != 0) {
3866 RTE_BOND_LOG(ERR,
3867 "Failed to set link down propagation delay (%u ms) on bonded device %s",
3868 link_down_delay_ms, name);
3869 return -1;
3870 }
3871 } else if (arg_count > 1) {
3872 RTE_BOND_LOG(INFO,
3873 "Link down propagation delay can be specified only once for bonded device %s",
3874 name);
3875 return -1;
3876 }
3877
3878 /* Configure slaves so the MTU setting can be propagated to them */
3879 for (i = 0; i < internals->slave_count; i++) {
3880 struct rte_eth_dev *slave_ethdev =
3881 &(rte_eth_devices[internals->slaves[i].port_id]);
3882 if (slave_configure(dev, slave_ethdev) != 0) {
3883 RTE_BOND_LOG(ERR,
3884 "bonded port (%d) failed to configure slave device (%d)",
3885 dev->data->port_id,
3886 internals->slaves[i].port_id);
3887 return -1;
3888 }
3889 }
3890 return 0;
3891 }
3892
3893 struct rte_vdev_driver pmd_bond_drv = {
3894 .probe = bond_probe,
3895 .remove = bond_remove,
3896 };
3897
3898 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3899 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3900
3901 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3902 "slave=<ifc> "
3903 "primary=<ifc> "
3904 "mode=[0-6] "
3905 "xmit_policy=[l2 | l23 | l34] "
3906 "agg_mode=[count | stable | bandwidth] "
3907 "socket_id=<int> "
3908 "mac=<mac addr> "
3909 "lsc_poll_period_ms=<int> "
3910 "up_delay=<int> "
3911 "down_delay=<int>");
3912
3913 /* We can't use RTE_LOG_REGISTER_DEFAULT because of the forced name for
3914 * this library, see meson.build.
3915 */
3916 RTE_LOG_REGISTER(bond_logtype, pmd.net.bonding, NOTICE);
3917