1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2017 Intel Corporation
3 */
4 #include <stdlib.h>
5 #include <stdbool.h>
6 #include <netinet/in.h>
7
8 #include <rte_mbuf.h>
9 #include <rte_malloc.h>
10 #include <rte_ethdev_driver.h>
11 #include <rte_ethdev_vdev.h>
12 #include <rte_tcp.h>
13 #include <rte_udp.h>
14 #include <rte_ip.h>
15 #include <rte_ip_frag.h>
16 #include <rte_devargs.h>
17 #include <rte_kvargs.h>
18 #include <rte_bus_vdev.h>
19 #include <rte_alarm.h>
20 #include <rte_cycles.h>
21 #include <rte_string_fns.h>
22
23 #include "rte_eth_bond.h"
24 #include "eth_bond_private.h"
25 #include "eth_bond_8023ad_private.h"
26
27 #define REORDER_PERIOD_MS 10
28 #define DEFAULT_POLLING_INTERVAL_10_MS (10)
29 #define BOND_MAX_MAC_ADDRS 16
30
31 #define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)
32
33 /* Table for statistics in mode 5 TLB */
34 static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
35
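/*
 * Return the size of the VLAN header(s), if any, that follow the Ethernet
 * header and update *proto to the encapsulated EtherType. Both single-tagged
 * and QinQ (double-tagged) frames are handled.
 */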
36 static inline size_t
37 get_vlan_offset(struct rte_ether_hdr *eth_hdr, uint16_t *proto)
38 {
39 size_t vlan_offset = 0;
40
41 if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto ||
42 rte_cpu_to_be_16(RTE_ETHER_TYPE_QINQ) == *proto) {
43 struct rte_vlan_hdr *vlan_hdr =
44 (struct rte_vlan_hdr *)(eth_hdr + 1);
45
46 vlan_offset = sizeof(struct rte_vlan_hdr);
47 *proto = vlan_hdr->eth_proto;
48
49 if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto) {
50 vlan_hdr = vlan_hdr + 1;
51 *proto = vlan_hdr->eth_proto;
52 vlan_offset += sizeof(struct rte_vlan_hdr);
53 }
54 }
55 return vlan_offset;
56 }
57
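/*
 * Default RX burst: poll the active slaves in round-robin order, starting
 * from the slave recorded in the queue, until the requested number of
 * packets has been received or every slave has been polled once.
 */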
58 static uint16_t
59 bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
60 {
61 struct bond_dev_private *internals;
62
63 uint16_t num_rx_total = 0;
64 uint16_t slave_count;
65 uint16_t active_slave;
66 int i;
67
68 	/* Cast to structure containing the bonded device's port id and queue id */
69 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
70 internals = bd_rx_q->dev_private;
71 slave_count = internals->active_slave_count;
72 active_slave = bd_rx_q->active_slave;
73
74 for (i = 0; i < slave_count && nb_pkts; i++) {
75 uint16_t num_rx_slave;
76
77 /* Offset of pointer to *bufs increases as packets are received
78 * from other slaves */
79 num_rx_slave =
80 rte_eth_rx_burst(internals->active_slaves[active_slave],
81 bd_rx_q->queue_id,
82 bufs + num_rx_total, nb_pkts);
83 num_rx_total += num_rx_slave;
84 nb_pkts -= num_rx_slave;
85 if (++active_slave == slave_count)
86 active_slave = 0;
87 }
88
89 if (++bd_rx_q->active_slave >= slave_count)
90 bd_rx_q->active_slave = 0;
91 return num_rx_total;
92 }
93
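/*
 * Active-backup RX burst: traffic is only received on the current primary
 * slave.
 */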
94 static uint16_t
95 bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
96 uint16_t nb_pkts)
97 {
98 struct bond_dev_private *internals;
99
100 	/* Cast to structure containing the bonded device's port id and queue id */
101 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
102
103 internals = bd_rx_q->dev_private;
104
105 return rte_eth_rx_burst(internals->current_primary_port,
106 bd_rx_q->queue_id, bufs, nb_pkts);
107 }
108
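/*
 * Return non-zero if the frame is an untagged slow-protocol packet
 * (LACPDU or marker), i.e. a frame that must be handled by the mode 4
 * state machine rather than delivered to the application.
 */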
109 static inline uint8_t
110 is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
111 {
112 const uint16_t ether_type_slow_be =
113 rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW);
114
115 return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
116 (ethertype == ether_type_slow_be &&
117 (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
118 }
119
120 /*****************************************************************************
121 * Flow director's setup for mode 4 optimization
122 */
123
124 static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
125 .dst.addr_bytes = { 0 },
126 .src.addr_bytes = { 0 },
127 .type = RTE_BE16(RTE_ETHER_TYPE_SLOW),
128 };
129
130 static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
131 .dst.addr_bytes = { 0 },
132 .src.addr_bytes = { 0 },
133 .type = 0xFFFF,
134 };
135
136 static struct rte_flow_item flow_item_8023ad[] = {
137 {
138 .type = RTE_FLOW_ITEM_TYPE_ETH,
139 .spec = &flow_item_eth_type_8023ad,
140 .last = NULL,
141 .mask = &flow_item_eth_mask_type_8023ad,
142 },
143 {
144 .type = RTE_FLOW_ITEM_TYPE_END,
145 .spec = NULL,
146 .last = NULL,
147 .mask = NULL,
148 }
149 };
150
151 const struct rte_flow_attr flow_attr_8023ad = {
152 .group = 0,
153 .priority = 0,
154 .ingress = 1,
155 .egress = 0,
156 .reserved = 0,
157 };
158
159 int
160 bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
161 uint16_t slave_port) {
162 struct rte_eth_dev_info slave_info;
163 struct rte_flow_error error;
164 struct bond_dev_private *internals = bond_dev->data->dev_private;
165
166 const struct rte_flow_action_queue lacp_queue_conf = {
167 .index = 0,
168 };
169
170 const struct rte_flow_action actions[] = {
171 {
172 .type = RTE_FLOW_ACTION_TYPE_QUEUE,
173 .conf = &lacp_queue_conf
174 },
175 {
176 .type = RTE_FLOW_ACTION_TYPE_END,
177 }
178 };
179
180 int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
181 flow_item_8023ad, actions, &error);
182 if (ret < 0) {
183 RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
184 __func__, error.message, slave_port,
185 internals->mode4.dedicated_queues.rx_qid);
186 return -1;
187 }
188
189 ret = rte_eth_dev_info_get(slave_port, &slave_info);
190 if (ret != 0) {
191 RTE_BOND_LOG(ERR,
192 "%s: Error during getting device (port %u) info: %s\n",
193 __func__, slave_port, strerror(-ret));
194
195 return ret;
196 }
197
198 if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
199 slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
200 RTE_BOND_LOG(ERR,
201 			"%s: Slave %d capabilities don't allow allocating additional queues",
202 __func__, slave_port);
203 return -1;
204 }
205
206 return 0;
207 }
208
209 int
210 bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
211 struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
212 struct bond_dev_private *internals = bond_dev->data->dev_private;
213 struct rte_eth_dev_info bond_info;
214 uint16_t idx;
215 int ret;
216
217 	/* Verify that all slaves in the bonding support flow director */
218 if (internals->slave_count > 0) {
219 ret = rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);
220 if (ret != 0) {
221 RTE_BOND_LOG(ERR,
222 "%s: Error during getting device (port %u) info: %s\n",
223 __func__, bond_dev->data->port_id,
224 strerror(-ret));
225
226 return ret;
227 }
228
229 internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
230 internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;
231
232 for (idx = 0; idx < internals->slave_count; idx++) {
233 if (bond_ethdev_8023ad_flow_verify(bond_dev,
234 internals->slaves[idx].port_id) != 0)
235 return -1;
236 }
237 }
238
239 return 0;
240 }
241
242 int
243 bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {
244
245 struct rte_flow_error error;
246 struct bond_dev_private *internals = bond_dev->data->dev_private;
247 struct rte_flow_action_queue lacp_queue_conf = {
248 .index = internals->mode4.dedicated_queues.rx_qid,
249 };
250
251 const struct rte_flow_action actions[] = {
252 {
253 .type = RTE_FLOW_ACTION_TYPE_QUEUE,
254 .conf = &lacp_queue_conf
255 },
256 {
257 .type = RTE_FLOW_ACTION_TYPE_END,
258 }
259 };
260
261 internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
262 &flow_attr_8023ad, flow_item_8023ad, actions, &error);
263 if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
264 RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
265 "(slave_port=%d queue_id=%d)",
266 error.message, slave_port,
267 internals->mode4.dedicated_queues.rx_qid);
268 return -1;
269 }
270
271 return 0;
272 }
273
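/*
 * Mode 4 (802.3AD) RX burst. Packets are read from the active slaves in
 * round-robin order. Slow-protocol frames are passed to the mode 4 state
 * machine (when no dedicated queue is in use) and removed from the returned
 * array, as are frames received while the slave is not collecting or frames
 * the bonded interface should not accept given its promiscuous/allmulti
 * configuration.
 */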
274 static inline uint16_t
275 rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts,
276 bool dedicated_rxq)
277 {
278 	/* Cast to structure containing the bonded device's port id and queue id */
279 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
280 struct bond_dev_private *internals = bd_rx_q->dev_private;
281 struct rte_eth_dev *bonded_eth_dev =
282 &rte_eth_devices[internals->port_id];
283 struct rte_ether_addr *bond_mac = bonded_eth_dev->data->mac_addrs;
284 struct rte_ether_hdr *hdr;
285
286 const uint16_t ether_type_slow_be =
287 rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW);
288 uint16_t num_rx_total = 0; /* Total number of received packets */
289 uint16_t slaves[RTE_MAX_ETHPORTS];
290 uint16_t slave_count, idx;
291
292 uint8_t collecting; /* current slave collecting status */
293 const uint8_t promisc = rte_eth_promiscuous_get(internals->port_id);
294 const uint8_t allmulti = rte_eth_allmulticast_get(internals->port_id);
295 uint8_t subtype;
296 uint16_t i;
297 uint16_t j;
298 uint16_t k;
299
300 	/* Copy slave list to protect against slave up/down changes during rx
301 * bursting */
302 slave_count = internals->active_slave_count;
303 memcpy(slaves, internals->active_slaves,
304 sizeof(internals->active_slaves[0]) * slave_count);
305
306 idx = bd_rx_q->active_slave;
307 if (idx >= slave_count) {
308 bd_rx_q->active_slave = 0;
309 idx = 0;
310 }
311 for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
312 j = num_rx_total;
313 collecting = ACTOR_STATE(&bond_mode_8023ad_ports[slaves[idx]],
314 COLLECTING);
315
316 /* Read packets from this slave */
317 num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
318 &bufs[num_rx_total], nb_pkts - num_rx_total);
319
320 for (k = j; k < 2 && k < num_rx_total; k++)
321 rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));
322
323 /* Handle slow protocol packets. */
324 while (j < num_rx_total) {
325 if (j + 3 < num_rx_total)
326 rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));
327
328 hdr = rte_pktmbuf_mtod(bufs[j], struct rte_ether_hdr *);
329 subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;
330
331 /* Remove packet from array if:
332 * - it is slow packet but no dedicated rxq is present,
333 * - slave is not in collecting state,
334 * - bonding interface is not in promiscuous mode:
335 * - packet is unicast and address does not match,
336 * - packet is multicast and bonding interface
337 * is not in allmulti,
338 */
339 if (unlikely(
340 (!dedicated_rxq &&
341 is_lacp_packets(hdr->ether_type, subtype,
342 bufs[j])) ||
343 !collecting ||
344 (!promisc &&
345 ((rte_is_unicast_ether_addr(&hdr->d_addr) &&
346 !rte_is_same_ether_addr(bond_mac,
347 &hdr->d_addr)) ||
348 (!allmulti &&
349 rte_is_multicast_ether_addr(&hdr->d_addr)))))) {
350
351 if (hdr->ether_type == ether_type_slow_be) {
352 bond_mode_8023ad_handle_slow_pkt(
353 internals, slaves[idx], bufs[j]);
354 } else
355 rte_pktmbuf_free(bufs[j]);
356
357 /* Packet is managed by mode 4 or dropped, shift the array */
358 num_rx_total--;
359 if (j < num_rx_total) {
360 memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
361 (num_rx_total - j));
362 }
363 } else
364 j++;
365 }
366 if (unlikely(++idx == slave_count))
367 idx = 0;
368 }
369
370 if (++bd_rx_q->active_slave >= slave_count)
371 bd_rx_q->active_slave = 0;
372
373 return num_rx_total;
374 }
375
376 static uint16_t
377 bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
378 uint16_t nb_pkts)
379 {
380 return rx_burst_8023ad(queue, bufs, nb_pkts, false);
381 }
382
383 static uint16_t
384 bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
385 uint16_t nb_pkts)
386 {
387 return rx_burst_8023ad(queue, bufs, nb_pkts, true);
388 }
389
390 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
391 uint32_t burstnumberRX;
392 uint32_t burstnumberTX;
393
394 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
395
396 static void
397 arp_op_name(uint16_t arp_op, char *buf, size_t buf_len)
398 {
399 switch (arp_op) {
400 case RTE_ARP_OP_REQUEST:
401 strlcpy(buf, "ARP Request", buf_len);
402 return;
403 case RTE_ARP_OP_REPLY:
404 strlcpy(buf, "ARP Reply", buf_len);
405 return;
406 case RTE_ARP_OP_REVREQUEST:
407 strlcpy(buf, "Reverse ARP Request", buf_len);
408 return;
409 case RTE_ARP_OP_REVREPLY:
410 strlcpy(buf, "Reverse ARP Reply", buf_len);
411 return;
412 case RTE_ARP_OP_INVREQUEST:
413 strlcpy(buf, "Peer Identify Request", buf_len);
414 return;
415 case RTE_ARP_OP_INVREPLY:
416 strlcpy(buf, "Peer Identify Reply", buf_len);
417 return;
418 default:
419 break;
420 }
421 strlcpy(buf, "Unknown", buf_len);
422 return;
423 }
424 #endif
425 #define MaxIPv4String 16
426 static void
427 ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
428 {
429 uint32_t ipv4_addr;
430
431 ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
432 snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
433 (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
434 ipv4_addr & 0xFF);
435 }
436
437 #define MAX_CLIENTS_NUMBER 128
438 uint8_t active_clients;
439 struct client_stats_t {
440 uint16_t port;
441 uint32_t ipv4_addr;
442 uint32_t ipv4_rx_packets;
443 uint32_t ipv4_tx_packets;
444 };
445 struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
446
447 static void
448 update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
449 {
450 int i = 0;
451
452 for (; i < MAX_CLIENTS_NUMBER; i++) {
453 if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
454 /* Just update RX packets number for this client */
455 if (TXorRXindicator == &burstnumberRX)
456 client_stats[i].ipv4_rx_packets++;
457 else
458 client_stats[i].ipv4_tx_packets++;
459 return;
460 }
461 }
462 	/* We have a new client. Insert it into the table, and increment stats */
463 if (TXorRXindicator == &burstnumberRX)
464 client_stats[active_clients].ipv4_rx_packets++;
465 else
466 client_stats[active_clients].ipv4_tx_packets++;
467 client_stats[active_clients].ipv4_addr = addr;
468 client_stats[active_clients].port = port;
469 active_clients++;
470
471 }
472
473 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
474 #define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
475 rte_log(RTE_LOG_DEBUG, bond_logtype, \
476 "%s port:%d SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X SrcIP:%s " \
477 "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X DstIP:%s %s %d\n", \
478 info, \
479 port, \
480 eth_h->s_addr.addr_bytes[0], eth_h->s_addr.addr_bytes[1], \
481 eth_h->s_addr.addr_bytes[2], eth_h->s_addr.addr_bytes[3], \
482 eth_h->s_addr.addr_bytes[4], eth_h->s_addr.addr_bytes[5], \
483 src_ip, \
484 eth_h->d_addr.addr_bytes[0], eth_h->d_addr.addr_bytes[1], \
485 eth_h->d_addr.addr_bytes[2], eth_h->d_addr.addr_bytes[3], \
486 eth_h->d_addr.addr_bytes[4], eth_h->d_addr.addr_bytes[5], \
487 dst_ip, \
488 arp_op, ++burstnumber)
489 #endif
490
491 static void
492 mode6_debug(const char __rte_unused *info,
493 struct rte_ether_hdr *eth_h, uint16_t port,
494 uint32_t __rte_unused *burstnumber)
495 {
496 struct rte_ipv4_hdr *ipv4_h;
497 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
498 struct rte_arp_hdr *arp_h;
499 char dst_ip[16];
500 char ArpOp[24];
501 char buf[16];
502 #endif
503 char src_ip[16];
504
505 uint16_t ether_type = eth_h->ether_type;
506 	uint16_t offset = get_vlan_offset(eth_h, &ether_type);
507
508 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
509 strlcpy(buf, info, 16);
510 #endif
511
512 if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) {
513 ipv4_h = (struct rte_ipv4_hdr *)((char *)(eth_h + 1) + offset);
514 ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
515 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
516 ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
517 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
518 #endif
519 update_client_stats(ipv4_h->src_addr, port, burstnumber);
520 }
521 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
522 else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
523 arp_h = (struct rte_arp_hdr *)((char *)(eth_h + 1) + offset);
524 ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
525 ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
526 arp_op_name(rte_be_to_cpu_16(arp_h->arp_opcode),
527 ArpOp, sizeof(ArpOp));
528 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
529 }
530 #endif
531 }
532 #endif
533
534 static uint16_t
535 bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
536 {
537 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
538 struct bond_dev_private *internals = bd_rx_q->dev_private;
539 struct rte_ether_hdr *eth_h;
540 uint16_t ether_type, offset;
541 uint16_t nb_recv_pkts;
542 int i;
543
544 nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);
545
546 for (i = 0; i < nb_recv_pkts; i++) {
547 eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
548 ether_type = eth_h->ether_type;
549 		offset = get_vlan_offset(eth_h, &ether_type);
550
551 if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
552 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
553 mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
554 #endif
555 bond_mode_alb_arp_recv(eth_h, offset, internals);
556 }
557 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
558 else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4))
559 mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
560 #endif
561 }
562
563 return nb_recv_pkts;
564 }
565
566 static uint16_t
567 bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
568 uint16_t nb_pkts)
569 {
570 struct bond_dev_private *internals;
571 struct bond_tx_queue *bd_tx_q;
572
573 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
574 uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
575
576 uint16_t num_of_slaves;
577 uint16_t slaves[RTE_MAX_ETHPORTS];
578
579 uint16_t num_tx_total = 0, num_tx_slave;
580
581 static int slave_idx = 0;
582 int i, cslave_idx = 0, tx_fail_total = 0;
583
584 bd_tx_q = (struct bond_tx_queue *)queue;
585 internals = bd_tx_q->dev_private;
586
587 /* Copy slave list to protect against slave up/down changes during tx
588 * bursting */
589 num_of_slaves = internals->active_slave_count;
590 memcpy(slaves, internals->active_slaves,
591 sizeof(internals->active_slaves[0]) * num_of_slaves);
592
593 if (num_of_slaves < 1)
594 return num_tx_total;
595
596 	/* Populate each slave's mbuf array with the packets to be sent on it */
597 for (i = 0; i < nb_pkts; i++) {
598 cslave_idx = (slave_idx + i) % num_of_slaves;
599 slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
600 }
601
602 /* increment current slave index so the next call to tx burst starts on the
603 * next slave */
604 slave_idx = ++cslave_idx;
605
606 /* Send packet burst on each slave device */
607 for (i = 0; i < num_of_slaves; i++) {
608 if (slave_nb_pkts[i] > 0) {
609 num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
610 slave_bufs[i], slave_nb_pkts[i]);
611
612 /* if tx burst fails move packets to end of bufs */
613 if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
614 int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;
615
616 tx_fail_total += tx_fail_slave;
617
618 memcpy(&bufs[nb_pkts - tx_fail_total],
619 &slave_bufs[i][num_tx_slave],
620 tx_fail_slave * sizeof(bufs[0]));
621 }
622 num_tx_total += num_tx_slave;
623 }
624 }
625
626 return num_tx_total;
627 }
628
629 static uint16_t
630 bond_ethdev_tx_burst_active_backup(void *queue,
631 struct rte_mbuf **bufs, uint16_t nb_pkts)
632 {
633 struct bond_dev_private *internals;
634 struct bond_tx_queue *bd_tx_q;
635
636 bd_tx_q = (struct bond_tx_queue *)queue;
637 internals = bd_tx_q->dev_private;
638
639 if (internals->active_slave_count < 1)
640 return 0;
641
642 return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
643 bufs, nb_pkts);
644 }
645
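/*
 * Address hash helpers used by the transmit hash policies below: each one
 * XORs the words of the source and destination addresses of its layer.
 */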
646 static inline uint16_t
647 ether_hash(struct rte_ether_hdr *eth_hdr)
648 {
649 unaligned_uint16_t *word_src_addr =
650 (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
651 unaligned_uint16_t *word_dst_addr =
652 (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;
653
654 return (word_src_addr[0] ^ word_dst_addr[0]) ^
655 (word_src_addr[1] ^ word_dst_addr[1]) ^
656 (word_src_addr[2] ^ word_dst_addr[2]);
657 }
658
659 static inline uint32_t
660 ipv4_hash(struct rte_ipv4_hdr *ipv4_hdr)
661 {
662 return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
663 }
664
665 static inline uint32_t
666 ipv6_hash(struct rte_ipv6_hdr *ipv6_hdr)
667 {
668 unaligned_uint32_t *word_src_addr =
669 (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
670 unaligned_uint32_t *word_dst_addr =
671 (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);
672
673 return (word_src_addr[0] ^ word_dst_addr[0]) ^
674 (word_src_addr[1] ^ word_dst_addr[1]) ^
675 (word_src_addr[2] ^ word_dst_addr[2]) ^
676 (word_src_addr[3] ^ word_dst_addr[3]);
677 }
678
679
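/*
 * Layer 2 transmit hash policy: assign each packet to a slave index derived
 * from a hash of its source and destination MAC addresses.
 */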
680 void
681 burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
682 uint16_t slave_count, uint16_t *slaves)
683 {
684 struct rte_ether_hdr *eth_hdr;
685 uint32_t hash;
686 int i;
687
688 for (i = 0; i < nb_pkts; i++) {
689 eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
690
691 hash = ether_hash(eth_hdr);
692
693 slaves[i] = (hash ^= hash >> 8) % slave_count;
694 }
695 }
696
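/*
 * Layer 2+3 transmit hash policy: combine the MAC address hash with a hash
 * of the IPv4/IPv6 source and destination addresses, when present.
 */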
697 void
698 burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
699 uint16_t slave_count, uint16_t *slaves)
700 {
701 uint16_t i;
702 struct rte_ether_hdr *eth_hdr;
703 uint16_t proto;
704 size_t vlan_offset;
705 uint32_t hash, l3hash;
706
707 for (i = 0; i < nb_pkts; i++) {
708 eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
709 l3hash = 0;
710
711 proto = eth_hdr->ether_type;
712 hash = ether_hash(eth_hdr);
713
714 vlan_offset = get_vlan_offset(eth_hdr, &proto);
715
716 if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) == proto) {
717 struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *)
718 ((char *)(eth_hdr + 1) + vlan_offset);
719 l3hash = ipv4_hash(ipv4_hdr);
720
721 } else if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) == proto) {
722 struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *)
723 ((char *)(eth_hdr + 1) + vlan_offset);
724 l3hash = ipv6_hash(ipv6_hdr);
725 }
726
727 hash = hash ^ l3hash;
728 hash ^= hash >> 16;
729 hash ^= hash >> 8;
730
731 slaves[i] = hash % slave_count;
732 }
733 }
734
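/*
 * Layer 3+4 transmit hash policy: hash the IP source and destination
 * addresses together with the TCP/UDP ports, when the L4 header is present
 * and fits inside the packet.
 */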
735 void
736 burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
737 uint16_t slave_count, uint16_t *slaves)
738 {
739 struct rte_ether_hdr *eth_hdr;
740 uint16_t proto;
741 size_t vlan_offset;
742 int i;
743
744 struct rte_udp_hdr *udp_hdr;
745 struct rte_tcp_hdr *tcp_hdr;
746 uint32_t hash, l3hash, l4hash;
747
748 for (i = 0; i < nb_pkts; i++) {
749 eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
750 size_t pkt_end = (size_t)eth_hdr + rte_pktmbuf_data_len(buf[i]);
751 proto = eth_hdr->ether_type;
752 vlan_offset = get_vlan_offset(eth_hdr, &proto);
753 l3hash = 0;
754 l4hash = 0;
755
756 if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) == proto) {
757 struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *)
758 ((char *)(eth_hdr + 1) + vlan_offset);
759 size_t ip_hdr_offset;
760
761 l3hash = ipv4_hash(ipv4_hdr);
762
763 /* there is no L4 header in fragmented packet */
764 if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
765 == 0)) {
766 ip_hdr_offset = (ipv4_hdr->version_ihl
767 & RTE_IPV4_HDR_IHL_MASK) *
768 RTE_IPV4_IHL_MULTIPLIER;
769
770 if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
771 tcp_hdr = (struct rte_tcp_hdr *)
772 ((char *)ipv4_hdr +
773 ip_hdr_offset);
774 if ((size_t)tcp_hdr + sizeof(*tcp_hdr)
775 < pkt_end)
776 l4hash = HASH_L4_PORTS(tcp_hdr);
777 } else if (ipv4_hdr->next_proto_id ==
778 IPPROTO_UDP) {
779 udp_hdr = (struct rte_udp_hdr *)
780 ((char *)ipv4_hdr +
781 ip_hdr_offset);
782 if ((size_t)udp_hdr + sizeof(*udp_hdr)
783 < pkt_end)
784 l4hash = HASH_L4_PORTS(udp_hdr);
785 }
786 }
787 } else if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) == proto) {
788 struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *)
789 ((char *)(eth_hdr + 1) + vlan_offset);
790 l3hash = ipv6_hash(ipv6_hdr);
791
792 if (ipv6_hdr->proto == IPPROTO_TCP) {
793 tcp_hdr = (struct rte_tcp_hdr *)(ipv6_hdr + 1);
794 l4hash = HASH_L4_PORTS(tcp_hdr);
795 } else if (ipv6_hdr->proto == IPPROTO_UDP) {
796 udp_hdr = (struct rte_udp_hdr *)(ipv6_hdr + 1);
797 l4hash = HASH_L4_PORTS(udp_hdr);
798 }
799 }
800
801 hash = l3hash ^ l4hash;
802 hash ^= hash >> 16;
803 hash ^= hash >> 8;
804
805 slaves[i] = hash % slave_count;
806 }
807 }
808
809 struct bwg_slave {
810 uint64_t bwg_left_int;
811 uint64_t bwg_left_remainder;
812 uint16_t slave;
813 };
814
815 void
816 bond_tlb_activate_slave(struct bond_dev_private *internals) {
817 int i;
818
819 for (i = 0; i < internals->active_slave_count; i++) {
820 tlb_last_obytets[internals->active_slaves[i]] = 0;
821 }
822 }
823
824 static int
825 bandwidth_cmp(const void *a, const void *b)
826 {
827 const struct bwg_slave *bwg_a = a;
828 const struct bwg_slave *bwg_b = b;
829 int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
830 int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
831 (int64_t)bwg_a->bwg_left_remainder;
832 if (diff > 0)
833 return 1;
834 else if (diff < 0)
835 return -1;
836 else if (diff2 > 0)
837 return 1;
838 else if (diff2 < 0)
839 return -1;
840 else
841 return 0;
842 }
843
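/*
 * Estimate the transmit capacity left on a slave for the current update
 * window from its link speed and the number of bytes it has sent; the result
 * is used to order slaves in TLB mode.
 */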
844 static void
845 bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
846 struct bwg_slave *bwg_slave)
847 {
848 struct rte_eth_link link_status;
849 int ret;
850
851 ret = rte_eth_link_get_nowait(port_id, &link_status);
852 if (ret < 0) {
853 RTE_BOND_LOG(ERR, "Slave (port %u) link get failed: %s",
854 port_id, rte_strerror(-ret));
855 return;
856 }
857 uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
858 if (link_bwg == 0)
859 return;
860 link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
861 bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
862 bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
863 }
864
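/*
 * Periodic alarm callback for TLB mode: sample each active slave's TX byte
 * counter, compute its remaining bandwidth and re-sort the slave order so
 * that the least loaded slaves are used first. The callback re-arms itself
 * every REORDER_PERIOD_MS milliseconds.
 */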
865 static void
866 bond_ethdev_update_tlb_slave_cb(void *arg)
867 {
868 struct bond_dev_private *internals = arg;
869 struct rte_eth_stats slave_stats;
870 struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
871 uint16_t slave_count;
872 uint64_t tx_bytes;
873
874 uint8_t update_stats = 0;
875 uint16_t slave_id;
876 uint16_t i;
877
878 internals->slave_update_idx++;
879
880
881 if (internals->slave_update_idx >= REORDER_PERIOD_MS)
882 update_stats = 1;
883
884 for (i = 0; i < internals->active_slave_count; i++) {
885 slave_id = internals->active_slaves[i];
886 rte_eth_stats_get(slave_id, &slave_stats);
887 tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
888 bandwidth_left(slave_id, tx_bytes,
889 internals->slave_update_idx, &bwg_array[i]);
890 bwg_array[i].slave = slave_id;
891
892 if (update_stats) {
893 tlb_last_obytets[slave_id] = slave_stats.obytes;
894 }
895 }
896
897 if (update_stats == 1)
898 internals->slave_update_idx = 0;
899
900 slave_count = i;
901 qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
902 for (i = 0; i < slave_count; i++)
903 internals->tlb_slaves_order[i] = bwg_array[i].slave;
904
905 rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
906 (struct bond_dev_private *)internals);
907 }
908
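/*
 * TLB TX burst: packets carrying the primary slave's source MAC address are
 * rewritten with the MAC of the slave they are sent on, then the burst is
 * spread over the slaves in the pre-computed bandwidth order.
 */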
909 static uint16_t
910 bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
911 {
912 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
913 struct bond_dev_private *internals = bd_tx_q->dev_private;
914
915 struct rte_eth_dev *primary_port =
916 &rte_eth_devices[internals->primary_port];
917 uint16_t num_tx_total = 0;
918 uint16_t i, j;
919
920 uint16_t num_of_slaves = internals->active_slave_count;
921 uint16_t slaves[RTE_MAX_ETHPORTS];
922
923 struct rte_ether_hdr *ether_hdr;
924 struct rte_ether_addr primary_slave_addr;
925 struct rte_ether_addr active_slave_addr;
926
927 if (num_of_slaves < 1)
928 return num_tx_total;
929
930 memcpy(slaves, internals->tlb_slaves_order,
931 sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);
932
933
934 rte_ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);
935
936 if (nb_pkts > 3) {
937 for (i = 0; i < 3; i++)
938 rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
939 }
940
941 for (i = 0; i < num_of_slaves; i++) {
942 rte_eth_macaddr_get(slaves[i], &active_slave_addr);
943 for (j = num_tx_total; j < nb_pkts; j++) {
944 if (j + 3 < nb_pkts)
945 rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));
946
947 ether_hdr = rte_pktmbuf_mtod(bufs[j],
948 struct rte_ether_hdr *);
949 			if (rte_is_same_ether_addr(&ether_hdr->s_addr,
950 &primary_slave_addr))
951 rte_ether_addr_copy(&active_slave_addr,
952 						&ether_hdr->s_addr);
953 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
954 mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
955 #endif
956 }
957
958 num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
959 bufs + num_tx_total, nb_pkts - num_tx_total);
960
961 if (num_tx_total == nb_pkts)
962 break;
963 }
964
965 return num_tx_total;
966 }
967
968 void
969 bond_tlb_disable(struct bond_dev_private *internals)
970 {
971 rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
972 }
973
974 void
975 bond_tlb_enable(struct bond_dev_private *internals)
976 {
977 bond_ethdev_update_tlb_slave_cb(internals);
978 }
979
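/*
 * ALB (mode 6) TX burst: ARP packets are steered by the ALB client table and
 * get the chosen slave's MAC as source address, generated ARP update packets
 * are transmitted on their designated slaves, and all remaining traffic is
 * sent using the TLB policy.
 */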
980 static uint16_t
981 bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
982 {
983 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
984 struct bond_dev_private *internals = bd_tx_q->dev_private;
985
986 struct rte_ether_hdr *eth_h;
987 uint16_t ether_type, offset;
988
989 struct client_data *client_info;
990
991 /*
992 * We create transmit buffers for every slave and one additional to send
993 	 * through tlb. In the worst case every packet will be sent on one port.
994 */
995 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
996 uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };
997
998 /*
999 * We create separate transmit buffers for update packets as they won't
1000 * be counted in num_tx_total.
1001 */
1002 struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
1003 uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };
1004
1005 struct rte_mbuf *upd_pkt;
1006 size_t pkt_size;
1007
1008 uint16_t num_send, num_not_send = 0;
1009 uint16_t num_tx_total = 0;
1010 uint16_t slave_idx;
1011
1012 int i, j;
1013
1014 /* Search tx buffer for ARP packets and forward them to alb */
1015 for (i = 0; i < nb_pkts; i++) {
1016 eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
1017 ether_type = eth_h->ether_type;
1018 		offset = get_vlan_offset(eth_h, &ether_type);
1019
1020 if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
1021 slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);
1022
1023 /* Change src mac in eth header */
1024 			rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);
1025
1026 /* Add packet to slave tx buffer */
1027 slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
1028 slave_bufs_pkts[slave_idx]++;
1029 } else {
1030 /* If packet is not ARP, send it with TLB policy */
1031 slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
1032 bufs[i];
1033 slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
1034 }
1035 }
1036
1037 /* Update connected client ARP tables */
1038 if (internals->mode6.ntt) {
1039 for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
1040 client_info = &internals->mode6.client_table[i];
1041
1042 if (client_info->in_use) {
1043 /* Allocate new packet to send ARP update on current slave */
1044 upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
1045 if (upd_pkt == NULL) {
1046 RTE_BOND_LOG(ERR,
1047 "Failed to allocate ARP packet from pool");
1048 continue;
1049 }
1050 pkt_size = sizeof(struct rte_ether_hdr) +
1051 sizeof(struct rte_arp_hdr) +
1052 client_info->vlan_count *
1053 sizeof(struct rte_vlan_hdr);
1054 upd_pkt->data_len = pkt_size;
1055 upd_pkt->pkt_len = pkt_size;
1056
1057 slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
1058 internals);
1059
1060 /* Add packet to update tx buffer */
1061 update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
1062 update_bufs_pkts[slave_idx]++;
1063 }
1064 }
1065 internals->mode6.ntt = 0;
1066 }
1067
1068 /* Send ARP packets on proper slaves */
1069 for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1070 if (slave_bufs_pkts[i] > 0) {
1071 num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
1072 slave_bufs[i], slave_bufs_pkts[i]);
1073 for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
1074 bufs[nb_pkts - 1 - num_not_send - j] =
1075 slave_bufs[i][nb_pkts - 1 - j];
1076 }
1077
1078 num_tx_total += num_send;
1079 num_not_send += slave_bufs_pkts[i] - num_send;
1080
1081 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1082 /* Print TX stats including update packets */
1083 for (j = 0; j < slave_bufs_pkts[i]; j++) {
1084 eth_h = rte_pktmbuf_mtod(slave_bufs[i][j],
1085 struct rte_ether_hdr *);
1086 mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
1087 }
1088 #endif
1089 }
1090 }
1091
1092 /* Send update packets on proper slaves */
1093 for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1094 if (update_bufs_pkts[i] > 0) {
1095 num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
1096 update_bufs_pkts[i]);
1097 for (j = num_send; j < update_bufs_pkts[i]; j++) {
1098 rte_pktmbuf_free(update_bufs[i][j]);
1099 }
1100 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1101 for (j = 0; j < update_bufs_pkts[i]; j++) {
1102 eth_h = rte_pktmbuf_mtod(update_bufs[i][j],
1103 struct rte_ether_hdr *);
1104 mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
1105 }
1106 #endif
1107 }
1108 }
1109
1110 /* Send non-ARP packets using tlb policy */
1111 if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
1112 num_send = bond_ethdev_tx_burst_tlb(queue,
1113 slave_bufs[RTE_MAX_ETHPORTS],
1114 slave_bufs_pkts[RTE_MAX_ETHPORTS]);
1115
1116 for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
1117 bufs[nb_pkts - 1 - num_not_send - j] =
1118 slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
1119 }
1120
1121 num_tx_total += num_send;
1122 }
1123
1124 return num_tx_total;
1125 }
1126
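/*
 * Common balance transmit path: hash every packet to one of the supplied
 * slaves according to the configured xmit policy, transmit the per-slave
 * bursts and move any packets that could not be sent to the tail of bufs.
 */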
1127 static inline uint16_t
1128 tx_burst_balance(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs,
1129 uint16_t *slave_port_ids, uint16_t slave_count)
1130 {
1131 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1132 struct bond_dev_private *internals = bd_tx_q->dev_private;
1133
1134 /* Array to sort mbufs for transmission on each slave into */
1135 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1136 /* Number of mbufs for transmission on each slave */
1137 uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1138 /* Mapping array generated by hash function to map mbufs to slaves */
1139 uint16_t bufs_slave_port_idxs[nb_bufs];
1140
1141 uint16_t slave_tx_count;
1142 uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1143
1144 uint16_t i;
1145
1146 /*
1147 	 * Populate the per-slave mbuf arrays with the packets to be sent on each
1148 	 * slave, selecting the output slave using a hash based on the xmit policy
1149 */
1150 internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
1151 bufs_slave_port_idxs);
1152
1153 for (i = 0; i < nb_bufs; i++) {
1154 /* Populate slave mbuf arrays with mbufs for that slave. */
1155 uint16_t slave_idx = bufs_slave_port_idxs[i];
1156
1157 slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
1158 }
1159
1160 /* Send packet burst on each slave device */
1161 for (i = 0; i < slave_count; i++) {
1162 if (slave_nb_bufs[i] == 0)
1163 continue;
1164
1165 slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1166 bd_tx_q->queue_id, slave_bufs[i],
1167 slave_nb_bufs[i]);
1168
1169 total_tx_count += slave_tx_count;
1170
1171 /* If tx burst fails move packets to end of bufs */
1172 if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1173 int slave_tx_fail_count = slave_nb_bufs[i] -
1174 slave_tx_count;
1175 total_tx_fail_count += slave_tx_fail_count;
1176 memcpy(&bufs[nb_bufs - total_tx_fail_count],
1177 &slave_bufs[i][slave_tx_count],
1178 slave_tx_fail_count * sizeof(bufs[0]));
1179 }
1180 }
1181
1182 return total_tx_count;
1183 }
1184
1185 static uint16_t
1186 bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
1187 uint16_t nb_bufs)
1188 {
1189 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1190 struct bond_dev_private *internals = bd_tx_q->dev_private;
1191
1192 uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1193 uint16_t slave_count;
1194
1195 if (unlikely(nb_bufs == 0))
1196 return 0;
1197
1198 /* Copy slave list to protect against slave up/down changes during tx
1199 * bursting
1200 */
1201 slave_count = internals->active_slave_count;
1202 if (unlikely(slave_count < 1))
1203 return 0;
1204
1205 memcpy(slave_port_ids, internals->active_slaves,
1206 sizeof(slave_port_ids[0]) * slave_count);
1207 return tx_burst_balance(queue, bufs, nb_bufs, slave_port_ids,
1208 slave_count);
1209 }
1210
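/*
 * Mode 4 transmit path: unless a dedicated control queue is in use, first
 * drain pending LACP control packets from each slave's tx_ring, then
 * distribute the data packets over the slaves that are currently in the
 * DISTRIBUTING state using the balance transmit path.
 */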
1211 static inline uint16_t
1212 tx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs,
1213 bool dedicated_txq)
1214 {
1215 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1216 struct bond_dev_private *internals = bd_tx_q->dev_private;
1217
1218 uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1219 uint16_t slave_count;
1220
1221 uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
1222 uint16_t dist_slave_count;
1223
1224 uint16_t slave_tx_count;
1225
1226 uint16_t i;
1227
1228 /* Copy slave list to protect against slave up/down changes during tx
1229 * bursting */
1230 slave_count = internals->active_slave_count;
1231 if (unlikely(slave_count < 1))
1232 return 0;
1233
1234 memcpy(slave_port_ids, internals->active_slaves,
1235 sizeof(slave_port_ids[0]) * slave_count);
1236
1237 if (dedicated_txq)
1238 goto skip_tx_ring;
1239
1240 /* Check for LACP control packets and send if available */
1241 for (i = 0; i < slave_count; i++) {
1242 struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
1243 struct rte_mbuf *ctrl_pkt = NULL;
1244
1245 if (likely(rte_ring_empty(port->tx_ring)))
1246 continue;
1247
1248 if (rte_ring_dequeue(port->tx_ring,
1249 (void **)&ctrl_pkt) != -ENOENT) {
1250 slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1251 bd_tx_q->queue_id, &ctrl_pkt, 1);
1252 /*
1253 * re-enqueue LAG control plane packets to buffering
1254 * ring if transmission fails so the packet isn't lost.
1255 */
1256 if (slave_tx_count != 1)
1257 rte_ring_enqueue(port->tx_ring, ctrl_pkt);
1258 }
1259 }
1260
1261 skip_tx_ring:
1262 if (unlikely(nb_bufs == 0))
1263 return 0;
1264
1265 dist_slave_count = 0;
1266 for (i = 0; i < slave_count; i++) {
1267 struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
1268
1269 if (ACTOR_STATE(port, DISTRIBUTING))
1270 dist_slave_port_ids[dist_slave_count++] =
1271 slave_port_ids[i];
1272 }
1273
1274 if (unlikely(dist_slave_count < 1))
1275 return 0;
1276
1277 return tx_burst_balance(queue, bufs, nb_bufs, dist_slave_port_ids,
1278 dist_slave_count);
1279 }
1280
1281 static uint16_t
1282 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
1283 uint16_t nb_bufs)
1284 {
1285 return tx_burst_8023ad(queue, bufs, nb_bufs, false);
1286 }
1287
1288 static uint16_t
1289 bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
1290 uint16_t nb_bufs)
1291 {
1292 return tx_burst_8023ad(queue, bufs, nb_bufs, true);
1293 }
1294
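/*
 * Broadcast TX burst: increase each mbuf's reference count and transmit the
 * whole burst on every active slave. The return value is the count achieved
 * by the most successful slave; surplus references of packets that other
 * slaves failed to send are freed here.
 */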
1295 static uint16_t
1296 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1297 uint16_t nb_pkts)
1298 {
1299 struct bond_dev_private *internals;
1300 struct bond_tx_queue *bd_tx_q;
1301
1302 uint16_t slaves[RTE_MAX_ETHPORTS];
1303 uint8_t tx_failed_flag = 0;
1304 uint16_t num_of_slaves;
1305
1306 uint16_t max_nb_of_tx_pkts = 0;
1307
1308 int slave_tx_total[RTE_MAX_ETHPORTS];
1309 int i, most_successful_tx_slave = -1;
1310
1311 bd_tx_q = (struct bond_tx_queue *)queue;
1312 internals = bd_tx_q->dev_private;
1313
1314 /* Copy slave list to protect against slave up/down changes during tx
1315 * bursting */
1316 num_of_slaves = internals->active_slave_count;
1317 memcpy(slaves, internals->active_slaves,
1318 sizeof(internals->active_slaves[0]) * num_of_slaves);
1319
1320 if (num_of_slaves < 1)
1321 return 0;
1322
1323 /* Increment reference count on mbufs */
1324 for (i = 0; i < nb_pkts; i++)
1325 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1326
1327 /* Transmit burst on each active slave */
1328 for (i = 0; i < num_of_slaves; i++) {
1329 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1330 bufs, nb_pkts);
1331
1332 if (unlikely(slave_tx_total[i] < nb_pkts))
1333 tx_failed_flag = 1;
1334
1335 /* record the value and slave index for the slave which transmits the
1336 * maximum number of packets */
1337 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1338 max_nb_of_tx_pkts = slave_tx_total[i];
1339 most_successful_tx_slave = i;
1340 }
1341 }
1342
1343 /* if slaves fail to transmit packets from burst, the calling application
1344 * is not expected to know about multiple references to packets so we must
1345 * handle failures of all packets except those of the most successful slave
1346 */
1347 if (unlikely(tx_failed_flag))
1348 for (i = 0; i < num_of_slaves; i++)
1349 if (i != most_successful_tx_slave)
1350 while (slave_tx_total[i] < nb_pkts)
1351 rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1352
1353 return max_nb_of_tx_pkts;
1354 }
1355
1356 static void
1357 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1358 {
1359 struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1360
1361 if (bond_ctx->mode == BONDING_MODE_8023AD) {
1362 /**
1363 * If in mode 4 then save the link properties of the first
1364 * slave, all subsequent slaves must match these properties
1365 */
1366 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1367
1368 bond_link->link_autoneg = slave_link->link_autoneg;
1369 bond_link->link_duplex = slave_link->link_duplex;
1370 bond_link->link_speed = slave_link->link_speed;
1371 } else {
1372 /**
1373 * In any other mode the link properties are set to default
1374 * values of AUTONEG/DUPLEX
1375 */
1376 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1377 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1378 }
1379 }
1380
1381 static int
1382 link_properties_valid(struct rte_eth_dev *ethdev,
1383 struct rte_eth_link *slave_link)
1384 {
1385 struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1386
1387 if (bond_ctx->mode == BONDING_MODE_8023AD) {
1388 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1389
1390 if (bond_link->link_duplex != slave_link->link_duplex ||
1391 bond_link->link_autoneg != slave_link->link_autoneg ||
1392 bond_link->link_speed != slave_link->link_speed)
1393 return -1;
1394 }
1395
1396 return 0;
1397 }
1398
1399 int
1400 mac_address_get(struct rte_eth_dev *eth_dev,
1401 struct rte_ether_addr *dst_mac_addr)
1402 {
1403 struct rte_ether_addr *mac_addr;
1404
1405 if (eth_dev == NULL) {
1406 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1407 return -1;
1408 }
1409
1410 if (dst_mac_addr == NULL) {
1411 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1412 return -1;
1413 }
1414
1415 mac_addr = eth_dev->data->mac_addrs;
1416
1417 rte_ether_addr_copy(mac_addr, dst_mac_addr);
1418 return 0;
1419 }
1420
1421 int
1422 mac_address_set(struct rte_eth_dev *eth_dev,
1423 struct rte_ether_addr *new_mac_addr)
1424 {
1425 struct rte_ether_addr *mac_addr;
1426
1427 if (eth_dev == NULL) {
1428 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1429 return -1;
1430 }
1431
1432 if (new_mac_addr == NULL) {
1433 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1434 return -1;
1435 }
1436
1437 mac_addr = eth_dev->data->mac_addrs;
1438
1439 /* If new MAC is different to current MAC then update */
1440 if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1441 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1442
1443 return 0;
1444 }
1445
1446 static const struct rte_ether_addr null_mac_addr;
1447
1448 /*
1449 * Add additional MAC addresses to the slave
1450 */
1451 int
1452 slave_add_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1453 uint16_t slave_port_id)
1454 {
1455 int i, ret;
1456 struct rte_ether_addr *mac_addr;
1457
1458 for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1459 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1460 if (rte_is_same_ether_addr(mac_addr, &null_mac_addr))
1461 break;
1462
1463 ret = rte_eth_dev_mac_addr_add(slave_port_id, mac_addr, 0);
1464 if (ret < 0) {
1465 /* rollback */
1466 for (i--; i > 0; i--)
1467 rte_eth_dev_mac_addr_remove(slave_port_id,
1468 &bonded_eth_dev->data->mac_addrs[i]);
1469 return ret;
1470 }
1471 }
1472
1473 return 0;
1474 }
1475
1476 /*
1477 * Remove additional MAC addresses from the slave
1478 */
1479 int
1480 slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1481 uint16_t slave_port_id)
1482 {
1483 int i, rc, ret;
1484 struct rte_ether_addr *mac_addr;
1485
1486 rc = 0;
1487 for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1488 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1489 if (rte_is_same_ether_addr(mac_addr, &null_mac_addr))
1490 break;
1491
1492 ret = rte_eth_dev_mac_addr_remove(slave_port_id, mac_addr);
1493 /* save only the first error */
1494 if (ret < 0 && rc == 0)
1495 rc = ret;
1496 }
1497
1498 return rc;
1499 }
1500
1501 int
1502 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1503 {
1504 struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1505 bool set;
1506 int i;
1507
1508 /* Update slave devices MAC addresses */
1509 if (internals->slave_count < 1)
1510 return -1;
1511
1512 switch (internals->mode) {
1513 case BONDING_MODE_ROUND_ROBIN:
1514 case BONDING_MODE_BALANCE:
1515 case BONDING_MODE_BROADCAST:
1516 for (i = 0; i < internals->slave_count; i++) {
1517 if (rte_eth_dev_default_mac_addr_set(
1518 internals->slaves[i].port_id,
1519 bonded_eth_dev->data->mac_addrs)) {
1520 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1521 internals->slaves[i].port_id);
1522 return -1;
1523 }
1524 }
1525 break;
1526 case BONDING_MODE_8023AD:
1527 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1528 break;
1529 case BONDING_MODE_ACTIVE_BACKUP:
1530 case BONDING_MODE_TLB:
1531 case BONDING_MODE_ALB:
1532 default:
1533 set = true;
1534 for (i = 0; i < internals->slave_count; i++) {
1535 if (internals->slaves[i].port_id ==
1536 internals->current_primary_port) {
1537 if (rte_eth_dev_default_mac_addr_set(
1538 internals->current_primary_port,
1539 bonded_eth_dev->data->mac_addrs)) {
1540 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1541 internals->current_primary_port);
1542 set = false;
1543 }
1544 } else {
1545 if (rte_eth_dev_default_mac_addr_set(
1546 internals->slaves[i].port_id,
1547 &internals->slaves[i].persisted_mac_addr)) {
1548 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1549 internals->slaves[i].port_id);
1550 }
1551 }
1552 }
1553 if (!set)
1554 return -1;
1555 }
1556
1557 return 0;
1558 }
1559
1560 int
1561 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1562 {
1563 struct bond_dev_private *internals;
1564
1565 internals = eth_dev->data->dev_private;
1566
1567 switch (mode) {
1568 case BONDING_MODE_ROUND_ROBIN:
1569 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1570 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1571 break;
1572 case BONDING_MODE_ACTIVE_BACKUP:
1573 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1574 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1575 break;
1576 case BONDING_MODE_BALANCE:
1577 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1578 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1579 break;
1580 case BONDING_MODE_BROADCAST:
1581 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1582 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1583 break;
1584 case BONDING_MODE_8023AD:
1585 if (bond_mode_8023ad_enable(eth_dev) != 0)
1586 return -1;
1587
1588 if (internals->mode4.dedicated_queues.enabled == 0) {
1589 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1590 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1591 RTE_BOND_LOG(WARNING,
1592 "Using mode 4, it is necessary to do TX burst "
1593 "and RX burst at least every 100ms.");
1594 } else {
1595 /* Use flow director's optimization */
1596 eth_dev->rx_pkt_burst =
1597 bond_ethdev_rx_burst_8023ad_fast_queue;
1598 eth_dev->tx_pkt_burst =
1599 bond_ethdev_tx_burst_8023ad_fast_queue;
1600 }
1601 break;
1602 case BONDING_MODE_TLB:
1603 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1604 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1605 break;
1606 case BONDING_MODE_ALB:
1607 if (bond_mode_alb_enable(eth_dev) != 0)
1608 return -1;
1609
1610 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1611 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1612 break;
1613 default:
1614 return -1;
1615 }
1616
1617 internals->mode = mode;
1618
1619 return 0;
1620 }
1621
1622
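/*
 * Create the slave's mempool for slow (LACP) packets if it does not exist
 * yet and, when dedicated queues are enabled, set up the extra RX/TX queue
 * pair reserved for them.
 */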
1623 static int
1624 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1625 struct rte_eth_dev *slave_eth_dev)
1626 {
1627 int errval = 0;
1628 struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1629 struct port *port = &bond_mode_8023ad_ports[slave_eth_dev->data->port_id];
1630
1631 if (port->slow_pool == NULL) {
1632 char mem_name[256];
1633 int slave_id = slave_eth_dev->data->port_id;
1634
1635 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1636 slave_id);
1637 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1638 250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1639 slave_eth_dev->data->numa_node);
1640
1641 /* Any memory allocation failure in initialization is critical because
1642 	 * resources can't be freed, so reinitialization is impossible. */
1643 if (port->slow_pool == NULL) {
1644 rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1645 slave_id, mem_name, rte_strerror(rte_errno));
1646 }
1647 }
1648
1649 if (internals->mode4.dedicated_queues.enabled == 1) {
1650 /* Configure slow Rx queue */
1651
1652 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1653 internals->mode4.dedicated_queues.rx_qid, 128,
1654 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1655 NULL, port->slow_pool);
1656 if (errval != 0) {
1657 RTE_BOND_LOG(ERR,
1658 "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1659 slave_eth_dev->data->port_id,
1660 internals->mode4.dedicated_queues.rx_qid,
1661 errval);
1662 return errval;
1663 }
1664
1665 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1666 internals->mode4.dedicated_queues.tx_qid, 512,
1667 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1668 NULL);
1669 if (errval != 0) {
1670 RTE_BOND_LOG(ERR,
1671 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1672 slave_eth_dev->data->port_id,
1673 internals->mode4.dedicated_queues.tx_qid,
1674 errval);
1675 return errval;
1676 }
1677 }
1678 return 0;
1679 }
1680
1681 int
1682 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1683 struct rte_eth_dev *slave_eth_dev)
1684 {
1685 struct bond_rx_queue *bd_rx_q;
1686 struct bond_tx_queue *bd_tx_q;
1687 uint16_t nb_rx_queues;
1688 uint16_t nb_tx_queues;
1689
1690 int errval;
1691 uint16_t q_id;
1692 struct rte_flow_error flow_error;
1693
1694 struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1695
1696 /* Stop slave */
1697 errval = rte_eth_dev_stop(slave_eth_dev->data->port_id);
1698 if (errval != 0)
1699 RTE_BOND_LOG(ERR, "rte_eth_dev_stop: port %u, err (%d)",
1700 slave_eth_dev->data->port_id, errval);
1701
1702 /* Enable interrupts on slave device if supported */
1703 if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1704 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1705
1706 /* If RSS is enabled for bonding, try to enable it for slaves */
1707 if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1708 if (internals->rss_key_len != 0) {
1709 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1710 internals->rss_key_len;
1711 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1712 internals->rss_key;
1713 } else {
1714 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1715 }
1716
1717 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1718 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1719 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1720 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1721 }
1722
1723 if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
1724 DEV_RX_OFFLOAD_VLAN_FILTER)
1725 slave_eth_dev->data->dev_conf.rxmode.offloads |=
1726 DEV_RX_OFFLOAD_VLAN_FILTER;
1727 else
1728 slave_eth_dev->data->dev_conf.rxmode.offloads &=
1729 ~DEV_RX_OFFLOAD_VLAN_FILTER;
1730
1731 nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1732 nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1733
1734 if (internals->mode == BONDING_MODE_8023AD) {
1735 if (internals->mode4.dedicated_queues.enabled == 1) {
1736 nb_rx_queues++;
1737 nb_tx_queues++;
1738 }
1739 }
1740
1741 errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
1742 bonded_eth_dev->data->mtu);
1743 if (errval != 0 && errval != -ENOTSUP) {
1744 RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
1745 slave_eth_dev->data->port_id, errval);
1746 return errval;
1747 }
1748
1749 /* Configure device */
1750 errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1751 nb_rx_queues, nb_tx_queues,
1752 &(slave_eth_dev->data->dev_conf));
1753 if (errval != 0) {
1754 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
1755 slave_eth_dev->data->port_id, errval);
1756 return errval;
1757 }
1758
1759 /* Setup Rx Queues */
1760 for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1761 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1762
1763 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1764 bd_rx_q->nb_rx_desc,
1765 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1766 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1767 if (errval != 0) {
1768 RTE_BOND_LOG(ERR,
1769 "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1770 slave_eth_dev->data->port_id, q_id, errval);
1771 return errval;
1772 }
1773 }
1774
1775 /* Setup Tx Queues */
1776 for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1777 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1778
1779 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1780 bd_tx_q->nb_tx_desc,
1781 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1782 &bd_tx_q->tx_conf);
1783 if (errval != 0) {
1784 RTE_BOND_LOG(ERR,
1785 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1786 slave_eth_dev->data->port_id, q_id, errval);
1787 return errval;
1788 }
1789 }
1790
1791 if (internals->mode == BONDING_MODE_8023AD &&
1792 internals->mode4.dedicated_queues.enabled == 1) {
1793 		errval = slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev);
1794 		if (errval != 0)
1795 			return errval;
1796
1797 if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1798 slave_eth_dev->data->port_id) != 0) {
1799 			RTE_BOND_LOG(ERR,
1800 				"bond_ethdev_8023ad_flow_verify: port=%d failed",
1801 				slave_eth_dev->data->port_id);
1802 return -1;
1803 }
1804
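		/* Destroy any previously installed dedicated-queue flow rule for
		 * this slave before installing a fresh rule that steers LACP
		 * control traffic to the dedicated Rx queue.
		 */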
1805 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1806 rte_flow_destroy(slave_eth_dev->data->port_id,
1807 internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1808 &flow_error);
1809
1810 bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1811 slave_eth_dev->data->port_id);
1812 }
1813
1814 /* Start device */
1815 errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1816 if (errval != 0) {
1817 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1818 slave_eth_dev->data->port_id, errval);
1819 return -1;
1820 }
1821
1822 /* If RSS is enabled for bonding, synchronize RETA */
1823 if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1824 int i;
1825 struct bond_dev_private *internals;
1826
1827 internals = bonded_eth_dev->data->dev_private;
1828
1829 for (i = 0; i < internals->slave_count; i++) {
1830 if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1831 errval = rte_eth_dev_rss_reta_update(
1832 slave_eth_dev->data->port_id,
1833 &internals->reta_conf[0],
1834 internals->slaves[i].reta_size);
1835 if (errval != 0) {
1836 RTE_BOND_LOG(WARNING,
1837 "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1838 " RSS Configuration for bonding may be inconsistent.",
1839 slave_eth_dev->data->port_id, errval);
1840 }
1841 break;
1842 }
1843 }
1844 }
1845
1846 /* If lsc interrupt is set, check initial slave's link status */
1847 if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1848 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1849 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1850 RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1851 NULL);
1852 }
1853
1854 return 0;
1855 }
1856
1857 void
1858 slave_remove(struct bond_dev_private *internals,
1859 struct rte_eth_dev *slave_eth_dev)
1860 {
1861 uint16_t i;
1862
1863 for (i = 0; i < internals->slave_count; i++)
1864 if (internals->slaves[i].port_id ==
1865 slave_eth_dev->data->port_id)
1866 break;
1867
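	/* Compact the slave array and each flow's per-slave handle array over
	 * the removed entry.
	 */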
1868 if (i < (internals->slave_count - 1)) {
1869 struct rte_flow *flow;
1870
1871 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1872 sizeof(internals->slaves[0]) *
1873 (internals->slave_count - i - 1));
1874 TAILQ_FOREACH(flow, &internals->flow_list, next) {
1875 memmove(&flow->flows[i], &flow->flows[i + 1],
1876 sizeof(flow->flows[0]) *
1877 (internals->slave_count - i - 1));
1878 flow->flows[internals->slave_count - 1] = NULL;
1879 }
1880 }
1881
1882 internals->slave_count--;
1883
1884 /* force reconfiguration of slave interfaces */
1885 rte_eth_dev_internal_reset(slave_eth_dev);
1886 }
1887
1888 static void
1889 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1890
1891 void
1892 slave_add(struct bond_dev_private *internals,
1893 struct rte_eth_dev *slave_eth_dev)
1894 {
1895 struct bond_slave_details *slave_details =
1896 &internals->slaves[internals->slave_count];
1897
1898 slave_details->port_id = slave_eth_dev->data->port_id;
1899 slave_details->last_link_status = 0;
1900
1901 /* Mark slave devices that don't support interrupts so we can
1902 * compensate when we start the bond
1903 */
1904 if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1905 slave_details->link_status_poll_enabled = 1;
1906 }
1907
1908 slave_details->link_status_wait_to_complete = 0;
1909 	/* Save the slave's current MAC address when adding it to the bonded device */
1910 memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1911 sizeof(struct rte_ether_addr));
1912 }
1913
1914 void
1915 bond_ethdev_primary_set(struct bond_dev_private *internals,
1916 uint16_t slave_port_id)
1917 {
1918 int i;
1919
1920 if (internals->active_slave_count < 1)
1921 internals->current_primary_port = slave_port_id;
1922 else
1923 /* Search bonded device slave ports for new proposed primary port */
1924 for (i = 0; i < internals->active_slave_count; i++) {
1925 if (internals->active_slaves[i] == slave_port_id)
1926 internals->current_primary_port = slave_port_id;
1927 }
1928 }
1929
1930 static int
1931 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
1932
1933 static int
1934 bond_ethdev_start(struct rte_eth_dev *eth_dev)
1935 {
1936 struct bond_dev_private *internals;
1937 int i;
1938
1939 /* slave eth dev will be started by bonded device */
1940 if (check_for_bonded_ethdev(eth_dev)) {
1941 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
1942 eth_dev->data->port_id);
1943 return -1;
1944 }
1945
1946 eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
1947 eth_dev->data->dev_started = 1;
1948
1949 internals = eth_dev->data->dev_private;
1950
1951 if (internals->slave_count == 0) {
1952 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
1953 goto out_err;
1954 }
1955
1956 if (internals->user_defined_mac == 0) {
1957 struct rte_ether_addr *new_mac_addr = NULL;
1958
1959 for (i = 0; i < internals->slave_count; i++)
1960 if (internals->slaves[i].port_id == internals->primary_port)
1961 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
1962
1963 if (new_mac_addr == NULL)
1964 goto out_err;
1965
1966 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
1967 RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
1968 eth_dev->data->port_id);
1969 goto out_err;
1970 }
1971 }
1972
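	/* When dedicated queues are used in 802.3ad mode, the slow queues take
	 * the next queue index after the application's data queues; they are
	 * created on each slave by slave_configure().
	 */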
1973 if (internals->mode == BONDING_MODE_8023AD) {
1974 if (internals->mode4.dedicated_queues.enabled == 1) {
1975 internals->mode4.dedicated_queues.rx_qid =
1976 eth_dev->data->nb_rx_queues;
1977 internals->mode4.dedicated_queues.tx_qid =
1978 eth_dev->data->nb_tx_queues;
1979 }
1980 }
1981
1982
1983 /* Reconfigure each slave device if starting bonded device */
1984 for (i = 0; i < internals->slave_count; i++) {
1985 struct rte_eth_dev *slave_ethdev =
1986 &(rte_eth_devices[internals->slaves[i].port_id]);
1987 if (slave_configure(eth_dev, slave_ethdev) != 0) {
1988 RTE_BOND_LOG(ERR,
1989 "bonded port (%d) failed to reconfigure slave device (%d)",
1990 eth_dev->data->port_id,
1991 internals->slaves[i].port_id);
1992 goto out_err;
1993 }
1994 /* We will need to poll for link status if any slave doesn't
1995 * support interrupts
1996 */
1997 if (internals->slaves[i].link_status_poll_enabled)
1998 internals->link_status_polling_enabled = 1;
1999 }
2000
2001 /* start polling if needed */
2002 if (internals->link_status_polling_enabled) {
2003 rte_eal_alarm_set(
2004 internals->link_status_polling_interval_ms * 1000,
2005 bond_ethdev_slave_link_status_change_monitor,
2006 (void *)&rte_eth_devices[internals->port_id]);
2007 }
2008
2009 	/* Update all slave devices' MACs */
2010 if (mac_address_slaves_update(eth_dev) != 0)
2011 goto out_err;
2012
2013 if (internals->user_defined_primary_port)
2014 bond_ethdev_primary_set(internals, internals->primary_port);
2015
2016 if (internals->mode == BONDING_MODE_8023AD)
2017 bond_mode_8023ad_start(eth_dev);
2018
2019 if (internals->mode == BONDING_MODE_TLB ||
2020 internals->mode == BONDING_MODE_ALB)
2021 bond_tlb_enable(internals);
2022
2023 return 0;
2024
2025 out_err:
2026 eth_dev->data->dev_started = 0;
2027 return -1;
2028 }
2029
2030 static void
2031 bond_ethdev_free_queues(struct rte_eth_dev *dev)
2032 {
2033 uint16_t i;
2034
2035 if (dev->data->rx_queues != NULL) {
2036 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2037 rte_free(dev->data->rx_queues[i]);
2038 dev->data->rx_queues[i] = NULL;
2039 }
2040 dev->data->nb_rx_queues = 0;
2041 }
2042
2043 if (dev->data->tx_queues != NULL) {
2044 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2045 rte_free(dev->data->tx_queues[i]);
2046 dev->data->tx_queues[i] = NULL;
2047 }
2048 dev->data->nb_tx_queues = 0;
2049 }
2050 }
2051
2052 int
2053 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2054 {
2055 struct bond_dev_private *internals = eth_dev->data->dev_private;
2056 uint16_t i;
2057 int ret;
2058
2059 if (internals->mode == BONDING_MODE_8023AD) {
2060 struct port *port;
2061 void *pkt = NULL;
2062
2063 bond_mode_8023ad_stop(eth_dev);
2064
2065 /* Discard all messages to/from mode 4 state machines */
2066 for (i = 0; i < internals->active_slave_count; i++) {
2067 port = &bond_mode_8023ad_ports[internals->active_slaves[i]];
2068
2069 RTE_ASSERT(port->rx_ring != NULL);
2070 while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2071 rte_pktmbuf_free(pkt);
2072
2073 RTE_ASSERT(port->tx_ring != NULL);
2074 while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2075 rte_pktmbuf_free(pkt);
2076 }
2077 }
2078
2079 if (internals->mode == BONDING_MODE_TLB ||
2080 internals->mode == BONDING_MODE_ALB) {
2081 bond_tlb_disable(internals);
2082 for (i = 0; i < internals->active_slave_count; i++)
2083 tlb_last_obytets[internals->active_slaves[i]] = 0;
2084 }
2085
2086 eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2087 eth_dev->data->dev_started = 0;
2088
2089 internals->link_status_polling_enabled = 0;
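	/* Stop and deactivate every slave that is still on the active list */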
2090 for (i = 0; i < internals->slave_count; i++) {
2091 uint16_t slave_id = internals->slaves[i].port_id;
2092 if (find_slave_by_id(internals->active_slaves,
2093 internals->active_slave_count, slave_id) !=
2094 internals->active_slave_count) {
2095 internals->slaves[i].last_link_status = 0;
2096 ret = rte_eth_dev_stop(slave_id);
2097 if (ret != 0) {
2098 RTE_BOND_LOG(ERR, "Failed to stop device on port %u",
2099 slave_id);
2100 return ret;
2101 }
2102 deactivate_slave(eth_dev, slave_id);
2103 }
2104 }
2105
2106 return 0;
2107 }
2108
2109 int
2110 bond_ethdev_close(struct rte_eth_dev *dev)
2111 {
2112 struct bond_dev_private *internals = dev->data->dev_private;
2113 uint16_t bond_port_id = internals->port_id;
2114 int skipped = 0;
2115 struct rte_flow_error ferror;
2116
2117 if (rte_eal_process_type() != RTE_PROC_PRIMARY)
2118 return 0;
2119
2120 RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name);
2121 while (internals->slave_count != skipped) {
2122 uint16_t port_id = internals->slaves[skipped].port_id;
2123
2124 if (rte_eth_dev_stop(port_id) != 0) {
2125 RTE_BOND_LOG(ERR, "Failed to stop device on port %u",
2126 port_id);
2127 skipped++;
2128 }
2129
2130 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2131 RTE_BOND_LOG(ERR,
2132 "Failed to remove port %d from bonded device %s",
2133 port_id, dev->device->name);
2134 skipped++;
2135 }
2136 }
2137 bond_flow_ops.flush(dev, &ferror);
2138 bond_ethdev_free_queues(dev);
2139 rte_bitmap_reset(internals->vlan_filter_bmp);
2140 rte_bitmap_free(internals->vlan_filter_bmp);
2141 rte_free(internals->vlan_filter_bmpmem);
2142
2143 	/* Try to release the mempool used in mode 6. If the bonded
2144 	 * device is not in mode 6, freeing a NULL pointer is not a problem.
2145 */
2146 rte_mempool_free(internals->mode6.mempool);
2147
2148 return 0;
2149 }
2150
2151 /* forward declaration */
2152 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2153
2154 static int
2155 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2156 {
2157 struct bond_dev_private *internals = dev->data->dev_private;
2158 struct bond_slave_details slave;
2159 int ret;
2160
2161 uint16_t max_nb_rx_queues = UINT16_MAX;
2162 uint16_t max_nb_tx_queues = UINT16_MAX;
2163 uint16_t max_rx_desc_lim = UINT16_MAX;
2164 uint16_t max_tx_desc_lim = UINT16_MAX;
2165
2166 dev_info->max_mac_addrs = BOND_MAX_MAC_ADDRS;
2167
2168 dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2169 internals->candidate_max_rx_pktlen :
2170 RTE_ETHER_MAX_JUMBO_FRAME_LEN;
2171
2172 	/* The maximum number of Tx/Rx queues that the bonded device can support
2173 	 * is the minimum of the values reported by the slaves, as all slaves
2174 	 * must be capable of supporting the same number of Tx/Rx queues.
2175 */
2176 if (internals->slave_count > 0) {
2177 struct rte_eth_dev_info slave_info;
2178 uint16_t idx;
2179
2180 for (idx = 0; idx < internals->slave_count; idx++) {
2181 slave = internals->slaves[idx];
2182 ret = rte_eth_dev_info_get(slave.port_id, &slave_info);
2183 if (ret != 0) {
2184 RTE_BOND_LOG(ERR,
2185 "%s: Error during getting device (port %u) info: %s\n",
2186 __func__,
2187 slave.port_id,
2188 strerror(-ret));
2189
2190 return ret;
2191 }
2192
2193 if (slave_info.max_rx_queues < max_nb_rx_queues)
2194 max_nb_rx_queues = slave_info.max_rx_queues;
2195
2196 if (slave_info.max_tx_queues < max_nb_tx_queues)
2197 max_nb_tx_queues = slave_info.max_tx_queues;
2198
2199 if (slave_info.rx_desc_lim.nb_max < max_rx_desc_lim)
2200 max_rx_desc_lim = slave_info.rx_desc_lim.nb_max;
2201
2202 if (slave_info.tx_desc_lim.nb_max < max_tx_desc_lim)
2203 max_tx_desc_lim = slave_info.tx_desc_lim.nb_max;
2204 }
2205 }
2206
2207 dev_info->max_rx_queues = max_nb_rx_queues;
2208 dev_info->max_tx_queues = max_nb_tx_queues;
2209
2210 memcpy(&dev_info->default_rxconf, &internals->default_rxconf,
2211 sizeof(dev_info->default_rxconf));
2212 memcpy(&dev_info->default_txconf, &internals->default_txconf,
2213 sizeof(dev_info->default_txconf));
2214
2215 dev_info->rx_desc_lim.nb_max = max_rx_desc_lim;
2216 dev_info->tx_desc_lim.nb_max = max_tx_desc_lim;
2217
2218 /**
2219 * If dedicated hw queues enabled for link bonding device in LACP mode
2220 * then we need to reduce the maximum number of data path queues by 1.
2221 */
2222 if (internals->mode == BONDING_MODE_8023AD &&
2223 internals->mode4.dedicated_queues.enabled == 1) {
2224 dev_info->max_rx_queues--;
2225 dev_info->max_tx_queues--;
2226 }
2227
2228 dev_info->min_rx_bufsize = 0;
2229
2230 dev_info->rx_offload_capa = internals->rx_offload_capa;
2231 dev_info->tx_offload_capa = internals->tx_offload_capa;
2232 dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
2233 dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
2234 dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2235
2236 dev_info->reta_size = internals->reta_size;
2237
2238 return 0;
2239 }
2240
2241 static int
2242 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2243 {
2244 int res;
2245 uint16_t i;
2246 struct bond_dev_private *internals = dev->data->dev_private;
2247
2248 /* don't do this while a slave is being added */
2249 rte_spinlock_lock(&internals->lock);
2250
2251 if (on)
2252 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2253 else
2254 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2255
2256 for (i = 0; i < internals->slave_count; i++) {
2257 uint16_t port_id = internals->slaves[i].port_id;
2258
2259 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2260 if (res == ENOTSUP)
2261 RTE_BOND_LOG(WARNING,
2262 "Setting VLAN filter on slave port %u not supported.",
2263 port_id);
2264 }
2265
2266 rte_spinlock_unlock(&internals->lock);
2267 return 0;
2268 }
2269
2270 static int
2271 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2272 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2273 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2274 {
2275 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2276 rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2277 0, dev->data->numa_node);
2278 if (bd_rx_q == NULL)
2279 return -1;
2280
2281 bd_rx_q->queue_id = rx_queue_id;
2282 bd_rx_q->dev_private = dev->data->dev_private;
2283
2284 bd_rx_q->nb_rx_desc = nb_rx_desc;
2285
2286 memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2287 bd_rx_q->mb_pool = mb_pool;
2288
2289 dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2290
2291 return 0;
2292 }
2293
2294 static int
2295 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2296 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2297 const struct rte_eth_txconf *tx_conf)
2298 {
2299 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)
2300 rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2301 0, dev->data->numa_node);
2302
2303 if (bd_tx_q == NULL)
2304 return -1;
2305
2306 bd_tx_q->queue_id = tx_queue_id;
2307 bd_tx_q->dev_private = dev->data->dev_private;
2308
2309 bd_tx_q->nb_tx_desc = nb_tx_desc;
2310 memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2311
2312 dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2313
2314 return 0;
2315 }
2316
2317 static void
2318 bond_ethdev_rx_queue_release(void *queue)
2319 {
2320 if (queue == NULL)
2321 return;
2322
2323 rte_free(queue);
2324 }
2325
2326 static void
2327 bond_ethdev_tx_queue_release(void *queue)
2328 {
2329 if (queue == NULL)
2330 return;
2331
2332 rte_free(queue);
2333 }
2334
2335 static void
2336 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2337 {
2338 struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2339 struct bond_dev_private *internals;
2340
2341 /* Default value for polling slave found is true as we don't want to
2342 * disable the polling thread if we cannot get the lock */
2343 int i, polling_slave_found = 1;
2344
2345 if (cb_arg == NULL)
2346 return;
2347
2348 bonded_ethdev = cb_arg;
2349 internals = bonded_ethdev->data->dev_private;
2350
2351 if (!bonded_ethdev->data->dev_started ||
2352 !internals->link_status_polling_enabled)
2353 return;
2354
2355 	/* If the device is currently being configured, don't check the slaves'
2356 	 * link status; wait until the next period */
2357 if (rte_spinlock_trylock(&internals->lock)) {
2358 if (internals->slave_count > 0)
2359 polling_slave_found = 0;
2360
2361 for (i = 0; i < internals->slave_count; i++) {
2362 if (!internals->slaves[i].link_status_poll_enabled)
2363 continue;
2364
2365 slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2366 polling_slave_found = 1;
2367
2368 /* Update slave link status */
2369 (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2370 internals->slaves[i].link_status_wait_to_complete);
2371
2372 /* if link status has changed since last checked then call lsc
2373 * event callback */
2374 if (slave_ethdev->data->dev_link.link_status !=
2375 internals->slaves[i].last_link_status) {
2376 internals->slaves[i].last_link_status =
2377 slave_ethdev->data->dev_link.link_status;
2378
2379 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2380 RTE_ETH_EVENT_INTR_LSC,
2381 &bonded_ethdev->data->port_id,
2382 NULL);
2383 }
2384 }
2385 rte_spinlock_unlock(&internals->lock);
2386 }
2387
2388 if (polling_slave_found)
2389 /* Set alarm to continue monitoring link status of slave ethdev's */
2390 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2391 bond_ethdev_slave_link_status_change_monitor, cb_arg);
2392 }
2393
2394 static int
2395 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2396 {
2397 int (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2398
2399 struct bond_dev_private *bond_ctx;
2400 struct rte_eth_link slave_link;
2401
2402 bool one_link_update_succeeded;
2403 uint32_t idx;
2404 int ret;
2405
2406 bond_ctx = ethdev->data->dev_private;
2407
2408 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2409
2410 if (ethdev->data->dev_started == 0 ||
2411 bond_ctx->active_slave_count == 0) {
2412 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2413 return 0;
2414 }
2415
2416 ethdev->data->dev_link.link_status = ETH_LINK_UP;
2417
2418 if (wait_to_complete)
2419 link_update = rte_eth_link_get;
2420 else
2421 link_update = rte_eth_link_get_nowait;
2422
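	/* The reported bonded link speed depends on the mode: broadcast uses
	 * the minimum active slave speed, active-backup uses the primary
	 * slave's speed, and the remaining modes use the sum of all active
	 * slaves' speeds.
	 */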
2423 switch (bond_ctx->mode) {
2424 case BONDING_MODE_BROADCAST:
2425 /**
2426 * Setting link speed to UINT32_MAX to ensure we pick up the
2427 * value of the first active slave
2428 */
2429 ethdev->data->dev_link.link_speed = UINT32_MAX;
2430
2431 /**
2432 * link speed is minimum value of all the slaves link speed as
2433 * packet loss will occur on this slave if transmission at rates
2434 * greater than this are attempted
2435 */
2436 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2437 ret = link_update(bond_ctx->active_slaves[idx],
2438 &slave_link);
2439 if (ret < 0) {
2440 ethdev->data->dev_link.link_speed =
2441 ETH_SPEED_NUM_NONE;
2442 RTE_BOND_LOG(ERR,
2443 "Slave (port %u) link get failed: %s",
2444 bond_ctx->active_slaves[idx],
2445 rte_strerror(-ret));
2446 return 0;
2447 }
2448
2449 if (slave_link.link_speed <
2450 ethdev->data->dev_link.link_speed)
2451 ethdev->data->dev_link.link_speed =
2452 slave_link.link_speed;
2453 }
2454 break;
2455 case BONDING_MODE_ACTIVE_BACKUP:
2456 /* Current primary slave */
2457 ret = link_update(bond_ctx->current_primary_port, &slave_link);
2458 if (ret < 0) {
2459 RTE_BOND_LOG(ERR, "Slave (port %u) link get failed: %s",
2460 bond_ctx->current_primary_port,
2461 rte_strerror(-ret));
2462 return 0;
2463 }
2464
2465 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2466 break;
2467 case BONDING_MODE_8023AD:
2468 ethdev->data->dev_link.link_autoneg =
2469 bond_ctx->mode4.slave_link.link_autoneg;
2470 ethdev->data->dev_link.link_duplex =
2471 bond_ctx->mode4.slave_link.link_duplex;
2472 /* fall through */
2473 /* to update link speed */
2474 case BONDING_MODE_ROUND_ROBIN:
2475 case BONDING_MODE_BALANCE:
2476 case BONDING_MODE_TLB:
2477 case BONDING_MODE_ALB:
2478 default:
2479 /**
2480 		 * In these modes the maximum theoretical link speed is the sum
2481 		 * of all the slaves' link speeds
2482 */
2483 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2484 one_link_update_succeeded = false;
2485
2486 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2487 ret = link_update(bond_ctx->active_slaves[idx],
2488 &slave_link);
2489 if (ret < 0) {
2490 RTE_BOND_LOG(ERR,
2491 "Slave (port %u) link get failed: %s",
2492 bond_ctx->active_slaves[idx],
2493 rte_strerror(-ret));
2494 continue;
2495 }
2496
2497 one_link_update_succeeded = true;
2498 ethdev->data->dev_link.link_speed +=
2499 slave_link.link_speed;
2500 }
2501
2502 if (!one_link_update_succeeded) {
2503 RTE_BOND_LOG(ERR, "All slaves link get failed");
2504 return 0;
2505 }
2506 }
2507
2508
2509 return 0;
2510 }
2511
2512
2513 static int
2514 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2515 {
2516 struct bond_dev_private *internals = dev->data->dev_private;
2517 struct rte_eth_stats slave_stats;
2518 int i, j;
2519
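	/* Bonded device statistics are the sums of the corresponding slave
	 * counters, including the per-queue counters.
	 */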
2520 for (i = 0; i < internals->slave_count; i++) {
2521 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2522
2523 stats->ipackets += slave_stats.ipackets;
2524 stats->opackets += slave_stats.opackets;
2525 stats->ibytes += slave_stats.ibytes;
2526 stats->obytes += slave_stats.obytes;
2527 stats->imissed += slave_stats.imissed;
2528 stats->ierrors += slave_stats.ierrors;
2529 stats->oerrors += slave_stats.oerrors;
2530 stats->rx_nombuf += slave_stats.rx_nombuf;
2531
2532 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2533 stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2534 stats->q_opackets[j] += slave_stats.q_opackets[j];
2535 stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2536 stats->q_obytes[j] += slave_stats.q_obytes[j];
2537 stats->q_errors[j] += slave_stats.q_errors[j];
2538 }
2539
2540 }
2541
2542 return 0;
2543 }
2544
2545 static int
2546 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2547 {
2548 struct bond_dev_private *internals = dev->data->dev_private;
2549 int i;
2550 int err;
2551 int ret;
2552
2553 for (i = 0, err = 0; i < internals->slave_count; i++) {
2554 ret = rte_eth_stats_reset(internals->slaves[i].port_id);
2555 if (ret != 0)
2556 err = ret;
2557 }
2558
2559 return err;
2560 }
2561
2562 static int
2563 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2564 {
2565 struct bond_dev_private *internals = eth_dev->data->dev_private;
2566 int i;
2567 int ret = 0;
2568 uint16_t port_id;
2569
2570 switch (internals->mode) {
2571 /* Promiscuous mode is propagated to all slaves */
2572 case BONDING_MODE_ROUND_ROBIN:
2573 case BONDING_MODE_BALANCE:
2574 case BONDING_MODE_BROADCAST:
2575 case BONDING_MODE_8023AD: {
2576 unsigned int slave_ok = 0;
2577
2578 for (i = 0; i < internals->slave_count; i++) {
2579 port_id = internals->slaves[i].port_id;
2580
2581 ret = rte_eth_promiscuous_enable(port_id);
2582 if (ret != 0)
2583 RTE_BOND_LOG(ERR,
2584 "Failed to enable promiscuous mode for port %u: %s",
2585 port_id, rte_strerror(-ret));
2586 else
2587 slave_ok++;
2588 }
2589 /*
2590 		 * Report success if the operation succeeded on at least
2591 		 * one slave. Otherwise return the last error code.
2592 */
2593 if (slave_ok > 0)
2594 ret = 0;
2595 break;
2596 }
2597 /* Promiscuous mode is propagated only to primary slave */
2598 case BONDING_MODE_ACTIVE_BACKUP:
2599 case BONDING_MODE_TLB:
2600 case BONDING_MODE_ALB:
2601 default:
2602 /* Do not touch promisc when there cannot be primary ports */
2603 if (internals->slave_count == 0)
2604 break;
2605 port_id = internals->current_primary_port;
2606 ret = rte_eth_promiscuous_enable(port_id);
2607 if (ret != 0)
2608 RTE_BOND_LOG(ERR,
2609 "Failed to enable promiscuous mode for port %u: %s",
2610 port_id, rte_strerror(-ret));
2611 }
2612
2613 return ret;
2614 }
2615
2616 static int
2617 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2618 {
2619 struct bond_dev_private *internals = dev->data->dev_private;
2620 int i;
2621 int ret = 0;
2622 uint16_t port_id;
2623
2624 switch (internals->mode) {
2625 /* Promiscuous mode is propagated to all slaves */
2626 case BONDING_MODE_ROUND_ROBIN:
2627 case BONDING_MODE_BALANCE:
2628 case BONDING_MODE_BROADCAST:
2629 case BONDING_MODE_8023AD: {
2630 unsigned int slave_ok = 0;
2631
2632 for (i = 0; i < internals->slave_count; i++) {
2633 port_id = internals->slaves[i].port_id;
2634
2635 if (internals->mode == BONDING_MODE_8023AD &&
2636 bond_mode_8023ad_ports[port_id].forced_rx_flags ==
2637 BOND_8023AD_FORCED_PROMISC) {
2638 slave_ok++;
2639 continue;
2640 }
2641 ret = rte_eth_promiscuous_disable(port_id);
2642 if (ret != 0)
2643 RTE_BOND_LOG(ERR,
2644 "Failed to disable promiscuous mode for port %u: %s",
2645 port_id, rte_strerror(-ret));
2646 else
2647 slave_ok++;
2648 }
2649 /*
2650 		 * Report success if the operation succeeded on at least
2651 		 * one slave. Otherwise return the last error code.
2652 */
2653 if (slave_ok > 0)
2654 ret = 0;
2655 break;
2656 }
2657 /* Promiscuous mode is propagated only to primary slave */
2658 case BONDING_MODE_ACTIVE_BACKUP:
2659 case BONDING_MODE_TLB:
2660 case BONDING_MODE_ALB:
2661 default:
2662 /* Do not touch promisc when there cannot be primary ports */
2663 if (internals->slave_count == 0)
2664 break;
2665 port_id = internals->current_primary_port;
2666 ret = rte_eth_promiscuous_disable(port_id);
2667 if (ret != 0)
2668 RTE_BOND_LOG(ERR,
2669 "Failed to disable promiscuous mode for port %u: %s",
2670 port_id, rte_strerror(-ret));
2671 }
2672
2673 return ret;
2674 }
2675
2676 static int
2677 bond_ethdev_allmulticast_enable(struct rte_eth_dev *eth_dev)
2678 {
2679 struct bond_dev_private *internals = eth_dev->data->dev_private;
2680 int i;
2681 int ret = 0;
2682 uint16_t port_id;
2683
2684 switch (internals->mode) {
2685 /* allmulti mode is propagated to all slaves */
2686 case BONDING_MODE_ROUND_ROBIN:
2687 case BONDING_MODE_BALANCE:
2688 case BONDING_MODE_BROADCAST:
2689 case BONDING_MODE_8023AD: {
2690 unsigned int slave_ok = 0;
2691
2692 for (i = 0; i < internals->slave_count; i++) {
2693 port_id = internals->slaves[i].port_id;
2694
2695 ret = rte_eth_allmulticast_enable(port_id);
2696 if (ret != 0)
2697 RTE_BOND_LOG(ERR,
2698 "Failed to enable allmulti mode for port %u: %s",
2699 port_id, rte_strerror(-ret));
2700 else
2701 slave_ok++;
2702 }
2703 /*
2704 		 * Report success if the operation succeeded on at least
2705 		 * one slave. Otherwise return the last error code.
2706 */
2707 if (slave_ok > 0)
2708 ret = 0;
2709 break;
2710 }
2711 /* allmulti mode is propagated only to primary slave */
2712 case BONDING_MODE_ACTIVE_BACKUP:
2713 case BONDING_MODE_TLB:
2714 case BONDING_MODE_ALB:
2715 default:
2716 /* Do not touch allmulti when there cannot be primary ports */
2717 if (internals->slave_count == 0)
2718 break;
2719 port_id = internals->current_primary_port;
2720 ret = rte_eth_allmulticast_enable(port_id);
2721 if (ret != 0)
2722 RTE_BOND_LOG(ERR,
2723 "Failed to enable allmulti mode for port %u: %s",
2724 port_id, rte_strerror(-ret));
2725 }
2726
2727 return ret;
2728 }
2729
2730 static int
2731 bond_ethdev_allmulticast_disable(struct rte_eth_dev *eth_dev)
2732 {
2733 struct bond_dev_private *internals = eth_dev->data->dev_private;
2734 int i;
2735 int ret = 0;
2736 uint16_t port_id;
2737
2738 switch (internals->mode) {
2739 /* allmulti mode is propagated to all slaves */
2740 case BONDING_MODE_ROUND_ROBIN:
2741 case BONDING_MODE_BALANCE:
2742 case BONDING_MODE_BROADCAST:
2743 case BONDING_MODE_8023AD: {
2744 unsigned int slave_ok = 0;
2745
2746 for (i = 0; i < internals->slave_count; i++) {
2747 uint16_t port_id = internals->slaves[i].port_id;
2748
2749 if (internals->mode == BONDING_MODE_8023AD &&
2750 bond_mode_8023ad_ports[port_id].forced_rx_flags ==
2751 BOND_8023AD_FORCED_ALLMULTI)
2752 continue;
2753
2754 ret = rte_eth_allmulticast_disable(port_id);
2755 if (ret != 0)
2756 RTE_BOND_LOG(ERR,
2757 "Failed to disable allmulti mode for port %u: %s",
2758 port_id, rte_strerror(-ret));
2759 else
2760 slave_ok++;
2761 }
2762 /*
2763 		 * Report success if the operation succeeded on at least
2764 		 * one slave. Otherwise return the last error code.
2765 */
2766 if (slave_ok > 0)
2767 ret = 0;
2768 break;
2769 }
2770 /* allmulti mode is propagated only to primary slave */
2771 case BONDING_MODE_ACTIVE_BACKUP:
2772 case BONDING_MODE_TLB:
2773 case BONDING_MODE_ALB:
2774 default:
2775 /* Do not touch allmulti when there cannot be primary ports */
2776 if (internals->slave_count == 0)
2777 break;
2778 port_id = internals->current_primary_port;
2779 ret = rte_eth_allmulticast_disable(port_id);
2780 if (ret != 0)
2781 RTE_BOND_LOG(ERR,
2782 "Failed to disable allmulti mode for port %u: %s",
2783 port_id, rte_strerror(-ret));
2784 }
2785
2786 return ret;
2787 }
2788
2789 static void
2790 bond_ethdev_delayed_lsc_propagation(void *arg)
2791 {
2792 if (arg == NULL)
2793 return;
2794
2795 rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2796 RTE_ETH_EVENT_INTR_LSC, NULL);
2797 }
2798
2799 int
2800 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2801 void *param, void *ret_param __rte_unused)
2802 {
2803 struct rte_eth_dev *bonded_eth_dev;
2804 struct bond_dev_private *internals;
2805 struct rte_eth_link link;
2806 int rc = -1;
2807 int ret;
2808
2809 uint8_t lsc_flag = 0;
2810 int valid_slave = 0;
2811 uint16_t active_pos;
2812 uint16_t i;
2813
2814 if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2815 return rc;
2816
2817 bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2818
2819 if (check_for_bonded_ethdev(bonded_eth_dev))
2820 return rc;
2821
2822 internals = bonded_eth_dev->data->dev_private;
2823
2824 /* If the device isn't started don't handle interrupts */
2825 if (!bonded_eth_dev->data->dev_started)
2826 return rc;
2827
2828 /* verify that port_id is a valid slave of bonded port */
2829 for (i = 0; i < internals->slave_count; i++) {
2830 if (internals->slaves[i].port_id == port_id) {
2831 valid_slave = 1;
2832 break;
2833 }
2834 }
2835
2836 if (!valid_slave)
2837 return rc;
2838
2839 	/* Synchronize parallel lsc callback invocations, whether triggered by a
2840 	 * real link event from the slave PMDs or by the bonding PMD itself.
2841 */
2842 rte_spinlock_lock(&internals->lsc_lock);
2843
2844 /* Search for port in active port list */
2845 active_pos = find_slave_by_id(internals->active_slaves,
2846 internals->active_slave_count, port_id);
2847
2848 ret = rte_eth_link_get_nowait(port_id, &link);
2849 if (ret < 0)
2850 RTE_BOND_LOG(ERR, "Slave (port %u) link get failed", port_id);
2851
2852 if (ret == 0 && link.link_status) {
2853 if (active_pos < internals->active_slave_count)
2854 goto link_update;
2855
2856 /* check link state properties if bonded link is up*/
2857 if (bonded_eth_dev->data->dev_link.link_status == ETH_LINK_UP) {
2858 if (link_properties_valid(bonded_eth_dev, &link) != 0)
2859 RTE_BOND_LOG(ERR, "Invalid link properties "
2860 "for slave %d in bonding mode %d",
2861 port_id, internals->mode);
2862 } else {
2863 /* inherit slave link properties */
2864 link_properties_set(bonded_eth_dev, &link);
2865 }
2866
2867 /* If no active slave ports then set this port to be
2868 * the primary port.
2869 */
2870 if (internals->active_slave_count < 1) {
2871 /* If first active slave, then change link status */
2872 bonded_eth_dev->data->dev_link.link_status =
2873 ETH_LINK_UP;
2874 internals->current_primary_port = port_id;
2875 lsc_flag = 1;
2876
2877 mac_address_slaves_update(bonded_eth_dev);
2878 }
2879
2880 activate_slave(bonded_eth_dev, port_id);
2881
2882 /* If the user has defined the primary port then default to
2883 * using it.
2884 */
2885 if (internals->user_defined_primary_port &&
2886 internals->primary_port == port_id)
2887 bond_ethdev_primary_set(internals, port_id);
2888 } else {
2889 if (active_pos == internals->active_slave_count)
2890 goto link_update;
2891
2892 /* Remove from active slave list */
2893 deactivate_slave(bonded_eth_dev, port_id);
2894
2895 if (internals->active_slave_count < 1)
2896 lsc_flag = 1;
2897
2898 		/* Update primary id: take the first active slave from the list, or
2899 		 * if none is available fall back to the configured primary port */
2900 if (port_id == internals->current_primary_port) {
2901 if (internals->active_slave_count > 0)
2902 bond_ethdev_primary_set(internals,
2903 internals->active_slaves[0]);
2904 else
2905 internals->current_primary_port = internals->primary_port;
2906 mac_address_slaves_update(bonded_eth_dev);
2907 }
2908 }
2909
2910 link_update:
2911 /**
2912 * Update bonded device link properties after any change to active
2913 * slaves
2914 */
2915 bond_ethdev_link_update(bonded_eth_dev, 0);
2916
2917 if (lsc_flag) {
2918 /* Cancel any possible outstanding interrupts if delays are enabled */
2919 if (internals->link_up_delay_ms > 0 ||
2920 internals->link_down_delay_ms > 0)
2921 rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2922 bonded_eth_dev);
2923
2924 if (bonded_eth_dev->data->dev_link.link_status) {
2925 if (internals->link_up_delay_ms > 0)
2926 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2927 bond_ethdev_delayed_lsc_propagation,
2928 (void *)bonded_eth_dev);
2929 else
2930 rte_eth_dev_callback_process(bonded_eth_dev,
2931 RTE_ETH_EVENT_INTR_LSC,
2932 NULL);
2933
2934 } else {
2935 if (internals->link_down_delay_ms > 0)
2936 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2937 bond_ethdev_delayed_lsc_propagation,
2938 (void *)bonded_eth_dev);
2939 else
2940 rte_eth_dev_callback_process(bonded_eth_dev,
2941 RTE_ETH_EVENT_INTR_LSC,
2942 NULL);
2943 }
2944 }
2945
2946 rte_spinlock_unlock(&internals->lsc_lock);
2947
2948 return rc;
2949 }
2950
2951 static int
2952 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2953 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2954 {
2955 unsigned i, j;
2956 int result = 0;
2957 int slave_reta_size;
2958 unsigned reta_count;
2959 struct bond_dev_private *internals = dev->data->dev_private;
2960
2961 if (reta_size != internals->reta_size)
2962 return -EINVAL;
2963
2964 /* Copy RETA table */
2965 reta_count = (reta_size + RTE_RETA_GROUP_SIZE - 1) /
2966 RTE_RETA_GROUP_SIZE;
2967
2968 for (i = 0; i < reta_count; i++) {
2969 internals->reta_conf[i].mask = reta_conf[i].mask;
2970 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2971 if ((reta_conf[i].mask >> j) & 0x01)
2972 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2973 }
2974
2975 /* Fill rest of array */
2976 for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2977 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2978 sizeof(internals->reta_conf[0]) * reta_count);
2979
2980 /* Propagate RETA over slaves */
2981 for (i = 0; i < internals->slave_count; i++) {
2982 slave_reta_size = internals->slaves[i].reta_size;
2983 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2984 &internals->reta_conf[0], slave_reta_size);
2985 if (result < 0)
2986 return result;
2987 }
2988
2989 return 0;
2990 }
2991
2992 static int
2993 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2994 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2995 {
2996 int i, j;
2997 struct bond_dev_private *internals = dev->data->dev_private;
2998
2999 if (reta_size != internals->reta_size)
3000 return -EINVAL;
3001
3002 /* Copy RETA table */
3003 for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
3004 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3005 if ((reta_conf[i].mask >> j) & 0x01)
3006 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
3007
3008 return 0;
3009 }
3010
3011 static int
3012 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
3013 struct rte_eth_rss_conf *rss_conf)
3014 {
3015 int i, result = 0;
3016 struct bond_dev_private *internals = dev->data->dev_private;
3017 struct rte_eth_rss_conf bond_rss_conf;
3018
3019 memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
3020
3021 bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
3022
3023 if (bond_rss_conf.rss_hf != 0)
3024 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
3025
3026 if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
3027 sizeof(internals->rss_key)) {
3028 if (bond_rss_conf.rss_key_len == 0)
3029 bond_rss_conf.rss_key_len = 40;
3030 internals->rss_key_len = bond_rss_conf.rss_key_len;
3031 memcpy(internals->rss_key, bond_rss_conf.rss_key,
3032 internals->rss_key_len);
3033 }
3034
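	/* Propagate the masked RSS configuration to every slave, aborting on
	 * the first failure.
	 */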
3035 for (i = 0; i < internals->slave_count; i++) {
3036 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
3037 &bond_rss_conf);
3038 if (result < 0)
3039 return result;
3040 }
3041
3042 return 0;
3043 }
3044
3045 static int
3046 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
3047 struct rte_eth_rss_conf *rss_conf)
3048 {
3049 struct bond_dev_private *internals = dev->data->dev_private;
3050
3051 rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
3052 rss_conf->rss_key_len = internals->rss_key_len;
3053 if (rss_conf->rss_key)
3054 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
3055
3056 return 0;
3057 }
3058
3059 static int
3060 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
3061 {
3062 struct rte_eth_dev *slave_eth_dev;
3063 struct bond_dev_private *internals = dev->data->dev_private;
3064 int ret, i;
3065
3066 rte_spinlock_lock(&internals->lock);
3067
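	/* Verify that every slave implements mtu_set before applying the new
	 * MTU to any of them.
	 */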
3068 for (i = 0; i < internals->slave_count; i++) {
3069 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
3070 if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
3071 rte_spinlock_unlock(&internals->lock);
3072 return -ENOTSUP;
3073 }
3074 }
3075 for (i = 0; i < internals->slave_count; i++) {
3076 ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
3077 if (ret < 0) {
3078 rte_spinlock_unlock(&internals->lock);
3079 return ret;
3080 }
3081 }
3082
3083 rte_spinlock_unlock(&internals->lock);
3084 return 0;
3085 }
3086
3087 static int
3088 bond_ethdev_mac_address_set(struct rte_eth_dev *dev,
3089 struct rte_ether_addr *addr)
3090 {
3091 if (mac_address_set(dev, addr)) {
3092 RTE_BOND_LOG(ERR, "Failed to update MAC address");
3093 return -EINVAL;
3094 }
3095
3096 return 0;
3097 }
3098
3099 static int
3100 bond_filter_ctrl(struct rte_eth_dev *dev __rte_unused,
3101 enum rte_filter_type type, enum rte_filter_op op, void *arg)
3102 {
3103 if (type == RTE_ETH_FILTER_GENERIC && op == RTE_ETH_FILTER_GET) {
3104 *(const void **)arg = &bond_flow_ops;
3105 return 0;
3106 }
3107 return -ENOTSUP;
3108 }
3109
3110 static int
3111 bond_ethdev_mac_addr_add(struct rte_eth_dev *dev,
3112 struct rte_ether_addr *mac_addr,
3113 __rte_unused uint32_t index, uint32_t vmdq)
3114 {
3115 struct rte_eth_dev *slave_eth_dev;
3116 struct bond_dev_private *internals = dev->data->dev_private;
3117 int ret, i;
3118
3119 rte_spinlock_lock(&internals->lock);
3120
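	/* Verify that every slave supports MAC address add/remove before
	 * touching any of them; a failure while adding is rolled back below.
	 */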
3121 for (i = 0; i < internals->slave_count; i++) {
3122 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
3123 if (*slave_eth_dev->dev_ops->mac_addr_add == NULL ||
3124 *slave_eth_dev->dev_ops->mac_addr_remove == NULL) {
3125 ret = -ENOTSUP;
3126 goto end;
3127 }
3128 }
3129
3130 for (i = 0; i < internals->slave_count; i++) {
3131 ret = rte_eth_dev_mac_addr_add(internals->slaves[i].port_id,
3132 mac_addr, vmdq);
3133 if (ret < 0) {
3134 /* rollback */
3135 for (i--; i >= 0; i--)
3136 rte_eth_dev_mac_addr_remove(
3137 internals->slaves[i].port_id, mac_addr);
3138 goto end;
3139 }
3140 }
3141
3142 ret = 0;
3143 end:
3144 rte_spinlock_unlock(&internals->lock);
3145 return ret;
3146 }
3147
3148 static void
3149 bond_ethdev_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
3150 {
3151 struct rte_eth_dev *slave_eth_dev;
3152 struct bond_dev_private *internals = dev->data->dev_private;
3153 int i;
3154
3155 rte_spinlock_lock(&internals->lock);
3156
3157 for (i = 0; i < internals->slave_count; i++) {
3158 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
3159 if (*slave_eth_dev->dev_ops->mac_addr_remove == NULL)
3160 goto end;
3161 }
3162
3163 struct rte_ether_addr *mac_addr = &dev->data->mac_addrs[index];
3164
3165 for (i = 0; i < internals->slave_count; i++)
3166 rte_eth_dev_mac_addr_remove(internals->slaves[i].port_id,
3167 mac_addr);
3168
3169 end:
3170 rte_spinlock_unlock(&internals->lock);
3171 }
3172
3173 const struct eth_dev_ops default_dev_ops = {
3174 .dev_start = bond_ethdev_start,
3175 .dev_stop = bond_ethdev_stop,
3176 .dev_close = bond_ethdev_close,
3177 .dev_configure = bond_ethdev_configure,
3178 .dev_infos_get = bond_ethdev_info,
3179 .vlan_filter_set = bond_ethdev_vlan_filter_set,
3180 .rx_queue_setup = bond_ethdev_rx_queue_setup,
3181 .tx_queue_setup = bond_ethdev_tx_queue_setup,
3182 .rx_queue_release = bond_ethdev_rx_queue_release,
3183 .tx_queue_release = bond_ethdev_tx_queue_release,
3184 .link_update = bond_ethdev_link_update,
3185 .stats_get = bond_ethdev_stats_get,
3186 .stats_reset = bond_ethdev_stats_reset,
3187 .promiscuous_enable = bond_ethdev_promiscuous_enable,
3188 .promiscuous_disable = bond_ethdev_promiscuous_disable,
3189 .allmulticast_enable = bond_ethdev_allmulticast_enable,
3190 .allmulticast_disable = bond_ethdev_allmulticast_disable,
3191 .reta_update = bond_ethdev_rss_reta_update,
3192 .reta_query = bond_ethdev_rss_reta_query,
3193 .rss_hash_update = bond_ethdev_rss_hash_update,
3194 .rss_hash_conf_get = bond_ethdev_rss_hash_conf_get,
3195 .mtu_set = bond_ethdev_mtu_set,
3196 .mac_addr_set = bond_ethdev_mac_address_set,
3197 .mac_addr_add = bond_ethdev_mac_addr_add,
3198 .mac_addr_remove = bond_ethdev_mac_addr_remove,
3199 .filter_ctrl = bond_filter_ctrl
3200 };
3201
3202 static int
3203 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
3204 {
3205 const char *name = rte_vdev_device_name(dev);
3206 uint8_t socket_id = dev->device.numa_node;
3207 struct bond_dev_private *internals = NULL;
3208 struct rte_eth_dev *eth_dev = NULL;
3209 uint32_t vlan_filter_bmp_size;
3210
3211 /* now do all data allocation - for eth_dev structure, dummy pci driver
3212 * and internal (private) data
3213 */
3214
3215 /* reserve an ethdev entry */
3216 eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
3217 if (eth_dev == NULL) {
3218 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
3219 goto err;
3220 }
3221
3222 internals = eth_dev->data->dev_private;
3223 eth_dev->data->nb_rx_queues = (uint16_t)1;
3224 eth_dev->data->nb_tx_queues = (uint16_t)1;
3225
3226 /* Allocate memory for storing MAC addresses */
3227 eth_dev->data->mac_addrs = rte_zmalloc_socket(name, RTE_ETHER_ADDR_LEN *
3228 BOND_MAX_MAC_ADDRS, 0, socket_id);
3229 if (eth_dev->data->mac_addrs == NULL) {
3230 RTE_BOND_LOG(ERR,
3231 "Failed to allocate %u bytes needed to store MAC addresses",
3232 RTE_ETHER_ADDR_LEN * BOND_MAX_MAC_ADDRS);
3233 goto err;
3234 }
3235
3236 eth_dev->dev_ops = &default_dev_ops;
3237 eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC |
3238 RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
3239
3240 rte_spinlock_init(&internals->lock);
3241 rte_spinlock_init(&internals->lsc_lock);
3242
3243 internals->port_id = eth_dev->data->port_id;
3244 internals->mode = BONDING_MODE_INVALID;
3245 internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
3246 internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
3247 internals->burst_xmit_hash = burst_xmit_l2_hash;
3248 internals->user_defined_mac = 0;
3249
3250 internals->link_status_polling_enabled = 0;
3251
3252 internals->link_status_polling_interval_ms =
3253 DEFAULT_POLLING_INTERVAL_10_MS;
3254 internals->link_down_delay_ms = 0;
3255 internals->link_up_delay_ms = 0;
3256
3257 internals->slave_count = 0;
3258 internals->active_slave_count = 0;
3259 internals->rx_offload_capa = 0;
3260 internals->tx_offload_capa = 0;
3261 internals->rx_queue_offload_capa = 0;
3262 internals->tx_queue_offload_capa = 0;
3263 internals->candidate_max_rx_pktlen = 0;
3264 internals->max_rx_pktlen = 0;
3265
3266 /* Initially allow to choose any offload type */
3267 internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
3268
3269 memset(&internals->default_rxconf, 0,
3270 sizeof(internals->default_rxconf));
3271 memset(&internals->default_txconf, 0,
3272 sizeof(internals->default_txconf));
3273
3274 memset(&internals->rx_desc_lim, 0, sizeof(internals->rx_desc_lim));
3275 memset(&internals->tx_desc_lim, 0, sizeof(internals->tx_desc_lim));
3276
3277 memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
3278 memset(internals->slaves, 0, sizeof(internals->slaves));
3279
3280 TAILQ_INIT(&internals->flow_list);
3281 internals->flow_isolated_valid = 0;
3282
3283 /* Set mode 4 default configuration */
3284 bond_mode_8023ad_setup(eth_dev, NULL);
3285 if (bond_ethdev_mode_set(eth_dev, mode)) {
3286 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
3287 eth_dev->data->port_id, mode);
3288 goto err;
3289 }
3290
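	/* Allocate a bitmap with one bit per valid VLAN ID to track the VLAN
	 * filters configured on the bonded device.
	 */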
3291 vlan_filter_bmp_size =
3292 rte_bitmap_get_memory_footprint(RTE_ETHER_MAX_VLAN_ID + 1);
3293 internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
3294 RTE_CACHE_LINE_SIZE);
3295 if (internals->vlan_filter_bmpmem == NULL) {
3296 RTE_BOND_LOG(ERR,
3297 "Failed to allocate vlan bitmap for bonded device %u",
3298 eth_dev->data->port_id);
3299 goto err;
3300 }
3301
3302 internals->vlan_filter_bmp = rte_bitmap_init(RTE_ETHER_MAX_VLAN_ID + 1,
3303 internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
3304 if (internals->vlan_filter_bmp == NULL) {
3305 RTE_BOND_LOG(ERR,
3306 "Failed to init vlan bitmap for bonded device %u",
3307 eth_dev->data->port_id);
3308 rte_free(internals->vlan_filter_bmpmem);
3309 goto err;
3310 }
3311
3312 return eth_dev->data->port_id;
3313
3314 err:
3315 rte_free(internals);
3316 if (eth_dev != NULL)
3317 eth_dev->data->dev_private = NULL;
3318 rte_eth_dev_release_port(eth_dev);
3319 return -1;
3320 }
3321
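/*
 * Illustrative example (not taken from this file): a bonded device is
 * typically created at start-up with an EAL --vdev argument such as
 *   --vdev 'net_bonding0,mode=4,slave=0000:01:00.0,slave=0000:01:00.1'
 * where the option names correspond to the PMD_BOND_*_KVARG keys that
 * bond_probe() and bond_ethdev_configure() parse below.
 */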
3322 static int
3323 bond_probe(struct rte_vdev_device *dev)
3324 {
3325 const char *name;
3326 struct bond_dev_private *internals;
3327 struct rte_kvargs *kvlist;
3328 uint8_t bonding_mode, socket_id/*, agg_mode*/;
3329 int arg_count, port_id;
3330 uint8_t agg_mode;
3331 struct rte_eth_dev *eth_dev;
3332
3333 if (!dev)
3334 return -EINVAL;
3335
3336 name = rte_vdev_device_name(dev);
3337 RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name);
3338
3339 if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
3340 eth_dev = rte_eth_dev_attach_secondary(name);
3341 if (!eth_dev) {
3342 RTE_BOND_LOG(ERR, "Failed to probe %s", name);
3343 return -1;
3344 }
3345 /* TODO: request info from primary to set up Rx and Tx */
3346 eth_dev->dev_ops = &default_dev_ops;
3347 eth_dev->device = &dev->device;
3348 rte_eth_dev_probing_finish(eth_dev);
3349 return 0;
3350 }
3351
3352 kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3353 pmd_bond_init_valid_arguments);
3354 if (kvlist == NULL)
3355 return -1;
3356
3357 /* Parse link bonding mode */
3358 if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3359 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3360 &bond_ethdev_parse_slave_mode_kvarg,
3361 &bonding_mode) != 0) {
3362 RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s",
3363 name);
3364 goto parse_error;
3365 }
3366 } else {
3367 RTE_BOND_LOG(ERR, "Mode must be specified only once for bonded "
3368 "device %s", name);
3369 goto parse_error;
3370 }
3371
3372 /* Parse socket id to create bonding device on */
3373 arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3374 if (arg_count == 1) {
3375 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3376 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
3377 != 0) {
3378 RTE_BOND_LOG(ERR, "Invalid socket Id specified for "
3379 "bonded device %s", name);
3380 goto parse_error;
3381 }
3382 } else if (arg_count > 1) {
3383 RTE_BOND_LOG(ERR, "Socket Id can be specified only once for "
3384 "bonded device %s", name);
3385 goto parse_error;
3386 } else {
3387 socket_id = rte_socket_id();
3388 }
3389
3390 dev->device.numa_node = socket_id;
3391
3392 /* Create link bonding eth device */
3393 port_id = bond_alloc(dev, bonding_mode);
3394 if (port_id < 0) {
3395 RTE_BOND_LOG(ERR, "Failed to create socket %s in mode %u on "
3396 "socket %u.", name, bonding_mode, socket_id);
3397 goto parse_error;
3398 }
3399 internals = rte_eth_devices[port_id].data->dev_private;
3400 internals->kvlist = kvlist;
3401
3402 if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3403 if (rte_kvargs_process(kvlist,
3404 PMD_BOND_AGG_MODE_KVARG,
3405 &bond_ethdev_parse_slave_agg_mode_kvarg,
3406 &agg_mode) != 0) {
3407 RTE_BOND_LOG(ERR,
3408 "Failed to parse agg selection mode for bonded device %s",
3409 name);
3410 goto parse_error;
3411 }
3412
3413 if (internals->mode == BONDING_MODE_8023AD)
3414 internals->mode4.agg_selection = agg_mode;
3415 } else {
3416 internals->mode4.agg_selection = AGG_STABLE;
3417 }
3418
3419 rte_eth_dev_probing_finish(&rte_eth_devices[port_id]);
3420 RTE_BOND_LOG(INFO, "Create bonded device %s on port %d in mode %u on "
3421 "socket %u.", name, port_id, bonding_mode, socket_id);
3422 return 0;
3423
3424 parse_error:
3425 rte_kvargs_free(kvlist);
3426
3427 return -1;
3428 }
3429
3430 static int
3431 bond_remove(struct rte_vdev_device *dev)
3432 {
3433 struct rte_eth_dev *eth_dev;
3434 struct bond_dev_private *internals;
3435 const char *name;
3436 int ret = 0;
3437
3438 if (!dev)
3439 return -EINVAL;
3440
3441 name = rte_vdev_device_name(dev);
3442 RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name);
3443
3444 /* find an ethdev entry */
3445 eth_dev = rte_eth_dev_allocated(name);
3446 if (eth_dev == NULL)
3447 return 0; /* port already released */
3448
3449 if (rte_eal_process_type() != RTE_PROC_PRIMARY)
3450 return rte_eth_dev_release_port(eth_dev);
3451
3452 RTE_ASSERT(eth_dev->device == &dev->device);
3453
3454 internals = eth_dev->data->dev_private;
3455 if (internals->slave_count != 0)
3456 return -EBUSY;
3457
3458 if (eth_dev->data->dev_started == 1) {
3459 ret = bond_ethdev_stop(eth_dev);
3460 bond_ethdev_close(eth_dev);
3461 }
3462 rte_eth_dev_release_port(eth_dev);
3463
3464 return ret;
3465 }
3466
3467 /* this part will resolve the slave portids after all the other pdev and vdev
3468 * have been allocated */
3469 static int
3470 bond_ethdev_configure(struct rte_eth_dev *dev)
3471 {
3472 const char *name = dev->device->name;
3473 struct bond_dev_private *internals = dev->data->dev_private;
3474 struct rte_kvargs *kvlist = internals->kvlist;
3475 int arg_count;
3476 uint16_t port_id = dev - rte_eth_devices;
3477 uint8_t agg_mode;
3478
3479 static const uint8_t default_rss_key[40] = {
3480 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3481 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3482 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3483 0xBE, 0xAC, 0x01, 0xFA
3484 };
3485
3486 unsigned i, j;
3487
3488 /*
3489 * If RSS is enabled, fill table with default values and
3490 	 * set the key to the value specified in the port RSS configuration.
3491 * Fall back to default RSS key if the key is not specified
3492 */
	if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
		if (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key != NULL) {
			internals->rss_key_len =
				dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
			memcpy(internals->rss_key,
			       dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key,
			       internals->rss_key_len);
		} else {
			internals->rss_key_len = sizeof(default_rss_key);
			memcpy(internals->rss_key, default_rss_key,
			       internals->rss_key_len);
		}

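		/*
		 * Populate the whole redirection table, spreading the entries
		 * round-robin across the configured Rx queues.
		 */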
		for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
			internals->reta_conf[i].mask = ~0LL;
			for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
				internals->reta_conf[i].reta[j] =
					(i * RTE_RETA_GROUP_SIZE + j) %
					dev->data->nb_rx_queues;
		}
	}

	/* set the max_rx_pktlen */
	internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;

	/*
	 * If there is no kvlist, this bonded device was created through the
	 * bonding API rather than from EAL device arguments, so there is
	 * nothing further to parse.
	 */
	if (!kvlist)
		return 0;

	/* Parse MAC address for bonded device */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
	if (arg_count == 1) {
		struct rte_ether_addr bond_mac;

		if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
				&bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
			RTE_BOND_LOG(INFO, "Invalid mac address for bonded device %s",
					name);
			return -1;
		}

		/* Set MAC address */
		if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
			RTE_BOND_LOG(ERR,
					"Failed to set mac address on bonded device %s",
					name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_BOND_LOG(ERR,
				"MAC address can be specified only once for bonded device %s",
				name);
		return -1;
	}

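	/*
	 * The transmit policy for balance mode selects which headers feed the
	 * slave-selection hash: layer 2, layers 2+3, or layers 3+4 (the
	 * l2/l23/l34 values in the parameter string registered below).
	 */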
	/* Parse/set balance mode transmit policy */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
	if (arg_count == 1) {
		uint8_t xmit_policy;

		if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
				&bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
						0) {
			RTE_BOND_LOG(INFO,
					"Invalid xmit policy specified for bonded device %s",
					name);
			return -1;
		}

		/* Set balance mode transmit policy */
		if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
			RTE_BOND_LOG(ERR,
					"Failed to set balance xmit policy on bonded device %s",
					name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_BOND_LOG(ERR,
				"Transmit policy can be specified only once for bonded device %s",
				name);
		return -1;
	}

	if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
		if (rte_kvargs_process(kvlist,
				PMD_BOND_AGG_MODE_KVARG,
				&bond_ethdev_parse_slave_agg_mode_kvarg,
				&agg_mode) != 0) {
			RTE_BOND_LOG(ERR,
					"Failed to parse agg selection mode for bonded device %s",
					name);
			return -1;
		}
		if (internals->mode == BONDING_MODE_8023AD) {
			int ret = rte_eth_bond_8023ad_agg_selection_set(port_id,
					agg_mode);
			if (ret < 0) {
				RTE_BOND_LOG(ERR,
					"Invalid args for agg selection set for bonded device %s",
					name);
				return -1;
			}
		}
	}

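	/*
	 * At least one slave port is required when the bonded device is built
	 * from device arguments; configuration fails below if none is given.
	 */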
	/* Parse/add slave ports to bonded device */
	if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
		struct bond_ethdev_slave_ports slave_ports;
		unsigned i;

		memset(&slave_ports, 0, sizeof(slave_ports));

		if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
				&bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
			RTE_BOND_LOG(ERR,
					"Failed to parse slave ports for bonded device %s",
					name);
			return -1;
		}

		for (i = 0; i < slave_ports.slave_count; i++) {
			if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
				RTE_BOND_LOG(ERR,
					"Failed to add port %d as slave to bonded device %s",
					slave_ports.slaves[i], name);
			}
		}

	} else {
		RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name);
		return -1;
	}

	/* Parse/set primary slave port id */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
	if (arg_count == 1) {
		uint16_t primary_slave_port_id;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_PRIMARY_SLAVE_KVARG,
				&bond_ethdev_parse_primary_slave_port_id_kvarg,
				&primary_slave_port_id) < 0) {
			RTE_BOND_LOG(INFO,
					"Invalid primary slave port id specified for bonded device %s",
					name);
			return -1;
		}

		/* Set the primary slave port */
		if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
				!= 0) {
			RTE_BOND_LOG(ERR,
					"Failed to set primary slave port %d on bonded device %s",
					primary_slave_port_id, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_BOND_LOG(INFO,
				"Primary slave can be specified only once for bonded device %s",
				name);
		return -1;
	}

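	/*
	 * The polling interval below is used to check link status on slaves
	 * whose drivers do not support link status change interrupts.
	 */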
	/* Parse link status monitor polling interval */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
	if (arg_count == 1) {
		uint32_t lsc_poll_interval_ms;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_LSC_POLL_PERIOD_KVARG,
				&bond_ethdev_parse_time_ms_kvarg,
				&lsc_poll_interval_ms) < 0) {
			RTE_BOND_LOG(INFO,
					"Invalid lsc polling interval value specified for bonded"
					" device %s", name);
			return -1;
		}

		if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
				!= 0) {
			RTE_BOND_LOG(ERR,
					"Failed to set lsc monitor polling interval (%u ms) on bonded device %s",
					lsc_poll_interval_ms, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_BOND_LOG(INFO,
				"LSC polling interval can be specified only once for bonded"
				" device %s", name);
		return -1;
	}

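	/*
	 * The propagation delays below debounce slave link state changes:
	 * a link must stay up (or down) for the configured time before the
	 * bonded device reacts to the change.
	 */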
	/* Parse link up interrupt propagation delay */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
	if (arg_count == 1) {
		uint32_t link_up_delay_ms;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
				&bond_ethdev_parse_time_ms_kvarg,
				&link_up_delay_ms) < 0) {
			RTE_BOND_LOG(INFO,
					"Invalid link up propagation delay value specified for"
					" bonded device %s", name);
			return -1;
		}

		/* Set the link up propagation delay */
		if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
				!= 0) {
			RTE_BOND_LOG(ERR,
					"Failed to set link up propagation delay (%u ms) on bonded"
					" device %s", link_up_delay_ms, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_BOND_LOG(INFO,
				"Link up propagation delay can be specified only once for"
				" bonded device %s", name);
		return -1;
	}

	/* Parse link down interrupt propagation delay */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
	if (arg_count == 1) {
		uint32_t link_down_delay_ms;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
				&bond_ethdev_parse_time_ms_kvarg,
				&link_down_delay_ms) < 0) {
			RTE_BOND_LOG(INFO,
					"Invalid link down propagation delay value specified for"
					" bonded device %s", name);
			return -1;
		}

		/* Set the link down propagation delay */
		if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
				!= 0) {
			RTE_BOND_LOG(ERR,
					"Failed to set link down propagation delay (%u ms) on bonded device %s",
					link_down_delay_ms, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_BOND_LOG(INFO,
				"Link down propagation delay can be specified only once for bonded device %s",
				name);
		return -1;
	}

	return 0;
}

static struct rte_vdev_driver pmd_bond_drv = {
	.probe = bond_probe,
	.remove = bond_remove,
};

RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);

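/*
 * Illustrative example (not part of this file): creating a bonded device
 * from EAL arguments using the parameters registered below. The PCI
 * addresses are placeholders.
 *
 *   --vdev 'net_bonding0,mode=1,slave=0000:02:00.0,slave=0000:03:00.0,primary=0000:02:00.0'
 */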
RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
	"slave=<ifc> "
	"primary=<ifc> "
	"mode=[0-6] "
	"xmit_policy=[l2 | l23 | l34] "
	"agg_mode=[count | stable | bandwidth] "
	"socket_id=<int> "
	"mac=<mac addr> "
	"lsc_poll_period_ms=<int> "
	"up_delay=<int> "
	"down_delay=<int>");

RTE_LOG_REGISTER(bond_logtype, pmd.net.bond, NOTICE);