/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2016-2017 Intel Corporation
 */
42bfe3f2eSlogwang
52bfe3f2eSlogwang #include <stdio.h>
62bfe3f2eSlogwang #include <stdlib.h>
72bfe3f2eSlogwang #include <string.h>
82bfe3f2eSlogwang #include <unistd.h>
92bfe3f2eSlogwang #include <stdint.h>
102bfe3f2eSlogwang #include <stdarg.h>
112bfe3f2eSlogwang #include <inttypes.h>
122bfe3f2eSlogwang #include <sys/queue.h>
132bfe3f2eSlogwang #include <errno.h>
144418919fSjohnjiang #include <sys/types.h>
154418919fSjohnjiang #include <netinet/in.h>
162bfe3f2eSlogwang #include <netinet/ip.h>
172bfe3f2eSlogwang
182bfe3f2eSlogwang #include <rte_common.h>
192bfe3f2eSlogwang #include <rte_memory.h>
202bfe3f2eSlogwang #include <rte_eal.h>
212bfe3f2eSlogwang #include <rte_launch.h>
222bfe3f2eSlogwang #include <rte_per_lcore.h>
232bfe3f2eSlogwang #include <rte_lcore.h>
242bfe3f2eSlogwang #include <rte_branch_prediction.h>
252bfe3f2eSlogwang #include <rte_atomic.h>
262bfe3f2eSlogwang #include <rte_ring.h>
272bfe3f2eSlogwang #include <rte_log.h>
282bfe3f2eSlogwang #include <rte_debug.h>
292bfe3f2eSlogwang #include <rte_mempool.h>
302bfe3f2eSlogwang #include <rte_memcpy.h>
312bfe3f2eSlogwang #include <rte_mbuf.h>
322bfe3f2eSlogwang #include <rte_ether.h>
332bfe3f2eSlogwang #include <rte_interrupts.h>
342bfe3f2eSlogwang #include <rte_ethdev.h>
352bfe3f2eSlogwang #include <rte_byteorder.h>
362bfe3f2eSlogwang #include <rte_malloc.h>
372bfe3f2eSlogwang #include <rte_string_fns.h>
382bfe3f2eSlogwang #include <rte_efd.h>
392bfe3f2eSlogwang #include <rte_ip.h>
402bfe3f2eSlogwang
412bfe3f2eSlogwang #include "common.h"
422bfe3f2eSlogwang #include "args.h"
432bfe3f2eSlogwang #include "init.h"
442bfe3f2eSlogwang
/*
 * Burst size used for every bulk read, whether the packets come from the
 * NIC or from the node queues.
 */
#define PACKET_READ_SIZE 32
502bfe3f2eSlogwang
512bfe3f2eSlogwang /*
522bfe3f2eSlogwang * Local buffers to put packets in, used to send packets in bursts to the
532bfe3f2eSlogwang * nodes
542bfe3f2eSlogwang */
552bfe3f2eSlogwang struct node_rx_buf {
562bfe3f2eSlogwang struct rte_mbuf *buffer[PACKET_READ_SIZE];
572bfe3f2eSlogwang uint16_t count;
582bfe3f2eSlogwang };
592bfe3f2eSlogwang
/* Server-side counters describing the outcome of flow distribution */
struct efd_stats {
	/* packets queued for delivery to a node */
	uint64_t distributed;
	/* packets freed because no valid node was found for them */
	uint64_t drop;
} flow_dist_stats;
642bfe3f2eSlogwang
/* Dynamically allocated array holding one staging buffer per node RX queue */
static struct node_rx_buf *cl_rx_buf;
672bfe3f2eSlogwang
682bfe3f2eSlogwang static const char *
get_printable_mac_addr(uint16_t port)692bfe3f2eSlogwang get_printable_mac_addr(uint16_t port)
702bfe3f2eSlogwang {
712bfe3f2eSlogwang static const char err_address[] = "00:00:00:00:00:00";
722bfe3f2eSlogwang static char addresses[RTE_MAX_ETHPORTS][sizeof(err_address)];
734418919fSjohnjiang struct rte_ether_addr mac;
744418919fSjohnjiang int ret;
752bfe3f2eSlogwang
762bfe3f2eSlogwang if (unlikely(port >= RTE_MAX_ETHPORTS))
772bfe3f2eSlogwang return err_address;
782bfe3f2eSlogwang if (unlikely(addresses[port][0] == '\0')) {
794418919fSjohnjiang ret = rte_eth_macaddr_get(port, &mac);
804418919fSjohnjiang if (ret != 0) {
814418919fSjohnjiang printf("Failed to get MAC address (port %u): %s\n",
824418919fSjohnjiang port, rte_strerror(-ret));
834418919fSjohnjiang return err_address;
844418919fSjohnjiang }
854418919fSjohnjiang
862bfe3f2eSlogwang snprintf(addresses[port], sizeof(addresses[port]),
872bfe3f2eSlogwang "%02x:%02x:%02x:%02x:%02x:%02x\n",
882bfe3f2eSlogwang mac.addr_bytes[0], mac.addr_bytes[1],
892bfe3f2eSlogwang mac.addr_bytes[2], mac.addr_bytes[3],
902bfe3f2eSlogwang mac.addr_bytes[4], mac.addr_bytes[5]);
912bfe3f2eSlogwang }
922bfe3f2eSlogwang return addresses[port];
932bfe3f2eSlogwang }
942bfe3f2eSlogwang
952bfe3f2eSlogwang /*
962bfe3f2eSlogwang * This function displays the recorded statistics for each port
972bfe3f2eSlogwang * and for each node. It uses ANSI terminal codes to clear
98*2d9fd380Sjfb8856606 * screen when called. It is called from a single worker
992bfe3f2eSlogwang * thread in the server process, when the process is run with more
1002bfe3f2eSlogwang * than one lcore enabled.
1012bfe3f2eSlogwang */
1022bfe3f2eSlogwang static void
do_stats_display(void)1032bfe3f2eSlogwang do_stats_display(void)
1042bfe3f2eSlogwang {
1052bfe3f2eSlogwang unsigned int i, j;
1062bfe3f2eSlogwang const char clr[] = {27, '[', '2', 'J', '\0'};
1072bfe3f2eSlogwang const char topLeft[] = {27, '[', '1', ';', '1', 'H', '\0'};
1082bfe3f2eSlogwang uint64_t port_tx[RTE_MAX_ETHPORTS], port_tx_drop[RTE_MAX_ETHPORTS];
1092bfe3f2eSlogwang uint64_t node_tx[MAX_NODES], node_tx_drop[MAX_NODES];
1102bfe3f2eSlogwang
1112bfe3f2eSlogwang /* to get TX stats, we need to do some summing calculations */
1122bfe3f2eSlogwang memset(port_tx, 0, sizeof(port_tx));
1132bfe3f2eSlogwang memset(port_tx_drop, 0, sizeof(port_tx_drop));
1142bfe3f2eSlogwang memset(node_tx, 0, sizeof(node_tx));
1152bfe3f2eSlogwang memset(node_tx_drop, 0, sizeof(node_tx_drop));
1162bfe3f2eSlogwang
1172bfe3f2eSlogwang for (i = 0; i < num_nodes; i++) {
1182bfe3f2eSlogwang const struct tx_stats *tx = &info->tx_stats[i];
1192bfe3f2eSlogwang
1202bfe3f2eSlogwang for (j = 0; j < info->num_ports; j++) {
1212bfe3f2eSlogwang const uint64_t tx_val = tx->tx[info->id[j]];
1222bfe3f2eSlogwang const uint64_t drop_val = tx->tx_drop[info->id[j]];
1232bfe3f2eSlogwang
1242bfe3f2eSlogwang port_tx[j] += tx_val;
1252bfe3f2eSlogwang port_tx_drop[j] += drop_val;
1262bfe3f2eSlogwang node_tx[i] += tx_val;
1272bfe3f2eSlogwang node_tx_drop[i] += drop_val;
1282bfe3f2eSlogwang }
1292bfe3f2eSlogwang }
1302bfe3f2eSlogwang
1312bfe3f2eSlogwang /* Clear screen and move to top left */
1322bfe3f2eSlogwang printf("%s%s", clr, topLeft);
1332bfe3f2eSlogwang
1342bfe3f2eSlogwang printf("PORTS\n");
1352bfe3f2eSlogwang printf("-----\n");
1362bfe3f2eSlogwang for (i = 0; i < info->num_ports; i++)
1372bfe3f2eSlogwang printf("Port %u: '%s'\t", (unsigned int)info->id[i],
1382bfe3f2eSlogwang get_printable_mac_addr(info->id[i]));
1392bfe3f2eSlogwang printf("\n\n");
1402bfe3f2eSlogwang for (i = 0; i < info->num_ports; i++) {
1412bfe3f2eSlogwang printf("Port %u - rx: %9"PRIu64"\t"
1422bfe3f2eSlogwang "tx: %9"PRIu64"\n",
1432bfe3f2eSlogwang (unsigned int)info->id[i], info->rx_stats.rx[i],
1442bfe3f2eSlogwang port_tx[i]);
1452bfe3f2eSlogwang }
1462bfe3f2eSlogwang
1472bfe3f2eSlogwang printf("\nSERVER\n");
1482bfe3f2eSlogwang printf("-----\n");
1492bfe3f2eSlogwang printf("distributed: %9"PRIu64", drop: %9"PRIu64"\n",
1502bfe3f2eSlogwang flow_dist_stats.distributed, flow_dist_stats.drop);
1512bfe3f2eSlogwang
1522bfe3f2eSlogwang printf("\nNODES\n");
1532bfe3f2eSlogwang printf("-------\n");
1542bfe3f2eSlogwang for (i = 0; i < num_nodes; i++) {
1552bfe3f2eSlogwang const unsigned long long rx = nodes[i].stats.rx;
1562bfe3f2eSlogwang const unsigned long long rx_drop = nodes[i].stats.rx_drop;
1572bfe3f2eSlogwang const struct filter_stats *filter = &info->filter_stats[i];
1582bfe3f2eSlogwang
1592bfe3f2eSlogwang printf("Node %2u - rx: %9llu, rx_drop: %9llu\n"
1602bfe3f2eSlogwang " tx: %9"PRIu64", tx_drop: %9"PRIu64"\n"
1612bfe3f2eSlogwang " filter_passed: %9"PRIu64", "
1622bfe3f2eSlogwang "filter_drop: %9"PRIu64"\n",
1632bfe3f2eSlogwang i, rx, rx_drop, node_tx[i], node_tx_drop[i],
1642bfe3f2eSlogwang filter->passed, filter->drop);
1652bfe3f2eSlogwang }
1662bfe3f2eSlogwang
1672bfe3f2eSlogwang printf("\n");
1682bfe3f2eSlogwang }
1692bfe3f2eSlogwang
1702bfe3f2eSlogwang /*
171*2d9fd380Sjfb8856606 * The function called from each non-main lcore used by the process.
1722bfe3f2eSlogwang * The test_and_set function is used to randomly pick a single lcore on which
1732bfe3f2eSlogwang * the code to display the statistics will run. Otherwise, the code just
1742bfe3f2eSlogwang * repeatedly sleeps.
1752bfe3f2eSlogwang */
1762bfe3f2eSlogwang static int
sleep_lcore(__rte_unused void * dummy)177*2d9fd380Sjfb8856606 sleep_lcore(__rte_unused void *dummy)
1782bfe3f2eSlogwang {
1792bfe3f2eSlogwang /* Used to pick a display thread - static, so zero-initialised */
1802bfe3f2eSlogwang static rte_atomic32_t display_stats;
1812bfe3f2eSlogwang
1822bfe3f2eSlogwang /* Only one core should display stats */
1832bfe3f2eSlogwang if (rte_atomic32_test_and_set(&display_stats)) {
1842bfe3f2eSlogwang const unsigned int sleeptime = 1;
1852bfe3f2eSlogwang
1862bfe3f2eSlogwang printf("Core %u displaying statistics\n", rte_lcore_id());
1872bfe3f2eSlogwang
1882bfe3f2eSlogwang /* Longer initial pause so above printf is seen */
1892bfe3f2eSlogwang sleep(sleeptime * 3);
1902bfe3f2eSlogwang
1912bfe3f2eSlogwang /* Loop forever: sleep always returns 0 or <= param */
1922bfe3f2eSlogwang while (sleep(sleeptime) <= sleeptime)
1932bfe3f2eSlogwang do_stats_display();
1942bfe3f2eSlogwang }
1952bfe3f2eSlogwang return 0;
1962bfe3f2eSlogwang }
1972bfe3f2eSlogwang
1982bfe3f2eSlogwang /*
1992bfe3f2eSlogwang * Function to set all the node statistic values to zero.
2002bfe3f2eSlogwang * Called at program startup.
2012bfe3f2eSlogwang */
2022bfe3f2eSlogwang static void
clear_stats(void)2032bfe3f2eSlogwang clear_stats(void)
2042bfe3f2eSlogwang {
2052bfe3f2eSlogwang unsigned int i;
2062bfe3f2eSlogwang
2072bfe3f2eSlogwang for (i = 0; i < num_nodes; i++)
2082bfe3f2eSlogwang nodes[i].stats.rx = nodes[i].stats.rx_drop = 0;
2092bfe3f2eSlogwang }
2102bfe3f2eSlogwang
2112bfe3f2eSlogwang /*
2122bfe3f2eSlogwang * send a burst of traffic to a node, assuming there are packets
2132bfe3f2eSlogwang * available to be sent to this node
2142bfe3f2eSlogwang */
2152bfe3f2eSlogwang static void
flush_rx_queue(uint16_t node)2162bfe3f2eSlogwang flush_rx_queue(uint16_t node)
2172bfe3f2eSlogwang {
2182bfe3f2eSlogwang uint16_t j;
2192bfe3f2eSlogwang struct node *cl;
2202bfe3f2eSlogwang
2212bfe3f2eSlogwang if (cl_rx_buf[node].count == 0)
2222bfe3f2eSlogwang return;
2232bfe3f2eSlogwang
2242bfe3f2eSlogwang cl = &nodes[node];
2252bfe3f2eSlogwang if (rte_ring_enqueue_bulk(cl->rx_q, (void **)cl_rx_buf[node].buffer,
2262bfe3f2eSlogwang cl_rx_buf[node].count, NULL) != cl_rx_buf[node].count){
2272bfe3f2eSlogwang for (j = 0; j < cl_rx_buf[node].count; j++)
2282bfe3f2eSlogwang rte_pktmbuf_free(cl_rx_buf[node].buffer[j]);
2292bfe3f2eSlogwang cl->stats.rx_drop += cl_rx_buf[node].count;
2302bfe3f2eSlogwang } else
2312bfe3f2eSlogwang cl->stats.rx += cl_rx_buf[node].count;
2322bfe3f2eSlogwang
2332bfe3f2eSlogwang cl_rx_buf[node].count = 0;
2342bfe3f2eSlogwang }
2352bfe3f2eSlogwang
2362bfe3f2eSlogwang /*
2372bfe3f2eSlogwang * marks a packet down to be sent to a particular node process
2382bfe3f2eSlogwang */
2392bfe3f2eSlogwang static inline void
enqueue_rx_packet(uint8_t node,struct rte_mbuf * buf)2402bfe3f2eSlogwang enqueue_rx_packet(uint8_t node, struct rte_mbuf *buf)
2412bfe3f2eSlogwang {
2422bfe3f2eSlogwang cl_rx_buf[node].buffer[cl_rx_buf[node].count++] = buf;
2432bfe3f2eSlogwang }
2442bfe3f2eSlogwang
2452bfe3f2eSlogwang /*
2462bfe3f2eSlogwang * This function takes a group of packets and routes them
2472bfe3f2eSlogwang * individually to the node process. Very simply round-robins the packets
2482bfe3f2eSlogwang * without checking any of the packet contents.
2492bfe3f2eSlogwang */
2502bfe3f2eSlogwang static void
process_packets(uint32_t port_num __rte_unused,struct rte_mbuf * pkts[],uint16_t rx_count,unsigned int socket_id)2512bfe3f2eSlogwang process_packets(uint32_t port_num __rte_unused, struct rte_mbuf *pkts[],
2522bfe3f2eSlogwang uint16_t rx_count, unsigned int socket_id)
2532bfe3f2eSlogwang {
2542bfe3f2eSlogwang uint16_t i;
2552bfe3f2eSlogwang uint8_t node;
2562bfe3f2eSlogwang efd_value_t data[RTE_EFD_BURST_MAX];
2572bfe3f2eSlogwang const void *key_ptrs[RTE_EFD_BURST_MAX];
2582bfe3f2eSlogwang
2594418919fSjohnjiang struct rte_ipv4_hdr *ipv4_hdr;
2602bfe3f2eSlogwang uint32_t ipv4_dst_ip[RTE_EFD_BURST_MAX];
2612bfe3f2eSlogwang
2622bfe3f2eSlogwang for (i = 0; i < rx_count; i++) {
2632bfe3f2eSlogwang /* Handle IPv4 header.*/
2644418919fSjohnjiang ipv4_hdr = rte_pktmbuf_mtod_offset(pkts[i],
2654418919fSjohnjiang struct rte_ipv4_hdr *, sizeof(struct rte_ether_hdr));
2662bfe3f2eSlogwang ipv4_dst_ip[i] = ipv4_hdr->dst_addr;
2672bfe3f2eSlogwang key_ptrs[i] = (void *)&ipv4_dst_ip[i];
2682bfe3f2eSlogwang }
2692bfe3f2eSlogwang
2702bfe3f2eSlogwang rte_efd_lookup_bulk(efd_table, socket_id, rx_count,
2712bfe3f2eSlogwang (const void **) key_ptrs, data);
2722bfe3f2eSlogwang for (i = 0; i < rx_count; i++) {
2732bfe3f2eSlogwang node = (uint8_t) ((uintptr_t)data[i]);
2742bfe3f2eSlogwang
2752bfe3f2eSlogwang if (node >= num_nodes) {
2762bfe3f2eSlogwang /*
2772bfe3f2eSlogwang * Node is out of range, which means that
2782bfe3f2eSlogwang * flow has not been inserted
2792bfe3f2eSlogwang */
2802bfe3f2eSlogwang flow_dist_stats.drop++;
2812bfe3f2eSlogwang rte_pktmbuf_free(pkts[i]);
2822bfe3f2eSlogwang } else {
2832bfe3f2eSlogwang flow_dist_stats.distributed++;
2842bfe3f2eSlogwang enqueue_rx_packet(node, pkts[i]);
2852bfe3f2eSlogwang }
2862bfe3f2eSlogwang }
2872bfe3f2eSlogwang
2882bfe3f2eSlogwang for (i = 0; i < num_nodes; i++)
2892bfe3f2eSlogwang flush_rx_queue(i);
2902bfe3f2eSlogwang }
2912bfe3f2eSlogwang
2922bfe3f2eSlogwang /*
293*2d9fd380Sjfb8856606 * Function called by the main lcore of the DPDK process.
2942bfe3f2eSlogwang */
2952bfe3f2eSlogwang static void
do_packet_forwarding(void)2962bfe3f2eSlogwang do_packet_forwarding(void)
2972bfe3f2eSlogwang {
2982bfe3f2eSlogwang unsigned int port_num = 0; /* indexes the port[] array */
2992bfe3f2eSlogwang unsigned int socket_id = rte_socket_id();
3002bfe3f2eSlogwang
3012bfe3f2eSlogwang for (;;) {
3022bfe3f2eSlogwang struct rte_mbuf *buf[PACKET_READ_SIZE];
3032bfe3f2eSlogwang uint16_t rx_count;
3042bfe3f2eSlogwang
3052bfe3f2eSlogwang /* read a port */
3062bfe3f2eSlogwang rx_count = rte_eth_rx_burst(info->id[port_num], 0,
3072bfe3f2eSlogwang buf, PACKET_READ_SIZE);
3082bfe3f2eSlogwang info->rx_stats.rx[port_num] += rx_count;
3092bfe3f2eSlogwang
3102bfe3f2eSlogwang /* Now process the NIC packets read */
3112bfe3f2eSlogwang if (likely(rx_count > 0))
3122bfe3f2eSlogwang process_packets(port_num, buf, rx_count, socket_id);
3132bfe3f2eSlogwang
3142bfe3f2eSlogwang /* move to next port */
3152bfe3f2eSlogwang if (++port_num == info->num_ports)
3162bfe3f2eSlogwang port_num = 0;
3172bfe3f2eSlogwang }
3182bfe3f2eSlogwang }
3192bfe3f2eSlogwang
3202bfe3f2eSlogwang int
main(int argc,char * argv[])3212bfe3f2eSlogwang main(int argc, char *argv[])
3222bfe3f2eSlogwang {
3232bfe3f2eSlogwang /* initialise the system */
3242bfe3f2eSlogwang if (init(argc, argv) < 0)
3252bfe3f2eSlogwang return -1;
3262bfe3f2eSlogwang RTE_LOG(INFO, APP, "Finished Process Init.\n");
3272bfe3f2eSlogwang
3282bfe3f2eSlogwang cl_rx_buf = calloc(num_nodes, sizeof(cl_rx_buf[0]));
3292bfe3f2eSlogwang
3302bfe3f2eSlogwang /* clear statistics */
3312bfe3f2eSlogwang clear_stats();
3322bfe3f2eSlogwang
333*2d9fd380Sjfb8856606 /* put all other cores to sleep except main */
334*2d9fd380Sjfb8856606 rte_eal_mp_remote_launch(sleep_lcore, NULL, SKIP_MAIN);
3352bfe3f2eSlogwang
3362bfe3f2eSlogwang do_packet_forwarding();
3372bfe3f2eSlogwang return 0;
3382bfe3f2eSlogwang }
339