/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2016-2017 Intel Corporation
 */
42bfe3f2eSlogwang
52bfe3f2eSlogwang #include <stdint.h>
62bfe3f2eSlogwang #include <stdio.h>
72bfe3f2eSlogwang #include <string.h>
82bfe3f2eSlogwang #include <sys/queue.h>
92bfe3f2eSlogwang #include <errno.h>
102bfe3f2eSlogwang #include <stdarg.h>
112bfe3f2eSlogwang #include <inttypes.h>
122bfe3f2eSlogwang
132bfe3f2eSlogwang #include <rte_common.h>
142bfe3f2eSlogwang #include <rte_memory.h>
152bfe3f2eSlogwang #include <rte_memzone.h>
162bfe3f2eSlogwang #include <rte_eal.h>
172bfe3f2eSlogwang #include <rte_byteorder.h>
182bfe3f2eSlogwang #include <rte_atomic.h>
192bfe3f2eSlogwang #include <rte_launch.h>
202bfe3f2eSlogwang #include <rte_per_lcore.h>
212bfe3f2eSlogwang #include <rte_lcore.h>
222bfe3f2eSlogwang #include <rte_branch_prediction.h>
232bfe3f2eSlogwang #include <rte_debug.h>
242bfe3f2eSlogwang #include <rte_ring.h>
252bfe3f2eSlogwang #include <rte_log.h>
262bfe3f2eSlogwang #include <rte_mempool.h>
272bfe3f2eSlogwang #include <rte_memcpy.h>
282bfe3f2eSlogwang #include <rte_mbuf.h>
292bfe3f2eSlogwang #include <rte_interrupts.h>
302bfe3f2eSlogwang #include <rte_ether.h>
312bfe3f2eSlogwang #include <rte_ethdev.h>
322bfe3f2eSlogwang #include <rte_malloc.h>
332bfe3f2eSlogwang #include <rte_string_fns.h>
342bfe3f2eSlogwang #include <rte_cycles.h>
352bfe3f2eSlogwang #include <rte_efd.h>
362bfe3f2eSlogwang #include <rte_hash.h>
372bfe3f2eSlogwang
382bfe3f2eSlogwang #include "common.h"
392bfe3f2eSlogwang #include "args.h"
402bfe3f2eSlogwang #include "init.h"
412bfe3f2eSlogwang
/* Sizing of the packet mbuf pool: per-node and per-port shares, plus cache */
#define MBUFS_PER_NODE          1536
#define MBUFS_PER_PORT          1536
#define MBUF_CACHE_SIZE         512

/* Requested descriptor counts for port queues, and the per-node ring size */
#define RTE_MP_RX_DESC_DEFAULT  512
#define RTE_MP_TX_DESC_DEFAULT  512
#define NODE_QUEUE_RINGSIZE     128

/* Readable spelling of "no flags" for calls that take a flags argument */
#define NO_FLAGS                0

/* Pool that supplies the mbufs used for packet reception */
struct rte_mempool *pktmbuf_pool;

/* Per-node details (one array element per node, including its rx ring) */
struct node *nodes;

/* Flow table built with the EFD library */
struct rte_efd_table *efd_table;

/* Information shared between the server and the node processes */
struct shared_info *info;
632bfe3f2eSlogwang
642bfe3f2eSlogwang /**
652bfe3f2eSlogwang * Initialise the mbuf pool for packet reception for the NIC, and any other
662bfe3f2eSlogwang * buffer pools needed by the app - currently none.
672bfe3f2eSlogwang */
682bfe3f2eSlogwang static int
init_mbuf_pools(void)692bfe3f2eSlogwang init_mbuf_pools(void)
702bfe3f2eSlogwang {
712bfe3f2eSlogwang const unsigned int num_mbufs = (num_nodes * MBUFS_PER_NODE) +
722bfe3f2eSlogwang (info->num_ports * MBUFS_PER_PORT);
732bfe3f2eSlogwang
742bfe3f2eSlogwang /*
752bfe3f2eSlogwang * Don't pass single-producer/single-consumer flags to mbuf create as it
762bfe3f2eSlogwang * seems faster to use a cache instead
772bfe3f2eSlogwang */
782bfe3f2eSlogwang printf("Creating mbuf pool '%s' [%u mbufs] ...\n",
792bfe3f2eSlogwang PKTMBUF_POOL_NAME, num_mbufs);
802bfe3f2eSlogwang pktmbuf_pool = rte_pktmbuf_pool_create(PKTMBUF_POOL_NAME, num_mbufs,
812bfe3f2eSlogwang MBUF_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
822bfe3f2eSlogwang
832bfe3f2eSlogwang return pktmbuf_pool == NULL; /* 0 on success */
842bfe3f2eSlogwang }
852bfe3f2eSlogwang
862bfe3f2eSlogwang /**
872bfe3f2eSlogwang * Initialise an individual port:
882bfe3f2eSlogwang * - configure number of rx and tx rings
892bfe3f2eSlogwang * - set up each rx ring, to pull from the main mbuf pool
902bfe3f2eSlogwang * - set up each tx ring
912bfe3f2eSlogwang * - start the port and report its status to stdout
922bfe3f2eSlogwang */
932bfe3f2eSlogwang static int
init_port(uint16_t port_num)942bfe3f2eSlogwang init_port(uint16_t port_num)
952bfe3f2eSlogwang {
962bfe3f2eSlogwang /* for port configuration all features are off by default */
97d30ea906Sjfb8856606 struct rte_eth_conf port_conf = {
982bfe3f2eSlogwang .rxmode = {
99d30ea906Sjfb8856606 .mq_mode = ETH_MQ_RX_RSS,
100d30ea906Sjfb8856606 },
1012bfe3f2eSlogwang };
1022bfe3f2eSlogwang const uint16_t rx_rings = 1, tx_rings = num_nodes;
1032bfe3f2eSlogwang uint16_t rx_ring_size = RTE_MP_RX_DESC_DEFAULT;
1042bfe3f2eSlogwang uint16_t tx_ring_size = RTE_MP_TX_DESC_DEFAULT;
105d30ea906Sjfb8856606 struct rte_eth_dev_info dev_info;
106d30ea906Sjfb8856606 struct rte_eth_txconf txconf;
1072bfe3f2eSlogwang
1082bfe3f2eSlogwang uint16_t q;
1092bfe3f2eSlogwang int retval;
1102bfe3f2eSlogwang
1112bfe3f2eSlogwang printf("Port %u init ... ", port_num);
1122bfe3f2eSlogwang fflush(stdout);
1132bfe3f2eSlogwang
1144418919fSjohnjiang retval = rte_eth_dev_info_get(port_num, &dev_info);
1154418919fSjohnjiang if (retval != 0)
1164418919fSjohnjiang return retval;
1174418919fSjohnjiang
118d30ea906Sjfb8856606 if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
119d30ea906Sjfb8856606 port_conf.txmode.offloads |=
120d30ea906Sjfb8856606 DEV_TX_OFFLOAD_MBUF_FAST_FREE;
121d30ea906Sjfb8856606
1222bfe3f2eSlogwang /*
1232bfe3f2eSlogwang * Standard DPDK port initialisation - config port, then set up
1242bfe3f2eSlogwang * rx and tx rings.
1252bfe3f2eSlogwang */
1262bfe3f2eSlogwang retval = rte_eth_dev_configure(port_num, rx_rings, tx_rings, &port_conf);
1272bfe3f2eSlogwang if (retval != 0)
1282bfe3f2eSlogwang return retval;
1292bfe3f2eSlogwang
1302bfe3f2eSlogwang retval = rte_eth_dev_adjust_nb_rx_tx_desc(port_num, &rx_ring_size,
1312bfe3f2eSlogwang &tx_ring_size);
1322bfe3f2eSlogwang if (retval != 0)
1332bfe3f2eSlogwang return retval;
1342bfe3f2eSlogwang
1352bfe3f2eSlogwang for (q = 0; q < rx_rings; q++) {
1362bfe3f2eSlogwang retval = rte_eth_rx_queue_setup(port_num, q, rx_ring_size,
1372bfe3f2eSlogwang rte_eth_dev_socket_id(port_num),
1382bfe3f2eSlogwang NULL, pktmbuf_pool);
1392bfe3f2eSlogwang if (retval < 0)
1402bfe3f2eSlogwang return retval;
1412bfe3f2eSlogwang }
1422bfe3f2eSlogwang
143d30ea906Sjfb8856606 txconf = dev_info.default_txconf;
144d30ea906Sjfb8856606 txconf.offloads = port_conf.txmode.offloads;
1452bfe3f2eSlogwang for (q = 0; q < tx_rings; q++) {
1462bfe3f2eSlogwang retval = rte_eth_tx_queue_setup(port_num, q, tx_ring_size,
1472bfe3f2eSlogwang rte_eth_dev_socket_id(port_num),
148d30ea906Sjfb8856606 &txconf);
1492bfe3f2eSlogwang if (retval < 0)
1502bfe3f2eSlogwang return retval;
1512bfe3f2eSlogwang }
1522bfe3f2eSlogwang
1534418919fSjohnjiang retval = rte_eth_promiscuous_enable(port_num);
1544418919fSjohnjiang if (retval != 0)
1554418919fSjohnjiang return retval;
1562bfe3f2eSlogwang
1572bfe3f2eSlogwang retval = rte_eth_dev_start(port_num);
1582bfe3f2eSlogwang if (retval < 0)
1592bfe3f2eSlogwang return retval;
1602bfe3f2eSlogwang
1612bfe3f2eSlogwang printf("done:\n");
1622bfe3f2eSlogwang
1632bfe3f2eSlogwang return 0;
1642bfe3f2eSlogwang }
1652bfe3f2eSlogwang
1662bfe3f2eSlogwang /**
1672bfe3f2eSlogwang * Set up the DPDK rings which will be used to pass packets, via
1682bfe3f2eSlogwang * pointers, between the multi-process server and node processes.
1692bfe3f2eSlogwang * Each node needs one RX queue.
1702bfe3f2eSlogwang */
1712bfe3f2eSlogwang static int
init_shm_rings(void)1722bfe3f2eSlogwang init_shm_rings(void)
1732bfe3f2eSlogwang {
1742bfe3f2eSlogwang unsigned int i;
1752bfe3f2eSlogwang unsigned int socket_id;
1762bfe3f2eSlogwang const char *q_name;
1772bfe3f2eSlogwang const unsigned int ringsize = NODE_QUEUE_RINGSIZE;
1782bfe3f2eSlogwang
1792bfe3f2eSlogwang nodes = rte_malloc("node details",
1802bfe3f2eSlogwang sizeof(*nodes) * num_nodes, 0);
1812bfe3f2eSlogwang if (nodes == NULL)
1822bfe3f2eSlogwang rte_exit(EXIT_FAILURE, "Cannot allocate memory for "
1832bfe3f2eSlogwang "node program details\n");
1842bfe3f2eSlogwang
1852bfe3f2eSlogwang for (i = 0; i < num_nodes; i++) {
1862bfe3f2eSlogwang /* Create an RX queue for each node */
1872bfe3f2eSlogwang socket_id = rte_socket_id();
1882bfe3f2eSlogwang q_name = get_rx_queue_name(i);
1892bfe3f2eSlogwang nodes[i].rx_q = rte_ring_create(q_name,
1902bfe3f2eSlogwang ringsize, socket_id,
1912bfe3f2eSlogwang RING_F_SP_ENQ | RING_F_SC_DEQ);
1922bfe3f2eSlogwang if (nodes[i].rx_q == NULL)
1932bfe3f2eSlogwang rte_exit(EXIT_FAILURE, "Cannot create rx ring queue "
1942bfe3f2eSlogwang "for node %u\n", i);
1952bfe3f2eSlogwang }
1962bfe3f2eSlogwang return 0;
1972bfe3f2eSlogwang }
1982bfe3f2eSlogwang
1992bfe3f2eSlogwang /*
2002bfe3f2eSlogwang * Create EFD table which will contain all the flows
2012bfe3f2eSlogwang * that will be distributed among the nodes
2022bfe3f2eSlogwang */
2032bfe3f2eSlogwang static void
create_efd_table(void)2042bfe3f2eSlogwang create_efd_table(void)
2052bfe3f2eSlogwang {
2062bfe3f2eSlogwang uint8_t socket_id = rte_socket_id();
2072bfe3f2eSlogwang
2082bfe3f2eSlogwang /* create table */
2092bfe3f2eSlogwang efd_table = rte_efd_create("flow table", num_flows * 2, sizeof(uint32_t),
2102bfe3f2eSlogwang 1 << socket_id, socket_id);
2112bfe3f2eSlogwang
2122bfe3f2eSlogwang if (efd_table == NULL)
2132bfe3f2eSlogwang rte_exit(EXIT_FAILURE, "Problem creating the flow table\n");
2142bfe3f2eSlogwang }
2152bfe3f2eSlogwang
2162bfe3f2eSlogwang static void
populate_efd_table(void)2172bfe3f2eSlogwang populate_efd_table(void)
2182bfe3f2eSlogwang {
2192bfe3f2eSlogwang unsigned int i;
2202bfe3f2eSlogwang int32_t ret;
2212bfe3f2eSlogwang uint32_t ip_dst;
2222bfe3f2eSlogwang uint8_t socket_id = rte_socket_id();
2232bfe3f2eSlogwang uint64_t node_id;
2242bfe3f2eSlogwang
2252bfe3f2eSlogwang /* Add flows in table */
2262bfe3f2eSlogwang for (i = 0; i < num_flows; i++) {
2272bfe3f2eSlogwang node_id = i % num_nodes;
2282bfe3f2eSlogwang
2292bfe3f2eSlogwang ip_dst = rte_cpu_to_be_32(i);
2302bfe3f2eSlogwang ret = rte_efd_update(efd_table, socket_id,
2312bfe3f2eSlogwang (void *)&ip_dst, (efd_value_t)node_id);
2322bfe3f2eSlogwang if (ret < 0)
2332bfe3f2eSlogwang rte_exit(EXIT_FAILURE, "Unable to add entry %u in "
2342bfe3f2eSlogwang "EFD table\n", i);
2352bfe3f2eSlogwang }
2362bfe3f2eSlogwang
2372bfe3f2eSlogwang printf("EFD table: Adding 0x%x keys\n", num_flows);
2382bfe3f2eSlogwang }
2392bfe3f2eSlogwang
2402bfe3f2eSlogwang /* Check the link status of all ports in up to 9s, and print them finally */
2412bfe3f2eSlogwang static void
check_all_ports_link_status(uint16_t port_num,uint32_t port_mask)2422bfe3f2eSlogwang check_all_ports_link_status(uint16_t port_num, uint32_t port_mask)
2432bfe3f2eSlogwang {
2442bfe3f2eSlogwang #define CHECK_INTERVAL 100 /* 100ms */
2452bfe3f2eSlogwang #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
2462bfe3f2eSlogwang uint8_t count, all_ports_up, print_flag = 0;
2472bfe3f2eSlogwang uint16_t portid;
2482bfe3f2eSlogwang struct rte_eth_link link;
2494418919fSjohnjiang int ret;
250*2d9fd380Sjfb8856606 char link_status_text[RTE_ETH_LINK_MAX_STR_LEN];
2512bfe3f2eSlogwang
2522bfe3f2eSlogwang printf("\nChecking link status");
2532bfe3f2eSlogwang fflush(stdout);
2542bfe3f2eSlogwang for (count = 0; count <= MAX_CHECK_TIME; count++) {
2552bfe3f2eSlogwang all_ports_up = 1;
2562bfe3f2eSlogwang for (portid = 0; portid < port_num; portid++) {
2572bfe3f2eSlogwang if ((port_mask & (1 << info->id[portid])) == 0)
2582bfe3f2eSlogwang continue;
2592bfe3f2eSlogwang memset(&link, 0, sizeof(link));
2604418919fSjohnjiang ret = rte_eth_link_get_nowait(info->id[portid], &link);
2614418919fSjohnjiang if (ret < 0) {
2624418919fSjohnjiang all_ports_up = 0;
2634418919fSjohnjiang if (print_flag == 1)
2644418919fSjohnjiang printf("Port %u link get failed: %s\n",
2654418919fSjohnjiang portid, rte_strerror(-ret));
2664418919fSjohnjiang continue;
2674418919fSjohnjiang }
2682bfe3f2eSlogwang /* print link status if flag set */
2692bfe3f2eSlogwang if (print_flag == 1) {
270*2d9fd380Sjfb8856606 rte_eth_link_to_str(link_status_text,
271*2d9fd380Sjfb8856606 sizeof(link_status_text), &link);
272*2d9fd380Sjfb8856606 printf("Port %d %s\n", info->id[portid],
273*2d9fd380Sjfb8856606 link_status_text);
2742bfe3f2eSlogwang continue;
2752bfe3f2eSlogwang }
2762bfe3f2eSlogwang /* clear all_ports_up flag if any link down */
2772bfe3f2eSlogwang if (link.link_status == ETH_LINK_DOWN) {
2782bfe3f2eSlogwang all_ports_up = 0;
2792bfe3f2eSlogwang break;
2802bfe3f2eSlogwang }
2812bfe3f2eSlogwang }
2822bfe3f2eSlogwang /* after finally printing all link status, get out */
2832bfe3f2eSlogwang if (print_flag == 1)
2842bfe3f2eSlogwang break;
2852bfe3f2eSlogwang
2862bfe3f2eSlogwang if (all_ports_up == 0) {
2872bfe3f2eSlogwang printf(".");
2882bfe3f2eSlogwang fflush(stdout);
2892bfe3f2eSlogwang rte_delay_ms(CHECK_INTERVAL);
2902bfe3f2eSlogwang }
2912bfe3f2eSlogwang
2922bfe3f2eSlogwang /* set the print_flag if all ports up or timeout */
2932bfe3f2eSlogwang if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
2942bfe3f2eSlogwang print_flag = 1;
2952bfe3f2eSlogwang printf("done\n");
2962bfe3f2eSlogwang }
2972bfe3f2eSlogwang }
2982bfe3f2eSlogwang }
2992bfe3f2eSlogwang
3002bfe3f2eSlogwang /**
3012bfe3f2eSlogwang * Main init function for the multi-process server app,
3022bfe3f2eSlogwang * calls subfunctions to do each stage of the initialisation.
3032bfe3f2eSlogwang */
3042bfe3f2eSlogwang int
init(int argc,char * argv[])3052bfe3f2eSlogwang init(int argc, char *argv[])
3062bfe3f2eSlogwang {
3072bfe3f2eSlogwang int retval;
3082bfe3f2eSlogwang const struct rte_memzone *mz;
3092bfe3f2eSlogwang uint8_t i, total_ports;
3102bfe3f2eSlogwang
3112bfe3f2eSlogwang /* init EAL, parsing EAL args */
3122bfe3f2eSlogwang retval = rte_eal_init(argc, argv);
3132bfe3f2eSlogwang if (retval < 0)
3142bfe3f2eSlogwang return -1;
3152bfe3f2eSlogwang argc -= retval;
3162bfe3f2eSlogwang argv += retval;
3172bfe3f2eSlogwang
3182bfe3f2eSlogwang /* get total number of ports */
319d30ea906Sjfb8856606 total_ports = rte_eth_dev_count_avail();
3202bfe3f2eSlogwang
3212bfe3f2eSlogwang /* set up array for port data */
3222bfe3f2eSlogwang mz = rte_memzone_reserve(MZ_SHARED_INFO, sizeof(*info),
3232bfe3f2eSlogwang rte_socket_id(), NO_FLAGS);
3242bfe3f2eSlogwang if (mz == NULL)
3252bfe3f2eSlogwang rte_exit(EXIT_FAILURE, "Cannot reserve memory zone "
3262bfe3f2eSlogwang "for port information\n");
3272bfe3f2eSlogwang memset(mz->addr, 0, sizeof(*info));
3282bfe3f2eSlogwang info = mz->addr;
3292bfe3f2eSlogwang
3302bfe3f2eSlogwang /* parse additional, application arguments */
3312bfe3f2eSlogwang retval = parse_app_args(total_ports, argc, argv);
3322bfe3f2eSlogwang if (retval != 0)
3332bfe3f2eSlogwang return -1;
3342bfe3f2eSlogwang
3352bfe3f2eSlogwang /* initialise mbuf pools */
3362bfe3f2eSlogwang retval = init_mbuf_pools();
3372bfe3f2eSlogwang if (retval != 0)
3382bfe3f2eSlogwang rte_exit(EXIT_FAILURE, "Cannot create needed mbuf pools\n");
3392bfe3f2eSlogwang
3402bfe3f2eSlogwang /* now initialise the ports we will use */
3412bfe3f2eSlogwang for (i = 0; i < info->num_ports; i++) {
3422bfe3f2eSlogwang retval = init_port(info->id[i]);
3432bfe3f2eSlogwang if (retval != 0)
3442bfe3f2eSlogwang rte_exit(EXIT_FAILURE, "Cannot initialise port %u\n",
3452bfe3f2eSlogwang (unsigned int) i);
3462bfe3f2eSlogwang }
3472bfe3f2eSlogwang
3482bfe3f2eSlogwang check_all_ports_link_status(info->num_ports, (~0x0));
3492bfe3f2eSlogwang
3502bfe3f2eSlogwang /* initialise the node queues/rings for inter-eu comms */
3512bfe3f2eSlogwang init_shm_rings();
3522bfe3f2eSlogwang
3532bfe3f2eSlogwang /* Create the EFD table */
3542bfe3f2eSlogwang create_efd_table();
3552bfe3f2eSlogwang
3562bfe3f2eSlogwang /* Populate the EFD table */
3572bfe3f2eSlogwang populate_efd_table();
3582bfe3f2eSlogwang
3592bfe3f2eSlogwang /* Share the total number of nodes */
3602bfe3f2eSlogwang info->num_nodes = num_nodes;
3612bfe3f2eSlogwang
3622bfe3f2eSlogwang /* Share the total number of flows */
3632bfe3f2eSlogwang info->num_flows = num_flows;
3642bfe3f2eSlogwang return 0;
3652bfe3f2eSlogwang }
366