13998e2a0SBruce Richardson /* SPDX-License-Identifier: BSD-3-Clause 23998e2a0SBruce Richardson * Copyright(c) 2016-2017 Intel Corporation 3ed2a80fdSPablo de Lara */ 4ed2a80fdSPablo de Lara 5ed2a80fdSPablo de Lara #include <stdint.h> 6ed2a80fdSPablo de Lara #include <stdio.h> 7ed2a80fdSPablo de Lara #include <string.h> 8ed2a80fdSPablo de Lara #include <sys/queue.h> 9ed2a80fdSPablo de Lara #include <errno.h> 10ed2a80fdSPablo de Lara #include <stdarg.h> 11ed2a80fdSPablo de Lara #include <inttypes.h> 12ed2a80fdSPablo de Lara 13ed2a80fdSPablo de Lara #include <rte_common.h> 14ed2a80fdSPablo de Lara #include <rte_memory.h> 15ed2a80fdSPablo de Lara #include <rte_memzone.h> 16ed2a80fdSPablo de Lara #include <rte_eal.h> 17ed2a80fdSPablo de Lara #include <rte_byteorder.h> 18ed2a80fdSPablo de Lara #include <rte_atomic.h> 19ed2a80fdSPablo de Lara #include <rte_launch.h> 20ed2a80fdSPablo de Lara #include <rte_per_lcore.h> 21ed2a80fdSPablo de Lara #include <rte_lcore.h> 22ed2a80fdSPablo de Lara #include <rte_branch_prediction.h> 23ed2a80fdSPablo de Lara #include <rte_debug.h> 24ed2a80fdSPablo de Lara #include <rte_ring.h> 25ed2a80fdSPablo de Lara #include <rte_log.h> 26ed2a80fdSPablo de Lara #include <rte_mempool.h> 27ed2a80fdSPablo de Lara #include <rte_memcpy.h> 28ed2a80fdSPablo de Lara #include <rte_mbuf.h> 29ed2a80fdSPablo de Lara #include <rte_interrupts.h> 30ed2a80fdSPablo de Lara #include <rte_ether.h> 31ed2a80fdSPablo de Lara #include <rte_ethdev.h> 32ed2a80fdSPablo de Lara #include <rte_malloc.h> 33ed2a80fdSPablo de Lara #include <rte_string_fns.h> 34ed2a80fdSPablo de Lara #include <rte_cycles.h> 35ed2a80fdSPablo de Lara #include <rte_efd.h> 36ed2a80fdSPablo de Lara #include <rte_hash.h> 37ed2a80fdSPablo de Lara 38ed2a80fdSPablo de Lara #include "common.h" 39ed2a80fdSPablo de Lara #include "args.h" 40ed2a80fdSPablo de Lara #include "init.h" 41ed2a80fdSPablo de Lara 42ed2a80fdSPablo de Lara #define MBUFS_PER_NODE 1536 43ed2a80fdSPablo de Lara #define MBUFS_PER_PORT 1536 44ed2a80fdSPablo de Lara #define MBUF_CACHE_SIZE 512 45ed2a80fdSPablo de Lara 46ed2a80fdSPablo de Lara #define RTE_MP_RX_DESC_DEFAULT 512 47ed2a80fdSPablo de Lara #define RTE_MP_TX_DESC_DEFAULT 512 48ed2a80fdSPablo de Lara #define NODE_QUEUE_RINGSIZE 128 49ed2a80fdSPablo de Lara 50ed2a80fdSPablo de Lara #define NO_FLAGS 0 51ed2a80fdSPablo de Lara 52ed2a80fdSPablo de Lara /* The mbuf pool for packet rx */ 53ed2a80fdSPablo de Lara struct rte_mempool *pktmbuf_pool; 54ed2a80fdSPablo de Lara 55ed2a80fdSPablo de Lara /* array of info/queues for nodes */ 56ed2a80fdSPablo de Lara struct node *nodes; 57ed2a80fdSPablo de Lara 58ed2a80fdSPablo de Lara /* EFD table */ 59ed2a80fdSPablo de Lara struct rte_efd_table *efd_table; 60ed2a80fdSPablo de Lara 61ed2a80fdSPablo de Lara /* Shared info between server and nodes */ 62ed2a80fdSPablo de Lara struct shared_info *info; 63ed2a80fdSPablo de Lara 64ed2a80fdSPablo de Lara /** 65ed2a80fdSPablo de Lara * Initialise the mbuf pool for packet reception for the NIC, and any other 66ed2a80fdSPablo de Lara * buffer pools needed by the app - currently none. 67ed2a80fdSPablo de Lara */ 68ed2a80fdSPablo de Lara static int 69ed2a80fdSPablo de Lara init_mbuf_pools(void) 70ed2a80fdSPablo de Lara { 71ed2a80fdSPablo de Lara const unsigned int num_mbufs = (num_nodes * MBUFS_PER_NODE) + 72ed2a80fdSPablo de Lara (info->num_ports * MBUFS_PER_PORT); 73ed2a80fdSPablo de Lara 74ed2a80fdSPablo de Lara /* 75ed2a80fdSPablo de Lara * Don't pass single-producer/single-consumer flags to mbuf create as it 76ed2a80fdSPablo de Lara * seems faster to use a cache instead 77ed2a80fdSPablo de Lara */ 78ed2a80fdSPablo de Lara printf("Creating mbuf pool '%s' [%u mbufs] ...\n", 79ed2a80fdSPablo de Lara PKTMBUF_POOL_NAME, num_mbufs); 80ed2a80fdSPablo de Lara pktmbuf_pool = rte_pktmbuf_pool_create(PKTMBUF_POOL_NAME, num_mbufs, 81ed2a80fdSPablo de Lara MBUF_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id()); 82ed2a80fdSPablo de Lara 83ed2a80fdSPablo de Lara return pktmbuf_pool == NULL; /* 0 on success */ 84ed2a80fdSPablo de Lara } 85ed2a80fdSPablo de Lara 86ed2a80fdSPablo de Lara /** 87ed2a80fdSPablo de Lara * Initialise an individual port: 88ed2a80fdSPablo de Lara * - configure number of rx and tx rings 89ed2a80fdSPablo de Lara * - set up each rx ring, to pull from the main mbuf pool 90ed2a80fdSPablo de Lara * - set up each tx ring 91ed2a80fdSPablo de Lara * - start the port and report its status to stdout 92ed2a80fdSPablo de Lara */ 93ed2a80fdSPablo de Lara static int 9447523597SZhiyong Yang init_port(uint16_t port_num) 95ed2a80fdSPablo de Lara { 96ed2a80fdSPablo de Lara /* for port configuration all features are off by default */ 9715c2e952SShahaf Shuler struct rte_eth_conf port_conf = { 98ed2a80fdSPablo de Lara .rxmode = { 9915c2e952SShahaf Shuler .mq_mode = ETH_MQ_RX_RSS, 10015c2e952SShahaf Shuler }, 101ed2a80fdSPablo de Lara }; 102ed2a80fdSPablo de Lara const uint16_t rx_rings = 1, tx_rings = num_nodes; 10360efb44fSRoman Zhukov uint16_t rx_ring_size = RTE_MP_RX_DESC_DEFAULT; 10460efb44fSRoman Zhukov uint16_t tx_ring_size = RTE_MP_TX_DESC_DEFAULT; 10515c2e952SShahaf Shuler struct rte_eth_dev_info dev_info; 10615c2e952SShahaf Shuler struct rte_eth_txconf txconf; 107ed2a80fdSPablo de Lara 108ed2a80fdSPablo de Lara uint16_t q; 109ed2a80fdSPablo de Lara int retval; 110ed2a80fdSPablo de Lara 11147523597SZhiyong Yang printf("Port %u init ... ", port_num); 112ed2a80fdSPablo de Lara fflush(stdout); 113ed2a80fdSPablo de Lara 114089e5ed7SIvan Ilchenko retval = rte_eth_dev_info_get(port_num, &dev_info); 115089e5ed7SIvan Ilchenko if (retval != 0) 116089e5ed7SIvan Ilchenko return retval; 117089e5ed7SIvan Ilchenko 11815c2e952SShahaf Shuler if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE) 11915c2e952SShahaf Shuler port_conf.txmode.offloads |= 12015c2e952SShahaf Shuler DEV_TX_OFFLOAD_MBUF_FAST_FREE; 12115c2e952SShahaf Shuler 122ed2a80fdSPablo de Lara /* 123ed2a80fdSPablo de Lara * Standard DPDK port initialisation - config port, then set up 124ed2a80fdSPablo de Lara * rx and tx rings. 125ed2a80fdSPablo de Lara */ 126ed2a80fdSPablo de Lara retval = rte_eth_dev_configure(port_num, rx_rings, tx_rings, &port_conf); 127ed2a80fdSPablo de Lara if (retval != 0) 128ed2a80fdSPablo de Lara return retval; 129ed2a80fdSPablo de Lara 13060efb44fSRoman Zhukov retval = rte_eth_dev_adjust_nb_rx_tx_desc(port_num, &rx_ring_size, 13160efb44fSRoman Zhukov &tx_ring_size); 13260efb44fSRoman Zhukov if (retval != 0) 13360efb44fSRoman Zhukov return retval; 13460efb44fSRoman Zhukov 135ed2a80fdSPablo de Lara for (q = 0; q < rx_rings; q++) { 136ed2a80fdSPablo de Lara retval = rte_eth_rx_queue_setup(port_num, q, rx_ring_size, 137ed2a80fdSPablo de Lara rte_eth_dev_socket_id(port_num), 138ed2a80fdSPablo de Lara NULL, pktmbuf_pool); 139ed2a80fdSPablo de Lara if (retval < 0) 140ed2a80fdSPablo de Lara return retval; 141ed2a80fdSPablo de Lara } 142ed2a80fdSPablo de Lara 14315c2e952SShahaf Shuler txconf = dev_info.default_txconf; 14415c2e952SShahaf Shuler txconf.offloads = port_conf.txmode.offloads; 145ed2a80fdSPablo de Lara for (q = 0; q < tx_rings; q++) { 146ed2a80fdSPablo de Lara retval = rte_eth_tx_queue_setup(port_num, q, tx_ring_size, 147ed2a80fdSPablo de Lara rte_eth_dev_socket_id(port_num), 14815c2e952SShahaf Shuler &txconf); 149ed2a80fdSPablo de Lara if (retval < 0) 150ed2a80fdSPablo de Lara return retval; 151ed2a80fdSPablo de Lara } 152ed2a80fdSPablo de Lara 153f430bbceSIvan Ilchenko retval = rte_eth_promiscuous_enable(port_num); 154f430bbceSIvan Ilchenko if (retval != 0) 155f430bbceSIvan Ilchenko return retval; 156ed2a80fdSPablo de Lara 157ed2a80fdSPablo de Lara retval = rte_eth_dev_start(port_num); 158ed2a80fdSPablo de Lara if (retval < 0) 159ed2a80fdSPablo de Lara return retval; 160ed2a80fdSPablo de Lara 161ed2a80fdSPablo de Lara printf("done:\n"); 162ed2a80fdSPablo de Lara 163ed2a80fdSPablo de Lara return 0; 164ed2a80fdSPablo de Lara } 165ed2a80fdSPablo de Lara 166ed2a80fdSPablo de Lara /** 167ed2a80fdSPablo de Lara * Set up the DPDK rings which will be used to pass packets, via 168ed2a80fdSPablo de Lara * pointers, between the multi-process server and node processes. 169ed2a80fdSPablo de Lara * Each node needs one RX queue. 170ed2a80fdSPablo de Lara */ 171ed2a80fdSPablo de Lara static int 172ed2a80fdSPablo de Lara init_shm_rings(void) 173ed2a80fdSPablo de Lara { 174ed2a80fdSPablo de Lara unsigned int i; 175ed2a80fdSPablo de Lara unsigned int socket_id; 176ed2a80fdSPablo de Lara const char *q_name; 177ed2a80fdSPablo de Lara const unsigned int ringsize = NODE_QUEUE_RINGSIZE; 178ed2a80fdSPablo de Lara 179ed2a80fdSPablo de Lara nodes = rte_malloc("node details", 180ed2a80fdSPablo de Lara sizeof(*nodes) * num_nodes, 0); 181ed2a80fdSPablo de Lara if (nodes == NULL) 182ed2a80fdSPablo de Lara rte_exit(EXIT_FAILURE, "Cannot allocate memory for " 183ed2a80fdSPablo de Lara "node program details\n"); 184ed2a80fdSPablo de Lara 185ed2a80fdSPablo de Lara for (i = 0; i < num_nodes; i++) { 186ed2a80fdSPablo de Lara /* Create an RX queue for each node */ 187ed2a80fdSPablo de Lara socket_id = rte_socket_id(); 188ed2a80fdSPablo de Lara q_name = get_rx_queue_name(i); 189ed2a80fdSPablo de Lara nodes[i].rx_q = rte_ring_create(q_name, 190ed2a80fdSPablo de Lara ringsize, socket_id, 191ed2a80fdSPablo de Lara RING_F_SP_ENQ | RING_F_SC_DEQ); 192ed2a80fdSPablo de Lara if (nodes[i].rx_q == NULL) 193ed2a80fdSPablo de Lara rte_exit(EXIT_FAILURE, "Cannot create rx ring queue " 194ed2a80fdSPablo de Lara "for node %u\n", i); 195ed2a80fdSPablo de Lara } 196ed2a80fdSPablo de Lara return 0; 197ed2a80fdSPablo de Lara } 198ed2a80fdSPablo de Lara 199ed2a80fdSPablo de Lara /* 200ed2a80fdSPablo de Lara * Create EFD table which will contain all the flows 201ed2a80fdSPablo de Lara * that will be distributed among the nodes 202ed2a80fdSPablo de Lara */ 203ed2a80fdSPablo de Lara static void 204ed2a80fdSPablo de Lara create_efd_table(void) 205ed2a80fdSPablo de Lara { 206ed2a80fdSPablo de Lara uint8_t socket_id = rte_socket_id(); 207ed2a80fdSPablo de Lara 208ed2a80fdSPablo de Lara /* create table */ 209ed2a80fdSPablo de Lara efd_table = rte_efd_create("flow table", num_flows * 2, sizeof(uint32_t), 210ed2a80fdSPablo de Lara 1 << socket_id, socket_id); 211ed2a80fdSPablo de Lara 212ed2a80fdSPablo de Lara if (efd_table == NULL) 213ed2a80fdSPablo de Lara rte_exit(EXIT_FAILURE, "Problem creating the flow table\n"); 214ed2a80fdSPablo de Lara } 215ed2a80fdSPablo de Lara 216ed2a80fdSPablo de Lara static void 217ed2a80fdSPablo de Lara populate_efd_table(void) 218ed2a80fdSPablo de Lara { 219ed2a80fdSPablo de Lara unsigned int i; 220ed2a80fdSPablo de Lara int32_t ret; 221ed2a80fdSPablo de Lara uint32_t ip_dst; 222ed2a80fdSPablo de Lara uint8_t socket_id = rte_socket_id(); 223ed2a80fdSPablo de Lara uint64_t node_id; 224ed2a80fdSPablo de Lara 225ed2a80fdSPablo de Lara /* Add flows in table */ 226ed2a80fdSPablo de Lara for (i = 0; i < num_flows; i++) { 227ed2a80fdSPablo de Lara node_id = i % num_nodes; 228ed2a80fdSPablo de Lara 229ed2a80fdSPablo de Lara ip_dst = rte_cpu_to_be_32(i); 230ed2a80fdSPablo de Lara ret = rte_efd_update(efd_table, socket_id, 231ed2a80fdSPablo de Lara (void *)&ip_dst, (efd_value_t)node_id); 232ed2a80fdSPablo de Lara if (ret < 0) 233ed2a80fdSPablo de Lara rte_exit(EXIT_FAILURE, "Unable to add entry %u in " 234ed2a80fdSPablo de Lara "EFD table\n", i); 235ed2a80fdSPablo de Lara } 236ed2a80fdSPablo de Lara 237ed2a80fdSPablo de Lara printf("EFD table: Adding 0x%x keys\n", num_flows); 238ed2a80fdSPablo de Lara } 239ed2a80fdSPablo de Lara 240ed2a80fdSPablo de Lara /* Check the link status of all ports in up to 9s, and print them finally */ 241ed2a80fdSPablo de Lara static void 24247523597SZhiyong Yang check_all_ports_link_status(uint16_t port_num, uint32_t port_mask) 243ed2a80fdSPablo de Lara { 244ed2a80fdSPablo de Lara #define CHECK_INTERVAL 100 /* 100ms */ 245ed2a80fdSPablo de Lara #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */ 24647523597SZhiyong Yang uint8_t count, all_ports_up, print_flag = 0; 24747523597SZhiyong Yang uint16_t portid; 248ed2a80fdSPablo de Lara struct rte_eth_link link; 24922e5c73bSIgor Romanov int ret; 250*db4e8135SIvan Dyukov char link_status_text[RTE_ETH_LINK_MAX_STR_LEN]; 251ed2a80fdSPablo de Lara 252ed2a80fdSPablo de Lara printf("\nChecking link status"); 253ed2a80fdSPablo de Lara fflush(stdout); 254ed2a80fdSPablo de Lara for (count = 0; count <= MAX_CHECK_TIME; count++) { 255ed2a80fdSPablo de Lara all_ports_up = 1; 256ed2a80fdSPablo de Lara for (portid = 0; portid < port_num; portid++) { 257ed2a80fdSPablo de Lara if ((port_mask & (1 << info->id[portid])) == 0) 258ed2a80fdSPablo de Lara continue; 259ed2a80fdSPablo de Lara memset(&link, 0, sizeof(link)); 26022e5c73bSIgor Romanov ret = rte_eth_link_get_nowait(info->id[portid], &link); 26122e5c73bSIgor Romanov if (ret < 0) { 26222e5c73bSIgor Romanov all_ports_up = 0; 26322e5c73bSIgor Romanov if (print_flag == 1) 26422e5c73bSIgor Romanov printf("Port %u link get failed: %s\n", 26522e5c73bSIgor Romanov portid, rte_strerror(-ret)); 26622e5c73bSIgor Romanov continue; 26722e5c73bSIgor Romanov } 268ed2a80fdSPablo de Lara /* print link status if flag set */ 269ed2a80fdSPablo de Lara if (print_flag == 1) { 270*db4e8135SIvan Dyukov rte_eth_link_to_str(link_status_text, 271*db4e8135SIvan Dyukov sizeof(link_status_text), &link); 272*db4e8135SIvan Dyukov printf("Port %d %s\n", info->id[portid], 273*db4e8135SIvan Dyukov link_status_text); 274ed2a80fdSPablo de Lara continue; 275ed2a80fdSPablo de Lara } 276ed2a80fdSPablo de Lara /* clear all_ports_up flag if any link down */ 277ed2a80fdSPablo de Lara if (link.link_status == ETH_LINK_DOWN) { 278ed2a80fdSPablo de Lara all_ports_up = 0; 279ed2a80fdSPablo de Lara break; 280ed2a80fdSPablo de Lara } 281ed2a80fdSPablo de Lara } 282ed2a80fdSPablo de Lara /* after finally printing all link status, get out */ 283ed2a80fdSPablo de Lara if (print_flag == 1) 284ed2a80fdSPablo de Lara break; 285ed2a80fdSPablo de Lara 286ed2a80fdSPablo de Lara if (all_ports_up == 0) { 287ed2a80fdSPablo de Lara printf("."); 288ed2a80fdSPablo de Lara fflush(stdout); 289ed2a80fdSPablo de Lara rte_delay_ms(CHECK_INTERVAL); 290ed2a80fdSPablo de Lara } 291ed2a80fdSPablo de Lara 292ed2a80fdSPablo de Lara /* set the print_flag if all ports up or timeout */ 293ed2a80fdSPablo de Lara if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) { 294ed2a80fdSPablo de Lara print_flag = 1; 295ed2a80fdSPablo de Lara printf("done\n"); 296ed2a80fdSPablo de Lara } 297ed2a80fdSPablo de Lara } 298ed2a80fdSPablo de Lara } 299ed2a80fdSPablo de Lara 300ed2a80fdSPablo de Lara /** 301ed2a80fdSPablo de Lara * Main init function for the multi-process server app, 302ed2a80fdSPablo de Lara * calls subfunctions to do each stage of the initialisation. 303ed2a80fdSPablo de Lara */ 304ed2a80fdSPablo de Lara int 305ed2a80fdSPablo de Lara init(int argc, char *argv[]) 306ed2a80fdSPablo de Lara { 307ed2a80fdSPablo de Lara int retval; 308ed2a80fdSPablo de Lara const struct rte_memzone *mz; 309ed2a80fdSPablo de Lara uint8_t i, total_ports; 310ed2a80fdSPablo de Lara 311ed2a80fdSPablo de Lara /* init EAL, parsing EAL args */ 312ed2a80fdSPablo de Lara retval = rte_eal_init(argc, argv); 313ed2a80fdSPablo de Lara if (retval < 0) 314ed2a80fdSPablo de Lara return -1; 315ed2a80fdSPablo de Lara argc -= retval; 316ed2a80fdSPablo de Lara argv += retval; 317ed2a80fdSPablo de Lara 318ed2a80fdSPablo de Lara /* get total number of ports */ 319d9a42a69SThomas Monjalon total_ports = rte_eth_dev_count_avail(); 320ed2a80fdSPablo de Lara 321ed2a80fdSPablo de Lara /* set up array for port data */ 322ed2a80fdSPablo de Lara mz = rte_memzone_reserve(MZ_SHARED_INFO, sizeof(*info), 323ed2a80fdSPablo de Lara rte_socket_id(), NO_FLAGS); 324ed2a80fdSPablo de Lara if (mz == NULL) 325ed2a80fdSPablo de Lara rte_exit(EXIT_FAILURE, "Cannot reserve memory zone " 326ed2a80fdSPablo de Lara "for port information\n"); 327ed2a80fdSPablo de Lara memset(mz->addr, 0, sizeof(*info)); 328ed2a80fdSPablo de Lara info = mz->addr; 329ed2a80fdSPablo de Lara 330ed2a80fdSPablo de Lara /* parse additional, application arguments */ 331ed2a80fdSPablo de Lara retval = parse_app_args(total_ports, argc, argv); 332ed2a80fdSPablo de Lara if (retval != 0) 333ed2a80fdSPablo de Lara return -1; 334ed2a80fdSPablo de Lara 335ed2a80fdSPablo de Lara /* initialise mbuf pools */ 336ed2a80fdSPablo de Lara retval = init_mbuf_pools(); 337ed2a80fdSPablo de Lara if (retval != 0) 338ed2a80fdSPablo de Lara rte_exit(EXIT_FAILURE, "Cannot create needed mbuf pools\n"); 339ed2a80fdSPablo de Lara 340ed2a80fdSPablo de Lara /* now initialise the ports we will use */ 341ed2a80fdSPablo de Lara for (i = 0; i < info->num_ports; i++) { 342ed2a80fdSPablo de Lara retval = init_port(info->id[i]); 343ed2a80fdSPablo de Lara if (retval != 0) 344ed2a80fdSPablo de Lara rte_exit(EXIT_FAILURE, "Cannot initialise port %u\n", 345ed2a80fdSPablo de Lara (unsigned int) i); 346ed2a80fdSPablo de Lara } 347ed2a80fdSPablo de Lara 348ed2a80fdSPablo de Lara check_all_ports_link_status(info->num_ports, (~0x0)); 349ed2a80fdSPablo de Lara 350ed2a80fdSPablo de Lara /* initialise the node queues/rings for inter-eu comms */ 351ed2a80fdSPablo de Lara init_shm_rings(); 352ed2a80fdSPablo de Lara 353ed2a80fdSPablo de Lara /* Create the EFD table */ 354ed2a80fdSPablo de Lara create_efd_table(); 355ed2a80fdSPablo de Lara 356ed2a80fdSPablo de Lara /* Populate the EFD table */ 357ed2a80fdSPablo de Lara populate_efd_table(); 358ed2a80fdSPablo de Lara 359ed2a80fdSPablo de Lara /* Share the total number of nodes */ 360ed2a80fdSPablo de Lara info->num_nodes = num_nodes; 361ed2a80fdSPablo de Lara 362ed2a80fdSPablo de Lara /* Share the total number of flows */ 363ed2a80fdSPablo de Lara info->num_flows = num_flows; 364ed2a80fdSPablo de Lara return 0; 365ed2a80fdSPablo de Lara } 366