1d30ea906Sjfb8856606 /* SPDX-License-Identifier: BSD-3-Clause 2d30ea906Sjfb8856606 * Copyright(c) 2016-2017 Intel Corporation 32bfe3f2eSlogwang */ 42bfe3f2eSlogwang 52bfe3f2eSlogwang #include <stdint.h> 62bfe3f2eSlogwang #include <stdio.h> 72bfe3f2eSlogwang #include <string.h> 82bfe3f2eSlogwang #include <sys/queue.h> 92bfe3f2eSlogwang #include <errno.h> 102bfe3f2eSlogwang #include <stdarg.h> 112bfe3f2eSlogwang #include <inttypes.h> 122bfe3f2eSlogwang 132bfe3f2eSlogwang #include <rte_common.h> 142bfe3f2eSlogwang #include <rte_memory.h> 152bfe3f2eSlogwang #include <rte_memzone.h> 162bfe3f2eSlogwang #include <rte_eal.h> 172bfe3f2eSlogwang #include <rte_byteorder.h> 182bfe3f2eSlogwang #include <rte_atomic.h> 192bfe3f2eSlogwang #include <rte_launch.h> 202bfe3f2eSlogwang #include <rte_per_lcore.h> 212bfe3f2eSlogwang #include <rte_lcore.h> 222bfe3f2eSlogwang #include <rte_branch_prediction.h> 232bfe3f2eSlogwang #include <rte_debug.h> 242bfe3f2eSlogwang #include <rte_ring.h> 252bfe3f2eSlogwang #include <rte_log.h> 262bfe3f2eSlogwang #include <rte_mempool.h> 272bfe3f2eSlogwang #include <rte_memcpy.h> 282bfe3f2eSlogwang #include <rte_mbuf.h> 292bfe3f2eSlogwang #include <rte_interrupts.h> 302bfe3f2eSlogwang #include <rte_ether.h> 312bfe3f2eSlogwang #include <rte_ethdev.h> 322bfe3f2eSlogwang #include <rte_malloc.h> 332bfe3f2eSlogwang #include <rte_string_fns.h> 342bfe3f2eSlogwang #include <rte_cycles.h> 352bfe3f2eSlogwang #include <rte_efd.h> 362bfe3f2eSlogwang #include <rte_hash.h> 372bfe3f2eSlogwang 382bfe3f2eSlogwang #include "common.h" 392bfe3f2eSlogwang #include "args.h" 402bfe3f2eSlogwang #include "init.h" 412bfe3f2eSlogwang 422bfe3f2eSlogwang #define MBUFS_PER_NODE 1536 432bfe3f2eSlogwang #define MBUFS_PER_PORT 1536 442bfe3f2eSlogwang #define MBUF_CACHE_SIZE 512 452bfe3f2eSlogwang 462bfe3f2eSlogwang #define RTE_MP_RX_DESC_DEFAULT 512 472bfe3f2eSlogwang #define RTE_MP_TX_DESC_DEFAULT 512 482bfe3f2eSlogwang #define NODE_QUEUE_RINGSIZE 128 492bfe3f2eSlogwang 502bfe3f2eSlogwang #define NO_FLAGS 0 512bfe3f2eSlogwang 522bfe3f2eSlogwang /* The mbuf pool for packet rx */ 532bfe3f2eSlogwang struct rte_mempool *pktmbuf_pool; 542bfe3f2eSlogwang 552bfe3f2eSlogwang /* array of info/queues for nodes */ 562bfe3f2eSlogwang struct node *nodes; 572bfe3f2eSlogwang 582bfe3f2eSlogwang /* EFD table */ 592bfe3f2eSlogwang struct rte_efd_table *efd_table; 602bfe3f2eSlogwang 612bfe3f2eSlogwang /* Shared info between server and nodes */ 622bfe3f2eSlogwang struct shared_info *info; 632bfe3f2eSlogwang 642bfe3f2eSlogwang /** 652bfe3f2eSlogwang * Initialise the mbuf pool for packet reception for the NIC, and any other 662bfe3f2eSlogwang * buffer pools needed by the app - currently none. 672bfe3f2eSlogwang */ 682bfe3f2eSlogwang static int 692bfe3f2eSlogwang init_mbuf_pools(void) 702bfe3f2eSlogwang { 712bfe3f2eSlogwang const unsigned int num_mbufs = (num_nodes * MBUFS_PER_NODE) + 722bfe3f2eSlogwang (info->num_ports * MBUFS_PER_PORT); 732bfe3f2eSlogwang 742bfe3f2eSlogwang /* 752bfe3f2eSlogwang * Don't pass single-producer/single-consumer flags to mbuf create as it 762bfe3f2eSlogwang * seems faster to use a cache instead 772bfe3f2eSlogwang */ 782bfe3f2eSlogwang printf("Creating mbuf pool '%s' [%u mbufs] ...\n", 792bfe3f2eSlogwang PKTMBUF_POOL_NAME, num_mbufs); 802bfe3f2eSlogwang pktmbuf_pool = rte_pktmbuf_pool_create(PKTMBUF_POOL_NAME, num_mbufs, 812bfe3f2eSlogwang MBUF_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id()); 822bfe3f2eSlogwang 832bfe3f2eSlogwang return pktmbuf_pool == NULL; /* 0 on success */ 842bfe3f2eSlogwang } 852bfe3f2eSlogwang 862bfe3f2eSlogwang /** 872bfe3f2eSlogwang * Initialise an individual port: 882bfe3f2eSlogwang * - configure number of rx and tx rings 892bfe3f2eSlogwang * - set up each rx ring, to pull from the main mbuf pool 902bfe3f2eSlogwang * - set up each tx ring 912bfe3f2eSlogwang * - start the port and report its status to stdout 922bfe3f2eSlogwang */ 932bfe3f2eSlogwang static int 942bfe3f2eSlogwang init_port(uint16_t port_num) 952bfe3f2eSlogwang { 962bfe3f2eSlogwang /* for port configuration all features are off by default */ 97d30ea906Sjfb8856606 struct rte_eth_conf port_conf = { 982bfe3f2eSlogwang .rxmode = { 99d30ea906Sjfb8856606 .mq_mode = ETH_MQ_RX_RSS, 100d30ea906Sjfb8856606 }, 1012bfe3f2eSlogwang }; 1022bfe3f2eSlogwang const uint16_t rx_rings = 1, tx_rings = num_nodes; 1032bfe3f2eSlogwang uint16_t rx_ring_size = RTE_MP_RX_DESC_DEFAULT; 1042bfe3f2eSlogwang uint16_t tx_ring_size = RTE_MP_TX_DESC_DEFAULT; 105d30ea906Sjfb8856606 struct rte_eth_dev_info dev_info; 106d30ea906Sjfb8856606 struct rte_eth_txconf txconf; 1072bfe3f2eSlogwang 1082bfe3f2eSlogwang uint16_t q; 1092bfe3f2eSlogwang int retval; 1102bfe3f2eSlogwang 1112bfe3f2eSlogwang printf("Port %u init ... ", port_num); 1122bfe3f2eSlogwang fflush(stdout); 1132bfe3f2eSlogwang 114*4418919fSjohnjiang retval = rte_eth_dev_info_get(port_num, &dev_info); 115*4418919fSjohnjiang if (retval != 0) 116*4418919fSjohnjiang return retval; 117*4418919fSjohnjiang 118d30ea906Sjfb8856606 if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE) 119d30ea906Sjfb8856606 port_conf.txmode.offloads |= 120d30ea906Sjfb8856606 DEV_TX_OFFLOAD_MBUF_FAST_FREE; 121d30ea906Sjfb8856606 1222bfe3f2eSlogwang /* 1232bfe3f2eSlogwang * Standard DPDK port initialisation - config port, then set up 1242bfe3f2eSlogwang * rx and tx rings. 1252bfe3f2eSlogwang */ 1262bfe3f2eSlogwang retval = rte_eth_dev_configure(port_num, rx_rings, tx_rings, &port_conf); 1272bfe3f2eSlogwang if (retval != 0) 1282bfe3f2eSlogwang return retval; 1292bfe3f2eSlogwang 1302bfe3f2eSlogwang retval = rte_eth_dev_adjust_nb_rx_tx_desc(port_num, &rx_ring_size, 1312bfe3f2eSlogwang &tx_ring_size); 1322bfe3f2eSlogwang if (retval != 0) 1332bfe3f2eSlogwang return retval; 1342bfe3f2eSlogwang 1352bfe3f2eSlogwang for (q = 0; q < rx_rings; q++) { 1362bfe3f2eSlogwang retval = rte_eth_rx_queue_setup(port_num, q, rx_ring_size, 1372bfe3f2eSlogwang rte_eth_dev_socket_id(port_num), 1382bfe3f2eSlogwang NULL, pktmbuf_pool); 1392bfe3f2eSlogwang if (retval < 0) 1402bfe3f2eSlogwang return retval; 1412bfe3f2eSlogwang } 1422bfe3f2eSlogwang 143d30ea906Sjfb8856606 txconf = dev_info.default_txconf; 144d30ea906Sjfb8856606 txconf.offloads = port_conf.txmode.offloads; 1452bfe3f2eSlogwang for (q = 0; q < tx_rings; q++) { 1462bfe3f2eSlogwang retval = rte_eth_tx_queue_setup(port_num, q, tx_ring_size, 1472bfe3f2eSlogwang rte_eth_dev_socket_id(port_num), 148d30ea906Sjfb8856606 &txconf); 1492bfe3f2eSlogwang if (retval < 0) 1502bfe3f2eSlogwang return retval; 1512bfe3f2eSlogwang } 1522bfe3f2eSlogwang 153*4418919fSjohnjiang retval = rte_eth_promiscuous_enable(port_num); 154*4418919fSjohnjiang if (retval != 0) 155*4418919fSjohnjiang return retval; 1562bfe3f2eSlogwang 1572bfe3f2eSlogwang retval = rte_eth_dev_start(port_num); 1582bfe3f2eSlogwang if (retval < 0) 1592bfe3f2eSlogwang return retval; 1602bfe3f2eSlogwang 1612bfe3f2eSlogwang printf("done:\n"); 1622bfe3f2eSlogwang 1632bfe3f2eSlogwang return 0; 1642bfe3f2eSlogwang } 1652bfe3f2eSlogwang 1662bfe3f2eSlogwang /** 1672bfe3f2eSlogwang * Set up the DPDK rings which will be used to pass packets, via 1682bfe3f2eSlogwang * pointers, between the multi-process server and node processes. 1692bfe3f2eSlogwang * Each node needs one RX queue. 1702bfe3f2eSlogwang */ 1712bfe3f2eSlogwang static int 1722bfe3f2eSlogwang init_shm_rings(void) 1732bfe3f2eSlogwang { 1742bfe3f2eSlogwang unsigned int i; 1752bfe3f2eSlogwang unsigned int socket_id; 1762bfe3f2eSlogwang const char *q_name; 1772bfe3f2eSlogwang const unsigned int ringsize = NODE_QUEUE_RINGSIZE; 1782bfe3f2eSlogwang 1792bfe3f2eSlogwang nodes = rte_malloc("node details", 1802bfe3f2eSlogwang sizeof(*nodes) * num_nodes, 0); 1812bfe3f2eSlogwang if (nodes == NULL) 1822bfe3f2eSlogwang rte_exit(EXIT_FAILURE, "Cannot allocate memory for " 1832bfe3f2eSlogwang "node program details\n"); 1842bfe3f2eSlogwang 1852bfe3f2eSlogwang for (i = 0; i < num_nodes; i++) { 1862bfe3f2eSlogwang /* Create an RX queue for each node */ 1872bfe3f2eSlogwang socket_id = rte_socket_id(); 1882bfe3f2eSlogwang q_name = get_rx_queue_name(i); 1892bfe3f2eSlogwang nodes[i].rx_q = rte_ring_create(q_name, 1902bfe3f2eSlogwang ringsize, socket_id, 1912bfe3f2eSlogwang RING_F_SP_ENQ | RING_F_SC_DEQ); 1922bfe3f2eSlogwang if (nodes[i].rx_q == NULL) 1932bfe3f2eSlogwang rte_exit(EXIT_FAILURE, "Cannot create rx ring queue " 1942bfe3f2eSlogwang "for node %u\n", i); 1952bfe3f2eSlogwang } 1962bfe3f2eSlogwang return 0; 1972bfe3f2eSlogwang } 1982bfe3f2eSlogwang 1992bfe3f2eSlogwang /* 2002bfe3f2eSlogwang * Create EFD table which will contain all the flows 2012bfe3f2eSlogwang * that will be distributed among the nodes 2022bfe3f2eSlogwang */ 2032bfe3f2eSlogwang static void 2042bfe3f2eSlogwang create_efd_table(void) 2052bfe3f2eSlogwang { 2062bfe3f2eSlogwang uint8_t socket_id = rte_socket_id(); 2072bfe3f2eSlogwang 2082bfe3f2eSlogwang /* create table */ 2092bfe3f2eSlogwang efd_table = rte_efd_create("flow table", num_flows * 2, sizeof(uint32_t), 2102bfe3f2eSlogwang 1 << socket_id, socket_id); 2112bfe3f2eSlogwang 2122bfe3f2eSlogwang if (efd_table == NULL) 2132bfe3f2eSlogwang rte_exit(EXIT_FAILURE, "Problem creating the flow table\n"); 2142bfe3f2eSlogwang } 2152bfe3f2eSlogwang 2162bfe3f2eSlogwang static void 2172bfe3f2eSlogwang populate_efd_table(void) 2182bfe3f2eSlogwang { 2192bfe3f2eSlogwang unsigned int i; 2202bfe3f2eSlogwang int32_t ret; 2212bfe3f2eSlogwang uint32_t ip_dst; 2222bfe3f2eSlogwang uint8_t socket_id = rte_socket_id(); 2232bfe3f2eSlogwang uint64_t node_id; 2242bfe3f2eSlogwang 2252bfe3f2eSlogwang /* Add flows in table */ 2262bfe3f2eSlogwang for (i = 0; i < num_flows; i++) { 2272bfe3f2eSlogwang node_id = i % num_nodes; 2282bfe3f2eSlogwang 2292bfe3f2eSlogwang ip_dst = rte_cpu_to_be_32(i); 2302bfe3f2eSlogwang ret = rte_efd_update(efd_table, socket_id, 2312bfe3f2eSlogwang (void *)&ip_dst, (efd_value_t)node_id); 2322bfe3f2eSlogwang if (ret < 0) 2332bfe3f2eSlogwang rte_exit(EXIT_FAILURE, "Unable to add entry %u in " 2342bfe3f2eSlogwang "EFD table\n", i); 2352bfe3f2eSlogwang } 2362bfe3f2eSlogwang 2372bfe3f2eSlogwang printf("EFD table: Adding 0x%x keys\n", num_flows); 2382bfe3f2eSlogwang } 2392bfe3f2eSlogwang 2402bfe3f2eSlogwang /* Check the link status of all ports in up to 9s, and print them finally */ 2412bfe3f2eSlogwang static void 2422bfe3f2eSlogwang check_all_ports_link_status(uint16_t port_num, uint32_t port_mask) 2432bfe3f2eSlogwang { 2442bfe3f2eSlogwang #define CHECK_INTERVAL 100 /* 100ms */ 2452bfe3f2eSlogwang #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */ 2462bfe3f2eSlogwang uint8_t count, all_ports_up, print_flag = 0; 2472bfe3f2eSlogwang uint16_t portid; 2482bfe3f2eSlogwang struct rte_eth_link link; 249*4418919fSjohnjiang int ret; 2502bfe3f2eSlogwang 2512bfe3f2eSlogwang printf("\nChecking link status"); 2522bfe3f2eSlogwang fflush(stdout); 2532bfe3f2eSlogwang for (count = 0; count <= MAX_CHECK_TIME; count++) { 2542bfe3f2eSlogwang all_ports_up = 1; 2552bfe3f2eSlogwang for (portid = 0; portid < port_num; portid++) { 2562bfe3f2eSlogwang if ((port_mask & (1 << info->id[portid])) == 0) 2572bfe3f2eSlogwang continue; 2582bfe3f2eSlogwang memset(&link, 0, sizeof(link)); 259*4418919fSjohnjiang ret = rte_eth_link_get_nowait(info->id[portid], &link); 260*4418919fSjohnjiang if (ret < 0) { 261*4418919fSjohnjiang all_ports_up = 0; 262*4418919fSjohnjiang if (print_flag == 1) 263*4418919fSjohnjiang printf("Port %u link get failed: %s\n", 264*4418919fSjohnjiang portid, rte_strerror(-ret)); 265*4418919fSjohnjiang continue; 266*4418919fSjohnjiang } 2672bfe3f2eSlogwang /* print link status if flag set */ 2682bfe3f2eSlogwang if (print_flag == 1) { 2692bfe3f2eSlogwang if (link.link_status) 2702bfe3f2eSlogwang printf( 2712bfe3f2eSlogwang "Port%d Link Up. Speed %u Mbps - %s\n", 2722bfe3f2eSlogwang info->id[portid], 2732bfe3f2eSlogwang link.link_speed, 2742bfe3f2eSlogwang (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 2752bfe3f2eSlogwang ("full-duplex") : ("half-duplex\n")); 2762bfe3f2eSlogwang else 2772bfe3f2eSlogwang printf("Port %d Link Down\n", 2782bfe3f2eSlogwang info->id[portid]); 2792bfe3f2eSlogwang continue; 2802bfe3f2eSlogwang } 2812bfe3f2eSlogwang /* clear all_ports_up flag if any link down */ 2822bfe3f2eSlogwang if (link.link_status == ETH_LINK_DOWN) { 2832bfe3f2eSlogwang all_ports_up = 0; 2842bfe3f2eSlogwang break; 2852bfe3f2eSlogwang } 2862bfe3f2eSlogwang } 2872bfe3f2eSlogwang /* after finally printing all link status, get out */ 2882bfe3f2eSlogwang if (print_flag == 1) 2892bfe3f2eSlogwang break; 2902bfe3f2eSlogwang 2912bfe3f2eSlogwang if (all_ports_up == 0) { 2922bfe3f2eSlogwang printf("."); 2932bfe3f2eSlogwang fflush(stdout); 2942bfe3f2eSlogwang rte_delay_ms(CHECK_INTERVAL); 2952bfe3f2eSlogwang } 2962bfe3f2eSlogwang 2972bfe3f2eSlogwang /* set the print_flag if all ports up or timeout */ 2982bfe3f2eSlogwang if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) { 2992bfe3f2eSlogwang print_flag = 1; 3002bfe3f2eSlogwang printf("done\n"); 3012bfe3f2eSlogwang } 3022bfe3f2eSlogwang } 3032bfe3f2eSlogwang } 3042bfe3f2eSlogwang 3052bfe3f2eSlogwang /** 3062bfe3f2eSlogwang * Main init function for the multi-process server app, 3072bfe3f2eSlogwang * calls subfunctions to do each stage of the initialisation. 3082bfe3f2eSlogwang */ 3092bfe3f2eSlogwang int 3102bfe3f2eSlogwang init(int argc, char *argv[]) 3112bfe3f2eSlogwang { 3122bfe3f2eSlogwang int retval; 3132bfe3f2eSlogwang const struct rte_memzone *mz; 3142bfe3f2eSlogwang uint8_t i, total_ports; 3152bfe3f2eSlogwang 3162bfe3f2eSlogwang /* init EAL, parsing EAL args */ 3172bfe3f2eSlogwang retval = rte_eal_init(argc, argv); 3182bfe3f2eSlogwang if (retval < 0) 3192bfe3f2eSlogwang return -1; 3202bfe3f2eSlogwang argc -= retval; 3212bfe3f2eSlogwang argv += retval; 3222bfe3f2eSlogwang 3232bfe3f2eSlogwang /* get total number of ports */ 324d30ea906Sjfb8856606 total_ports = rte_eth_dev_count_avail(); 3252bfe3f2eSlogwang 3262bfe3f2eSlogwang /* set up array for port data */ 3272bfe3f2eSlogwang mz = rte_memzone_reserve(MZ_SHARED_INFO, sizeof(*info), 3282bfe3f2eSlogwang rte_socket_id(), NO_FLAGS); 3292bfe3f2eSlogwang if (mz == NULL) 3302bfe3f2eSlogwang rte_exit(EXIT_FAILURE, "Cannot reserve memory zone " 3312bfe3f2eSlogwang "for port information\n"); 3322bfe3f2eSlogwang memset(mz->addr, 0, sizeof(*info)); 3332bfe3f2eSlogwang info = mz->addr; 3342bfe3f2eSlogwang 3352bfe3f2eSlogwang /* parse additional, application arguments */ 3362bfe3f2eSlogwang retval = parse_app_args(total_ports, argc, argv); 3372bfe3f2eSlogwang if (retval != 0) 3382bfe3f2eSlogwang return -1; 3392bfe3f2eSlogwang 3402bfe3f2eSlogwang /* initialise mbuf pools */ 3412bfe3f2eSlogwang retval = init_mbuf_pools(); 3422bfe3f2eSlogwang if (retval != 0) 3432bfe3f2eSlogwang rte_exit(EXIT_FAILURE, "Cannot create needed mbuf pools\n"); 3442bfe3f2eSlogwang 3452bfe3f2eSlogwang /* now initialise the ports we will use */ 3462bfe3f2eSlogwang for (i = 0; i < info->num_ports; i++) { 3472bfe3f2eSlogwang retval = init_port(info->id[i]); 3482bfe3f2eSlogwang if (retval != 0) 3492bfe3f2eSlogwang rte_exit(EXIT_FAILURE, "Cannot initialise port %u\n", 3502bfe3f2eSlogwang (unsigned int) i); 3512bfe3f2eSlogwang } 3522bfe3f2eSlogwang 3532bfe3f2eSlogwang check_all_ports_link_status(info->num_ports, (~0x0)); 3542bfe3f2eSlogwang 3552bfe3f2eSlogwang /* initialise the node queues/rings for inter-eu comms */ 3562bfe3f2eSlogwang init_shm_rings(); 3572bfe3f2eSlogwang 3582bfe3f2eSlogwang /* Create the EFD table */ 3592bfe3f2eSlogwang create_efd_table(); 3602bfe3f2eSlogwang 3612bfe3f2eSlogwang /* Populate the EFD table */ 3622bfe3f2eSlogwang populate_efd_table(); 3632bfe3f2eSlogwang 3642bfe3f2eSlogwang /* Share the total number of nodes */ 3652bfe3f2eSlogwang info->num_nodes = num_nodes; 3662bfe3f2eSlogwang 3672bfe3f2eSlogwang /* Share the total number of flows */ 3682bfe3f2eSlogwang info->num_flows = num_flows; 3692bfe3f2eSlogwang return 0; 3702bfe3f2eSlogwang } 371