1*d30ea906Sjfb8856606 /* SPDX-License-Identifier: BSD-3-Clause 2*d30ea906Sjfb8856606 * Copyright(c) 2016-2017 Intel Corporation 32bfe3f2eSlogwang */ 42bfe3f2eSlogwang 52bfe3f2eSlogwang #include <stdint.h> 62bfe3f2eSlogwang #include <stdio.h> 72bfe3f2eSlogwang #include <string.h> 82bfe3f2eSlogwang #include <sys/queue.h> 92bfe3f2eSlogwang #include <errno.h> 102bfe3f2eSlogwang #include <stdarg.h> 112bfe3f2eSlogwang #include <inttypes.h> 122bfe3f2eSlogwang 132bfe3f2eSlogwang #include <rte_common.h> 142bfe3f2eSlogwang #include <rte_memory.h> 152bfe3f2eSlogwang #include <rte_memzone.h> 162bfe3f2eSlogwang #include <rte_eal.h> 172bfe3f2eSlogwang #include <rte_byteorder.h> 182bfe3f2eSlogwang #include <rte_atomic.h> 192bfe3f2eSlogwang #include <rte_launch.h> 202bfe3f2eSlogwang #include <rte_per_lcore.h> 212bfe3f2eSlogwang #include <rte_lcore.h> 222bfe3f2eSlogwang #include <rte_branch_prediction.h> 232bfe3f2eSlogwang #include <rte_debug.h> 242bfe3f2eSlogwang #include <rte_ring.h> 252bfe3f2eSlogwang #include <rte_log.h> 262bfe3f2eSlogwang #include <rte_mempool.h> 272bfe3f2eSlogwang #include <rte_memcpy.h> 282bfe3f2eSlogwang #include <rte_mbuf.h> 292bfe3f2eSlogwang #include <rte_interrupts.h> 302bfe3f2eSlogwang #include <rte_ether.h> 312bfe3f2eSlogwang #include <rte_ethdev.h> 322bfe3f2eSlogwang #include <rte_malloc.h> 332bfe3f2eSlogwang #include <rte_string_fns.h> 342bfe3f2eSlogwang #include <rte_cycles.h> 352bfe3f2eSlogwang #include <rte_efd.h> 362bfe3f2eSlogwang #include <rte_hash.h> 372bfe3f2eSlogwang 382bfe3f2eSlogwang #include "common.h" 392bfe3f2eSlogwang #include "args.h" 402bfe3f2eSlogwang #include "init.h" 412bfe3f2eSlogwang 422bfe3f2eSlogwang #define MBUFS_PER_NODE 1536 432bfe3f2eSlogwang #define MBUFS_PER_PORT 1536 442bfe3f2eSlogwang #define MBUF_CACHE_SIZE 512 452bfe3f2eSlogwang 462bfe3f2eSlogwang #define RTE_MP_RX_DESC_DEFAULT 512 472bfe3f2eSlogwang #define RTE_MP_TX_DESC_DEFAULT 512 482bfe3f2eSlogwang #define NODE_QUEUE_RINGSIZE 128 492bfe3f2eSlogwang 502bfe3f2eSlogwang #define NO_FLAGS 0 512bfe3f2eSlogwang 522bfe3f2eSlogwang /* The mbuf pool for packet rx */ 532bfe3f2eSlogwang struct rte_mempool *pktmbuf_pool; 542bfe3f2eSlogwang 552bfe3f2eSlogwang /* array of info/queues for nodes */ 562bfe3f2eSlogwang struct node *nodes; 572bfe3f2eSlogwang 582bfe3f2eSlogwang /* EFD table */ 592bfe3f2eSlogwang struct rte_efd_table *efd_table; 602bfe3f2eSlogwang 612bfe3f2eSlogwang /* Shared info between server and nodes */ 622bfe3f2eSlogwang struct shared_info *info; 632bfe3f2eSlogwang 642bfe3f2eSlogwang /** 652bfe3f2eSlogwang * Initialise the mbuf pool for packet reception for the NIC, and any other 662bfe3f2eSlogwang * buffer pools needed by the app - currently none. 672bfe3f2eSlogwang */ 682bfe3f2eSlogwang static int 692bfe3f2eSlogwang init_mbuf_pools(void) 702bfe3f2eSlogwang { 712bfe3f2eSlogwang const unsigned int num_mbufs = (num_nodes * MBUFS_PER_NODE) + 722bfe3f2eSlogwang (info->num_ports * MBUFS_PER_PORT); 732bfe3f2eSlogwang 742bfe3f2eSlogwang /* 752bfe3f2eSlogwang * Don't pass single-producer/single-consumer flags to mbuf create as it 762bfe3f2eSlogwang * seems faster to use a cache instead 772bfe3f2eSlogwang */ 782bfe3f2eSlogwang printf("Creating mbuf pool '%s' [%u mbufs] ...\n", 792bfe3f2eSlogwang PKTMBUF_POOL_NAME, num_mbufs); 802bfe3f2eSlogwang pktmbuf_pool = rte_pktmbuf_pool_create(PKTMBUF_POOL_NAME, num_mbufs, 812bfe3f2eSlogwang MBUF_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id()); 822bfe3f2eSlogwang 832bfe3f2eSlogwang return pktmbuf_pool == NULL; /* 0 on success */ 842bfe3f2eSlogwang } 852bfe3f2eSlogwang 862bfe3f2eSlogwang /** 872bfe3f2eSlogwang * Initialise an individual port: 882bfe3f2eSlogwang * - configure number of rx and tx rings 892bfe3f2eSlogwang * - set up each rx ring, to pull from the main mbuf pool 902bfe3f2eSlogwang * - set up each tx ring 912bfe3f2eSlogwang * - start the port and report its status to stdout 922bfe3f2eSlogwang */ 932bfe3f2eSlogwang static int 942bfe3f2eSlogwang init_port(uint16_t port_num) 952bfe3f2eSlogwang { 962bfe3f2eSlogwang /* for port configuration all features are off by default */ 97*d30ea906Sjfb8856606 struct rte_eth_conf port_conf = { 982bfe3f2eSlogwang .rxmode = { 99*d30ea906Sjfb8856606 .mq_mode = ETH_MQ_RX_RSS, 100*d30ea906Sjfb8856606 }, 1012bfe3f2eSlogwang }; 1022bfe3f2eSlogwang const uint16_t rx_rings = 1, tx_rings = num_nodes; 1032bfe3f2eSlogwang uint16_t rx_ring_size = RTE_MP_RX_DESC_DEFAULT; 1042bfe3f2eSlogwang uint16_t tx_ring_size = RTE_MP_TX_DESC_DEFAULT; 105*d30ea906Sjfb8856606 struct rte_eth_dev_info dev_info; 106*d30ea906Sjfb8856606 struct rte_eth_txconf txconf; 1072bfe3f2eSlogwang 1082bfe3f2eSlogwang uint16_t q; 1092bfe3f2eSlogwang int retval; 1102bfe3f2eSlogwang 1112bfe3f2eSlogwang printf("Port %u init ... ", port_num); 1122bfe3f2eSlogwang fflush(stdout); 1132bfe3f2eSlogwang 114*d30ea906Sjfb8856606 rte_eth_dev_info_get(port_num, &dev_info); 115*d30ea906Sjfb8856606 if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE) 116*d30ea906Sjfb8856606 port_conf.txmode.offloads |= 117*d30ea906Sjfb8856606 DEV_TX_OFFLOAD_MBUF_FAST_FREE; 118*d30ea906Sjfb8856606 1192bfe3f2eSlogwang /* 1202bfe3f2eSlogwang * Standard DPDK port initialisation - config port, then set up 1212bfe3f2eSlogwang * rx and tx rings. 1222bfe3f2eSlogwang */ 1232bfe3f2eSlogwang retval = rte_eth_dev_configure(port_num, rx_rings, tx_rings, &port_conf); 1242bfe3f2eSlogwang if (retval != 0) 1252bfe3f2eSlogwang return retval; 1262bfe3f2eSlogwang 1272bfe3f2eSlogwang retval = rte_eth_dev_adjust_nb_rx_tx_desc(port_num, &rx_ring_size, 1282bfe3f2eSlogwang &tx_ring_size); 1292bfe3f2eSlogwang if (retval != 0) 1302bfe3f2eSlogwang return retval; 1312bfe3f2eSlogwang 1322bfe3f2eSlogwang for (q = 0; q < rx_rings; q++) { 1332bfe3f2eSlogwang retval = rte_eth_rx_queue_setup(port_num, q, rx_ring_size, 1342bfe3f2eSlogwang rte_eth_dev_socket_id(port_num), 1352bfe3f2eSlogwang NULL, pktmbuf_pool); 1362bfe3f2eSlogwang if (retval < 0) 1372bfe3f2eSlogwang return retval; 1382bfe3f2eSlogwang } 1392bfe3f2eSlogwang 140*d30ea906Sjfb8856606 txconf = dev_info.default_txconf; 141*d30ea906Sjfb8856606 txconf.offloads = port_conf.txmode.offloads; 1422bfe3f2eSlogwang for (q = 0; q < tx_rings; q++) { 1432bfe3f2eSlogwang retval = rte_eth_tx_queue_setup(port_num, q, tx_ring_size, 1442bfe3f2eSlogwang rte_eth_dev_socket_id(port_num), 145*d30ea906Sjfb8856606 &txconf); 1462bfe3f2eSlogwang if (retval < 0) 1472bfe3f2eSlogwang return retval; 1482bfe3f2eSlogwang } 1492bfe3f2eSlogwang 1502bfe3f2eSlogwang rte_eth_promiscuous_enable(port_num); 1512bfe3f2eSlogwang 1522bfe3f2eSlogwang retval = rte_eth_dev_start(port_num); 1532bfe3f2eSlogwang if (retval < 0) 1542bfe3f2eSlogwang return retval; 1552bfe3f2eSlogwang 1562bfe3f2eSlogwang printf("done:\n"); 1572bfe3f2eSlogwang 1582bfe3f2eSlogwang return 0; 1592bfe3f2eSlogwang } 1602bfe3f2eSlogwang 1612bfe3f2eSlogwang /** 1622bfe3f2eSlogwang * Set up the DPDK rings which will be used to pass packets, via 1632bfe3f2eSlogwang * pointers, between the multi-process server and node processes. 1642bfe3f2eSlogwang * Each node needs one RX queue. 1652bfe3f2eSlogwang */ 1662bfe3f2eSlogwang static int 1672bfe3f2eSlogwang init_shm_rings(void) 1682bfe3f2eSlogwang { 1692bfe3f2eSlogwang unsigned int i; 1702bfe3f2eSlogwang unsigned int socket_id; 1712bfe3f2eSlogwang const char *q_name; 1722bfe3f2eSlogwang const unsigned int ringsize = NODE_QUEUE_RINGSIZE; 1732bfe3f2eSlogwang 1742bfe3f2eSlogwang nodes = rte_malloc("node details", 1752bfe3f2eSlogwang sizeof(*nodes) * num_nodes, 0); 1762bfe3f2eSlogwang if (nodes == NULL) 1772bfe3f2eSlogwang rte_exit(EXIT_FAILURE, "Cannot allocate memory for " 1782bfe3f2eSlogwang "node program details\n"); 1792bfe3f2eSlogwang 1802bfe3f2eSlogwang for (i = 0; i < num_nodes; i++) { 1812bfe3f2eSlogwang /* Create an RX queue for each node */ 1822bfe3f2eSlogwang socket_id = rte_socket_id(); 1832bfe3f2eSlogwang q_name = get_rx_queue_name(i); 1842bfe3f2eSlogwang nodes[i].rx_q = rte_ring_create(q_name, 1852bfe3f2eSlogwang ringsize, socket_id, 1862bfe3f2eSlogwang RING_F_SP_ENQ | RING_F_SC_DEQ); 1872bfe3f2eSlogwang if (nodes[i].rx_q == NULL) 1882bfe3f2eSlogwang rte_exit(EXIT_FAILURE, "Cannot create rx ring queue " 1892bfe3f2eSlogwang "for node %u\n", i); 1902bfe3f2eSlogwang } 1912bfe3f2eSlogwang return 0; 1922bfe3f2eSlogwang } 1932bfe3f2eSlogwang 1942bfe3f2eSlogwang /* 1952bfe3f2eSlogwang * Create EFD table which will contain all the flows 1962bfe3f2eSlogwang * that will be distributed among the nodes 1972bfe3f2eSlogwang */ 1982bfe3f2eSlogwang static void 1992bfe3f2eSlogwang create_efd_table(void) 2002bfe3f2eSlogwang { 2012bfe3f2eSlogwang uint8_t socket_id = rte_socket_id(); 2022bfe3f2eSlogwang 2032bfe3f2eSlogwang /* create table */ 2042bfe3f2eSlogwang efd_table = rte_efd_create("flow table", num_flows * 2, sizeof(uint32_t), 2052bfe3f2eSlogwang 1 << socket_id, socket_id); 2062bfe3f2eSlogwang 2072bfe3f2eSlogwang if (efd_table == NULL) 2082bfe3f2eSlogwang rte_exit(EXIT_FAILURE, "Problem creating the flow table\n"); 2092bfe3f2eSlogwang } 2102bfe3f2eSlogwang 2112bfe3f2eSlogwang static void 2122bfe3f2eSlogwang populate_efd_table(void) 2132bfe3f2eSlogwang { 2142bfe3f2eSlogwang unsigned int i; 2152bfe3f2eSlogwang int32_t ret; 2162bfe3f2eSlogwang uint32_t ip_dst; 2172bfe3f2eSlogwang uint8_t socket_id = rte_socket_id(); 2182bfe3f2eSlogwang uint64_t node_id; 2192bfe3f2eSlogwang 2202bfe3f2eSlogwang /* Add flows in table */ 2212bfe3f2eSlogwang for (i = 0; i < num_flows; i++) { 2222bfe3f2eSlogwang node_id = i % num_nodes; 2232bfe3f2eSlogwang 2242bfe3f2eSlogwang ip_dst = rte_cpu_to_be_32(i); 2252bfe3f2eSlogwang ret = rte_efd_update(efd_table, socket_id, 2262bfe3f2eSlogwang (void *)&ip_dst, (efd_value_t)node_id); 2272bfe3f2eSlogwang if (ret < 0) 2282bfe3f2eSlogwang rte_exit(EXIT_FAILURE, "Unable to add entry %u in " 2292bfe3f2eSlogwang "EFD table\n", i); 2302bfe3f2eSlogwang } 2312bfe3f2eSlogwang 2322bfe3f2eSlogwang printf("EFD table: Adding 0x%x keys\n", num_flows); 2332bfe3f2eSlogwang } 2342bfe3f2eSlogwang 2352bfe3f2eSlogwang /* Check the link status of all ports in up to 9s, and print them finally */ 2362bfe3f2eSlogwang static void 2372bfe3f2eSlogwang check_all_ports_link_status(uint16_t port_num, uint32_t port_mask) 2382bfe3f2eSlogwang { 2392bfe3f2eSlogwang #define CHECK_INTERVAL 100 /* 100ms */ 2402bfe3f2eSlogwang #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */ 2412bfe3f2eSlogwang uint8_t count, all_ports_up, print_flag = 0; 2422bfe3f2eSlogwang uint16_t portid; 2432bfe3f2eSlogwang struct rte_eth_link link; 2442bfe3f2eSlogwang 2452bfe3f2eSlogwang printf("\nChecking link status"); 2462bfe3f2eSlogwang fflush(stdout); 2472bfe3f2eSlogwang for (count = 0; count <= MAX_CHECK_TIME; count++) { 2482bfe3f2eSlogwang all_ports_up = 1; 2492bfe3f2eSlogwang for (portid = 0; portid < port_num; portid++) { 2502bfe3f2eSlogwang if ((port_mask & (1 << info->id[portid])) == 0) 2512bfe3f2eSlogwang continue; 2522bfe3f2eSlogwang memset(&link, 0, sizeof(link)); 2532bfe3f2eSlogwang rte_eth_link_get_nowait(info->id[portid], &link); 2542bfe3f2eSlogwang /* print link status if flag set */ 2552bfe3f2eSlogwang if (print_flag == 1) { 2562bfe3f2eSlogwang if (link.link_status) 2572bfe3f2eSlogwang printf( 2582bfe3f2eSlogwang "Port%d Link Up. Speed %u Mbps - %s\n", 2592bfe3f2eSlogwang info->id[portid], 2602bfe3f2eSlogwang link.link_speed, 2612bfe3f2eSlogwang (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 2622bfe3f2eSlogwang ("full-duplex") : ("half-duplex\n")); 2632bfe3f2eSlogwang else 2642bfe3f2eSlogwang printf("Port %d Link Down\n", 2652bfe3f2eSlogwang info->id[portid]); 2662bfe3f2eSlogwang continue; 2672bfe3f2eSlogwang } 2682bfe3f2eSlogwang /* clear all_ports_up flag if any link down */ 2692bfe3f2eSlogwang if (link.link_status == ETH_LINK_DOWN) { 2702bfe3f2eSlogwang all_ports_up = 0; 2712bfe3f2eSlogwang break; 2722bfe3f2eSlogwang } 2732bfe3f2eSlogwang } 2742bfe3f2eSlogwang /* after finally printing all link status, get out */ 2752bfe3f2eSlogwang if (print_flag == 1) 2762bfe3f2eSlogwang break; 2772bfe3f2eSlogwang 2782bfe3f2eSlogwang if (all_ports_up == 0) { 2792bfe3f2eSlogwang printf("."); 2802bfe3f2eSlogwang fflush(stdout); 2812bfe3f2eSlogwang rte_delay_ms(CHECK_INTERVAL); 2822bfe3f2eSlogwang } 2832bfe3f2eSlogwang 2842bfe3f2eSlogwang /* set the print_flag if all ports up or timeout */ 2852bfe3f2eSlogwang if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) { 2862bfe3f2eSlogwang print_flag = 1; 2872bfe3f2eSlogwang printf("done\n"); 2882bfe3f2eSlogwang } 2892bfe3f2eSlogwang } 2902bfe3f2eSlogwang } 2912bfe3f2eSlogwang 2922bfe3f2eSlogwang /** 2932bfe3f2eSlogwang * Main init function for the multi-process server app, 2942bfe3f2eSlogwang * calls subfunctions to do each stage of the initialisation. 2952bfe3f2eSlogwang */ 2962bfe3f2eSlogwang int 2972bfe3f2eSlogwang init(int argc, char *argv[]) 2982bfe3f2eSlogwang { 2992bfe3f2eSlogwang int retval; 3002bfe3f2eSlogwang const struct rte_memzone *mz; 3012bfe3f2eSlogwang uint8_t i, total_ports; 3022bfe3f2eSlogwang 3032bfe3f2eSlogwang /* init EAL, parsing EAL args */ 3042bfe3f2eSlogwang retval = rte_eal_init(argc, argv); 3052bfe3f2eSlogwang if (retval < 0) 3062bfe3f2eSlogwang return -1; 3072bfe3f2eSlogwang argc -= retval; 3082bfe3f2eSlogwang argv += retval; 3092bfe3f2eSlogwang 3102bfe3f2eSlogwang /* get total number of ports */ 311*d30ea906Sjfb8856606 total_ports = rte_eth_dev_count_avail(); 3122bfe3f2eSlogwang 3132bfe3f2eSlogwang /* set up array for port data */ 3142bfe3f2eSlogwang mz = rte_memzone_reserve(MZ_SHARED_INFO, sizeof(*info), 3152bfe3f2eSlogwang rte_socket_id(), NO_FLAGS); 3162bfe3f2eSlogwang if (mz == NULL) 3172bfe3f2eSlogwang rte_exit(EXIT_FAILURE, "Cannot reserve memory zone " 3182bfe3f2eSlogwang "for port information\n"); 3192bfe3f2eSlogwang memset(mz->addr, 0, sizeof(*info)); 3202bfe3f2eSlogwang info = mz->addr; 3212bfe3f2eSlogwang 3222bfe3f2eSlogwang /* parse additional, application arguments */ 3232bfe3f2eSlogwang retval = parse_app_args(total_ports, argc, argv); 3242bfe3f2eSlogwang if (retval != 0) 3252bfe3f2eSlogwang return -1; 3262bfe3f2eSlogwang 3272bfe3f2eSlogwang /* initialise mbuf pools */ 3282bfe3f2eSlogwang retval = init_mbuf_pools(); 3292bfe3f2eSlogwang if (retval != 0) 3302bfe3f2eSlogwang rte_exit(EXIT_FAILURE, "Cannot create needed mbuf pools\n"); 3312bfe3f2eSlogwang 3322bfe3f2eSlogwang /* now initialise the ports we will use */ 3332bfe3f2eSlogwang for (i = 0; i < info->num_ports; i++) { 3342bfe3f2eSlogwang retval = init_port(info->id[i]); 3352bfe3f2eSlogwang if (retval != 0) 3362bfe3f2eSlogwang rte_exit(EXIT_FAILURE, "Cannot initialise port %u\n", 3372bfe3f2eSlogwang (unsigned int) i); 3382bfe3f2eSlogwang } 3392bfe3f2eSlogwang 3402bfe3f2eSlogwang check_all_ports_link_status(info->num_ports, (~0x0)); 3412bfe3f2eSlogwang 3422bfe3f2eSlogwang /* initialise the node queues/rings for inter-eu comms */ 3432bfe3f2eSlogwang init_shm_rings(); 3442bfe3f2eSlogwang 3452bfe3f2eSlogwang /* Create the EFD table */ 3462bfe3f2eSlogwang create_efd_table(); 3472bfe3f2eSlogwang 3482bfe3f2eSlogwang /* Populate the EFD table */ 3492bfe3f2eSlogwang populate_efd_table(); 3502bfe3f2eSlogwang 3512bfe3f2eSlogwang /* Share the total number of nodes */ 3522bfe3f2eSlogwang info->num_nodes = num_nodes; 3532bfe3f2eSlogwang 3542bfe3f2eSlogwang /* Share the total number of flows */ 3552bfe3f2eSlogwang info->num_flows = num_flows; 3562bfe3f2eSlogwang return 0; 3572bfe3f2eSlogwang } 358