1*d30ea906Sjfb8856606 /* SPDX-License-Identifier: BSD-3-Clause
2*d30ea906Sjfb8856606  * Copyright(c) 2016-2017 Intel Corporation
32bfe3f2eSlogwang  */
42bfe3f2eSlogwang 
52bfe3f2eSlogwang #include <stdint.h>
62bfe3f2eSlogwang #include <stdio.h>
72bfe3f2eSlogwang #include <string.h>
82bfe3f2eSlogwang #include <sys/queue.h>
92bfe3f2eSlogwang #include <errno.h>
102bfe3f2eSlogwang #include <stdarg.h>
112bfe3f2eSlogwang #include <inttypes.h>
122bfe3f2eSlogwang 
132bfe3f2eSlogwang #include <rte_common.h>
142bfe3f2eSlogwang #include <rte_memory.h>
152bfe3f2eSlogwang #include <rte_memzone.h>
162bfe3f2eSlogwang #include <rte_eal.h>
172bfe3f2eSlogwang #include <rte_byteorder.h>
182bfe3f2eSlogwang #include <rte_atomic.h>
192bfe3f2eSlogwang #include <rte_launch.h>
202bfe3f2eSlogwang #include <rte_per_lcore.h>
212bfe3f2eSlogwang #include <rte_lcore.h>
222bfe3f2eSlogwang #include <rte_branch_prediction.h>
232bfe3f2eSlogwang #include <rte_debug.h>
242bfe3f2eSlogwang #include <rte_ring.h>
252bfe3f2eSlogwang #include <rte_log.h>
262bfe3f2eSlogwang #include <rte_mempool.h>
272bfe3f2eSlogwang #include <rte_memcpy.h>
282bfe3f2eSlogwang #include <rte_mbuf.h>
292bfe3f2eSlogwang #include <rte_interrupts.h>
302bfe3f2eSlogwang #include <rte_ether.h>
312bfe3f2eSlogwang #include <rte_ethdev.h>
322bfe3f2eSlogwang #include <rte_malloc.h>
332bfe3f2eSlogwang #include <rte_string_fns.h>
342bfe3f2eSlogwang #include <rte_cycles.h>
352bfe3f2eSlogwang #include <rte_efd.h>
362bfe3f2eSlogwang #include <rte_hash.h>
372bfe3f2eSlogwang 
382bfe3f2eSlogwang #include "common.h"
392bfe3f2eSlogwang #include "args.h"
402bfe3f2eSlogwang #include "init.h"
412bfe3f2eSlogwang 
/*
 * Packet buffer pool sizing: the pool holds MBUFS_PER_NODE buffers for every
 * node plus MBUFS_PER_PORT for every port; MBUF_CACHE_SIZE is the cache size
 * handed to the pool at creation.
 */
#define MBUFS_PER_NODE 1536
#define MBUFS_PER_PORT 1536
#define MBUF_CACHE_SIZE 512

/*
 * Initial descriptor counts for the NIC rx/tx queues, and the number of
 * slots in each per-node software ring.
 */
#define RTE_MP_RX_DESC_DEFAULT 512
#define RTE_MP_TX_DESC_DEFAULT 512
#define NODE_QUEUE_RINGSIZE 128

/* Readable spelling of "no flags" for calls that take a flags argument */
#define NO_FLAGS 0

/* Pool from which all rx packet buffers are drawn */
struct rte_mempool *pktmbuf_pool;

/* Per-node state (including each node's rx ring), one entry per node */
struct node *nodes;

/* EFD table holding the flows handled by the server */
struct rte_efd_table *efd_table;

/* Memzone-backed information block shared by the server and the nodes */
struct shared_info *info;
632bfe3f2eSlogwang 
642bfe3f2eSlogwang /**
652bfe3f2eSlogwang  * Initialise the mbuf pool for packet reception for the NIC, and any other
662bfe3f2eSlogwang  * buffer pools needed by the app - currently none.
672bfe3f2eSlogwang  */
682bfe3f2eSlogwang static int
692bfe3f2eSlogwang init_mbuf_pools(void)
702bfe3f2eSlogwang {
712bfe3f2eSlogwang 	const unsigned int num_mbufs = (num_nodes * MBUFS_PER_NODE) +
722bfe3f2eSlogwang 			(info->num_ports * MBUFS_PER_PORT);
732bfe3f2eSlogwang 
742bfe3f2eSlogwang 	/*
752bfe3f2eSlogwang 	 * Don't pass single-producer/single-consumer flags to mbuf create as it
762bfe3f2eSlogwang 	 * seems faster to use a cache instead
772bfe3f2eSlogwang 	 */
782bfe3f2eSlogwang 	printf("Creating mbuf pool '%s' [%u mbufs] ...\n",
792bfe3f2eSlogwang 			PKTMBUF_POOL_NAME, num_mbufs);
802bfe3f2eSlogwang 	pktmbuf_pool = rte_pktmbuf_pool_create(PKTMBUF_POOL_NAME, num_mbufs,
812bfe3f2eSlogwang 		MBUF_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
822bfe3f2eSlogwang 
832bfe3f2eSlogwang 	return pktmbuf_pool == NULL; /* 0  on success */
842bfe3f2eSlogwang }
852bfe3f2eSlogwang 
862bfe3f2eSlogwang /**
872bfe3f2eSlogwang  * Initialise an individual port:
882bfe3f2eSlogwang  * - configure number of rx and tx rings
892bfe3f2eSlogwang  * - set up each rx ring, to pull from the main mbuf pool
902bfe3f2eSlogwang  * - set up each tx ring
912bfe3f2eSlogwang  * - start the port and report its status to stdout
922bfe3f2eSlogwang  */
932bfe3f2eSlogwang static int
942bfe3f2eSlogwang init_port(uint16_t port_num)
952bfe3f2eSlogwang {
962bfe3f2eSlogwang 	/* for port configuration all features are off by default */
97*d30ea906Sjfb8856606 	struct rte_eth_conf port_conf = {
982bfe3f2eSlogwang 		.rxmode = {
99*d30ea906Sjfb8856606 			.mq_mode = ETH_MQ_RX_RSS,
100*d30ea906Sjfb8856606 		},
1012bfe3f2eSlogwang 	};
1022bfe3f2eSlogwang 	const uint16_t rx_rings = 1, tx_rings = num_nodes;
1032bfe3f2eSlogwang 	uint16_t rx_ring_size = RTE_MP_RX_DESC_DEFAULT;
1042bfe3f2eSlogwang 	uint16_t tx_ring_size = RTE_MP_TX_DESC_DEFAULT;
105*d30ea906Sjfb8856606 	struct rte_eth_dev_info dev_info;
106*d30ea906Sjfb8856606 	struct rte_eth_txconf txconf;
1072bfe3f2eSlogwang 
1082bfe3f2eSlogwang 	uint16_t q;
1092bfe3f2eSlogwang 	int retval;
1102bfe3f2eSlogwang 
1112bfe3f2eSlogwang 	printf("Port %u init ... ", port_num);
1122bfe3f2eSlogwang 	fflush(stdout);
1132bfe3f2eSlogwang 
114*d30ea906Sjfb8856606 	rte_eth_dev_info_get(port_num, &dev_info);
115*d30ea906Sjfb8856606 	if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
116*d30ea906Sjfb8856606 		port_conf.txmode.offloads |=
117*d30ea906Sjfb8856606 			DEV_TX_OFFLOAD_MBUF_FAST_FREE;
118*d30ea906Sjfb8856606 
1192bfe3f2eSlogwang 	/*
1202bfe3f2eSlogwang 	 * Standard DPDK port initialisation - config port, then set up
1212bfe3f2eSlogwang 	 * rx and tx rings.
1222bfe3f2eSlogwang 	 */
1232bfe3f2eSlogwang 	retval = rte_eth_dev_configure(port_num, rx_rings, tx_rings, &port_conf);
1242bfe3f2eSlogwang 	if (retval != 0)
1252bfe3f2eSlogwang 		return retval;
1262bfe3f2eSlogwang 
1272bfe3f2eSlogwang 	retval = rte_eth_dev_adjust_nb_rx_tx_desc(port_num, &rx_ring_size,
1282bfe3f2eSlogwang 			&tx_ring_size);
1292bfe3f2eSlogwang 	if (retval != 0)
1302bfe3f2eSlogwang 		return retval;
1312bfe3f2eSlogwang 
1322bfe3f2eSlogwang 	for (q = 0; q < rx_rings; q++) {
1332bfe3f2eSlogwang 		retval = rte_eth_rx_queue_setup(port_num, q, rx_ring_size,
1342bfe3f2eSlogwang 				rte_eth_dev_socket_id(port_num),
1352bfe3f2eSlogwang 				NULL, pktmbuf_pool);
1362bfe3f2eSlogwang 		if (retval < 0)
1372bfe3f2eSlogwang 			return retval;
1382bfe3f2eSlogwang 	}
1392bfe3f2eSlogwang 
140*d30ea906Sjfb8856606 	txconf = dev_info.default_txconf;
141*d30ea906Sjfb8856606 	txconf.offloads = port_conf.txmode.offloads;
1422bfe3f2eSlogwang 	for (q = 0; q < tx_rings; q++) {
1432bfe3f2eSlogwang 		retval = rte_eth_tx_queue_setup(port_num, q, tx_ring_size,
1442bfe3f2eSlogwang 				rte_eth_dev_socket_id(port_num),
145*d30ea906Sjfb8856606 				&txconf);
1462bfe3f2eSlogwang 		if (retval < 0)
1472bfe3f2eSlogwang 			return retval;
1482bfe3f2eSlogwang 	}
1492bfe3f2eSlogwang 
1502bfe3f2eSlogwang 	rte_eth_promiscuous_enable(port_num);
1512bfe3f2eSlogwang 
1522bfe3f2eSlogwang 	retval = rte_eth_dev_start(port_num);
1532bfe3f2eSlogwang 	if (retval < 0)
1542bfe3f2eSlogwang 		return retval;
1552bfe3f2eSlogwang 
1562bfe3f2eSlogwang 	printf("done:\n");
1572bfe3f2eSlogwang 
1582bfe3f2eSlogwang 	return 0;
1592bfe3f2eSlogwang }
1602bfe3f2eSlogwang 
1612bfe3f2eSlogwang /**
1622bfe3f2eSlogwang  * Set up the DPDK rings which will be used to pass packets, via
1632bfe3f2eSlogwang  * pointers, between the multi-process server and node processes.
1642bfe3f2eSlogwang  * Each node needs one RX queue.
1652bfe3f2eSlogwang  */
1662bfe3f2eSlogwang static int
1672bfe3f2eSlogwang init_shm_rings(void)
1682bfe3f2eSlogwang {
1692bfe3f2eSlogwang 	unsigned int i;
1702bfe3f2eSlogwang 	unsigned int socket_id;
1712bfe3f2eSlogwang 	const char *q_name;
1722bfe3f2eSlogwang 	const unsigned int ringsize = NODE_QUEUE_RINGSIZE;
1732bfe3f2eSlogwang 
1742bfe3f2eSlogwang 	nodes = rte_malloc("node details",
1752bfe3f2eSlogwang 		sizeof(*nodes) * num_nodes, 0);
1762bfe3f2eSlogwang 	if (nodes == NULL)
1772bfe3f2eSlogwang 		rte_exit(EXIT_FAILURE, "Cannot allocate memory for "
1782bfe3f2eSlogwang 				"node program details\n");
1792bfe3f2eSlogwang 
1802bfe3f2eSlogwang 	for (i = 0; i < num_nodes; i++) {
1812bfe3f2eSlogwang 		/* Create an RX queue for each node */
1822bfe3f2eSlogwang 		socket_id = rte_socket_id();
1832bfe3f2eSlogwang 		q_name = get_rx_queue_name(i);
1842bfe3f2eSlogwang 		nodes[i].rx_q = rte_ring_create(q_name,
1852bfe3f2eSlogwang 				ringsize, socket_id,
1862bfe3f2eSlogwang 				RING_F_SP_ENQ | RING_F_SC_DEQ);
1872bfe3f2eSlogwang 		if (nodes[i].rx_q == NULL)
1882bfe3f2eSlogwang 			rte_exit(EXIT_FAILURE, "Cannot create rx ring queue "
1892bfe3f2eSlogwang 					"for node %u\n", i);
1902bfe3f2eSlogwang 	}
1912bfe3f2eSlogwang 	return 0;
1922bfe3f2eSlogwang }
1932bfe3f2eSlogwang 
1942bfe3f2eSlogwang /*
1952bfe3f2eSlogwang  * Create EFD table which will contain all the flows
1962bfe3f2eSlogwang  * that will be distributed among the nodes
1972bfe3f2eSlogwang  */
1982bfe3f2eSlogwang static void
1992bfe3f2eSlogwang create_efd_table(void)
2002bfe3f2eSlogwang {
2012bfe3f2eSlogwang 	uint8_t socket_id = rte_socket_id();
2022bfe3f2eSlogwang 
2032bfe3f2eSlogwang 	/* create table */
2042bfe3f2eSlogwang 	efd_table = rte_efd_create("flow table", num_flows * 2, sizeof(uint32_t),
2052bfe3f2eSlogwang 			1 << socket_id,	socket_id);
2062bfe3f2eSlogwang 
2072bfe3f2eSlogwang 	if (efd_table == NULL)
2082bfe3f2eSlogwang 		rte_exit(EXIT_FAILURE, "Problem creating the flow table\n");
2092bfe3f2eSlogwang }
2102bfe3f2eSlogwang 
2112bfe3f2eSlogwang static void
2122bfe3f2eSlogwang populate_efd_table(void)
2132bfe3f2eSlogwang {
2142bfe3f2eSlogwang 	unsigned int i;
2152bfe3f2eSlogwang 	int32_t ret;
2162bfe3f2eSlogwang 	uint32_t ip_dst;
2172bfe3f2eSlogwang 	uint8_t socket_id = rte_socket_id();
2182bfe3f2eSlogwang 	uint64_t node_id;
2192bfe3f2eSlogwang 
2202bfe3f2eSlogwang 	/* Add flows in table */
2212bfe3f2eSlogwang 	for (i = 0; i < num_flows; i++) {
2222bfe3f2eSlogwang 		node_id = i % num_nodes;
2232bfe3f2eSlogwang 
2242bfe3f2eSlogwang 		ip_dst = rte_cpu_to_be_32(i);
2252bfe3f2eSlogwang 		ret = rte_efd_update(efd_table, socket_id,
2262bfe3f2eSlogwang 				(void *)&ip_dst, (efd_value_t)node_id);
2272bfe3f2eSlogwang 		if (ret < 0)
2282bfe3f2eSlogwang 			rte_exit(EXIT_FAILURE, "Unable to add entry %u in "
2292bfe3f2eSlogwang 					"EFD table\n", i);
2302bfe3f2eSlogwang 	}
2312bfe3f2eSlogwang 
2322bfe3f2eSlogwang 	printf("EFD table: Adding 0x%x keys\n", num_flows);
2332bfe3f2eSlogwang }
2342bfe3f2eSlogwang 
2352bfe3f2eSlogwang /* Check the link status of all ports in up to 9s, and print them finally */
2362bfe3f2eSlogwang static void
2372bfe3f2eSlogwang check_all_ports_link_status(uint16_t port_num, uint32_t port_mask)
2382bfe3f2eSlogwang {
2392bfe3f2eSlogwang #define CHECK_INTERVAL 100 /* 100ms */
2402bfe3f2eSlogwang #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
2412bfe3f2eSlogwang 	uint8_t count, all_ports_up, print_flag = 0;
2422bfe3f2eSlogwang 	uint16_t portid;
2432bfe3f2eSlogwang 	struct rte_eth_link link;
2442bfe3f2eSlogwang 
2452bfe3f2eSlogwang 	printf("\nChecking link status");
2462bfe3f2eSlogwang 	fflush(stdout);
2472bfe3f2eSlogwang 	for (count = 0; count <= MAX_CHECK_TIME; count++) {
2482bfe3f2eSlogwang 		all_ports_up = 1;
2492bfe3f2eSlogwang 		for (portid = 0; portid < port_num; portid++) {
2502bfe3f2eSlogwang 			if ((port_mask & (1 << info->id[portid])) == 0)
2512bfe3f2eSlogwang 				continue;
2522bfe3f2eSlogwang 			memset(&link, 0, sizeof(link));
2532bfe3f2eSlogwang 			rte_eth_link_get_nowait(info->id[portid], &link);
2542bfe3f2eSlogwang 			/* print link status if flag set */
2552bfe3f2eSlogwang 			if (print_flag == 1) {
2562bfe3f2eSlogwang 				if (link.link_status)
2572bfe3f2eSlogwang 					printf(
2582bfe3f2eSlogwang 					"Port%d Link Up. Speed %u Mbps - %s\n",
2592bfe3f2eSlogwang 						info->id[portid],
2602bfe3f2eSlogwang 						link.link_speed,
2612bfe3f2eSlogwang 				(link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
2622bfe3f2eSlogwang 					("full-duplex") : ("half-duplex\n"));
2632bfe3f2eSlogwang 				else
2642bfe3f2eSlogwang 					printf("Port %d Link Down\n",
2652bfe3f2eSlogwang 						info->id[portid]);
2662bfe3f2eSlogwang 				continue;
2672bfe3f2eSlogwang 			}
2682bfe3f2eSlogwang 			/* clear all_ports_up flag if any link down */
2692bfe3f2eSlogwang 			if (link.link_status == ETH_LINK_DOWN) {
2702bfe3f2eSlogwang 				all_ports_up = 0;
2712bfe3f2eSlogwang 				break;
2722bfe3f2eSlogwang 			}
2732bfe3f2eSlogwang 		}
2742bfe3f2eSlogwang 		/* after finally printing all link status, get out */
2752bfe3f2eSlogwang 		if (print_flag == 1)
2762bfe3f2eSlogwang 			break;
2772bfe3f2eSlogwang 
2782bfe3f2eSlogwang 		if (all_ports_up == 0) {
2792bfe3f2eSlogwang 			printf(".");
2802bfe3f2eSlogwang 			fflush(stdout);
2812bfe3f2eSlogwang 			rte_delay_ms(CHECK_INTERVAL);
2822bfe3f2eSlogwang 		}
2832bfe3f2eSlogwang 
2842bfe3f2eSlogwang 		/* set the print_flag if all ports up or timeout */
2852bfe3f2eSlogwang 		if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
2862bfe3f2eSlogwang 			print_flag = 1;
2872bfe3f2eSlogwang 			printf("done\n");
2882bfe3f2eSlogwang 		}
2892bfe3f2eSlogwang 	}
2902bfe3f2eSlogwang }
2912bfe3f2eSlogwang 
2922bfe3f2eSlogwang /**
2932bfe3f2eSlogwang  * Main init function for the multi-process server app,
2942bfe3f2eSlogwang  * calls subfunctions to do each stage of the initialisation.
2952bfe3f2eSlogwang  */
2962bfe3f2eSlogwang int
2972bfe3f2eSlogwang init(int argc, char *argv[])
2982bfe3f2eSlogwang {
2992bfe3f2eSlogwang 	int retval;
3002bfe3f2eSlogwang 	const struct rte_memzone *mz;
3012bfe3f2eSlogwang 	uint8_t i, total_ports;
3022bfe3f2eSlogwang 
3032bfe3f2eSlogwang 	/* init EAL, parsing EAL args */
3042bfe3f2eSlogwang 	retval = rte_eal_init(argc, argv);
3052bfe3f2eSlogwang 	if (retval < 0)
3062bfe3f2eSlogwang 		return -1;
3072bfe3f2eSlogwang 	argc -= retval;
3082bfe3f2eSlogwang 	argv += retval;
3092bfe3f2eSlogwang 
3102bfe3f2eSlogwang 	/* get total number of ports */
311*d30ea906Sjfb8856606 	total_ports = rte_eth_dev_count_avail();
3122bfe3f2eSlogwang 
3132bfe3f2eSlogwang 	/* set up array for port data */
3142bfe3f2eSlogwang 	mz = rte_memzone_reserve(MZ_SHARED_INFO, sizeof(*info),
3152bfe3f2eSlogwang 				rte_socket_id(), NO_FLAGS);
3162bfe3f2eSlogwang 	if (mz == NULL)
3172bfe3f2eSlogwang 		rte_exit(EXIT_FAILURE, "Cannot reserve memory zone "
3182bfe3f2eSlogwang 				"for port information\n");
3192bfe3f2eSlogwang 	memset(mz->addr, 0, sizeof(*info));
3202bfe3f2eSlogwang 	info = mz->addr;
3212bfe3f2eSlogwang 
3222bfe3f2eSlogwang 	/* parse additional, application arguments */
3232bfe3f2eSlogwang 	retval = parse_app_args(total_ports, argc, argv);
3242bfe3f2eSlogwang 	if (retval != 0)
3252bfe3f2eSlogwang 		return -1;
3262bfe3f2eSlogwang 
3272bfe3f2eSlogwang 	/* initialise mbuf pools */
3282bfe3f2eSlogwang 	retval = init_mbuf_pools();
3292bfe3f2eSlogwang 	if (retval != 0)
3302bfe3f2eSlogwang 		rte_exit(EXIT_FAILURE, "Cannot create needed mbuf pools\n");
3312bfe3f2eSlogwang 
3322bfe3f2eSlogwang 	/* now initialise the ports we will use */
3332bfe3f2eSlogwang 	for (i = 0; i < info->num_ports; i++) {
3342bfe3f2eSlogwang 		retval = init_port(info->id[i]);
3352bfe3f2eSlogwang 		if (retval != 0)
3362bfe3f2eSlogwang 			rte_exit(EXIT_FAILURE, "Cannot initialise port %u\n",
3372bfe3f2eSlogwang 					(unsigned int) i);
3382bfe3f2eSlogwang 	}
3392bfe3f2eSlogwang 
3402bfe3f2eSlogwang 	check_all_ports_link_status(info->num_ports, (~0x0));
3412bfe3f2eSlogwang 
3422bfe3f2eSlogwang 	/* initialise the node queues/rings for inter-eu comms */
3432bfe3f2eSlogwang 	init_shm_rings();
3442bfe3f2eSlogwang 
3452bfe3f2eSlogwang 	/* Create the EFD table */
3462bfe3f2eSlogwang 	create_efd_table();
3472bfe3f2eSlogwang 
3482bfe3f2eSlogwang 	/* Populate the EFD table */
3492bfe3f2eSlogwang 	populate_efd_table();
3502bfe3f2eSlogwang 
3512bfe3f2eSlogwang 	/* Share the total number of nodes */
3522bfe3f2eSlogwang 	info->num_nodes = num_nodes;
3532bfe3f2eSlogwang 
3542bfe3f2eSlogwang 	/* Share the total number of flows */
3552bfe3f2eSlogwang 	info->num_flows = num_flows;
3562bfe3f2eSlogwang 	return 0;
3572bfe3f2eSlogwang }
358