1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2016-2017 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include <stdint.h>
35 #include <stdio.h>
36 #include <string.h>
37 #include <sys/queue.h>
38 #include <errno.h>
39 #include <stdarg.h>
40 #include <inttypes.h>
41 
42 #include <rte_common.h>
43 #include <rte_memory.h>
44 #include <rte_memzone.h>
45 #include <rte_eal.h>
46 #include <rte_byteorder.h>
47 #include <rte_atomic.h>
48 #include <rte_launch.h>
49 #include <rte_per_lcore.h>
50 #include <rte_lcore.h>
51 #include <rte_branch_prediction.h>
52 #include <rte_debug.h>
53 #include <rte_ring.h>
54 #include <rte_log.h>
55 #include <rte_mempool.h>
56 #include <rte_memcpy.h>
57 #include <rte_mbuf.h>
58 #include <rte_interrupts.h>
59 #include <rte_ether.h>
60 #include <rte_ethdev.h>
61 #include <rte_malloc.h>
62 #include <rte_string_fns.h>
63 #include <rte_cycles.h>
64 #include <rte_efd.h>
65 #include <rte_hash.h>
66 
67 #include "common.h"
68 #include "args.h"
69 #include "init.h"
70 
/*
 * Sizing of the packet mbuf pool: init_mbuf_pools() budgets this many
 * mbufs for every node and for every port in use.
 */
#define MBUFS_PER_NODE 1536
#define MBUFS_PER_PORT 1536
/* Cache size passed to rte_pktmbuf_pool_create() */
#define MBUF_CACHE_SIZE 512

/*
 * Initial RX/TX descriptor counts per queue; init_port() lets
 * rte_eth_dev_adjust_nb_rx_tx_desc() modify them before queue setup.
 */
#define RTE_MP_RX_DESC_DEFAULT 512
#define RTE_MP_TX_DESC_DEFAULT 512
/* Size of the RX ring created for each node in init_shm_rings() */
#define NODE_QUEUE_RINGSIZE 128

/* Flags value handed to rte_memzone_reserve() when no flag is wanted */
#define NO_FLAGS 0
80 
/* The mbuf pool for packet rx; created by init_mbuf_pools() */
struct rte_mempool *pktmbuf_pool;

/* array of info/queues for nodes; allocated by init_shm_rings() */
struct node *nodes;

/* EFD table; created by create_efd_table(), filled by populate_efd_table() */
struct rte_efd_table *efd_table;

/*
 * Shared info between server and nodes; init() points this at the
 * MZ_SHARED_INFO memzone after zeroing it.
 */
struct shared_info *info;
92 
93 /**
94  * Initialise the mbuf pool for packet reception for the NIC, and any other
95  * buffer pools needed by the app - currently none.
96  */
97 static int
98 init_mbuf_pools(void)
99 {
100 	const unsigned int num_mbufs = (num_nodes * MBUFS_PER_NODE) +
101 			(info->num_ports * MBUFS_PER_PORT);
102 
103 	/*
104 	 * Don't pass single-producer/single-consumer flags to mbuf create as it
105 	 * seems faster to use a cache instead
106 	 */
107 	printf("Creating mbuf pool '%s' [%u mbufs] ...\n",
108 			PKTMBUF_POOL_NAME, num_mbufs);
109 	pktmbuf_pool = rte_pktmbuf_pool_create(PKTMBUF_POOL_NAME, num_mbufs,
110 		MBUF_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
111 
112 	return pktmbuf_pool == NULL; /* 0  on success */
113 }
114 
115 /**
116  * Initialise an individual port:
117  * - configure number of rx and tx rings
118  * - set up each rx ring, to pull from the main mbuf pool
119  * - set up each tx ring
120  * - start the port and report its status to stdout
121  */
122 static int
123 init_port(uint16_t port_num)
124 {
125 	/* for port configuration all features are off by default */
126 	const struct rte_eth_conf port_conf = {
127 		.rxmode = {
128 			.mq_mode = ETH_MQ_RX_RSS
129 		}
130 	};
131 	const uint16_t rx_rings = 1, tx_rings = num_nodes;
132 	uint16_t rx_ring_size = RTE_MP_RX_DESC_DEFAULT;
133 	uint16_t tx_ring_size = RTE_MP_TX_DESC_DEFAULT;
134 
135 	uint16_t q;
136 	int retval;
137 
138 	printf("Port %u init ... ", port_num);
139 	fflush(stdout);
140 
141 	/*
142 	 * Standard DPDK port initialisation - config port, then set up
143 	 * rx and tx rings.
144 	 */
145 	retval = rte_eth_dev_configure(port_num, rx_rings, tx_rings, &port_conf);
146 	if (retval != 0)
147 		return retval;
148 
149 	retval = rte_eth_dev_adjust_nb_rx_tx_desc(port_num, &rx_ring_size,
150 			&tx_ring_size);
151 	if (retval != 0)
152 		return retval;
153 
154 	for (q = 0; q < rx_rings; q++) {
155 		retval = rte_eth_rx_queue_setup(port_num, q, rx_ring_size,
156 				rte_eth_dev_socket_id(port_num),
157 				NULL, pktmbuf_pool);
158 		if (retval < 0)
159 			return retval;
160 	}
161 
162 	for (q = 0; q < tx_rings; q++) {
163 		retval = rte_eth_tx_queue_setup(port_num, q, tx_ring_size,
164 				rte_eth_dev_socket_id(port_num),
165 				NULL);
166 		if (retval < 0)
167 			return retval;
168 	}
169 
170 	rte_eth_promiscuous_enable(port_num);
171 
172 	retval = rte_eth_dev_start(port_num);
173 	if (retval < 0)
174 		return retval;
175 
176 	printf("done:\n");
177 
178 	return 0;
179 }
180 
181 /**
182  * Set up the DPDK rings which will be used to pass packets, via
183  * pointers, between the multi-process server and node processes.
184  * Each node needs one RX queue.
185  */
186 static int
187 init_shm_rings(void)
188 {
189 	unsigned int i;
190 	unsigned int socket_id;
191 	const char *q_name;
192 	const unsigned int ringsize = NODE_QUEUE_RINGSIZE;
193 
194 	nodes = rte_malloc("node details",
195 		sizeof(*nodes) * num_nodes, 0);
196 	if (nodes == NULL)
197 		rte_exit(EXIT_FAILURE, "Cannot allocate memory for "
198 				"node program details\n");
199 
200 	for (i = 0; i < num_nodes; i++) {
201 		/* Create an RX queue for each node */
202 		socket_id = rte_socket_id();
203 		q_name = get_rx_queue_name(i);
204 		nodes[i].rx_q = rte_ring_create(q_name,
205 				ringsize, socket_id,
206 				RING_F_SP_ENQ | RING_F_SC_DEQ);
207 		if (nodes[i].rx_q == NULL)
208 			rte_exit(EXIT_FAILURE, "Cannot create rx ring queue "
209 					"for node %u\n", i);
210 	}
211 	return 0;
212 }
213 
214 /*
215  * Create EFD table which will contain all the flows
216  * that will be distributed among the nodes
217  */
218 static void
219 create_efd_table(void)
220 {
221 	uint8_t socket_id = rte_socket_id();
222 
223 	/* create table */
224 	efd_table = rte_efd_create("flow table", num_flows * 2, sizeof(uint32_t),
225 			1 << socket_id,	socket_id);
226 
227 	if (efd_table == NULL)
228 		rte_exit(EXIT_FAILURE, "Problem creating the flow table\n");
229 }
230 
231 static void
232 populate_efd_table(void)
233 {
234 	unsigned int i;
235 	int32_t ret;
236 	uint32_t ip_dst;
237 	uint8_t socket_id = rte_socket_id();
238 	uint64_t node_id;
239 
240 	/* Add flows in table */
241 	for (i = 0; i < num_flows; i++) {
242 		node_id = i % num_nodes;
243 
244 		ip_dst = rte_cpu_to_be_32(i);
245 		ret = rte_efd_update(efd_table, socket_id,
246 				(void *)&ip_dst, (efd_value_t)node_id);
247 		if (ret < 0)
248 			rte_exit(EXIT_FAILURE, "Unable to add entry %u in "
249 					"EFD table\n", i);
250 	}
251 
252 	printf("EFD table: Adding 0x%x keys\n", num_flows);
253 }
254 
255 /* Check the link status of all ports in up to 9s, and print them finally */
256 static void
257 check_all_ports_link_status(uint16_t port_num, uint32_t port_mask)
258 {
259 #define CHECK_INTERVAL 100 /* 100ms */
260 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
261 	uint8_t count, all_ports_up, print_flag = 0;
262 	uint16_t portid;
263 	struct rte_eth_link link;
264 
265 	printf("\nChecking link status");
266 	fflush(stdout);
267 	for (count = 0; count <= MAX_CHECK_TIME; count++) {
268 		all_ports_up = 1;
269 		for (portid = 0; portid < port_num; portid++) {
270 			if ((port_mask & (1 << info->id[portid])) == 0)
271 				continue;
272 			memset(&link, 0, sizeof(link));
273 			rte_eth_link_get_nowait(info->id[portid], &link);
274 			/* print link status if flag set */
275 			if (print_flag == 1) {
276 				if (link.link_status)
277 					printf(
278 					"Port%d Link Up. Speed %u Mbps - %s\n",
279 						info->id[portid],
280 						link.link_speed,
281 				(link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
282 					("full-duplex") : ("half-duplex\n"));
283 				else
284 					printf("Port %d Link Down\n",
285 						info->id[portid]);
286 				continue;
287 			}
288 			/* clear all_ports_up flag if any link down */
289 			if (link.link_status == ETH_LINK_DOWN) {
290 				all_ports_up = 0;
291 				break;
292 			}
293 		}
294 		/* after finally printing all link status, get out */
295 		if (print_flag == 1)
296 			break;
297 
298 		if (all_ports_up == 0) {
299 			printf(".");
300 			fflush(stdout);
301 			rte_delay_ms(CHECK_INTERVAL);
302 		}
303 
304 		/* set the print_flag if all ports up or timeout */
305 		if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
306 			print_flag = 1;
307 			printf("done\n");
308 		}
309 	}
310 }
311 
312 /**
313  * Main init function for the multi-process server app,
314  * calls subfunctions to do each stage of the initialisation.
315  */
316 int
317 init(int argc, char *argv[])
318 {
319 	int retval;
320 	const struct rte_memzone *mz;
321 	uint8_t i, total_ports;
322 
323 	/* init EAL, parsing EAL args */
324 	retval = rte_eal_init(argc, argv);
325 	if (retval < 0)
326 		return -1;
327 	argc -= retval;
328 	argv += retval;
329 
330 	/* get total number of ports */
331 	total_ports = rte_eth_dev_count();
332 
333 	/* set up array for port data */
334 	mz = rte_memzone_reserve(MZ_SHARED_INFO, sizeof(*info),
335 				rte_socket_id(), NO_FLAGS);
336 	if (mz == NULL)
337 		rte_exit(EXIT_FAILURE, "Cannot reserve memory zone "
338 				"for port information\n");
339 	memset(mz->addr, 0, sizeof(*info));
340 	info = mz->addr;
341 
342 	/* parse additional, application arguments */
343 	retval = parse_app_args(total_ports, argc, argv);
344 	if (retval != 0)
345 		return -1;
346 
347 	/* initialise mbuf pools */
348 	retval = init_mbuf_pools();
349 	if (retval != 0)
350 		rte_exit(EXIT_FAILURE, "Cannot create needed mbuf pools\n");
351 
352 	/* now initialise the ports we will use */
353 	for (i = 0; i < info->num_ports; i++) {
354 		retval = init_port(info->id[i]);
355 		if (retval != 0)
356 			rte_exit(EXIT_FAILURE, "Cannot initialise port %u\n",
357 					(unsigned int) i);
358 	}
359 
360 	check_all_ports_link_status(info->num_ports, (~0x0));
361 
362 	/* initialise the node queues/rings for inter-eu comms */
363 	init_shm_rings();
364 
365 	/* Create the EFD table */
366 	create_efd_table();
367 
368 	/* Populate the EFD table */
369 	populate_efd_table();
370 
371 	/* Share the total number of nodes */
372 	info->num_nodes = num_nodes;
373 
374 	/* Share the total number of flows */
375 	info->num_flows = num_flows;
376 	return 0;
377 }
378