xref: /f-stack/dpdk/app/test/test_ring_perf.c (revision 2d9fd380)
14418919fSjohnjiang /* SPDX-License-Identifier: BSD-3-Clause
24418919fSjohnjiang  * Copyright(c) 2010-2014 Intel Corporation
34418919fSjohnjiang  * Copyright(c) 2019 Arm Limited
44418919fSjohnjiang  */
54418919fSjohnjiang 
64418919fSjohnjiang 
74418919fSjohnjiang #include <stdio.h>
84418919fSjohnjiang #include <inttypes.h>
94418919fSjohnjiang #include <rte_ring.h>
104418919fSjohnjiang #include <rte_cycles.h>
114418919fSjohnjiang #include <rte_launch.h>
124418919fSjohnjiang #include <rte_pause.h>
134418919fSjohnjiang #include <string.h>
144418919fSjohnjiang 
154418919fSjohnjiang #include "test.h"
16*2d9fd380Sjfb8856606 #include "test_ring.h"
174418919fSjohnjiang 
184418919fSjohnjiang /*
19*2d9fd380Sjfb8856606  * Ring performance test cases, measures performance of various operations
20*2d9fd380Sjfb8856606  * using rdtsc for legacy and 16B size ring elements.
214418919fSjohnjiang  */
224418919fSjohnjiang 
234418919fSjohnjiang #define RING_NAME "RING_PERF"
244418919fSjohnjiang #define RING_SIZE 4096
254418919fSjohnjiang #define MAX_BURST 32
264418919fSjohnjiang 
/*
 * the sizes to enqueue and dequeue in testing
 * (marked volatile so they won't be seen as compile-time constants)
 */
static const volatile unsigned bulk_sizes[] = { 8, 32 };

/* Pair of lcore ids used by the two-thread enqueue/dequeue tests. */
struct lcore_pair {
	unsigned c1, c2;
};

/* Rendezvous counter for paired tests: each thread of the pair increments
 * it and spins until both have arrived (see enqueue_dequeue_bulk_helper).
 */
static volatile unsigned lcore_count = 0;
384418919fSjohnjiang 
39*2d9fd380Sjfb8856606 static void
test_ring_print_test_string(unsigned int api_type,int esize,unsigned int bsz,double value)40*2d9fd380Sjfb8856606 test_ring_print_test_string(unsigned int api_type, int esize,
41*2d9fd380Sjfb8856606 	unsigned int bsz, double value)
42*2d9fd380Sjfb8856606 {
43*2d9fd380Sjfb8856606 	if (esize == -1)
44*2d9fd380Sjfb8856606 		printf("legacy APIs");
45*2d9fd380Sjfb8856606 	else
46*2d9fd380Sjfb8856606 		printf("elem APIs: element size %dB", esize);
47*2d9fd380Sjfb8856606 
48*2d9fd380Sjfb8856606 	if (api_type == TEST_RING_IGNORE_API_TYPE)
49*2d9fd380Sjfb8856606 		return;
50*2d9fd380Sjfb8856606 
51*2d9fd380Sjfb8856606 	if ((api_type & TEST_RING_THREAD_DEF) == TEST_RING_THREAD_DEF)
52*2d9fd380Sjfb8856606 		printf(": default enqueue/dequeue: ");
53*2d9fd380Sjfb8856606 	else if ((api_type & TEST_RING_THREAD_SPSC) == TEST_RING_THREAD_SPSC)
54*2d9fd380Sjfb8856606 		printf(": SP/SC: ");
55*2d9fd380Sjfb8856606 	else if ((api_type & TEST_RING_THREAD_MPMC) == TEST_RING_THREAD_MPMC)
56*2d9fd380Sjfb8856606 		printf(": MP/MC: ");
57*2d9fd380Sjfb8856606 
58*2d9fd380Sjfb8856606 	if ((api_type & TEST_RING_ELEM_SINGLE) == TEST_RING_ELEM_SINGLE)
59*2d9fd380Sjfb8856606 		printf("single: ");
60*2d9fd380Sjfb8856606 	else if ((api_type & TEST_RING_ELEM_BULK) == TEST_RING_ELEM_BULK)
61*2d9fd380Sjfb8856606 		printf("bulk (size: %u): ", bsz);
62*2d9fd380Sjfb8856606 	else if ((api_type & TEST_RING_ELEM_BURST) == TEST_RING_ELEM_BURST)
63*2d9fd380Sjfb8856606 		printf("burst (size: %u): ", bsz);
64*2d9fd380Sjfb8856606 
65*2d9fd380Sjfb8856606 	printf("%.2F\n", value);
66*2d9fd380Sjfb8856606 }
67*2d9fd380Sjfb8856606 
684418919fSjohnjiang /**** Functions to analyse our core mask to get cores for different tests ***/
694418919fSjohnjiang 
704418919fSjohnjiang static int
get_two_hyperthreads(struct lcore_pair * lcp)714418919fSjohnjiang get_two_hyperthreads(struct lcore_pair *lcp)
724418919fSjohnjiang {
734418919fSjohnjiang 	unsigned id1, id2;
744418919fSjohnjiang 	unsigned c1, c2, s1, s2;
754418919fSjohnjiang 	RTE_LCORE_FOREACH(id1) {
764418919fSjohnjiang 		/* inner loop just re-reads all id's. We could skip the first few
774418919fSjohnjiang 		 * elements, but since number of cores is small there is little point
784418919fSjohnjiang 		 */
794418919fSjohnjiang 		RTE_LCORE_FOREACH(id2) {
804418919fSjohnjiang 			if (id1 == id2)
814418919fSjohnjiang 				continue;
824418919fSjohnjiang 
834418919fSjohnjiang 			c1 = rte_lcore_to_cpu_id(id1);
844418919fSjohnjiang 			c2 = rte_lcore_to_cpu_id(id2);
854418919fSjohnjiang 			s1 = rte_lcore_to_socket_id(id1);
864418919fSjohnjiang 			s2 = rte_lcore_to_socket_id(id2);
874418919fSjohnjiang 			if ((c1 == c2) && (s1 == s2)){
884418919fSjohnjiang 				lcp->c1 = id1;
894418919fSjohnjiang 				lcp->c2 = id2;
904418919fSjohnjiang 				return 0;
914418919fSjohnjiang 			}
924418919fSjohnjiang 		}
934418919fSjohnjiang 	}
944418919fSjohnjiang 	return 1;
954418919fSjohnjiang }
964418919fSjohnjiang 
974418919fSjohnjiang static int
get_two_cores(struct lcore_pair * lcp)984418919fSjohnjiang get_two_cores(struct lcore_pair *lcp)
994418919fSjohnjiang {
1004418919fSjohnjiang 	unsigned id1, id2;
1014418919fSjohnjiang 	unsigned c1, c2, s1, s2;
1024418919fSjohnjiang 	RTE_LCORE_FOREACH(id1) {
1034418919fSjohnjiang 		RTE_LCORE_FOREACH(id2) {
1044418919fSjohnjiang 			if (id1 == id2)
1054418919fSjohnjiang 				continue;
1064418919fSjohnjiang 
1074418919fSjohnjiang 			c1 = rte_lcore_to_cpu_id(id1);
1084418919fSjohnjiang 			c2 = rte_lcore_to_cpu_id(id2);
1094418919fSjohnjiang 			s1 = rte_lcore_to_socket_id(id1);
1104418919fSjohnjiang 			s2 = rte_lcore_to_socket_id(id2);
1114418919fSjohnjiang 			if ((c1 != c2) && (s1 == s2)){
1124418919fSjohnjiang 				lcp->c1 = id1;
1134418919fSjohnjiang 				lcp->c2 = id2;
1144418919fSjohnjiang 				return 0;
1154418919fSjohnjiang 			}
1164418919fSjohnjiang 		}
1174418919fSjohnjiang 	}
1184418919fSjohnjiang 	return 1;
1194418919fSjohnjiang }
1204418919fSjohnjiang 
1214418919fSjohnjiang static int
get_two_sockets(struct lcore_pair * lcp)1224418919fSjohnjiang get_two_sockets(struct lcore_pair *lcp)
1234418919fSjohnjiang {
1244418919fSjohnjiang 	unsigned id1, id2;
1254418919fSjohnjiang 	unsigned s1, s2;
1264418919fSjohnjiang 	RTE_LCORE_FOREACH(id1) {
1274418919fSjohnjiang 		RTE_LCORE_FOREACH(id2) {
1284418919fSjohnjiang 			if (id1 == id2)
1294418919fSjohnjiang 				continue;
1304418919fSjohnjiang 			s1 = rte_lcore_to_socket_id(id1);
1314418919fSjohnjiang 			s2 = rte_lcore_to_socket_id(id2);
1324418919fSjohnjiang 			if (s1 != s2){
1334418919fSjohnjiang 				lcp->c1 = id1;
1344418919fSjohnjiang 				lcp->c2 = id2;
1354418919fSjohnjiang 				return 0;
1364418919fSjohnjiang 			}
1374418919fSjohnjiang 		}
1384418919fSjohnjiang 	}
1394418919fSjohnjiang 	return 1;
1404418919fSjohnjiang }
1414418919fSjohnjiang 
1424418919fSjohnjiang /* Get cycle counts for dequeuing from an empty ring. Should be 2 or 3 cycles */
1434418919fSjohnjiang static void
test_empty_dequeue(struct rte_ring * r,const int esize,const unsigned int api_type)144*2d9fd380Sjfb8856606 test_empty_dequeue(struct rte_ring *r, const int esize,
145*2d9fd380Sjfb8856606 			const unsigned int api_type)
1464418919fSjohnjiang {
147*2d9fd380Sjfb8856606 	const unsigned int iter_shift = 26;
148*2d9fd380Sjfb8856606 	const unsigned int iterations = 1 << iter_shift;
149*2d9fd380Sjfb8856606 	unsigned int i = 0;
1504418919fSjohnjiang 	void *burst[MAX_BURST];
1514418919fSjohnjiang 
152*2d9fd380Sjfb8856606 	const uint64_t start = rte_rdtsc();
1534418919fSjohnjiang 	for (i = 0; i < iterations; i++)
154*2d9fd380Sjfb8856606 		test_ring_dequeue(r, burst, esize, bulk_sizes[0], api_type);
155*2d9fd380Sjfb8856606 	const uint64_t end = rte_rdtsc();
1564418919fSjohnjiang 
157*2d9fd380Sjfb8856606 	test_ring_print_test_string(api_type, esize, bulk_sizes[0],
158*2d9fd380Sjfb8856606 					((double)(end - start)) / iterations);
1594418919fSjohnjiang }
1604418919fSjohnjiang 
/*
 * for the separate enqueue and dequeue threads they take in one param
 * and return two. Input = burst size, output = cycle average for sp/sc & mp/mc
 */
struct thread_params {
	struct rte_ring *r;   /* the ring under test */
	unsigned size;        /* input value, the burst size */
	double spsc, mpmc;    /* output value, the single or multi timings */
};
1704418919fSjohnjiang 
1714418919fSjohnjiang /*
172*2d9fd380Sjfb8856606  * Helper function to call bulk SP/MP enqueue functions.
173*2d9fd380Sjfb8856606  * flag == 0 -> enqueue
174*2d9fd380Sjfb8856606  * flag == 1 -> dequeue
1754418919fSjohnjiang  */
176*2d9fd380Sjfb8856606 static __rte_always_inline int
enqueue_dequeue_bulk_helper(const unsigned int flag,const int esize,struct thread_params * p)177*2d9fd380Sjfb8856606 enqueue_dequeue_bulk_helper(const unsigned int flag, const int esize,
178*2d9fd380Sjfb8856606 	struct thread_params *p)
1794418919fSjohnjiang {
180*2d9fd380Sjfb8856606 	int ret;
181*2d9fd380Sjfb8856606 	const unsigned int iter_shift = 23;
182*2d9fd380Sjfb8856606 	const unsigned int iterations = 1 << iter_shift;
183*2d9fd380Sjfb8856606 	struct rte_ring *r = p->r;
184*2d9fd380Sjfb8856606 	unsigned int bsize = p->size;
185*2d9fd380Sjfb8856606 	unsigned int i;
186*2d9fd380Sjfb8856606 	void *burst = NULL;
1874418919fSjohnjiang 
1884418919fSjohnjiang #ifdef RTE_USE_C11_MEM_MODEL
1894418919fSjohnjiang 	if (__atomic_add_fetch(&lcore_count, 1, __ATOMIC_RELAXED) != 2)
1904418919fSjohnjiang #else
1914418919fSjohnjiang 	if (__sync_add_and_fetch(&lcore_count, 1) != 2)
1924418919fSjohnjiang #endif
1934418919fSjohnjiang 		while(lcore_count != 2)
1944418919fSjohnjiang 			rte_pause();
1954418919fSjohnjiang 
196*2d9fd380Sjfb8856606 	burst = test_ring_calloc(MAX_BURST, esize);
197*2d9fd380Sjfb8856606 	if (burst == NULL)
198*2d9fd380Sjfb8856606 		return -1;
199*2d9fd380Sjfb8856606 
2004418919fSjohnjiang 	const uint64_t sp_start = rte_rdtsc();
2014418919fSjohnjiang 	for (i = 0; i < iterations; i++)
202*2d9fd380Sjfb8856606 		do {
203*2d9fd380Sjfb8856606 			if (flag == 0)
204*2d9fd380Sjfb8856606 				ret = test_ring_enqueue(r, burst, esize, bsize,
205*2d9fd380Sjfb8856606 						TEST_RING_THREAD_SPSC |
206*2d9fd380Sjfb8856606 						TEST_RING_ELEM_BULK);
207*2d9fd380Sjfb8856606 			else if (flag == 1)
208*2d9fd380Sjfb8856606 				ret = test_ring_dequeue(r, burst, esize, bsize,
209*2d9fd380Sjfb8856606 						TEST_RING_THREAD_SPSC |
210*2d9fd380Sjfb8856606 						TEST_RING_ELEM_BULK);
211*2d9fd380Sjfb8856606 			if (ret == 0)
2124418919fSjohnjiang 				rte_pause();
213*2d9fd380Sjfb8856606 		} while (!ret);
2144418919fSjohnjiang 	const uint64_t sp_end = rte_rdtsc();
2154418919fSjohnjiang 
2164418919fSjohnjiang 	const uint64_t mp_start = rte_rdtsc();
2174418919fSjohnjiang 	for (i = 0; i < iterations; i++)
218*2d9fd380Sjfb8856606 		do {
219*2d9fd380Sjfb8856606 			if (flag == 0)
220*2d9fd380Sjfb8856606 				ret = test_ring_enqueue(r, burst, esize, bsize,
221*2d9fd380Sjfb8856606 						TEST_RING_THREAD_MPMC |
222*2d9fd380Sjfb8856606 						TEST_RING_ELEM_BULK);
223*2d9fd380Sjfb8856606 			else if (flag == 1)
224*2d9fd380Sjfb8856606 				ret = test_ring_dequeue(r, burst, esize, bsize,
225*2d9fd380Sjfb8856606 						TEST_RING_THREAD_MPMC |
226*2d9fd380Sjfb8856606 						TEST_RING_ELEM_BULK);
227*2d9fd380Sjfb8856606 			if (ret == 0)
2284418919fSjohnjiang 				rte_pause();
229*2d9fd380Sjfb8856606 		} while (!ret);
2304418919fSjohnjiang 	const uint64_t mp_end = rte_rdtsc();
2314418919fSjohnjiang 
232*2d9fd380Sjfb8856606 	p->spsc = ((double)(sp_end - sp_start))/(iterations * bsize);
233*2d9fd380Sjfb8856606 	p->mpmc = ((double)(mp_end - mp_start))/(iterations * bsize);
2344418919fSjohnjiang 	return 0;
2354418919fSjohnjiang }
2364418919fSjohnjiang 
/*
 * Function that uses rdtsc to measure timing for ring enqueue. Needs pair
 * thread running dequeue_bulk function
 */
static int
enqueue_bulk(void *p)
{
	/* legacy (pointer-sized) element API */
	return enqueue_dequeue_bulk_helper(0, -1, (struct thread_params *)p);
}
248*2d9fd380Sjfb8856606 
/* As enqueue_bulk, but exercising the 16B element (_elem) API. */
static int
enqueue_bulk_16B(void *p)
{
	return enqueue_dequeue_bulk_helper(0, 16, (struct thread_params *)p);
}
256*2d9fd380Sjfb8856606 
/*
 * Function that uses rdtsc to measure timing for ring dequeue. Needs pair
 * thread running enqueue_bulk function
 */
static int
dequeue_bulk(void *p)
{
	/* legacy (pointer-sized) element API */
	return enqueue_dequeue_bulk_helper(1, -1, (struct thread_params *)p);
}
2684418919fSjohnjiang 
/* As dequeue_bulk, but exercising the 16B element (_elem) API. */
static int
dequeue_bulk_16B(void *p)
{
	return enqueue_dequeue_bulk_helper(1, 16, (struct thread_params *)p);
}
2764418919fSjohnjiang 
2774418919fSjohnjiang /*
2784418919fSjohnjiang  * Function that calls the enqueue and dequeue bulk functions on pairs of cores.
2794418919fSjohnjiang  * used to measure ring perf between hyperthreads, cores and sockets.
2804418919fSjohnjiang  */
281*2d9fd380Sjfb8856606 static int
run_on_core_pair(struct lcore_pair * cores,struct rte_ring * r,const int esize)282*2d9fd380Sjfb8856606 run_on_core_pair(struct lcore_pair *cores, struct rte_ring *r, const int esize)
2834418919fSjohnjiang {
284*2d9fd380Sjfb8856606 	lcore_function_t *f1, *f2;
2854418919fSjohnjiang 	struct thread_params param1 = {0}, param2 = {0};
2864418919fSjohnjiang 	unsigned i;
287*2d9fd380Sjfb8856606 
288*2d9fd380Sjfb8856606 	if (esize == -1) {
289*2d9fd380Sjfb8856606 		f1 = enqueue_bulk;
290*2d9fd380Sjfb8856606 		f2 = dequeue_bulk;
291*2d9fd380Sjfb8856606 	} else {
292*2d9fd380Sjfb8856606 		f1 = enqueue_bulk_16B;
293*2d9fd380Sjfb8856606 		f2 = dequeue_bulk_16B;
294*2d9fd380Sjfb8856606 	}
295*2d9fd380Sjfb8856606 
296*2d9fd380Sjfb8856606 	for (i = 0; i < RTE_DIM(bulk_sizes); i++) {
2974418919fSjohnjiang 		lcore_count = 0;
2984418919fSjohnjiang 		param1.size = param2.size = bulk_sizes[i];
2994418919fSjohnjiang 		param1.r = param2.r = r;
300*2d9fd380Sjfb8856606 		if (cores->c1 == rte_get_main_lcore()) {
3014418919fSjohnjiang 			rte_eal_remote_launch(f2, &param2, cores->c2);
3024418919fSjohnjiang 			f1(&param1);
3034418919fSjohnjiang 			rte_eal_wait_lcore(cores->c2);
3044418919fSjohnjiang 		} else {
3054418919fSjohnjiang 			rte_eal_remote_launch(f1, &param1, cores->c1);
3064418919fSjohnjiang 			rte_eal_remote_launch(f2, &param2, cores->c2);
307*2d9fd380Sjfb8856606 			if (rte_eal_wait_lcore(cores->c1) < 0)
308*2d9fd380Sjfb8856606 				return -1;
309*2d9fd380Sjfb8856606 			if (rte_eal_wait_lcore(cores->c2) < 0)
310*2d9fd380Sjfb8856606 				return -1;
3114418919fSjohnjiang 		}
312*2d9fd380Sjfb8856606 		test_ring_print_test_string(
313*2d9fd380Sjfb8856606 			TEST_RING_THREAD_SPSC | TEST_RING_ELEM_BULK,
314*2d9fd380Sjfb8856606 			esize, bulk_sizes[i], param1.spsc + param2.spsc);
315*2d9fd380Sjfb8856606 		test_ring_print_test_string(
316*2d9fd380Sjfb8856606 			TEST_RING_THREAD_MPMC | TEST_RING_ELEM_BULK,
317*2d9fd380Sjfb8856606 			esize, bulk_sizes[i], param1.mpmc + param2.mpmc);
3184418919fSjohnjiang 	}
319*2d9fd380Sjfb8856606 
320*2d9fd380Sjfb8856606 	return 0;
3214418919fSjohnjiang }
3224418919fSjohnjiang 
/* Start flag for run_on_all_cores: workers spin until main sets it to 1. */
static rte_atomic32_t synchro;
/* Per-lcore count of completed enqueue+dequeue loop iterations. */
static uint64_t queue_count[RTE_MAX_LCORE];

/* Duration of each load-loop measurement window, in milliseconds. */
#define TIME_MS 100
3274418919fSjohnjiang 
3284418919fSjohnjiang static int
load_loop_fn_helper(struct thread_params * p,const int esize)329*2d9fd380Sjfb8856606 load_loop_fn_helper(struct thread_params *p, const int esize)
3304418919fSjohnjiang {
3314418919fSjohnjiang 	uint64_t time_diff = 0;
3324418919fSjohnjiang 	uint64_t begin = 0;
3334418919fSjohnjiang 	uint64_t hz = rte_get_timer_hz();
3344418919fSjohnjiang 	uint64_t lcount = 0;
3354418919fSjohnjiang 	const unsigned int lcore = rte_lcore_id();
3364418919fSjohnjiang 	struct thread_params *params = p;
337*2d9fd380Sjfb8856606 	void *burst = NULL;
3384418919fSjohnjiang 
339*2d9fd380Sjfb8856606 	burst = test_ring_calloc(MAX_BURST, esize);
340*2d9fd380Sjfb8856606 	if (burst == NULL)
341*2d9fd380Sjfb8856606 		return -1;
342*2d9fd380Sjfb8856606 
343*2d9fd380Sjfb8856606 	/* wait synchro for workers */
344*2d9fd380Sjfb8856606 	if (lcore != rte_get_main_lcore())
3454418919fSjohnjiang 		while (rte_atomic32_read(&synchro) == 0)
3464418919fSjohnjiang 			rte_pause();
3474418919fSjohnjiang 
3484418919fSjohnjiang 	begin = rte_get_timer_cycles();
3494418919fSjohnjiang 	while (time_diff < hz * TIME_MS / 1000) {
350*2d9fd380Sjfb8856606 		test_ring_enqueue(params->r, burst, esize, params->size,
351*2d9fd380Sjfb8856606 				TEST_RING_THREAD_MPMC | TEST_RING_ELEM_BULK);
352*2d9fd380Sjfb8856606 		test_ring_dequeue(params->r, burst, esize, params->size,
353*2d9fd380Sjfb8856606 				TEST_RING_THREAD_MPMC | TEST_RING_ELEM_BULK);
3544418919fSjohnjiang 		lcount++;
3554418919fSjohnjiang 		time_diff = rte_get_timer_cycles() - begin;
3564418919fSjohnjiang 	}
3574418919fSjohnjiang 	queue_count[lcore] = lcount;
358*2d9fd380Sjfb8856606 
359*2d9fd380Sjfb8856606 	rte_free(burst);
360*2d9fd380Sjfb8856606 
3614418919fSjohnjiang 	return 0;
3624418919fSjohnjiang }
3634418919fSjohnjiang 
/* lcore entry point: load loop over the legacy (pointer) API */
static int
load_loop_fn(void *p)
{
	return load_loop_fn_helper((struct thread_params *)p, -1);
}
371*2d9fd380Sjfb8856606 
/* lcore entry point: load loop over the 16B element (_elem) API */
static int
load_loop_fn_16B(void *p)
{
	return load_loop_fn_helper((struct thread_params *)p, 16);
}
379*2d9fd380Sjfb8856606 
380*2d9fd380Sjfb8856606 static int
run_on_all_cores(struct rte_ring * r,const int esize)381*2d9fd380Sjfb8856606 run_on_all_cores(struct rte_ring *r, const int esize)
3824418919fSjohnjiang {
3830c6bd470Sfengbojiang 	uint64_t total;
3844418919fSjohnjiang 	struct thread_params param;
385*2d9fd380Sjfb8856606 	lcore_function_t *lcore_f;
3864418919fSjohnjiang 	unsigned int i, c;
3874418919fSjohnjiang 
388*2d9fd380Sjfb8856606 	if (esize == -1)
389*2d9fd380Sjfb8856606 		lcore_f = load_loop_fn;
390*2d9fd380Sjfb8856606 	else
391*2d9fd380Sjfb8856606 		lcore_f = load_loop_fn_16B;
392*2d9fd380Sjfb8856606 
3934418919fSjohnjiang 	memset(&param, 0, sizeof(struct thread_params));
3944418919fSjohnjiang 	for (i = 0; i < RTE_DIM(bulk_sizes); i++) {
3950c6bd470Sfengbojiang 		total = 0;
3964418919fSjohnjiang 		printf("\nBulk enq/dequeue count on size %u\n", bulk_sizes[i]);
3974418919fSjohnjiang 		param.size = bulk_sizes[i];
3984418919fSjohnjiang 		param.r = r;
3994418919fSjohnjiang 
400*2d9fd380Sjfb8856606 		/* clear synchro and start workers */
4014418919fSjohnjiang 		rte_atomic32_set(&synchro, 0);
402*2d9fd380Sjfb8856606 		if (rte_eal_mp_remote_launch(lcore_f, &param, SKIP_MAIN) < 0)
4034418919fSjohnjiang 			return -1;
4044418919fSjohnjiang 
405*2d9fd380Sjfb8856606 		/* start synchro and launch test on main */
4064418919fSjohnjiang 		rte_atomic32_set(&synchro, 1);
407*2d9fd380Sjfb8856606 		lcore_f(&param);
4084418919fSjohnjiang 
4094418919fSjohnjiang 		rte_eal_mp_wait_lcore();
4104418919fSjohnjiang 
4114418919fSjohnjiang 		RTE_LCORE_FOREACH(c) {
4124418919fSjohnjiang 			printf("Core [%u] count = %"PRIu64"\n",
4134418919fSjohnjiang 					c, queue_count[c]);
4144418919fSjohnjiang 			total += queue_count[c];
4154418919fSjohnjiang 		}
4164418919fSjohnjiang 
4174418919fSjohnjiang 		printf("Total count (size: %u): %"PRIu64"\n",
4184418919fSjohnjiang 				bulk_sizes[i], total);
4194418919fSjohnjiang 	}
4204418919fSjohnjiang 
4214418919fSjohnjiang 	return 0;
4224418919fSjohnjiang }
4234418919fSjohnjiang 
4244418919fSjohnjiang /*
4254418919fSjohnjiang  * Test function that determines how long an enqueue + dequeue of a single item
4264418919fSjohnjiang  * takes on a single lcore. Result is for comparison with the bulk enq+deq.
4274418919fSjohnjiang  */
428*2d9fd380Sjfb8856606 static int
test_single_enqueue_dequeue(struct rte_ring * r,const int esize,const unsigned int api_type)429*2d9fd380Sjfb8856606 test_single_enqueue_dequeue(struct rte_ring *r, const int esize,
430*2d9fd380Sjfb8856606 	const unsigned int api_type)
4314418919fSjohnjiang {
432*2d9fd380Sjfb8856606 	const unsigned int iter_shift = 24;
433*2d9fd380Sjfb8856606 	const unsigned int iterations = 1 << iter_shift;
434*2d9fd380Sjfb8856606 	unsigned int i = 0;
4354418919fSjohnjiang 	void *burst = NULL;
4364418919fSjohnjiang 
437*2d9fd380Sjfb8856606 	/* alloc dummy object pointers */
438*2d9fd380Sjfb8856606 	burst = test_ring_calloc(1, esize);
439*2d9fd380Sjfb8856606 	if (burst == NULL)
440*2d9fd380Sjfb8856606 		return -1;
4414418919fSjohnjiang 
442*2d9fd380Sjfb8856606 	const uint64_t start = rte_rdtsc();
4434418919fSjohnjiang 	for (i = 0; i < iterations; i++) {
444*2d9fd380Sjfb8856606 		test_ring_enqueue(r, burst, esize, 1, api_type);
445*2d9fd380Sjfb8856606 		test_ring_dequeue(r, burst, esize, 1, api_type);
4464418919fSjohnjiang 	}
447*2d9fd380Sjfb8856606 	const uint64_t end = rte_rdtsc();
4484418919fSjohnjiang 
449*2d9fd380Sjfb8856606 	test_ring_print_test_string(api_type, esize, 1,
450*2d9fd380Sjfb8856606 					((double)(end - start)) / iterations);
451*2d9fd380Sjfb8856606 
452*2d9fd380Sjfb8856606 	rte_free(burst);
453*2d9fd380Sjfb8856606 
454*2d9fd380Sjfb8856606 	return 0;
4554418919fSjohnjiang }
4564418919fSjohnjiang 
4574418919fSjohnjiang /*
458*2d9fd380Sjfb8856606  * Test that does both enqueue and dequeue on a core using the burst/bulk API
459*2d9fd380Sjfb8856606  * calls Results should be the same as for the bulk function called on a
460*2d9fd380Sjfb8856606  * single lcore.
4614418919fSjohnjiang  */
462*2d9fd380Sjfb8856606 static int
test_burst_bulk_enqueue_dequeue(struct rte_ring * r,const int esize,const unsigned int api_type)463*2d9fd380Sjfb8856606 test_burst_bulk_enqueue_dequeue(struct rte_ring *r, const int esize,
464*2d9fd380Sjfb8856606 	const unsigned int api_type)
4654418919fSjohnjiang {
466*2d9fd380Sjfb8856606 	const unsigned int iter_shift = 23;
467*2d9fd380Sjfb8856606 	const unsigned int iterations = 1 << iter_shift;
468*2d9fd380Sjfb8856606 	unsigned int sz, i = 0;
469*2d9fd380Sjfb8856606 	void **burst = NULL;
4704418919fSjohnjiang 
471*2d9fd380Sjfb8856606 	burst = test_ring_calloc(MAX_BURST, esize);
472*2d9fd380Sjfb8856606 	if (burst == NULL)
473*2d9fd380Sjfb8856606 		return -1;
474*2d9fd380Sjfb8856606 
475*2d9fd380Sjfb8856606 	for (sz = 0; sz < RTE_DIM(bulk_sizes); sz++) {
476*2d9fd380Sjfb8856606 		const uint64_t start = rte_rdtsc();
4774418919fSjohnjiang 		for (i = 0; i < iterations; i++) {
478*2d9fd380Sjfb8856606 			test_ring_enqueue(r, burst, esize, bulk_sizes[sz],
479*2d9fd380Sjfb8856606 						api_type);
480*2d9fd380Sjfb8856606 			test_ring_dequeue(r, burst, esize, bulk_sizes[sz],
481*2d9fd380Sjfb8856606 						api_type);
4824418919fSjohnjiang 		}
483*2d9fd380Sjfb8856606 		const uint64_t end = rte_rdtsc();
4844418919fSjohnjiang 
485*2d9fd380Sjfb8856606 		test_ring_print_test_string(api_type, esize, bulk_sizes[sz],
486*2d9fd380Sjfb8856606 					((double)(end - start)) / iterations);
4874418919fSjohnjiang 	}
4884418919fSjohnjiang 
489*2d9fd380Sjfb8856606 	rte_free(burst);
490*2d9fd380Sjfb8856606 
491*2d9fd380Sjfb8856606 	return 0;
492*2d9fd380Sjfb8856606 }
493*2d9fd380Sjfb8856606 
/* Run all tests for a given element size */
static __rte_always_inline int
test_ring_perf_esize(const int esize)
{
	struct lcore_pair cores;
	struct rte_ring *r = NULL;

	/*
	 * Performance test for legacy/_elem APIs
	 * SP-SC/MP-MC, single
	 */
	r = test_ring_create(RING_NAME, esize, RING_SIZE, rte_socket_id(), 0);
	if (r == NULL)
		goto test_fail;

	printf("\n### Testing single element enq/deq ###\n");
	if (test_single_enqueue_dequeue(r, esize,
			TEST_RING_THREAD_SPSC | TEST_RING_ELEM_SINGLE) < 0)
		goto test_fail;
	if (test_single_enqueue_dequeue(r, esize,
			TEST_RING_THREAD_MPMC | TEST_RING_ELEM_SINGLE) < 0)
		goto test_fail;

	printf("\n### Testing burst enq/deq ###\n");
	if (test_burst_bulk_enqueue_dequeue(r, esize,
			TEST_RING_THREAD_SPSC | TEST_RING_ELEM_BURST) < 0)
		goto test_fail;
	if (test_burst_bulk_enqueue_dequeue(r, esize,
			TEST_RING_THREAD_MPMC | TEST_RING_ELEM_BURST) < 0)
		goto test_fail;

	printf("\n### Testing bulk enq/deq ###\n");
	if (test_burst_bulk_enqueue_dequeue(r, esize,
			TEST_RING_THREAD_SPSC | TEST_RING_ELEM_BULK) < 0)
		goto test_fail;
	if (test_burst_bulk_enqueue_dequeue(r, esize,
			TEST_RING_THREAD_MPMC | TEST_RING_ELEM_BULK) < 0)
		goto test_fail;

	printf("\n### Testing empty bulk deq ###\n");
	/* these return void, so no error check */
	test_empty_dequeue(r, esize,
			TEST_RING_THREAD_SPSC | TEST_RING_ELEM_BULK);
	test_empty_dequeue(r, esize,
			TEST_RING_THREAD_MPMC | TEST_RING_ELEM_BULK);

	/* pair tests run only when the topology provides a suitable pair */
	if (get_two_hyperthreads(&cores) == 0) {
		printf("\n### Testing using two hyperthreads ###\n");
		if (run_on_core_pair(&cores, r, esize) < 0)
			goto test_fail;
	}

	if (get_two_cores(&cores) == 0) {
		printf("\n### Testing using two physical cores ###\n");
		if (run_on_core_pair(&cores, r, esize) < 0)
			goto test_fail;
	}
	if (get_two_sockets(&cores) == 0) {
		printf("\n### Testing using two NUMA nodes ###\n");
		if (run_on_core_pair(&cores, r, esize) < 0)
			goto test_fail;
	}

	printf("\n### Testing using all worker nodes ###\n");
	if (run_on_all_cores(r, esize) < 0)
		goto test_fail;

	rte_ring_free(r);

	return 0;

test_fail:
	/* rte_ring_free(NULL) is a no-op, so this is safe on every path */
	rte_ring_free(r);

	return -1;
}
5694418919fSjohnjiang 
/* Top-level entry: run the full suite for each supported element size. */
static int
test_ring_perf(void)
{
	/* -1 = legacy (pointer) API, 16 = 16B custom element API */
	static const int esizes[] = { -1, 16 };
	unsigned int i;

	for (i = 0; i < RTE_DIM(esizes); i++)
		if (test_ring_perf_esize(esizes[i]) == -1)
			return -1;

	return 0;
}
5824418919fSjohnjiang 
/* Expose the suite to the DPDK test app as "ring_perf_autotest". */
REGISTER_TEST_COMMAND(ring_perf_autotest, test_ring_perf);
584