14418919fSjohnjiang /* SPDX-License-Identifier: BSD-3-Clause
24418919fSjohnjiang * Copyright(c) 2010-2014 Intel Corporation
34418919fSjohnjiang * Copyright(c) 2019 Arm Limited
44418919fSjohnjiang */
54418919fSjohnjiang
64418919fSjohnjiang
74418919fSjohnjiang #include <stdio.h>
84418919fSjohnjiang #include <inttypes.h>
94418919fSjohnjiang #include <rte_ring.h>
104418919fSjohnjiang #include <rte_cycles.h>
114418919fSjohnjiang #include <rte_launch.h>
124418919fSjohnjiang #include <rte_pause.h>
134418919fSjohnjiang #include <string.h>
144418919fSjohnjiang
154418919fSjohnjiang #include "test.h"
16*2d9fd380Sjfb8856606 #include "test_ring.h"
174418919fSjohnjiang
184418919fSjohnjiang /*
19*2d9fd380Sjfb8856606 * Ring performance test cases, measures performance of various operations
20*2d9fd380Sjfb8856606 * using rdtsc for legacy and 16B size ring elements.
214418919fSjohnjiang */
224418919fSjohnjiang
#define RING_NAME "RING_PERF"
#define RING_SIZE 4096
#define MAX_BURST 32

/*
 * the sizes to enqueue and dequeue in testing
 * (marked volatile so they won't be seen as compile-time constants)
 */
static const volatile unsigned bulk_sizes[] = { 8, 32 };

/* ids of the two lcores chosen for a paired enqueue/dequeue test */
struct lcore_pair {
	unsigned c1, c2;
};

/* number of lcores that have reached the paired-test start barrier */
static volatile unsigned lcore_count = 0;
384418919fSjohnjiang
39*2d9fd380Sjfb8856606 static void
test_ring_print_test_string(unsigned int api_type,int esize,unsigned int bsz,double value)40*2d9fd380Sjfb8856606 test_ring_print_test_string(unsigned int api_type, int esize,
41*2d9fd380Sjfb8856606 unsigned int bsz, double value)
42*2d9fd380Sjfb8856606 {
43*2d9fd380Sjfb8856606 if (esize == -1)
44*2d9fd380Sjfb8856606 printf("legacy APIs");
45*2d9fd380Sjfb8856606 else
46*2d9fd380Sjfb8856606 printf("elem APIs: element size %dB", esize);
47*2d9fd380Sjfb8856606
48*2d9fd380Sjfb8856606 if (api_type == TEST_RING_IGNORE_API_TYPE)
49*2d9fd380Sjfb8856606 return;
50*2d9fd380Sjfb8856606
51*2d9fd380Sjfb8856606 if ((api_type & TEST_RING_THREAD_DEF) == TEST_RING_THREAD_DEF)
52*2d9fd380Sjfb8856606 printf(": default enqueue/dequeue: ");
53*2d9fd380Sjfb8856606 else if ((api_type & TEST_RING_THREAD_SPSC) == TEST_RING_THREAD_SPSC)
54*2d9fd380Sjfb8856606 printf(": SP/SC: ");
55*2d9fd380Sjfb8856606 else if ((api_type & TEST_RING_THREAD_MPMC) == TEST_RING_THREAD_MPMC)
56*2d9fd380Sjfb8856606 printf(": MP/MC: ");
57*2d9fd380Sjfb8856606
58*2d9fd380Sjfb8856606 if ((api_type & TEST_RING_ELEM_SINGLE) == TEST_RING_ELEM_SINGLE)
59*2d9fd380Sjfb8856606 printf("single: ");
60*2d9fd380Sjfb8856606 else if ((api_type & TEST_RING_ELEM_BULK) == TEST_RING_ELEM_BULK)
61*2d9fd380Sjfb8856606 printf("bulk (size: %u): ", bsz);
62*2d9fd380Sjfb8856606 else if ((api_type & TEST_RING_ELEM_BURST) == TEST_RING_ELEM_BURST)
63*2d9fd380Sjfb8856606 printf("burst (size: %u): ", bsz);
64*2d9fd380Sjfb8856606
65*2d9fd380Sjfb8856606 printf("%.2F\n", value);
66*2d9fd380Sjfb8856606 }
67*2d9fd380Sjfb8856606
684418919fSjohnjiang /**** Functions to analyse our core mask to get cores for different tests ***/
694418919fSjohnjiang
704418919fSjohnjiang static int
get_two_hyperthreads(struct lcore_pair * lcp)714418919fSjohnjiang get_two_hyperthreads(struct lcore_pair *lcp)
724418919fSjohnjiang {
734418919fSjohnjiang unsigned id1, id2;
744418919fSjohnjiang unsigned c1, c2, s1, s2;
754418919fSjohnjiang RTE_LCORE_FOREACH(id1) {
764418919fSjohnjiang /* inner loop just re-reads all id's. We could skip the first few
774418919fSjohnjiang * elements, but since number of cores is small there is little point
784418919fSjohnjiang */
794418919fSjohnjiang RTE_LCORE_FOREACH(id2) {
804418919fSjohnjiang if (id1 == id2)
814418919fSjohnjiang continue;
824418919fSjohnjiang
834418919fSjohnjiang c1 = rte_lcore_to_cpu_id(id1);
844418919fSjohnjiang c2 = rte_lcore_to_cpu_id(id2);
854418919fSjohnjiang s1 = rte_lcore_to_socket_id(id1);
864418919fSjohnjiang s2 = rte_lcore_to_socket_id(id2);
874418919fSjohnjiang if ((c1 == c2) && (s1 == s2)){
884418919fSjohnjiang lcp->c1 = id1;
894418919fSjohnjiang lcp->c2 = id2;
904418919fSjohnjiang return 0;
914418919fSjohnjiang }
924418919fSjohnjiang }
934418919fSjohnjiang }
944418919fSjohnjiang return 1;
954418919fSjohnjiang }
964418919fSjohnjiang
974418919fSjohnjiang static int
get_two_cores(struct lcore_pair * lcp)984418919fSjohnjiang get_two_cores(struct lcore_pair *lcp)
994418919fSjohnjiang {
1004418919fSjohnjiang unsigned id1, id2;
1014418919fSjohnjiang unsigned c1, c2, s1, s2;
1024418919fSjohnjiang RTE_LCORE_FOREACH(id1) {
1034418919fSjohnjiang RTE_LCORE_FOREACH(id2) {
1044418919fSjohnjiang if (id1 == id2)
1054418919fSjohnjiang continue;
1064418919fSjohnjiang
1074418919fSjohnjiang c1 = rte_lcore_to_cpu_id(id1);
1084418919fSjohnjiang c2 = rte_lcore_to_cpu_id(id2);
1094418919fSjohnjiang s1 = rte_lcore_to_socket_id(id1);
1104418919fSjohnjiang s2 = rte_lcore_to_socket_id(id2);
1114418919fSjohnjiang if ((c1 != c2) && (s1 == s2)){
1124418919fSjohnjiang lcp->c1 = id1;
1134418919fSjohnjiang lcp->c2 = id2;
1144418919fSjohnjiang return 0;
1154418919fSjohnjiang }
1164418919fSjohnjiang }
1174418919fSjohnjiang }
1184418919fSjohnjiang return 1;
1194418919fSjohnjiang }
1204418919fSjohnjiang
1214418919fSjohnjiang static int
get_two_sockets(struct lcore_pair * lcp)1224418919fSjohnjiang get_two_sockets(struct lcore_pair *lcp)
1234418919fSjohnjiang {
1244418919fSjohnjiang unsigned id1, id2;
1254418919fSjohnjiang unsigned s1, s2;
1264418919fSjohnjiang RTE_LCORE_FOREACH(id1) {
1274418919fSjohnjiang RTE_LCORE_FOREACH(id2) {
1284418919fSjohnjiang if (id1 == id2)
1294418919fSjohnjiang continue;
1304418919fSjohnjiang s1 = rte_lcore_to_socket_id(id1);
1314418919fSjohnjiang s2 = rte_lcore_to_socket_id(id2);
1324418919fSjohnjiang if (s1 != s2){
1334418919fSjohnjiang lcp->c1 = id1;
1344418919fSjohnjiang lcp->c2 = id2;
1354418919fSjohnjiang return 0;
1364418919fSjohnjiang }
1374418919fSjohnjiang }
1384418919fSjohnjiang }
1394418919fSjohnjiang return 1;
1404418919fSjohnjiang }
1414418919fSjohnjiang
1424418919fSjohnjiang /* Get cycle counts for dequeuing from an empty ring. Should be 2 or 3 cycles */
1434418919fSjohnjiang static void
test_empty_dequeue(struct rte_ring * r,const int esize,const unsigned int api_type)144*2d9fd380Sjfb8856606 test_empty_dequeue(struct rte_ring *r, const int esize,
145*2d9fd380Sjfb8856606 const unsigned int api_type)
1464418919fSjohnjiang {
147*2d9fd380Sjfb8856606 const unsigned int iter_shift = 26;
148*2d9fd380Sjfb8856606 const unsigned int iterations = 1 << iter_shift;
149*2d9fd380Sjfb8856606 unsigned int i = 0;
1504418919fSjohnjiang void *burst[MAX_BURST];
1514418919fSjohnjiang
152*2d9fd380Sjfb8856606 const uint64_t start = rte_rdtsc();
1534418919fSjohnjiang for (i = 0; i < iterations; i++)
154*2d9fd380Sjfb8856606 test_ring_dequeue(r, burst, esize, bulk_sizes[0], api_type);
155*2d9fd380Sjfb8856606 const uint64_t end = rte_rdtsc();
1564418919fSjohnjiang
157*2d9fd380Sjfb8856606 test_ring_print_test_string(api_type, esize, bulk_sizes[0],
158*2d9fd380Sjfb8856606 ((double)(end - start)) / iterations);
1594418919fSjohnjiang }
1604418919fSjohnjiang
/*
 * for the separate enqueue and dequeue threads they take in one param
 * and return two. Input = burst size, output = cycle average for sp/sc & mp/mc
 */
struct thread_params {
	struct rte_ring *r; /* ring under test */
	unsigned size; /* input value, the burst size */
	double spsc, mpmc; /* output value, the single or multi timings */
};
1704418919fSjohnjiang
1714418919fSjohnjiang /*
172*2d9fd380Sjfb8856606 * Helper function to call bulk SP/MP enqueue functions.
173*2d9fd380Sjfb8856606 * flag == 0 -> enqueue
174*2d9fd380Sjfb8856606 * flag == 1 -> dequeue
1754418919fSjohnjiang */
176*2d9fd380Sjfb8856606 static __rte_always_inline int
enqueue_dequeue_bulk_helper(const unsigned int flag,const int esize,struct thread_params * p)177*2d9fd380Sjfb8856606 enqueue_dequeue_bulk_helper(const unsigned int flag, const int esize,
178*2d9fd380Sjfb8856606 struct thread_params *p)
1794418919fSjohnjiang {
180*2d9fd380Sjfb8856606 int ret;
181*2d9fd380Sjfb8856606 const unsigned int iter_shift = 23;
182*2d9fd380Sjfb8856606 const unsigned int iterations = 1 << iter_shift;
183*2d9fd380Sjfb8856606 struct rte_ring *r = p->r;
184*2d9fd380Sjfb8856606 unsigned int bsize = p->size;
185*2d9fd380Sjfb8856606 unsigned int i;
186*2d9fd380Sjfb8856606 void *burst = NULL;
1874418919fSjohnjiang
1884418919fSjohnjiang #ifdef RTE_USE_C11_MEM_MODEL
1894418919fSjohnjiang if (__atomic_add_fetch(&lcore_count, 1, __ATOMIC_RELAXED) != 2)
1904418919fSjohnjiang #else
1914418919fSjohnjiang if (__sync_add_and_fetch(&lcore_count, 1) != 2)
1924418919fSjohnjiang #endif
1934418919fSjohnjiang while(lcore_count != 2)
1944418919fSjohnjiang rte_pause();
1954418919fSjohnjiang
196*2d9fd380Sjfb8856606 burst = test_ring_calloc(MAX_BURST, esize);
197*2d9fd380Sjfb8856606 if (burst == NULL)
198*2d9fd380Sjfb8856606 return -1;
199*2d9fd380Sjfb8856606
2004418919fSjohnjiang const uint64_t sp_start = rte_rdtsc();
2014418919fSjohnjiang for (i = 0; i < iterations; i++)
202*2d9fd380Sjfb8856606 do {
203*2d9fd380Sjfb8856606 if (flag == 0)
204*2d9fd380Sjfb8856606 ret = test_ring_enqueue(r, burst, esize, bsize,
205*2d9fd380Sjfb8856606 TEST_RING_THREAD_SPSC |
206*2d9fd380Sjfb8856606 TEST_RING_ELEM_BULK);
207*2d9fd380Sjfb8856606 else if (flag == 1)
208*2d9fd380Sjfb8856606 ret = test_ring_dequeue(r, burst, esize, bsize,
209*2d9fd380Sjfb8856606 TEST_RING_THREAD_SPSC |
210*2d9fd380Sjfb8856606 TEST_RING_ELEM_BULK);
211*2d9fd380Sjfb8856606 if (ret == 0)
2124418919fSjohnjiang rte_pause();
213*2d9fd380Sjfb8856606 } while (!ret);
2144418919fSjohnjiang const uint64_t sp_end = rte_rdtsc();
2154418919fSjohnjiang
2164418919fSjohnjiang const uint64_t mp_start = rte_rdtsc();
2174418919fSjohnjiang for (i = 0; i < iterations; i++)
218*2d9fd380Sjfb8856606 do {
219*2d9fd380Sjfb8856606 if (flag == 0)
220*2d9fd380Sjfb8856606 ret = test_ring_enqueue(r, burst, esize, bsize,
221*2d9fd380Sjfb8856606 TEST_RING_THREAD_MPMC |
222*2d9fd380Sjfb8856606 TEST_RING_ELEM_BULK);
223*2d9fd380Sjfb8856606 else if (flag == 1)
224*2d9fd380Sjfb8856606 ret = test_ring_dequeue(r, burst, esize, bsize,
225*2d9fd380Sjfb8856606 TEST_RING_THREAD_MPMC |
226*2d9fd380Sjfb8856606 TEST_RING_ELEM_BULK);
227*2d9fd380Sjfb8856606 if (ret == 0)
2284418919fSjohnjiang rte_pause();
229*2d9fd380Sjfb8856606 } while (!ret);
2304418919fSjohnjiang const uint64_t mp_end = rte_rdtsc();
2314418919fSjohnjiang
232*2d9fd380Sjfb8856606 p->spsc = ((double)(sp_end - sp_start))/(iterations * bsize);
233*2d9fd380Sjfb8856606 p->mpmc = ((double)(mp_end - mp_start))/(iterations * bsize);
2344418919fSjohnjiang return 0;
2354418919fSjohnjiang }
2364418919fSjohnjiang
/*
 * Function that uses rdtsc to measure timing for ring enqueue. Needs pair
 * thread running dequeue_bulk function
 */
static int
enqueue_bulk(void *p)
{
	/* legacy (pointer-sized) elements: esize == -1 */
	return enqueue_dequeue_bulk_helper(0, -1, p);
}
248*2d9fd380Sjfb8856606
/* Same as enqueue_bulk but for 16B ring elements */
static int
enqueue_bulk_16B(void *p)
{
	return enqueue_dequeue_bulk_helper(0, 16, p);
}
256*2d9fd380Sjfb8856606
/*
 * Function that uses rdtsc to measure timing for ring dequeue. Needs pair
 * thread running enqueue_bulk function
 */
static int
dequeue_bulk(void *p)
{
	/* legacy (pointer-sized) elements: esize == -1 */
	return enqueue_dequeue_bulk_helper(1, -1, p);
}
2684418919fSjohnjiang
/* Same as dequeue_bulk but for 16B ring elements */
static int
dequeue_bulk_16B(void *p)
{
	return enqueue_dequeue_bulk_helper(1, 16, p);
}
2764418919fSjohnjiang
2774418919fSjohnjiang /*
2784418919fSjohnjiang * Function that calls the enqueue and dequeue bulk functions on pairs of cores.
2794418919fSjohnjiang * used to measure ring perf between hyperthreads, cores and sockets.
2804418919fSjohnjiang */
281*2d9fd380Sjfb8856606 static int
run_on_core_pair(struct lcore_pair * cores,struct rte_ring * r,const int esize)282*2d9fd380Sjfb8856606 run_on_core_pair(struct lcore_pair *cores, struct rte_ring *r, const int esize)
2834418919fSjohnjiang {
284*2d9fd380Sjfb8856606 lcore_function_t *f1, *f2;
2854418919fSjohnjiang struct thread_params param1 = {0}, param2 = {0};
2864418919fSjohnjiang unsigned i;
287*2d9fd380Sjfb8856606
288*2d9fd380Sjfb8856606 if (esize == -1) {
289*2d9fd380Sjfb8856606 f1 = enqueue_bulk;
290*2d9fd380Sjfb8856606 f2 = dequeue_bulk;
291*2d9fd380Sjfb8856606 } else {
292*2d9fd380Sjfb8856606 f1 = enqueue_bulk_16B;
293*2d9fd380Sjfb8856606 f2 = dequeue_bulk_16B;
294*2d9fd380Sjfb8856606 }
295*2d9fd380Sjfb8856606
296*2d9fd380Sjfb8856606 for (i = 0; i < RTE_DIM(bulk_sizes); i++) {
2974418919fSjohnjiang lcore_count = 0;
2984418919fSjohnjiang param1.size = param2.size = bulk_sizes[i];
2994418919fSjohnjiang param1.r = param2.r = r;
300*2d9fd380Sjfb8856606 if (cores->c1 == rte_get_main_lcore()) {
3014418919fSjohnjiang rte_eal_remote_launch(f2, ¶m2, cores->c2);
3024418919fSjohnjiang f1(¶m1);
3034418919fSjohnjiang rte_eal_wait_lcore(cores->c2);
3044418919fSjohnjiang } else {
3054418919fSjohnjiang rte_eal_remote_launch(f1, ¶m1, cores->c1);
3064418919fSjohnjiang rte_eal_remote_launch(f2, ¶m2, cores->c2);
307*2d9fd380Sjfb8856606 if (rte_eal_wait_lcore(cores->c1) < 0)
308*2d9fd380Sjfb8856606 return -1;
309*2d9fd380Sjfb8856606 if (rte_eal_wait_lcore(cores->c2) < 0)
310*2d9fd380Sjfb8856606 return -1;
3114418919fSjohnjiang }
312*2d9fd380Sjfb8856606 test_ring_print_test_string(
313*2d9fd380Sjfb8856606 TEST_RING_THREAD_SPSC | TEST_RING_ELEM_BULK,
314*2d9fd380Sjfb8856606 esize, bulk_sizes[i], param1.spsc + param2.spsc);
315*2d9fd380Sjfb8856606 test_ring_print_test_string(
316*2d9fd380Sjfb8856606 TEST_RING_THREAD_MPMC | TEST_RING_ELEM_BULK,
317*2d9fd380Sjfb8856606 esize, bulk_sizes[i], param1.mpmc + param2.mpmc);
3184418919fSjohnjiang }
319*2d9fd380Sjfb8856606
320*2d9fd380Sjfb8856606 return 0;
3214418919fSjohnjiang }
3224418919fSjohnjiang
/* start gate: main lcore sets this to release the worker lcores */
static rte_atomic32_t synchro;
/* per-lcore iteration counts written by load_loop_fn_helper() */
static uint64_t queue_count[RTE_MAX_LCORE];

/* duration of each all-cores measurement, in milliseconds */
#define TIME_MS 100
3274418919fSjohnjiang
3284418919fSjohnjiang static int
load_loop_fn_helper(struct thread_params * p,const int esize)329*2d9fd380Sjfb8856606 load_loop_fn_helper(struct thread_params *p, const int esize)
3304418919fSjohnjiang {
3314418919fSjohnjiang uint64_t time_diff = 0;
3324418919fSjohnjiang uint64_t begin = 0;
3334418919fSjohnjiang uint64_t hz = rte_get_timer_hz();
3344418919fSjohnjiang uint64_t lcount = 0;
3354418919fSjohnjiang const unsigned int lcore = rte_lcore_id();
3364418919fSjohnjiang struct thread_params *params = p;
337*2d9fd380Sjfb8856606 void *burst = NULL;
3384418919fSjohnjiang
339*2d9fd380Sjfb8856606 burst = test_ring_calloc(MAX_BURST, esize);
340*2d9fd380Sjfb8856606 if (burst == NULL)
341*2d9fd380Sjfb8856606 return -1;
342*2d9fd380Sjfb8856606
343*2d9fd380Sjfb8856606 /* wait synchro for workers */
344*2d9fd380Sjfb8856606 if (lcore != rte_get_main_lcore())
3454418919fSjohnjiang while (rte_atomic32_read(&synchro) == 0)
3464418919fSjohnjiang rte_pause();
3474418919fSjohnjiang
3484418919fSjohnjiang begin = rte_get_timer_cycles();
3494418919fSjohnjiang while (time_diff < hz * TIME_MS / 1000) {
350*2d9fd380Sjfb8856606 test_ring_enqueue(params->r, burst, esize, params->size,
351*2d9fd380Sjfb8856606 TEST_RING_THREAD_MPMC | TEST_RING_ELEM_BULK);
352*2d9fd380Sjfb8856606 test_ring_dequeue(params->r, burst, esize, params->size,
353*2d9fd380Sjfb8856606 TEST_RING_THREAD_MPMC | TEST_RING_ELEM_BULK);
3544418919fSjohnjiang lcount++;
3554418919fSjohnjiang time_diff = rte_get_timer_cycles() - begin;
3564418919fSjohnjiang }
3574418919fSjohnjiang queue_count[lcore] = lcount;
358*2d9fd380Sjfb8856606
359*2d9fd380Sjfb8856606 rte_free(burst);
360*2d9fd380Sjfb8856606
3614418919fSjohnjiang return 0;
3624418919fSjohnjiang }
3634418919fSjohnjiang
/* Launch wrapper: stress loop with legacy (pointer-sized) elements */
static int
load_loop_fn(void *p)
{
	return load_loop_fn_helper(p, -1);
}
371*2d9fd380Sjfb8856606
/* Launch wrapper: stress loop with 16B elements */
static int
load_loop_fn_16B(void *p)
{
	return load_loop_fn_helper(p, 16);
}
379*2d9fd380Sjfb8856606
380*2d9fd380Sjfb8856606 static int
run_on_all_cores(struct rte_ring * r,const int esize)381*2d9fd380Sjfb8856606 run_on_all_cores(struct rte_ring *r, const int esize)
3824418919fSjohnjiang {
3830c6bd470Sfengbojiang uint64_t total;
3844418919fSjohnjiang struct thread_params param;
385*2d9fd380Sjfb8856606 lcore_function_t *lcore_f;
3864418919fSjohnjiang unsigned int i, c;
3874418919fSjohnjiang
388*2d9fd380Sjfb8856606 if (esize == -1)
389*2d9fd380Sjfb8856606 lcore_f = load_loop_fn;
390*2d9fd380Sjfb8856606 else
391*2d9fd380Sjfb8856606 lcore_f = load_loop_fn_16B;
392*2d9fd380Sjfb8856606
3934418919fSjohnjiang memset(¶m, 0, sizeof(struct thread_params));
3944418919fSjohnjiang for (i = 0; i < RTE_DIM(bulk_sizes); i++) {
3950c6bd470Sfengbojiang total = 0;
3964418919fSjohnjiang printf("\nBulk enq/dequeue count on size %u\n", bulk_sizes[i]);
3974418919fSjohnjiang param.size = bulk_sizes[i];
3984418919fSjohnjiang param.r = r;
3994418919fSjohnjiang
400*2d9fd380Sjfb8856606 /* clear synchro and start workers */
4014418919fSjohnjiang rte_atomic32_set(&synchro, 0);
402*2d9fd380Sjfb8856606 if (rte_eal_mp_remote_launch(lcore_f, ¶m, SKIP_MAIN) < 0)
4034418919fSjohnjiang return -1;
4044418919fSjohnjiang
405*2d9fd380Sjfb8856606 /* start synchro and launch test on main */
4064418919fSjohnjiang rte_atomic32_set(&synchro, 1);
407*2d9fd380Sjfb8856606 lcore_f(¶m);
4084418919fSjohnjiang
4094418919fSjohnjiang rte_eal_mp_wait_lcore();
4104418919fSjohnjiang
4114418919fSjohnjiang RTE_LCORE_FOREACH(c) {
4124418919fSjohnjiang printf("Core [%u] count = %"PRIu64"\n",
4134418919fSjohnjiang c, queue_count[c]);
4144418919fSjohnjiang total += queue_count[c];
4154418919fSjohnjiang }
4164418919fSjohnjiang
4174418919fSjohnjiang printf("Total count (size: %u): %"PRIu64"\n",
4184418919fSjohnjiang bulk_sizes[i], total);
4194418919fSjohnjiang }
4204418919fSjohnjiang
4214418919fSjohnjiang return 0;
4224418919fSjohnjiang }
4234418919fSjohnjiang
4244418919fSjohnjiang /*
4254418919fSjohnjiang * Test function that determines how long an enqueue + dequeue of a single item
4264418919fSjohnjiang * takes on a single lcore. Result is for comparison with the bulk enq+deq.
4274418919fSjohnjiang */
428*2d9fd380Sjfb8856606 static int
test_single_enqueue_dequeue(struct rte_ring * r,const int esize,const unsigned int api_type)429*2d9fd380Sjfb8856606 test_single_enqueue_dequeue(struct rte_ring *r, const int esize,
430*2d9fd380Sjfb8856606 const unsigned int api_type)
4314418919fSjohnjiang {
432*2d9fd380Sjfb8856606 const unsigned int iter_shift = 24;
433*2d9fd380Sjfb8856606 const unsigned int iterations = 1 << iter_shift;
434*2d9fd380Sjfb8856606 unsigned int i = 0;
4354418919fSjohnjiang void *burst = NULL;
4364418919fSjohnjiang
437*2d9fd380Sjfb8856606 /* alloc dummy object pointers */
438*2d9fd380Sjfb8856606 burst = test_ring_calloc(1, esize);
439*2d9fd380Sjfb8856606 if (burst == NULL)
440*2d9fd380Sjfb8856606 return -1;
4414418919fSjohnjiang
442*2d9fd380Sjfb8856606 const uint64_t start = rte_rdtsc();
4434418919fSjohnjiang for (i = 0; i < iterations; i++) {
444*2d9fd380Sjfb8856606 test_ring_enqueue(r, burst, esize, 1, api_type);
445*2d9fd380Sjfb8856606 test_ring_dequeue(r, burst, esize, 1, api_type);
4464418919fSjohnjiang }
447*2d9fd380Sjfb8856606 const uint64_t end = rte_rdtsc();
4484418919fSjohnjiang
449*2d9fd380Sjfb8856606 test_ring_print_test_string(api_type, esize, 1,
450*2d9fd380Sjfb8856606 ((double)(end - start)) / iterations);
451*2d9fd380Sjfb8856606
452*2d9fd380Sjfb8856606 rte_free(burst);
453*2d9fd380Sjfb8856606
454*2d9fd380Sjfb8856606 return 0;
4554418919fSjohnjiang }
4564418919fSjohnjiang
4574418919fSjohnjiang /*
458*2d9fd380Sjfb8856606 * Test that does both enqueue and dequeue on a core using the burst/bulk API
459*2d9fd380Sjfb8856606 * calls Results should be the same as for the bulk function called on a
460*2d9fd380Sjfb8856606 * single lcore.
4614418919fSjohnjiang */
462*2d9fd380Sjfb8856606 static int
test_burst_bulk_enqueue_dequeue(struct rte_ring * r,const int esize,const unsigned int api_type)463*2d9fd380Sjfb8856606 test_burst_bulk_enqueue_dequeue(struct rte_ring *r, const int esize,
464*2d9fd380Sjfb8856606 const unsigned int api_type)
4654418919fSjohnjiang {
466*2d9fd380Sjfb8856606 const unsigned int iter_shift = 23;
467*2d9fd380Sjfb8856606 const unsigned int iterations = 1 << iter_shift;
468*2d9fd380Sjfb8856606 unsigned int sz, i = 0;
469*2d9fd380Sjfb8856606 void **burst = NULL;
4704418919fSjohnjiang
471*2d9fd380Sjfb8856606 burst = test_ring_calloc(MAX_BURST, esize);
472*2d9fd380Sjfb8856606 if (burst == NULL)
473*2d9fd380Sjfb8856606 return -1;
474*2d9fd380Sjfb8856606
475*2d9fd380Sjfb8856606 for (sz = 0; sz < RTE_DIM(bulk_sizes); sz++) {
476*2d9fd380Sjfb8856606 const uint64_t start = rte_rdtsc();
4774418919fSjohnjiang for (i = 0; i < iterations; i++) {
478*2d9fd380Sjfb8856606 test_ring_enqueue(r, burst, esize, bulk_sizes[sz],
479*2d9fd380Sjfb8856606 api_type);
480*2d9fd380Sjfb8856606 test_ring_dequeue(r, burst, esize, bulk_sizes[sz],
481*2d9fd380Sjfb8856606 api_type);
4824418919fSjohnjiang }
483*2d9fd380Sjfb8856606 const uint64_t end = rte_rdtsc();
4844418919fSjohnjiang
485*2d9fd380Sjfb8856606 test_ring_print_test_string(api_type, esize, bulk_sizes[sz],
486*2d9fd380Sjfb8856606 ((double)(end - start)) / iterations);
4874418919fSjohnjiang }
4884418919fSjohnjiang
489*2d9fd380Sjfb8856606 rte_free(burst);
490*2d9fd380Sjfb8856606
491*2d9fd380Sjfb8856606 return 0;
492*2d9fd380Sjfb8856606 }
493*2d9fd380Sjfb8856606
494*2d9fd380Sjfb8856606 /* Run all tests for a given element size */
495*2d9fd380Sjfb8856606 static __rte_always_inline int
test_ring_perf_esize(const int esize)496*2d9fd380Sjfb8856606 test_ring_perf_esize(const int esize)
4974418919fSjohnjiang {
498*2d9fd380Sjfb8856606 struct lcore_pair cores;
499*2d9fd380Sjfb8856606 struct rte_ring *r = NULL;
5004418919fSjohnjiang
501*2d9fd380Sjfb8856606 /*
502*2d9fd380Sjfb8856606 * Performance test for legacy/_elem APIs
503*2d9fd380Sjfb8856606 * SP-SC/MP-MC, single
504*2d9fd380Sjfb8856606 */
505*2d9fd380Sjfb8856606 r = test_ring_create(RING_NAME, esize, RING_SIZE, rte_socket_id(), 0);
506*2d9fd380Sjfb8856606 if (r == NULL)
507*2d9fd380Sjfb8856606 goto test_fail;
508*2d9fd380Sjfb8856606
509*2d9fd380Sjfb8856606 printf("\n### Testing single element enq/deq ###\n");
510*2d9fd380Sjfb8856606 if (test_single_enqueue_dequeue(r, esize,
511*2d9fd380Sjfb8856606 TEST_RING_THREAD_SPSC | TEST_RING_ELEM_SINGLE) < 0)
512*2d9fd380Sjfb8856606 goto test_fail;
513*2d9fd380Sjfb8856606 if (test_single_enqueue_dequeue(r, esize,
514*2d9fd380Sjfb8856606 TEST_RING_THREAD_MPMC | TEST_RING_ELEM_SINGLE) < 0)
515*2d9fd380Sjfb8856606 goto test_fail;
516*2d9fd380Sjfb8856606
517*2d9fd380Sjfb8856606 printf("\n### Testing burst enq/deq ###\n");
518*2d9fd380Sjfb8856606 if (test_burst_bulk_enqueue_dequeue(r, esize,
519*2d9fd380Sjfb8856606 TEST_RING_THREAD_SPSC | TEST_RING_ELEM_BURST) < 0)
520*2d9fd380Sjfb8856606 goto test_fail;
521*2d9fd380Sjfb8856606 if (test_burst_bulk_enqueue_dequeue(r, esize,
522*2d9fd380Sjfb8856606 TEST_RING_THREAD_MPMC | TEST_RING_ELEM_BURST) < 0)
523*2d9fd380Sjfb8856606 goto test_fail;
524*2d9fd380Sjfb8856606
525*2d9fd380Sjfb8856606 printf("\n### Testing bulk enq/deq ###\n");
526*2d9fd380Sjfb8856606 if (test_burst_bulk_enqueue_dequeue(r, esize,
527*2d9fd380Sjfb8856606 TEST_RING_THREAD_SPSC | TEST_RING_ELEM_BULK) < 0)
528*2d9fd380Sjfb8856606 goto test_fail;
529*2d9fd380Sjfb8856606 if (test_burst_bulk_enqueue_dequeue(r, esize,
530*2d9fd380Sjfb8856606 TEST_RING_THREAD_MPMC | TEST_RING_ELEM_BULK) < 0)
531*2d9fd380Sjfb8856606 goto test_fail;
532*2d9fd380Sjfb8856606
533*2d9fd380Sjfb8856606 printf("\n### Testing empty bulk deq ###\n");
534*2d9fd380Sjfb8856606 test_empty_dequeue(r, esize,
535*2d9fd380Sjfb8856606 TEST_RING_THREAD_SPSC | TEST_RING_ELEM_BULK);
536*2d9fd380Sjfb8856606 test_empty_dequeue(r, esize,
537*2d9fd380Sjfb8856606 TEST_RING_THREAD_MPMC | TEST_RING_ELEM_BULK);
538*2d9fd380Sjfb8856606
539*2d9fd380Sjfb8856606 if (get_two_hyperthreads(&cores) == 0) {
540*2d9fd380Sjfb8856606 printf("\n### Testing using two hyperthreads ###\n");
541*2d9fd380Sjfb8856606 if (run_on_core_pair(&cores, r, esize) < 0)
542*2d9fd380Sjfb8856606 goto test_fail;
5434418919fSjohnjiang }
5444418919fSjohnjiang
545*2d9fd380Sjfb8856606 if (get_two_cores(&cores) == 0) {
546*2d9fd380Sjfb8856606 printf("\n### Testing using two physical cores ###\n");
547*2d9fd380Sjfb8856606 if (run_on_core_pair(&cores, r, esize) < 0)
548*2d9fd380Sjfb8856606 goto test_fail;
5494418919fSjohnjiang }
550*2d9fd380Sjfb8856606 if (get_two_sockets(&cores) == 0) {
551*2d9fd380Sjfb8856606 printf("\n### Testing using two NUMA nodes ###\n");
552*2d9fd380Sjfb8856606 if (run_on_core_pair(&cores, r, esize) < 0)
553*2d9fd380Sjfb8856606 goto test_fail;
5544418919fSjohnjiang }
555*2d9fd380Sjfb8856606
556*2d9fd380Sjfb8856606 printf("\n### Testing using all worker nodes ###\n");
557*2d9fd380Sjfb8856606 if (run_on_all_cores(r, esize) < 0)
558*2d9fd380Sjfb8856606 goto test_fail;
559*2d9fd380Sjfb8856606
560*2d9fd380Sjfb8856606 rte_ring_free(r);
561*2d9fd380Sjfb8856606
562*2d9fd380Sjfb8856606 return 0;
563*2d9fd380Sjfb8856606
564*2d9fd380Sjfb8856606 test_fail:
565*2d9fd380Sjfb8856606 rte_ring_free(r);
566*2d9fd380Sjfb8856606
567*2d9fd380Sjfb8856606 return -1;
5684418919fSjohnjiang }
5694418919fSjohnjiang
/* Entry point: run the full suite for every supported element size */
static int
test_ring_perf(void)
{
	/* -1 selects the legacy APIs, 16 the 16B _elem APIs */
	static const int esizes[] = { -1, 16 };
	unsigned int i;

	for (i = 0; i < RTE_DIM(esizes); i++)
		if (test_ring_perf_esize(esizes[i]) == -1)
			return -1;

	return 0;
}
5824418919fSjohnjiang
/* Register with the DPDK test framework as "ring_perf_autotest" */
REGISTER_TEST_COMMAND(ring_perf_autotest, test_ring_perf);
584