/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2020 Marvell International Ltd.
 */
4*2d9fd380Sjfb8856606
5*2d9fd380Sjfb8856606 #include <rte_cycles.h>
6*2d9fd380Sjfb8856606 #include <rte_debug.h>
7*2d9fd380Sjfb8856606 #include <rte_eal.h>
8*2d9fd380Sjfb8856606 #include <rte_eal_trace.h>
9*2d9fd380Sjfb8856606 #include <rte_malloc.h>
10*2d9fd380Sjfb8856606 #include <rte_lcore.h>
11*2d9fd380Sjfb8856606
12*2d9fd380Sjfb8856606 #include "test.h"
13*2d9fd380Sjfb8856606 #include "test_trace.h"
14*2d9fd380Sjfb8856606
15*2d9fd380Sjfb8856606 struct test_data;
16*2d9fd380Sjfb8856606
/**
 * State exchanged between the thread driving a test run and a single worker.
 *
 * One instance exists per worker; the structure is cache-line aligned.
 */
struct lcore_data {
	/** Stop request: written by signal_workers_to_finish(), polled by the worker loop. */
	volatile bool done;
	/** Start notification: written by the worker entry point, polled by wait_till_workers_are_ready(). */
	volatile bool started;
	/** Sum of the timer cycles measured over all completed batches. */
	uint64_t total_cycles;
	/** Number of completed batches (each batch is STEP x CENT_OPS operations). */
	uint64_t total_calls;
} __rte_cache_aligned;
23*2d9fd380Sjfb8856606
24*2d9fd380Sjfb8856606 struct test_data {
25*2d9fd380Sjfb8856606 unsigned int nb_workers;
26*2d9fd380Sjfb8856606 struct lcore_data ldata[];
27*2d9fd380Sjfb8856606 } __rte_cache_aligned;
28*2d9fd380Sjfb8856606
/* Number of CENT_OPS() batches executed between two timer reads. */
#define STEP 100

/* Expand OP ten times in a row, each occurrence followed by ';'. */
#define DECA_OPS(OP) \
	OP; OP; OP; OP; OP; OP; OP; OP; OP; OP;

/*
 * Expand OP one hundred times (ten groups of ten) as a single statement.
 * The expansion is fully unrolled; no loop is generated around OP.
 */
#define CENT_OPS(OP) do { \
	DECA_OPS(OP) \
	DECA_OPS(OP) \
	DECA_OPS(OP) \
	DECA_OPS(OP) \
	DECA_OPS(OP) \
	DECA_OPS(OP) \
	DECA_OPS(OP) \
	DECA_OPS(OP) \
	DECA_OPS(OP) \
	DECA_OPS(OP) \
} while (0)
42*2d9fd380Sjfb8856606
43*2d9fd380Sjfb8856606 static void
measure_perf(const char * str,struct test_data * data)44*2d9fd380Sjfb8856606 measure_perf(const char *str, struct test_data *data)
45*2d9fd380Sjfb8856606 {
46*2d9fd380Sjfb8856606 uint64_t hz = rte_get_timer_hz();
47*2d9fd380Sjfb8856606 uint64_t total_cycles = 0;
48*2d9fd380Sjfb8856606 uint64_t total_calls = 0;
49*2d9fd380Sjfb8856606 double cycles, ns;
50*2d9fd380Sjfb8856606 unsigned int workers;
51*2d9fd380Sjfb8856606
52*2d9fd380Sjfb8856606 for (workers = 0; workers < data->nb_workers; workers++) {
53*2d9fd380Sjfb8856606 total_cycles += data->ldata[workers].total_cycles;
54*2d9fd380Sjfb8856606 total_calls += data->ldata[workers].total_calls;
55*2d9fd380Sjfb8856606 }
56*2d9fd380Sjfb8856606
57*2d9fd380Sjfb8856606 cycles = total_calls ? (double)total_cycles / (double)total_calls : 0;
58*2d9fd380Sjfb8856606 cycles /= STEP;
59*2d9fd380Sjfb8856606 cycles /= 100; /* CENT_OPS */
60*2d9fd380Sjfb8856606
61*2d9fd380Sjfb8856606 ns = (cycles / (double)hz) * 1E9;
62*2d9fd380Sjfb8856606 printf("%16s: cycles=%f ns=%f\n", str, cycles, ns);
63*2d9fd380Sjfb8856606 }
64*2d9fd380Sjfb8856606
65*2d9fd380Sjfb8856606 static void
wait_till_workers_are_ready(struct test_data * data)66*2d9fd380Sjfb8856606 wait_till_workers_are_ready(struct test_data *data)
67*2d9fd380Sjfb8856606 {
68*2d9fd380Sjfb8856606 unsigned int workers;
69*2d9fd380Sjfb8856606
70*2d9fd380Sjfb8856606 for (workers = 0; workers < data->nb_workers; workers++)
71*2d9fd380Sjfb8856606 while (!data->ldata[workers].started)
72*2d9fd380Sjfb8856606 rte_pause();
73*2d9fd380Sjfb8856606 }
74*2d9fd380Sjfb8856606
75*2d9fd380Sjfb8856606 static void
signal_workers_to_finish(struct test_data * data)76*2d9fd380Sjfb8856606 signal_workers_to_finish(struct test_data *data)
77*2d9fd380Sjfb8856606 {
78*2d9fd380Sjfb8856606 unsigned int workers;
79*2d9fd380Sjfb8856606
80*2d9fd380Sjfb8856606 for (workers = 0; workers < data->nb_workers; workers++) {
81*2d9fd380Sjfb8856606 data->ldata[workers].done = 1;
82*2d9fd380Sjfb8856606 rte_smp_wmb();
83*2d9fd380Sjfb8856606 }
84*2d9fd380Sjfb8856606 }
85*2d9fd380Sjfb8856606
/*
 * Generate the two functions needed to benchmark one operation:
 *
 *  - worker_loop_<func>(): non-inlined measurement loop. Until the 'done'
 *    flag of its slot is set, it times batches of STEP x CENT_OPS(func)
 *    operations and accumulates the elapsed timer cycles and the batch
 *    count in the slot.
 *  - worker_fn_<func>(): entry point taking the slot as its void * argument.
 *    It raises the 'started' flag, issues a write barrier and then runs the
 *    measurement loop. Always returns 0.
 *
 * 'func' is pasted into the function names and expanded as the operation
 * inside CENT_OPS(), so it must be a single macro name.
 *
 * The helper is deliberately not named with a leading double underscore:
 * such identifiers are reserved for the implementation.
 */
#define WORKER_DEFINE(func) \
static void __rte_noinline \
worker_loop_##func(struct lcore_data *ldata) \
{ \
	uint64_t start; \
	int i; \
	while (!ldata->done) { \
		start = rte_get_timer_cycles(); \
		for (i = 0; i < STEP; i++) \
			CENT_OPS(func); \
		ldata->total_cycles += rte_get_timer_cycles() - start; \
		ldata->total_calls++; \
	} \
} \
static int \
worker_fn_##func(void *arg) \
{ \
	struct lcore_data *ldata = arg; \
	ldata->started = 1; \
	rte_smp_wmb(); \
	worker_loop_##func(ldata); \
	return 0; \
}
109*2d9fd380Sjfb8856606
110*2d9fd380Sjfb8856606
111*2d9fd380Sjfb8856606 /* Test to find trace overhead */
112*2d9fd380Sjfb8856606 #define GENERIC_VOID rte_eal_trace_generic_void()
113*2d9fd380Sjfb8856606 #define GENERIC_U64 rte_eal_trace_generic_u64(0x120000)
114*2d9fd380Sjfb8856606 #define GENERIC_INT rte_eal_trace_generic_int(-34)
115*2d9fd380Sjfb8856606 #define GENERIC_FLOAT rte_eal_trace_generic_float(3.3f)
116*2d9fd380Sjfb8856606 #define GENERIC_DOUBLE rte_eal_trace_generic_double(3.66666)
117*2d9fd380Sjfb8856606 #define GENERIC_STR rte_eal_trace_generic_str("hello world")
118*2d9fd380Sjfb8856606 #define VOID_FP app_dpdk_test_fp()
119*2d9fd380Sjfb8856606
120*2d9fd380Sjfb8856606 WORKER_DEFINE(GENERIC_VOID)
WORKER_DEFINE(GENERIC_U64)121*2d9fd380Sjfb8856606 WORKER_DEFINE(GENERIC_U64)
122*2d9fd380Sjfb8856606 WORKER_DEFINE(GENERIC_INT)
123*2d9fd380Sjfb8856606 WORKER_DEFINE(GENERIC_FLOAT)
124*2d9fd380Sjfb8856606 WORKER_DEFINE(GENERIC_DOUBLE)
125*2d9fd380Sjfb8856606 WORKER_DEFINE(GENERIC_STR)
126*2d9fd380Sjfb8856606 WORKER_DEFINE(VOID_FP)
127*2d9fd380Sjfb8856606
128*2d9fd380Sjfb8856606 static void
129*2d9fd380Sjfb8856606 run_test(const char *str, lcore_function_t f, struct test_data *data, size_t sz)
130*2d9fd380Sjfb8856606 {
131*2d9fd380Sjfb8856606 unsigned int id, worker = 0;
132*2d9fd380Sjfb8856606
133*2d9fd380Sjfb8856606 memset(data, 0, sz);
134*2d9fd380Sjfb8856606 data->nb_workers = rte_lcore_count() - 1;
135*2d9fd380Sjfb8856606 RTE_LCORE_FOREACH_WORKER(id)
136*2d9fd380Sjfb8856606 rte_eal_remote_launch(f, &data->ldata[worker++], id);
137*2d9fd380Sjfb8856606
138*2d9fd380Sjfb8856606 wait_till_workers_are_ready(data);
139*2d9fd380Sjfb8856606 rte_delay_ms(100); /* Wait for some time to accumulate the stats */
140*2d9fd380Sjfb8856606 measure_perf(str, data);
141*2d9fd380Sjfb8856606 signal_workers_to_finish(data);
142*2d9fd380Sjfb8856606
143*2d9fd380Sjfb8856606 RTE_LCORE_FOREACH_WORKER(id)
144*2d9fd380Sjfb8856606 rte_eal_wait_lcore(id);
145*2d9fd380Sjfb8856606 }
146*2d9fd380Sjfb8856606
147*2d9fd380Sjfb8856606 static int
test_trace_perf(void)148*2d9fd380Sjfb8856606 test_trace_perf(void)
149*2d9fd380Sjfb8856606 {
150*2d9fd380Sjfb8856606 unsigned int nb_cores, nb_workers;
151*2d9fd380Sjfb8856606 struct test_data *data;
152*2d9fd380Sjfb8856606 size_t sz;
153*2d9fd380Sjfb8856606
154*2d9fd380Sjfb8856606 nb_cores = rte_lcore_count();
155*2d9fd380Sjfb8856606 nb_workers = nb_cores - 1;
156*2d9fd380Sjfb8856606 if (nb_cores < 2) {
157*2d9fd380Sjfb8856606 printf("Need minimum two cores for testing\n");
158*2d9fd380Sjfb8856606 return TEST_SKIPPED;
159*2d9fd380Sjfb8856606 }
160*2d9fd380Sjfb8856606
161*2d9fd380Sjfb8856606 printf("Timer running at %5.2fMHz\n", rte_get_timer_hz()/1E6);
162*2d9fd380Sjfb8856606 sz = sizeof(struct test_data);
163*2d9fd380Sjfb8856606 sz += nb_workers * sizeof(struct lcore_data);
164*2d9fd380Sjfb8856606
165*2d9fd380Sjfb8856606 data = rte_zmalloc(NULL, sz, RTE_CACHE_LINE_SIZE);
166*2d9fd380Sjfb8856606 if (data == NULL) {
167*2d9fd380Sjfb8856606 printf("Failed to allocate memory\n");
168*2d9fd380Sjfb8856606 return TEST_FAILED;
169*2d9fd380Sjfb8856606 }
170*2d9fd380Sjfb8856606
171*2d9fd380Sjfb8856606 run_test("void", worker_fn_GENERIC_VOID, data, sz);
172*2d9fd380Sjfb8856606 run_test("u64", worker_fn_GENERIC_U64, data, sz);
173*2d9fd380Sjfb8856606 run_test("int", worker_fn_GENERIC_INT, data, sz);
174*2d9fd380Sjfb8856606 run_test("float", worker_fn_GENERIC_FLOAT, data, sz);
175*2d9fd380Sjfb8856606 run_test("double", worker_fn_GENERIC_DOUBLE, data, sz);
176*2d9fd380Sjfb8856606 run_test("string", worker_fn_GENERIC_STR, data, sz);
177*2d9fd380Sjfb8856606 run_test("void_fp", worker_fn_VOID_FP, data, sz);
178*2d9fd380Sjfb8856606
179*2d9fd380Sjfb8856606 rte_free(data);
180*2d9fd380Sjfb8856606 return TEST_SUCCESS;
181*2d9fd380Sjfb8856606 }
182*2d9fd380Sjfb8856606
183*2d9fd380Sjfb8856606 REGISTER_TEST_COMMAND(trace_perf_autotest, test_trace_perf);
184