/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2018 Arm Limited
 */

#include <stdio.h>
#include <stdbool.h>
#include <inttypes.h>
#include <rte_pause.h>
#include <rte_rcu_qsbr.h>
#include <rte_hash.h>
#include <rte_hash_crc.h>
#include <rte_malloc.h>
#include <rte_cycles.h>
#include <unistd.h>

#include "test.h"

/* Worker cores and state shared between the test threads */
static uint16_t enabled_core_ids[RTE_MAX_LCORE];
static unsigned int num_cores;

static uint32_t *keys;
#define TOTAL_ENTRY (1024 * 8)
#define COUNTER_VALUE 4096
static uint32_t *hash_data[TOTAL_ENTRY];
static volatile uint8_t writer_done;
static volatile uint8_t all_registered;
static volatile uint32_t thr_id;

static struct rte_rcu_qsbr *t[RTE_MAX_LCORE];
static struct rte_hash *h;
static char hash_name[8];
static rte_atomic64_t updates, checks;
static rte_atomic64_t update_cycles, check_cycles;

/* Scale down results to 1000 operations to support lower
 * granularity clocks.
 */
#define RCU_SCALE_DOWN 1000

/* Simple way to allocate thread ids in 0 to RTE_MAX_LCORE space */
static inline uint32_t
alloc_thread_id(void)
{
	uint32_t tmp_thr_id;

	tmp_thr_id = __atomic_fetch_add(&thr_id, 1, __ATOMIC_RELAXED);
	if (tmp_thr_id >= RTE_MAX_LCORE)
		printf("Invalid thread id %u\n", tmp_thr_id);

	return tmp_thr_id;
}

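/*
 * Reader-side flow measured below: each reader registers itself on the QS
 * variable, reports online, then repeatedly reports its quiescent state.
 * Before exiting it reports offline and unregisters so the writer's
 * rte_rcu_qsbr_check() does not keep waiting for it.
 */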
static int
test_rcu_qsbr_reader_perf(void *arg)
{
	bool writer_present = (bool)arg;
	uint32_t thread_id = alloc_thread_id();
	uint64_t loop_cnt = 0;
	uint64_t begin, cycles;

	/* Register this thread to report its quiescent state */
	rte_rcu_qsbr_thread_register(t[0], thread_id);
	/* Make the thread online */
	rte_rcu_qsbr_thread_online(t[0], thread_id);

	begin = rte_rdtsc_precise();

	if (writer_present) {
		while (!writer_done) {
			/* Update quiescent state counter */
			rte_rcu_qsbr_quiescent(t[0], thread_id);
			loop_cnt++;
		}
	} else {
		while (loop_cnt < 100000000) {
			/* Update quiescent state counter */
			rte_rcu_qsbr_quiescent(t[0], thread_id);
			loop_cnt++;
		}
	}

	cycles = rte_rdtsc_precise() - begin;
	rte_atomic64_add(&update_cycles, cycles);
	rte_atomic64_add(&updates, loop_cnt);

	/* Make the thread offline */
	rte_rcu_qsbr_thread_offline(t[0], thread_id);
	/* Unregister before exiting so the writer does not wait on this thread */
	rte_rcu_qsbr_thread_unregister(t[0], thread_id);

	return 0;
}

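/*
 * Writer-side flow measured below: rte_rcu_qsbr_start() records a new token
 * and rte_rcu_qsbr_check() polls whether all registered readers have passed
 * through a quiescent state since that token. With 'wait' set to true the
 * check blocks until the grace period completes; with 'wait' set to false it
 * returns immediately and the caller may retry.
 */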
static int
test_rcu_qsbr_writer_perf(void *arg)
{
	bool wait = (bool)arg;
	uint64_t token = 0;
	uint64_t loop_cnt = 0;
	uint64_t begin, cycles;

	begin = rte_rdtsc_precise();

	do {
		/* Start the quiescent state query process */
		if (wait)
			token = rte_rcu_qsbr_start(t[0]);

		/* Check quiescent state status */
		rte_rcu_qsbr_check(t[0], token, wait);
		loop_cnt++;
	} while (loop_cnt < 20000000);

	cycles = rte_rdtsc_precise() - begin;
	rte_atomic64_add(&check_cycles, cycles);
	rte_atomic64_add(&checks, loop_cnt);
	return 0;
}

/*
 * Perf test: Reader/writer
 * Single writer, Multiple Readers, Single QS var, Blocking rcu_qsbr_check
 */
static int
test_rcu_qsbr_perf(void)
{
	size_t sz;
	unsigned int i, tmp_num_cores;

	writer_done = 0;

	rte_atomic64_clear(&updates);
	rte_atomic64_clear(&update_cycles);
	rte_atomic64_clear(&checks);
	rte_atomic64_clear(&check_cycles);

	printf("\nPerf Test: %d Readers/1 Writer('wait' in qsbr_check == true)\n",
		num_cores - 1);

	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);

	if (all_registered == 1)
		tmp_num_cores = num_cores - 1;
	else
		tmp_num_cores = RTE_MAX_LCORE;

	sz = rte_rcu_qsbr_get_memsize(tmp_num_cores);
	t[0] = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
						RTE_CACHE_LINE_SIZE);
	/* QS variable is initialized */
	rte_rcu_qsbr_init(t[0], tmp_num_cores);

	/* Reader threads are launched */
	for (i = 0; i < num_cores - 1; i++)
		rte_eal_remote_launch(test_rcu_qsbr_reader_perf, (void *)1,
					enabled_core_ids[i]);

	/* Writer thread is launched */
	rte_eal_remote_launch(test_rcu_qsbr_writer_perf,
				(void *)1, enabled_core_ids[i]);

	/* Wait for the writer thread */
	rte_eal_wait_lcore(enabled_core_ids[i]);
	writer_done = 1;

	/* Wait until all readers have exited */
	rte_eal_mp_wait_lcore();

	printf("Total quiescent state updates = %"PRIi64"\n",
		rte_atomic64_read(&updates));
	printf("Cycles per %d quiescent state updates: %"PRIi64"\n",
		RCU_SCALE_DOWN,
		rte_atomic64_read(&update_cycles) /
		(rte_atomic64_read(&updates) / RCU_SCALE_DOWN));
	printf("Total RCU checks = %"PRIi64"\n", rte_atomic64_read(&checks));
	printf("Cycles per %d checks: %"PRIi64"\n", RCU_SCALE_DOWN,
		rte_atomic64_read(&check_cycles) /
		(rte_atomic64_read(&checks) / RCU_SCALE_DOWN));

	rte_free(t[0]);

	return 0;
}

/*
 * Perf test: Readers
 * Multiple readers, Single QS variable, no writer thread
 */
static int
test_rcu_qsbr_rperf(void)
{
	size_t sz;
	unsigned int i, tmp_num_cores;

	rte_atomic64_clear(&updates);
	rte_atomic64_clear(&update_cycles);

	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);

	printf("\nPerf Test: %d Readers\n", num_cores);

	if (all_registered == 1)
		tmp_num_cores = num_cores;
	else
		tmp_num_cores = RTE_MAX_LCORE;

	sz = rte_rcu_qsbr_get_memsize(tmp_num_cores);
	t[0] = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
						RTE_CACHE_LINE_SIZE);
	/* QS variable is initialized */
	rte_rcu_qsbr_init(t[0], tmp_num_cores);

	/* Reader threads are launched */
	for (i = 0; i < num_cores; i++)
		rte_eal_remote_launch(test_rcu_qsbr_reader_perf, NULL,
					enabled_core_ids[i]);

	/* Wait until all readers have exited */
	rte_eal_mp_wait_lcore();

	printf("Total quiescent state updates = %"PRIi64"\n",
		rte_atomic64_read(&updates));
	printf("Cycles per %d quiescent state updates: %"PRIi64"\n",
		RCU_SCALE_DOWN,
		rte_atomic64_read(&update_cycles) /
		(rte_atomic64_read(&updates) / RCU_SCALE_DOWN));

	rte_free(t[0]);

	return 0;
}

/*
 * Perf test:
 * Multiple writers, Single QS variable, Non-blocking rcu_qsbr_check
 */
static int
test_rcu_qsbr_wperf(void)
{
	size_t sz;
	unsigned int i;

	rte_atomic64_clear(&checks);
	rte_atomic64_clear(&check_cycles);

	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);

	printf("\nPerf test: %d Writers ('wait' in qsbr_check == false)\n",
		num_cores);

	/* Number of readers does not matter for QS variable in this test
	 * case as no reader will be registered.
	 */
	sz = rte_rcu_qsbr_get_memsize(RTE_MAX_LCORE);
	t[0] = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
						RTE_CACHE_LINE_SIZE);
	/* QS variable is initialized */
	rte_rcu_qsbr_init(t[0], RTE_MAX_LCORE);

	/* Writer threads are launched */
	for (i = 0; i < num_cores; i++)
		rte_eal_remote_launch(test_rcu_qsbr_writer_perf,
				(void *)0, enabled_core_ids[i]);

	/* Wait until all writers have exited */
	rte_eal_mp_wait_lcore();

	printf("Total RCU checks = %"PRIi64"\n", rte_atomic64_read(&checks));
	printf("Cycles per %d checks: %"PRIi64"\n", RCU_SCALE_DOWN,
		rte_atomic64_read(&check_cycles) /
		(rte_atomic64_read(&checks) / RCU_SCALE_DOWN));

	rte_free(t[0]);

	return 0;
}

/*
 * RCU test cases using rte_hash data structure.
 */
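/*
 * Reader used by the hash-based tests below: it goes online at the start of
 * each pass over the table and offline at the end, wraps every lookup and the
 * per-thread counter updates in rte_rcu_qsbr_lock()/unlock() (debug aids that
 * compile to no-ops unless RCU debugging is enabled), and reports a quiescent
 * state once per pass.
 */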
static int
test_rcu_qsbr_hash_reader(void *arg)
{
	struct rte_rcu_qsbr *temp;
	struct rte_hash *hash = NULL;
	int i;
	uint64_t loop_cnt = 0;
	uint64_t begin, cycles;
	uint32_t thread_id = alloc_thread_id();
	uint8_t read_type = (uint8_t)((uintptr_t)arg);
	uint32_t *pdata;

	temp = t[read_type];
	hash = h;

	rte_rcu_qsbr_thread_register(temp, thread_id);

	begin = rte_rdtsc_precise();

	do {
		rte_rcu_qsbr_thread_online(temp, thread_id);
		for (i = 0; i < TOTAL_ENTRY; i++) {
			rte_rcu_qsbr_lock(temp, thread_id);
			if (rte_hash_lookup_data(hash, keys + i,
					(void **)&pdata) != -ENOENT) {
				pdata[thread_id] = 0;
				while (pdata[thread_id] < COUNTER_VALUE)
					pdata[thread_id]++;
			}
			rte_rcu_qsbr_unlock(temp, thread_id);
		}
		/* Update quiescent state counter */
		rte_rcu_qsbr_quiescent(temp, thread_id);
		rte_rcu_qsbr_thread_offline(temp, thread_id);
		loop_cnt++;
	} while (!writer_done);

	cycles = rte_rdtsc_precise() - begin;
	rte_atomic64_add(&update_cycles, cycles);
	rte_atomic64_add(&updates, loop_cnt);

	rte_rcu_qsbr_thread_unregister(temp, thread_id);

	return 0;
}

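/*
 * Each hash entry points to an array of RTE_MAX_LCORE counters, one slot per
 * reader thread, so readers can bump their own counter without contending
 * with each other while the writer checks for completed or untouched slots.
 */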
static struct rte_hash *
init_hash(void)
{
	int i;
	struct rte_hash *hash = NULL;

	snprintf(hash_name, 8, "hash");
	struct rte_hash_parameters hash_params = {
		.entries = TOTAL_ENTRY,
		.key_len = sizeof(uint32_t),
		.hash_func_init_val = 0,
		.socket_id = rte_socket_id(),
		.hash_func = rte_hash_crc,
		.extra_flag =
			RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF,
		.name = hash_name,
	};

	hash = rte_hash_create(&hash_params);
	if (hash == NULL) {
		printf("Hash create Failed\n");
		return NULL;
	}

	for (i = 0; i < TOTAL_ENTRY; i++) {
		hash_data[i] = rte_zmalloc(NULL,
				sizeof(uint32_t) * RTE_MAX_LCORE, 0);
		if (hash_data[i] == NULL) {
			printf("No memory\n");
			return NULL;
		}
	}
	keys = rte_malloc(NULL, sizeof(uint32_t) * TOTAL_ENTRY, 0);
	if (keys == NULL) {
		printf("No memory\n");
		return NULL;
	}

	for (i = 0; i < TOTAL_ENTRY; i++)
		keys[i] = i;

	for (i = 0; i < TOTAL_ENTRY; i++) {
		if (rte_hash_add_key_data(hash, keys + i,
				(void *)((uintptr_t)hash_data[i])) < 0) {
			printf("Hash key add Failed #%d\n", i);
			return NULL;
		}
	}
	return hash;
}

/*
 * Functional test:
 * Single writer, Single QS variable, Single QSBR query, Blocking rcu_qsbr_check
 */
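/*
 * Writer-side reclamation flow exercised here: delete the key from the hash,
 * call rte_rcu_qsbr_start() to record the grace period, wait for all readers
 * with rte_rcu_qsbr_check(..., true), and only then free the key slot and the
 * per-entry data that readers may still have been referencing.
 */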
static int
test_rcu_qsbr_sw_sv_1qs(void)
{
	uint64_t token, begin, cycles;
	size_t sz;
	unsigned int i, j, tmp_num_cores;
	int32_t pos;

	writer_done = 0;

	rte_atomic64_clear(&updates);
	rte_atomic64_clear(&update_cycles);
	rte_atomic64_clear(&checks);
	rte_atomic64_clear(&check_cycles);

	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);

	printf("\nPerf test: 1 writer, %d readers, 1 QSBR variable, 1 QSBR Query, Blocking QSBR Check\n", num_cores);

	if (all_registered == 1)
		tmp_num_cores = num_cores;
	else
		tmp_num_cores = RTE_MAX_LCORE;

	sz = rte_rcu_qsbr_get_memsize(tmp_num_cores);
	t[0] = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
						RTE_CACHE_LINE_SIZE);
	/* QS variable is initialized */
	rte_rcu_qsbr_init(t[0], tmp_num_cores);

	/* Shared data structure created */
	h = init_hash();
	if (h == NULL) {
		printf("Hash init failed\n");
		goto error;
	}

	/* Reader threads are launched */
	for (i = 0; i < num_cores; i++)
		rte_eal_remote_launch(test_rcu_qsbr_hash_reader, NULL,
					enabled_core_ids[i]);

	begin = rte_rdtsc_precise();

	for (i = 0; i < TOTAL_ENTRY; i++) {
		/* Delete elements from the shared data structure */
		pos = rte_hash_del_key(h, keys + i);
		if (pos < 0) {
			printf("Delete key failed #%d\n", keys[i]);
			goto error;
		}
		/* Start the quiescent state query process */
		token = rte_rcu_qsbr_start(t[0]);

		/* Check the quiescent state status */
		rte_rcu_qsbr_check(t[0], token, true);
		for (j = 0; j < tmp_num_cores; j++) {
			if (hash_data[i][j] != COUNTER_VALUE &&
				hash_data[i][j] != 0) {
				printf("Reader thread ID %u did not complete #%d = %d\n",
					j, i, hash_data[i][j]);
				goto error;
			}
		}

		if (rte_hash_free_key_with_position(h, pos) < 0) {
			printf("Failed to free the key #%d\n", keys[i]);
			goto error;
		}
		rte_free(hash_data[i]);
		hash_data[i] = NULL;
	}

	cycles = rte_rdtsc_precise() - begin;
	rte_atomic64_add(&check_cycles, cycles);
	rte_atomic64_add(&checks, i);

	writer_done = 1;

	/* Wait and check return value from reader threads */
	for (i = 0; i < num_cores; i++)
		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
			goto error;
	rte_hash_free(h);
	rte_free(keys);

	printf("Following numbers include calls to rte_hash functions\n");
	printf("Cycles per 1 quiescent state update(online/update/offline): %"PRIi64"\n",
		rte_atomic64_read(&update_cycles) /
		rte_atomic64_read(&updates));

	printf("Cycles per 1 check(start, check): %"PRIi64"\n\n",
		rte_atomic64_read(&check_cycles) /
		rte_atomic64_read(&checks));

	rte_free(t[0]);

	return 0;

error:
	writer_done = 1;
	/* Wait until all readers have exited */
	rte_eal_mp_wait_lcore();

	rte_hash_free(h);
	rte_free(keys);
	for (i = 0; i < TOTAL_ENTRY; i++)
		rte_free(hash_data[i]);

	rte_free(t[0]);

	return -1;
}

/*
 * Functional test:
 * Single writer, Single QS variable, Single QSBR query,
 * Non-blocking rcu_qsbr_check
 */
static int
test_rcu_qsbr_sw_sv_1qs_non_blocking(void)
{
	uint64_t token, begin, cycles;
	int ret;
	size_t sz;
	unsigned int i, j, tmp_num_cores;
	int32_t pos;

	writer_done = 0;

	printf("Perf test: 1 writer, %d readers, 1 QSBR variable, 1 QSBR Query, Non-Blocking QSBR check\n", num_cores);

	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);

	if (all_registered == 1)
		tmp_num_cores = num_cores;
	else
		tmp_num_cores = RTE_MAX_LCORE;

	sz = rte_rcu_qsbr_get_memsize(tmp_num_cores);
	t[0] = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
						RTE_CACHE_LINE_SIZE);
	/* QS variable is initialized */
	rte_rcu_qsbr_init(t[0], tmp_num_cores);

	/* Shared data structure created */
	h = init_hash();
	if (h == NULL) {
		printf("Hash init failed\n");
		goto error;
	}

	/* Reader threads are launched */
	for (i = 0; i < num_cores; i++)
		rte_eal_remote_launch(test_rcu_qsbr_hash_reader, NULL,
					enabled_core_ids[i]);

	begin = rte_rdtsc_precise();

	for (i = 0; i < TOTAL_ENTRY; i++) {
		/* Delete elements from the shared data structure */
		pos = rte_hash_del_key(h, keys + i);
		if (pos < 0) {
			printf("Delete key failed #%d\n", keys[i]);
			goto error;
		}
		/* Start the quiescent state query process */
		token = rte_rcu_qsbr_start(t[0]);

		/* Check the quiescent state status */
		do {
			ret = rte_rcu_qsbr_check(t[0], token, false);
		} while (ret == 0);
		for (j = 0; j < tmp_num_cores; j++) {
			if (hash_data[i][j] != COUNTER_VALUE &&
				hash_data[i][j] != 0) {
				printf("Reader thread ID %u did not complete #%d = %d\n",
					j, i, hash_data[i][j]);
				goto error;
			}
		}

		if (rte_hash_free_key_with_position(h, pos) < 0) {
			printf("Failed to free the key #%d\n", keys[i]);
			goto error;
		}
		rte_free(hash_data[i]);
		hash_data[i] = NULL;
	}

	cycles = rte_rdtsc_precise() - begin;
	rte_atomic64_add(&check_cycles, cycles);
	rte_atomic64_add(&checks, i);

	writer_done = 1;
	/* Wait and check return value from reader threads */
	for (i = 0; i < num_cores; i++)
		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
			goto error;
	rte_hash_free(h);
	rte_free(keys);

	printf("Following numbers include calls to rte_hash functions\n");
	printf("Cycles per 1 quiescent state update(online/update/offline): %"PRIi64"\n",
		rte_atomic64_read(&update_cycles) /
		rte_atomic64_read(&updates));

	printf("Cycles per 1 check(start, check): %"PRIi64"\n\n",
		rte_atomic64_read(&check_cycles) /
		rte_atomic64_read(&checks));

	rte_free(t[0]);

	return 0;

error:
	writer_done = 1;
	/* Wait until all readers have exited */
	rte_eal_mp_wait_lcore();

	rte_hash_free(h);
	rte_free(keys);
	for (i = 0; i < TOTAL_ENTRY; i++)
		rte_free(hash_data[i]);

	rte_free(t[0]);

	return -1;
}

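/*
 * Runs every test twice: first with 'all_registered' set, so each QS variable
 * is sized to just the reader threads in use, then (provided fewer than
 * RTE_MAX_LCORE cores are given) with RTE_MAX_LCORE-sized QS variables so
 * that some thread slots remain unregistered.
 */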
static int
test_rcu_qsbr_main(void)
{
	uint16_t core_id;

	if (rte_lcore_count() < 3) {
		printf("Not enough cores for rcu_qsbr_perf_autotest, expecting at least 3\n");
		return TEST_SKIPPED;
	}

	rte_atomic64_init(&updates);
	rte_atomic64_init(&update_cycles);
	rte_atomic64_init(&checks);
	rte_atomic64_init(&check_cycles);

	num_cores = 0;
	RTE_LCORE_FOREACH_WORKER(core_id) {
		enabled_core_ids[num_cores] = core_id;
		num_cores++;
	}

	printf("Number of cores provided = %d\n", num_cores);
	printf("Perf test with all reader threads registered\n");
	printf("--------------------------------------------\n");
	all_registered = 1;

	if (test_rcu_qsbr_perf() < 0)
		goto test_fail;

	if (test_rcu_qsbr_rperf() < 0)
		goto test_fail;

	if (test_rcu_qsbr_wperf() < 0)
		goto test_fail;

	if (test_rcu_qsbr_sw_sv_1qs() < 0)
		goto test_fail;

	if (test_rcu_qsbr_sw_sv_1qs_non_blocking() < 0)
		goto test_fail;

	/* Make sure the actual number of cores provided is less than
	 * RTE_MAX_LCORE. This will allow for some threads not
	 * to be registered on the QS variable.
	 */
	if (num_cores >= RTE_MAX_LCORE) {
		printf("Test failed! number of cores provided should be less than %d\n",
			RTE_MAX_LCORE);
		goto test_fail;
	}

	printf("Perf test with some reader threads registered\n");
	printf("---------------------------------------------\n");
	all_registered = 0;

	if (test_rcu_qsbr_perf() < 0)
		goto test_fail;

	if (test_rcu_qsbr_rperf() < 0)
		goto test_fail;

	if (test_rcu_qsbr_wperf() < 0)
		goto test_fail;

	if (test_rcu_qsbr_sw_sv_1qs() < 0)
		goto test_fail;

	if (test_rcu_qsbr_sw_sv_1qs_non_blocking() < 0)
		goto test_fail;

	printf("\n");

	return 0;

test_fail:
	return -1;
}

REGISTER_TEST_COMMAND(rcu_qsbr_perf_autotest, test_rcu_qsbr_main);