/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include <stdarg.h>
#include <errno.h>
#include <sys/queue.h>

#include <rte_common.h>
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_memory.h>
#include <rte_launch.h>
#include <rte_cycles.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_spinlock.h>
#include <rte_malloc.h>
#include <rte_mbuf_pool_ops.h>

#include "test.h"

/*
 * Mempool performance
 * ===================
 *
 * Each core gets *n_keep* objects in bulks of *n_get_bulk*. Then,
 * the objects are put back in the pool in bulks of *n_put_bulk*.
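 *
 * For example, with n_keep = 128 and n_get_bulk = n_put_bulk = 32, each
 * pass performs four gets of 32 objects followed by four puts of 32.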
 *
 * This sequence is repeated for TIME_S seconds.
 *
 * The test is run with the following configurations:
 *
 * - Cores configuration (*cores*)
 *
 *   - One core with cache
 *   - Two cores with cache
 *   - Max. cores with cache
 *   - One core without cache
 *   - Two cores without cache
 *   - Max. cores without cache
 *   - One core with user-owned cache
 *   - Two cores with user-owned cache
 *   - Max. cores with user-owned cache
 *
 * - Bulk size (*n_get_bulk*, *n_put_bulk*)
 *
 *   - Bulk get of 1, 4 or 32 objects
 *   - Bulk put of 1, 4 or 32 objects
 *
 * - Number of kept objects (*n_keep*)
 *
 *   - 32
 *   - 128
 */

#define N 65536
#define TIME_S 5
#define MEMPOOL_ELT_SIZE 2048
#define MAX_KEEP 128
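/* large enough for every lcore to keep MAX_KEEP objects plus a completely
 * full per-lcore cache
 */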
#define MEMPOOL_SIZE ((rte_lcore_count()*(MAX_KEEP+RTE_MEMPOOL_CACHE_MAX_SIZE))-1)

#define LOG_ERR() printf("test failed at %s():%d\n", __func__, __LINE__)
#define RET_ERR() do { \
		LOG_ERR(); \
		return -1; \
	} while (0)
#define GOTO_ERR(var, label) do { \
		LOG_ERR(); \
		var = -1; \
		goto label; \
	} while (0)

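/* if nonzero, each lcore uses a user-owned (external) mempool cache of
 * external_cache_size objects instead of the built-in per-lcore cache
 */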
static int use_external_cache;
static unsigned external_cache_size = RTE_MEMPOOL_CACHE_MAX_SIZE;

static rte_atomic32_t synchro;

/* number of objects in one bulk operation (get or put) */
static unsigned n_get_bulk;
static unsigned n_put_bulk;

/* number of objects retrieved from mempool before putting them back */
static unsigned n_keep;

/* number of enqueues / dequeues */
struct mempool_test_stats {
	uint64_t enq_count;
} __rte_cache_aligned;

static struct mempool_test_stats stats[RTE_MAX_LCORE];

/*
 * save the object number in the first 4 bytes of object data. All
 * other bytes are set to 0.
 */
static void
my_obj_init(struct rte_mempool *mp, __rte_unused void *arg,
	    void *obj, unsigned i)
{
	uint32_t *objnum = obj;
	memset(obj, 0, mp->elt_size);
	*objnum = i;
}

static int
per_lcore_mempool_test(void *arg)
{
	void *obj_table[MAX_KEEP];
	unsigned i, idx;
	struct rte_mempool *mp = arg;
	unsigned lcore_id = rte_lcore_id();
	int ret = 0;
	uint64_t start_cycles, end_cycles;
	uint64_t time_diff = 0, hz = rte_get_timer_hz();
	struct rte_mempool_cache *cache;

	if (use_external_cache) {
		/* Create a user-owned mempool cache. */
		cache = rte_mempool_cache_create(external_cache_size,
						 SOCKET_ID_ANY);
		if (cache == NULL)
			RET_ERR();
	} else {
		/* May be NULL if cache is disabled. */
		cache = rte_mempool_default_cache(mp, lcore_id);
	}

	/* n_get_bulk and n_put_bulk must be divisors of n_keep */
	if (((n_keep / n_get_bulk) * n_get_bulk) != n_keep)
		GOTO_ERR(ret, out);
	if (((n_keep / n_put_bulk) * n_put_bulk) != n_keep)
		GOTO_ERR(ret, out);

	stats[lcore_id].enq_count = 0;

	/* workers wait for the start signal from the main lcore */
	if (lcore_id != rte_get_main_lcore())
		while (rte_atomic32_read(&synchro) == 0)
			;

	start_cycles = rte_get_timer_cycles();

	while (time_diff/hz < TIME_S) {
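		/*
		 * Read the timer only once per batch of N operations, so
		 * timer overhead stays out of the measured get/put cost.
		 */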
		for (i = 0; likely(i < (N/n_keep)); i++) {
			/* get n_keep objects by bulk of n_get_bulk */
			idx = 0;
			while (idx < n_keep) {
				ret = rte_mempool_generic_get(mp,
						&obj_table[idx],
						n_get_bulk,
						cache);
				if (unlikely(ret < 0)) {
					rte_mempool_dump(stdout, mp);
					/* in this case, objects are lost... */
					GOTO_ERR(ret, out);
				}
				idx += n_get_bulk;
			}

			/* put the objects back */
			idx = 0;
			while (idx < n_keep) {
				rte_mempool_generic_put(mp, &obj_table[idx],
						n_put_bulk,
						cache);
				idx += n_put_bulk;
			}
		}
		end_cycles = rte_get_timer_cycles();
		time_diff = end_cycles - start_cycles;
		stats[lcore_id].enq_count += N;
	}

out:
	if (use_external_cache) {
		rte_mempool_cache_flush(cache, mp);
		rte_mempool_cache_free(cache);
	}

	return ret;
}

/* launch the per-lcore test on all requested cores, and display the result */
static int
launch_cores(struct rte_mempool *mp, unsigned int cores)
{
	unsigned lcore_id;
	uint64_t rate;
	int ret;
	unsigned cores_save = cores;

	rte_atomic32_set(&synchro, 0);

	/* reset stats */
	memset(stats, 0, sizeof(stats));

	printf("mempool_autotest cache=%u cores=%u n_get_bulk=%u "
	       "n_put_bulk=%u n_keep=%u ",
	       use_external_cache ?
	       external_cache_size : (unsigned) mp->cache_size,
	       cores, n_get_bulk, n_put_bulk, n_keep);

	if (rte_mempool_avail_count(mp) != MEMPOOL_SIZE) {
		printf("mempool is not full\n");
		return -1;
	}

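	/* launch the test on cores - 1 worker lcores; the main lcore runs
	 * the remaining instance itself
	 */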
	RTE_LCORE_FOREACH_WORKER(lcore_id) {
		if (cores == 1)
			break;
		cores--;
		rte_eal_remote_launch(per_lcore_mempool_test,
				      mp, lcore_id);
	}

	/* start synchro and launch test on main */
	rte_atomic32_set(&synchro, 1);

	ret = per_lcore_mempool_test(mp);

	cores = cores_save;
	RTE_LCORE_FOREACH_WORKER(lcore_id) {
		if (cores == 1)
			break;
		cores--;
		if (rte_eal_wait_lcore(lcore_id) < 0)
			ret = -1;
	}

	if (ret < 0) {
		printf("per-lcore test returned -1\n");
		return -1;
	}

	rate = 0;
	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
		rate += (stats[lcore_id].enq_count / TIME_S);

	printf("rate_persec=%" PRIu64 "\n", rate);

	return 0;
}

/* for a given number of cores, launch all test cases */
static int
do_one_mempool_test(struct rte_mempool *mp, unsigned int cores)
{
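	/* zero-terminated tables of the bulk sizes and keep counts to sweep */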
	unsigned bulk_tab_get[] = { 1, 4, 32, 0 };
	unsigned bulk_tab_put[] = { 1, 4, 32, 0 };
	unsigned keep_tab[] = { 32, 128, 0 };
	unsigned *get_bulk_ptr;
	unsigned *put_bulk_ptr;
	unsigned *keep_ptr;
	int ret;

	for (get_bulk_ptr = bulk_tab_get; *get_bulk_ptr; get_bulk_ptr++) {
		for (put_bulk_ptr = bulk_tab_put; *put_bulk_ptr; put_bulk_ptr++) {
			for (keep_ptr = keep_tab; *keep_ptr; keep_ptr++) {

				n_get_bulk = *get_bulk_ptr;
				n_put_bulk = *put_bulk_ptr;
				n_keep = *keep_ptr;
				ret = launch_cores(mp, cores);

				if (ret < 0)
					return -1;
			}
		}
	}
	return 0;
}

static int
test_mempool_perf(void)
{
	struct rte_mempool *mp_cache = NULL;
	struct rte_mempool *mp_nocache = NULL;
	struct rte_mempool *default_pool = NULL;
	const char *default_pool_ops;
	int ret = -1;

	rte_atomic32_init(&synchro);

	/* create a mempool (without cache) */
	mp_nocache = rte_mempool_create("perf_test_nocache", MEMPOOL_SIZE,
					MEMPOOL_ELT_SIZE, 0, 0,
					NULL, NULL,
					my_obj_init, NULL,
					SOCKET_ID_ANY, 0);
	if (mp_nocache == NULL)
		goto err;

	/* create a mempool (with cache) */
	mp_cache = rte_mempool_create("perf_test_cache", MEMPOOL_SIZE,
				      MEMPOOL_ELT_SIZE,
				      RTE_MEMPOOL_CACHE_MAX_SIZE, 0,
				      NULL, NULL,
				      my_obj_init, NULL,
				      SOCKET_ID_ANY, 0);
	if (mp_cache == NULL)
		goto err;

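	/* rte_mbuf_best_mempool_ops() returns the ops name an mbuf pool
	 * would use by default on this system
	 */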
	default_pool_ops = rte_mbuf_best_mempool_ops();
	/* Create a mempool based on Default handler */
	default_pool = rte_mempool_create_empty("default_pool",
						MEMPOOL_SIZE,
						MEMPOOL_ELT_SIZE,
						0, 0,
						SOCKET_ID_ANY, 0);

	if (default_pool == NULL) {
		printf("cannot allocate %s mempool\n", default_pool_ops);
		goto err;
	}

	if (rte_mempool_set_ops_byname(default_pool, default_pool_ops, NULL)
	    < 0) {
		printf("cannot set %s handler\n", default_pool_ops);
		goto err;
	}

	if (rte_mempool_populate_default(default_pool) < 0) {
		printf("cannot populate %s mempool\n", default_pool_ops);
		goto err;
	}

	rte_mempool_obj_iter(default_pool, my_obj_init, NULL);

	/* performance test with 1, 2 and max cores */
	printf("start performance test (without cache)\n");

	if (do_one_mempool_test(mp_nocache, 1) < 0)
		goto err;

	if (do_one_mempool_test(mp_nocache, 2) < 0)
		goto err;

	if (do_one_mempool_test(mp_nocache, rte_lcore_count()) < 0)
		goto err;

	/* performance test with 1, 2 and max cores */
	printf("start performance test for %s (without cache)\n",
	       default_pool_ops);

	if (do_one_mempool_test(default_pool, 1) < 0)
		goto err;

	if (do_one_mempool_test(default_pool, 2) < 0)
		goto err;

	if (do_one_mempool_test(default_pool, rte_lcore_count()) < 0)
		goto err;

	/* performance test with 1, 2 and max cores */
	printf("start performance test (with cache)\n");

	if (do_one_mempool_test(mp_cache, 1) < 0)
		goto err;

	if (do_one_mempool_test(mp_cache, 2) < 0)
		goto err;

	if (do_one_mempool_test(mp_cache, rte_lcore_count()) < 0)
		goto err;

	/* performance test with 1, 2 and max cores */
	printf("start performance test (with user-owned cache)\n");
	use_external_cache = 1;

	if (do_one_mempool_test(mp_nocache, 1) < 0)
		goto err;

	if (do_one_mempool_test(mp_nocache, 2) < 0)
		goto err;

	if (do_one_mempool_test(mp_nocache, rte_lcore_count()) < 0)
		goto err;

	rte_mempool_list_dump(stdout);

	ret = 0;

err:
	rte_mempool_free(mp_cache);
	rte_mempool_free(mp_nocache);
	rte_mempool_free(default_pool);
	return ret;
}

REGISTER_TEST_COMMAND(mempool_perf_autotest, test_mempool_perf);
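
/* Registered as "mempool_perf_autotest"; typically invoked from the
 * dpdk-test application's command line.
 */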