/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include <stdarg.h>
#include <errno.h>
#include <sys/queue.h>

#include <rte_common.h>
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_memory.h>
#include <rte_launch.h>
#include <rte_cycles.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_spinlock.h>
#include <rte_malloc.h>
#include <rte_mbuf_pool_ops.h>

#include "test.h"
/*
 * Mempool performance
 * ===================
 *
 *    Each core gets *n_keep* objects per bulk of *n_get_bulk*. Then,
 *    objects are put back in the pool per bulk of *n_put_bulk*.
 *
 *    This sequence is repeated for TIME_S seconds.
 *
 *    This test is done on the following configurations:
 *
 *    - Cores configuration (*cores*)
 *
 *      - One core with cache
 *      - Two cores with cache
 *      - Max. cores with cache
 *      - One core without cache
 *      - Two cores without cache
 *      - Max. cores without cache
 *      - One core with user-owned cache
 *      - Two cores with user-owned cache
 *      - Max. cores with user-owned cache
 *
 *    - Bulk size (*n_get_bulk*, *n_put_bulk*)
 *
 *      - Bulk get from 1 to 32
 *      - Bulk put from 1 to 32
 *
 *    - Number of kept objects (*n_keep*)
 *
 *      - 32
 *      - 128
 */
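
/*
 * A typical way to run this benchmark (exact binary name and EAL options
 * depend on the build and the machine) is to start the dpdk-test
 * application and enter "mempool_perf_autotest" at its prompt; the
 * command name is registered at the bottom of this file.
 */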

#define N 65536
#define TIME_S 5
#define MEMPOOL_ELT_SIZE 2048
#define MAX_KEEP 128
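/*
 * Size the pool so that each lcore can hold MAX_KEEP objects while its
 * per-lcore cache is completely full (RTE_MEMPOOL_CACHE_MAX_SIZE objects);
 * the -1 keeps the requested count just below that upper bound.
 */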
#define MEMPOOL_SIZE ((rte_lcore_count()*(MAX_KEEP+RTE_MEMPOOL_CACHE_MAX_SIZE))-1)

#define LOG_ERR() printf("test failed at %s():%d\n", __func__, __LINE__)
#define RET_ERR() do {							\
		LOG_ERR();						\
		return -1;						\
	} while (0)
#define GOTO_ERR(var, label) do {					\
		LOG_ERR();						\
		var = -1;						\
		goto label;						\
	} while (0)

static int use_external_cache;
static unsigned external_cache_size = RTE_MEMPOOL_CACHE_MAX_SIZE;

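/* start flag: worker lcores spin until the main lcore sets this to 1 */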
static rte_atomic32_t synchro;

/* number of objects in one bulk operation (get or put) */
static unsigned n_get_bulk;
static unsigned n_put_bulk;

/* number of objects retrieved from mempool before putting them back */
static unsigned n_keep;

/* number of objects enqueued/dequeued by each lcore; the structure is
 * cache-aligned to avoid false sharing between lcores */
struct mempool_test_stats {
	uint64_t enq_count;
} __rte_cache_aligned;

static struct mempool_test_stats stats[RTE_MAX_LCORE];

/*
 * save the object number in the first 4 bytes of object data. All
 * other bytes are set to 0.
 */
static void
my_obj_init(struct rte_mempool *mp, __rte_unused void *arg,
	    void *obj, unsigned i)
{
	uint32_t *objnum = obj;
	memset(obj, 0, mp->elt_size);
	*objnum = i;
}

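/*
 * Per-lcore worker: repeatedly get n_keep objects in bulks of n_get_bulk
 * and put them back in bulks of n_put_bulk, counting how many objects
 * were cycled through the pool during TIME_S seconds.
 */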
static int
per_lcore_mempool_test(void *arg)
{
	void *obj_table[MAX_KEEP];
	unsigned i, idx;
	struct rte_mempool *mp = arg;
	unsigned lcore_id = rte_lcore_id();
	int ret = 0;
	uint64_t start_cycles, end_cycles;
	uint64_t time_diff = 0, hz = rte_get_timer_hz();
	struct rte_mempool_cache *cache;

	if (use_external_cache) {
		/* Create a user-owned mempool cache. */
		cache = rte_mempool_cache_create(external_cache_size,
						 SOCKET_ID_ANY);
		if (cache == NULL)
			RET_ERR();
	} else {
		/* May be NULL if cache is disabled. */
		cache = rte_mempool_default_cache(mp, lcore_id);
	}

	/* n_get_bulk and n_put_bulk must be divisors of n_keep */
	if (((n_keep / n_get_bulk) * n_get_bulk) != n_keep)
		GOTO_ERR(ret, out);
	if (((n_keep / n_put_bulk) * n_put_bulk) != n_keep)
		GOTO_ERR(ret, out);

	stats[lcore_id].enq_count = 0;

	/* workers wait for the main lcore to release the start flag */
	if (lcore_id != rte_get_main_lcore())
		while (rte_atomic32_read(&synchro) == 0);

	start_cycles = rte_get_timer_cycles();

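	/* Run get/put cycles until TIME_S seconds have elapsed; the timer
	 * is sampled only once per N objects so that reading it stays out
	 * of the measured get/put path. */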
	while (time_diff/hz < TIME_S) {
		for (i = 0; likely(i < (N/n_keep)); i++) {
			/* get n_keep objects by bulk of n_get_bulk */
			idx = 0;
			while (idx < n_keep) {
				ret = rte_mempool_generic_get(mp,
							      &obj_table[idx],
							      n_get_bulk,
							      cache);
				if (unlikely(ret < 0)) {
					rte_mempool_dump(stdout, mp);
					/* in this case, objects are lost... */
					GOTO_ERR(ret, out);
				}
				idx += n_get_bulk;
			}

			/* put the objects back */
			idx = 0;
			while (idx < n_keep) {
				rte_mempool_generic_put(mp, &obj_table[idx],
							n_put_bulk,
							cache);
				idx += n_put_bulk;
			}
		}
		end_cycles = rte_get_timer_cycles();
		time_diff = end_cycles - start_cycles;
		stats[lcore_id].enq_count += N;
	}

out:
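	/* Flush objects still held in the user-owned cache back to the
	 * pool before freeing the cache, otherwise they would be leaked. */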
	if (use_external_cache) {
		rte_mempool_cache_flush(cache, mp);
		rte_mempool_cache_free(cache);
	}

	return ret;
}

/* launch the per-lcore test on all requested cores, and display the result */
static int
launch_cores(struct rte_mempool *mp, unsigned int cores)
{
	unsigned lcore_id;
	uint64_t rate;
	int ret;
	unsigned cores_save = cores;

	rte_atomic32_set(&synchro, 0);

	/* reset stats */
	memset(stats, 0, sizeof(stats));

	printf("mempool_autotest cache=%u cores=%u n_get_bulk=%u "
	       "n_put_bulk=%u n_keep=%u ",
	       use_external_cache ?
		   external_cache_size : (unsigned) mp->cache_size,
	       cores, n_get_bulk, n_put_bulk, n_keep);

	if (rte_mempool_avail_count(mp) != MEMPOOL_SIZE) {
		printf("mempool is not full\n");
		return -1;
	}

	RTE_LCORE_FOREACH_WORKER(lcore_id) {
		if (cores == 1)
			break;
		cores--;
		rte_eal_remote_launch(per_lcore_mempool_test,
				      mp, lcore_id);
	}

	/* start synchro and launch test on main */
	rte_atomic32_set(&synchro, 1);

	ret = per_lcore_mempool_test(mp);

	cores = cores_save;
	RTE_LCORE_FOREACH_WORKER(lcore_id) {
		if (cores == 1)
			break;
		cores--;
		if (rte_eal_wait_lcore(lcore_id) < 0)
			ret = -1;
	}

	if (ret < 0) {
		printf("per-lcore test returned -1\n");
		return -1;
	}

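	/* aggregate the per-lcore counters into an objects-per-second rate */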
	rate = 0;
	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
		rate += (stats[lcore_id].enq_count / TIME_S);

	printf("rate_persec=%" PRIu64 "\n", rate);

	return 0;
}

/* for a given number of cores, launch all test cases */
static int
do_one_mempool_test(struct rte_mempool *mp, unsigned int cores)
{
	unsigned bulk_tab_get[] = { 1, 4, 32, 0 };
	unsigned bulk_tab_put[] = { 1, 4, 32, 0 };
	unsigned keep_tab[] = { 32, 128, 0 };
	unsigned *get_bulk_ptr;
	unsigned *put_bulk_ptr;
	unsigned *keep_ptr;
	int ret;

	for (get_bulk_ptr = bulk_tab_get; *get_bulk_ptr; get_bulk_ptr++) {
		for (put_bulk_ptr = bulk_tab_put; *put_bulk_ptr; put_bulk_ptr++) {
			for (keep_ptr = keep_tab; *keep_ptr; keep_ptr++) {

				n_get_bulk = *get_bulk_ptr;
				n_put_bulk = *put_bulk_ptr;
				n_keep = *keep_ptr;
				ret = launch_cores(mp, cores);

				if (ret < 0)
					return -1;
			}
		}
	}
	return 0;
}

static int
test_mempool_perf(void)
{
	struct rte_mempool *mp_cache = NULL;
	struct rte_mempool *mp_nocache = NULL;
	struct rte_mempool *default_pool = NULL;
	const char *default_pool_ops;
	int ret = -1;

	rte_atomic32_init(&synchro);

	/* create a mempool (without cache) */
	mp_nocache = rte_mempool_create("perf_test_nocache", MEMPOOL_SIZE,
					MEMPOOL_ELT_SIZE, 0, 0,
					NULL, NULL,
					my_obj_init, NULL,
					SOCKET_ID_ANY, 0);
	if (mp_nocache == NULL)
		goto err;

	/* create a mempool (with cache) */
	mp_cache = rte_mempool_create("perf_test_cache", MEMPOOL_SIZE,
				      MEMPOOL_ELT_SIZE,
				      RTE_MEMPOOL_CACHE_MAX_SIZE, 0,
				      NULL, NULL,
				      my_obj_init, NULL,
				      SOCKET_ID_ANY, 0);
	if (mp_cache == NULL)
		goto err;

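	/*
	 * The third pool is built in three steps (create_empty, set ops,
	 * populate) so that the handler reported by
	 * rte_mbuf_best_mempool_ops() can be selected before any pool
	 * memory is allocated.
	 */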
	default_pool_ops = rte_mbuf_best_mempool_ops();
	/* create a mempool based on the default handler */
	default_pool = rte_mempool_create_empty("default_pool",
						MEMPOOL_SIZE,
						MEMPOOL_ELT_SIZE,
						0, 0,
						SOCKET_ID_ANY, 0);

	if (default_pool == NULL) {
		printf("cannot allocate %s mempool\n", default_pool_ops);
		goto err;
	}

	if (rte_mempool_set_ops_byname(default_pool, default_pool_ops, NULL)
				       < 0) {
		printf("cannot set %s handler\n", default_pool_ops);
		goto err;
	}

	if (rte_mempool_populate_default(default_pool) < 0) {
		printf("cannot populate %s mempool\n", default_pool_ops);
		goto err;
	}

	rte_mempool_obj_iter(default_pool, my_obj_init, NULL);

	/* performance test with 1, 2 and max cores */
	printf("start performance test (without cache)\n");

	if (do_one_mempool_test(mp_nocache, 1) < 0)
		goto err;

	if (do_one_mempool_test(mp_nocache, 2) < 0)
		goto err;

	if (do_one_mempool_test(mp_nocache, rte_lcore_count()) < 0)
		goto err;

	/* performance test with 1, 2 and max cores */
	printf("start performance test for %s (without cache)\n",
	       default_pool_ops);

	if (do_one_mempool_test(default_pool, 1) < 0)
		goto err;

	if (do_one_mempool_test(default_pool, 2) < 0)
		goto err;

	if (do_one_mempool_test(default_pool, rte_lcore_count()) < 0)
		goto err;

	/* performance test with 1, 2 and max cores */
	printf("start performance test (with cache)\n");

	if (do_one_mempool_test(mp_cache, 1) < 0)
		goto err;

	if (do_one_mempool_test(mp_cache, 2) < 0)
		goto err;

	if (do_one_mempool_test(mp_cache, rte_lcore_count()) < 0)
		goto err;

	/* performance test with 1, 2 and max cores */
	printf("start performance test (with user-owned cache)\n");
	use_external_cache = 1;

	if (do_one_mempool_test(mp_nocache, 1) < 0)
		goto err;

	if (do_one_mempool_test(mp_nocache, 2) < 0)
		goto err;

	if (do_one_mempool_test(mp_nocache, rte_lcore_count()) < 0)
		goto err;

	rte_mempool_list_dump(stdout);

	ret = 0;

err:
	rte_mempool_free(mp_cache);
	rte_mempool_free(mp_nocache);
	rte_mempool_free(default_pool);
	return ret;
}

REGISTER_TEST_COMMAND(mempool_perf_autotest, test_mempool_perf);