xref: /f-stack/dpdk/drivers/common/mlx5/mlx5_malloc.c (revision 2d9fd380)
1*2d9fd380Sjfb8856606 /* SPDX-License-Identifier: BSD-3-Clause
2*2d9fd380Sjfb8856606  * Copyright 2020 Mellanox Technologies, Ltd
3*2d9fd380Sjfb8856606  */
4*2d9fd380Sjfb8856606 
#include <errno.h>
#include <inttypes.h>
#include <malloc.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>

#include <rte_malloc.h>

#include "mlx5_common_utils.h"
#include "mlx5_malloc.h"
13*2d9fd380Sjfb8856606 
/**
 * Global state of the mlx5 memory allocation wrappers.
 *
 * Holds the allocator selection flags, the most recently seen rte memseg
 * list (consulted first when classifying an address) and, in debug builds
 * only, per-operation event counters.
 */
struct mlx5_sys_mem {
	uint32_t init:1; /* Memory allocator initialized. */
	uint32_t enable:1; /* System memory select. */
	uint32_t reserve:30; /* Reserve. */
	struct rte_memseg_list *last_msl;
	/* Last allocated rte memory memseg list. */
#ifdef RTE_LIBRTE_MLX5_DEBUG
	uint64_t malloc_sys;
	/* Memory allocated from system count. */
	uint64_t malloc_rte;
	/* Memory allocated from hugepage count. */
	uint64_t realloc_sys;
	/* Memory reallocate from system count. */
	uint64_t realloc_rte;
	/* Memory reallocate from hugepage count. */
	uint64_t free_sys;
	/* Memory free to system count. */
	uint64_t free_rte;
	/* Memory free to hugepage count. */
	uint64_t msl_miss;
	/* MSL miss count. */
	uint64_t msl_update;
	/* MSL update count. */
#endif
};
39*2d9fd380Sjfb8856606 
40*2d9fd380Sjfb8856606 /* Initialize default as not */
41*2d9fd380Sjfb8856606 static struct mlx5_sys_mem mlx5_sys_mem = {
42*2d9fd380Sjfb8856606 	.init = 0,
43*2d9fd380Sjfb8856606 	.enable = 0,
44*2d9fd380Sjfb8856606 #ifdef RTE_LIBRTE_MLX5_DEBUG
45*2d9fd380Sjfb8856606 	.malloc_sys = 0,
46*2d9fd380Sjfb8856606 	.malloc_rte = 0,
47*2d9fd380Sjfb8856606 	.realloc_sys = 0,
48*2d9fd380Sjfb8856606 	.realloc_rte = 0,
49*2d9fd380Sjfb8856606 	.free_sys = 0,
50*2d9fd380Sjfb8856606 	.free_rte = 0,
51*2d9fd380Sjfb8856606 	.msl_miss = 0,
52*2d9fd380Sjfb8856606 	.msl_update = 0,
53*2d9fd380Sjfb8856606 #endif
54*2d9fd380Sjfb8856606 };
55*2d9fd380Sjfb8856606 
56*2d9fd380Sjfb8856606 /**
57*2d9fd380Sjfb8856606  * Check if the address belongs to memory seg list.
58*2d9fd380Sjfb8856606  *
59*2d9fd380Sjfb8856606  * @param addr
60*2d9fd380Sjfb8856606  *   Memory address to be ckeced.
61*2d9fd380Sjfb8856606  * @param msl
62*2d9fd380Sjfb8856606  *   Memory seg list.
63*2d9fd380Sjfb8856606  *
64*2d9fd380Sjfb8856606  * @return
65*2d9fd380Sjfb8856606  *   True if it belongs, false otherwise.
66*2d9fd380Sjfb8856606  */
67*2d9fd380Sjfb8856606 static bool
mlx5_mem_check_msl(void * addr,struct rte_memseg_list * msl)68*2d9fd380Sjfb8856606 mlx5_mem_check_msl(void *addr, struct rte_memseg_list *msl)
69*2d9fd380Sjfb8856606 {
70*2d9fd380Sjfb8856606 	void *start, *end;
71*2d9fd380Sjfb8856606 
72*2d9fd380Sjfb8856606 	if (!msl)
73*2d9fd380Sjfb8856606 		return false;
74*2d9fd380Sjfb8856606 	start = msl->base_va;
75*2d9fd380Sjfb8856606 	end = RTE_PTR_ADD(start, msl->len);
76*2d9fd380Sjfb8856606 	if (addr >= start && addr < end)
77*2d9fd380Sjfb8856606 		return true;
78*2d9fd380Sjfb8856606 	return false;
79*2d9fd380Sjfb8856606 }
80*2d9fd380Sjfb8856606 
81*2d9fd380Sjfb8856606 /**
82*2d9fd380Sjfb8856606  * Update the msl if memory belongs to new msl.
83*2d9fd380Sjfb8856606  *
84*2d9fd380Sjfb8856606  * @param addr
85*2d9fd380Sjfb8856606  *   Memory address.
86*2d9fd380Sjfb8856606  */
87*2d9fd380Sjfb8856606 static void
mlx5_mem_update_msl(void * addr)88*2d9fd380Sjfb8856606 mlx5_mem_update_msl(void *addr)
89*2d9fd380Sjfb8856606 {
90*2d9fd380Sjfb8856606 	/*
91*2d9fd380Sjfb8856606 	 * Update the cache msl if the new addr comes from the new msl
92*2d9fd380Sjfb8856606 	 * different with the cached msl.
93*2d9fd380Sjfb8856606 	 */
94*2d9fd380Sjfb8856606 	if (addr && !mlx5_mem_check_msl(addr,
95*2d9fd380Sjfb8856606 	    (struct rte_memseg_list *)__atomic_load_n
96*2d9fd380Sjfb8856606 	    (&mlx5_sys_mem.last_msl, __ATOMIC_RELAXED))) {
97*2d9fd380Sjfb8856606 		__atomic_store_n(&mlx5_sys_mem.last_msl,
98*2d9fd380Sjfb8856606 			rte_mem_virt2memseg_list(addr),
99*2d9fd380Sjfb8856606 			__ATOMIC_RELAXED);
100*2d9fd380Sjfb8856606 #ifdef RTE_LIBRTE_MLX5_DEBUG
101*2d9fd380Sjfb8856606 		__atomic_add_fetch(&mlx5_sys_mem.msl_update, 1,
102*2d9fd380Sjfb8856606 				   __ATOMIC_RELAXED);
103*2d9fd380Sjfb8856606 #endif
104*2d9fd380Sjfb8856606 	}
105*2d9fd380Sjfb8856606 }
106*2d9fd380Sjfb8856606 
107*2d9fd380Sjfb8856606 /**
108*2d9fd380Sjfb8856606  * Check if the address belongs to rte memory.
109*2d9fd380Sjfb8856606  *
110*2d9fd380Sjfb8856606  * @param addr
111*2d9fd380Sjfb8856606  *   Memory address to be ckeced.
112*2d9fd380Sjfb8856606  *
113*2d9fd380Sjfb8856606  * @return
114*2d9fd380Sjfb8856606  *   True if it belongs, false otherwise.
115*2d9fd380Sjfb8856606  */
116*2d9fd380Sjfb8856606 static bool
mlx5_mem_is_rte(void * addr)117*2d9fd380Sjfb8856606 mlx5_mem_is_rte(void *addr)
118*2d9fd380Sjfb8856606 {
119*2d9fd380Sjfb8856606 	/*
120*2d9fd380Sjfb8856606 	 * Check if the last cache msl matches. Drop to slow path
121*2d9fd380Sjfb8856606 	 * to check if the memory belongs to rte memory.
122*2d9fd380Sjfb8856606 	 */
123*2d9fd380Sjfb8856606 	if (!mlx5_mem_check_msl(addr, (struct rte_memseg_list *)
124*2d9fd380Sjfb8856606 	    __atomic_load_n(&mlx5_sys_mem.last_msl, __ATOMIC_RELAXED))) {
125*2d9fd380Sjfb8856606 		if (!rte_mem_virt2memseg_list(addr))
126*2d9fd380Sjfb8856606 			return false;
127*2d9fd380Sjfb8856606 #ifdef RTE_LIBRTE_MLX5_DEBUG
128*2d9fd380Sjfb8856606 		__atomic_add_fetch(&mlx5_sys_mem.msl_miss, 1, __ATOMIC_RELAXED);
129*2d9fd380Sjfb8856606 #endif
130*2d9fd380Sjfb8856606 	}
131*2d9fd380Sjfb8856606 	return true;
132*2d9fd380Sjfb8856606 }
133*2d9fd380Sjfb8856606 
134*2d9fd380Sjfb8856606 /**
135*2d9fd380Sjfb8856606  * Allocate memory with alignment.
136*2d9fd380Sjfb8856606  *
137*2d9fd380Sjfb8856606  * @param size
138*2d9fd380Sjfb8856606  *   Memory size to be allocated.
139*2d9fd380Sjfb8856606  * @param align
140*2d9fd380Sjfb8856606  *   Memory alignment.
141*2d9fd380Sjfb8856606  * @param zero
142*2d9fd380Sjfb8856606  *   Clear the allocated memory or not.
143*2d9fd380Sjfb8856606  *
144*2d9fd380Sjfb8856606  * @return
145*2d9fd380Sjfb8856606  *   Pointer of the allocated memory, NULL otherwise.
146*2d9fd380Sjfb8856606  */
147*2d9fd380Sjfb8856606 static void *
mlx5_alloc_align(size_t size,unsigned int align,unsigned int zero)148*2d9fd380Sjfb8856606 mlx5_alloc_align(size_t size, unsigned int align, unsigned int zero)
149*2d9fd380Sjfb8856606 {
150*2d9fd380Sjfb8856606 	void *buf;
151*2d9fd380Sjfb8856606 	int ret;
152*2d9fd380Sjfb8856606 
153*2d9fd380Sjfb8856606 	ret = posix_memalign(&buf, align, size);
154*2d9fd380Sjfb8856606 	if (ret) {
155*2d9fd380Sjfb8856606 		DRV_LOG(ERR,
156*2d9fd380Sjfb8856606 			"Couldn't allocate buf size=%zu align=%u. Err=%d\n",
157*2d9fd380Sjfb8856606 			size, align, ret);
158*2d9fd380Sjfb8856606 
159*2d9fd380Sjfb8856606 		return NULL;
160*2d9fd380Sjfb8856606 	}
161*2d9fd380Sjfb8856606 	if (zero)
162*2d9fd380Sjfb8856606 		memset(buf, 0, size);
163*2d9fd380Sjfb8856606 	return buf;
164*2d9fd380Sjfb8856606 }
165*2d9fd380Sjfb8856606 
166*2d9fd380Sjfb8856606 void *
mlx5_malloc(uint32_t flags,size_t size,unsigned int align,int socket)167*2d9fd380Sjfb8856606 mlx5_malloc(uint32_t flags, size_t size, unsigned int align, int socket)
168*2d9fd380Sjfb8856606 {
169*2d9fd380Sjfb8856606 	void *addr;
170*2d9fd380Sjfb8856606 	bool rte_mem;
171*2d9fd380Sjfb8856606 
172*2d9fd380Sjfb8856606 	/*
173*2d9fd380Sjfb8856606 	 * If neither system memory nor rte memory is required, allocate
174*2d9fd380Sjfb8856606 	 * memory according to mlx5_sys_mem.enable.
175*2d9fd380Sjfb8856606 	 */
176*2d9fd380Sjfb8856606 	if (flags & MLX5_MEM_RTE)
177*2d9fd380Sjfb8856606 		rte_mem = true;
178*2d9fd380Sjfb8856606 	else if (flags & MLX5_MEM_SYS)
179*2d9fd380Sjfb8856606 		rte_mem = false;
180*2d9fd380Sjfb8856606 	else
181*2d9fd380Sjfb8856606 		rte_mem = mlx5_sys_mem.enable ? false : true;
182*2d9fd380Sjfb8856606 	if (rte_mem) {
183*2d9fd380Sjfb8856606 		if (flags & MLX5_MEM_ZERO)
184*2d9fd380Sjfb8856606 			addr = rte_zmalloc_socket(NULL, size, align, socket);
185*2d9fd380Sjfb8856606 		else
186*2d9fd380Sjfb8856606 			addr = rte_malloc_socket(NULL, size, align, socket);
187*2d9fd380Sjfb8856606 		mlx5_mem_update_msl(addr);
188*2d9fd380Sjfb8856606 #ifdef RTE_LIBRTE_MLX5_DEBUG
189*2d9fd380Sjfb8856606 		if (addr)
190*2d9fd380Sjfb8856606 			__atomic_add_fetch(&mlx5_sys_mem.malloc_rte, 1,
191*2d9fd380Sjfb8856606 					   __ATOMIC_RELAXED);
192*2d9fd380Sjfb8856606 #endif
193*2d9fd380Sjfb8856606 		return addr;
194*2d9fd380Sjfb8856606 	}
195*2d9fd380Sjfb8856606 	/* The memory will be allocated from system. */
196*2d9fd380Sjfb8856606 	if (align > MLX5_MALLOC_ALIGNMENT)
197*2d9fd380Sjfb8856606 		addr = mlx5_alloc_align(size, align, !!(flags & MLX5_MEM_ZERO));
198*2d9fd380Sjfb8856606 	else if (flags & MLX5_MEM_ZERO)
199*2d9fd380Sjfb8856606 		addr = calloc(1, size);
200*2d9fd380Sjfb8856606 	else
201*2d9fd380Sjfb8856606 		addr = malloc(size);
202*2d9fd380Sjfb8856606 #ifdef RTE_LIBRTE_MLX5_DEBUG
203*2d9fd380Sjfb8856606 	if (addr)
204*2d9fd380Sjfb8856606 		__atomic_add_fetch(&mlx5_sys_mem.malloc_sys, 1,
205*2d9fd380Sjfb8856606 				   __ATOMIC_RELAXED);
206*2d9fd380Sjfb8856606 #endif
207*2d9fd380Sjfb8856606 	return addr;
208*2d9fd380Sjfb8856606 }
209*2d9fd380Sjfb8856606 
210*2d9fd380Sjfb8856606 void *
mlx5_realloc(void * addr,uint32_t flags,size_t size,unsigned int align,int socket)211*2d9fd380Sjfb8856606 mlx5_realloc(void *addr, uint32_t flags, size_t size, unsigned int align,
212*2d9fd380Sjfb8856606 	     int socket)
213*2d9fd380Sjfb8856606 {
214*2d9fd380Sjfb8856606 	void *new_addr;
215*2d9fd380Sjfb8856606 	bool rte_mem;
216*2d9fd380Sjfb8856606 
217*2d9fd380Sjfb8856606 	/* Allocate directly if old memory address is NULL. */
218*2d9fd380Sjfb8856606 	if (!addr)
219*2d9fd380Sjfb8856606 		return mlx5_malloc(flags, size, align, socket);
220*2d9fd380Sjfb8856606 	/* Get the memory type. */
221*2d9fd380Sjfb8856606 	if (flags & MLX5_MEM_RTE)
222*2d9fd380Sjfb8856606 		rte_mem = true;
223*2d9fd380Sjfb8856606 	else if (flags & MLX5_MEM_SYS)
224*2d9fd380Sjfb8856606 		rte_mem = false;
225*2d9fd380Sjfb8856606 	else
226*2d9fd380Sjfb8856606 		rte_mem = mlx5_sys_mem.enable ? false : true;
227*2d9fd380Sjfb8856606 	/* Check if old memory and to be allocated memory are the same type. */
228*2d9fd380Sjfb8856606 	if (rte_mem != mlx5_mem_is_rte(addr)) {
229*2d9fd380Sjfb8856606 		DRV_LOG(ERR, "Couldn't reallocate to different memory type.");
230*2d9fd380Sjfb8856606 		return NULL;
231*2d9fd380Sjfb8856606 	}
232*2d9fd380Sjfb8856606 	/* Allocate memory from rte memory. */
233*2d9fd380Sjfb8856606 	if (rte_mem) {
234*2d9fd380Sjfb8856606 		new_addr = rte_realloc_socket(addr, size, align, socket);
235*2d9fd380Sjfb8856606 		mlx5_mem_update_msl(new_addr);
236*2d9fd380Sjfb8856606 #ifdef RTE_LIBRTE_MLX5_DEBUG
237*2d9fd380Sjfb8856606 		if (new_addr)
238*2d9fd380Sjfb8856606 			__atomic_add_fetch(&mlx5_sys_mem.realloc_rte, 1,
239*2d9fd380Sjfb8856606 					   __ATOMIC_RELAXED);
240*2d9fd380Sjfb8856606 #endif
241*2d9fd380Sjfb8856606 		return new_addr;
242*2d9fd380Sjfb8856606 	}
243*2d9fd380Sjfb8856606 	/* Align is not supported for system memory. */
244*2d9fd380Sjfb8856606 	if (align) {
245*2d9fd380Sjfb8856606 		DRV_LOG(ERR, "Couldn't reallocate with alignment");
246*2d9fd380Sjfb8856606 		return NULL;
247*2d9fd380Sjfb8856606 	}
248*2d9fd380Sjfb8856606 	new_addr = realloc(addr, size);
249*2d9fd380Sjfb8856606 #ifdef RTE_LIBRTE_MLX5_DEBUG
250*2d9fd380Sjfb8856606 	if (new_addr)
251*2d9fd380Sjfb8856606 		__atomic_add_fetch(&mlx5_sys_mem.realloc_sys, 1,
252*2d9fd380Sjfb8856606 				   __ATOMIC_RELAXED);
253*2d9fd380Sjfb8856606 #endif
254*2d9fd380Sjfb8856606 	return new_addr;
255*2d9fd380Sjfb8856606 }
256*2d9fd380Sjfb8856606 
/**
 * Free memory obtained from mlx5_malloc()/mlx5_realloc().
 *
 * The address is classified with mlx5_mem_is_rte() and released with
 * rte_free() or free() accordingly.
 *
 * @param addr
 *   Memory address to be freed, NULL is a no-op.
 */
void
mlx5_free(void *addr)
{
	if (addr == NULL)
		return;
	if (!mlx5_mem_is_rte(addr)) {
#ifdef RTE_LIBRTE_MLX5_DEBUG
		__atomic_add_fetch(&mlx5_sys_mem.free_sys, 1,
				   __ATOMIC_RELAXED);
#endif
		free(addr);
	} else {
#ifdef RTE_LIBRTE_MLX5_DEBUG
		__atomic_add_fetch(&mlx5_sys_mem.free_rte, 1,
				   __ATOMIC_RELAXED);
#endif
		rte_free(addr);
	}
}
276*2d9fd380Sjfb8856606 
/**
 * Log the allocator statistics counters.
 *
 * Does nothing unless RTE_LIBRTE_MLX5_DEBUG is defined, since the
 * counters exist only in debug builds.
 */
void
mlx5_memory_stat_dump(void)
{
#ifdef RTE_LIBRTE_MLX5_DEBUG
	/* Counters are uint64_t, so print them with the unsigned macro. */
	DRV_LOG(INFO, "System memory malloc:%"PRIu64", realloc:%"PRIu64","
		" free:%"PRIu64"\nRTE memory malloc:%"PRIu64","
		" realloc:%"PRIu64", free:%"PRIu64"\nMSL miss:%"PRIu64","
		" update:%"PRIu64"",
		__atomic_load_n(&mlx5_sys_mem.malloc_sys, __ATOMIC_RELAXED),
		__atomic_load_n(&mlx5_sys_mem.realloc_sys, __ATOMIC_RELAXED),
		__atomic_load_n(&mlx5_sys_mem.free_sys, __ATOMIC_RELAXED),
		__atomic_load_n(&mlx5_sys_mem.malloc_rte, __ATOMIC_RELAXED),
		__atomic_load_n(&mlx5_sys_mem.realloc_rte, __ATOMIC_RELAXED),
		__atomic_load_n(&mlx5_sys_mem.free_rte, __ATOMIC_RELAXED),
		__atomic_load_n(&mlx5_sys_mem.msl_miss, __ATOMIC_RELAXED),
		__atomic_load_n(&mlx5_sys_mem.msl_update, __ATOMIC_RELAXED));
#endif
}
295*2d9fd380Sjfb8856606 
296*2d9fd380Sjfb8856606 void
mlx5_malloc_mem_select(uint32_t sys_mem_en)297*2d9fd380Sjfb8856606 mlx5_malloc_mem_select(uint32_t sys_mem_en)
298*2d9fd380Sjfb8856606 {
299*2d9fd380Sjfb8856606 	/*
300*2d9fd380Sjfb8856606 	 * The initialization should be called only once and all devices
301*2d9fd380Sjfb8856606 	 * should use the same memory type. Otherwise, when new device is
302*2d9fd380Sjfb8856606 	 * being attached with some different memory allocation configuration,
303*2d9fd380Sjfb8856606 	 * the memory will get wrong behavior or a failure will be raised.
304*2d9fd380Sjfb8856606 	 */
305*2d9fd380Sjfb8856606 	if (!mlx5_sys_mem.init) {
306*2d9fd380Sjfb8856606 		if (sys_mem_en)
307*2d9fd380Sjfb8856606 			mlx5_sys_mem.enable = 1;
308*2d9fd380Sjfb8856606 		mlx5_sys_mem.init = 1;
309*2d9fd380Sjfb8856606 		DRV_LOG(INFO, "%s is selected.", sys_mem_en ? "SYS_MEM" : "RTE_MEM");
310*2d9fd380Sjfb8856606 	} else if (mlx5_sys_mem.enable != sys_mem_en) {
311*2d9fd380Sjfb8856606 		DRV_LOG(WARNING, "%s is already selected.",
312*2d9fd380Sjfb8856606 			mlx5_sys_mem.enable ? "SYS_MEM" : "RTE_MEM");
313*2d9fd380Sjfb8856606 	}
314*2d9fd380Sjfb8856606 }
315