/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2020 Dmitry Kozlyuk
 */

#include <string.h>

#include <rte_errno.h>
#include <rte_os.h>

#include "eal_internal_cfg.h"
#include "eal_memalloc.h"
#include "eal_memcfg.h"
#include "eal_private.h"
#include "eal_windows.h"
int
eal_memalloc_get_seg_fd(int list_idx, int seg_idx)
{
	/* Hugepages have no associated files in Windows. */
	RTE_SET_USED(list_idx);
	RTE_SET_USED(seg_idx);
	EAL_LOG_NOT_IMPLEMENTED();
	return -1;
}

int
eal_memalloc_get_seg_fd_offset(int list_idx, int seg_idx, size_t *offset)
{
	/* Hugepages have no associated files in Windows. */
	RTE_SET_USED(list_idx);
	RTE_SET_USED(seg_idx);
	RTE_SET_USED(offset);
	EAL_LOG_NOT_IMPLEMENTED();
	return -1;
}

static int
alloc_seg(struct rte_memseg *ms, void *requested_addr, int socket_id,
	struct hugepage_info *hi)
{
	HANDLE current_process;
	unsigned int numa_node;
	size_t alloc_sz;
	void *addr;
	rte_iova_t iova = RTE_BAD_IOVA;
	PSAPI_WORKING_SET_EX_INFORMATION info;
	PSAPI_WORKING_SET_EX_BLOCK *page;

	if (ms->len > 0) {
		/* If a segment is already allocated as needed, return it. */
		if ((ms->addr == requested_addr) &&
			(ms->socket_id == socket_id) &&
			(ms->hugepage_sz == hi->hugepage_sz)) {
			return 0;
		}

		/* Bugcheck, should not happen. */
		RTE_LOG(DEBUG, EAL, "Attempted to reallocate segment %p "
			"(size %zu) on socket %d\n", ms->addr,
			ms->len, ms->socket_id);
		return -1;
	}

	current_process = GetCurrentProcess();
	numa_node = eal_socket_numa_node(socket_id);
	alloc_sz = hi->hugepage_sz;

	if (requested_addr == NULL) {
		/* Request a new chunk of memory from the OS. */
		addr = eal_mem_alloc_socket(alloc_sz, socket_id);
		if (addr == NULL) {
			RTE_LOG(DEBUG, EAL, "Cannot allocate %zu bytes "
				"on socket %d\n", alloc_sz, socket_id);
			return -1;
		}
	} else {
		/* The requested address is already reserved; commit memory. */
		addr = eal_mem_commit(requested_addr, alloc_sz, socket_id);

		/* During commitment, memory is temporarily freed and might
		 * be allocated by a different non-EAL thread. This is a fatal
		 * error, because it breaks MSL assumptions.
		 */
		if ((addr != NULL) && (addr != requested_addr)) {
			RTE_LOG(CRIT, EAL, "Address %p occupied by an alien "
				"allocation - MSL is not VA-contiguous!\n",
				requested_addr);
			return -1;
		}

		if (addr == NULL) {
			RTE_LOG(DEBUG, EAL, "Cannot commit reserved memory %p "
				"(size %zu) on socket %d\n",
				requested_addr, alloc_sz, socket_id);
			return -1;
		}
	}

	/* Force the OS to allocate a physical page and select a NUMA node.
	 * Hugepages are not pageable in Windows, so there is no race
	 * for the physical address.
	 */
	*(volatile int *)addr = *(volatile int *)addr;

	/* Only try to obtain the IOVA if it is available, so that
	 * applications that do not need IOVA can use this allocator.
	 */
	if (rte_eal_using_phys_addrs()) {
		iova = rte_mem_virt2iova(addr);
		if (iova == RTE_BAD_IOVA) {
			RTE_LOG(DEBUG, EAL,
				"Cannot get IOVA of allocated segment\n");
			goto error;
		}
	}

	/* Only the "Ex" function can report hugepage attributes. */
	info.VirtualAddress = addr;
	if (!QueryWorkingSetEx(current_process, &info, sizeof(info))) {
		RTE_LOG_WIN32_ERR("QueryWorkingSetEx(%p)", addr);
		goto error;
	}

	page = &info.VirtualAttributes;
	if (!page->Valid || !page->LargePage) {
		RTE_LOG(DEBUG, EAL, "Got a regular page instead of a hugepage\n");
		goto error;
	}
	if (page->Node != numa_node) {
		RTE_LOG(DEBUG, EAL,
			"NUMA node hint %u (socket %d) not respected, got %u\n",
			numa_node, socket_id, page->Node);
		goto error;
	}

	ms->addr = addr;
	ms->hugepage_sz = hi->hugepage_sz;
	ms->len = alloc_sz;
	ms->nchannel = rte_memory_get_nchannel();
	ms->nrank = rte_memory_get_nrank();
	ms->iova = iova;
	ms->socket_id = socket_id;

	return 0;

error:
	/* Only jump here when `addr` and `alloc_sz` are valid. */
	if (eal_mem_decommit(addr, alloc_sz) && (rte_errno == EADDRNOTAVAIL)) {
		/* During decommitment, memory is temporarily returned
		 * to the system and the address may become unavailable.
		 */
		RTE_LOG(CRIT, EAL, "Address %p occupied by an alien "
			"allocation - MSL is not VA-contiguous!\n", addr);
	}
	return -1;
}
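
/*
 * A hedged sketch of the commit/decommit lifecycle that alloc_seg() and
 * free_seg() rely on (illustrative only; eal_mem_commit() and
 * eal_mem_decommit() are the Windows EAL helpers used above):
 *
 *	void *va = eal_mem_commit(reserved_va, hugepage_sz, socket_id);
 *	if (va == reserved_va) {
 *		use the committed hugepage, then release it:
 *		if (eal_mem_decommit(va, hugepage_sz) != 0 &&
 *				rte_errno == EADDRNOTAVAIL)
 *			the range was lost to an alien allocation
 *	}
 *
 * EADDRNOTAVAIL after decommit means the address range could not be
 * re-reserved, which breaks the VA-contiguity assumption of the MSL.
 */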

static int
free_seg(struct rte_memseg *ms)
{
	if (eal_mem_decommit(ms->addr, ms->len)) {
		if (rte_errno == EADDRNOTAVAIL) {
			/* See alloc_seg() for an explanation. */
			RTE_LOG(CRIT, EAL, "Address %p occupied by an alien "
				"allocation - MSL is not VA-contiguous!\n",
				ms->addr);
		}
		return -1;
	}

	/* Must clear the segment, because alloc_seg() inspects it. */
	memset(ms, 0, sizeof(*ms));
	return 0;
}

struct alloc_walk_param {
	struct hugepage_info *hi;    /* hugepage size to allocate with */
	struct rte_memseg **ms;      /* optional output array of segments */
	size_t page_sz;              /* size of pages to allocate */
	unsigned int segs_allocated; /* number of segments actually allocated */
	unsigned int n_segs;         /* number of segments requested */
	int socket;                  /* socket to allocate on */
	bool exact;                  /* fail unless exactly n_segs are allocated */
};

static int
alloc_seg_walk(const struct rte_memseg_list *msl, void *arg)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	struct alloc_walk_param *wa = arg;
	struct rte_memseg_list *cur_msl;
	size_t page_sz;
	int cur_idx, start_idx, j;
	unsigned int msl_idx, need, i;

	if (msl->page_sz != wa->page_sz)
		return 0;
	if (msl->socket_id != wa->socket)
		return 0;

	page_sz = (size_t)msl->page_sz;

	msl_idx = msl - mcfg->memsegs;
	cur_msl = &mcfg->memsegs[msl_idx];

	need = wa->n_segs;

	/* Try finding space in the memseg list. */
	if (wa->exact) {
		/* An exact number of pages is required: find them or fail. */
		cur_idx = rte_fbarray_find_next_n_free(
			&cur_msl->memseg_arr, 0, need);
		if (cur_idx < 0)
			return 0;
		start_idx = cur_idx;
	} else {
		int cur_len;

		/* An exact number of pages is not required, so make a
		 * best-effort allocation: find the biggest unused block
		 * and use that.
		 */
		cur_idx = rte_fbarray_find_biggest_free(
			&cur_msl->memseg_arr, 0);
		if (cur_idx < 0)
			return 0;
		start_idx = cur_idx;
		/* Adjust the size to possibly be smaller than the original
		 * request, but do not allow it to be bigger.
		 */
		cur_len = rte_fbarray_find_contig_free(
			&cur_msl->memseg_arr, cur_idx);
		need = RTE_MIN(need, (unsigned int)cur_len);
	}
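
	/* Worked example (illustrative): if the caller asked for 8 segments
	 * without `exact` and the biggest free run in this list holds only
	 * 5 entries, the branch above picks that run and trims `need` from
	 * 8 down to 5; with `exact` set, this list would have been skipped
	 * unless 8 contiguous free entries existed.
	 */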

	for (i = 0; i < need; i++, cur_idx++) {
		struct rte_memseg *cur;
		void *map_addr;

		cur = rte_fbarray_get(&cur_msl->memseg_arr, cur_idx);
		map_addr = RTE_PTR_ADD(cur_msl->base_va, cur_idx * page_sz);

		if (alloc_seg(cur, map_addr, wa->socket, wa->hi)) {
			RTE_LOG(DEBUG, EAL, "Attempted to allocate %u segments, "
				"but only %u were allocated\n", need, i);

			/* If an exact number wasn't requested, stop. */
			if (!wa->exact)
				goto out;

			/* Clean up the segments allocated so far. */
			for (j = start_idx; j < cur_idx; j++) {
				struct rte_memseg *tmp;
				struct rte_fbarray *arr = &cur_msl->memseg_arr;

				tmp = rte_fbarray_get(arr, j);
				rte_fbarray_set_free(arr, j);

				if (free_seg(tmp))
					RTE_LOG(DEBUG, EAL, "Cannot free page\n");
			}
			/* Clear the output list. */
			if (wa->ms)
				memset(wa->ms, 0, sizeof(*wa->ms) * wa->n_segs);

			return -1;
		}
		if (wa->ms)
			wa->ms[i] = cur;

		rte_fbarray_set_used(&cur_msl->memseg_arr, cur_idx);
	}

out:
	wa->segs_allocated = i;
	if (i > 0)
		cur_msl->version++;

	/* If we didn't allocate any segments, move on to the next list. */
	return i > 0;
}
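
/*
 * A minimal sketch of the walk-callback contract that alloc_seg_walk()
 * and free_seg_walk() follow (assumed from their use with
 * rte_memseg_list_walk_thread_unsafe() below): return 0 to continue
 * with the next memseg list, a positive value to stop the walk and
 * report success, or a negative value to stop and report failure.
 *
 *	static int
 *	count_lists(const struct rte_memseg_list *msl, void *arg)
 *	{
 *		unsigned int *count = arg;
 *
 *		if (msl->base_va != NULL)
 *			(*count)++;
 *		return 0;
 *	}
 */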

struct free_walk_param {
	struct hugepage_info *hi;
	struct rte_memseg *ms;
};

static int
free_seg_walk(const struct rte_memseg_list *msl, void *arg)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	struct rte_memseg_list *found_msl;
	struct free_walk_param *wa = arg;
	uintptr_t start_addr, end_addr;
	int msl_idx, seg_idx, ret;

	start_addr = (uintptr_t)msl->base_va;
	end_addr = start_addr + msl->len;

	if ((uintptr_t)wa->ms->addr < start_addr ||
		(uintptr_t)wa->ms->addr >= end_addr)
		return 0;

	msl_idx = msl - mcfg->memsegs;
	seg_idx = RTE_PTR_DIFF(wa->ms->addr, start_addr) / msl->page_sz;

	/* The walk callback gets `msl` as const; modify via mem_config. */
	found_msl = &mcfg->memsegs[msl_idx];
	found_msl->version++;

	rte_fbarray_set_free(&found_msl->memseg_arr, seg_idx);

	ret = free_seg(wa->ms);

	return (ret < 0) ? (-1) : 1;
}

int
eal_memalloc_alloc_seg_bulk(struct rte_memseg **ms, int n_segs,
		size_t page_sz, int socket, bool exact)
{
	unsigned int i;
	int ret = -1;
	struct alloc_walk_param wa;
	struct hugepage_info *hi = NULL;
	struct internal_config *internal_conf =
		eal_get_internal_configuration();

	if (internal_conf->legacy_mem) {
		RTE_LOG(ERR, EAL, "Dynamic allocation is not supported in legacy mode\n");
		return -ENOTSUP;
	}

	for (i = 0; i < internal_conf->num_hugepage_sizes; i++) {
		struct hugepage_info *hpi = &internal_conf->hugepage_info[i];
		if (page_sz == hpi->hugepage_sz) {
			hi = hpi;
			break;
		}
	}
	if (!hi) {
		RTE_LOG(ERR, EAL, "Cannot find relevant hugepage_info entry\n");
		return -1;
	}

	memset(&wa, 0, sizeof(wa));
	wa.exact = exact;
	wa.hi = hi;
	wa.ms = ms;
	wa.n_segs = n_segs;
	wa.page_sz = page_sz;
	wa.socket = socket;
	wa.segs_allocated = 0;

	/* memalloc is locked, so it's safe to use the thread-unsafe version. */
	ret = rte_memseg_list_walk_thread_unsafe(alloc_seg_walk, &wa);
	if (ret == 0) {
		RTE_LOG(ERR, EAL, "Cannot find a suitable memseg_list\n");
		ret = -1;
	} else if (ret > 0) {
		ret = (int)wa.segs_allocated;
	}

	return ret;
}

struct rte_memseg *
eal_memalloc_alloc_seg(size_t page_sz, int socket)
{
	struct rte_memseg *ms = NULL;

	eal_memalloc_alloc_seg_bulk(&ms, 1, page_sz, socket, true);
	return ms;
}
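
/*
 * Usage sketch (illustrative): with the memalloc lock held, a caller
 * could obtain and release a single hugepage-backed segment like this,
 * assuming a 2 MiB hugepage size on socket 0:
 *
 *	struct rte_memseg *ms = eal_memalloc_alloc_seg(RTE_PGSIZE_2M, 0);
 *
 *	if (ms != NULL) {
 *		use ms->addr (and ms->iova, if IOVA is available), then:
 *		if (eal_memalloc_free_seg(ms) != 0)
 *			handle the failure to free
 *	}
 */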

int
eal_memalloc_free_seg_bulk(struct rte_memseg **ms, int n_segs)
{
	int seg, ret = 0;
	struct internal_config *internal_conf =
		eal_get_internal_configuration();

	/* Dynamic free is not supported in legacy mode. */
	if (internal_conf->legacy_mem)
		return -1;

	for (seg = 0; seg < n_segs; seg++) {
		struct rte_memseg *cur = ms[seg];
		struct hugepage_info *hi = NULL;
		struct free_walk_param wa;
		size_t i;
		int walk_res;

		/* If this page is marked as unfreeable, fail. */
		if (cur->flags & RTE_MEMSEG_FLAG_DO_NOT_FREE) {
			RTE_LOG(DEBUG, EAL, "Page is not allowed to be freed\n");
			ret = -1;
			continue;
		}

		memset(&wa, 0, sizeof(wa));

		for (i = 0; i < RTE_DIM(internal_conf->hugepage_info); i++) {
			hi = &internal_conf->hugepage_info[i];
			if (cur->hugepage_sz == hi->hugepage_sz)
				break;
		}
		if (i == RTE_DIM(internal_conf->hugepage_info)) {
			RTE_LOG(ERR, EAL, "Cannot find relevant hugepage_info entry\n");
			ret = -1;
			continue;
		}

		wa.ms = cur;
		wa.hi = hi;

		/* memalloc is locked, so it's safe to use the thread-unsafe
		 * version.
		 */
		walk_res = rte_memseg_list_walk_thread_unsafe(free_seg_walk,
				&wa);
		if (walk_res == 1)
			continue;
		if (walk_res == 0)
			RTE_LOG(ERR, EAL, "Cannot find the memseg list\n");
		ret = -1;
	}
	return ret;
}

int
eal_memalloc_free_seg(struct rte_memseg *ms)
{
	return eal_memalloc_free_seg_bulk(&ms, 1);
}

int
eal_memalloc_sync_with_primary(void)
{
	/* No multi-process support. */
	EAL_LOG_NOT_IMPLEMENTED();
	return -1;
}

int
eal_memalloc_init(void)
{
	/* No action required. */
	return 0;
}