/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2020 Dmitry Kozlyuk
 */

#include <rte_errno.h>
#include <rte_os.h>

#include "eal_internal_cfg.h"
#include "eal_memalloc.h"
#include "eal_memcfg.h"
#include "eal_private.h"
#include "eal_windows.h"

int
eal_memalloc_get_seg_fd(int list_idx, int seg_idx)
{
	/* Hugepages have no associated files in Windows. */
	RTE_SET_USED(list_idx);
	RTE_SET_USED(seg_idx);
	EAL_LOG_NOT_IMPLEMENTED();
	return -1;
}

int
eal_memalloc_get_seg_fd_offset(int list_idx, int seg_idx, size_t *offset)
{
	/* Hugepages have no associated files in Windows. */
	RTE_SET_USED(list_idx);
	RTE_SET_USED(seg_idx);
	RTE_SET_USED(offset);
	EAL_LOG_NOT_IMPLEMENTED();
	return -1;
}

static int
alloc_seg(struct rte_memseg *ms, void *requested_addr, int socket_id,
	struct hugepage_info *hi)
{
	HANDLE current_process;
	unsigned int numa_node;
	size_t alloc_sz;
	void *addr;
	rte_iova_t iova = RTE_BAD_IOVA;
	PSAPI_WORKING_SET_EX_INFORMATION info;
	PSAPI_WORKING_SET_EX_BLOCK *page;

	if (ms->len > 0) {
		/* If a segment is already allocated as needed, return it. */
		if ((ms->addr == requested_addr) &&
				(ms->socket_id == socket_id) &&
				(ms->hugepage_sz == hi->hugepage_sz)) {
			return 0;
		}

		/* Bugcheck, should not happen. */
		RTE_LOG(DEBUG, EAL, "Attempted to reallocate segment %p "
			"(size %zu) on socket %d\n", ms->addr,
			ms->len, ms->socket_id);
		return -1;
	}

	current_process = GetCurrentProcess();
	numa_node = eal_socket_numa_node(socket_id);
	alloc_sz = hi->hugepage_sz;

	if (requested_addr == NULL) {
		/* Request a new chunk of memory from OS. */
		addr = eal_mem_alloc_socket(alloc_sz, socket_id);
		if (addr == NULL) {
			RTE_LOG(DEBUG, EAL, "Cannot allocate %zu bytes "
				"on socket %d\n", alloc_sz, socket_id);
			return -1;
		}
	} else {
		/* Requested address is already reserved, commit memory. */
		addr = eal_mem_commit(requested_addr, alloc_sz, socket_id);

		/* During commitment, memory is temporarily freed and might
		 * be allocated by a different non-EAL thread. This is a fatal
		 * error, because it breaks MSL assumptions.
		 */
		if ((addr != NULL) && (addr != requested_addr)) {
			RTE_LOG(CRIT, EAL, "Address %p occupied by an alien "
				"allocation - MSL is not VA-contiguous!\n",
				requested_addr);
			return -1;
		}

		if (addr == NULL) {
			RTE_LOG(DEBUG, EAL, "Cannot commit reserved memory %p "
				"(size %zu) on socket %d\n",
				requested_addr, alloc_sz, socket_id);
			return -1;
		}
	}

	/* Force OS to allocate a physical page and select a NUMA node.
	 * Hugepages are not pageable in Windows, so there's no race
	 * for physical address.
	 */
	*(volatile int *)addr = *(volatile int *)addr;

	/* Only try to obtain IOVA if it's available, so that applications
	 * that do not need IOVA can use this allocator.
	 */
	if (rte_eal_using_phys_addrs()) {
		iova = rte_mem_virt2iova(addr);
		if (iova == RTE_BAD_IOVA) {
			RTE_LOG(DEBUG, EAL,
				"Cannot get IOVA of allocated segment\n");
			goto error;
		}
	}

	/* Only "Ex" function can handle hugepages. */
	info.VirtualAddress = addr;
	if (!QueryWorkingSetEx(current_process, &info, sizeof(info))) {
		RTE_LOG_WIN32_ERR("QueryWorkingSetEx(%p)", addr);
		goto error;
	}

	page = &info.VirtualAttributes;
	if (!page->Valid || !page->LargePage) {
		RTE_LOG(DEBUG, EAL, "Got regular page instead of a hugepage\n");
		goto error;
	}
	if (page->Node != numa_node) {
		RTE_LOG(DEBUG, EAL,
			"NUMA node hint %u (socket %d) not respected, got %u\n",
			numa_node, socket_id, page->Node);
		goto error;
	}

	ms->addr = addr;
	ms->hugepage_sz = hi->hugepage_sz;
	ms->len = alloc_sz;
	ms->nchannel = rte_memory_get_nchannel();
	ms->nrank = rte_memory_get_nrank();
	ms->iova = iova;
	ms->socket_id = socket_id;

	return 0;

error:
	/* Only jump here when `addr` and `alloc_sz` are valid. */
	if (eal_mem_decommit(addr, alloc_sz) && (rte_errno == EADDRNOTAVAIL)) {
		/* During decommitment, memory is temporarily returned
		 * to the system and the address may become unavailable.
		 */
		RTE_LOG(CRIT, EAL, "Address %p occupied by an alien "
			"allocation - MSL is not VA-contiguous!\n", addr);
	}
	return -1;
}

static int
free_seg(struct rte_memseg *ms)
{
	if (eal_mem_decommit(ms->addr, ms->len)) {
		if (rte_errno == EADDRNOTAVAIL) {
			/* See alloc_seg() for explanation. */
			RTE_LOG(CRIT, EAL, "Address %p occupied by an alien "
				"allocation - MSL is not VA-contiguous!\n",
				ms->addr);
		}
		return -1;
	}

	/* Must clear the segment, because alloc_seg() inspects it. */
	memset(ms, 0, sizeof(*ms));
	return 0;
}

struct alloc_walk_param {
	struct hugepage_info *hi;
	struct rte_memseg **ms;
	size_t page_sz;
	unsigned int segs_allocated;
	unsigned int n_segs;
	int socket;
	bool exact;
};

static int
alloc_seg_walk(const struct rte_memseg_list *msl, void *arg)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	struct alloc_walk_param *wa = arg;
	struct rte_memseg_list *cur_msl;
	size_t page_sz;
	int cur_idx, start_idx, j;
	unsigned int msl_idx, need, i;

	if (msl->page_sz != wa->page_sz)
		return 0;
	if (msl->socket_id != wa->socket)
		return 0;

	page_sz = (size_t)msl->page_sz;

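	/* msl is const; get a writable pointer via the shared config. */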
	msl_idx = msl - mcfg->memsegs;
	cur_msl = &mcfg->memsegs[msl_idx];

	need = wa->n_segs;

	/* try finding space in memseg list */
	if (wa->exact) {
		/* if we require exact number of pages in a list, find them */
		cur_idx = rte_fbarray_find_next_n_free(
			&cur_msl->memseg_arr, 0, need);
		if (cur_idx < 0)
			return 0;
		start_idx = cur_idx;
	} else {
		int cur_len;

		/* we don't require exact number of pages, so we're going to go
		 * for best-effort allocation. that means finding the biggest
		 * unused block, and going with that.
		 */
		cur_idx = rte_fbarray_find_biggest_free(
			&cur_msl->memseg_arr, 0);
		if (cur_idx < 0)
			return 0;
		start_idx = cur_idx;
		/* adjust the size to possibly be smaller than original
		 * request, but do not allow it to be bigger.
		 */
		cur_len = rte_fbarray_find_contig_free(
			&cur_msl->memseg_arr, cur_idx);
		need = RTE_MIN(need, (unsigned int)cur_len);
	}

	for (i = 0; i < need; i++, cur_idx++) {
		struct rte_memseg *cur;
		void *map_addr;

		cur = rte_fbarray_get(&cur_msl->memseg_arr, cur_idx);
		map_addr = RTE_PTR_ADD(cur_msl->base_va, cur_idx * page_sz);

		if (alloc_seg(cur, map_addr, wa->socket, wa->hi)) {
			RTE_LOG(DEBUG, EAL, "attempted to allocate %i segments, "
				"but only %i were allocated\n", need, i);

			/* if exact number wasn't requested, stop */
			if (!wa->exact)
				goto out;

			/* clean up */
			for (j = start_idx; j < cur_idx; j++) {
				struct rte_memseg *tmp;
				struct rte_fbarray *arr = &cur_msl->memseg_arr;

				tmp = rte_fbarray_get(arr, j);
				rte_fbarray_set_free(arr, j);

				if (free_seg(tmp))
					RTE_LOG(DEBUG, EAL, "Cannot free page\n");
			}
			/* clear the list */
			if (wa->ms)
				memset(wa->ms, 0, sizeof(*wa->ms) * wa->n_segs);

			return -1;
		}
		if (wa->ms)
			wa->ms[i] = cur;

		rte_fbarray_set_used(&cur_msl->memseg_arr, cur_idx);
	}

out:
	wa->segs_allocated = i;
	if (i > 0)
		cur_msl->version++;

	/* if we didn't allocate any segments, move on to the next list */
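	/* A non-zero return value stops the memseg list walk. */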
	return i > 0;
}

struct free_walk_param {
	struct hugepage_info *hi;
	struct rte_memseg *ms;
};
static int
free_seg_walk(const struct rte_memseg_list *msl, void *arg)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	struct rte_memseg_list *found_msl;
	struct free_walk_param *wa = arg;
	uintptr_t start_addr, end_addr;
	int msl_idx, seg_idx, ret;

	start_addr = (uintptr_t) msl->base_va;
	end_addr = start_addr + msl->len;

	if ((uintptr_t)wa->ms->addr < start_addr ||
			(uintptr_t)wa->ms->addr >= end_addr)
		return 0;

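	/* Translate the segment address into list and segment indices. */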
	msl_idx = msl - mcfg->memsegs;
	seg_idx = RTE_PTR_DIFF(wa->ms->addr, start_addr) / msl->page_sz;

	/* msl is const */
	found_msl = &mcfg->memsegs[msl_idx];
	found_msl->version++;

	rte_fbarray_set_free(&found_msl->memseg_arr, seg_idx);

	ret = free_seg(wa->ms);

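	/* Stop the walk: negative reports failure, positive reports success. */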
	return (ret < 0) ? (-1) : 1;
}

int
eal_memalloc_alloc_seg_bulk(struct rte_memseg **ms, int n_segs,
	size_t page_sz, int socket, bool exact)
{
	unsigned int i;
	int ret = -1;
	struct alloc_walk_param wa;
	struct hugepage_info *hi = NULL;
	struct internal_config *internal_conf =
		eal_get_internal_configuration();

	if (internal_conf->legacy_mem) {
		RTE_LOG(ERR, EAL, "dynamic allocation not supported in legacy mode\n");
		return -ENOTSUP;
	}

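	/* Find the hugepage_info entry matching the requested page size. */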
	for (i = 0; i < internal_conf->num_hugepage_sizes; i++) {
		struct hugepage_info *hpi = &internal_conf->hugepage_info[i];
		if (page_sz == hpi->hugepage_sz) {
			hi = hpi;
			break;
		}
	}
	if (!hi) {
		RTE_LOG(ERR, EAL, "cannot find relevant hugepage_info entry\n");
		return -1;
	}

	memset(&wa, 0, sizeof(wa));
	wa.exact = exact;
	wa.hi = hi;
	wa.ms = ms;
	wa.n_segs = n_segs;
	wa.page_sz = page_sz;
	wa.socket = socket;
	wa.segs_allocated = 0;

	/* memalloc is locked, so it's safe to use thread-unsafe version */
	ret = rte_memseg_list_walk_thread_unsafe(alloc_seg_walk, &wa);
	if (ret == 0) {
		RTE_LOG(ERR, EAL, "cannot find suitable memseg_list\n");
		ret = -1;
	} else if (ret > 0) {
		ret = (int)wa.segs_allocated;
	}

	return ret;
}

struct rte_memseg *
eal_memalloc_alloc_seg(size_t page_sz, int socket)
{
	struct rte_memseg *ms = NULL;
	eal_memalloc_alloc_seg_bulk(&ms, 1, page_sz, socket, true);
	return ms;
}

int
eal_memalloc_free_seg_bulk(struct rte_memseg **ms, int n_segs)
{
	int seg, ret = 0;
	struct internal_config *internal_conf =
		eal_get_internal_configuration();

	/* dynamic free not supported in legacy mode */
	if (internal_conf->legacy_mem)
		return -1;

	for (seg = 0; seg < n_segs; seg++) {
		struct rte_memseg *cur = ms[seg];
		struct hugepage_info *hi = NULL;
		struct free_walk_param wa;
		size_t i;
		int walk_res;

		/* if this page is marked as unfreeable, fail */
		if (cur->flags & RTE_MEMSEG_FLAG_DO_NOT_FREE) {
			RTE_LOG(DEBUG, EAL, "Page is not allowed to be freed\n");
			ret = -1;
			continue;
		}

		memset(&wa, 0, sizeof(wa));

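		/* Find the hugepage_info entry matching this segment's page size. */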
		for (i = 0; i < RTE_DIM(internal_conf->hugepage_info); i++) {
			hi = &internal_conf->hugepage_info[i];
			if (cur->hugepage_sz == hi->hugepage_sz)
				break;
		}
		if (i == RTE_DIM(internal_conf->hugepage_info)) {
			RTE_LOG(ERR, EAL, "Can't find relevant hugepage_info entry\n");
			ret = -1;
			continue;
		}

		wa.ms = cur;
		wa.hi = hi;

		/* memalloc is locked, so it's safe to use thread-unsafe version
		 */
		walk_res = rte_memseg_list_walk_thread_unsafe(free_seg_walk,
				&wa);
		if (walk_res == 1)
			continue;
		if (walk_res == 0)
			RTE_LOG(ERR, EAL, "Couldn't find memseg list\n");
		ret = -1;
	}
	return ret;
}

int
eal_memalloc_free_seg(struct rte_memseg *ms)
{
	return eal_memalloc_free_seg_bulk(&ms, 1);
}

int
eal_memalloc_sync_with_primary(void)
{
	/* No multi-process support. */
	EAL_LOG_NOT_IMPLEMENTED();
	return -1;
}

int
eal_memalloc_init(void)
{
	/* No action required. */
	return 0;
}