/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */
#include <sys/mman.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/sysctl.h>
#include <inttypes.h>
#include <errno.h>
#include <string.h>
#include <fcntl.h>

#include <rte_eal.h>
#include <rte_errno.h>
#include <rte_log.h>
#include <rte_string_fns.h>

#include "eal_private.h"
#include "eal_internal_cfg.h"
#include "eal_filesystem.h"
#include "eal_memcfg.h"
#include "eal_options.h"

#define EAL_PAGE_SIZE (sysconf(_SC_PAGESIZE))

uint64_t eal_get_baseaddr(void)
{
	/*
	 * FreeBSD may allocate something in the space where we will be mapping
	 * things before we get a chance to do that, so use a base address
	 * that's far away from where malloc() et al usually map things.
	 */
	return 0x1000000000ULL;
}

/*
 * Get physical address of any mapped virtual address in the current process.
 */
phys_addr_t
rte_mem_virt2phy(const void *virtaddr)
{
	/* XXX not implemented. This function is only used by
	 * rte_mempool_virt2iova() when hugepages are disabled. */
	(void)virtaddr;
	return RTE_BAD_IOVA;
}
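
/* Per-address IOVA lookups are likewise not supported on FreeBSD, so defer
 * to rte_mem_virt2phy(), which reports RTE_BAD_IOVA.
 */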
rte_iova_t
rte_mem_virt2iova(const void *virtaddr)
{
	return rte_mem_virt2phy(virtaddr);
}

int
rte_eal_hugepage_init(void)
{
	struct rte_mem_config *mcfg;
	uint64_t total_mem = 0;
	void *addr;
	unsigned int i, j, seg_idx = 0;
	struct internal_config *internal_conf =
		eal_get_internal_configuration();

	/* get pointer to global configuration */
	mcfg = rte_eal_get_configuration()->mem_config;

	/* for debug purposes, hugetlbfs can be disabled */
	if (internal_conf->no_hugetlbfs) {
		struct rte_memseg_list *msl;
		uint64_t mem_sz, page_sz;
		int n_segs;

		/* create a memseg list */
		msl = &mcfg->memsegs[0];

		mem_sz = internal_conf->memory;
		page_sz = RTE_PGSIZE_4K;
		n_segs = mem_sz / page_sz;

		if (eal_memseg_list_init_named(
				msl, "nohugemem", page_sz, n_segs, 0, true)) {
			return -1;
		}

		addr = mmap(NULL, mem_sz, PROT_READ | PROT_WRITE,
				MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (addr == MAP_FAILED) {
			RTE_LOG(ERR, EAL, "%s: mmap() failed: %s\n", __func__,
					strerror(errno));
			return -1;
		}

		msl->base_va = addr;
		msl->len = mem_sz;

		eal_memseg_list_populate(msl, addr, n_segs);

		return 0;
	}

	/* map all contigmem buffers and assign them to memseg lists */
	for (i = 0; i < internal_conf->num_hugepage_sizes; i++) {
		struct hugepage_info *hpi;
		rte_iova_t prev_end = 0;
		int prev_ms_idx = -1;
		uint64_t page_sz, mem_needed;
		unsigned int n_pages, max_pages;

		hpi = &internal_conf->hugepage_info[i];
		page_sz = hpi->hugepage_sz;
		max_pages = hpi->num_pages[0];
		mem_needed = RTE_ALIGN_CEIL(internal_conf->memory - total_mem,
				page_sz);

		n_pages = RTE_MIN(mem_needed / page_sz, max_pages);

		for (j = 0; j < n_pages; j++) {
			struct rte_memseg_list *msl;
			struct rte_fbarray *arr;
			struct rte_memseg *seg;
			int msl_idx, ms_idx;
			rte_iova_t physaddr;
			int error;
			size_t sysctl_size = sizeof(physaddr);
			char physaddr_str[64];
			bool is_adjacent;

			/* first, check if this segment is IOVA-adjacent to
			 * the previous one.
			 */
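			/* The contigmem driver exports each buffer's physical
			 * address through the hw.contigmem.physaddr.<index>
			 * sysctl.
			 */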
			snprintf(physaddr_str, sizeof(physaddr_str),
					"hw.contigmem.physaddr.%d", j);
			error = sysctlbyname(physaddr_str, &physaddr,
					&sysctl_size, NULL, 0);
			if (error < 0) {
				RTE_LOG(ERR, EAL, "Failed to get physical addr for buffer %u "
						"from %s\n", j, hpi->hugedir);
				return -1;
			}

			is_adjacent = prev_end != 0 && physaddr == prev_end;
			prev_end = physaddr + hpi->hugepage_sz;

			for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS;
					msl_idx++) {
				bool empty, need_hole;
				msl = &mcfg->memsegs[msl_idx];
				arr = &msl->memseg_arr;

				if (msl->page_sz != page_sz)
					continue;

				empty = arr->count == 0;

				/* we need a hole if this isn't an empty memseg
				 * list, and if previous segment was not
				 * adjacent to current one.
				 */
				need_hole = !empty && !is_adjacent;

				/* we need 1, plus hole if not adjacent */
				ms_idx = rte_fbarray_find_next_n_free(arr,
						0, 1 + (need_hole ? 1 : 0));

				/* memseg list is full? */
				if (ms_idx < 0)
					continue;

				if (need_hole && prev_ms_idx == ms_idx - 1)
					ms_idx++;
				prev_ms_idx = ms_idx;

				break;
			}
			if (msl_idx == RTE_MAX_MEMSEG_LISTS) {
				RTE_LOG(ERR, EAL, "Could not find space for memseg. Please increase %s and/or %s in configuration.\n",
					RTE_STR(RTE_MAX_MEMSEG_PER_TYPE),
					RTE_STR(RTE_MAX_MEM_MB_PER_TYPE));
				return -1;
			}
			arr = &msl->memseg_arr;
			seg = rte_fbarray_get(arr, ms_idx);

			addr = RTE_PTR_ADD(msl->base_va,
					(size_t)msl->page_sz * ms_idx);

			/* address is already mapped in memseg list, so using
			 * MAP_FIXED here is safe.
			 */
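			/* The offset into the contigmem device selects which
			 * buffer is mapped; it is expressed in units of the
			 * regular system page size, not the buffer size.
			 */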
			addr = mmap(addr, page_sz, PROT_READ|PROT_WRITE,
					MAP_SHARED | MAP_FIXED,
					hpi->lock_descriptor,
					j * EAL_PAGE_SIZE);
			if (addr == MAP_FAILED) {
				RTE_LOG(ERR, EAL, "Failed to mmap buffer %u from %s\n",
						j, hpi->hugedir);
				return -1;
			}

			seg->addr = addr;
			seg->iova = physaddr;
			seg->hugepage_sz = page_sz;
			seg->len = page_sz;
			seg->nchannel = mcfg->nchannel;
			seg->nrank = mcfg->nrank;
			seg->socket_id = 0;

			rte_fbarray_set_used(arr, ms_idx);

			RTE_LOG(INFO, EAL, "Mapped memory segment %u @ %p: physaddr:0x%"
					PRIx64", len %zu\n",
					seg_idx++, addr, physaddr, page_sz);

			total_mem += seg->len;
		}
		if (total_mem >= internal_conf->memory)
			break;
	}
	if (total_mem < internal_conf->memory) {
		RTE_LOG(ERR, EAL, "Couldn't reserve requested memory, "
				"requested: %" PRIu64 "M "
				"available: %" PRIu64 "M\n",
				internal_conf->memory >> 20, total_mem >> 20);
		return -1;
	}
	return 0;
}

struct attach_walk_args {
	int fd_hugepage;
	int seg_idx;
};
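
/* Callback for rte_memseg_walk(): map the contigmem buffer backing each
 * memseg at the same virtual address the primary process recorded for it.
 */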
static int
attach_segment(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
		void *arg)
{
	struct attach_walk_args *wa = arg;
	void *addr;

	if (msl->external)
		return 0;

	addr = mmap(ms->addr, ms->len, PROT_READ | PROT_WRITE,
			MAP_SHARED | MAP_FIXED, wa->fd_hugepage,
			wa->seg_idx * EAL_PAGE_SIZE);
	if (addr == MAP_FAILED || addr != ms->addr)
		return -1;
	wa->seg_idx++;

	return 0;
}

int
rte_eal_hugepage_attach(void)
{
	struct hugepage_info *hpi;
	int fd_hugepage = -1;
	unsigned int i;
	struct internal_config *internal_conf =
		eal_get_internal_configuration();

	hpi = &internal_conf->hugepage_info[0];

	for (i = 0; i < internal_conf->num_hugepage_sizes; i++) {
		const struct hugepage_info *cur_hpi = &hpi[i];
		struct attach_walk_args wa;

		memset(&wa, 0, sizeof(wa));

		/* Obtain a file descriptor for contiguous memory */
		fd_hugepage = open(cur_hpi->hugedir, O_RDWR);
		if (fd_hugepage < 0) {
			RTE_LOG(ERR, EAL, "Could not open %s\n",
					cur_hpi->hugedir);
			goto error;
		}
		wa.fd_hugepage = fd_hugepage;
		wa.seg_idx = 0;

		/* Map the contiguous memory into each memory segment */
		if (rte_memseg_walk(attach_segment, &wa) < 0) {
			RTE_LOG(ERR, EAL, "Failed to mmap buffer %u from %s\n",
				wa.seg_idx, cur_hpi->hugedir);
			goto error;
		}

		close(fd_hugepage);
		fd_hugepage = -1;
	}

	/* hugepage_info is no longer required */
	return 0;

error:
	if (fd_hugepage >= 0)
		close(fd_hugepage);
	return -1;
}

int
rte_eal_using_phys_addrs(void)
{
	return 0;
}

static uint64_t
get_mem_amount(uint64_t page_sz, uint64_t max_mem)
{
	uint64_t area_sz, max_pages;

	/* limit to RTE_MAX_MEMSEG_PER_LIST pages or RTE_MAX_MEM_MB_PER_LIST */
	max_pages = RTE_MAX_MEMSEG_PER_LIST;
	max_mem = RTE_MIN((uint64_t)RTE_MAX_MEM_MB_PER_LIST << 20, max_mem);

	area_sz = RTE_MIN(page_sz * max_pages, max_mem);

	/* make sure the list isn't smaller than the page size */
	area_sz = RTE_MAX(area_sz, page_sz);

	return RTE_ALIGN(area_sz, page_sz);
}

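/* Reserve VA space for a single memseg list; on ppc64, request a
 * hugepage-backed reservation.
 */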
static int
memseg_list_alloc(struct rte_memseg_list *msl)
{
	int flags = 0;

#ifdef RTE_ARCH_PPC_64
	flags |= EAL_RESERVE_HUGEPAGES;
#endif
	return eal_memseg_list_alloc(msl, flags);
}

static int
memseg_primary_init(void)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	int hpi_idx, msl_idx = 0;
	struct rte_memseg_list *msl;
	uint64_t max_mem, total_mem;
	struct internal_config *internal_conf =
		eal_get_internal_configuration();

	/* no-huge does not need this at all */
	if (internal_conf->no_hugetlbfs)
		return 0;

	/* FreeBSD has an issue where a core dump will dump the entire memory
	 * contents, including anonymous zero-page memory. Therefore, while we
	 * will be limiting the total amount of memory to RTE_MAX_MEM_MB, we
	 * will also further limit it to whatever memory is available to us
	 * through the contigmem driver (plus spacing blocks).
	 *
	 * So, at each stage, we check how much memory we are preallocating
	 * and adjust all the values accordingly.
	 */

	max_mem = (uint64_t)RTE_MAX_MEM_MB << 20;
	total_mem = 0;

	/* create memseg lists */
	for (hpi_idx = 0; hpi_idx < (int) internal_conf->num_hugepage_sizes;
			hpi_idx++) {
		uint64_t max_type_mem, total_type_mem = 0;
		uint64_t avail_mem;
		int type_msl_idx, max_segs, avail_segs, total_segs = 0;
		struct hugepage_info *hpi;
		uint64_t hugepage_sz;

		hpi = &internal_conf->hugepage_info[hpi_idx];
		hugepage_sz = hpi->hugepage_sz;

		/* no NUMA support on FreeBSD */

		/* check if we've already exceeded total memory amount */
		if (total_mem >= max_mem)
			break;

		/* first, calculate theoretical limits according to config */
		max_type_mem = RTE_MIN(max_mem - total_mem,
			(uint64_t)RTE_MAX_MEM_MB_PER_TYPE << 20);
		max_segs = RTE_MAX_MEMSEG_PER_TYPE;

		/* now, limit all of that to whatever will actually be
		 * available to us, because without dynamic allocation support,
		 * all of that extra memory will be sitting there being useless
		 * and slowing down core dumps in case of a crash.
		 *
		 * we need (N*2)-1 segments because we cannot guarantee that
		 * each segment will be IOVA-contiguous with the previous one,
		 * so we will allocate more and put spaces between segments
		 * that are non-contiguous.
		 */
		avail_segs = (hpi->num_pages[0] * 2) - 1;
		avail_mem = avail_segs * hugepage_sz;

		max_type_mem = RTE_MIN(avail_mem, max_type_mem);
		max_segs = RTE_MIN(avail_segs, max_segs);

		type_msl_idx = 0;
		while (total_type_mem < max_type_mem &&
				total_segs < max_segs) {
			uint64_t cur_max_mem, cur_mem;
			unsigned int n_segs;

			if (msl_idx >= RTE_MAX_MEMSEG_LISTS) {
				RTE_LOG(ERR, EAL,
					"No more space in memseg lists, please increase %s\n",
					RTE_STR(RTE_MAX_MEMSEG_LISTS));
				return -1;
			}

			msl = &mcfg->memsegs[msl_idx++];

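			/* size this list to whatever remains of this type's
			 * memory budget, subject to the per-list limits
			 * applied by get_mem_amount().
			 */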
			cur_max_mem = max_type_mem - total_type_mem;

			cur_mem = get_mem_amount(hugepage_sz,
					cur_max_mem);
			n_segs = cur_mem / hugepage_sz;

			if (eal_memseg_list_init(msl, hugepage_sz, n_segs,
					0, type_msl_idx, false))
				return -1;

			total_segs += msl->memseg_arr.len;
			total_type_mem = total_segs * hugepage_sz;
			type_msl_idx++;

			if (memseg_list_alloc(msl)) {
				RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list\n");
				return -1;
			}
		}
		total_mem += total_type_mem;
	}
	return 0;
}

static int
memseg_secondary_init(void)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	int msl_idx = 0;
	struct rte_memseg_list *msl;

	for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {

		msl = &mcfg->memsegs[msl_idx];

		/* skip empty memseg lists */
		if (msl->memseg_arr.len == 0)
			continue;

		if (rte_fbarray_attach(&msl->memseg_arr)) {
			RTE_LOG(ERR, EAL, "Cannot attach to primary process memseg lists\n");
			return -1;
		}

		/* preallocate VA space */
		if (memseg_list_alloc(msl)) {
			RTE_LOG(ERR, EAL, "Cannot preallocate VA space for hugepage memory\n");
			return -1;
		}
	}

	return 0;
}

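/* Set up memseg lists: the primary process creates them, while secondary
 * processes attach to the primary's lists.
 */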
int
rte_eal_memseg_init(void)
{
	return rte_eal_process_type() == RTE_PROC_PRIMARY ?
			memseg_primary_init() :
			memseg_secondary_init();
}