17cc577a4SJonathan Peyton /*
2de4749b7SJonathan Peyton * kmp_alloc.cpp -- private/shared dynamic memory allocation and management
37cc577a4SJonathan Peyton */
47cc577a4SJonathan Peyton
57cc577a4SJonathan Peyton //===----------------------------------------------------------------------===//
67cc577a4SJonathan Peyton //
757b08b09SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
857b08b09SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
957b08b09SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
107cc577a4SJonathan Peyton //
117cc577a4SJonathan Peyton //===----------------------------------------------------------------------===//
127cc577a4SJonathan Peyton
137cc577a4SJonathan Peyton #include "kmp.h"
147cc577a4SJonathan Peyton #include "kmp_io.h"
153041982dSJonathan Peyton #include "kmp_wrapper_malloc.h"
167cc577a4SJonathan Peyton
177cc577a4SJonathan Peyton // Disable bget when it is not used
187cc577a4SJonathan Peyton #if KMP_USE_BGET
197cc577a4SJonathan Peyton
207cc577a4SJonathan Peyton /* Thread private buffer management code */
217cc577a4SJonathan Peyton
227cc577a4SJonathan Peyton typedef int (*bget_compact_t)(size_t, int);
237cc577a4SJonathan Peyton typedef void *(*bget_acquire_t)(size_t);
247cc577a4SJonathan Peyton typedef void (*bget_release_t)(void *);
257cc577a4SJonathan Peyton
267cc577a4SJonathan Peyton /* NOTE: bufsize must be a signed datatype */
277cc577a4SJonathan Peyton
287cc577a4SJonathan Peyton #if KMP_OS_WINDOWS
297cc577a4SJonathan Peyton #if KMP_ARCH_X86 || KMP_ARCH_ARM
307cc577a4SJonathan Peyton typedef kmp_int32 bufsize;
317cc577a4SJonathan Peyton #else
327cc577a4SJonathan Peyton typedef kmp_int64 bufsize;
337cc577a4SJonathan Peyton #endif
347cc577a4SJonathan Peyton #else
357cc577a4SJonathan Peyton typedef ssize_t bufsize;
36e4b4f994SJonathan Peyton #endif // KMP_OS_WINDOWS
377cc577a4SJonathan Peyton
387cc577a4SJonathan Peyton /* The three modes of operation are, fifo search, lifo search, and best-fit */
397cc577a4SJonathan Peyton
407cc577a4SJonathan Peyton typedef enum bget_mode {
417cc577a4SJonathan Peyton bget_mode_fifo = 0,
427cc577a4SJonathan Peyton bget_mode_lifo = 1,
437cc577a4SJonathan Peyton bget_mode_best = 2
447cc577a4SJonathan Peyton } bget_mode_t;
457cc577a4SJonathan Peyton
467cc577a4SJonathan Peyton static void bpool(kmp_info_t *th, void *buffer, bufsize len);
477cc577a4SJonathan Peyton static void *bget(kmp_info_t *th, bufsize size);
487cc577a4SJonathan Peyton static void *bgetz(kmp_info_t *th, bufsize size);
497cc577a4SJonathan Peyton static void *bgetr(kmp_info_t *th, void *buffer, bufsize newsize);
507cc577a4SJonathan Peyton static void brel(kmp_info_t *th, void *buf);
513041982dSJonathan Peyton static void bectl(kmp_info_t *th, bget_compact_t compact,
523041982dSJonathan Peyton bget_acquire_t acquire, bget_release_t release,
533041982dSJonathan Peyton bufsize pool_incr);
547cc577a4SJonathan Peyton
557cc577a4SJonathan Peyton /* BGET CONFIGURATION */
563041982dSJonathan Peyton /* Buffer allocation size quantum: all buffers allocated are a
573041982dSJonathan Peyton multiple of this size. This MUST be a power of two. */
587cc577a4SJonathan Peyton
593041982dSJonathan Peyton /* On IA-32 architecture with Linux* OS, malloc() does not
60ed5fe645SKelvin Li ensure 16 byte alignment */
617cc577a4SJonathan Peyton
627cc577a4SJonathan Peyton #if KMP_ARCH_X86 || !KMP_HAVE_QUAD
637cc577a4SJonathan Peyton
647cc577a4SJonathan Peyton #define SizeQuant 8
657cc577a4SJonathan Peyton #define AlignType double
667cc577a4SJonathan Peyton
677cc577a4SJonathan Peyton #else
687cc577a4SJonathan Peyton
697cc577a4SJonathan Peyton #define SizeQuant 16
707cc577a4SJonathan Peyton #define AlignType _Quad
717cc577a4SJonathan Peyton
727cc577a4SJonathan Peyton #endif
737cc577a4SJonathan Peyton
743041982dSJonathan Peyton // Define this symbol to enable the bstats() function which calculates the
753041982dSJonathan Peyton // total free space in the buffer pool, the largest available buffer, and the
763041982dSJonathan Peyton // total space currently allocated.
773041982dSJonathan Peyton #define BufStats 1
787cc577a4SJonathan Peyton
797cc577a4SJonathan Peyton #ifdef KMP_DEBUG
807cc577a4SJonathan Peyton
813041982dSJonathan Peyton // Define this symbol to enable the bpoold() function which dumps the buffers
823041982dSJonathan Peyton // in a buffer pool.
833041982dSJonathan Peyton #define BufDump 1
847cc577a4SJonathan Peyton
853041982dSJonathan Peyton // Define this symbol to enable the bpoolv() function for validating a buffer
863041982dSJonathan Peyton // pool.
873041982dSJonathan Peyton #define BufValid 1
887cc577a4SJonathan Peyton
893041982dSJonathan Peyton // Define this symbol to enable the bufdump() function which allows dumping the
903041982dSJonathan Peyton // contents of an allocated or free buffer.
913041982dSJonathan Peyton #define DumpData 1
923041982dSJonathan Peyton
937cc577a4SJonathan Peyton #ifdef NOT_USED_NOW
947cc577a4SJonathan Peyton
953041982dSJonathan Peyton // Wipe free buffers to a guaranteed pattern of garbage to trip up miscreants
963041982dSJonathan Peyton // who attempt to use pointers into released buffers.
973041982dSJonathan Peyton #define FreeWipe 1
987cc577a4SJonathan Peyton
993041982dSJonathan Peyton // Use a best fit algorithm when searching for space for an allocation request.
1003041982dSJonathan Peyton // This uses memory more efficiently, but allocation will be much slower.
1013041982dSJonathan Peyton #define BestFit 1
1023041982dSJonathan Peyton
1037cc577a4SJonathan Peyton #endif /* NOT_USED_NOW */
1047cc577a4SJonathan Peyton #endif /* KMP_DEBUG */
1057cc577a4SJonathan Peyton
1067cc577a4SJonathan Peyton static bufsize bget_bin_size[] = {
1077cc577a4SJonathan Peyton 0,
1087cc577a4SJonathan Peyton // 1 << 6, /* .5 Cache line */
1097cc577a4SJonathan Peyton 1 << 7, /* 1 Cache line, new */
1107cc577a4SJonathan Peyton 1 << 8, /* 2 Cache lines */
1117cc577a4SJonathan Peyton 1 << 9, /* 4 Cache lines, new */
1127cc577a4SJonathan Peyton 1 << 10, /* 8 Cache lines */
1137cc577a4SJonathan Peyton 1 << 11, /* 16 Cache lines, new */
1143041982dSJonathan Peyton 1 << 12, 1 << 13, /* new */
1153041982dSJonathan Peyton 1 << 14, 1 << 15, /* new */
1163041982dSJonathan Peyton 1 << 16, 1 << 17, 1 << 18, 1 << 19, 1 << 20, /* 1MB */
1177cc577a4SJonathan Peyton 1 << 21, /* 2MB */
1187cc577a4SJonathan Peyton 1 << 22, /* 4MB */
1197cc577a4SJonathan Peyton 1 << 23, /* 8MB */
1207cc577a4SJonathan Peyton 1 << 24, /* 16MB */
1217cc577a4SJonathan Peyton 1 << 25, /* 32MB */
1227cc577a4SJonathan Peyton };
1237cc577a4SJonathan Peyton
1247cc577a4SJonathan Peyton #define MAX_BGET_BINS (int)(sizeof(bget_bin_size) / sizeof(bufsize))
1257cc577a4SJonathan Peyton
1267cc577a4SJonathan Peyton struct bfhead;
1277cc577a4SJonathan Peyton
1283041982dSJonathan Peyton // Declare the interface, including the requested buffer size type, bufsize.
1297cc577a4SJonathan Peyton
1307cc577a4SJonathan Peyton /* Queue links */
1317cc577a4SJonathan Peyton typedef struct qlinks {
1327cc577a4SJonathan Peyton struct bfhead *flink; /* Forward link */
1337cc577a4SJonathan Peyton struct bfhead *blink; /* Backward link */
1347cc577a4SJonathan Peyton } qlinks_t;
1357cc577a4SJonathan Peyton
1367cc577a4SJonathan Peyton /* Header in allocated and free buffers */
1377cc577a4SJonathan Peyton typedef struct bhead2 {
1387cc577a4SJonathan Peyton kmp_info_t *bthr; /* The thread which owns the buffer pool */
1393041982dSJonathan Peyton bufsize prevfree; /* Relative link back to previous free buffer in memory or
1403041982dSJonathan Peyton 0 if previous buffer is allocated. */
1413041982dSJonathan Peyton bufsize bsize; /* Buffer size: positive if free, negative if allocated. */
1427cc577a4SJonathan Peyton } bhead2_t;
1437cc577a4SJonathan Peyton
1447cc577a4SJonathan Peyton /* Make sure the bhead structure is a multiple of SizeQuant in size. */
1457cc577a4SJonathan Peyton typedef union bhead {
1467cc577a4SJonathan Peyton KMP_ALIGN(SizeQuant)
1477cc577a4SJonathan Peyton AlignType b_align;
1487cc577a4SJonathan Peyton char b_pad[sizeof(bhead2_t) + (SizeQuant - (sizeof(bhead2_t) % SizeQuant))];
1497cc577a4SJonathan Peyton bhead2_t bb;
1507cc577a4SJonathan Peyton } bhead_t;
1517cc577a4SJonathan Peyton #define BH(p) ((bhead_t *)(p))
1527cc577a4SJonathan Peyton
1537cc577a4SJonathan Peyton /* Header in directly allocated buffers (by acqfcn) */
1543041982dSJonathan Peyton typedef struct bdhead {
1557cc577a4SJonathan Peyton bufsize tsize; /* Total size, including overhead */
1567cc577a4SJonathan Peyton bhead_t bh; /* Common header */
1577cc577a4SJonathan Peyton } bdhead_t;
1587cc577a4SJonathan Peyton #define BDH(p) ((bdhead_t *)(p))
1597cc577a4SJonathan Peyton
1607cc577a4SJonathan Peyton /* Header in free buffers */
1617cc577a4SJonathan Peyton typedef struct bfhead {
1627cc577a4SJonathan Peyton bhead_t bh; /* Common allocated/free header */
1637cc577a4SJonathan Peyton qlinks_t ql; /* Links on free list */
1647cc577a4SJonathan Peyton } bfhead_t;
1657cc577a4SJonathan Peyton #define BFH(p) ((bfhead_t *)(p))
1667cc577a4SJonathan Peyton
1677cc577a4SJonathan Peyton typedef struct thr_data {
1687cc577a4SJonathan Peyton bfhead_t freelist[MAX_BGET_BINS];
1697cc577a4SJonathan Peyton #if BufStats
1707cc577a4SJonathan Peyton size_t totalloc; /* Total space currently allocated */
1717cc577a4SJonathan Peyton long numget, numrel; /* Number of bget() and brel() calls */
1727cc577a4SJonathan Peyton long numpblk; /* Number of pool blocks */
1737cc577a4SJonathan Peyton long numpget, numprel; /* Number of block gets and rels */
1747cc577a4SJonathan Peyton long numdget, numdrel; /* Number of direct gets and rels */
1757cc577a4SJonathan Peyton #endif /* BufStats */
1767cc577a4SJonathan Peyton
1777cc577a4SJonathan Peyton /* Automatic expansion block management functions */
1787cc577a4SJonathan Peyton bget_compact_t compfcn;
1797cc577a4SJonathan Peyton bget_acquire_t acqfcn;
1807cc577a4SJonathan Peyton bget_release_t relfcn;
1817cc577a4SJonathan Peyton
1827cc577a4SJonathan Peyton bget_mode_t mode; /* what allocation mode to use? */
1837cc577a4SJonathan Peyton
1847cc577a4SJonathan Peyton bufsize exp_incr; /* Expansion block size */
1857cc577a4SJonathan Peyton bufsize pool_len; /* 0: no bpool calls have been made
1863041982dSJonathan Peyton -1: not all pool blocks are the same size
1873041982dSJonathan Peyton >0: (common) block size for all bpool calls made so far
1887cc577a4SJonathan Peyton */
18942016791SKazuaki Ishizaki bfhead_t *last_pool; /* Last pool owned by this thread (delay deallocation) */
1907cc577a4SJonathan Peyton } thr_data_t;
1917cc577a4SJonathan Peyton
1927cc577a4SJonathan Peyton /* Minimum allocation quantum: */
1937cc577a4SJonathan Peyton #define QLSize (sizeof(qlinks_t))
1947cc577a4SJonathan Peyton #define SizeQ ((SizeQuant > QLSize) ? SizeQuant : QLSize)
1953041982dSJonathan Peyton #define MaxSize \
1963041982dSJonathan Peyton (bufsize)( \
1973041982dSJonathan Peyton ~(((bufsize)(1) << (sizeof(bufsize) * CHAR_BIT - 1)) | (SizeQuant - 1)))
19842016791SKazuaki Ishizaki // Maximum for the requested size.
1997cc577a4SJonathan Peyton
2007cc577a4SJonathan Peyton /* End sentinel: value placed in bsize field of dummy block delimiting
2017cc577a4SJonathan Peyton end of pool block. The most negative number which will fit in a
2027cc577a4SJonathan Peyton bufsize, defined in a way that the compiler will accept. */
2037cc577a4SJonathan Peyton
2043041982dSJonathan Peyton #define ESent \
2053041982dSJonathan Peyton ((bufsize)(-(((((bufsize)1) << ((int)sizeof(bufsize) * 8 - 2)) - 1) * 2) - 2))
2067cc577a4SJonathan Peyton
2077cc577a4SJonathan Peyton /* Thread Data management routines */
bget_get_bin(bufsize size)2083041982dSJonathan Peyton static int bget_get_bin(bufsize size) {
2097cc577a4SJonathan Peyton // binary chop bins
2107cc577a4SJonathan Peyton int lo = 0, hi = MAX_BGET_BINS - 1;
2117cc577a4SJonathan Peyton
2127cc577a4SJonathan Peyton KMP_DEBUG_ASSERT(size > 0);
2137cc577a4SJonathan Peyton
2147cc577a4SJonathan Peyton while ((hi - lo) > 1) {
2157cc577a4SJonathan Peyton int mid = (lo + hi) >> 1;
2167cc577a4SJonathan Peyton if (size < bget_bin_size[mid])
2177cc577a4SJonathan Peyton hi = mid - 1;
2187cc577a4SJonathan Peyton else
2197cc577a4SJonathan Peyton lo = mid;
2207cc577a4SJonathan Peyton }
2217cc577a4SJonathan Peyton
2227cc577a4SJonathan Peyton KMP_DEBUG_ASSERT((lo >= 0) && (lo < MAX_BGET_BINS));
2237cc577a4SJonathan Peyton
2247cc577a4SJonathan Peyton return lo;
2257cc577a4SJonathan Peyton }
2267cc577a4SJonathan Peyton
set_thr_data(kmp_info_t * th)2273041982dSJonathan Peyton static void set_thr_data(kmp_info_t *th) {
2287cc577a4SJonathan Peyton int i;
2297cc577a4SJonathan Peyton thr_data_t *data;
2307cc577a4SJonathan Peyton
2313041982dSJonathan Peyton data = (thr_data_t *)((!th->th.th_local.bget_data)
2323041982dSJonathan Peyton ? __kmp_allocate(sizeof(*data))
2333041982dSJonathan Peyton : th->th.th_local.bget_data);
2347cc577a4SJonathan Peyton
2357cc577a4SJonathan Peyton memset(data, '\0', sizeof(*data));
2367cc577a4SJonathan Peyton
2377cc577a4SJonathan Peyton for (i = 0; i < MAX_BGET_BINS; ++i) {
2387cc577a4SJonathan Peyton data->freelist[i].ql.flink = &data->freelist[i];
2397cc577a4SJonathan Peyton data->freelist[i].ql.blink = &data->freelist[i];
2407cc577a4SJonathan Peyton }
2417cc577a4SJonathan Peyton
2427cc577a4SJonathan Peyton th->th.th_local.bget_data = data;
2437cc577a4SJonathan Peyton th->th.th_local.bget_list = 0;
2447cc577a4SJonathan Peyton #if !USE_CMP_XCHG_FOR_BGET
2457cc577a4SJonathan Peyton #ifdef USE_QUEUING_LOCK_FOR_BGET
2467cc577a4SJonathan Peyton __kmp_init_lock(&th->th.th_local.bget_lock);
2477cc577a4SJonathan Peyton #else
2487cc577a4SJonathan Peyton __kmp_init_bootstrap_lock(&th->th.th_local.bget_lock);
2497cc577a4SJonathan Peyton #endif /* USE_LOCK_FOR_BGET */
2507cc577a4SJonathan Peyton #endif /* ! USE_CMP_XCHG_FOR_BGET */
2517cc577a4SJonathan Peyton }
2527cc577a4SJonathan Peyton
get_thr_data(kmp_info_t * th)2533041982dSJonathan Peyton static thr_data_t *get_thr_data(kmp_info_t *th) {
2547cc577a4SJonathan Peyton thr_data_t *data;
2557cc577a4SJonathan Peyton
2567cc577a4SJonathan Peyton data = (thr_data_t *)th->th.th_local.bget_data;
2577cc577a4SJonathan Peyton
2587cc577a4SJonathan Peyton KMP_DEBUG_ASSERT(data != 0);
2597cc577a4SJonathan Peyton
2607cc577a4SJonathan Peyton return data;
2617cc577a4SJonathan Peyton }
2627cc577a4SJonathan Peyton
2637cc577a4SJonathan Peyton /* Walk the free list and release the enqueued buffers */
/* Drain this thread's deferred-release list and brel() every buffer on it.
   Buffers freed by other threads are pushed onto th->th.th_local.bget_list
   (see __kmp_bget_enqueue); only the owning thread may actually return them
   to its pools, which is what this routine does. */
static void __kmp_bget_dequeue(kmp_info_t *th) {
  void *p = TCR_SYNC_PTR(th->th.th_local.bget_list);

  if (p != 0) {
#if USE_CMP_XCHG_FOR_BGET
    {
      // Atomically detach the entire list by swapping the head with
      // nullptr; retry if another thread pushes between the read and the
      // compare-and-store.
      volatile void *old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
      while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
                                        CCAST(void *, old_value), nullptr)) {
        KMP_CPU_PAUSE();
        old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
      }
      p = CCAST(void *, old_value);
    }
#else /* ! USE_CMP_XCHG_FOR_BGET */
#ifdef USE_QUEUING_LOCK_FOR_BGET
    __kmp_acquire_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
#else
    __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
#endif /* USE_QUEUING_LOCK_FOR_BGET */

    // Under the lock, detach the whole list in one shot.
    p = (void *)th->th.th_local.bget_list;
    th->th.th_local.bget_list = 0;

#ifdef USE_QUEUING_LOCK_FOR_BGET
    __kmp_release_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
#else
    __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
#endif
#endif /* USE_CMP_XCHG_FOR_BGET */

    /* Check again to make sure the list is not empty */
    // Walk the detached chain (linked through ql.flink) and release each
    // buffer.  The flink must be read before brel(), which may reuse the
    // header.
    while (p != 0) {
      void *buf = p;
      // Step back from the user pointer to the buffer header.
      bfhead_t *b = BFH(((char *)p) - sizeof(bhead_t));

      KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
      KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
                       (kmp_uintptr_t)th); // clear possible mark
      KMP_DEBUG_ASSERT(b->ql.blink == 0);

      p = (void *)b->ql.flink;

      brel(th, buf);
    }
  }
}
3117cc577a4SJonathan Peyton
3127cc577a4SJonathan Peyton /* Chain together the free buffers by using the thread owner field */
/* Hand buffer 'buf' back to its owning thread 'th' by pushing it onto
   th's deferred-release list (chained through the ql.flink field).  The
   owner later drains the list in __kmp_bget_dequeue().  Called by a
   thread that is NOT the owner, hence the atomic/locked push. */
static void __kmp_bget_enqueue(kmp_info_t *th, void *buf
#ifdef USE_QUEUING_LOCK_FOR_BGET
                               ,
                               kmp_int32 rel_gtid
#endif
) {
  // Step back from the user pointer to the buffer header.
  bfhead_t *b = BFH(((char *)buf) - sizeof(bhead_t));

  KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
  KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
                   (kmp_uintptr_t)th); // clear possible mark

  b->ql.blink = 0;

  KC_TRACE(10, ("__kmp_bget_enqueue: moving buffer to T#%d list\n",
                __kmp_gtid_from_thread(th)));

#if USE_CMP_XCHG_FOR_BGET
  {
    volatile void *old_value = TCR_PTR(th->th.th_local.bget_list);
    /* the next pointer must be set before setting bget_list to buf to avoid
       exposing a broken list to other threads, even for an instant. */
    b->ql.flink = BFH(CCAST(void *, old_value));

    // Classic lock-free push: retry the CAS until the head we read is
    // still the head we replace.
    while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
                                      CCAST(void *, old_value), buf)) {
      KMP_CPU_PAUSE();
      old_value = TCR_PTR(th->th.th_local.bget_list);
      /* the next pointer must be set before setting bget_list to buf to avoid
         exposing a broken list to other threads, even for an instant. */
      b->ql.flink = BFH(CCAST(void *, old_value));
    }
  }
#else /* ! USE_CMP_XCHG_FOR_BGET */
#ifdef USE_QUEUING_LOCK_FOR_BGET
  __kmp_acquire_lock(&th->th.th_local.bget_lock, rel_gtid);
#else
  __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
#endif

  // Under the lock, a plain push onto the singly linked list suffices.
  b->ql.flink = BFH(th->th.th_local.bget_list);
  th->th.th_local.bget_list = (void *)buf;

#ifdef USE_QUEUING_LOCK_FOR_BGET
  __kmp_release_lock(&th->th.th_local.bget_lock, rel_gtid);
#else
  __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
#endif
#endif /* USE_CMP_XCHG_FOR_BGET */
}
3637cc577a4SJonathan Peyton
3647cc577a4SJonathan Peyton /* insert buffer back onto a new freelist */
__kmp_bget_insert_into_freelist(thr_data_t * thr,bfhead_t * b)3653041982dSJonathan Peyton static void __kmp_bget_insert_into_freelist(thr_data_t *thr, bfhead_t *b) {
3667cc577a4SJonathan Peyton int bin;
3677cc577a4SJonathan Peyton
3687cc577a4SJonathan Peyton KMP_DEBUG_ASSERT(((size_t)b) % SizeQuant == 0);
3697cc577a4SJonathan Peyton KMP_DEBUG_ASSERT(b->bh.bb.bsize % SizeQuant == 0);
3707cc577a4SJonathan Peyton
3717cc577a4SJonathan Peyton bin = bget_get_bin(b->bh.bb.bsize);
3727cc577a4SJonathan Peyton
3733041982dSJonathan Peyton KMP_DEBUG_ASSERT(thr->freelist[bin].ql.blink->ql.flink ==
3743041982dSJonathan Peyton &thr->freelist[bin]);
3753041982dSJonathan Peyton KMP_DEBUG_ASSERT(thr->freelist[bin].ql.flink->ql.blink ==
3763041982dSJonathan Peyton &thr->freelist[bin]);
3777cc577a4SJonathan Peyton
3787cc577a4SJonathan Peyton b->ql.flink = &thr->freelist[bin];
3797cc577a4SJonathan Peyton b->ql.blink = thr->freelist[bin].ql.blink;
3807cc577a4SJonathan Peyton
3817cc577a4SJonathan Peyton thr->freelist[bin].ql.blink = b;
3827cc577a4SJonathan Peyton b->ql.blink->ql.flink = b;
3837cc577a4SJonathan Peyton }
3847cc577a4SJonathan Peyton
3857cc577a4SJonathan Peyton /* unlink the buffer from the old freelist */
__kmp_bget_remove_from_freelist(bfhead_t * b)3863041982dSJonathan Peyton static void __kmp_bget_remove_from_freelist(bfhead_t *b) {
3877cc577a4SJonathan Peyton KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
3887cc577a4SJonathan Peyton KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
3897cc577a4SJonathan Peyton
3907cc577a4SJonathan Peyton b->ql.blink->ql.flink = b->ql.flink;
3917cc577a4SJonathan Peyton b->ql.flink->ql.blink = b->ql.blink;
3927cc577a4SJonathan Peyton }
3937cc577a4SJonathan Peyton
3947cc577a4SJonathan Peyton /* GET STATS -- check info on free list */
bcheck(kmp_info_t * th,bufsize * max_free,bufsize * total_free)3953041982dSJonathan Peyton static void bcheck(kmp_info_t *th, bufsize *max_free, bufsize *total_free) {
3967cc577a4SJonathan Peyton thr_data_t *thr = get_thr_data(th);
3977cc577a4SJonathan Peyton int bin;
3987cc577a4SJonathan Peyton
3997cc577a4SJonathan Peyton *total_free = *max_free = 0;
4007cc577a4SJonathan Peyton
4017cc577a4SJonathan Peyton for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
4027cc577a4SJonathan Peyton bfhead_t *b, *best;
4037cc577a4SJonathan Peyton
4047cc577a4SJonathan Peyton best = &thr->freelist[bin];
4057cc577a4SJonathan Peyton b = best->ql.flink;
4067cc577a4SJonathan Peyton
4077cc577a4SJonathan Peyton while (b != &thr->freelist[bin]) {
4087cc577a4SJonathan Peyton *total_free += (b->bh.bb.bsize - sizeof(bhead_t));
4097cc577a4SJonathan Peyton if ((best == &thr->freelist[bin]) || (b->bh.bb.bsize < best->bh.bb.bsize))
4107cc577a4SJonathan Peyton best = b;
4117cc577a4SJonathan Peyton
4127cc577a4SJonathan Peyton /* Link to next buffer */
4137cc577a4SJonathan Peyton b = b->ql.flink;
4147cc577a4SJonathan Peyton }
4157cc577a4SJonathan Peyton
4167cc577a4SJonathan Peyton if (*max_free < best->bh.bb.bsize)
4177cc577a4SJonathan Peyton *max_free = best->bh.bb.bsize;
4187cc577a4SJonathan Peyton }
4197cc577a4SJonathan Peyton
4207cc577a4SJonathan Peyton if (*max_free > (bufsize)sizeof(bhead_t))
4217cc577a4SJonathan Peyton *max_free -= sizeof(bhead_t);
4227cc577a4SJonathan Peyton }
4237cc577a4SJonathan Peyton
4247cc577a4SJonathan Peyton /* BGET -- Allocate a buffer. */
bget(kmp_info_t * th,bufsize requested_size)4253041982dSJonathan Peyton static void *bget(kmp_info_t *th, bufsize requested_size) {
4267cc577a4SJonathan Peyton thr_data_t *thr = get_thr_data(th);
4277cc577a4SJonathan Peyton bufsize size = requested_size;
4287cc577a4SJonathan Peyton bfhead_t *b;
4297cc577a4SJonathan Peyton void *buf;
4307cc577a4SJonathan Peyton int compactseq = 0;
4317cc577a4SJonathan Peyton int use_blink = 0;
4327cc577a4SJonathan Peyton /* For BestFit */
4337cc577a4SJonathan Peyton bfhead_t *best;
4347cc577a4SJonathan Peyton
4357cc577a4SJonathan Peyton if (size < 0 || size + sizeof(bhead_t) > MaxSize) {
4367cc577a4SJonathan Peyton return NULL;
437bd3a7633SJonathan Peyton }
4387cc577a4SJonathan Peyton
4397cc577a4SJonathan Peyton __kmp_bget_dequeue(th); /* Release any queued buffers */
4407cc577a4SJonathan Peyton
4413041982dSJonathan Peyton if (size < (bufsize)SizeQ) { // Need at least room for the queue links.
4423041982dSJonathan Peyton size = SizeQ;
4437cc577a4SJonathan Peyton }
4447cc577a4SJonathan Peyton #if defined(SizeQuant) && (SizeQuant > 1)
4457cc577a4SJonathan Peyton size = (size + (SizeQuant - 1)) & (~(SizeQuant - 1));
4467cc577a4SJonathan Peyton #endif
4477cc577a4SJonathan Peyton
4483041982dSJonathan Peyton size += sizeof(bhead_t); // Add overhead in allocated buffer to size required.
4497cc577a4SJonathan Peyton KMP_DEBUG_ASSERT(size >= 0);
4507cc577a4SJonathan Peyton KMP_DEBUG_ASSERT(size % SizeQuant == 0);
4517cc577a4SJonathan Peyton
4527cc577a4SJonathan Peyton use_blink = (thr->mode == bget_mode_lifo);
4537cc577a4SJonathan Peyton
4547cc577a4SJonathan Peyton /* If a compact function was provided in the call to bectl(), wrap
4557cc577a4SJonathan Peyton a loop around the allocation process to allow compaction to
4567cc577a4SJonathan Peyton intervene in case we don't find a suitable buffer in the chain. */
4577cc577a4SJonathan Peyton
4587cc577a4SJonathan Peyton for (;;) {
4597cc577a4SJonathan Peyton int bin;
4607cc577a4SJonathan Peyton
4617cc577a4SJonathan Peyton for (bin = bget_get_bin(size); bin < MAX_BGET_BINS; ++bin) {
4627cc577a4SJonathan Peyton /* Link to next buffer */
4633041982dSJonathan Peyton b = (use_blink ? thr->freelist[bin].ql.blink
4643041982dSJonathan Peyton : thr->freelist[bin].ql.flink);
4657cc577a4SJonathan Peyton
4667cc577a4SJonathan Peyton if (thr->mode == bget_mode_best) {
4677cc577a4SJonathan Peyton best = &thr->freelist[bin];
4687cc577a4SJonathan Peyton
4697cc577a4SJonathan Peyton /* Scan the free list searching for the first buffer big enough
4707cc577a4SJonathan Peyton to hold the requested size buffer. */
4717cc577a4SJonathan Peyton while (b != &thr->freelist[bin]) {
4727cc577a4SJonathan Peyton if (b->bh.bb.bsize >= (bufsize)size) {
4733041982dSJonathan Peyton if ((best == &thr->freelist[bin]) ||
4743041982dSJonathan Peyton (b->bh.bb.bsize < best->bh.bb.bsize)) {
4757cc577a4SJonathan Peyton best = b;
4767cc577a4SJonathan Peyton }
4777cc577a4SJonathan Peyton }
4787cc577a4SJonathan Peyton
4797cc577a4SJonathan Peyton /* Link to next buffer */
4807cc577a4SJonathan Peyton b = (use_blink ? b->ql.blink : b->ql.flink);
4817cc577a4SJonathan Peyton }
4827cc577a4SJonathan Peyton b = best;
4837cc577a4SJonathan Peyton }
4847cc577a4SJonathan Peyton
4857cc577a4SJonathan Peyton while (b != &thr->freelist[bin]) {
4867cc577a4SJonathan Peyton if ((bufsize)b->bh.bb.bsize >= (bufsize)size) {
4877cc577a4SJonathan Peyton
4883041982dSJonathan Peyton // Buffer is big enough to satisfy the request. Allocate it to the
4893041982dSJonathan Peyton // caller. We must decide whether the buffer is large enough to split
4903041982dSJonathan Peyton // into the part given to the caller and a free buffer that remains
4913041982dSJonathan Peyton // on the free list, or whether the entire buffer should be removed
4923041982dSJonathan Peyton // from the free list and given to the caller in its entirety. We
4933041982dSJonathan Peyton // only split the buffer if enough room remains for a header plus the
4943041982dSJonathan Peyton // minimum quantum of allocation.
4953041982dSJonathan Peyton if ((b->bh.bb.bsize - (bufsize)size) >
4963041982dSJonathan Peyton (bufsize)(SizeQ + (sizeof(bhead_t)))) {
4977cc577a4SJonathan Peyton bhead_t *ba, *bn;
4987cc577a4SJonathan Peyton
4997cc577a4SJonathan Peyton ba = BH(((char *)b) + (b->bh.bb.bsize - (bufsize)size));
5007cc577a4SJonathan Peyton bn = BH(((char *)ba) + size);
5017cc577a4SJonathan Peyton
5027cc577a4SJonathan Peyton KMP_DEBUG_ASSERT(bn->bb.prevfree == b->bh.bb.bsize);
5037cc577a4SJonathan Peyton
5047cc577a4SJonathan Peyton /* Subtract size from length of free block. */
5057cc577a4SJonathan Peyton b->bh.bb.bsize -= (bufsize)size;
5067cc577a4SJonathan Peyton
5077cc577a4SJonathan Peyton /* Link allocated buffer to the previous free buffer. */
5087cc577a4SJonathan Peyton ba->bb.prevfree = b->bh.bb.bsize;
5097cc577a4SJonathan Peyton
5107cc577a4SJonathan Peyton /* Plug negative size into user buffer. */
5117cc577a4SJonathan Peyton ba->bb.bsize = -size;
5127cc577a4SJonathan Peyton
5137cc577a4SJonathan Peyton /* Mark this buffer as owned by this thread. */
5143041982dSJonathan Peyton TCW_PTR(ba->bb.bthr,
5153041982dSJonathan Peyton th); // not an allocated address (do not mark it)
5167cc577a4SJonathan Peyton /* Mark buffer after this one not preceded by free block. */
5177cc577a4SJonathan Peyton bn->bb.prevfree = 0;
5187cc577a4SJonathan Peyton
5193041982dSJonathan Peyton // unlink buffer from old freelist, and reinsert into new freelist
5207cc577a4SJonathan Peyton __kmp_bget_remove_from_freelist(b);
5217cc577a4SJonathan Peyton __kmp_bget_insert_into_freelist(thr, b);
5227cc577a4SJonathan Peyton #if BufStats
5237cc577a4SJonathan Peyton thr->totalloc += (size_t)size;
5247cc577a4SJonathan Peyton thr->numget++; /* Increment number of bget() calls */
5257cc577a4SJonathan Peyton #endif
5267cc577a4SJonathan Peyton buf = (void *)((((char *)ba) + sizeof(bhead_t)));
5277cc577a4SJonathan Peyton KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
5287cc577a4SJonathan Peyton return buf;
5297cc577a4SJonathan Peyton } else {
5307cc577a4SJonathan Peyton bhead_t *ba;
5317cc577a4SJonathan Peyton
5327cc577a4SJonathan Peyton ba = BH(((char *)b) + b->bh.bb.bsize);
5337cc577a4SJonathan Peyton
5347cc577a4SJonathan Peyton KMP_DEBUG_ASSERT(ba->bb.prevfree == b->bh.bb.bsize);
5357cc577a4SJonathan Peyton
5367cc577a4SJonathan Peyton /* The buffer isn't big enough to split. Give the whole
5377cc577a4SJonathan Peyton shebang to the caller and remove it from the free list. */
5387cc577a4SJonathan Peyton
5397cc577a4SJonathan Peyton __kmp_bget_remove_from_freelist(b);
5407cc577a4SJonathan Peyton #if BufStats
5417cc577a4SJonathan Peyton thr->totalloc += (size_t)b->bh.bb.bsize;
5427cc577a4SJonathan Peyton thr->numget++; /* Increment number of bget() calls */
5437cc577a4SJonathan Peyton #endif
5447cc577a4SJonathan Peyton /* Negate size to mark buffer allocated. */
5457cc577a4SJonathan Peyton b->bh.bb.bsize = -(b->bh.bb.bsize);
5467cc577a4SJonathan Peyton
5477cc577a4SJonathan Peyton /* Mark this buffer as owned by this thread. */
5483041982dSJonathan Peyton TCW_PTR(ba->bb.bthr, th); // not an allocated address (do not mark)
5497cc577a4SJonathan Peyton /* Zero the back pointer in the next buffer in memory
5507cc577a4SJonathan Peyton to indicate that this buffer is allocated. */
5517cc577a4SJonathan Peyton ba->bb.prevfree = 0;
5527cc577a4SJonathan Peyton
5537cc577a4SJonathan Peyton /* Give user buffer starting at queue links. */
5547cc577a4SJonathan Peyton buf = (void *)&(b->ql);
5557cc577a4SJonathan Peyton KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
5567cc577a4SJonathan Peyton return buf;
5577cc577a4SJonathan Peyton }
5587cc577a4SJonathan Peyton }
5597cc577a4SJonathan Peyton
5607cc577a4SJonathan Peyton /* Link to next buffer */
5617cc577a4SJonathan Peyton b = (use_blink ? b->ql.blink : b->ql.flink);
5627cc577a4SJonathan Peyton }
5637cc577a4SJonathan Peyton }
5647cc577a4SJonathan Peyton
5653041982dSJonathan Peyton /* We failed to find a buffer. If there's a compact function defined,
5663041982dSJonathan Peyton notify it of the size requested. If it returns TRUE, try the allocation
5673041982dSJonathan Peyton again. */
5687cc577a4SJonathan Peyton
5697cc577a4SJonathan Peyton if ((thr->compfcn == 0) || (!(*thr->compfcn)(size, ++compactseq))) {
5707cc577a4SJonathan Peyton break;
5717cc577a4SJonathan Peyton }
5727cc577a4SJonathan Peyton }
5737cc577a4SJonathan Peyton
5747cc577a4SJonathan Peyton /* No buffer available with requested size free. */
5757cc577a4SJonathan Peyton
5767cc577a4SJonathan Peyton /* Don't give up yet -- look in the reserve supply. */
5777cc577a4SJonathan Peyton if (thr->acqfcn != 0) {
5787cc577a4SJonathan Peyton if (size > (bufsize)(thr->exp_incr - sizeof(bhead_t))) {
5793041982dSJonathan Peyton /* Request is too large to fit in a single expansion block.
58042016791SKazuaki Ishizaki Try to satisfy it by a direct buffer acquisition. */
5817cc577a4SJonathan Peyton bdhead_t *bdh;
5827cc577a4SJonathan Peyton
5837cc577a4SJonathan Peyton size += sizeof(bdhead_t) - sizeof(bhead_t);
5847cc577a4SJonathan Peyton
5857cc577a4SJonathan Peyton KE_TRACE(10, ("%%%%%% MALLOC( %d )\n", (int)size));
5867cc577a4SJonathan Peyton
5877cc577a4SJonathan Peyton /* richryan */
5887cc577a4SJonathan Peyton bdh = BDH((*thr->acqfcn)((bufsize)size));
5897cc577a4SJonathan Peyton if (bdh != NULL) {
5907cc577a4SJonathan Peyton
5913041982dSJonathan Peyton // Mark the buffer special by setting size field of its header to zero.
5927cc577a4SJonathan Peyton bdh->bh.bb.bsize = 0;
5937cc577a4SJonathan Peyton
5947cc577a4SJonathan Peyton /* Mark this buffer as owned by this thread. */
5957cc577a4SJonathan Peyton TCW_PTR(bdh->bh.bb.bthr, th); // don't mark buffer as allocated,
5967cc577a4SJonathan Peyton // because direct buffer never goes to free list
5977cc577a4SJonathan Peyton bdh->bh.bb.prevfree = 0;
5987cc577a4SJonathan Peyton bdh->tsize = size;
5997cc577a4SJonathan Peyton #if BufStats
6007cc577a4SJonathan Peyton thr->totalloc += (size_t)size;
6017cc577a4SJonathan Peyton thr->numget++; /* Increment number of bget() calls */
6027cc577a4SJonathan Peyton thr->numdget++; /* Direct bget() call count */
6037cc577a4SJonathan Peyton #endif
6047cc577a4SJonathan Peyton buf = (void *)(bdh + 1);
6057cc577a4SJonathan Peyton KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
6067cc577a4SJonathan Peyton return buf;
6077cc577a4SJonathan Peyton }
6087cc577a4SJonathan Peyton
6097cc577a4SJonathan Peyton } else {
6107cc577a4SJonathan Peyton
6117cc577a4SJonathan Peyton /* Try to obtain a new expansion block */
6127cc577a4SJonathan Peyton void *newpool;
6137cc577a4SJonathan Peyton
6147cc577a4SJonathan Peyton KE_TRACE(10, ("%%%%%% MALLOCB( %d )\n", (int)thr->exp_incr));
6157cc577a4SJonathan Peyton
6167cc577a4SJonathan Peyton /* richryan */
6177cc577a4SJonathan Peyton newpool = (*thr->acqfcn)((bufsize)thr->exp_incr);
6187cc577a4SJonathan Peyton KMP_DEBUG_ASSERT(((size_t)newpool) % SizeQuant == 0);
6197cc577a4SJonathan Peyton if (newpool != NULL) {
6207cc577a4SJonathan Peyton bpool(th, newpool, thr->exp_incr);
6213041982dSJonathan Peyton buf = bget(
6223041982dSJonathan Peyton th, requested_size); /* This can't, I say, can't get into a loop. */
6237cc577a4SJonathan Peyton return buf;
6247cc577a4SJonathan Peyton }
6257cc577a4SJonathan Peyton }
6267cc577a4SJonathan Peyton }
6277cc577a4SJonathan Peyton
6287cc577a4SJonathan Peyton /* Still no buffer available */
6297cc577a4SJonathan Peyton
6307cc577a4SJonathan Peyton return NULL;
6317cc577a4SJonathan Peyton }
6327cc577a4SJonathan Peyton
6337cc577a4SJonathan Peyton /* BGETZ -- Allocate a buffer and clear its contents to zero. We clear
6347cc577a4SJonathan Peyton the entire contents of the buffer to zero, not just the
6357cc577a4SJonathan Peyton region requested by the caller. */
6367cc577a4SJonathan Peyton
static void *bgetz(kmp_info_t *th, bufsize size) {
  /* Allocate a buffer of at least 'size' bytes from thread th's pool and
     zero the buffer's ENTIRE usable region, which may be larger than the
     'size' the caller requested. Returns NULL if bget() fails. */
  char *buf = (char *)bget(th, size);

  if (buf != NULL) {
    bhead_t *b;
    bufsize rsize; /* real (usable) size of the returned buffer */

    b = BH(buf - sizeof(bhead_t));
    rsize = -(b->bb.bsize); /* allocated buffers carry a negated size */
    if (rsize == 0) {
      /* A zero size field marks a directly-acquired (oversize) buffer; its
         true size is recorded in the bdhead_t preceding the user region. */
      bdhead_t *bd;

      bd = BDH(buf - sizeof(bdhead_t));
      rsize = bd->tsize - (bufsize)sizeof(bdhead_t);
    } else {
      rsize -= sizeof(bhead_t);
    }

    KMP_DEBUG_ASSERT(rsize >= size);

    (void)memset(buf, 0, (bufsize)rsize);
  }
  return ((void *)buf);
}
6617cc577a4SJonathan Peyton
6627cc577a4SJonathan Peyton /* BGETR -- Reallocate a buffer. This is a minimal implementation,
6637cc577a4SJonathan Peyton simply in terms of brel() and bget(). It could be
6647cc577a4SJonathan Peyton enhanced to allow the buffer to grow into adjacent free
6657cc577a4SJonathan Peyton blocks and to avoid moving data unnecessarily. */
6667cc577a4SJonathan Peyton
static void *bgetr(kmp_info_t *th, void *buf, bufsize size) {
  /* Reallocate 'buf' to at least 'size' bytes: acquire a new buffer, copy
     min(size, old size) bytes, then release the old buffer. A NULL 'buf'
     behaves like bget(). Returns NULL (leaving 'buf' intact) if the new
     allocation fails. */
  void *nbuf;
  bufsize osize; /* Old size of buffer */
  bhead_t *b;

  nbuf = bget(th, size);
  if (nbuf == NULL) { /* Acquire new buffer */
    return NULL;
  }
  if (buf == NULL) {
    return nbuf;
  }
  b = BH(((char *)buf) - sizeof(bhead_t));
  osize = -b->bb.bsize; /* allocated buffers carry a negated size */
  if (osize == 0) {
    /* Buffer acquired directly through acqfcn; real size is in bdhead_t. */
    bdhead_t *bd;

    bd = BDH(((char *)buf) - sizeof(bdhead_t));
    osize = bd->tsize - (bufsize)sizeof(bdhead_t);
  } else {
    osize -= sizeof(bhead_t);
  }

  KMP_DEBUG_ASSERT(osize > 0);

  (void)KMP_MEMCPY((char *)nbuf, (char *)buf, /* Copy the data */
                   (size_t)((size < osize) ? size : osize));
  brel(th, buf);

  return nbuf;
}
6997cc577a4SJonathan Peyton
7007cc577a4SJonathan Peyton /* BREL -- Release a buffer. */
static void brel(kmp_info_t *th, void *buf) {
  /* Return a buffer previously obtained from bget()/bgetz()/bgetr() to its
     pool. Three cases:
       (1) directly-acquired oversize buffers (header bsize == 0) are handed
           straight back to the thread's release function;
       (2) buffers owned by a different thread are queued for deferred
           release by their owning thread;
       (3) locally-owned buffers are coalesced with adjacent free buffers
           and placed on the appropriate free list; if the result spans an
           entire expansion block, the block itself may be released. */
  thr_data_t *thr = get_thr_data(th);
  bfhead_t *b, *bn;
  kmp_info_t *bth;

  KMP_DEBUG_ASSERT(buf != NULL);
  KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);

  b = BFH(((char *)buf) - sizeof(bhead_t));

  if (b->bh.bb.bsize == 0) { /* Directly-acquired buffer? */
    bdhead_t *bdh;

    bdh = BDH(((char *)buf) - sizeof(bdhead_t));
    KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
#if BufStats
    thr->totalloc -= (size_t)bdh->tsize;
    thr->numdrel++; /* Number of direct releases */
    thr->numrel++; /* Increment number of brel() calls */
#endif /* BufStats */
#ifdef FreeWipe
    (void)memset((char *)buf, 0x55, (size_t)(bdh->tsize - sizeof(bdhead_t)));
#endif /* FreeWipe */

    KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)bdh));

    KMP_DEBUG_ASSERT(thr->relfcn != 0);
    (*thr->relfcn)((void *)bdh); /* Release it directly. */
    return;
  }

  /* The low bit of bthr marks "allocated address"; strip it so the owner
     pointer compares equal to a plain kmp_info_t*. */
  bth = (kmp_info_t *)((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) &
                       ~1); // clear possible mark before comparison
  if (bth != th) {
    /* Add this buffer to be released by the owning thread later */
    __kmp_bget_enqueue(bth, buf
#ifdef USE_QUEUING_LOCK_FOR_BGET
                       ,
                       __kmp_gtid_from_thread(th)
#endif
    );
    return;
  }

  /* Buffer size must be negative, indicating that the buffer is allocated. */
  if (b->bh.bb.bsize >= 0) {
    /* NOTE(review): defensive dead store; bn is unconditionally reassigned
       below and the next assert fires in debug builds on this path. */
    bn = NULL;
  }
  KMP_DEBUG_ASSERT(b->bh.bb.bsize < 0);

  /* Back pointer in next buffer must be zero, indicating the same thing: */

  KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.bsize)->bb.prevfree == 0);

#if BufStats
  thr->numrel++; /* Increment number of brel() calls */
  thr->totalloc += (size_t)b->bh.bb.bsize;
#endif

  /* If the back link is nonzero, the previous buffer is free.  */

  if (b->bh.bb.prevfree != 0) {
    /* The previous buffer is free. Consolidate this buffer with it by adding
       the length of this buffer to the previous free buffer. Note that we
       subtract the size in the buffer being released, since it's negative to
       indicate that the buffer is allocated. */
    bufsize size = b->bh.bb.bsize;

    /* Make the previous buffer the one we're working on. */
    KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.prevfree)->bb.bsize ==
                     b->bh.bb.prevfree);
    b = BFH(((char *)b) - b->bh.bb.prevfree);
    b->bh.bb.bsize -= size;

    /* unlink the buffer from the old freelist */
    __kmp_bget_remove_from_freelist(b);
  } else {
    /* The previous buffer isn't allocated. Mark this buffer size as positive
       (i.e. free) and fall through to place the buffer on the free list as an
       isolated free block. */
    b->bh.bb.bsize = -b->bh.bb.bsize;
  }

  /* insert buffer back onto a new freelist */
  __kmp_bget_insert_into_freelist(thr, b);

  /* Now we look at the next buffer in memory, located by advancing from
     the  start of this buffer by its size, to see if that buffer is
     free.  If it is, we combine  this buffer with the next one in
     memory, dechaining the second buffer from the free list. */
  bn = BFH(((char *)b) + b->bh.bb.bsize);
  if (bn->bh.bb.bsize > 0) {

    /* The buffer is free.  Remove it from the free list and add
       its size to that of our buffer. */
    KMP_DEBUG_ASSERT(BH((char *)bn + bn->bh.bb.bsize)->bb.prevfree ==
                     bn->bh.bb.bsize);

    __kmp_bget_remove_from_freelist(bn);

    b->bh.bb.bsize += bn->bh.bb.bsize;

    /* unlink the buffer from the old freelist, and reinsert it into the new
     * freelist */
    __kmp_bget_remove_from_freelist(b);
    __kmp_bget_insert_into_freelist(thr, b);

    /* Finally,  advance to  the buffer  that follows  the newly
       consolidated free block.  We must set its  backpointer  to the
       head of the  consolidated free  block.  We  know  the next block
       must be an allocated block because the process of recombination
       guarantees that two free  blocks will  never be  contiguous in
       memory.  */
    bn = BFH(((char *)b) + b->bh.bb.bsize);
  }
#ifdef FreeWipe
  (void)memset(((char *)b) + sizeof(bfhead_t), 0x55,
               (size_t)(b->bh.bb.bsize - sizeof(bfhead_t)));
#endif
  KMP_DEBUG_ASSERT(bn->bh.bb.bsize < 0);

  /* The next buffer is allocated.  Set the backpointer in it  to  point
     to this buffer; the previous free buffer in memory. */

  bn->bh.bb.prevfree = b->bh.bb.bsize;

  /* If  a block-release function is defined, and this free buffer
     constitutes the entire block, release it.  Note that  pool_len
     is  defined  in  such a way that the test will fail unless all
     pool blocks are the same size.  */
  if (thr->relfcn != 0 &&
      b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) {
#if BufStats
    if (thr->numpblk !=
        1) { /* Do not release the last buffer until finalization time */
#endif

      KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
      KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
      KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree ==
                       b->bh.bb.bsize);

      /* Unlink the buffer from the free list */
      __kmp_bget_remove_from_freelist(b);

      KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b));

      (*thr->relfcn)(b);
#if BufStats
      thr->numprel++; /* Nr of expansion block releases */
      thr->numpblk--; /* Total number of blocks */
      KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);

      // avoid leaving stale last_pool pointer around if it is being dealloced
      if (thr->last_pool == b)
        thr->last_pool = 0;
    } else {
      /* Remember the retained block; __kmp_finalize_bget() releases it. */
      thr->last_pool = b;
    }
#endif /* BufStats */
  }
}
8637cc577a4SJonathan Peyton
8647cc577a4SJonathan Peyton /* BECTL -- Establish automatic pool expansion control */
bectl(kmp_info_t * th,bget_compact_t compact,bget_acquire_t acquire,bget_release_t release,bufsize pool_incr)8653041982dSJonathan Peyton static void bectl(kmp_info_t *th, bget_compact_t compact,
8663041982dSJonathan Peyton bget_acquire_t acquire, bget_release_t release,
8673041982dSJonathan Peyton bufsize pool_incr) {
8687cc577a4SJonathan Peyton thr_data_t *thr = get_thr_data(th);
8697cc577a4SJonathan Peyton
8707cc577a4SJonathan Peyton thr->compfcn = compact;
8717cc577a4SJonathan Peyton thr->acqfcn = acquire;
8727cc577a4SJonathan Peyton thr->relfcn = release;
8737cc577a4SJonathan Peyton thr->exp_incr = pool_incr;
8747cc577a4SJonathan Peyton }
8757cc577a4SJonathan Peyton
8767cc577a4SJonathan Peyton /* BPOOL -- Add a region of memory to the buffer pool. */
static void bpool(kmp_info_t *th, void *buf, bufsize len) {
  /* Add the memory region [buf, buf+len) to thread th's buffer pool as a
     single free buffer, terminated by a dummy "allocated" sentinel header.
     'len' is rounded down to a multiple of SizeQuant. */
  /* int bin = 0; */
  thr_data_t *thr = get_thr_data(th);
  bfhead_t *b = BFH(buf);
  bhead_t *bn;

  __kmp_bget_dequeue(th); /* Release any queued buffers */

#ifdef SizeQuant
  len &= ~((bufsize)(SizeQuant - 1));
#endif
  /* Track the common pool-block length; once blocks of differing sizes have
     been added, pool_len becomes -1 so the whole-block-release test in
     brel() can never match. */
  if (thr->pool_len == 0) {
    thr->pool_len = len;
  } else if (len != thr->pool_len) {
    thr->pool_len = -1;
  }
#if BufStats
  thr->numpget++; /* Number of block acquisitions */
  thr->numpblk++; /* Number of blocks total */
  KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
#endif /* BufStats */

  /* Since the block is initially occupied by a single free buffer,
     it had better not be (much) larger than the largest buffer
     whose size we can store in bhead.bb.bsize. */
  KMP_DEBUG_ASSERT(len - sizeof(bhead_t) <= -((bufsize)ESent + 1));

  /* Clear the backpointer at the start of the block to indicate that
     there is no free block prior to this one. That blocks
     recombination when the first block in memory is released. */
  b->bh.bb.prevfree = 0;

  /* Create a dummy allocated buffer at the end of the pool. This dummy
     buffer is seen when a buffer at the end of the pool is released and
     blocks recombination of the last buffer with the dummy buffer at
     the end. The length in the dummy buffer is set to the largest
     negative number to denote the end of the pool for diagnostic
     routines (this specific value is not counted on by the actual
     allocation and release functions). */
  len -= sizeof(bhead_t);
  b->bh.bb.bsize = (bufsize)len;
  /* Set the owner of this buffer; the low tag bit distinguishes a
     pool-header store from a normal allocated-buffer owner store. */
  TCW_PTR(b->bh.bb.bthr,
          (kmp_info_t *)((kmp_uintptr_t)th |
                         1)); // mark the buffer as allocated address

  /* Chain the new block to the free list. */
  __kmp_bget_insert_into_freelist(thr, b);

#ifdef FreeWipe
  (void)memset(((char *)b) + sizeof(bfhead_t), 0x55,
               (size_t)(len - sizeof(bfhead_t)));
#endif
  bn = BH(((char *)b) + len);
  bn->bb.prevfree = (bufsize)len;
  /* Definition of ESent assumes two's complement! */
  KMP_DEBUG_ASSERT((~0) == -1 && (bn != 0));

  bn->bb.bsize = ESent;
}
9377cc577a4SJonathan Peyton
9387cc577a4SJonathan Peyton /* BFREED -- Dump the free lists for this thread. */
static void bfreed(kmp_info_t *th) {
  /* Diagnostic dump: print th's allocation statistics (when BufStats is on)
     and walk every free-list bin, printing and sanity-checking each free
     block. Used by kmpc_poolprint(). */
  int bin = 0, count = 0;
  int gtid = __kmp_gtid_from_thread(th);
  thr_data_t *thr = get_thr_data(th);

#if BufStats
  __kmp_printf_no_lock("__kmp_printpool: T#%d total=%" KMP_UINT64_SPEC
                       " get=%" KMP_INT64_SPEC " rel=%" KMP_INT64_SPEC
                       " pblk=%" KMP_INT64_SPEC " pget=%" KMP_INT64_SPEC
                       " prel=%" KMP_INT64_SPEC " dget=%" KMP_INT64_SPEC
                       " drel=%" KMP_INT64_SPEC "\n",
                       gtid, (kmp_uint64)thr->totalloc, (kmp_int64)thr->numget,
                       (kmp_int64)thr->numrel, (kmp_int64)thr->numpblk,
                       (kmp_int64)thr->numpget, (kmp_int64)thr->numprel,
                       (kmp_int64)thr->numdget, (kmp_int64)thr->numdrel);
#endif

  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
    bfhead_t *b;

    for (b = thr->freelist[bin].ql.flink; b != &thr->freelist[bin];
         b = b->ql.flink) {
      bufsize bs = b->bh.bb.bsize;

      /* Free-list links must be mutually consistent, and a buffer on a
         free list must have a positive (free) size. */
      KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
      KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
      KMP_DEBUG_ASSERT(bs > 0);

      count += 1;

      __kmp_printf_no_lock(
          "__kmp_printpool: T#%d Free block: 0x%p size %6ld bytes.\n", gtid, b,
          (long)bs);
#ifdef FreeWipe
      {
        /* FreeWipe fills freed space with 0x55; any other byte here means
           the block was written after being released. */
        char *lerr = ((char *)b) + sizeof(bfhead_t);
        if ((bs > sizeof(bfhead_t)) &&
            ((*lerr != 0x55) ||
             (memcmp(lerr, lerr + 1, (size_t)(bs - (sizeof(bfhead_t) + 1))) !=
              0))) {
          __kmp_printf_no_lock("__kmp_printpool: T#%d (Contents of above "
                               "free block have been overstored.)\n",
                               gtid);
        }
      }
#endif
    }
  }

  if (count == 0)
    __kmp_printf_no_lock("__kmp_printpool: T#%d No free blocks\n", gtid);
}
9917cc577a4SJonathan Peyton
__kmp_initialize_bget(kmp_info_t * th)9923041982dSJonathan Peyton void __kmp_initialize_bget(kmp_info_t *th) {
9937cc577a4SJonathan Peyton KMP_DEBUG_ASSERT(SizeQuant >= sizeof(void *) && (th != 0));
9947cc577a4SJonathan Peyton
9957cc577a4SJonathan Peyton set_thr_data(th);
9967cc577a4SJonathan Peyton
9977cc577a4SJonathan Peyton bectl(th, (bget_compact_t)0, (bget_acquire_t)malloc, (bget_release_t)free,
9987cc577a4SJonathan Peyton (bufsize)__kmp_malloc_pool_incr);
9997cc577a4SJonathan Peyton }
10007cc577a4SJonathan Peyton
void __kmp_finalize_bget(kmp_info_t *th) {
  /* Tear down thread th's bget state: release the last retained pool block
     (brel() deliberately keeps one block around -- see its numpblk != 1
     test) and then free the per-thread bookkeeping structure. */
  thr_data_t *thr;
  bfhead_t *b;

  KMP_DEBUG_ASSERT(th != 0);

#if BufStats
  thr = (thr_data_t *)th->th.th_local.bget_data;
  KMP_DEBUG_ASSERT(thr != NULL);
  b = thr->last_pool;

  /* If a block-release function is defined, and this free buffer constitutes
     the entire block, release it. Note that pool_len is defined in such a way
     that the test will fail unless all pool blocks are the same size. */

  // Deallocate the last pool if one exists because we no longer do it in brel()
  if (thr->relfcn != 0 && b != 0 && thr->numpblk != 0 &&
      b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) {
    /* The retained block must be a lone free buffer bounded by the ESent
       sentinel written in bpool(). */
    KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
    KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
    KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree ==
                     b->bh.bb.bsize);

    /* Unlink the buffer from the free list */
    __kmp_bget_remove_from_freelist(b);

    KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b));

    (*thr->relfcn)(b);
    thr->numprel++; /* Nr of expansion block releases */
    thr->numpblk--; /* Total number of blocks */
    KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
  }
#endif /* BufStats */

  /* Deallocate bget_data */
  if (th->th.th_local.bget_data != NULL) {
    __kmp_free(th->th.th_local.bget_data);
    th->th.th_local.bget_data = NULL;
  }
}
10427cc577a4SJonathan Peyton
kmpc_set_poolsize(size_t size)10433041982dSJonathan Peyton void kmpc_set_poolsize(size_t size) {
10447cc577a4SJonathan Peyton bectl(__kmp_get_thread(), (bget_compact_t)0, (bget_acquire_t)malloc,
10457cc577a4SJonathan Peyton (bget_release_t)free, (bufsize)size);
10467cc577a4SJonathan Peyton }
10477cc577a4SJonathan Peyton
kmpc_get_poolsize(void)10483041982dSJonathan Peyton size_t kmpc_get_poolsize(void) {
10497cc577a4SJonathan Peyton thr_data_t *p;
10507cc577a4SJonathan Peyton
10517cc577a4SJonathan Peyton p = get_thr_data(__kmp_get_thread());
10527cc577a4SJonathan Peyton
10537cc577a4SJonathan Peyton return p->exp_incr;
10547cc577a4SJonathan Peyton }
10557cc577a4SJonathan Peyton
kmpc_set_poolmode(int mode)10563041982dSJonathan Peyton void kmpc_set_poolmode(int mode) {
10577cc577a4SJonathan Peyton thr_data_t *p;
10587cc577a4SJonathan Peyton
10593041982dSJonathan Peyton if (mode == bget_mode_fifo || mode == bget_mode_lifo ||
10603041982dSJonathan Peyton mode == bget_mode_best) {
10617cc577a4SJonathan Peyton p = get_thr_data(__kmp_get_thread());
10627cc577a4SJonathan Peyton p->mode = (bget_mode_t)mode;
10637cc577a4SJonathan Peyton }
10647cc577a4SJonathan Peyton }
10657cc577a4SJonathan Peyton
kmpc_get_poolmode(void)10663041982dSJonathan Peyton int kmpc_get_poolmode(void) {
10677cc577a4SJonathan Peyton thr_data_t *p;
10687cc577a4SJonathan Peyton
10697cc577a4SJonathan Peyton p = get_thr_data(__kmp_get_thread());
10707cc577a4SJonathan Peyton
10717cc577a4SJonathan Peyton return p->mode;
10727cc577a4SJonathan Peyton }
10737cc577a4SJonathan Peyton
kmpc_get_poolstat(size_t * maxmem,size_t * allmem)10743041982dSJonathan Peyton void kmpc_get_poolstat(size_t *maxmem, size_t *allmem) {
10757cc577a4SJonathan Peyton kmp_info_t *th = __kmp_get_thread();
10767cc577a4SJonathan Peyton bufsize a, b;
10777cc577a4SJonathan Peyton
10787cc577a4SJonathan Peyton __kmp_bget_dequeue(th); /* Release any queued buffers */
10797cc577a4SJonathan Peyton
10807cc577a4SJonathan Peyton bcheck(th, &a, &b);
10817cc577a4SJonathan Peyton
10827cc577a4SJonathan Peyton *maxmem = a;
10837cc577a4SJonathan Peyton *allmem = b;
10847cc577a4SJonathan Peyton }
10857cc577a4SJonathan Peyton
kmpc_poolprint(void)10863041982dSJonathan Peyton void kmpc_poolprint(void) {
10877cc577a4SJonathan Peyton kmp_info_t *th = __kmp_get_thread();
10887cc577a4SJonathan Peyton
10897cc577a4SJonathan Peyton __kmp_bget_dequeue(th); /* Release any queued buffers */
10907cc577a4SJonathan Peyton
10917cc577a4SJonathan Peyton bfreed(th);
10927cc577a4SJonathan Peyton }
10937cc577a4SJonathan Peyton
10947cc577a4SJonathan Peyton #endif // #if KMP_USE_BGET
10957cc577a4SJonathan Peyton
kmpc_malloc(size_t size)10963041982dSJonathan Peyton void *kmpc_malloc(size_t size) {
10977cc577a4SJonathan Peyton void *ptr;
10987cc577a4SJonathan Peyton ptr = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr)));
10997cc577a4SJonathan Peyton if (ptr != NULL) {
11007cc577a4SJonathan Peyton // save allocated pointer just before one returned to user
11017cc577a4SJonathan Peyton *(void **)ptr = ptr;
11027cc577a4SJonathan Peyton ptr = (void **)ptr + 1;
11037cc577a4SJonathan Peyton }
11047cc577a4SJonathan Peyton return ptr;
11057cc577a4SJonathan Peyton }
11067cc577a4SJonathan Peyton
11077cc577a4SJonathan Peyton #define IS_POWER_OF_TWO(n) (((n) & ((n)-1)) == 0)
11087cc577a4SJonathan Peyton
kmpc_aligned_malloc(size_t size,size_t alignment)11093041982dSJonathan Peyton void *kmpc_aligned_malloc(size_t size, size_t alignment) {
11107cc577a4SJonathan Peyton void *ptr;
11117cc577a4SJonathan Peyton void *ptr_allocated;
11127cc577a4SJonathan Peyton KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too big
11137cc577a4SJonathan Peyton if (!IS_POWER_OF_TWO(alignment)) {
11147cc577a4SJonathan Peyton // AC: do we need to issue a warning here?
11157cc577a4SJonathan Peyton errno = EINVAL;
11167cc577a4SJonathan Peyton return NULL;
11177cc577a4SJonathan Peyton }
11187cc577a4SJonathan Peyton size = size + sizeof(void *) + alignment;
11197cc577a4SJonathan Peyton ptr_allocated = bget(__kmp_entry_thread(), (bufsize)size);
11207cc577a4SJonathan Peyton if (ptr_allocated != NULL) {
11217cc577a4SJonathan Peyton // save allocated pointer just before one returned to user
11223041982dSJonathan Peyton ptr = (void *)(((kmp_uintptr_t)ptr_allocated + sizeof(void *) + alignment) &
11233041982dSJonathan Peyton ~(alignment - 1));
11247cc577a4SJonathan Peyton *((void **)ptr - 1) = ptr_allocated;
11257cc577a4SJonathan Peyton } else {
11267cc577a4SJonathan Peyton ptr = NULL;
11277cc577a4SJonathan Peyton }
11287cc577a4SJonathan Peyton return ptr;
11297cc577a4SJonathan Peyton }
11307cc577a4SJonathan Peyton
kmpc_calloc(size_t nelem,size_t elsize)11313041982dSJonathan Peyton void *kmpc_calloc(size_t nelem, size_t elsize) {
11327cc577a4SJonathan Peyton void *ptr;
11337cc577a4SJonathan Peyton ptr = bgetz(__kmp_entry_thread(), (bufsize)(nelem * elsize + sizeof(ptr)));
11347cc577a4SJonathan Peyton if (ptr != NULL) {
11357cc577a4SJonathan Peyton // save allocated pointer just before one returned to user
11367cc577a4SJonathan Peyton *(void **)ptr = ptr;
11377cc577a4SJonathan Peyton ptr = (void **)ptr + 1;
11387cc577a4SJonathan Peyton }
11397cc577a4SJonathan Peyton return ptr;
11407cc577a4SJonathan Peyton }
11417cc577a4SJonathan Peyton
kmpc_realloc(void * ptr,size_t size)11423041982dSJonathan Peyton void *kmpc_realloc(void *ptr, size_t size) {
11437cc577a4SJonathan Peyton void *result = NULL;
11447cc577a4SJonathan Peyton if (ptr == NULL) {
11457cc577a4SJonathan Peyton // If pointer is NULL, realloc behaves like malloc.
11467cc577a4SJonathan Peyton result = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr)));
11477cc577a4SJonathan Peyton // save allocated pointer just before one returned to user
11487cc577a4SJonathan Peyton if (result != NULL) {
11497cc577a4SJonathan Peyton *(void **)result = result;
11507cc577a4SJonathan Peyton result = (void **)result + 1;
11517cc577a4SJonathan Peyton }
11527cc577a4SJonathan Peyton } else if (size == 0) {
11537cc577a4SJonathan Peyton // If size is 0, realloc behaves like free.
11543041982dSJonathan Peyton // The thread must be registered by the call to kmpc_malloc() or
11553041982dSJonathan Peyton // kmpc_calloc() before.
11563041982dSJonathan Peyton // So it should be safe to call __kmp_get_thread(), not
11573041982dSJonathan Peyton // __kmp_entry_thread().
11587cc577a4SJonathan Peyton KMP_ASSERT(*((void **)ptr - 1));
11597cc577a4SJonathan Peyton brel(__kmp_get_thread(), *((void **)ptr - 1));
11607cc577a4SJonathan Peyton } else {
11613041982dSJonathan Peyton result = bgetr(__kmp_entry_thread(), *((void **)ptr - 1),
11623041982dSJonathan Peyton (bufsize)(size + sizeof(ptr)));
11637cc577a4SJonathan Peyton if (result != NULL) {
11647cc577a4SJonathan Peyton *(void **)result = result;
11657cc577a4SJonathan Peyton result = (void **)result + 1;
11667cc577a4SJonathan Peyton }
1167bd3a7633SJonathan Peyton }
11687cc577a4SJonathan Peyton return result;
11697cc577a4SJonathan Peyton }
11707cc577a4SJonathan Peyton
11713041982dSJonathan Peyton // NOTE: the library must have already been initialized by a previous allocate
kmpc_free(void * ptr)11723041982dSJonathan Peyton void kmpc_free(void *ptr) {
11737cc577a4SJonathan Peyton if (!__kmp_init_serial) {
11747cc577a4SJonathan Peyton return;
1175bd3a7633SJonathan Peyton }
11767cc577a4SJonathan Peyton if (ptr != NULL) {
11777cc577a4SJonathan Peyton kmp_info_t *th = __kmp_get_thread();
11787cc577a4SJonathan Peyton __kmp_bget_dequeue(th); /* Release any queued buffers */
11797cc577a4SJonathan Peyton // extract allocated pointer and free it
11807cc577a4SJonathan Peyton KMP_ASSERT(*((void **)ptr - 1));
11817cc577a4SJonathan Peyton brel(th, *((void **)ptr - 1));
1182bd3a7633SJonathan Peyton }
11837cc577a4SJonathan Peyton }
11847cc577a4SJonathan Peyton
___kmp_thread_malloc(kmp_info_t * th,size_t size KMP_SRC_LOC_DECL)11853041982dSJonathan Peyton void *___kmp_thread_malloc(kmp_info_t *th, size_t size KMP_SRC_LOC_DECL) {
11867cc577a4SJonathan Peyton void *ptr;
11873041982dSJonathan Peyton KE_TRACE(30, ("-> __kmp_thread_malloc( %p, %d ) called from %s:%d\n", th,
11883041982dSJonathan Peyton (int)size KMP_SRC_LOC_PARM));
11897cc577a4SJonathan Peyton ptr = bget(th, (bufsize)size);
11907cc577a4SJonathan Peyton KE_TRACE(30, ("<- __kmp_thread_malloc() returns %p\n", ptr));
11917cc577a4SJonathan Peyton return ptr;
11927cc577a4SJonathan Peyton }
11937cc577a4SJonathan Peyton
___kmp_thread_calloc(kmp_info_t * th,size_t nelem,size_t elsize KMP_SRC_LOC_DECL)11943041982dSJonathan Peyton void *___kmp_thread_calloc(kmp_info_t *th, size_t nelem,
11953041982dSJonathan Peyton size_t elsize KMP_SRC_LOC_DECL) {
11967cc577a4SJonathan Peyton void *ptr;
11973041982dSJonathan Peyton KE_TRACE(30, ("-> __kmp_thread_calloc( %p, %d, %d ) called from %s:%d\n", th,
11983041982dSJonathan Peyton (int)nelem, (int)elsize KMP_SRC_LOC_PARM));
11997cc577a4SJonathan Peyton ptr = bgetz(th, (bufsize)(nelem * elsize));
12007cc577a4SJonathan Peyton KE_TRACE(30, ("<- __kmp_thread_calloc() returns %p\n", ptr));
12017cc577a4SJonathan Peyton return ptr;
12027cc577a4SJonathan Peyton }
12037cc577a4SJonathan Peyton
___kmp_thread_realloc(kmp_info_t * th,void * ptr,size_t size KMP_SRC_LOC_DECL)12043041982dSJonathan Peyton void *___kmp_thread_realloc(kmp_info_t *th, void *ptr,
12053041982dSJonathan Peyton size_t size KMP_SRC_LOC_DECL) {
12063041982dSJonathan Peyton KE_TRACE(30, ("-> __kmp_thread_realloc( %p, %p, %d ) called from %s:%d\n", th,
12073041982dSJonathan Peyton ptr, (int)size KMP_SRC_LOC_PARM));
12087cc577a4SJonathan Peyton ptr = bgetr(th, ptr, (bufsize)size);
12097cc577a4SJonathan Peyton KE_TRACE(30, ("<- __kmp_thread_realloc() returns %p\n", ptr));
12107cc577a4SJonathan Peyton return ptr;
12117cc577a4SJonathan Peyton }
12127cc577a4SJonathan Peyton
___kmp_thread_free(kmp_info_t * th,void * ptr KMP_SRC_LOC_DECL)12133041982dSJonathan Peyton void ___kmp_thread_free(kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL) {
12143041982dSJonathan Peyton KE_TRACE(30, ("-> __kmp_thread_free( %p, %p ) called from %s:%d\n", th,
12153041982dSJonathan Peyton ptr KMP_SRC_LOC_PARM));
12167cc577a4SJonathan Peyton if (ptr != NULL) {
12177cc577a4SJonathan Peyton __kmp_bget_dequeue(th); /* Release any queued buffers */
12187cc577a4SJonathan Peyton brel(th, ptr);
12197cc577a4SJonathan Peyton }
12207cc577a4SJonathan Peyton KE_TRACE(30, ("<- __kmp_thread_free()\n"));
12217cc577a4SJonathan Peyton }
12227cc577a4SJonathan Peyton
/* OMP 5.0 Memory Management support */
static const char *kmp_mk_lib_name; // name passed to dlopen() ("" if unused)
static void *h_memkind; // dlopen() handle for the memkind library, or NULL
/* memkind experimental API: */
// memkind_alloc
static void *(*kmp_mk_alloc)(void *k, size_t sz);
// memkind_free
static void (*kmp_mk_free)(void *kind, void *ptr);
// memkind_check_available
static int (*kmp_mk_check)(void *kind);
// kinds we are going to use (each is NULL when the symbol is absent or the
// kind failed its memkind_check_available() probe; see chk_kind())
static void **mk_default;
static void **mk_interleave;
static void **mk_hbw;
static void **mk_hbw_interleave;
static void **mk_hbw_preferred;
static void **mk_hugetlb;
static void **mk_hbw_hugetlb;
static void **mk_hbw_preferred_hugetlb;
static void **mk_dax_kmem;
static void **mk_dax_kmem_all;
static void **mk_dax_kmem_preferred;
// Offload-runtime entry points, resolved in __kmp_init_target_mem()
static void *(*kmp_target_alloc_host)(size_t size, int device);
static void *(*kmp_target_alloc_shared)(size_t size, int device);
static void *(*kmp_target_alloc_device)(size_t size, int device);
static void *(*kmp_target_free)(void *ptr, int device);
// true iff all four target entry points above were resolved
static bool __kmp_target_mem_available;
#define KMP_IS_TARGET_MEM_SPACE(MS)                                            \
  (MS == llvm_omp_target_host_mem_space ||                                     \
   MS == llvm_omp_target_shared_mem_space ||                                   \
   MS == llvm_omp_target_device_mem_space)
#define KMP_IS_TARGET_MEM_ALLOC(MA)                                            \
  (MA == llvm_omp_target_host_mem_alloc ||                                     \
   MA == llvm_omp_target_shared_mem_alloc ||                                   \
   MA == llvm_omp_target_device_mem_alloc)
1258ebf1830bSJonathan Peyton
#if KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN
// Reset *pkind to NULL when the kind symbol is missing or the kind is
// reported unusable by memkind_check_available().
static inline void chk_kind(void ***pkind) {
  KMP_DEBUG_ASSERT(pkind);
  if (*pkind != NULL && kmp_mk_check(**pkind) != 0) {
    *pkind = NULL; // kind not available or error
  }
}
#endif
126792ca6188SJonathan Peyton
// Try to dynamically load the memkind library and resolve the symbols and
// memory kinds used by the OMP 5.0 allocator support. On success sets
// __kmp_memkind_available and returns; on any failure all handles are NULL.
void __kmp_init_memkind() {
// as of 2018-07-31 memkind does not support Windows*, exclude it for now
#if KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN
  // use of statically linked memkind is problematic, as it depends on libnuma
  kmp_mk_lib_name = "libmemkind.so";
  h_memkind = dlopen(kmp_mk_lib_name, RTLD_LAZY);
  if (h_memkind) {
    kmp_mk_check = (int (*)(void *))dlsym(h_memkind, "memkind_check_available");
    kmp_mk_alloc =
        (void *(*)(void *, size_t))dlsym(h_memkind, "memkind_malloc");
    kmp_mk_free = (void (*)(void *, void *))dlsym(h_memkind, "memkind_free");
    mk_default = (void **)dlsym(h_memkind, "MEMKIND_DEFAULT");
    // The default kind must exist and pass its availability check; the other
    // kinds are optional and validated individually via chk_kind().
    if (kmp_mk_check && kmp_mk_alloc && kmp_mk_free && mk_default &&
        !kmp_mk_check(*mk_default)) {
      __kmp_memkind_available = 1;
      mk_interleave = (void **)dlsym(h_memkind, "MEMKIND_INTERLEAVE");
      chk_kind(&mk_interleave);
      mk_hbw = (void **)dlsym(h_memkind, "MEMKIND_HBW");
      chk_kind(&mk_hbw);
      mk_hbw_interleave = (void **)dlsym(h_memkind, "MEMKIND_HBW_INTERLEAVE");
      chk_kind(&mk_hbw_interleave);
      mk_hbw_preferred = (void **)dlsym(h_memkind, "MEMKIND_HBW_PREFERRED");
      chk_kind(&mk_hbw_preferred);
      mk_hugetlb = (void **)dlsym(h_memkind, "MEMKIND_HUGETLB");
      chk_kind(&mk_hugetlb);
      mk_hbw_hugetlb = (void **)dlsym(h_memkind, "MEMKIND_HBW_HUGETLB");
      chk_kind(&mk_hbw_hugetlb);
      mk_hbw_preferred_hugetlb =
          (void **)dlsym(h_memkind, "MEMKIND_HBW_PREFERRED_HUGETLB");
      chk_kind(&mk_hbw_preferred_hugetlb);
      mk_dax_kmem = (void **)dlsym(h_memkind, "MEMKIND_DAX_KMEM");
      chk_kind(&mk_dax_kmem);
      mk_dax_kmem_all = (void **)dlsym(h_memkind, "MEMKIND_DAX_KMEM_ALL");
      chk_kind(&mk_dax_kmem_all);
      mk_dax_kmem_preferred =
          (void **)dlsym(h_memkind, "MEMKIND_DAX_KMEM_PREFERRED");
      chk_kind(&mk_dax_kmem_preferred);
      KE_TRACE(25, ("__kmp_init_memkind: memkind library initialized\n"));
      return; // success
    }
    dlclose(h_memkind); // failure
  }
#else // !(KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN)
  kmp_mk_lib_name = "";
#endif // !(KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN)
  // Fall-through: memkind unavailable — clear every handle.
  h_memkind = NULL;
  kmp_mk_check = NULL;
  kmp_mk_alloc = NULL;
  kmp_mk_free = NULL;
  mk_default = NULL;
  mk_interleave = NULL;
  mk_hbw = NULL;
  mk_hbw_interleave = NULL;
  mk_hbw_preferred = NULL;
  mk_hugetlb = NULL;
  mk_hbw_hugetlb = NULL;
  mk_hbw_preferred_hugetlb = NULL;
  mk_dax_kmem = NULL;
  mk_dax_kmem_all = NULL;
  mk_dax_kmem_preferred = NULL;
}
132992ca6188SJonathan Peyton
// Unload the memkind library and clear all cached symbols/kinds.
void __kmp_fini_memkind() {
// Keep this guard consistent with __kmp_init_memkind(): memkind is only
// ever loaded on non-Darwin Unix dynamic-library builds, so there is
// nothing to finalize elsewhere.
#if KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN
  if (__kmp_memkind_available)
    KE_TRACE(25, ("__kmp_fini_memkind: finalize memkind library\n"));
  if (h_memkind) {
    dlclose(h_memkind);
    h_memkind = NULL;
  }
  kmp_mk_check = NULL;
  kmp_mk_alloc = NULL;
  kmp_mk_free = NULL;
  mk_default = NULL;
  mk_interleave = NULL;
  mk_hbw = NULL;
  mk_hbw_interleave = NULL;
  mk_hbw_preferred = NULL;
  mk_hugetlb = NULL;
  mk_hbw_hugetlb = NULL;
  mk_hbw_preferred_hugetlb = NULL;
  mk_dax_kmem = NULL;
  mk_dax_kmem_all = NULL;
  mk_dax_kmem_preferred = NULL;
#endif
}
1354840c0404SJoseph Huber
// Resolve optional target-memory entry points from the offload runtime.
// NOTE: the *(void **)(&fp) = ... form copies the looked-up object pointer
// into a function pointer without a direct object->function pointer cast
// (presumably KMP_DLSYM is a dlsym-style lookup — confirm in kmp.h).
void __kmp_init_target_mem() {
  *(void **)(&kmp_target_alloc_host) = KMP_DLSYM("llvm_omp_target_alloc_host");
  *(void **)(&kmp_target_alloc_shared) =
      KMP_DLSYM("llvm_omp_target_alloc_shared");
  *(void **)(&kmp_target_alloc_device) =
      KMP_DLSYM("llvm_omp_target_alloc_device");
  *(void **)(&kmp_target_free) = KMP_DLSYM("omp_target_free");
  // Target memory support requires all four entry points to be present.
  __kmp_target_mem_available = kmp_target_alloc_host &&
                               kmp_target_alloc_shared &&
                               kmp_target_alloc_device && kmp_target_free;
}
136692ca6188SJonathan Peyton
// Create a dynamic allocator for memspace 'ms' configured by 'ntraits'
// entries of 'traits'. Returns omp_null_allocator when the requested memory
// cannot be provided (e.g. HBW requested but not detectable/available).
omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t ms,
                                             int ntraits,
                                             omp_alloctrait_t traits[]) {
  // OpenMP 5.0 only allows predefined memspaces
  KMP_DEBUG_ASSERT(ms == omp_default_mem_space || ms == omp_low_lat_mem_space ||
                   ms == omp_large_cap_mem_space || ms == omp_const_mem_space ||
                   ms == omp_high_bw_mem_space || KMP_IS_TARGET_MEM_SPACE(ms));
  kmp_allocator_t *al;
  int i;
  al = (kmp_allocator_t *)__kmp_allocate(sizeof(kmp_allocator_t)); // zeroed
  al->memspace = ms; // not used currently
  for (i = 0; i < ntraits; ++i) {
    switch (traits[i].key) {
    case omp_atk_sync_hint:
    case omp_atk_access:
    case omp_atk_pinned:
      // accepted but currently ignored
      break;
    case omp_atk_alignment:
      __kmp_type_convert(traits[i].value, &(al->alignment));
      KMP_ASSERT(IS_POWER_OF_TWO(al->alignment));
      break;
    case omp_atk_pool_size:
      al->pool_size = traits[i].value;
      break;
    case omp_atk_fallback:
      al->fb = (omp_alloctrait_value_t)traits[i].value;
      KMP_DEBUG_ASSERT(
          al->fb == omp_atv_default_mem_fb || al->fb == omp_atv_null_fb ||
          al->fb == omp_atv_abort_fb || al->fb == omp_atv_allocator_fb);
      break;
    case omp_atk_fb_data:
      al->fb_data = RCAST(kmp_allocator_t *, traits[i].value);
      break;
    case omp_atk_partition:
      // value may be omp_atv_interleaved (resolved to a kind below) or a kind
      al->memkind = RCAST(void **, traits[i].value);
      break;
    default:
      KMP_ASSERT2(0, "Unexpected allocator trait");
    }
  }
  // Normalize the fallback setting (al->fb == 0 means "not specified",
  // since __kmp_allocate() returned zeroed memory).
  if (al->fb == 0) {
    // set default allocator
    al->fb = omp_atv_default_mem_fb;
    al->fb_data = (kmp_allocator_t *)omp_default_mem_alloc;
  } else if (al->fb == omp_atv_allocator_fb) {
    KMP_ASSERT(al->fb_data != NULL);
  } else if (al->fb == omp_atv_default_mem_fb) {
    al->fb_data = (kmp_allocator_t *)omp_default_mem_alloc;
  }
  if (__kmp_memkind_available) {
    // Let's use memkind library if available
    if (ms == omp_high_bw_mem_space) {
      if (al->memkind == (void *)omp_atv_interleaved && mk_hbw_interleave) {
        al->memkind = mk_hbw_interleave;
      } else if (mk_hbw_preferred) {
        // AC: do not try to use MEMKIND_HBW for now, because memkind library
        // cannot reliably detect exhaustion of HBW memory.
        // It could be possible using hbw_verify_memory_region() but memkind
        // manual says: "Using this function in production code may result in
        // serious performance penalty".
        al->memkind = mk_hbw_preferred;
      } else {
        // HBW is requested but not available --> return NULL allocator
        __kmp_free(al);
        return omp_null_allocator;
      }
    } else if (ms == omp_large_cap_mem_space) {
      if (mk_dax_kmem_all) {
        // All pmem nodes are visited
        al->memkind = mk_dax_kmem_all;
      } else if (mk_dax_kmem) {
        // Only closest pmem node is visited
        al->memkind = mk_dax_kmem;
      } else {
        // no DAX_KMEM kind available --> return NULL allocator
        __kmp_free(al);
        return omp_null_allocator;
      }
    } else {
      if (al->memkind == (void *)omp_atv_interleaved && mk_interleave) {
        al->memkind = mk_interleave;
      } else {
        al->memkind = mk_default;
      }
    }
  } else if (KMP_IS_TARGET_MEM_SPACE(ms) && !__kmp_target_mem_available) {
    // target memspace requested but offload entry points are missing
    __kmp_free(al);
    return omp_null_allocator;
  } else {
    if (ms == omp_high_bw_mem_space) {
      // cannot detect HBW memory presence without memkind library
      __kmp_free(al);
      return omp_null_allocator;
    }
  }
  return (omp_allocator_handle_t)al;
}
1463ebf1830bSJonathan Peyton
__kmpc_destroy_allocator(int gtid,omp_allocator_handle_t allocator)1464ebf1830bSJonathan Peyton void __kmpc_destroy_allocator(int gtid, omp_allocator_handle_t allocator) {
1465ebf1830bSJonathan Peyton if (allocator > kmp_max_mem_alloc)
1466ebf1830bSJonathan Peyton __kmp_free(allocator);
1467ebf1830bSJonathan Peyton }
1468ebf1830bSJonathan Peyton
__kmpc_set_default_allocator(int gtid,omp_allocator_handle_t allocator)1469ebf1830bSJonathan Peyton void __kmpc_set_default_allocator(int gtid, omp_allocator_handle_t allocator) {
1470ebf1830bSJonathan Peyton if (allocator == omp_null_allocator)
1471ebf1830bSJonathan Peyton allocator = omp_default_mem_alloc;
147292ca6188SJonathan Peyton __kmp_threads[gtid]->th.th_def_allocator = allocator;
147392ca6188SJonathan Peyton }
1474ebf1830bSJonathan Peyton
__kmpc_get_default_allocator(int gtid)1475ebf1830bSJonathan Peyton omp_allocator_handle_t __kmpc_get_default_allocator(int gtid) {
147692ca6188SJonathan Peyton return __kmp_threads[gtid]->th.th_def_allocator;
147792ca6188SJonathan Peyton }
147892ca6188SJonathan Peyton
// Per-allocation bookkeeping record. NOTE(review): judging from the visible
// part of __kmp_alloc() (size_a = size + descriptor + align), this appears
// to be embedded in the allocation itself; confirm against the free path.
typedef struct kmp_mem_desc { // Memory block descriptor
  void *ptr_alloc; // Pointer returned by allocator
  size_t size_a; // Size of allocated memory block (initial+descriptor+align)
  size_t size_orig; // Original size requested
  void *ptr_align; // Pointer to aligned memory, returned
  kmp_allocator_t *allocator; // allocator
} kmp_mem_desc_t;
static int alignment = sizeof(void *); // align to pointer size by default
148792ca6188SJonathan Peyton
1488f5c0c917SAndreyChurbanov // external interfaces are wrappers over internal implementation
__kmpc_alloc(int gtid,size_t size,omp_allocator_handle_t allocator)1489ebf1830bSJonathan Peyton void *__kmpc_alloc(int gtid, size_t size, omp_allocator_handle_t allocator) {
1490f5c0c917SAndreyChurbanov KE_TRACE(25, ("__kmpc_alloc: T#%d (%d, %p)\n", gtid, (int)size, allocator));
1491f5c0c917SAndreyChurbanov void *ptr = __kmp_alloc(gtid, 0, size, allocator);
1492f5c0c917SAndreyChurbanov KE_TRACE(25, ("__kmpc_alloc returns %p, T#%d\n", ptr, gtid));
1493f5c0c917SAndreyChurbanov return ptr;
1494f5c0c917SAndreyChurbanov }
1495f5c0c917SAndreyChurbanov
__kmpc_aligned_alloc(int gtid,size_t algn,size_t size,omp_allocator_handle_t allocator)1496f5c0c917SAndreyChurbanov void *__kmpc_aligned_alloc(int gtid, size_t algn, size_t size,
1497f5c0c917SAndreyChurbanov omp_allocator_handle_t allocator) {
1498f5c0c917SAndreyChurbanov KE_TRACE(25, ("__kmpc_aligned_alloc: T#%d (%d, %d, %p)\n", gtid, (int)algn,
1499f5c0c917SAndreyChurbanov (int)size, allocator));
1500f5c0c917SAndreyChurbanov void *ptr = __kmp_alloc(gtid, algn, size, allocator);
1501f5c0c917SAndreyChurbanov KE_TRACE(25, ("__kmpc_aligned_alloc returns %p, T#%d\n", ptr, gtid));
1502f5c0c917SAndreyChurbanov return ptr;
1503f5c0c917SAndreyChurbanov }
1504f5c0c917SAndreyChurbanov
__kmpc_calloc(int gtid,size_t nmemb,size_t size,omp_allocator_handle_t allocator)1505f5c0c917SAndreyChurbanov void *__kmpc_calloc(int gtid, size_t nmemb, size_t size,
1506f5c0c917SAndreyChurbanov omp_allocator_handle_t allocator) {
1507f5c0c917SAndreyChurbanov KE_TRACE(25, ("__kmpc_calloc: T#%d (%d, %d, %p)\n", gtid, (int)nmemb,
1508f5c0c917SAndreyChurbanov (int)size, allocator));
1509f5c0c917SAndreyChurbanov void *ptr = __kmp_calloc(gtid, 0, nmemb, size, allocator);
1510f5c0c917SAndreyChurbanov KE_TRACE(25, ("__kmpc_calloc returns %p, T#%d\n", ptr, gtid));
1511f5c0c917SAndreyChurbanov return ptr;
1512f5c0c917SAndreyChurbanov }
1513f5c0c917SAndreyChurbanov
__kmpc_realloc(int gtid,void * ptr,size_t size,omp_allocator_handle_t allocator,omp_allocator_handle_t free_allocator)1514f5c0c917SAndreyChurbanov void *__kmpc_realloc(int gtid, void *ptr, size_t size,
1515f5c0c917SAndreyChurbanov omp_allocator_handle_t allocator,
1516f5c0c917SAndreyChurbanov omp_allocator_handle_t free_allocator) {
1517f5c0c917SAndreyChurbanov KE_TRACE(25, ("__kmpc_realloc: T#%d (%p, %d, %p, %p)\n", gtid, ptr, (int)size,
1518f5c0c917SAndreyChurbanov allocator, free_allocator));
1519f5c0c917SAndreyChurbanov void *nptr = __kmp_realloc(gtid, ptr, size, allocator, free_allocator);
1520f5c0c917SAndreyChurbanov KE_TRACE(25, ("__kmpc_realloc returns %p, T#%d\n", nptr, gtid));
1521f5c0c917SAndreyChurbanov return nptr;
1522f5c0c917SAndreyChurbanov }
1523f5c0c917SAndreyChurbanov
__kmpc_free(int gtid,void * ptr,omp_allocator_handle_t allocator)1524f5c0c917SAndreyChurbanov void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t allocator) {
1525f5c0c917SAndreyChurbanov KE_TRACE(25, ("__kmpc_free: T#%d free(%p,%p)\n", gtid, ptr, allocator));
1526f5c0c917SAndreyChurbanov ___kmpc_free(gtid, ptr, allocator);
1527f5c0c917SAndreyChurbanov KE_TRACE(10, ("__kmpc_free: T#%d freed %p (%p)\n", gtid, ptr, allocator));
1528f5c0c917SAndreyChurbanov return;
1529f5c0c917SAndreyChurbanov }
1530f5c0c917SAndreyChurbanov
1531f5c0c917SAndreyChurbanov // internal implementation, called from inside the library
__kmp_alloc(int gtid,size_t algn,size_t size,omp_allocator_handle_t allocator)1532f5c0c917SAndreyChurbanov void *__kmp_alloc(int gtid, size_t algn, size_t size,
1533f5c0c917SAndreyChurbanov omp_allocator_handle_t allocator) {
1534ebf1830bSJonathan Peyton void *ptr = NULL;
1535ebf1830bSJonathan Peyton kmp_allocator_t *al;
153692ca6188SJonathan Peyton KMP_DEBUG_ASSERT(__kmp_init_serial);
1537938f1b85SNawrin Sultana if (size == 0)
1538938f1b85SNawrin Sultana return NULL;
1539ebf1830bSJonathan Peyton if (allocator == omp_null_allocator)
154092ca6188SJonathan Peyton allocator = __kmp_threads[gtid]->th.th_def_allocator;
154192ca6188SJonathan Peyton
1542f5c0c917SAndreyChurbanov al = RCAST(kmp_allocator_t *, allocator);
1543ebf1830bSJonathan Peyton
154492ca6188SJonathan Peyton int sz_desc = sizeof(kmp_mem_desc_t);
154592ca6188SJonathan Peyton kmp_mem_desc_t desc;
154692ca6188SJonathan Peyton kmp_uintptr_t addr; // address returned by allocator
154792ca6188SJonathan Peyton kmp_uintptr_t addr_align; // address to return to caller
154892ca6188SJonathan Peyton kmp_uintptr_t addr_descr; // address of memory block descriptor
1549f5c0c917SAndreyChurbanov size_t align = alignment; // default alignment
1550f5c0c917SAndreyChurbanov if (allocator > kmp_max_mem_alloc && al->alignment > align)
1551f5c0c917SAndreyChurbanov align = al->alignment; // alignment required by allocator trait
1552f5c0c917SAndreyChurbanov if (align < algn)
1553f5c0c917SAndreyChurbanov align = algn; // max of allocator trait, parameter and sizeof(void*)
15545439db05SNawrin Sultana desc.size_orig = size;
1555ebf1830bSJonathan Peyton desc.size_a = size + sz_desc + align;
155692ca6188SJonathan Peyton
1557ebf1830bSJonathan Peyton if (__kmp_memkind_available) {
1558ebf1830bSJonathan Peyton if (allocator < kmp_max_mem_alloc) {
1559ebf1830bSJonathan Peyton // pre-defined allocator
1560ebf1830bSJonathan Peyton if (allocator == omp_high_bw_mem_alloc && mk_hbw_preferred) {
1561ebf1830bSJonathan Peyton ptr = kmp_mk_alloc(*mk_hbw_preferred, desc.size_a);
1562bba3a82bSHansang Bae } else if (allocator == omp_large_cap_mem_alloc && mk_dax_kmem_all) {
1563bba3a82bSHansang Bae ptr = kmp_mk_alloc(*mk_dax_kmem_all, desc.size_a);
1564ebf1830bSJonathan Peyton } else {
1565ebf1830bSJonathan Peyton ptr = kmp_mk_alloc(*mk_default, desc.size_a);
1566ebf1830bSJonathan Peyton }
1567ebf1830bSJonathan Peyton } else if (al->pool_size > 0) {
1568ebf1830bSJonathan Peyton // custom allocator with pool size requested
1569ebf1830bSJonathan Peyton kmp_uint64 used =
1570ebf1830bSJonathan Peyton KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
1571ebf1830bSJonathan Peyton if (used + desc.size_a > al->pool_size) {
1572ebf1830bSJonathan Peyton // not enough space, need to go fallback path
1573ebf1830bSJonathan Peyton KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
1574ad24cf2aSKelvin Li if (al->fb == omp_atv_default_mem_fb) {
1575ebf1830bSJonathan Peyton al = (kmp_allocator_t *)omp_default_mem_alloc;
1576ebf1830bSJonathan Peyton ptr = kmp_mk_alloc(*mk_default, desc.size_a);
1577ad24cf2aSKelvin Li } else if (al->fb == omp_atv_abort_fb) {
1578ebf1830bSJonathan Peyton KMP_ASSERT(0); // abort fallback requested
1579ad24cf2aSKelvin Li } else if (al->fb == omp_atv_allocator_fb) {
1580ebf1830bSJonathan Peyton KMP_ASSERT(al != al->fb_data);
1581ebf1830bSJonathan Peyton al = al->fb_data;
1582f5c0c917SAndreyChurbanov return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
1583ebf1830bSJonathan Peyton } // else ptr == NULL;
1584ebf1830bSJonathan Peyton } else {
1585ebf1830bSJonathan Peyton // pool has enough space
1586ebf1830bSJonathan Peyton ptr = kmp_mk_alloc(*al->memkind, desc.size_a);
1587ebf1830bSJonathan Peyton if (ptr == NULL) {
1588ad24cf2aSKelvin Li if (al->fb == omp_atv_default_mem_fb) {
1589ebf1830bSJonathan Peyton al = (kmp_allocator_t *)omp_default_mem_alloc;
1590ebf1830bSJonathan Peyton ptr = kmp_mk_alloc(*mk_default, desc.size_a);
1591ad24cf2aSKelvin Li } else if (al->fb == omp_atv_abort_fb) {
1592ebf1830bSJonathan Peyton KMP_ASSERT(0); // abort fallback requested
1593ad24cf2aSKelvin Li } else if (al->fb == omp_atv_allocator_fb) {
1594ebf1830bSJonathan Peyton KMP_ASSERT(al != al->fb_data);
1595ebf1830bSJonathan Peyton al = al->fb_data;
1596f5c0c917SAndreyChurbanov return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
1597ebf1830bSJonathan Peyton }
1598ebf1830bSJonathan Peyton }
1599ebf1830bSJonathan Peyton }
1600ebf1830bSJonathan Peyton } else {
1601ebf1830bSJonathan Peyton // custom allocator, pool size not requested
1602ebf1830bSJonathan Peyton ptr = kmp_mk_alloc(*al->memkind, desc.size_a);
1603ebf1830bSJonathan Peyton if (ptr == NULL) {
1604ad24cf2aSKelvin Li if (al->fb == omp_atv_default_mem_fb) {
1605ebf1830bSJonathan Peyton al = (kmp_allocator_t *)omp_default_mem_alloc;
1606ebf1830bSJonathan Peyton ptr = kmp_mk_alloc(*mk_default, desc.size_a);
1607ad24cf2aSKelvin Li } else if (al->fb == omp_atv_abort_fb) {
1608ebf1830bSJonathan Peyton KMP_ASSERT(0); // abort fallback requested
1609ad24cf2aSKelvin Li } else if (al->fb == omp_atv_allocator_fb) {
1610ebf1830bSJonathan Peyton KMP_ASSERT(al != al->fb_data);
1611ebf1830bSJonathan Peyton al = al->fb_data;
1612f5c0c917SAndreyChurbanov return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
1613ebf1830bSJonathan Peyton }
1614ebf1830bSJonathan Peyton }
1615ebf1830bSJonathan Peyton }
1616ebf1830bSJonathan Peyton } else if (allocator < kmp_max_mem_alloc) {
1617b6c2f538SHansang Bae if (KMP_IS_TARGET_MEM_ALLOC(allocator)) {
1618b6c2f538SHansang Bae // Use size input directly as the memory may not be accessible on host.
1619b6c2f538SHansang Bae // Use default device for now.
1620b6c2f538SHansang Bae if (__kmp_target_mem_available) {
1621b6c2f538SHansang Bae kmp_int32 device =
1622b6c2f538SHansang Bae __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
1623b6c2f538SHansang Bae if (allocator == llvm_omp_target_host_mem_alloc)
1624b6c2f538SHansang Bae ptr = kmp_target_alloc_host(size, device);
1625b6c2f538SHansang Bae else if (allocator == llvm_omp_target_shared_mem_alloc)
1626b6c2f538SHansang Bae ptr = kmp_target_alloc_shared(size, device);
1627b6c2f538SHansang Bae else // allocator == llvm_omp_target_device_mem_alloc
1628b6c2f538SHansang Bae ptr = kmp_target_alloc_device(size, device);
1629b6c2f538SHansang Bae }
1630b6c2f538SHansang Bae return ptr;
1631b6c2f538SHansang Bae }
1632b6c2f538SHansang Bae
1633ebf1830bSJonathan Peyton // pre-defined allocator
1634ebf1830bSJonathan Peyton if (allocator == omp_high_bw_mem_alloc) {
1635ebf1830bSJonathan Peyton // ptr = NULL;
1636bba3a82bSHansang Bae } else if (allocator == omp_large_cap_mem_alloc) {
1637bba3a82bSHansang Bae // warnings?
1638ebf1830bSJonathan Peyton } else {
1639ebf1830bSJonathan Peyton ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
1640ebf1830bSJonathan Peyton }
1641b6c2f538SHansang Bae } else if (KMP_IS_TARGET_MEM_SPACE(al->memspace)) {
1642b6c2f538SHansang Bae if (__kmp_target_mem_available) {
1643b6c2f538SHansang Bae kmp_int32 device =
1644b6c2f538SHansang Bae __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
1645b6c2f538SHansang Bae if (al->memspace == llvm_omp_target_host_mem_space)
1646b6c2f538SHansang Bae ptr = kmp_target_alloc_host(size, device);
1647b6c2f538SHansang Bae else if (al->memspace == llvm_omp_target_shared_mem_space)
1648b6c2f538SHansang Bae ptr = kmp_target_alloc_shared(size, device);
1649b6c2f538SHansang Bae else // al->memspace == llvm_omp_target_device_mem_space
1650b6c2f538SHansang Bae ptr = kmp_target_alloc_device(size, device);
1651b6c2f538SHansang Bae }
1652b6c2f538SHansang Bae return ptr;
1653ebf1830bSJonathan Peyton } else if (al->pool_size > 0) {
1654ebf1830bSJonathan Peyton // custom allocator with pool size requested
1655ebf1830bSJonathan Peyton kmp_uint64 used =
1656ebf1830bSJonathan Peyton KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
1657ebf1830bSJonathan Peyton if (used + desc.size_a > al->pool_size) {
1658ebf1830bSJonathan Peyton // not enough space, need to go fallback path
1659ebf1830bSJonathan Peyton KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
1660ad24cf2aSKelvin Li if (al->fb == omp_atv_default_mem_fb) {
1661ebf1830bSJonathan Peyton al = (kmp_allocator_t *)omp_default_mem_alloc;
1662ebf1830bSJonathan Peyton ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
1663ad24cf2aSKelvin Li } else if (al->fb == omp_atv_abort_fb) {
1664ebf1830bSJonathan Peyton KMP_ASSERT(0); // abort fallback requested
1665ad24cf2aSKelvin Li } else if (al->fb == omp_atv_allocator_fb) {
1666ebf1830bSJonathan Peyton KMP_ASSERT(al != al->fb_data);
1667ebf1830bSJonathan Peyton al = al->fb_data;
1668f5c0c917SAndreyChurbanov return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
1669ebf1830bSJonathan Peyton } // else ptr == NULL;
1670ebf1830bSJonathan Peyton } else {
1671ebf1830bSJonathan Peyton // pool has enough space
1672ebf1830bSJonathan Peyton ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
1673ad24cf2aSKelvin Li if (ptr == NULL && al->fb == omp_atv_abort_fb) {
1674ebf1830bSJonathan Peyton KMP_ASSERT(0); // abort fallback requested
1675ebf1830bSJonathan Peyton } // no sense to look for another fallback because of same internal alloc
1676ebf1830bSJonathan Peyton }
1677ebf1830bSJonathan Peyton } else {
1678ebf1830bSJonathan Peyton // custom allocator, pool size not requested
1679ebf1830bSJonathan Peyton ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
1680ad24cf2aSKelvin Li if (ptr == NULL && al->fb == omp_atv_abort_fb) {
1681ebf1830bSJonathan Peyton KMP_ASSERT(0); // abort fallback requested
1682ebf1830bSJonathan Peyton } // no sense to look for another fallback because of same internal alloc
1683ebf1830bSJonathan Peyton }
1684f5c0c917SAndreyChurbanov KE_TRACE(10, ("__kmp_alloc: T#%d %p=alloc(%d)\n", gtid, ptr, desc.size_a));
168592ca6188SJonathan Peyton if (ptr == NULL)
168692ca6188SJonathan Peyton return NULL;
168792ca6188SJonathan Peyton
168892ca6188SJonathan Peyton addr = (kmp_uintptr_t)ptr;
1689ebf1830bSJonathan Peyton addr_align = (addr + sz_desc + align - 1) & ~(align - 1);
169092ca6188SJonathan Peyton addr_descr = addr_align - sz_desc;
169192ca6188SJonathan Peyton
169292ca6188SJonathan Peyton desc.ptr_alloc = ptr;
169392ca6188SJonathan Peyton desc.ptr_align = (void *)addr_align;
1694ebf1830bSJonathan Peyton desc.allocator = al;
169592ca6188SJonathan Peyton *((kmp_mem_desc_t *)addr_descr) = desc; // save descriptor contents
169692ca6188SJonathan Peyton KMP_MB();
169792ca6188SJonathan Peyton
169892ca6188SJonathan Peyton return desc.ptr_align;
169992ca6188SJonathan Peyton }
170092ca6188SJonathan Peyton
__kmp_calloc(int gtid,size_t algn,size_t nmemb,size_t size,omp_allocator_handle_t allocator)1701f5c0c917SAndreyChurbanov void *__kmp_calloc(int gtid, size_t algn, size_t nmemb, size_t size,
1702938f1b85SNawrin Sultana omp_allocator_handle_t allocator) {
1703938f1b85SNawrin Sultana void *ptr = NULL;
1704938f1b85SNawrin Sultana kmp_allocator_t *al;
1705938f1b85SNawrin Sultana KMP_DEBUG_ASSERT(__kmp_init_serial);
1706938f1b85SNawrin Sultana
1707938f1b85SNawrin Sultana if (allocator == omp_null_allocator)
1708938f1b85SNawrin Sultana allocator = __kmp_threads[gtid]->th.th_def_allocator;
1709938f1b85SNawrin Sultana
1710f5c0c917SAndreyChurbanov al = RCAST(kmp_allocator_t *, allocator);
1711938f1b85SNawrin Sultana
1712938f1b85SNawrin Sultana if (nmemb == 0 || size == 0)
1713938f1b85SNawrin Sultana return ptr;
1714938f1b85SNawrin Sultana
1715938f1b85SNawrin Sultana if ((SIZE_MAX - sizeof(kmp_mem_desc_t)) / size < nmemb) {
1716938f1b85SNawrin Sultana if (al->fb == omp_atv_abort_fb) {
1717938f1b85SNawrin Sultana KMP_ASSERT(0);
1718938f1b85SNawrin Sultana }
1719938f1b85SNawrin Sultana return ptr;
1720938f1b85SNawrin Sultana }
1721938f1b85SNawrin Sultana
1722f5c0c917SAndreyChurbanov ptr = __kmp_alloc(gtid, algn, nmemb * size, allocator);
1723938f1b85SNawrin Sultana
1724938f1b85SNawrin Sultana if (ptr) {
1725938f1b85SNawrin Sultana memset(ptr, 0x00, nmemb * size);
1726938f1b85SNawrin Sultana }
1727938f1b85SNawrin Sultana return ptr;
1728938f1b85SNawrin Sultana }
1729938f1b85SNawrin Sultana
__kmp_realloc(int gtid,void * ptr,size_t size,omp_allocator_handle_t allocator,omp_allocator_handle_t free_allocator)1730f5c0c917SAndreyChurbanov void *__kmp_realloc(int gtid, void *ptr, size_t size,
17315439db05SNawrin Sultana omp_allocator_handle_t allocator,
17325439db05SNawrin Sultana omp_allocator_handle_t free_allocator) {
17335439db05SNawrin Sultana void *nptr = NULL;
17345439db05SNawrin Sultana KMP_DEBUG_ASSERT(__kmp_init_serial);
17355439db05SNawrin Sultana
17365439db05SNawrin Sultana if (size == 0) {
17375439db05SNawrin Sultana if (ptr != NULL)
1738f5c0c917SAndreyChurbanov ___kmpc_free(gtid, ptr, free_allocator);
17395439db05SNawrin Sultana return nptr;
17405439db05SNawrin Sultana }
17415439db05SNawrin Sultana
1742f5c0c917SAndreyChurbanov nptr = __kmp_alloc(gtid, 0, size, allocator);
17435439db05SNawrin Sultana
17445439db05SNawrin Sultana if (nptr != NULL && ptr != NULL) {
17455439db05SNawrin Sultana kmp_mem_desc_t desc;
17465439db05SNawrin Sultana kmp_uintptr_t addr_align; // address to return to caller
17475439db05SNawrin Sultana kmp_uintptr_t addr_descr; // address of memory block descriptor
17485439db05SNawrin Sultana
17495439db05SNawrin Sultana addr_align = (kmp_uintptr_t)ptr;
17505439db05SNawrin Sultana addr_descr = addr_align - sizeof(kmp_mem_desc_t);
17515439db05SNawrin Sultana desc = *((kmp_mem_desc_t *)addr_descr); // read descriptor
17525439db05SNawrin Sultana
17535439db05SNawrin Sultana KMP_DEBUG_ASSERT(desc.ptr_align == ptr);
17545439db05SNawrin Sultana KMP_DEBUG_ASSERT(desc.size_orig > 0);
17555439db05SNawrin Sultana KMP_DEBUG_ASSERT(desc.size_orig < desc.size_a);
17565439db05SNawrin Sultana KMP_MEMCPY((char *)nptr, (char *)ptr,
17575439db05SNawrin Sultana (size_t)((size < desc.size_orig) ? size : desc.size_orig));
17585439db05SNawrin Sultana }
17595439db05SNawrin Sultana
17605439db05SNawrin Sultana if (nptr != NULL) {
1761f5c0c917SAndreyChurbanov ___kmpc_free(gtid, ptr, free_allocator);
17625439db05SNawrin Sultana }
17635439db05SNawrin Sultana
17645439db05SNawrin Sultana return nptr;
17655439db05SNawrin Sultana }
17665439db05SNawrin Sultana
// Free memory previously obtained via __kmp_alloc()/__kmp_calloc()/
// __kmp_realloc(). 'allocator' may be a pre-defined handle, a custom
// kmp_allocator_t*, or 0/NULL; the authoritative allocator is re-read
// from the block's descriptor. No-op for NULL 'ptr'.
void ___kmpc_free(int gtid, void *ptr, omp_allocator_handle_t allocator) {
  if (ptr == NULL)
    return;

  kmp_allocator_t *al;
  omp_allocator_handle_t oal;
  al = RCAST(kmp_allocator_t *, CCAST(omp_allocator_handle_t, allocator));
  kmp_mem_desc_t desc;
  kmp_uintptr_t addr_align; // address to return to caller
  kmp_uintptr_t addr_descr; // address of memory block descriptor
  // Target (device/host/shared) memory was allocated without a host-side
  // descriptor, so release it directly through the offload runtime.
  // Note: al->memspace is only inspected when 'allocator' is a real
  // kmp_allocator_t* (handles above kmp_max_mem_alloc).
  if (KMP_IS_TARGET_MEM_ALLOC(allocator) ||
      (allocator > kmp_max_mem_alloc &&
       KMP_IS_TARGET_MEM_SPACE(al->memspace))) {
    KMP_DEBUG_ASSERT(kmp_target_free);
    kmp_int32 device =
        __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
    kmp_target_free(ptr, device);
    return;
  }

  // Recover the descriptor saved just below the user pointer by the
  // allocation path.
  addr_align = (kmp_uintptr_t)ptr;
  addr_descr = addr_align - sizeof(kmp_mem_desc_t);
  desc = *((kmp_mem_desc_t *)addr_descr); // read descriptor

  KMP_DEBUG_ASSERT(desc.ptr_align == ptr);
  if (allocator) {
    // Caller-supplied allocator must match the one recorded at
    // allocation time (or its fallback allocator).
    KMP_DEBUG_ASSERT(desc.allocator == al || desc.allocator == al->fb_data);
  }
  // The descriptor, not the caller, decides which allocator frees.
  al = desc.allocator;
  oal = (omp_allocator_handle_t)al; // cast to void* for comparisons
  KMP_DEBUG_ASSERT(al);

  if (__kmp_memkind_available) {
    if (oal < kmp_max_mem_alloc) {
      // pre-defined allocator: route to the matching memkind, falling
      // back to the default kind when the specific one is unavailable
      if (oal == omp_high_bw_mem_alloc && mk_hbw_preferred) {
        kmp_mk_free(*mk_hbw_preferred, desc.ptr_alloc);
      } else if (oal == omp_large_cap_mem_alloc && mk_dax_kmem_all) {
        kmp_mk_free(*mk_dax_kmem_all, desc.ptr_alloc);
      } else {
        kmp_mk_free(*mk_default, desc.ptr_alloc);
      }
    } else {
      if (al->pool_size > 0) { // custom allocator with pool size requested
        // return this block's bytes to the pool accounting
        kmp_uint64 used =
            KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
        (void)used; // to suppress compiler warning
        KMP_DEBUG_ASSERT(used >= desc.size_a);
      }
      kmp_mk_free(*al->memkind, desc.ptr_alloc);
    }
  } else {
    // memkind not available: blocks came from the thread-local heap
    if (oal > kmp_max_mem_alloc && al->pool_size > 0) {
      kmp_uint64 used =
          KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
      (void)used; // to suppress compiler warning
      KMP_DEBUG_ASSERT(used >= desc.size_a);
    }
    __kmp_thread_free(__kmp_thread_from_gtid(gtid), desc.ptr_alloc);
  }
}
182892ca6188SJonathan Peyton
18293041982dSJonathan Peyton /* If LEAK_MEMORY is defined, __kmp_free() will *not* free memory. It causes
18303041982dSJonathan Peyton memory leaks, but it may be useful for debugging memory corruptions, used
18313041982dSJonathan Peyton freed pointers, etc. */
18327cc577a4SJonathan Peyton /* #define LEAK_MEMORY */
// Descriptor stored immediately below the aligned pointer handed to the
// caller (see ___kmp_allocate_align / ___kmp_fast_allocate); __kmp_free
// and ___kmp_fast_free read it back to recover the original malloc'ed
// pointer and sizes.
struct kmp_mem_descr { // Memory block descriptor.
  void *ptr_allocated; // Pointer returned by malloc(), subject for free().
  size_t size_allocated; // Size of allocated memory block.
  void *ptr_aligned; // Pointer to aligned memory, to be used by client code.
  size_t size_aligned; // Size of aligned memory block.
};
typedef struct kmp_mem_descr kmp_mem_descr_t;
18407cc577a4SJonathan Peyton
18413041982dSJonathan Peyton /* Allocate memory on requested boundary, fill allocated memory with 0x00.
18423041982dSJonathan Peyton NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
18433041982dSJonathan Peyton error. Must use __kmp_free when freeing memory allocated by this routine! */
static void *___kmp_allocate_align(size_t size,
                                   size_t alignment KMP_SRC_LOC_DECL) {
  /* __kmp_allocate() allocates (by call to malloc()) bigger memory block than
     requested to return properly aligned pointer. Original pointer returned
     by malloc() and size of allocated block is saved in descriptor just
     before the aligned pointer. This information used by __kmp_free() -- it
     has to pass to free() original pointer, not aligned one.

     +---------+------------+-----------------------------------+---------+
     | padding | descriptor |           aligned block           | padding |
     +---------+------------+-----------------------------------+---------+
     ^                      ^
     |                      |
     |                      +- Aligned pointer returned to caller
     +- Pointer returned by malloc()

     Aligned block is filled with zeros, paddings are filled with 0xEF. */

  kmp_mem_descr_t descr;
  kmp_uintptr_t addr_allocated; // Address returned by malloc().
  kmp_uintptr_t addr_aligned; // Aligned address to return to caller.
  kmp_uintptr_t addr_descr; // Address of memory block descriptor.

  KE_TRACE(25, ("-> ___kmp_allocate_align( %d, %d ) called from %s:%d\n",
                (int)size, (int)alignment KMP_SRC_LOC_PARM));

  // NOTE(review): the mask computation below assumes 'alignment' is a
  // power of two — confirm all callers satisfy this.
  KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too
  KMP_DEBUG_ASSERT(sizeof(void *) <= sizeof(kmp_uintptr_t));
  // Make sure kmp_uintptr_t is enough to store addresses.

  // Over-allocate by descriptor size plus alignment so the aligned block
  // plus its descriptor always fit.
  descr.size_aligned = size;
  descr.size_allocated =
      descr.size_aligned + sizeof(kmp_mem_descr_t) + alignment;

#if KMP_DEBUG
  descr.ptr_allocated = _malloc_src_loc(descr.size_allocated, _file_, _line_);
#else
  descr.ptr_allocated = malloc_src_loc(descr.size_allocated KMP_SRC_LOC_PARM);
#endif
  KE_TRACE(10, (" malloc( %d ) returned %p\n", (int)descr.size_allocated,
                descr.ptr_allocated));
  if (descr.ptr_allocated == NULL) {
    // NULL is never returned: abort on out-of-memory.
    KMP_FATAL(OutOfHeapMemory);
  }

  // Round up past the descriptor to the next alignment boundary; the
  // descriptor sits immediately below the aligned pointer.
  addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
  addr_aligned =
      (addr_allocated + sizeof(kmp_mem_descr_t) + alignment) & ~(alignment - 1);
  addr_descr = addr_aligned - sizeof(kmp_mem_descr_t);

  descr.ptr_aligned = (void *)addr_aligned;

  KE_TRACE(26, (" ___kmp_allocate_align: "
                "ptr_allocated=%p, size_allocated=%d, "
                "ptr_aligned=%p, size_aligned=%d\n",
                descr.ptr_allocated, (int)descr.size_allocated,
                descr.ptr_aligned, (int)descr.size_aligned));

  // Sanity checks: descriptor and aligned block lie inside the
  // allocation and the returned address is properly aligned.
  KMP_DEBUG_ASSERT(addr_allocated <= addr_descr);
  KMP_DEBUG_ASSERT(addr_descr + sizeof(kmp_mem_descr_t) == addr_aligned);
  KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
                   addr_allocated + descr.size_allocated);
  KMP_DEBUG_ASSERT(addr_aligned % alignment == 0);
// NOTE(review): this guard uses '#ifdef KMP_DEBUG' while the allocation
// above uses '#if KMP_DEBUG' — confirm both are intended to match.
#ifdef KMP_DEBUG
  memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
// Fill allocated memory block with 0xEF.
#endif
  memset(descr.ptr_aligned, 0x00, descr.size_aligned);
  // Fill the aligned memory block (which is intended for using by caller)
  // with 0x00. Do not put this filling under KMP_DEBUG condition! Many
  // callers expect zeroed memory. (Padding bytes remain filled with 0xEF
  // in debugging library.)
  *((kmp_mem_descr_t *)addr_descr) = descr;

  KMP_MB();

  KE_TRACE(25, ("<- ___kmp_allocate_align() returns %p\n", descr.ptr_aligned));
  return descr.ptr_aligned;
} // func ___kmp_allocate_align
19247cc577a4SJonathan Peyton
19253041982dSJonathan Peyton /* Allocate memory on cache line boundary, fill allocated memory with 0x00.
19267cc577a4SJonathan Peyton Do not call this func directly! Use __kmp_allocate macro instead.
19273041982dSJonathan Peyton NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
19283041982dSJonathan Peyton error. Must use __kmp_free when freeing memory allocated by this routine! */
___kmp_allocate(size_t size KMP_SRC_LOC_DECL)19293041982dSJonathan Peyton void *___kmp_allocate(size_t size KMP_SRC_LOC_DECL) {
19307cc577a4SJonathan Peyton void *ptr;
19313041982dSJonathan Peyton KE_TRACE(25, ("-> __kmp_allocate( %d ) called from %s:%d\n",
19323041982dSJonathan Peyton (int)size KMP_SRC_LOC_PARM));
19337cc577a4SJonathan Peyton ptr = ___kmp_allocate_align(size, __kmp_align_alloc KMP_SRC_LOC_PARM);
19347cc577a4SJonathan Peyton KE_TRACE(25, ("<- __kmp_allocate() returns %p\n", ptr));
19357cc577a4SJonathan Peyton return ptr;
19367cc577a4SJonathan Peyton } // func ___kmp_allocate
19377cc577a4SJonathan Peyton
19383041982dSJonathan Peyton /* Allocate memory on page boundary, fill allocated memory with 0x00.
19397cc577a4SJonathan Peyton Does not call this func directly! Use __kmp_page_allocate macro instead.
19403041982dSJonathan Peyton NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
19413041982dSJonathan Peyton error. Must use __kmp_free when freeing memory allocated by this routine! */
___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL)19423041982dSJonathan Peyton void *___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL) {
19437cc577a4SJonathan Peyton int page_size = 8 * 1024;
19447cc577a4SJonathan Peyton void *ptr;
19457cc577a4SJonathan Peyton
19463041982dSJonathan Peyton KE_TRACE(25, ("-> __kmp_page_allocate( %d ) called from %s:%d\n",
19473041982dSJonathan Peyton (int)size KMP_SRC_LOC_PARM));
19487cc577a4SJonathan Peyton ptr = ___kmp_allocate_align(size, page_size KMP_SRC_LOC_PARM);
19497cc577a4SJonathan Peyton KE_TRACE(25, ("<- __kmp_page_allocate( %d ) returns %p\n", (int)size, ptr));
19507cc577a4SJonathan Peyton return ptr;
19517cc577a4SJonathan Peyton } // ___kmp_page_allocate
19527cc577a4SJonathan Peyton
19533041982dSJonathan Peyton /* Free memory allocated by __kmp_allocate() and __kmp_page_allocate().
19543041982dSJonathan Peyton In debug mode, fill the memory block with 0xEF before call to free(). */
// Free a block obtained from __kmp_allocate()/__kmp_page_allocate().
// Reads the descriptor stored below 'ptr' to find the original malloc'ed
// pointer. In debug builds the whole allocation is poisoned with 0xEF
// before freeing; with LEAK_MEMORY defined nothing is ever freed.
void ___kmp_free(void *ptr KMP_SRC_LOC_DECL) {
  kmp_mem_descr_t descr;
#if KMP_DEBUG
  kmp_uintptr_t addr_allocated; // Address returned by malloc().
  kmp_uintptr_t addr_aligned; // Aligned address passed by caller.
#endif
  KE_TRACE(25,
           ("-> __kmp_free( %p ) called from %s:%d\n", ptr KMP_SRC_LOC_PARM));
  KMP_ASSERT(ptr != NULL);

  // Descriptor lives immediately below the caller-visible pointer.
  descr = *(kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t));

  KE_TRACE(26, (" __kmp_free: "
                "ptr_allocated=%p, size_allocated=%d, "
                "ptr_aligned=%p, size_aligned=%d\n",
                descr.ptr_allocated, (int)descr.size_allocated,
                descr.ptr_aligned, (int)descr.size_aligned));
#if KMP_DEBUG
  // Validate descriptor consistency before trusting ptr_allocated.
  addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
  addr_aligned = (kmp_uintptr_t)descr.ptr_aligned;
  KMP_DEBUG_ASSERT(addr_aligned % CACHE_LINE == 0);
  KMP_DEBUG_ASSERT(descr.ptr_aligned == ptr);
  KMP_DEBUG_ASSERT(addr_allocated + sizeof(kmp_mem_descr_t) <= addr_aligned);
  KMP_DEBUG_ASSERT(descr.size_aligned < descr.size_allocated);
  KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
                   addr_allocated + descr.size_allocated);
  memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
// Fill memory block with 0xEF, it helps catch using freed memory.
#endif

#ifndef LEAK_MEMORY
  KE_TRACE(10, (" free( %p )\n", descr.ptr_allocated));
#ifdef KMP_DEBUG
  _free_src_loc(descr.ptr_allocated, _file_, _line_);
#else
  free_src_loc(descr.ptr_allocated KMP_SRC_LOC_PARM);
#endif
#endif
  KMP_MB();
  KE_TRACE(25, ("<- __kmp_free() returns\n"));
} // func ___kmp_free
19967cc577a4SJonathan Peyton
19977cc577a4SJonathan Peyton #if USE_FAST_MEMORY == 3
19987cc577a4SJonathan Peyton // Allocate fast memory by first scanning the thread's free lists
19997cc577a4SJonathan Peyton // If a chunk the right size exists, grab it off the free list.
20007cc577a4SJonathan Peyton // Otherwise allocate normally using kmp_thread_malloc.
20017cc577a4SJonathan Peyton
20027cc577a4SJonathan Peyton // AC: How to choose the limit? Just get 16 for now...
20037cc577a4SJonathan Peyton #define KMP_FREE_LIST_LIMIT 16
20047cc577a4SJonathan Peyton
20057cc577a4SJonathan Peyton // Always use 128 bytes for determining buckets for caching memory blocks
20067cc577a4SJonathan Peyton #define DCACHE_LINE 128
20077cc577a4SJonathan Peyton
// Fast thread-local allocation. Requests up to 64 cache lines (8 KB) are
// rounded up to one of four bucket sizes (2/4/16/64 lines of DCACHE_LINE
// bytes) and served from the thread's per-bucket free lists when
// possible; larger requests and cache misses fall through to bget().
// Pair with ___kmp_fast_free.
void *___kmp_fast_allocate(kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL) {
  void *ptr;
  size_t num_lines, idx;
  int index;
  void *alloc_ptr;
  size_t alloc_size;
  kmp_mem_descr_t *descr;

  KE_TRACE(25, ("-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n",
                __kmp_gtid_from_thread(this_thr), (int)size KMP_SRC_LOC_PARM));

  // Round the request up to whole cache lines, then map the line count
  // to a bucket index via successive right-shifts of (num_lines - 1).
  num_lines = (size + DCACHE_LINE - 1) / DCACHE_LINE;
  idx = num_lines - 1;
  // NOTE(review): idx is size_t (unsigned), so this assert is vacuously
  // true — confirm whether a signed underflow check was intended.
  KMP_DEBUG_ASSERT(idx >= 0);
  if (idx < 2) {
    index = 0; // idx is [ 0, 1 ], use first free list
    num_lines = 2; // 1, 2 cache lines or less than cache line
  } else if ((idx >>= 2) == 0) {
    index = 1; // idx is [ 2, 3 ], use second free list
    num_lines = 4; // 3, 4 cache lines
  } else if ((idx >>= 2) == 0) {
    index = 2; // idx is [ 4, 15 ], use third free list
    num_lines = 16; // 5, 6, ..., 16 cache lines
  } else if ((idx >>= 2) == 0) {
    index = 3; // idx is [ 16, 63 ], use fourth free list
    num_lines = 64; // 17, 18, ..., 64 cache lines
  } else {
    goto alloc_call; // 65 or more cache lines ( > 8KB ), don't use free lists
  }

  // First try the no-sync free list: only this thread touches it, so no
  // atomics are needed.
  ptr = this_thr->th.th_free_lists[index].th_free_list_self;
  if (ptr != NULL) {
    // pop the head of no-sync free list
    this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
    // descriptor's ptr_aligned field holds the owning thread here
    KMP_DEBUG_ASSERT(this_thr == ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr -
                                                      sizeof(kmp_mem_descr_t)))
                                     ->ptr_aligned);
    goto end;
  }
  ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
  if (ptr != NULL) {
    // no-sync free list is empty, use sync free list (filled in by other
    // threads only)
    // pop the head of the sync free list, push NULL instead
    // CAS loop: other threads may concurrently push onto the sync list,
    // so re-read the head until the swap to NULL succeeds.
    while (!KMP_COMPARE_AND_STORE_PTR(
        &this_thr->th.th_free_lists[index].th_free_list_sync, ptr, nullptr)) {
      KMP_CPU_PAUSE();
      ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
    }
    // push the rest of chain into no-sync free list (can be NULL if there was
    // the only block)
    this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
    KMP_DEBUG_ASSERT(this_thr == ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr -
                                                      sizeof(kmp_mem_descr_t)))
                                     ->ptr_aligned);
    goto end;
  }

alloc_call:
  // haven't found block in the free lists, thus allocate it
  size = num_lines * DCACHE_LINE;

  // extra room for the descriptor plus worst-case alignment slack
  alloc_size = size + sizeof(kmp_mem_descr_t) + DCACHE_LINE;
  KE_TRACE(25, ("__kmp_fast_allocate: T#%d Calling __kmp_thread_malloc with "
                "alloc_size %d\n",
                __kmp_gtid_from_thread(this_thr), alloc_size));
  alloc_ptr = bget(this_thr, (bufsize)alloc_size);

  // align ptr to DCACHE_LINE
  ptr = (void *)((((kmp_uintptr_t)alloc_ptr) + sizeof(kmp_mem_descr_t) +
                  DCACHE_LINE) &
                 ~(DCACHE_LINE - 1));
  descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t));

  descr->ptr_allocated = alloc_ptr; // remember allocated pointer
  // we don't need size_allocated
  descr->ptr_aligned = (void *)this_thr; // remember allocating thread
  // (it is already saved in bget buffer,
  // but we may want to use another allocator in future)
  descr->size_aligned = size;

end:
  KE_TRACE(25, ("<- __kmp_fast_allocate( T#%d ) returns %p\n",
                __kmp_gtid_from_thread(this_thr), ptr));
  return ptr;
} // func __kmp_fast_allocate
20947cc577a4SJonathan Peyton
20957cc577a4SJonathan Peyton // Free fast memory and place it on the thread's free list if it is of
20967cc577a4SJonathan Peyton // the correct size.
___kmp_fast_free(kmp_info_t * this_thr,void * ptr KMP_SRC_LOC_DECL)20973041982dSJonathan Peyton void ___kmp_fast_free(kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL) {
20987cc577a4SJonathan Peyton kmp_mem_descr_t *descr;
20997cc577a4SJonathan Peyton kmp_info_t *alloc_thr;
21007cc577a4SJonathan Peyton size_t size;
21017cc577a4SJonathan Peyton size_t idx;
21027cc577a4SJonathan Peyton int index;
21037cc577a4SJonathan Peyton
21047cc577a4SJonathan Peyton KE_TRACE(25, ("-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n",
21057cc577a4SJonathan Peyton __kmp_gtid_from_thread(this_thr), ptr KMP_SRC_LOC_PARM));
21067cc577a4SJonathan Peyton KMP_ASSERT(ptr != NULL);
21077cc577a4SJonathan Peyton
21087cc577a4SJonathan Peyton descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t));
21097cc577a4SJonathan Peyton
21107cc577a4SJonathan Peyton KE_TRACE(26, (" __kmp_fast_free: size_aligned=%d\n",
21117cc577a4SJonathan Peyton (int)descr->size_aligned));
21127cc577a4SJonathan Peyton
21137cc577a4SJonathan Peyton size = descr->size_aligned; // 2, 4, 16, 64, 65, 66, ... cache lines
21147cc577a4SJonathan Peyton
21157cc577a4SJonathan Peyton idx = DCACHE_LINE * 2; // 2 cache lines is minimal size of block
21167cc577a4SJonathan Peyton if (idx == size) {
21177cc577a4SJonathan Peyton index = 0; // 2 cache lines
21187cc577a4SJonathan Peyton } else if ((idx <<= 1) == size) {
21197cc577a4SJonathan Peyton index = 1; // 4 cache lines
21207cc577a4SJonathan Peyton } else if ((idx <<= 2) == size) {
21217cc577a4SJonathan Peyton index = 2; // 16 cache lines
21227cc577a4SJonathan Peyton } else if ((idx <<= 2) == size) {
21237cc577a4SJonathan Peyton index = 3; // 64 cache lines
21247cc577a4SJonathan Peyton } else {
21257cc577a4SJonathan Peyton KMP_DEBUG_ASSERT(size > DCACHE_LINE * 64);
21267cc577a4SJonathan Peyton goto free_call; // 65 or more cache lines ( > 8KB )
21277cc577a4SJonathan Peyton }
21287cc577a4SJonathan Peyton
21297cc577a4SJonathan Peyton alloc_thr = (kmp_info_t *)descr->ptr_aligned; // get thread owning the block
21307cc577a4SJonathan Peyton if (alloc_thr == this_thr) {
21317cc577a4SJonathan Peyton // push block to self no-sync free list, linking previous head (LIFO)
21327cc577a4SJonathan Peyton *((void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self;
21337cc577a4SJonathan Peyton this_thr->th.th_free_lists[index].th_free_list_self = ptr;
21347cc577a4SJonathan Peyton } else {
21357cc577a4SJonathan Peyton void *head = this_thr->th.th_free_lists[index].th_free_list_other;
21367cc577a4SJonathan Peyton if (head == NULL) {
21377cc577a4SJonathan Peyton // Create new free list
21387cc577a4SJonathan Peyton this_thr->th.th_free_lists[index].th_free_list_other = ptr;
21397cc577a4SJonathan Peyton *((void **)ptr) = NULL; // mark the tail of the list
21407cc577a4SJonathan Peyton descr->size_allocated = (size_t)1; // head of the list keeps its length
21417cc577a4SJonathan Peyton } else {
      // need to check the existing "other" list's owner thread and queue size
21433041982dSJonathan Peyton kmp_mem_descr_t *dsc =
21443041982dSJonathan Peyton (kmp_mem_descr_t *)((char *)head - sizeof(kmp_mem_descr_t));
21453041982dSJonathan Peyton // allocating thread, same for all queue nodes
21463041982dSJonathan Peyton kmp_info_t *q_th = (kmp_info_t *)(dsc->ptr_aligned);
21473041982dSJonathan Peyton size_t q_sz =
21483041982dSJonathan Peyton dsc->size_allocated + 1; // new size in case we add current task
21497cc577a4SJonathan Peyton if (q_th == alloc_thr && q_sz <= KMP_FREE_LIST_LIMIT) {
21507cc577a4SJonathan Peyton // we can add current task to "other" list, no sync needed
21517cc577a4SJonathan Peyton *((void **)ptr) = head;
21527cc577a4SJonathan Peyton descr->size_allocated = q_sz;
21537cc577a4SJonathan Peyton this_thr->th.th_free_lists[index].th_free_list_other = ptr;
21547cc577a4SJonathan Peyton } else {
21557cc577a4SJonathan Peyton // either queue blocks owner is changing or size limit exceeded
215642016791SKazuaki Ishizaki // return old queue to allocating thread (q_th) synchronously,
21577cc577a4SJonathan Peyton // and start new list for alloc_thr's tasks
21587cc577a4SJonathan Peyton void *old_ptr;
21597cc577a4SJonathan Peyton void *tail = head;
21607cc577a4SJonathan Peyton void *next = *((void **)head);
21617cc577a4SJonathan Peyton while (next != NULL) {
21627cc577a4SJonathan Peyton KMP_DEBUG_ASSERT(
21637cc577a4SJonathan Peyton // queue size should decrease by 1 each step through the list
21643041982dSJonathan Peyton ((kmp_mem_descr_t *)((char *)next - sizeof(kmp_mem_descr_t)))
21653041982dSJonathan Peyton ->size_allocated +
21663041982dSJonathan Peyton 1 ==
21673041982dSJonathan Peyton ((kmp_mem_descr_t *)((char *)tail - sizeof(kmp_mem_descr_t)))
21683041982dSJonathan Peyton ->size_allocated);
21697cc577a4SJonathan Peyton tail = next; // remember tail node
21707cc577a4SJonathan Peyton next = *((void **)next);
21717cc577a4SJonathan Peyton }
21727cc577a4SJonathan Peyton KMP_DEBUG_ASSERT(q_th != NULL);
21737cc577a4SJonathan Peyton // push block to owner's sync free list
21747cc577a4SJonathan Peyton old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
21757cc577a4SJonathan Peyton /* the next pointer must be set before setting free_list to ptr to avoid
21767cc577a4SJonathan Peyton exposing a broken list to other threads, even for an instant. */
21777cc577a4SJonathan Peyton *((void **)tail) = old_ptr;
21787cc577a4SJonathan Peyton
21797cc577a4SJonathan Peyton while (!KMP_COMPARE_AND_STORE_PTR(
21803041982dSJonathan Peyton &q_th->th.th_free_lists[index].th_free_list_sync, old_ptr, head)) {
21817cc577a4SJonathan Peyton KMP_CPU_PAUSE();
21827cc577a4SJonathan Peyton old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
21837cc577a4SJonathan Peyton *((void **)tail) = old_ptr;
21847cc577a4SJonathan Peyton }
21857cc577a4SJonathan Peyton
      // start new list of not-self tasks
21877cc577a4SJonathan Peyton this_thr->th.th_free_lists[index].th_free_list_other = ptr;
21887cc577a4SJonathan Peyton *((void **)ptr) = NULL;
21897cc577a4SJonathan Peyton descr->size_allocated = (size_t)1; // head of queue keeps its length
21907cc577a4SJonathan Peyton }
21917cc577a4SJonathan Peyton }
21927cc577a4SJonathan Peyton }
21937cc577a4SJonathan Peyton goto end;
21947cc577a4SJonathan Peyton
21957cc577a4SJonathan Peyton free_call:
21967cc577a4SJonathan Peyton KE_TRACE(25, ("__kmp_fast_free: T#%d Calling __kmp_thread_free for size %d\n",
21977cc577a4SJonathan Peyton __kmp_gtid_from_thread(this_thr), size));
21987cc577a4SJonathan Peyton __kmp_bget_dequeue(this_thr); /* Release any queued buffers */
21997cc577a4SJonathan Peyton brel(this_thr, descr->ptr_allocated);
22007cc577a4SJonathan Peyton
22017cc577a4SJonathan Peyton end:
22027cc577a4SJonathan Peyton KE_TRACE(25, ("<- __kmp_fast_free() returns\n"));
22037cc577a4SJonathan Peyton
22047cc577a4SJonathan Peyton } // func __kmp_fast_free
22057cc577a4SJonathan Peyton
22067cc577a4SJonathan Peyton // Initialize the thread free lists related to fast memory
22077cc577a4SJonathan Peyton // Only do this when a thread is initially created.
__kmp_initialize_fast_memory(kmp_info_t * this_thr)22083041982dSJonathan Peyton void __kmp_initialize_fast_memory(kmp_info_t *this_thr) {
22097cc577a4SJonathan Peyton KE_TRACE(10, ("__kmp_initialize_fast_memory: Called from th %p\n", this_thr));
22107cc577a4SJonathan Peyton
22117cc577a4SJonathan Peyton memset(this_thr->th.th_free_lists, 0, NUM_LISTS * sizeof(kmp_free_list_t));
22127cc577a4SJonathan Peyton }
22137cc577a4SJonathan Peyton
22147cc577a4SJonathan Peyton // Free the memory in the thread free lists related to fast memory
22157cc577a4SJonathan Peyton // Only do this when a thread is being reaped (destroyed).
void __kmp_free_fast_memory(kmp_info_t *th) {
  // Release all BGET expansion blocks still held by thread th. Called only
  // when the thread is being reaped, so no other thread touches th's lists.
  // We walk BGET's internal per-bin free lists directly to find the blocks.
  int bin;
  thr_data_t *thr = get_thr_data(th);
  void **lst = NULL; // head of a temporary singly-linked chain of blocks to free

  KE_TRACE(
      5, ("__kmp_free_fast_memory: Called T#%d\n", __kmp_gtid_from_thread(th)));

  __kmp_bget_dequeue(th); // Release any queued buffers

  // Pass 1: dig through the free lists and collect all expansion blocks.
  // NOTE(review): a set low bit in bb.bthr appears to tag a buffer as a
  // directly-allocated (acquired) expansion block — confirm against the
  // bget allocation path before changing this test.
  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
    bfhead_t *b = thr->freelist[bin].ql.flink;
    while (b != &thr->freelist[bin]) {
      if ((kmp_uintptr_t)b->bh.bb.bthr & 1) { // the buffer is allocated address
        // Chain b onto lst by overwriting the first word of the header
        // (clobbers bthr, which is no longer needed, but leaves ql.flink
        // intact so the in-progress traversal below still works).
        *((void **)b) =
            lst; // link the list (override bthr, but keep flink yet)
        lst = (void **)b; // push b into lst
      }
      b = b->ql.flink; // get next buffer
    }
  }
  // Pass 2: return every collected block to the system via the thread's
  // release function. Done as a separate pass so pass 1 never frees a node
  // it is still iterating over.
  while (lst != NULL) {
    void *next = *lst; // read the chain link before releasing the block
    KE_TRACE(10, ("__kmp_free_fast_memory: freeing %p, next=%p th %p (%d)\n",
                  lst, next, th, __kmp_gtid_from_thread(th)));
    (*thr->relfcn)(lst);
#if BufStats
    // count blocks to prevent problems in __kmp_finalize_bget()
    thr->numprel++; /* Nr of expansion block releases */
    thr->numpblk--; /* Total number of blocks */
#endif
    lst = (void **)next;
  }

  KE_TRACE(
      5, ("__kmp_free_fast_memory: Freed T#%d\n", __kmp_gtid_from_thread(th)));
}
22557cc577a4SJonathan Peyton
22567cc577a4SJonathan Peyton #endif // USE_FAST_MEMORY
2257