1 /*
2  * kmp_alloc.cpp -- private/shared dynamic memory allocation and management
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 //                     The LLVM Compiler Infrastructure
8 //
9 // This file is dual licensed under the MIT and the University of Illinois Open
10 // Source Licenses. See LICENSE.txt for details.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "kmp.h"
15 #include "kmp_io.h"
16 #include "kmp_wrapper_malloc.h"
17 
18 // Disable bget when it is not used
19 #if KMP_USE_BGET
20 
21 /* Thread private buffer management code */
22 
23 typedef int (*bget_compact_t)(size_t, int);
24 typedef void *(*bget_acquire_t)(size_t);
25 typedef void (*bget_release_t)(void *);
26 
27 /* NOTE: bufsize must be a signed datatype */
28 
29 #if KMP_OS_WINDOWS
30 #if KMP_ARCH_X86 || KMP_ARCH_ARM
31 typedef kmp_int32 bufsize;
32 #else
33 typedef kmp_int64 bufsize;
34 #endif
35 #else
36 typedef ssize_t bufsize;
37 #endif
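
/* The sign of a buffer's bsize field carries state throughout this
   allocator: a free buffer stores its size as a positive value, an
   allocated buffer as a negative one, and the end-of-pool sentinel uses
   the most negative value of all (ESent, below). Hence bufsize must be
   a signed type. */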
38 
/* The three modes of operation are fifo search, lifo search, and best-fit */
40 
41 typedef enum bget_mode {
42   bget_mode_fifo = 0,
43   bget_mode_lifo = 1,
44   bget_mode_best = 2
45 } bget_mode_t;
46 
47 static void bpool(kmp_info_t *th, void *buffer, bufsize len);
48 static void *bget(kmp_info_t *th, bufsize size);
49 static void *bgetz(kmp_info_t *th, bufsize size);
50 static void *bgetr(kmp_info_t *th, void *buffer, bufsize newsize);
51 static void brel(kmp_info_t *th, void *buf);
52 static void bectl(kmp_info_t *th, bget_compact_t compact,
53                   bget_acquire_t acquire, bget_release_t release,
54                   bufsize pool_incr);
55 
56 #ifdef KMP_DEBUG
57 static void bstats(kmp_info_t *th, bufsize *curalloc, bufsize *totfree,
58                    bufsize *maxfree, long *nget, long *nrel);
59 static void bstatse(kmp_info_t *th, bufsize *pool_incr, long *npool,
60                     long *npget, long *nprel, long *ndget, long *ndrel);
61 static void bufdump(kmp_info_t *th, void *buf);
62 static void bpoold(kmp_info_t *th, void *pool, int dumpalloc, int dumpfree);
63 static int bpoolv(kmp_info_t *th, void *pool);
64 #endif
65 
66 /* BGET CONFIGURATION */
67 /* Buffer allocation size quantum: all buffers allocated are a
68    multiple of this size.  This MUST be a power of two. */
69 
/* On IA-32 architecture with Linux* OS, malloc() does not
   ensure 16-byte alignment */
72 
73 #if KMP_ARCH_X86 || !KMP_HAVE_QUAD
74 
75 #define SizeQuant 8
76 #define AlignType double
77 
78 #else
79 
80 #define SizeQuant 16
81 #define AlignType _Quad
82 
83 #endif
84 
85 // Define this symbol to enable the bstats() function which calculates the
86 // total free space in the buffer pool, the largest available buffer, and the
87 // total space currently allocated.
88 #define BufStats 1
89 
90 #ifdef KMP_DEBUG
91 
92 // Define this symbol to enable the bpoold() function which dumps the buffers
93 // in a buffer pool.
94 #define BufDump 1
95 
96 // Define this symbol to enable the bpoolv() function for validating a buffer
97 // pool.
98 #define BufValid 1
99 
100 // Define this symbol to enable the bufdump() function which allows dumping the
101 // contents of an allocated or free buffer.
102 #define DumpData 1
103 
104 #ifdef NOT_USED_NOW
105 
106 // Wipe free buffers to a guaranteed pattern of garbage to trip up miscreants
107 // who attempt to use pointers into released buffers.
108 #define FreeWipe 1
109 
110 // Use a best fit algorithm when searching for space for an allocation request.
111 // This uses memory more efficiently, but allocation will be much slower.
112 #define BestFit 1
113 
114 #endif /* NOT_USED_NOW */
115 #endif /* KMP_DEBUG */
116 
117 static bufsize bget_bin_size[] = {
118     0,
119     //    1 << 6,    /* .5 Cache line */
120     1 << 7, /* 1 Cache line, new */
121     1 << 8, /* 2 Cache lines */
122     1 << 9, /* 4 Cache lines, new */
123     1 << 10, /* 8 Cache lines */
124     1 << 11, /* 16 Cache lines, new */
125     1 << 12, 1 << 13, /* new */
126     1 << 14, 1 << 15, /* new */
127     1 << 16, 1 << 17, 1 << 18, 1 << 19, 1 << 20, /*  1MB */
128     1 << 21, /*  2MB */
129     1 << 22, /*  4MB */
130     1 << 23, /*  8MB */
131     1 << 24, /* 16MB */
132     1 << 25, /* 32MB */
133 };
134 
135 #define MAX_BGET_BINS (int)(sizeof(bget_bin_size) / sizeof(bufsize))
136 
137 struct bfhead;
138 
139 //  Declare the interface, including the requested buffer size type, bufsize.
140 
141 /* Queue links */
142 typedef struct qlinks {
143   struct bfhead *flink; /* Forward link */
144   struct bfhead *blink; /* Backward link */
145 } qlinks_t;
146 
147 /* Header in allocated and free buffers */
148 typedef struct bhead2 {
149   kmp_info_t *bthr; /* The thread which owns the buffer pool */
150   bufsize prevfree; /* Relative link back to previous free buffer in memory or
151                        0 if previous buffer is allocated.  */
152   bufsize bsize; /* Buffer size: positive if free, negative if allocated. */
153 } bhead2_t;
154 
155 /* Make sure the bhead structure is a multiple of SizeQuant in size. */
156 typedef union bhead {
157   KMP_ALIGN(SizeQuant)
158   AlignType b_align;
159   char b_pad[sizeof(bhead2_t) + (SizeQuant - (sizeof(bhead2_t) % SizeQuant))];
160   bhead2_t bb;
161 } bhead_t;
162 #define BH(p) ((bhead_t *)(p))
163 
164 /*  Header in directly allocated buffers (by acqfcn) */
165 typedef struct bdhead {
166   bufsize tsize; /* Total size, including overhead */
167   bhead_t bh; /* Common header */
168 } bdhead_t;
169 #define BDH(p) ((bdhead_t *)(p))
170 
171 /* Header in free buffers */
172 typedef struct bfhead {
173   bhead_t bh; /* Common allocated/free header */
174   qlinks_t ql; /* Links on free list */
175 } bfhead_t;
176 #define BFH(p) ((bfhead_t *)(p))
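
/* In-memory picture of adjacent buffers within a pool block:

     | bhead | qlinks, free space | bhead | user data | ... | bhead(ESent) |
       free buffer (bsize > 0)      allocated (bsize < 0)     sentinel

   A free buffer keeps its freelist links in the first bytes of its
   payload, and the following buffer's prevfree field holds the free
   buffer's size, which is what lets brel() coalesce neighbors in O(1). */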
177 
178 typedef struct thr_data {
179   bfhead_t freelist[MAX_BGET_BINS];
180 #if BufStats
181   size_t totalloc; /* Total space currently allocated */
182   long numget, numrel; /* Number of bget() and brel() calls */
183   long numpblk; /* Number of pool blocks */
184   long numpget, numprel; /* Number of block gets and rels */
185   long numdget, numdrel; /* Number of direct gets and rels */
186 #endif /* BufStats */
187 
188   /* Automatic expansion block management functions */
189   bget_compact_t compfcn;
190   bget_acquire_t acqfcn;
191   bget_release_t relfcn;
192 
193   bget_mode_t mode; /* what allocation mode to use? */
194 
195   bufsize exp_incr; /* Expansion block size */
196   bufsize pool_len; /* 0: no bpool calls have been made
197                        -1: not all pool blocks are the same size
198                        >0: (common) block size for all bpool calls made so far
199                     */
  bfhead_t *last_pool; /* Last pool owned by this thread (delayed deallocation) */
201 } thr_data_t;
202 
203 /*  Minimum allocation quantum: */
204 #define QLSize (sizeof(qlinks_t))
205 #define SizeQ ((SizeQuant > QLSize) ? SizeQuant : QLSize)
206 #define MaxSize                                                                \
207   (bufsize)(                                                                   \
208       ~(((bufsize)(1) << (sizeof(bufsize) * CHAR_BIT - 1)) | (SizeQuant - 1)))
// Maximum allowed value for a requested size.
210 
211 /* End sentinel: value placed in bsize field of dummy block delimiting
212    end of pool block.  The most negative number which will  fit  in  a
213    bufsize, defined in a way that the compiler will accept. */
214 
215 #define ESent                                                                  \
216   ((bufsize)(-(((((bufsize)1) << ((int)sizeof(bufsize) * 8 - 2)) - 1) * 2) - 2))
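
/* For a 64-bit bufsize this evaluates to
   -(((1 << 62) - 1) * 2) - 2 == -(2^63 - 2) - 2 == -2^63,
   i.e. the most negative representable value, computed without
   overflowing any intermediate expression. */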
217 
218 /* Thread Data management routines */
219 static int bget_get_bin(bufsize size) {
220   // binary chop bins
221   int lo = 0, hi = MAX_BGET_BINS - 1;
222 
223   KMP_DEBUG_ASSERT(size > 0);
224 
225   while ((hi - lo) > 1) {
226     int mid = (lo + hi) >> 1;
227     if (size < bget_bin_size[mid])
228       hi = mid - 1;
229     else
230       lo = mid;
231   }
232 
233   KMP_DEBUG_ASSERT((lo >= 0) && (lo < MAX_BGET_BINS));
234 
235   return lo;
236 }
237 
238 static void set_thr_data(kmp_info_t *th) {
239   int i;
240   thr_data_t *data;
241 
242   data = (thr_data_t *)((!th->th.th_local.bget_data)
243                             ? __kmp_allocate(sizeof(*data))
244                             : th->th.th_local.bget_data);
245 
246   memset(data, '\0', sizeof(*data));
247 
248   for (i = 0; i < MAX_BGET_BINS; ++i) {
249     data->freelist[i].ql.flink = &data->freelist[i];
250     data->freelist[i].ql.blink = &data->freelist[i];
251   }
252 
253   th->th.th_local.bget_data = data;
254   th->th.th_local.bget_list = 0;
255 #if !USE_CMP_XCHG_FOR_BGET
256 #ifdef USE_QUEUING_LOCK_FOR_BGET
257   __kmp_init_lock(&th->th.th_local.bget_lock);
258 #else
259   __kmp_init_bootstrap_lock(&th->th.th_local.bget_lock);
#endif /* USE_QUEUING_LOCK_FOR_BGET */
261 #endif /* ! USE_CMP_XCHG_FOR_BGET */
262 }
263 
264 static thr_data_t *get_thr_data(kmp_info_t *th) {
265   thr_data_t *data;
266 
267   data = (thr_data_t *)th->th.th_local.bget_data;
268 
269   KMP_DEBUG_ASSERT(data != 0);
270 
271   return data;
272 }
273 
274 #ifdef KMP_DEBUG
275 
276 static void __kmp_bget_validate_queue(kmp_info_t *th) {
277   /* NOTE: assume that the global_lock is held */
278 
279   void *p = (void *)th->th.th_local.bget_list;
280 
281   while (p != 0) {
282     bfhead_t *b = BFH(((char *)p) - sizeof(bhead_t));
283 
284     KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
285     p = (void *)b->ql.flink;
286   }
287 }
288 
289 #endif
290 
/* Drain this thread's deferred-release list, releasing each enqueued buffer */
292 static void __kmp_bget_dequeue(kmp_info_t *th) {
293   void *p = TCR_SYNC_PTR(th->th.th_local.bget_list);
294 
295   if (p != 0) {
296 #if USE_CMP_XCHG_FOR_BGET
297     {
298       volatile void *old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
299       while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
300                                         CCAST(void *, old_value), nullptr)) {
301         KMP_CPU_PAUSE();
302         old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
303       }
304       p = CCAST(void *, old_value);
305     }
306 #else /* ! USE_CMP_XCHG_FOR_BGET */
307 #ifdef USE_QUEUING_LOCK_FOR_BGET
308     __kmp_acquire_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
309 #else
310     __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
311 #endif /* USE_QUEUING_LOCK_FOR_BGET */
312 
313     p = (void *)th->th.th_local.bget_list;
314     th->th.th_local.bget_list = 0;
315 
316 #ifdef USE_QUEUING_LOCK_FOR_BGET
317     __kmp_release_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
318 #else
319     __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
320 #endif
321 #endif /* USE_CMP_XCHG_FOR_BGET */
322 
323     /* Check again to make sure the list is not empty */
324     while (p != 0) {
325       void *buf = p;
326       bfhead_t *b = BFH(((char *)p) - sizeof(bhead_t));
327 
328       KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
329       KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
330                        (kmp_uintptr_t)th); // clear possible mark
331       KMP_DEBUG_ASSERT(b->ql.blink == 0);
332 
333       p = (void *)b->ql.flink;
334 
335       brel(th, buf);
336     }
337   }
338 }
339 
/* Push the buffer onto the owning thread's deferred-release list */
341 static void __kmp_bget_enqueue(kmp_info_t *th, void *buf
342 #ifdef USE_QUEUING_LOCK_FOR_BGET
343                                ,
344                                kmp_int32 rel_gtid
345 #endif
346                                ) {
347   bfhead_t *b = BFH(((char *)buf) - sizeof(bhead_t));
348 
349   KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
350   KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
351                    (kmp_uintptr_t)th); // clear possible mark
352 
353   b->ql.blink = 0;
354 
355   KC_TRACE(10, ("__kmp_bget_enqueue: moving buffer to T#%d list\n",
356                 __kmp_gtid_from_thread(th)));
357 
358 #if USE_CMP_XCHG_FOR_BGET
359   {
360     volatile void *old_value = TCR_PTR(th->th.th_local.bget_list);
361     /* the next pointer must be set before setting bget_list to buf to avoid
362        exposing a broken list to other threads, even for an instant. */
363     b->ql.flink = BFH(CCAST(void *, old_value));
364 
365     while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
366                                       CCAST(void *, old_value), buf)) {
367       KMP_CPU_PAUSE();
368       old_value = TCR_PTR(th->th.th_local.bget_list);
369       /* the next pointer must be set before setting bget_list to buf to avoid
370          exposing a broken list to other threads, even for an instant. */
371       b->ql.flink = BFH(CCAST(void *, old_value));
372     }
373   }
374 #else /* ! USE_CMP_XCHG_FOR_BGET */
375 #ifdef USE_QUEUING_LOCK_FOR_BGET
376   __kmp_acquire_lock(&th->th.th_local.bget_lock, rel_gtid);
377 #else
378   __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
379 #endif
380 
381   b->ql.flink = BFH(th->th.th_local.bget_list);
382   th->th.th_local.bget_list = (void *)buf;
383 
384 #ifdef USE_QUEUING_LOCK_FOR_BGET
385   __kmp_release_lock(&th->th.th_local.bget_lock, rel_gtid);
386 #else
387   __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
388 #endif
389 #endif /* USE_CMP_XCHG_FOR_BGET */
390 }
391 
392 /* insert buffer back onto a new freelist */
393 static void __kmp_bget_insert_into_freelist(thr_data_t *thr, bfhead_t *b) {
394   int bin;
395 
396   KMP_DEBUG_ASSERT(((size_t)b) % SizeQuant == 0);
397   KMP_DEBUG_ASSERT(b->bh.bb.bsize % SizeQuant == 0);
398 
399   bin = bget_get_bin(b->bh.bb.bsize);
400 
401   KMP_DEBUG_ASSERT(thr->freelist[bin].ql.blink->ql.flink ==
402                    &thr->freelist[bin]);
403   KMP_DEBUG_ASSERT(thr->freelist[bin].ql.flink->ql.blink ==
404                    &thr->freelist[bin]);
405 
406   b->ql.flink = &thr->freelist[bin];
407   b->ql.blink = thr->freelist[bin].ql.blink;
408 
409   thr->freelist[bin].ql.blink = b;
410   b->ql.blink->ql.flink = b;
411 }
412 
413 /* unlink the buffer from the old freelist */
414 static void __kmp_bget_remove_from_freelist(bfhead_t *b) {
415   KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
416   KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
417 
418   b->ql.blink->ql.flink = b->ql.flink;
419   b->ql.flink->ql.blink = b->ql.blink;
420 }
421 
422 /*  GET STATS -- check info on free list */
423 static void bcheck(kmp_info_t *th, bufsize *max_free, bufsize *total_free) {
424   thr_data_t *thr = get_thr_data(th);
425   int bin;
426 
427   *total_free = *max_free = 0;
428 
429   for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
430     bfhead_t *b, *best;
431 
432     best = &thr->freelist[bin];
433     b = best->ql.flink;
434 
435     while (b != &thr->freelist[bin]) {
436       *total_free += (b->bh.bb.bsize - sizeof(bhead_t));
437       if ((best == &thr->freelist[bin]) || (b->bh.bb.bsize < best->bh.bb.bsize))
438         best = b;
439 
440       /* Link to next buffer */
441       b = b->ql.flink;
442     }
443 
444     if (*max_free < best->bh.bb.bsize)
445       *max_free = best->bh.bb.bsize;
446   }
447 
448   if (*max_free > (bufsize)sizeof(bhead_t))
449     *max_free -= sizeof(bhead_t);
450 }
451 
452 /*  BGET  --  Allocate a buffer.  */
453 static void *bget(kmp_info_t *th, bufsize requested_size) {
454   thr_data_t *thr = get_thr_data(th);
455   bufsize size = requested_size;
456   bfhead_t *b;
457   void *buf;
458   int compactseq = 0;
459   int use_blink = 0;
460   /* For BestFit */
461   bfhead_t *best;
462 
463   if (size < 0 || size + sizeof(bhead_t) > MaxSize) {
464     return NULL;
465   }
466 
467   __kmp_bget_dequeue(th); /* Release any queued buffers */
468 
469   if (size < (bufsize)SizeQ) { // Need at least room for the queue links.
470     size = SizeQ;
471   }
472 #if defined(SizeQuant) && (SizeQuant > 1)
473   size = (size + (SizeQuant - 1)) & (~(SizeQuant - 1));
474 #endif
475 
476   size += sizeof(bhead_t); // Add overhead in allocated buffer to size required.
477   KMP_DEBUG_ASSERT(size >= 0);
478   KMP_DEBUG_ASSERT(size % SizeQuant == 0);
479 
480   use_blink = (thr->mode == bget_mode_lifo);
481 
482   /* If a compact function was provided in the call to bectl(), wrap
483      a loop around the allocation process  to  allow  compaction  to
484      intervene in case we don't find a suitable buffer in the chain. */
485 
486   for (;;) {
487     int bin;
488 
489     for (bin = bget_get_bin(size); bin < MAX_BGET_BINS; ++bin) {
490       /* Link to next buffer */
491       b = (use_blink ? thr->freelist[bin].ql.blink
492                      : thr->freelist[bin].ql.flink);
493 
494       if (thr->mode == bget_mode_best) {
495         best = &thr->freelist[bin];
496 
497         /* Scan the free list searching for the first buffer big enough
498            to hold the requested size buffer. */
499         while (b != &thr->freelist[bin]) {
500           if (b->bh.bb.bsize >= (bufsize)size) {
501             if ((best == &thr->freelist[bin]) ||
502                 (b->bh.bb.bsize < best->bh.bb.bsize)) {
503               best = b;
504             }
505           }
506 
507           /* Link to next buffer */
508           b = (use_blink ? b->ql.blink : b->ql.flink);
509         }
510         b = best;
511       }
512 
513       while (b != &thr->freelist[bin]) {
514         if ((bufsize)b->bh.bb.bsize >= (bufsize)size) {
515 
516           // Buffer is big enough to satisfy the request. Allocate it to the
517           // caller. We must decide whether the buffer is large enough to split
518           // into the part given to the caller and a free buffer that remains
519           // on the free list, or whether the entire buffer should be removed
520           // from the free list and given to the caller in its entirety. We
521           // only split the buffer if enough room remains for a header plus the
522           // minimum quantum of allocation.
523           if ((b->bh.bb.bsize - (bufsize)size) >
524               (bufsize)(SizeQ + (sizeof(bhead_t)))) {
525             bhead_t *ba, *bn;
526 
527             ba = BH(((char *)b) + (b->bh.bb.bsize - (bufsize)size));
528             bn = BH(((char *)ba) + size);
529 
530             KMP_DEBUG_ASSERT(bn->bb.prevfree == b->bh.bb.bsize);
531 
532             /* Subtract size from length of free block. */
533             b->bh.bb.bsize -= (bufsize)size;
534 
535             /* Link allocated buffer to the previous free buffer. */
536             ba->bb.prevfree = b->bh.bb.bsize;
537 
538             /* Plug negative size into user buffer. */
539             ba->bb.bsize = -size;
540 
541             /* Mark this buffer as owned by this thread. */
542             TCW_PTR(ba->bb.bthr,
543                     th); // not an allocated address (do not mark it)
544             /* Mark buffer after this one not preceded by free block. */
545             bn->bb.prevfree = 0;
546 
547             // unlink buffer from old freelist, and reinsert into new freelist
548             __kmp_bget_remove_from_freelist(b);
549             __kmp_bget_insert_into_freelist(thr, b);
550 #if BufStats
551             thr->totalloc += (size_t)size;
552             thr->numget++; /* Increment number of bget() calls */
553 #endif
554             buf = (void *)((((char *)ba) + sizeof(bhead_t)));
555             KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
556             return buf;
557           } else {
558             bhead_t *ba;
559 
560             ba = BH(((char *)b) + b->bh.bb.bsize);
561 
562             KMP_DEBUG_ASSERT(ba->bb.prevfree == b->bh.bb.bsize);
563 
564             /* The buffer isn't big enough to split.  Give  the  whole
565                shebang to the caller and remove it from the free list. */
566 
567             __kmp_bget_remove_from_freelist(b);
568 #if BufStats
569             thr->totalloc += (size_t)b->bh.bb.bsize;
570             thr->numget++; /* Increment number of bget() calls */
571 #endif
572             /* Negate size to mark buffer allocated. */
573             b->bh.bb.bsize = -(b->bh.bb.bsize);
574 
575             /* Mark this buffer as owned by this thread. */
576             TCW_PTR(ba->bb.bthr, th); // not an allocated address (do not mark)
577             /* Zero the back pointer in the next buffer in memory
578                to indicate that this buffer is allocated. */
579             ba->bb.prevfree = 0;
580 
581             /* Give user buffer starting at queue links. */
582             buf = (void *)&(b->ql);
583             KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
584             return buf;
585           }
586         }
587 
588         /* Link to next buffer */
589         b = (use_blink ? b->ql.blink : b->ql.flink);
590       }
591     }
592 
593     /* We failed to find a buffer. If there's a compact function defined,
594        notify it of the size requested. If it returns TRUE, try the allocation
595        again. */
596 
597     if ((thr->compfcn == 0) || (!(*thr->compfcn)(size, ++compactseq))) {
598       break;
599     }
600   }
601 
602   /* No buffer available with requested size free. */
603 
604   /* Don't give up yet -- look in the reserve supply. */
605   if (thr->acqfcn != 0) {
606     if (size > (bufsize)(thr->exp_incr - sizeof(bhead_t))) {
607       /* Request is too large to fit in a single expansion block.
         Try to satisfy it by a direct buffer acquisition. */
609       bdhead_t *bdh;
610 
611       size += sizeof(bdhead_t) - sizeof(bhead_t);
612 
613       KE_TRACE(10, ("%%%%%% MALLOC( %d )\n", (int)size));
614 
615       /* richryan */
616       bdh = BDH((*thr->acqfcn)((bufsize)size));
617       if (bdh != NULL) {
618 
619         // Mark the buffer special by setting size field of its header to zero.
620         bdh->bh.bb.bsize = 0;
621 
622         /* Mark this buffer as owned by this thread. */
623         TCW_PTR(bdh->bh.bb.bthr, th); // don't mark buffer as allocated,
624         // because direct buffer never goes to free list
625         bdh->bh.bb.prevfree = 0;
626         bdh->tsize = size;
627 #if BufStats
628         thr->totalloc += (size_t)size;
629         thr->numget++; /* Increment number of bget() calls */
630         thr->numdget++; /* Direct bget() call count */
631 #endif
632         buf = (void *)(bdh + 1);
633         KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
634         return buf;
635       }
636 
637     } else {
638 
639       /*  Try to obtain a new expansion block */
640       void *newpool;
641 
642       KE_TRACE(10, ("%%%%%% MALLOCB( %d )\n", (int)thr->exp_incr));
643 
644       /* richryan */
645       newpool = (*thr->acqfcn)((bufsize)thr->exp_incr);
646       KMP_DEBUG_ASSERT(((size_t)newpool) % SizeQuant == 0);
647       if (newpool != NULL) {
648         bpool(th, newpool, thr->exp_incr);
649         buf = bget(
650             th, requested_size); /* This can't, I say, can't get into a loop. */
651         return buf;
652       }
653     }
654   }
655 
656   /*  Still no buffer available */
657 
658   return NULL;
659 }
660 
661 /*  BGETZ  --  Allocate a buffer and clear its contents to zero.  We clear
662                the  entire  contents  of  the buffer to zero, not just the
663                region requested by the caller. */
664 
665 static void *bgetz(kmp_info_t *th, bufsize size) {
666   char *buf = (char *)bget(th, size);
667 
668   if (buf != NULL) {
669     bhead_t *b;
670     bufsize rsize;
671 
672     b = BH(buf - sizeof(bhead_t));
673     rsize = -(b->bb.bsize);
674     if (rsize == 0) {
675       bdhead_t *bd;
676 
677       bd = BDH(buf - sizeof(bdhead_t));
678       rsize = bd->tsize - (bufsize)sizeof(bdhead_t);
679     } else {
680       rsize -= sizeof(bhead_t);
681     }
682 
683     KMP_DEBUG_ASSERT(rsize >= size);
684 
685     (void)memset(buf, 0, (bufsize)rsize);
686   }
687   return ((void *)buf);
688 }
689 
690 /*  BGETR  --  Reallocate a buffer.  This is a minimal implementation,
691                simply in terms of brel()  and  bget().   It  could  be
692                enhanced to allow the buffer to grow into adjacent free
693                blocks and to avoid moving data unnecessarily.  */
694 
695 static void *bgetr(kmp_info_t *th, void *buf, bufsize size) {
696   void *nbuf;
697   bufsize osize; /* Old size of buffer */
698   bhead_t *b;
699 
700   nbuf = bget(th, size);
701   if (nbuf == NULL) { /* Acquire new buffer */
702     return NULL;
703   }
704   if (buf == NULL) {
705     return nbuf;
706   }
707   b = BH(((char *)buf) - sizeof(bhead_t));
708   osize = -b->bb.bsize;
709   if (osize == 0) {
710     /*  Buffer acquired directly through acqfcn. */
711     bdhead_t *bd;
712 
713     bd = BDH(((char *)buf) - sizeof(bdhead_t));
714     osize = bd->tsize - (bufsize)sizeof(bdhead_t);
715   } else {
716     osize -= sizeof(bhead_t);
717   }
718 
719   KMP_DEBUG_ASSERT(osize > 0);
720 
721   (void)KMP_MEMCPY((char *)nbuf, (char *)buf, /* Copy the data */
722                    (size_t)((size < osize) ? size : osize));
723   brel(th, buf);
724 
725   return nbuf;
726 }
727 
728 /*  BREL  --  Release a buffer.  */
729 static void brel(kmp_info_t *th, void *buf) {
730   thr_data_t *thr = get_thr_data(th);
731   bfhead_t *b, *bn;
732   kmp_info_t *bth;
733 
734   KMP_DEBUG_ASSERT(buf != NULL);
735   KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
736 
737   b = BFH(((char *)buf) - sizeof(bhead_t));
738 
739   if (b->bh.bb.bsize == 0) { /* Directly-acquired buffer? */
740     bdhead_t *bdh;
741 
742     bdh = BDH(((char *)buf) - sizeof(bdhead_t));
743     KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
744 #if BufStats
745     thr->totalloc -= (size_t)bdh->tsize;
746     thr->numdrel++; /* Number of direct releases */
747     thr->numrel++; /* Increment number of brel() calls */
748 #endif /* BufStats */
749 #ifdef FreeWipe
750     (void)memset((char *)buf, 0x55, (size_t)(bdh->tsize - sizeof(bdhead_t)));
751 #endif /* FreeWipe */
752 
753     KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)bdh));
754 
755     KMP_DEBUG_ASSERT(thr->relfcn != 0);
756     (*thr->relfcn)((void *)bdh); /* Release it directly. */
757     return;
758   }
759 
760   bth = (kmp_info_t *)((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) &
761                        ~1); // clear possible mark before comparison
762   if (bth != th) {
763     /* Add this buffer to be released by the owning thread later */
764     __kmp_bget_enqueue(bth, buf
765 #ifdef USE_QUEUING_LOCK_FOR_BGET
766                        ,
767                        __kmp_gtid_from_thread(th)
768 #endif
769                            );
770     return;
771   }
772 
773   /* Buffer size must be negative, indicating that the buffer is allocated. */
774   if (b->bh.bb.bsize >= 0) {
775     bn = NULL;
776   }
777   KMP_DEBUG_ASSERT(b->bh.bb.bsize < 0);
778 
779   /*  Back pointer in next buffer must be zero, indicating the same thing: */
780 
781   KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.bsize)->bb.prevfree == 0);
782 
783 #if BufStats
784   thr->numrel++; /* Increment number of brel() calls */
785   thr->totalloc += (size_t)b->bh.bb.bsize;
786 #endif
787 
788   /* If the back link is nonzero, the previous buffer is free.  */
789 
790   if (b->bh.bb.prevfree != 0) {
791     /* The previous buffer is free. Consolidate this buffer with it by adding
792        the length of this buffer to the previous free buffer. Note that we
793        subtract the size in the buffer being released, since it's negative to
794        indicate that the buffer is allocated. */
795     bufsize size = b->bh.bb.bsize;
796 
797     /* Make the previous buffer the one we're working on. */
798     KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.prevfree)->bb.bsize ==
799                      b->bh.bb.prevfree);
800     b = BFH(((char *)b) - b->bh.bb.prevfree);
801     b->bh.bb.bsize -= size;
802 
803     /* unlink the buffer from the old freelist */
804     __kmp_bget_remove_from_freelist(b);
805   } else {
    /* The previous buffer is allocated, so no backward coalescing is
       possible. Mark this buffer's size as positive (i.e. free) and fall
       through to place the buffer on the free list as an isolated free
       block. */
809     b->bh.bb.bsize = -b->bh.bb.bsize;
810   }
811 
812   /* insert buffer back onto a new freelist */
813   __kmp_bget_insert_into_freelist(thr, b);
814 
815   /* Now we look at the next buffer in memory, located by advancing from
816      the  start  of  this  buffer  by its size, to see if that buffer is
817      free.  If it is, we combine  this  buffer  with  the  next  one  in
818      memory, dechaining the second buffer from the free list. */
819   bn = BFH(((char *)b) + b->bh.bb.bsize);
820   if (bn->bh.bb.bsize > 0) {
821 
822     /* The buffer is free.  Remove it from the free list and add
823        its size to that of our buffer. */
824     KMP_DEBUG_ASSERT(BH((char *)bn + bn->bh.bb.bsize)->bb.prevfree ==
825                      bn->bh.bb.bsize);
826 
827     __kmp_bget_remove_from_freelist(bn);
828 
829     b->bh.bb.bsize += bn->bh.bb.bsize;
830 
831     /* unlink the buffer from the old freelist, and reinsert it into the new
832      * freelist */
833     __kmp_bget_remove_from_freelist(b);
834     __kmp_bget_insert_into_freelist(thr, b);
835 
836     /* Finally,  advance  to   the  buffer  that   follows  the  newly
837        consolidated free block.  We must set its  backpointer  to  the
838        head  of  the  consolidated free block.  We know the next block
839        must be an allocated block because the process of recombination
840        guarantees  that  two  free  blocks will never be contiguous in
841        memory.  */
842     bn = BFH(((char *)b) + b->bh.bb.bsize);
843   }
844 #ifdef FreeWipe
845   (void)memset(((char *)b) + sizeof(bfhead_t), 0x55,
846                (size_t)(b->bh.bb.bsize - sizeof(bfhead_t)));
847 #endif
848   KMP_DEBUG_ASSERT(bn->bh.bb.bsize < 0);
849 
850   /* The next buffer is allocated.  Set the backpointer in it  to  point
851      to this buffer; the previous free buffer in memory. */
852 
853   bn->bh.bb.prevfree = b->bh.bb.bsize;
854 
855   /*  If  a  block-release function is defined, and this free buffer
856       constitutes the entire block, release it.  Note that  pool_len
857       is  defined  in  such a way that the test will fail unless all
858       pool blocks are the same size.  */
859   if (thr->relfcn != 0 &&
860       b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) {
861 #if BufStats
862     if (thr->numpblk !=
863         1) { /* Do not release the last buffer until finalization time */
864 #endif
865 
866       KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
867       KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
868       KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree ==
869                        b->bh.bb.bsize);
870 
871       /*  Unlink the buffer from the free list  */
872       __kmp_bget_remove_from_freelist(b);
873 
874       KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b));
875 
876       (*thr->relfcn)(b);
877 #if BufStats
878       thr->numprel++; /* Nr of expansion block releases */
879       thr->numpblk--; /* Total number of blocks */
880       KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
881 
      // avoid leaving a stale last_pool pointer around if it is being deallocated
883       if (thr->last_pool == b)
884         thr->last_pool = 0;
885     } else {
886       thr->last_pool = b;
887     }
888 #endif /* BufStats */
889   }
890 }
891 
892 /*  BECTL  --  Establish automatic pool expansion control  */
893 static void bectl(kmp_info_t *th, bget_compact_t compact,
894                   bget_acquire_t acquire, bget_release_t release,
895                   bufsize pool_incr) {
896   thr_data_t *thr = get_thr_data(th);
897 
898   thr->compfcn = compact;
899   thr->acqfcn = acquire;
900   thr->relfcn = release;
901   thr->exp_incr = pool_incr;
902 }
903 
904 /*  BPOOL  --  Add a region of memory to the buffer pool.  */
905 static void bpool(kmp_info_t *th, void *buf, bufsize len) {
906   /*    int bin = 0; */
907   thr_data_t *thr = get_thr_data(th);
908   bfhead_t *b = BFH(buf);
909   bhead_t *bn;
910 
911   __kmp_bget_dequeue(th); /* Release any queued buffers */
912 
913 #ifdef SizeQuant
914   len &= ~(SizeQuant - 1);
915 #endif
916   if (thr->pool_len == 0) {
917     thr->pool_len = len;
918   } else if (len != thr->pool_len) {
919     thr->pool_len = -1;
920   }
921 #if BufStats
922   thr->numpget++; /* Number of block acquisitions */
923   thr->numpblk++; /* Number of blocks total */
924   KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
925 #endif /* BufStats */
926 
927   /* Since the block is initially occupied by a single free  buffer,
928      it  had  better  not  be  (much) larger than the largest buffer
929      whose size we can store in bhead.bb.bsize. */
930   KMP_DEBUG_ASSERT(len - sizeof(bhead_t) <= -((bufsize)ESent + 1));
931 
932   /* Clear  the  backpointer at  the start of the block to indicate that
933      there  is  no  free  block  prior  to  this   one.    That   blocks
934      recombination when the first block in memory is released. */
935   b->bh.bb.prevfree = 0;
936 
937   /* Create a dummy allocated buffer at the end of the pool.  This dummy
938      buffer is seen when a buffer at the end of the pool is released and
939      blocks  recombination  of  the last buffer with the dummy buffer at
940      the end.  The length in the dummy buffer  is  set  to  the  largest
941      negative  number  to  denote  the  end  of  the pool for diagnostic
942      routines (this specific value is  not  counted  on  by  the  actual
943      allocation and release functions). */
944   len -= sizeof(bhead_t);
945   b->bh.bb.bsize = (bufsize)len;
946   /* Set the owner of this buffer */
947   TCW_PTR(b->bh.bb.bthr,
948           (kmp_info_t *)((kmp_uintptr_t)th |
949                          1)); // mark the buffer as allocated address
950 
951   /* Chain the new block to the free list. */
952   __kmp_bget_insert_into_freelist(thr, b);
953 
954 #ifdef FreeWipe
955   (void)memset(((char *)b) + sizeof(bfhead_t), 0x55,
956                (size_t)(len - sizeof(bfhead_t)));
957 #endif
958   bn = BH(((char *)b) + len);
959   bn->bb.prevfree = (bufsize)len;
960   /* Definition of ESent assumes two's complement! */
961   KMP_DEBUG_ASSERT((~0) == -1 && (bn != 0));
962 
963   bn->bb.bsize = ESent;
964 }
965 
966 /*  BFREED  --  Dump the free lists for this thread. */
967 static void bfreed(kmp_info_t *th) {
968   int bin = 0, count = 0;
969   int gtid = __kmp_gtid_from_thread(th);
970   thr_data_t *thr = get_thr_data(th);
971 
972 #if BufStats
973   __kmp_printf_no_lock("__kmp_printpool: T#%d total=%" KMP_UINT64_SPEC
974                        " get=%" KMP_INT64_SPEC " rel=%" KMP_INT64_SPEC
975                        " pblk=%" KMP_INT64_SPEC " pget=%" KMP_INT64_SPEC
976                        " prel=%" KMP_INT64_SPEC " dget=%" KMP_INT64_SPEC
977                        " drel=%" KMP_INT64_SPEC "\n",
978                        gtid, (kmp_uint64)thr->totalloc, (kmp_int64)thr->numget,
979                        (kmp_int64)thr->numrel, (kmp_int64)thr->numpblk,
980                        (kmp_int64)thr->numpget, (kmp_int64)thr->numprel,
981                        (kmp_int64)thr->numdget, (kmp_int64)thr->numdrel);
982 #endif
983 
984   for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
985     bfhead_t *b;
986 
987     for (b = thr->freelist[bin].ql.flink; b != &thr->freelist[bin];
988          b = b->ql.flink) {
989       bufsize bs = b->bh.bb.bsize;
990 
991       KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
992       KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
993       KMP_DEBUG_ASSERT(bs > 0);
994 
995       count += 1;
996 
997       __kmp_printf_no_lock(
998           "__kmp_printpool: T#%d Free block: 0x%p size %6ld bytes.\n", gtid, b,
999           (long)bs);
1000 #ifdef FreeWipe
1001       {
1002         char *lerr = ((char *)b) + sizeof(bfhead_t);
1003         if ((bs > sizeof(bfhead_t)) &&
1004             ((*lerr != 0x55) ||
1005              (memcmp(lerr, lerr + 1, (size_t)(bs - (sizeof(bfhead_t) + 1))) !=
1006               0))) {
1007           __kmp_printf_no_lock("__kmp_printpool: T#%d     (Contents of above "
1008                                "free block have been overstored.)\n",
1009                                gtid);
1010         }
1011       }
1012 #endif
1013     }
1014   }
1015 
1016   if (count == 0)
1017     __kmp_printf_no_lock("__kmp_printpool: T#%d No free blocks\n", gtid);
1018 }
1019 
1020 #ifdef KMP_DEBUG
1021 
1022 #if BufStats
1023 
1024 /*  BSTATS  --  Return buffer allocation free space statistics.  */
1025 static void bstats(kmp_info_t *th, bufsize *curalloc, bufsize *totfree,
1026                    bufsize *maxfree, long *nget, long *nrel) {
1027   int bin = 0;
1028   thr_data_t *thr = get_thr_data(th);
1029 
1030   *nget = thr->numget;
1031   *nrel = thr->numrel;
1032   *curalloc = (bufsize)thr->totalloc;
1033   *totfree = 0;
1034   *maxfree = -1;
1035 
1036   for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
1037     bfhead_t *b = thr->freelist[bin].ql.flink;
1038 
1039     while (b != &thr->freelist[bin]) {
1040       KMP_DEBUG_ASSERT(b->bh.bb.bsize > 0);
1041       *totfree += b->bh.bb.bsize;
1042       if (b->bh.bb.bsize > *maxfree) {
1043         *maxfree = b->bh.bb.bsize;
1044       }
1045       b = b->ql.flink; /* Link to next buffer */
1046     }
1047   }
1048 }
1049 
1050 /*  BSTATSE  --  Return extended statistics  */
1051 static void bstatse(kmp_info_t *th, bufsize *pool_incr, long *npool,
1052                     long *npget, long *nprel, long *ndget, long *ndrel) {
1053   thr_data_t *thr = get_thr_data(th);
1054 
1055   *pool_incr = (thr->pool_len < 0) ? -thr->exp_incr : thr->exp_incr;
1056   *npool = thr->numpblk;
1057   *npget = thr->numpget;
1058   *nprel = thr->numprel;
1059   *ndget = thr->numdget;
1060   *ndrel = thr->numdrel;
1061 }
1062 
1063 #endif /* BufStats */
1064 
1065 /*  BUFDUMP  --  Dump the data in a buffer.  This is called with the  user
1066                  data pointer, and backs up to the buffer header.  It will
1067                  dump either a free block or an allocated one.  */
1068 static void bufdump(kmp_info_t *th, void *buf) {
1069   bfhead_t *b;
1070   unsigned char *bdump;
1071   bufsize bdlen;
1072 
1073   b = BFH(((char *)buf) - sizeof(bhead_t));
1074   KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
1075   if (b->bh.bb.bsize < 0) {
1076     bdump = (unsigned char *)buf;
1077     bdlen = (-b->bh.bb.bsize) - (bufsize)sizeof(bhead_t);
1078   } else {
1079     bdump = (unsigned char *)(((char *)b) + sizeof(bfhead_t));
1080     bdlen = b->bh.bb.bsize - (bufsize)sizeof(bfhead_t);
1081   }
1082 
1083   while (bdlen > 0) {
1084     int i, dupes = 0;
1085     bufsize l = bdlen;
1086     char bhex[50], bascii[20];
1087 
1088     if (l > 16) {
1089       l = 16;
1090     }
1091 
1092     for (i = 0; i < l; i++) {
1093       (void)KMP_SNPRINTF(bhex + i * 3, sizeof(bhex) - i * 3, "%02X ", bdump[i]);
1094       if (bdump[i] > 0x20 && bdump[i] < 0x7F)
1095         bascii[i] = bdump[i];
1096       else
1097         bascii[i] = ' ';
1098     }
1099     bascii[i] = 0;
1100     (void)__kmp_printf_no_lock("%-48s   %s\n", bhex, bascii);
1101     bdump += l;
1102     bdlen -= l;
1103     while ((bdlen > 16) &&
1104            (memcmp((char *)(bdump - 16), (char *)bdump, 16) == 0)) {
1105       dupes++;
1106       bdump += 16;
1107       bdlen -= 16;
1108     }
1109     if (dupes > 1) {
1110       (void)__kmp_printf_no_lock(
1111           "     (%d lines [%d bytes] identical to above line skipped)\n", dupes,
1112           dupes * 16);
1113     } else if (dupes == 1) {
1114       bdump -= 16;
1115       bdlen += 16;
1116     }
1117   }
1118 }
1119 
1120 /*  BPOOLD  --  Dump a buffer pool.  The buffer headers are always listed.
1121                 If DUMPALLOC is nonzero, the contents of allocated buffers
1122                 are  dumped.   If  DUMPFREE  is  nonzero,  free blocks are
1123                 dumped as well.  If FreeWipe  checking  is  enabled,  free
1124                 blocks  which  have  been clobbered will always be dumped. */
1125 static void bpoold(kmp_info_t *th, void *buf, int dumpalloc, int dumpfree) {
1126   bfhead_t *b = BFH((char *)buf - sizeof(bhead_t));
1127 
1128   while (b->bh.bb.bsize != ESent) {
1129     bufsize bs = b->bh.bb.bsize;
1130 
1131     if (bs < 0) {
1132       bs = -bs;
1133       (void)__kmp_printf_no_lock("Allocated buffer: size %6ld bytes.\n",
1134                                  (long)bs);
1135       if (dumpalloc) {
1136         bufdump(th, (void *)(((char *)b) + sizeof(bhead_t)));
1137       }
1138     } else {
1139       const char *lerr = "";
1140 
1141       KMP_DEBUG_ASSERT(bs > 0);
1142       if ((b->ql.blink->ql.flink != b) || (b->ql.flink->ql.blink != b)) {
1143         lerr = "  (Bad free list links)";
1144       }
1145       (void)__kmp_printf_no_lock("Free block:       size %6ld bytes.%s\n",
1146                                  (long)bs, lerr);
1147 #ifdef FreeWipe
1148       lerr = ((char *)b) + sizeof(bfhead_t);
1149       if ((bs > sizeof(bfhead_t)) &&
1150           ((*lerr != 0x55) ||
1151            (memcmp(lerr, lerr + 1, (size_t)(bs - (sizeof(bfhead_t) + 1))) !=
1152             0))) {
1153         (void)__kmp_printf_no_lock(
1154             "(Contents of above free block have been overstored.)\n");
1155         bufdump(th, (void *)(((char *)b) + sizeof(bhead_t)));
1156       } else
1157 #endif
1158           if (dumpfree) {
1159         bufdump(th, (void *)(((char *)b) + sizeof(bhead_t)));
1160       }
1161     }
1162     b = BFH(((char *)b) + bs);
1163   }
1164 }
1165 
1166 /*  BPOOLV  --  Validate a buffer pool. */
1167 static int bpoolv(kmp_info_t *th, void *buf) {
1168   bfhead_t *b = BFH(buf);
1169 
1170   while (b->bh.bb.bsize != ESent) {
1171     bufsize bs = b->bh.bb.bsize;
1172 
1173     if (bs < 0) {
1174       bs = -bs;
1175     } else {
1176 #ifdef FreeWipe
      const char *lerr = "";
1178 #endif
1179 
1180       KMP_DEBUG_ASSERT(bs > 0);
1181       if (bs <= 0) {
1182         return 0;
1183       }
1184       if ((b->ql.blink->ql.flink != b) || (b->ql.flink->ql.blink != b)) {
1185         (void)__kmp_printf_no_lock(
1186             "Free block: size %6ld bytes.  (Bad free list links)\n", (long)bs);
1187         KMP_DEBUG_ASSERT(0);
1188         return 0;
1189       }
1190 #ifdef FreeWipe
1191       lerr = ((char *)b) + sizeof(bfhead_t);
1192       if ((bs > sizeof(bfhead_t)) &&
1193           ((*lerr != 0x55) ||
1194            (memcmp(lerr, lerr + 1, (size_t)(bs - (sizeof(bfhead_t) + 1))) !=
1195             0))) {
1196         (void)__kmp_printf_no_lock(
1197             "(Contents of above free block have been overstored.)\n");
1198         bufdump(th, (void *)(((char *)b) + sizeof(bhead_t)));
1199         KMP_DEBUG_ASSERT(0);
1200         return 0;
1201       }
1202 #endif /* FreeWipe */
1203     }
1204     b = BFH(((char *)b) + bs);
1205   }
1206   return 1;
1207 }
1208 
1209 #endif /* KMP_DEBUG */
1210 
1211 void __kmp_initialize_bget(kmp_info_t *th) {
1212   KMP_DEBUG_ASSERT(SizeQuant >= sizeof(void *) && (th != 0));
1213 
1214   set_thr_data(th);
1215 
1216   bectl(th, (bget_compact_t)0, (bget_acquire_t)malloc, (bget_release_t)free,
1217         (bufsize)__kmp_malloc_pool_incr);
1218 }
1219 
1220 void __kmp_finalize_bget(kmp_info_t *th) {
1221   thr_data_t *thr;
1222   bfhead_t *b;
1223 
1224   KMP_DEBUG_ASSERT(th != 0);
1225 
1226 #if BufStats
1227   thr = (thr_data_t *)th->th.th_local.bget_data;
1228   KMP_DEBUG_ASSERT(thr != NULL);
1229   b = thr->last_pool;
1230 
1231   /*  If a block-release function is defined, and this free buffer constitutes
1232       the entire block, release it. Note that pool_len is defined in such a way
1233       that the test will fail unless all pool blocks are the same size.  */
1234 
1235   // Deallocate the last pool if one exists because we no longer do it in brel()
1236   if (thr->relfcn != 0 && b != 0 && thr->numpblk != 0 &&
1237       b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) {
1238     KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
1239     KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
1240     KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree ==
1241                      b->bh.bb.bsize);
1242 
1243     /*  Unlink the buffer from the free list  */
1244     __kmp_bget_remove_from_freelist(b);
1245 
1246     KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b));
1247 
1248     (*thr->relfcn)(b);
1249     thr->numprel++; /* Nr of expansion block releases */
1250     thr->numpblk--; /* Total number of blocks */
1251     KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
1252   }
1253 #endif /* BufStats */
1254 
1255   /* Deallocate bget_data */
1256   if (th->th.th_local.bget_data != NULL) {
1257     __kmp_free(th->th.th_local.bget_data);
1258     th->th.th_local.bget_data = NULL;
1259   }
1260 }
1261 
1262 void kmpc_set_poolsize(size_t size) {
1263   bectl(__kmp_get_thread(), (bget_compact_t)0, (bget_acquire_t)malloc,
1264         (bget_release_t)free, (bufsize)size);
1265 }
1266 
1267 size_t kmpc_get_poolsize(void) {
1268   thr_data_t *p;
1269 
1270   p = get_thr_data(__kmp_get_thread());
1271 
1272   return p->exp_incr;
1273 }
1274 
1275 void kmpc_set_poolmode(int mode) {
1276   thr_data_t *p;
1277 
1278   if (mode == bget_mode_fifo || mode == bget_mode_lifo ||
1279       mode == bget_mode_best) {
1280     p = get_thr_data(__kmp_get_thread());
1281     p->mode = (bget_mode_t)mode;
1282   }
1283 }
1284 
1285 int kmpc_get_poolmode(void) {
1286   thr_data_t *p;
1287 
1288   p = get_thr_data(__kmp_get_thread());
1289 
1290   return p->mode;
1291 }
1292 
1293 void kmpc_get_poolstat(size_t *maxmem, size_t *allmem) {
1294   kmp_info_t *th = __kmp_get_thread();
1295   bufsize a, b;
1296 
1297   __kmp_bget_dequeue(th); /* Release any queued buffers */
1298 
1299   bcheck(th, &a, &b);
1300 
1301   *maxmem = a;
1302   *allmem = b;
1303 }
1304 
1305 void kmpc_poolprint(void) {
1306   kmp_info_t *th = __kmp_get_thread();
1307 
1308   __kmp_bget_dequeue(th); /* Release any queued buffers */
1309 
1310   bfreed(th);
1311 }
1312 
1313 #endif // #if KMP_USE_BGET
1314 
1315 void *kmpc_malloc(size_t size) {
1316   void *ptr;
1317   ptr = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr)));
1318   if (ptr != NULL) {
1319     // save allocated pointer just before one returned to user
1320     *(void **)ptr = ptr;
1321     ptr = (void **)ptr + 1;
1322   }
1323   return ptr;
1324 }
1325 
1326 #define IS_POWER_OF_TWO(n) (((n) & ((n)-1)) == 0)
1327 
1328 void *kmpc_aligned_malloc(size_t size, size_t alignment) {
1329   void *ptr;
1330   void *ptr_allocated;
1331   KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too big
1332   if (!IS_POWER_OF_TWO(alignment)) {
1333     // AC: do we need to issue a warning here?
1334     errno = EINVAL;
1335     return NULL;
1336   }
1337   size = size + sizeof(void *) + alignment;
1338   ptr_allocated = bget(__kmp_entry_thread(), (bufsize)size);
1339   if (ptr_allocated != NULL) {
1340     // save allocated pointer just before one returned to user
1341     ptr = (void *)(((kmp_uintptr_t)ptr_allocated + sizeof(void *) + alignment) &
1342                    ~(alignment - 1));
1343     *((void **)ptr - 1) = ptr_allocated;
1344   } else {
1345     ptr = NULL;
1346   }
1347   return ptr;
1348 }
1349 
1350 void *kmpc_calloc(size_t nelem, size_t elsize) {
1351   void *ptr;
1352   ptr = bgetz(__kmp_entry_thread(), (bufsize)(nelem * elsize + sizeof(ptr)));
1353   if (ptr != NULL) {
1354     // save allocated pointer just before one returned to user
1355     *(void **)ptr = ptr;
1356     ptr = (void **)ptr + 1;
1357   }
1358   return ptr;
1359 }
1360 
1361 void *kmpc_realloc(void *ptr, size_t size) {
1362   void *result = NULL;
1363   if (ptr == NULL) {
1364     // If pointer is NULL, realloc behaves like malloc.
1365     result = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr)));
1366     // save allocated pointer just before one returned to user
1367     if (result != NULL) {
1368       *(void **)result = result;
1369       result = (void **)result + 1;
1370     }
1371   } else if (size == 0) {
1372     // If size is 0, realloc behaves like free.
1373     // The thread must be registered by the call to kmpc_malloc() or
1374     // kmpc_calloc() before.
1375     // So it should be safe to call __kmp_get_thread(), not
1376     // __kmp_entry_thread().
1377     KMP_ASSERT(*((void **)ptr - 1));
1378     brel(__kmp_get_thread(), *((void **)ptr - 1));
1379   } else {
1380     result = bgetr(__kmp_entry_thread(), *((void **)ptr - 1),
1381                    (bufsize)(size + sizeof(ptr)));
1382     if (result != NULL) {
1383       *(void **)result = result;
1384       result = (void **)result + 1;
1385     }
1386   }
1387   return result;
1388 }
1389 
1390 // NOTE: the library must have already been initialized by a previous allocate
1391 void kmpc_free(void *ptr) {
1392   if (!__kmp_init_serial) {
1393     return;
1394   }
1395   if (ptr != NULL) {
1396     kmp_info_t *th = __kmp_get_thread();
1397     __kmp_bget_dequeue(th); /* Release any queued buffers */
1398     // extract allocated pointer and free it
1399     KMP_ASSERT(*((void **)ptr - 1));
1400     brel(th, *((void **)ptr - 1));
1401   }
1402 }
1403 
1404 void *___kmp_thread_malloc(kmp_info_t *th, size_t size KMP_SRC_LOC_DECL) {
1405   void *ptr;
1406   KE_TRACE(30, ("-> __kmp_thread_malloc( %p, %d ) called from %s:%d\n", th,
1407                 (int)size KMP_SRC_LOC_PARM));
1408   ptr = bget(th, (bufsize)size);
1409   KE_TRACE(30, ("<- __kmp_thread_malloc() returns %p\n", ptr));
1410   return ptr;
1411 }
1412 
1413 void *___kmp_thread_calloc(kmp_info_t *th, size_t nelem,
1414                            size_t elsize KMP_SRC_LOC_DECL) {
1415   void *ptr;
1416   KE_TRACE(30, ("-> __kmp_thread_calloc( %p, %d, %d ) called from %s:%d\n", th,
1417                 (int)nelem, (int)elsize KMP_SRC_LOC_PARM));
1418   ptr = bgetz(th, (bufsize)(nelem * elsize));
1419   KE_TRACE(30, ("<- __kmp_thread_calloc() returns %p\n", ptr));
1420   return ptr;
1421 }
1422 
1423 void *___kmp_thread_realloc(kmp_info_t *th, void *ptr,
1424                             size_t size KMP_SRC_LOC_DECL) {
1425   KE_TRACE(30, ("-> __kmp_thread_realloc( %p, %p, %d ) called from %s:%d\n", th,
1426                 ptr, (int)size KMP_SRC_LOC_PARM));
1427   ptr = bgetr(th, ptr, (bufsize)size);
1428   KE_TRACE(30, ("<- __kmp_thread_realloc() returns %p\n", ptr));
1429   return ptr;
1430 }
1431 
1432 void ___kmp_thread_free(kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL) {
1433   KE_TRACE(30, ("-> __kmp_thread_free( %p, %p ) called from %s:%d\n", th,
1434                 ptr KMP_SRC_LOC_PARM));
1435   if (ptr != NULL) {
1436     __kmp_bget_dequeue(th); /* Release any queued buffers */
1437     brel(th, ptr);
1438   }
1439   KE_TRACE(30, ("<- __kmp_thread_free()\n"));
1440 }
1441 
/* If LEAK_MEMORY is defined, __kmp_free() will *not* free memory. It causes
   memory leaks, but it may be useful for debugging memory corruptions, use of
   freed pointers, etc. */
1445 /* #define LEAK_MEMORY */
1446 struct kmp_mem_descr { // Memory block descriptor.
1447   void *ptr_allocated; // Pointer returned by malloc(), subject for free().
1448   size_t size_allocated; // Size of allocated memory block.
1449   void *ptr_aligned; // Pointer to aligned memory, to be used by client code.
1450   size_t size_aligned; // Size of aligned memory block.
1451 };
1452 typedef struct kmp_mem_descr kmp_mem_descr_t;
1453 
1454 /* Allocate memory on requested boundary, fill allocated memory with 0x00.
1455    NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
1456    error. Must use __kmp_free when freeing memory allocated by this routine! */
1457 static void *___kmp_allocate_align(size_t size,
1458                                    size_t alignment KMP_SRC_LOC_DECL) {
  /* __kmp_allocate() allocates (by a call to malloc()) a bigger memory block
     than requested in order to return a properly aligned pointer. The
     original pointer returned by malloc() and the size of the allocated
     block are saved in a descriptor placed just before the aligned pointer.
     __kmp_free() uses this information to pass the original pointer, not the
     aligned one, to free().
1464 
1465           +---------+------------+-----------------------------------+---------+
1466           | padding | descriptor |           aligned block           | padding |
1467           +---------+------------+-----------------------------------+---------+
1468           ^                      ^
1469           |                      |
1470           |                      +- Aligned pointer returned to caller
1471           +- Pointer returned by malloc()
1472 
      The aligned block is filled with zeros; the padding is filled with 0xEF. */
1474 
1475   kmp_mem_descr_t descr;
1476   kmp_uintptr_t addr_allocated; // Address returned by malloc().
1477   kmp_uintptr_t addr_aligned; // Aligned address to return to caller.
1478   kmp_uintptr_t addr_descr; // Address of memory block descriptor.
1479 
1480   KE_TRACE(25, ("-> ___kmp_allocate_align( %d, %d ) called from %s:%d\n",
1481                 (int)size, (int)alignment KMP_SRC_LOC_PARM));
1482 
  KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too big
1484   KMP_DEBUG_ASSERT(sizeof(void *) <= sizeof(kmp_uintptr_t));
1485   // Make sure kmp_uintptr_t is enough to store addresses.
1486 
1487   descr.size_aligned = size;
1488   descr.size_allocated =
1489       descr.size_aligned + sizeof(kmp_mem_descr_t) + alignment;
1490 
1491 #if KMP_DEBUG
1492   descr.ptr_allocated = _malloc_src_loc(descr.size_allocated, _file_, _line_);
1493 #else
1494   descr.ptr_allocated = malloc_src_loc(descr.size_allocated KMP_SRC_LOC_PARM);
1495 #endif
1496   KE_TRACE(10, ("   malloc( %d ) returned %p\n", (int)descr.size_allocated,
1497                 descr.ptr_allocated));
1498   if (descr.ptr_allocated == NULL) {
1499     KMP_FATAL(OutOfHeapMemory);
1500   }
1501 
1502   addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
1503   addr_aligned =
1504       (addr_allocated + sizeof(kmp_mem_descr_t) + alignment) & ~(alignment - 1);
1505   addr_descr = addr_aligned - sizeof(kmp_mem_descr_t);
1506 
1507   descr.ptr_aligned = (void *)addr_aligned;
1508 
1509   KE_TRACE(26, ("   ___kmp_allocate_align: "
1510                 "ptr_allocated=%p, size_allocated=%d, "
1511                 "ptr_aligned=%p, size_aligned=%d\n",
1512                 descr.ptr_allocated, (int)descr.size_allocated,
1513                 descr.ptr_aligned, (int)descr.size_aligned));
1514 
1515   KMP_DEBUG_ASSERT(addr_allocated <= addr_descr);
1516   KMP_DEBUG_ASSERT(addr_descr + sizeof(kmp_mem_descr_t) == addr_aligned);
1517   KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
1518                    addr_allocated + descr.size_allocated);
1519   KMP_DEBUG_ASSERT(addr_aligned % alignment == 0);
1520 #ifdef KMP_DEBUG
  // Fill the whole allocated memory block with 0xEF.
  memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
1523 #endif
  // Fill the aligned memory block (which is intended for use by the caller)
  // with 0x00. Do not put this filling under a KMP_DEBUG condition! Many
  // callers expect zeroed memory. (Padding bytes remain filled with 0xEF in
  // the debugging library.)
  memset(descr.ptr_aligned, 0x00, descr.size_aligned);
1530   *((kmp_mem_descr_t *)addr_descr) = descr;
1531 
1532   KMP_MB();
1533 
1534   KE_TRACE(25, ("<- ___kmp_allocate_align() returns %p\n", descr.ptr_aligned));
1535   return descr.ptr_aligned;
1536 } // func ___kmp_allocate_align
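
/* A worked example with illustrative numbers: on a 64-bit build where
   sizeof(kmp_mem_descr_t) == 32, a request of alignment 64 that malloc()
   satisfies at 0x1001 yields
     addr_aligned = (0x1001 + 32 + 64) & ~63 = 0x1040
     addr_descr   = 0x1040 - 32             = 0x1020
   so the descriptor sits wholly inside the allocation, immediately below
   the aligned pointer. (The addresses and the descriptor size are made
   up for the example.) */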
1537 
1538 /* Allocate memory on cache line boundary, fill allocated memory with 0x00.
1539    Do not call this func directly! Use __kmp_allocate macro instead.
1540    NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
1541    error. Must use __kmp_free when freeing memory allocated by this routine! */
1542 void *___kmp_allocate(size_t size KMP_SRC_LOC_DECL) {
1543   void *ptr;
1544   KE_TRACE(25, ("-> __kmp_allocate( %d ) called from %s:%d\n",
1545                 (int)size KMP_SRC_LOC_PARM));
1546   ptr = ___kmp_allocate_align(size, __kmp_align_alloc KMP_SRC_LOC_PARM);
1547   KE_TRACE(25, ("<- __kmp_allocate() returns %p\n", ptr));
1548   return ptr;
1549 } // func ___kmp_allocate

#if (BUILD_MEMORY == FIRST_TOUCH)
void *__kmp_ft_page_allocate(size_t size) {
  void *adr, *aadr;

  const int page_size = KMP_GET_PAGE_SIZE();

  adr = (void *)__kmp_thread_malloc(__kmp_get_thread(),
                                    size + page_size + KMP_PTR_SKIP);
  if (adr == 0)
    KMP_FATAL(OutOfHeapMemory);

  /* check to see if adr is on a page boundary. */
  if (((kmp_uintptr_t)adr & (page_size - 1)) == 0)
    /* nothing to do if adr is already on a page boundary. */
    aadr = adr;
  else
    /* else set aadr to the first page boundary in the allocated memory. */
    aadr = (void *)(((kmp_uintptr_t)adr + page_size) & ~(page_size - 1));

  /* the first touch by the owner thread. */
  *((void **)aadr) = adr;

  /* skip the memory space used for storing adr above. */
  return (void *)((char *)aadr + KMP_PTR_SKIP);
}
#endif
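
/* First-touch layout sketch: the raw pointer from the thread allocator is
   stashed in the KMP_PTR_SKIP bytes at the page boundary, and the caller
   receives the address just past that slot:

     adr            aadr               aadr + KMP_PTR_SKIP
     |              |                  |
     v              v                  v
     +----- ... ----+------------------+------------ ... ------------+
     | slack        | saved adr        | size bytes for the caller   |
     +----- ... ----+------------------+------------ ... ------------+

   Storing adr at aadr is also the owner thread's first touch of the page,
   which is what binds it to the owner's NUMA node under first-touch
   placement. */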

/* Allocate memory on page boundary, fill allocated memory with 0x00.
   Do not call this func directly! Use __kmp_page_allocate macro instead.
   NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
   error. Must use __kmp_free when freeing memory allocated by this routine! */
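// Note: a fixed 8 KB alignment quantum is used below; the actual OS page size
// is not queried here.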
void *___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL) {
  int page_size = 8 * 1024;
  void *ptr;

  KE_TRACE(25, ("-> __kmp_page_allocate( %d ) called from %s:%d\n",
                (int)size KMP_SRC_LOC_PARM));
  ptr = ___kmp_allocate_align(size, page_size KMP_SRC_LOC_PARM);
  KE_TRACE(25, ("<- __kmp_page_allocate( %d ) returns %p\n", (int)size, ptr));
  return ptr;
} // ___kmp_page_allocate

/* Free memory allocated by __kmp_allocate() and __kmp_page_allocate().
   In debug mode, fill the memory block with 0xEF before call to free(). */
void ___kmp_free(void *ptr KMP_SRC_LOC_DECL) {
  kmp_mem_descr_t descr;
  kmp_uintptr_t addr_allocated; // Address returned by malloc().
  kmp_uintptr_t addr_aligned; // Aligned address passed by caller.

  KE_TRACE(25,
           ("-> __kmp_free( %p ) called from %s:%d\n", ptr KMP_SRC_LOC_PARM));
  KMP_ASSERT(ptr != NULL);

  descr = *(kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t));

  KE_TRACE(26, ("   __kmp_free:     "
                "ptr_allocated=%p, size_allocated=%d, "
                "ptr_aligned=%p, size_aligned=%d\n",
                descr.ptr_allocated, (int)descr.size_allocated,
                descr.ptr_aligned, (int)descr.size_aligned));

  addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
  addr_aligned = (kmp_uintptr_t)descr.ptr_aligned;

  KMP_DEBUG_ASSERT(addr_aligned % CACHE_LINE == 0);
  KMP_DEBUG_ASSERT(descr.ptr_aligned == ptr);
  KMP_DEBUG_ASSERT(addr_allocated + sizeof(kmp_mem_descr_t) <= addr_aligned);
  KMP_DEBUG_ASSERT(descr.size_aligned < descr.size_allocated);
  KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
                   addr_allocated + descr.size_allocated);

#ifdef KMP_DEBUG
  memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
  // Fill memory block with 0xEF; it helps catch use of freed memory.
#endif

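// When LEAK_MEMORY is defined, the call to the underlying free() is skipped
// entirely, so freed blocks stay mapped (and, in debug builds, stay poisoned
// with 0xEF); presumably a debugging aid for chasing use-after-free bugs.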
#ifndef LEAK_MEMORY
  KE_TRACE(10, ("   free( %p )\n", descr.ptr_allocated));
#ifdef KMP_DEBUG
  _free_src_loc(descr.ptr_allocated, _file_, _line_);
#else
  free_src_loc(descr.ptr_allocated KMP_SRC_LOC_PARM);
#endif
#endif
  KMP_MB();
  KE_TRACE(25, ("<- __kmp_free() returns\n"));
} // func ___kmp_free

#if USE_FAST_MEMORY == 3
// Allocate fast memory by first scanning the thread's free lists.
// If a chunk of the right size exists, grab it off the free list;
// otherwise allocate normally via the underlying bget allocator.

// AC: How to choose the limit? Just get 16 for now...
#define KMP_FREE_LIST_LIMIT 16

// Always use 128 bytes for determining buckets for caching memory blocks
#define DCACHE_LINE 128
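
/* Bucket geometry implied by the index computation in ___kmp_fast_allocate
   below (sizes in 128-byte DCACHE_LINE units):

     requested lines   free list index   block rounded up to
     1 - 2             0                 2 lines  (256 bytes)
     3 - 4             1                 4 lines  (512 bytes)
     5 - 16            2                 16 lines (2 KB)
     17 - 64           3                 64 lines (8 KB)
     65+               none              served directly by bget()

   Worked example: size = 1000 bytes -> num_lines = 8 -> idx = 7;
   7 >> 2 == 1 (not 0), then 1 >> 2 == 0 -> index 2, so the block is
   rounded up to 16 * 128 = 2048 bytes. */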

void *___kmp_fast_allocate(kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL) {
  void *ptr;
  int num_lines;
  int idx;
  int index;
  void *alloc_ptr;
  size_t alloc_size;
  kmp_mem_descr_t *descr;

  KE_TRACE(25, ("-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n",
                __kmp_gtid_from_thread(this_thr), (int)size KMP_SRC_LOC_PARM));

  num_lines = (size + DCACHE_LINE - 1) / DCACHE_LINE;
  idx = num_lines - 1;
  KMP_DEBUG_ASSERT(idx >= 0);
  if (idx < 2) {
    index = 0; // idx is [ 0, 1 ], use first free list
    num_lines = 2; // 1 or 2 cache lines, or less than one cache line
  } else if ((idx >>= 2) == 0) {
    index = 1; // idx is [ 2, 3 ], use second free list
    num_lines = 4; // 3, 4 cache lines
  } else if ((idx >>= 2) == 0) {
    index = 2; // idx is [ 4, 15 ], use third free list
    num_lines = 16; // 5, 6, ..., 16 cache lines
  } else if ((idx >>= 2) == 0) {
    index = 3; // idx is [ 16, 63 ], use fourth free list
    num_lines = 64; // 17, 18, ..., 64 cache lines
  } else {
    goto alloc_call; // 65 or more cache lines ( > 8KB ), don't use free lists
  }

  ptr = this_thr->th.th_free_lists[index].th_free_list_self;
  if (ptr != NULL) {
    // pop the head of the no-sync free list
    this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
    KMP_DEBUG_ASSERT(
        this_thr ==
        ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t)))
            ->ptr_aligned);
    goto end;
  }
  ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
  if (ptr != NULL) {
    // the no-sync free list is empty, so use the sync free list (filled in by
    // other threads only): detach the whole chain by swapping in NULL
    while (!KMP_COMPARE_AND_STORE_PTR(
        &this_thr->th.th_free_lists[index].th_free_list_sync, ptr, nullptr)) {
      KMP_CPU_PAUSE();
      ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
    }
    // push the rest of the chain onto the no-sync free list (it can be NULL
    // if that was the only block)
    this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
    KMP_DEBUG_ASSERT(
        this_thr ==
        ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t)))
            ->ptr_aligned);
    goto end;
  }

alloc_call:
  // no suitable block found in the free lists, so allocate a fresh one
  size = num_lines * DCACHE_LINE;

  alloc_size = size + sizeof(kmp_mem_descr_t) + DCACHE_LINE;
  KE_TRACE(25, ("__kmp_fast_allocate: T#%d Calling bget with "
                "alloc_size %d\n",
                __kmp_gtid_from_thread(this_thr), (int)alloc_size));
  alloc_ptr = bget(this_thr, (bufsize)alloc_size);

  // align ptr to DCACHE_LINE
  ptr = (void *)((((kmp_uintptr_t)alloc_ptr) + sizeof(kmp_mem_descr_t) +
                  DCACHE_LINE) &
                 ~(DCACHE_LINE - 1));
  descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t));

  descr->ptr_allocated = alloc_ptr; // remember allocated pointer
  // we don't need size_allocated
  descr->ptr_aligned = (void *)this_thr; // remember allocating thread
  // (it is already saved in the bget buffer,
  // but we may want to use another allocator in the future)
  descr->size_aligned = size;

end:
  KE_TRACE(25, ("<- __kmp_fast_allocate( T#%d ) returns %p\n",
                __kmp_gtid_from_thread(this_thr), ptr));
  return ptr;
} // func __kmp_fast_allocate
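
/* How the descriptor fields are reused for fast blocks (note this differs
   from ___kmp_allocate_align above):
     ptr_allocated  - raw pointer returned by bget(), later passed to brel()
     ptr_aligned    - the owning kmp_info_t *, not an aligned address
     size_aligned   - rounded block size; selects the free-list bucket on free
     size_allocated - unused at allocation time; ___kmp_fast_free() reuses it
                      as the queue length when a block heads an "other" list */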

// Free fast memory and place it on the thread's free list if it is of
// the correct size.
void ___kmp_fast_free(kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL) {
  kmp_mem_descr_t *descr;
  kmp_info_t *alloc_thr;
  size_t size;
  size_t idx;
  int index;

  KE_TRACE(25, ("-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n",
                __kmp_gtid_from_thread(this_thr), ptr KMP_SRC_LOC_PARM));
  KMP_ASSERT(ptr != NULL);

  descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t));

  KE_TRACE(26, ("   __kmp_fast_free:     size_aligned=%d\n",
                (int)descr->size_aligned));

  size = descr->size_aligned; // 2, 4, 16, 64, 65, 66, ... cache lines

  idx = DCACHE_LINE * 2; // 2 cache lines is the minimal block size
  if (idx == size) {
    index = 0; // 2 cache lines
  } else if ((idx <<= 1) == size) {
    index = 1; // 4 cache lines
  } else if ((idx <<= 2) == size) {
    index = 2; // 16 cache lines
  } else if ((idx <<= 2) == size) {
    index = 3; // 64 cache lines
  } else {
    KMP_DEBUG_ASSERT(size > DCACHE_LINE * 64);
    goto free_call; // 65 or more cache lines ( > 8KB )
  }

  alloc_thr = (kmp_info_t *)descr->ptr_aligned; // get thread owning the block
  if (alloc_thr == this_thr) {
    // push block to self no-sync free list, linking previous head (LIFO)
    *((void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self;
    this_thr->th.th_free_lists[index].th_free_list_self = ptr;
  } else {
    void *head = this_thr->th.th_free_lists[index].th_free_list_other;
    if (head == NULL) {
      // Create new free list
      this_thr->th.th_free_lists[index].th_free_list_other = ptr;
      *((void **)ptr) = NULL; // mark the tail of the list
      descr->size_allocated = (size_t)1; // head of the list keeps its length
    } else {
      // need to check the existing "other" list's owner thread and queue size
      kmp_mem_descr_t *dsc =
          (kmp_mem_descr_t *)((char *)head - sizeof(kmp_mem_descr_t));
      // allocating thread, same for all queue nodes
      kmp_info_t *q_th = (kmp_info_t *)(dsc->ptr_aligned);
      size_t q_sz =
          dsc->size_allocated + 1; // new queue size if we add the current block
      if (q_th == alloc_thr && q_sz <= KMP_FREE_LIST_LIMIT) {
        // we can add the current block to the "other" list, no sync needed
        *((void **)ptr) = head;
        descr->size_allocated = q_sz;
        this_thr->th.th_free_lists[index].th_free_list_other = ptr;
      } else {
        // either the queue's owning thread is changing or the size limit was
        // exceeded; return the old queue to the allocating thread (q_th)
        // synchronously, and start a new list for alloc_thr's blocks
        void *old_ptr;
        void *tail = head;
        void *next = *((void **)head);
        while (next != NULL) {
          KMP_DEBUG_ASSERT(
              // queue size should decrease by 1 each step through the list
              ((kmp_mem_descr_t *)((char *)next - sizeof(kmp_mem_descr_t)))
                      ->size_allocated +
                  1 ==
              ((kmp_mem_descr_t *)((char *)tail - sizeof(kmp_mem_descr_t)))
                  ->size_allocated);
          tail = next; // remember tail node
          next = *((void **)next);
        }
        KMP_DEBUG_ASSERT(q_th != NULL);
        // push block to owner's sync free list
        old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
        /* the next pointer must be set before setting free_list to ptr to avoid
           exposing a broken list to other threads, even for an instant. */
        *((void **)tail) = old_ptr;

        while (!KMP_COMPARE_AND_STORE_PTR(
            &q_th->th.th_free_lists[index].th_free_list_sync, old_ptr, head)) {
          KMP_CPU_PAUSE();
          old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
          *((void **)tail) = old_ptr;
        }

        // start a new list of not-self blocks
        this_thr->th.th_free_lists[index].th_free_list_other = ptr;
        *((void **)ptr) = NULL;
        descr->size_allocated = (size_t)1; // head of queue keeps its length
      }
    }
  }
  goto end;

free_call:
  KE_TRACE(25, ("__kmp_fast_free: T#%d Calling brel for size %d\n",
                __kmp_gtid_from_thread(this_thr), (int)size));
  __kmp_bget_dequeue(this_thr); /* Release any queued buffers */
  brel(this_thr, descr->ptr_allocated);

end:
  KE_TRACE(25, ("<- __kmp_fast_free() returns\n"));

} // func __kmp_fast_free
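
/* Summary of where a freed fast block can end up:
     1. owned by this thread           -> pushed onto th_free_list_self, no sync
     2. foreign owner, short queue     -> batched on this thread's
                                          th_free_list_other list
     3. foreign owner, batch full or
        owner changed                  -> the whole batch is CAS-pushed onto the
                                          owner's th_free_list_sync; a new batch
                                          starts with the current block
     4. oversized (more than 64 lines) -> returned straight to bget via brel()
*/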

// Initialize the thread free lists related to fast memory
// Only do this when a thread is initially created.
void __kmp_initialize_fast_memory(kmp_info_t *this_thr) {
  KE_TRACE(10, ("__kmp_initialize_fast_memory: Called from th %p\n", this_thr));

  memset(this_thr->th.th_free_lists, 0, NUM_LISTS * sizeof(kmp_free_list_t));
}

// Free the memory in the thread free lists related to fast memory
// Only do this when a thread is being reaped (destroyed).
void __kmp_free_fast_memory(kmp_info_t *th) {
  // Assuming BGET is the underlying allocator, walk through its structures...
  int bin;
  thr_data_t *thr = get_thr_data(th);
  void **lst = NULL;

  KE_TRACE(
      5, ("__kmp_free_fast_memory: Called T#%d\n", __kmp_gtid_from_thread(th)));

  __kmp_bget_dequeue(th); // Release any queued buffers

  // Dig through free lists and extract all allocated blocks
  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
    bfhead_t *b = thr->freelist[bin].ql.flink;
    while (b != &thr->freelist[bin]) {
      if ((kmp_uintptr_t)b->bh.bb.bthr & 1) {
        // low bit of bthr set: the buffer is an allocated address (an entire
        // expansion block that can be handed back to the release function)
        *((void **)b) = lst; // link the list (overwrites bthr but keeps flink)
        lst = (void **)b; // push b onto lst
      }
      b = b->ql.flink; // get next buffer
    }
  }
  while (lst != NULL) {
    void *next = *lst;
    KE_TRACE(10, ("__kmp_free_fast_memory: freeing %p, next=%p th %p (%d)\n",
                  lst, next, th, __kmp_gtid_from_thread(th)));
    (*thr->relfcn)(lst);
#if BufStats
    // count blocks to prevent problems in __kmp_finalize_bget()
    thr->numprel++; /* Number of expansion block releases */
    thr->numpblk--; /* Total number of blocks */
#endif
    lst = (void **)next;
  }

  KE_TRACE(
      5, ("__kmp_free_fast_memory: Freed T#%d\n", __kmp_gtid_from_thread(th)));
}
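
/* A minimal lifecycle sketch (hypothetical driver code; `th` is assumed to be
   a fully constructed kmp_info_t *, and __kmp_fast_allocate / __kmp_fast_free
   are assumed to be the usual kmp.h macro wrappers around the routines above):

     __kmp_initialize_fast_memory(th); // once, at thread creation
     void *p = __kmp_fast_allocate(th, 256); // falls in the 2-cache-line bucket
     __kmp_fast_free(th, p); // goes back onto th's self free list
     __kmp_free_fast_memory(th); // once, when the thread is reaped
*/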

#endif // USE_FAST_MEMORY
