1 /*
2  * kmp_alloc.c -- private/shared dynamic memory allocation and management
3  */
4 
5 
6 //===----------------------------------------------------------------------===//
7 //
8 //                     The LLVM Compiler Infrastructure
9 //
10 // This file is dual licensed under the MIT and the University of Illinois Open
11 // Source Licenses. See LICENSE.txt for details.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 
16 #include "kmp.h"
17 #include "kmp_wrapper_malloc.h"
18 #include "kmp_io.h"
19 
20 // Disable bget when it is not used
21 #if KMP_USE_BGET
22 
23 /* Thread private buffer management code */
24 
25 typedef int   (*bget_compact_t)(size_t, int);
26 typedef void *(*bget_acquire_t)(size_t);
27 typedef void  (*bget_release_t)(void *);
28 
29 /* NOTE: bufsize must be a signed datatype */
30 
31 #if KMP_OS_WINDOWS
32 # if KMP_ARCH_X86 || KMP_ARCH_ARM
33    typedef kmp_int32 bufsize;
34 # else
35    typedef kmp_int64 bufsize;
36 # endif
37 #else
38   typedef ssize_t bufsize;
39 #endif
40 
41 /* The three modes of operation are, fifo search, lifo search, and best-fit */
42 
43 typedef enum bget_mode {
44     bget_mode_fifo = 0,
45     bget_mode_lifo = 1,
46     bget_mode_best = 2
47 } bget_mode_t;
48 
49 
50 static void    bpool( kmp_info_t *th, void *buffer, bufsize len);
51 static void   *bget( kmp_info_t *th, bufsize size);
52 static void   *bgetz( kmp_info_t *th, bufsize size);
53 static void   *bgetr( kmp_info_t *th, void *buffer, bufsize newsize);
54 static void    brel( kmp_info_t *th, void *buf);
55 static void    bectl(  kmp_info_t *th, bget_compact_t compact, bget_acquire_t acquire, bget_release_t release, bufsize pool_incr );
56 
57 #ifdef KMP_DEBUG
58 static void    bstats( kmp_info_t *th, bufsize *curalloc, bufsize *totfree, bufsize *maxfree, long *nget, long *nrel);
59 static void    bstatse( kmp_info_t *th, bufsize *pool_incr, long *npool, long *npget, long *nprel, long *ndget, long *ndrel);
60 static void    bufdump( kmp_info_t *th, void *buf);
61 static void    bpoold( kmp_info_t *th, void *pool, int dumpalloc, int dumpfree);
62 static int     bpoolv( kmp_info_t *th, void *pool);
63 #endif
64 
65 /* BGET CONFIGURATION */
66                                       /* Buffer allocation size quantum:
67                                          all buffers allocated are a
68                                          multiple of this size.  This
69                                          MUST be a power of two. */
70 
71                                       /* On IA-32 architecture with  Linux* OS,
72                                          malloc() does not
73                                          ensure 16 byte alignmnent */
74 
75 #if KMP_ARCH_X86 || !KMP_HAVE_QUAD
76 
77 #define SizeQuant   8
78 #define AlignType   double
79 
80 #else
81 
82 #define SizeQuant   16
83 #define AlignType   _Quad
84 
85 #endif
86 
87 #define BufStats    1                 /* Define this symbol to enable the
88                                          bstats() function which calculates
89                                          the total free space in the buffer
90                                          pool, the largest available
91                                          buffer, and the total space
92                                          currently allocated. */
93 
94 #ifdef KMP_DEBUG
95 
96 #define BufDump     1                 /* Define this symbol to enable the
97                                          bpoold() function which dumps the
98                                          buffers in a buffer pool. */
99 
100 #define BufValid    1                 /* Define this symbol to enable the
101                                          bpoolv() function for validating
102                                          a buffer pool. */
103 
104 #define DumpData    1                 /* Define this symbol to enable the
105                                          bufdump() function which allows
106                                          dumping the contents of an allocated
107                                          or free buffer. */
108 #ifdef NOT_USED_NOW
109 
110 #define FreeWipe    1                 /* Wipe free buffers to a guaranteed
111                                          pattern of garbage to trip up
112                                          miscreants who attempt to use
113                                          pointers into released buffers. */
114 
115 #define BestFit     1                 /* Use a best fit algorithm when
116                                          searching for space for an
117                                          allocation request.  This uses
118                                          memory more efficiently, but
119                                          allocation will be much slower. */
120 #endif /* NOT_USED_NOW */
121 #endif /* KMP_DEBUG */
122 
123 
124 static bufsize bget_bin_size[ ] = {
125     0,
126 //    1 << 6,    /* .5 Cache line */
127     1 << 7,    /* 1 Cache line, new */
128     1 << 8,    /* 2 Cache lines */
129     1 << 9,    /* 4 Cache lines, new */
130     1 << 10,   /* 8 Cache lines */
131     1 << 11,   /* 16 Cache lines, new */
132     1 << 12,
133     1 << 13,   /* new */
134     1 << 14,
135     1 << 15,   /* new */
136     1 << 16,
137     1 << 17,
138     1 << 18,
139     1 << 19,
140     1 << 20,    /*  1MB */
141     1 << 21,    /*  2MB */
142     1 << 22,    /*  4MB */
143     1 << 23,    /*  8MB */
144     1 << 24,    /* 16MB */
145     1 << 25,    /* 32MB */
146 };
147 
148 #define MAX_BGET_BINS   (int)(sizeof(bget_bin_size) / sizeof(bufsize))
149 
150 struct bfhead;
151 
152 /*  Declare the interface, including the requested buffer size type,
153     bufsize.  */
154 
155 /* Queue links */
156 
157 typedef struct qlinks {
158     struct bfhead *flink;             /* Forward link */
159     struct bfhead *blink;             /* Backward link */
160 } qlinks_t;
161 
162 /* Header in allocated and free buffers */
163 
164 typedef struct bhead2 {
165     kmp_info_t *bthr;                 /* The thread which owns the buffer pool */
166     bufsize     prevfree;             /* Relative link back to previous
167                                          free buffer in memory or 0 if
168                                          previous buffer is allocated.  */
169     bufsize     bsize;                /* Buffer size: positive if free,
170                                          negative if allocated. */
171 } bhead2_t;
172 
173 /* Make sure the bhead structure is a multiple of SizeQuant in size. */
174 
175 typedef union bhead {
176     KMP_ALIGN( SizeQuant )
177     AlignType           b_align;
178     char                b_pad[ sizeof(bhead2_t) + (SizeQuant - (sizeof(bhead2_t) % SizeQuant)) ];
179     bhead2_t            bb;
180 } bhead_t;
181 #define BH(p)   ((bhead_t *) (p))
182 
183 /*  Header in directly allocated buffers (by acqfcn) */
184 
185 typedef struct bdhead
186 {
187     bufsize tsize;                    /* Total size, including overhead */
188     bhead_t bh;                       /* Common header */
189 } bdhead_t;
190 #define BDH(p)  ((bdhead_t *) (p))
191 
192 /* Header in free buffers */
193 
194 typedef struct bfhead {
195     bhead_t  bh;                      /* Common allocated/free header */
196     qlinks_t ql;                      /* Links on free list */
197 } bfhead_t;
198 #define BFH(p)  ((bfhead_t *) (p))
199 
200 typedef struct thr_data {
201     bfhead_t freelist[ MAX_BGET_BINS ];
202 #if BufStats
203     size_t totalloc;               /* Total space currently allocated */
204     long numget, numrel;           /* Number of bget() and brel() calls */
205     long numpblk;                  /* Number of pool blocks */
206     long numpget, numprel;         /* Number of block gets and rels */
207     long numdget, numdrel;         /* Number of direct gets and rels */
208 #endif /* BufStats */
209 
210     /* Automatic expansion block management functions */
211     bget_compact_t compfcn;
212     bget_acquire_t acqfcn;
213     bget_release_t relfcn;
214 
215     bget_mode_t    mode;              /* what allocation mode to use? */
216 
217     bufsize exp_incr;                 /* Expansion block size */
218     bufsize pool_len;                 /* 0: no bpool calls have been made
219                                          -1: not all pool blocks are
220                                              the same size
221                                          >0: (common) block size for all
222                                              bpool calls made so far
223                                       */
224     bfhead_t * last_pool;             /* Last pool owned by this thread (delay dealocation) */
225 } thr_data_t;
226 
227 /*  Minimum allocation quantum: */
228 
229 #define QLSize  (sizeof(qlinks_t))
230 #define SizeQ   ((SizeQuant > QLSize) ? SizeQuant : QLSize)
231 #define MaxSize (bufsize)( ~ ( ( (bufsize)( 1 ) << ( sizeof( bufsize ) * CHAR_BIT - 1 ) ) | ( SizeQuant - 1 ) ) )
232     // Maximun for the requested size.
233 
234 /* End sentinel: value placed in bsize field of dummy block delimiting
235    end of pool block.  The most negative number which will  fit  in  a
236    bufsize, defined in a way that the compiler will accept. */
237 
238 #define ESent   ((bufsize) (-(((((bufsize)1)<<((int)sizeof(bufsize)*8-2))-1)*2)-2))
239 
240 /* ------------------------------------------------------------------------ */
241 
242 /* Thread Data management routines */
243 
244 static int
245 bget_get_bin( bufsize size )
246 {
247     // binary chop bins
248     int lo = 0, hi = MAX_BGET_BINS - 1;
249 
250     KMP_DEBUG_ASSERT( size > 0 );
251 
252     while ( (hi - lo) > 1 ) {
253         int mid = (lo + hi) >> 1;
254         if (size < bget_bin_size[ mid ])
255             hi = mid - 1;
256         else
257             lo = mid;
258     }
259 
260     KMP_DEBUG_ASSERT( (lo >= 0) && (lo < MAX_BGET_BINS) );
261 
262     return lo;
263 }
264 
265 static void
266 set_thr_data( kmp_info_t *th )
267 {
268     int i;
269     thr_data_t *data;
270 
271     data =
272         (thr_data_t *)(
273             ( ! th->th.th_local.bget_data ) ? __kmp_allocate( sizeof( *data ) ) : th->th.th_local.bget_data
274         );
275 
276     memset( data, '\0', sizeof( *data ) );
277 
278     for (i = 0; i < MAX_BGET_BINS; ++i) {
279         data->freelist[ i ].ql.flink = & data->freelist[ i ];
280         data->freelist[ i ].ql.blink = & data->freelist[ i ];
281     }
282 
283     th->th.th_local.bget_data = data;
284     th->th.th_local.bget_list = 0;
285 #if ! USE_CMP_XCHG_FOR_BGET
286 #ifdef USE_QUEUING_LOCK_FOR_BGET
287     __kmp_init_lock( & th->th.th_local.bget_lock );
288 #else
289     __kmp_init_bootstrap_lock( & th->th.th_local.bget_lock );
290 #endif /* USE_LOCK_FOR_BGET */
291 #endif /* ! USE_CMP_XCHG_FOR_BGET */
292 }
293 
294 static thr_data_t *
295 get_thr_data( kmp_info_t *th )
296 {
297     thr_data_t *data;
298 
299     data = (thr_data_t *) th->th.th_local.bget_data;
300 
301     KMP_DEBUG_ASSERT( data != 0 );
302 
303     return data;
304 }
305 
306 
307 #ifdef KMP_DEBUG
308 
309 static void
310 __kmp_bget_validate_queue( kmp_info_t *th )
311 {
312     /* NOTE: assume that the global_lock is held */
313 
314     void *p = (void *) th->th.th_local.bget_list;
315 
316     while (p != 0) {
317         bfhead_t *b = BFH(((char *) p) - sizeof(bhead_t));
318 
319         KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
320         p = (void *) b->ql.flink;
321     }
322 }
323 
324 #endif
325 
326 /* Walk the free list and release the enqueued buffers */
327 
328 static void
329 __kmp_bget_dequeue( kmp_info_t *th )
330 {
331     void *p = TCR_SYNC_PTR(th->th.th_local.bget_list);
332 
333     if (p != 0) {
334         #if USE_CMP_XCHG_FOR_BGET
335             {
336                 volatile void *old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
337                 while ( ! KMP_COMPARE_AND_STORE_PTR(
338                     & th->th.th_local.bget_list, old_value, NULL ) )
339                 {
340                     KMP_CPU_PAUSE();
341                     old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
342                 }
343                 p = (void *) old_value;
344             }
345         #else /* ! USE_CMP_XCHG_FOR_BGET */
346             #ifdef USE_QUEUING_LOCK_FOR_BGET
347                 __kmp_acquire_lock( & th->th.th_local.bget_lock,
348                                     __kmp_gtid_from_thread(th) );
349             #else
350                 __kmp_acquire_bootstrap_lock( & th->th.th_local.bget_lock );
351             #endif /* USE_QUEUING_LOCK_FOR_BGET */
352 
353              p = (void *) th->th.th_local.bget_list;
354              th->th.th_local.bget_list = 0;
355 
356             #ifdef USE_QUEUING_LOCK_FOR_BGET
357                 __kmp_release_lock( & th->th.th_local.bget_lock,
358                                     __kmp_gtid_from_thread(th) );
359             #else
360                 __kmp_release_bootstrap_lock( & th->th.th_local.bget_lock );
361             #endif
362         #endif /* USE_CMP_XCHG_FOR_BGET */
363 
364         /* Check again to make sure the list is not empty */
365 
366         while (p != 0) {
367             void *buf = p;
368             bfhead_t *b = BFH(((char *) p) - sizeof(bhead_t));
369 
370             KMP_DEBUG_ASSERT( b->bh.bb.bsize != 0 );
371             KMP_DEBUG_ASSERT( ( (kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1 ) ==
372                                 (kmp_uintptr_t)th ); // clear possible mark
373             KMP_DEBUG_ASSERT( b->ql.blink == 0 );
374 
375             p = (void *) b->ql.flink;
376 
377             brel( th, buf );
378         }
379     }
380 }
381 
382 /* Chain together the free buffers by using the thread owner field */
383 
384 static void
385 __kmp_bget_enqueue( kmp_info_t *th, void *buf
386 #ifdef USE_QUEUING_LOCK_FOR_BGET
387                     , kmp_int32 rel_gtid
388 #endif
389                   )
390 {
391     bfhead_t *b = BFH(((char *) buf) - sizeof(bhead_t));
392 
393     KMP_DEBUG_ASSERT( b->bh.bb.bsize != 0 );
394     KMP_DEBUG_ASSERT( ( (kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1 ) ==
395                         (kmp_uintptr_t)th ); // clear possible mark
396 
397     b->ql.blink = 0;
398 
399     KC_TRACE( 10, ( "__kmp_bget_enqueue: moving buffer to T#%d list\n",
400                     __kmp_gtid_from_thread( th ) ) );
401 
402 #if USE_CMP_XCHG_FOR_BGET
403     {
404         volatile void *old_value = TCR_PTR(th->th.th_local.bget_list);
405         /* the next pointer must be set before setting bget_list to buf to avoid
406            exposing a broken list to other threads, even for an instant. */
407         b->ql.flink = BFH( old_value );
408 
409         while ( ! KMP_COMPARE_AND_STORE_PTR(
410             & th->th.th_local.bget_list, old_value, buf ) )
411         {
412             KMP_CPU_PAUSE();
413             old_value = TCR_PTR(th->th.th_local.bget_list);
414             /* the next pointer must be set before setting bget_list to buf to avoid
415                exposing a broken list to other threads, even for an instant. */
416             b->ql.flink = BFH( old_value );
417         }
418     }
419 #else /* ! USE_CMP_XCHG_FOR_BGET */
420 # ifdef USE_QUEUING_LOCK_FOR_BGET
421     __kmp_acquire_lock( & th->th.th_local.bget_lock, rel_gtid );
422 # else
423     __kmp_acquire_bootstrap_lock( & th->th.th_local.bget_lock );
424  # endif
425 
426     b->ql.flink = BFH( th->th.th_local.bget_list );
427     th->th.th_local.bget_list = (void *) buf;
428 
429 # ifdef USE_QUEUING_LOCK_FOR_BGET
430     __kmp_release_lock( & th->th.th_local.bget_lock, rel_gtid );
431 # else
432     __kmp_release_bootstrap_lock( & th->th.th_local.bget_lock );
433 # endif
434 #endif /* USE_CMP_XCHG_FOR_BGET */
435 }
436 
437 /* insert buffer back onto a new freelist */
438 
439 static void
440 __kmp_bget_insert_into_freelist( thr_data_t *thr, bfhead_t *b )
441 {
442     int bin;
443 
444     KMP_DEBUG_ASSERT( ((size_t)b ) % SizeQuant == 0 );
445     KMP_DEBUG_ASSERT( b->bh.bb.bsize % SizeQuant == 0 );
446 
447     bin = bget_get_bin( b->bh.bb.bsize );
448 
449     KMP_DEBUG_ASSERT(thr->freelist[ bin ].ql.blink->ql.flink == &thr->freelist[ bin ]);
450     KMP_DEBUG_ASSERT(thr->freelist[ bin ].ql.flink->ql.blink == &thr->freelist[ bin ]);
451 
452     b->ql.flink = &thr->freelist[ bin ];
453     b->ql.blink = thr->freelist[ bin ].ql.blink;
454 
455     thr->freelist[ bin ].ql.blink = b;
456     b->ql.blink->ql.flink = b;
457 }
458 
459 /* unlink the buffer from the old freelist */
460 
461 static void
462 __kmp_bget_remove_from_freelist( bfhead_t *b )
463 {
464     KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
465     KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
466 
467     b->ql.blink->ql.flink = b->ql.flink;
468     b->ql.flink->ql.blink = b->ql.blink;
469 }
470 
471 /* ------------------------------------------------------------------------ */
472 
473 /*  GET STATS -- check info on free list */
474 
475 static void
476 bcheck(  kmp_info_t *th, bufsize *max_free, bufsize *total_free )
477 {
478     thr_data_t *thr = get_thr_data( th );
479     int bin;
480 
481     *total_free = *max_free = 0;
482 
483     for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
484         bfhead_t *b, *best;
485 
486         best = &thr->freelist[ bin ];
487         b = best->ql.flink;
488 
489         while (b != &thr->freelist[ bin ]) {
490             *total_free += (b->bh.bb.bsize - sizeof( bhead_t ));
491             if ((best == &thr->freelist[ bin ]) || (b->bh.bb.bsize < best->bh.bb.bsize))
492                 best = b;
493 
494             /* Link to next buffer */
495             b = b->ql.flink;
496         }
497 
498         if (*max_free < best->bh.bb.bsize)
499             *max_free = best->bh.bb.bsize;
500     }
501 
502     if (*max_free > (bufsize)sizeof( bhead_t ))
503         *max_free -= sizeof( bhead_t );
504 }
505 
506 /* ------------------------------------------------------------------------ */
507 
508 /*  BGET  --  Allocate a buffer.  */
509 
510 static void *
511 bget(  kmp_info_t *th, bufsize requested_size )
512 {
513     thr_data_t *thr = get_thr_data( th );
514     bufsize size = requested_size;
515     bfhead_t *b;
516     void *buf;
517     int compactseq = 0;
518     int use_blink = 0;
519 /* For BestFit */
520     bfhead_t *best;
521 
522     if ( size < 0 || size + sizeof( bhead_t ) > MaxSize ) {
523         return NULL;
524     }; // if
525 
526     __kmp_bget_dequeue( th );         /* Release any queued buffers */
527 
528     if (size < (bufsize)SizeQ) {      /* Need at least room for the */
529         size = SizeQ;                 /*    queue links.  */
530     }
531     #if defined( SizeQuant ) && ( SizeQuant > 1 )
532         size = (size + (SizeQuant - 1)) & (~(SizeQuant - 1));
533     #endif
534 
535     size += sizeof(bhead_t);     /* Add overhead in allocated buffer
536                                          to size required. */
537     KMP_DEBUG_ASSERT( size >= 0 );
538     KMP_DEBUG_ASSERT( size % SizeQuant == 0 );
539 
540     use_blink = ( thr->mode == bget_mode_lifo );
541 
542     /* If a compact function was provided in the call to bectl(), wrap
543        a loop around the allocation process  to  allow  compaction  to
544        intervene in case we don't find a suitable buffer in the chain. */
545 
546     for (;;) {
547         int bin;
548 
549         for (bin = bget_get_bin( size ); bin < MAX_BGET_BINS; ++bin) {
550             /* Link to next buffer */
551             b = ( use_blink ? thr->freelist[ bin ].ql.blink : thr->freelist[ bin ].ql.flink );
552 
553             if (thr->mode == bget_mode_best) {
554                 best = &thr->freelist[ bin ];
555 
556                 /* Scan the free list searching for the first buffer big enough
557                    to hold the requested size buffer. */
558 
559                 while (b != &thr->freelist[ bin ]) {
560                     if (b->bh.bb.bsize >= (bufsize) size) {
561                         if ((best == &thr->freelist[ bin ]) || (b->bh.bb.bsize < best->bh.bb.bsize)) {
562                             best = b;
563                         }
564                     }
565 
566                     /* Link to next buffer */
567                     b = ( use_blink ? b->ql.blink : b->ql.flink );
568                 }
569                 b = best;
570             }
571 
572             while (b != &thr->freelist[ bin ]) {
573                 if ((bufsize) b->bh.bb.bsize >= (bufsize) size) {
574 
575                     /* Buffer  is big enough to satisfy  the request.  Allocate it
576                        to the caller.  We must decide whether the buffer is  large
577                        enough  to  split  into  the part given to the caller and a
578                        free buffer that remains on the free list, or  whether  the
579                        entire  buffer  should  be  removed  from the free list and
580                        given to the caller in its entirety.   We  only  split  the
581                        buffer if enough room remains for a header plus the minimum
582                        quantum of allocation. */
583 
584                     if ((b->bh.bb.bsize - (bufsize) size) > (bufsize)(SizeQ + (sizeof(bhead_t)))) {
585                         bhead_t *ba, *bn;
586 
587                         ba = BH(((char *) b) + (b->bh.bb.bsize - (bufsize) size));
588                         bn = BH(((char *) ba) + size);
589 
590                         KMP_DEBUG_ASSERT(bn->bb.prevfree == b->bh.bb.bsize);
591 
592                         /* Subtract size from length of free block. */
593                         b->bh.bb.bsize -= (bufsize) size;
594 
595                         /* Link allocated buffer to the previous free buffer. */
596                         ba->bb.prevfree = b->bh.bb.bsize;
597 
598                         /* Plug negative size into user buffer. */
599                         ba->bb.bsize = -size;
600 
601                         /* Mark this buffer as owned by this thread. */
602                         TCW_PTR(ba->bb.bthr, th);   // not an allocated address (do not mark it)
603                         /* Mark buffer after this one not preceded by free block. */
604                         bn->bb.prevfree = 0;
605 
606                         /* unlink the buffer from the old freelist, and reinsert it into the new freelist */
607                         __kmp_bget_remove_from_freelist( b );
608                         __kmp_bget_insert_into_freelist( thr, b );
609 #if BufStats
610                         thr->totalloc += (size_t) size;
611                         thr->numget++;        /* Increment number of bget() calls */
612 #endif
613                         buf = (void *) ((((char *) ba) + sizeof(bhead_t)));
614                         KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
615                         return buf;
616                     } else {
617                         bhead_t *ba;
618 
619                         ba = BH(((char *) b) + b->bh.bb.bsize);
620 
621                         KMP_DEBUG_ASSERT(ba->bb.prevfree == b->bh.bb.bsize);
622 
623                         /* The buffer isn't big enough to split.  Give  the  whole
624                            shebang to the caller and remove it from the free list. */
625 
626                        __kmp_bget_remove_from_freelist( b );
627 #if BufStats
628                         thr->totalloc += (size_t) b->bh.bb.bsize;
629                         thr->numget++;        /* Increment number of bget() calls */
630 #endif
631                         /* Negate size to mark buffer allocated. */
632                         b->bh.bb.bsize = -(b->bh.bb.bsize);
633 
634                         /* Mark this buffer as owned by this thread. */
635                         TCW_PTR(ba->bb.bthr, th);   // not an allocated address (do not mark it)
636                         /* Zero the back pointer in the next buffer in memory
637                            to indicate that this buffer is allocated. */
638                         ba->bb.prevfree = 0;
639 
640                         /* Give user buffer starting at queue links. */
641                         buf =  (void *) &(b->ql);
642                         KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
643                         return buf;
644                     }
645                 }
646 
647                 /* Link to next buffer */
648                 b = ( use_blink ? b->ql.blink : b->ql.flink );
649             }
650         }
651 
652         /* We failed to find a buffer.  If there's a compact  function
653            defined,  notify  it  of the size requested.  If it returns
654            TRUE, try the allocation again. */
655 
656         if ((thr->compfcn == 0) || (!(*thr->compfcn)(size, ++compactseq))) {
657             break;
658         }
659     }
660 
661     /* No buffer available with requested size free. */
662 
663     /* Don't give up yet -- look in the reserve supply. */
664 
665     if (thr->acqfcn != 0) {
666         if (size > (bufsize) (thr->exp_incr - sizeof(bhead_t))) {
667 
668             /* Request  is  too  large  to  fit in a single expansion
669                block.  Try to satisy it by a direct buffer acquisition. */
670 
671             bdhead_t *bdh;
672 
673             size += sizeof(bdhead_t) - sizeof(bhead_t);
674 
675             KE_TRACE( 10, ("%%%%%% MALLOC( %d )\n", (int) size ) );
676 
677             /* richryan */
678             bdh = BDH((*thr->acqfcn)((bufsize) size));
679             if (bdh != NULL) {
680 
681                 /*  Mark the buffer special by setting the size field
682                     of its header to zero.  */
683                 bdh->bh.bb.bsize = 0;
684 
685                 /* Mark this buffer as owned by this thread. */
686                 TCW_PTR(bdh->bh.bb.bthr, th);  // don't mark buffer as allocated,
687                                                // because direct buffer never goes to free list
688                 bdh->bh.bb.prevfree = 0;
689                 bdh->tsize = size;
690 #if BufStats
691                 thr->totalloc += (size_t) size;
692                 thr->numget++;        /* Increment number of bget() calls */
693                 thr->numdget++;       /* Direct bget() call count */
694 #endif
695                 buf =  (void *) (bdh + 1);
696                 KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
697                 return buf;
698             }
699 
700         } else {
701 
702             /*  Try to obtain a new expansion block */
703 
704             void *newpool;
705 
706             KE_TRACE( 10, ("%%%%%% MALLOCB( %d )\n", (int) thr->exp_incr ) );
707 
708             /* richryan */
709             newpool = (*thr->acqfcn)((bufsize) thr->exp_incr);
710             KMP_DEBUG_ASSERT( ((size_t)newpool) % SizeQuant == 0 );
711             if (newpool != NULL) {
712                 bpool( th, newpool, thr->exp_incr);
713                 buf =  bget( th, requested_size);  /* This can't, I say, can't get into a loop. */
714                 return buf;
715             }
716         }
717     }
718 
719     /*  Still no buffer available */
720 
721     return NULL;
722 }
723 
724 /*  BGETZ  --  Allocate a buffer and clear its contents to zero.  We clear
725                the  entire  contents  of  the buffer to zero, not just the
726                region requested by the caller. */
727 
728 static void *
729 bgetz(  kmp_info_t *th, bufsize size )
730 {
731     char *buf = (char *) bget( th, size);
732 
733     if (buf != NULL) {
734         bhead_t *b;
735         bufsize rsize;
736 
737         b = BH(buf - sizeof(bhead_t));
738         rsize = -(b->bb.bsize);
739         if (rsize == 0) {
740             bdhead_t *bd;
741 
742             bd = BDH(buf - sizeof(bdhead_t));
743             rsize = bd->tsize - (bufsize) sizeof(bdhead_t);
744         } else {
745             rsize -= sizeof(bhead_t);
746         }
747 
748         KMP_DEBUG_ASSERT(rsize >= size);
749 
750         (void) memset(buf, 0, (bufsize) rsize);
751     }
752     return ((void *) buf);
753 }
754 
755 /*  BGETR  --  Reallocate a buffer.  This is a minimal implementation,
756                simply in terms of brel()  and  bget().   It  could  be
757                enhanced to allow the buffer to grow into adjacent free
758                blocks and to avoid moving data unnecessarily.  */
759 
760 static void *
761 bgetr(  kmp_info_t *th, void *buf, bufsize size)
762 {
763     void *nbuf;
764     bufsize osize;                    /* Old size of buffer */
765     bhead_t *b;
766 
767     nbuf = bget( th, size );
768     if ( nbuf == NULL ) { /* Acquire new buffer */
769         return NULL;
770     }
771     if ( buf == NULL ) {
772         return nbuf;
773     }
774     b = BH(((char *) buf) - sizeof(bhead_t));
775     osize = -b->bb.bsize;
776     if (osize == 0) {
777         /*  Buffer acquired directly through acqfcn. */
778         bdhead_t *bd;
779 
780         bd = BDH(((char *) buf) - sizeof(bdhead_t));
781         osize = bd->tsize - (bufsize) sizeof(bdhead_t);
782     } else {
783         osize -= sizeof(bhead_t);
784     };
785 
786     KMP_DEBUG_ASSERT(osize > 0);
787 
788     (void) KMP_MEMCPY((char *) nbuf, (char *) buf, /* Copy the data */
789              (size_t) ((size < osize) ? size : osize));
790     brel( th, buf );
791 
792     return nbuf;
793 }
794 
795 /*  BREL  --  Release a buffer.  */
796 
797 static void
798 brel(  kmp_info_t *th, void *buf )
799 {
800     thr_data_t *thr = get_thr_data( th );
801     bfhead_t *b, *bn;
802     kmp_info_t *bth;
803 
804     KMP_DEBUG_ASSERT(buf != NULL);
805     KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
806 
807     b = BFH(((char *) buf) - sizeof(bhead_t));
808 
809     if (b->bh.bb.bsize == 0) {        /* Directly-acquired buffer? */
810         bdhead_t *bdh;
811 
812         bdh = BDH(((char *) buf) - sizeof(bdhead_t));
813         KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
814 #if BufStats
815         thr->totalloc -= (size_t) bdh->tsize;
816         thr->numdrel++;               /* Number of direct releases */
817         thr->numrel++;                /* Increment number of brel() calls */
818 #endif /* BufStats */
819 #ifdef FreeWipe
820         (void) memset((char *) buf, 0x55,
821                  (size_t) (bdh->tsize - sizeof(bdhead_t)));
822 #endif /* FreeWipe */
823 
824         KE_TRACE( 10, ("%%%%%% FREE( %p )\n", (void *) bdh ) );
825 
826         KMP_DEBUG_ASSERT( thr->relfcn != 0 );
827         (*thr->relfcn)((void *) bdh);      /* Release it directly. */
828         return;
829     }
830 
831     bth = (kmp_info_t *)( (kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1 ); // clear possible mark before comparison
832     if ( bth != th ) {
833         /* Add this buffer to be released by the owning thread later */
834         __kmp_bget_enqueue( bth, buf
835 #ifdef USE_QUEUING_LOCK_FOR_BGET
836                             , __kmp_gtid_from_thread( th )
837 #endif
838         );
839         return;
840     }
841 
842     /* Buffer size must be negative, indicating that the buffer is
843        allocated. */
844 
845     if (b->bh.bb.bsize >= 0) {
846         bn = NULL;
847     }
848     KMP_DEBUG_ASSERT(b->bh.bb.bsize < 0);
849 
850     /*  Back pointer in next buffer must be zero, indicating the
851         same thing: */
852 
853     KMP_DEBUG_ASSERT(BH((char *) b - b->bh.bb.bsize)->bb.prevfree == 0);
854 
855 #if BufStats
856     thr->numrel++;                    /* Increment number of brel() calls */
857     thr->totalloc += (size_t) b->bh.bb.bsize;
858 #endif
859 
860     /* If the back link is nonzero, the previous buffer is free.  */
861 
862     if (b->bh.bb.prevfree != 0) {
863         /* The previous buffer is free.  Consolidate this buffer  with  it
864            by  adding  the  length  of  this  buffer  to the previous free
865            buffer.  Note that we subtract the size  in  the  buffer  being
866            released,  since  it's  negative to indicate that the buffer is
867            allocated. */
868 
869         register bufsize size = b->bh.bb.bsize;
870 
871         /* Make the previous buffer the one we're working on. */
872         KMP_DEBUG_ASSERT(BH((char *) b - b->bh.bb.prevfree)->bb.bsize == b->bh.bb.prevfree);
873         b = BFH(((char *) b) - b->bh.bb.prevfree);
874         b->bh.bb.bsize -= size;
875 
876         /* unlink the buffer from the old freelist */
877         __kmp_bget_remove_from_freelist( b );
878     }
879     else {
880         /* The previous buffer isn't allocated.  Mark this buffer
881            size as positive (i.e. free) and fall through to place
882            the buffer on the free list as an isolated free block. */
883 
884         b->bh.bb.bsize = -b->bh.bb.bsize;
885     }
886 
887     /* insert buffer back onto a new freelist */
888     __kmp_bget_insert_into_freelist( thr, b );
889 
890 
891     /* Now we look at the next buffer in memory, located by advancing from
892        the  start  of  this  buffer  by its size, to see if that buffer is
893        free.  If it is, we combine  this  buffer  with  the  next  one  in
894        memory, dechaining the second buffer from the free list. */
895 
896     bn =  BFH(((char *) b) + b->bh.bb.bsize);
897     if (bn->bh.bb.bsize > 0) {
898 
899         /* The buffer is free.  Remove it from the free list and add
900            its size to that of our buffer. */
901 
902         KMP_DEBUG_ASSERT(BH((char *) bn + bn->bh.bb.bsize)->bb.prevfree == bn->bh.bb.bsize);
903 
904         __kmp_bget_remove_from_freelist( bn );
905 
906         b->bh.bb.bsize += bn->bh.bb.bsize;
907 
908         /* unlink the buffer from the old freelist, and reinsert it into the new freelist */
909 
910         __kmp_bget_remove_from_freelist( b );
911         __kmp_bget_insert_into_freelist( thr, b );
912 
913         /* Finally,  advance  to   the  buffer  that   follows  the  newly
914            consolidated free block.  We must set its  backpointer  to  the
915            head  of  the  consolidated free block.  We know the next block
916            must be an allocated block because the process of recombination
917            guarantees  that  two  free  blocks will never be contiguous in
918            memory.  */
919 
920         bn = BFH(((char *) b) + b->bh.bb.bsize);
921     }
922 #ifdef FreeWipe
923     (void) memset(((char *) b) + sizeof(bfhead_t), 0x55,
924             (size_t) (b->bh.bb.bsize - sizeof(bfhead_t)));
925 #endif
926     KMP_DEBUG_ASSERT(bn->bh.bb.bsize < 0);
927 
928     /* The next buffer is allocated.  Set the backpointer in it  to  point
929        to this buffer; the previous free buffer in memory. */
930 
931     bn->bh.bb.prevfree = b->bh.bb.bsize;
932 
933     /*  If  a  block-release function is defined, and this free buffer
934         constitutes the entire block, release it.  Note that  pool_len
935         is  defined  in  such a way that the test will fail unless all
936         pool blocks are the same size.  */
937 
938     if (thr->relfcn != 0 &&
939         b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t)))
940     {
941 #if BufStats
942         if (thr->numpblk != 1) {        /* Do not release the last buffer until finalization time */
943 #endif
944 
945             KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
946             KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.bsize == ESent);
947             KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.prevfree == b->bh.bb.bsize);
948 
949             /*  Unlink the buffer from the free list  */
950             __kmp_bget_remove_from_freelist( b );
951 
952             KE_TRACE( 10, ("%%%%%% FREE( %p )\n", (void *) b ) );
953 
954             (*thr->relfcn)(b);
955 #if BufStats
956             thr->numprel++;               /* Nr of expansion block releases */
957             thr->numpblk--;               /* Total number of blocks */
958             KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
959 
960             /* avoid leaving stale last_pool pointer around if it is being dealloced */
961             if (thr->last_pool == b) thr->last_pool = 0;
962         }
963         else {
964             thr->last_pool = b;
965         }
966 #endif /* BufStats */
967     }
968 }
969 
970 /*  BECTL  --  Establish automatic pool expansion control  */
971 
972 static void
973 bectl(  kmp_info_t *th, bget_compact_t compact, bget_acquire_t acquire, bget_release_t release, bufsize pool_incr)
974 {
975     thr_data_t *thr = get_thr_data( th );
976 
977     thr->compfcn = compact;
978     thr->acqfcn = acquire;
979     thr->relfcn = release;
980     thr->exp_incr = pool_incr;
981 }
982 
983 /*  BPOOL  --  Add a region of memory to the buffer pool.  */
984 
985 static void
986 bpool(  kmp_info_t *th, void *buf, bufsize len)
987 {
988 /*    int bin = 0; */
989     thr_data_t *thr = get_thr_data( th );
990     bfhead_t *b = BFH(buf);
991     bhead_t *bn;
992 
993     __kmp_bget_dequeue( th );         /* Release any queued buffers */
994 
995 #ifdef SizeQuant
996     len &= ~(SizeQuant - 1);
997 #endif
998     if (thr->pool_len == 0) {
999         thr->pool_len = len;
1000     } else if (len != thr->pool_len) {
1001         thr->pool_len = -1;
1002     }
1003 #if BufStats
1004     thr->numpget++;                   /* Number of block acquisitions */
1005     thr->numpblk++;                   /* Number of blocks total */
1006     KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
1007 #endif /* BufStats */
1008 
1009     /* Since the block is initially occupied by a single free  buffer,
1010        it  had  better  not  be  (much) larger than the largest buffer
1011        whose size we can store in bhead.bb.bsize. */
1012 
1013     KMP_DEBUG_ASSERT(len - sizeof(bhead_t) <= -((bufsize) ESent + 1));
1014 
1015     /* Clear  the  backpointer at  the start of the block to indicate that
1016        there  is  no  free  block  prior  to  this   one.    That   blocks
1017        recombination when the first block in memory is released. */
1018 
1019     b->bh.bb.prevfree = 0;
1020 
1021     /* Create a dummy allocated buffer at the end of the pool.  This dummy
1022        buffer is seen when a buffer at the end of the pool is released and
1023        blocks  recombination  of  the last buffer with the dummy buffer at
1024        the end.  The length in the dummy buffer  is  set  to  the  largest
1025        negative  number  to  denote  the  end  of  the pool for diagnostic
1026        routines (this specific value is  not  counted  on  by  the  actual
1027        allocation and release functions). */
1028 
1029     len -= sizeof(bhead_t);
1030     b->bh.bb.bsize = (bufsize) len;
1031     /* Set the owner of this buffer */
1032     TCW_PTR( b->bh.bb.bthr, (kmp_info_t*)((kmp_uintptr_t)th | 1) ); // mark the buffer as allocated address
1033 
1034     /* Chain the new block to the free list. */
1035     __kmp_bget_insert_into_freelist( thr, b );
1036 
1037 #ifdef FreeWipe
1038     (void) memset(((char *) b) + sizeof(bfhead_t), 0x55,
1039              (size_t) (len - sizeof(bfhead_t)));
1040 #endif
1041     bn = BH(((char *) b) + len);
1042     bn->bb.prevfree = (bufsize) len;
1043     /* Definition of ESent assumes two's complement! */
1044     KMP_DEBUG_ASSERT( (~0) == -1 && (bn != 0) );
1045 
1046     bn->bb.bsize = ESent;
1047 }
1048 
1049 /* ------------------------------------------------------------------------ */
1050 
1051 /*  BFREED  --  Dump the free lists for this thread. */
1052 
1053 static void
1054 bfreed(  kmp_info_t *th )
1055 {
1056     int bin = 0, count = 0;
1057     int gtid = __kmp_gtid_from_thread( th );
1058     thr_data_t *thr = get_thr_data( th );
1059 
1060 #if BufStats
1061     __kmp_printf_no_lock("__kmp_printpool: T#%d total=%" KMP_UINT64_SPEC " get=%" KMP_INT64_SPEC " rel=%" \
1062            KMP_INT64_SPEC " pblk=%" KMP_INT64_SPEC " pget=%" KMP_INT64_SPEC " prel=%" KMP_INT64_SPEC \
1063            " dget=%" KMP_INT64_SPEC " drel=%" KMP_INT64_SPEC "\n",
1064            gtid, (kmp_uint64) thr->totalloc,
1065            (kmp_int64) thr->numget,  (kmp_int64) thr->numrel,
1066            (kmp_int64) thr->numpblk,
1067            (kmp_int64) thr->numpget, (kmp_int64) thr->numprel,
1068            (kmp_int64) thr->numdget, (kmp_int64) thr->numdrel );
1069 #endif
1070 
1071     for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
1072         bfhead_t *b;
1073 
1074         for (b = thr->freelist[ bin ].ql.flink; b != &thr->freelist[ bin ]; b = b->ql.flink) {
1075             bufsize bs = b->bh.bb.bsize;
1076 
1077             KMP_DEBUG_ASSERT( b->ql.blink->ql.flink == b );
1078             KMP_DEBUG_ASSERT( b->ql.flink->ql.blink == b );
1079             KMP_DEBUG_ASSERT( bs > 0 );
1080 
1081             count += 1;
1082 
1083             __kmp_printf_no_lock("__kmp_printpool: T#%d Free block: 0x%p size %6ld bytes.\n", gtid, b, (long) bs );
1084 #ifdef FreeWipe
1085             {
1086                 char *lerr = ((char *) b) + sizeof(bfhead_t);
1087                 if ((bs > sizeof(bfhead_t)) && ((*lerr != 0x55) || (memcmp(lerr, lerr + 1, (size_t) (bs - (sizeof(bfhead_t) + 1))) != 0))) {
1088                     __kmp_printf_no_lock( "__kmp_printpool: T#%d     (Contents of above free block have been overstored.)\n", gtid );
1089                 }
1090             }
1091 #endif
1092         }
1093     }
1094 
1095     if (count == 0)
1096         __kmp_printf_no_lock("__kmp_printpool: T#%d No free blocks\n", gtid );
1097 }
1098 
1099 /* ------------------------------------------------------------------------ */
1100 
1101 #ifdef KMP_DEBUG
1102 
1103 #if BufStats
1104 
1105 /*  BSTATS  --  Return buffer allocation free space statistics.  */
1106 
1107 static void
1108 bstats(  kmp_info_t *th, bufsize *curalloc,  bufsize *totfree,  bufsize *maxfree, long *nget, long *nrel)
1109 {
1110     int bin = 0;
1111     thr_data_t *thr = get_thr_data( th );
1112 
1113     *nget = thr->numget;
1114     *nrel = thr->numrel;
1115     *curalloc = (bufsize) thr->totalloc;
1116     *totfree = 0;
1117     *maxfree = -1;
1118 
1119     for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
1120         bfhead_t *b = thr->freelist[ bin ].ql.flink;
1121 
1122         while (b != &thr->freelist[ bin ]) {
1123             KMP_DEBUG_ASSERT(b->bh.bb.bsize > 0);
1124             *totfree += b->bh.bb.bsize;
1125             if (b->bh.bb.bsize > *maxfree) {
1126                 *maxfree = b->bh.bb.bsize;
1127             }
1128             b = b->ql.flink;              /* Link to next buffer */
1129         }
1130     }
1131 }
1132 
1133 /*  BSTATSE  --  Return extended statistics  */
1134 
1135 static void
1136 bstatse(  kmp_info_t *th, bufsize *pool_incr, long *npool, long *npget, long *nprel, long *ndget, long *ndrel)
1137 {
1138     thr_data_t *thr = get_thr_data( th );
1139 
1140     *pool_incr = (thr->pool_len < 0) ? -thr->exp_incr : thr->exp_incr;
1141     *npool = thr->numpblk;
1142     *npget = thr->numpget;
1143     *nprel = thr->numprel;
1144     *ndget = thr->numdget;
1145     *ndrel = thr->numdrel;
1146 }
1147 
1148 #endif /* BufStats */
1149 
1150 /*  BUFDUMP  --  Dump the data in a buffer.  This is called with the  user
1151                  data pointer, and backs up to the buffer header.  It will
1152                  dump either a free block or an allocated one.  */
1153 
1154 static void
1155 bufdump(  kmp_info_t *th, void *buf )
1156 {
1157     bfhead_t *b;
1158     unsigned char *bdump;
1159     bufsize bdlen;
1160 
1161     b = BFH(((char *) buf) - sizeof(bhead_t));
1162     KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
1163     if (b->bh.bb.bsize < 0) {
1164         bdump = (unsigned char *) buf;
1165         bdlen = (-b->bh.bb.bsize) - (bufsize) sizeof(bhead_t);
1166     } else {
1167         bdump = (unsigned char *) (((char *) b) + sizeof(bfhead_t));
1168         bdlen = b->bh.bb.bsize - (bufsize) sizeof(bfhead_t);
1169     }
1170 
1171     while (bdlen > 0) {
1172         int i, dupes = 0;
1173         bufsize l = bdlen;
1174         char bhex[50], bascii[20];
1175 
1176         if (l > 16) {
1177             l = 16;
1178         }
1179 
1180         for (i = 0; i < l; i++) {
1181             (void) KMP_SNPRINTF(bhex + i * 3, sizeof(bhex) - i * 3, "%02X ", bdump[i]);
1182             if (bdump[i] > 0x20 && bdump[i] < 0x7F)
1183                 bascii[ i ] = bdump[ i ];
1184             else
1185                 bascii[ i ] = ' ';
1186         }
1187         bascii[i] = 0;
1188         (void) __kmp_printf_no_lock("%-48s   %s\n", bhex, bascii);
1189         bdump += l;
1190         bdlen -= l;
1191         while ((bdlen > 16) && (memcmp((char *) (bdump - 16),
1192                                        (char *) bdump, 16) == 0)) {
1193             dupes++;
1194             bdump += 16;
1195             bdlen -= 16;
1196         }
1197         if (dupes > 1) {
1198             (void) __kmp_printf_no_lock(
1199                 "     (%d lines [%d bytes] identical to above line skipped)\n",
1200                 dupes, dupes * 16);
1201         } else if (dupes == 1) {
1202             bdump -= 16;
1203             bdlen += 16;
1204         }
1205     }
1206 }
1207 
1208 /*  BPOOLD  --  Dump a buffer pool.  The buffer headers are always listed.
1209                 If DUMPALLOC is nonzero, the contents of allocated buffers
1210                 are  dumped.   If  DUMPFREE  is  nonzero,  free blocks are
1211                 dumped as well.  If FreeWipe  checking  is  enabled,  free
1212                 blocks  which  have  been clobbered will always be dumped. */
1213 
1214 static void
1215 bpoold(  kmp_info_t *th, void *buf, int dumpalloc, int dumpfree)
1216 {
1217     bfhead_t *b = BFH( (char*)buf - sizeof(bhead_t));
1218 
1219     while (b->bh.bb.bsize != ESent) {
1220         bufsize bs = b->bh.bb.bsize;
1221 
1222         if (bs < 0) {
1223             bs = -bs;
1224             (void) __kmp_printf_no_lock("Allocated buffer: size %6ld bytes.\n", (long) bs);
1225             if (dumpalloc) {
1226                 bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
1227             }
1228         } else {
1229             const char *lerr = "";
1230 
1231             KMP_DEBUG_ASSERT(bs > 0);
1232             if ((b->ql.blink->ql.flink != b) || (b->ql.flink->ql.blink != b)) {
1233                 lerr = "  (Bad free list links)";
1234             }
1235             (void) __kmp_printf_no_lock("Free block:       size %6ld bytes.%s\n",
1236                 (long) bs, lerr);
1237 #ifdef FreeWipe
1238             lerr = ((char *) b) + sizeof(bfhead_t);
1239             if ((bs > sizeof(bfhead_t)) && ((*lerr != 0x55) ||
1240                 (memcmp(lerr, lerr + 1,
1241                   (size_t) (bs - (sizeof(bfhead_t) + 1))) != 0))) {
1242                 (void) __kmp_printf_no_lock(
1243                     "(Contents of above free block have been overstored.)\n");
1244                 bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
1245             } else
1246 #endif
1247             if (dumpfree) {
1248                 bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
1249             }
1250         }
1251         b = BFH(((char *) b) + bs);
1252     }
1253 }
1254 
1255 /*  BPOOLV  --  Validate a buffer pool. */
1256 
1257 static int
1258 bpoolv(  kmp_info_t *th, void *buf )
1259 {
1260     bfhead_t *b = BFH(buf);
1261 
1262     while (b->bh.bb.bsize != ESent) {
1263         bufsize bs = b->bh.bb.bsize;
1264 
1265         if (bs < 0) {
1266             bs = -bs;
1267         } else {
1268 #ifdef FreeWipe
1269             char *lerr = "";
1270 #endif
1271 
1272             KMP_DEBUG_ASSERT(bs > 0);
1273             if (bs <= 0) {
1274                 return 0;
1275             }
1276             if ((b->ql.blink->ql.flink != b) || (b->ql.flink->ql.blink != b)) {
1277                 (void) __kmp_printf_no_lock("Free block: size %6ld bytes.  (Bad free list links)\n",
1278                      (long) bs);
1279                 KMP_DEBUG_ASSERT(0);
1280                 return 0;
1281             }
1282 #ifdef FreeWipe
1283             lerr = ((char *) b) + sizeof(bfhead_t);
1284             if ((bs > sizeof(bfhead_t)) && ((*lerr != 0x55) ||
1285                 (memcmp(lerr, lerr + 1,
1286                   (size_t) (bs - (sizeof(bfhead_t) + 1))) != 0))) {
1287                 (void) __kmp_printf_no_lock(
1288                     "(Contents of above free block have been overstored.)\n");
1289                 bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
1290                 KMP_DEBUG_ASSERT(0);
1291                 return 0;
1292             }
1293 #endif /* FreeWipe */
1294         }
1295         b = BFH(((char *) b) + bs);
1296     }
1297     return 1;
1298 }
1299 
1300 #endif /* KMP_DEBUG */
1301 
1302 /* ------------------------------------------------------------------------ */
1303 
1304 void
1305 __kmp_initialize_bget( kmp_info_t *th )
1306 {
1307     KMP_DEBUG_ASSERT( SizeQuant >= sizeof( void * ) && (th != 0) );
1308 
1309     set_thr_data( th );
1310 
1311     bectl( th, (bget_compact_t) 0, (bget_acquire_t) malloc, (bget_release_t) free,
1312            (bufsize) __kmp_malloc_pool_incr );
1313 }
1314 
1315 void
1316 __kmp_finalize_bget( kmp_info_t *th )
1317 {
1318     thr_data_t *thr;
1319     bfhead_t *b;
1320 
1321     KMP_DEBUG_ASSERT( th != 0 );
1322 
1323 #if BufStats
1324     thr = (thr_data_t *) th->th.th_local.bget_data;
1325     KMP_DEBUG_ASSERT( thr != NULL );
1326     b = thr->last_pool;
1327 
1328     /*  If  a  block-release function is defined, and this free buffer
1329         constitutes the entire block, release it.  Note that  pool_len
1330         is  defined  in  such a way that the test will fail unless all
1331         pool blocks are the same size.  */
1332 
1333     /* Deallocate the last pool if one exists because we no longer do it in brel() */
1334     if (thr->relfcn != 0 && b != 0 && thr->numpblk != 0 &&
1335         b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t)))
1336     {
1337         KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
1338         KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.bsize == ESent);
1339         KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.prevfree == b->bh.bb.bsize);
1340 
1341         /*  Unlink the buffer from the free list  */
1342         __kmp_bget_remove_from_freelist( b );
1343 
1344         KE_TRACE( 10, ("%%%%%% FREE( %p )\n", (void *) b ) );
1345 
1346         (*thr->relfcn)(b);
1347         thr->numprel++;               /* Nr of expansion block releases */
1348         thr->numpblk--;               /* Total number of blocks */
1349         KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
1350     }
1351 #endif /* BufStats */
1352 
1353     /* Deallocate bget_data */
1354     if ( th->th.th_local.bget_data != NULL ) {
1355         __kmp_free( th->th.th_local.bget_data );
1356         th->th.th_local.bget_data = NULL;
1357     }; // if
1358 }
1359 
1360 void
1361 kmpc_set_poolsize( size_t size )
1362 {
1363     bectl( __kmp_get_thread(), (bget_compact_t) 0, (bget_acquire_t) malloc,
1364            (bget_release_t) free, (bufsize) size );
1365 }
1366 
1367 size_t
1368 kmpc_get_poolsize( void )
1369 {
1370     thr_data_t *p;
1371 
1372     p = get_thr_data( __kmp_get_thread() );
1373 
1374     return p->exp_incr;
1375 }
1376 
1377 void
1378 kmpc_set_poolmode( int mode )
1379 {
1380     thr_data_t *p;
1381 
1382     if (mode == bget_mode_fifo || mode == bget_mode_lifo || mode == bget_mode_best) {
1383         p = get_thr_data( __kmp_get_thread() );
1384         p->mode = (bget_mode_t) mode;
1385     }
1386 }
1387 
1388 int
1389 kmpc_get_poolmode( void )
1390 {
1391     thr_data_t *p;
1392 
1393     p = get_thr_data( __kmp_get_thread() );
1394 
1395     return p->mode;
1396 }
1397 
1398 void
1399 kmpc_get_poolstat( size_t *maxmem, size_t *allmem )
1400 {
1401     kmp_info_t *th = __kmp_get_thread();
1402     bufsize a, b;
1403 
1404     __kmp_bget_dequeue( th );         /* Release any queued buffers */
1405 
1406     bcheck( th, &a, &b );
1407 
1408     *maxmem = a;
1409     *allmem = b;
1410 }
1411 
1412 void
1413 kmpc_poolprint( void )
1414 {
1415     kmp_info_t *th = __kmp_get_thread();
1416 
1417     __kmp_bget_dequeue( th );         /* Release any queued buffers */
1418 
1419     bfreed( th );
1420 }
1421 
1422 #endif // #if KMP_USE_BGET
1423 
1424 /* ------------------------------------------------------------------------ */
1425 
1426 void *
1427 kmpc_malloc( size_t size )
1428 {
1429     void * ptr;
1430     ptr = bget( __kmp_entry_thread(), (bufsize)(size + sizeof(ptr)) );
1431     if( ptr != NULL ) {
1432         // save allocated pointer just before one returned to user
1433         *(void**)ptr = ptr;
1434         ptr = (void**)ptr + 1;
1435     }
1436     return ptr;
1437 }
1438 
1439 #define IS_POWER_OF_TWO(n) (((n)&((n)-1))==0)
1440 
1441 void *
1442 kmpc_aligned_malloc( size_t size, size_t alignment )
1443 {
1444     void * ptr;
1445     void * ptr_allocated;
1446     KMP_DEBUG_ASSERT( alignment < 32 * 1024 ); // Alignment should not be too big
1447     if( !IS_POWER_OF_TWO(alignment) ) {
1448         // AC: do we need to issue a warning here?
1449         errno = EINVAL;
1450         return NULL;
1451     }
1452     size = size + sizeof( void* ) + alignment;
1453     ptr_allocated = bget( __kmp_entry_thread(), (bufsize)size );
1454     if( ptr_allocated != NULL ) {
1455         // save allocated pointer just before one returned to user
1456         ptr = (void*)(((kmp_uintptr_t)ptr_allocated + sizeof( void* ) + alignment) & ~(alignment - 1));
1457         *((void**)ptr - 1) = ptr_allocated;
1458     } else {
1459         ptr = NULL;
1460     }
1461     return ptr;
1462 }
1463 
1464 void *
1465 kmpc_calloc( size_t nelem, size_t elsize )
1466 {
1467     void * ptr;
1468     ptr = bgetz( __kmp_entry_thread(), (bufsize) (nelem * elsize + sizeof(ptr)) );
1469     if( ptr != NULL ) {
1470         // save allocated pointer just before one returned to user
1471         *(void**)ptr = ptr;
1472         ptr = (void**)ptr + 1;
1473     }
1474     return ptr;
1475 }
1476 
1477 void *
1478 kmpc_realloc( void * ptr, size_t size )
1479 {
1480     void * result = NULL;
1481     if ( ptr == NULL ) {
1482         // If pointer is NULL, realloc behaves like malloc.
1483         result = bget( __kmp_entry_thread(), (bufsize)(size + sizeof(ptr)) );
1484         // save allocated pointer just before one returned to user
1485         if( result != NULL ) {
1486             *(void**)result = result;
1487             result = (void**)result + 1;
1488         }
1489     } else if ( size == 0 ) {
1490         // If size is 0, realloc behaves like free.
1491         // The thread must be registered by the call to kmpc_malloc() or kmpc_calloc() before.
1492         // So it should be safe to call __kmp_get_thread(), not __kmp_entry_thread().
1493         KMP_ASSERT(*((void**)ptr - 1));
1494         brel( __kmp_get_thread(), *((void**)ptr - 1) );
1495     } else {
1496         result = bgetr( __kmp_entry_thread(), *((void**)ptr - 1), (bufsize)(size + sizeof(ptr)) );
1497         if( result != NULL ) {
1498             *(void**)result = result;
1499             result = (void**)result + 1;
1500         }
1501     }; // if
1502     return result;
1503 }
1504 
1505 /* NOTE: the library must have already been initialized by a previous allocate */
1506 
1507 void
1508 kmpc_free( void * ptr )
1509 {
1510     if ( ! __kmp_init_serial ) {
1511         return;
1512     }; // if
1513     if ( ptr != NULL ) {
1514         kmp_info_t *th = __kmp_get_thread();
1515         __kmp_bget_dequeue( th );         /* Release any queued buffers */
1516         // extract allocated pointer and free it
1517         KMP_ASSERT(*((void**)ptr - 1));
1518         brel( th, *((void**)ptr - 1) );
1519     };
1520 }
1521 
1522 
1523 /* ------------------------------------------------------------------------ */
1524 
1525 void *
1526 ___kmp_thread_malloc( kmp_info_t *th, size_t size KMP_SRC_LOC_DECL )
1527 {
1528     void * ptr;
1529     KE_TRACE( 30, (
1530         "-> __kmp_thread_malloc( %p, %d ) called from %s:%d\n",
1531         th,
1532         (int) size
1533         KMP_SRC_LOC_PARM
1534     ) );
1535     ptr = bget( th, (bufsize) size );
1536     KE_TRACE( 30, ( "<- __kmp_thread_malloc() returns %p\n", ptr ) );
1537     return ptr;
1538 }
1539 
1540 void *
1541 ___kmp_thread_calloc( kmp_info_t *th, size_t nelem, size_t elsize KMP_SRC_LOC_DECL )
1542 {
1543     void * ptr;
1544     KE_TRACE( 30, (
1545         "-> __kmp_thread_calloc( %p, %d, %d ) called from %s:%d\n",
1546         th,
1547         (int) nelem,
1548         (int) elsize
1549         KMP_SRC_LOC_PARM
1550     ) );
1551     ptr = bgetz( th, (bufsize) (nelem * elsize) );
1552     KE_TRACE( 30, ( "<- __kmp_thread_calloc() returns %p\n", ptr ) );
1553     return ptr;
1554 }
1555 
1556 void *
1557 ___kmp_thread_realloc( kmp_info_t *th, void *ptr, size_t size KMP_SRC_LOC_DECL )
1558 {
1559     KE_TRACE( 30, (
1560         "-> __kmp_thread_realloc( %p, %p, %d ) called from %s:%d\n",
1561         th,
1562         ptr,
1563         (int) size
1564         KMP_SRC_LOC_PARM
1565     ) );
1566     ptr = bgetr( th, ptr, (bufsize) size );
1567     KE_TRACE( 30, ( "<- __kmp_thread_realloc() returns %p\n", ptr ) );
1568     return ptr;
1569 }
1570 
1571 void
1572 ___kmp_thread_free( kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL )
1573 {
1574     KE_TRACE( 30, (
1575         "-> __kmp_thread_free( %p, %p ) called from %s:%d\n",
1576         th,
1577         ptr
1578         KMP_SRC_LOC_PARM
1579     ) );
1580     if ( ptr != NULL ) {
1581         __kmp_bget_dequeue( th );         /* Release any queued buffers */
1582         brel( th, ptr );
1583     }
1584     KE_TRACE( 30, ( "<- __kmp_thread_free()\n" ) );
1585 }
1586 
1587 /* ------------------------------------------------------------------------ */
1588 /* ------------------------------------------------------------------------ */
1589 /*
1590     If LEAK_MEMORY is defined, __kmp_free() will *not* free memory. It causes memory leaks, but it
1591     may be useful for debugging memory corruptions, used freed pointers, etc.
1592 */
1593 /* #define LEAK_MEMORY */
1594 
1595 struct kmp_mem_descr {      // Memory block descriptor.
1596     void * ptr_allocated;   // Pointer returned by malloc(), subject for free().
1597     size_t size_allocated;  // Size of allocated memory block.
1598     void * ptr_aligned;     // Pointer to aligned memory, to be used by client code.
1599     size_t size_aligned;    // Size of aligned memory block.
1600 };
1601 typedef struct kmp_mem_descr kmp_mem_descr_t;
1602 
1603 /*
1604     Allocate memory on requested boundary, fill allocated memory with 0x00.
1605     NULL is NEVER returned, __kmp_abort() is called in case of memory allocation error.
1606     Must use __kmp_free when freeing memory allocated by this routine!
1607  */
1608 static
1609 void *
1610 ___kmp_allocate_align( size_t size, size_t alignment KMP_SRC_LOC_DECL )
1611 {
1612     /*
1613             __kmp_allocate() allocates (by call to malloc()) bigger memory block than requested to
1614         return properly aligned pointer. Original pointer returned by malloc() and size of allocated
1615         block is saved in descriptor just before the aligned pointer. This information used by
1616         __kmp_free() -- it has to pass to free() original pointer, not aligned one.
1617 
1618             +---------+------------+-----------------------------------+---------+
1619             | padding | descriptor |           aligned block           | padding |
1620             +---------+------------+-----------------------------------+---------+
1621             ^                      ^
1622             |                      |
1623             |                      +- Aligned pointer returned to caller
1624             +- Pointer returned by malloc()
1625 
1626         Aligned block is filled with zeros, paddings are filled with 0xEF.
1627     */
1628 
1629     kmp_mem_descr_t  descr;
1630     kmp_uintptr_t    addr_allocated;        // Address returned by malloc().
1631     kmp_uintptr_t    addr_aligned;          // Aligned address to return to caller.
1632     kmp_uintptr_t    addr_descr;            // Address of memory block descriptor.
1633 
1634     KE_TRACE( 25, (
1635         "-> ___kmp_allocate_align( %d, %d ) called from %s:%d\n",
1636         (int) size,
1637         (int) alignment
1638         KMP_SRC_LOC_PARM
1639     ) );
1640 
1641     KMP_DEBUG_ASSERT( alignment < 32 * 1024 ); // Alignment should not be too
1642     KMP_DEBUG_ASSERT( sizeof( void * ) <= sizeof( kmp_uintptr_t ) );
1643         // Make sure kmp_uintptr_t is enough to store addresses.
1644 
1645     descr.size_aligned = size;
1646     descr.size_allocated = descr.size_aligned + sizeof( kmp_mem_descr_t ) + alignment;
1647 
1648 #if KMP_DEBUG
1649     descr.ptr_allocated = _malloc_src_loc( descr.size_allocated, _file_, _line_ );
1650 #else
1651     descr.ptr_allocated = malloc_src_loc( descr.size_allocated KMP_SRC_LOC_PARM );
1652 #endif
1653     KE_TRACE( 10, (
1654         "   malloc( %d ) returned %p\n",
1655         (int) descr.size_allocated,
1656         descr.ptr_allocated
1657     ) );
1658     if ( descr.ptr_allocated == NULL ) {
1659         KMP_FATAL( OutOfHeapMemory );
1660     };
1661 
1662     addr_allocated = (kmp_uintptr_t) descr.ptr_allocated;
1663     addr_aligned =
1664         ( addr_allocated + sizeof( kmp_mem_descr_t ) + alignment )
1665         & ~ ( alignment - 1 );
1666     addr_descr = addr_aligned - sizeof( kmp_mem_descr_t );
1667 
1668     descr.ptr_aligned = (void *) addr_aligned;
1669 
1670     KE_TRACE( 26, (
1671         "   ___kmp_allocate_align: "
1672             "ptr_allocated=%p, size_allocated=%d, "
1673             "ptr_aligned=%p, size_aligned=%d\n",
1674         descr.ptr_allocated,
1675         (int) descr.size_allocated,
1676         descr.ptr_aligned,
1677         (int) descr.size_aligned
1678     ) );
1679 
1680     KMP_DEBUG_ASSERT( addr_allocated <= addr_descr );
1681     KMP_DEBUG_ASSERT( addr_descr + sizeof( kmp_mem_descr_t ) == addr_aligned );
1682     KMP_DEBUG_ASSERT( addr_aligned + descr.size_aligned <= addr_allocated + descr.size_allocated );
1683     KMP_DEBUG_ASSERT( addr_aligned % alignment == 0 );
1684 #ifdef KMP_DEBUG
1685     memset( descr.ptr_allocated, 0xEF, descr.size_allocated );
1686         // Fill allocated memory block with 0xEF.
1687 #endif
1688     memset( descr.ptr_aligned, 0x00, descr.size_aligned );
1689         // Fill the aligned memory block (which is intended for using by caller) with 0x00. Do not
1690         // put this filling under KMP_DEBUG condition! Many callers expect zeroed memory. (Padding
1691         // bytes remain filled with 0xEF in debugging library.)
1692     * ( (kmp_mem_descr_t *) addr_descr ) = descr;
1693 
1694     KMP_MB();
1695 
1696     KE_TRACE( 25, ( "<- ___kmp_allocate_align() returns %p\n", descr.ptr_aligned ) );
1697     return descr.ptr_aligned;
1698 } // func ___kmp_allocate_align
1699 
1700 
1701 /*
1702     Allocate memory on cache line boundary, fill allocated memory with 0x00.
1703     Do not call this func directly! Use __kmp_allocate macro instead.
1704     NULL is NEVER returned, __kmp_abort() is called in case of memory allocation error.
1705     Must use __kmp_free when freeing memory allocated by this routine!
1706  */
1707 void *
1708 ___kmp_allocate( size_t size KMP_SRC_LOC_DECL )
1709 {
1710     void * ptr;
1711     KE_TRACE( 25, ( "-> __kmp_allocate( %d ) called from %s:%d\n", (int) size KMP_SRC_LOC_PARM ) );
1712     ptr = ___kmp_allocate_align( size, __kmp_align_alloc KMP_SRC_LOC_PARM );
1713     KE_TRACE( 25, ( "<- __kmp_allocate() returns %p\n", ptr ) );
1714     return ptr;
1715 } // func ___kmp_allocate
1716 
1717 #if (BUILD_MEMORY==FIRST_TOUCH)
1718 void *
1719 __kmp_ft_page_allocate(size_t size)
1720 {
1721   void *adr, *aadr;
1722 
1723   const int page_size = KMP_GET_PAGE_SIZE();
1724 
1725   adr = (void *) __kmp_thread_malloc( __kmp_get_thread(),
1726                                     size + page_size + KMP_PTR_SKIP);
1727   if ( adr == 0 )
1728     KMP_FATAL( OutOfHeapMemory );
1729 
1730   /* check to see if adr is on a page boundary. */
1731   if ( ( (kmp_uintptr_t) adr & (page_size - 1)) == 0)
1732     /* nothing to do if adr is already on a page boundary. */
1733     aadr = adr;
1734   else
1735     /* else set aadr to the first page boundary in the allocated memory. */
1736     aadr = (void *) ( ( (kmp_uintptr_t) adr + page_size) & ~(page_size - 1) );
1737 
1738   /* the first touch by the owner thread. */
1739   *((void**)aadr) = adr;
1740 
1741   /* skip the memory space used for storing adr above. */
1742   return (void*)((char*)aadr + KMP_PTR_SKIP);
1743 }
1744 #endif
1745 
1746 /*
1747     Allocate memory on page boundary, fill allocated memory with 0x00.
1748     Does not call this func directly! Use __kmp_page_allocate macro instead.
1749     NULL is NEVER returned, __kmp_abort() is called in case of memory allocation error.
1750     Must use __kmp_free when freeing memory allocated by this routine!
1751  */
1752 void *
1753 ___kmp_page_allocate( size_t size KMP_SRC_LOC_DECL )
1754 {
1755     int    page_size = 8 * 1024;
1756     void * ptr;
1757 
1758     KE_TRACE( 25, (
1759         "-> __kmp_page_allocate( %d ) called from %s:%d\n",
1760         (int) size
1761         KMP_SRC_LOC_PARM
1762     ) );
1763     ptr = ___kmp_allocate_align( size, page_size KMP_SRC_LOC_PARM );
1764     KE_TRACE( 25, ( "<- __kmp_page_allocate( %d ) returns %p\n", (int) size, ptr ) );
1765     return ptr;
1766 } // ___kmp_page_allocate
1767 
1768 /*
1769     Free memory allocated by __kmp_allocate() and __kmp_page_allocate().
1770     In debug mode, fill the memory block with 0xEF before call to free().
1771 */
1772 void
1773 ___kmp_free( void * ptr KMP_SRC_LOC_DECL )
1774 {
1775     kmp_mem_descr_t descr;
1776     kmp_uintptr_t   addr_allocated;        // Address returned by malloc().
1777     kmp_uintptr_t   addr_aligned;          // Aligned address passed by caller.
1778 
1779     KE_TRACE( 25, ( "-> __kmp_free( %p ) called from %s:%d\n", ptr KMP_SRC_LOC_PARM ) );
1780     KMP_ASSERT( ptr != NULL );
1781 
1782     descr = * ( kmp_mem_descr_t *) ( (kmp_uintptr_t) ptr - sizeof( kmp_mem_descr_t ) );
1783 
1784     KE_TRACE( 26, ( "   __kmp_free:     "
1785                     "ptr_allocated=%p, size_allocated=%d, "
1786                     "ptr_aligned=%p, size_aligned=%d\n",
1787                     descr.ptr_allocated, (int) descr.size_allocated,
1788                     descr.ptr_aligned, (int) descr.size_aligned ));
1789 
1790     addr_allocated = (kmp_uintptr_t) descr.ptr_allocated;
1791     addr_aligned   = (kmp_uintptr_t) descr.ptr_aligned;
1792 
1793     KMP_DEBUG_ASSERT( addr_aligned % CACHE_LINE == 0 );
1794     KMP_DEBUG_ASSERT( descr.ptr_aligned == ptr );
1795     KMP_DEBUG_ASSERT( addr_allocated + sizeof( kmp_mem_descr_t ) <= addr_aligned );
1796     KMP_DEBUG_ASSERT( descr.size_aligned < descr.size_allocated );
1797     KMP_DEBUG_ASSERT( addr_aligned + descr.size_aligned <= addr_allocated + descr.size_allocated );
1798 
1799     #ifdef KMP_DEBUG
1800         memset( descr.ptr_allocated, 0xEF, descr.size_allocated );
1801             // Fill memory block with 0xEF, it helps catch using freed memory.
1802     #endif
1803 
1804     #ifndef LEAK_MEMORY
1805         KE_TRACE( 10, ( "   free( %p )\n", descr.ptr_allocated ) );
1806     # ifdef KMP_DEBUG
1807         _free_src_loc( descr.ptr_allocated, _file_, _line_ );
1808     # else
1809         free_src_loc( descr.ptr_allocated KMP_SRC_LOC_PARM );
1810     # endif
1811     #endif
1812     KMP_MB();
1813     KE_TRACE( 25, ( "<- __kmp_free() returns\n" ) );
1814 } // func ___kmp_free
1815 
1816 /* ------------------------------------------------------------------------ */
1817 /* ------------------------------------------------------------------------ */
1818 
1819 #if USE_FAST_MEMORY == 3
1820 // Allocate fast memory by first scanning the thread's free lists
1821 // If a chunk the right size exists, grab it off the free list.
1822 // Otherwise allocate normally using kmp_thread_malloc.
1823 
1824 // AC: How to choose the limit? Just get 16 for now...
1825 #define KMP_FREE_LIST_LIMIT 16
1826 
1827 // Always use 128 bytes for determining buckets for caching memory blocks
1828 #define DCACHE_LINE  128
1829 
1830 void *
1831 ___kmp_fast_allocate( kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL )
1832 {
1833     void            * ptr;
1834     int               num_lines;
1835     int               idx;
1836     int               index;
1837     void            * alloc_ptr;
1838     size_t            alloc_size;
1839     kmp_mem_descr_t * descr;
1840 
1841     KE_TRACE( 25, ( "-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n",
1842       __kmp_gtid_from_thread(this_thr), (int) size KMP_SRC_LOC_PARM ) );
1843 
1844     num_lines = ( size + DCACHE_LINE - 1 ) / DCACHE_LINE;
1845     idx = num_lines - 1;
1846     KMP_DEBUG_ASSERT( idx >= 0 );
1847     if ( idx < 2 ) {
1848         index = 0;       // idx is [ 0, 1 ], use first free list
1849         num_lines = 2;   // 1, 2 cache lines or less than cache line
1850     } else if ( ( idx >>= 2 ) == 0 ) {
1851         index = 1;       // idx is [ 2, 3 ], use second free list
1852         num_lines = 4;   // 3, 4 cache lines
1853     } else if ( ( idx >>= 2 ) == 0 ) {
1854         index = 2;       // idx is [ 4, 15 ], use third free list
1855         num_lines = 16;  // 5, 6, ..., 16 cache lines
1856     } else if ( ( idx >>= 2 ) == 0 ) {
1857         index = 3;       // idx is [ 16, 63 ], use fourth free list
1858         num_lines = 64;  // 17, 18, ..., 64 cache lines
1859     } else {
1860         goto alloc_call; // 65 or more cache lines ( > 8KB ), don't use free lists
1861     }
1862 
1863     ptr = this_thr->th.th_free_lists[index].th_free_list_self;
1864     if ( ptr != NULL ) {
1865         // pop the head of no-sync free list
1866         this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
1867         KMP_DEBUG_ASSERT( this_thr ==
1868             ((kmp_mem_descr_t *)( (kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t) ))->ptr_aligned );
1869         goto end;
1870     };
1871     ptr = TCR_SYNC_PTR( this_thr->th.th_free_lists[index].th_free_list_sync );
1872     if ( ptr != NULL ) {
1873         // no-sync free list is empty, use sync free list (filled in by other threads only)
1874         // pop the head of the sync free list, push NULL instead
1875         while ( ! KMP_COMPARE_AND_STORE_PTR(
1876             &this_thr->th.th_free_lists[index].th_free_list_sync, ptr, NULL ) )
1877         {
1878             KMP_CPU_PAUSE();
1879             ptr = TCR_SYNC_PTR( this_thr->th.th_free_lists[index].th_free_list_sync );
1880         }
1881         // push the rest of chain into no-sync free list (can be NULL if there was the only block)
1882         this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
1883         KMP_DEBUG_ASSERT( this_thr ==
1884             ((kmp_mem_descr_t *)( (kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t) ))->ptr_aligned );
1885         goto end;
1886     }
1887 
1888     alloc_call:
1889     // haven't found block in the free lists, thus allocate it
1890     size = num_lines * DCACHE_LINE;
1891 
1892     alloc_size = size + sizeof( kmp_mem_descr_t ) + DCACHE_LINE;
1893     KE_TRACE( 25, ( "__kmp_fast_allocate: T#%d Calling __kmp_thread_malloc with alloc_size %d\n",
1894                    __kmp_gtid_from_thread( this_thr ), alloc_size ) );
1895     alloc_ptr = bget( this_thr, (bufsize) alloc_size );
1896 
1897     // align ptr to DCACHE_LINE
1898     ptr = (void *)(( ((kmp_uintptr_t)alloc_ptr) + sizeof(kmp_mem_descr_t) + DCACHE_LINE ) & ~( DCACHE_LINE - 1 ));
1899     descr = (kmp_mem_descr_t *)( ((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t) );
1900 
1901     descr->ptr_allocated = alloc_ptr;        // remember allocated pointer
1902     // we don't need size_allocated
1903     descr->ptr_aligned   = (void *)this_thr; // remember allocating thread
1904                                              // (it is already saved in bget buffer,
1905                                              // but we may want to use another allocator in future)
1906     descr->size_aligned  = size;
1907 
1908     end:
1909     KE_TRACE( 25, ( "<- __kmp_fast_allocate( T#%d ) returns %p\n",
1910                     __kmp_gtid_from_thread( this_thr ), ptr ) );
1911     return ptr;
1912 } // func __kmp_fast_allocate
1913 
1914 // Free fast memory and place it on the thread's free list if it is of
1915 // the correct size.
1916 void
1917 ___kmp_fast_free( kmp_info_t *this_thr, void * ptr KMP_SRC_LOC_DECL )
1918 {
1919     kmp_mem_descr_t * descr;
1920     kmp_info_t      * alloc_thr;
1921     size_t            size;
1922     size_t            idx;
1923     int               index;
1924 
1925     KE_TRACE( 25, ( "-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n",
1926       __kmp_gtid_from_thread(this_thr), ptr KMP_SRC_LOC_PARM ) );
1927     KMP_ASSERT( ptr != NULL );
1928 
1929     descr = (kmp_mem_descr_t *)( ((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t) );
1930 
1931     KE_TRACE(26, ("   __kmp_fast_free:     size_aligned=%d\n",
1932                   (int) descr->size_aligned ) );
1933 
1934     size = descr->size_aligned; // 2, 4, 16, 64, 65, 66, ... cache lines
1935 
1936     idx = DCACHE_LINE * 2; // 2 cache lines is minimal size of block
1937     if ( idx == size ) {
1938         index = 0;       // 2 cache lines
1939     } else if ( ( idx <<= 1 ) == size ) {
1940         index = 1;       // 4 cache lines
1941     } else if ( ( idx <<= 2 ) == size ) {
1942         index = 2;       // 16 cache lines
1943     } else if ( ( idx <<= 2 ) == size ) {
1944         index = 3;       // 64 cache lines
1945     } else {
1946         KMP_DEBUG_ASSERT( size > DCACHE_LINE * 64 );
1947         goto free_call;  // 65 or more cache lines ( > 8KB )
1948     }
1949 
1950     alloc_thr = (kmp_info_t *)descr->ptr_aligned; // get thread owning the block
1951     if ( alloc_thr == this_thr ) {
1952         // push block to self no-sync free list, linking previous head (LIFO)
1953         *((void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self;
1954         this_thr->th.th_free_lists[index].th_free_list_self = ptr;
1955     } else {
1956         void * head = this_thr->th.th_free_lists[index].th_free_list_other;
1957         if ( head == NULL ) {
1958             // Create new free list
1959             this_thr->th.th_free_lists[index].th_free_list_other = ptr;
1960             *((void **)ptr) = NULL;             // mark the tail of the list
1961             descr->size_allocated = (size_t)1;  // head of the list keeps its length
1962         } else {
1963             // need to check existed "other" list's owner thread and size of queue
1964             kmp_mem_descr_t * dsc  = (kmp_mem_descr_t *)( (char*)head - sizeof(kmp_mem_descr_t) );
1965             kmp_info_t      * q_th = (kmp_info_t *)(dsc->ptr_aligned); // allocating thread, same for all queue nodes
1966             size_t            q_sz = dsc->size_allocated + 1;          // new size in case we add current task
1967             if ( q_th == alloc_thr && q_sz <= KMP_FREE_LIST_LIMIT ) {
1968                 // we can add current task to "other" list, no sync needed
1969                 *((void **)ptr) = head;
1970                 descr->size_allocated = q_sz;
1971                 this_thr->th.th_free_lists[index].th_free_list_other = ptr;
1972             } else {
1973                 // either queue blocks owner is changing or size limit exceeded
1974                 // return old queue to allocating thread (q_th) synchroneously,
1975                 // and start new list for alloc_thr's tasks
1976                 void * old_ptr;
1977                 void * tail = head;
1978                 void * next = *((void **)head);
1979                 while ( next != NULL ) {
1980                     KMP_DEBUG_ASSERT(
1981                         // queue size should decrease by 1 each step through the list
1982                         ((kmp_mem_descr_t*)((char*)next - sizeof(kmp_mem_descr_t)))->size_allocated + 1 ==
1983                         ((kmp_mem_descr_t*)((char*)tail - sizeof(kmp_mem_descr_t)))->size_allocated );
1984                     tail = next;   // remember tail node
1985                     next = *((void **)next);
1986                 }
1987                 KMP_DEBUG_ASSERT( q_th != NULL );
1988                 // push block to owner's sync free list
1989                 old_ptr = TCR_PTR( q_th->th.th_free_lists[index].th_free_list_sync );
1990                 /* the next pointer must be set before setting free_list to ptr to avoid
1991                    exposing a broken list to other threads, even for an instant. */
1992                 *((void **)tail) = old_ptr;
1993 
1994                 while ( ! KMP_COMPARE_AND_STORE_PTR(
1995                     &q_th->th.th_free_lists[index].th_free_list_sync,
1996                     old_ptr,
1997                     head ) )
1998                 {
1999                     KMP_CPU_PAUSE();
2000                     old_ptr = TCR_PTR( q_th->th.th_free_lists[index].th_free_list_sync );
2001                     *((void **)tail) = old_ptr;
2002                 }
2003 
2004                 // start new list of not-selt tasks
2005                 this_thr->th.th_free_lists[index].th_free_list_other = ptr;
2006                 *((void **)ptr) = NULL;
2007                 descr->size_allocated = (size_t)1;  // head of queue keeps its length
2008             }
2009         }
2010     }
2011     goto end;
2012 
2013     free_call:
2014     KE_TRACE(25, ( "__kmp_fast_free: T#%d Calling __kmp_thread_free for size %d\n",
2015                    __kmp_gtid_from_thread( this_thr), size ) );
2016     __kmp_bget_dequeue( this_thr );         /* Release any queued buffers */
2017     brel( this_thr, descr->ptr_allocated );
2018 
2019     end:
2020     KE_TRACE( 25, ( "<- __kmp_fast_free() returns\n" ) );
2021 
2022 } // func __kmp_fast_free
2023 
2024 
2025 // Initialize the thread free lists related to fast memory
2026 // Only do this when a thread is initially created.
2027 void
2028 __kmp_initialize_fast_memory( kmp_info_t *this_thr )
2029 {
2030     KE_TRACE(10, ( "__kmp_initialize_fast_memory: Called from th %p\n", this_thr ) );
2031 
2032     memset ( this_thr->th.th_free_lists, 0, NUM_LISTS * sizeof( kmp_free_list_t ) );
2033 }
2034 
2035 // Free the memory in the thread free lists related to fast memory
2036 // Only do this when a thread is being reaped (destroyed).
2037 void
2038 __kmp_free_fast_memory( kmp_info_t *th )
2039 {
2040     // Suppose we use BGET underlying allocator, walk through its structures...
2041     int          bin;
2042     thr_data_t * thr = get_thr_data( th );
2043     void      ** lst = NULL;
2044 
2045     KE_TRACE(5, ( "__kmp_free_fast_memory: Called T#%d\n",
2046                    __kmp_gtid_from_thread( th ) ) );
2047 
2048     __kmp_bget_dequeue( th );         // Release any queued buffers
2049 
2050     // Dig through free lists and extract all allocated blocks
2051     for ( bin = 0; bin < MAX_BGET_BINS; ++bin ) {
2052         bfhead_t * b = thr->freelist[ bin ].ql.flink;
2053         while ( b != &thr->freelist[ bin ] ) {
2054             if ( (kmp_uintptr_t)b->bh.bb.bthr & 1 ) {   // if the buffer is an allocated address?
2055                 *((void**)b) = lst;   // link the list (override bthr, but keep flink yet)
2056                 lst = (void**)b;      // push b into lst
2057             }
2058             b = b->ql.flink;          // get next buffer
2059         }
2060     }
2061     while ( lst != NULL ) {
2062         void * next = *lst;
2063         KE_TRACE(10, ( "__kmp_free_fast_memory: freeing %p, next=%p th %p (%d)\n",
2064                       lst, next, th, __kmp_gtid_from_thread( th ) ) );
2065         (*thr->relfcn)(lst);
2066         #if BufStats
2067             // count blocks to prevent problems in __kmp_finalize_bget()
2068             thr->numprel++;       /* Nr of expansion block releases */
2069             thr->numpblk--;       /* Total number of blocks */
2070         #endif
2071         lst = (void**)next;
2072     }
2073 
2074     KE_TRACE(5, ( "__kmp_free_fast_memory: Freed T#%d\n",
2075                   __kmp_gtid_from_thread( th ) ) );
2076 }
2077 
2078 #endif // USE_FAST_MEMORY
2079