1 /* 2 * kmp_alloc.cpp -- private/shared dynamic memory allocation and management 3 */ 4 5 //===----------------------------------------------------------------------===// 6 // 7 // The LLVM Compiler Infrastructure 8 // 9 // This file is dual licensed under the MIT and the University of Illinois Open 10 // Source Licenses. See LICENSE.txt for details. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "kmp.h" 15 #include "kmp_io.h" 16 #include "kmp_wrapper_malloc.h" 17 18 // Disable bget when it is not used 19 #if KMP_USE_BGET 20 21 /* Thread private buffer management code */ 22 23 typedef int (*bget_compact_t)(size_t, int); 24 typedef void *(*bget_acquire_t)(size_t); 25 typedef void (*bget_release_t)(void *); 26 27 /* NOTE: bufsize must be a signed datatype */ 28 29 #if KMP_OS_WINDOWS 30 #if KMP_ARCH_X86 || KMP_ARCH_ARM 31 typedef kmp_int32 bufsize; 32 #else 33 typedef kmp_int64 bufsize; 34 #endif 35 #else 36 typedef ssize_t bufsize; 37 #endif 38 39 /* The three modes of operation are, fifo search, lifo search, and best-fit */ 40 41 typedef enum bget_mode { 42 bget_mode_fifo = 0, 43 bget_mode_lifo = 1, 44 bget_mode_best = 2 45 } bget_mode_t; 46 47 static void bpool(kmp_info_t *th, void *buffer, bufsize len); 48 static void *bget(kmp_info_t *th, bufsize size); 49 static void *bgetz(kmp_info_t *th, bufsize size); 50 static void *bgetr(kmp_info_t *th, void *buffer, bufsize newsize); 51 static void brel(kmp_info_t *th, void *buf); 52 static void bectl(kmp_info_t *th, bget_compact_t compact, 53 bget_acquire_t acquire, bget_release_t release, 54 bufsize pool_incr); 55 56 /* BGET CONFIGURATION */ 57 /* Buffer allocation size quantum: all buffers allocated are a 58 multiple of this size. This MUST be a power of two. */ 59 60 /* On IA-32 architecture with Linux* OS, malloc() does not 61 ensure 16 byte alignmnent */ 62 63 #if KMP_ARCH_X86 || !KMP_HAVE_QUAD 64 65 #define SizeQuant 8 66 #define AlignType double 67 68 #else 69 70 #define SizeQuant 16 71 #define AlignType _Quad 72 73 #endif 74 75 // Define this symbol to enable the bstats() function which calculates the 76 // total free space in the buffer pool, the largest available buffer, and the 77 // total space currently allocated. 78 #define BufStats 1 79 80 #ifdef KMP_DEBUG 81 82 // Define this symbol to enable the bpoold() function which dumps the buffers 83 // in a buffer pool. 84 #define BufDump 1 85 86 // Define this symbol to enable the bpoolv() function for validating a buffer 87 // pool. 88 #define BufValid 1 89 90 // Define this symbol to enable the bufdump() function which allows dumping the 91 // contents of an allocated or free buffer. 92 #define DumpData 1 93 94 #ifdef NOT_USED_NOW 95 96 // Wipe free buffers to a guaranteed pattern of garbage to trip up miscreants 97 // who attempt to use pointers into released buffers. 98 #define FreeWipe 1 99 100 // Use a best fit algorithm when searching for space for an allocation request. 101 // This uses memory more efficiently, but allocation will be much slower. 102 #define BestFit 1 103 104 #endif /* NOT_USED_NOW */ 105 #endif /* KMP_DEBUG */ 106 107 static bufsize bget_bin_size[] = { 108 0, 109 // 1 << 6, /* .5 Cache line */ 110 1 << 7, /* 1 Cache line, new */ 111 1 << 8, /* 2 Cache lines */ 112 1 << 9, /* 4 Cache lines, new */ 113 1 << 10, /* 8 Cache lines */ 114 1 << 11, /* 16 Cache lines, new */ 115 1 << 12, 1 << 13, /* new */ 116 1 << 14, 1 << 15, /* new */ 117 1 << 16, 1 << 17, 1 << 18, 1 << 19, 1 << 20, /* 1MB */ 118 1 << 21, /* 2MB */ 119 1 << 22, /* 4MB */ 120 1 << 23, /* 8MB */ 121 1 << 24, /* 16MB */ 122 1 << 25, /* 32MB */ 123 }; 124 125 #define MAX_BGET_BINS (int)(sizeof(bget_bin_size) / sizeof(bufsize)) 126 127 struct bfhead; 128 129 // Declare the interface, including the requested buffer size type, bufsize. 130 131 /* Queue links */ 132 typedef struct qlinks { 133 struct bfhead *flink; /* Forward link */ 134 struct bfhead *blink; /* Backward link */ 135 } qlinks_t; 136 137 /* Header in allocated and free buffers */ 138 typedef struct bhead2 { 139 kmp_info_t *bthr; /* The thread which owns the buffer pool */ 140 bufsize prevfree; /* Relative link back to previous free buffer in memory or 141 0 if previous buffer is allocated. */ 142 bufsize bsize; /* Buffer size: positive if free, negative if allocated. */ 143 } bhead2_t; 144 145 /* Make sure the bhead structure is a multiple of SizeQuant in size. */ 146 typedef union bhead { 147 KMP_ALIGN(SizeQuant) 148 AlignType b_align; 149 char b_pad[sizeof(bhead2_t) + (SizeQuant - (sizeof(bhead2_t) % SizeQuant))]; 150 bhead2_t bb; 151 } bhead_t; 152 #define BH(p) ((bhead_t *)(p)) 153 154 /* Header in directly allocated buffers (by acqfcn) */ 155 typedef struct bdhead { 156 bufsize tsize; /* Total size, including overhead */ 157 bhead_t bh; /* Common header */ 158 } bdhead_t; 159 #define BDH(p) ((bdhead_t *)(p)) 160 161 /* Header in free buffers */ 162 typedef struct bfhead { 163 bhead_t bh; /* Common allocated/free header */ 164 qlinks_t ql; /* Links on free list */ 165 } bfhead_t; 166 #define BFH(p) ((bfhead_t *)(p)) 167 168 typedef struct thr_data { 169 bfhead_t freelist[MAX_BGET_BINS]; 170 #if BufStats 171 size_t totalloc; /* Total space currently allocated */ 172 long numget, numrel; /* Number of bget() and brel() calls */ 173 long numpblk; /* Number of pool blocks */ 174 long numpget, numprel; /* Number of block gets and rels */ 175 long numdget, numdrel; /* Number of direct gets and rels */ 176 #endif /* BufStats */ 177 178 /* Automatic expansion block management functions */ 179 bget_compact_t compfcn; 180 bget_acquire_t acqfcn; 181 bget_release_t relfcn; 182 183 bget_mode_t mode; /* what allocation mode to use? */ 184 185 bufsize exp_incr; /* Expansion block size */ 186 bufsize pool_len; /* 0: no bpool calls have been made 187 -1: not all pool blocks are the same size 188 >0: (common) block size for all bpool calls made so far 189 */ 190 bfhead_t *last_pool; /* Last pool owned by this thread (delay dealocation) */ 191 } thr_data_t; 192 193 /* Minimum allocation quantum: */ 194 #define QLSize (sizeof(qlinks_t)) 195 #define SizeQ ((SizeQuant > QLSize) ? SizeQuant : QLSize) 196 #define MaxSize \ 197 (bufsize)( \ 198 ~(((bufsize)(1) << (sizeof(bufsize) * CHAR_BIT - 1)) | (SizeQuant - 1))) 199 // Maximun for the requested size. 200 201 /* End sentinel: value placed in bsize field of dummy block delimiting 202 end of pool block. The most negative number which will fit in a 203 bufsize, defined in a way that the compiler will accept. */ 204 205 #define ESent \ 206 ((bufsize)(-(((((bufsize)1) << ((int)sizeof(bufsize) * 8 - 2)) - 1) * 2) - 2)) 207 208 /* Thread Data management routines */ 209 static int bget_get_bin(bufsize size) { 210 // binary chop bins 211 int lo = 0, hi = MAX_BGET_BINS - 1; 212 213 KMP_DEBUG_ASSERT(size > 0); 214 215 while ((hi - lo) > 1) { 216 int mid = (lo + hi) >> 1; 217 if (size < bget_bin_size[mid]) 218 hi = mid - 1; 219 else 220 lo = mid; 221 } 222 223 KMP_DEBUG_ASSERT((lo >= 0) && (lo < MAX_BGET_BINS)); 224 225 return lo; 226 } 227 228 static void set_thr_data(kmp_info_t *th) { 229 int i; 230 thr_data_t *data; 231 232 data = (thr_data_t *)((!th->th.th_local.bget_data) 233 ? __kmp_allocate(sizeof(*data)) 234 : th->th.th_local.bget_data); 235 236 memset(data, '\0', sizeof(*data)); 237 238 for (i = 0; i < MAX_BGET_BINS; ++i) { 239 data->freelist[i].ql.flink = &data->freelist[i]; 240 data->freelist[i].ql.blink = &data->freelist[i]; 241 } 242 243 th->th.th_local.bget_data = data; 244 th->th.th_local.bget_list = 0; 245 #if !USE_CMP_XCHG_FOR_BGET 246 #ifdef USE_QUEUING_LOCK_FOR_BGET 247 __kmp_init_lock(&th->th.th_local.bget_lock); 248 #else 249 __kmp_init_bootstrap_lock(&th->th.th_local.bget_lock); 250 #endif /* USE_LOCK_FOR_BGET */ 251 #endif /* ! USE_CMP_XCHG_FOR_BGET */ 252 } 253 254 static thr_data_t *get_thr_data(kmp_info_t *th) { 255 thr_data_t *data; 256 257 data = (thr_data_t *)th->th.th_local.bget_data; 258 259 KMP_DEBUG_ASSERT(data != 0); 260 261 return data; 262 } 263 264 /* Walk the free list and release the enqueued buffers */ 265 static void __kmp_bget_dequeue(kmp_info_t *th) { 266 void *p = TCR_SYNC_PTR(th->th.th_local.bget_list); 267 268 if (p != 0) { 269 #if USE_CMP_XCHG_FOR_BGET 270 { 271 volatile void *old_value = TCR_SYNC_PTR(th->th.th_local.bget_list); 272 while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list, 273 CCAST(void *, old_value), nullptr)) { 274 KMP_CPU_PAUSE(); 275 old_value = TCR_SYNC_PTR(th->th.th_local.bget_list); 276 } 277 p = CCAST(void *, old_value); 278 } 279 #else /* ! USE_CMP_XCHG_FOR_BGET */ 280 #ifdef USE_QUEUING_LOCK_FOR_BGET 281 __kmp_acquire_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th)); 282 #else 283 __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock); 284 #endif /* USE_QUEUING_LOCK_FOR_BGET */ 285 286 p = (void *)th->th.th_local.bget_list; 287 th->th.th_local.bget_list = 0; 288 289 #ifdef USE_QUEUING_LOCK_FOR_BGET 290 __kmp_release_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th)); 291 #else 292 __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock); 293 #endif 294 #endif /* USE_CMP_XCHG_FOR_BGET */ 295 296 /* Check again to make sure the list is not empty */ 297 while (p != 0) { 298 void *buf = p; 299 bfhead_t *b = BFH(((char *)p) - sizeof(bhead_t)); 300 301 KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0); 302 KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) == 303 (kmp_uintptr_t)th); // clear possible mark 304 KMP_DEBUG_ASSERT(b->ql.blink == 0); 305 306 p = (void *)b->ql.flink; 307 308 brel(th, buf); 309 } 310 } 311 } 312 313 /* Chain together the free buffers by using the thread owner field */ 314 static void __kmp_bget_enqueue(kmp_info_t *th, void *buf 315 #ifdef USE_QUEUING_LOCK_FOR_BGET 316 , 317 kmp_int32 rel_gtid 318 #endif 319 ) { 320 bfhead_t *b = BFH(((char *)buf) - sizeof(bhead_t)); 321 322 KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0); 323 KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) == 324 (kmp_uintptr_t)th); // clear possible mark 325 326 b->ql.blink = 0; 327 328 KC_TRACE(10, ("__kmp_bget_enqueue: moving buffer to T#%d list\n", 329 __kmp_gtid_from_thread(th))); 330 331 #if USE_CMP_XCHG_FOR_BGET 332 { 333 volatile void *old_value = TCR_PTR(th->th.th_local.bget_list); 334 /* the next pointer must be set before setting bget_list to buf to avoid 335 exposing a broken list to other threads, even for an instant. */ 336 b->ql.flink = BFH(CCAST(void *, old_value)); 337 338 while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list, 339 CCAST(void *, old_value), buf)) { 340 KMP_CPU_PAUSE(); 341 old_value = TCR_PTR(th->th.th_local.bget_list); 342 /* the next pointer must be set before setting bget_list to buf to avoid 343 exposing a broken list to other threads, even for an instant. */ 344 b->ql.flink = BFH(CCAST(void *, old_value)); 345 } 346 } 347 #else /* ! USE_CMP_XCHG_FOR_BGET */ 348 #ifdef USE_QUEUING_LOCK_FOR_BGET 349 __kmp_acquire_lock(&th->th.th_local.bget_lock, rel_gtid); 350 #else 351 __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock); 352 #endif 353 354 b->ql.flink = BFH(th->th.th_local.bget_list); 355 th->th.th_local.bget_list = (void *)buf; 356 357 #ifdef USE_QUEUING_LOCK_FOR_BGET 358 __kmp_release_lock(&th->th.th_local.bget_lock, rel_gtid); 359 #else 360 __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock); 361 #endif 362 #endif /* USE_CMP_XCHG_FOR_BGET */ 363 } 364 365 /* insert buffer back onto a new freelist */ 366 static void __kmp_bget_insert_into_freelist(thr_data_t *thr, bfhead_t *b) { 367 int bin; 368 369 KMP_DEBUG_ASSERT(((size_t)b) % SizeQuant == 0); 370 KMP_DEBUG_ASSERT(b->bh.bb.bsize % SizeQuant == 0); 371 372 bin = bget_get_bin(b->bh.bb.bsize); 373 374 KMP_DEBUG_ASSERT(thr->freelist[bin].ql.blink->ql.flink == 375 &thr->freelist[bin]); 376 KMP_DEBUG_ASSERT(thr->freelist[bin].ql.flink->ql.blink == 377 &thr->freelist[bin]); 378 379 b->ql.flink = &thr->freelist[bin]; 380 b->ql.blink = thr->freelist[bin].ql.blink; 381 382 thr->freelist[bin].ql.blink = b; 383 b->ql.blink->ql.flink = b; 384 } 385 386 /* unlink the buffer from the old freelist */ 387 static void __kmp_bget_remove_from_freelist(bfhead_t *b) { 388 KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b); 389 KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b); 390 391 b->ql.blink->ql.flink = b->ql.flink; 392 b->ql.flink->ql.blink = b->ql.blink; 393 } 394 395 /* GET STATS -- check info on free list */ 396 static void bcheck(kmp_info_t *th, bufsize *max_free, bufsize *total_free) { 397 thr_data_t *thr = get_thr_data(th); 398 int bin; 399 400 *total_free = *max_free = 0; 401 402 for (bin = 0; bin < MAX_BGET_BINS; ++bin) { 403 bfhead_t *b, *best; 404 405 best = &thr->freelist[bin]; 406 b = best->ql.flink; 407 408 while (b != &thr->freelist[bin]) { 409 *total_free += (b->bh.bb.bsize - sizeof(bhead_t)); 410 if ((best == &thr->freelist[bin]) || (b->bh.bb.bsize < best->bh.bb.bsize)) 411 best = b; 412 413 /* Link to next buffer */ 414 b = b->ql.flink; 415 } 416 417 if (*max_free < best->bh.bb.bsize) 418 *max_free = best->bh.bb.bsize; 419 } 420 421 if (*max_free > (bufsize)sizeof(bhead_t)) 422 *max_free -= sizeof(bhead_t); 423 } 424 425 /* BGET -- Allocate a buffer. */ 426 static void *bget(kmp_info_t *th, bufsize requested_size) { 427 thr_data_t *thr = get_thr_data(th); 428 bufsize size = requested_size; 429 bfhead_t *b; 430 void *buf; 431 int compactseq = 0; 432 int use_blink = 0; 433 /* For BestFit */ 434 bfhead_t *best; 435 436 if (size < 0 || size + sizeof(bhead_t) > MaxSize) { 437 return NULL; 438 } 439 440 __kmp_bget_dequeue(th); /* Release any queued buffers */ 441 442 if (size < (bufsize)SizeQ) { // Need at least room for the queue links. 443 size = SizeQ; 444 } 445 #if defined(SizeQuant) && (SizeQuant > 1) 446 size = (size + (SizeQuant - 1)) & (~(SizeQuant - 1)); 447 #endif 448 449 size += sizeof(bhead_t); // Add overhead in allocated buffer to size required. 450 KMP_DEBUG_ASSERT(size >= 0); 451 KMP_DEBUG_ASSERT(size % SizeQuant == 0); 452 453 use_blink = (thr->mode == bget_mode_lifo); 454 455 /* If a compact function was provided in the call to bectl(), wrap 456 a loop around the allocation process to allow compaction to 457 intervene in case we don't find a suitable buffer in the chain. */ 458 459 for (;;) { 460 int bin; 461 462 for (bin = bget_get_bin(size); bin < MAX_BGET_BINS; ++bin) { 463 /* Link to next buffer */ 464 b = (use_blink ? thr->freelist[bin].ql.blink 465 : thr->freelist[bin].ql.flink); 466 467 if (thr->mode == bget_mode_best) { 468 best = &thr->freelist[bin]; 469 470 /* Scan the free list searching for the first buffer big enough 471 to hold the requested size buffer. */ 472 while (b != &thr->freelist[bin]) { 473 if (b->bh.bb.bsize >= (bufsize)size) { 474 if ((best == &thr->freelist[bin]) || 475 (b->bh.bb.bsize < best->bh.bb.bsize)) { 476 best = b; 477 } 478 } 479 480 /* Link to next buffer */ 481 b = (use_blink ? b->ql.blink : b->ql.flink); 482 } 483 b = best; 484 } 485 486 while (b != &thr->freelist[bin]) { 487 if ((bufsize)b->bh.bb.bsize >= (bufsize)size) { 488 489 // Buffer is big enough to satisfy the request. Allocate it to the 490 // caller. We must decide whether the buffer is large enough to split 491 // into the part given to the caller and a free buffer that remains 492 // on the free list, or whether the entire buffer should be removed 493 // from the free list and given to the caller in its entirety. We 494 // only split the buffer if enough room remains for a header plus the 495 // minimum quantum of allocation. 496 if ((b->bh.bb.bsize - (bufsize)size) > 497 (bufsize)(SizeQ + (sizeof(bhead_t)))) { 498 bhead_t *ba, *bn; 499 500 ba = BH(((char *)b) + (b->bh.bb.bsize - (bufsize)size)); 501 bn = BH(((char *)ba) + size); 502 503 KMP_DEBUG_ASSERT(bn->bb.prevfree == b->bh.bb.bsize); 504 505 /* Subtract size from length of free block. */ 506 b->bh.bb.bsize -= (bufsize)size; 507 508 /* Link allocated buffer to the previous free buffer. */ 509 ba->bb.prevfree = b->bh.bb.bsize; 510 511 /* Plug negative size into user buffer. */ 512 ba->bb.bsize = -size; 513 514 /* Mark this buffer as owned by this thread. */ 515 TCW_PTR(ba->bb.bthr, 516 th); // not an allocated address (do not mark it) 517 /* Mark buffer after this one not preceded by free block. */ 518 bn->bb.prevfree = 0; 519 520 // unlink buffer from old freelist, and reinsert into new freelist 521 __kmp_bget_remove_from_freelist(b); 522 __kmp_bget_insert_into_freelist(thr, b); 523 #if BufStats 524 thr->totalloc += (size_t)size; 525 thr->numget++; /* Increment number of bget() calls */ 526 #endif 527 buf = (void *)((((char *)ba) + sizeof(bhead_t))); 528 KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0); 529 return buf; 530 } else { 531 bhead_t *ba; 532 533 ba = BH(((char *)b) + b->bh.bb.bsize); 534 535 KMP_DEBUG_ASSERT(ba->bb.prevfree == b->bh.bb.bsize); 536 537 /* The buffer isn't big enough to split. Give the whole 538 shebang to the caller and remove it from the free list. */ 539 540 __kmp_bget_remove_from_freelist(b); 541 #if BufStats 542 thr->totalloc += (size_t)b->bh.bb.bsize; 543 thr->numget++; /* Increment number of bget() calls */ 544 #endif 545 /* Negate size to mark buffer allocated. */ 546 b->bh.bb.bsize = -(b->bh.bb.bsize); 547 548 /* Mark this buffer as owned by this thread. */ 549 TCW_PTR(ba->bb.bthr, th); // not an allocated address (do not mark) 550 /* Zero the back pointer in the next buffer in memory 551 to indicate that this buffer is allocated. */ 552 ba->bb.prevfree = 0; 553 554 /* Give user buffer starting at queue links. */ 555 buf = (void *)&(b->ql); 556 KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0); 557 return buf; 558 } 559 } 560 561 /* Link to next buffer */ 562 b = (use_blink ? b->ql.blink : b->ql.flink); 563 } 564 } 565 566 /* We failed to find a buffer. If there's a compact function defined, 567 notify it of the size requested. If it returns TRUE, try the allocation 568 again. */ 569 570 if ((thr->compfcn == 0) || (!(*thr->compfcn)(size, ++compactseq))) { 571 break; 572 } 573 } 574 575 /* No buffer available with requested size free. */ 576 577 /* Don't give up yet -- look in the reserve supply. */ 578 if (thr->acqfcn != 0) { 579 if (size > (bufsize)(thr->exp_incr - sizeof(bhead_t))) { 580 /* Request is too large to fit in a single expansion block. 581 Try to satisy it by a direct buffer acquisition. */ 582 bdhead_t *bdh; 583 584 size += sizeof(bdhead_t) - sizeof(bhead_t); 585 586 KE_TRACE(10, ("%%%%%% MALLOC( %d )\n", (int)size)); 587 588 /* richryan */ 589 bdh = BDH((*thr->acqfcn)((bufsize)size)); 590 if (bdh != NULL) { 591 592 // Mark the buffer special by setting size field of its header to zero. 593 bdh->bh.bb.bsize = 0; 594 595 /* Mark this buffer as owned by this thread. */ 596 TCW_PTR(bdh->bh.bb.bthr, th); // don't mark buffer as allocated, 597 // because direct buffer never goes to free list 598 bdh->bh.bb.prevfree = 0; 599 bdh->tsize = size; 600 #if BufStats 601 thr->totalloc += (size_t)size; 602 thr->numget++; /* Increment number of bget() calls */ 603 thr->numdget++; /* Direct bget() call count */ 604 #endif 605 buf = (void *)(bdh + 1); 606 KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0); 607 return buf; 608 } 609 610 } else { 611 612 /* Try to obtain a new expansion block */ 613 void *newpool; 614 615 KE_TRACE(10, ("%%%%%% MALLOCB( %d )\n", (int)thr->exp_incr)); 616 617 /* richryan */ 618 newpool = (*thr->acqfcn)((bufsize)thr->exp_incr); 619 KMP_DEBUG_ASSERT(((size_t)newpool) % SizeQuant == 0); 620 if (newpool != NULL) { 621 bpool(th, newpool, thr->exp_incr); 622 buf = bget( 623 th, requested_size); /* This can't, I say, can't get into a loop. */ 624 return buf; 625 } 626 } 627 } 628 629 /* Still no buffer available */ 630 631 return NULL; 632 } 633 634 /* BGETZ -- Allocate a buffer and clear its contents to zero. We clear 635 the entire contents of the buffer to zero, not just the 636 region requested by the caller. */ 637 638 static void *bgetz(kmp_info_t *th, bufsize size) { 639 char *buf = (char *)bget(th, size); 640 641 if (buf != NULL) { 642 bhead_t *b; 643 bufsize rsize; 644 645 b = BH(buf - sizeof(bhead_t)); 646 rsize = -(b->bb.bsize); 647 if (rsize == 0) { 648 bdhead_t *bd; 649 650 bd = BDH(buf - sizeof(bdhead_t)); 651 rsize = bd->tsize - (bufsize)sizeof(bdhead_t); 652 } else { 653 rsize -= sizeof(bhead_t); 654 } 655 656 KMP_DEBUG_ASSERT(rsize >= size); 657 658 (void)memset(buf, 0, (bufsize)rsize); 659 } 660 return ((void *)buf); 661 } 662 663 /* BGETR -- Reallocate a buffer. This is a minimal implementation, 664 simply in terms of brel() and bget(). It could be 665 enhanced to allow the buffer to grow into adjacent free 666 blocks and to avoid moving data unnecessarily. */ 667 668 static void *bgetr(kmp_info_t *th, void *buf, bufsize size) { 669 void *nbuf; 670 bufsize osize; /* Old size of buffer */ 671 bhead_t *b; 672 673 nbuf = bget(th, size); 674 if (nbuf == NULL) { /* Acquire new buffer */ 675 return NULL; 676 } 677 if (buf == NULL) { 678 return nbuf; 679 } 680 b = BH(((char *)buf) - sizeof(bhead_t)); 681 osize = -b->bb.bsize; 682 if (osize == 0) { 683 /* Buffer acquired directly through acqfcn. */ 684 bdhead_t *bd; 685 686 bd = BDH(((char *)buf) - sizeof(bdhead_t)); 687 osize = bd->tsize - (bufsize)sizeof(bdhead_t); 688 } else { 689 osize -= sizeof(bhead_t); 690 } 691 692 KMP_DEBUG_ASSERT(osize > 0); 693 694 (void)KMP_MEMCPY((char *)nbuf, (char *)buf, /* Copy the data */ 695 (size_t)((size < osize) ? size : osize)); 696 brel(th, buf); 697 698 return nbuf; 699 } 700 701 /* BREL -- Release a buffer. */ 702 static void brel(kmp_info_t *th, void *buf) { 703 thr_data_t *thr = get_thr_data(th); 704 bfhead_t *b, *bn; 705 kmp_info_t *bth; 706 707 KMP_DEBUG_ASSERT(buf != NULL); 708 KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0); 709 710 b = BFH(((char *)buf) - sizeof(bhead_t)); 711 712 if (b->bh.bb.bsize == 0) { /* Directly-acquired buffer? */ 713 bdhead_t *bdh; 714 715 bdh = BDH(((char *)buf) - sizeof(bdhead_t)); 716 KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0); 717 #if BufStats 718 thr->totalloc -= (size_t)bdh->tsize; 719 thr->numdrel++; /* Number of direct releases */ 720 thr->numrel++; /* Increment number of brel() calls */ 721 #endif /* BufStats */ 722 #ifdef FreeWipe 723 (void)memset((char *)buf, 0x55, (size_t)(bdh->tsize - sizeof(bdhead_t))); 724 #endif /* FreeWipe */ 725 726 KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)bdh)); 727 728 KMP_DEBUG_ASSERT(thr->relfcn != 0); 729 (*thr->relfcn)((void *)bdh); /* Release it directly. */ 730 return; 731 } 732 733 bth = (kmp_info_t *)((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & 734 ~1); // clear possible mark before comparison 735 if (bth != th) { 736 /* Add this buffer to be released by the owning thread later */ 737 __kmp_bget_enqueue(bth, buf 738 #ifdef USE_QUEUING_LOCK_FOR_BGET 739 , 740 __kmp_gtid_from_thread(th) 741 #endif 742 ); 743 return; 744 } 745 746 /* Buffer size must be negative, indicating that the buffer is allocated. */ 747 if (b->bh.bb.bsize >= 0) { 748 bn = NULL; 749 } 750 KMP_DEBUG_ASSERT(b->bh.bb.bsize < 0); 751 752 /* Back pointer in next buffer must be zero, indicating the same thing: */ 753 754 KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.bsize)->bb.prevfree == 0); 755 756 #if BufStats 757 thr->numrel++; /* Increment number of brel() calls */ 758 thr->totalloc += (size_t)b->bh.bb.bsize; 759 #endif 760 761 /* If the back link is nonzero, the previous buffer is free. */ 762 763 if (b->bh.bb.prevfree != 0) { 764 /* The previous buffer is free. Consolidate this buffer with it by adding 765 the length of this buffer to the previous free buffer. Note that we 766 subtract the size in the buffer being released, since it's negative to 767 indicate that the buffer is allocated. */ 768 bufsize size = b->bh.bb.bsize; 769 770 /* Make the previous buffer the one we're working on. */ 771 KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.prevfree)->bb.bsize == 772 b->bh.bb.prevfree); 773 b = BFH(((char *)b) - b->bh.bb.prevfree); 774 b->bh.bb.bsize -= size; 775 776 /* unlink the buffer from the old freelist */ 777 __kmp_bget_remove_from_freelist(b); 778 } else { 779 /* The previous buffer isn't allocated. Mark this buffer size as positive 780 (i.e. free) and fall through to place the buffer on the free list as an 781 isolated free block. */ 782 b->bh.bb.bsize = -b->bh.bb.bsize; 783 } 784 785 /* insert buffer back onto a new freelist */ 786 __kmp_bget_insert_into_freelist(thr, b); 787 788 /* Now we look at the next buffer in memory, located by advancing from 789 the start of this buffer by its size, to see if that buffer is 790 free. If it is, we combine this buffer with the next one in 791 memory, dechaining the second buffer from the free list. */ 792 bn = BFH(((char *)b) + b->bh.bb.bsize); 793 if (bn->bh.bb.bsize > 0) { 794 795 /* The buffer is free. Remove it from the free list and add 796 its size to that of our buffer. */ 797 KMP_DEBUG_ASSERT(BH((char *)bn + bn->bh.bb.bsize)->bb.prevfree == 798 bn->bh.bb.bsize); 799 800 __kmp_bget_remove_from_freelist(bn); 801 802 b->bh.bb.bsize += bn->bh.bb.bsize; 803 804 /* unlink the buffer from the old freelist, and reinsert it into the new 805 * freelist */ 806 __kmp_bget_remove_from_freelist(b); 807 __kmp_bget_insert_into_freelist(thr, b); 808 809 /* Finally, advance to the buffer that follows the newly 810 consolidated free block. We must set its backpointer to the 811 head of the consolidated free block. We know the next block 812 must be an allocated block because the process of recombination 813 guarantees that two free blocks will never be contiguous in 814 memory. */ 815 bn = BFH(((char *)b) + b->bh.bb.bsize); 816 } 817 #ifdef FreeWipe 818 (void)memset(((char *)b) + sizeof(bfhead_t), 0x55, 819 (size_t)(b->bh.bb.bsize - sizeof(bfhead_t))); 820 #endif 821 KMP_DEBUG_ASSERT(bn->bh.bb.bsize < 0); 822 823 /* The next buffer is allocated. Set the backpointer in it to point 824 to this buffer; the previous free buffer in memory. */ 825 826 bn->bh.bb.prevfree = b->bh.bb.bsize; 827 828 /* If a block-release function is defined, and this free buffer 829 constitutes the entire block, release it. Note that pool_len 830 is defined in such a way that the test will fail unless all 831 pool blocks are the same size. */ 832 if (thr->relfcn != 0 && 833 b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) { 834 #if BufStats 835 if (thr->numpblk != 836 1) { /* Do not release the last buffer until finalization time */ 837 #endif 838 839 KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0); 840 KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent); 841 KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree == 842 b->bh.bb.bsize); 843 844 /* Unlink the buffer from the free list */ 845 __kmp_bget_remove_from_freelist(b); 846 847 KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b)); 848 849 (*thr->relfcn)(b); 850 #if BufStats 851 thr->numprel++; /* Nr of expansion block releases */ 852 thr->numpblk--; /* Total number of blocks */ 853 KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel); 854 855 // avoid leaving stale last_pool pointer around if it is being dealloced 856 if (thr->last_pool == b) 857 thr->last_pool = 0; 858 } else { 859 thr->last_pool = b; 860 } 861 #endif /* BufStats */ 862 } 863 } 864 865 /* BECTL -- Establish automatic pool expansion control */ 866 static void bectl(kmp_info_t *th, bget_compact_t compact, 867 bget_acquire_t acquire, bget_release_t release, 868 bufsize pool_incr) { 869 thr_data_t *thr = get_thr_data(th); 870 871 thr->compfcn = compact; 872 thr->acqfcn = acquire; 873 thr->relfcn = release; 874 thr->exp_incr = pool_incr; 875 } 876 877 /* BPOOL -- Add a region of memory to the buffer pool. */ 878 static void bpool(kmp_info_t *th, void *buf, bufsize len) { 879 /* int bin = 0; */ 880 thr_data_t *thr = get_thr_data(th); 881 bfhead_t *b = BFH(buf); 882 bhead_t *bn; 883 884 __kmp_bget_dequeue(th); /* Release any queued buffers */ 885 886 #ifdef SizeQuant 887 len &= ~(SizeQuant - 1); 888 #endif 889 if (thr->pool_len == 0) { 890 thr->pool_len = len; 891 } else if (len != thr->pool_len) { 892 thr->pool_len = -1; 893 } 894 #if BufStats 895 thr->numpget++; /* Number of block acquisitions */ 896 thr->numpblk++; /* Number of blocks total */ 897 KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel); 898 #endif /* BufStats */ 899 900 /* Since the block is initially occupied by a single free buffer, 901 it had better not be (much) larger than the largest buffer 902 whose size we can store in bhead.bb.bsize. */ 903 KMP_DEBUG_ASSERT(len - sizeof(bhead_t) <= -((bufsize)ESent + 1)); 904 905 /* Clear the backpointer at the start of the block to indicate that 906 there is no free block prior to this one. That blocks 907 recombination when the first block in memory is released. */ 908 b->bh.bb.prevfree = 0; 909 910 /* Create a dummy allocated buffer at the end of the pool. This dummy 911 buffer is seen when a buffer at the end of the pool is released and 912 blocks recombination of the last buffer with the dummy buffer at 913 the end. The length in the dummy buffer is set to the largest 914 negative number to denote the end of the pool for diagnostic 915 routines (this specific value is not counted on by the actual 916 allocation and release functions). */ 917 len -= sizeof(bhead_t); 918 b->bh.bb.bsize = (bufsize)len; 919 /* Set the owner of this buffer */ 920 TCW_PTR(b->bh.bb.bthr, 921 (kmp_info_t *)((kmp_uintptr_t)th | 922 1)); // mark the buffer as allocated address 923 924 /* Chain the new block to the free list. */ 925 __kmp_bget_insert_into_freelist(thr, b); 926 927 #ifdef FreeWipe 928 (void)memset(((char *)b) + sizeof(bfhead_t), 0x55, 929 (size_t)(len - sizeof(bfhead_t))); 930 #endif 931 bn = BH(((char *)b) + len); 932 bn->bb.prevfree = (bufsize)len; 933 /* Definition of ESent assumes two's complement! */ 934 KMP_DEBUG_ASSERT((~0) == -1 && (bn != 0)); 935 936 bn->bb.bsize = ESent; 937 } 938 939 /* BFREED -- Dump the free lists for this thread. */ 940 static void bfreed(kmp_info_t *th) { 941 int bin = 0, count = 0; 942 int gtid = __kmp_gtid_from_thread(th); 943 thr_data_t *thr = get_thr_data(th); 944 945 #if BufStats 946 __kmp_printf_no_lock("__kmp_printpool: T#%d total=%" KMP_UINT64_SPEC 947 " get=%" KMP_INT64_SPEC " rel=%" KMP_INT64_SPEC 948 " pblk=%" KMP_INT64_SPEC " pget=%" KMP_INT64_SPEC 949 " prel=%" KMP_INT64_SPEC " dget=%" KMP_INT64_SPEC 950 " drel=%" KMP_INT64_SPEC "\n", 951 gtid, (kmp_uint64)thr->totalloc, (kmp_int64)thr->numget, 952 (kmp_int64)thr->numrel, (kmp_int64)thr->numpblk, 953 (kmp_int64)thr->numpget, (kmp_int64)thr->numprel, 954 (kmp_int64)thr->numdget, (kmp_int64)thr->numdrel); 955 #endif 956 957 for (bin = 0; bin < MAX_BGET_BINS; ++bin) { 958 bfhead_t *b; 959 960 for (b = thr->freelist[bin].ql.flink; b != &thr->freelist[bin]; 961 b = b->ql.flink) { 962 bufsize bs = b->bh.bb.bsize; 963 964 KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b); 965 KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b); 966 KMP_DEBUG_ASSERT(bs > 0); 967 968 count += 1; 969 970 __kmp_printf_no_lock( 971 "__kmp_printpool: T#%d Free block: 0x%p size %6ld bytes.\n", gtid, b, 972 (long)bs); 973 #ifdef FreeWipe 974 { 975 char *lerr = ((char *)b) + sizeof(bfhead_t); 976 if ((bs > sizeof(bfhead_t)) && 977 ((*lerr != 0x55) || 978 (memcmp(lerr, lerr + 1, (size_t)(bs - (sizeof(bfhead_t) + 1))) != 979 0))) { 980 __kmp_printf_no_lock("__kmp_printpool: T#%d (Contents of above " 981 "free block have been overstored.)\n", 982 gtid); 983 } 984 } 985 #endif 986 } 987 } 988 989 if (count == 0) 990 __kmp_printf_no_lock("__kmp_printpool: T#%d No free blocks\n", gtid); 991 } 992 993 void __kmp_initialize_bget(kmp_info_t *th) { 994 KMP_DEBUG_ASSERT(SizeQuant >= sizeof(void *) && (th != 0)); 995 996 set_thr_data(th); 997 998 bectl(th, (bget_compact_t)0, (bget_acquire_t)malloc, (bget_release_t)free, 999 (bufsize)__kmp_malloc_pool_incr); 1000 } 1001 1002 void __kmp_finalize_bget(kmp_info_t *th) { 1003 thr_data_t *thr; 1004 bfhead_t *b; 1005 1006 KMP_DEBUG_ASSERT(th != 0); 1007 1008 #if BufStats 1009 thr = (thr_data_t *)th->th.th_local.bget_data; 1010 KMP_DEBUG_ASSERT(thr != NULL); 1011 b = thr->last_pool; 1012 1013 /* If a block-release function is defined, and this free buffer constitutes 1014 the entire block, release it. Note that pool_len is defined in such a way 1015 that the test will fail unless all pool blocks are the same size. */ 1016 1017 // Deallocate the last pool if one exists because we no longer do it in brel() 1018 if (thr->relfcn != 0 && b != 0 && thr->numpblk != 0 && 1019 b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) { 1020 KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0); 1021 KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent); 1022 KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree == 1023 b->bh.bb.bsize); 1024 1025 /* Unlink the buffer from the free list */ 1026 __kmp_bget_remove_from_freelist(b); 1027 1028 KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b)); 1029 1030 (*thr->relfcn)(b); 1031 thr->numprel++; /* Nr of expansion block releases */ 1032 thr->numpblk--; /* Total number of blocks */ 1033 KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel); 1034 } 1035 #endif /* BufStats */ 1036 1037 /* Deallocate bget_data */ 1038 if (th->th.th_local.bget_data != NULL) { 1039 __kmp_free(th->th.th_local.bget_data); 1040 th->th.th_local.bget_data = NULL; 1041 } 1042 } 1043 1044 void kmpc_set_poolsize(size_t size) { 1045 bectl(__kmp_get_thread(), (bget_compact_t)0, (bget_acquire_t)malloc, 1046 (bget_release_t)free, (bufsize)size); 1047 } 1048 1049 size_t kmpc_get_poolsize(void) { 1050 thr_data_t *p; 1051 1052 p = get_thr_data(__kmp_get_thread()); 1053 1054 return p->exp_incr; 1055 } 1056 1057 void kmpc_set_poolmode(int mode) { 1058 thr_data_t *p; 1059 1060 if (mode == bget_mode_fifo || mode == bget_mode_lifo || 1061 mode == bget_mode_best) { 1062 p = get_thr_data(__kmp_get_thread()); 1063 p->mode = (bget_mode_t)mode; 1064 } 1065 } 1066 1067 int kmpc_get_poolmode(void) { 1068 thr_data_t *p; 1069 1070 p = get_thr_data(__kmp_get_thread()); 1071 1072 return p->mode; 1073 } 1074 1075 void kmpc_get_poolstat(size_t *maxmem, size_t *allmem) { 1076 kmp_info_t *th = __kmp_get_thread(); 1077 bufsize a, b; 1078 1079 __kmp_bget_dequeue(th); /* Release any queued buffers */ 1080 1081 bcheck(th, &a, &b); 1082 1083 *maxmem = a; 1084 *allmem = b; 1085 } 1086 1087 void kmpc_poolprint(void) { 1088 kmp_info_t *th = __kmp_get_thread(); 1089 1090 __kmp_bget_dequeue(th); /* Release any queued buffers */ 1091 1092 bfreed(th); 1093 } 1094 1095 #endif // #if KMP_USE_BGET 1096 1097 void *kmpc_malloc(size_t size) { 1098 void *ptr; 1099 ptr = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr))); 1100 if (ptr != NULL) { 1101 // save allocated pointer just before one returned to user 1102 *(void **)ptr = ptr; 1103 ptr = (void **)ptr + 1; 1104 } 1105 return ptr; 1106 } 1107 1108 #define IS_POWER_OF_TWO(n) (((n) & ((n)-1)) == 0) 1109 1110 void *kmpc_aligned_malloc(size_t size, size_t alignment) { 1111 void *ptr; 1112 void *ptr_allocated; 1113 KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too big 1114 if (!IS_POWER_OF_TWO(alignment)) { 1115 // AC: do we need to issue a warning here? 1116 errno = EINVAL; 1117 return NULL; 1118 } 1119 size = size + sizeof(void *) + alignment; 1120 ptr_allocated = bget(__kmp_entry_thread(), (bufsize)size); 1121 if (ptr_allocated != NULL) { 1122 // save allocated pointer just before one returned to user 1123 ptr = (void *)(((kmp_uintptr_t)ptr_allocated + sizeof(void *) + alignment) & 1124 ~(alignment - 1)); 1125 *((void **)ptr - 1) = ptr_allocated; 1126 } else { 1127 ptr = NULL; 1128 } 1129 return ptr; 1130 } 1131 1132 void *kmpc_calloc(size_t nelem, size_t elsize) { 1133 void *ptr; 1134 ptr = bgetz(__kmp_entry_thread(), (bufsize)(nelem * elsize + sizeof(ptr))); 1135 if (ptr != NULL) { 1136 // save allocated pointer just before one returned to user 1137 *(void **)ptr = ptr; 1138 ptr = (void **)ptr + 1; 1139 } 1140 return ptr; 1141 } 1142 1143 void *kmpc_realloc(void *ptr, size_t size) { 1144 void *result = NULL; 1145 if (ptr == NULL) { 1146 // If pointer is NULL, realloc behaves like malloc. 1147 result = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr))); 1148 // save allocated pointer just before one returned to user 1149 if (result != NULL) { 1150 *(void **)result = result; 1151 result = (void **)result + 1; 1152 } 1153 } else if (size == 0) { 1154 // If size is 0, realloc behaves like free. 1155 // The thread must be registered by the call to kmpc_malloc() or 1156 // kmpc_calloc() before. 1157 // So it should be safe to call __kmp_get_thread(), not 1158 // __kmp_entry_thread(). 1159 KMP_ASSERT(*((void **)ptr - 1)); 1160 brel(__kmp_get_thread(), *((void **)ptr - 1)); 1161 } else { 1162 result = bgetr(__kmp_entry_thread(), *((void **)ptr - 1), 1163 (bufsize)(size + sizeof(ptr))); 1164 if (result != NULL) { 1165 *(void **)result = result; 1166 result = (void **)result + 1; 1167 } 1168 } 1169 return result; 1170 } 1171 1172 // NOTE: the library must have already been initialized by a previous allocate 1173 void kmpc_free(void *ptr) { 1174 if (!__kmp_init_serial) { 1175 return; 1176 } 1177 if (ptr != NULL) { 1178 kmp_info_t *th = __kmp_get_thread(); 1179 __kmp_bget_dequeue(th); /* Release any queued buffers */ 1180 // extract allocated pointer and free it 1181 KMP_ASSERT(*((void **)ptr - 1)); 1182 brel(th, *((void **)ptr - 1)); 1183 } 1184 } 1185 1186 void *___kmp_thread_malloc(kmp_info_t *th, size_t size KMP_SRC_LOC_DECL) { 1187 void *ptr; 1188 KE_TRACE(30, ("-> __kmp_thread_malloc( %p, %d ) called from %s:%d\n", th, 1189 (int)size KMP_SRC_LOC_PARM)); 1190 ptr = bget(th, (bufsize)size); 1191 KE_TRACE(30, ("<- __kmp_thread_malloc() returns %p\n", ptr)); 1192 return ptr; 1193 } 1194 1195 void *___kmp_thread_calloc(kmp_info_t *th, size_t nelem, 1196 size_t elsize KMP_SRC_LOC_DECL) { 1197 void *ptr; 1198 KE_TRACE(30, ("-> __kmp_thread_calloc( %p, %d, %d ) called from %s:%d\n", th, 1199 (int)nelem, (int)elsize KMP_SRC_LOC_PARM)); 1200 ptr = bgetz(th, (bufsize)(nelem * elsize)); 1201 KE_TRACE(30, ("<- __kmp_thread_calloc() returns %p\n", ptr)); 1202 return ptr; 1203 } 1204 1205 void *___kmp_thread_realloc(kmp_info_t *th, void *ptr, 1206 size_t size KMP_SRC_LOC_DECL) { 1207 KE_TRACE(30, ("-> __kmp_thread_realloc( %p, %p, %d ) called from %s:%d\n", th, 1208 ptr, (int)size KMP_SRC_LOC_PARM)); 1209 ptr = bgetr(th, ptr, (bufsize)size); 1210 KE_TRACE(30, ("<- __kmp_thread_realloc() returns %p\n", ptr)); 1211 return ptr; 1212 } 1213 1214 void ___kmp_thread_free(kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL) { 1215 KE_TRACE(30, ("-> __kmp_thread_free( %p, %p ) called from %s:%d\n", th, 1216 ptr KMP_SRC_LOC_PARM)); 1217 if (ptr != NULL) { 1218 __kmp_bget_dequeue(th); /* Release any queued buffers */ 1219 brel(th, ptr); 1220 } 1221 KE_TRACE(30, ("<- __kmp_thread_free()\n")); 1222 } 1223 1224 #if OMP_50_ENABLED 1225 /* OMP 5.0 Memory Management support */ 1226 static int (*p_hbw_check)(void); 1227 static void *(*p_hbw_malloc)(size_t); 1228 static void (*p_hbw_free)(void *); 1229 static int (*p_hbw_set_policy)(int); 1230 static const char *kmp_mk_lib_name; 1231 static void *h_memkind; 1232 1233 void __kmp_init_memkind() { 1234 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB 1235 kmp_mk_lib_name = "libmemkind.so"; 1236 h_memkind = dlopen(kmp_mk_lib_name, RTLD_LAZY); 1237 if (h_memkind) { 1238 p_hbw_check = (int (*)())dlsym(h_memkind, "hbw_check_available"); 1239 p_hbw_malloc = (void *(*)(size_t))dlsym(h_memkind, "hbw_malloc"); 1240 p_hbw_free = (void (*)(void *))dlsym(h_memkind, "hbw_free"); 1241 p_hbw_set_policy = (int (*)(int))dlsym(h_memkind, "hbw_set_policy"); 1242 if (p_hbw_check && p_hbw_malloc && p_hbw_free && p_hbw_set_policy) { 1243 __kmp_memkind_available = 1; 1244 if (p_hbw_check() == 0) { 1245 p_hbw_set_policy(1); // return NULL is not enough memory 1246 __kmp_hbw_mem_available = 1; // found HBW memory available 1247 } 1248 return; // success - all symbols resolved 1249 } 1250 dlclose(h_memkind); // failure 1251 h_memkind = NULL; 1252 } 1253 p_hbw_check = NULL; 1254 p_hbw_malloc = NULL; 1255 p_hbw_free = NULL; 1256 p_hbw_set_policy = NULL; 1257 #else 1258 kmp_mk_lib_name = ""; 1259 h_memkind = NULL; 1260 p_hbw_check = NULL; 1261 p_hbw_malloc = NULL; 1262 p_hbw_free = NULL; 1263 p_hbw_set_policy = NULL; 1264 #endif 1265 } 1266 1267 void __kmp_fini_memkind() { 1268 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB 1269 if (h_memkind) { 1270 dlclose(h_memkind); 1271 h_memkind = NULL; 1272 } 1273 p_hbw_check = NULL; 1274 p_hbw_malloc = NULL; 1275 p_hbw_free = NULL; 1276 p_hbw_set_policy = NULL; 1277 #endif 1278 } 1279 1280 void __kmpc_set_default_allocator(int gtid, const omp_allocator_t *allocator) { 1281 if (allocator == OMP_NULL_ALLOCATOR) 1282 allocator = omp_default_mem_alloc; 1283 KMP_DEBUG_ASSERT( 1284 allocator == omp_default_mem_alloc || 1285 allocator == omp_large_cap_mem_alloc || 1286 allocator == omp_const_mem_alloc || allocator == omp_high_bw_mem_alloc || 1287 allocator == omp_low_lat_mem_alloc || allocator == omp_cgroup_mem_alloc || 1288 allocator == omp_pteam_mem_alloc || allocator == omp_thread_mem_alloc); 1289 __kmp_threads[gtid]->th.th_def_allocator = allocator; 1290 } 1291 const omp_allocator_t *__kmpc_get_default_allocator(int gtid) { 1292 return __kmp_threads[gtid]->th.th_def_allocator; 1293 } 1294 1295 typedef struct kmp_mem_desc { // Memory block descriptor 1296 void *ptr_alloc; // Pointer returned by allocator 1297 size_t size_a; // Size of allocated memory block (initial+descriptor+align) 1298 void *ptr_align; // Pointer to aligned memory, returned 1299 const omp_allocator_t *allocator; // allocator 1300 } kmp_mem_desc_t; 1301 static int alignment = sizeof(void *); // let's align to pointer size 1302 1303 void *__kmpc_alloc(int gtid, size_t size, const omp_allocator_t *allocator) { 1304 KMP_DEBUG_ASSERT(__kmp_init_serial); 1305 if (allocator == OMP_NULL_ALLOCATOR) 1306 allocator = __kmp_threads[gtid]->th.th_def_allocator; 1307 1308 int sz_desc = sizeof(kmp_mem_desc_t); 1309 void *ptr = NULL; 1310 kmp_mem_desc_t desc; 1311 kmp_uintptr_t addr; // address returned by allocator 1312 kmp_uintptr_t addr_align; // address to return to caller 1313 kmp_uintptr_t addr_descr; // address of memory block descriptor 1314 1315 KE_TRACE(25, ("__kmpc_alloc: T#%d (%d, %p)\n", gtid, (int)size, allocator)); 1316 1317 desc.size_a = size + sz_desc + alignment; 1318 if (allocator == omp_default_mem_alloc) 1319 ptr = __kmp_allocate(desc.size_a); 1320 if (allocator == omp_high_bw_mem_alloc && __kmp_hbw_mem_available) { 1321 KMP_DEBUG_ASSERT(p_hbw_malloc != NULL); 1322 ptr = p_hbw_malloc(desc.size_a); 1323 } 1324 1325 KE_TRACE(10, ("__kmpc_alloc: T#%d %p=alloc(%d) hbw %d\n", gtid, ptr, 1326 desc.size_a, __kmp_hbw_mem_available)); 1327 if (ptr == NULL) 1328 return NULL; 1329 1330 addr = (kmp_uintptr_t)ptr; 1331 addr_align = (addr + sz_desc + alignment - 1) & ~(alignment - 1); 1332 addr_descr = addr_align - sz_desc; 1333 1334 desc.ptr_alloc = ptr; 1335 desc.ptr_align = (void *)addr_align; 1336 desc.allocator = allocator; 1337 *((kmp_mem_desc_t *)addr_descr) = desc; // save descriptor contents 1338 KMP_MB(); 1339 1340 KE_TRACE(25, ("__kmpc_alloc returns %p, T#%d\n", desc.ptr_align, gtid)); 1341 return desc.ptr_align; 1342 } 1343 1344 void __kmpc_free(int gtid, void *ptr, const omp_allocator_t *allocator) { 1345 KE_TRACE(25, ("__kmpc_free: T#%d free(%p,%p)\n", gtid, ptr, allocator)); 1346 if (ptr == NULL) 1347 return; 1348 1349 kmp_mem_desc_t desc; 1350 kmp_uintptr_t addr_align; // address to return to caller 1351 kmp_uintptr_t addr_descr; // address of memory block descriptor 1352 1353 addr_align = (kmp_uintptr_t)ptr; 1354 addr_descr = addr_align - sizeof(kmp_mem_desc_t); 1355 desc = *((kmp_mem_desc_t *)addr_descr); // read descriptor 1356 1357 KMP_DEBUG_ASSERT(desc.ptr_align == ptr); 1358 if (allocator) { 1359 KMP_DEBUG_ASSERT(desc.allocator == allocator); 1360 } else { 1361 allocator = desc.allocator; 1362 } 1363 KMP_DEBUG_ASSERT(allocator); 1364 1365 if (allocator == omp_default_mem_alloc) 1366 __kmp_free(desc.ptr_alloc); 1367 if (allocator == omp_high_bw_mem_alloc && __kmp_hbw_mem_available) { 1368 KMP_DEBUG_ASSERT(p_hbw_free != NULL); 1369 p_hbw_free(desc.ptr_alloc); 1370 } 1371 KE_TRACE(10, ("__kmpc_free: T#%d freed %p (%p)\n", gtid, desc.ptr_alloc, 1372 allocator)); 1373 } 1374 1375 #endif 1376 1377 /* If LEAK_MEMORY is defined, __kmp_free() will *not* free memory. It causes 1378 memory leaks, but it may be useful for debugging memory corruptions, used 1379 freed pointers, etc. */ 1380 /* #define LEAK_MEMORY */ 1381 struct kmp_mem_descr { // Memory block descriptor. 1382 void *ptr_allocated; // Pointer returned by malloc(), subject for free(). 1383 size_t size_allocated; // Size of allocated memory block. 1384 void *ptr_aligned; // Pointer to aligned memory, to be used by client code. 1385 size_t size_aligned; // Size of aligned memory block. 1386 }; 1387 typedef struct kmp_mem_descr kmp_mem_descr_t; 1388 1389 /* Allocate memory on requested boundary, fill allocated memory with 0x00. 1390 NULL is NEVER returned, __kmp_abort() is called in case of memory allocation 1391 error. Must use __kmp_free when freeing memory allocated by this routine! */ 1392 static void *___kmp_allocate_align(size_t size, 1393 size_t alignment KMP_SRC_LOC_DECL) { 1394 /* __kmp_allocate() allocates (by call to malloc()) bigger memory block than 1395 requested to return properly aligned pointer. Original pointer returned 1396 by malloc() and size of allocated block is saved in descriptor just 1397 before the aligned pointer. This information used by __kmp_free() -- it 1398 has to pass to free() original pointer, not aligned one. 1399 1400 +---------+------------+-----------------------------------+---------+ 1401 | padding | descriptor | aligned block | padding | 1402 +---------+------------+-----------------------------------+---------+ 1403 ^ ^ 1404 | | 1405 | +- Aligned pointer returned to caller 1406 +- Pointer returned by malloc() 1407 1408 Aligned block is filled with zeros, paddings are filled with 0xEF. */ 1409 1410 kmp_mem_descr_t descr; 1411 kmp_uintptr_t addr_allocated; // Address returned by malloc(). 1412 kmp_uintptr_t addr_aligned; // Aligned address to return to caller. 1413 kmp_uintptr_t addr_descr; // Address of memory block descriptor. 1414 1415 KE_TRACE(25, ("-> ___kmp_allocate_align( %d, %d ) called from %s:%d\n", 1416 (int)size, (int)alignment KMP_SRC_LOC_PARM)); 1417 1418 KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too 1419 KMP_DEBUG_ASSERT(sizeof(void *) <= sizeof(kmp_uintptr_t)); 1420 // Make sure kmp_uintptr_t is enough to store addresses. 1421 1422 descr.size_aligned = size; 1423 descr.size_allocated = 1424 descr.size_aligned + sizeof(kmp_mem_descr_t) + alignment; 1425 1426 #if KMP_DEBUG 1427 descr.ptr_allocated = _malloc_src_loc(descr.size_allocated, _file_, _line_); 1428 #else 1429 descr.ptr_allocated = malloc_src_loc(descr.size_allocated KMP_SRC_LOC_PARM); 1430 #endif 1431 KE_TRACE(10, (" malloc( %d ) returned %p\n", (int)descr.size_allocated, 1432 descr.ptr_allocated)); 1433 if (descr.ptr_allocated == NULL) { 1434 KMP_FATAL(OutOfHeapMemory); 1435 } 1436 1437 addr_allocated = (kmp_uintptr_t)descr.ptr_allocated; 1438 addr_aligned = 1439 (addr_allocated + sizeof(kmp_mem_descr_t) + alignment) & ~(alignment - 1); 1440 addr_descr = addr_aligned - sizeof(kmp_mem_descr_t); 1441 1442 descr.ptr_aligned = (void *)addr_aligned; 1443 1444 KE_TRACE(26, (" ___kmp_allocate_align: " 1445 "ptr_allocated=%p, size_allocated=%d, " 1446 "ptr_aligned=%p, size_aligned=%d\n", 1447 descr.ptr_allocated, (int)descr.size_allocated, 1448 descr.ptr_aligned, (int)descr.size_aligned)); 1449 1450 KMP_DEBUG_ASSERT(addr_allocated <= addr_descr); 1451 KMP_DEBUG_ASSERT(addr_descr + sizeof(kmp_mem_descr_t) == addr_aligned); 1452 KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <= 1453 addr_allocated + descr.size_allocated); 1454 KMP_DEBUG_ASSERT(addr_aligned % alignment == 0); 1455 #ifdef KMP_DEBUG 1456 memset(descr.ptr_allocated, 0xEF, descr.size_allocated); 1457 // Fill allocated memory block with 0xEF. 1458 #endif 1459 memset(descr.ptr_aligned, 0x00, descr.size_aligned); 1460 // Fill the aligned memory block (which is intended for using by caller) with 1461 // 0x00. Do not 1462 // put this filling under KMP_DEBUG condition! Many callers expect zeroed 1463 // memory. (Padding 1464 // bytes remain filled with 0xEF in debugging library.) 1465 *((kmp_mem_descr_t *)addr_descr) = descr; 1466 1467 KMP_MB(); 1468 1469 KE_TRACE(25, ("<- ___kmp_allocate_align() returns %p\n", descr.ptr_aligned)); 1470 return descr.ptr_aligned; 1471 } // func ___kmp_allocate_align 1472 1473 /* Allocate memory on cache line boundary, fill allocated memory with 0x00. 1474 Do not call this func directly! Use __kmp_allocate macro instead. 1475 NULL is NEVER returned, __kmp_abort() is called in case of memory allocation 1476 error. Must use __kmp_free when freeing memory allocated by this routine! */ 1477 void *___kmp_allocate(size_t size KMP_SRC_LOC_DECL) { 1478 void *ptr; 1479 KE_TRACE(25, ("-> __kmp_allocate( %d ) called from %s:%d\n", 1480 (int)size KMP_SRC_LOC_PARM)); 1481 ptr = ___kmp_allocate_align(size, __kmp_align_alloc KMP_SRC_LOC_PARM); 1482 KE_TRACE(25, ("<- __kmp_allocate() returns %p\n", ptr)); 1483 return ptr; 1484 } // func ___kmp_allocate 1485 1486 /* Allocate memory on page boundary, fill allocated memory with 0x00. 1487 Does not call this func directly! Use __kmp_page_allocate macro instead. 1488 NULL is NEVER returned, __kmp_abort() is called in case of memory allocation 1489 error. Must use __kmp_free when freeing memory allocated by this routine! */ 1490 void *___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL) { 1491 int page_size = 8 * 1024; 1492 void *ptr; 1493 1494 KE_TRACE(25, ("-> __kmp_page_allocate( %d ) called from %s:%d\n", 1495 (int)size KMP_SRC_LOC_PARM)); 1496 ptr = ___kmp_allocate_align(size, page_size KMP_SRC_LOC_PARM); 1497 KE_TRACE(25, ("<- __kmp_page_allocate( %d ) returns %p\n", (int)size, ptr)); 1498 return ptr; 1499 } // ___kmp_page_allocate 1500 1501 /* Free memory allocated by __kmp_allocate() and __kmp_page_allocate(). 1502 In debug mode, fill the memory block with 0xEF before call to free(). */ 1503 void ___kmp_free(void *ptr KMP_SRC_LOC_DECL) { 1504 kmp_mem_descr_t descr; 1505 kmp_uintptr_t addr_allocated; // Address returned by malloc(). 1506 kmp_uintptr_t addr_aligned; // Aligned address passed by caller. 1507 1508 KE_TRACE(25, 1509 ("-> __kmp_free( %p ) called from %s:%d\n", ptr KMP_SRC_LOC_PARM)); 1510 KMP_ASSERT(ptr != NULL); 1511 1512 descr = *(kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t)); 1513 1514 KE_TRACE(26, (" __kmp_free: " 1515 "ptr_allocated=%p, size_allocated=%d, " 1516 "ptr_aligned=%p, size_aligned=%d\n", 1517 descr.ptr_allocated, (int)descr.size_allocated, 1518 descr.ptr_aligned, (int)descr.size_aligned)); 1519 1520 addr_allocated = (kmp_uintptr_t)descr.ptr_allocated; 1521 addr_aligned = (kmp_uintptr_t)descr.ptr_aligned; 1522 1523 KMP_DEBUG_ASSERT(addr_aligned % CACHE_LINE == 0); 1524 KMP_DEBUG_ASSERT(descr.ptr_aligned == ptr); 1525 KMP_DEBUG_ASSERT(addr_allocated + sizeof(kmp_mem_descr_t) <= addr_aligned); 1526 KMP_DEBUG_ASSERT(descr.size_aligned < descr.size_allocated); 1527 KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <= 1528 addr_allocated + descr.size_allocated); 1529 1530 #ifdef KMP_DEBUG 1531 memset(descr.ptr_allocated, 0xEF, descr.size_allocated); 1532 // Fill memory block with 0xEF, it helps catch using freed memory. 1533 #endif 1534 1535 #ifndef LEAK_MEMORY 1536 KE_TRACE(10, (" free( %p )\n", descr.ptr_allocated)); 1537 #ifdef KMP_DEBUG 1538 _free_src_loc(descr.ptr_allocated, _file_, _line_); 1539 #else 1540 free_src_loc(descr.ptr_allocated KMP_SRC_LOC_PARM); 1541 #endif 1542 #endif 1543 KMP_MB(); 1544 KE_TRACE(25, ("<- __kmp_free() returns\n")); 1545 } // func ___kmp_free 1546 1547 #if USE_FAST_MEMORY == 3 1548 // Allocate fast memory by first scanning the thread's free lists 1549 // If a chunk the right size exists, grab it off the free list. 1550 // Otherwise allocate normally using kmp_thread_malloc. 1551 1552 // AC: How to choose the limit? Just get 16 for now... 1553 #define KMP_FREE_LIST_LIMIT 16 1554 1555 // Always use 128 bytes for determining buckets for caching memory blocks 1556 #define DCACHE_LINE 128 1557 1558 void *___kmp_fast_allocate(kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL) { 1559 void *ptr; 1560 int num_lines; 1561 int idx; 1562 int index; 1563 void *alloc_ptr; 1564 size_t alloc_size; 1565 kmp_mem_descr_t *descr; 1566 1567 KE_TRACE(25, ("-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n", 1568 __kmp_gtid_from_thread(this_thr), (int)size KMP_SRC_LOC_PARM)); 1569 1570 num_lines = (size + DCACHE_LINE - 1) / DCACHE_LINE; 1571 idx = num_lines - 1; 1572 KMP_DEBUG_ASSERT(idx >= 0); 1573 if (idx < 2) { 1574 index = 0; // idx is [ 0, 1 ], use first free list 1575 num_lines = 2; // 1, 2 cache lines or less than cache line 1576 } else if ((idx >>= 2) == 0) { 1577 index = 1; // idx is [ 2, 3 ], use second free list 1578 num_lines = 4; // 3, 4 cache lines 1579 } else if ((idx >>= 2) == 0) { 1580 index = 2; // idx is [ 4, 15 ], use third free list 1581 num_lines = 16; // 5, 6, ..., 16 cache lines 1582 } else if ((idx >>= 2) == 0) { 1583 index = 3; // idx is [ 16, 63 ], use fourth free list 1584 num_lines = 64; // 17, 18, ..., 64 cache lines 1585 } else { 1586 goto alloc_call; // 65 or more cache lines ( > 8KB ), don't use free lists 1587 } 1588 1589 ptr = this_thr->th.th_free_lists[index].th_free_list_self; 1590 if (ptr != NULL) { 1591 // pop the head of no-sync free list 1592 this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr); 1593 KMP_DEBUG_ASSERT( 1594 this_thr == 1595 ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t))) 1596 ->ptr_aligned); 1597 goto end; 1598 } 1599 ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync); 1600 if (ptr != NULL) { 1601 // no-sync free list is empty, use sync free list (filled in by other 1602 // threads only) 1603 // pop the head of the sync free list, push NULL instead 1604 while (!KMP_COMPARE_AND_STORE_PTR( 1605 &this_thr->th.th_free_lists[index].th_free_list_sync, ptr, nullptr)) { 1606 KMP_CPU_PAUSE(); 1607 ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync); 1608 } 1609 // push the rest of chain into no-sync free list (can be NULL if there was 1610 // the only block) 1611 this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr); 1612 KMP_DEBUG_ASSERT( 1613 this_thr == 1614 ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t))) 1615 ->ptr_aligned); 1616 goto end; 1617 } 1618 1619 alloc_call: 1620 // haven't found block in the free lists, thus allocate it 1621 size = num_lines * DCACHE_LINE; 1622 1623 alloc_size = size + sizeof(kmp_mem_descr_t) + DCACHE_LINE; 1624 KE_TRACE(25, ("__kmp_fast_allocate: T#%d Calling __kmp_thread_malloc with " 1625 "alloc_size %d\n", 1626 __kmp_gtid_from_thread(this_thr), alloc_size)); 1627 alloc_ptr = bget(this_thr, (bufsize)alloc_size); 1628 1629 // align ptr to DCACHE_LINE 1630 ptr = (void *)((((kmp_uintptr_t)alloc_ptr) + sizeof(kmp_mem_descr_t) + 1631 DCACHE_LINE) & 1632 ~(DCACHE_LINE - 1)); 1633 descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t)); 1634 1635 descr->ptr_allocated = alloc_ptr; // remember allocated pointer 1636 // we don't need size_allocated 1637 descr->ptr_aligned = (void *)this_thr; // remember allocating thread 1638 // (it is already saved in bget buffer, 1639 // but we may want to use another allocator in future) 1640 descr->size_aligned = size; 1641 1642 end: 1643 KE_TRACE(25, ("<- __kmp_fast_allocate( T#%d ) returns %p\n", 1644 __kmp_gtid_from_thread(this_thr), ptr)); 1645 return ptr; 1646 } // func __kmp_fast_allocate 1647 1648 // Free fast memory and place it on the thread's free list if it is of 1649 // the correct size. 1650 void ___kmp_fast_free(kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL) { 1651 kmp_mem_descr_t *descr; 1652 kmp_info_t *alloc_thr; 1653 size_t size; 1654 size_t idx; 1655 int index; 1656 1657 KE_TRACE(25, ("-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n", 1658 __kmp_gtid_from_thread(this_thr), ptr KMP_SRC_LOC_PARM)); 1659 KMP_ASSERT(ptr != NULL); 1660 1661 descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t)); 1662 1663 KE_TRACE(26, (" __kmp_fast_free: size_aligned=%d\n", 1664 (int)descr->size_aligned)); 1665 1666 size = descr->size_aligned; // 2, 4, 16, 64, 65, 66, ... cache lines 1667 1668 idx = DCACHE_LINE * 2; // 2 cache lines is minimal size of block 1669 if (idx == size) { 1670 index = 0; // 2 cache lines 1671 } else if ((idx <<= 1) == size) { 1672 index = 1; // 4 cache lines 1673 } else if ((idx <<= 2) == size) { 1674 index = 2; // 16 cache lines 1675 } else if ((idx <<= 2) == size) { 1676 index = 3; // 64 cache lines 1677 } else { 1678 KMP_DEBUG_ASSERT(size > DCACHE_LINE * 64); 1679 goto free_call; // 65 or more cache lines ( > 8KB ) 1680 } 1681 1682 alloc_thr = (kmp_info_t *)descr->ptr_aligned; // get thread owning the block 1683 if (alloc_thr == this_thr) { 1684 // push block to self no-sync free list, linking previous head (LIFO) 1685 *((void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self; 1686 this_thr->th.th_free_lists[index].th_free_list_self = ptr; 1687 } else { 1688 void *head = this_thr->th.th_free_lists[index].th_free_list_other; 1689 if (head == NULL) { 1690 // Create new free list 1691 this_thr->th.th_free_lists[index].th_free_list_other = ptr; 1692 *((void **)ptr) = NULL; // mark the tail of the list 1693 descr->size_allocated = (size_t)1; // head of the list keeps its length 1694 } else { 1695 // need to check existed "other" list's owner thread and size of queue 1696 kmp_mem_descr_t *dsc = 1697 (kmp_mem_descr_t *)((char *)head - sizeof(kmp_mem_descr_t)); 1698 // allocating thread, same for all queue nodes 1699 kmp_info_t *q_th = (kmp_info_t *)(dsc->ptr_aligned); 1700 size_t q_sz = 1701 dsc->size_allocated + 1; // new size in case we add current task 1702 if (q_th == alloc_thr && q_sz <= KMP_FREE_LIST_LIMIT) { 1703 // we can add current task to "other" list, no sync needed 1704 *((void **)ptr) = head; 1705 descr->size_allocated = q_sz; 1706 this_thr->th.th_free_lists[index].th_free_list_other = ptr; 1707 } else { 1708 // either queue blocks owner is changing or size limit exceeded 1709 // return old queue to allocating thread (q_th) synchroneously, 1710 // and start new list for alloc_thr's tasks 1711 void *old_ptr; 1712 void *tail = head; 1713 void *next = *((void **)head); 1714 while (next != NULL) { 1715 KMP_DEBUG_ASSERT( 1716 // queue size should decrease by 1 each step through the list 1717 ((kmp_mem_descr_t *)((char *)next - sizeof(kmp_mem_descr_t))) 1718 ->size_allocated + 1719 1 == 1720 ((kmp_mem_descr_t *)((char *)tail - sizeof(kmp_mem_descr_t))) 1721 ->size_allocated); 1722 tail = next; // remember tail node 1723 next = *((void **)next); 1724 } 1725 KMP_DEBUG_ASSERT(q_th != NULL); 1726 // push block to owner's sync free list 1727 old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync); 1728 /* the next pointer must be set before setting free_list to ptr to avoid 1729 exposing a broken list to other threads, even for an instant. */ 1730 *((void **)tail) = old_ptr; 1731 1732 while (!KMP_COMPARE_AND_STORE_PTR( 1733 &q_th->th.th_free_lists[index].th_free_list_sync, old_ptr, head)) { 1734 KMP_CPU_PAUSE(); 1735 old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync); 1736 *((void **)tail) = old_ptr; 1737 } 1738 1739 // start new list of not-selt tasks 1740 this_thr->th.th_free_lists[index].th_free_list_other = ptr; 1741 *((void **)ptr) = NULL; 1742 descr->size_allocated = (size_t)1; // head of queue keeps its length 1743 } 1744 } 1745 } 1746 goto end; 1747 1748 free_call: 1749 KE_TRACE(25, ("__kmp_fast_free: T#%d Calling __kmp_thread_free for size %d\n", 1750 __kmp_gtid_from_thread(this_thr), size)); 1751 __kmp_bget_dequeue(this_thr); /* Release any queued buffers */ 1752 brel(this_thr, descr->ptr_allocated); 1753 1754 end: 1755 KE_TRACE(25, ("<- __kmp_fast_free() returns\n")); 1756 1757 } // func __kmp_fast_free 1758 1759 // Initialize the thread free lists related to fast memory 1760 // Only do this when a thread is initially created. 1761 void __kmp_initialize_fast_memory(kmp_info_t *this_thr) { 1762 KE_TRACE(10, ("__kmp_initialize_fast_memory: Called from th %p\n", this_thr)); 1763 1764 memset(this_thr->th.th_free_lists, 0, NUM_LISTS * sizeof(kmp_free_list_t)); 1765 } 1766 1767 // Free the memory in the thread free lists related to fast memory 1768 // Only do this when a thread is being reaped (destroyed). 1769 void __kmp_free_fast_memory(kmp_info_t *th) { 1770 // Suppose we use BGET underlying allocator, walk through its structures... 1771 int bin; 1772 thr_data_t *thr = get_thr_data(th); 1773 void **lst = NULL; 1774 1775 KE_TRACE( 1776 5, ("__kmp_free_fast_memory: Called T#%d\n", __kmp_gtid_from_thread(th))); 1777 1778 __kmp_bget_dequeue(th); // Release any queued buffers 1779 1780 // Dig through free lists and extract all allocated blocks 1781 for (bin = 0; bin < MAX_BGET_BINS; ++bin) { 1782 bfhead_t *b = thr->freelist[bin].ql.flink; 1783 while (b != &thr->freelist[bin]) { 1784 if ((kmp_uintptr_t)b->bh.bb.bthr & 1) { // the buffer is allocated address 1785 *((void **)b) = 1786 lst; // link the list (override bthr, but keep flink yet) 1787 lst = (void **)b; // push b into lst 1788 } 1789 b = b->ql.flink; // get next buffer 1790 } 1791 } 1792 while (lst != NULL) { 1793 void *next = *lst; 1794 KE_TRACE(10, ("__kmp_free_fast_memory: freeing %p, next=%p th %p (%d)\n", 1795 lst, next, th, __kmp_gtid_from_thread(th))); 1796 (*thr->relfcn)(lst); 1797 #if BufStats 1798 // count blocks to prevent problems in __kmp_finalize_bget() 1799 thr->numprel++; /* Nr of expansion block releases */ 1800 thr->numpblk--; /* Total number of blocks */ 1801 #endif 1802 lst = (void **)next; 1803 } 1804 1805 KE_TRACE( 1806 5, ("__kmp_free_fast_memory: Freed T#%d\n", __kmp_gtid_from_thread(th))); 1807 } 1808 1809 #endif // USE_FAST_MEMORY 1810