/*
 * kmp_alloc.cpp -- private/shared dynamic memory allocation and management
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "kmp.h"
#include "kmp_io.h"
#include "kmp_wrapper_malloc.h"

// Disable bget when it is not used
#if KMP_USE_BGET

/* Thread private buffer management code */

typedef int (*bget_compact_t)(size_t, int);
typedef void *(*bget_acquire_t)(size_t);
typedef void (*bget_release_t)(void *);

/* NOTE: bufsize must be a signed datatype */

#if KMP_OS_WINDOWS
#if KMP_ARCH_X86 || KMP_ARCH_ARM
typedef kmp_int32 bufsize;
#else
typedef kmp_int64 bufsize;
#endif
#else
typedef ssize_t bufsize;
#endif // KMP_OS_WINDOWS

/* The three modes of operation are FIFO search, LIFO search, and best-fit. */

typedef enum bget_mode {
  bget_mode_fifo = 0,
  bget_mode_lifo = 1,
  bget_mode_best = 2
} bget_mode_t;

static void bpool(kmp_info_t *th, void *buffer, bufsize len);
static void *bget(kmp_info_t *th, bufsize size);
static void *bgetz(kmp_info_t *th, bufsize size);
static void *bgetr(kmp_info_t *th, void *buffer, bufsize newsize);
static void brel(kmp_info_t *th, void *buf);
static void bectl(kmp_info_t *th, bget_compact_t compact,
                  bget_acquire_t acquire, bget_release_t release,
                  bufsize pool_incr);

/* BGET CONFIGURATION */
/* Buffer allocation size quantum: all buffers allocated are a
   multiple of this size. This MUST be a power of two. */

/* On IA-32 architecture with Linux* OS, malloc() does not
   ensure 16 byte alignment */

#if KMP_ARCH_X86 || !KMP_HAVE_QUAD

#define SizeQuant 8
#define AlignType double

#else

#define SizeQuant 16
#define AlignType _Quad

#endif
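/* A minimal worked example of the quantum (illustrative, not used by the
   allocator itself): bget() rounds every request up to a multiple of
   SizeQuant via size = (size + (SizeQuant - 1)) & ~(SizeQuant - 1).
   With SizeQuant == 8 a 13-byte request rounds to 16; with SizeQuant == 16
   it also rounds to 16, while a 17-byte request would round to 24 and 32
   bytes respectively. */
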
// Define this symbol to enable the bstats() function which calculates the
// total free space in the buffer pool, the largest available buffer, and the
// total space currently allocated.
#define BufStats 1

#ifdef KMP_DEBUG

// Define this symbol to enable the bpoold() function which dumps the buffers
// in a buffer pool.
#define BufDump 1

// Define this symbol to enable the bpoolv() function for validating a buffer
// pool.
#define BufValid 1

// Define this symbol to enable the bufdump() function which allows dumping the
// contents of an allocated or free buffer.
#define DumpData 1

#ifdef NOT_USED_NOW

// Wipe free buffers to a guaranteed pattern of garbage to trip up miscreants
// who attempt to use pointers into released buffers.
#define FreeWipe 1

// Use a best fit algorithm when searching for space for an allocation request.
// This uses memory more efficiently, but allocation will be much slower.
#define BestFit 1

#endif /* NOT_USED_NOW */
#endif /* KMP_DEBUG */

static bufsize bget_bin_size[] = {
    0,
    //    1 << 6, /* .5 Cache line */
    1 << 7, /* 1 Cache line, new */
    1 << 8, /* 2 Cache lines */
    1 << 9, /* 4 Cache lines, new */
    1 << 10, /* 8 Cache lines */
    1 << 11, /* 16 Cache lines, new */
    1 << 12, 1 << 13, /* new */
    1 << 14, 1 << 15, /* new */
    1 << 16, 1 << 17, 1 << 18, 1 << 19, 1 << 20, /*  1MB */
    1 << 21, /*  2MB */
    1 << 22, /*  4MB */
    1 << 23, /*  8MB */
    1 << 24, /* 16MB */
    1 << 25, /* 32MB */
};

#define MAX_BGET_BINS (int)(sizeof(bget_bin_size) / sizeof(bufsize))

struct bfhead;

// Declare the interface, including the requested buffer size type, bufsize.

/* Queue links */
typedef struct qlinks {
  struct bfhead *flink; /* Forward link */
  struct bfhead *blink; /* Backward link */
} qlinks_t;

/* Header in allocated and free buffers */
typedef struct bhead2 {
  kmp_info_t *bthr; /* The thread which owns the buffer pool */
  bufsize prevfree; /* Relative link back to previous free buffer in memory or
                       0 if previous buffer is allocated. */
  bufsize bsize; /* Buffer size: positive if free, negative if allocated. */
} bhead2_t;

/* Make sure the bhead structure is a multiple of SizeQuant in size. */
typedef union bhead {
  KMP_ALIGN(SizeQuant)
  AlignType b_align;
  char b_pad[sizeof(bhead2_t) + (SizeQuant - (sizeof(bhead2_t) % SizeQuant))];
  bhead2_t bb;
} bhead_t;
#define BH(p) ((bhead_t *)(p))
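/* Size sketch (illustrative, assuming an LP64 build with 8-byte pointers and
   8-byte bufsize): sizeof(bhead2_t) == 24, so b_pad is
   24 + (SizeQuant - 24 % SizeQuant) bytes -- 32 with either SizeQuant
   setting -- and the padded, aligned union is 32 bytes. On such a build
   every buffer therefore carries a 32-byte header. */
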
/* Header in directly allocated buffers (by acqfcn) */
typedef struct bdhead {
  bufsize tsize; /* Total size, including overhead */
  bhead_t bh; /* Common header */
} bdhead_t;
#define BDH(p) ((bdhead_t *)(p))

/* Header in free buffers */
typedef struct bfhead {
  bhead_t bh; /* Common allocated/free header */
  qlinks_t ql; /* Links on free list */
} bfhead_t;
#define BFH(p) ((bfhead_t *)(p))

typedef struct thr_data {
  bfhead_t freelist[MAX_BGET_BINS];
#if BufStats
  size_t totalloc; /* Total space currently allocated */
  long numget, numrel; /* Number of bget() and brel() calls */
  long numpblk; /* Number of pool blocks */
  long numpget, numprel; /* Number of block gets and rels */
  long numdget, numdrel; /* Number of direct gets and rels */
#endif /* BufStats */

  /* Automatic expansion block management functions */
  bget_compact_t compfcn;
  bget_acquire_t acqfcn;
  bget_release_t relfcn;

  bget_mode_t mode; /* what allocation mode to use? */

  bufsize exp_incr; /* Expansion block size */
  bufsize pool_len; /* 0: no bpool calls have been made
                       -1: not all pool blocks are the same size
                       >0: (common) block size for all bpool calls made so far
                    */
  bfhead_t *last_pool; /* Last pool owned by this thread (delay deallocation) */
} thr_data_t;

/* Minimum allocation quantum: */
#define QLSize (sizeof(qlinks_t))
#define SizeQ ((SizeQuant > QLSize) ? SizeQuant : QLSize)
#define MaxSize                                                                \
  (bufsize)(                                                                   \
      ~(((bufsize)(1) << (sizeof(bufsize) * CHAR_BIT - 1)) | (SizeQuant - 1)))
// Maximum for the requested size.

/* End sentinel: value placed in bsize field of dummy block delimiting
   end of pool block. The most negative number which will fit in a
   bufsize, defined in a way that the compiler will accept. */

#define ESent                                                                  \
  ((bufsize)(-(((((bufsize)1) << ((int)sizeof(bufsize) * 8 - 2)) - 1) * 2) - 2))

/* Thread Data management routines */
static int bget_get_bin(bufsize size) {
  // binary chop bins
  int lo = 0, hi = MAX_BGET_BINS - 1;

  KMP_DEBUG_ASSERT(size > 0);

  while ((hi - lo) > 1) {
    int mid = (lo + hi) >> 1;
    if (size < bget_bin_size[mid])
      hi = mid - 1;
    else
      lo = mid;
  }

  KMP_DEBUG_ASSERT((lo >= 0) && (lo < MAX_BGET_BINS));

  return lo;
}
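/* Worked example of the binary chop (illustrative only): with the 20-entry
   bin table above, bget_get_bin(300) starts with lo = 0, hi = 19; then
   mid = 9 (32768, too big) -> hi = 8; mid = 4 (1024, too big) -> hi = 3;
   mid = 1 (128, fits) -> lo = 1; mid = 2 (256, fits) -> lo = 2; the loop
   ends and bin 2 (256 bytes) is returned. Because the chop sets
   hi = mid - 1, it can occasionally return one bin lower than the exact
   fit; this is harmless for searching (bget() scans from the returned bin
   upward), and a free block filed one bin low costs at most an extra pool
   expansion, never correctness. */
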
static void set_thr_data(kmp_info_t *th) {
  int i;
  thr_data_t *data;

  data = (thr_data_t *)((!th->th.th_local.bget_data)
                            ? __kmp_allocate(sizeof(*data))
                            : th->th.th_local.bget_data);

  memset(data, '\0', sizeof(*data));

  for (i = 0; i < MAX_BGET_BINS; ++i) {
    data->freelist[i].ql.flink = &data->freelist[i];
    data->freelist[i].ql.blink = &data->freelist[i];
  }

  th->th.th_local.bget_data = data;
  th->th.th_local.bget_list = 0;
#if !USE_CMP_XCHG_FOR_BGET
#ifdef USE_QUEUING_LOCK_FOR_BGET
  __kmp_init_lock(&th->th.th_local.bget_lock);
#else
  __kmp_init_bootstrap_lock(&th->th.th_local.bget_lock);
#endif /* USE_QUEUING_LOCK_FOR_BGET */
#endif /* ! USE_CMP_XCHG_FOR_BGET */
}

static thr_data_t *get_thr_data(kmp_info_t *th) {
  thr_data_t *data;

  data = (thr_data_t *)th->th.th_local.bget_data;

  KMP_DEBUG_ASSERT(data != 0);

  return data;
}

/* Walk the free list and release the enqueued buffers */
static void __kmp_bget_dequeue(kmp_info_t *th) {
  void *p = TCR_SYNC_PTR(th->th.th_local.bget_list);

  if (p != 0) {
#if USE_CMP_XCHG_FOR_BGET
    {
      volatile void *old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
      while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
                                        CCAST(void *, old_value), nullptr)) {
        KMP_CPU_PAUSE();
        old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
      }
      p = CCAST(void *, old_value);
    }
#else /* ! USE_CMP_XCHG_FOR_BGET */
#ifdef USE_QUEUING_LOCK_FOR_BGET
    __kmp_acquire_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
#else
    __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
#endif /* USE_QUEUING_LOCK_FOR_BGET */

    p = (void *)th->th.th_local.bget_list;
    th->th.th_local.bget_list = 0;

#ifdef USE_QUEUING_LOCK_FOR_BGET
    __kmp_release_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
#else
    __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
#endif
#endif /* USE_CMP_XCHG_FOR_BGET */

    /* Check again to make sure the list is not empty */
    while (p != 0) {
      void *buf = p;
      bfhead_t *b = BFH(((char *)p) - sizeof(bhead_t));

      KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
      KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
                       (kmp_uintptr_t)th); // clear possible mark
      KMP_DEBUG_ASSERT(b->ql.blink == 0);

      p = (void *)b->ql.flink;

      brel(th, buf);
    }
  }
}
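/* Protocol sketch (illustrative): when thread B frees a buffer owned by
   thread A, brel() does not touch A's free lists directly. Instead
   __kmp_bget_enqueue() pushes the buffer onto A's bget_list with a
   compare-and-swap (or under a lock), and A drains that list here on its
   next bget()/bpool() call, releasing each buffer locally. Ownership never
   migrates, so the per-thread free lists need no locking on the fast path. */
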
/* Chain together the free buffers by using the thread owner field */
static void __kmp_bget_enqueue(kmp_info_t *th, void *buf
#ifdef USE_QUEUING_LOCK_FOR_BGET
                               ,
                               kmp_int32 rel_gtid
#endif
                               ) {
  bfhead_t *b = BFH(((char *)buf) - sizeof(bhead_t));

  KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
  KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
                   (kmp_uintptr_t)th); // clear possible mark

  b->ql.blink = 0;

  KC_TRACE(10, ("__kmp_bget_enqueue: moving buffer to T#%d list\n",
                __kmp_gtid_from_thread(th)));

#if USE_CMP_XCHG_FOR_BGET
  {
    volatile void *old_value = TCR_PTR(th->th.th_local.bget_list);
    /* the next pointer must be set before setting bget_list to buf to avoid
       exposing a broken list to other threads, even for an instant. */
    b->ql.flink = BFH(CCAST(void *, old_value));

    while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
                                      CCAST(void *, old_value), buf)) {
      KMP_CPU_PAUSE();
      old_value = TCR_PTR(th->th.th_local.bget_list);
      /* the next pointer must be set before setting bget_list to buf to avoid
         exposing a broken list to other threads, even for an instant. */
      b->ql.flink = BFH(CCAST(void *, old_value));
    }
  }
#else /* ! USE_CMP_XCHG_FOR_BGET */
#ifdef USE_QUEUING_LOCK_FOR_BGET
  __kmp_acquire_lock(&th->th.th_local.bget_lock, rel_gtid);
#else
  __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
#endif

  b->ql.flink = BFH(th->th.th_local.bget_list);
  th->th.th_local.bget_list = (void *)buf;

#ifdef USE_QUEUING_LOCK_FOR_BGET
  __kmp_release_lock(&th->th.th_local.bget_lock, rel_gtid);
#else
  __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
#endif
#endif /* USE_CMP_XCHG_FOR_BGET */
}

/* insert buffer back onto a new freelist */
static void __kmp_bget_insert_into_freelist(thr_data_t *thr, bfhead_t *b) {
  int bin;

  KMP_DEBUG_ASSERT(((size_t)b) % SizeQuant == 0);
  KMP_DEBUG_ASSERT(b->bh.bb.bsize % SizeQuant == 0);

  bin = bget_get_bin(b->bh.bb.bsize);

  KMP_DEBUG_ASSERT(thr->freelist[bin].ql.blink->ql.flink ==
                   &thr->freelist[bin]);
  KMP_DEBUG_ASSERT(thr->freelist[bin].ql.flink->ql.blink ==
                   &thr->freelist[bin]);

  b->ql.flink = &thr->freelist[bin];
  b->ql.blink = thr->freelist[bin].ql.blink;

  thr->freelist[bin].ql.blink = b;
  b->ql.blink->ql.flink = b;
}

/* unlink the buffer from the old freelist */
static void __kmp_bget_remove_from_freelist(bfhead_t *b) {
  KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
  KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);

  b->ql.blink->ql.flink = b->ql.flink;
  b->ql.flink->ql.blink = b->ql.blink;
}
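/* Note on the owner field (illustrative): bthr stores the owning
   kmp_info_t pointer, and bpool() additionally sets its low bit as a mark.
   Thread descriptors are at least pointer-aligned, so the low bit is
   otherwise always zero; that is why the ownership asserts above compare
   against (bthr & ~1) to strip a possible mark first. */
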
/* GET STATS -- check info on free list */
static void bcheck(kmp_info_t *th, bufsize *max_free, bufsize *total_free) {
  thr_data_t *thr = get_thr_data(th);
  int bin;

  *total_free = *max_free = 0;

  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
    bfhead_t *b, *best;

    best = &thr->freelist[bin];
    b = best->ql.flink;

    while (b != &thr->freelist[bin]) {
      *total_free += (b->bh.bb.bsize - sizeof(bhead_t));
      if ((best == &thr->freelist[bin]) || (b->bh.bb.bsize < best->bh.bb.bsize))
        best = b;

      /* Link to next buffer */
      b = b->ql.flink;
    }

    if (*max_free < best->bh.bb.bsize)
      *max_free = best->bh.bb.bsize;
  }

  if (*max_free > (bufsize)sizeof(bhead_t))
    *max_free -= sizeof(bhead_t);
}

/* BGET -- Allocate a buffer. */
static void *bget(kmp_info_t *th, bufsize requested_size) {
  thr_data_t *thr = get_thr_data(th);
  bufsize size = requested_size;
  bfhead_t *b;
  void *buf;
  int compactseq = 0;
  int use_blink = 0;
  /* For BestFit */
  bfhead_t *best;

  if (size < 0 || size + sizeof(bhead_t) > MaxSize) {
    return NULL;
  }

  __kmp_bget_dequeue(th); /* Release any queued buffers */

  if (size < (bufsize)SizeQ) { // Need at least room for the queue links.
    size = SizeQ;
  }
#if defined(SizeQuant) && (SizeQuant > 1)
  size = (size + (SizeQuant - 1)) & (~(SizeQuant - 1));
#endif

  size += sizeof(bhead_t); // Add overhead in allocated buffer to size required.
  KMP_DEBUG_ASSERT(size >= 0);
  KMP_DEBUG_ASSERT(size % SizeQuant == 0);

  use_blink = (thr->mode == bget_mode_lifo);

  /* If a compact function was provided in the call to bectl(), wrap
     a loop around the allocation process to allow compaction to
     intervene in case we don't find a suitable buffer in the chain. */

  for (;;) {
    int bin;

    for (bin = bget_get_bin(size); bin < MAX_BGET_BINS; ++bin) {
      /* Link to next buffer */
      b = (use_blink ? thr->freelist[bin].ql.blink
                     : thr->freelist[bin].ql.flink);

      if (thr->mode == bget_mode_best) {
        best = &thr->freelist[bin];

        /* Scan the free list searching for the first buffer big enough
           to hold the requested size buffer. */
        while (b != &thr->freelist[bin]) {
          if (b->bh.bb.bsize >= (bufsize)size) {
            if ((best == &thr->freelist[bin]) ||
                (b->bh.bb.bsize < best->bh.bb.bsize)) {
              best = b;
            }
          }

          /* Link to next buffer */
          b = (use_blink ? b->ql.blink : b->ql.flink);
        }
        b = best;
      }

      while (b != &thr->freelist[bin]) {
        if ((bufsize)b->bh.bb.bsize >= (bufsize)size) {

          // Buffer is big enough to satisfy the request. Allocate it to the
          // caller. We must decide whether the buffer is large enough to split
          // into the part given to the caller and a free buffer that remains
          // on the free list, or whether the entire buffer should be removed
          // from the free list and given to the caller in its entirety. We
          // only split the buffer if enough room remains for a header plus the
          // minimum quantum of allocation.
          if ((b->bh.bb.bsize - (bufsize)size) >
              (bufsize)(SizeQ + (sizeof(bhead_t)))) {
            bhead_t *ba, *bn;

            ba = BH(((char *)b) + (b->bh.bb.bsize - (bufsize)size));
            bn = BH(((char *)ba) + size);

            KMP_DEBUG_ASSERT(bn->bb.prevfree == b->bh.bb.bsize);

            /* Subtract size from length of free block. */
            b->bh.bb.bsize -= (bufsize)size;

            /* Link allocated buffer to the previous free buffer. */
            ba->bb.prevfree = b->bh.bb.bsize;

            /* Plug negative size into user buffer. */
            ba->bb.bsize = -size;

            /* Mark this buffer as owned by this thread. */
            TCW_PTR(ba->bb.bthr,
                    th); // not an allocated address (do not mark it)
            /* Mark buffer after this one not preceded by free block. */
            bn->bb.prevfree = 0;

            // unlink buffer from old freelist, and reinsert into new freelist
            __kmp_bget_remove_from_freelist(b);
            __kmp_bget_insert_into_freelist(thr, b);
#if BufStats
            thr->totalloc += (size_t)size;
            thr->numget++; /* Increment number of bget() calls */
#endif
            buf = (void *)((((char *)ba) + sizeof(bhead_t)));
            KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
            return buf;
          } else {
            bhead_t *ba;

            ba = BH(((char *)b) + b->bh.bb.bsize);

            KMP_DEBUG_ASSERT(ba->bb.prevfree == b->bh.bb.bsize);

            /* The buffer isn't big enough to split. Give the whole
               shebang to the caller and remove it from the free list. */

            __kmp_bget_remove_from_freelist(b);
#if BufStats
            thr->totalloc += (size_t)b->bh.bb.bsize;
            thr->numget++; /* Increment number of bget() calls */
#endif
            /* Negate size to mark buffer allocated. */
            b->bh.bb.bsize = -(b->bh.bb.bsize);

            /* Mark this buffer as owned by this thread. */
            TCW_PTR(ba->bb.bthr, th); // not an allocated address (do not mark)
            /* Zero the back pointer in the next buffer in memory
               to indicate that this buffer is allocated. */
            ba->bb.prevfree = 0;

            /* Give user buffer starting at queue links. */
            buf = (void *)&(b->ql);
            KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
            return buf;
          }
        }

        /* Link to next buffer */
        b = (use_blink ? b->ql.blink : b->ql.flink);
      }
    }

    /* We failed to find a buffer. If there's a compact function defined,
       notify it of the size requested. If it returns TRUE, try the allocation
       again. */

    if ((thr->compfcn == 0) || (!(*thr->compfcn)(size, ++compactseq))) {
      break;
    }
  }

  /* No buffer available with requested size free. */

  /* Don't give up yet -- look in the reserve supply. */
  if (thr->acqfcn != 0) {
    if (size > (bufsize)(thr->exp_incr - sizeof(bhead_t))) {
      /* Request is too large to fit in a single expansion block.
         Try to satisfy it by a direct buffer acquisition. */
      bdhead_t *bdh;

      size += sizeof(bdhead_t) - sizeof(bhead_t);

      KE_TRACE(10, ("%%%%%% MALLOC( %d )\n", (int)size));

      /* richryan */
      bdh = BDH((*thr->acqfcn)((bufsize)size));
      if (bdh != NULL) {

        // Mark the buffer special by setting size field of its header to zero.
        bdh->bh.bb.bsize = 0;

        /* Mark this buffer as owned by this thread. */
        TCW_PTR(bdh->bh.bb.bthr, th); // don't mark buffer as allocated,
        // because direct buffer never goes to free list
        bdh->bh.bb.prevfree = 0;
        bdh->tsize = size;
#if BufStats
        thr->totalloc += (size_t)size;
        thr->numget++; /* Increment number of bget() calls */
        thr->numdget++; /* Direct bget() call count */
#endif
        buf = (void *)(bdh + 1);
        KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
        return buf;
      }

    } else {

      /* Try to obtain a new expansion block */
      void *newpool;

      KE_TRACE(10, ("%%%%%% MALLOCB( %d )\n", (int)thr->exp_incr));

      /* richryan */
      newpool = (*thr->acqfcn)((bufsize)thr->exp_incr);
      KMP_DEBUG_ASSERT(((size_t)newpool) % SizeQuant == 0);
      if (newpool != NULL) {
        bpool(th, newpool, thr->exp_incr);
        buf = bget(
            th, requested_size); /* This can't, I say, can't get into a loop. */
        return buf;
      }
    }
  }

  /* Still no buffer available */

  return NULL;
}
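/* Sizing sketch (illustrative, assuming the LP64 figures noted earlier:
   SizeQuant == 16, SizeQ == 16, sizeof(bhead_t) == 32): bget(th, 100) is
   rounded to 112 bytes and becomes 144 with the header. A 208-byte free
   block would be split, since 208 - 144 = 64 exceeds
   SizeQ + sizeof(bhead_t) == 48: the caller receives the 144-byte tail of
   the block and a 64-byte free remainder stays on a free list. A 176-byte
   block (176 - 144 = 32 <= 48) would instead be handed over whole. */
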
/* BGETZ -- Allocate a buffer and clear its contents to zero. We clear
   the entire contents of the buffer to zero, not just the
   region requested by the caller. */

static void *bgetz(kmp_info_t *th, bufsize size) {
  char *buf = (char *)bget(th, size);

  if (buf != NULL) {
    bhead_t *b;
    bufsize rsize;

    b = BH(buf - sizeof(bhead_t));
    rsize = -(b->bb.bsize);
    if (rsize == 0) {
      bdhead_t *bd;

      bd = BDH(buf - sizeof(bdhead_t));
      rsize = bd->tsize - (bufsize)sizeof(bdhead_t);
    } else {
      rsize -= sizeof(bhead_t);
    }

    KMP_DEBUG_ASSERT(rsize >= size);

    (void)memset(buf, 0, (bufsize)rsize);
  }
  return ((void *)buf);
}

/* BGETR -- Reallocate a buffer. This is a minimal implementation,
   simply in terms of brel() and bget(). It could be
   enhanced to allow the buffer to grow into adjacent free
   blocks and to avoid moving data unnecessarily. */

static void *bgetr(kmp_info_t *th, void *buf, bufsize size) {
  void *nbuf;
  bufsize osize; /* Old size of buffer */
  bhead_t *b;

  nbuf = bget(th, size);
  if (nbuf == NULL) { /* Acquire new buffer */
    return NULL;
  }
  if (buf == NULL) {
    return nbuf;
  }
  b = BH(((char *)buf) - sizeof(bhead_t));
  osize = -b->bb.bsize;
  if (osize == 0) {
    /* Buffer acquired directly through acqfcn. */
    bdhead_t *bd;

    bd = BDH(((char *)buf) - sizeof(bdhead_t));
    osize = bd->tsize - (bufsize)sizeof(bdhead_t);
  } else {
    osize -= sizeof(bhead_t);
  }

  KMP_DEBUG_ASSERT(osize > 0);

  (void)KMP_MEMCPY((char *)nbuf, (char *)buf, /* Copy the data */
                   (size_t)((size < osize) ? size : osize));
  brel(th, buf);

  return nbuf;
}
/* BREL -- Release a buffer. */
static void brel(kmp_info_t *th, void *buf) {
  thr_data_t *thr = get_thr_data(th);
  bfhead_t *b, *bn;
  kmp_info_t *bth;

  KMP_DEBUG_ASSERT(buf != NULL);
  KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);

  b = BFH(((char *)buf) - sizeof(bhead_t));

  if (b->bh.bb.bsize == 0) { /* Directly-acquired buffer? */
    bdhead_t *bdh;

    bdh = BDH(((char *)buf) - sizeof(bdhead_t));
    KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
#if BufStats
    thr->totalloc -= (size_t)bdh->tsize;
    thr->numdrel++; /* Number of direct releases */
    thr->numrel++; /* Increment number of brel() calls */
#endif /* BufStats */
#ifdef FreeWipe
    (void)memset((char *)buf, 0x55, (size_t)(bdh->tsize - sizeof(bdhead_t)));
#endif /* FreeWipe */

    KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)bdh));

    KMP_DEBUG_ASSERT(thr->relfcn != 0);
    (*thr->relfcn)((void *)bdh); /* Release it directly. */
    return;
  }

  bth = (kmp_info_t *)((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) &
                       ~1); // clear possible mark before comparison
  if (bth != th) {
    /* Add this buffer to be released by the owning thread later */
    __kmp_bget_enqueue(bth, buf
#ifdef USE_QUEUING_LOCK_FOR_BGET
                       ,
                       __kmp_gtid_from_thread(th)
#endif
                       );
    return;
  }

  /* Buffer size must be negative, indicating that the buffer is allocated. */
  if (b->bh.bb.bsize >= 0) {
    bn = NULL;
  }
  KMP_DEBUG_ASSERT(b->bh.bb.bsize < 0);

  /* Back pointer in next buffer must be zero, indicating the same thing: */

  KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.bsize)->bb.prevfree == 0);

#if BufStats
  thr->numrel++; /* Increment number of brel() calls */
  thr->totalloc += (size_t)b->bh.bb.bsize;
#endif

  /* If the back link is nonzero, the previous buffer is free. */

  if (b->bh.bb.prevfree != 0) {
    /* The previous buffer is free. Consolidate this buffer with it by adding
       the length of this buffer to the previous free buffer. Note that we
       subtract the size in the buffer being released, since it's negative to
       indicate that the buffer is allocated. */
    bufsize size = b->bh.bb.bsize;

    /* Make the previous buffer the one we're working on. */
    KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.prevfree)->bb.bsize ==
                     b->bh.bb.prevfree);
    b = BFH(((char *)b) - b->bh.bb.prevfree);
    b->bh.bb.bsize -= size;

    /* unlink the buffer from the old freelist */
    __kmp_bget_remove_from_freelist(b);
  } else {
    /* The previous buffer is allocated. Mark this buffer size as positive
       (i.e. free) and fall through to place the buffer on the free list as an
       isolated free block. */
    b->bh.bb.bsize = -b->bh.bb.bsize;
  }

  /* insert buffer back onto a new freelist */
  __kmp_bget_insert_into_freelist(thr, b);

  /* Now we look at the next buffer in memory, located by advancing from
     the start of this buffer by its size, to see if that buffer is
     free. If it is, we combine this buffer with the next one in
     memory, dechaining the second buffer from the free list. */
  bn = BFH(((char *)b) + b->bh.bb.bsize);
  if (bn->bh.bb.bsize > 0) {

    /* The buffer is free. Remove it from the free list and add
       its size to that of our buffer. */
    KMP_DEBUG_ASSERT(BH((char *)bn + bn->bh.bb.bsize)->bb.prevfree ==
                     bn->bh.bb.bsize);

    __kmp_bget_remove_from_freelist(bn);

    b->bh.bb.bsize += bn->bh.bb.bsize;

    /* unlink the buffer from the old freelist, and reinsert it into the new
     * freelist */
    __kmp_bget_remove_from_freelist(b);
    __kmp_bget_insert_into_freelist(thr, b);

    /* Finally, advance to the buffer that follows the newly
       consolidated free block. We must set its backpointer to the
       head of the consolidated free block. We know the next block
       must be an allocated block because the process of recombination
       guarantees that two free blocks will never be contiguous in
       memory. */
    bn = BFH(((char *)b) + b->bh.bb.bsize);
  }
#ifdef FreeWipe
  (void)memset(((char *)b) + sizeof(bfhead_t), 0x55,
               (size_t)(b->bh.bb.bsize - sizeof(bfhead_t)));
#endif
  KMP_DEBUG_ASSERT(bn->bh.bb.bsize < 0);

  /* The next buffer is allocated. Set the backpointer in it to point
     to this buffer; the previous free buffer in memory. */

  bn->bh.bb.prevfree = b->bh.bb.bsize;

  /* If a block-release function is defined, and this free buffer
     constitutes the entire block, release it. Note that pool_len
     is defined in such a way that the test will fail unless all
     pool blocks are the same size. */
  if (thr->relfcn != 0 &&
      b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) {
#if BufStats
    if (thr->numpblk !=
        1) { /* Do not release the last buffer until finalization time */
#endif

      KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
      KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
      KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree ==
                       b->bh.bb.bsize);

      /* Unlink the buffer from the free list */
      __kmp_bget_remove_from_freelist(b);

      KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b));

      (*thr->relfcn)(b);
#if BufStats
      thr->numprel++; /* Nr of expansion block releases */
      thr->numpblk--; /* Total number of blocks */
      KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);

      // avoid leaving stale last_pool pointer around if it is being dealloced
      if (thr->last_pool == b)
        thr->last_pool = 0;
    } else {
      thr->last_pool = b;
    }
#endif /* BufStats */
  }
}
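/* Coalescing sketch (illustrative): brel() recombines in two directions.
   If prevfree != 0, the freed buffer is merged into the free block that
   precedes it in memory; then, if the block that follows (found by
   advancing bsize bytes) is also free, it is unlinked and absorbed as well.
   The invariant maintained is that two free blocks are never adjacent:
   every free block's neighbors are allocated blocks or the pool's end
   sentinel. */
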
/* BECTL -- Establish automatic pool expansion control */
static void bectl(kmp_info_t *th, bget_compact_t compact,
                  bget_acquire_t acquire, bget_release_t release,
                  bufsize pool_incr) {
  thr_data_t *thr = get_thr_data(th);

  thr->compfcn = compact;
  thr->acqfcn = acquire;
  thr->relfcn = release;
  thr->exp_incr = pool_incr;
}

/* BPOOL -- Add a region of memory to the buffer pool. */
static void bpool(kmp_info_t *th, void *buf, bufsize len) {
  /*    int bin = 0; */
  thr_data_t *thr = get_thr_data(th);
  bfhead_t *b = BFH(buf);
  bhead_t *bn;

  __kmp_bget_dequeue(th); /* Release any queued buffers */

#ifdef SizeQuant
  len &= ~(SizeQuant - 1);
#endif
  if (thr->pool_len == 0) {
    thr->pool_len = len;
  } else if (len != thr->pool_len) {
    thr->pool_len = -1;
  }
#if BufStats
  thr->numpget++; /* Number of block acquisitions */
  thr->numpblk++; /* Number of blocks total */
  KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
#endif /* BufStats */

  /* Since the block is initially occupied by a single free buffer,
     it had better not be (much) larger than the largest buffer
     whose size we can store in bhead.bb.bsize. */
  KMP_DEBUG_ASSERT(len - sizeof(bhead_t) <= -((bufsize)ESent + 1));

  /* Clear the backpointer at the start of the block to indicate that
     there is no free block prior to this one. That blocks
     recombination when the first block in memory is released. */
  b->bh.bb.prevfree = 0;

  /* Create a dummy allocated buffer at the end of the pool. This dummy
     buffer is seen when a buffer at the end of the pool is released and
     blocks recombination of the last buffer with the dummy buffer at
     the end. The length in the dummy buffer is set to the largest
     negative number to denote the end of the pool for diagnostic
     routines (this specific value is not counted on by the actual
     allocation and release functions). */
  len -= sizeof(bhead_t);
  b->bh.bb.bsize = (bufsize)len;
  /* Set the owner of this buffer */
  TCW_PTR(b->bh.bb.bthr,
          (kmp_info_t *)((kmp_uintptr_t)th |
                         1)); // mark the buffer as allocated address

  /* Chain the new block to the free list. */
  __kmp_bget_insert_into_freelist(thr, b);

#ifdef FreeWipe
  (void)memset(((char *)b) + sizeof(bfhead_t), 0x55,
               (size_t)(len - sizeof(bfhead_t)));
#endif
  bn = BH(((char *)b) + len);
  bn->bb.prevfree = (bufsize)len;
  /* Definition of ESent assumes two's complement! */
  KMP_DEBUG_ASSERT((~0) == -1 && (bn != 0));

  bn->bb.bsize = ESent;
}
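/* Pool layout sketch (illustrative) after bpool(th, buf, len):
     [ bhead: prevfree=0, bsize=len-sizeof(bhead_t) | free payload ... |
       sentinel bhead: prevfree=len-sizeof(bhead_t), bsize=ESent ]
   A single free block spans the whole region, and the dummy "allocated"
   header at the end (ESent) blocks coalescing past the pool boundary. */
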
/* BFREED -- Dump the free lists for this thread. */
static void bfreed(kmp_info_t *th) {
  int bin = 0, count = 0;
  int gtid = __kmp_gtid_from_thread(th);
  thr_data_t *thr = get_thr_data(th);

#if BufStats
  __kmp_printf_no_lock("__kmp_printpool: T#%d total=%" KMP_UINT64_SPEC
                       " get=%" KMP_INT64_SPEC " rel=%" KMP_INT64_SPEC
                       " pblk=%" KMP_INT64_SPEC " pget=%" KMP_INT64_SPEC
                       " prel=%" KMP_INT64_SPEC " dget=%" KMP_INT64_SPEC
                       " drel=%" KMP_INT64_SPEC "\n",
                       gtid, (kmp_uint64)thr->totalloc, (kmp_int64)thr->numget,
                       (kmp_int64)thr->numrel, (kmp_int64)thr->numpblk,
                       (kmp_int64)thr->numpget, (kmp_int64)thr->numprel,
                       (kmp_int64)thr->numdget, (kmp_int64)thr->numdrel);
#endif

  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
    bfhead_t *b;

    for (b = thr->freelist[bin].ql.flink; b != &thr->freelist[bin];
         b = b->ql.flink) {
      bufsize bs = b->bh.bb.bsize;

      KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
      KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
      KMP_DEBUG_ASSERT(bs > 0);

      count += 1;

      __kmp_printf_no_lock(
          "__kmp_printpool: T#%d Free block: 0x%p size %6ld bytes.\n", gtid, b,
          (long)bs);
#ifdef FreeWipe
      {
        char *lerr = ((char *)b) + sizeof(bfhead_t);
        if ((bs > sizeof(bfhead_t)) &&
            ((*lerr != 0x55) ||
             (memcmp(lerr, lerr + 1, (size_t)(bs - (sizeof(bfhead_t) + 1))) !=
              0))) {
          __kmp_printf_no_lock("__kmp_printpool: T#%d     (Contents of above "
                               "free block have been overstored.)\n",
                               gtid);
        }
      }
#endif
    }
  }

  if (count == 0)
    __kmp_printf_no_lock("__kmp_printpool: T#%d No free blocks\n", gtid);
}
void __kmp_initialize_bget(kmp_info_t *th) {
  KMP_DEBUG_ASSERT(SizeQuant >= sizeof(void *) && (th != 0));

  set_thr_data(th);

  bectl(th, (bget_compact_t)0, (bget_acquire_t)malloc, (bget_release_t)free,
        (bufsize)__kmp_malloc_pool_incr);
}

void __kmp_finalize_bget(kmp_info_t *th) {
  thr_data_t *thr;
  bfhead_t *b;

  KMP_DEBUG_ASSERT(th != 0);

#if BufStats
  thr = (thr_data_t *)th->th.th_local.bget_data;
  KMP_DEBUG_ASSERT(thr != NULL);
  b = thr->last_pool;

  /* If a block-release function is defined, and this free buffer constitutes
     the entire block, release it. Note that pool_len is defined in such a way
     that the test will fail unless all pool blocks are the same size. */

  // Deallocate the last pool if one exists because we no longer do it in
  // brel()
  if (thr->relfcn != 0 && b != 0 && thr->numpblk != 0 &&
      b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) {
    KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
    KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
    KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree ==
                     b->bh.bb.bsize);

    /* Unlink the buffer from the free list */
    __kmp_bget_remove_from_freelist(b);

    KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b));

    (*thr->relfcn)(b);
    thr->numprel++; /* Nr of expansion block releases */
    thr->numpblk--; /* Total number of blocks */
    KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
  }
#endif /* BufStats */

  /* Deallocate bget_data */
  if (th->th.th_local.bget_data != NULL) {
    __kmp_free(th->th.th_local.bget_data);
    th->th.th_local.bget_data = NULL;
  }
}
void kmpc_set_poolsize(size_t size) {
  bectl(__kmp_get_thread(), (bget_compact_t)0, (bget_acquire_t)malloc,
        (bget_release_t)free, (bufsize)size);
}

size_t kmpc_get_poolsize(void) {
  thr_data_t *p;

  p = get_thr_data(__kmp_get_thread());

  return p->exp_incr;
}

void kmpc_set_poolmode(int mode) {
  thr_data_t *p;

  if (mode == bget_mode_fifo || mode == bget_mode_lifo ||
      mode == bget_mode_best) {
    p = get_thr_data(__kmp_get_thread());
    p->mode = (bget_mode_t)mode;
  }
}

int kmpc_get_poolmode(void) {
  thr_data_t *p;

  p = get_thr_data(__kmp_get_thread());

  return p->mode;
}

void kmpc_get_poolstat(size_t *maxmem, size_t *allmem) {
  kmp_info_t *th = __kmp_get_thread();
  bufsize a, b;

  __kmp_bget_dequeue(th); /* Release any queued buffers */

  bcheck(th, &a, &b);

  *maxmem = a;
  *allmem = b;
}

void kmpc_poolprint(void) {
  kmp_info_t *th = __kmp_get_thread();

  __kmp_bget_dequeue(th); /* Release any queued buffers */

  bfreed(th);
}

#endif // #if KMP_USE_BGET

void *kmpc_malloc(size_t size) {
  void *ptr;
  ptr = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr)));
  if (ptr != NULL) {
    // save allocated pointer just before one returned to user
    *(void **)ptr = ptr;
    ptr = (void **)ptr + 1;
  }
  return ptr;
}

#define IS_POWER_OF_TWO(n) (((n) & ((n)-1)) == 0)

void *kmpc_aligned_malloc(size_t size, size_t alignment) {
  void *ptr;
  void *ptr_allocated;
  KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too big
  if (!IS_POWER_OF_TWO(alignment)) {
    // AC: do we need to issue a warning here?
    errno = EINVAL;
    return NULL;
  }
  size = size + sizeof(void *) + alignment;
  ptr_allocated = bget(__kmp_entry_thread(), (bufsize)size);
  if (ptr_allocated != NULL) {
    // save allocated pointer just before one returned to user
    ptr = (void *)(((kmp_uintptr_t)ptr_allocated + sizeof(void *) + alignment) &
                   ~(alignment - 1));
    *((void **)ptr - 1) = ptr_allocated;
  } else {
    ptr = NULL;
  }
  return ptr;
}
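/* Alignment sketch (illustrative): suppose bget() returns ptr_allocated ==
   0x1008 and alignment == 64 on a 64-bit build. Then
   (0x1008 + 8 + 64) & ~63 == 0x1040, so the caller sees 0x1040 and the raw
   pointer 0x1008 is stashed in the word at 0x1038 for kmpc_free() to
   recover. Padding the request by sizeof(void *) plus the alignment
   guarantees the stash slot always exists inside the allocation. */
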
void *kmpc_calloc(size_t nelem, size_t elsize) {
  void *ptr;
  ptr = bgetz(__kmp_entry_thread(), (bufsize)(nelem * elsize + sizeof(ptr)));
  if (ptr != NULL) {
    // save allocated pointer just before one returned to user
    *(void **)ptr = ptr;
    ptr = (void **)ptr + 1;
  }
  return ptr;
}

void *kmpc_realloc(void *ptr, size_t size) {
  void *result = NULL;
  if (ptr == NULL) {
    // If pointer is NULL, realloc behaves like malloc.
    result = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr)));
    // save allocated pointer just before one returned to user
    if (result != NULL) {
      *(void **)result = result;
      result = (void **)result + 1;
    }
  } else if (size == 0) {
    // If size is 0, realloc behaves like free.
    // The thread must be registered by the call to kmpc_malloc() or
    // kmpc_calloc() before.
    // So it should be safe to call __kmp_get_thread(), not
    // __kmp_entry_thread().
    KMP_ASSERT(*((void **)ptr - 1));
    brel(__kmp_get_thread(), *((void **)ptr - 1));
  } else {
    result = bgetr(__kmp_entry_thread(), *((void **)ptr - 1),
                   (bufsize)(size + sizeof(ptr)));
    if (result != NULL) {
      *(void **)result = result;
      result = (void **)result + 1;
    }
  }
  return result;
}

// NOTE: the library must have already been initialized by a previous allocate
void kmpc_free(void *ptr) {
  if (!__kmp_init_serial) {
    return;
  }
  if (ptr != NULL) {
    kmp_info_t *th = __kmp_get_thread();
    __kmp_bget_dequeue(th); /* Release any queued buffers */
    // extract allocated pointer and free it
    KMP_ASSERT(*((void **)ptr - 1));
    brel(th, *((void **)ptr - 1));
  }
}

void *___kmp_thread_malloc(kmp_info_t *th, size_t size KMP_SRC_LOC_DECL) {
  void *ptr;
  KE_TRACE(30, ("-> __kmp_thread_malloc( %p, %d ) called from %s:%d\n", th,
                (int)size KMP_SRC_LOC_PARM));
  ptr = bget(th, (bufsize)size);
  KE_TRACE(30, ("<- __kmp_thread_malloc() returns %p\n", ptr));
  return ptr;
}

void *___kmp_thread_calloc(kmp_info_t *th, size_t nelem,
                           size_t elsize KMP_SRC_LOC_DECL) {
  void *ptr;
  KE_TRACE(30, ("-> __kmp_thread_calloc( %p, %d, %d ) called from %s:%d\n", th,
                (int)nelem, (int)elsize KMP_SRC_LOC_PARM));
  ptr = bgetz(th, (bufsize)(nelem * elsize));
  KE_TRACE(30, ("<- __kmp_thread_calloc() returns %p\n", ptr));
  return ptr;
}

void *___kmp_thread_realloc(kmp_info_t *th, void *ptr,
                            size_t size KMP_SRC_LOC_DECL) {
  KE_TRACE(30, ("-> __kmp_thread_realloc( %p, %p, %d ) called from %s:%d\n", th,
                ptr, (int)size KMP_SRC_LOC_PARM));
  ptr = bgetr(th, ptr, (bufsize)size);
  KE_TRACE(30, ("<- __kmp_thread_realloc() returns %p\n", ptr));
  return ptr;
}
void ___kmp_thread_free(kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL) {
  KE_TRACE(30, ("-> __kmp_thread_free( %p, %p ) called from %s:%d\n", th,
                ptr KMP_SRC_LOC_PARM));
  if (ptr != NULL) {
    __kmp_bget_dequeue(th); /* Release any queued buffers */
    brel(th, ptr);
  }
  KE_TRACE(30, ("<- __kmp_thread_free()\n"));
}

/* OMP 5.0 Memory Management support */
static const char *kmp_mk_lib_name;
static void *h_memkind;
/* memkind experimental API: */
// memkind_alloc
static void *(*kmp_mk_alloc)(void *k, size_t sz);
// memkind_free
static void (*kmp_mk_free)(void *kind, void *ptr);
// memkind_check_available
static int (*kmp_mk_check)(void *kind);
// kinds we are going to use
static void **mk_default;
static void **mk_interleave;
static void **mk_hbw;
static void **mk_hbw_interleave;
static void **mk_hbw_preferred;
static void **mk_hugetlb;
static void **mk_hbw_hugetlb;
static void **mk_hbw_preferred_hugetlb;

#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
static inline void chk_kind(void ***pkind) {
  KMP_DEBUG_ASSERT(pkind);
  if (*pkind) // symbol found
    if (kmp_mk_check(**pkind)) // kind not available or error
      *pkind = NULL;
}
#endif

void __kmp_init_memkind() {
// as of 2018-07-31 memkind does not support Windows*, exclude it for now
#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
  // use of statically linked memkind is problematic, as it depends on libnuma
  kmp_mk_lib_name = "libmemkind.so";
  h_memkind = dlopen(kmp_mk_lib_name, RTLD_LAZY);
  if (h_memkind) {
    kmp_mk_check = (int (*)(void *))dlsym(h_memkind, "memkind_check_available");
    kmp_mk_alloc =
        (void *(*)(void *, size_t))dlsym(h_memkind, "memkind_malloc");
    kmp_mk_free = (void (*)(void *, void *))dlsym(h_memkind, "memkind_free");
    mk_default = (void **)dlsym(h_memkind, "MEMKIND_DEFAULT");
    if (kmp_mk_check && kmp_mk_alloc && kmp_mk_free && mk_default &&
        !kmp_mk_check(*mk_default)) {
      __kmp_memkind_available = 1;
      mk_interleave = (void **)dlsym(h_memkind, "MEMKIND_INTERLEAVE");
      chk_kind(&mk_interleave);
      mk_hbw = (void **)dlsym(h_memkind, "MEMKIND_HBW");
      chk_kind(&mk_hbw);
      mk_hbw_interleave = (void **)dlsym(h_memkind, "MEMKIND_HBW_INTERLEAVE");
      chk_kind(&mk_hbw_interleave);
      mk_hbw_preferred = (void **)dlsym(h_memkind, "MEMKIND_HBW_PREFERRED");
      chk_kind(&mk_hbw_preferred);
      mk_hugetlb = (void **)dlsym(h_memkind, "MEMKIND_HUGETLB");
      chk_kind(&mk_hugetlb);
      mk_hbw_hugetlb = (void **)dlsym(h_memkind, "MEMKIND_HBW_HUGETLB");
      chk_kind(&mk_hbw_hugetlb);
      mk_hbw_preferred_hugetlb =
          (void **)dlsym(h_memkind, "MEMKIND_HBW_PREFERRED_HUGETLB");
      chk_kind(&mk_hbw_preferred_hugetlb);
      KE_TRACE(25, ("__kmp_init_memkind: memkind library initialized\n"));
      return; // success
    }
    dlclose(h_memkind); // failure
    h_memkind = NULL;
  }
  kmp_mk_check = NULL;
  kmp_mk_alloc = NULL;
  kmp_mk_free = NULL;
  mk_default = NULL;
  mk_interleave = NULL;
  mk_hbw = NULL;
  mk_hbw_interleave = NULL;
  mk_hbw_preferred = NULL;
  mk_hugetlb = NULL;
  mk_hbw_hugetlb = NULL;
  mk_hbw_preferred_hugetlb = NULL;
#else
  kmp_mk_lib_name = "";
  h_memkind = NULL;
  kmp_mk_check = NULL;
  kmp_mk_alloc = NULL;
  kmp_mk_free = NULL;
  mk_default = NULL;
  mk_interleave = NULL;
  mk_hbw = NULL;
  mk_hbw_interleave = NULL;
  mk_hbw_preferred = NULL;
  mk_hugetlb = NULL;
  mk_hbw_hugetlb = NULL;
  mk_hbw_preferred_hugetlb = NULL;
#endif
}
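/* Why the kinds are void** (illustrative): memkind exports each kind as a
   global variable, so dlsym() yields the address of that variable and one
   extra dereference is needed to reach the kind itself. A sketch of the
   double indirection, assuming the library was opened successfully:

     void **kind = (void **)dlsym(h_memkind, "MEMKIND_DEFAULT");
     if (kind && !kmp_mk_check(*kind))  // dereference once to get the kind
       ptr = kmp_mk_alloc(*kind, 128);  // allocate 128 bytes on that kind
*/
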
void __kmp_fini_memkind() {
#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
  if (__kmp_memkind_available)
    KE_TRACE(25, ("__kmp_fini_memkind: finalize memkind library\n"));
  if (h_memkind) {
    dlclose(h_memkind);
    h_memkind = NULL;
  }
  kmp_mk_check = NULL;
  kmp_mk_alloc = NULL;
  kmp_mk_free = NULL;
  mk_default = NULL;
  mk_interleave = NULL;
  mk_hbw = NULL;
  mk_hbw_interleave = NULL;
  mk_hbw_preferred = NULL;
  mk_hugetlb = NULL;
  mk_hbw_hugetlb = NULL;
  mk_hbw_preferred_hugetlb = NULL;
#endif
}

omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t ms,
                                             int ntraits,
                                             omp_alloctrait_t traits[]) {
  // OpenMP 5.0 only allows predefined memspaces
  KMP_DEBUG_ASSERT(ms == omp_default_mem_space || ms == omp_low_lat_mem_space ||
                   ms == omp_large_cap_mem_space || ms == omp_const_mem_space ||
                   ms == omp_high_bw_mem_space);
  kmp_allocator_t *al;
  int i;
  al = (kmp_allocator_t *)__kmp_allocate(sizeof(kmp_allocator_t)); // zeroed
  al->memspace = ms; // not used currently
  for (i = 0; i < ntraits; ++i) {
    switch (traits[i].key) {
    case omp_atk_threadmodel:
    case omp_atk_access:
    case omp_atk_pinned:
      break;
    case omp_atk_alignment:
      al->alignment = traits[i].value;
      KMP_ASSERT(IS_POWER_OF_TWO(al->alignment));
      break;
    case omp_atk_pool_size:
      al->pool_size = traits[i].value;
      break;
    case omp_atk_fallback:
      al->fb = (omp_alloctrait_value_t)traits[i].value;
      KMP_DEBUG_ASSERT(
          al->fb == omp_atv_default_mem_fb || al->fb == omp_atv_null_fb ||
          al->fb == omp_atv_abort_fb || al->fb == omp_atv_allocator_fb);
      break;
    case omp_atk_fb_data:
      al->fb_data = RCAST(kmp_allocator_t *, traits[i].value);
      break;
    case omp_atk_partition:
      al->memkind = RCAST(void **, traits[i].value);
      break;
    default:
      KMP_ASSERT2(0, "Unexpected allocator trait");
    }
  }
  if (al->fb == 0) {
    // set default allocator
    al->fb = omp_atv_default_mem_fb;
    al->fb_data = (kmp_allocator_t *)omp_default_mem_alloc;
  } else if (al->fb == omp_atv_allocator_fb) {
    KMP_ASSERT(al->fb_data != NULL);
  } else if (al->fb == omp_atv_default_mem_fb) {
    al->fb_data = (kmp_allocator_t *)omp_default_mem_alloc;
  }
  if (__kmp_memkind_available) {
    // Let's use memkind library if available
    if (ms == omp_high_bw_mem_space) {
      if (al->memkind == (void *)omp_atv_interleaved && mk_hbw_interleave) {
        al->memkind = mk_hbw_interleave;
      } else if (mk_hbw_preferred) {
        // AC: do not try to use MEMKIND_HBW for now, because memkind library
        // cannot reliably detect exhaustion of HBW memory.
        // It could be possible using hbw_verify_memory_region() but memkind
        // manual says: "Using this function in production code may result in
        // serious performance penalty".
        al->memkind = mk_hbw_preferred;
      } else {
        // HBW is requested but not available --> return NULL allocator
        __kmp_free(al);
        return omp_null_allocator;
      }
    } else {
      if (al->memkind == (void *)omp_atv_interleaved && mk_interleave) {
        al->memkind = mk_interleave;
      } else {
        al->memkind = mk_default;
      }
    }
  } else {
    if (ms == omp_high_bw_mem_space) {
      // cannot detect HBW memory presence without memkind library
      __kmp_free(al);
      return omp_null_allocator;
    }
  }
  return (omp_allocator_handle_t)al;
}
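/* Usage sketch (illustrative, mirroring what the OpenMP 5.0
   omp_init_allocator() entry point passes down): request 64-byte alignment
   and a 1 MiB pool with a null fallback.

     omp_alloctrait_t traits[] = {{omp_atk_alignment, 64},
                                  {omp_atk_pool_size, 1 << 20},
                                  {omp_atk_fallback, omp_atv_null_fb}};
     omp_allocator_handle_t a =
         __kmpc_init_allocator(gtid, omp_default_mem_space, 3, traits);
*/
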
void __kmpc_destroy_allocator(int gtid, omp_allocator_handle_t allocator) {
  if (allocator > kmp_max_mem_alloc)
    __kmp_free(allocator);
}

void __kmpc_set_default_allocator(int gtid, omp_allocator_handle_t allocator) {
  if (allocator == omp_null_allocator)
    allocator = omp_default_mem_alloc;
  __kmp_threads[gtid]->th.th_def_allocator = allocator;
}

omp_allocator_handle_t __kmpc_get_default_allocator(int gtid) {
  return __kmp_threads[gtid]->th.th_def_allocator;
}

typedef struct kmp_mem_desc { // Memory block descriptor
  void *ptr_alloc; // Pointer returned by allocator
  size_t size_a; // Size of allocated memory block (initial+descriptor+align)
  void *ptr_align; // Pointer to aligned memory, returned
  kmp_allocator_t *allocator; // allocator
} kmp_mem_desc_t;
static int alignment = sizeof(void *); // let's align to pointer size

void *__kmpc_alloc(int gtid, size_t size, omp_allocator_handle_t allocator) {
  void *ptr = NULL;
  kmp_allocator_t *al;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (size == 0)
    return NULL;

  if (allocator == omp_null_allocator)
    allocator = __kmp_threads[gtid]->th.th_def_allocator;

  KE_TRACE(25, ("__kmpc_alloc: T#%d (%d, %p)\n", gtid, (int)size, allocator));
  al = RCAST(kmp_allocator_t *, CCAST(omp_allocator_handle_t, allocator));

  int sz_desc = sizeof(kmp_mem_desc_t);
  kmp_mem_desc_t desc;
  kmp_uintptr_t addr; // address returned by allocator
  kmp_uintptr_t addr_align; // address to return to caller
  kmp_uintptr_t addr_descr; // address of memory block descriptor
  int align = alignment; // default alignment
  if (allocator > kmp_max_mem_alloc && al->alignment > 0) {
    align = al->alignment; // alignment requested by user
  }
  desc.size_a = size + sz_desc + align;

  if (__kmp_memkind_available) {
    if (allocator < kmp_max_mem_alloc) {
      // pre-defined allocator
      if (allocator == omp_high_bw_mem_alloc && mk_hbw_preferred) {
        ptr = kmp_mk_alloc(*mk_hbw_preferred, desc.size_a);
      } else {
        ptr = kmp_mk_alloc(*mk_default, desc.size_a);
      }
    } else if (al->pool_size > 0) {
      // custom allocator with pool size requested
      kmp_uint64 used =
          KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
      if (used + desc.size_a > al->pool_size) {
        // not enough space, need to go fallback path
        KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
        if (al->fb == omp_atv_default_mem_fb) {
          al = (kmp_allocator_t *)omp_default_mem_alloc;
          ptr = kmp_mk_alloc(*mk_default, desc.size_a);
        } else if (al->fb == omp_atv_abort_fb) {
          KMP_ASSERT(0); // abort fallback requested
        } else if (al->fb == omp_atv_allocator_fb) {
          KMP_ASSERT(al != al->fb_data);
          al = al->fb_data;
          return __kmpc_alloc(gtid, size, (omp_allocator_handle_t)al);
        } // else ptr == NULL;
      } else {
        // pool has enough space
        ptr = kmp_mk_alloc(*al->memkind, desc.size_a);
        if (ptr == NULL) {
          if (al->fb == omp_atv_default_mem_fb) {
            al = (kmp_allocator_t *)omp_default_mem_alloc;
            ptr = kmp_mk_alloc(*mk_default, desc.size_a);
          } else if (al->fb == omp_atv_abort_fb) {
            KMP_ASSERT(0); // abort fallback requested
          } else if (al->fb == omp_atv_allocator_fb) {
            KMP_ASSERT(al != al->fb_data);
            al = al->fb_data;
            return __kmpc_alloc(gtid, size, (omp_allocator_handle_t)al);
          }
        }
      }
    } else {
      // custom allocator, pool size not requested
      ptr = kmp_mk_alloc(*al->memkind, desc.size_a);
      if (ptr == NULL) {
        if (al->fb == omp_atv_default_mem_fb) {
          al = (kmp_allocator_t *)omp_default_mem_alloc;
          ptr = kmp_mk_alloc(*mk_default, desc.size_a);
        } else if (al->fb == omp_atv_abort_fb) {
          KMP_ASSERT(0); // abort fallback requested
        } else if (al->fb == omp_atv_allocator_fb) {
          KMP_ASSERT(al != al->fb_data);
          al = al->fb_data;
          return __kmpc_alloc(gtid, size, (omp_allocator_handle_t)al);
        }
      }
    }
  } else if (allocator < kmp_max_mem_alloc) {
    // pre-defined allocator
    if (allocator == omp_high_bw_mem_alloc) {
      // ptr = NULL;
    } else {
      ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
    }
  } else if (al->pool_size > 0) {
    // custom allocator with pool size requested
    kmp_uint64 used =
        KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
    if (used + desc.size_a > al->pool_size) {
      // not enough space, need to go fallback path
      KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
      if (al->fb == omp_atv_default_mem_fb) {
        al = (kmp_allocator_t *)omp_default_mem_alloc;
        ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
      } else if (al->fb == omp_atv_abort_fb) {
        KMP_ASSERT(0); // abort fallback requested
      } else if (al->fb == omp_atv_allocator_fb) {
        KMP_ASSERT(al != al->fb_data);
        al = al->fb_data;
        return __kmpc_alloc(gtid, size, (omp_allocator_handle_t)al);
      } // else ptr == NULL;
    } else {
      // pool has enough space
      ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
      if (ptr == NULL && al->fb == omp_atv_abort_fb) {
        KMP_ASSERT(0); // abort fallback requested
      } // no sense to look for another fallback because of same internal alloc
    }
  } else {
    // custom allocator, pool size not requested
    ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
    if (ptr == NULL && al->fb == omp_atv_abort_fb) {
      KMP_ASSERT(0); // abort fallback requested
    } // no sense to look for another fallback because of same internal alloc
  }
  KE_TRACE(10, ("__kmpc_alloc: T#%d %p=alloc(%d)\n", gtid, ptr, desc.size_a));
  if (ptr == NULL)
    return NULL;

  addr = (kmp_uintptr_t)ptr;
  addr_align = (addr + sz_desc + align - 1) & ~(align - 1);
  addr_descr = addr_align - sz_desc;

  desc.ptr_alloc = ptr;
  desc.ptr_align = (void *)addr_align;
  desc.allocator = al;
  *((kmp_mem_desc_t *)addr_descr) = desc; // save descriptor contents
  KMP_MB();

  KE_TRACE(25, ("__kmpc_alloc returns %p, T#%d\n", desc.ptr_align, gtid));
  return desc.ptr_align;
}
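/* Descriptor layout sketch (illustrative): with the default align ==
   sizeof(void *) == 8 and sizeof(kmp_mem_desc_t) == 32 on a 64-bit build, a
   100-byte request allocates size_a == 140. If the underlying allocator
   returns 0x2004, then addr_align == (0x2004 + 32 + 7) & ~7 == 0x2028 is
   handed to the caller, and the descriptor is written at
   0x2028 - 32 == 0x2008, where __kmpc_free() reads it back. */
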
void *__kmpc_calloc(int gtid, size_t nmemb, size_t size,
                    omp_allocator_handle_t allocator) {
  void *ptr = NULL;
  kmp_allocator_t *al;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (allocator == omp_null_allocator)
    allocator = __kmp_threads[gtid]->th.th_def_allocator;

  KE_TRACE(25, ("__kmpc_calloc: T#%d (%d, %d, %p)\n", gtid, (int)nmemb,
                (int)size, allocator));

  al = RCAST(kmp_allocator_t *, CCAST(omp_allocator_handle_t, allocator));

  if (nmemb == 0 || size == 0)
    return ptr;

  if ((SIZE_MAX - sizeof(kmp_mem_desc_t)) / size < nmemb) {
    if (al->fb == omp_atv_abort_fb) {
      KMP_ASSERT(0);
    }
    return ptr;
  }

  ptr = __kmpc_alloc(gtid, nmemb * size, allocator);

  if (ptr) {
    memset(ptr, 0x00, nmemb * size);
  }
  KE_TRACE(25, ("__kmpc_calloc returns %p, T#%d\n", ptr, gtid));
  return ptr;
}
void __kmpc_free(int gtid, void *ptr, const omp_allocator_handle_t allocator) {
  KE_TRACE(25, ("__kmpc_free: T#%d free(%p,%p)\n", gtid, ptr, allocator));
  if (ptr == NULL)
    return;

  kmp_allocator_t *al;
  omp_allocator_handle_t oal;
  al = RCAST(kmp_allocator_t *, CCAST(omp_allocator_handle_t, allocator));
  kmp_mem_desc_t desc;
  kmp_uintptr_t addr_align; // address to return to caller
  kmp_uintptr_t addr_descr; // address of memory block descriptor

  addr_align = (kmp_uintptr_t)ptr;
  addr_descr = addr_align - sizeof(kmp_mem_desc_t);
  desc = *((kmp_mem_desc_t *)addr_descr); // read descriptor

  KMP_DEBUG_ASSERT(desc.ptr_align == ptr);
  if (allocator) {
    KMP_DEBUG_ASSERT(desc.allocator == al || desc.allocator == al->fb_data);
  }
  al = desc.allocator;
  oal = (omp_allocator_handle_t)al; // cast to void* for comparisons
  KMP_DEBUG_ASSERT(al);

  if (__kmp_memkind_available) {
    if (oal < kmp_max_mem_alloc) {
      // pre-defined allocator
      if (oal == omp_high_bw_mem_alloc && mk_hbw_preferred) {
        kmp_mk_free(*mk_hbw_preferred, desc.ptr_alloc);
      } else {
        kmp_mk_free(*mk_default, desc.ptr_alloc);
      }
    } else {
      if (al->pool_size > 0) { // custom allocator with pool size requested
        kmp_uint64 used =
            KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
        (void)used; // to suppress compiler warning
        KMP_DEBUG_ASSERT(used >= desc.size_a);
      }
      kmp_mk_free(*al->memkind, desc.ptr_alloc);
    }
  } else {
    if (oal > kmp_max_mem_alloc && al->pool_size > 0) {
      kmp_uint64 used =
          KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
      (void)used; // to suppress compiler warning
      KMP_DEBUG_ASSERT(used >= desc.size_a);
    }
    __kmp_thread_free(__kmp_thread_from_gtid(gtid), desc.ptr_alloc);
  }
  KE_TRACE(10, ("__kmpc_free: T#%d freed %p (%p)\n", gtid, desc.ptr_alloc,
                allocator));
}

/* If LEAK_MEMORY is defined, __kmp_free() will *not* free memory. It causes
   memory leaks, but it may be useful for debugging memory corruptions, used
   freed pointers, etc. */
/* #define LEAK_MEMORY */
struct kmp_mem_descr { // Memory block descriptor.
  void *ptr_allocated; // Pointer returned by malloc(), subject for free().
  size_t size_allocated; // Size of allocated memory block.
  void *ptr_aligned; // Pointer to aligned memory, to be used by client code.
  size_t size_aligned; // Size of aligned memory block.
};
typedef struct kmp_mem_descr kmp_mem_descr_t;
/* Allocate memory on requested boundary, fill allocated memory with 0x00.
   NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
   error. Must use __kmp_free when freeing memory allocated by this routine! */
static void *___kmp_allocate_align(size_t size,
                                   size_t alignment KMP_SRC_LOC_DECL) {
  /* __kmp_allocate() allocates (by call to malloc()) bigger memory block than
     requested to return properly aligned pointer. Original pointer returned
     by malloc() and size of allocated block is saved in descriptor just
     before the aligned pointer. This information used by __kmp_free() -- it
     has to pass to free() original pointer, not aligned one.

       +---------+------------+-----------------------------+---------+
       | padding | descriptor |   aligned block             | padding |
       +---------+------------+-----------------------------+---------+
       ^                      ^
       |                      |
       |                      +- Aligned pointer returned to caller
       +- Pointer returned by malloc()

     Aligned block is filled with zeros, paddings are filled with 0xEF. */

  kmp_mem_descr_t descr;
  kmp_uintptr_t addr_allocated; // Address returned by malloc().
  kmp_uintptr_t addr_aligned; // Aligned address to return to caller.
  kmp_uintptr_t addr_descr; // Address of memory block descriptor.

  KE_TRACE(25, ("-> ___kmp_allocate_align( %d, %d ) called from %s:%d\n",
                (int)size, (int)alignment KMP_SRC_LOC_PARM));

  KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too big.
  KMP_DEBUG_ASSERT(sizeof(void *) <= sizeof(kmp_uintptr_t));
  // Make sure kmp_uintptr_t is enough to store addresses.

  descr.size_aligned = size;
  descr.size_allocated =
      descr.size_aligned + sizeof(kmp_mem_descr_t) + alignment;

#if KMP_DEBUG
  descr.ptr_allocated = _malloc_src_loc(descr.size_allocated, _file_, _line_);
#else
  descr.ptr_allocated = malloc_src_loc(descr.size_allocated KMP_SRC_LOC_PARM);
#endif
  KE_TRACE(10, ("   malloc( %d ) returned %p\n", (int)descr.size_allocated,
                descr.ptr_allocated));
  if (descr.ptr_allocated == NULL) {
    KMP_FATAL(OutOfHeapMemory);
  }

  addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
  addr_aligned =
      (addr_allocated + sizeof(kmp_mem_descr_t) + alignment) & ~(alignment - 1);
  addr_descr = addr_aligned - sizeof(kmp_mem_descr_t);

  descr.ptr_aligned = (void *)addr_aligned;

  KE_TRACE(26, ("   ___kmp_allocate_align: "
                "ptr_allocated=%p, size_allocated=%d, "
                "ptr_aligned=%p, size_aligned=%d\n",
                descr.ptr_allocated, (int)descr.size_allocated,
                descr.ptr_aligned, (int)descr.size_aligned));

  KMP_DEBUG_ASSERT(addr_allocated <= addr_descr);
  KMP_DEBUG_ASSERT(addr_descr + sizeof(kmp_mem_descr_t) == addr_aligned);
  KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
                   addr_allocated + descr.size_allocated);
  KMP_DEBUG_ASSERT(addr_aligned % alignment == 0);
#ifdef KMP_DEBUG
  memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
// Fill allocated memory block with 0xEF.
#endif
  memset(descr.ptr_aligned, 0x00, descr.size_aligned);
  // Fill the aligned memory block (which is intended for using by caller) with
  // 0x00. Do not put this filling under KMP_DEBUG condition! Many callers
  // expect zeroed memory. (Padding bytes remain filled with 0xEF in debugging
  // library.)
  *((kmp_mem_descr_t *)addr_descr) = descr;

  KMP_MB();

  KE_TRACE(25, ("<- ___kmp_allocate_align() returns %p\n", descr.ptr_aligned));
  return descr.ptr_aligned;
} // func ___kmp_allocate_align
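/* Worked example (illustrative, 64-bit build where sizeof(kmp_mem_descr_t)
   == 32): ___kmp_allocate_align(100, 64) allocates 100 + 32 + 64 == 196
   bytes. If malloc() returns 0x3004, the aligned pointer is
   (0x3004 + 32 + 64) & ~63 == 0x3040, the descriptor sits at 0x3020, and
   bytes [0x3040, 0x3040 + 100) are zeroed while both paddings hold 0xEF in
   a debug build. */
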
/* Allocate memory on cache line boundary, fill allocated memory with 0x00.
   Do not call this func directly! Use __kmp_allocate macro instead.
   NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
   error. Must use __kmp_free when freeing memory allocated by this routine! */
void *___kmp_allocate(size_t size KMP_SRC_LOC_DECL) {
  void *ptr;
  KE_TRACE(25, ("-> __kmp_allocate( %d ) called from %s:%d\n",
                (int)size KMP_SRC_LOC_PARM));
  ptr = ___kmp_allocate_align(size, __kmp_align_alloc KMP_SRC_LOC_PARM);
  KE_TRACE(25, ("<- __kmp_allocate() returns %p\n", ptr));
  return ptr;
} // func ___kmp_allocate

/* Allocate memory on page boundary, fill allocated memory with 0x00.
   Do not call this func directly! Use __kmp_page_allocate macro instead.
   NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
   error. Must use __kmp_free when freeing memory allocated by this routine! */
void *___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL) {
  int page_size = 8 * 1024;
  void *ptr;

  KE_TRACE(25, ("-> __kmp_page_allocate( %d ) called from %s:%d\n",
                (int)size KMP_SRC_LOC_PARM));
  ptr = ___kmp_allocate_align(size, page_size KMP_SRC_LOC_PARM);
  KE_TRACE(25, ("<- __kmp_page_allocate( %d ) returns %p\n", (int)size, ptr));
  return ptr;
} // ___kmp_page_allocate
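
/* Typical usage (a sketch; the __kmp_allocate and __kmp_free macros that
   supply the KMP_SRC_LOC arguments to these functions are assumed to be
   defined in kmp.h, and `n` and `counters` are hypothetical):

       int *counters = (int *)__kmp_allocate(n * sizeof(int));
       // counters[] arrives zero-filled and cache-line aligned
       ...
       __kmp_free(counters); // must not be passed to plain free()
*/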
/* Free memory allocated by __kmp_allocate() and __kmp_page_allocate().
   In debug mode, fill the memory block with 0xEF before call to free(). */
void ___kmp_free(void *ptr KMP_SRC_LOC_DECL) {
  kmp_mem_descr_t descr;
  kmp_uintptr_t addr_allocated; // Address returned by malloc().
  kmp_uintptr_t addr_aligned; // Aligned address passed by caller.

  KE_TRACE(25,
           ("-> __kmp_free( %p ) called from %s:%d\n", ptr KMP_SRC_LOC_PARM));
  KMP_ASSERT(ptr != NULL);

  descr = *(kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t));

  KE_TRACE(26, (" __kmp_free: "
                "ptr_allocated=%p, size_allocated=%d, "
                "ptr_aligned=%p, size_aligned=%d\n",
                descr.ptr_allocated, (int)descr.size_allocated,
                descr.ptr_aligned, (int)descr.size_aligned));

  addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
  addr_aligned = (kmp_uintptr_t)descr.ptr_aligned;

  KMP_DEBUG_ASSERT(addr_aligned % CACHE_LINE == 0);
  KMP_DEBUG_ASSERT(descr.ptr_aligned == ptr);
  KMP_DEBUG_ASSERT(addr_allocated + sizeof(kmp_mem_descr_t) <= addr_aligned);
  KMP_DEBUG_ASSERT(descr.size_aligned < descr.size_allocated);
  KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
                   addr_allocated + descr.size_allocated);

#ifdef KMP_DEBUG
  memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
  // Fill memory block with 0xEF; it helps catch uses of freed memory.
#endif

#ifndef LEAK_MEMORY
  KE_TRACE(10, (" free( %p )\n", descr.ptr_allocated));
#ifdef KMP_DEBUG
  _free_src_loc(descr.ptr_allocated, _file_, _line_);
#else
  free_src_loc(descr.ptr_allocated KMP_SRC_LOC_PARM);
#endif
#endif
  KMP_MB();
  KE_TRACE(25, ("<- __kmp_free() returns\n"));
} // func ___kmp_free

#if USE_FAST_MEMORY == 3
// Allocate fast memory by first scanning the thread's free lists.
// If a chunk of the right size exists, grab it off the free list.
// Otherwise allocate normally using kmp_thread_malloc.

// AC: How to choose the limit? Just get 16 for now...
#define KMP_FREE_LIST_LIMIT 16

// Always use 128 bytes for determining buckets for caching memory blocks
#define DCACHE_LINE 128
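
/* The free lists are bucketed by block size in whole DCACHE_LINE units. The
   shift chain in ___kmp_fast_allocate() below implements this mapping (block
   sizes are after rounding up to a whole number of cache lines):

       1-2 lines   (<= 256B) -> list 0, block rounded to  2 lines ( 256B)
       3-4 lines   (<= 512B) -> list 1, block rounded to  4 lines ( 512B)
       5-16 lines  (<=  2KB) -> list 2, block rounded to 16 lines (  2KB)
       17-64 lines (<=  8KB) -> list 3, block rounded to 64 lines (  8KB)
       65+ lines   ( >  8KB) -> no caching; allocated/released via bget

   For example (illustrative only), a 1000-byte request occupies
   (1000 + 127) / 128 = 8 cache lines, so idx = 7; then 7 >> 2 == 1 and
   1 >> 2 == 0, selecting list 2 and a 16-line (2048-byte) block. */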
void *___kmp_fast_allocate(kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL) {
  void *ptr;
  int num_lines;
  int idx;
  int index;
  void *alloc_ptr;
  size_t alloc_size;
  kmp_mem_descr_t *descr;

  KE_TRACE(25, ("-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n",
                __kmp_gtid_from_thread(this_thr), (int)size KMP_SRC_LOC_PARM));

  num_lines = (size + DCACHE_LINE - 1) / DCACHE_LINE;
  idx = num_lines - 1;
  KMP_DEBUG_ASSERT(idx >= 0);
  if (idx < 2) {
    index = 0; // idx is [ 0, 1 ], use first free list
    num_lines = 2; // 1, 2 cache lines or less than cache line
  } else if ((idx >>= 2) == 0) {
    index = 1; // idx is [ 2, 3 ], use second free list
    num_lines = 4; // 3, 4 cache lines
  } else if ((idx >>= 2) == 0) {
    index = 2; // idx is [ 4, 15 ], use third free list
    num_lines = 16; // 5, 6, ..., 16 cache lines
  } else if ((idx >>= 2) == 0) {
    index = 3; // idx is [ 16, 63 ], use fourth free list
    num_lines = 64; // 17, 18, ..., 64 cache lines
  } else {
    goto alloc_call; // 65 or more cache lines ( > 8KB ), don't use free lists
  }

  ptr = this_thr->th.th_free_lists[index].th_free_list_self;
  if (ptr != NULL) {
    // pop the head of the no-sync free list
    this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
    KMP_DEBUG_ASSERT(
        this_thr ==
        ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t)))
            ->ptr_aligned);
    goto end;
  }
  ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
  if (ptr != NULL) {
    // no-sync free list is empty, use sync free list (filled in by other
    // threads only)
    // pop the head of the sync free list, push NULL instead
    while (!KMP_COMPARE_AND_STORE_PTR(
        &this_thr->th.th_free_lists[index].th_free_list_sync, ptr, nullptr)) {
      KMP_CPU_PAUSE();
      ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
    }
    // push the rest of the chain into the no-sync free list (can be NULL if
    // there was only one block)
    this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
    KMP_DEBUG_ASSERT(
        this_thr ==
        ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t)))
            ->ptr_aligned);
    goto end;
  }

alloc_call:
  // haven't found a block in the free lists, thus allocate it
  size = num_lines * DCACHE_LINE;

  alloc_size = size + sizeof(kmp_mem_descr_t) + DCACHE_LINE;
  KE_TRACE(25, ("__kmp_fast_allocate: T#%d Calling __kmp_thread_malloc with "
                "alloc_size %d\n",
                __kmp_gtid_from_thread(this_thr), alloc_size));
  alloc_ptr = bget(this_thr, (bufsize)alloc_size);

  // align ptr to DCACHE_LINE
  ptr = (void *)((((kmp_uintptr_t)alloc_ptr) + sizeof(kmp_mem_descr_t) +
                  DCACHE_LINE) &
                 ~(DCACHE_LINE - 1));
  descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t));

  descr->ptr_allocated = alloc_ptr; // remember allocated pointer
  // we don't need size_allocated
  descr->ptr_aligned = (void *)this_thr; // remember allocating thread
  // (it is already saved in the bget buffer,
  // but we may want to use another allocator in future)
  descr->size_aligned = size;

end:
  KE_TRACE(25, ("<- __kmp_fast_allocate( T#%d ) returns %p\n",
                __kmp_gtid_from_thread(this_thr), ptr));
  return ptr;
} // func __kmp_fast_allocate

// Free fast memory and place it on the thread's free list if it is of
// the correct size.
void ___kmp_fast_free(kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL) {
  kmp_mem_descr_t *descr;
  kmp_info_t *alloc_thr;
  size_t size;
  size_t idx;
  int index;

  KE_TRACE(25, ("-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n",
                __kmp_gtid_from_thread(this_thr), ptr KMP_SRC_LOC_PARM));
  KMP_ASSERT(ptr != NULL);

  descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t));

  KE_TRACE(26, (" __kmp_fast_free: size_aligned=%d\n",
                (int)descr->size_aligned));

  size = descr->size_aligned; // 2, 4, 16, 64, 65, 66, ... cache lines

  idx = DCACHE_LINE * 2; // 2 cache lines is minimal size of block
  if (idx == size) {
    index = 0; // 2 cache lines
  } else if ((idx <<= 1) == size) {
    index = 1; // 4 cache lines
  } else if ((idx <<= 2) == size) {
    index = 2; // 16 cache lines
  } else if ((idx <<= 2) == size) {
    index = 3; // 64 cache lines
  } else {
    KMP_DEBUG_ASSERT(size > DCACHE_LINE * 64);
    goto free_call; // 65 or more cache lines ( > 8KB )
  }

  alloc_thr = (kmp_info_t *)descr->ptr_aligned; // get thread owning the block
  if (alloc_thr == this_thr) {
    // push block to self no-sync free list, linking previous head (LIFO)
    *((void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self;
    this_thr->th.th_free_lists[index].th_free_list_self = ptr;
  } else {
    void *head = this_thr->th.th_free_lists[index].th_free_list_other;
    if (head == NULL) {
      // Create new free list
      this_thr->th.th_free_lists[index].th_free_list_other = ptr;
      *((void **)ptr) = NULL; // mark the tail of the list
      descr->size_allocated = (size_t)1; // head of the list keeps its length
    } else {
      // need to check the existing "other" list's owner thread and queue size
      kmp_mem_descr_t *dsc =
          (kmp_mem_descr_t *)((char *)head - sizeof(kmp_mem_descr_t));
      // allocating thread, same for all queue nodes
      kmp_info_t *q_th = (kmp_info_t *)(dsc->ptr_aligned);
      size_t q_sz =
          dsc->size_allocated + 1; // new size in case we add current task
      if (q_th == alloc_thr && q_sz <= KMP_FREE_LIST_LIMIT) {
        // we can add current task to "other" list, no sync needed
        *((void **)ptr) = head;
        descr->size_allocated = q_sz;
        this_thr->th.th_free_lists[index].th_free_list_other = ptr;
      } else {
        // either the queue blocks' owner is changing or the size limit was
        // exceeded; return the old queue to the allocating thread (q_th)
        // synchronously, and start a new list for alloc_thr's tasks
        void *old_ptr;
        void *tail = head;
        void *next = *((void **)head);
        while (next != NULL) {
          KMP_DEBUG_ASSERT(
              // queue size should decrease by 1 each step through the list
              ((kmp_mem_descr_t *)((char *)next - sizeof(kmp_mem_descr_t)))
                      ->size_allocated +
                  1 ==
              ((kmp_mem_descr_t *)((char *)tail - sizeof(kmp_mem_descr_t)))
                  ->size_allocated);
          tail = next; // remember tail node
          next = *((void **)next);
        }
        KMP_DEBUG_ASSERT(q_th != NULL);
        // push block to owner's sync free list
        old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
        /* the next pointer must be set before setting free_list to ptr to
           avoid exposing a broken list to other threads, even for an
           instant. */
        *((void **)tail) = old_ptr;

        while (!KMP_COMPARE_AND_STORE_PTR(
            &q_th->th.th_free_lists[index].th_free_list_sync, old_ptr, head)) {
          KMP_CPU_PAUSE();
          old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
          *((void **)tail) = old_ptr;
        }
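
        /* Sketch of the hand-off just performed (sync stands for q_th's
           th_free_lists[index].th_free_list_sync; states illustrative only):

               before:  sync -> old_ptr -> ...    head -> ... -> tail -> NULL
               after:   sync -> head -> ... -> tail -> old_ptr -> ...

           If another thread won the race, the CAS fails; old_ptr is then
           reloaded and tail is re-linked before retrying, so the chain
           rooted at head is always intact whenever it becomes visible. */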
        // start a new list of not-self tasks
        this_thr->th.th_free_lists[index].th_free_list_other = ptr;
        *((void **)ptr) = NULL;
        descr->size_allocated = (size_t)1; // head of queue keeps its length
      }
    }
  }
  goto end;

free_call:
  KE_TRACE(25, ("__kmp_fast_free: T#%d Calling __kmp_thread_free for size %d\n",
                __kmp_gtid_from_thread(this_thr), size));
  __kmp_bget_dequeue(this_thr); /* Release any queued buffers */
  brel(this_thr, descr->ptr_allocated);

end:
  KE_TRACE(25, ("<- __kmp_fast_free() returns\n"));

} // func __kmp_fast_free

// Initialize the thread free lists related to fast memory
// Only do this when a thread is initially created.
void __kmp_initialize_fast_memory(kmp_info_t *this_thr) {
  KE_TRACE(10, ("__kmp_initialize_fast_memory: Called from th %p\n", this_thr));

  memset(this_thr->th.th_free_lists, 0, NUM_LISTS * sizeof(kmp_free_list_t));
}

// Free the memory in the thread free lists related to fast memory
// Only do this when a thread is being reaped (destroyed).
void __kmp_free_fast_memory(kmp_info_t *th) {
  // Suppose we use the BGET underlying allocator; walk through its structures...
  int bin;
  thr_data_t *thr = get_thr_data(th);
  void **lst = NULL;

  KE_TRACE(
      5, ("__kmp_free_fast_memory: Called T#%d\n", __kmp_gtid_from_thread(th)));

  __kmp_bget_dequeue(th); // Release any queued buffers

  // Dig through the free lists and extract all allocated blocks
  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
    bfhead_t *b = thr->freelist[bin].ql.flink;
    while (b != &thr->freelist[bin]) {
      if ((kmp_uintptr_t)b->bh.bb.bthr & 1) { // the buffer is an allocated address
        *((void **)b) =
            lst; // link the list (overwrites bthr, but flink is still intact)
        lst = (void **)b; // push b into lst
      }
      b = b->ql.flink; // get next buffer
    }
  }
  while (lst != NULL) {
    void *next = *lst;
    KE_TRACE(10, ("__kmp_free_fast_memory: freeing %p, next=%p th %p (%d)\n",
                  lst, next, th, __kmp_gtid_from_thread(th)));
    (*thr->relfcn)(lst);
#if BufStats
    // count blocks to prevent problems in __kmp_finalize_bget()
    thr->numprel++; /* Nr of expansion block releases */
    thr->numpblk--; /* Total number of blocks */
#endif
    lst = (void **)next;
  }

  KE_TRACE(
      5, ("__kmp_free_fast_memory: Freed T#%d\n", __kmp_gtid_from_thread(th)));
}

#endif // USE_FAST_MEMORY