/*
 * kmp_alloc.cpp -- private/shared dynamic memory allocation and management
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "kmp.h"
#include "kmp_io.h"
#include "kmp_wrapper_malloc.h"

// Disable bget when it is not used
#if KMP_USE_BGET

/* Thread private buffer management code */

typedef int (*bget_compact_t)(size_t, int);
typedef void *(*bget_acquire_t)(size_t);
typedef void (*bget_release_t)(void *);

/* NOTE: bufsize must be a signed datatype */

#if KMP_OS_WINDOWS
#if KMP_ARCH_X86 || KMP_ARCH_ARM
typedef kmp_int32 bufsize;
#else
typedef kmp_int64 bufsize;
#endif
#else
typedef ssize_t bufsize;
#endif

/* The three modes of operation are fifo search, lifo search, and best-fit */

typedef enum bget_mode {
  bget_mode_fifo = 0,
  bget_mode_lifo = 1,
  bget_mode_best = 2
} bget_mode_t;

static void bpool(kmp_info_t *th, void *buffer, bufsize len);
static void *bget(kmp_info_t *th, bufsize size);
static void *bgetz(kmp_info_t *th, bufsize size);
static void *bgetr(kmp_info_t *th, void *buffer, bufsize newsize);
static void brel(kmp_info_t *th, void *buf);
static void bectl(kmp_info_t *th, bget_compact_t compact,
                  bget_acquire_t acquire, bget_release_t release,
                  bufsize pool_incr);

#ifdef KMP_DEBUG
static void bstats(kmp_info_t *th, bufsize *curalloc, bufsize *totfree,
                   bufsize *maxfree, long *nget, long *nrel);
static void bstatse(kmp_info_t *th, bufsize *pool_incr, long *npool,
                    long *npget, long *nprel, long *ndget, long *ndrel);
static void bufdump(kmp_info_t *th, void *buf);
static void bpoold(kmp_info_t *th, void *pool, int dumpalloc, int dumpfree);
static int bpoolv(kmp_info_t *th, void *pool);
#endif

/* BGET CONFIGURATION */
/* Buffer allocation size quantum: all buffers allocated are a
   multiple of this size. This MUST be a power of two. */

/* On IA-32 architecture with Linux* OS, malloc() does not
   ensure 16 byte alignment */

#if KMP_ARCH_X86 || !KMP_HAVE_QUAD

#define SizeQuant 8
#define AlignType double

#else

#define SizeQuant 16
#define AlignType _Quad

#endif

// Define this symbol to enable the bstats() function which calculates the
// total free space in the buffer pool, the largest available buffer, and the
// total space currently allocated.
#define BufStats 1

#ifdef KMP_DEBUG

// Define this symbol to enable the bpoold() function which dumps the buffers
// in a buffer pool.
#define BufDump 1

// Define this symbol to enable the bpoolv() function for validating a buffer
// pool.
#define BufValid 1

// Define this symbol to enable the bufdump() function which allows dumping the
// contents of an allocated or free buffer.
#define DumpData 1

#ifdef NOT_USED_NOW

// Wipe free buffers to a guaranteed pattern of garbage to trip up miscreants
// who attempt to use pointers into released buffers.
#define FreeWipe 1

// Use a best fit algorithm when searching for space for an allocation request.
// This uses memory more efficiently, but allocation will be much slower.
#define BestFit 1

#endif /* NOT_USED_NOW */
#endif /* KMP_DEBUG */

static bufsize bget_bin_size[] = {
    0,
    // 1 << 6, /* .5 Cache line */
    1 << 7, /* 1 Cache line, new */
    1 << 8, /* 2 Cache lines */
    1 << 9, /* 4 Cache lines, new */
    1 << 10, /* 8 Cache lines */
    1 << 11, /* 16 Cache lines, new */
    1 << 12, 1 << 13, /* new */
    1 << 14, 1 << 15, /* new */
    1 << 16, 1 << 17, 1 << 18, 1 << 19, 1 << 20, /* 1MB */
    1 << 21, /* 2MB */
    1 << 22, /* 4MB */
    1 << 23, /* 8MB */
    1 << 24, /* 16MB */
    1 << 25, /* 32MB */
};

#define MAX_BGET_BINS (int)(sizeof(bget_bin_size) / sizeof(bufsize))

struct bfhead;

// Declare the interface, including the requested buffer size type, bufsize.

/* Queue links */
typedef struct qlinks {
  struct bfhead *flink; /* Forward link */
  struct bfhead *blink; /* Backward link */
} qlinks_t;

/* Header in allocated and free buffers */
typedef struct bhead2 {
  kmp_info_t *bthr; /* The thread which owns the buffer pool */
  bufsize prevfree; /* Relative link back to previous free buffer in memory or
                       0 if previous buffer is allocated. */
  bufsize bsize; /* Buffer size: positive if free, negative if allocated. */
} bhead2_t;

/* Make sure the bhead structure is a multiple of SizeQuant in size. */
typedef union bhead {
  KMP_ALIGN(SizeQuant)
  AlignType b_align;
  char b_pad[sizeof(bhead2_t) + (SizeQuant - (sizeof(bhead2_t) % SizeQuant))];
  bhead2_t bb;
} bhead_t;
#define BH(p) ((bhead_t *)(p))

/* Header in directly allocated buffers (by acqfcn) */
typedef struct bdhead {
  bufsize tsize; /* Total size, including overhead */
  bhead_t bh; /* Common header */
} bdhead_t;
#define BDH(p) ((bdhead_t *)(p))

/* Header in free buffers */
typedef struct bfhead {
  bhead_t bh; /* Common allocated/free header */
  qlinks_t ql; /* Links on free list */
} bfhead_t;
#define BFH(p) ((bfhead_t *)(p))

typedef struct thr_data {
  bfhead_t freelist[MAX_BGET_BINS];
#if BufStats
  size_t totalloc; /* Total space currently allocated */
  long numget, numrel; /* Number of bget() and brel() calls */
  long numpblk; /* Number of pool blocks */
  long numpget, numprel; /* Number of block gets and rels */
  long numdget, numdrel; /* Number of direct gets and rels */
#endif /* BufStats */

  /* Automatic expansion block management functions */
  bget_compact_t compfcn;
  bget_acquire_t acqfcn;
  bget_release_t relfcn;

  bget_mode_t mode; /* what allocation mode to use? */

  bufsize exp_incr; /* Expansion block size */
  bufsize pool_len; /* 0: no bpool calls have been made
                       -1: not all pool blocks are the same size
                       >0: (common) block size for all bpool calls made so far
                    */
  bfhead_t *last_pool; /* Last pool owned by this thread (delay deallocation) */
} thr_data_t;

/* Minimum allocation quantum: */
#define QLSize (sizeof(qlinks_t))
#define SizeQ ((SizeQuant > QLSize) ? SizeQuant : QLSize)
#define MaxSize                                                                \
  (bufsize)(                                                                   \
      ~(((bufsize)(1) << (sizeof(bufsize) * CHAR_BIT - 1)) | (SizeQuant - 1)))
// Maximum for the requested size.

/* End sentinel: value placed in bsize field of dummy block delimiting
   end of pool block. The most negative number which will fit in a
   bufsize, defined in a way that the compiler will accept. */

#define ESent                                                                  \
  ((bufsize)(-(((((bufsize)1) << ((int)sizeof(bufsize) * 8 - 2)) - 1) * 2) - 2))
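
/* Layout sketch (editor's illustration, derived from the structures above;
   not part of the original BGET sources):

     ... | bhead_t | user data ............. | bhead_t | next buffer ...
           bsize < 0 while the buffer          prevfree = 0 because the
           is allocated, > 0 while free        preceding buffer is in use

   bsize is stored negative while a buffer is handed out and positive while it
   sits on a free list; prevfree in the following header is nonzero only when
   the buffer before it is free, which is what lets brel() coalesce adjacent
   free buffers without scanning. */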

/* Thread Data management routines */
static int bget_get_bin(bufsize size) {
  // binary chop bins
  int lo = 0, hi = MAX_BGET_BINS - 1;

  KMP_DEBUG_ASSERT(size > 0);

  while ((hi - lo) > 1) {
    int mid = (lo + hi) >> 1;
    if (size < bget_bin_size[mid])
      hi = mid - 1;
    else
      lo = mid;
  }

  KMP_DEBUG_ASSERT((lo >= 0) && (lo < MAX_BGET_BINS));

  return lo;
}

static void set_thr_data(kmp_info_t *th) {
  int i;
  thr_data_t *data;

  data = (thr_data_t *)((!th->th.th_local.bget_data)
                            ? __kmp_allocate(sizeof(*data))
                            : th->th.th_local.bget_data);

  memset(data, '\0', sizeof(*data));

  for (i = 0; i < MAX_BGET_BINS; ++i) {
    data->freelist[i].ql.flink = &data->freelist[i];
    data->freelist[i].ql.blink = &data->freelist[i];
  }

  th->th.th_local.bget_data = data;
  th->th.th_local.bget_list = 0;
#if !USE_CMP_XCHG_FOR_BGET
#ifdef USE_QUEUING_LOCK_FOR_BGET
  __kmp_init_lock(&th->th.th_local.bget_lock);
#else
  __kmp_init_bootstrap_lock(&th->th.th_local.bget_lock);
#endif /* USE_LOCK_FOR_BGET */
#endif /* ! USE_CMP_XCHG_FOR_BGET */
}

static thr_data_t *get_thr_data(kmp_info_t *th) {
  thr_data_t *data;

  data = (thr_data_t *)th->th.th_local.bget_data;

  KMP_DEBUG_ASSERT(data != 0);

  return data;
}

#ifdef KMP_DEBUG

static void __kmp_bget_validate_queue(kmp_info_t *th) {
  /* NOTE: assume that the global_lock is held */

  void *p = (void *)th->th.th_local.bget_list;

  while (p != 0) {
    bfhead_t *b = BFH(((char *)p) - sizeof(bhead_t));

    KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
    p = (void *)b->ql.flink;
  }
}

#endif

/* Walk the free list and release the enqueued buffers */
static void __kmp_bget_dequeue(kmp_info_t *th) {
  void *p = TCR_SYNC_PTR(th->th.th_local.bget_list);

  if (p != 0) {
#if USE_CMP_XCHG_FOR_BGET
    {
      volatile void *old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
      while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
                                        CCAST(void *, old_value), nullptr)) {
        KMP_CPU_PAUSE();
        old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
      }
      p = CCAST(void *, old_value);
    }
#else /* ! USE_CMP_XCHG_FOR_BGET */
#ifdef USE_QUEUING_LOCK_FOR_BGET
    __kmp_acquire_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
#else
    __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
#endif /* USE_QUEUING_LOCK_FOR_BGET */

    p = (void *)th->th.th_local.bget_list;
    th->th.th_local.bget_list = 0;

#ifdef USE_QUEUING_LOCK_FOR_BGET
    __kmp_release_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
#else
    __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
#endif
#endif /* USE_CMP_XCHG_FOR_BGET */

    /* Check again to make sure the list is not empty */
    while (p != 0) {
      void *buf = p;
      bfhead_t *b = BFH(((char *)p) - sizeof(bhead_t));

      KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
      KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
                       (kmp_uintptr_t)th); // clear possible mark
      KMP_DEBUG_ASSERT(b->ql.blink == 0);

      p = (void *)b->ql.flink;

      brel(th, buf);
    }
  }
}

/* Chain together the free buffers by using the thread owner field */
static void __kmp_bget_enqueue(kmp_info_t *th, void *buf
#ifdef USE_QUEUING_LOCK_FOR_BGET
                               ,
                               kmp_int32 rel_gtid
#endif
                               ) {
  bfhead_t *b = BFH(((char *)buf) - sizeof(bhead_t));

  KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
  KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
                   (kmp_uintptr_t)th); // clear possible mark

  b->ql.blink = 0;

  KC_TRACE(10, ("__kmp_bget_enqueue: moving buffer to T#%d list\n",
                __kmp_gtid_from_thread(th)));

#if USE_CMP_XCHG_FOR_BGET
  {
    volatile void *old_value = TCR_PTR(th->th.th_local.bget_list);
    /* the next pointer must be set before setting bget_list to buf to avoid
       exposing a broken list to other threads, even for an instant. */
    b->ql.flink = BFH(CCAST(void *, old_value));

    while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
                                      CCAST(void *, old_value), buf)) {
      KMP_CPU_PAUSE();
      old_value = TCR_PTR(th->th.th_local.bget_list);
      /* the next pointer must be set before setting bget_list to buf to avoid
         exposing a broken list to other threads, even for an instant. */
      b->ql.flink = BFH(CCAST(void *, old_value));
    }
  }
#else /* ! USE_CMP_XCHG_FOR_BGET */
#ifdef USE_QUEUING_LOCK_FOR_BGET
  __kmp_acquire_lock(&th->th.th_local.bget_lock, rel_gtid);
#else
  __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
#endif

  b->ql.flink = BFH(th->th.th_local.bget_list);
  th->th.th_local.bget_list = (void *)buf;

#ifdef USE_QUEUING_LOCK_FOR_BGET
  __kmp_release_lock(&th->th.th_local.bget_lock, rel_gtid);
#else
  __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
#endif
#endif /* USE_CMP_XCHG_FOR_BGET */
}

/* insert buffer back onto a new freelist */
static void __kmp_bget_insert_into_freelist(thr_data_t *thr, bfhead_t *b) {
  int bin;

  KMP_DEBUG_ASSERT(((size_t)b) % SizeQuant == 0);
  KMP_DEBUG_ASSERT(b->bh.bb.bsize % SizeQuant == 0);

  bin = bget_get_bin(b->bh.bb.bsize);

  KMP_DEBUG_ASSERT(thr->freelist[bin].ql.blink->ql.flink ==
                   &thr->freelist[bin]);
  KMP_DEBUG_ASSERT(thr->freelist[bin].ql.flink->ql.blink ==
                   &thr->freelist[bin]);

  b->ql.flink = &thr->freelist[bin];
  b->ql.blink = thr->freelist[bin].ql.blink;

  thr->freelist[bin].ql.blink = b;
  b->ql.blink->ql.flink = b;
}

/* unlink the buffer from the old freelist */
static void __kmp_bget_remove_from_freelist(bfhead_t *b) {
  KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
  KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);

  b->ql.blink->ql.flink = b->ql.flink;
  b->ql.flink->ql.blink = b->ql.blink;
}

/* GET STATS -- check info on free list */
static void bcheck(kmp_info_t *th, bufsize *max_free, bufsize *total_free) {
  thr_data_t *thr = get_thr_data(th);
  int bin;

  *total_free = *max_free = 0;

  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
    bfhead_t *b, *best;

    best = &thr->freelist[bin];
    b = best->ql.flink;

    while (b != &thr->freelist[bin]) {
      *total_free += (b->bh.bb.bsize - sizeof(bhead_t));
      if ((best == &thr->freelist[bin]) || (b->bh.bb.bsize < best->bh.bb.bsize))
        best = b;

      /* Link to next buffer */
      b = b->ql.flink;
    }

    if (*max_free < best->bh.bb.bsize)
      *max_free = best->bh.bb.bsize;
  }

  if (*max_free > (bufsize)sizeof(bhead_t))
    *max_free -= sizeof(bhead_t);
}

/* BGET -- Allocate a buffer. */
static void *bget(kmp_info_t *th, bufsize requested_size) {
  thr_data_t *thr = get_thr_data(th);
  bufsize size = requested_size;
  bfhead_t *b;
  void *buf;
  int compactseq = 0;
  int use_blink = 0;
  /* For BestFit */
  bfhead_t *best;

  if (size < 0 || size + sizeof(bhead_t) > MaxSize) {
    return NULL;
  }; // if

  __kmp_bget_dequeue(th); /* Release any queued buffers */

  if (size < (bufsize)SizeQ) { // Need at least room for the queue links.
    size = SizeQ;
  }
#if defined(SizeQuant) && (SizeQuant > 1)
  size = (size + (SizeQuant - 1)) & (~(SizeQuant - 1));
#endif

  size += sizeof(bhead_t); // Add overhead in allocated buffer to size required.
  KMP_DEBUG_ASSERT(size >= 0);
  KMP_DEBUG_ASSERT(size % SizeQuant == 0);

  use_blink = (thr->mode == bget_mode_lifo);

  /* If a compact function was provided in the call to bectl(), wrap
     a loop around the allocation process to allow compaction to
     intervene in case we don't find a suitable buffer in the chain. */

  for (;;) {
    int bin;

    for (bin = bget_get_bin(size); bin < MAX_BGET_BINS; ++bin) {
      /* Link to next buffer */
      b = (use_blink ? thr->freelist[bin].ql.blink
                     : thr->freelist[bin].ql.flink);

      if (thr->mode == bget_mode_best) {
        best = &thr->freelist[bin];

        /* Scan the free list searching for the first buffer big enough
           to hold the requested size buffer. */
        while (b != &thr->freelist[bin]) {
          if (b->bh.bb.bsize >= (bufsize)size) {
            if ((best == &thr->freelist[bin]) ||
                (b->bh.bb.bsize < best->bh.bb.bsize)) {
              best = b;
            }
          }

          /* Link to next buffer */
          b = (use_blink ? b->ql.blink : b->ql.flink);
        }
        b = best;
      }

      while (b != &thr->freelist[bin]) {
        if ((bufsize)b->bh.bb.bsize >= (bufsize)size) {

          // Buffer is big enough to satisfy the request. Allocate it to the
          // caller. We must decide whether the buffer is large enough to split
          // into the part given to the caller and a free buffer that remains
          // on the free list, or whether the entire buffer should be removed
          // from the free list and given to the caller in its entirety. We
          // only split the buffer if enough room remains for a header plus the
          // minimum quantum of allocation.
          if ((b->bh.bb.bsize - (bufsize)size) >
              (bufsize)(SizeQ + (sizeof(bhead_t)))) {
            bhead_t *ba, *bn;

            ba = BH(((char *)b) + (b->bh.bb.bsize - (bufsize)size));
            bn = BH(((char *)ba) + size);

            KMP_DEBUG_ASSERT(bn->bb.prevfree == b->bh.bb.bsize);

            /* Subtract size from length of free block. */
            b->bh.bb.bsize -= (bufsize)size;

            /* Link allocated buffer to the previous free buffer. */
            ba->bb.prevfree = b->bh.bb.bsize;

            /* Plug negative size into user buffer. */
            ba->bb.bsize = -size;

            /* Mark this buffer as owned by this thread. */
            TCW_PTR(ba->bb.bthr,
                    th); // not an allocated address (do not mark it)
            /* Mark buffer after this one not preceded by free block. */
            bn->bb.prevfree = 0;

            // unlink buffer from old freelist, and reinsert into new freelist
            __kmp_bget_remove_from_freelist(b);
            __kmp_bget_insert_into_freelist(thr, b);
#if BufStats
            thr->totalloc += (size_t)size;
            thr->numget++; /* Increment number of bget() calls */
#endif
            buf = (void *)((((char *)ba) + sizeof(bhead_t)));
            KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
            return buf;
          } else {
            bhead_t *ba;

            ba = BH(((char *)b) + b->bh.bb.bsize);

            KMP_DEBUG_ASSERT(ba->bb.prevfree == b->bh.bb.bsize);

            /* The buffer isn't big enough to split. Give the whole
               shebang to the caller and remove it from the free list. */

            __kmp_bget_remove_from_freelist(b);
#if BufStats
            thr->totalloc += (size_t)b->bh.bb.bsize;
            thr->numget++; /* Increment number of bget() calls */
#endif
            /* Negate size to mark buffer allocated. */
            b->bh.bb.bsize = -(b->bh.bb.bsize);

            /* Mark this buffer as owned by this thread. */
            TCW_PTR(ba->bb.bthr, th); // not an allocated address (do not mark)
            /* Zero the back pointer in the next buffer in memory
               to indicate that this buffer is allocated. */
            ba->bb.prevfree = 0;

            /* Give user buffer starting at queue links. */
            buf = (void *)&(b->ql);
            KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
            return buf;
          }
        }

        /* Link to next buffer */
        b = (use_blink ? b->ql.blink : b->ql.flink);
      }
    }

    /* We failed to find a buffer. If there's a compact function defined,
       notify it of the size requested. If it returns TRUE, try the allocation
       again. */

    if ((thr->compfcn == 0) || (!(*thr->compfcn)(size, ++compactseq))) {
      break;
    }
  }

  /* No buffer available with requested size free. */

  /* Don't give up yet -- look in the reserve supply. */
  if (thr->acqfcn != 0) {
    if (size > (bufsize)(thr->exp_incr - sizeof(bhead_t))) {
      /* Request is too large to fit in a single expansion block.
         Try to satisfy it by a direct buffer acquisition. */
      bdhead_t *bdh;

      size += sizeof(bdhead_t) - sizeof(bhead_t);

      KE_TRACE(10, ("%%%%%% MALLOC( %d )\n", (int)size));

      /* richryan */
      bdh = BDH((*thr->acqfcn)((bufsize)size));
      if (bdh != NULL) {

        // Mark the buffer special by setting size field of its header to zero.
        bdh->bh.bb.bsize = 0;

        /* Mark this buffer as owned by this thread. */
        TCW_PTR(bdh->bh.bb.bthr, th); // don't mark buffer as allocated,
        // because direct buffer never goes to free list
        bdh->bh.bb.prevfree = 0;
        bdh->tsize = size;
#if BufStats
        thr->totalloc += (size_t)size;
        thr->numget++; /* Increment number of bget() calls */
        thr->numdget++; /* Direct bget() call count */
#endif
        buf = (void *)(bdh + 1);
        KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
        return buf;
      }

    } else {

      /* Try to obtain a new expansion block */
      void *newpool;

      KE_TRACE(10, ("%%%%%% MALLOCB( %d )\n", (int)thr->exp_incr));

      /* richryan */
      newpool = (*thr->acqfcn)((bufsize)thr->exp_incr);
      KMP_DEBUG_ASSERT(((size_t)newpool) % SizeQuant == 0);
      if (newpool != NULL) {
        bpool(th, newpool, thr->exp_incr);
        buf = bget(
            th, requested_size); /* This can't, I say, can't get into a loop. */
        return buf;
      }
    }
  }

  /* Still no buffer available */

  return NULL;
}

/* BGETZ -- Allocate a buffer and clear its contents to zero. We clear
   the entire contents of the buffer to zero, not just the
   region requested by the caller. */

static void *bgetz(kmp_info_t *th, bufsize size) {
  char *buf = (char *)bget(th, size);

  if (buf != NULL) {
    bhead_t *b;
    bufsize rsize;

    b = BH(buf - sizeof(bhead_t));
    rsize = -(b->bb.bsize);
    if (rsize == 0) {
      bdhead_t *bd;

      bd = BDH(buf - sizeof(bdhead_t));
      rsize = bd->tsize - (bufsize)sizeof(bdhead_t);
    } else {
      rsize -= sizeof(bhead_t);
    }

    KMP_DEBUG_ASSERT(rsize >= size);

    (void)memset(buf, 0, (bufsize)rsize);
  }
  return ((void *)buf);
}

/* BGETR -- Reallocate a buffer. This is a minimal implementation,
   simply in terms of brel() and bget(). It could be
   enhanced to allow the buffer to grow into adjacent free
   blocks and to avoid moving data unnecessarily. */

static void *bgetr(kmp_info_t *th, void *buf, bufsize size) {
  void *nbuf;
  bufsize osize; /* Old size of buffer */
  bhead_t *b;

  nbuf = bget(th, size);
  if (nbuf == NULL) { /* Acquire new buffer */
    return NULL;
  }
  if (buf == NULL) {
    return nbuf;
  }
  b = BH(((char *)buf) - sizeof(bhead_t));
  osize = -b->bb.bsize;
  if (osize == 0) {
    /* Buffer acquired directly through acqfcn. */
    bdhead_t *bd;

    bd = BDH(((char *)buf) - sizeof(bdhead_t));
    osize = bd->tsize - (bufsize)sizeof(bdhead_t);
  } else {
    osize -= sizeof(bhead_t);
  };

  KMP_DEBUG_ASSERT(osize > 0);

  (void)KMP_MEMCPY((char *)nbuf, (char *)buf, /* Copy the data */
                   (size_t)((size < osize) ? size : osize));
  brel(th, buf);

  return nbuf;
}
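
/* Usage sketch (editor's illustration only; the wrappers later in this file
   follow the same pattern):

     void *p = bget(th, 100);   // raw allocation from th's private pool
     p = bgetr(th, p, 200);     // grow: copies min(old, new) bytes, may move
     brel(th, p);               // release back to the owning thread's pool

   bgetz() behaves like bget() but zeroes the whole buffer, including any
   rounding slack beyond the requested size. */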

/* BREL -- Release a buffer. */
static void brel(kmp_info_t *th, void *buf) {
  thr_data_t *thr = get_thr_data(th);
  bfhead_t *b, *bn;
  kmp_info_t *bth;

  KMP_DEBUG_ASSERT(buf != NULL);
  KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);

  b = BFH(((char *)buf) - sizeof(bhead_t));

  if (b->bh.bb.bsize == 0) { /* Directly-acquired buffer? */
    bdhead_t *bdh;

    bdh = BDH(((char *)buf) - sizeof(bdhead_t));
    KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
#if BufStats
    thr->totalloc -= (size_t)bdh->tsize;
    thr->numdrel++; /* Number of direct releases */
    thr->numrel++; /* Increment number of brel() calls */
#endif /* BufStats */
#ifdef FreeWipe
    (void)memset((char *)buf, 0x55, (size_t)(bdh->tsize - sizeof(bdhead_t)));
#endif /* FreeWipe */

    KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)bdh));

    KMP_DEBUG_ASSERT(thr->relfcn != 0);
    (*thr->relfcn)((void *)bdh); /* Release it directly. */
    return;
  }

  bth = (kmp_info_t *)((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) &
                       ~1); // clear possible mark before comparison
  if (bth != th) {
    /* Add this buffer to be released by the owning thread later */
    __kmp_bget_enqueue(bth, buf
#ifdef USE_QUEUING_LOCK_FOR_BGET
                       ,
                       __kmp_gtid_from_thread(th)
#endif
                       );
    return;
  }

  /* Buffer size must be negative, indicating that the buffer is allocated. */
  if (b->bh.bb.bsize >= 0) {
    bn = NULL;
  }
  KMP_DEBUG_ASSERT(b->bh.bb.bsize < 0);

  /* Back pointer in next buffer must be zero, indicating the same thing: */

  KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.bsize)->bb.prevfree == 0);

#if BufStats
  thr->numrel++; /* Increment number of brel() calls */
  thr->totalloc += (size_t)b->bh.bb.bsize;
#endif

  /* If the back link is nonzero, the previous buffer is free. */

  if (b->bh.bb.prevfree != 0) {
    /* The previous buffer is free. Consolidate this buffer with it by adding
       the length of this buffer to the previous free buffer. Note that we
       subtract the size in the buffer being released, since it's negative to
       indicate that the buffer is allocated. */
    bufsize size = b->bh.bb.bsize;

    /* Make the previous buffer the one we're working on. */
    KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.prevfree)->bb.bsize ==
                     b->bh.bb.prevfree);
    b = BFH(((char *)b) - b->bh.bb.prevfree);
    b->bh.bb.bsize -= size;

    /* unlink the buffer from the old freelist */
    __kmp_bget_remove_from_freelist(b);
  } else {
    /* The previous buffer is allocated. Mark this buffer size as positive
       (i.e. free) and fall through to place the buffer on the free list as an
       isolated free block. */
    b->bh.bb.bsize = -b->bh.bb.bsize;
  }

  /* insert buffer back onto a new freelist */
  __kmp_bget_insert_into_freelist(thr, b);

  /* Now we look at the next buffer in memory, located by advancing from
     the start of this buffer by its size, to see if that buffer is
     free. If it is, we combine this buffer with the next one in
     memory, dechaining the second buffer from the free list. */
  bn = BFH(((char *)b) + b->bh.bb.bsize);
  if (bn->bh.bb.bsize > 0) {

    /* The buffer is free. Remove it from the free list and add
       its size to that of our buffer. */
    KMP_DEBUG_ASSERT(BH((char *)bn + bn->bh.bb.bsize)->bb.prevfree ==
                     bn->bh.bb.bsize);

    __kmp_bget_remove_from_freelist(bn);

    b->bh.bb.bsize += bn->bh.bb.bsize;

    /* unlink the buffer from the old freelist, and reinsert it into the new
     * freelist */
    __kmp_bget_remove_from_freelist(b);
    __kmp_bget_insert_into_freelist(thr, b);

    /* Finally, advance to the buffer that follows the newly
       consolidated free block. We must set its backpointer to the
       head of the consolidated free block. We know the next block
       must be an allocated block because the process of recombination
       guarantees that two free blocks will never be contiguous in
       memory. */
    bn = BFH(((char *)b) + b->bh.bb.bsize);
  }
#ifdef FreeWipe
  (void)memset(((char *)b) + sizeof(bfhead_t), 0x55,
               (size_t)(b->bh.bb.bsize - sizeof(bfhead_t)));
#endif
  KMP_DEBUG_ASSERT(bn->bh.bb.bsize < 0);

  /* The next buffer is allocated. Set the backpointer in it to point
     to this buffer; the previous free buffer in memory. */

  bn->bh.bb.prevfree = b->bh.bb.bsize;

  /* If a block-release function is defined, and this free buffer
     constitutes the entire block, release it. Note that pool_len
     is defined in such a way that the test will fail unless all
     pool blocks are the same size. */
  if (thr->relfcn != 0 &&
      b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) {
#if BufStats
    if (thr->numpblk !=
        1) { /* Do not release the last buffer until finalization time */
#endif

      KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
      KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
      KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree ==
                       b->bh.bb.bsize);

      /* Unlink the buffer from the free list */
      __kmp_bget_remove_from_freelist(b);

      KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b));

      (*thr->relfcn)(b);
#if BufStats
      thr->numprel++; /* Nr of expansion block releases */
      thr->numpblk--; /* Total number of blocks */
      KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);

      // avoid leaving stale last_pool pointer around if it is being dealloced
      if (thr->last_pool == b)
        thr->last_pool = 0;
    } else {
      thr->last_pool = b;
    }
#endif /* BufStats */
  }
}

/* BECTL -- Establish automatic pool expansion control */
static void bectl(kmp_info_t *th, bget_compact_t compact,
                  bget_acquire_t acquire, bget_release_t release,
                  bufsize pool_incr) {
  thr_data_t *thr = get_thr_data(th);

  thr->compfcn = compact;
  thr->acqfcn = acquire;
  thr->relfcn = release;
  thr->exp_incr = pool_incr;
}
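
/* Note (editor's addition): in this file bectl() is wired up by
   __kmp_initialize_bget() below with plain malloc()/free() as the
   acquire/release hooks and __kmp_malloc_pool_incr as the expansion increment;
   kmpc_set_poolsize() re-issues the same call with a user-chosen increment.
   No compaction callback is ever installed here. */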

/* BPOOL -- Add a region of memory to the buffer pool. */
static void bpool(kmp_info_t *th, void *buf, bufsize len) {
  /*    int bin = 0; */
  thr_data_t *thr = get_thr_data(th);
  bfhead_t *b = BFH(buf);
  bhead_t *bn;

  __kmp_bget_dequeue(th); /* Release any queued buffers */

#ifdef SizeQuant
  len &= ~(SizeQuant - 1);
#endif
  if (thr->pool_len == 0) {
    thr->pool_len = len;
  } else if (len != thr->pool_len) {
    thr->pool_len = -1;
  }
#if BufStats
  thr->numpget++; /* Number of block acquisitions */
  thr->numpblk++; /* Number of blocks total */
  KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
#endif /* BufStats */

  /* Since the block is initially occupied by a single free buffer,
     it had better not be (much) larger than the largest buffer
     whose size we can store in bhead.bb.bsize. */
  KMP_DEBUG_ASSERT(len - sizeof(bhead_t) <= -((bufsize)ESent + 1));

  /* Clear the backpointer at the start of the block to indicate that
     there is no free block prior to this one. That blocks
     recombination when the first block in memory is released. */
  b->bh.bb.prevfree = 0;

  /* Create a dummy allocated buffer at the end of the pool. This dummy
     buffer is seen when a buffer at the end of the pool is released and
     blocks recombination of the last buffer with the dummy buffer at
     the end. The length in the dummy buffer is set to the largest
     negative number to denote the end of the pool for diagnostic
     routines (this specific value is not counted on by the actual
     allocation and release functions). */
  len -= sizeof(bhead_t);
  b->bh.bb.bsize = (bufsize)len;
  /* Set the owner of this buffer */
  TCW_PTR(b->bh.bb.bthr,
          (kmp_info_t *)((kmp_uintptr_t)th |
                         1)); // mark the buffer as allocated address

  /* Chain the new block to the free list. */
  __kmp_bget_insert_into_freelist(thr, b);

#ifdef FreeWipe
  (void)memset(((char *)b) + sizeof(bfhead_t), 0x55,
               (size_t)(len - sizeof(bfhead_t)));
#endif
  bn = BH(((char *)b) + len);
  bn->bb.prevfree = (bufsize)len;
  /* Definition of ESent assumes two's complement! */
  KMP_DEBUG_ASSERT((~0) == -1 && (bn != 0));

  bn->bb.bsize = ESent;
}
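
/* Pool block sketch (editor's illustration of what bpool() builds):

     |<------------ len, rounded down to SizeQuant ------------>|
     |  free buffer: prevfree = 0,                | dummy bhead_t|
     |  bsize = len - sizeof(bhead_t)             | bsize = ESent|

   The ESent sentinel stops brel() from coalescing past the end of the block,
   and prevfree == 0 in the first header stops it from walking off the front;
   the dummy header's prevfree is set to the free buffer's bsize. */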

/* BFREED -- Dump the free lists for this thread. */
static void bfreed(kmp_info_t *th) {
  int bin = 0, count = 0;
  int gtid = __kmp_gtid_from_thread(th);
  thr_data_t *thr = get_thr_data(th);

#if BufStats
  __kmp_printf_no_lock("__kmp_printpool: T#%d total=%" KMP_UINT64_SPEC
                       " get=%" KMP_INT64_SPEC " rel=%" KMP_INT64_SPEC
                       " pblk=%" KMP_INT64_SPEC " pget=%" KMP_INT64_SPEC
                       " prel=%" KMP_INT64_SPEC " dget=%" KMP_INT64_SPEC
                       " drel=%" KMP_INT64_SPEC "\n",
                       gtid, (kmp_uint64)thr->totalloc, (kmp_int64)thr->numget,
                       (kmp_int64)thr->numrel, (kmp_int64)thr->numpblk,
                       (kmp_int64)thr->numpget, (kmp_int64)thr->numprel,
                       (kmp_int64)thr->numdget, (kmp_int64)thr->numdrel);
#endif

  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
    bfhead_t *b;

    for (b = thr->freelist[bin].ql.flink; b != &thr->freelist[bin];
         b = b->ql.flink) {
      bufsize bs = b->bh.bb.bsize;

      KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
      KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
      KMP_DEBUG_ASSERT(bs > 0);

      count += 1;

      __kmp_printf_no_lock(
          "__kmp_printpool: T#%d Free block: 0x%p size %6ld bytes.\n", gtid, b,
          (long)bs);
#ifdef FreeWipe
      {
        char *lerr = ((char *)b) + sizeof(bfhead_t);
        if ((bs > sizeof(bfhead_t)) &&
            ((*lerr != 0x55) ||
             (memcmp(lerr, lerr + 1, (size_t)(bs - (sizeof(bfhead_t) + 1))) !=
              0))) {
          __kmp_printf_no_lock("__kmp_printpool: T#%d (Contents of above "
                               "free block have been overstored.)\n",
                               gtid);
        }
      }
#endif
    }
  }

  if (count == 0)
    __kmp_printf_no_lock("__kmp_printpool: T#%d No free blocks\n", gtid);
}

#ifdef KMP_DEBUG

#if BufStats

/* BSTATS -- Return buffer allocation free space statistics. */
static void bstats(kmp_info_t *th, bufsize *curalloc, bufsize *totfree,
                   bufsize *maxfree, long *nget, long *nrel) {
  int bin = 0;
  thr_data_t *thr = get_thr_data(th);

  *nget = thr->numget;
  *nrel = thr->numrel;
  *curalloc = (bufsize)thr->totalloc;
  *totfree = 0;
  *maxfree = -1;

  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
    bfhead_t *b = thr->freelist[bin].ql.flink;

    while (b != &thr->freelist[bin]) {
      KMP_DEBUG_ASSERT(b->bh.bb.bsize > 0);
      *totfree += b->bh.bb.bsize;
      if (b->bh.bb.bsize > *maxfree) {
        *maxfree = b->bh.bb.bsize;
      }
      b = b->ql.flink; /* Link to next buffer */
    }
  }
}

/* BSTATSE -- Return extended statistics */
static void bstatse(kmp_info_t *th, bufsize *pool_incr, long *npool,
                    long *npget, long *nprel, long *ndget, long *ndrel) {
  thr_data_t *thr = get_thr_data(th);

  *pool_incr = (thr->pool_len < 0) ? -thr->exp_incr : thr->exp_incr;
  *npool = thr->numpblk;
  *npget = thr->numpget;
  *nprel = thr->numprel;
  *ndget = thr->numdget;
  *ndrel = thr->numdrel;
}

#endif /* BufStats */
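
/* Editor's note: bstats()/bstatse() are debug-only snapshots of the counters
   kept in thr_data_t; a hypothetical caller would use them like

     bufsize cur, totf, maxf;
     long nget, nrel;
     bstats(th, &cur, &totf, &maxf, &nget, &nrel); // th's pool at this moment

   (illustrative only -- nothing in this file calls them). */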

/* BUFDUMP -- Dump the data in a buffer. This is called with the user
   data pointer, and backs up to the buffer header. It will
   dump either a free block or an allocated one. */
static void bufdump(kmp_info_t *th, void *buf) {
  bfhead_t *b;
  unsigned char *bdump;
  bufsize bdlen;

  b = BFH(((char *)buf) - sizeof(bhead_t));
  KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
  if (b->bh.bb.bsize < 0) {
    bdump = (unsigned char *)buf;
    bdlen = (-b->bh.bb.bsize) - (bufsize)sizeof(bhead_t);
  } else {
    bdump = (unsigned char *)(((char *)b) + sizeof(bfhead_t));
    bdlen = b->bh.bb.bsize - (bufsize)sizeof(bfhead_t);
  }

  while (bdlen > 0) {
    int i, dupes = 0;
    bufsize l = bdlen;
    char bhex[50], bascii[20];

    if (l > 16) {
      l = 16;
    }

    for (i = 0; i < l; i++) {
      (void)KMP_SNPRINTF(bhex + i * 3, sizeof(bhex) - i * 3, "%02X ", bdump[i]);
      if (bdump[i] > 0x20 && bdump[i] < 0x7F)
        bascii[i] = bdump[i];
      else
        bascii[i] = ' ';
    }
    bascii[i] = 0;
    (void)__kmp_printf_no_lock("%-48s %s\n", bhex, bascii);
    bdump += l;
    bdlen -= l;
    while ((bdlen > 16) &&
           (memcmp((char *)(bdump - 16), (char *)bdump, 16) == 0)) {
      dupes++;
      bdump += 16;
      bdlen -= 16;
    }
    if (dupes > 1) {
      (void)__kmp_printf_no_lock(
          " (%d lines [%d bytes] identical to above line skipped)\n", dupes,
          dupes * 16);
    } else if (dupes == 1) {
      bdump -= 16;
      bdlen += 16;
    }
  }
}

/* BPOOLD -- Dump a buffer pool. The buffer headers are always listed.
   If DUMPALLOC is nonzero, the contents of allocated buffers
   are dumped. If DUMPFREE is nonzero, free blocks are
   dumped as well. If FreeWipe checking is enabled, free
   blocks which have been clobbered will always be dumped. */
static void bpoold(kmp_info_t *th, void *buf, int dumpalloc, int dumpfree) {
  bfhead_t *b = BFH((char *)buf - sizeof(bhead_t));

  while (b->bh.bb.bsize != ESent) {
    bufsize bs = b->bh.bb.bsize;

    if (bs < 0) {
      bs = -bs;
      (void)__kmp_printf_no_lock("Allocated buffer: size %6ld bytes.\n",
                                 (long)bs);
      if (dumpalloc) {
        bufdump(th, (void *)(((char *)b) + sizeof(bhead_t)));
      }
    } else {
      const char *lerr = "";

      KMP_DEBUG_ASSERT(bs > 0);
      if ((b->ql.blink->ql.flink != b) || (b->ql.flink->ql.blink != b)) {
        lerr = " (Bad free list links)";
      }
      (void)__kmp_printf_no_lock("Free block: size %6ld bytes.%s\n",
                                 (long)bs, lerr);
#ifdef FreeWipe
      lerr = ((char *)b) + sizeof(bfhead_t);
      if ((bs > sizeof(bfhead_t)) &&
          ((*lerr != 0x55) ||
           (memcmp(lerr, lerr + 1, (size_t)(bs - (sizeof(bfhead_t) + 1))) !=
            0))) {
        (void)__kmp_printf_no_lock(
            "(Contents of above free block have been overstored.)\n");
        bufdump(th, (void *)(((char *)b) + sizeof(bhead_t)));
      } else
#endif
          if (dumpfree) {
        bufdump(th, (void *)(((char *)b) + sizeof(bhead_t)));
      }
    }
    b = BFH(((char *)b) + bs);
  }
}

/* BPOOLV -- Validate a buffer pool. */
static int bpoolv(kmp_info_t *th, void *buf) {
  bfhead_t *b = BFH(buf);

  while (b->bh.bb.bsize != ESent) {
    bufsize bs = b->bh.bb.bsize;

    if (bs < 0) {
      bs = -bs;
    } else {
#ifdef FreeWipe
      char *lerr = "";
#endif

      KMP_DEBUG_ASSERT(bs > 0);
      if (bs <= 0) {
        return 0;
      }
      if ((b->ql.blink->ql.flink != b) || (b->ql.flink->ql.blink != b)) {
        (void)__kmp_printf_no_lock(
            "Free block: size %6ld bytes. (Bad free list links)\n", (long)bs);
        KMP_DEBUG_ASSERT(0);
        return 0;
      }
#ifdef FreeWipe
      lerr = ((char *)b) + sizeof(bfhead_t);
      if ((bs > sizeof(bfhead_t)) &&
          ((*lerr != 0x55) ||
           (memcmp(lerr, lerr + 1, (size_t)(bs - (sizeof(bfhead_t) + 1))) !=
            0))) {
        (void)__kmp_printf_no_lock(
            "(Contents of above free block have been overstored.)\n");
        bufdump(th, (void *)(((char *)b) + sizeof(bhead_t)));
        KMP_DEBUG_ASSERT(0);
        return 0;
      }
#endif /* FreeWipe */
    }
    b = BFH(((char *)b) + bs);
  }
  return 1;
}

#endif /* KMP_DEBUG */

void __kmp_initialize_bget(kmp_info_t *th) {
  KMP_DEBUG_ASSERT(SizeQuant >= sizeof(void *) && (th != 0));

  set_thr_data(th);

  bectl(th, (bget_compact_t)0, (bget_acquire_t)malloc, (bget_release_t)free,
        (bufsize)__kmp_malloc_pool_incr);
}

void __kmp_finalize_bget(kmp_info_t *th) {
  thr_data_t *thr;
  bfhead_t *b;

  KMP_DEBUG_ASSERT(th != 0);

#if BufStats
  thr = (thr_data_t *)th->th.th_local.bget_data;
  KMP_DEBUG_ASSERT(thr != NULL);
  b = thr->last_pool;

  /* If a block-release function is defined, and this free buffer constitutes
     the entire block, release it. Note that pool_len is defined in such a way
     that the test will fail unless all pool blocks are the same size. */

  // Deallocate the last pool if one exists because we no longer do it in brel()
  if (thr->relfcn != 0 && b != 0 && thr->numpblk != 0 &&
      b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) {
    KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
    KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
    KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree ==
                     b->bh.bb.bsize);

    /* Unlink the buffer from the free list */
    __kmp_bget_remove_from_freelist(b);

    KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b));

    (*thr->relfcn)(b);
    thr->numprel++; /* Nr of expansion block releases */
    thr->numpblk--; /* Total number of blocks */
    KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
  }
#endif /* BufStats */

  /* Deallocate bget_data */
  if (th->th.th_local.bget_data != NULL) {
    __kmp_free(th->th.th_local.bget_data);
    th->th.th_local.bget_data = NULL;
  }; // if
}

void kmpc_set_poolsize(size_t size) {
  bectl(__kmp_get_thread(), (bget_compact_t)0, (bget_acquire_t)malloc,
        (bget_release_t)free, (bufsize)size);
}

size_t kmpc_get_poolsize(void) {
  thr_data_t *p;

  p = get_thr_data(__kmp_get_thread());

  return p->exp_incr;
}

void kmpc_set_poolmode(int mode) {
  thr_data_t *p;

  if (mode == bget_mode_fifo || mode == bget_mode_lifo ||
      mode == bget_mode_best) {
    p = get_thr_data(__kmp_get_thread());
    p->mode = (bget_mode_t)mode;
  }
}

int kmpc_get_poolmode(void) {
  thr_data_t *p;

  p = get_thr_data(__kmp_get_thread());

  return p->mode;
}

void kmpc_get_poolstat(size_t *maxmem, size_t *allmem) {
  kmp_info_t *th = __kmp_get_thread();
  bufsize a, b;

  __kmp_bget_dequeue(th); /* Release any queued buffers */

  bcheck(th, &a, &b);

  *maxmem = a;
  *allmem = b;
}

void kmpc_poolprint(void) {
  kmp_info_t *th = __kmp_get_thread();

  __kmp_bget_dequeue(th); /* Release any queued buffers */

  bfreed(th);
}

#endif // #if KMP_USE_BGET
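
/* Usage sketch (editor's illustration; the functions below implement the
   user-visible kmpc_ allocator on top of bget):

     void *p = kmpc_malloc(100);   // thread-local pool allocation
     p = kmpc_realloc(p, 200);     // may move the data
     kmpc_free(p);                 // library must already be initialized

   Each routine stores the pointer actually returned by bget()/bgetz() in the
   word just before the address handed back to the caller; kmpc_free() and
   kmpc_realloc() read that word to recover the underlying buffer. */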

void *kmpc_malloc(size_t size) {
  void *ptr;
  ptr = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr)));
  if (ptr != NULL) {
    // save allocated pointer just before one returned to user
    *(void **)ptr = ptr;
    ptr = (void **)ptr + 1;
  }
  return ptr;
}

#define IS_POWER_OF_TWO(n) (((n) & ((n)-1)) == 0)

void *kmpc_aligned_malloc(size_t size, size_t alignment) {
  void *ptr;
  void *ptr_allocated;
  KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too big
  if (!IS_POWER_OF_TWO(alignment)) {
    // AC: do we need to issue a warning here?
    errno = EINVAL;
    return NULL;
  }
  size = size + sizeof(void *) + alignment;
  ptr_allocated = bget(__kmp_entry_thread(), (bufsize)size);
  if (ptr_allocated != NULL) {
    // save allocated pointer just before one returned to user
    ptr = (void *)(((kmp_uintptr_t)ptr_allocated + sizeof(void *) + alignment) &
                   ~(alignment - 1));
    *((void **)ptr - 1) = ptr_allocated;
  } else {
    ptr = NULL;
  }
  return ptr;
}
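
/* Worked example (editor's addition, values illustrative): with alignment 64,
   kmpc_aligned_malloc() asks bget() for size + sizeof(void *) + 64 bytes,
   rounds (ptr_allocated + sizeof(void *) + 64) down to a multiple of 64, and
   stashes ptr_allocated in the word just below the returned address, so that
   kmpc_free() can hand the original buffer back to brel(). */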

void *kmpc_calloc(size_t nelem, size_t elsize) {
  void *ptr;
  ptr = bgetz(__kmp_entry_thread(), (bufsize)(nelem * elsize + sizeof(ptr)));
  if (ptr != NULL) {
    // save allocated pointer just before one returned to user
    *(void **)ptr = ptr;
    ptr = (void **)ptr + 1;
  }
  return ptr;
}

void *kmpc_realloc(void *ptr, size_t size) {
  void *result = NULL;
  if (ptr == NULL) {
    // If pointer is NULL, realloc behaves like malloc.
    result = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr)));
    // save allocated pointer just before one returned to user
    if (result != NULL) {
      *(void **)result = result;
      result = (void **)result + 1;
    }
  } else if (size == 0) {
    // If size is 0, realloc behaves like free.
    // The thread must be registered by the call to kmpc_malloc() or
    // kmpc_calloc() before.
    // So it should be safe to call __kmp_get_thread(), not
    // __kmp_entry_thread().
    KMP_ASSERT(*((void **)ptr - 1));
    brel(__kmp_get_thread(), *((void **)ptr - 1));
  } else {
    result = bgetr(__kmp_entry_thread(), *((void **)ptr - 1),
                   (bufsize)(size + sizeof(ptr)));
    if (result != NULL) {
      *(void **)result = result;
      result = (void **)result + 1;
    }
  }; // if
  return result;
}

// NOTE: the library must have already been initialized by a previous allocate
void kmpc_free(void *ptr) {
  if (!__kmp_init_serial) {
    return;
  }; // if
  if (ptr != NULL) {
    kmp_info_t *th = __kmp_get_thread();
    __kmp_bget_dequeue(th); /* Release any queued buffers */
    // extract allocated pointer and free it
    KMP_ASSERT(*((void **)ptr - 1));
    brel(th, *((void **)ptr - 1));
  };
}

void *___kmp_thread_malloc(kmp_info_t *th, size_t size KMP_SRC_LOC_DECL) {
  void *ptr;
  KE_TRACE(30, ("-> __kmp_thread_malloc( %p, %d ) called from %s:%d\n", th,
                (int)size KMP_SRC_LOC_PARM));
  ptr = bget(th, (bufsize)size);
  KE_TRACE(30, ("<- __kmp_thread_malloc() returns %p\n", ptr));
  return ptr;
}

void *___kmp_thread_calloc(kmp_info_t *th, size_t nelem,
                           size_t elsize KMP_SRC_LOC_DECL) {
  void *ptr;
  KE_TRACE(30, ("-> __kmp_thread_calloc( %p, %d, %d ) called from %s:%d\n", th,
                (int)nelem, (int)elsize KMP_SRC_LOC_PARM));
  ptr = bgetz(th, (bufsize)(nelem * elsize));
  KE_TRACE(30, ("<- __kmp_thread_calloc() returns %p\n", ptr));
  return ptr;
}

void *___kmp_thread_realloc(kmp_info_t *th, void *ptr,
                            size_t size KMP_SRC_LOC_DECL) {
  KE_TRACE(30, ("-> __kmp_thread_realloc( %p, %p, %d ) called from %s:%d\n", th,
                ptr, (int)size KMP_SRC_LOC_PARM));
  ptr = bgetr(th, ptr, (bufsize)size);
  KE_TRACE(30, ("<- __kmp_thread_realloc() returns %p\n", ptr));
  return ptr;
}

void ___kmp_thread_free(kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL) {
  KE_TRACE(30, ("-> __kmp_thread_free( %p, %p ) called from %s:%d\n", th,
                ptr KMP_SRC_LOC_PARM));
  if (ptr != NULL) {
    __kmp_bget_dequeue(th); /* Release any queued buffers */
    brel(th, ptr);
  }
  KE_TRACE(30, ("<- __kmp_thread_free()\n"));
}

/* If LEAK_MEMORY is defined, __kmp_free() will *not* free memory. It causes
   memory leaks, but it may be useful for debugging memory corruptions, use of
   freed pointers, etc. */
/* #define LEAK_MEMORY */
struct kmp_mem_descr { // Memory block descriptor.
  void *ptr_allocated; // Pointer returned by malloc(), subject for free().
  size_t size_allocated; // Size of allocated memory block.
  void *ptr_aligned; // Pointer to aligned memory, to be used by client code.
  size_t size_aligned; // Size of aligned memory block.
};
typedef struct kmp_mem_descr kmp_mem_descr_t;

/* Allocate memory on requested boundary, fill allocated memory with 0x00.
   NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
   error. Must use __kmp_free when freeing memory allocated by this routine! */
static void *___kmp_allocate_align(size_t size,
                                   size_t alignment KMP_SRC_LOC_DECL) {
  /* __kmp_allocate() allocates (by a call to malloc()) a bigger memory block
     than requested in order to return a properly aligned pointer. The original
     pointer returned by malloc() and the size of the allocated block are saved
     in a descriptor just before the aligned pointer. This information is used
     by __kmp_free() -- it has to pass to free() the original pointer, not the
     aligned one.

        +---------+------------+-----------------------------------+---------+
        | padding | descriptor |           aligned block           | padding |
        +---------+------------+-----------------------------------+---------+
        ^                      ^
        |                      |
        |                      +- Aligned pointer returned to caller
        +- Pointer returned by malloc()

     Aligned block is filled with zeros, paddings are filled with 0xEF. */

  kmp_mem_descr_t descr;
  kmp_uintptr_t addr_allocated; // Address returned by malloc().
  kmp_uintptr_t addr_aligned; // Aligned address to return to caller.
  kmp_uintptr_t addr_descr; // Address of memory block descriptor.

  KE_TRACE(25, ("-> ___kmp_allocate_align( %d, %d ) called from %s:%d\n",
                (int)size, (int)alignment KMP_SRC_LOC_PARM));

  KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too big.
  KMP_DEBUG_ASSERT(sizeof(void *) <= sizeof(kmp_uintptr_t));
  // Make sure kmp_uintptr_t is enough to store addresses.

  descr.size_aligned = size;
  descr.size_allocated =
      descr.size_aligned + sizeof(kmp_mem_descr_t) + alignment;

#if KMP_DEBUG
  descr.ptr_allocated = _malloc_src_loc(descr.size_allocated, _file_, _line_);
#else
  descr.ptr_allocated = malloc_src_loc(descr.size_allocated KMP_SRC_LOC_PARM);
#endif
  KE_TRACE(10, (" malloc( %d ) returned %p\n", (int)descr.size_allocated,
                descr.ptr_allocated));
  if (descr.ptr_allocated == NULL) {
    KMP_FATAL(OutOfHeapMemory);
  };

  addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
  addr_aligned =
      (addr_allocated + sizeof(kmp_mem_descr_t) + alignment) & ~(alignment - 1);
  addr_descr = addr_aligned - sizeof(kmp_mem_descr_t);

  descr.ptr_aligned = (void *)addr_aligned;

  KE_TRACE(26, (" ___kmp_allocate_align: "
                "ptr_allocated=%p, size_allocated=%d, "
                "ptr_aligned=%p, size_aligned=%d\n",
                descr.ptr_allocated, (int)descr.size_allocated,
                descr.ptr_aligned, (int)descr.size_aligned));

  KMP_DEBUG_ASSERT(addr_allocated <= addr_descr);
  KMP_DEBUG_ASSERT(addr_descr + sizeof(kmp_mem_descr_t) == addr_aligned);
  KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
                   addr_allocated + descr.size_allocated);
  KMP_DEBUG_ASSERT(addr_aligned % alignment == 0);
#ifdef KMP_DEBUG
  memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
  // Fill allocated memory block with 0xEF.
#endif
  memset(descr.ptr_aligned, 0x00, descr.size_aligned);
  // Fill the aligned memory block (which is intended for use by the caller)
  // with 0x00. Do not put this filling under KMP_DEBUG condition! Many callers
  // expect zeroed memory. (Padding bytes remain filled with 0xEF in the
  // debugging library.)
  *((kmp_mem_descr_t *)addr_descr) = descr;

  KMP_MB();

  KE_TRACE(25, ("<- ___kmp_allocate_align() returns %p\n", descr.ptr_aligned));
  return descr.ptr_aligned;
} // func ___kmp_allocate_align
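
/* Editor's note: callers reach this through the __kmp_allocate / __kmp_free
   macros (declared in the runtime's headers), e.g. set_thr_data() earlier in
   this file does

     thr_data_t *data = (thr_data_t *)__kmp_allocate(sizeof(*data));
     ...
     __kmp_free(data);   // done in __kmp_finalize_bget()

   which map onto ___kmp_allocate() just below and ___kmp_free() further down. */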

/* Allocate memory on cache line boundary, fill allocated memory with 0x00.
   Do not call this func directly! Use __kmp_allocate macro instead.
   NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
   error. Must use __kmp_free when freeing memory allocated by this routine! */
void *___kmp_allocate(size_t size KMP_SRC_LOC_DECL) {
  void *ptr;
  KE_TRACE(25, ("-> __kmp_allocate( %d ) called from %s:%d\n",
                (int)size KMP_SRC_LOC_PARM));
  ptr = ___kmp_allocate_align(size, __kmp_align_alloc KMP_SRC_LOC_PARM);
  KE_TRACE(25, ("<- __kmp_allocate() returns %p\n", ptr));
  return ptr;
} // func ___kmp_allocate

#if (BUILD_MEMORY == FIRST_TOUCH)
void *__kmp_ft_page_allocate(size_t size) {
  void *adr, *aadr;

  const int page_size = KMP_GET_PAGE_SIZE();

  adr = (void *)__kmp_thread_malloc(__kmp_get_thread(),
                                    size + page_size + KMP_PTR_SKIP);
  if (adr == 0)
    KMP_FATAL(OutOfHeapMemory);

  /* check to see if adr is on a page boundary. */
  if (((kmp_uintptr_t)adr & (page_size - 1)) == 0)
    /* nothing to do if adr is already on a page boundary. */
    aadr = adr;
  else
    /* else set aadr to the first page boundary in the allocated memory. */
    aadr = (void *)(((kmp_uintptr_t)adr + page_size) & ~(page_size - 1));

  /* the first touch by the owner thread. */
  *((void **)aadr) = adr;

  /* skip the memory space used for storing adr above. */
  return (void *)((char *)aadr + KMP_PTR_SKIP);
}
#endif

/* Allocate memory on page boundary, fill allocated memory with 0x00.
   Do not call this func directly! Use __kmp_page_allocate macro instead.
   NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
   error. Must use __kmp_free when freeing memory allocated by this routine! */
void *___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL) {
  int page_size = 8 * 1024;
  void *ptr;

  KE_TRACE(25, ("-> __kmp_page_allocate( %d ) called from %s:%d\n",
                (int)size KMP_SRC_LOC_PARM));
  ptr = ___kmp_allocate_align(size, page_size KMP_SRC_LOC_PARM);
  KE_TRACE(25, ("<- __kmp_page_allocate( %d ) returns %p\n", (int)size, ptr));
  return ptr;
} // ___kmp_page_allocate
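
/* Editor's note: unlike __kmp_ft_page_allocate() above, which queries the real
   page size via KMP_GET_PAGE_SIZE(), ___kmp_page_allocate() simply aligns to a
   fixed 8 KB boundary through ___kmp_allocate_align(). */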

/* Free memory allocated by __kmp_allocate() and __kmp_page_allocate().
   In debug mode, fill the memory block with 0xEF before call to free(). */
void ___kmp_free(void *ptr KMP_SRC_LOC_DECL) {
  kmp_mem_descr_t descr;
  kmp_uintptr_t addr_allocated; // Address returned by malloc().
  kmp_uintptr_t addr_aligned; // Aligned address passed by caller.

  KE_TRACE(25,
           ("-> __kmp_free( %p ) called from %s:%d\n", ptr KMP_SRC_LOC_PARM));
  KMP_ASSERT(ptr != NULL);

  descr = *(kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t));

  KE_TRACE(26, (" __kmp_free: "
                "ptr_allocated=%p, size_allocated=%d, "
                "ptr_aligned=%p, size_aligned=%d\n",
                descr.ptr_allocated, (int)descr.size_allocated,
                descr.ptr_aligned, (int)descr.size_aligned));

  addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
  addr_aligned = (kmp_uintptr_t)descr.ptr_aligned;

  KMP_DEBUG_ASSERT(addr_aligned % CACHE_LINE == 0);
  KMP_DEBUG_ASSERT(descr.ptr_aligned == ptr);
  KMP_DEBUG_ASSERT(addr_allocated + sizeof(kmp_mem_descr_t) <= addr_aligned);
  KMP_DEBUG_ASSERT(descr.size_aligned < descr.size_allocated);
  KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
                   addr_allocated + descr.size_allocated);

#ifdef KMP_DEBUG
  memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
  // Fill memory block with 0xEF, it helps catch using freed memory.
#endif

#ifndef LEAK_MEMORY
  KE_TRACE(10, (" free( %p )\n", descr.ptr_allocated));
#ifdef KMP_DEBUG
  _free_src_loc(descr.ptr_allocated, _file_, _line_);
#else
  free_src_loc(descr.ptr_allocated KMP_SRC_LOC_PARM);
#endif
#endif
  KMP_MB();
  KE_TRACE(25, ("<- __kmp_free() returns\n"));
} // func ___kmp_free

#if USE_FAST_MEMORY == 3
// Allocate fast memory by first scanning the thread's free lists
// If a chunk the right size exists, grab it off the free list.
// Otherwise allocate normally using kmp_thread_malloc.

// AC: How to choose the limit? Just get 16 for now...
#define KMP_FREE_LIST_LIMIT 16

// Always use 128 bytes for determining buckets for caching memory blocks
#define DCACHE_LINE 128
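
/* Bucket sketch (editor's addition): requests are grouped by their size
   rounded up to whole 128-byte cache lines -- 1-2 lines go to free list 0
   (served as 2-line blocks), 3-4 lines to list 1 (4 lines), 5-16 to list 2
   (16 lines), 17-64 to list 3 (64 lines); anything larger bypasses the free
   lists. For example, a 300-byte request needs 3 lines, so it is served from
   list 1 as a 512-byte block. */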
  ptr = this_thr->th.th_free_lists[index].th_free_list_self;
  if (ptr != NULL) {
    // pop the head of no-sync free list
    this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
    KMP_DEBUG_ASSERT(
        this_thr ==
        ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t)))
            ->ptr_aligned);
    goto end;
  }
  ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
  if (ptr != NULL) {
    // no-sync free list is empty, use sync free list (filled in by other
    // threads only)
    // pop the head of the sync free list, push NULL instead
    while (!KMP_COMPARE_AND_STORE_PTR(
        &this_thr->th.th_free_lists[index].th_free_list_sync, ptr, nullptr)) {
      KMP_CPU_PAUSE();
      ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
    }
    // push the rest of chain into no-sync free list (can be NULL if there was
    // the only block)
    this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
    KMP_DEBUG_ASSERT(
        this_thr ==
        ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t)))
            ->ptr_aligned);
    goto end;
  }

alloc_call:
  // haven't found block in the free lists, thus allocate it
  size = num_lines * DCACHE_LINE;

  alloc_size = size + sizeof(kmp_mem_descr_t) + DCACHE_LINE;
  KE_TRACE(25, ("__kmp_fast_allocate: T#%d Calling __kmp_thread_malloc with "
                "alloc_size %d\n",
                __kmp_gtid_from_thread(this_thr), alloc_size));
  alloc_ptr = bget(this_thr, (bufsize)alloc_size);

  // align ptr to DCACHE_LINE
  ptr = (void *)((((kmp_uintptr_t)alloc_ptr) + sizeof(kmp_mem_descr_t) +
                  DCACHE_LINE) &
                 ~(DCACHE_LINE - 1));
  descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t));

  descr->ptr_allocated = alloc_ptr; // remember allocated pointer
  // we don't need size_allocated
  descr->ptr_aligned = (void *)this_thr; // remember allocating thread
  // (it is already saved in bget buffer,
  // but we may want to use another allocator in future)
  descr->size_aligned = size;

end:
  KE_TRACE(25, ("<- __kmp_fast_allocate( T#%d ) returns %p\n",
                __kmp_gtid_from_thread(this_thr), ptr));
  return ptr;
} // func __kmp_fast_allocate
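/* Descriptive note (summary of the code below, not normative documentation):
   each thread keeps three list heads per size bucket. th_free_list_self is a
   plain LIFO touched only by the owning thread; th_free_list_sync is a list
   that other threads push onto with compare-and-swap when they free blocks
   this thread allocated; th_free_list_other is a local staging queue of
   blocks owned by some other thread, which is handed back to that owner in a
   single CAS once it grows past KMP_FREE_LIST_LIMIT or a block with a
   different owner shows up. */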
// Free fast memory and place it on the thread's free list if it is of
// the correct size.
void ___kmp_fast_free(kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL) {
  kmp_mem_descr_t *descr;
  kmp_info_t *alloc_thr;
  size_t size;
  size_t idx;
  int index;

  KE_TRACE(25, ("-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n",
                __kmp_gtid_from_thread(this_thr), ptr KMP_SRC_LOC_PARM));
  KMP_ASSERT(ptr != NULL);

  descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t));

  KE_TRACE(26, (" __kmp_fast_free: size_aligned=%d\n",
                (int)descr->size_aligned));

  size = descr->size_aligned; // 2, 4, 16, 64, 65, 66, ... cache lines

  idx = DCACHE_LINE * 2; // 2 cache lines is minimal size of block
  if (idx == size) {
    index = 0; // 2 cache lines
  } else if ((idx <<= 1) == size) {
    index = 1; // 4 cache lines
  } else if ((idx <<= 2) == size) {
    index = 2; // 16 cache lines
  } else if ((idx <<= 2) == size) {
    index = 3; // 64 cache lines
  } else {
    KMP_DEBUG_ASSERT(size > DCACHE_LINE * 64);
    goto free_call; // 65 or more cache lines ( > 8KB )
  }

  alloc_thr = (kmp_info_t *)descr->ptr_aligned; // get thread owning the block
  if (alloc_thr == this_thr) {
    // push block to self no-sync free list, linking previous head (LIFO)
    *((void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self;
    this_thr->th.th_free_lists[index].th_free_list_self = ptr;
  } else {
    void *head = this_thr->th.th_free_lists[index].th_free_list_other;
    if (head == NULL) {
      // Create new free list
      this_thr->th.th_free_lists[index].th_free_list_other = ptr;
      *((void **)ptr) = NULL; // mark the tail of the list
      descr->size_allocated = (size_t)1; // head of the list keeps its length
    } else {
      // need to check the existing "other" list's owner thread and queue size
      kmp_mem_descr_t *dsc =
          (kmp_mem_descr_t *)((char *)head - sizeof(kmp_mem_descr_t));
      // allocating thread, same for all queue nodes
      kmp_info_t *q_th = (kmp_info_t *)(dsc->ptr_aligned);
      size_t q_sz =
          dsc->size_allocated + 1; // new size in case we add current task
      if (q_th == alloc_thr && q_sz <= KMP_FREE_LIST_LIMIT) {
        // we can add current task to "other" list, no sync needed
        *((void **)ptr) = head;
        descr->size_allocated = q_sz;
        this_thr->th.th_free_lists[index].th_free_list_other = ptr;
      } else {
        // either the queue's owner is changing or the size limit was exceeded;
        // return old queue to allocating thread (q_th) synchronously,
        // and start new list for alloc_thr's tasks
        void *old_ptr;
        void *tail = head;
        void *next = *((void **)head);
        while (next != NULL) {
          KMP_DEBUG_ASSERT(
              // queue size should decrease by 1 each step through the list
              ((kmp_mem_descr_t *)((char *)next - sizeof(kmp_mem_descr_t)))
                      ->size_allocated +
                  1 ==
              ((kmp_mem_descr_t *)((char *)tail - sizeof(kmp_mem_descr_t)))
                  ->size_allocated);
          tail = next; // remember tail node
          next = *((void **)next);
        }
        KMP_DEBUG_ASSERT(q_th != NULL);
        // push block to owner's sync free list
        old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
        /* the next pointer must be set before setting free_list to ptr to
           avoid exposing a broken list to other threads, even for an
           instant. */
        *((void **)tail) = old_ptr;

        while (!KMP_COMPARE_AND_STORE_PTR(
            &q_th->th.th_free_lists[index].th_free_list_sync, old_ptr, head)) {
          KMP_CPU_PAUSE();
          old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
          *((void **)tail) = old_ptr;
        }

        // start new list of not-self tasks
        this_thr->th.th_free_lists[index].th_free_list_other = ptr;
        *((void **)ptr) = NULL;
        descr->size_allocated = (size_t)1; // head of queue keeps its length
      }
    }
  }
  goto end;

free_call:
  KE_TRACE(25, ("__kmp_fast_free: T#%d Calling __kmp_thread_free for size %d\n",
                __kmp_gtid_from_thread(this_thr), size));
  __kmp_bget_dequeue(this_thr); /* Release any queued buffers */
  brel(this_thr, descr->ptr_allocated);

end:
  KE_TRACE(25, ("<- __kmp_fast_free() returns\n"));

} // func __kmp_fast_free

// Initialize the thread free lists related to fast memory
// Only do this when a thread is initially created.
void __kmp_initialize_fast_memory(kmp_info_t *this_thr) {
  KE_TRACE(10, ("__kmp_initialize_fast_memory: Called from th %p\n", this_thr));

  memset(this_thr->th.th_free_lists, 0, NUM_LISTS * sizeof(kmp_free_list_t));
}

// Free the memory in the thread free lists related to fast memory
// Only do this when a thread is being reaped (destroyed).
void __kmp_free_fast_memory(kmp_info_t *th) {
  // Assuming the BGET underlying allocator, walk through its structures...
  int bin;
  thr_data_t *thr = get_thr_data(th);
  void **lst = NULL;

  KE_TRACE(
      5, ("__kmp_free_fast_memory: Called T#%d\n", __kmp_gtid_from_thread(th)));

  __kmp_bget_dequeue(th); // Release any queued buffers

  // Dig through free lists and extract all allocated blocks
  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
    bfhead_t *b = thr->freelist[bin].ql.flink;
    while (b != &thr->freelist[bin]) {
      if ((kmp_uintptr_t)b->bh.bb.bthr & 1) { // the buffer is allocated address
        *((void **)b) =
            lst; // link the list (override bthr, but keep flink yet)
        lst = (void **)b; // push b into lst
      }
      b = b->ql.flink; // get next buffer
    }
  }
  while (lst != NULL) {
    void *next = *lst;
    KE_TRACE(10, ("__kmp_free_fast_memory: freeing %p, next=%p th %p (%d)\n",
                  lst, next, th, __kmp_gtid_from_thread(th)));
    (*thr->relfcn)(lst);
#if BufStats
    // count blocks to prevent problems in __kmp_finalize_bget()
    thr->numprel++; /* Nr of expansion block releases */
    thr->numpblk--; /* Total number of blocks */
#endif
    lst = (void **)next;
  }

  KE_TRACE(
      5, ("__kmp_free_fast_memory: Freed T#%d\n", __kmp_gtid_from_thread(th)));
}

#endif // USE_FAST_MEMORY
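/* Note on the lock-free hand-off in ___kmp_fast_free() above: a whole chain of
   blocks is spliced onto the owner's th_free_list_sync in a retry loop, and
   the chain's tail pointer is re-linked to the freshly observed head before
   every CAS attempt so readers never see a broken list. A minimal sketch of
   the same pattern in portable C++ (illustrative only; the runtime uses its
   own TCR_PTR/KMP_COMPARE_AND_STORE_PTR macros rather than std::atomic):

       #include <atomic>
       static std::atomic<void *> sync_head{nullptr};
       static void push_chain(void *first, void **tail_next) {
         void *old_head = sync_head.load(std::memory_order_relaxed);
         do {
           *tail_next = old_head; // tail of the chain points at current head
         } while (!sync_head.compare_exchange_weak(old_head, first));
       }
*/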