/*
 * kmp_alloc.cpp -- private/shared dynamic memory allocation and management
 */

//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#include "kmp.h"
#include "kmp_io.h"
#include "kmp_wrapper_malloc.h"

// Disable bget when it is not used
#if KMP_USE_BGET

/* Thread private buffer management code */

typedef int (*bget_compact_t)(size_t, int);
typedef void *(*bget_acquire_t)(size_t);
typedef void (*bget_release_t)(void *);

/* NOTE: bufsize must be a signed datatype */

#if KMP_OS_WINDOWS
#if KMP_ARCH_X86 || KMP_ARCH_ARM
typedef kmp_int32 bufsize;
#else
typedef kmp_int64 bufsize;
#endif
#else
typedef ssize_t bufsize;
#endif

/* The three modes of operation are, fifo search, lifo search, and best-fit */

typedef enum bget_mode {
  bget_mode_fifo = 0,
  bget_mode_lifo = 1,
  bget_mode_best = 2
} bget_mode_t;

static void bpool(kmp_info_t *th, void *buffer, bufsize len);
static void *bget(kmp_info_t *th, bufsize size);
static void *bgetz(kmp_info_t *th, bufsize size);
static void *bgetr(kmp_info_t *th, void *buffer, bufsize newsize);
static void brel(kmp_info_t *th, void *buf);
static void bectl(kmp_info_t *th, bget_compact_t compact,
                  bget_acquire_t acquire, bget_release_t release,
                  bufsize pool_incr);

#ifdef KMP_DEBUG
static void bstats(kmp_info_t *th, bufsize *curalloc, bufsize *totfree,
                   bufsize *maxfree, long *nget, long *nrel);
static void bstatse(kmp_info_t *th, bufsize *pool_incr, long *npool,
                    long *npget, long *nprel, long *ndget, long *ndrel);
static void bufdump(kmp_info_t *th, void *buf);
static void bpoold(kmp_info_t *th, void *pool, int dumpalloc, int dumpfree);
static int bpoolv(kmp_info_t *th, void *pool);
#endif

/* BGET CONFIGURATION */
/* Buffer allocation size quantum: all buffers allocated are a
   multiple of this size. This MUST be a power of two. */

/* On IA-32 architecture with Linux* OS, malloc() does not
   ensure 16 byte alignment */

#if KMP_ARCH_X86 || !KMP_HAVE_QUAD

#define SizeQuant 8
#define AlignType double

#else

#define SizeQuant 16
#define AlignType _Quad

#endif

// Define this symbol to enable the bstats() function which calculates the
// total free space in the buffer pool, the largest available buffer, and the
// total space currently allocated.
#define BufStats 1

#ifdef KMP_DEBUG

// Define this symbol to enable the bpoold() function which dumps the buffers
// in a buffer pool.
#define BufDump 1

// Define this symbol to enable the bpoolv() function for validating a buffer
// pool.
#define BufValid 1

// Define this symbol to enable the bufdump() function which allows dumping the
// contents of an allocated or free buffer.
#define DumpData 1

#ifdef NOT_USED_NOW

// Wipe free buffers to a guaranteed pattern of garbage to trip up miscreants
// who attempt to use pointers into released buffers.
#define FreeWipe 1

// Use a best fit algorithm when searching for space for an allocation request.
// This uses memory more efficiently, but allocation will be much slower.
#define BestFit 1

#endif /* NOT_USED_NOW */
#endif /* KMP_DEBUG */

static bufsize bget_bin_size[] = {
    0,
    // 1 << 6, /* .5 Cache line */
    1 << 7, /* 1 Cache line, new */
    1 << 8, /* 2 Cache lines */
    1 << 9, /* 4 Cache lines, new */
    1 << 10, /* 8 Cache lines */
    1 << 11, /* 16 Cache lines, new */
    1 << 12, 1 << 13, /* new */
    1 << 14, 1 << 15, /* new */
    1 << 16, 1 << 17, 1 << 18, 1 << 19, 1 << 20, /* 1MB */
    1 << 21, /* 2MB */
    1 << 22, /* 4MB */
    1 << 23, /* 8MB */
    1 << 24, /* 16MB */
    1 << 25, /* 32MB */
};

#define MAX_BGET_BINS (int)(sizeof(bget_bin_size) / sizeof(bufsize))

struct bfhead;

// Declare the interface, including the requested buffer size type, bufsize.

/* Queue links */
typedef struct qlinks {
  struct bfhead *flink; /* Forward link */
  struct bfhead *blink; /* Backward link */
} qlinks_t;

/* Header in allocated and free buffers */
typedef struct bhead2 {
  kmp_info_t *bthr; /* The thread which owns the buffer pool */
  bufsize prevfree; /* Relative link back to previous free buffer in memory or
                       0 if previous buffer is allocated. */
  bufsize bsize; /* Buffer size: positive if free, negative if allocated. */
} bhead2_t;

/* Make sure the bhead structure is a multiple of SizeQuant in size. */
typedef union bhead {
  KMP_ALIGN(SizeQuant)
  AlignType b_align;
  char b_pad[sizeof(bhead2_t) + (SizeQuant - (sizeof(bhead2_t) % SizeQuant))];
  bhead2_t bb;
} bhead_t;
#define BH(p) ((bhead_t *)(p))

/* Header in directly allocated buffers (by acqfcn) */
typedef struct bdhead {
  bufsize tsize; /* Total size, including overhead */
  bhead_t bh; /* Common header */
} bdhead_t;
#define BDH(p) ((bdhead_t *)(p))

/* Header in free buffers */
typedef struct bfhead {
  bhead_t bh; /* Common allocated/free header */
  qlinks_t ql; /* Links on free list */
} bfhead_t;
#define BFH(p) ((bfhead_t *)(p))

typedef struct thr_data {
  bfhead_t freelist[MAX_BGET_BINS];
#if BufStats
  size_t totalloc; /* Total space currently allocated */
  long numget, numrel; /* Number of bget() and brel() calls */
  long numpblk; /* Number of pool blocks */
  long numpget, numprel; /* Number of block gets and rels */
  long numdget, numdrel; /* Number of direct gets and rels */
#endif /* BufStats */

  /* Automatic expansion block management functions */
  bget_compact_t compfcn;
  bget_acquire_t acqfcn;
  bget_release_t relfcn;

  bget_mode_t mode; /* what allocation mode to use? */

  bufsize exp_incr; /* Expansion block size */
  bufsize pool_len; /* 0: no bpool calls have been made
                       -1: not all pool blocks are the same size
                       >0: (common) block size for all bpool calls made so far
                    */
  bfhead_t *last_pool; /* Last pool owned by this thread (delay deallocation) */
} thr_data_t;

/* Minimum allocation quantum: */
#define QLSize (sizeof(qlinks_t))
#define SizeQ ((SizeQuant > QLSize) ? SizeQuant : QLSize)
#define MaxSize                                                                \
  (bufsize)(                                                                   \
      ~(((bufsize)(1) << (sizeof(bufsize) * CHAR_BIT - 1)) | (SizeQuant - 1)))
// Maximum for the requested size.

/* End sentinel: value placed in bsize field of dummy block delimiting
   end of pool block. The most negative number which will fit in a
   bufsize, defined in a way that the compiler will accept. */
#define ESent                                                                  \
  ((bufsize)(-(((((bufsize)1) << ((int)sizeof(bufsize) * 8 - 2)) - 1) * 2) - 2))

/* Thread Data management routines */
static int bget_get_bin(bufsize size) {
  // binary chop bins
  int lo = 0, hi = MAX_BGET_BINS - 1;

  KMP_DEBUG_ASSERT(size > 0);

  while ((hi - lo) > 1) {
    int mid = (lo + hi) >> 1;
    if (size < bget_bin_size[mid])
      hi = mid - 1;
    else
      lo = mid;
  }

  KMP_DEBUG_ASSERT((lo >= 0) && (lo < MAX_BGET_BINS));

  return lo;
}

static void set_thr_data(kmp_info_t *th) {
  int i;
  thr_data_t *data;

  data = (thr_data_t *)((!th->th.th_local.bget_data)
                            ? __kmp_allocate(sizeof(*data))
                            : th->th.th_local.bget_data);

  memset(data, '\0', sizeof(*data));

  for (i = 0; i < MAX_BGET_BINS; ++i) {
    data->freelist[i].ql.flink = &data->freelist[i];
    data->freelist[i].ql.blink = &data->freelist[i];
  }

  th->th.th_local.bget_data = data;
  th->th.th_local.bget_list = 0;
#if !USE_CMP_XCHG_FOR_BGET
#ifdef USE_QUEUING_LOCK_FOR_BGET
  __kmp_init_lock(&th->th.th_local.bget_lock);
#else
  __kmp_init_bootstrap_lock(&th->th.th_local.bget_lock);
#endif /* USE_QUEUING_LOCK_FOR_BGET */
#endif /* ! USE_CMP_XCHG_FOR_BGET */
}

static thr_data_t *get_thr_data(kmp_info_t *th) {
  thr_data_t *data;

  data = (thr_data_t *)th->th.th_local.bget_data;

  KMP_DEBUG_ASSERT(data != 0);

  return data;
}

#ifdef KMP_DEBUG

static void __kmp_bget_validate_queue(kmp_info_t *th) {
  /* NOTE: assume that the global_lock is held */

  void *p = (void *)th->th.th_local.bget_list;

  while (p != 0) {
    bfhead_t *b = BFH(((char *)p) - sizeof(bhead_t));

    KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
    p = (void *)b->ql.flink;
  }
}

#endif

/* Walk the free list and release the enqueued buffers */
static void __kmp_bget_dequeue(kmp_info_t *th) {
  void *p = TCR_SYNC_PTR(th->th.th_local.bget_list);

  if (p != 0) {
#if USE_CMP_XCHG_FOR_BGET
    {
      volatile void *old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
      while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
                                        CCAST(void *, old_value), nullptr)) {
        KMP_CPU_PAUSE();
        old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
      }
      p = CCAST(void *, old_value);
    }
#else /* ! USE_CMP_XCHG_FOR_BGET */
#ifdef USE_QUEUING_LOCK_FOR_BGET
    __kmp_acquire_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
#else
    __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
#endif /* USE_QUEUING_LOCK_FOR_BGET */

    p = (void *)th->th.th_local.bget_list;
    th->th.th_local.bget_list = 0;

#ifdef USE_QUEUING_LOCK_FOR_BGET
    __kmp_release_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
#else
    __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
#endif
#endif /* USE_CMP_XCHG_FOR_BGET */

    /* Check again to make sure the list is not empty */
    while (p != 0) {
      void *buf = p;
      bfhead_t *b = BFH(((char *)p) - sizeof(bhead_t));

      KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
      KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
                       (kmp_uintptr_t)th); // clear possible mark
      KMP_DEBUG_ASSERT(b->ql.blink == 0);

      p = (void *)b->ql.flink;

      brel(th, buf);
    }
  }
}

/* Chain together the free buffers by using the thread owner field */
static void __kmp_bget_enqueue(kmp_info_t *th, void *buf
#ifdef USE_QUEUING_LOCK_FOR_BGET
                               ,
                               kmp_int32 rel_gtid
#endif
                               ) {
  bfhead_t *b = BFH(((char *)buf) - sizeof(bhead_t));

  KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
  KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
                   (kmp_uintptr_t)th); // clear possible mark

  b->ql.blink = 0;

  KC_TRACE(10, ("__kmp_bget_enqueue: moving buffer to T#%d list\n",
                __kmp_gtid_from_thread(th)));

#if USE_CMP_XCHG_FOR_BGET
  {
    volatile void *old_value = TCR_PTR(th->th.th_local.bget_list);
    /* the next pointer must be set before setting bget_list to buf to avoid
       exposing a broken list to other threads, even for an instant. */
    b->ql.flink = BFH(CCAST(void *, old_value));

    while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
                                      CCAST(void *, old_value), buf)) {
      KMP_CPU_PAUSE();
      old_value = TCR_PTR(th->th.th_local.bget_list);
      /* the next pointer must be set before setting bget_list to buf to avoid
         exposing a broken list to other threads, even for an instant. */
      b->ql.flink = BFH(CCAST(void *, old_value));
    }
  }
#else /* ! USE_CMP_XCHG_FOR_BGET */
#ifdef USE_QUEUING_LOCK_FOR_BGET
  __kmp_acquire_lock(&th->th.th_local.bget_lock, rel_gtid);
#else
  __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
#endif

  b->ql.flink = BFH(th->th.th_local.bget_list);
  th->th.th_local.bget_list = (void *)buf;

#ifdef USE_QUEUING_LOCK_FOR_BGET
  __kmp_release_lock(&th->th.th_local.bget_lock, rel_gtid);
#else
  __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
#endif
#endif /* USE_CMP_XCHG_FOR_BGET */
}

/* insert buffer back onto a new freelist */
static void __kmp_bget_insert_into_freelist(thr_data_t *thr, bfhead_t *b) {
  int bin;

  KMP_DEBUG_ASSERT(((size_t)b) % SizeQuant == 0);
  KMP_DEBUG_ASSERT(b->bh.bb.bsize % SizeQuant == 0);

  bin = bget_get_bin(b->bh.bb.bsize);

  KMP_DEBUG_ASSERT(thr->freelist[bin].ql.blink->ql.flink ==
                   &thr->freelist[bin]);
  KMP_DEBUG_ASSERT(thr->freelist[bin].ql.flink->ql.blink ==
                   &thr->freelist[bin]);

  b->ql.flink = &thr->freelist[bin];
  b->ql.blink = thr->freelist[bin].ql.blink;

  thr->freelist[bin].ql.blink = b;
  b->ql.blink->ql.flink = b;
}

/* unlink the buffer from the old freelist */
static void __kmp_bget_remove_from_freelist(bfhead_t *b) {
  KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
  KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);

  b->ql.blink->ql.flink = b->ql.flink;
  b->ql.flink->ql.blink = b->ql.blink;
}

/* GET STATS -- check info on free list */
static void bcheck(kmp_info_t *th, bufsize *max_free, bufsize *total_free) {
  thr_data_t *thr = get_thr_data(th);
  int bin;

  *total_free = *max_free = 0;

  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
    bfhead_t *b, *best;

    best = &thr->freelist[bin];
    b = best->ql.flink;

    while (b != &thr->freelist[bin]) {
      *total_free += (b->bh.bb.bsize - sizeof(bhead_t));
      if ((best == &thr->freelist[bin]) || (b->bh.bb.bsize < best->bh.bb.bsize))
        best = b;

      /* Link to next buffer */
      b = b->ql.flink;
    }

    if (*max_free < best->bh.bb.bsize)
      *max_free = best->bh.bb.bsize;
  }

  if (*max_free > (bufsize)sizeof(bhead_t))
    *max_free -= sizeof(bhead_t);
}

/* BGET -- Allocate a buffer. */
static void *bget(kmp_info_t *th, bufsize requested_size) {
  thr_data_t *thr = get_thr_data(th);
  bufsize size = requested_size;
  bfhead_t *b;
  void *buf;
  int compactseq = 0;
  int use_blink = 0;
  /* For BestFit */
  bfhead_t *best;

  if (size < 0 || size + sizeof(bhead_t) > MaxSize) {
    return NULL;
  }

  __kmp_bget_dequeue(th); /* Release any queued buffers */

  if (size < (bufsize)SizeQ) { // Need at least room for the queue links.
    size = SizeQ;
  }
#if defined(SizeQuant) && (SizeQuant > 1)
  size = (size + (SizeQuant - 1)) & (~(SizeQuant - 1));
#endif

  size += sizeof(bhead_t); // Add overhead in allocated buffer to size required.
  KMP_DEBUG_ASSERT(size >= 0);
  KMP_DEBUG_ASSERT(size % SizeQuant == 0);

  use_blink = (thr->mode == bget_mode_lifo);

  /* If a compact function was provided in the call to bectl(), wrap
     a loop around the allocation process to allow compaction to
     intervene in case we don't find a suitable buffer in the chain. */

  for (;;) {
    int bin;

    for (bin = bget_get_bin(size); bin < MAX_BGET_BINS; ++bin) {
      /* Link to next buffer */
      b = (use_blink ? thr->freelist[bin].ql.blink
                     : thr->freelist[bin].ql.flink);

      if (thr->mode == bget_mode_best) {
        best = &thr->freelist[bin];

        /* Scan the free list searching for the first buffer big enough
           to hold the requested size buffer. */
        while (b != &thr->freelist[bin]) {
          if (b->bh.bb.bsize >= (bufsize)size) {
            if ((best == &thr->freelist[bin]) ||
                (b->bh.bb.bsize < best->bh.bb.bsize)) {
              best = b;
            }
          }

          /* Link to next buffer */
          b = (use_blink ? b->ql.blink : b->ql.flink);
        }
        b = best;
      }

      while (b != &thr->freelist[bin]) {
        if ((bufsize)b->bh.bb.bsize >= (bufsize)size) {

          // Buffer is big enough to satisfy the request. Allocate it to the
          // caller. We must decide whether the buffer is large enough to split
          // into the part given to the caller and a free buffer that remains
          // on the free list, or whether the entire buffer should be removed
          // from the free list and given to the caller in its entirety. We
          // only split the buffer if enough room remains for a header plus the
          // minimum quantum of allocation.
          if ((b->bh.bb.bsize - (bufsize)size) >
              (bufsize)(SizeQ + (sizeof(bhead_t)))) {
            bhead_t *ba, *bn;

            ba = BH(((char *)b) + (b->bh.bb.bsize - (bufsize)size));
            bn = BH(((char *)ba) + size);

            KMP_DEBUG_ASSERT(bn->bb.prevfree == b->bh.bb.bsize);

            /* Subtract size from length of free block. */
            b->bh.bb.bsize -= (bufsize)size;

            /* Link allocated buffer to the previous free buffer. */
            ba->bb.prevfree = b->bh.bb.bsize;

            /* Plug negative size into user buffer. */
            ba->bb.bsize = -size;

            /* Mark this buffer as owned by this thread. */
            TCW_PTR(ba->bb.bthr,
                    th); // not an allocated address (do not mark it)
            /* Mark buffer after this one not preceded by free block. */
            bn->bb.prevfree = 0;

            // unlink buffer from old freelist, and reinsert into new freelist
            __kmp_bget_remove_from_freelist(b);
            __kmp_bget_insert_into_freelist(thr, b);
#if BufStats
            thr->totalloc += (size_t)size;
            thr->numget++; /* Increment number of bget() calls */
#endif
            buf = (void *)((((char *)ba) + sizeof(bhead_t)));
            KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
            return buf;
          } else {
            bhead_t *ba;

            ba = BH(((char *)b) + b->bh.bb.bsize);

            KMP_DEBUG_ASSERT(ba->bb.prevfree == b->bh.bb.bsize);

            /* The buffer isn't big enough to split. Give the whole
               shebang to the caller and remove it from the free list. */

            __kmp_bget_remove_from_freelist(b);
#if BufStats
            thr->totalloc += (size_t)b->bh.bb.bsize;
            thr->numget++; /* Increment number of bget() calls */
#endif
            /* Negate size to mark buffer allocated. */
            b->bh.bb.bsize = -(b->bh.bb.bsize);

            /* Mark this buffer as owned by this thread. */
            TCW_PTR(ba->bb.bthr, th); // not an allocated address (do not mark)
            /* Zero the back pointer in the next buffer in memory
               to indicate that this buffer is allocated. */
            ba->bb.prevfree = 0;

            /* Give user buffer starting at queue links. */
            buf = (void *)&(b->ql);
            KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
            return buf;
          }
        }

        /* Link to next buffer */
        b = (use_blink ? b->ql.blink : b->ql.flink);
      }
    }

    /* We failed to find a buffer. If there's a compact function defined,
       notify it of the size requested. If it returns TRUE, try the allocation
       again. */
    if ((thr->compfcn == 0) || (!(*thr->compfcn)(size, ++compactseq))) {
      break;
    }
  }

  /* No buffer available with requested size free. */

  /* Don't give up yet -- look in the reserve supply. */
  if (thr->acqfcn != 0) {
    if (size > (bufsize)(thr->exp_incr - sizeof(bhead_t))) {
      /* Request is too large to fit in a single expansion block.
         Try to satisfy it by a direct buffer acquisition. */
      bdhead_t *bdh;

      size += sizeof(bdhead_t) - sizeof(bhead_t);

      KE_TRACE(10, ("%%%%%% MALLOC( %d )\n", (int)size));

      /* richryan */
      bdh = BDH((*thr->acqfcn)((bufsize)size));
      if (bdh != NULL) {

        // Mark the buffer special by setting size field of its header to zero.
        bdh->bh.bb.bsize = 0;

        /* Mark this buffer as owned by this thread. */
        TCW_PTR(bdh->bh.bb.bthr, th); // don't mark buffer as allocated,
        // because direct buffer never goes to free list
        bdh->bh.bb.prevfree = 0;
        bdh->tsize = size;
#if BufStats
        thr->totalloc += (size_t)size;
        thr->numget++; /* Increment number of bget() calls */
        thr->numdget++; /* Direct bget() call count */
#endif
        buf = (void *)(bdh + 1);
        KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
        return buf;
      }

    } else {

      /* Try to obtain a new expansion block */
      void *newpool;

      KE_TRACE(10, ("%%%%%% MALLOCB( %d )\n", (int)thr->exp_incr));

      /* richryan */
      newpool = (*thr->acqfcn)((bufsize)thr->exp_incr);
      KMP_DEBUG_ASSERT(((size_t)newpool) % SizeQuant == 0);
      if (newpool != NULL) {
        bpool(th, newpool, thr->exp_incr);
        buf = bget(
            th, requested_size); /* This can't, I say, can't get into a loop. */
        return buf;
      }
    }
  }

  /* Still no buffer available */

  return NULL;
}

/* BGETZ -- Allocate a buffer and clear its contents to zero. We clear
   the entire contents of the buffer to zero, not just the
   region requested by the caller. */

static void *bgetz(kmp_info_t *th, bufsize size) {
  char *buf = (char *)bget(th, size);

  if (buf != NULL) {
    bhead_t *b;
    bufsize rsize;

    b = BH(buf - sizeof(bhead_t));
    rsize = -(b->bb.bsize);
    if (rsize == 0) {
      bdhead_t *bd;

      bd = BDH(buf - sizeof(bdhead_t));
      rsize = bd->tsize - (bufsize)sizeof(bdhead_t);
    } else {
      rsize -= sizeof(bhead_t);
    }

    KMP_DEBUG_ASSERT(rsize >= size);

    (void)memset(buf, 0, (bufsize)rsize);
  }
  return ((void *)buf);
}

/* BGETR -- Reallocate a buffer. This is a minimal implementation,
   simply in terms of brel() and bget(). It could be
   enhanced to allow the buffer to grow into adjacent free
   blocks and to avoid moving data unnecessarily. */

static void *bgetr(kmp_info_t *th, void *buf, bufsize size) {
  void *nbuf;
  bufsize osize; /* Old size of buffer */
  bhead_t *b;

  nbuf = bget(th, size);
  if (nbuf == NULL) { /* Acquire new buffer */
    return NULL;
  }
  if (buf == NULL) {
    return nbuf;
  }
  b = BH(((char *)buf) - sizeof(bhead_t));
  osize = -b->bb.bsize;
  if (osize == 0) {
    /* Buffer acquired directly through acqfcn. */
    bdhead_t *bd;

    bd = BDH(((char *)buf) - sizeof(bdhead_t));
    osize = bd->tsize - (bufsize)sizeof(bdhead_t);
  } else {
    osize -= sizeof(bhead_t);
  }

  KMP_DEBUG_ASSERT(osize > 0);

  (void)KMP_MEMCPY((char *)nbuf, (char *)buf, /* Copy the data */
                   (size_t)((size < osize) ? size : osize));
  brel(th, buf);

  return nbuf;
}

/* BREL -- Release a buffer. */
static void brel(kmp_info_t *th, void *buf) {
  thr_data_t *thr = get_thr_data(th);
  bfhead_t *b, *bn;
  kmp_info_t *bth;

  KMP_DEBUG_ASSERT(buf != NULL);
  KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);

  b = BFH(((char *)buf) - sizeof(bhead_t));

  if (b->bh.bb.bsize == 0) { /* Directly-acquired buffer? */
    bdhead_t *bdh;

    bdh = BDH(((char *)buf) - sizeof(bdhead_t));
    KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
#if BufStats
    thr->totalloc -= (size_t)bdh->tsize;
    thr->numdrel++; /* Number of direct releases */
    thr->numrel++; /* Increment number of brel() calls */
#endif /* BufStats */
#ifdef FreeWipe
    (void)memset((char *)buf, 0x55, (size_t)(bdh->tsize - sizeof(bdhead_t)));
#endif /* FreeWipe */

    KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)bdh));

    KMP_DEBUG_ASSERT(thr->relfcn != 0);
    (*thr->relfcn)((void *)bdh); /* Release it directly. */
    return;
  }

  bth = (kmp_info_t *)((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) &
                       ~1); // clear possible mark before comparison
  if (bth != th) {
    /* Add this buffer to be released by the owning thread later */
    __kmp_bget_enqueue(bth, buf
#ifdef USE_QUEUING_LOCK_FOR_BGET
                       ,
                       __kmp_gtid_from_thread(th)
#endif
                       );
    return;
  }

  /* Buffer size must be negative, indicating that the buffer is allocated. */
  if (b->bh.bb.bsize >= 0) {
    bn = NULL;
  }
  KMP_DEBUG_ASSERT(b->bh.bb.bsize < 0);

  /* Back pointer in next buffer must be zero, indicating the same thing: */

  KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.bsize)->bb.prevfree == 0);

#if BufStats
  thr->numrel++; /* Increment number of brel() calls */
  thr->totalloc += (size_t)b->bh.bb.bsize;
#endif

  /* If the back link is nonzero, the previous buffer is free. */

  if (b->bh.bb.prevfree != 0) {
    /* The previous buffer is free. Consolidate this buffer with it by adding
       the length of this buffer to the previous free buffer. Note that we
       subtract the size in the buffer being released, since it's negative to
       indicate that the buffer is allocated. */
    bufsize size = b->bh.bb.bsize;

    /* Make the previous buffer the one we're working on. */
    KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.prevfree)->bb.bsize ==
                     b->bh.bb.prevfree);
    b = BFH(((char *)b) - b->bh.bb.prevfree);
    b->bh.bb.bsize -= size;

    /* unlink the buffer from the old freelist */
    __kmp_bget_remove_from_freelist(b);
  } else {
    /* The previous buffer is allocated. Mark this buffer size as positive
       (i.e. free) and fall through to place the buffer on the free list as an
       isolated free block. */
    b->bh.bb.bsize = -b->bh.bb.bsize;
  }

  /* insert buffer back onto a new freelist */
  __kmp_bget_insert_into_freelist(thr, b);

  /* Now we look at the next buffer in memory, located by advancing from
     the start of this buffer by its size, to see if that buffer is
     free. If it is, we combine this buffer with the next one in
     memory, dechaining the second buffer from the free list. */
  bn = BFH(((char *)b) + b->bh.bb.bsize);
  if (bn->bh.bb.bsize > 0) {

    /* The buffer is free. Remove it from the free list and add
       its size to that of our buffer. */
    KMP_DEBUG_ASSERT(BH((char *)bn + bn->bh.bb.bsize)->bb.prevfree ==
                     bn->bh.bb.bsize);

    __kmp_bget_remove_from_freelist(bn);

    b->bh.bb.bsize += bn->bh.bb.bsize;

    /* unlink the buffer from the old freelist, and reinsert it into the new
     * freelist */
    __kmp_bget_remove_from_freelist(b);
    __kmp_bget_insert_into_freelist(thr, b);

    /* Finally, advance to the buffer that follows the newly
       consolidated free block. We must set its backpointer to the
       head of the consolidated free block. We know the next block
       must be an allocated block because the process of recombination
       guarantees that two free blocks will never be contiguous in
       memory. */
    bn = BFH(((char *)b) + b->bh.bb.bsize);
  }
#ifdef FreeWipe
  (void)memset(((char *)b) + sizeof(bfhead_t), 0x55,
               (size_t)(b->bh.bb.bsize - sizeof(bfhead_t)));
#endif
  KMP_DEBUG_ASSERT(bn->bh.bb.bsize < 0);

  /* The next buffer is allocated. Set the backpointer in it to point
     to this buffer; the previous free buffer in memory. */

  bn->bh.bb.prevfree = b->bh.bb.bsize;

  /* If a block-release function is defined, and this free buffer
     constitutes the entire block, release it. Note that pool_len
     is defined in such a way that the test will fail unless all
     pool blocks are the same size. */
  if (thr->relfcn != 0 &&
      b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) {
#if BufStats
    if (thr->numpblk !=
        1) { /* Do not release the last buffer until finalization time */
#endif

      KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
      KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
      KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree ==
                       b->bh.bb.bsize);

      /* Unlink the buffer from the free list */
      __kmp_bget_remove_from_freelist(b);

      KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b));

      (*thr->relfcn)(b);
#if BufStats
      thr->numprel++; /* Nr of expansion block releases */
      thr->numpblk--; /* Total number of blocks */
      KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);

      // avoid leaving stale last_pool pointer around if it is being dealloced
      if (thr->last_pool == b)
        thr->last_pool = 0;
    } else {
      thr->last_pool = b;
    }
#endif /* BufStats */
  }
}

/* BECTL -- Establish automatic pool expansion control */
static void bectl(kmp_info_t *th, bget_compact_t compact,
                  bget_acquire_t acquire, bget_release_t release,
                  bufsize pool_incr) {
  thr_data_t *thr = get_thr_data(th);

  thr->compfcn = compact;
  thr->acqfcn = acquire;
  thr->relfcn = release;
  thr->exp_incr = pool_incr;
}

/* BPOOL -- Add a region of memory to the buffer pool. */
static void bpool(kmp_info_t *th, void *buf, bufsize len) {
  /* int bin = 0; */
  thr_data_t *thr = get_thr_data(th);
  bfhead_t *b = BFH(buf);
  bhead_t *bn;

  __kmp_bget_dequeue(th); /* Release any queued buffers */

#ifdef SizeQuant
  len &= ~(SizeQuant - 1);
#endif
  if (thr->pool_len == 0) {
    thr->pool_len = len;
  } else if (len != thr->pool_len) {
    thr->pool_len = -1;
  }
#if BufStats
  thr->numpget++; /* Number of block acquisitions */
  thr->numpblk++; /* Number of blocks total */
  KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
#endif /* BufStats */

  /* Since the block is initially occupied by a single free buffer,
     it had better not be (much) larger than the largest buffer
     whose size we can store in bhead.bb.bsize. */
  KMP_DEBUG_ASSERT(len - sizeof(bhead_t) <= -((bufsize)ESent + 1));

  /* Clear the backpointer at the start of the block to indicate that
     there is no free block prior to this one. That blocks
     recombination when the first block in memory is released. */
  b->bh.bb.prevfree = 0;

  /* Create a dummy allocated buffer at the end of the pool. This dummy
     buffer is seen when a buffer at the end of the pool is released and
     blocks recombination of the last buffer with the dummy buffer at
     the end. The length in the dummy buffer is set to the largest
     negative number to denote the end of the pool for diagnostic
     routines (this specific value is not counted on by the actual
     allocation and release functions). */
  len -= sizeof(bhead_t);
  b->bh.bb.bsize = (bufsize)len;
  /* Set the owner of this buffer */
  TCW_PTR(b->bh.bb.bthr,
          (kmp_info_t *)((kmp_uintptr_t)th |
                         1)); // mark the buffer as allocated address

  /* Chain the new block to the free list. */
  __kmp_bget_insert_into_freelist(thr, b);

#ifdef FreeWipe
  (void)memset(((char *)b) + sizeof(bfhead_t), 0x55,
               (size_t)(len - sizeof(bfhead_t)));
#endif
  bn = BH(((char *)b) + len);
  bn->bb.prevfree = (bufsize)len;
  /* Definition of ESent assumes two's complement! */
  KMP_DEBUG_ASSERT((~0) == -1 && (bn != 0));

  bn->bb.bsize = ESent;
}

/* BFREED -- Dump the free lists for this thread. */
static void bfreed(kmp_info_t *th) {
  int bin = 0, count = 0;
  int gtid = __kmp_gtid_from_thread(th);
  thr_data_t *thr = get_thr_data(th);

#if BufStats
  __kmp_printf_no_lock("__kmp_printpool: T#%d total=%" KMP_UINT64_SPEC
                       " get=%" KMP_INT64_SPEC " rel=%" KMP_INT64_SPEC
                       " pblk=%" KMP_INT64_SPEC " pget=%" KMP_INT64_SPEC
                       " prel=%" KMP_INT64_SPEC " dget=%" KMP_INT64_SPEC
                       " drel=%" KMP_INT64_SPEC "\n",
                       gtid, (kmp_uint64)thr->totalloc, (kmp_int64)thr->numget,
                       (kmp_int64)thr->numrel, (kmp_int64)thr->numpblk,
                       (kmp_int64)thr->numpget, (kmp_int64)thr->numprel,
                       (kmp_int64)thr->numdget, (kmp_int64)thr->numdrel);
#endif

  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
    bfhead_t *b;

    for (b = thr->freelist[bin].ql.flink; b != &thr->freelist[bin];
         b = b->ql.flink) {
      bufsize bs = b->bh.bb.bsize;

      KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
      KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
      KMP_DEBUG_ASSERT(bs > 0);

      count += 1;

      __kmp_printf_no_lock(
          "__kmp_printpool: T#%d Free block: 0x%p size %6ld bytes.\n", gtid, b,
          (long)bs);
#ifdef FreeWipe
      {
        char *lerr = ((char *)b) + sizeof(bfhead_t);
        if ((bs > sizeof(bfhead_t)) &&
            ((*lerr != 0x55) ||
             (memcmp(lerr, lerr + 1, (size_t)(bs - (sizeof(bfhead_t) + 1))) !=
              0))) {
          __kmp_printf_no_lock("__kmp_printpool: T#%d (Contents of above "
                               "free block have been overstored.)\n",
                               gtid);
        }
      }
#endif
    }
  }

  if (count == 0)
    __kmp_printf_no_lock("__kmp_printpool: T#%d No free blocks\n", gtid);
}

#ifdef KMP_DEBUG

#if BufStats

/* BSTATS -- Return buffer allocation free space statistics. */
static void bstats(kmp_info_t *th, bufsize *curalloc, bufsize *totfree,
                   bufsize *maxfree, long *nget, long *nrel) {
  int bin = 0;
  thr_data_t *thr = get_thr_data(th);

  *nget = thr->numget;
  *nrel = thr->numrel;
  *curalloc = (bufsize)thr->totalloc;
  *totfree = 0;
  *maxfree = -1;

  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
    bfhead_t *b = thr->freelist[bin].ql.flink;

    while (b != &thr->freelist[bin]) {
      KMP_DEBUG_ASSERT(b->bh.bb.bsize > 0);
      *totfree += b->bh.bb.bsize;
      if (b->bh.bb.bsize > *maxfree) {
        *maxfree = b->bh.bb.bsize;
      }
      b = b->ql.flink; /* Link to next buffer */
    }
  }
}

/* BSTATSE -- Return extended statistics */
static void bstatse(kmp_info_t *th, bufsize *pool_incr, long *npool,
                    long *npget, long *nprel, long *ndget, long *ndrel) {
  thr_data_t *thr = get_thr_data(th);

  *pool_incr = (thr->pool_len < 0) ? -thr->exp_incr : thr->exp_incr;
  *npool = thr->numpblk;
  *npget = thr->numpget;
  *nprel = thr->numprel;
  *ndget = thr->numdget;
  *ndrel = thr->numdrel;
}

#endif /* BufStats */

/* BUFDUMP -- Dump the data in a buffer. This is called with the user
   data pointer, and backs up to the buffer header. It will
   dump either a free block or an allocated one. */
static void bufdump(kmp_info_t *th, void *buf) {
  bfhead_t *b;
  unsigned char *bdump;
  bufsize bdlen;

  b = BFH(((char *)buf) - sizeof(bhead_t));
  KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
  if (b->bh.bb.bsize < 0) {
    bdump = (unsigned char *)buf;
    bdlen = (-b->bh.bb.bsize) - (bufsize)sizeof(bhead_t);
  } else {
    bdump = (unsigned char *)(((char *)b) + sizeof(bfhead_t));
    bdlen = b->bh.bb.bsize - (bufsize)sizeof(bfhead_t);
  }

  while (bdlen > 0) {
    int i, dupes = 0;
    bufsize l = bdlen;
    char bhex[50], bascii[20];

    if (l > 16) {
      l = 16;
    }

    for (i = 0; i < l; i++) {
      (void)KMP_SNPRINTF(bhex + i * 3, sizeof(bhex) - i * 3, "%02X ", bdump[i]);
      if (bdump[i] > 0x20 && bdump[i] < 0x7F)
        bascii[i] = bdump[i];
      else
        bascii[i] = ' ';
    }
    bascii[i] = 0;
    (void)__kmp_printf_no_lock("%-48s %s\n", bhex, bascii);
    bdump += l;
    bdlen -= l;
    while ((bdlen > 16) &&
           (memcmp((char *)(bdump - 16), (char *)bdump, 16) == 0)) {
      dupes++;
      bdump += 16;
      bdlen -= 16;
    }
    if (dupes > 1) {
      (void)__kmp_printf_no_lock(
          " (%d lines [%d bytes] identical to above line skipped)\n", dupes,
          dupes * 16);
    } else if (dupes == 1) {
      bdump -= 16;
      bdlen += 16;
    }
  }
}

/* BPOOLD -- Dump a buffer pool. The buffer headers are always listed.
   If DUMPALLOC is nonzero, the contents of allocated buffers
   are dumped. If DUMPFREE is nonzero, free blocks are
   dumped as well. If FreeWipe checking is enabled, free
   blocks which have been clobbered will always be dumped. */
static void bpoold(kmp_info_t *th, void *buf, int dumpalloc, int dumpfree) {
  bfhead_t *b = BFH((char *)buf - sizeof(bhead_t));

  while (b->bh.bb.bsize != ESent) {
    bufsize bs = b->bh.bb.bsize;

    if (bs < 0) {
      bs = -bs;
      (void)__kmp_printf_no_lock("Allocated buffer: size %6ld bytes.\n",
                                 (long)bs);
      if (dumpalloc) {
        bufdump(th, (void *)(((char *)b) + sizeof(bhead_t)));
      }
    } else {
      const char *lerr = "";

      KMP_DEBUG_ASSERT(bs > 0);
      if ((b->ql.blink->ql.flink != b) || (b->ql.flink->ql.blink != b)) {
        lerr = " (Bad free list links)";
      }
      (void)__kmp_printf_no_lock("Free block: size %6ld bytes.%s\n",
                                 (long)bs, lerr);
#ifdef FreeWipe
      lerr = ((char *)b) + sizeof(bfhead_t);
      if ((bs > sizeof(bfhead_t)) &&
          ((*lerr != 0x55) ||
           (memcmp(lerr, lerr + 1, (size_t)(bs - (sizeof(bfhead_t) + 1))) !=
            0))) {
        (void)__kmp_printf_no_lock(
            "(Contents of above free block have been overstored.)\n");
        bufdump(th, (void *)(((char *)b) + sizeof(bhead_t)));
      } else
#endif
          if (dumpfree) {
        bufdump(th, (void *)(((char *)b) + sizeof(bhead_t)));
      }
    }
    b = BFH(((char *)b) + bs);
  }
}

/* BPOOLV -- Validate a buffer pool. */
static int bpoolv(kmp_info_t *th, void *buf) {
  bfhead_t *b = BFH(buf);

  while (b->bh.bb.bsize != ESent) {
    bufsize bs = b->bh.bb.bsize;

    if (bs < 0) {
      bs = -bs;
    } else {
#ifdef FreeWipe
      char *lerr = "";
#endif

      KMP_DEBUG_ASSERT(bs > 0);
      if (bs <= 0) {
        return 0;
      }
      if ((b->ql.blink->ql.flink != b) || (b->ql.flink->ql.blink != b)) {
        (void)__kmp_printf_no_lock(
            "Free block: size %6ld bytes. (Bad free list links)\n", (long)bs);
        KMP_DEBUG_ASSERT(0);
        return 0;
      }
#ifdef FreeWipe
      lerr = ((char *)b) + sizeof(bfhead_t);
      if ((bs > sizeof(bfhead_t)) &&
          ((*lerr != 0x55) ||
           (memcmp(lerr, lerr + 1, (size_t)(bs - (sizeof(bfhead_t) + 1))) !=
            0))) {
        (void)__kmp_printf_no_lock(
            "(Contents of above free block have been overstored.)\n");
        bufdump(th, (void *)(((char *)b) + sizeof(bhead_t)));
        KMP_DEBUG_ASSERT(0);
        return 0;
      }
#endif /* FreeWipe */
    }
    b = BFH(((char *)b) + bs);
  }
  return 1;
}

#endif /* KMP_DEBUG */

void __kmp_initialize_bget(kmp_info_t *th) {
  KMP_DEBUG_ASSERT(SizeQuant >= sizeof(void *) && (th != 0));

  set_thr_data(th);

  bectl(th, (bget_compact_t)0, (bget_acquire_t)malloc, (bget_release_t)free,
        (bufsize)__kmp_malloc_pool_incr);
}

void __kmp_finalize_bget(kmp_info_t *th) {
  thr_data_t *thr;
  bfhead_t *b;

  KMP_DEBUG_ASSERT(th != 0);

#if BufStats
  thr = (thr_data_t *)th->th.th_local.bget_data;
  KMP_DEBUG_ASSERT(thr != NULL);
  b = thr->last_pool;

  /* If a block-release function is defined, and this free buffer constitutes
     the entire block, release it. Note that pool_len is defined in such a way
     that the test will fail unless all pool blocks are the same size. */

  // Deallocate the last pool if one exists because we no longer do it in brel()
  if (thr->relfcn != 0 && b != 0 && thr->numpblk != 0 &&
      b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) {
    KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
    KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
    KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree ==
                     b->bh.bb.bsize);

    /* Unlink the buffer from the free list */
    __kmp_bget_remove_from_freelist(b);

    KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b));

    (*thr->relfcn)(b);
    thr->numprel++; /* Nr of expansion block releases */
    thr->numpblk--; /* Total number of blocks */
    KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
  }
#endif /* BufStats */

  /* Deallocate bget_data */
  if (th->th.th_local.bget_data != NULL) {
    __kmp_free(th->th.th_local.bget_data);
    th->th.th_local.bget_data = NULL;
  }
}

void kmpc_set_poolsize(size_t size) {
  bectl(__kmp_get_thread(), (bget_compact_t)0, (bget_acquire_t)malloc,
        (bget_release_t)free, (bufsize)size);
}

size_t kmpc_get_poolsize(void) {
  thr_data_t *p;

  p = get_thr_data(__kmp_get_thread());

  return p->exp_incr;
}

void kmpc_set_poolmode(int mode) {
  thr_data_t *p;

  if (mode == bget_mode_fifo || mode == bget_mode_lifo ||
      mode == bget_mode_best) {
    p = get_thr_data(__kmp_get_thread());
    p->mode = (bget_mode_t)mode;
  }
}

int kmpc_get_poolmode(void) {
  thr_data_t *p;

  p = get_thr_data(__kmp_get_thread());

  return p->mode;
}

void kmpc_get_poolstat(size_t *maxmem, size_t *allmem) {
  kmp_info_t *th = __kmp_get_thread();
  bufsize a, b;

  __kmp_bget_dequeue(th); /* Release any queued buffers */

  bcheck(th, &a, &b);

  *maxmem = a;
  *allmem = b;
}

void kmpc_poolprint(void) {
  kmp_info_t *th = __kmp_get_thread();

  __kmp_bget_dequeue(th); /* Release any queued buffers */

  bfreed(th);
}

#endif // #if KMP_USE_BGET

void *kmpc_malloc(size_t size) {
  void *ptr;
  ptr = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr)));
  if (ptr != NULL) {
    // save allocated pointer just before one returned to user
    *(void **)ptr = ptr;
    ptr = (void **)ptr + 1;
  }
  return ptr;
}

#define IS_POWER_OF_TWO(n) (((n) & ((n)-1)) == 0)

void *kmpc_aligned_malloc(size_t size, size_t alignment) {
  void *ptr;
  void *ptr_allocated;
  KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too big
  if (!IS_POWER_OF_TWO(alignment)) {
    // AC: do we need to issue a warning here?
    errno = EINVAL;
    return NULL;
  }
  size = size + sizeof(void *) + alignment;
  ptr_allocated = bget(__kmp_entry_thread(), (bufsize)size);
  if (ptr_allocated != NULL) {
    // save allocated pointer just before one returned to user
    ptr = (void *)(((kmp_uintptr_t)ptr_allocated + sizeof(void *) + alignment) &
                   ~(alignment - 1));
    *((void **)ptr - 1) = ptr_allocated;
  } else {
    ptr = NULL;
  }
  return ptr;
}

void *kmpc_calloc(size_t nelem, size_t elsize) {
  void *ptr;
  ptr = bgetz(__kmp_entry_thread(), (bufsize)(nelem * elsize + sizeof(ptr)));
  if (ptr != NULL) {
    // save allocated pointer just before one returned to user
    *(void **)ptr = ptr;
    ptr = (void **)ptr + 1;
  }
  return ptr;
}

void *kmpc_realloc(void *ptr, size_t size) {
  void *result = NULL;
  if (ptr == NULL) {
    // If pointer is NULL, realloc behaves like malloc.
    result = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr)));
    // save allocated pointer just before one returned to user
    if (result != NULL) {
      *(void **)result = result;
      result = (void **)result + 1;
    }
  } else if (size == 0) {
    // If size is 0, realloc behaves like free.
    // The thread must be registered by the call to kmpc_malloc() or
    // kmpc_calloc() before.
    // So it should be safe to call __kmp_get_thread(), not
    // __kmp_entry_thread().
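    // Layout note (descriptive only, derived from the code above): kmpc_malloc,
    // kmpc_calloc and kmpc_realloc over-allocate by sizeof(void *) and store
    // the pointer actually returned by bget()/bgetz() in the word immediately
    // preceding the pointer handed to the user, so *((void **)ptr - 1) below
    // (and in kmpc_free()) recovers the real allocation to pass to brel().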
    KMP_ASSERT(*((void **)ptr - 1));
    brel(__kmp_get_thread(), *((void **)ptr - 1));
  } else {
    result = bgetr(__kmp_entry_thread(), *((void **)ptr - 1),
                   (bufsize)(size + sizeof(ptr)));
    if (result != NULL) {
      *(void **)result = result;
      result = (void **)result + 1;
    }
  }
  return result;
}

// NOTE: the library must have already been initialized by a previous allocate
void kmpc_free(void *ptr) {
  if (!__kmp_init_serial) {
    return;
  }
  if (ptr != NULL) {
    kmp_info_t *th = __kmp_get_thread();
    __kmp_bget_dequeue(th); /* Release any queued buffers */
    // extract allocated pointer and free it
    KMP_ASSERT(*((void **)ptr - 1));
    brel(th, *((void **)ptr - 1));
  }
}

void *___kmp_thread_malloc(kmp_info_t *th, size_t size KMP_SRC_LOC_DECL) {
  void *ptr;
  KE_TRACE(30, ("-> __kmp_thread_malloc( %p, %d ) called from %s:%d\n", th,
                (int)size KMP_SRC_LOC_PARM));
  ptr = bget(th, (bufsize)size);
  KE_TRACE(30, ("<- __kmp_thread_malloc() returns %p\n", ptr));
  return ptr;
}

void *___kmp_thread_calloc(kmp_info_t *th, size_t nelem,
                           size_t elsize KMP_SRC_LOC_DECL) {
  void *ptr;
  KE_TRACE(30, ("-> __kmp_thread_calloc( %p, %d, %d ) called from %s:%d\n", th,
                (int)nelem, (int)elsize KMP_SRC_LOC_PARM));
  ptr = bgetz(th, (bufsize)(nelem * elsize));
  KE_TRACE(30, ("<- __kmp_thread_calloc() returns %p\n", ptr));
  return ptr;
}

void *___kmp_thread_realloc(kmp_info_t *th, void *ptr,
                            size_t size KMP_SRC_LOC_DECL) {
  KE_TRACE(30, ("-> __kmp_thread_realloc( %p, %p, %d ) called from %s:%d\n", th,
                ptr, (int)size KMP_SRC_LOC_PARM));
  ptr = bgetr(th, ptr, (bufsize)size);
  KE_TRACE(30, ("<- __kmp_thread_realloc() returns %p\n", ptr));
  return ptr;
}

void ___kmp_thread_free(kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL) {
  KE_TRACE(30, ("-> __kmp_thread_free( %p, %p ) called from %s:%d\n", th,
                ptr KMP_SRC_LOC_PARM));
  if (ptr != NULL) {
    __kmp_bget_dequeue(th); /* Release any queued buffers */
    brel(th, ptr);
  }
  KE_TRACE(30, ("<- __kmp_thread_free()\n"));
}

/* If LEAK_MEMORY is defined, __kmp_free() will *not* free memory. It causes
   memory leaks, but it may be useful for debugging memory corruptions, use of
   freed pointers, etc. */
/* #define LEAK_MEMORY */
struct kmp_mem_descr { // Memory block descriptor.
  void *ptr_allocated; // Pointer returned by malloc(), subject for free().
  size_t size_allocated; // Size of allocated memory block.
  void *ptr_aligned; // Pointer to aligned memory, to be used by client code.
  size_t size_aligned; // Size of aligned memory block.
};
typedef struct kmp_mem_descr kmp_mem_descr_t;

/* Allocate memory on requested boundary, fill allocated memory with 0x00.
   NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
   error. Must use __kmp_free when freeing memory allocated by this routine! */
static void *___kmp_allocate_align(size_t size,
                                   size_t alignment KMP_SRC_LOC_DECL) {
  /* __kmp_allocate() allocates (by call to malloc()) bigger memory block than
     requested to return properly aligned pointer. Original pointer returned
     by malloc() and size of allocated block is saved in descriptor just
     before the aligned pointer.
     This information is used by __kmp_free() -- it has to pass to free() the
     original pointer, not the aligned one.

     +---------+------------+-----------------------------------+---------+
     | padding | descriptor |           aligned block           | padding |
     +---------+------------+-----------------------------------+---------+
     ^                      ^
     |                      |
     |                      +- Aligned pointer returned to caller
     +- Pointer returned by malloc()

     Aligned block is filled with zeros, paddings are filled with 0xEF. */

  kmp_mem_descr_t descr;
  kmp_uintptr_t addr_allocated; // Address returned by malloc().
  kmp_uintptr_t addr_aligned; // Aligned address to return to caller.
  kmp_uintptr_t addr_descr; // Address of memory block descriptor.

  KE_TRACE(25, ("-> ___kmp_allocate_align( %d, %d ) called from %s:%d\n",
                (int)size, (int)alignment KMP_SRC_LOC_PARM));

  KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too big.
  KMP_DEBUG_ASSERT(sizeof(void *) <= sizeof(kmp_uintptr_t));
  // Make sure kmp_uintptr_t is enough to store addresses.

  descr.size_aligned = size;
  descr.size_allocated =
      descr.size_aligned + sizeof(kmp_mem_descr_t) + alignment;

#if KMP_DEBUG
  descr.ptr_allocated = _malloc_src_loc(descr.size_allocated, _file_, _line_);
#else
  descr.ptr_allocated = malloc_src_loc(descr.size_allocated KMP_SRC_LOC_PARM);
#endif
  KE_TRACE(10, (" malloc( %d ) returned %p\n", (int)descr.size_allocated,
                descr.ptr_allocated));
  if (descr.ptr_allocated == NULL) {
    KMP_FATAL(OutOfHeapMemory);
  }

  addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
  addr_aligned =
      (addr_allocated + sizeof(kmp_mem_descr_t) + alignment) & ~(alignment - 1);
  addr_descr = addr_aligned - sizeof(kmp_mem_descr_t);

  descr.ptr_aligned = (void *)addr_aligned;

  KE_TRACE(26, (" ___kmp_allocate_align: "
                "ptr_allocated=%p, size_allocated=%d, "
                "ptr_aligned=%p, size_aligned=%d\n",
                descr.ptr_allocated, (int)descr.size_allocated,
                descr.ptr_aligned, (int)descr.size_aligned));

  KMP_DEBUG_ASSERT(addr_allocated <= addr_descr);
  KMP_DEBUG_ASSERT(addr_descr + sizeof(kmp_mem_descr_t) == addr_aligned);
  KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
                   addr_allocated + descr.size_allocated);
  KMP_DEBUG_ASSERT(addr_aligned % alignment == 0);
#ifdef KMP_DEBUG
  memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
  // Fill allocated memory block with 0xEF.
#endif
  memset(descr.ptr_aligned, 0x00, descr.size_aligned);
  // Fill the aligned memory block (which is intended for use by the caller)
  // with 0x00. Do not put this filling under a KMP_DEBUG condition! Many
  // callers expect zeroed memory. (Padding bytes remain filled with 0xEF in
  // the debugging library.)
  *((kmp_mem_descr_t *)addr_descr) = descr;

  KMP_MB();

  KE_TRACE(25, ("<- ___kmp_allocate_align() returns %p\n", descr.ptr_aligned));
  return descr.ptr_aligned;
} // func ___kmp_allocate_align

/* Allocate memory on cache line boundary, fill allocated memory with 0x00.
   Do not call this func directly! Use __kmp_allocate macro instead.
   NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
   error. Must use __kmp_free when freeing memory allocated by this routine! */
void *___kmp_allocate(size_t size KMP_SRC_LOC_DECL) {
  void *ptr;
  KE_TRACE(25, ("-> __kmp_allocate( %d ) called from %s:%d\n",
                (int)size KMP_SRC_LOC_PARM));
  ptr = ___kmp_allocate_align(size, __kmp_align_alloc KMP_SRC_LOC_PARM);
  KE_TRACE(25, ("<- __kmp_allocate() returns %p\n", ptr));
  return ptr;
} // func ___kmp_allocate

#if (BUILD_MEMORY == FIRST_TOUCH)
void *__kmp_ft_page_allocate(size_t size) {
  void *adr, *aadr;

  const int page_size = KMP_GET_PAGE_SIZE();

  adr = (void *)__kmp_thread_malloc(__kmp_get_thread(),
                                    size + page_size + KMP_PTR_SKIP);
  if (adr == 0)
    KMP_FATAL(OutOfHeapMemory);

  /* check to see if adr is on a page boundary. */
  if (((kmp_uintptr_t)adr & (page_size - 1)) == 0)
    /* nothing to do if adr is already on a page boundary. */
    aadr = adr;
  else
    /* else set aadr to the first page boundary in the allocated memory. */
    aadr = (void *)(((kmp_uintptr_t)adr + page_size) & ~(page_size - 1));

  /* the first touch by the owner thread. */
  *((void **)aadr) = adr;

  /* skip the memory space used for storing adr above. */
  return (void *)((char *)aadr + KMP_PTR_SKIP);
}
#endif

/* Allocate memory on page boundary, fill allocated memory with 0x00.
   Do not call this func directly! Use __kmp_page_allocate macro instead.
   NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
   error. Must use __kmp_free when freeing memory allocated by this routine! */
void *___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL) {
  int page_size = 8 * 1024;
  void *ptr;

  KE_TRACE(25, ("-> __kmp_page_allocate( %d ) called from %s:%d\n",
                (int)size KMP_SRC_LOC_PARM));
  ptr = ___kmp_allocate_align(size, page_size KMP_SRC_LOC_PARM);
  KE_TRACE(25, ("<- __kmp_page_allocate( %d ) returns %p\n", (int)size, ptr));
  return ptr;
} // ___kmp_page_allocate

/* Free memory allocated by __kmp_allocate() and __kmp_page_allocate().
   In debug mode, fill the memory block with 0xEF before call to free(). */
void ___kmp_free(void *ptr KMP_SRC_LOC_DECL) {
  kmp_mem_descr_t descr;
  kmp_uintptr_t addr_allocated; // Address returned by malloc().
  kmp_uintptr_t addr_aligned; // Aligned address passed by caller.

  KE_TRACE(25,
           ("-> __kmp_free( %p ) called from %s:%d\n", ptr KMP_SRC_LOC_PARM));
  KMP_ASSERT(ptr != NULL);

  descr = *(kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t));

  KE_TRACE(26, (" __kmp_free: "
                "ptr_allocated=%p, size_allocated=%d, "
                "ptr_aligned=%p, size_aligned=%d\n",
                descr.ptr_allocated, (int)descr.size_allocated,
                descr.ptr_aligned, (int)descr.size_aligned));

  addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
  addr_aligned = (kmp_uintptr_t)descr.ptr_aligned;

  KMP_DEBUG_ASSERT(addr_aligned % CACHE_LINE == 0);
  KMP_DEBUG_ASSERT(descr.ptr_aligned == ptr);
  KMP_DEBUG_ASSERT(addr_allocated + sizeof(kmp_mem_descr_t) <= addr_aligned);
  KMP_DEBUG_ASSERT(descr.size_aligned < descr.size_allocated);
  KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
                   addr_allocated + descr.size_allocated);

#ifdef KMP_DEBUG
  memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
  // Fill memory block with 0xEF, it helps catch using freed memory.
#endif

#ifndef LEAK_MEMORY
  KE_TRACE(10, (" free( %p )\n", descr.ptr_allocated));
#ifdef KMP_DEBUG
  _free_src_loc(descr.ptr_allocated, _file_, _line_);
#else
  free_src_loc(descr.ptr_allocated KMP_SRC_LOC_PARM);
#endif
#endif
  KMP_MB();
  KE_TRACE(25, ("<- __kmp_free() returns\n"));
} // func ___kmp_free

#if USE_FAST_MEMORY == 3
// Allocate fast memory by first scanning the thread's free lists
// If a chunk the right size exists, grab it off the free list.
// Otherwise allocate normally using kmp_thread_malloc.

// AC: How to choose the limit? Just get 16 for now...
#define KMP_FREE_LIST_LIMIT 16

// Always use 128 bytes for determining buckets for caching memory blocks
#define DCACHE_LINE 128

void *___kmp_fast_allocate(kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL) {
  void *ptr;
  int num_lines;
  int idx;
  int index;
  void *alloc_ptr;
  size_t alloc_size;
  kmp_mem_descr_t *descr;

  KE_TRACE(25, ("-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n",
                __kmp_gtid_from_thread(this_thr), (int)size KMP_SRC_LOC_PARM));

  num_lines = (size + DCACHE_LINE - 1) / DCACHE_LINE;
  idx = num_lines - 1;
  KMP_DEBUG_ASSERT(idx >= 0);
  if (idx < 2) {
    index = 0; // idx is [ 0, 1 ], use first free list
    num_lines = 2; // 1, 2 cache lines or less than cache line
  } else if ((idx >>= 2) == 0) {
    index = 1; // idx is [ 2, 3 ], use second free list
    num_lines = 4; // 3, 4 cache lines
  } else if ((idx >>= 2) == 0) {
    index = 2; // idx is [ 4, 15 ], use third free list
    num_lines = 16; // 5, 6, ..., 16 cache lines
  } else if ((idx >>= 2) == 0) {
    index = 3; // idx is [ 16, 63 ], use fourth free list
    num_lines = 64; // 17, 18, ..., 64 cache lines
  } else {
    goto alloc_call; // 65 or more cache lines ( > 8KB ), don't use free lists
  }

  ptr = this_thr->th.th_free_lists[index].th_free_list_self;
  if (ptr != NULL) {
    // pop the head of no-sync free list
    this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
    KMP_DEBUG_ASSERT(
        this_thr ==
        ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t)))
            ->ptr_aligned);
    goto end;
  }
  ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
  if (ptr != NULL) {
    // no-sync free list is empty, use sync free list (filled in by other
    // threads only)
    // pop the head of the sync free list, push NULL instead
    while (!KMP_COMPARE_AND_STORE_PTR(
        &this_thr->th.th_free_lists[index].th_free_list_sync, ptr, nullptr)) {
      KMP_CPU_PAUSE();
      ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
    }
    // push the rest of chain into no-sync free list (can be NULL if there was
    // the only block)
    this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
    KMP_DEBUG_ASSERT(
        this_thr ==
        ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t)))
            ->ptr_aligned);
    goto end;
  }

alloc_call:
  // haven't found block in the free lists, thus allocate it
  size = num_lines * DCACHE_LINE;

  alloc_size = size + sizeof(kmp_mem_descr_t) + DCACHE_LINE;
  KE_TRACE(25, ("__kmp_fast_allocate: T#%d Calling __kmp_thread_malloc with "
                "alloc_size %d\n",
                __kmp_gtid_from_thread(this_thr), alloc_size));
  alloc_ptr = bget(this_thr, (bufsize)alloc_size);
  alloc_size = size + sizeof(kmp_mem_descr_t) + DCACHE_LINE;
  KE_TRACE(25, ("__kmp_fast_allocate: T#%d Calling __kmp_thread_malloc with "
                "alloc_size %d\n",
                __kmp_gtid_from_thread(this_thr), alloc_size));
  alloc_ptr = bget(this_thr, (bufsize)alloc_size);

  // align ptr to DCACHE_LINE
  ptr = (void *)((((kmp_uintptr_t)alloc_ptr) + sizeof(kmp_mem_descr_t) +
                  DCACHE_LINE) &
                 ~(DCACHE_LINE - 1));
  descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t));

  descr->ptr_allocated = alloc_ptr; // remember allocated pointer
  // we don't need size_allocated
  descr->ptr_aligned = (void *)this_thr; // remember allocating thread
  // (it is already saved in the bget buffer,
  // but we may want to use another allocator in the future)
  descr->size_aligned = size;

end:
  KE_TRACE(25, ("<- __kmp_fast_allocate( T#%d ) returns %p\n",
                __kmp_gtid_from_thread(this_thr), ptr));
  return ptr;
} // func __kmp_fast_allocate

// Free fast memory and place it on the thread's free list if it is of
// the correct size.
void ___kmp_fast_free(kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL) {
  kmp_mem_descr_t *descr;
  kmp_info_t *alloc_thr;
  size_t size;
  size_t idx;
  int index;

  KE_TRACE(25, ("-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n",
                __kmp_gtid_from_thread(this_thr), ptr KMP_SRC_LOC_PARM));
  KMP_ASSERT(ptr != NULL);

  descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t));

  KE_TRACE(26, ("   __kmp_fast_free: size_aligned=%d\n",
                (int)descr->size_aligned));

  size = descr->size_aligned; // 2, 4, 16, 64, 65, 66, ... cache lines

  idx = DCACHE_LINE * 2; // the minimal block size is 2 cache lines
  if (idx == size) {
    index = 0; // 2 cache lines
  } else if ((idx <<= 1) == size) {
    index = 1; // 4 cache lines
  } else if ((idx <<= 2) == size) {
    index = 2; // 16 cache lines
  } else if ((idx <<= 2) == size) {
    index = 3; // 64 cache lines
  } else {
    KMP_DEBUG_ASSERT(size > DCACHE_LINE * 64);
    goto free_call; // 65 or more cache lines ( > 8KB )
  }

  alloc_thr = (kmp_info_t *)descr->ptr_aligned; // get thread owning the block
  if (alloc_thr == this_thr) {
    // push block to self no-sync free list, linking previous head (LIFO)
    *((void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self;
    this_thr->th.th_free_lists[index].th_free_list_self = ptr;
  } else {
    void *head = this_thr->th.th_free_lists[index].th_free_list_other;
    if (head == NULL) {
      // Create a new free list
      this_thr->th.th_free_lists[index].th_free_list_other = ptr;
      *((void **)ptr) = NULL; // mark the tail of the list
      descr->size_allocated = (size_t)1; // head of the list keeps its length
    } else {
      // need to check the existing "other" list's owner thread and queue size
      kmp_mem_descr_t *dsc =
          (kmp_mem_descr_t *)((char *)head - sizeof(kmp_mem_descr_t));
      // allocating thread, same for all queue nodes
      kmp_info_t *q_th = (kmp_info_t *)(dsc->ptr_aligned);
      size_t q_sz =
          dsc->size_allocated + 1; // new size in case we add the current block
      if (q_th == alloc_thr && q_sz <= KMP_FREE_LIST_LIMIT) {
        // we can add the current block to the "other" list, no sync needed
        *((void **)ptr) = head;
        descr->size_allocated = q_sz;
        this_thr->th.th_free_lists[index].th_free_list_other = ptr;
      } else {
        // either the queue's owning thread has changed or the size limit was
        // exceeded; return the old queue to its allocating thread (q_th)
        // synchronously and start a new list for alloc_thr's blocks
        void *old_ptr;
        void *tail = head;
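        // Walk the old chain to find its tail, then push the whole chain onto
        // the owning thread's sync free list: the tail is linked to the
        // current head of that list before the compare-and-store, and the CAS
        // is retried (re-linking the tail) if another thread changed the head
        // in the meantime.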
        void *next = *((void **)head);
        while (next != NULL) {
          KMP_DEBUG_ASSERT(
              // queue size should decrease by 1 at each step through the list
              ((kmp_mem_descr_t *)((char *)next - sizeof(kmp_mem_descr_t)))
                      ->size_allocated +
                  1 ==
              ((kmp_mem_descr_t *)((char *)tail - sizeof(kmp_mem_descr_t)))
                  ->size_allocated);
          tail = next; // remember tail node
          next = *((void **)next);
        }
        KMP_DEBUG_ASSERT(q_th != NULL);
        // push block to owner's sync free list
        old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
        /* the next pointer must be set before setting free_list to ptr to
           avoid exposing a broken list to other threads, even for an
           instant. */
        *((void **)tail) = old_ptr;

        while (!KMP_COMPARE_AND_STORE_PTR(
            &q_th->th.th_free_lists[index].th_free_list_sync, old_ptr, head)) {
          KMP_CPU_PAUSE();
          old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
          *((void **)tail) = old_ptr;
        }

        // start a new list of not-self blocks
        this_thr->th.th_free_lists[index].th_free_list_other = ptr;
        *((void **)ptr) = NULL;
        descr->size_allocated = (size_t)1; // head of queue keeps its length
      }
    }
  }
  goto end;

free_call:
  KE_TRACE(25, ("__kmp_fast_free: T#%d Calling __kmp_thread_free for size %d\n",
                __kmp_gtid_from_thread(this_thr), size));
  __kmp_bget_dequeue(this_thr); /* Release any queued buffers */
  brel(this_thr, descr->ptr_allocated);

end:
  KE_TRACE(25, ("<- __kmp_fast_free() returns\n"));

} // func __kmp_fast_free

// Initialize the thread free lists related to fast memory
// Only do this when a thread is initially created.
void __kmp_initialize_fast_memory(kmp_info_t *this_thr) {
  KE_TRACE(10, ("__kmp_initialize_fast_memory: Called from th %p\n", this_thr));

  memset(this_thr->th.th_free_lists, 0, NUM_LISTS * sizeof(kmp_free_list_t));
}

// Free the memory in the thread free lists related to fast memory
// Only do this when a thread is being reaped (destroyed).
void __kmp_free_fast_memory(kmp_info_t *th) {
  // Assuming BGET is the underlying allocator, walk through its structures...
  int bin;
  thr_data_t *thr = get_thr_data(th);
  void **lst = NULL;

  KE_TRACE(
      5, ("__kmp_free_fast_memory: Called T#%d\n", __kmp_gtid_from_thread(th)));

  __kmp_bget_dequeue(th); // Release any queued buffers

  // Dig through the free lists and extract all allocated blocks
  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
    bfhead_t *b = thr->freelist[bin].ql.flink;
    while (b != &thr->freelist[bin]) {
      if ((kmp_uintptr_t)b->bh.bb.bthr & 1) { // low bit of bthr set: the
        // buffer is an allocated address
        *((void **)b) =
            lst; // link into the list (overwrites bthr, but flink stays intact)
        lst = (void **)b; // push b onto lst
      }
      b = b->ql.flink; // get next buffer
    }
  }
  while (lst != NULL) {
    void *next = *lst;
    KE_TRACE(10, ("__kmp_free_fast_memory: freeing %p, next=%p th %p (%d)\n",
                  lst, next, th, __kmp_gtid_from_thread(th)));
    (*thr->relfcn)(lst);
#if BufStats
    // count blocks to prevent problems in __kmp_finalize_bget()
    thr->numprel++; /* Number of expansion block releases */
    thr->numpblk--; /* Total number of blocks */
#endif
    lst = (void **)next;
  }

  KE_TRACE(
      5, ("__kmp_free_fast_memory: Freed T#%d\n", __kmp_gtid_from_thread(th)));
}

#endif // USE_FAST_MEMORY
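
/* Note on the fast-memory free lists used above: for each bucket index a
   thread keeps a "self" list (accessed only by the owning thread, no
   synchronization), a "sync" list (filled by other threads and drained by the
   owner, both via compare-and-store), and an "other" staging list (blocks
   that belong to another thread, handed back to that thread's sync list in
   one CAS once the list reaches KMP_FREE_LIST_LIMIT entries or its owner
   changes). */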