/*
 * kmp_threadprivate.cpp -- OpenMP threadprivate support library
 */

//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"

#define USE_CHECKS_COMMON

#define KMP_INLINE_SUBR 1

void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr,
                                           void *data_addr, size_t pc_size);
struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr,
                                                void *data_addr,
                                                size_t pc_size);

struct shared_table __kmp_threadprivate_d_table;

static
#ifdef KMP_INLINE_SUBR
    __forceinline
#endif
    struct private_common *
    __kmp_threadprivate_find_task_common(struct common_table *tbl, int gtid,
                                         void *pc_addr)

{
  struct private_common *tn;

#ifdef KMP_TASK_COMMON_DEBUG
  KC_TRACE(10, ("__kmp_threadprivate_find_task_common: thread#%d, called with "
                "address %p\n",
                gtid, pc_addr));
  dump_list();
#endif

  for (tn = tbl->data[KMP_HASH(pc_addr)]; tn; tn = tn->next) {
    if (tn->gbl_addr == pc_addr) {
#ifdef KMP_TASK_COMMON_DEBUG
      KC_TRACE(10, ("__kmp_threadprivate_find_task_common: thread#%d, found "
                    "node %p on list\n",
                    gtid, pc_addr));
#endif
      return tn;
    }
  }
  return 0;
}

static
#ifdef KMP_INLINE_SUBR
    __forceinline
#endif
    struct shared_common *
    __kmp_find_shared_task_common(struct shared_table *tbl, int gtid,
                                  void *pc_addr) {
  struct shared_common *tn;

  for (tn = tbl->data[KMP_HASH(pc_addr)]; tn; tn = tn->next) {
    if (tn->gbl_addr == pc_addr) {
#ifdef KMP_TASK_COMMON_DEBUG
      KC_TRACE(
          10,
          ("__kmp_find_shared_task_common: thread#%d, found node %p on list\n",
           gtid, pc_addr));
#endif
      return tn;
    }
  }
  return 0;
}

// Create a template for the data initialized storage. Either the template is
// NULL indicating zero fill, or the template is a copy of the original data.
static struct private_data *__kmp_init_common_data(void *pc_addr,
                                                   size_t pc_size) {
  struct private_data *d;
  size_t i;
  char *p;

  d = (struct private_data *)__kmp_allocate(sizeof(struct private_data));
  /*
  d->data = 0;  // AC: commented out because __kmp_allocate zeroes the memory
  d->next = 0;
  */
  d->size = pc_size;
  d->more = 1;

  p = (char *)pc_addr;

  for (i = pc_size; i > 0; --i) {
    if (*p++ != '\0') {
      d->data = __kmp_allocate(pc_size);
      KMP_MEMCPY(d->data, pc_addr, pc_size);
      break;
    }
  }

  return d;
}

// Initialize the data area from the template.
static void __kmp_copy_common_data(void *pc_addr, struct private_data *d) {
  char *addr = (char *)pc_addr;
  int i, offset;

  for (offset = 0; d != 0; d = d->next) {
    for (i = d->more; i > 0; --i) {
      if (d->data == 0)
        memset(&addr[offset], '\0', d->size);
      else
        KMP_MEMCPY(&addr[offset], d->data, d->size);
      offset += d->size;
    }
  }
}

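// Illustrative sketch (not compiled): how the two helpers above cooperate.
// __kmp_init_common_data builds a POD template from the master copy of a
// threadprivate variable; __kmp_copy_common_data later replays that template
// into a thread's private storage. The variable and function names below are
// hypothetical and exist only for this example.
#if 0
static void example_pod_template(void) {
  int zero_filled = 0;   // all-zero master copy
  int initialized = 42;  // non-zero master copy

  // All bytes are zero, so the template keeps d->data == NULL and
  // __kmp_copy_common_data will memset the destination.
  struct private_data *t0 =
      __kmp_init_common_data(&zero_filled, sizeof(zero_filled));

  // At least one byte is non-zero, so the template stores a private copy and
  // __kmp_copy_common_data will memcpy it into the destination.
  struct private_data *t1 =
      __kmp_init_common_data(&initialized, sizeof(initialized));

  int private_copy;                          // stand-in for a thread's storage
  __kmp_copy_common_data(&private_copy, t0); // private_copy == 0
  __kmp_copy_common_data(&private_copy, t1); // private_copy == 42
}
#endif
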
/* we are called from __kmp_serial_initialize() with __kmp_initz_lock held. */
void __kmp_common_initialize(void) {
  if (!TCR_4(__kmp_init_common)) {
    int q;
#ifdef KMP_DEBUG
    int gtid;
#endif

    __kmp_threadpriv_cache_list = NULL;

#ifdef KMP_DEBUG
    /* verify the uber masters were initialized */
    for (gtid = 0; gtid < __kmp_threads_capacity; gtid++)
      if (__kmp_root[gtid]) {
        KMP_DEBUG_ASSERT(__kmp_root[gtid]->r.r_uber_thread);
        for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q)
          KMP_DEBUG_ASSERT(
              !__kmp_root[gtid]->r.r_uber_thread->th.th_pri_common->data[q]);
        /* __kmp_root[ gtid ]-> r.r_uber_thread ->
         * th.th_pri_common -> data[ q ] = 0;*/
      }
#endif /* KMP_DEBUG */

    for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q)
      __kmp_threadprivate_d_table.data[q] = 0;

    TCW_4(__kmp_init_common, TRUE);
  }
}

/* Call all destructors for threadprivate data belonging to all threads.
   Currently unused! */
void __kmp_common_destroy(void) {
  if (TCR_4(__kmp_init_common)) {
    int q;

    TCW_4(__kmp_init_common, FALSE);

    for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) {
      int gtid;
      struct private_common *tn;
      struct shared_common *d_tn;

      /* C++ destructors need to be called once per thread before exiting.
         Don't call destructors for the master thread though unless we used a
         copy constructor */

      for (d_tn = __kmp_threadprivate_d_table.data[q]; d_tn;
           d_tn = d_tn->next) {
        if (d_tn->is_vec) {
          if (d_tn->dt.dtorv != 0) {
            for (gtid = 0; gtid < __kmp_all_nth; ++gtid) {
              if (__kmp_threads[gtid]) {
                if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid))
                                       : (!KMP_UBER_GTID(gtid))) {
                  tn = __kmp_threadprivate_find_task_common(
                      __kmp_threads[gtid]->th.th_pri_common, gtid,
                      d_tn->gbl_addr);
                  if (tn) {
                    (*d_tn->dt.dtorv)(tn->par_addr, d_tn->vec_len);
                  }
                }
              }
            }
            if (d_tn->obj_init != 0) {
              (*d_tn->dt.dtorv)(d_tn->obj_init, d_tn->vec_len);
            }
          }
        } else {
          if (d_tn->dt.dtor != 0) {
            for (gtid = 0; gtid < __kmp_all_nth; ++gtid) {
              if (__kmp_threads[gtid]) {
                if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid))
                                       : (!KMP_UBER_GTID(gtid))) {
                  tn = __kmp_threadprivate_find_task_common(
                      __kmp_threads[gtid]->th.th_pri_common, gtid,
                      d_tn->gbl_addr);
                  if (tn) {
                    (*d_tn->dt.dtor)(tn->par_addr);
                  }
                }
              }
            }
            if (d_tn->obj_init != 0) {
              (*d_tn->dt.dtor)(d_tn->obj_init);
            }
          }
        }
      }
      __kmp_threadprivate_d_table.data[q] = 0;
    }
  }
}

/* Call all destructors for threadprivate data belonging to this thread */
void __kmp_common_destroy_gtid(int gtid) {
  struct private_common *tn;
  struct shared_common *d_tn;

  if (!TCR_4(__kmp_init_gtid)) {
    // This is possible when one of multiple roots initiates early library
    // termination in a sequential region while other teams are active, and its
    // child threads are about to end.
    return;
  }

  KC_TRACE(10, ("__kmp_common_destroy_gtid: T#%d called\n", gtid));
  if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid))
                         : (!KMP_UBER_GTID(gtid))) {

    if (TCR_4(__kmp_init_common)) {

      /* Cannot do this here since not all threads have destroyed their data */
      /* TCW_4(__kmp_init_common, FALSE); */

      for (tn = __kmp_threads[gtid]->th.th_pri_head; tn; tn = tn->link) {

        d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, gtid,
                                             tn->gbl_addr);

        KMP_DEBUG_ASSERT(d_tn);

        if (d_tn->is_vec) {
          if (d_tn->dt.dtorv != 0) {
            (void)(*d_tn->dt.dtorv)(tn->par_addr, d_tn->vec_len);
          }
          if (d_tn->obj_init != 0) {
            (void)(*d_tn->dt.dtorv)(d_tn->obj_init, d_tn->vec_len);
          }
        } else {
          if (d_tn->dt.dtor != 0) {
            (void)(*d_tn->dt.dtor)(tn->par_addr);
          }
          if (d_tn->obj_init != 0) {
            (void)(*d_tn->dt.dtor)(d_tn->obj_init);
          }
        }
      }
      KC_TRACE(30, ("__kmp_common_destroy_gtid: T#%d threadprivate destructors "
                    "complete\n",
                    gtid));
    }
  }
}

#ifdef KMP_TASK_COMMON_DEBUG
static void dump_list(void) {
  int p, q;

  for (p = 0; p < __kmp_all_nth; ++p) {
    if (!__kmp_threads[p])
      continue;
    for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) {
      if (__kmp_threads[p]->th.th_pri_common->data[q]) {
        struct private_common *tn;

        KC_TRACE(10, ("\tdump_list: gtid:%d addresses\n", p));

        for (tn = __kmp_threads[p]->th.th_pri_common->data[q]; tn;
             tn = tn->next) {
          KC_TRACE(10,
                   ("\tdump_list: THREADPRIVATE: Serial %p -> Parallel %p\n",
                    tn->gbl_addr, tn->par_addr));
        }
      }
    }
  }
}
#endif /* KMP_TASK_COMMON_DEBUG */

// NOTE: this routine is to be called only from the serial part of the program.
void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr,
                                           void *data_addr, size_t pc_size) {
  struct shared_common **lnk_tn, *d_tn;
  KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
                   __kmp_threads[gtid]->th.th_root->r.r_active == 0);

  d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, gtid,
                                       pc_addr);

  if (d_tn == 0) {
    d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));

    d_tn->gbl_addr = pc_addr;
    d_tn->pod_init = __kmp_init_common_data(data_addr, pc_size);
    /*
    d_tn->obj_init = 0;  // AC: commented out because __kmp_allocate
                         // zeroes the memory
    d_tn->ct.ctor = 0;
    d_tn->cct.cctor = 0;
    d_tn->dt.dtor = 0;
    d_tn->is_vec = FALSE;
    d_tn->vec_len = 0L;
    */
    d_tn->cmn_size = pc_size;

    __kmp_acquire_lock(&__kmp_global_lock, gtid);

    lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(pc_addr)]);

    d_tn->next = *lnk_tn;
    *lnk_tn = d_tn;

    __kmp_release_lock(&__kmp_global_lock, gtid);
  }
}

struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr,
                                                void *data_addr,
                                                size_t pc_size) {
  struct private_common *tn, **tt;
  struct shared_common *d_tn;

  /* +++++++++ START OF CRITICAL SECTION +++++++++ */
  __kmp_acquire_lock(&__kmp_global_lock, gtid);

  tn = (struct private_common *)__kmp_allocate(sizeof(struct private_common));

  tn->gbl_addr = pc_addr;

  d_tn = __kmp_find_shared_task_common(
      &__kmp_threadprivate_d_table, gtid,
      pc_addr); /* Only the MASTER data table exists. */

  if (d_tn != 0) {
    /* This threadprivate variable has already been seen. */

    if (d_tn->pod_init == 0 && d_tn->obj_init == 0) {
      d_tn->cmn_size = pc_size;

      if (d_tn->is_vec) {
        if (d_tn->ct.ctorv != 0) {
          /* Construct from scratch so no prototype exists */
          d_tn->obj_init = 0;
        } else if (d_tn->cct.cctorv != 0) {
          /* Now data initialize the prototype since it was previously
             registered */
          d_tn->obj_init = (void *)__kmp_allocate(d_tn->cmn_size);
          (void)(*d_tn->cct.cctorv)(d_tn->obj_init, pc_addr, d_tn->vec_len);
        } else {
          d_tn->pod_init = __kmp_init_common_data(data_addr, d_tn->cmn_size);
        }
      } else {
        if (d_tn->ct.ctor != 0) {
          /* Construct from scratch so no prototype exists */
          d_tn->obj_init = 0;
        } else if (d_tn->cct.cctor != 0) {
          /* Now data initialize the prototype since it was previously
             registered */
          d_tn->obj_init = (void *)__kmp_allocate(d_tn->cmn_size);
          (void)(*d_tn->cct.cctor)(d_tn->obj_init, pc_addr);
        } else {
          d_tn->pod_init = __kmp_init_common_data(data_addr, d_tn->cmn_size);
        }
      }
    }
  } else {
    struct shared_common **lnk_tn;

    d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
    d_tn->gbl_addr = pc_addr;
    d_tn->cmn_size = pc_size;
    d_tn->pod_init = __kmp_init_common_data(data_addr, pc_size);
    /*
    d_tn->obj_init = 0;  // AC: commented out because __kmp_allocate
                         // zeroes the memory
    d_tn->ct.ctor = 0;
    d_tn->cct.cctor = 0;
    d_tn->dt.dtor = 0;
    d_tn->is_vec = FALSE;
    d_tn->vec_len = 0L;
    */
    lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(pc_addr)]);

    d_tn->next = *lnk_tn;
    *lnk_tn = d_tn;
  }

  tn->cmn_size = d_tn->cmn_size;

  if ((__kmp_foreign_tp) ? (KMP_INITIAL_GTID(gtid)) : (KMP_UBER_GTID(gtid))) {
    tn->par_addr = (void *)pc_addr;
  } else {
    tn->par_addr = (void *)__kmp_allocate(tn->cmn_size);
  }

  __kmp_release_lock(&__kmp_global_lock, gtid);
  /* +++++++++ END OF CRITICAL SECTION +++++++++ */

#ifdef USE_CHECKS_COMMON
  if (pc_size > d_tn->cmn_size) {
    KC_TRACE(
        10, ("__kmp_threadprivate_insert: THREADPRIVATE: %p (%" KMP_UINTPTR_SPEC
             " ,%" KMP_UINTPTR_SPEC ")\n",
             pc_addr, pc_size, d_tn->cmn_size));
    KMP_FATAL(TPCommonBlocksInconsist);
  }
#endif /* USE_CHECKS_COMMON */

  tt = &(__kmp_threads[gtid]->th.th_pri_common->data[KMP_HASH(pc_addr)]);

#ifdef KMP_TASK_COMMON_DEBUG
  if (*tt != 0) {
    KC_TRACE(
        10,
        ("__kmp_threadprivate_insert: WARNING! thread#%d: collision on %p\n",
         gtid, pc_addr));
  }
#endif
  tn->next = *tt;
  *tt = tn;

#ifdef KMP_TASK_COMMON_DEBUG
  KC_TRACE(10,
           ("__kmp_threadprivate_insert: thread#%d, inserted node %p on list\n",
            gtid, pc_addr));
  dump_list();
#endif

  /* Link the node into a simple list */

  tn->link = __kmp_threads[gtid]->th.th_pri_head;
  __kmp_threads[gtid]->th.th_pri_head = tn;

  if ((__kmp_foreign_tp) ? (KMP_INITIAL_GTID(gtid)) : (KMP_UBER_GTID(gtid)))
    return tn;

  /* if C++ object with copy constructor, use it;
   * else if C++ object with constructor, use it for the non-master copies only;
   * else use pod_init and memcpy
   *
   * C++ constructors need to be called once for each non-master thread on
   * allocate
   * C++ copy constructors need to be called once for each thread on allocate */

  /* C++ object with constructors/destructors; don't call constructors for
     master thread though */
  if (d_tn->is_vec) {
    if (d_tn->ct.ctorv != 0) {
      (void)(*d_tn->ct.ctorv)(tn->par_addr, d_tn->vec_len);
    } else if (d_tn->cct.cctorv != 0) {
      (void)(*d_tn->cct.cctorv)(tn->par_addr, d_tn->obj_init, d_tn->vec_len);
    } else if (tn->par_addr != tn->gbl_addr) {
      __kmp_copy_common_data(tn->par_addr, d_tn->pod_init);
    }
  } else {
    if (d_tn->ct.ctor != 0) {
      (void)(*d_tn->ct.ctor)(tn->par_addr);
    } else if (d_tn->cct.cctor != 0) {
      (void)(*d_tn->cct.cctor)(tn->par_addr, d_tn->obj_init);
    } else if (tn->par_addr != tn->gbl_addr) {
      __kmp_copy_common_data(tn->par_addr, d_tn->pod_init);
    }
  }
  /* !BUILD_OPENMP_C
  if (tn->par_addr != tn->gbl_addr)
    __kmp_copy_common_data( tn->par_addr, d_tn->pod_init ); */

  return tn;
}

/* ------------------------------------------------------------------------ */
/* We are currently parallel, and we know the thread id.                     */
/* ------------------------------------------------------------------------ */

/*!
 @ingroup THREADPRIVATE

 @param loc source location information
 @param data pointer to data being privatized
 @param ctor pointer to constructor function for data
 @param cctor pointer to copy constructor function for data
 @param dtor pointer to destructor function for data

 Register constructors and destructors for thread private data.
 This function is called when executing in parallel, when we know the thread id.
*/
void __kmpc_threadprivate_register(ident_t *loc, void *data, kmpc_ctor ctor,
                                   kmpc_cctor cctor, kmpc_dtor dtor) {
  struct shared_common *d_tn, **lnk_tn;

  KC_TRACE(10, ("__kmpc_threadprivate_register: called\n"));

#ifdef USE_CHECKS_COMMON
  /* copy constructor must be zero for current code gen (Nov 2002 - jph) */
  KMP_ASSERT(cctor == 0);
#endif /* USE_CHECKS_COMMON */

  /* Only the global data table exists. */
  d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, -1, data);

  if (d_tn == 0) {
    d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
    d_tn->gbl_addr = data;

    d_tn->ct.ctor = ctor;
    d_tn->cct.cctor = cctor;
    d_tn->dt.dtor = dtor;
    /*
    d_tn->is_vec = FALSE;  // AC: commented out because __kmp_allocate
                           // zeroes the memory
    d_tn->vec_len = 0L;
    d_tn->obj_init = 0;
    d_tn->pod_init = 0;
    */
    lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(data)]);

    d_tn->next = *lnk_tn;
    *lnk_tn = d_tn;
  }
}

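// Illustrative sketch (not compiled): the registration call a compiler might
// emit for a C++ threadprivate object, e.g.
//   static T tp_obj;
//   #pragma omp threadprivate(tp_obj)
// The type T, the thunks, and __kmp_example_register() are hypothetical and
// exist only to show the expected shapes. The thunk signatures follow the way
// this file invokes ct.ctor/dt.dtor and are assumed to match the
// kmpc_ctor/kmpc_dtor typedefs in kmp.h. The copy-constructor slot is passed
// as 0, matching the USE_CHECKS_COMMON assertion above.
#if 0
#include <new>

struct T {
  int value;
  T() : value(42) {}
  ~T() {}
};

static T tp_obj; // master copy of the threadprivate variable

static void *__tp_obj_ctor(void *self) { return new (self) T(); }
static void __tp_obj_dtor(void *self) { static_cast<T *>(self)->~T(); }

static void __kmp_example_register(ident_t *loc) {
  // Registers the constructor/destructor thunks for &tp_obj; later lookups in
  // the shared table key off the address &tp_obj.
  __kmpc_threadprivate_register(loc, &tp_obj, __tp_obj_ctor,
                                /*cctor=*/0, __tp_obj_dtor);
}
#endif
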
void *__kmpc_threadprivate(ident_t *loc, kmp_int32 global_tid, void *data,
                           size_t size) {
  void *ret;
  struct private_common *tn;

  KC_TRACE(10, ("__kmpc_threadprivate: T#%d called\n", global_tid));

#ifdef USE_CHECKS_COMMON
  if (!__kmp_init_serial)
    KMP_FATAL(RTLNotInitialized);
#endif /* USE_CHECKS_COMMON */

  if (!__kmp_threads[global_tid]->th.th_root->r.r_active && !__kmp_foreign_tp) {
    /* The parallel address will NEVER overlap with the data_address */
    /* dkp: 3rd arg to kmp_threadprivate_insert_private_data() is the
     * data_address; use data_address = data */

    KC_TRACE(20, ("__kmpc_threadprivate: T#%d inserting private data\n",
                  global_tid));
    kmp_threadprivate_insert_private_data(global_tid, data, data, size);

    ret = data;
  } else {
    KC_TRACE(
        50,
        ("__kmpc_threadprivate: T#%d try to find private data at address %p\n",
         global_tid, data));
    tn = __kmp_threadprivate_find_task_common(
        __kmp_threads[global_tid]->th.th_pri_common, global_tid, data);

    if (tn) {
      KC_TRACE(20, ("__kmpc_threadprivate: T#%d found data\n", global_tid));
#ifdef USE_CHECKS_COMMON
      if ((size_t)size > tn->cmn_size) {
        KC_TRACE(10, ("THREADPRIVATE: %p (%" KMP_UINTPTR_SPEC
                      " ,%" KMP_UINTPTR_SPEC ")\n",
                      data, size, tn->cmn_size));
        KMP_FATAL(TPCommonBlocksInconsist);
      }
#endif /* USE_CHECKS_COMMON */
    } else {
      /* The parallel address will NEVER overlap with the data_address */
      /* dkp: 3rd arg to kmp_threadprivate_insert() is the data_address; use
       * data_address = data */
      KC_TRACE(20, ("__kmpc_threadprivate: T#%d inserting data\n", global_tid));
      tn = kmp_threadprivate_insert(global_tid, data, data, size);
    }

    ret = tn->par_addr;
  }
  KC_TRACE(10, ("__kmpc_threadprivate: T#%d exiting; return value = %p\n",
                global_tid, ret));

  return ret;
}

static kmp_cached_addr_t *__kmp_find_cache(void *data) {
  kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list;
  while (ptr && ptr->data != data)
    ptr = ptr->next;
  return ptr;
}

/*!
 @ingroup THREADPRIVATE
 @param loc source location information
 @param global_tid global thread number
 @param data pointer to data to privatize
 @param size size of data to privatize
 @param cache pointer to cache
 @return pointer to private storage

 Allocate private storage for threadprivate data.
*/
void *
__kmpc_threadprivate_cached(ident_t *loc,
                            kmp_int32 global_tid, // gtid.
                            void *data, // Pointer to original global variable.
                            size_t size, // Size of original global variable.
                            void ***cache) {
  KC_TRACE(10, ("__kmpc_threadprivate_cached: T#%d called with cache: %p, "
                "address: %p, size: %" KMP_SIZE_T_SPEC "\n",
                global_tid, *cache, data, size));

  if (TCR_PTR(*cache) == 0) {
    __kmp_acquire_lock(&__kmp_global_lock, global_tid);

    if (TCR_PTR(*cache) == 0) {
      __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
      // Compiler often passes in NULL cache, even if it's already been created
      void **my_cache;
      kmp_cached_addr_t *tp_cache_addr;
      // Look for an existing cache
      tp_cache_addr = __kmp_find_cache(data);
      if (!tp_cache_addr) { // Cache was never created; do it now
        __kmp_tp_cached = 1;
        KMP_ITT_IGNORE(my_cache = (void **)__kmp_allocate(
                           sizeof(void *) * __kmp_tp_capacity +
                           sizeof(kmp_cached_addr_t)););
        // No need to zero the allocated memory; __kmp_allocate does that.
        KC_TRACE(50, ("__kmpc_threadprivate_cached: T#%d allocated cache at "
                      "address %p\n",
                      global_tid, my_cache));
        /* TODO: free all this memory in __kmp_common_destroy using
         * __kmp_threadpriv_cache_list */
        /* Add address of my_cache to linked list for cleanup later */
        tp_cache_addr = (kmp_cached_addr_t *)&my_cache[__kmp_tp_capacity];
        tp_cache_addr->addr = my_cache;
        tp_cache_addr->data = data;
        tp_cache_addr->compiler_cache = cache;
        tp_cache_addr->next = __kmp_threadpriv_cache_list;
        __kmp_threadpriv_cache_list = tp_cache_addr;
      } else { // A cache was already created; use it
        my_cache = tp_cache_addr->addr;
        tp_cache_addr->compiler_cache = cache;
      }
      KMP_MB();

      TCW_PTR(*cache, my_cache);
      __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);

      KMP_MB();
    }
    __kmp_release_lock(&__kmp_global_lock, global_tid);
  }

  void *ret;
  if ((ret = TCR_PTR((*cache)[global_tid])) == 0) {
    ret = __kmpc_threadprivate(loc, global_tid, data, (size_t)size);

    TCW_PTR((*cache)[global_tid], ret);
  }
  KC_TRACE(10,
           ("__kmpc_threadprivate_cached: T#%d exiting; return value = %p\n",
            global_tid, ret));
  return ret;
}

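// Illustrative sketch (not compiled): the access sequence a compiler might
// emit for each reference to a threadprivate variable, e.g.
//   static int counter;
//   #pragma omp threadprivate(counter)
// The names counter, counter_cache, and example_use() are hypothetical; the
// entry points and their parameters are the ones defined in this file.
#if 0
static int counter;          // master copy of the threadprivate variable
static void **counter_cache; // compiler-managed cache pointer, initially NULL

static void example_use(ident_t *loc) {
  kmp_int32 gtid = __kmpc_global_thread_num(loc);

  // The first call for this address allocates the cache (an array of
  // __kmp_tp_capacity slots followed by its kmp_cached_addr_t bookkeeping
  // record in one allocation, as done above); subsequent calls reduce to a
  // lookup indexed by gtid.
  int *my_counter = (int *)__kmpc_threadprivate_cached(
      loc, gtid, &counter, sizeof(counter), &counter_cache);
  (*my_counter)++; // operate on this thread's private copy
}
#endif
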
// This function should only be called when both __kmp_tp_cached_lock and
// kmp_forkjoin_lock are held.
void __kmp_threadprivate_resize_cache(int newCapacity) {
  KC_TRACE(10, ("__kmp_threadprivate_resize_cache: called with size: %d\n",
                newCapacity));

  kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list;

  while (ptr) {
    if (ptr->data) { // this location has an active cache; resize it
      void **my_cache;
      KMP_ITT_IGNORE(my_cache =
                         (void **)__kmp_allocate(sizeof(void *) * newCapacity +
                                                 sizeof(kmp_cached_addr_t)););
      // No need to zero the allocated memory; __kmp_allocate does that.
      KC_TRACE(50, ("__kmp_threadprivate_resize_cache: allocated cache at %p\n",
                    my_cache));
      // Now copy old cache into new cache
      void **old_cache = ptr->addr;
      for (int i = 0; i < __kmp_tp_capacity; ++i) {
        my_cache[i] = old_cache[i];
      }

      // Add address of new my_cache to linked list for cleanup later
      kmp_cached_addr_t *tp_cache_addr;
      tp_cache_addr = (kmp_cached_addr_t *)&my_cache[newCapacity];
      tp_cache_addr->addr = my_cache;
      tp_cache_addr->data = ptr->data;
      tp_cache_addr->compiler_cache = ptr->compiler_cache;
      tp_cache_addr->next = __kmp_threadpriv_cache_list;
      __kmp_threadpriv_cache_list = tp_cache_addr;

      // Copy new cache to compiler's location: We can copy directly
      // to (*compiler_cache) if compiler guarantees it will keep
      // using the same location for the cache. This is not yet true
      // for some compilers, in which case we have to check if
      // compiler_cache is still pointing at old cache, and if so, we
      // can point it at the new cache with an atomic compare&swap
      // operation. (Old method will always work, but we should shift
      // to new method (commented line below) when Intel and Clang
      // compilers use new method.)
      (void)KMP_COMPARE_AND_STORE_PTR(tp_cache_addr->compiler_cache, old_cache,
                                      my_cache);
      // TCW_PTR(*(tp_cache_addr->compiler_cache), my_cache);

      // If the store doesn't happen here, the compiler's old behavior will
      // inevitably call __kmpc_threadprivate_cached with a new location for
      // the cache, and that function will store the resized cache there at
      // that point.

      // Nullify old cache's data pointer so we skip it next time
      ptr->data = NULL;
    }
    ptr = ptr->next;
  }
  // After all caches are resized, update __kmp_tp_capacity to the new size
  *(volatile int *)&__kmp_tp_capacity = newCapacity;
}

/*!
 @ingroup THREADPRIVATE
 @param loc source location information
 @param data pointer to data being privatized
 @param ctor pointer to constructor function for data
 @param cctor pointer to copy constructor function for data
 @param dtor pointer to destructor function for data
 @param vector_length length of the vector (bytes or elements?)
 Register vector constructors and destructors for thread private data.
*/
void __kmpc_threadprivate_register_vec(ident_t *loc, void *data,
                                       kmpc_ctor_vec ctor, kmpc_cctor_vec cctor,
                                       kmpc_dtor_vec dtor,
                                       size_t vector_length) {
  struct shared_common *d_tn, **lnk_tn;

  KC_TRACE(10, ("__kmpc_threadprivate_register_vec: called\n"));

#ifdef USE_CHECKS_COMMON
  /* copy constructor must be zero for current code gen (Nov 2002 - jph) */
  KMP_ASSERT(cctor == 0);
#endif /* USE_CHECKS_COMMON */

  d_tn = __kmp_find_shared_task_common(
      &__kmp_threadprivate_d_table, -1,
      data); /* Only the global data table exists. */

  if (d_tn == 0) {
    d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
    d_tn->gbl_addr = data;

    d_tn->ct.ctorv = ctor;
    d_tn->cct.cctorv = cctor;
    d_tn->dt.dtorv = dtor;
    d_tn->is_vec = TRUE;
    d_tn->vec_len = (size_t)vector_length;
    // d_tn->obj_init = 0; // AC: __kmp_allocate zeroes the memory
    // d_tn->pod_init = 0;
    lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(data)]);

    d_tn->next = *lnk_tn;
    *lnk_tn = d_tn;
  }
}

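// Illustrative sketch (not compiled): registering an array-valued (vector)
// threadprivate variable, e.g.
//   static T arr[8];
//   #pragma omp threadprivate(arr)
// The type T, the thunks, and __kmp_example_register_vec() are hypothetical.
// vector_length is simply stored in vec_len and handed back to the thunks
// (see the ctorv/dtorv call sites earlier in this file), so this sketch
// treats it as the element count and lets the thunks do the per-element work;
// the doxygen above leaves the unit open.
#if 0
#include <new>

struct T {
  int value;
  T() : value(0) {}
  ~T() {}
};

static T arr[8]; // master copy of the vector threadprivate variable

static void *__arr_ctor_vec(void *self, size_t n) {
  T *p = static_cast<T *>(self);
  for (size_t i = 0; i < n; ++i)
    new (&p[i]) T();
  return self;
}

static void __arr_dtor_vec(void *self, size_t n) {
  T *p = static_cast<T *>(self);
  for (size_t i = 0; i < n; ++i)
    p[i].~T();
}

static void __kmp_example_register_vec(ident_t *loc) {
  // As with the scalar form, the copy-constructor slot is 0 for current
  // code generation.
  __kmpc_threadprivate_register_vec(loc, arr, __arr_ctor_vec, /*cctor=*/0,
                                    __arr_dtor_vec, 8);
}
#endif
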
void __kmp_cleanup_threadprivate_caches() {
  kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list;

  while (ptr) {
    void **cache = ptr->addr;
    __kmp_threadpriv_cache_list = ptr->next;
    if (*ptr->compiler_cache)
      *ptr->compiler_cache = NULL;
    ptr->compiler_cache = NULL;
    ptr->data = NULL;
    ptr->addr = NULL;
    ptr->next = NULL;
    // Threadprivate data pointed at by cache entries are destroyed at end of
    // __kmp_launch_thread with __kmp_common_destroy_gtid.
    __kmp_free(cache); // implicitly frees ptr too
    ptr = __kmp_threadpriv_cache_list;
  }
}