/*
 * kmp_threadprivate.cpp -- OpenMP threadprivate support library
 */

//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"

#define USE_CHECKS_COMMON

#define KMP_INLINE_SUBR 1

void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr,
                                           void *data_addr, size_t pc_size);
struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr,
                                                void *data_addr,
                                                size_t pc_size);

struct shared_table __kmp_threadprivate_d_table;

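// Look up the node for pc_addr in one thread's private hash table
// (th.th_pri_common); returns NULL if this thread has no private copy yet.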
static
#ifdef KMP_INLINE_SUBR
    __forceinline
#endif
    struct private_common *
    __kmp_threadprivate_find_task_common(struct common_table *tbl, int gtid,
                                         void *pc_addr)

{
  struct private_common *tn;

#ifdef KMP_TASK_COMMON_DEBUG
  KC_TRACE(10, ("__kmp_threadprivate_find_task_common: thread#%d, called with "
                "address %p\n",
                gtid, pc_addr));
  dump_list();
#endif

  for (tn = tbl->data[KMP_HASH(pc_addr)]; tn; tn = tn->next) {
    if (tn->gbl_addr == pc_addr) {
#ifdef KMP_TASK_COMMON_DEBUG
      KC_TRACE(10, ("__kmp_threadprivate_find_task_common: thread#%d, found "
                    "node %p on list\n",
                    gtid, pc_addr));
#endif
      return tn;
    }
  }
  return 0;
}

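// Look up the global (shared) descriptor for pc_addr in the given shared
// table; returns NULL if the variable has not been registered there.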
static
#ifdef KMP_INLINE_SUBR
    __forceinline
#endif
    struct shared_common *
    __kmp_find_shared_task_common(struct shared_table *tbl, int gtid,
                                  void *pc_addr) {
  struct shared_common *tn;

  for (tn = tbl->data[KMP_HASH(pc_addr)]; tn; tn = tn->next) {
    if (tn->gbl_addr == pc_addr) {
#ifdef KMP_TASK_COMMON_DEBUG
      KC_TRACE(
          10,
          ("__kmp_find_shared_task_common: thread#%d, found node %p on list\n",
           gtid, pc_addr));
#endif
      return tn;
    }
  }
  return 0;
}

// Create a template for the data initialized storage. Either the template is
// NULL indicating zero fill, or the template is a copy of the original data.
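// The byte scan below allocates and fills d->data only if the source contains
// a non-zero byte; otherwise d->data stays NULL and copies are zero filled.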
static struct private_data *__kmp_init_common_data(void *pc_addr,
                                                   size_t pc_size) {
  struct private_data *d;
  size_t i;
  char *p;

  d = (struct private_data *)__kmp_allocate(sizeof(struct private_data));
  /*
      d->data = 0;  // AC: commented out because __kmp_allocate zeroes the
     memory
      d->next = 0;
  */
  d->size = pc_size;
  d->more = 1;

  p = (char *)pc_addr;

  for (i = pc_size; i > 0; --i) {
    if (*p++ != '\0') {
      d->data = __kmp_allocate(pc_size);
      KMP_MEMCPY(d->data, pc_addr, pc_size);
      break;
    }
  }

  return d;
}

// Initialize the data area from the template.
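// Each private_data node describes 'more' consecutive blocks of 'size' bytes;
// a NULL data pointer means the block is zero filled rather than copied.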
static void __kmp_copy_common_data(void *pc_addr, struct private_data *d) {
  char *addr = (char *)pc_addr;
  int i, offset;

  for (offset = 0; d != 0; d = d->next) {
    for (i = d->more; i > 0; --i) {
      if (d->data == 0)
        memset(&addr[offset], '\0', d->size);
      else
        KMP_MEMCPY(&addr[offset], d->data, d->size);
      offset += d->size;
    }
  }
}

/* we are called from __kmp_serial_initialize() with __kmp_initz_lock held. */
void __kmp_common_initialize(void) {
  if (!TCR_4(__kmp_init_common)) {
    int q;
#ifdef KMP_DEBUG
    int gtid;
#endif

    __kmp_threadpriv_cache_list = NULL;

#ifdef KMP_DEBUG
    /* verify the uber masters were initialized */
    for (gtid = 0; gtid < __kmp_threads_capacity; gtid++)
      if (__kmp_root[gtid]) {
        KMP_DEBUG_ASSERT(__kmp_root[gtid]->r.r_uber_thread);
        for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q)
          KMP_DEBUG_ASSERT(
              !__kmp_root[gtid]->r.r_uber_thread->th.th_pri_common->data[q]);
        /*                    __kmp_root[ gtid ]-> r.r_uber_thread ->
         * th.th_pri_common -> data[ q ] = 0;*/
      }
#endif /* KMP_DEBUG */

    for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q)
      __kmp_threadprivate_d_table.data[q] = 0;

    TCW_4(__kmp_init_common, TRUE);
  }
}

/* Call all destructors for threadprivate data belonging to all threads.
   Currently unused! */
void __kmp_common_destroy(void) {
  if (TCR_4(__kmp_init_common)) {
    int q;

    TCW_4(__kmp_init_common, FALSE);

    for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) {
      int gtid;
      struct private_common *tn;
      struct shared_common *d_tn;

      /* C++ destructors need to be called once per thread before exiting.
         Don't call destructors for master thread though unless we used copy
         constructor */

      for (d_tn = __kmp_threadprivate_d_table.data[q]; d_tn;
           d_tn = d_tn->next) {
        if (d_tn->is_vec) {
          if (d_tn->dt.dtorv != 0) {
            for (gtid = 0; gtid < __kmp_all_nth; ++gtid) {
              if (__kmp_threads[gtid]) {
                if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid))
                                       : (!KMP_UBER_GTID(gtid))) {
                  tn = __kmp_threadprivate_find_task_common(
                      __kmp_threads[gtid]->th.th_pri_common, gtid,
                      d_tn->gbl_addr);
                  if (tn) {
                    (*d_tn->dt.dtorv)(tn->par_addr, d_tn->vec_len);
                  }
                }
              }
            }
            if (d_tn->obj_init != 0) {
              (*d_tn->dt.dtorv)(d_tn->obj_init, d_tn->vec_len);
            }
          }
        } else {
          if (d_tn->dt.dtor != 0) {
            for (gtid = 0; gtid < __kmp_all_nth; ++gtid) {
              if (__kmp_threads[gtid]) {
                if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid))
                                       : (!KMP_UBER_GTID(gtid))) {
                  tn = __kmp_threadprivate_find_task_common(
                      __kmp_threads[gtid]->th.th_pri_common, gtid,
                      d_tn->gbl_addr);
                  if (tn) {
                    (*d_tn->dt.dtor)(tn->par_addr);
                  }
                }
              }
            }
            if (d_tn->obj_init != 0) {
              (*d_tn->dt.dtor)(d_tn->obj_init);
            }
          }
        }
      }
      __kmp_threadprivate_d_table.data[q] = 0;
    }
  }
}

/* Call all destructors for threadprivate data belonging to this thread */
void __kmp_common_destroy_gtid(int gtid) {
  struct private_common *tn;
  struct shared_common *d_tn;

  if (!TCR_4(__kmp_init_gtid)) {
    // This is possible when one of multiple roots initiates early library
    // termination in a sequential region while other teams are active, and its
    // child threads are about to end.
    return;
  }

  KC_TRACE(10, ("__kmp_common_destroy_gtid: T#%d called\n", gtid));
  if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid)) : (!KMP_UBER_GTID(gtid))) {

    if (TCR_4(__kmp_init_common)) {

      /* Cannot do this here since not all threads have destroyed their data */
      /* TCW_4(__kmp_init_common, FALSE); */

      for (tn = __kmp_threads[gtid]->th.th_pri_head; tn; tn = tn->link) {

        d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, gtid,
                                             tn->gbl_addr);

        KMP_DEBUG_ASSERT(d_tn);

        if (d_tn->is_vec) {
          if (d_tn->dt.dtorv != 0) {
            (void)(*d_tn->dt.dtorv)(tn->par_addr, d_tn->vec_len);
          }
          if (d_tn->obj_init != 0) {
            (void)(*d_tn->dt.dtorv)(d_tn->obj_init, d_tn->vec_len);
          }
        } else {
          if (d_tn->dt.dtor != 0) {
            (void)(*d_tn->dt.dtor)(tn->par_addr);
          }
          if (d_tn->obj_init != 0) {
            (void)(*d_tn->dt.dtor)(d_tn->obj_init);
          }
        }
      }
      KC_TRACE(30, ("__kmp_common_destroy_gtid: T#%d threadprivate destructors "
                    "complete\n",
                    gtid));
    }
  }
}

#ifdef KMP_TASK_COMMON_DEBUG
static void dump_list(void) {
  int p, q;

  for (p = 0; p < __kmp_all_nth; ++p) {
    if (!__kmp_threads[p])
      continue;
    for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) {
      if (__kmp_threads[p]->th.th_pri_common->data[q]) {
        struct private_common *tn;

        KC_TRACE(10, ("\tdump_list: gtid:%d addresses\n", p));

        for (tn = __kmp_threads[p]->th.th_pri_common->data[q]; tn;
             tn = tn->next) {
          KC_TRACE(10,
                   ("\tdump_list: THREADPRIVATE: Serial %p -> Parallel %p\n",
                    tn->gbl_addr, tn->par_addr));
        }
      }
    }
  }
}
#endif /* KMP_TASK_COMMON_DEBUG */

// NOTE: this routine is to be called only from the serial part of the program.
void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr,
                                           void *data_addr, size_t pc_size) {
  struct shared_common **lnk_tn, *d_tn;
  KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
                   __kmp_threads[gtid]->th.th_root->r.r_active == 0);

  d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, gtid,
                                       pc_addr);

  if (d_tn == 0) {
    d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));

    d_tn->gbl_addr = pc_addr;
    d_tn->pod_init = __kmp_init_common_data(data_addr, pc_size);
    /*
            d_tn->obj_init = 0;  // AC: commented out because __kmp_allocate
       zeroes the memory
            d_tn->ct.ctor = 0;
            d_tn->cct.cctor = 0;
            d_tn->dt.dtor = 0;
            d_tn->is_vec = FALSE;
            d_tn->vec_len = 0L;
    */
    d_tn->cmn_size = pc_size;

    __kmp_acquire_lock(&__kmp_global_lock, gtid);

    lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(pc_addr)]);

    d_tn->next = *lnk_tn;
    *lnk_tn = d_tn;

    __kmp_release_lock(&__kmp_global_lock, gtid);
  }
}

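// Allocate this thread's private copy of pc_addr and link it into the
// per-thread hash table and list; registers a shared descriptor on first use.
// Takes __kmp_global_lock internally.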
struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr,
                                                void *data_addr,
                                                size_t pc_size) {
  struct private_common *tn, **tt;
  struct shared_common *d_tn;

  /* +++++++++ START OF CRITICAL SECTION +++++++++ */
  __kmp_acquire_lock(&__kmp_global_lock, gtid);

  tn = (struct private_common *)__kmp_allocate(sizeof(struct private_common));

  tn->gbl_addr = pc_addr;

  d_tn = __kmp_find_shared_task_common(
      &__kmp_threadprivate_d_table, gtid,
      pc_addr); /* Only the MASTER data table exists. */

  if (d_tn != 0) {
    /* This threadprivate variable has already been seen. */

    if (d_tn->pod_init == 0 && d_tn->obj_init == 0) {
      d_tn->cmn_size = pc_size;

      if (d_tn->is_vec) {
        if (d_tn->ct.ctorv != 0) {
          /* Construct from scratch so no prototype exists */
          d_tn->obj_init = 0;
        } else if (d_tn->cct.cctorv != 0) {
          /* Now data initialize the prototype since it was previously
           * registered */
          d_tn->obj_init = (void *)__kmp_allocate(d_tn->cmn_size);
          (void)(*d_tn->cct.cctorv)(d_tn->obj_init, pc_addr, d_tn->vec_len);
        } else {
          d_tn->pod_init = __kmp_init_common_data(data_addr, d_tn->cmn_size);
        }
      } else {
        if (d_tn->ct.ctor != 0) {
          /* Construct from scratch so no prototype exists */
          d_tn->obj_init = 0;
        } else if (d_tn->cct.cctor != 0) {
          /* Now data initialize the prototype since it was previously
             registered */
          d_tn->obj_init = (void *)__kmp_allocate(d_tn->cmn_size);
          (void)(*d_tn->cct.cctor)(d_tn->obj_init, pc_addr);
        } else {
          d_tn->pod_init = __kmp_init_common_data(data_addr, d_tn->cmn_size);
        }
      }
    }
  } else {
    struct shared_common **lnk_tn;

    d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
    d_tn->gbl_addr = pc_addr;
    d_tn->cmn_size = pc_size;
    d_tn->pod_init = __kmp_init_common_data(data_addr, pc_size);
    /*
            d_tn->obj_init = 0;  // AC: commented out because __kmp_allocate
       zeroes the memory
            d_tn->ct.ctor = 0;
            d_tn->cct.cctor = 0;
            d_tn->dt.dtor = 0;
            d_tn->is_vec = FALSE;
            d_tn->vec_len = 0L;
    */
    lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(pc_addr)]);

    d_tn->next = *lnk_tn;
    *lnk_tn = d_tn;
  }

  tn->cmn_size = d_tn->cmn_size;

  if ((__kmp_foreign_tp) ? (KMP_INITIAL_GTID(gtid)) : (KMP_UBER_GTID(gtid))) {
    tn->par_addr = (void *)pc_addr;
  } else {
    tn->par_addr = (void *)__kmp_allocate(tn->cmn_size);
  }

  __kmp_release_lock(&__kmp_global_lock, gtid);
/* +++++++++ END OF CRITICAL SECTION +++++++++ */

#ifdef USE_CHECKS_COMMON
  if (pc_size > d_tn->cmn_size) {
    KC_TRACE(
        10, ("__kmp_threadprivate_insert: THREADPRIVATE: %p (%" KMP_UINTPTR_SPEC
             " ,%" KMP_UINTPTR_SPEC ")\n",
             pc_addr, pc_size, d_tn->cmn_size));
    KMP_FATAL(TPCommonBlocksInconsist);
  }
#endif /* USE_CHECKS_COMMON */

  tt = &(__kmp_threads[gtid]->th.th_pri_common->data[KMP_HASH(pc_addr)]);

#ifdef KMP_TASK_COMMON_DEBUG
  if (*tt != 0) {
    KC_TRACE(
        10,
        ("__kmp_threadprivate_insert: WARNING! thread#%d: collision on %p\n",
         gtid, pc_addr));
  }
#endif
  tn->next = *tt;
  *tt = tn;

#ifdef KMP_TASK_COMMON_DEBUG
  KC_TRACE(10,
           ("__kmp_threadprivate_insert: thread#%d, inserted node %p on list\n",
            gtid, pc_addr));
  dump_list();
#endif

  /* Link the node into a simple list */

  tn->link = __kmp_threads[gtid]->th.th_pri_head;
  __kmp_threads[gtid]->th.th_pri_head = tn;

  if ((__kmp_foreign_tp) ? (KMP_INITIAL_GTID(gtid)) : (KMP_UBER_GTID(gtid)))
    return tn;

  /* if C++ object with copy constructor, use it;
   * else if C++ object with constructor, use it for the non-master copies only;
   * else use pod_init and memcpy
   *
   * C++ constructors need to be called once for each non-master thread on
   * allocate
   * C++ copy constructors need to be called once for each thread on allocate */

  /* C++ object with constructors/destructors; don't call constructors for
     master thread though */
  if (d_tn->is_vec) {
    if (d_tn->ct.ctorv != 0) {
      (void)(*d_tn->ct.ctorv)(tn->par_addr, d_tn->vec_len);
    } else if (d_tn->cct.cctorv != 0) {
      (void)(*d_tn->cct.cctorv)(tn->par_addr, d_tn->obj_init, d_tn->vec_len);
    } else if (tn->par_addr != tn->gbl_addr) {
      __kmp_copy_common_data(tn->par_addr, d_tn->pod_init);
    }
  } else {
    if (d_tn->ct.ctor != 0) {
      (void)(*d_tn->ct.ctor)(tn->par_addr);
    } else if (d_tn->cct.cctor != 0) {
      (void)(*d_tn->cct.cctor)(tn->par_addr, d_tn->obj_init);
    } else if (tn->par_addr != tn->gbl_addr) {
      __kmp_copy_common_data(tn->par_addr, d_tn->pod_init);
    }
  }
  /* !BUILD_OPENMP_C
      if (tn->par_addr != tn->gbl_addr)
          __kmp_copy_common_data( tn->par_addr, d_tn->pod_init ); */

  return tn;
}

/* ------------------------------------------------------------------------ */
/* We are currently parallel, and we know the thread id.                    */
/* ------------------------------------------------------------------------ */

/*!
 @ingroup THREADPRIVATE

 @param loc source location information
 @param data  pointer to data being privatized
 @param ctor  pointer to constructor function for data
 @param cctor  pointer to copy constructor function for data
 @param dtor  pointer to destructor function for data

 Register constructors and destructors for thread private data.
 This function is called when executing in parallel, when we know the thread id.
*/
void __kmpc_threadprivate_register(ident_t *loc, void *data, kmpc_ctor ctor,
                                   kmpc_cctor cctor, kmpc_dtor dtor) {
  struct shared_common *d_tn, **lnk_tn;

  KC_TRACE(10, ("__kmpc_threadprivate_register: called\n"));

#ifdef USE_CHECKS_COMMON
  /* copy constructor must be zero for current code gen (Nov 2002 - jph) */
  KMP_ASSERT(cctor == 0);
#endif /* USE_CHECKS_COMMON */

  /* Only the global data table exists. */
  d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, -1, data);

  if (d_tn == 0) {
    d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
    d_tn->gbl_addr = data;

    d_tn->ct.ctor = ctor;
    d_tn->cct.cctor = cctor;
    d_tn->dt.dtor = dtor;
    /*
            d_tn->is_vec = FALSE;  // AC: commented out because __kmp_allocate
       zeroes the memory
            d_tn->vec_len = 0L;
            d_tn->obj_init = 0;
            d_tn->pod_init = 0;
    */
    lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(data)]);

    d_tn->next = *lnk_tn;
    *lnk_tn = d_tn;
  }
}

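// Return the calling thread's private copy of 'data', creating it on first
// use; when the root thread is not in a parallel region (and __kmp_foreign_tp
// is off) the original address itself is registered and returned.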
void *__kmpc_threadprivate(ident_t *loc, kmp_int32 global_tid, void *data,
                           size_t size) {
  void *ret;
  struct private_common *tn;

  KC_TRACE(10, ("__kmpc_threadprivate: T#%d called\n", global_tid));

#ifdef USE_CHECKS_COMMON
  if (!__kmp_init_serial)
    KMP_FATAL(RTLNotInitialized);
#endif /* USE_CHECKS_COMMON */

  if (!__kmp_threads[global_tid]->th.th_root->r.r_active && !__kmp_foreign_tp) {
    /* The parallel address will NEVER overlap with the data_address */
    /* dkp: 3rd arg to kmp_threadprivate_insert_private_data() is the
     * data_address; use data_address = data */

    KC_TRACE(20, ("__kmpc_threadprivate: T#%d inserting private data\n",
                  global_tid));
    kmp_threadprivate_insert_private_data(global_tid, data, data, size);

    ret = data;
  } else {
    KC_TRACE(
        50,
        ("__kmpc_threadprivate: T#%d try to find private data at address %p\n",
         global_tid, data));
    tn = __kmp_threadprivate_find_task_common(
        __kmp_threads[global_tid]->th.th_pri_common, global_tid, data);

    if (tn) {
      KC_TRACE(20, ("__kmpc_threadprivate: T#%d found data\n", global_tid));
#ifdef USE_CHECKS_COMMON
      if ((size_t)size > tn->cmn_size) {
        KC_TRACE(10, ("THREADPRIVATE: %p (%" KMP_UINTPTR_SPEC
                      " ,%" KMP_UINTPTR_SPEC ")\n",
                      data, size, tn->cmn_size));
        KMP_FATAL(TPCommonBlocksInconsist);
      }
#endif /* USE_CHECKS_COMMON */
    } else {
      /* The parallel address will NEVER overlap with the data_address */
      /* dkp: 3rd arg to kmp_threadprivate_insert() is the data_address; use
       * data_address = data */
      KC_TRACE(20, ("__kmpc_threadprivate: T#%d inserting data\n", global_tid));
      tn = kmp_threadprivate_insert(global_tid, data, data, size);
    }

    ret = tn->par_addr;
  }
  KC_TRACE(10, ("__kmpc_threadprivate: T#%d exiting; return value = %p\n",
                global_tid, ret));

  return ret;
}

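// Scan the global list of threadprivate caches for one keyed by 'data';
// returns NULL if no cache has been created for that variable yet.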
static kmp_cached_addr_t *__kmp_find_cache(void *data) {
  kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list;
  while (ptr && ptr->data != data)
    ptr = ptr->next;
  return ptr;
}

/*!
 @ingroup THREADPRIVATE
 @param loc source location information
 @param global_tid  global thread number
 @param data  pointer to data to privatize
 @param size  size of data to privatize
 @param cache  pointer to cache
 @return pointer to private storage

 Allocate private storage for threadprivate data.
*/
void *
__kmpc_threadprivate_cached(ident_t *loc,
                            kmp_int32 global_tid, // gtid.
                            void *data, // Pointer to original global variable.
                            size_t size, // Size of original global variable.
                            void ***cache) {
  KC_TRACE(10, ("__kmpc_threadprivate_cached: T#%d called with cache: %p, "
                "address: %p, size: %" KMP_SIZE_T_SPEC "\n",
                global_tid, *cache, data, size));

  if (TCR_PTR(*cache) == 0) {
    __kmp_acquire_lock(&__kmp_global_lock, global_tid);

    if (TCR_PTR(*cache) == 0) {
      __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
      // Compiler often passes in NULL cache, even if it's already been created
      void **my_cache;
      kmp_cached_addr_t *tp_cache_addr;
      // Look for an existing cache
      tp_cache_addr = __kmp_find_cache(data);
      if (!tp_cache_addr) { // Cache was never created; do it now
        __kmp_tp_cached = 1;
        KMP_ITT_IGNORE(my_cache = (void **)__kmp_allocate(
                           sizeof(void *) * __kmp_tp_capacity +
                           sizeof(kmp_cached_addr_t)););
        // No need to zero the allocated memory; __kmp_allocate does that.
        KC_TRACE(50, ("__kmpc_threadprivate_cached: T#%d allocated cache at "
                      "address %p\n",
                      global_tid, my_cache));
        /* TODO: free all this memory in __kmp_common_destroy using
         * __kmp_threadpriv_cache_list */
        /* Add address of mycache to linked list for cleanup later  */
        tp_cache_addr = (kmp_cached_addr_t *)&my_cache[__kmp_tp_capacity];
        tp_cache_addr->addr = my_cache;
        tp_cache_addr->data = data;
        tp_cache_addr->compiler_cache = cache;
        tp_cache_addr->next = __kmp_threadpriv_cache_list;
        __kmp_threadpriv_cache_list = tp_cache_addr;
      } else { // A cache was already created; use it
        my_cache = tp_cache_addr->addr;
        tp_cache_addr->compiler_cache = cache;
      }
      KMP_MB();

      TCW_PTR(*cache, my_cache);
      __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);

      KMP_MB();
    }
    __kmp_release_lock(&__kmp_global_lock, global_tid);
  }

  void *ret;
  if ((ret = TCR_PTR((*cache)[global_tid])) == 0) {
    ret = __kmpc_threadprivate(loc, global_tid, data, (size_t)size);

    TCW_PTR((*cache)[global_tid], ret);
  }
  KC_TRACE(10,
           ("__kmpc_threadprivate_cached: T#%d exiting; return value = %p\n",
            global_tid, ret));
  return ret;
}

// This function should only be called when both __kmp_tp_cached_lock and
// kmp_forkjoin_lock are held.
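// Note: old cache blocks are not freed here, since other threads may still be
// dereferencing them; they stay on __kmp_threadpriv_cache_list and are
// released later by __kmp_cleanup_threadprivate_caches.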
void __kmp_threadprivate_resize_cache(int newCapacity) {
  KC_TRACE(10, ("__kmp_threadprivate_resize_cache: called with size: %d\n",
                newCapacity));

  kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list;

  while (ptr) {
    if (ptr->data) { // this location has an active cache; resize it
      void **my_cache;
      KMP_ITT_IGNORE(my_cache =
                         (void **)__kmp_allocate(sizeof(void *) * newCapacity +
                                                 sizeof(kmp_cached_addr_t)););
      // No need to zero the allocated memory; __kmp_allocate does that.
      KC_TRACE(50, ("__kmp_threadprivate_resize_cache: allocated cache at %p\n",
                    my_cache));
      // Now copy old cache into new cache
      void **old_cache = ptr->addr;
      for (int i = 0; i < __kmp_tp_capacity; ++i) {
        my_cache[i] = old_cache[i];
      }

      // Add address of new my_cache to linked list for cleanup later
      kmp_cached_addr_t *tp_cache_addr;
      tp_cache_addr = (kmp_cached_addr_t *)&my_cache[newCapacity];
      tp_cache_addr->addr = my_cache;
      tp_cache_addr->data = ptr->data;
      tp_cache_addr->compiler_cache = ptr->compiler_cache;
      tp_cache_addr->next = __kmp_threadpriv_cache_list;
      __kmp_threadpriv_cache_list = tp_cache_addr;

      // Copy new cache to compiler's location: We can copy directly
      // to (*compiler_cache) if compiler guarantees it will keep
      // using the same location for the cache. This is not yet true
      // for some compilers, in which case we have to check if
      // compiler_cache is still pointing at old cache, and if so, we
      // can point it at the new cache with an atomic compare&swap
      // operation. (Old method will always work, but we should shift
      // to new method (commented line below) when Intel and Clang
      // compilers use new method.)
      (void)KMP_COMPARE_AND_STORE_PTR(tp_cache_addr->compiler_cache, old_cache,
                                      my_cache);
      // TCW_PTR(*(tp_cache_addr->compiler_cache), my_cache);

      // If the store doesn't happen here, the compiler's old behavior will
      // inevitably call __kmpc_threadprivate_cached with a new location for
      // the cache, and that function will store the resized cache there at
      // that point.

      // Nullify old cache's data pointer so we skip it next time
      ptr->data = NULL;
    }
    ptr = ptr->next;
  }
  // After all caches are resized, update __kmp_tp_capacity to the new size
  *(volatile int *)&__kmp_tp_capacity = newCapacity;
}

/*!
 @ingroup THREADPRIVATE
 @param loc source location information
 @param data  pointer to data being privatized
 @param ctor  pointer to constructor function for data
 @param cctor  pointer to copy constructor function for data
 @param dtor  pointer to destructor function for data
 @param vector_length length of the vector (bytes or elements?)
 Register vector constructors and destructors for thread private data.
*/
void __kmpc_threadprivate_register_vec(ident_t *loc, void *data,
                                       kmpc_ctor_vec ctor, kmpc_cctor_vec cctor,
                                       kmpc_dtor_vec dtor,
                                       size_t vector_length) {
  struct shared_common *d_tn, **lnk_tn;

  KC_TRACE(10, ("__kmpc_threadprivate_register_vec: called\n"));

#ifdef USE_CHECKS_COMMON
  /* copy constructor must be zero for current code gen (Nov 2002 - jph) */
  KMP_ASSERT(cctor == 0);
#endif /* USE_CHECKS_COMMON */

  d_tn = __kmp_find_shared_task_common(
      &__kmp_threadprivate_d_table, -1,
      data); /* Only the global data table exists. */

  if (d_tn == 0) {
    d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
    d_tn->gbl_addr = data;

    d_tn->ct.ctorv = ctor;
    d_tn->cct.cctorv = cctor;
    d_tn->dt.dtorv = dtor;
    d_tn->is_vec = TRUE;
    d_tn->vec_len = (size_t)vector_length;
    // d_tn->obj_init = 0;  // AC: __kmp_allocate zeroes the memory
    // d_tn->pod_init = 0;
    lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(data)]);

    d_tn->next = *lnk_tn;
    *lnk_tn = d_tn;
  }
}

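// Walk __kmp_threadpriv_cache_list, clear each compiler-visible cache pointer,
// and free the cache blocks (the trailing kmp_cached_addr_t node is part of
// the same allocation and is freed with it).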
void __kmp_cleanup_threadprivate_caches() {
  kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list;

  while (ptr) {
    void **cache = ptr->addr;
    __kmp_threadpriv_cache_list = ptr->next;
    if (*ptr->compiler_cache)
      *ptr->compiler_cache = NULL;
    ptr->compiler_cache = NULL;
    ptr->data = NULL;
    ptr->addr = NULL;
    ptr->next = NULL;
    // Threadprivate data pointed at by cache entries are destroyed at end of
    // __kmp_launch_thread with __kmp_common_destroy_gtid.
    __kmp_free(cache); // implicitly frees ptr too
    ptr = __kmp_threadpriv_cache_list;
  }
}