/*
 * ompt-tsan.cpp -- Archer runtime library, TSan annotations for Archer
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for details.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif

#include <algorithm>
#include <atomic>
#include <cassert>
#include <cstdlib>
#include <cstring>
#include <inttypes.h>
#include <iostream>
#include <list>
#include <mutex>
#include <sstream>
#include <string>
#include <sys/resource.h>
#include <unistd.h>
#include <unordered_map>
#include <vector>

#if (defined __APPLE__ && defined __MACH__)
#include <dlfcn.h>
#endif

#include "omp-tools.h"

// Define an attribute that indicates that the fall-through from the previous
// case label is intentional and should not be diagnosed by a compiler.
// Code from libcxx/include/__config
// Use a function-like macro to imply that it must be followed by a semicolon.
#if __cplusplus > 201402L && __has_cpp_attribute(fallthrough)
#define KMP_FALLTHROUGH() [[fallthrough]]
// icc cannot properly tell this attribute is absent so force off
#elif defined(__INTEL_COMPILER)
#define KMP_FALLTHROUGH() ((void)0)
#elif __has_cpp_attribute(clang::fallthrough)
#define KMP_FALLTHROUGH() [[clang::fallthrough]]
#elif __has_attribute(fallthrough) || __GNUC__ >= 7
#define KMP_FALLTHROUGH() __attribute__((__fallthrough__))
#else
#define KMP_FALLTHROUGH() ((void)0)
#endif

static int runOnTsan;
static int hasReductionCallback;

class ArcherFlags {
public:
#if (LLVM_VERSION) >= 40
  int flush_shadow{0};
#endif
  int print_max_rss{0};
  int verbose{0};
  int enabled{1};
  int report_data_leak{0};
  int ignore_serial{0};

  ArcherFlags(const char *env) {
    if (env) {
      std::vector<std::string> tokens;
      std::string token;
      std::string str(env);
      std::istringstream iss(str);
      while (std::getline(iss, token, ' '))
        tokens.push_back(token);

      for (std::vector<std::string>::iterator it = tokens.begin();
           it != tokens.end(); ++it) {
#if (LLVM_VERSION) >= 40
        if (sscanf(it->c_str(), "flush_shadow=%d", &flush_shadow))
          continue;
#endif
        if (sscanf(it->c_str(), "print_max_rss=%d", &print_max_rss))
          continue;
        if (sscanf(it->c_str(), "verbose=%d", &verbose))
          continue;
        if (sscanf(it->c_str(), "report_data_leak=%d", &report_data_leak))
          continue;
        if (sscanf(it->c_str(), "enable=%d", &enabled))
          continue;
        if (sscanf(it->c_str(), "ignore_serial=%d", &ignore_serial))
          continue;
        std::cerr << "Illegal values for ARCHER_OPTIONS variable: " << token
                  << std::endl;
      }
    }
  }
};
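
// Illustrative usage (not part of the tool itself): Archer options are passed
// as a space-separated list of key=value tokens in the environment, e.g.
//   ARCHER_OPTIONS="verbose=1 print_max_rss=1" ./omp_app
// Unknown tokens are reported on stderr but do not abort execution.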

class TsanFlags {
public:
  int ignore_noninstrumented_modules;

  TsanFlags(const char *env) : ignore_noninstrumented_modules(0) {
    if (env) {
      std::vector<std::string> tokens;
      std::string str(env);
      auto end = str.end();
      auto it = str.begin();
      auto is_sep = [](char c) {
        return c == ' ' || c == ',' || c == ':' || c == '\n' || c == '\t' ||
               c == '\r';
      };
      while (it != end) {
        auto next_it = std::find_if(it, end, is_sep);
        tokens.emplace_back(it, next_it);
        it = next_it;
        if (it != end) {
          ++it;
        }
      }

      for (const auto &token : tokens) {
        // We are only interested in ignore_noninstrumented_modules, so that
        // we can print a warning if it is unset.
        if (sscanf(token.c_str(), "ignore_noninstrumented_modules=%d",
                   &ignore_noninstrumented_modules))
          continue;
      }
    }
  }
};
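
// Illustrative usage: the corresponding TSan runtime setting is read from
// TSAN_OPTIONS, e.g.
//   TSAN_OPTIONS="ignore_noninstrumented_modules=1" ./omp_app
// which this tool recommends at initialization to suppress false positives
// from the (non-instrumented) OpenMP runtime itself.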

#if (LLVM_VERSION) >= 40
extern "C" {
int __attribute__((weak)) __archer_get_omp_status();
void __attribute__((weak)) __tsan_flush_memory() {}
}
#endif
ArcherFlags *archer_flags;

#ifndef TsanHappensBefore
// Thread Sanitizer is a tool that finds races in code.
// See http://code.google.com/p/data-race-test/wiki/DynamicAnnotations .
// TSan detects these exact functions by name.
extern "C" {
#if (defined __APPLE__ && defined __MACH__)
static void (*AnnotateHappensAfter)(const char *, int, const volatile void *);
static void (*AnnotateHappensBefore)(const char *, int, const volatile void *);
static void (*AnnotateIgnoreWritesBegin)(const char *, int);
static void (*AnnotateIgnoreWritesEnd)(const char *, int);
static void (*AnnotateNewMemory)(const char *, int, const volatile void *,
                                 size_t);
static void (*__tsan_func_entry)(const void *);
static void (*__tsan_func_exit)(void);

static int RunningOnValgrind() {
  int (*fptr)();

  fptr = (int (*)())dlsym(RTLD_DEFAULT, "RunningOnValgrind");
  // If we find a RunningOnValgrind symbol other than this very function, we
  // assume that the annotation functions are present in this execution and
  // leave runOnTsan=1; otherwise we set runOnTsan=0.
  if (!fptr || fptr == RunningOnValgrind)
    runOnTsan = 0;
  return 0;
}
#else
void __attribute__((weak))
AnnotateHappensAfter(const char *file, int line, const volatile void *cv) {}
void __attribute__((weak))
AnnotateHappensBefore(const char *file, int line, const volatile void *cv) {}
void __attribute__((weak))
AnnotateIgnoreWritesBegin(const char *file, int line) {}
void __attribute__((weak)) AnnotateIgnoreWritesEnd(const char *file, int line) {
}
void __attribute__((weak))
AnnotateNewMemory(const char *file, int line, const volatile void *cv,
                  size_t size) {}
int __attribute__((weak)) RunningOnValgrind() {
  runOnTsan = 0;
  return 0;
}
void __attribute__((weak)) __tsan_func_entry(const void *call_pc) {}
void __attribute__((weak)) __tsan_func_exit(void) {}
#endif
}

// This marker is used to define a happens-before arc. The race detector will
// infer an arc from the begin to the end when they share the same pointer
// argument.
#define TsanHappensBefore(cv) AnnotateHappensBefore(__FILE__, __LINE__, cv)

// This marker defines the destination of a happens-before arc.
#define TsanHappensAfter(cv) AnnotateHappensAfter(__FILE__, __LINE__, cv)

// Ignore any races on writes between here and the next TsanIgnoreWritesEnd.
#define TsanIgnoreWritesBegin() AnnotateIgnoreWritesBegin(__FILE__, __LINE__)

// Resume checking for racy writes.
#define TsanIgnoreWritesEnd() AnnotateIgnoreWritesEnd(__FILE__, __LINE__)

// We don't really delete the clock for now
#define TsanDeleteClock(cv)

// Annotate a memory region as freshly allocated, resetting TSan's state for
// it. Freed memory is deliberately annotated the same way, which likewise
// discards any stale happens-before information for the region.
#define TsanNewMemory(addr, size)                                              \
  AnnotateNewMemory(__FILE__, __LINE__, addr, size)
#define TsanFreeMemory(addr, size)                                             \
  AnnotateNewMemory(__FILE__, __LINE__, addr, size)
#endif
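
// A minimal sketch of how these annotations pair up (hypothetical variables,
// not part of this tool): a producer thread that writes x and then calls
//   TsanHappensBefore(&sync);
// is ordered before a consumer thread that calls
//   TsanHappensAfter(&sync);
// and then reads x, because both annotations pass the same address &sync.
// TSan will therefore not report the write/read pair as a race.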

// Function entry/exit
#define TsanFuncEntry(pc) __tsan_func_entry(pc)
#define TsanFuncExit() __tsan_func_exit()

/// Required OMPT inquiry functions.
static ompt_get_parallel_info_t ompt_get_parallel_info;
static ompt_get_thread_data_t ompt_get_thread_data;

typedef char ompt_tsan_clockid;

static uint64_t my_next_id() {
  static uint64_t ID = 0;
  uint64_t ret = __sync_fetch_and_add(&ID, 1);
  return ret;
}

static int pagesize{0};

// Data structure to provide a threadsafe pool of reusable objects.
// DataPool<Type of objects>
template <typename T> struct DataPool final {
  static __thread DataPool<T> *ThreadDataPool;
  std::mutex DPMutex{};

  // store unused objects
  std::vector<T *> DataPointer{};
  std::vector<T *> RemoteDataPointer{};

  // store all allocated memory to finally release
  std::list<void *> memory;

  // count remotely returned data (RemoteDataPointer.size())
  std::atomic<int> remote{0};

  // total number of data objects allocated for this pool
  int total{0};
#ifdef DEBUG_DATA
  int remoteReturn{0};
  int localReturn{0};

  int getRemote() { return remoteReturn + remote; }
  int getLocal() { return localReturn; }
#endif
  int getTotal() { return total; }
  int getMissing() {
    return total - DataPointer.size() - RemoteDataPointer.size();
  }

  // fill the pool by allocating a page of memory
  void newDatas() {
    if (remote > 0) {
      const std::lock_guard<std::mutex> lock(DPMutex);
      // DataPointer is empty, so just swap the vectors
      DataPointer.swap(RemoteDataPointer);
      remote = 0;
      return;
    }
    // calculate the size of an object including padding to cacheline size
    size_t elemSize = sizeof(T);
    size_t paddedSize = (((elemSize - 1) / 64) + 1) * 64;
    // number of padded elements to allocate
    int ndatas = pagesize / paddedSize;
    char *datas = (char *)malloc(ndatas * paddedSize);
    memory.push_back(datas);
    for (int i = 0; i < ndatas; i++) {
      DataPointer.push_back(new (datas + i * paddedSize) T(this));
    }
    total += ndatas;
  }

  // get data from the pool
  T *getData() {
    T *ret;
    if (DataPointer.empty())
      newDatas();
    ret = DataPointer.back();
    DataPointer.pop_back();
    return ret;
  }

  // accesses to the thread-local datapool don't need locks
  void returnOwnData(T *data) {
    DataPointer.emplace_back(data);
#ifdef DEBUG_DATA
    localReturn++;
#endif
  }

  // returning to a remote datapool using lock
  void returnData(T *data) {
    const std::lock_guard<std::mutex> lock(DPMutex);
    RemoteDataPointer.emplace_back(data);
    remote++;
#ifdef DEBUG_DATA
    remoteReturn++;
#endif
  }

  ~DataPool() {
    // We assume all memory has been returned when the thread finishes and the
    // destructor is called.
    if (archer_flags->report_data_leak && getMissing() != 0) {
      printf("ERROR: While freeing DataPool (%s) we are missing %i data "
             "objects.\n",
             __PRETTY_FUNCTION__, getMissing());
      exit(-3);
    }
    for (auto i : DataPointer)
      if (i)
        i->~T();
    for (auto i : RemoteDataPointer)
      if (i)
        i->~T();
    for (auto i : memory)
      if (i)
        free(i);
  }
};
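
// Pool lifecycle in brief (a sketch of the conventions above, no machinery
// beyond what the code defines): each thread allocates objects from its own
// thread-local pool via getData(); an object released on the allocating
// thread goes back lock-free through returnOwnData(), while an object that is
// released on a different thread takes the locked returnData() path into
// RemoteDataPointer, from where the owning thread reclaims it in bulk via the
// vector swap in newDatas().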

template <typename T> struct DataPoolEntry {
  DataPool<T> *owner;

  static T *New() { return DataPool<T>::ThreadDataPool->getData(); }

  void Delete() {
    static_cast<T *>(this)->Reset();
    if (owner == DataPool<T>::ThreadDataPool)
      owner->returnOwnData(static_cast<T *>(this));
    else
      owner->returnData(static_cast<T *>(this));
  }

  DataPoolEntry(DataPool<T> *dp) : owner(dp) {}
};
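
// DataPoolEntry uses the curiously recurring template pattern: each pooled
// type T derives from DataPoolEntry<T>, so Delete() can call T::Reset() and
// return the object to the pool that owns it without any virtual dispatch.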

struct DependencyData;
typedef DataPool<DependencyData> DependencyDataPool;
template <>
__thread DependencyDataPool *DependencyDataPool::ThreadDataPool = nullptr;

/// Data structure to store additional information for task dependency.
struct DependencyData final : DataPoolEntry<DependencyData> {
  ompt_tsan_clockid in;
  ompt_tsan_clockid out;
  ompt_tsan_clockid inoutset;
  void *GetInPtr() { return &in; }
  void *GetOutPtr() { return &out; }
  void *GetInoutsetPtr() { return &inoutset; }

  void Reset() {}

  static DependencyData *New() { return DataPoolEntry<DependencyData>::New(); }

  DependencyData(DataPool<DependencyData> *dp)
      : DataPoolEntry<DependencyData>(dp) {}
};

struct TaskDependency {
  void *inPtr;
  void *outPtr;
  void *inoutsetPtr;
  ompt_dependence_type_t type;
  TaskDependency(DependencyData *depData, ompt_dependence_type_t type)
      : inPtr(depData->GetInPtr()), outPtr(depData->GetOutPtr()),
        inoutsetPtr(depData->GetInoutsetPtr()), type(type) {}
  void AnnotateBegin() {
    if (type == ompt_dependence_type_out ||
        type == ompt_dependence_type_inout ||
        type == ompt_dependence_type_mutexinoutset) {
      TsanHappensAfter(inPtr);
      TsanHappensAfter(outPtr);
      TsanHappensAfter(inoutsetPtr);
    } else if (type == ompt_dependence_type_in) {
      TsanHappensAfter(outPtr);
      TsanHappensAfter(inoutsetPtr);
    } else if (type == ompt_dependence_type_inoutset) {
      TsanHappensAfter(inPtr);
      TsanHappensAfter(outPtr);
    }
  }
  void AnnotateEnd() {
    if (type == ompt_dependence_type_out ||
        type == ompt_dependence_type_inout ||
        type == ompt_dependence_type_mutexinoutset) {
      TsanHappensBefore(outPtr);
    } else if (type == ompt_dependence_type_in) {
      TsanHappensBefore(inPtr);
    } else if (type == ompt_dependence_type_inoutset) {
      TsanHappensBefore(inoutsetPtr);
    }
  }
};
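
// Worked example (hedged; assumes the OpenMP depend clauses shown):
//   #pragma omp task depend(out: x)   // task A
//   #pragma omp task depend(in: x)    // task B
// Task A ends with TsanHappensBefore on x's "out" clock, and task B begins
// with TsanHappensAfter on the same "out" clock, so A's writes are ordered
// before B. Two "in" tasks only read the "out"/"inoutset" clocks and write
// the "in" clock, so they remain unordered with each other, matching OpenMP
// dependence semantics.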

struct ParallelData;
typedef DataPool<ParallelData> ParallelDataPool;
template <>
__thread ParallelDataPool *ParallelDataPool::ThreadDataPool = nullptr;

/// Data structure to store additional information for parallel regions.
struct ParallelData final : DataPoolEntry<ParallelData> {

  // Parallel fork is just another barrier, use Barrier[1]

  /// Two addresses for relationships with barriers.
  ompt_tsan_clockid Barrier[2];

  const void *codePtr;

  void *GetParallelPtr() { return &(Barrier[1]); }

  void *GetBarrierPtr(unsigned Index) { return &(Barrier[Index]); }

  ParallelData *Init(const void *codeptr) {
    codePtr = codeptr;
    return this;
  }

  void Reset() {}

  static ParallelData *New(const void *codeptr) {
    return DataPoolEntry<ParallelData>::New()->Init(codeptr);
  }

  ParallelData(DataPool<ParallelData> *dp) : DataPoolEntry<ParallelData>(dp) {}
};

static inline ParallelData *ToParallelData(ompt_data_t *parallel_data) {
  return reinterpret_cast<ParallelData *>(parallel_data->ptr);
}

struct Taskgroup;
typedef DataPool<Taskgroup> TaskgroupPool;
template <> __thread TaskgroupPool *TaskgroupPool::ThreadDataPool = nullptr;

/// Data structure to support stacking of taskgroups and allow synchronization.
struct Taskgroup final : DataPoolEntry<Taskgroup> {
  /// Its address is used for relationships of the taskgroup's task set.
  ompt_tsan_clockid Ptr;

  /// Reference to the parent taskgroup.
  Taskgroup *Parent;

  void *GetPtr() { return &Ptr; }

  Taskgroup *Init(Taskgroup *parent) {
    Parent = parent;
    return this;
  }

  void Reset() {}

  static Taskgroup *New(Taskgroup *Parent) {
    return DataPoolEntry<Taskgroup>::New()->Init(Parent);
  }

  Taskgroup(DataPool<Taskgroup> *dp) : DataPoolEntry<Taskgroup>(dp) {}
};

struct TaskData;
typedef DataPool<TaskData> TaskDataPool;
template <> __thread TaskDataPool *TaskDataPool::ThreadDataPool = nullptr;

/// Data structure to store additional information for tasks.
struct TaskData final : DataPoolEntry<TaskData> {
  /// Its address is used for relationships of this task.
  ompt_tsan_clockid Task{0};

  /// Child tasks use its address to declare a relationship to a taskwait in
  /// this task.
  ompt_tsan_clockid Taskwait{0};

  /// Whether this task is currently executing a barrier.
  bool InBarrier{false};

  /// The task-type flags (ompt_task_* bits) reported for this task.
  int TaskType{0};

  /// Count of the task's execution phases.
  int execution{0};

  /// Index of which barrier to use next.
  char BarrierIndex{0};

  /// Count how often this structure has been put into child tasks + 1.
  std::atomic_int RefCount{1};

  /// Reference to the parent that created this task.
  TaskData *Parent{nullptr};

  /// Reference to the implicit task in the stack above this task.
  TaskData *ImplicitTask{nullptr};

  /// Reference to the team of this task.
  ParallelData *Team{nullptr};

  /// Reference to the current taskgroup that this task either belongs to or
  /// that it just created.
  Taskgroup *TaskGroup{nullptr};

  /// Dependency information for this task.
  TaskDependency *Dependencies{nullptr};

  /// Number of dependency entries.
  unsigned DependencyCount{0};

  // The dependency map stores DependencyData objects representing the
  // dependency variables used by the sibling tasks created from this task.
  // We expect the dependency map to be needed rarely, so allocate it on
  // demand.
  std::unordered_map<void *, DependencyData *> *DependencyMap{nullptr};

#ifdef DEBUG
  int freed{0};
#endif

  bool isIncluded() { return TaskType & ompt_task_undeferred; }
  bool isUntied() { return TaskType & ompt_task_untied; }
  bool isFinal() { return TaskType & ompt_task_final; }
  bool isMergable() { return TaskType & ompt_task_mergeable; }
  bool isMerged() { return TaskType & ompt_task_merged; }

  bool isExplicit() { return TaskType & ompt_task_explicit; }
  bool isImplicit() { return TaskType & ompt_task_implicit; }
  bool isInitial() { return TaskType & ompt_task_initial; }
  bool isTarget() { return TaskType & ompt_task_target; }

  void *GetTaskPtr() { return &Task; }

  void *GetTaskwaitPtr() { return &Taskwait; }

  TaskData *Init(TaskData *parent, int taskType) {
    TaskType = taskType;
    Parent = parent;
    if (Parent != nullptr) {
      Team = Parent->Team;
      Parent->RefCount++;
      // Copy over the pointer to the taskgroup. This task may set up its own
      // stack but for now belongs to its parent's taskgroup.
      TaskGroup = Parent->TaskGroup;
    }
    return this;
  }

  TaskData *Init(ParallelData *team, int taskType) {
    TaskType = taskType;
    execution = 1;
    ImplicitTask = this;
    Team = team;
    return this;
  }

  void Reset() {
    InBarrier = false;
    TaskType = 0;
    execution = 0;
    BarrierIndex = 0;
    RefCount = 1;
    Parent = nullptr;
    ImplicitTask = nullptr;
    Team = nullptr;
    TaskGroup = nullptr;
    if (DependencyMap) {
      for (auto i : *DependencyMap)
        i.second->Delete();
      delete DependencyMap;
    }
    DependencyMap = nullptr;
    if (Dependencies)
      free(Dependencies);
    Dependencies = nullptr;
    DependencyCount = 0;
#ifdef DEBUG
    freed = 0;
#endif
  }

  static TaskData *New(TaskData *parent, int taskType) {
    return DataPoolEntry<TaskData>::New()->Init(parent, taskType);
  }

  static TaskData *New(ParallelData *team, int taskType) {
    return DataPoolEntry<TaskData>::New()->Init(team, taskType);
  }

  TaskData(DataPool<TaskData> *dp) : DataPoolEntry<TaskData>(dp) {}
};

static inline TaskData *ToTaskData(ompt_data_t *task_data) {
  return reinterpret_cast<TaskData *>(task_data->ptr);
}

/// Store a mutex for each wait_id to resolve race condition with callbacks.
std::unordered_map<ompt_wait_id_t, std::mutex> Locks;
std::mutex LocksMutex;

static void ompt_tsan_thread_begin(ompt_thread_t thread_type,
                                   ompt_data_t *thread_data) {
  ParallelDataPool::ThreadDataPool = new ParallelDataPool;
  TsanNewMemory(ParallelDataPool::ThreadDataPool,
                sizeof(ParallelDataPool::ThreadDataPool));
  TaskgroupPool::ThreadDataPool = new TaskgroupPool;
  TsanNewMemory(TaskgroupPool::ThreadDataPool,
                sizeof(TaskgroupPool::ThreadDataPool));
  TaskDataPool::ThreadDataPool = new TaskDataPool;
  TsanNewMemory(TaskDataPool::ThreadDataPool,
                sizeof(TaskDataPool::ThreadDataPool));
  DependencyDataPool::ThreadDataPool = new DependencyDataPool;
  TsanNewMemory(DependencyDataPool::ThreadDataPool,
                sizeof(DependencyDataPool::ThreadDataPool));
  thread_data->value = my_next_id();
}

static void ompt_tsan_thread_end(ompt_data_t *thread_data) {
  TsanIgnoreWritesBegin();
  delete ParallelDataPool::ThreadDataPool;
  delete TaskgroupPool::ThreadDataPool;
  delete TaskDataPool::ThreadDataPool;
  delete DependencyDataPool::ThreadDataPool;
  TsanIgnoreWritesEnd();
}

/// OMPT event callbacks for handling parallel regions.

static void ompt_tsan_parallel_begin(ompt_data_t *parent_task_data,
                                     const ompt_frame_t *parent_task_frame,
                                     ompt_data_t *parallel_data,
                                     uint32_t requested_team_size, int flag,
                                     const void *codeptr_ra) {
  ParallelData *Data = ParallelData::New(codeptr_ra);
  parallel_data->ptr = Data;

  TsanHappensBefore(Data->GetParallelPtr());
  if (archer_flags->ignore_serial && ToTaskData(parent_task_data)->isInitial())
    TsanIgnoreWritesEnd();
}

static void ompt_tsan_parallel_end(ompt_data_t *parallel_data,
                                   ompt_data_t *task_data, int flag,
                                   const void *codeptr_ra) {
  if (archer_flags->ignore_serial && ToTaskData(task_data)->isInitial())
    TsanIgnoreWritesBegin();
  ParallelData *Data = ToParallelData(parallel_data);
  TsanHappensAfter(Data->GetBarrierPtr(0));
  TsanHappensAfter(Data->GetBarrierPtr(1));

  Data->Delete();

#if (LLVM_VERSION >= 40)
  if (&__archer_get_omp_status) {
    if (__archer_get_omp_status() == 0 && archer_flags->flush_shadow)
      __tsan_flush_memory();
  }
#endif
}

static void ompt_tsan_implicit_task(ompt_scope_endpoint_t endpoint,
                                    ompt_data_t *parallel_data,
                                    ompt_data_t *task_data,
                                    unsigned int team_size,
                                    unsigned int thread_num, int type) {
  switch (endpoint) {
  case ompt_scope_begin:
    if (type & ompt_task_initial) {
      parallel_data->ptr = ParallelData::New(nullptr);
    }
    task_data->ptr = TaskData::New(ToParallelData(parallel_data), type);
    TsanHappensAfter(ToParallelData(parallel_data)->GetParallelPtr());
    TsanFuncEntry(ToParallelData(parallel_data)->codePtr);
    break;
  case ompt_scope_end: {
    TaskData *Data = ToTaskData(task_data);
#ifdef DEBUG
    assert(Data->freed == 0 && "Implicit task end should only be called once!");
    Data->freed = 1;
#endif
    assert(Data->RefCount == 1 &&
           "All tasks should have finished at the implicit barrier!");
    if (type & ompt_task_initial) {
      Data->Team->Delete();
    }
    Data->Delete();
    TsanFuncExit();
    break;
  }
  case ompt_scope_beginend:
    // Should not occur according to OpenMP 5.1
    // Tested in OMPT tests
    break;
  }
}

static void ompt_tsan_sync_region(ompt_sync_region_t kind,
                                  ompt_scope_endpoint_t endpoint,
                                  ompt_data_t *parallel_data,
                                  ompt_data_t *task_data,
                                  const void *codeptr_ra) {
  TaskData *Data = ToTaskData(task_data);
  switch (endpoint) {
  case ompt_scope_begin:
  case ompt_scope_beginend:
    TsanFuncEntry(codeptr_ra);
    switch (kind) {
    case ompt_sync_region_barrier_implementation:
    case ompt_sync_region_barrier_implicit:
    case ompt_sync_region_barrier_explicit:
    case ompt_sync_region_barrier_implicit_parallel:
    case ompt_sync_region_barrier_implicit_workshare:
    case ompt_sync_region_barrier_teams:
    case ompt_sync_region_barrier: {
      char BarrierIndex = Data->BarrierIndex;
      TsanHappensBefore(Data->Team->GetBarrierPtr(BarrierIndex));

      if (hasReductionCallback < ompt_set_always) {
        // We ignore writes inside the barrier. These would occur during either
        // 1. reductions performed by the runtime, which are guaranteed to be
        //    race-free, or
        // 2. execution of another task.
        // For the latter case we will re-enable tracking in task_switch.
        Data->InBarrier = true;
        TsanIgnoreWritesBegin();
      }

      break;
    }

    case ompt_sync_region_taskwait:
      break;

    case ompt_sync_region_taskgroup:
      Data->TaskGroup = Taskgroup::New(Data->TaskGroup);
      break;

    case ompt_sync_region_reduction:
      // should never be reached
      break;
    }
    if (endpoint == ompt_scope_begin)
      break;
    KMP_FALLTHROUGH();
  case ompt_scope_end:
    TsanFuncExit();
    switch (kind) {
    case ompt_sync_region_barrier_implementation:
    case ompt_sync_region_barrier_implicit:
    case ompt_sync_region_barrier_explicit:
    case ompt_sync_region_barrier_implicit_parallel:
    case ompt_sync_region_barrier_implicit_workshare:
    case ompt_sync_region_barrier_teams:
    case ompt_sync_region_barrier: {
      if (hasReductionCallback < ompt_set_always) {
        // We want to track writes after the barrier again.
        Data->InBarrier = false;
        TsanIgnoreWritesEnd();
      }

      char BarrierIndex = Data->BarrierIndex;
      // The barrier will end after it has been entered by all threads.
      if (parallel_data)
        TsanHappensAfter(Data->Team->GetBarrierPtr(BarrierIndex));

      // It is not guaranteed that all threads have exited this barrier before
      // we enter the next one. So we will use a different address.
      // We are however guaranteed that this current barrier is finished
      // by the time we exit the next one. So we can then reuse the first
      // address.
      Data->BarrierIndex = (BarrierIndex + 1) % 2;
      break;
    }

    case ompt_sync_region_taskwait: {
      if (Data->execution > 1)
        TsanHappensAfter(Data->GetTaskwaitPtr());
      break;
    }

    case ompt_sync_region_taskgroup: {
      assert(Data->TaskGroup != nullptr &&
             "Should have at least one taskgroup!");

      TsanHappensAfter(Data->TaskGroup->GetPtr());

      // Delete this allocated taskgroup; all descendant tasks are finished by
      // now.
      Taskgroup *Parent = Data->TaskGroup->Parent;
      Data->TaskGroup->Delete();
      Data->TaskGroup = Parent;
      break;
    }

    case ompt_sync_region_reduction:
      // Should not occur according to OpenMP 5.1
      // Tested in OMPT tests
      break;
    }
    break;
  }
}
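
// Barrier bookkeeping, illustrated (a sketch; no state beyond the code
// above): with the two alternating clocks Barrier[0] and Barrier[1], barrier
// number N of a team uses the clock Barrier[N % 2]. Every thread annotates
// TsanHappensBefore on that clock at barrier entry and TsanHappensAfter on
// exit, so all work before the barrier is ordered before all work after it;
// flipping the index per barrier keeps a late-leaving thread from polluting
// the next barrier's clock.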

static void ompt_tsan_reduction(ompt_sync_region_t kind,
                                ompt_scope_endpoint_t endpoint,
                                ompt_data_t *parallel_data,
                                ompt_data_t *task_data,
                                const void *codeptr_ra) {
  switch (endpoint) {
  case ompt_scope_begin:
    switch (kind) {
    case ompt_sync_region_reduction:
      TsanIgnoreWritesBegin();
      break;
    default:
      break;
    }
    break;
  case ompt_scope_end:
    switch (kind) {
    case ompt_sync_region_reduction:
      TsanIgnoreWritesEnd();
      break;
    default:
      break;
    }
    break;
  case ompt_scope_beginend:
    // Should not occur according to OpenMP 5.1
    // Tested in OMPT tests
    // Would have no implications for data race detection
    break;
  }
}

/// OMPT event callbacks for handling tasks.

static void ompt_tsan_task_create(
    ompt_data_t *parent_task_data,    /* id of parent task */
    const ompt_frame_t *parent_frame, /* frame data for parent task */
    ompt_data_t *new_task_data,       /* id of created task */
    int type, int has_dependences,
    const void *codeptr_ra) /* pointer to outlined function */
{
  TaskData *Data;
  assert(new_task_data->ptr == NULL &&
         "Task data should be initialized to NULL");
  if (type & ompt_task_initial) {
    ompt_data_t *parallel_data;
    int team_size = 1;
    ompt_get_parallel_info(0, &parallel_data, &team_size);
    ParallelData *PData = ParallelData::New(nullptr);
    parallel_data->ptr = PData;

    Data = TaskData::New(PData, type);
    new_task_data->ptr = Data;
  } else if (type & ompt_task_undeferred) {
    Data = TaskData::New(ToTaskData(parent_task_data), type);
    new_task_data->ptr = Data;
  } else if (type & ompt_task_explicit || type & ompt_task_target) {
    Data = TaskData::New(ToTaskData(parent_task_data), type);
    new_task_data->ptr = Data;

    // Use the newly created address. We cannot use a single address from the
    // parent because that would declare wrong relationships with other
    // sibling tasks that may be created before this task is started!
    TsanHappensBefore(Data->GetTaskPtr());
    ToTaskData(parent_task_data)->execution++;
  }
}

static void freeTask(TaskData *task) {
  while (task != nullptr && --task->RefCount == 0) {
    TaskData *Parent = task->Parent;
    task->Delete();
    task = Parent;
  }
}

static void releaseDependencies(TaskData *task) {
  for (unsigned i = 0; i < task->DependencyCount; i++) {
    task->Dependencies[i].AnnotateEnd();
  }
}

static void acquireDependencies(TaskData *task) {
  for (unsigned i = 0; i < task->DependencyCount; i++) {
    task->Dependencies[i].AnnotateBegin();
  }
}

static void ompt_tsan_task_schedule(ompt_data_t *first_task_data,
                                    ompt_task_status_t prior_task_status,
                                    ompt_data_t *second_task_data) {

  //
  // The necessary action depends on prior_task_status:
  //
  // ompt_task_early_fulfill = 5,
  //  -> ignored
  //
  // ompt_task_late_fulfill = 6,
  //  -> first completed, first freed, second ignored
  //
  // ompt_task_complete = 1,
  // ompt_task_cancel = 3,
  //  -> first completed, first freed, second starts
  //
  // ompt_task_detach = 4,
  // ompt_task_yield = 2,
  // ompt_task_switch = 7
  //  -> first suspended, second starts
  //

  if (prior_task_status == ompt_task_early_fulfill)
    return;

  TaskData *FromTask = ToTaskData(first_task_data);

  // Legacy handling for missing reduction callback
  if (hasReductionCallback < ompt_set_always && FromTask->InBarrier) {
    // We want to ignore writes in the runtime code during barriers,
    // but not when executing tasks with user code!
    TsanIgnoreWritesEnd();
  }

  // The late fulfill happens after the detached task finished execution
  if (prior_task_status == ompt_task_late_fulfill)
    TsanHappensAfter(FromTask->GetTaskPtr());

  // task completed execution
  if (prior_task_status == ompt_task_complete ||
      prior_task_status == ompt_task_cancel ||
      prior_task_status == ompt_task_late_fulfill) {
    // Included tasks are executed sequentially, no need to track
    // synchronization
    if (!FromTask->isIncluded()) {
      // Task will finish before a barrier in the surrounding parallel region
      // ...
      ParallelData *PData = FromTask->Team;
      TsanHappensBefore(
          PData->GetBarrierPtr(FromTask->ImplicitTask->BarrierIndex));

      // ... and before an eventual taskwait by the parent thread.
      TsanHappensBefore(FromTask->Parent->GetTaskwaitPtr());

      if (FromTask->TaskGroup != nullptr) {
        // This task is part of a taskgroup, so it will finish before the
        // corresponding taskgroup_end.
        TsanHappensBefore(FromTask->TaskGroup->GetPtr());
      }
    }

    // release dependencies
    releaseDependencies(FromTask);
    // free the previously running task
    freeTask(FromTask);
  }

  // For late fulfill of detached task, there is no task to schedule to
  if (prior_task_status == ompt_task_late_fulfill) {
    return;
  }

  TaskData *ToTask = ToTaskData(second_task_data);
  // Legacy handling for missing reduction callback
  if (hasReductionCallback < ompt_set_always && ToTask->InBarrier) {
    // We re-enter runtime code which currently performs a barrier.
    TsanIgnoreWritesBegin();
  }

  // task suspended
  if (prior_task_status == ompt_task_switch ||
      prior_task_status == ompt_task_yield ||
      prior_task_status == ompt_task_detach) {
    // Task may be resumed at a later point in time.
    TsanHappensBefore(FromTask->GetTaskPtr());
    ToTask->ImplicitTask = FromTask->ImplicitTask;
    assert(ToTask->ImplicitTask != NULL &&
           "A task belongs to a team and has an implicit task on the stack");
  }

  // Handle dependencies on first execution of the task
  if (ToTask->execution == 0) {
    ToTask->execution++;
    acquireDependencies(ToTask);
  }
  // 1. Task will begin execution after it has been created.
  // 2. Task will resume after it has been switched away.
  TsanHappensAfter(ToTask->GetTaskPtr());
}

static void ompt_tsan_dependences(ompt_data_t *task_data,
                                  const ompt_dependence_t *deps, int ndeps) {
  if (ndeps > 0) {
    // Copy the data to use it in task_switch and task_end.
    TaskData *Data = ToTaskData(task_data);
    if (!Data->Parent) {
      // Return since doacross dependences are not supported yet.
      return;
    }
    if (!Data->Parent->DependencyMap)
      Data->Parent->DependencyMap =
          new std::unordered_map<void *, DependencyData *>();
    Data->Dependencies =
        (TaskDependency *)malloc(sizeof(TaskDependency) * ndeps);
    Data->DependencyCount = ndeps;
    for (int i = 0; i < ndeps; i++) {
      auto ret = Data->Parent->DependencyMap->insert(
          std::make_pair(deps[i].variable.ptr, nullptr));
      if (ret.second) {
        ret.first->second = DependencyData::New();
      }
      new ((void *)(Data->Dependencies + i))
          TaskDependency(ret.first->second, deps[i].dependence_type);
    }

    // This callback is executed before this task is first started.
    TsanHappensBefore(Data->GetTaskPtr());
  }
}
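
// Note on sharing: sibling tasks that name the same dependence variable hit
// the same DependencyMap slot in their common parent, so their TaskDependency
// entries wrap one shared DependencyData object and its in/out/inoutset
// clocks. For example (hypothetical), two tasks with depend(inout: a) created
// by the same parent both resolve `a` to a single DependencyData object,
// which is what serializes them.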

/// OMPT event callbacks for handling locking.
static void ompt_tsan_mutex_acquired(ompt_mutex_t kind, ompt_wait_id_t wait_id,
                                     const void *codeptr_ra) {

  // Acquire our own lock to make sure that
  // 1. the previous release has finished.
  // 2. the next acquire doesn't start before we have finished our release.
  LocksMutex.lock();
  std::mutex &Lock = Locks[wait_id];
  LocksMutex.unlock();

  Lock.lock();
  TsanHappensAfter(&Lock);
}

static void ompt_tsan_mutex_released(ompt_mutex_t kind, ompt_wait_id_t wait_id,
                                     const void *codeptr_ra) {
  LocksMutex.lock();
  std::mutex &Lock = Locks[wait_id];
  LocksMutex.unlock();
  TsanHappensBefore(&Lock);

  Lock.unlock();
}
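
// Effect, sketched on a hypothetical omp_lock_t L: thread 1 releases L, which
// annotates TsanHappensBefore on the shadow std::mutex stored for L's
// wait_id; thread 2 then acquires L, which annotates TsanHappensAfter on the
// same shadow mutex. Writes made by thread 1 inside the critical section are
// thus ordered before thread 2's reads, even though TSan never observes the
// OpenMP lock itself.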

// callback, signature, variable to store the result, required support level
#define SET_OPTIONAL_CALLBACK_T(event, type, result, level)                    \
  do {                                                                         \
    ompt_callback_##type##_t tsan_##event = &ompt_tsan_##event;                \
    result = ompt_set_callback(ompt_callback_##event,                          \
                               (ompt_callback_t)tsan_##event);                 \
    if (result < level)                                                        \
      printf("Registered callback '" #event "' is not supported at " #level    \
             " (%i)\n",                                                        \
             result);                                                          \
  } while (0)

#define SET_CALLBACK_T(event, type)                                            \
  do {                                                                         \
    int res;                                                                   \
    SET_OPTIONAL_CALLBACK_T(event, type, res, ompt_set_always);                \
  } while (0)

#define SET_CALLBACK(event) SET_CALLBACK_T(event, event)
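
// For example, SET_CALLBACK(parallel_begin) expands (via SET_CALLBACK_T) to a
// registration of ompt_tsan_parallel_begin for ompt_callback_parallel_begin,
// and prints a diagnostic if the runtime reports less than ompt_set_always
// support for that event.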

static int ompt_tsan_initialize(ompt_function_lookup_t lookup, int device_num,
                                ompt_data_t *tool_data) {
  const char *options = getenv("TSAN_OPTIONS");
  TsanFlags tsan_flags(options);

  ompt_set_callback_t ompt_set_callback =
      (ompt_set_callback_t)lookup("ompt_set_callback");
  if (ompt_set_callback == NULL) {
    std::cerr << "Could not set callback, exiting..." << std::endl;
    std::exit(1);
  }
  ompt_get_parallel_info =
      (ompt_get_parallel_info_t)lookup("ompt_get_parallel_info");
  ompt_get_thread_data = (ompt_get_thread_data_t)lookup("ompt_get_thread_data");

  if (ompt_get_parallel_info == NULL) {
    fprintf(stderr, "Could not get inquiry function 'ompt_get_parallel_info', "
                    "exiting...\n");
    exit(1);
  }

#if (defined __APPLE__ && defined __MACH__)
#define findTsanFunction(f, fSig)                                              \
  do {                                                                         \
    if (NULL == (f = fSig dlsym(RTLD_DEFAULT, #f)))                            \
      printf("Unable to find TSan function " #f ".\n");                        \
  } while (0)

  findTsanFunction(AnnotateHappensAfter,
                   (void (*)(const char *, int, const volatile void *)));
  findTsanFunction(AnnotateHappensBefore,
                   (void (*)(const char *, int, const volatile void *)));
  findTsanFunction(AnnotateIgnoreWritesBegin, (void (*)(const char *, int)));
  findTsanFunction(AnnotateIgnoreWritesEnd, (void (*)(const char *, int)));
  findTsanFunction(
      AnnotateNewMemory,
      (void (*)(const char *, int, const volatile void *, size_t)));
  findTsanFunction(__tsan_func_entry, (void (*)(const void *)));
  findTsanFunction(__tsan_func_exit, (void (*)(void)));
#endif

  SET_CALLBACK(thread_begin);
  SET_CALLBACK(thread_end);
  SET_CALLBACK(parallel_begin);
  SET_CALLBACK(implicit_task);
  SET_CALLBACK(sync_region);
  SET_CALLBACK(parallel_end);

  SET_CALLBACK(task_create);
  SET_CALLBACK(task_schedule);
  SET_CALLBACK(dependences);

  SET_CALLBACK_T(mutex_acquired, mutex);
  SET_CALLBACK_T(mutex_released, mutex);
  SET_OPTIONAL_CALLBACK_T(reduction, sync_region, hasReductionCallback,
                          ompt_set_never);

  if (!tsan_flags.ignore_noninstrumented_modules)
    fprintf(stderr,
            "Warning: please export "
            "TSAN_OPTIONS='ignore_noninstrumented_modules=1' "
            "to avoid false positive reports from the OpenMP runtime!\n");
  if (archer_flags->ignore_serial)
    TsanIgnoreWritesBegin();

  return 1; // success
}

static void ompt_tsan_finalize(ompt_data_t *tool_data) {
  if (archer_flags->ignore_serial)
    TsanIgnoreWritesEnd();
  if (archer_flags->print_max_rss) {
    struct rusage end;
    getrusage(RUSAGE_SELF, &end);
    printf("MAX RSS[KBytes] during execution: %ld\n", end.ru_maxrss);
  }

  if (archer_flags)
    delete archer_flags;
}

extern "C" ompt_start_tool_result_t *
ompt_start_tool(unsigned int omp_version, const char *runtime_version) {
  const char *options = getenv("ARCHER_OPTIONS");
  archer_flags = new ArcherFlags(options);
  if (!archer_flags->enabled) {
    if (archer_flags->verbose)
      std::cout << "Archer disabled, stopping operation" << std::endl;
    delete archer_flags;
    return NULL;
  }

  pagesize = getpagesize();

  static ompt_start_tool_result_t ompt_start_tool_result = {
      &ompt_tsan_initialize, &ompt_tsan_finalize, {0}};

  // The OMPT start-up code uses dlopen with RTLD_LAZY. Therefore, we cannot
  // rely on dlopen to fail if TSan is missing, but would get a runtime error
  // on the first TSan call. We use RunningOnValgrind to detect whether an
  // implementation of the annotation interface is available in this
  // execution, or to disable the tool (by returning NULL).

  runOnTsan = 1;
  RunningOnValgrind();
  if (!runOnTsan) // if we are not running on TSan, give a different tool the
                  // chance to be loaded
  {
    if (archer_flags->verbose)
      std::cout << "Archer detected an OpenMP application without TSan; "
                   "stopping operation"
                << std::endl;
    delete archer_flags;
    return NULL;
  }

  if (archer_flags->verbose)
    std::cout << "Archer detected an OpenMP application with TSan, supplying "
                 "OpenMP synchronization semantics"
              << std::endl;
  return &ompt_start_tool_result;
}
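
// Activation note (hedged, per the OMPT tool-activation protocol):
// ompt_start_tool is the standard OMPT entry point. The OpenMP runtime looks
// for this symbol in the application's address space and in libraries named
// by OMP_TOOL_LIBRARIES, and activates the tool only if it returns a non-NULL
// ompt_start_tool_result_t; returning NULL above therefore cleanly hands the
// activation slot over to a different tool.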