1 //===- llvm/unittest/IR/OpenMPIRBuilderTest.cpp - OpenMPIRBuilder tests ---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/Frontend/OpenMP/OMPConstants.h"
10 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
11 #include "llvm/IR/BasicBlock.h"
12 #include "llvm/IR/DIBuilder.h"
13 #include "llvm/IR/Function.h"
14 #include "llvm/IR/InstIterator.h"
15 #include "llvm/IR/LLVMContext.h"
16 #include "llvm/IR/Module.h"
17 #include "llvm/IR/Verifier.h"
18 #include "llvm/Passes/PassBuilder.h"
19 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
20 #include "gtest/gtest.h"
21 
22 using namespace llvm;
23 using namespace omp;
24 
25 namespace {
26 
27 /// Create an instruction that uses the values in \p Values. We use "printf"
28 /// just because it is often used for this purpose in test code, but it is never
29 /// executed here.
30 static CallInst *createPrintfCall(IRBuilder<> &Builder, StringRef FormatStr,
31                                   ArrayRef<Value *> Values) {
32   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
33 
34   GlobalVariable *GV = Builder.CreateGlobalString(FormatStr, "", 0, M);
35   Constant *Zero = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0);
36   Constant *Indices[] = {Zero, Zero};
37   Constant *FormatStrConst =
38       ConstantExpr::getInBoundsGetElementPtr(GV->getValueType(), GV, Indices);
39 
40   Function *PrintfDecl = M->getFunction("printf");
41   if (!PrintfDecl) {
42     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
43     FunctionType *Ty = FunctionType::get(Builder.getInt32Ty(), true);
44     PrintfDecl = Function::Create(Ty, Linkage, "printf", M);
45   }
46 
47   SmallVector<Value *, 4> Args;
48   Args.push_back(FormatStrConst);
49   Args.append(Values.begin(), Values.end());
50   return Builder.CreateCall(PrintfDecl, Args);
51 }
52 
53 /// Verify that blocks in \p RefOrder are corresponds to the depth-first visit
54 /// order the control flow of \p F.
55 ///
56 /// This is an easy way to verify the branching structure of the CFG without
57 /// checking every branch instruction individually. For the CFG of a
58 /// CanonicalLoopInfo, the Cond BB's terminating branch's first edge is entering
59 /// the body, i.e. the DFS order corresponds to the execution order with one
60 /// loop iteration.
61 static testing::AssertionResult
62 verifyDFSOrder(Function *F, ArrayRef<BasicBlock *> RefOrder) {
63   ArrayRef<BasicBlock *>::iterator It = RefOrder.begin();
64   ArrayRef<BasicBlock *>::iterator E = RefOrder.end();
65 
66   df_iterator_default_set<BasicBlock *, 16> Visited;
67   auto DFS = llvm::depth_first_ext(&F->getEntryBlock(), Visited);
68 
69   BasicBlock *Prev = nullptr;
70   for (BasicBlock *BB : DFS) {
71     if (It != E && BB == *It) {
72       Prev = *It;
73       ++It;
74     }
75   }
76 
77   if (It == E)
78     return testing::AssertionSuccess();
79   if (!Prev)
80     return testing::AssertionFailure()
81            << "Did not find " << (*It)->getName() << " in control flow";
82   return testing::AssertionFailure()
83          << "Expected " << Prev->getName() << " before " << (*It)->getName()
84          << " in control flow";
85 }
86 
87 /// Verify that blocks in \p RefOrder are in the same relative order in the
88 /// linked lists of blocks in \p F. The linked list may contain additional
89 /// blocks in-between.
90 ///
91 /// While the order in the linked list is not relevant for semantics, keeping
92 /// the order roughly in execution order makes its printout easier to read.
93 static testing::AssertionResult
94 verifyListOrder(Function *F, ArrayRef<BasicBlock *> RefOrder) {
95   ArrayRef<BasicBlock *>::iterator It = RefOrder.begin();
96   ArrayRef<BasicBlock *>::iterator E = RefOrder.end();
97 
98   BasicBlock *Prev = nullptr;
99   for (BasicBlock &BB : *F) {
100     if (It != E && &BB == *It) {
101       Prev = *It;
102       ++It;
103     }
104   }
105 
106   if (It == E)
107     return testing::AssertionSuccess();
108   if (!Prev)
109     return testing::AssertionFailure() << "Did not find " << (*It)->getName()
110                                        << " in function " << F->getName();
111   return testing::AssertionFailure()
112          << "Expected " << Prev->getName() << " before " << (*It)->getName()
113          << " in function " << F->getName();
114 }
115 
116 /// Populate Calls with call instructions calling the function with the given
117 /// FnID from the given function F.
118 static void findCalls(Function *F, omp::RuntimeFunction FnID,
119                       OpenMPIRBuilder &OMPBuilder,
120                       SmallVectorImpl<CallInst *> &Calls) {
121   Function *Fn = OMPBuilder.getOrCreateRuntimeFunctionPtr(FnID);
122   for (BasicBlock &BB : *F) {
123     for (Instruction &I : BB) {
124       auto *Call = dyn_cast<CallInst>(&I);
125       if (Call && Call->getCalledFunction() == Fn)
126         Calls.push_back(Call);
127     }
128   }
129 }
130 
131 /// Assuming \p F contains only one call to the function with the given \p FnID,
132 /// return that call.
133 static CallInst *findSingleCall(Function *F, omp::RuntimeFunction FnID,
134                                 OpenMPIRBuilder &OMPBuilder) {
135   SmallVector<CallInst *, 1> Calls;
136   findCalls(F, FnID, OMPBuilder, Calls);
137   EXPECT_EQ(1u, Calls.size());
138   if (Calls.size() != 1)
139     return nullptr;
140   return Calls.front();
141 }
142 
143 class OpenMPIRBuilderTest : public testing::Test {
144 protected:
145   void SetUp() override {
146     M.reset(new Module("MyModule", Ctx));
147     FunctionType *FTy =
148         FunctionType::get(Type::getVoidTy(Ctx), {Type::getInt32Ty(Ctx)},
149                           /*isVarArg=*/false);
150     F = Function::Create(FTy, Function::ExternalLinkage, "", M.get());
151     BB = BasicBlock::Create(Ctx, "", F);
152 
153     DIBuilder DIB(*M);
154     auto File = DIB.createFile("test.dbg", "/src", llvm::None,
155                                Optional<StringRef>("/src/test.dbg"));
156     auto CU =
157         DIB.createCompileUnit(dwarf::DW_LANG_C, File, "llvm-C", true, "", 0);
158     auto Type = DIB.createSubroutineType(DIB.getOrCreateTypeArray(None));
159     auto SP = DIB.createFunction(
160         CU, "foo", "", File, 1, Type, 1, DINode::FlagZero,
161         DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized);
162     F->setSubprogram(SP);
163     auto Scope = DIB.createLexicalBlockFile(SP, File, 0);
164     DIB.finalize();
165     DL = DILocation::get(Ctx, 3, 7, Scope);
166   }
167 
168   void TearDown() override {
169     BB = nullptr;
170     M.reset();
171   }
172 
173   /// Create a function with a simple loop that calls printf using the logical
174   /// loop counter for use with tests that need a CanonicalLoopInfo object.
175   CanonicalLoopInfo *buildSingleLoopFunction(DebugLoc DL,
176                                              OpenMPIRBuilder &OMPBuilder,
177                                              int UseIVBits,
178                                              CallInst **Call = nullptr,
179                                              BasicBlock **BodyCode = nullptr) {
180     OMPBuilder.initialize();
181     F->setName("func");
182 
183     IRBuilder<> Builder(BB);
184     OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
185     Value *TripCount = F->getArg(0);
186 
187     Type *IVType = Type::getIntNTy(Builder.getContext(), UseIVBits);
188     Value *CastedTripCount =
189         Builder.CreateZExtOrTrunc(TripCount, IVType, "tripcount");
190 
191     auto LoopBodyGenCB = [&](OpenMPIRBuilder::InsertPointTy CodeGenIP,
192                              llvm::Value *LC) {
193       Builder.restoreIP(CodeGenIP);
194       if (BodyCode)
195         *BodyCode = Builder.GetInsertBlock();
196 
197       // Add something that consumes the induction variable to the body.
198       CallInst *CallInst = createPrintfCall(Builder, "%d\\n", {LC});
199       if (Call)
200         *Call = CallInst;
201     };
202     CanonicalLoopInfo *Loop =
203         OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, CastedTripCount);
204 
205     // Finalize the function.
206     Builder.restoreIP(Loop->getAfterIP());
207     Builder.CreateRetVoid();
208 
209     return Loop;
210   }
211 
212   LLVMContext Ctx;
213   std::unique_ptr<Module> M;
214   Function *F;
215   BasicBlock *BB;
216   DebugLoc DL;
217 };
218 
219 class OpenMPIRBuilderTestWithParams
220     : public OpenMPIRBuilderTest,
221       public ::testing::WithParamInterface<omp::OMPScheduleType> {};
222 
223 class OpenMPIRBuilderTestWithIVBits
224     : public OpenMPIRBuilderTest,
225       public ::testing::WithParamInterface<int> {};
226 
227 // Returns the value stored in the given allocation. Returns null if the given
228 // value is not a result of an InstTy instruction, if no value is stored or if
229 // there is more than one store.
230 template <typename InstTy> static Value *findStoredValue(Value *AllocaValue) {
231   Instruction *Inst = dyn_cast<InstTy>(AllocaValue);
232   if (!Inst)
233     return nullptr;
234   StoreInst *Store = nullptr;
235   for (Use &U : Inst->uses()) {
236     if (auto *CandidateStore = dyn_cast<StoreInst>(U.getUser())) {
237       EXPECT_EQ(Store, nullptr);
238       Store = CandidateStore;
239     }
240   }
241   if (!Store)
242     return nullptr;
243   return Store->getValueOperand();
244 }
245 
246 // Returns the value stored in the aggregate argument of an outlined function,
247 // or nullptr if it is not found.
248 static Value *findStoredValueInAggregateAt(LLVMContext &Ctx, Value *Aggregate,
249                                            unsigned Idx) {
250   GetElementPtrInst *GEPAtIdx = nullptr;
251   // Find GEP instruction at that index.
252   for (User *Usr : Aggregate->users()) {
253     GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Usr);
254     if (!GEP)
255       continue;
256 
257     if (GEP->getOperand(2) != ConstantInt::get(Type::getInt32Ty(Ctx), Idx))
258       continue;
259 
260     EXPECT_EQ(GEPAtIdx, nullptr);
261     GEPAtIdx = GEP;
262   }
263 
264   EXPECT_NE(GEPAtIdx, nullptr);
265   EXPECT_EQ(GEPAtIdx->getNumUses(), 1U);
266 
267   // Find the value stored to the aggregate.
268   StoreInst *StoreToAgg = dyn_cast<StoreInst>(*GEPAtIdx->user_begin());
269   Value *StoredAggValue = StoreToAgg->getValueOperand();
270 
271   Value *StoredValue = nullptr;
272 
273   // Find the value stored to the value stored in the aggregate.
274   for (User *Usr : StoredAggValue->users()) {
275     StoreInst *Store = dyn_cast<StoreInst>(Usr);
276     if (!Store)
277       continue;
278 
279     if (Store->getPointerOperand() != StoredAggValue)
280       continue;
281 
282     EXPECT_EQ(StoredValue, nullptr);
283     StoredValue = Store->getValueOperand();
284   }
285 
286   return StoredValue;
287 }
288 
289 // Returns the aggregate that the value is originating from.
290 static Value *findAggregateFromValue(Value *V) {
291   // Expects a load instruction that loads from the aggregate.
292   LoadInst *Load = dyn_cast<LoadInst>(V);
293   EXPECT_NE(Load, nullptr);
294   // Find the GEP instruction used in the load instruction.
295   GetElementPtrInst *GEP =
296       dyn_cast<GetElementPtrInst>(Load->getPointerOperand());
297   EXPECT_NE(GEP, nullptr);
298   // Find the aggregate used in the GEP instruction.
299   Value *Aggregate = GEP->getPointerOperand();
300 
301   return Aggregate;
302 }
303 
304 TEST_F(OpenMPIRBuilderTest, CreateBarrier) {
305   OpenMPIRBuilder OMPBuilder(*M);
306   OMPBuilder.initialize();
307 
308   IRBuilder<> Builder(BB);
309 
310   OMPBuilder.createBarrier({IRBuilder<>::InsertPoint()}, OMPD_for);
311   EXPECT_TRUE(M->global_empty());
312   EXPECT_EQ(M->size(), 1U);
313   EXPECT_EQ(F->size(), 1U);
314   EXPECT_EQ(BB->size(), 0U);
315 
316   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()});
317   OMPBuilder.createBarrier(Loc, OMPD_for);
318   EXPECT_FALSE(M->global_empty());
319   EXPECT_EQ(M->size(), 3U);
320   EXPECT_EQ(F->size(), 1U);
321   EXPECT_EQ(BB->size(), 2U);
322 
323   CallInst *GTID = dyn_cast<CallInst>(&BB->front());
324   EXPECT_NE(GTID, nullptr);
325   EXPECT_EQ(GTID->arg_size(), 1U);
326   EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
327   EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory());
328   EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory());
329 
330   CallInst *Barrier = dyn_cast<CallInst>(GTID->getNextNode());
331   EXPECT_NE(Barrier, nullptr);
332   EXPECT_EQ(Barrier->arg_size(), 2U);
333   EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_barrier");
334   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory());
335   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory());
336 
337   EXPECT_EQ(cast<CallInst>(Barrier)->getArgOperand(1), GTID);
338 
339   Builder.CreateUnreachable();
340   EXPECT_FALSE(verifyModule(*M, &errs()));
341 }
342 
343 TEST_F(OpenMPIRBuilderTest, CreateCancel) {
344   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
345   OpenMPIRBuilder OMPBuilder(*M);
346   OMPBuilder.initialize();
347 
348   BasicBlock *CBB = BasicBlock::Create(Ctx, "", F);
349   new UnreachableInst(Ctx, CBB);
350   auto FiniCB = [&](InsertPointTy IP) {
351     ASSERT_NE(IP.getBlock(), nullptr);
352     ASSERT_EQ(IP.getBlock()->end(), IP.getPoint());
353     BranchInst::Create(CBB, IP.getBlock());
354   };
355   OMPBuilder.pushFinalizationCB({FiniCB, OMPD_parallel, true});
356 
357   IRBuilder<> Builder(BB);
358 
359   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()});
360   auto NewIP = OMPBuilder.createCancel(Loc, nullptr, OMPD_parallel);
361   Builder.restoreIP(NewIP);
362   EXPECT_FALSE(M->global_empty());
363   EXPECT_EQ(M->size(), 4U);
364   EXPECT_EQ(F->size(), 4U);
365   EXPECT_EQ(BB->size(), 4U);
366 
367   CallInst *GTID = dyn_cast<CallInst>(&BB->front());
368   EXPECT_NE(GTID, nullptr);
369   EXPECT_EQ(GTID->arg_size(), 1U);
370   EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
371   EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory());
372   EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory());
373 
374   CallInst *Cancel = dyn_cast<CallInst>(GTID->getNextNode());
375   EXPECT_NE(Cancel, nullptr);
376   EXPECT_EQ(Cancel->arg_size(), 3U);
377   EXPECT_EQ(Cancel->getCalledFunction()->getName(), "__kmpc_cancel");
378   EXPECT_FALSE(Cancel->getCalledFunction()->doesNotAccessMemory());
379   EXPECT_FALSE(Cancel->getCalledFunction()->doesNotFreeMemory());
380   EXPECT_EQ(Cancel->getNumUses(), 1U);
381   Instruction *CancelBBTI = Cancel->getParent()->getTerminator();
382   EXPECT_EQ(CancelBBTI->getNumSuccessors(), 2U);
383   EXPECT_EQ(CancelBBTI->getSuccessor(0), NewIP.getBlock());
384   EXPECT_EQ(CancelBBTI->getSuccessor(1)->size(), 3U);
385   CallInst *GTID1 = dyn_cast<CallInst>(&CancelBBTI->getSuccessor(1)->front());
386   EXPECT_NE(GTID1, nullptr);
387   EXPECT_EQ(GTID1->arg_size(), 1U);
388   EXPECT_EQ(GTID1->getCalledFunction()->getName(), "__kmpc_global_thread_num");
389   EXPECT_FALSE(GTID1->getCalledFunction()->doesNotAccessMemory());
390   EXPECT_FALSE(GTID1->getCalledFunction()->doesNotFreeMemory());
391   CallInst *Barrier = dyn_cast<CallInst>(GTID1->getNextNode());
392   EXPECT_NE(Barrier, nullptr);
393   EXPECT_EQ(Barrier->arg_size(), 2U);
394   EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_cancel_barrier");
395   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory());
396   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory());
397   EXPECT_EQ(Barrier->getNumUses(), 0U);
398   EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(),
399             1U);
400   EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), CBB);
401 
402   EXPECT_EQ(cast<CallInst>(Cancel)->getArgOperand(1), GTID);
403 
404   OMPBuilder.popFinalizationCB();
405 
406   Builder.CreateUnreachable();
407   EXPECT_FALSE(verifyModule(*M, &errs()));
408 }
409 
410 TEST_F(OpenMPIRBuilderTest, CreateCancelIfCond) {
411   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
412   OpenMPIRBuilder OMPBuilder(*M);
413   OMPBuilder.initialize();
414 
415   BasicBlock *CBB = BasicBlock::Create(Ctx, "", F);
416   new UnreachableInst(Ctx, CBB);
417   auto FiniCB = [&](InsertPointTy IP) {
418     ASSERT_NE(IP.getBlock(), nullptr);
419     ASSERT_EQ(IP.getBlock()->end(), IP.getPoint());
420     BranchInst::Create(CBB, IP.getBlock());
421   };
422   OMPBuilder.pushFinalizationCB({FiniCB, OMPD_parallel, true});
423 
424   IRBuilder<> Builder(BB);
425 
426   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()});
427   auto NewIP = OMPBuilder.createCancel(Loc, Builder.getTrue(), OMPD_parallel);
428   Builder.restoreIP(NewIP);
429   EXPECT_FALSE(M->global_empty());
430   EXPECT_EQ(M->size(), 4U);
431   EXPECT_EQ(F->size(), 7U);
432   EXPECT_EQ(BB->size(), 1U);
433   ASSERT_TRUE(isa<BranchInst>(BB->getTerminator()));
434   ASSERT_EQ(BB->getTerminator()->getNumSuccessors(), 2U);
435   BB = BB->getTerminator()->getSuccessor(0);
436   EXPECT_EQ(BB->size(), 4U);
437 
438   CallInst *GTID = dyn_cast<CallInst>(&BB->front());
439   EXPECT_NE(GTID, nullptr);
440   EXPECT_EQ(GTID->arg_size(), 1U);
441   EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
442   EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory());
443   EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory());
444 
445   CallInst *Cancel = dyn_cast<CallInst>(GTID->getNextNode());
446   EXPECT_NE(Cancel, nullptr);
447   EXPECT_EQ(Cancel->arg_size(), 3U);
448   EXPECT_EQ(Cancel->getCalledFunction()->getName(), "__kmpc_cancel");
449   EXPECT_FALSE(Cancel->getCalledFunction()->doesNotAccessMemory());
450   EXPECT_FALSE(Cancel->getCalledFunction()->doesNotFreeMemory());
451   EXPECT_EQ(Cancel->getNumUses(), 1U);
452   Instruction *CancelBBTI = Cancel->getParent()->getTerminator();
453   EXPECT_EQ(CancelBBTI->getNumSuccessors(), 2U);
454   EXPECT_EQ(CancelBBTI->getSuccessor(0)->size(), 1U);
455   EXPECT_EQ(CancelBBTI->getSuccessor(0)->getUniqueSuccessor(),
456             NewIP.getBlock());
457   EXPECT_EQ(CancelBBTI->getSuccessor(1)->size(), 3U);
458   CallInst *GTID1 = dyn_cast<CallInst>(&CancelBBTI->getSuccessor(1)->front());
459   EXPECT_NE(GTID1, nullptr);
460   EXPECT_EQ(GTID1->arg_size(), 1U);
461   EXPECT_EQ(GTID1->getCalledFunction()->getName(), "__kmpc_global_thread_num");
462   EXPECT_FALSE(GTID1->getCalledFunction()->doesNotAccessMemory());
463   EXPECT_FALSE(GTID1->getCalledFunction()->doesNotFreeMemory());
464   CallInst *Barrier = dyn_cast<CallInst>(GTID1->getNextNode());
465   EXPECT_NE(Barrier, nullptr);
466   EXPECT_EQ(Barrier->arg_size(), 2U);
467   EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_cancel_barrier");
468   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory());
469   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory());
470   EXPECT_EQ(Barrier->getNumUses(), 0U);
471   EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(),
472             1U);
473   EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), CBB);
474 
475   EXPECT_EQ(cast<CallInst>(Cancel)->getArgOperand(1), GTID);
476 
477   OMPBuilder.popFinalizationCB();
478 
479   Builder.CreateUnreachable();
480   EXPECT_FALSE(verifyModule(*M, &errs()));
481 }
482 
483 TEST_F(OpenMPIRBuilderTest, CreateCancelBarrier) {
484   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
485   OpenMPIRBuilder OMPBuilder(*M);
486   OMPBuilder.initialize();
487 
488   BasicBlock *CBB = BasicBlock::Create(Ctx, "", F);
489   new UnreachableInst(Ctx, CBB);
490   auto FiniCB = [&](InsertPointTy IP) {
491     ASSERT_NE(IP.getBlock(), nullptr);
492     ASSERT_EQ(IP.getBlock()->end(), IP.getPoint());
493     BranchInst::Create(CBB, IP.getBlock());
494   };
495   OMPBuilder.pushFinalizationCB({FiniCB, OMPD_parallel, true});
496 
497   IRBuilder<> Builder(BB);
498 
499   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()});
500   auto NewIP = OMPBuilder.createBarrier(Loc, OMPD_for);
501   Builder.restoreIP(NewIP);
502   EXPECT_FALSE(M->global_empty());
503   EXPECT_EQ(M->size(), 3U);
504   EXPECT_EQ(F->size(), 4U);
505   EXPECT_EQ(BB->size(), 4U);
506 
507   CallInst *GTID = dyn_cast<CallInst>(&BB->front());
508   EXPECT_NE(GTID, nullptr);
509   EXPECT_EQ(GTID->arg_size(), 1U);
510   EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
511   EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory());
512   EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory());
513 
514   CallInst *Barrier = dyn_cast<CallInst>(GTID->getNextNode());
515   EXPECT_NE(Barrier, nullptr);
516   EXPECT_EQ(Barrier->arg_size(), 2U);
517   EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_cancel_barrier");
518   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory());
519   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory());
520   EXPECT_EQ(Barrier->getNumUses(), 1U);
521   Instruction *BarrierBBTI = Barrier->getParent()->getTerminator();
522   EXPECT_EQ(BarrierBBTI->getNumSuccessors(), 2U);
523   EXPECT_EQ(BarrierBBTI->getSuccessor(0), NewIP.getBlock());
524   EXPECT_EQ(BarrierBBTI->getSuccessor(1)->size(), 1U);
525   EXPECT_EQ(BarrierBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(),
526             1U);
527   EXPECT_EQ(BarrierBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0),
528             CBB);
529 
530   EXPECT_EQ(cast<CallInst>(Barrier)->getArgOperand(1), GTID);
531 
532   OMPBuilder.popFinalizationCB();
533 
534   Builder.CreateUnreachable();
535   EXPECT_FALSE(verifyModule(*M, &errs()));
536 }
537 
538 TEST_F(OpenMPIRBuilderTest, DbgLoc) {
539   OpenMPIRBuilder OMPBuilder(*M);
540   OMPBuilder.initialize();
541   F->setName("func");
542 
543   IRBuilder<> Builder(BB);
544 
545   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
546   OMPBuilder.createBarrier(Loc, OMPD_for);
547   CallInst *GTID = dyn_cast<CallInst>(&BB->front());
548   CallInst *Barrier = dyn_cast<CallInst>(GTID->getNextNode());
549   EXPECT_EQ(GTID->getDebugLoc(), DL);
550   EXPECT_EQ(Barrier->getDebugLoc(), DL);
551   EXPECT_TRUE(isa<GlobalVariable>(Barrier->getOperand(0)));
552   if (!isa<GlobalVariable>(Barrier->getOperand(0)))
553     return;
554   GlobalVariable *Ident = cast<GlobalVariable>(Barrier->getOperand(0));
555   EXPECT_TRUE(Ident->hasInitializer());
556   if (!Ident->hasInitializer())
557     return;
558   Constant *Initializer = Ident->getInitializer();
559   EXPECT_TRUE(
560       isa<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts()));
561   GlobalVariable *SrcStrGlob =
562       cast<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts());
563   if (!SrcStrGlob)
564     return;
565   EXPECT_TRUE(isa<ConstantDataArray>(SrcStrGlob->getInitializer()));
566   ConstantDataArray *SrcSrc =
567       dyn_cast<ConstantDataArray>(SrcStrGlob->getInitializer());
568   if (!SrcSrc)
569     return;
570   EXPECT_EQ(SrcSrc->getAsCString(), ";/src/test.dbg;foo;3;7;;");
571 }
572 
573 TEST_F(OpenMPIRBuilderTest, ParallelSimple) {
574   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
575   OpenMPIRBuilder OMPBuilder(*M);
576   OMPBuilder.initialize();
577   F->setName("func");
578   IRBuilder<> Builder(BB);
579 
580   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
581   Builder.CreateBr(EnterBB);
582   Builder.SetInsertPoint(EnterBB);
583   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
584 
585   AllocaInst *PrivAI = nullptr;
586 
587   unsigned NumBodiesGenerated = 0;
588   unsigned NumPrivatizedVars = 0;
589   unsigned NumFinalizationPoints = 0;
590 
591   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
592                        BasicBlock &ContinuationIP) {
593     ++NumBodiesGenerated;
594 
595     Builder.restoreIP(AllocaIP);
596     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
597     Builder.CreateStore(F->arg_begin(), PrivAI);
598 
599     Builder.restoreIP(CodeGenIP);
600     Value *PrivLoad =
601         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
602     Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
603     Instruction *ThenTerm, *ElseTerm;
604     SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(),
605                                   &ThenTerm, &ElseTerm);
606 
607     Builder.SetInsertPoint(ThenTerm);
608     Builder.CreateBr(&ContinuationIP);
609     ThenTerm->eraseFromParent();
610   };
611 
612   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
613                     Value &Orig, Value &Inner,
614                     Value *&ReplacementValue) -> InsertPointTy {
615     ++NumPrivatizedVars;
616 
617     if (!isa<AllocaInst>(Orig)) {
618       EXPECT_EQ(&Orig, F->arg_begin());
619       ReplacementValue = &Inner;
620       return CodeGenIP;
621     }
622 
623     // Since the original value is an allocation, it has a pointer type and
624     // therefore no additional wrapping should happen.
625     EXPECT_EQ(&Orig, &Inner);
626 
627     // Trivial copy (=firstprivate).
628     Builder.restoreIP(AllocaIP);
629     Type *VTy = ReplacementValue->getType();
630     Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload");
631     ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy");
632     Builder.restoreIP(CodeGenIP);
633     Builder.CreateStore(V, ReplacementValue);
634     return CodeGenIP;
635   };
636 
637   auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; };
638 
639   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
640                                     F->getEntryBlock().getFirstInsertionPt());
641   IRBuilder<>::InsertPoint AfterIP =
642       OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB,
643                                 nullptr, nullptr, OMP_PROC_BIND_default, false);
644   EXPECT_EQ(NumBodiesGenerated, 1U);
645   EXPECT_EQ(NumPrivatizedVars, 1U);
646   EXPECT_EQ(NumFinalizationPoints, 1U);
647 
648   Builder.restoreIP(AfterIP);
649   Builder.CreateRetVoid();
650 
651   OMPBuilder.finalize();
652 
653   EXPECT_NE(PrivAI, nullptr);
654   Function *OutlinedFn = PrivAI->getFunction();
655   EXPECT_NE(F, OutlinedFn);
656   EXPECT_FALSE(verifyModule(*M, &errs()));
657   EXPECT_TRUE(OutlinedFn->hasFnAttribute(Attribute::NoUnwind));
658   EXPECT_TRUE(OutlinedFn->hasFnAttribute(Attribute::NoRecurse));
659   EXPECT_TRUE(OutlinedFn->hasParamAttribute(0, Attribute::NoAlias));
660   EXPECT_TRUE(OutlinedFn->hasParamAttribute(1, Attribute::NoAlias));
661 
662   EXPECT_TRUE(OutlinedFn->hasInternalLinkage());
663   EXPECT_EQ(OutlinedFn->arg_size(), 3U);
664 
665   EXPECT_EQ(&OutlinedFn->getEntryBlock(), PrivAI->getParent());
666   EXPECT_EQ(OutlinedFn->getNumUses(), 1U);
667   User *Usr = OutlinedFn->user_back();
668   ASSERT_TRUE(isa<ConstantExpr>(Usr));
669   CallInst *ForkCI = dyn_cast<CallInst>(Usr->user_back());
670   ASSERT_NE(ForkCI, nullptr);
671 
672   EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call");
673   EXPECT_EQ(ForkCI->arg_size(), 4U);
674   EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0)));
675   EXPECT_EQ(ForkCI->getArgOperand(1),
676             ConstantInt::get(Type::getInt32Ty(Ctx), 1U));
677   EXPECT_EQ(ForkCI->getArgOperand(2), Usr);
678   Value *StoredValue =
679       findStoredValueInAggregateAt(Ctx, ForkCI->getArgOperand(3), 0);
680   EXPECT_EQ(StoredValue, F->arg_begin());
681 }
682 
683 TEST_F(OpenMPIRBuilderTest, ParallelNested) {
684   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
685   OpenMPIRBuilder OMPBuilder(*M);
686   OMPBuilder.initialize();
687   F->setName("func");
688   IRBuilder<> Builder(BB);
689 
690   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
691   Builder.CreateBr(EnterBB);
692   Builder.SetInsertPoint(EnterBB);
693   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
694 
695   unsigned NumInnerBodiesGenerated = 0;
696   unsigned NumOuterBodiesGenerated = 0;
697   unsigned NumFinalizationPoints = 0;
698 
699   auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
700                             BasicBlock &ContinuationIP) {
701     ++NumInnerBodiesGenerated;
702   };
703 
704   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
705                     Value &Orig, Value &Inner,
706                     Value *&ReplacementValue) -> InsertPointTy {
707     // Trivial copy (=firstprivate).
708     Builder.restoreIP(AllocaIP);
709     Type *VTy = ReplacementValue->getType();
710     Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload");
711     ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy");
712     Builder.restoreIP(CodeGenIP);
713     Builder.CreateStore(V, ReplacementValue);
714     return CodeGenIP;
715   };
716 
717   auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; };
718 
719   auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
720                             BasicBlock &ContinuationIP) {
721     ++NumOuterBodiesGenerated;
722     Builder.restoreIP(CodeGenIP);
723     BasicBlock *CGBB = CodeGenIP.getBlock();
724     BasicBlock *NewBB = SplitBlock(CGBB, &*CodeGenIP.getPoint());
725     CGBB->getTerminator()->eraseFromParent();
726     ;
727 
728     IRBuilder<>::InsertPoint AfterIP = OMPBuilder.createParallel(
729         InsertPointTy(CGBB, CGBB->end()), AllocaIP, InnerBodyGenCB, PrivCB,
730         FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false);
731 
732     Builder.restoreIP(AfterIP);
733     Builder.CreateBr(NewBB);
734   };
735 
736   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
737                                     F->getEntryBlock().getFirstInsertionPt());
738   IRBuilder<>::InsertPoint AfterIP =
739       OMPBuilder.createParallel(Loc, AllocaIP, OuterBodyGenCB, PrivCB, FiniCB,
740                                 nullptr, nullptr, OMP_PROC_BIND_default, false);
741 
742   EXPECT_EQ(NumInnerBodiesGenerated, 1U);
743   EXPECT_EQ(NumOuterBodiesGenerated, 1U);
744   EXPECT_EQ(NumFinalizationPoints, 2U);
745 
746   Builder.restoreIP(AfterIP);
747   Builder.CreateRetVoid();
748 
749   OMPBuilder.finalize();
750 
751   EXPECT_EQ(M->size(), 5U);
752   for (Function &OutlinedFn : *M) {
753     if (F == &OutlinedFn || OutlinedFn.isDeclaration())
754       continue;
755     EXPECT_FALSE(verifyModule(*M, &errs()));
756     EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoUnwind));
757     EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoRecurse));
758     EXPECT_TRUE(OutlinedFn.hasParamAttribute(0, Attribute::NoAlias));
759     EXPECT_TRUE(OutlinedFn.hasParamAttribute(1, Attribute::NoAlias));
760 
761     EXPECT_TRUE(OutlinedFn.hasInternalLinkage());
762     EXPECT_EQ(OutlinedFn.arg_size(), 2U);
763 
764     EXPECT_EQ(OutlinedFn.getNumUses(), 1U);
765     User *Usr = OutlinedFn.user_back();
766     ASSERT_TRUE(isa<ConstantExpr>(Usr));
767     CallInst *ForkCI = dyn_cast<CallInst>(Usr->user_back());
768     ASSERT_NE(ForkCI, nullptr);
769 
770     EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call");
771     EXPECT_EQ(ForkCI->arg_size(), 3U);
772     EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0)));
773     EXPECT_EQ(ForkCI->getArgOperand(1),
774               ConstantInt::get(Type::getInt32Ty(Ctx), 0U));
775     EXPECT_EQ(ForkCI->getArgOperand(2), Usr);
776   }
777 }
778 
779 TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) {
780   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
781   OpenMPIRBuilder OMPBuilder(*M);
782   OMPBuilder.initialize();
783   F->setName("func");
784   IRBuilder<> Builder(BB);
785 
786   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
787   Builder.CreateBr(EnterBB);
788   Builder.SetInsertPoint(EnterBB);
789   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
790 
791   unsigned NumInnerBodiesGenerated = 0;
792   unsigned NumOuterBodiesGenerated = 0;
793   unsigned NumFinalizationPoints = 0;
794 
795   auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
796                             BasicBlock &ContinuationIP) {
797     ++NumInnerBodiesGenerated;
798   };
799 
800   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
801                     Value &Orig, Value &Inner,
802                     Value *&ReplacementValue) -> InsertPointTy {
803     // Trivial copy (=firstprivate).
804     Builder.restoreIP(AllocaIP);
805     Type *VTy = ReplacementValue->getType();
806     Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload");
807     ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy");
808     Builder.restoreIP(CodeGenIP);
809     Builder.CreateStore(V, ReplacementValue);
810     return CodeGenIP;
811   };
812 
813   auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; };
814 
815   auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
816                             BasicBlock &ContinuationIP) {
817     ++NumOuterBodiesGenerated;
818     Builder.restoreIP(CodeGenIP);
819     BasicBlock *CGBB = CodeGenIP.getBlock();
820     BasicBlock *NewBB1 = SplitBlock(CGBB, &*CodeGenIP.getPoint());
821     BasicBlock *NewBB2 = SplitBlock(NewBB1, &*NewBB1->getFirstInsertionPt());
822     CGBB->getTerminator()->eraseFromParent();
823     ;
824     NewBB1->getTerminator()->eraseFromParent();
825     ;
826 
827     IRBuilder<>::InsertPoint AfterIP1 = OMPBuilder.createParallel(
828         InsertPointTy(CGBB, CGBB->end()), AllocaIP, InnerBodyGenCB, PrivCB,
829         FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false);
830 
831     Builder.restoreIP(AfterIP1);
832     Builder.CreateBr(NewBB1);
833 
834     IRBuilder<>::InsertPoint AfterIP2 = OMPBuilder.createParallel(
835         InsertPointTy(NewBB1, NewBB1->end()), AllocaIP, InnerBodyGenCB, PrivCB,
836         FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false);
837 
838     Builder.restoreIP(AfterIP2);
839     Builder.CreateBr(NewBB2);
840   };
841 
842   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
843                                     F->getEntryBlock().getFirstInsertionPt());
844   IRBuilder<>::InsertPoint AfterIP =
845       OMPBuilder.createParallel(Loc, AllocaIP, OuterBodyGenCB, PrivCB, FiniCB,
846                                 nullptr, nullptr, OMP_PROC_BIND_default, false);
847 
848   EXPECT_EQ(NumInnerBodiesGenerated, 2U);
849   EXPECT_EQ(NumOuterBodiesGenerated, 1U);
850   EXPECT_EQ(NumFinalizationPoints, 3U);
851 
852   Builder.restoreIP(AfterIP);
853   Builder.CreateRetVoid();
854 
855   OMPBuilder.finalize();
856 
857   EXPECT_EQ(M->size(), 6U);
858   for (Function &OutlinedFn : *M) {
859     if (F == &OutlinedFn || OutlinedFn.isDeclaration())
860       continue;
861     EXPECT_FALSE(verifyModule(*M, &errs()));
862     EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoUnwind));
863     EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoRecurse));
864     EXPECT_TRUE(OutlinedFn.hasParamAttribute(0, Attribute::NoAlias));
865     EXPECT_TRUE(OutlinedFn.hasParamAttribute(1, Attribute::NoAlias));
866 
867     EXPECT_TRUE(OutlinedFn.hasInternalLinkage());
868     EXPECT_EQ(OutlinedFn.arg_size(), 2U);
869 
870     unsigned NumAllocas = 0;
871     for (Instruction &I : instructions(OutlinedFn))
872       NumAllocas += isa<AllocaInst>(I);
873     EXPECT_EQ(NumAllocas, 1U);
874 
875     EXPECT_EQ(OutlinedFn.getNumUses(), 1U);
876     User *Usr = OutlinedFn.user_back();
877     ASSERT_TRUE(isa<ConstantExpr>(Usr));
878     CallInst *ForkCI = dyn_cast<CallInst>(Usr->user_back());
879     ASSERT_NE(ForkCI, nullptr);
880 
881     EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call");
882     EXPECT_EQ(ForkCI->arg_size(), 3U);
883     EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0)));
884     EXPECT_EQ(ForkCI->getArgOperand(1),
885               ConstantInt::get(Type::getInt32Ty(Ctx), 0U));
886     EXPECT_EQ(ForkCI->getArgOperand(2), Usr);
887   }
888 }
889 
890 TEST_F(OpenMPIRBuilderTest, ParallelIfCond) {
891   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
892   OpenMPIRBuilder OMPBuilder(*M);
893   OMPBuilder.initialize();
894   F->setName("func");
895   IRBuilder<> Builder(BB);
896 
897   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
898   Builder.CreateBr(EnterBB);
899   Builder.SetInsertPoint(EnterBB);
900   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
901 
902   AllocaInst *PrivAI = nullptr;
903 
904   unsigned NumBodiesGenerated = 0;
905   unsigned NumPrivatizedVars = 0;
906   unsigned NumFinalizationPoints = 0;
907 
908   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
909                        BasicBlock &ContinuationIP) {
910     ++NumBodiesGenerated;
911 
912     Builder.restoreIP(AllocaIP);
913     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
914     Builder.CreateStore(F->arg_begin(), PrivAI);
915 
916     Builder.restoreIP(CodeGenIP);
917     Value *PrivLoad =
918         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
919     Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
920     Instruction *ThenTerm, *ElseTerm;
921     SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(),
922                                   &ThenTerm, &ElseTerm);
923 
924     Builder.SetInsertPoint(ThenTerm);
925     Builder.CreateBr(&ContinuationIP);
926     ThenTerm->eraseFromParent();
927   };
928 
929   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
930                     Value &Orig, Value &Inner,
931                     Value *&ReplacementValue) -> InsertPointTy {
932     ++NumPrivatizedVars;
933 
934     if (!isa<AllocaInst>(Orig)) {
935       EXPECT_EQ(&Orig, F->arg_begin());
936       ReplacementValue = &Inner;
937       return CodeGenIP;
938     }
939 
940     // Since the original value is an allocation, it has a pointer type and
941     // therefore no additional wrapping should happen.
942     EXPECT_EQ(&Orig, &Inner);
943 
944     // Trivial copy (=firstprivate).
945     Builder.restoreIP(AllocaIP);
946     Type *VTy = ReplacementValue->getType();
947     Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload");
948     ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy");
949     Builder.restoreIP(CodeGenIP);
950     Builder.CreateStore(V, ReplacementValue);
951     return CodeGenIP;
952   };
953 
954   auto FiniCB = [&](InsertPointTy CodeGenIP) {
955     ++NumFinalizationPoints;
956     // No destructors.
957   };
958 
959   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
960                                     F->getEntryBlock().getFirstInsertionPt());
961   IRBuilder<>::InsertPoint AfterIP =
962       OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB,
963                                 Builder.CreateIsNotNull(F->arg_begin()),
964                                 nullptr, OMP_PROC_BIND_default, false);
965 
966   EXPECT_EQ(NumBodiesGenerated, 1U);
967   EXPECT_EQ(NumPrivatizedVars, 1U);
968   EXPECT_EQ(NumFinalizationPoints, 1U);
969 
970   Builder.restoreIP(AfterIP);
971   Builder.CreateRetVoid();
972   OMPBuilder.finalize();
973 
974   EXPECT_NE(PrivAI, nullptr);
975   Function *OutlinedFn = PrivAI->getFunction();
976   EXPECT_NE(F, OutlinedFn);
977   EXPECT_FALSE(verifyModule(*M, &errs()));
978 
979   EXPECT_TRUE(OutlinedFn->hasInternalLinkage());
980   EXPECT_EQ(OutlinedFn->arg_size(), 3U);
981 
982   EXPECT_EQ(&OutlinedFn->getEntryBlock(), PrivAI->getParent());
983   ASSERT_EQ(OutlinedFn->getNumUses(), 2U);
984 
985   CallInst *DirectCI = nullptr;
986   CallInst *ForkCI = nullptr;
987   for (User *Usr : OutlinedFn->users()) {
988     if (isa<CallInst>(Usr)) {
989       ASSERT_EQ(DirectCI, nullptr);
990       DirectCI = cast<CallInst>(Usr);
991     } else {
992       ASSERT_TRUE(isa<ConstantExpr>(Usr));
993       ASSERT_EQ(Usr->getNumUses(), 1U);
994       ASSERT_TRUE(isa<CallInst>(Usr->user_back()));
995       ForkCI = cast<CallInst>(Usr->user_back());
996     }
997   }
998 
999   EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call");
1000   EXPECT_EQ(ForkCI->arg_size(), 4U);
1001   EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0)));
1002   EXPECT_EQ(ForkCI->getArgOperand(1),
1003             ConstantInt::get(Type::getInt32Ty(Ctx), 1));
1004   Value *StoredForkArg =
1005       findStoredValueInAggregateAt(Ctx, ForkCI->getArgOperand(3), 0);
1006   EXPECT_EQ(StoredForkArg, F->arg_begin());
1007 
1008   EXPECT_EQ(DirectCI->getCalledFunction(), OutlinedFn);
1009   EXPECT_EQ(DirectCI->arg_size(), 3U);
1010   EXPECT_TRUE(isa<AllocaInst>(DirectCI->getArgOperand(0)));
1011   EXPECT_TRUE(isa<AllocaInst>(DirectCI->getArgOperand(1)));
1012   Value *StoredDirectArg =
1013       findStoredValueInAggregateAt(Ctx, DirectCI->getArgOperand(2), 0);
1014   EXPECT_EQ(StoredDirectArg, F->arg_begin());
1015 }
1016 
1017 TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) {
1018   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1019   OpenMPIRBuilder OMPBuilder(*M);
1020   OMPBuilder.initialize();
1021   F->setName("func");
1022   IRBuilder<> Builder(BB);
1023 
1024   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
1025   Builder.CreateBr(EnterBB);
1026   Builder.SetInsertPoint(EnterBB);
1027   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
1028 
1029   unsigned NumBodiesGenerated = 0;
1030   unsigned NumPrivatizedVars = 0;
1031   unsigned NumFinalizationPoints = 0;
1032 
1033   CallInst *CheckedBarrier = nullptr;
1034   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
1035                        BasicBlock &ContinuationIP) {
1036     ++NumBodiesGenerated;
1037 
1038     Builder.restoreIP(CodeGenIP);
1039 
1040     // Create three barriers, two cancel barriers but only one checked.
1041     Function *CBFn, *BFn;
1042 
1043     Builder.restoreIP(
1044         OMPBuilder.createBarrier(Builder.saveIP(), OMPD_parallel));
1045 
1046     CBFn = M->getFunction("__kmpc_cancel_barrier");
1047     BFn = M->getFunction("__kmpc_barrier");
1048     ASSERT_NE(CBFn, nullptr);
1049     ASSERT_EQ(BFn, nullptr);
1050     ASSERT_EQ(CBFn->getNumUses(), 1U);
1051     ASSERT_TRUE(isa<CallInst>(CBFn->user_back()));
1052     ASSERT_EQ(CBFn->user_back()->getNumUses(), 1U);
1053     CheckedBarrier = cast<CallInst>(CBFn->user_back());
1054 
1055     Builder.restoreIP(
1056         OMPBuilder.createBarrier(Builder.saveIP(), OMPD_parallel, true));
1057     CBFn = M->getFunction("__kmpc_cancel_barrier");
1058     BFn = M->getFunction("__kmpc_barrier");
1059     ASSERT_NE(CBFn, nullptr);
1060     ASSERT_NE(BFn, nullptr);
1061     ASSERT_EQ(CBFn->getNumUses(), 1U);
1062     ASSERT_EQ(BFn->getNumUses(), 1U);
1063     ASSERT_TRUE(isa<CallInst>(BFn->user_back()));
1064     ASSERT_EQ(BFn->user_back()->getNumUses(), 0U);
1065 
1066     Builder.restoreIP(OMPBuilder.createBarrier(Builder.saveIP(), OMPD_parallel,
1067                                                false, false));
1068     ASSERT_EQ(CBFn->getNumUses(), 2U);
1069     ASSERT_EQ(BFn->getNumUses(), 1U);
1070     ASSERT_TRUE(CBFn->user_back() != CheckedBarrier);
1071     ASSERT_TRUE(isa<CallInst>(CBFn->user_back()));
1072     ASSERT_EQ(CBFn->user_back()->getNumUses(), 0U);
1073   };
1074 
1075   auto PrivCB = [&](InsertPointTy, InsertPointTy, Value &V, Value &,
1076                     Value *&) -> InsertPointTy {
1077     ++NumPrivatizedVars;
1078     llvm_unreachable("No privatization callback call expected!");
1079   };
1080 
1081   FunctionType *FakeDestructorTy =
1082       FunctionType::get(Type::getVoidTy(Ctx), {Type::getInt32Ty(Ctx)},
1083                         /*isVarArg=*/false);
1084   auto *FakeDestructor = Function::Create(
1085       FakeDestructorTy, Function::ExternalLinkage, "fakeDestructor", M.get());
1086 
1087   auto FiniCB = [&](InsertPointTy IP) {
1088     ++NumFinalizationPoints;
1089     Builder.restoreIP(IP);
1090     Builder.CreateCall(FakeDestructor,
1091                        {Builder.getInt32(NumFinalizationPoints)});
1092   };
1093 
1094   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
1095                                     F->getEntryBlock().getFirstInsertionPt());
1096   IRBuilder<>::InsertPoint AfterIP =
1097       OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB,
1098                                 Builder.CreateIsNotNull(F->arg_begin()),
1099                                 nullptr, OMP_PROC_BIND_default, true);
1100 
1101   EXPECT_EQ(NumBodiesGenerated, 1U);
1102   EXPECT_EQ(NumPrivatizedVars, 0U);
1103   EXPECT_EQ(NumFinalizationPoints, 2U);
1104   EXPECT_EQ(FakeDestructor->getNumUses(), 2U);
1105 
1106   Builder.restoreIP(AfterIP);
1107   Builder.CreateRetVoid();
1108   OMPBuilder.finalize();
1109 
1110   EXPECT_FALSE(verifyModule(*M, &errs()));
1111 
1112   BasicBlock *ExitBB = nullptr;
1113   for (const User *Usr : FakeDestructor->users()) {
1114     const CallInst *CI = dyn_cast<CallInst>(Usr);
1115     ASSERT_EQ(CI->getCalledFunction(), FakeDestructor);
1116     ASSERT_TRUE(isa<BranchInst>(CI->getNextNode()));
1117     ASSERT_EQ(CI->getNextNode()->getNumSuccessors(), 1U);
1118     if (ExitBB)
1119       ASSERT_EQ(CI->getNextNode()->getSuccessor(0), ExitBB);
1120     else
1121       ExitBB = CI->getNextNode()->getSuccessor(0);
1122     ASSERT_EQ(ExitBB->size(), 1U);
1123     if (!isa<ReturnInst>(ExitBB->front())) {
1124       ASSERT_TRUE(isa<BranchInst>(ExitBB->front()));
1125       ASSERT_EQ(cast<BranchInst>(ExitBB->front()).getNumSuccessors(), 1U);
1126       ASSERT_TRUE(isa<ReturnInst>(
1127           cast<BranchInst>(ExitBB->front()).getSuccessor(0)->front()));
1128     }
1129   }
1130 }
1131 
1132 TEST_F(OpenMPIRBuilderTest, ParallelForwardAsPointers) {
1133   OpenMPIRBuilder OMPBuilder(*M);
1134   OMPBuilder.initialize();
1135   F->setName("func");
1136   IRBuilder<> Builder(BB);
1137   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
1138   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1139 
1140   Type *I32Ty = Type::getInt32Ty(M->getContext());
1141   Type *I32PtrTy = Type::getInt32PtrTy(M->getContext());
1142   Type *StructTy = StructType::get(I32Ty, I32PtrTy);
1143   Type *StructPtrTy = StructTy->getPointerTo();
1144   StructType *ArgStructTy =
1145       StructType::get(I32PtrTy, StructPtrTy, I32PtrTy, StructPtrTy);
1146   Type *VoidTy = Type::getVoidTy(M->getContext());
1147   FunctionCallee RetI32Func = M->getOrInsertFunction("ret_i32", I32Ty);
1148   FunctionCallee TakeI32Func =
1149       M->getOrInsertFunction("take_i32", VoidTy, I32Ty);
1150   FunctionCallee RetI32PtrFunc = M->getOrInsertFunction("ret_i32ptr", I32PtrTy);
1151   FunctionCallee TakeI32PtrFunc =
1152       M->getOrInsertFunction("take_i32ptr", VoidTy, I32PtrTy);
1153   FunctionCallee RetStructFunc = M->getOrInsertFunction("ret_struct", StructTy);
1154   FunctionCallee TakeStructFunc =
1155       M->getOrInsertFunction("take_struct", VoidTy, StructTy);
1156   FunctionCallee RetStructPtrFunc =
1157       M->getOrInsertFunction("ret_structptr", StructPtrTy);
1158   FunctionCallee TakeStructPtrFunc =
1159       M->getOrInsertFunction("take_structPtr", VoidTy, StructPtrTy);
1160   Value *I32Val = Builder.CreateCall(RetI32Func);
1161   Value *I32PtrVal = Builder.CreateCall(RetI32PtrFunc);
1162   Value *StructVal = Builder.CreateCall(RetStructFunc);
1163   Value *StructPtrVal = Builder.CreateCall(RetStructPtrFunc);
1164 
1165   Instruction *Internal;
1166   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
1167                        BasicBlock &ContinuationBB) {
1168     IRBuilder<>::InsertPointGuard Guard(Builder);
1169     Builder.restoreIP(CodeGenIP);
1170     Internal = Builder.CreateCall(TakeI32Func, I32Val);
1171     Builder.CreateCall(TakeI32PtrFunc, I32PtrVal);
1172     Builder.CreateCall(TakeStructFunc, StructVal);
1173     Builder.CreateCall(TakeStructPtrFunc, StructPtrVal);
1174   };
1175   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &,
1176                     Value &Inner, Value *&ReplacementValue) {
1177     ReplacementValue = &Inner;
1178     return CodeGenIP;
1179   };
1180   auto FiniCB = [](InsertPointTy) {};
1181 
1182   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
1183                                     F->getEntryBlock().getFirstInsertionPt());
1184   IRBuilder<>::InsertPoint AfterIP =
1185       OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB,
1186                                 nullptr, nullptr, OMP_PROC_BIND_default, false);
1187   Builder.restoreIP(AfterIP);
1188   Builder.CreateRetVoid();
1189 
1190   OMPBuilder.finalize();
1191 
1192   EXPECT_FALSE(verifyModule(*M, &errs()));
1193   Function *OutlinedFn = Internal->getFunction();
1194 
1195   Type *Arg2Type = OutlinedFn->getArg(2)->getType();
1196   EXPECT_TRUE(Arg2Type->isPointerTy());
1197   EXPECT_TRUE(cast<PointerType>(Arg2Type)
1198                   ->isOpaqueOrPointeeTypeMatches(ArgStructTy));
1199 }
1200 
1201 TEST_F(OpenMPIRBuilderTest, CanonicalLoopSimple) {
1202   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1203   OpenMPIRBuilder OMPBuilder(*M);
1204   OMPBuilder.initialize();
1205   IRBuilder<> Builder(BB);
1206   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
1207   Value *TripCount = F->getArg(0);
1208 
1209   unsigned NumBodiesGenerated = 0;
1210   auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) {
1211     NumBodiesGenerated += 1;
1212 
1213     Builder.restoreIP(CodeGenIP);
1214 
1215     Value *Cmp = Builder.CreateICmpEQ(LC, TripCount);
1216     Instruction *ThenTerm, *ElseTerm;
1217     SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(),
1218                                   &ThenTerm, &ElseTerm);
1219   };
1220 
1221   CanonicalLoopInfo *Loop =
1222       OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, TripCount);
1223 
1224   Builder.restoreIP(Loop->getAfterIP());
1225   ReturnInst *RetInst = Builder.CreateRetVoid();
1226   OMPBuilder.finalize();
1227 
1228   Loop->assertOK();
1229   EXPECT_FALSE(verifyModule(*M, &errs()));
1230 
1231   EXPECT_EQ(NumBodiesGenerated, 1U);
1232 
1233   // Verify control flow structure (in addition to Loop->assertOK()).
1234   EXPECT_EQ(Loop->getPreheader()->getSinglePredecessor(), &F->getEntryBlock());
1235   EXPECT_EQ(Loop->getAfter(), Builder.GetInsertBlock());
1236 
1237   Instruction *IndVar = Loop->getIndVar();
1238   EXPECT_TRUE(isa<PHINode>(IndVar));
1239   EXPECT_EQ(IndVar->getType(), TripCount->getType());
1240   EXPECT_EQ(IndVar->getParent(), Loop->getHeader());
1241 
1242   EXPECT_EQ(Loop->getTripCount(), TripCount);
1243 
1244   BasicBlock *Body = Loop->getBody();
1245   Instruction *CmpInst = &Body->getInstList().front();
1246   EXPECT_TRUE(isa<ICmpInst>(CmpInst));
1247   EXPECT_EQ(CmpInst->getOperand(0), IndVar);
1248 
1249   BasicBlock *LatchPred = Loop->getLatch()->getSinglePredecessor();
1250   EXPECT_TRUE(llvm::all_of(successors(Body), [=](BasicBlock *SuccBB) {
1251     return SuccBB->getSingleSuccessor() == LatchPred;
1252   }));
1253 
1254   EXPECT_EQ(&Loop->getAfter()->front(), RetInst);
1255 }
1256 
1257 TEST_F(OpenMPIRBuilderTest, CanonicalLoopBounds) {
1258   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1259   OpenMPIRBuilder OMPBuilder(*M);
1260   OMPBuilder.initialize();
1261   IRBuilder<> Builder(BB);
1262 
1263   // Check the trip count is computed correctly. We generate the canonical loop
1264   // but rely on the IRBuilder's constant folder to compute the final result
1265   // since all inputs are constant. To verify overflow situations, limit the
1266   // trip count / loop counter widths to 16 bits.
1267   auto EvalTripCount = [&](int64_t Start, int64_t Stop, int64_t Step,
1268                            bool IsSigned, bool InclusiveStop) -> int64_t {
1269     OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
1270     Type *LCTy = Type::getInt16Ty(Ctx);
1271     Value *StartVal = ConstantInt::get(LCTy, Start);
1272     Value *StopVal = ConstantInt::get(LCTy, Stop);
1273     Value *StepVal = ConstantInt::get(LCTy, Step);
1274     auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) {};
1275     CanonicalLoopInfo *Loop =
1276         OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, StartVal, StopVal,
1277                                        StepVal, IsSigned, InclusiveStop);
1278     Loop->assertOK();
1279     Builder.restoreIP(Loop->getAfterIP());
1280     Value *TripCount = Loop->getTripCount();
1281     return cast<ConstantInt>(TripCount)->getValue().getZExtValue();
1282   };
1283 
1284   EXPECT_EQ(EvalTripCount(0, 0, 1, false, false), 0);
1285   EXPECT_EQ(EvalTripCount(0, 1, 2, false, false), 1);
1286   EXPECT_EQ(EvalTripCount(0, 42, 1, false, false), 42);
1287   EXPECT_EQ(EvalTripCount(0, 42, 2, false, false), 21);
1288   EXPECT_EQ(EvalTripCount(21, 42, 1, false, false), 21);
1289   EXPECT_EQ(EvalTripCount(0, 5, 5, false, false), 1);
1290   EXPECT_EQ(EvalTripCount(0, 9, 5, false, false), 2);
1291   EXPECT_EQ(EvalTripCount(0, 11, 5, false, false), 3);
1292   EXPECT_EQ(EvalTripCount(0, 0xFFFF, 1, false, false), 0xFFFF);
1293   EXPECT_EQ(EvalTripCount(0xFFFF, 0, 1, false, false), 0);
1294   EXPECT_EQ(EvalTripCount(0xFFFE, 0xFFFF, 1, false, false), 1);
1295   EXPECT_EQ(EvalTripCount(0, 0xFFFF, 0x100, false, false), 0x100);
1296   EXPECT_EQ(EvalTripCount(0, 0xFFFF, 0xFFFF, false, false), 1);
1297 
1298   EXPECT_EQ(EvalTripCount(0, 6, 5, false, false), 2);
1299   EXPECT_EQ(EvalTripCount(0, 0xFFFF, 0xFFFE, false, false), 2);
1300   EXPECT_EQ(EvalTripCount(0, 0, 1, false, true), 1);
1301   EXPECT_EQ(EvalTripCount(0, 0, 0xFFFF, false, true), 1);
1302   EXPECT_EQ(EvalTripCount(0, 0xFFFE, 1, false, true), 0xFFFF);
1303   EXPECT_EQ(EvalTripCount(0, 0xFFFE, 2, false, true), 0x8000);
1304 
1305   EXPECT_EQ(EvalTripCount(0, 0, -1, true, false), 0);
1306   EXPECT_EQ(EvalTripCount(0, 1, -1, true, true), 0);
1307   EXPECT_EQ(EvalTripCount(20, 5, -5, true, false), 3);
1308   EXPECT_EQ(EvalTripCount(20, 5, -5, true, true), 4);
1309   EXPECT_EQ(EvalTripCount(-4, -2, 2, true, false), 1);
1310   EXPECT_EQ(EvalTripCount(-4, -3, 2, true, false), 1);
1311   EXPECT_EQ(EvalTripCount(-4, -2, 2, true, true), 2);
1312 
1313   EXPECT_EQ(EvalTripCount(INT16_MIN, 0, 1, true, false), 0x8000);
1314   EXPECT_EQ(EvalTripCount(INT16_MIN, 0, 1, true, true), 0x8001);
1315   EXPECT_EQ(EvalTripCount(INT16_MIN, 0x7FFF, 1, true, false), 0xFFFF);
1316   EXPECT_EQ(EvalTripCount(INT16_MIN + 1, 0x7FFF, 1, true, true), 0xFFFF);
1317   EXPECT_EQ(EvalTripCount(INT16_MIN, 0, 0x7FFF, true, false), 2);
1318   EXPECT_EQ(EvalTripCount(0x7FFF, 0, -1, true, false), 0x7FFF);
1319   EXPECT_EQ(EvalTripCount(0, INT16_MIN, -1, true, false), 0x8000);
1320   EXPECT_EQ(EvalTripCount(0, INT16_MIN, -16, true, false), 0x800);
1321   EXPECT_EQ(EvalTripCount(0x7FFF, INT16_MIN, -1, true, false), 0xFFFF);
1322   EXPECT_EQ(EvalTripCount(0x7FFF, 1, INT16_MIN, true, false), 1);
1323   EXPECT_EQ(EvalTripCount(0x7FFF, -1, INT16_MIN, true, true), 2);
1324 
1325   // Finalize the function and verify it.
1326   Builder.CreateRetVoid();
1327   OMPBuilder.finalize();
1328   EXPECT_FALSE(verifyModule(*M, &errs()));
1329 }
1330 
1331 TEST_F(OpenMPIRBuilderTest, CollapseNestedLoops) {
1332   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1333   OpenMPIRBuilder OMPBuilder(*M);
1334   OMPBuilder.initialize();
1335   F->setName("func");
1336 
1337   IRBuilder<> Builder(BB);
1338 
1339   Type *LCTy = F->getArg(0)->getType();
1340   Constant *One = ConstantInt::get(LCTy, 1);
1341   Constant *Two = ConstantInt::get(LCTy, 2);
1342   Value *OuterTripCount =
1343       Builder.CreateAdd(F->getArg(0), Two, "tripcount.outer");
1344   Value *InnerTripCount =
1345       Builder.CreateAdd(F->getArg(0), One, "tripcount.inner");
1346 
1347   // Fix an insertion point for ComputeIP.
1348   BasicBlock *LoopNextEnter =
1349       BasicBlock::Create(M->getContext(), "loopnest.enter", F,
1350                          Builder.GetInsertBlock()->getNextNode());
1351   BranchInst *EnterBr = Builder.CreateBr(LoopNextEnter);
1352   InsertPointTy ComputeIP{EnterBr->getParent(), EnterBr->getIterator()};
1353 
1354   Builder.SetInsertPoint(LoopNextEnter);
1355   OpenMPIRBuilder::LocationDescription OuterLoc(Builder.saveIP(), DL);
1356 
1357   CanonicalLoopInfo *InnerLoop = nullptr;
1358   CallInst *InbetweenLead = nullptr;
1359   CallInst *InbetweenTrail = nullptr;
1360   CallInst *Call = nullptr;
1361   auto OuterLoopBodyGenCB = [&](InsertPointTy OuterCodeGenIP, Value *OuterLC) {
1362     Builder.restoreIP(OuterCodeGenIP);
1363     InbetweenLead =
1364         createPrintfCall(Builder, "In-between lead i=%d\\n", {OuterLC});
1365 
1366     auto InnerLoopBodyGenCB = [&](InsertPointTy InnerCodeGenIP,
1367                                   Value *InnerLC) {
1368       Builder.restoreIP(InnerCodeGenIP);
1369       Call = createPrintfCall(Builder, "body i=%d j=%d\\n", {OuterLC, InnerLC});
1370     };
1371     InnerLoop = OMPBuilder.createCanonicalLoop(
1372         Builder.saveIP(), InnerLoopBodyGenCB, InnerTripCount, "inner");
1373 
1374     Builder.restoreIP(InnerLoop->getAfterIP());
1375     InbetweenTrail =
1376         createPrintfCall(Builder, "In-between trail i=%d\\n", {OuterLC});
1377   };
1378   CanonicalLoopInfo *OuterLoop = OMPBuilder.createCanonicalLoop(
1379       OuterLoc, OuterLoopBodyGenCB, OuterTripCount, "outer");
1380 
1381   // Finish the function.
1382   Builder.restoreIP(OuterLoop->getAfterIP());
1383   Builder.CreateRetVoid();
1384 
1385   CanonicalLoopInfo *Collapsed =
1386       OMPBuilder.collapseLoops(DL, {OuterLoop, InnerLoop}, ComputeIP);
1387 
1388   OMPBuilder.finalize();
1389   EXPECT_FALSE(verifyModule(*M, &errs()));
1390 
1391   // Verify control flow and BB order.
1392   BasicBlock *RefOrder[] = {
1393       Collapsed->getPreheader(),   Collapsed->getHeader(),
1394       Collapsed->getCond(),        Collapsed->getBody(),
1395       InbetweenLead->getParent(),  Call->getParent(),
1396       InbetweenTrail->getParent(), Collapsed->getLatch(),
1397       Collapsed->getExit(),        Collapsed->getAfter(),
1398   };
1399   EXPECT_TRUE(verifyDFSOrder(F, RefOrder));
1400   EXPECT_TRUE(verifyListOrder(F, RefOrder));
1401 
1402   // Verify the total trip count.
1403   auto *TripCount = cast<MulOperator>(Collapsed->getTripCount());
1404   EXPECT_EQ(TripCount->getOperand(0), OuterTripCount);
1405   EXPECT_EQ(TripCount->getOperand(1), InnerTripCount);
1406 
1407   // Verify the changed indvar.
1408   auto *OuterIV = cast<BinaryOperator>(Call->getOperand(1));
1409   EXPECT_EQ(OuterIV->getOpcode(), Instruction::UDiv);
1410   EXPECT_EQ(OuterIV->getParent(), Collapsed->getBody());
1411   EXPECT_EQ(OuterIV->getOperand(1), InnerTripCount);
1412   EXPECT_EQ(OuterIV->getOperand(0), Collapsed->getIndVar());
1413 
1414   auto *InnerIV = cast<BinaryOperator>(Call->getOperand(2));
1415   EXPECT_EQ(InnerIV->getOpcode(), Instruction::URem);
1416   EXPECT_EQ(InnerIV->getParent(), Collapsed->getBody());
1417   EXPECT_EQ(InnerIV->getOperand(0), Collapsed->getIndVar());
1418   EXPECT_EQ(InnerIV->getOperand(1), InnerTripCount);
1419 
1420   EXPECT_EQ(InbetweenLead->getOperand(1), OuterIV);
1421   EXPECT_EQ(InbetweenTrail->getOperand(1), OuterIV);
1422 }
1423 
1424 TEST_F(OpenMPIRBuilderTest, TileSingleLoop) {
1425   OpenMPIRBuilder OMPBuilder(*M);
1426   CallInst *Call;
1427   BasicBlock *BodyCode;
1428   CanonicalLoopInfo *Loop =
1429       buildSingleLoopFunction(DL, OMPBuilder, 32, &Call, &BodyCode);
1430 
1431   Instruction *OrigIndVar = Loop->getIndVar();
1432   EXPECT_EQ(Call->getOperand(1), OrigIndVar);
1433 
1434   // Tile the loop.
1435   Constant *TileSize = ConstantInt::get(Loop->getIndVarType(), APInt(32, 7));
1436   std::vector<CanonicalLoopInfo *> GenLoops =
1437       OMPBuilder.tileLoops(DL, {Loop}, {TileSize});
1438 
1439   OMPBuilder.finalize();
1440   EXPECT_FALSE(verifyModule(*M, &errs()));
1441 
1442   EXPECT_EQ(GenLoops.size(), 2u);
1443   CanonicalLoopInfo *Floor = GenLoops[0];
1444   CanonicalLoopInfo *Tile = GenLoops[1];
1445 
1446   BasicBlock *RefOrder[] = {
1447       Floor->getPreheader(), Floor->getHeader(),   Floor->getCond(),
1448       Floor->getBody(),      Tile->getPreheader(), Tile->getHeader(),
1449       Tile->getCond(),       Tile->getBody(),      BodyCode,
1450       Tile->getLatch(),      Tile->getExit(),      Tile->getAfter(),
1451       Floor->getLatch(),     Floor->getExit(),     Floor->getAfter(),
1452   };
1453   EXPECT_TRUE(verifyDFSOrder(F, RefOrder));
1454   EXPECT_TRUE(verifyListOrder(F, RefOrder));
1455 
1456   // Check the induction variable.
1457   EXPECT_EQ(Call->getParent(), BodyCode);
1458   auto *Shift = cast<AddOperator>(Call->getOperand(1));
1459   EXPECT_EQ(cast<Instruction>(Shift)->getParent(), Tile->getBody());
1460   EXPECT_EQ(Shift->getOperand(1), Tile->getIndVar());
1461   auto *Scale = cast<MulOperator>(Shift->getOperand(0));
1462   EXPECT_EQ(cast<Instruction>(Scale)->getParent(), Tile->getBody());
1463   EXPECT_EQ(Scale->getOperand(0), TileSize);
1464   EXPECT_EQ(Scale->getOperand(1), Floor->getIndVar());
1465 }
1466 
1467 TEST_F(OpenMPIRBuilderTest, TileNestedLoops) {
1468   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1469   OpenMPIRBuilder OMPBuilder(*M);
1470   OMPBuilder.initialize();
1471   F->setName("func");
1472 
1473   IRBuilder<> Builder(BB);
1474   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
1475   Value *TripCount = F->getArg(0);
1476   Type *LCTy = TripCount->getType();
1477 
1478   BasicBlock *BodyCode = nullptr;
1479   CanonicalLoopInfo *InnerLoop = nullptr;
1480   auto OuterLoopBodyGenCB = [&](InsertPointTy OuterCodeGenIP,
1481                                 llvm::Value *OuterLC) {
1482     auto InnerLoopBodyGenCB = [&](InsertPointTy InnerCodeGenIP,
1483                                   llvm::Value *InnerLC) {
1484       Builder.restoreIP(InnerCodeGenIP);
1485       BodyCode = Builder.GetInsertBlock();
1486 
1487       // Add something that consumes the induction variables to the body.
1488       createPrintfCall(Builder, "i=%d j=%d\\n", {OuterLC, InnerLC});
1489     };
1490     InnerLoop = OMPBuilder.createCanonicalLoop(
1491         OuterCodeGenIP, InnerLoopBodyGenCB, TripCount, "inner");
1492   };
1493   CanonicalLoopInfo *OuterLoop = OMPBuilder.createCanonicalLoop(
1494       Loc, OuterLoopBodyGenCB, TripCount, "outer");
1495 
1496   // Finalize the function.
1497   Builder.restoreIP(OuterLoop->getAfterIP());
1498   Builder.CreateRetVoid();
1499 
1500   // Tile to loop nest.
1501   Constant *OuterTileSize = ConstantInt::get(LCTy, APInt(32, 11));
1502   Constant *InnerTileSize = ConstantInt::get(LCTy, APInt(32, 7));
1503   std::vector<CanonicalLoopInfo *> GenLoops = OMPBuilder.tileLoops(
1504       DL, {OuterLoop, InnerLoop}, {OuterTileSize, InnerTileSize});
1505 
1506   OMPBuilder.finalize();
1507   EXPECT_FALSE(verifyModule(*M, &errs()));
1508 
1509   EXPECT_EQ(GenLoops.size(), 4u);
1510   CanonicalLoopInfo *Floor1 = GenLoops[0];
1511   CanonicalLoopInfo *Floor2 = GenLoops[1];
1512   CanonicalLoopInfo *Tile1 = GenLoops[2];
1513   CanonicalLoopInfo *Tile2 = GenLoops[3];
1514 
1515   BasicBlock *RefOrder[] = {
1516       Floor1->getPreheader(),
1517       Floor1->getHeader(),
1518       Floor1->getCond(),
1519       Floor1->getBody(),
1520       Floor2->getPreheader(),
1521       Floor2->getHeader(),
1522       Floor2->getCond(),
1523       Floor2->getBody(),
1524       Tile1->getPreheader(),
1525       Tile1->getHeader(),
1526       Tile1->getCond(),
1527       Tile1->getBody(),
1528       Tile2->getPreheader(),
1529       Tile2->getHeader(),
1530       Tile2->getCond(),
1531       Tile2->getBody(),
1532       BodyCode,
1533       Tile2->getLatch(),
1534       Tile2->getExit(),
1535       Tile2->getAfter(),
1536       Tile1->getLatch(),
1537       Tile1->getExit(),
1538       Tile1->getAfter(),
1539       Floor2->getLatch(),
1540       Floor2->getExit(),
1541       Floor2->getAfter(),
1542       Floor1->getLatch(),
1543       Floor1->getExit(),
1544       Floor1->getAfter(),
1545   };
1546   EXPECT_TRUE(verifyDFSOrder(F, RefOrder));
1547   EXPECT_TRUE(verifyListOrder(F, RefOrder));
1548 }
1549 
1550 TEST_F(OpenMPIRBuilderTest, TileNestedLoopsWithBounds) {
1551   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1552   OpenMPIRBuilder OMPBuilder(*M);
1553   OMPBuilder.initialize();
1554   F->setName("func");
1555 
1556   IRBuilder<> Builder(BB);
1557   Value *TripCount = F->getArg(0);
1558   Type *LCTy = TripCount->getType();
1559 
1560   Value *OuterStartVal = ConstantInt::get(LCTy, 2);
1561   Value *OuterStopVal = TripCount;
1562   Value *OuterStep = ConstantInt::get(LCTy, 5);
1563   Value *InnerStartVal = ConstantInt::get(LCTy, 13);
1564   Value *InnerStopVal = TripCount;
1565   Value *InnerStep = ConstantInt::get(LCTy, 3);
1566 
1567   // Fix an insertion point for ComputeIP.
1568   BasicBlock *LoopNextEnter =
1569       BasicBlock::Create(M->getContext(), "loopnest.enter", F,
1570                          Builder.GetInsertBlock()->getNextNode());
1571   BranchInst *EnterBr = Builder.CreateBr(LoopNextEnter);
1572   InsertPointTy ComputeIP{EnterBr->getParent(), EnterBr->getIterator()};
1573 
1574   InsertPointTy LoopIP{LoopNextEnter, LoopNextEnter->begin()};
1575   OpenMPIRBuilder::LocationDescription Loc({LoopIP, DL});
1576 
1577   BasicBlock *BodyCode = nullptr;
1578   CanonicalLoopInfo *InnerLoop = nullptr;
1579   CallInst *Call = nullptr;
1580   auto OuterLoopBodyGenCB = [&](InsertPointTy OuterCodeGenIP,
1581                                 llvm::Value *OuterLC) {
1582     auto InnerLoopBodyGenCB = [&](InsertPointTy InnerCodeGenIP,
1583                                   llvm::Value *InnerLC) {
1584       Builder.restoreIP(InnerCodeGenIP);
1585       BodyCode = Builder.GetInsertBlock();
1586 
1587       // Add something that consumes the induction variable to the body.
1588       Call = createPrintfCall(Builder, "i=%d j=%d\\n", {OuterLC, InnerLC});
1589     };
1590     InnerLoop = OMPBuilder.createCanonicalLoop(
1591         OuterCodeGenIP, InnerLoopBodyGenCB, InnerStartVal, InnerStopVal,
1592         InnerStep, false, false, ComputeIP, "inner");
1593   };
1594   CanonicalLoopInfo *OuterLoop = OMPBuilder.createCanonicalLoop(
1595       Loc, OuterLoopBodyGenCB, OuterStartVal, OuterStopVal, OuterStep, false,
1596       false, ComputeIP, "outer");
1597 
1598   // Finalize the function
1599   Builder.restoreIP(OuterLoop->getAfterIP());
1600   Builder.CreateRetVoid();
1601 
1602   // Tile the loop nest.
1603   Constant *TileSize0 = ConstantInt::get(LCTy, APInt(32, 11));
1604   Constant *TileSize1 = ConstantInt::get(LCTy, APInt(32, 7));
1605   std::vector<CanonicalLoopInfo *> GenLoops =
1606       OMPBuilder.tileLoops(DL, {OuterLoop, InnerLoop}, {TileSize0, TileSize1});
1607 
1608   OMPBuilder.finalize();
1609   EXPECT_FALSE(verifyModule(*M, &errs()));
1610 
1611   EXPECT_EQ(GenLoops.size(), 4u);
1612   CanonicalLoopInfo *Floor0 = GenLoops[0];
1613   CanonicalLoopInfo *Floor1 = GenLoops[1];
1614   CanonicalLoopInfo *Tile0 = GenLoops[2];
1615   CanonicalLoopInfo *Tile1 = GenLoops[3];
1616 
1617   BasicBlock *RefOrder[] = {
1618       Floor0->getPreheader(),
1619       Floor0->getHeader(),
1620       Floor0->getCond(),
1621       Floor0->getBody(),
1622       Floor1->getPreheader(),
1623       Floor1->getHeader(),
1624       Floor1->getCond(),
1625       Floor1->getBody(),
1626       Tile0->getPreheader(),
1627       Tile0->getHeader(),
1628       Tile0->getCond(),
1629       Tile0->getBody(),
1630       Tile1->getPreheader(),
1631       Tile1->getHeader(),
1632       Tile1->getCond(),
1633       Tile1->getBody(),
1634       BodyCode,
1635       Tile1->getLatch(),
1636       Tile1->getExit(),
1637       Tile1->getAfter(),
1638       Tile0->getLatch(),
1639       Tile0->getExit(),
1640       Tile0->getAfter(),
1641       Floor1->getLatch(),
1642       Floor1->getExit(),
1643       Floor1->getAfter(),
1644       Floor0->getLatch(),
1645       Floor0->getExit(),
1646       Floor0->getAfter(),
1647   };
1648   EXPECT_TRUE(verifyDFSOrder(F, RefOrder));
1649   EXPECT_TRUE(verifyListOrder(F, RefOrder));
1650 
1651   EXPECT_EQ(Call->getParent(), BodyCode);
1652 
1653   auto *RangeShift0 = cast<AddOperator>(Call->getOperand(1));
1654   EXPECT_EQ(RangeShift0->getOperand(1), OuterStartVal);
1655   auto *RangeScale0 = cast<MulOperator>(RangeShift0->getOperand(0));
1656   EXPECT_EQ(RangeScale0->getOperand(1), OuterStep);
1657   auto *TileShift0 = cast<AddOperator>(RangeScale0->getOperand(0));
1658   EXPECT_EQ(cast<Instruction>(TileShift0)->getParent(), Tile1->getBody());
1659   EXPECT_EQ(TileShift0->getOperand(1), Tile0->getIndVar());
1660   auto *TileScale0 = cast<MulOperator>(TileShift0->getOperand(0));
1661   EXPECT_EQ(cast<Instruction>(TileScale0)->getParent(), Tile1->getBody());
1662   EXPECT_EQ(TileScale0->getOperand(0), TileSize0);
1663   EXPECT_EQ(TileScale0->getOperand(1), Floor0->getIndVar());
1664 
1665   auto *RangeShift1 = cast<AddOperator>(Call->getOperand(2));
1666   EXPECT_EQ(cast<Instruction>(RangeShift1)->getParent(), BodyCode);
1667   EXPECT_EQ(RangeShift1->getOperand(1), InnerStartVal);
1668   auto *RangeScale1 = cast<MulOperator>(RangeShift1->getOperand(0));
1669   EXPECT_EQ(cast<Instruction>(RangeScale1)->getParent(), BodyCode);
1670   EXPECT_EQ(RangeScale1->getOperand(1), InnerStep);
1671   auto *TileShift1 = cast<AddOperator>(RangeScale1->getOperand(0));
1672   EXPECT_EQ(cast<Instruction>(TileShift1)->getParent(), Tile1->getBody());
1673   EXPECT_EQ(TileShift1->getOperand(1), Tile1->getIndVar());
1674   auto *TileScale1 = cast<MulOperator>(TileShift1->getOperand(0));
1675   EXPECT_EQ(cast<Instruction>(TileScale1)->getParent(), Tile1->getBody());
1676   EXPECT_EQ(TileScale1->getOperand(0), TileSize1);
1677   EXPECT_EQ(TileScale1->getOperand(1), Floor1->getIndVar());
1678 }
1679 
1680 TEST_F(OpenMPIRBuilderTest, TileSingleLoopCounts) {
1681   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1682   OpenMPIRBuilder OMPBuilder(*M);
1683   OMPBuilder.initialize();
1684   IRBuilder<> Builder(BB);
1685 
1686   // Create a loop, tile it, and extract its trip count. All input values are
1687   // constant and IRBuilder evaluates all-constant arithmetic inplace, such that
1688   // the floor trip count itself will be a ConstantInt. Unfortunately we cannot
1689   // do the same for the tile loop.
1690   auto GetFloorCount = [&](int64_t Start, int64_t Stop, int64_t Step,
1691                            bool IsSigned, bool InclusiveStop,
1692                            int64_t TileSize) -> uint64_t {
1693     OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL);
1694     Type *LCTy = Type::getInt16Ty(Ctx);
1695     Value *StartVal = ConstantInt::get(LCTy, Start);
1696     Value *StopVal = ConstantInt::get(LCTy, Stop);
1697     Value *StepVal = ConstantInt::get(LCTy, Step);
1698 
1699     // Generate a loop.
1700     auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) {};
1701     CanonicalLoopInfo *Loop =
1702         OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, StartVal, StopVal,
1703                                        StepVal, IsSigned, InclusiveStop);
1704     InsertPointTy AfterIP = Loop->getAfterIP();
1705 
1706     // Tile the loop.
1707     Value *TileSizeVal = ConstantInt::get(LCTy, TileSize);
1708     std::vector<CanonicalLoopInfo *> GenLoops =
1709         OMPBuilder.tileLoops(Loc.DL, {Loop}, {TileSizeVal});
1710 
1711     // Set the insertion pointer to after loop, where the next loop will be
1712     // emitted.
1713     Builder.restoreIP(AfterIP);
1714 
1715     // Extract the trip count.
1716     CanonicalLoopInfo *FloorLoop = GenLoops[0];
1717     Value *FloorTripCount = FloorLoop->getTripCount();
1718     return cast<ConstantInt>(FloorTripCount)->getValue().getZExtValue();
1719   };
1720 
1721   // Empty iteration domain.
1722   EXPECT_EQ(GetFloorCount(0, 0, 1, false, false, 7), 0u);
1723   EXPECT_EQ(GetFloorCount(0, -1, 1, false, true, 7), 0u);
1724   EXPECT_EQ(GetFloorCount(-1, -1, -1, true, false, 7), 0u);
1725   EXPECT_EQ(GetFloorCount(-1, 0, -1, true, true, 7), 0u);
1726   EXPECT_EQ(GetFloorCount(-1, -1, 3, true, false, 7), 0u);
1727 
1728   // Only complete tiles.
1729   EXPECT_EQ(GetFloorCount(0, 14, 1, false, false, 7), 2u);
1730   EXPECT_EQ(GetFloorCount(0, 14, 1, false, false, 7), 2u);
1731   EXPECT_EQ(GetFloorCount(1, 15, 1, false, false, 7), 2u);
1732   EXPECT_EQ(GetFloorCount(0, -14, -1, true, false, 7), 2u);
1733   EXPECT_EQ(GetFloorCount(-1, -14, -1, true, true, 7), 2u);
1734   EXPECT_EQ(GetFloorCount(0, 3 * 7 * 2, 3, false, false, 7), 2u);
1735 
1736   // Only a partial tile.
1737   EXPECT_EQ(GetFloorCount(0, 1, 1, false, false, 7), 1u);
1738   EXPECT_EQ(GetFloorCount(0, 6, 1, false, false, 7), 1u);
1739   EXPECT_EQ(GetFloorCount(-1, 1, 3, true, false, 7), 1u);
1740   EXPECT_EQ(GetFloorCount(-1, -2, -1, true, false, 7), 1u);
1741   EXPECT_EQ(GetFloorCount(0, 2, 3, false, false, 7), 1u);
1742 
1743   // Complete and partial tiles.
1744   EXPECT_EQ(GetFloorCount(0, 13, 1, false, false, 7), 2u);
1745   EXPECT_EQ(GetFloorCount(0, 15, 1, false, false, 7), 3u);
1746   EXPECT_EQ(GetFloorCount(-1, -14, -1, true, false, 7), 2u);
1747   EXPECT_EQ(GetFloorCount(0, 3 * 7 * 5 - 1, 3, false, false, 7), 5u);
1748   EXPECT_EQ(GetFloorCount(-1, -3 * 7 * 5, -3, true, false, 7), 5u);
1749 
1750   // Close to 16-bit integer range.
1751   EXPECT_EQ(GetFloorCount(0, 0xFFFF, 1, false, false, 1), 0xFFFFu);
1752   EXPECT_EQ(GetFloorCount(0, 0xFFFF, 1, false, false, 7), 0xFFFFu / 7 + 1);
1753   EXPECT_EQ(GetFloorCount(0, 0xFFFE, 1, false, true, 7), 0xFFFFu / 7 + 1);
1754   EXPECT_EQ(GetFloorCount(-0x8000, 0x7FFF, 1, true, false, 7), 0xFFFFu / 7 + 1);
1755   EXPECT_EQ(GetFloorCount(-0x7FFF, 0x7FFF, 1, true, true, 7), 0xFFFFu / 7 + 1);
1756   EXPECT_EQ(GetFloorCount(0, 0xFFFE, 1, false, false, 0xFFFF), 1u);
1757   EXPECT_EQ(GetFloorCount(-0x8000, 0x7FFF, 1, true, false, 0xFFFF), 1u);
1758 
1759   // Finalize the function.
1760   Builder.CreateRetVoid();
1761   OMPBuilder.finalize();
1762 
1763   EXPECT_FALSE(verifyModule(*M, &errs()));
1764 }
1765 
1766 TEST_F(OpenMPIRBuilderTest, ApplySimd) {
1767   OpenMPIRBuilder OMPBuilder(*M);
1768 
1769   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
1770 
1771   // Simd-ize the loop.
1772   OMPBuilder.applySimd(DL, CLI);
1773 
1774   OMPBuilder.finalize();
1775   EXPECT_FALSE(verifyModule(*M, &errs()));
1776 
1777   PassBuilder PB;
1778   FunctionAnalysisManager FAM;
1779   PB.registerFunctionAnalyses(FAM);
1780   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
1781 
1782   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
1783   EXPECT_EQ(TopLvl.size(), 1u);
1784 
1785   Loop *L = TopLvl.front();
1786   EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
1787   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
1788 
1789   // Check for llvm.access.group metadata attached to the printf
1790   // function in the loop body.
1791   BasicBlock *LoopBody = CLI->getBody();
1792   EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) {
1793     return I.getMetadata("llvm.access.group") != nullptr;
1794   }));
1795 }
1796 
1797 TEST_F(OpenMPIRBuilderTest, UnrollLoopFull) {
1798   OpenMPIRBuilder OMPBuilder(*M);
1799 
1800   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
1801 
1802   // Unroll the loop.
1803   OMPBuilder.unrollLoopFull(DL, CLI);
1804 
1805   OMPBuilder.finalize();
1806   EXPECT_FALSE(verifyModule(*M, &errs()));
1807 
1808   PassBuilder PB;
1809   FunctionAnalysisManager FAM;
1810   PB.registerFunctionAnalyses(FAM);
1811   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
1812 
1813   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
1814   EXPECT_EQ(TopLvl.size(), 1u);
1815 
1816   Loop *L = TopLvl.front();
1817   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.unroll.enable"));
1818   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.unroll.full"));
1819 }
1820 
1821 TEST_F(OpenMPIRBuilderTest, UnrollLoopPartial) {
1822   OpenMPIRBuilder OMPBuilder(*M);
1823   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
1824 
1825   // Unroll the loop.
1826   CanonicalLoopInfo *UnrolledLoop = nullptr;
1827   OMPBuilder.unrollLoopPartial(DL, CLI, 5, &UnrolledLoop);
1828   ASSERT_NE(UnrolledLoop, nullptr);
1829 
1830   OMPBuilder.finalize();
1831   EXPECT_FALSE(verifyModule(*M, &errs()));
1832   UnrolledLoop->assertOK();
1833 
1834   PassBuilder PB;
1835   FunctionAnalysisManager FAM;
1836   PB.registerFunctionAnalyses(FAM);
1837   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
1838 
1839   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
1840   EXPECT_EQ(TopLvl.size(), 1u);
1841   Loop *Outer = TopLvl.front();
1842   EXPECT_EQ(Outer->getHeader(), UnrolledLoop->getHeader());
1843   EXPECT_EQ(Outer->getLoopLatch(), UnrolledLoop->getLatch());
1844   EXPECT_EQ(Outer->getExitingBlock(), UnrolledLoop->getCond());
1845   EXPECT_EQ(Outer->getExitBlock(), UnrolledLoop->getExit());
1846 
1847   EXPECT_EQ(Outer->getSubLoops().size(), 1u);
1848   Loop *Inner = Outer->getSubLoops().front();
1849 
1850   EXPECT_TRUE(getBooleanLoopAttribute(Inner, "llvm.loop.unroll.enable"));
1851   EXPECT_EQ(getIntLoopAttribute(Inner, "llvm.loop.unroll.count"), 5);
1852 }
1853 
1854 TEST_F(OpenMPIRBuilderTest, UnrollLoopHeuristic) {
1855   OpenMPIRBuilder OMPBuilder(*M);
1856 
1857   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
1858 
1859   // Unroll the loop.
1860   OMPBuilder.unrollLoopHeuristic(DL, CLI);
1861 
1862   OMPBuilder.finalize();
1863   EXPECT_FALSE(verifyModule(*M, &errs()));
1864 
1865   PassBuilder PB;
1866   FunctionAnalysisManager FAM;
1867   PB.registerFunctionAnalyses(FAM);
1868   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
1869 
1870   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
1871   EXPECT_EQ(TopLvl.size(), 1u);
1872 
1873   Loop *L = TopLvl.front();
1874   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.unroll.enable"));
1875 }
1876 
1877 TEST_F(OpenMPIRBuilderTest, StaticWorkShareLoop) {
1878   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1879   OpenMPIRBuilder OMPBuilder(*M);
1880   OMPBuilder.initialize();
1881   IRBuilder<> Builder(BB);
1882   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
1883 
1884   Type *LCTy = Type::getInt32Ty(Ctx);
1885   Value *StartVal = ConstantInt::get(LCTy, 10);
1886   Value *StopVal = ConstantInt::get(LCTy, 52);
1887   Value *StepVal = ConstantInt::get(LCTy, 2);
1888   auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) {};
1889 
1890   CanonicalLoopInfo *CLI = OMPBuilder.createCanonicalLoop(
1891       Loc, LoopBodyGen, StartVal, StopVal, StepVal,
1892       /*IsSigned=*/false, /*InclusiveStop=*/false);
1893   BasicBlock *Preheader = CLI->getPreheader();
1894   BasicBlock *Body = CLI->getBody();
1895   Value *IV = CLI->getIndVar();
1896   BasicBlock *ExitBlock = CLI->getExit();
1897 
1898   Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
1899   InsertPointTy AllocaIP = Builder.saveIP();
1900 
1901   OMPBuilder.applyStaticWorkshareLoop(DL, CLI, AllocaIP, /*NeedsBarrier=*/true);
1902 
1903   BasicBlock *Cond = Body->getSinglePredecessor();
1904   Instruction *Cmp = &*Cond->begin();
1905   Value *TripCount = Cmp->getOperand(1);
1906 
1907   auto AllocaIter = BB->begin();
1908   ASSERT_GE(std::distance(BB->begin(), BB->end()), 4);
1909   AllocaInst *PLastIter = dyn_cast<AllocaInst>(&*(AllocaIter++));
1910   AllocaInst *PLowerBound = dyn_cast<AllocaInst>(&*(AllocaIter++));
1911   AllocaInst *PUpperBound = dyn_cast<AllocaInst>(&*(AllocaIter++));
1912   AllocaInst *PStride = dyn_cast<AllocaInst>(&*(AllocaIter++));
1913   EXPECT_NE(PLastIter, nullptr);
1914   EXPECT_NE(PLowerBound, nullptr);
1915   EXPECT_NE(PUpperBound, nullptr);
1916   EXPECT_NE(PStride, nullptr);
1917 
1918   auto PreheaderIter = Preheader->begin();
1919   ASSERT_GE(std::distance(Preheader->begin(), Preheader->end()), 7);
1920   StoreInst *LowerBoundStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
1921   StoreInst *UpperBoundStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
1922   StoreInst *StrideStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
1923   ASSERT_NE(LowerBoundStore, nullptr);
1924   ASSERT_NE(UpperBoundStore, nullptr);
1925   ASSERT_NE(StrideStore, nullptr);
1926 
1927   auto *OrigLowerBound =
1928       dyn_cast<ConstantInt>(LowerBoundStore->getValueOperand());
1929   auto *OrigUpperBound =
1930       dyn_cast<ConstantInt>(UpperBoundStore->getValueOperand());
1931   auto *OrigStride = dyn_cast<ConstantInt>(StrideStore->getValueOperand());
1932   ASSERT_NE(OrigLowerBound, nullptr);
1933   ASSERT_NE(OrigUpperBound, nullptr);
1934   ASSERT_NE(OrigStride, nullptr);
1935   EXPECT_EQ(OrigLowerBound->getValue(), 0);
1936   EXPECT_EQ(OrigUpperBound->getValue(), 20);
1937   EXPECT_EQ(OrigStride->getValue(), 1);
1938 
1939   // Check that the loop IV is updated to account for the lower bound returned
1940   // by the OpenMP runtime call.
1941   BinaryOperator *Add = dyn_cast<BinaryOperator>(&Body->front());
1942   EXPECT_EQ(Add->getOperand(0), IV);
1943   auto *LoadedLowerBound = dyn_cast<LoadInst>(Add->getOperand(1));
1944   ASSERT_NE(LoadedLowerBound, nullptr);
1945   EXPECT_EQ(LoadedLowerBound->getPointerOperand(), PLowerBound);
1946 
1947   // Check that the trip count is updated to account for the lower and upper
1948   // bounds return by the OpenMP runtime call.
1949   auto *AddOne = dyn_cast<Instruction>(TripCount);
1950   ASSERT_NE(AddOne, nullptr);
1951   ASSERT_TRUE(AddOne->isBinaryOp());
1952   auto *One = dyn_cast<ConstantInt>(AddOne->getOperand(1));
1953   ASSERT_NE(One, nullptr);
1954   EXPECT_EQ(One->getValue(), 1);
1955   auto *Difference = dyn_cast<Instruction>(AddOne->getOperand(0));
1956   ASSERT_NE(Difference, nullptr);
1957   ASSERT_TRUE(Difference->isBinaryOp());
1958   EXPECT_EQ(Difference->getOperand(1), LoadedLowerBound);
1959   auto *LoadedUpperBound = dyn_cast<LoadInst>(Difference->getOperand(0));
1960   ASSERT_NE(LoadedUpperBound, nullptr);
1961   EXPECT_EQ(LoadedUpperBound->getPointerOperand(), PUpperBound);
1962 
1963   // The original loop iterator should only be used in the condition, in the
1964   // increment and in the statement that adds the lower bound to it.
1965   EXPECT_EQ(std::distance(IV->use_begin(), IV->use_end()), 3);
1966 
1967   // The exit block should contain the "fini" call and the barrier call,
1968   // plus the call to obtain the thread ID.
1969   size_t NumCallsInExitBlock =
1970       count_if(*ExitBlock, [](Instruction &I) { return isa<CallInst>(I); });
1971   EXPECT_EQ(NumCallsInExitBlock, 3u);
1972 }
1973 
1974 TEST_P(OpenMPIRBuilderTestWithIVBits, StaticChunkedWorkshareLoop) {
1975   unsigned IVBits = GetParam();
1976 
1977   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1978   OpenMPIRBuilder OMPBuilder(*M);
1979 
1980   BasicBlock *Body;
1981   CallInst *Call;
1982   CanonicalLoopInfo *CLI =
1983       buildSingleLoopFunction(DL, OMPBuilder, IVBits, &Call, &Body);
1984 
1985   Instruction *OrigIndVar = CLI->getIndVar();
1986   EXPECT_EQ(Call->getOperand(1), OrigIndVar);
1987 
1988   Type *LCTy = Type::getInt32Ty(Ctx);
1989   Value *ChunkSize = ConstantInt::get(LCTy, 5);
1990   InsertPointTy AllocaIP{&F->getEntryBlock(),
1991                          F->getEntryBlock().getFirstInsertionPt()};
1992   OMPBuilder.applyStaticChunkedWorkshareLoop(DL, CLI, AllocaIP,
1993                                              /*NeedsBarrier=*/true, ChunkSize);
1994 
1995   OMPBuilder.finalize();
1996   EXPECT_FALSE(verifyModule(*M, &errs()));
1997 
1998   BasicBlock *Entry = &F->getEntryBlock();
1999   BasicBlock *Preheader = Entry->getSingleSuccessor();
2000 
2001   BasicBlock *DispatchPreheader = Preheader->getSingleSuccessor();
2002   BasicBlock *DispatchHeader = DispatchPreheader->getSingleSuccessor();
2003   BasicBlock *DispatchCond = DispatchHeader->getSingleSuccessor();
2004   BasicBlock *DispatchBody = succ_begin(DispatchCond)[0];
2005   BasicBlock *DispatchExit = succ_begin(DispatchCond)[1];
2006   BasicBlock *DispatchAfter = DispatchExit->getSingleSuccessor();
2007   BasicBlock *Return = DispatchAfter->getSingleSuccessor();
2008 
2009   BasicBlock *ChunkPreheader = DispatchBody->getSingleSuccessor();
2010   BasicBlock *ChunkHeader = ChunkPreheader->getSingleSuccessor();
2011   BasicBlock *ChunkCond = ChunkHeader->getSingleSuccessor();
2012   BasicBlock *ChunkBody = succ_begin(ChunkCond)[0];
2013   BasicBlock *ChunkExit = succ_begin(ChunkCond)[1];
2014   BasicBlock *ChunkInc = ChunkBody->getSingleSuccessor();
2015   BasicBlock *ChunkAfter = ChunkExit->getSingleSuccessor();
2016 
2017   BasicBlock *DispatchInc = ChunkAfter;
2018 
2019   EXPECT_EQ(ChunkBody, Body);
2020   EXPECT_EQ(ChunkInc->getSingleSuccessor(), ChunkHeader);
2021   EXPECT_EQ(DispatchInc->getSingleSuccessor(), DispatchHeader);
2022 
2023   EXPECT_TRUE(isa<ReturnInst>(Return->front()));
2024 
2025   Value *NewIV = Call->getOperand(1);
2026   EXPECT_EQ(NewIV->getType()->getScalarSizeInBits(), IVBits);
2027 
2028   CallInst *InitCall = findSingleCall(
2029       F,
2030       (IVBits > 32) ? omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u
2031                     : omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u,
2032       OMPBuilder);
2033   EXPECT_EQ(InitCall->getParent(), Preheader);
2034   EXPECT_EQ(cast<ConstantInt>(InitCall->getArgOperand(2))->getSExtValue(), 33);
2035   EXPECT_EQ(cast<ConstantInt>(InitCall->getArgOperand(7))->getSExtValue(), 1);
2036   EXPECT_EQ(cast<ConstantInt>(InitCall->getArgOperand(8))->getSExtValue(), 5);
2037 
2038   CallInst *FiniCall = findSingleCall(
2039       F, omp::RuntimeFunction::OMPRTL___kmpc_for_static_fini, OMPBuilder);
2040   EXPECT_EQ(FiniCall->getParent(), DispatchExit);
2041 
2042   CallInst *BarrierCall = findSingleCall(
2043       F, omp::RuntimeFunction::OMPRTL___kmpc_barrier, OMPBuilder);
2044   EXPECT_EQ(BarrierCall->getParent(), DispatchExit);
2045 }
2046 
// Run every TEST_P defined on OpenMPIRBuilderTestWithIVBits once per value
// listed here. StaticChunkedWorkshareLoop reads the value through GetParam()
// and uses it as the bit width of the loop's induction variable.
INSTANTIATE_TEST_SUITE_P(IVBits, OpenMPIRBuilderTestWithIVBits,
                         ::testing::Values(8, 16, 32, 64));
2049 
2050 TEST_P(OpenMPIRBuilderTestWithParams, DynamicWorkShareLoop) {
2051   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2052   OpenMPIRBuilder OMPBuilder(*M);
2053   OMPBuilder.initialize();
2054   IRBuilder<> Builder(BB);
2055   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2056 
2057   omp::OMPScheduleType SchedType = GetParam();
2058   uint32_t ChunkSize = 1;
2059   switch (SchedType & ~omp::OMPScheduleType::ModifierMask) {
2060   case omp::OMPScheduleType::DynamicChunked:
2061   case omp::OMPScheduleType::GuidedChunked:
2062     ChunkSize = 7;
2063     break;
2064   case omp::OMPScheduleType::Auto:
2065   case omp::OMPScheduleType::Runtime:
2066     ChunkSize = 1;
2067     break;
2068   default:
2069     assert(0 && "unknown type for this test");
2070     break;
2071   }
2072 
2073   Type *LCTy = Type::getInt32Ty(Ctx);
2074   Value *StartVal = ConstantInt::get(LCTy, 10);
2075   Value *StopVal = ConstantInt::get(LCTy, 52);
2076   Value *StepVal = ConstantInt::get(LCTy, 2);
2077   Value *ChunkVal = ConstantInt::get(LCTy, ChunkSize);
2078   auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) {};
2079 
2080   CanonicalLoopInfo *CLI = OMPBuilder.createCanonicalLoop(
2081       Loc, LoopBodyGen, StartVal, StopVal, StepVal,
2082       /*IsSigned=*/false, /*InclusiveStop=*/false);
2083 
2084   Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
2085   InsertPointTy AllocaIP = Builder.saveIP();
2086 
2087   // Collect all the info from CLI, as it isn't usable after the call to
2088   // createDynamicWorkshareLoop.
2089   InsertPointTy AfterIP = CLI->getAfterIP();
2090   BasicBlock *Preheader = CLI->getPreheader();
2091   BasicBlock *ExitBlock = CLI->getExit();
2092   Value *IV = CLI->getIndVar();
2093 
2094   InsertPointTy EndIP =
2095       OMPBuilder.applyDynamicWorkshareLoop(DL, CLI, AllocaIP, SchedType,
2096                                            /*NeedsBarrier=*/true, ChunkVal);
2097   // The returned value should be the "after" point.
2098   ASSERT_EQ(EndIP.getBlock(), AfterIP.getBlock());
2099   ASSERT_EQ(EndIP.getPoint(), AfterIP.getPoint());
2100 
2101   auto AllocaIter = BB->begin();
2102   ASSERT_GE(std::distance(BB->begin(), BB->end()), 4);
2103   AllocaInst *PLastIter = dyn_cast<AllocaInst>(&*(AllocaIter++));
2104   AllocaInst *PLowerBound = dyn_cast<AllocaInst>(&*(AllocaIter++));
2105   AllocaInst *PUpperBound = dyn_cast<AllocaInst>(&*(AllocaIter++));
2106   AllocaInst *PStride = dyn_cast<AllocaInst>(&*(AllocaIter++));
2107   EXPECT_NE(PLastIter, nullptr);
2108   EXPECT_NE(PLowerBound, nullptr);
2109   EXPECT_NE(PUpperBound, nullptr);
2110   EXPECT_NE(PStride, nullptr);
2111 
2112   auto PreheaderIter = Preheader->begin();
2113   ASSERT_GE(std::distance(Preheader->begin(), Preheader->end()), 6);
2114   StoreInst *LowerBoundStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
2115   StoreInst *UpperBoundStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
2116   StoreInst *StrideStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
2117   ASSERT_NE(LowerBoundStore, nullptr);
2118   ASSERT_NE(UpperBoundStore, nullptr);
2119   ASSERT_NE(StrideStore, nullptr);
2120 
2121   CallInst *ThreadIdCall = dyn_cast<CallInst>(&*(PreheaderIter++));
2122   ASSERT_NE(ThreadIdCall, nullptr);
2123   EXPECT_EQ(ThreadIdCall->getCalledFunction()->getName(),
2124             "__kmpc_global_thread_num");
2125 
2126   CallInst *InitCall = dyn_cast<CallInst>(&*PreheaderIter);
2127 
2128   ASSERT_NE(InitCall, nullptr);
2129   EXPECT_EQ(InitCall->getCalledFunction()->getName(),
2130             "__kmpc_dispatch_init_4u");
2131   EXPECT_EQ(InitCall->arg_size(), 7U);
2132   EXPECT_EQ(InitCall->getArgOperand(6), ConstantInt::get(LCTy, ChunkSize));
2133   ConstantInt *SchedVal = cast<ConstantInt>(InitCall->getArgOperand(2));
2134   EXPECT_EQ(SchedVal->getValue(), static_cast<uint64_t>(SchedType));
2135 
2136   ConstantInt *OrigLowerBound =
2137       dyn_cast<ConstantInt>(LowerBoundStore->getValueOperand());
2138   ConstantInt *OrigUpperBound =
2139       dyn_cast<ConstantInt>(UpperBoundStore->getValueOperand());
2140   ConstantInt *OrigStride =
2141       dyn_cast<ConstantInt>(StrideStore->getValueOperand());
2142   ASSERT_NE(OrigLowerBound, nullptr);
2143   ASSERT_NE(OrigUpperBound, nullptr);
2144   ASSERT_NE(OrigStride, nullptr);
2145   EXPECT_EQ(OrigLowerBound->getValue(), 1);
2146   EXPECT_EQ(OrigUpperBound->getValue(), 21);
2147   EXPECT_EQ(OrigStride->getValue(), 1);
2148 
2149   // The original loop iterator should only be used in the condition, in the
2150   // increment and in the statement that adds the lower bound to it.
2151   EXPECT_EQ(std::distance(IV->use_begin(), IV->use_end()), 3);
2152 
2153   // The exit block should contain the barrier call, plus the call to obtain
2154   // the thread ID.
2155   size_t NumCallsInExitBlock =
2156       count_if(*ExitBlock, [](Instruction &I) { return isa<CallInst>(I); });
2157   EXPECT_EQ(NumCallsInExitBlock, 2u);
2158 
2159   // Add a termination to our block and check that it is internally consistent.
2160   Builder.restoreIP(EndIP);
2161   Builder.CreateRetVoid();
2162   OMPBuilder.finalize();
2163   EXPECT_FALSE(verifyModule(*M, &errs()));
2164 }
2165 
// Run every TEST_P defined on OpenMPIRBuilderTestWithParams (for example
// DynamicWorkShareLoop) once per schedule type listed here: the four plain
// schedule kinds first, followed by combinations of a schedule kind with a
// monotonic or nonmonotonic modifier.
INSTANTIATE_TEST_SUITE_P(
    OpenMPWSLoopSchedulingTypes, OpenMPIRBuilderTestWithParams,
    ::testing::Values(omp::OMPScheduleType::DynamicChunked,
                      omp::OMPScheduleType::GuidedChunked,
                      omp::OMPScheduleType::Auto, omp::OMPScheduleType::Runtime,
                      omp::OMPScheduleType::DynamicChunked |
                          omp::OMPScheduleType::ModifierMonotonic,
                      omp::OMPScheduleType::DynamicChunked |
                          omp::OMPScheduleType::ModifierNonmonotonic,
                      omp::OMPScheduleType::GuidedChunked |
                          omp::OMPScheduleType::ModifierMonotonic,
                      omp::OMPScheduleType::GuidedChunked |
                          omp::OMPScheduleType::ModifierNonmonotonic,
                      omp::OMPScheduleType::Auto |
                          omp::OMPScheduleType::ModifierMonotonic,
                      omp::OMPScheduleType::Runtime |
                          omp::OMPScheduleType::ModifierMonotonic));
2183 
2184 TEST_F(OpenMPIRBuilderTest, MasterDirective) {
2185   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2186   OpenMPIRBuilder OMPBuilder(*M);
2187   OMPBuilder.initialize();
2188   F->setName("func");
2189   IRBuilder<> Builder(BB);
2190 
2191   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2192 
2193   AllocaInst *PrivAI = nullptr;
2194 
2195   BasicBlock *EntryBB = nullptr;
2196   BasicBlock *ExitBB = nullptr;
2197   BasicBlock *ThenBB = nullptr;
2198 
2199   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
2200                        BasicBlock &FiniBB) {
2201     if (AllocaIP.isSet())
2202       Builder.restoreIP(AllocaIP);
2203     else
2204       Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt()));
2205     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
2206     Builder.CreateStore(F->arg_begin(), PrivAI);
2207 
2208     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
2209     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
2210     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
2211 
2212     Builder.restoreIP(CodeGenIP);
2213 
2214     // collect some info for checks later
2215     ExitBB = FiniBB.getUniqueSuccessor();
2216     ThenBB = Builder.GetInsertBlock();
2217     EntryBB = ThenBB->getUniquePredecessor();
2218 
2219     // simple instructions for body
2220     Value *PrivLoad =
2221         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
2222     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
2223   };
2224 
2225   auto FiniCB = [&](InsertPointTy IP) {
2226     BasicBlock *IPBB = IP.getBlock();
2227     EXPECT_NE(IPBB->end(), IP.getPoint());
2228   };
2229 
2230   Builder.restoreIP(OMPBuilder.createMaster(Builder, BodyGenCB, FiniCB));
2231   Value *EntryBBTI = EntryBB->getTerminator();
2232   EXPECT_NE(EntryBBTI, nullptr);
2233   EXPECT_TRUE(isa<BranchInst>(EntryBBTI));
2234   BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator());
2235   EXPECT_TRUE(EntryBr->isConditional());
2236   EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB);
2237   EXPECT_EQ(ThenBB->getUniqueSuccessor(), ExitBB);
2238   EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB);
2239 
2240   CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition());
2241   EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0)));
2242 
2243   CallInst *MasterEntryCI = cast<CallInst>(CondInst->getOperand(0));
2244   EXPECT_EQ(MasterEntryCI->arg_size(), 2U);
2245   EXPECT_EQ(MasterEntryCI->getCalledFunction()->getName(), "__kmpc_master");
2246   EXPECT_TRUE(isa<GlobalVariable>(MasterEntryCI->getArgOperand(0)));
2247 
2248   CallInst *MasterEndCI = nullptr;
2249   for (auto &FI : *ThenBB) {
2250     Instruction *cur = &FI;
2251     if (isa<CallInst>(cur)) {
2252       MasterEndCI = cast<CallInst>(cur);
2253       if (MasterEndCI->getCalledFunction()->getName() == "__kmpc_end_master")
2254         break;
2255       MasterEndCI = nullptr;
2256     }
2257   }
2258   EXPECT_NE(MasterEndCI, nullptr);
2259   EXPECT_EQ(MasterEndCI->arg_size(), 2U);
2260   EXPECT_TRUE(isa<GlobalVariable>(MasterEndCI->getArgOperand(0)));
2261   EXPECT_EQ(MasterEndCI->getArgOperand(1), MasterEntryCI->getArgOperand(1));
2262 }
2263 
2264 TEST_F(OpenMPIRBuilderTest, MaskedDirective) {
2265   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2266   OpenMPIRBuilder OMPBuilder(*M);
2267   OMPBuilder.initialize();
2268   F->setName("func");
2269   IRBuilder<> Builder(BB);
2270 
2271   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2272 
2273   AllocaInst *PrivAI = nullptr;
2274 
2275   BasicBlock *EntryBB = nullptr;
2276   BasicBlock *ExitBB = nullptr;
2277   BasicBlock *ThenBB = nullptr;
2278 
2279   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
2280                        BasicBlock &FiniBB) {
2281     if (AllocaIP.isSet())
2282       Builder.restoreIP(AllocaIP);
2283     else
2284       Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt()));
2285     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
2286     Builder.CreateStore(F->arg_begin(), PrivAI);
2287 
2288     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
2289     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
2290     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
2291 
2292     Builder.restoreIP(CodeGenIP);
2293 
2294     // collect some info for checks later
2295     ExitBB = FiniBB.getUniqueSuccessor();
2296     ThenBB = Builder.GetInsertBlock();
2297     EntryBB = ThenBB->getUniquePredecessor();
2298 
2299     // simple instructions for body
2300     Value *PrivLoad =
2301         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
2302     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
2303   };
2304 
2305   auto FiniCB = [&](InsertPointTy IP) {
2306     BasicBlock *IPBB = IP.getBlock();
2307     EXPECT_NE(IPBB->end(), IP.getPoint());
2308   };
2309 
2310   Constant *Filter = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0);
2311   Builder.restoreIP(
2312       OMPBuilder.createMasked(Builder, BodyGenCB, FiniCB, Filter));
2313   Value *EntryBBTI = EntryBB->getTerminator();
2314   EXPECT_NE(EntryBBTI, nullptr);
2315   EXPECT_TRUE(isa<BranchInst>(EntryBBTI));
2316   BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator());
2317   EXPECT_TRUE(EntryBr->isConditional());
2318   EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB);
2319   EXPECT_EQ(ThenBB->getUniqueSuccessor(), ExitBB);
2320   EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB);
2321 
2322   CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition());
2323   EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0)));
2324 
2325   CallInst *MaskedEntryCI = cast<CallInst>(CondInst->getOperand(0));
2326   EXPECT_EQ(MaskedEntryCI->arg_size(), 3U);
2327   EXPECT_EQ(MaskedEntryCI->getCalledFunction()->getName(), "__kmpc_masked");
2328   EXPECT_TRUE(isa<GlobalVariable>(MaskedEntryCI->getArgOperand(0)));
2329 
2330   CallInst *MaskedEndCI = nullptr;
2331   for (auto &FI : *ThenBB) {
2332     Instruction *cur = &FI;
2333     if (isa<CallInst>(cur)) {
2334       MaskedEndCI = cast<CallInst>(cur);
2335       if (MaskedEndCI->getCalledFunction()->getName() == "__kmpc_end_masked")
2336         break;
2337       MaskedEndCI = nullptr;
2338     }
2339   }
2340   EXPECT_NE(MaskedEndCI, nullptr);
2341   EXPECT_EQ(MaskedEndCI->arg_size(), 2U);
2342   EXPECT_TRUE(isa<GlobalVariable>(MaskedEndCI->getArgOperand(0)));
2343   EXPECT_EQ(MaskedEndCI->getArgOperand(1), MaskedEntryCI->getArgOperand(1));
2344 }
2345 
2346 TEST_F(OpenMPIRBuilderTest, CriticalDirective) {
2347   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2348   OpenMPIRBuilder OMPBuilder(*M);
2349   OMPBuilder.initialize();
2350   F->setName("func");
2351   IRBuilder<> Builder(BB);
2352 
2353   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2354 
2355   AllocaInst *PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
2356 
2357   BasicBlock *EntryBB = nullptr;
2358 
2359   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
2360                        BasicBlock &FiniBB) {
2361     // collect some info for checks later
2362     EntryBB = FiniBB.getUniquePredecessor();
2363 
2364     // actual start for bodyCB
2365     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
2366     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
2367     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
2368     EXPECT_EQ(EntryBB, CodeGenIPBB);
2369 
2370     // body begin
2371     Builder.restoreIP(CodeGenIP);
2372     Builder.CreateStore(F->arg_begin(), PrivAI);
2373     Value *PrivLoad =
2374         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
2375     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
2376   };
2377 
2378   auto FiniCB = [&](InsertPointTy IP) {
2379     BasicBlock *IPBB = IP.getBlock();
2380     EXPECT_NE(IPBB->end(), IP.getPoint());
2381   };
2382 
2383   Builder.restoreIP(OMPBuilder.createCritical(Builder, BodyGenCB, FiniCB,
2384                                               "testCRT", nullptr));
2385 
2386   Value *EntryBBTI = EntryBB->getTerminator();
2387   EXPECT_EQ(EntryBBTI, nullptr);
2388 
2389   CallInst *CriticalEntryCI = nullptr;
2390   for (auto &EI : *EntryBB) {
2391     Instruction *cur = &EI;
2392     if (isa<CallInst>(cur)) {
2393       CriticalEntryCI = cast<CallInst>(cur);
2394       if (CriticalEntryCI->getCalledFunction()->getName() == "__kmpc_critical")
2395         break;
2396       CriticalEntryCI = nullptr;
2397     }
2398   }
2399   EXPECT_NE(CriticalEntryCI, nullptr);
2400   EXPECT_EQ(CriticalEntryCI->arg_size(), 3U);
2401   EXPECT_EQ(CriticalEntryCI->getCalledFunction()->getName(), "__kmpc_critical");
2402   EXPECT_TRUE(isa<GlobalVariable>(CriticalEntryCI->getArgOperand(0)));
2403 
2404   CallInst *CriticalEndCI = nullptr;
2405   for (auto &FI : *EntryBB) {
2406     Instruction *cur = &FI;
2407     if (isa<CallInst>(cur)) {
2408       CriticalEndCI = cast<CallInst>(cur);
2409       if (CriticalEndCI->getCalledFunction()->getName() ==
2410           "__kmpc_end_critical")
2411         break;
2412       CriticalEndCI = nullptr;
2413     }
2414   }
2415   EXPECT_NE(CriticalEndCI, nullptr);
2416   EXPECT_EQ(CriticalEndCI->arg_size(), 3U);
2417   EXPECT_TRUE(isa<GlobalVariable>(CriticalEndCI->getArgOperand(0)));
2418   EXPECT_EQ(CriticalEndCI->getArgOperand(1), CriticalEntryCI->getArgOperand(1));
2419   PointerType *CriticalNamePtrTy =
2420       PointerType::getUnqual(ArrayType::get(Type::getInt32Ty(Ctx), 8));
2421   EXPECT_EQ(CriticalEndCI->getArgOperand(2), CriticalEntryCI->getArgOperand(2));
2422   EXPECT_EQ(CriticalEndCI->getArgOperand(2)->getType(), CriticalNamePtrTy);
2423 }
2424 
2425 TEST_F(OpenMPIRBuilderTest, OrderedDirectiveDependSource) {
2426   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2427   OpenMPIRBuilder OMPBuilder(*M);
2428   OMPBuilder.initialize();
2429   F->setName("func");
2430   IRBuilder<> Builder(BB);
2431   LLVMContext &Ctx = M->getContext();
2432 
2433   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2434 
2435   InsertPointTy AllocaIP(&F->getEntryBlock(),
2436                          F->getEntryBlock().getFirstInsertionPt());
2437 
2438   unsigned NumLoops = 2;
2439   SmallVector<Value *, 2> StoreValues;
2440   Type *LCTy = Type::getInt64Ty(Ctx);
2441   StoreValues.emplace_back(ConstantInt::get(LCTy, 1));
2442   StoreValues.emplace_back(ConstantInt::get(LCTy, 2));
2443 
2444   // Test for "#omp ordered depend(source)"
2445   Builder.restoreIP(OMPBuilder.createOrderedDepend(Builder, AllocaIP, NumLoops,
2446                                                    StoreValues, ".cnt.addr",
2447                                                    /*IsDependSource=*/true));
2448 
2449   Builder.CreateRetVoid();
2450   OMPBuilder.finalize();
2451   EXPECT_FALSE(verifyModule(*M, &errs()));
2452 
2453   AllocaInst *AllocInst = dyn_cast<AllocaInst>(&BB->front());
2454   ASSERT_NE(AllocInst, nullptr);
2455   ArrayType *ArrType = dyn_cast<ArrayType>(AllocInst->getAllocatedType());
2456   EXPECT_EQ(ArrType->getNumElements(), NumLoops);
2457   EXPECT_TRUE(
2458       AllocInst->getAllocatedType()->getArrayElementType()->isIntegerTy(64));
2459 
2460   Instruction *IterInst = dyn_cast<Instruction>(AllocInst);
2461   for (unsigned Iter = 0; Iter < NumLoops; Iter++) {
2462     GetElementPtrInst *DependAddrGEPIter =
2463         dyn_cast<GetElementPtrInst>(IterInst->getNextNode());
2464     ASSERT_NE(DependAddrGEPIter, nullptr);
2465     EXPECT_EQ(DependAddrGEPIter->getPointerOperand(), AllocInst);
2466     EXPECT_EQ(DependAddrGEPIter->getNumIndices(), (unsigned)2);
2467     auto *FirstIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(1));
2468     auto *SecondIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(2));
2469     ASSERT_NE(FirstIdx, nullptr);
2470     ASSERT_NE(SecondIdx, nullptr);
2471     EXPECT_EQ(FirstIdx->getValue(), 0);
2472     EXPECT_EQ(SecondIdx->getValue(), Iter);
2473     StoreInst *StoreValue =
2474         dyn_cast<StoreInst>(DependAddrGEPIter->getNextNode());
2475     ASSERT_NE(StoreValue, nullptr);
2476     EXPECT_EQ(StoreValue->getValueOperand(), StoreValues[Iter]);
2477     EXPECT_EQ(StoreValue->getPointerOperand(), DependAddrGEPIter);
2478     EXPECT_EQ(StoreValue->getAlignment(), 8UL);
2479     IterInst = dyn_cast<Instruction>(StoreValue);
2480   }
2481 
2482   GetElementPtrInst *DependBaseAddrGEP =
2483       dyn_cast<GetElementPtrInst>(IterInst->getNextNode());
2484   ASSERT_NE(DependBaseAddrGEP, nullptr);
2485   EXPECT_EQ(DependBaseAddrGEP->getPointerOperand(), AllocInst);
2486   EXPECT_EQ(DependBaseAddrGEP->getNumIndices(), (unsigned)2);
2487   auto *FirstIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(1));
2488   auto *SecondIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(2));
2489   ASSERT_NE(FirstIdx, nullptr);
2490   ASSERT_NE(SecondIdx, nullptr);
2491   EXPECT_EQ(FirstIdx->getValue(), 0);
2492   EXPECT_EQ(SecondIdx->getValue(), 0);
2493 
2494   CallInst *GTID = dyn_cast<CallInst>(DependBaseAddrGEP->getNextNode());
2495   ASSERT_NE(GTID, nullptr);
2496   EXPECT_EQ(GTID->arg_size(), 1U);
2497   EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
2498   EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory());
2499   EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory());
2500 
2501   CallInst *Depend = dyn_cast<CallInst>(GTID->getNextNode());
2502   ASSERT_NE(Depend, nullptr);
2503   EXPECT_EQ(Depend->arg_size(), 3U);
2504   EXPECT_EQ(Depend->getCalledFunction()->getName(), "__kmpc_doacross_post");
2505   EXPECT_TRUE(isa<GlobalVariable>(Depend->getArgOperand(0)));
2506   EXPECT_EQ(Depend->getArgOperand(1), GTID);
2507   EXPECT_EQ(Depend->getArgOperand(2), DependBaseAddrGEP);
2508 }
2509 
2510 TEST_F(OpenMPIRBuilderTest, OrderedDirectiveDependSink) {
2511   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2512   OpenMPIRBuilder OMPBuilder(*M);
2513   OMPBuilder.initialize();
2514   F->setName("func");
2515   IRBuilder<> Builder(BB);
2516   LLVMContext &Ctx = M->getContext();
2517 
2518   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2519 
2520   InsertPointTy AllocaIP(&F->getEntryBlock(),
2521                          F->getEntryBlock().getFirstInsertionPt());
2522 
2523   unsigned NumLoops = 2;
2524   SmallVector<Value *, 2> StoreValues;
2525   Type *LCTy = Type::getInt64Ty(Ctx);
2526   StoreValues.emplace_back(ConstantInt::get(LCTy, 1));
2527   StoreValues.emplace_back(ConstantInt::get(LCTy, 2));
2528 
2529   // Test for "#omp ordered depend(sink: vec)"
2530   Builder.restoreIP(OMPBuilder.createOrderedDepend(Builder, AllocaIP, NumLoops,
2531                                                    StoreValues, ".cnt.addr",
2532                                                    /*IsDependSource=*/false));
2533 
2534   Builder.CreateRetVoid();
2535   OMPBuilder.finalize();
2536   EXPECT_FALSE(verifyModule(*M, &errs()));
2537 
2538   AllocaInst *AllocInst = dyn_cast<AllocaInst>(&BB->front());
2539   ASSERT_NE(AllocInst, nullptr);
2540   ArrayType *ArrType = dyn_cast<ArrayType>(AllocInst->getAllocatedType());
2541   EXPECT_EQ(ArrType->getNumElements(), NumLoops);
2542   EXPECT_TRUE(
2543       AllocInst->getAllocatedType()->getArrayElementType()->isIntegerTy(64));
2544 
2545   Instruction *IterInst = dyn_cast<Instruction>(AllocInst);
2546   for (unsigned Iter = 0; Iter < NumLoops; Iter++) {
2547     GetElementPtrInst *DependAddrGEPIter =
2548         dyn_cast<GetElementPtrInst>(IterInst->getNextNode());
2549     ASSERT_NE(DependAddrGEPIter, nullptr);
2550     EXPECT_EQ(DependAddrGEPIter->getPointerOperand(), AllocInst);
2551     EXPECT_EQ(DependAddrGEPIter->getNumIndices(), (unsigned)2);
2552     auto *FirstIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(1));
2553     auto *SecondIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(2));
2554     ASSERT_NE(FirstIdx, nullptr);
2555     ASSERT_NE(SecondIdx, nullptr);
2556     EXPECT_EQ(FirstIdx->getValue(), 0);
2557     EXPECT_EQ(SecondIdx->getValue(), Iter);
2558     StoreInst *StoreValue =
2559         dyn_cast<StoreInst>(DependAddrGEPIter->getNextNode());
2560     ASSERT_NE(StoreValue, nullptr);
2561     EXPECT_EQ(StoreValue->getValueOperand(), StoreValues[Iter]);
2562     EXPECT_EQ(StoreValue->getPointerOperand(), DependAddrGEPIter);
2563     EXPECT_EQ(StoreValue->getAlignment(), 8UL);
2564     IterInst = dyn_cast<Instruction>(StoreValue);
2565   }
2566 
2567   GetElementPtrInst *DependBaseAddrGEP =
2568       dyn_cast<GetElementPtrInst>(IterInst->getNextNode());
2569   ASSERT_NE(DependBaseAddrGEP, nullptr);
2570   EXPECT_EQ(DependBaseAddrGEP->getPointerOperand(), AllocInst);
2571   EXPECT_EQ(DependBaseAddrGEP->getNumIndices(), (unsigned)2);
2572   auto *FirstIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(1));
2573   auto *SecondIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(2));
2574   ASSERT_NE(FirstIdx, nullptr);
2575   ASSERT_NE(SecondIdx, nullptr);
2576   EXPECT_EQ(FirstIdx->getValue(), 0);
2577   EXPECT_EQ(SecondIdx->getValue(), 0);
2578 
2579   CallInst *GTID = dyn_cast<CallInst>(DependBaseAddrGEP->getNextNode());
2580   ASSERT_NE(GTID, nullptr);
2581   EXPECT_EQ(GTID->arg_size(), 1U);
2582   EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
2583   EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory());
2584   EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory());
2585 
2586   CallInst *Depend = dyn_cast<CallInst>(GTID->getNextNode());
2587   ASSERT_NE(Depend, nullptr);
2588   EXPECT_EQ(Depend->arg_size(), 3U);
2589   EXPECT_EQ(Depend->getCalledFunction()->getName(), "__kmpc_doacross_wait");
2590   EXPECT_TRUE(isa<GlobalVariable>(Depend->getArgOperand(0)));
2591   EXPECT_EQ(Depend->getArgOperand(1), GTID);
2592   EXPECT_EQ(Depend->getArgOperand(2), DependBaseAddrGEP);
2593 }
2594 
2595 TEST_F(OpenMPIRBuilderTest, OrderedDirectiveThreads) {
2596   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2597   OpenMPIRBuilder OMPBuilder(*M);
2598   OMPBuilder.initialize();
2599   F->setName("func");
2600   IRBuilder<> Builder(BB);
2601 
2602   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2603 
2604   AllocaInst *PrivAI =
2605       Builder.CreateAlloca(F->arg_begin()->getType(), nullptr, "priv.inst");
2606 
2607   BasicBlock *EntryBB = nullptr;
2608 
2609   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
2610                        BasicBlock &FiniBB) {
2611     EntryBB = FiniBB.getUniquePredecessor();
2612 
2613     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
2614     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
2615     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
2616     EXPECT_EQ(EntryBB, CodeGenIPBB);
2617 
2618     Builder.restoreIP(CodeGenIP);
2619     Builder.CreateStore(F->arg_begin(), PrivAI);
2620     Value *PrivLoad =
2621         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
2622     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
2623   };
2624 
2625   auto FiniCB = [&](InsertPointTy IP) {
2626     BasicBlock *IPBB = IP.getBlock();
2627     EXPECT_NE(IPBB->end(), IP.getPoint());
2628   };
2629 
2630   // Test for "#omp ordered [threads]"
2631   Builder.restoreIP(
2632       OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, true));
2633 
2634   Builder.CreateRetVoid();
2635   OMPBuilder.finalize();
2636   EXPECT_FALSE(verifyModule(*M, &errs()));
2637 
2638   EXPECT_NE(EntryBB->getTerminator(), nullptr);
2639 
2640   CallInst *OrderedEntryCI = nullptr;
2641   for (auto &EI : *EntryBB) {
2642     Instruction *Cur = &EI;
2643     if (isa<CallInst>(Cur)) {
2644       OrderedEntryCI = cast<CallInst>(Cur);
2645       if (OrderedEntryCI->getCalledFunction()->getName() == "__kmpc_ordered")
2646         break;
2647       OrderedEntryCI = nullptr;
2648     }
2649   }
2650   EXPECT_NE(OrderedEntryCI, nullptr);
2651   EXPECT_EQ(OrderedEntryCI->arg_size(), 2U);
2652   EXPECT_EQ(OrderedEntryCI->getCalledFunction()->getName(), "__kmpc_ordered");
2653   EXPECT_TRUE(isa<GlobalVariable>(OrderedEntryCI->getArgOperand(0)));
2654 
2655   CallInst *OrderedEndCI = nullptr;
2656   for (auto &FI : *EntryBB) {
2657     Instruction *Cur = &FI;
2658     if (isa<CallInst>(Cur)) {
2659       OrderedEndCI = cast<CallInst>(Cur);
2660       if (OrderedEndCI->getCalledFunction()->getName() == "__kmpc_end_ordered")
2661         break;
2662       OrderedEndCI = nullptr;
2663     }
2664   }
2665   EXPECT_NE(OrderedEndCI, nullptr);
2666   EXPECT_EQ(OrderedEndCI->arg_size(), 2U);
2667   EXPECT_TRUE(isa<GlobalVariable>(OrderedEndCI->getArgOperand(0)));
2668   EXPECT_EQ(OrderedEndCI->getArgOperand(1), OrderedEntryCI->getArgOperand(1));
2669 }
2670 
2671 TEST_F(OpenMPIRBuilderTest, OrderedDirectiveSimd) {
2672   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2673   OpenMPIRBuilder OMPBuilder(*M);
2674   OMPBuilder.initialize();
2675   F->setName("func");
2676   IRBuilder<> Builder(BB);
2677 
2678   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2679 
2680   AllocaInst *PrivAI =
2681       Builder.CreateAlloca(F->arg_begin()->getType(), nullptr, "priv.inst");
2682 
2683   BasicBlock *EntryBB = nullptr;
2684 
2685   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
2686                        BasicBlock &FiniBB) {
2687     EntryBB = FiniBB.getUniquePredecessor();
2688 
2689     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
2690     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
2691     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
2692     EXPECT_EQ(EntryBB, CodeGenIPBB);
2693 
2694     Builder.restoreIP(CodeGenIP);
2695     Builder.CreateStore(F->arg_begin(), PrivAI);
2696     Value *PrivLoad =
2697         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
2698     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
2699   };
2700 
2701   auto FiniCB = [&](InsertPointTy IP) {
2702     BasicBlock *IPBB = IP.getBlock();
2703     EXPECT_NE(IPBB->end(), IP.getPoint());
2704   };
2705 
2706   // Test for "#omp ordered simd"
2707   Builder.restoreIP(
2708       OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, false));
2709 
2710   Builder.CreateRetVoid();
2711   OMPBuilder.finalize();
2712   EXPECT_FALSE(verifyModule(*M, &errs()));
2713 
2714   EXPECT_NE(EntryBB->getTerminator(), nullptr);
2715 
2716   CallInst *OrderedEntryCI = nullptr;
2717   for (auto &EI : *EntryBB) {
2718     Instruction *Cur = &EI;
2719     if (isa<CallInst>(Cur)) {
2720       OrderedEntryCI = cast<CallInst>(Cur);
2721       if (OrderedEntryCI->getCalledFunction()->getName() == "__kmpc_ordered")
2722         break;
2723       OrderedEntryCI = nullptr;
2724     }
2725   }
2726   EXPECT_EQ(OrderedEntryCI, nullptr);
2727 
2728   CallInst *OrderedEndCI = nullptr;
2729   for (auto &FI : *EntryBB) {
2730     Instruction *Cur = &FI;
2731     if (isa<CallInst>(Cur)) {
2732       OrderedEndCI = cast<CallInst>(Cur);
2733       if (OrderedEndCI->getCalledFunction()->getName() == "__kmpc_end_ordered")
2734         break;
2735       OrderedEndCI = nullptr;
2736     }
2737   }
2738   EXPECT_EQ(OrderedEndCI, nullptr);
2739 }
2740 
2741 TEST_F(OpenMPIRBuilderTest, CopyinBlocks) {
2742   OpenMPIRBuilder OMPBuilder(*M);
2743   OMPBuilder.initialize();
2744   F->setName("func");
2745   IRBuilder<> Builder(BB);
2746 
2747   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2748 
2749   IntegerType *Int32 = Type::getInt32Ty(M->getContext());
2750   AllocaInst *MasterAddress = Builder.CreateAlloca(Int32->getPointerTo());
2751   AllocaInst *PrivAddress = Builder.CreateAlloca(Int32->getPointerTo());
2752 
2753   BasicBlock *EntryBB = BB;
2754 
2755   OMPBuilder.createCopyinClauseBlocks(Builder.saveIP(), MasterAddress,
2756                                       PrivAddress, Int32, /*BranchtoEnd*/ true);
2757 
2758   BranchInst *EntryBr = dyn_cast_or_null<BranchInst>(EntryBB->getTerminator());
2759 
2760   EXPECT_NE(EntryBr, nullptr);
2761   EXPECT_TRUE(EntryBr->isConditional());
2762 
2763   BasicBlock *NotMasterBB = EntryBr->getSuccessor(0);
2764   BasicBlock *CopyinEnd = EntryBr->getSuccessor(1);
2765   CmpInst *CMP = dyn_cast_or_null<CmpInst>(EntryBr->getCondition());
2766 
2767   EXPECT_NE(CMP, nullptr);
2768   EXPECT_NE(NotMasterBB, nullptr);
2769   EXPECT_NE(CopyinEnd, nullptr);
2770 
2771   BranchInst *NotMasterBr =
2772       dyn_cast_or_null<BranchInst>(NotMasterBB->getTerminator());
2773   EXPECT_NE(NotMasterBr, nullptr);
2774   EXPECT_FALSE(NotMasterBr->isConditional());
2775   EXPECT_EQ(CopyinEnd, NotMasterBr->getSuccessor(0));
2776 }
2777 
2778 TEST_F(OpenMPIRBuilderTest, SingleDirective) {
2779   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2780   OpenMPIRBuilder OMPBuilder(*M);
2781   OMPBuilder.initialize();
2782   F->setName("func");
2783   IRBuilder<> Builder(BB);
2784 
2785   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2786 
2787   AllocaInst *PrivAI = nullptr;
2788 
2789   BasicBlock *EntryBB = nullptr;
2790   BasicBlock *ExitBB = nullptr;
2791   BasicBlock *ThenBB = nullptr;
2792 
2793   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
2794                        BasicBlock &FiniBB) {
2795     if (AllocaIP.isSet())
2796       Builder.restoreIP(AllocaIP);
2797     else
2798       Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt()));
2799     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
2800     Builder.CreateStore(F->arg_begin(), PrivAI);
2801 
2802     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
2803     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
2804     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
2805 
2806     Builder.restoreIP(CodeGenIP);
2807 
2808     // collect some info for checks later
2809     ExitBB = FiniBB.getUniqueSuccessor();
2810     ThenBB = Builder.GetInsertBlock();
2811     EntryBB = ThenBB->getUniquePredecessor();
2812 
2813     // simple instructions for body
2814     Value *PrivLoad =
2815         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
2816     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
2817   };
2818 
2819   auto FiniCB = [&](InsertPointTy IP) {
2820     BasicBlock *IPBB = IP.getBlock();
2821     EXPECT_NE(IPBB->end(), IP.getPoint());
2822   };
2823 
2824   Builder.restoreIP(
2825       OMPBuilder.createSingle(Builder, BodyGenCB, FiniCB, /*DidIt*/ nullptr));
2826   Value *EntryBBTI = EntryBB->getTerminator();
2827   EXPECT_NE(EntryBBTI, nullptr);
2828   EXPECT_TRUE(isa<BranchInst>(EntryBBTI));
2829   BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator());
2830   EXPECT_TRUE(EntryBr->isConditional());
2831   EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB);
2832   EXPECT_EQ(ThenBB->getUniqueSuccessor(), ExitBB);
2833   EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB);
2834 
2835   CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition());
2836   EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0)));
2837 
2838   CallInst *SingleEntryCI = cast<CallInst>(CondInst->getOperand(0));
2839   EXPECT_EQ(SingleEntryCI->arg_size(), 2U);
2840   EXPECT_EQ(SingleEntryCI->getCalledFunction()->getName(), "__kmpc_single");
2841   EXPECT_TRUE(isa<GlobalVariable>(SingleEntryCI->getArgOperand(0)));
2842 
2843   CallInst *SingleEndCI = nullptr;
2844   for (auto &FI : *ThenBB) {
2845     Instruction *cur = &FI;
2846     if (isa<CallInst>(cur)) {
2847       SingleEndCI = cast<CallInst>(cur);
2848       if (SingleEndCI->getCalledFunction()->getName() == "__kmpc_end_single")
2849         break;
2850       SingleEndCI = nullptr;
2851     }
2852   }
2853   EXPECT_NE(SingleEndCI, nullptr);
2854   EXPECT_EQ(SingleEndCI->arg_size(), 2U);
2855   EXPECT_TRUE(isa<GlobalVariable>(SingleEndCI->getArgOperand(0)));
2856   EXPECT_EQ(SingleEndCI->getArgOperand(1), SingleEntryCI->getArgOperand(1));
2857 }
2858 
2859 TEST_F(OpenMPIRBuilderTest, OMPAtomicReadFlt) {
2860   OpenMPIRBuilder OMPBuilder(*M);
2861   OMPBuilder.initialize();
2862   F->setName("func");
2863   IRBuilder<> Builder(BB);
2864 
2865   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2866 
2867   Type *Float32 = Type::getFloatTy(M->getContext());
2868   AllocaInst *XVal = Builder.CreateAlloca(Float32);
2869   XVal->setName("AtomicVar");
2870   AllocaInst *VVal = Builder.CreateAlloca(Float32);
2871   VVal->setName("AtomicRead");
2872   AtomicOrdering AO = AtomicOrdering::Monotonic;
2873   OpenMPIRBuilder::AtomicOpValue X = {XVal, Float32, false, false};
2874   OpenMPIRBuilder::AtomicOpValue V = {VVal, Float32, false, false};
2875 
2876   Builder.restoreIP(OMPBuilder.createAtomicRead(Loc, X, V, AO));
2877 
2878   IntegerType *IntCastTy =
2879       IntegerType::get(M->getContext(), Float32->getScalarSizeInBits());
2880 
2881   BitCastInst *CastFrmFlt = cast<BitCastInst>(VVal->getNextNode());
2882   EXPECT_EQ(CastFrmFlt->getSrcTy(), Float32->getPointerTo());
2883   EXPECT_EQ(CastFrmFlt->getDestTy(), IntCastTy->getPointerTo());
2884   EXPECT_EQ(CastFrmFlt->getOperand(0), XVal);
2885 
2886   LoadInst *AtomicLoad = cast<LoadInst>(CastFrmFlt->getNextNode());
2887   EXPECT_TRUE(AtomicLoad->isAtomic());
2888   EXPECT_EQ(AtomicLoad->getPointerOperand(), CastFrmFlt);
2889 
2890   BitCastInst *CastToFlt = cast<BitCastInst>(AtomicLoad->getNextNode());
2891   EXPECT_EQ(CastToFlt->getSrcTy(), IntCastTy);
2892   EXPECT_EQ(CastToFlt->getDestTy(), Float32);
2893   EXPECT_EQ(CastToFlt->getOperand(0), AtomicLoad);
2894 
2895   StoreInst *StoreofAtomic = cast<StoreInst>(CastToFlt->getNextNode());
2896   EXPECT_EQ(StoreofAtomic->getValueOperand(), CastToFlt);
2897   EXPECT_EQ(StoreofAtomic->getPointerOperand(), VVal);
2898 
2899   Builder.CreateRetVoid();
2900   OMPBuilder.finalize();
2901   EXPECT_FALSE(verifyModule(*M, &errs()));
2902 }
2903 
2904 TEST_F(OpenMPIRBuilderTest, OMPAtomicReadInt) {
2905   OpenMPIRBuilder OMPBuilder(*M);
2906   OMPBuilder.initialize();
2907   F->setName("func");
2908   IRBuilder<> Builder(BB);
2909 
2910   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2911 
2912   IntegerType *Int32 = Type::getInt32Ty(M->getContext());
2913   AllocaInst *XVal = Builder.CreateAlloca(Int32);
2914   XVal->setName("AtomicVar");
2915   AllocaInst *VVal = Builder.CreateAlloca(Int32);
2916   VVal->setName("AtomicRead");
2917   AtomicOrdering AO = AtomicOrdering::Monotonic;
2918   OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, false, false};
2919   OpenMPIRBuilder::AtomicOpValue V = {VVal, Int32, false, false};
2920 
2921   BasicBlock *EntryBB = BB;
2922 
2923   Builder.restoreIP(OMPBuilder.createAtomicRead(Loc, X, V, AO));
2924   LoadInst *AtomicLoad = nullptr;
2925   StoreInst *StoreofAtomic = nullptr;
2926 
2927   for (Instruction &Cur : *EntryBB) {
2928     if (isa<LoadInst>(Cur)) {
2929       AtomicLoad = cast<LoadInst>(&Cur);
2930       if (AtomicLoad->getPointerOperand() == XVal)
2931         continue;
2932       AtomicLoad = nullptr;
2933     } else if (isa<StoreInst>(Cur)) {
2934       StoreofAtomic = cast<StoreInst>(&Cur);
2935       if (StoreofAtomic->getPointerOperand() == VVal)
2936         continue;
2937       StoreofAtomic = nullptr;
2938     }
2939   }
2940 
2941   EXPECT_NE(AtomicLoad, nullptr);
2942   EXPECT_TRUE(AtomicLoad->isAtomic());
2943 
2944   EXPECT_NE(StoreofAtomic, nullptr);
2945   EXPECT_EQ(StoreofAtomic->getValueOperand(), AtomicLoad);
2946 
2947   Builder.CreateRetVoid();
2948   OMPBuilder.finalize();
2949 
2950   EXPECT_FALSE(verifyModule(*M, &errs()));
2951 }
2952 
2953 TEST_F(OpenMPIRBuilderTest, OMPAtomicWriteFlt) {
2954   OpenMPIRBuilder OMPBuilder(*M);
2955   OMPBuilder.initialize();
2956   F->setName("func");
2957   IRBuilder<> Builder(BB);
2958 
2959   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2960 
2961   LLVMContext &Ctx = M->getContext();
2962   Type *Float32 = Type::getFloatTy(Ctx);
2963   AllocaInst *XVal = Builder.CreateAlloca(Float32);
2964   XVal->setName("AtomicVar");
2965   OpenMPIRBuilder::AtomicOpValue X = {XVal, Float32, false, false};
2966   AtomicOrdering AO = AtomicOrdering::Monotonic;
2967   Constant *ValToWrite = ConstantFP::get(Float32, 1.0);
2968 
2969   Builder.restoreIP(OMPBuilder.createAtomicWrite(Loc, X, ValToWrite, AO));
2970 
2971   IntegerType *IntCastTy =
2972       IntegerType::get(M->getContext(), Float32->getScalarSizeInBits());
2973 
2974   BitCastInst *CastFrmFlt = cast<BitCastInst>(XVal->getNextNode());
2975   EXPECT_EQ(CastFrmFlt->getSrcTy(), Float32->getPointerTo());
2976   EXPECT_EQ(CastFrmFlt->getDestTy(), IntCastTy->getPointerTo());
2977   EXPECT_EQ(CastFrmFlt->getOperand(0), XVal);
2978 
2979   Value *ExprCast = Builder.CreateBitCast(ValToWrite, IntCastTy);
2980 
2981   StoreInst *StoreofAtomic = cast<StoreInst>(CastFrmFlt->getNextNode());
2982   EXPECT_EQ(StoreofAtomic->getValueOperand(), ExprCast);
2983   EXPECT_EQ(StoreofAtomic->getPointerOperand(), CastFrmFlt);
2984   EXPECT_TRUE(StoreofAtomic->isAtomic());
2985 
2986   Builder.CreateRetVoid();
2987   OMPBuilder.finalize();
2988   EXPECT_FALSE(verifyModule(*M, &errs()));
2989 }
2990 
2991 TEST_F(OpenMPIRBuilderTest, OMPAtomicWriteInt) {
2992   OpenMPIRBuilder OMPBuilder(*M);
2993   OMPBuilder.initialize();
2994   F->setName("func");
2995   IRBuilder<> Builder(BB);
2996 
2997   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2998 
2999   LLVMContext &Ctx = M->getContext();
3000   IntegerType *Int32 = Type::getInt32Ty(Ctx);
3001   AllocaInst *XVal = Builder.CreateAlloca(Int32);
3002   XVal->setName("AtomicVar");
3003   OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, false, false};
3004   AtomicOrdering AO = AtomicOrdering::Monotonic;
3005   ConstantInt *ValToWrite = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
3006 
3007   BasicBlock *EntryBB = BB;
3008 
3009   Builder.restoreIP(OMPBuilder.createAtomicWrite(Loc, X, ValToWrite, AO));
3010 
3011   StoreInst *StoreofAtomic = nullptr;
3012 
3013   for (Instruction &Cur : *EntryBB) {
3014     if (isa<StoreInst>(Cur)) {
3015       StoreofAtomic = cast<StoreInst>(&Cur);
3016       if (StoreofAtomic->getPointerOperand() == XVal)
3017         continue;
3018       StoreofAtomic = nullptr;
3019     }
3020   }
3021 
3022   EXPECT_NE(StoreofAtomic, nullptr);
3023   EXPECT_TRUE(StoreofAtomic->isAtomic());
3024   EXPECT_EQ(StoreofAtomic->getValueOperand(), ValToWrite);
3025 
3026   Builder.CreateRetVoid();
3027   OMPBuilder.finalize();
3028   EXPECT_FALSE(verifyModule(*M, &errs()));
3029 }
3030 
3031 TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdate) {
3032   OpenMPIRBuilder OMPBuilder(*M);
3033   OMPBuilder.initialize();
3034   F->setName("func");
3035   IRBuilder<> Builder(BB);
3036 
3037   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3038 
3039   IntegerType *Int32 = Type::getInt32Ty(M->getContext());
3040   AllocaInst *XVal = Builder.CreateAlloca(Int32);
3041   XVal->setName("AtomicVar");
3042   Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), XVal);
3043   OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, false, false};
3044   AtomicOrdering AO = AtomicOrdering::Monotonic;
3045   ConstantInt *ConstVal = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
3046   Value *Expr = nullptr;
3047   AtomicRMWInst::BinOp RMWOp = AtomicRMWInst::Sub;
3048   bool IsXLHSInRHSPart = false;
3049 
3050   BasicBlock *EntryBB = BB;
3051   OpenMPIRBuilder::InsertPointTy AllocaIP(EntryBB,
3052                                           EntryBB->getFirstInsertionPt());
3053   Value *Sub = nullptr;
3054 
3055   auto UpdateOp = [&](Value *Atomic, IRBuilder<> &IRB) {
3056     Sub = IRB.CreateSub(ConstVal, Atomic);
3057     return Sub;
3058   };
3059   Builder.restoreIP(OMPBuilder.createAtomicUpdate(
3060       Builder, AllocaIP, X, Expr, AO, RMWOp, UpdateOp, IsXLHSInRHSPart));
3061   BasicBlock *ContBB = EntryBB->getSingleSuccessor();
3062   BranchInst *ContTI = dyn_cast<BranchInst>(ContBB->getTerminator());
3063   EXPECT_NE(ContTI, nullptr);
3064   BasicBlock *EndBB = ContTI->getSuccessor(0);
3065   EXPECT_TRUE(ContTI->isConditional());
3066   EXPECT_EQ(ContTI->getSuccessor(1), ContBB);
3067   EXPECT_NE(EndBB, nullptr);
3068 
3069   PHINode *Phi = dyn_cast<PHINode>(&ContBB->front());
3070   EXPECT_NE(Phi, nullptr);
3071   EXPECT_EQ(Phi->getNumIncomingValues(), 2U);
3072   EXPECT_EQ(Phi->getIncomingBlock(0), EntryBB);
3073   EXPECT_EQ(Phi->getIncomingBlock(1), ContBB);
3074 
3075   EXPECT_EQ(Sub->getNumUses(), 1U);
3076   StoreInst *St = dyn_cast<StoreInst>(Sub->user_back());
3077   AllocaInst *UpdateTemp = dyn_cast<AllocaInst>(St->getPointerOperand());
3078 
3079   ExtractValueInst *ExVI1 =
3080       dyn_cast<ExtractValueInst>(Phi->getIncomingValueForBlock(ContBB));
3081   EXPECT_NE(ExVI1, nullptr);
3082   AtomicCmpXchgInst *CmpExchg =
3083       dyn_cast<AtomicCmpXchgInst>(ExVI1->getAggregateOperand());
3084   EXPECT_NE(CmpExchg, nullptr);
3085   EXPECT_EQ(CmpExchg->getPointerOperand(), XVal);
3086   EXPECT_EQ(CmpExchg->getCompareOperand(), Phi);
3087   EXPECT_EQ(CmpExchg->getSuccessOrdering(), AtomicOrdering::Monotonic);
3088 
3089   LoadInst *Ld = dyn_cast<LoadInst>(CmpExchg->getNewValOperand());
3090   EXPECT_NE(Ld, nullptr);
3091   EXPECT_EQ(UpdateTemp, Ld->getPointerOperand());
3092 
3093   Builder.CreateRetVoid();
3094   OMPBuilder.finalize();
3095   EXPECT_FALSE(verifyModule(*M, &errs()));
3096 }
3097 
3098 TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdateFloat) {
3099   OpenMPIRBuilder OMPBuilder(*M);
3100   OMPBuilder.initialize();
3101   F->setName("func");
3102   IRBuilder<> Builder(BB);
3103 
3104   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3105 
3106   Type *FloatTy = Type::getFloatTy(M->getContext());
3107   AllocaInst *XVal = Builder.CreateAlloca(FloatTy);
3108   XVal->setName("AtomicVar");
3109   Builder.CreateStore(ConstantFP::get(Type::getFloatTy(Ctx), 0.0), XVal);
3110   OpenMPIRBuilder::AtomicOpValue X = {XVal, FloatTy, false, false};
3111   AtomicOrdering AO = AtomicOrdering::Monotonic;
3112   Constant *ConstVal = ConstantFP::get(Type::getFloatTy(Ctx), 1.0);
3113   Value *Expr = nullptr;
3114   AtomicRMWInst::BinOp RMWOp = AtomicRMWInst::FSub;
3115   bool IsXLHSInRHSPart = false;
3116 
3117   BasicBlock *EntryBB = BB;
3118   OpenMPIRBuilder::InsertPointTy AllocaIP(EntryBB,
3119                                           EntryBB->getFirstInsertionPt());
3120   Value *Sub = nullptr;
3121 
3122   auto UpdateOp = [&](Value *Atomic, IRBuilder<> &IRB) {
3123     Sub = IRB.CreateFSub(ConstVal, Atomic);
3124     return Sub;
3125   };
3126   Builder.restoreIP(OMPBuilder.createAtomicUpdate(
3127       Builder, AllocaIP, X, Expr, AO, RMWOp, UpdateOp, IsXLHSInRHSPart));
3128   BasicBlock *ContBB = EntryBB->getSingleSuccessor();
3129   BranchInst *ContTI = dyn_cast<BranchInst>(ContBB->getTerminator());
3130   EXPECT_NE(ContTI, nullptr);
3131   BasicBlock *EndBB = ContTI->getSuccessor(0);
3132   EXPECT_TRUE(ContTI->isConditional());
3133   EXPECT_EQ(ContTI->getSuccessor(1), ContBB);
3134   EXPECT_NE(EndBB, nullptr);
3135 
3136   PHINode *Phi = dyn_cast<PHINode>(&ContBB->front());
3137   EXPECT_NE(Phi, nullptr);
3138   EXPECT_EQ(Phi->getNumIncomingValues(), 2U);
3139   EXPECT_EQ(Phi->getIncomingBlock(0), EntryBB);
3140   EXPECT_EQ(Phi->getIncomingBlock(1), ContBB);
3141 
3142   EXPECT_EQ(Sub->getNumUses(), 1U);
3143   StoreInst *St = dyn_cast<StoreInst>(Sub->user_back());
3144   AllocaInst *UpdateTemp = dyn_cast<AllocaInst>(St->getPointerOperand());
3145 
3146   ExtractValueInst *ExVI1 =
3147       dyn_cast<ExtractValueInst>(Phi->getIncomingValueForBlock(ContBB));
3148   EXPECT_NE(ExVI1, nullptr);
3149   AtomicCmpXchgInst *CmpExchg =
3150       dyn_cast<AtomicCmpXchgInst>(ExVI1->getAggregateOperand());
3151   EXPECT_NE(CmpExchg, nullptr);
3152   BitCastInst *BitCastNew =
3153       dyn_cast<BitCastInst>(CmpExchg->getPointerOperand());
3154   EXPECT_NE(BitCastNew, nullptr);
3155   EXPECT_EQ(BitCastNew->getOperand(0), XVal);
3156   EXPECT_EQ(CmpExchg->getCompareOperand(), Phi);
3157   EXPECT_EQ(CmpExchg->getSuccessOrdering(), AtomicOrdering::Monotonic);
3158 
3159   LoadInst *Ld = dyn_cast<LoadInst>(CmpExchg->getNewValOperand());
3160   EXPECT_NE(Ld, nullptr);
3161   BitCastInst *BitCastOld = dyn_cast<BitCastInst>(Ld->getPointerOperand());
3162   EXPECT_NE(BitCastOld, nullptr);
3163   EXPECT_EQ(UpdateTemp, BitCastOld->getOperand(0));
3164 
3165   Builder.CreateRetVoid();
3166   OMPBuilder.finalize();
3167   EXPECT_FALSE(verifyModule(*M, &errs()));
3168 }
3169 
3170 TEST_F(OpenMPIRBuilderTest, OMPAtomicCapture) {
3171   OpenMPIRBuilder OMPBuilder(*M);
3172   OMPBuilder.initialize();
3173   F->setName("func");
3174   IRBuilder<> Builder(BB);
3175 
3176   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3177 
3178   LLVMContext &Ctx = M->getContext();
3179   IntegerType *Int32 = Type::getInt32Ty(Ctx);
3180   AllocaInst *XVal = Builder.CreateAlloca(Int32);
3181   XVal->setName("AtomicVar");
3182   AllocaInst *VVal = Builder.CreateAlloca(Int32);
3183   VVal->setName("AtomicCapTar");
3184   StoreInst *Init =
3185       Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), XVal);
3186 
3187   OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, false, false};
3188   OpenMPIRBuilder::AtomicOpValue V = {VVal, Int32, false, false};
3189   AtomicOrdering AO = AtomicOrdering::Monotonic;
3190   ConstantInt *Expr = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
3191   AtomicRMWInst::BinOp RMWOp = AtomicRMWInst::Add;
3192   bool IsXLHSInRHSPart = true;
3193   bool IsPostfixUpdate = true;
3194   bool UpdateExpr = true;
3195 
3196   BasicBlock *EntryBB = BB;
3197   OpenMPIRBuilder::InsertPointTy AllocaIP(EntryBB,
3198                                           EntryBB->getFirstInsertionPt());
3199 
3200   // integer update - not used
3201   auto UpdateOp = [&](Value *Atomic, IRBuilder<> &IRB) { return nullptr; };
3202 
3203   Builder.restoreIP(OMPBuilder.createAtomicCapture(
3204       Builder, AllocaIP, X, V, Expr, AO, RMWOp, UpdateOp, UpdateExpr,
3205       IsPostfixUpdate, IsXLHSInRHSPart));
3206   EXPECT_EQ(EntryBB->getParent()->size(), 1U);
3207   AtomicRMWInst *ARWM = dyn_cast<AtomicRMWInst>(Init->getNextNode());
3208   EXPECT_NE(ARWM, nullptr);
3209   EXPECT_EQ(ARWM->getPointerOperand(), XVal);
3210   EXPECT_EQ(ARWM->getOperation(), RMWOp);
3211   StoreInst *St = dyn_cast<StoreInst>(ARWM->user_back());
3212   EXPECT_NE(St, nullptr);
3213   EXPECT_EQ(St->getPointerOperand(), VVal);
3214 
3215   Builder.CreateRetVoid();
3216   OMPBuilder.finalize();
3217   EXPECT_FALSE(verifyModule(*M, &errs()));
3218 }
3219 
3220 TEST_F(OpenMPIRBuilderTest, OMPAtomicCompare) {
3221   OpenMPIRBuilder OMPBuilder(*M);
3222   OMPBuilder.initialize();
3223   F->setName("func");
3224   IRBuilder<> Builder(BB);
3225 
3226   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3227 
3228   LLVMContext &Ctx = M->getContext();
3229   IntegerType *Int32 = Type::getInt32Ty(Ctx);
3230   AllocaInst *XVal = Builder.CreateAlloca(Int32);
3231   XVal->setName("x");
3232   StoreInst *Init =
3233       Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), XVal);
3234 
3235   OpenMPIRBuilder::AtomicOpValue XSigned = {XVal, Int32, true, false};
3236   OpenMPIRBuilder::AtomicOpValue XUnsigned = {XVal, Int32, false, false};
3237   AtomicOrdering AO = AtomicOrdering::Monotonic;
3238   ConstantInt *Expr = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
3239   ConstantInt *D = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
3240   OMPAtomicCompareOp OpMax = OMPAtomicCompareOp::MAX;
3241   OMPAtomicCompareOp OpEQ = OMPAtomicCompareOp::EQ;
3242 
3243   Builder.restoreIP(OMPBuilder.createAtomicCompare(Builder, XSigned, Expr,
3244                                                    nullptr, AO, OpMax, true));
3245   Builder.restoreIP(OMPBuilder.createAtomicCompare(Builder, XUnsigned, Expr,
3246                                                    nullptr, AO, OpMax, false));
3247   Builder.restoreIP(OMPBuilder.createAtomicCompare(Builder, XSigned, Expr, D,
3248                                                    AO, OpEQ, true));
3249 
3250   BasicBlock *EntryBB = BB;
3251   EXPECT_EQ(EntryBB->getParent()->size(), 1U);
3252   EXPECT_EQ(EntryBB->size(), 5U);
3253 
3254   AtomicRMWInst *ARWM1 = dyn_cast<AtomicRMWInst>(Init->getNextNode());
3255   EXPECT_NE(ARWM1, nullptr);
3256   EXPECT_EQ(ARWM1->getPointerOperand(), XVal);
3257   EXPECT_EQ(ARWM1->getValOperand(), Expr);
3258   EXPECT_EQ(ARWM1->getOperation(), AtomicRMWInst::Min);
3259 
3260   AtomicRMWInst *ARWM2 = dyn_cast<AtomicRMWInst>(ARWM1->getNextNode());
3261   EXPECT_NE(ARWM2, nullptr);
3262   EXPECT_EQ(ARWM2->getPointerOperand(), XVal);
3263   EXPECT_EQ(ARWM2->getValOperand(), Expr);
3264   EXPECT_EQ(ARWM2->getOperation(), AtomicRMWInst::UMax);
3265 
3266   AtomicCmpXchgInst *AXCHG = dyn_cast<AtomicCmpXchgInst>(ARWM2->getNextNode());
3267   EXPECT_NE(AXCHG, nullptr);
3268   EXPECT_EQ(AXCHG->getPointerOperand(), XVal);
3269   EXPECT_EQ(AXCHG->getCompareOperand(), Expr);
3270   EXPECT_EQ(AXCHG->getNewValOperand(), D);
3271 
3272   Builder.CreateRetVoid();
3273   OMPBuilder.finalize();
3274   EXPECT_FALSE(verifyModule(*M, &errs()));
3275 }
3276 
3277 /// Returns the single instruction of InstTy type in BB that uses the value V.
3278 /// If there is more than one such instruction, returns null.
3279 template <typename InstTy>
3280 static InstTy *findSingleUserInBlock(Value *V, BasicBlock *BB) {
3281   InstTy *Result = nullptr;
3282   for (User *U : V->users()) {
3283     auto *Inst = dyn_cast<InstTy>(U);
3284     if (!Inst || Inst->getParent() != BB)
3285       continue;
3286     if (Result)
3287       return nullptr;
3288     Result = Inst;
3289   }
3290   return Result;
3291 }
3292 
3293 /// Returns true if BB contains a simple binary reduction that loads a value
3294 /// from Accum, performs some binary operation with it, and stores it back to
3295 /// Accum.
3296 static bool isSimpleBinaryReduction(Value *Accum, BasicBlock *BB,
3297                                     Instruction::BinaryOps *OpCode = nullptr) {
3298   StoreInst *Store = findSingleUserInBlock<StoreInst>(Accum, BB);
3299   if (!Store)
3300     return false;
3301   auto *Stored = dyn_cast<BinaryOperator>(Store->getOperand(0));
3302   if (!Stored)
3303     return false;
3304   if (OpCode && *OpCode != Stored->getOpcode())
3305     return false;
3306   auto *Load = dyn_cast<LoadInst>(Stored->getOperand(0));
3307   return Load && Load->getOperand(0) == Accum;
3308 }
3309 
3310 /// Returns true if BB contains a binary reduction that reduces V using a binary
3311 /// operator into an accumulator that is a function argument.
3312 static bool isValueReducedToFuncArg(Value *V, BasicBlock *BB) {
3313   auto *ReductionOp = findSingleUserInBlock<BinaryOperator>(V, BB);
3314   if (!ReductionOp)
3315     return false;
3316 
3317   auto *GlobalLoad = dyn_cast<LoadInst>(ReductionOp->getOperand(0));
3318   if (!GlobalLoad)
3319     return false;
3320 
3321   auto *Store = findSingleUserInBlock<StoreInst>(ReductionOp, BB);
3322   if (!Store)
3323     return false;
3324 
3325   return Store->getPointerOperand() == GlobalLoad->getPointerOperand() &&
3326          isa<Argument>(findAggregateFromValue(GlobalLoad->getPointerOperand()));
3327 }
3328 
3329 /// Finds among users of Ptr a pair of GEP instructions with indices [0, 0] and
3330 /// [0, 1], respectively, and assigns results of these instructions to Zero and
3331 /// One. Returns true on success, false on failure or if such instructions are
3332 /// not unique among the users of Ptr.
3333 static bool findGEPZeroOne(Value *Ptr, Value *&Zero, Value *&One) {
3334   Zero = nullptr;
3335   One = nullptr;
3336   for (User *U : Ptr->users()) {
3337     if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
3338       if (GEP->getNumIndices() != 2)
3339         continue;
3340       auto *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
3341       auto *SecondIdx = dyn_cast<ConstantInt>(GEP->getOperand(2));
3342       EXPECT_NE(FirstIdx, nullptr);
3343       EXPECT_NE(SecondIdx, nullptr);
3344 
3345       EXPECT_TRUE(FirstIdx->isZero());
3346       if (SecondIdx->isZero()) {
3347         if (Zero)
3348           return false;
3349         Zero = GEP;
3350       } else if (SecondIdx->isOne()) {
3351         if (One)
3352           return false;
3353         One = GEP;
3354       } else {
3355         return false;
3356       }
3357     }
3358   }
3359   return Zero != nullptr && One != nullptr;
3360 }
3361 
3362 static OpenMPIRBuilder::InsertPointTy
3363 sumReduction(OpenMPIRBuilder::InsertPointTy IP, Value *LHS, Value *RHS,
3364              Value *&Result) {
3365   IRBuilder<> Builder(IP.getBlock(), IP.getPoint());
3366   Result = Builder.CreateFAdd(LHS, RHS, "red.add");
3367   return Builder.saveIP();
3368 }
3369 
3370 static OpenMPIRBuilder::InsertPointTy
3371 sumAtomicReduction(OpenMPIRBuilder::InsertPointTy IP, Type *Ty, Value *LHS,
3372                    Value *RHS) {
3373   IRBuilder<> Builder(IP.getBlock(), IP.getPoint());
3374   Value *Partial = Builder.CreateLoad(Ty, RHS, "red.partial");
3375   Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, LHS, Partial, None,
3376                           AtomicOrdering::Monotonic);
3377   return Builder.saveIP();
3378 }
3379 
3380 static OpenMPIRBuilder::InsertPointTy
3381 xorReduction(OpenMPIRBuilder::InsertPointTy IP, Value *LHS, Value *RHS,
3382              Value *&Result) {
3383   IRBuilder<> Builder(IP.getBlock(), IP.getPoint());
3384   Result = Builder.CreateXor(LHS, RHS, "red.xor");
3385   return Builder.saveIP();
3386 }
3387 
3388 static OpenMPIRBuilder::InsertPointTy
3389 xorAtomicReduction(OpenMPIRBuilder::InsertPointTy IP, Type *Ty, Value *LHS,
3390                    Value *RHS) {
3391   IRBuilder<> Builder(IP.getBlock(), IP.getPoint());
3392   Value *Partial = Builder.CreateLoad(Ty, RHS, "red.partial");
3393   Builder.CreateAtomicRMW(AtomicRMWInst::Xor, LHS, Partial, None,
3394                           AtomicOrdering::Monotonic);
3395   return Builder.saveIP();
3396 }
3397 
3398 TEST_F(OpenMPIRBuilderTest, CreateReductions) {
3399   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
3400   OpenMPIRBuilder OMPBuilder(*M);
3401   OMPBuilder.initialize();
3402   F->setName("func");
3403   IRBuilder<> Builder(BB);
3404 
3405   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
3406   Builder.CreateBr(EnterBB);
3407   Builder.SetInsertPoint(EnterBB);
3408   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3409 
3410   // Create variables to be reduced.
3411   InsertPointTy OuterAllocaIP(&F->getEntryBlock(),
3412                               F->getEntryBlock().getFirstInsertionPt());
3413   Type *SumType = Builder.getFloatTy();
3414   Type *XorType = Builder.getInt32Ty();
3415   Value *SumReduced;
3416   Value *XorReduced;
3417   {
3418     IRBuilderBase::InsertPointGuard Guard(Builder);
3419     Builder.restoreIP(OuterAllocaIP);
3420     SumReduced = Builder.CreateAlloca(SumType);
3421     XorReduced = Builder.CreateAlloca(XorType);
3422   }
3423 
3424   // Store initial values of reductions into global variables.
3425   Builder.CreateStore(ConstantFP::get(Builder.getFloatTy(), 0.0), SumReduced);
3426   Builder.CreateStore(Builder.getInt32(1), XorReduced);
3427 
3428   // The loop body computes two reductions:
3429   //   sum of (float) thread-id;
3430   //   xor of thread-id;
3431   // and store the result in global variables.
3432   InsertPointTy BodyIP, BodyAllocaIP;
3433   auto BodyGenCB = [&](InsertPointTy InnerAllocaIP, InsertPointTy CodeGenIP,
3434                        BasicBlock &ContinuationBB) {
3435     IRBuilderBase::InsertPointGuard Guard(Builder);
3436     Builder.restoreIP(CodeGenIP);
3437 
3438     uint32_t StrSize;
3439     Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc, StrSize);
3440     Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr, StrSize);
3441     Value *TID = OMPBuilder.getOrCreateThreadID(Ident);
3442     Value *SumLocal =
3443         Builder.CreateUIToFP(TID, Builder.getFloatTy(), "sum.local");
3444     Value *SumPartial = Builder.CreateLoad(SumType, SumReduced, "sum.partial");
3445     Value *XorPartial = Builder.CreateLoad(XorType, XorReduced, "xor.partial");
3446     Value *Sum = Builder.CreateFAdd(SumPartial, SumLocal, "sum");
3447     Value *Xor = Builder.CreateXor(XorPartial, TID, "xor");
3448     Builder.CreateStore(Sum, SumReduced);
3449     Builder.CreateStore(Xor, XorReduced);
3450 
3451     BodyIP = Builder.saveIP();
3452     BodyAllocaIP = InnerAllocaIP;
3453   };
3454 
3455   // Privatization for reduction creates local copies of reduction variables and
3456   // initializes them to reduction-neutral values.
3457   Value *SumPrivatized;
3458   Value *XorPrivatized;
3459   auto PrivCB = [&](InsertPointTy InnerAllocaIP, InsertPointTy CodeGenIP,
3460                     Value &Original, Value &Inner, Value *&ReplVal) {
3461     IRBuilderBase::InsertPointGuard Guard(Builder);
3462     Builder.restoreIP(InnerAllocaIP);
3463     if (&Original == SumReduced) {
3464       SumPrivatized = Builder.CreateAlloca(Builder.getFloatTy());
3465       ReplVal = SumPrivatized;
3466     } else if (&Original == XorReduced) {
3467       XorPrivatized = Builder.CreateAlloca(Builder.getInt32Ty());
3468       ReplVal = XorPrivatized;
3469     } else {
3470       ReplVal = &Inner;
3471       return CodeGenIP;
3472     }
3473 
3474     Builder.restoreIP(CodeGenIP);
3475     if (&Original == SumReduced)
3476       Builder.CreateStore(ConstantFP::get(Builder.getFloatTy(), 0.0),
3477                           SumPrivatized);
3478     else if (&Original == XorReduced)
3479       Builder.CreateStore(Builder.getInt32(0), XorPrivatized);
3480 
3481     return Builder.saveIP();
3482   };
3483 
3484   // Do nothing in finalization.
3485   auto FiniCB = [&](InsertPointTy CodeGenIP) { return CodeGenIP; };
3486 
3487   InsertPointTy AfterIP =
3488       OMPBuilder.createParallel(Loc, OuterAllocaIP, BodyGenCB, PrivCB, FiniCB,
3489                                 /* IfCondition */ nullptr,
3490                                 /* NumThreads */ nullptr, OMP_PROC_BIND_default,
3491                                 /* IsCancellable */ false);
3492   Builder.restoreIP(AfterIP);
3493 
3494   OpenMPIRBuilder::ReductionInfo ReductionInfos[] = {
3495       {SumType, SumReduced, SumPrivatized, sumReduction, sumAtomicReduction},
3496       {XorType, XorReduced, XorPrivatized, xorReduction, xorAtomicReduction}};
3497 
3498   OMPBuilder.createReductions(BodyIP, BodyAllocaIP, ReductionInfos);
3499 
3500   Builder.restoreIP(AfterIP);
3501   Builder.CreateRetVoid();
3502 
3503   OMPBuilder.finalize(F);
3504 
3505   // The IR must be valid.
3506   EXPECT_FALSE(verifyModule(*M));
3507 
3508   // Outlining must have happened.
3509   SmallVector<CallInst *> ForkCalls;
3510   findCalls(F, omp::RuntimeFunction::OMPRTL___kmpc_fork_call, OMPBuilder,
3511             ForkCalls);
3512   ASSERT_EQ(ForkCalls.size(), 1u);
3513   Value *CalleeVal = cast<Constant>(ForkCalls[0]->getOperand(2))->getOperand(0);
3514   Function *Outlined = dyn_cast<Function>(CalleeVal);
3515   EXPECT_NE(Outlined, nullptr);
3516 
3517   // Check that the lock variable was created with the expected name.
3518   GlobalVariable *LockVar =
3519       M->getGlobalVariable(".gomp_critical_user_.reduction.var");
3520   EXPECT_NE(LockVar, nullptr);
3521 
3522   // Find the allocation of a local array that will be used to call the runtime
3523   // reduciton function.
3524   BasicBlock &AllocBlock = Outlined->getEntryBlock();
3525   Value *LocalArray = nullptr;
3526   for (Instruction &I : AllocBlock) {
3527     if (AllocaInst *Alloc = dyn_cast<AllocaInst>(&I)) {
3528       if (!Alloc->getAllocatedType()->isArrayTy() ||
3529           !Alloc->getAllocatedType()->getArrayElementType()->isPointerTy())
3530         continue;
3531       LocalArray = Alloc;
3532       break;
3533     }
3534   }
3535   ASSERT_NE(LocalArray, nullptr);
3536 
3537   // Find the call to the runtime reduction function.
3538   BasicBlock *BB = AllocBlock.getUniqueSuccessor();
3539   Value *LocalArrayPtr = nullptr;
3540   Value *ReductionFnVal = nullptr;
3541   Value *SwitchArg = nullptr;
3542   for (Instruction &I : *BB) {
3543     if (CallInst *Call = dyn_cast<CallInst>(&I)) {
3544       if (Call->getCalledFunction() !=
3545           OMPBuilder.getOrCreateRuntimeFunctionPtr(
3546               RuntimeFunction::OMPRTL___kmpc_reduce))
3547         continue;
3548       LocalArrayPtr = Call->getOperand(4);
3549       ReductionFnVal = Call->getOperand(5);
3550       SwitchArg = Call;
3551       break;
3552     }
3553   }
3554 
3555   // Check that the local array is passed to the function.
3556   ASSERT_NE(LocalArrayPtr, nullptr);
3557   BitCastInst *BitCast = dyn_cast<BitCastInst>(LocalArrayPtr);
3558   ASSERT_NE(BitCast, nullptr);
3559   EXPECT_EQ(BitCast->getOperand(0), LocalArray);
3560 
3561   // Find the GEP instructions preceding stores to the local array.
3562   Value *FirstArrayElemPtr = nullptr;
3563   Value *SecondArrayElemPtr = nullptr;
3564   EXPECT_EQ(LocalArray->getNumUses(), 3u);
3565   ASSERT_TRUE(
3566       findGEPZeroOne(LocalArray, FirstArrayElemPtr, SecondArrayElemPtr));
3567 
3568   // Check that the values stored into the local array are privatized reduction
3569   // variables.
3570   auto *FirstStored = dyn_cast_or_null<BitCastInst>(
3571       findStoredValue<GetElementPtrInst>(FirstArrayElemPtr));
3572   auto *SecondStored = dyn_cast_or_null<BitCastInst>(
3573       findStoredValue<GetElementPtrInst>(SecondArrayElemPtr));
3574   ASSERT_NE(FirstStored, nullptr);
3575   ASSERT_NE(SecondStored, nullptr);
3576   Value *FirstPrivatized = FirstStored->getOperand(0);
3577   Value *SecondPrivatized = SecondStored->getOperand(0);
3578   EXPECT_TRUE(
3579       isSimpleBinaryReduction(FirstPrivatized, FirstStored->getParent()));
3580   EXPECT_TRUE(
3581       isSimpleBinaryReduction(SecondPrivatized, SecondStored->getParent()));
3582 
3583   // Check that the result of the runtime reduction call is used for further
3584   // dispatch.
3585   ASSERT_EQ(SwitchArg->getNumUses(), 1u);
3586   SwitchInst *Switch = dyn_cast<SwitchInst>(*SwitchArg->user_begin());
3587   ASSERT_NE(Switch, nullptr);
3588   EXPECT_EQ(Switch->getNumSuccessors(), 3u);
3589   BasicBlock *NonAtomicBB = Switch->case_begin()->getCaseSuccessor();
3590   BasicBlock *AtomicBB = std::next(Switch->case_begin())->getCaseSuccessor();
3591 
3592   // Non-atomic block contains reductions to the global reduction variable,
3593   // which is passed into the outlined function as an argument.
3594   Value *FirstLoad =
3595       findSingleUserInBlock<LoadInst>(FirstPrivatized, NonAtomicBB);
3596   Value *SecondLoad =
3597       findSingleUserInBlock<LoadInst>(SecondPrivatized, NonAtomicBB);
3598   EXPECT_TRUE(isValueReducedToFuncArg(FirstLoad, NonAtomicBB));
3599   EXPECT_TRUE(isValueReducedToFuncArg(SecondLoad, NonAtomicBB));
3600 
3601   // Atomic block also constains reductions to the global reduction variable.
3602   FirstLoad = findSingleUserInBlock<LoadInst>(FirstPrivatized, AtomicBB);
3603   SecondLoad = findSingleUserInBlock<LoadInst>(SecondPrivatized, AtomicBB);
3604   auto *FirstAtomic = findSingleUserInBlock<AtomicRMWInst>(FirstLoad, AtomicBB);
3605   auto *SecondAtomic =
3606       findSingleUserInBlock<AtomicRMWInst>(SecondLoad, AtomicBB);
3607   ASSERT_NE(FirstAtomic, nullptr);
3608   Value *AtomicStorePointer = FirstAtomic->getPointerOperand();
3609   EXPECT_TRUE(isa<Argument>(findAggregateFromValue(AtomicStorePointer)));
3610   ASSERT_NE(SecondAtomic, nullptr);
3611   AtomicStorePointer = SecondAtomic->getPointerOperand();
3612   EXPECT_TRUE(isa<Argument>(findAggregateFromValue(AtomicStorePointer)));
3613 
3614   // Check that the separate reduction function also performs (non-atomic)
3615   // reductions after extracting reduction variables from its arguments.
3616   Function *ReductionFn = cast<Function>(ReductionFnVal);
3617   BasicBlock *FnReductionBB = &ReductionFn->getEntryBlock();
3618   auto *Bitcast =
3619       findSingleUserInBlock<BitCastInst>(ReductionFn->getArg(0), FnReductionBB);
3620   Value *FirstLHSPtr;
3621   Value *SecondLHSPtr;
3622   ASSERT_TRUE(findGEPZeroOne(Bitcast, FirstLHSPtr, SecondLHSPtr));
3623   Value *Opaque = findSingleUserInBlock<LoadInst>(FirstLHSPtr, FnReductionBB);
3624   ASSERT_NE(Opaque, nullptr);
3625   Bitcast = findSingleUserInBlock<BitCastInst>(Opaque, FnReductionBB);
3626   ASSERT_NE(Bitcast, nullptr);
3627   EXPECT_TRUE(isSimpleBinaryReduction(Bitcast, FnReductionBB));
3628   Opaque = findSingleUserInBlock<LoadInst>(SecondLHSPtr, FnReductionBB);
3629   ASSERT_NE(Opaque, nullptr);
3630   Bitcast = findSingleUserInBlock<BitCastInst>(Opaque, FnReductionBB);
3631   ASSERT_NE(Bitcast, nullptr);
3632   EXPECT_TRUE(isSimpleBinaryReduction(Bitcast, FnReductionBB));
3633 
3634   Bitcast =
3635       findSingleUserInBlock<BitCastInst>(ReductionFn->getArg(1), FnReductionBB);
3636   Value *FirstRHS;
3637   Value *SecondRHS;
3638   EXPECT_TRUE(findGEPZeroOne(Bitcast, FirstRHS, SecondRHS));
3639 }
3640 
3641 TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) {
3642   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
3643   OpenMPIRBuilder OMPBuilder(*M);
3644   OMPBuilder.initialize();
3645   F->setName("func");
3646   IRBuilder<> Builder(BB);
3647 
3648   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
3649   Builder.CreateBr(EnterBB);
3650   Builder.SetInsertPoint(EnterBB);
3651   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3652 
3653   // Create variables to be reduced.
3654   InsertPointTy OuterAllocaIP(&F->getEntryBlock(),
3655                               F->getEntryBlock().getFirstInsertionPt());
3656   Type *SumType = Builder.getFloatTy();
3657   Type *XorType = Builder.getInt32Ty();
3658   Value *SumReduced;
3659   Value *XorReduced;
3660   {
3661     IRBuilderBase::InsertPointGuard Guard(Builder);
3662     Builder.restoreIP(OuterAllocaIP);
3663     SumReduced = Builder.CreateAlloca(SumType);
3664     XorReduced = Builder.CreateAlloca(XorType);
3665   }
3666 
3667   // Store initial values of reductions into global variables.
3668   Builder.CreateStore(ConstantFP::get(Builder.getFloatTy(), 0.0), SumReduced);
3669   Builder.CreateStore(Builder.getInt32(1), XorReduced);
3670 
3671   InsertPointTy FirstBodyIP, FirstBodyAllocaIP;
3672   auto FirstBodyGenCB = [&](InsertPointTy InnerAllocaIP,
3673                             InsertPointTy CodeGenIP,
3674                             BasicBlock &ContinuationBB) {
3675     IRBuilderBase::InsertPointGuard Guard(Builder);
3676     Builder.restoreIP(CodeGenIP);
3677 
3678     uint32_t StrSize;
3679     Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc, StrSize);
3680     Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr, StrSize);
3681     Value *TID = OMPBuilder.getOrCreateThreadID(Ident);
3682     Value *SumLocal =
3683         Builder.CreateUIToFP(TID, Builder.getFloatTy(), "sum.local");
3684     Value *SumPartial = Builder.CreateLoad(SumType, SumReduced, "sum.partial");
3685     Value *Sum = Builder.CreateFAdd(SumPartial, SumLocal, "sum");
3686     Builder.CreateStore(Sum, SumReduced);
3687 
3688     FirstBodyIP = Builder.saveIP();
3689     FirstBodyAllocaIP = InnerAllocaIP;
3690   };
3691 
3692   InsertPointTy SecondBodyIP, SecondBodyAllocaIP;
3693   auto SecondBodyGenCB = [&](InsertPointTy InnerAllocaIP,
3694                              InsertPointTy CodeGenIP,
3695                              BasicBlock &ContinuationBB) {
3696     IRBuilderBase::InsertPointGuard Guard(Builder);
3697     Builder.restoreIP(CodeGenIP);
3698 
3699     uint32_t StrSize;
3700     Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc, StrSize);
3701     Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr, StrSize);
3702     Value *TID = OMPBuilder.getOrCreateThreadID(Ident);
3703     Value *XorPartial = Builder.CreateLoad(XorType, XorReduced, "xor.partial");
3704     Value *Xor = Builder.CreateXor(XorPartial, TID, "xor");
3705     Builder.CreateStore(Xor, XorReduced);
3706 
3707     SecondBodyIP = Builder.saveIP();
3708     SecondBodyAllocaIP = InnerAllocaIP;
3709   };
3710 
3711   // Privatization for reduction creates local copies of reduction variables and
3712   // initializes them to reduction-neutral values. The same privatization
3713   // callback is used for both loops, with dispatch based on the value being
3714   // privatized.
3715   Value *SumPrivatized;
3716   Value *XorPrivatized;
3717   auto PrivCB = [&](InsertPointTy InnerAllocaIP, InsertPointTy CodeGenIP,
3718                     Value &Original, Value &Inner, Value *&ReplVal) {
3719     IRBuilderBase::InsertPointGuard Guard(Builder);
3720     Builder.restoreIP(InnerAllocaIP);
3721     if (&Original == SumReduced) {
3722       SumPrivatized = Builder.CreateAlloca(Builder.getFloatTy());
3723       ReplVal = SumPrivatized;
3724     } else if (&Original == XorReduced) {
3725       XorPrivatized = Builder.CreateAlloca(Builder.getInt32Ty());
3726       ReplVal = XorPrivatized;
3727     } else {
3728       ReplVal = &Inner;
3729       return CodeGenIP;
3730     }
3731 
3732     Builder.restoreIP(CodeGenIP);
3733     if (&Original == SumReduced)
3734       Builder.CreateStore(ConstantFP::get(Builder.getFloatTy(), 0.0),
3735                           SumPrivatized);
3736     else if (&Original == XorReduced)
3737       Builder.CreateStore(Builder.getInt32(0), XorPrivatized);
3738 
3739     return Builder.saveIP();
3740   };
3741 
3742   // Do nothing in finalization.
3743   auto FiniCB = [&](InsertPointTy CodeGenIP) { return CodeGenIP; };
3744 
3745   Builder.restoreIP(
3746       OMPBuilder.createParallel(Loc, OuterAllocaIP, FirstBodyGenCB, PrivCB,
3747                                 FiniCB, /* IfCondition */ nullptr,
3748                                 /* NumThreads */ nullptr, OMP_PROC_BIND_default,
3749                                 /* IsCancellable */ false));
3750   InsertPointTy AfterIP = OMPBuilder.createParallel(
3751       {Builder.saveIP(), DL}, OuterAllocaIP, SecondBodyGenCB, PrivCB, FiniCB,
3752       /* IfCondition */ nullptr,
3753       /* NumThreads */ nullptr, OMP_PROC_BIND_default,
3754       /* IsCancellable */ false);
3755 
3756   OMPBuilder.createReductions(
3757       FirstBodyIP, FirstBodyAllocaIP,
3758       {{SumType, SumReduced, SumPrivatized, sumReduction, sumAtomicReduction}});
3759   OMPBuilder.createReductions(
3760       SecondBodyIP, SecondBodyAllocaIP,
3761       {{XorType, XorReduced, XorPrivatized, xorReduction, xorAtomicReduction}});
3762 
3763   Builder.restoreIP(AfterIP);
3764   Builder.CreateRetVoid();
3765 
3766   OMPBuilder.finalize(F);
3767 
3768   // The IR must be valid.
3769   EXPECT_FALSE(verifyModule(*M));
3770 
3771   // Two different outlined functions must have been created.
3772   SmallVector<CallInst *> ForkCalls;
3773   findCalls(F, omp::RuntimeFunction::OMPRTL___kmpc_fork_call, OMPBuilder,
3774             ForkCalls);
3775   ASSERT_EQ(ForkCalls.size(), 2u);
3776   Value *CalleeVal = cast<Constant>(ForkCalls[0]->getOperand(2))->getOperand(0);
3777   Function *FirstCallee = cast<Function>(CalleeVal);
3778   CalleeVal = cast<Constant>(ForkCalls[1]->getOperand(2))->getOperand(0);
3779   Function *SecondCallee = cast<Function>(CalleeVal);
3780   EXPECT_NE(FirstCallee, SecondCallee);
3781 
3782   // Two different reduction functions must have been created.
3783   SmallVector<CallInst *> ReduceCalls;
3784   findCalls(FirstCallee, omp::RuntimeFunction::OMPRTL___kmpc_reduce, OMPBuilder,
3785             ReduceCalls);
3786   ASSERT_EQ(ReduceCalls.size(), 1u);
3787   auto *AddReduction = cast<Function>(ReduceCalls[0]->getOperand(5));
3788   ReduceCalls.clear();
3789   findCalls(SecondCallee, omp::RuntimeFunction::OMPRTL___kmpc_reduce,
3790             OMPBuilder, ReduceCalls);
3791   auto *XorReduction = cast<Function>(ReduceCalls[0]->getOperand(5));
3792   EXPECT_NE(AddReduction, XorReduction);
3793 
3794   // Each reduction function does its own kind of reduction.
3795   BasicBlock *FnReductionBB = &AddReduction->getEntryBlock();
3796   auto *Bitcast = findSingleUserInBlock<BitCastInst>(AddReduction->getArg(0),
3797                                                      FnReductionBB);
3798   ASSERT_NE(Bitcast, nullptr);
3799   Value *FirstLHSPtr =
3800       findSingleUserInBlock<GetElementPtrInst>(Bitcast, FnReductionBB);
3801   ASSERT_NE(FirstLHSPtr, nullptr);
3802   Value *Opaque = findSingleUserInBlock<LoadInst>(FirstLHSPtr, FnReductionBB);
3803   ASSERT_NE(Opaque, nullptr);
3804   Bitcast = findSingleUserInBlock<BitCastInst>(Opaque, FnReductionBB);
3805   ASSERT_NE(Bitcast, nullptr);
3806   Instruction::BinaryOps Opcode = Instruction::FAdd;
3807   EXPECT_TRUE(isSimpleBinaryReduction(Bitcast, FnReductionBB, &Opcode));
3808 
3809   FnReductionBB = &XorReduction->getEntryBlock();
3810   Bitcast = findSingleUserInBlock<BitCastInst>(XorReduction->getArg(0),
3811                                                FnReductionBB);
3812   ASSERT_NE(Bitcast, nullptr);
3813   Value *SecondLHSPtr =
3814       findSingleUserInBlock<GetElementPtrInst>(Bitcast, FnReductionBB);
3815   ASSERT_NE(FirstLHSPtr, nullptr);
3816   Opaque = findSingleUserInBlock<LoadInst>(SecondLHSPtr, FnReductionBB);
3817   ASSERT_NE(Opaque, nullptr);
3818   Bitcast = findSingleUserInBlock<BitCastInst>(Opaque, FnReductionBB);
3819   ASSERT_NE(Bitcast, nullptr);
3820   Opcode = Instruction::Xor;
3821   EXPECT_TRUE(isSimpleBinaryReduction(Bitcast, FnReductionBB, &Opcode));
3822 }
3823 
3824 TEST_F(OpenMPIRBuilderTest, CreateSectionsSimple) {
3825   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
3826   using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
3827   OpenMPIRBuilder OMPBuilder(*M);
3828   OMPBuilder.initialize();
3829   F->setName("func");
3830   IRBuilder<> Builder(BB);
3831   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3832   llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
3833   llvm::SmallVector<BasicBlock *, 4> CaseBBs;
3834 
3835   auto FiniCB = [&](InsertPointTy IP) {};
3836   auto SectionCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
3837                        BasicBlock &FiniBB) {
3838     Builder.restoreIP(CodeGenIP);
3839     Builder.CreateBr(&FiniBB);
3840   };
3841   SectionCBVector.push_back(SectionCB);
3842 
3843   auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
3844                    llvm::Value &, llvm::Value &Val,
3845                    llvm::Value *&ReplVal) { return CodeGenIP; };
3846   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
3847                                     F->getEntryBlock().getFirstInsertionPt());
3848   Builder.restoreIP(OMPBuilder.createSections(Loc, AllocaIP, SectionCBVector,
3849                                               PrivCB, FiniCB, false, false));
3850   Builder.CreateRetVoid(); // Required at the end of the function
3851   EXPECT_NE(F->getEntryBlock().getTerminator(), nullptr);
3852   EXPECT_FALSE(verifyModule(*M, &errs()));
3853 }
3854 
3855 TEST_F(OpenMPIRBuilderTest, CreateSections) {
3856   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
3857   using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
3858   OpenMPIRBuilder OMPBuilder(*M);
3859   OMPBuilder.initialize();
3860   F->setName("func");
3861   IRBuilder<> Builder(BB);
3862 
3863   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3864   llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
3865   llvm::SmallVector<BasicBlock *, 4> CaseBBs;
3866 
3867   BasicBlock *SwitchBB = nullptr;
3868   BasicBlock *ForExitBB = nullptr;
3869   BasicBlock *ForIncBB = nullptr;
3870   AllocaInst *PrivAI = nullptr;
3871   SwitchInst *Switch = nullptr;
3872 
3873   unsigned NumBodiesGenerated = 0;
3874   unsigned NumFiniCBCalls = 0;
3875   PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
3876 
3877   auto FiniCB = [&](InsertPointTy IP) {
3878     ++NumFiniCBCalls;
3879     BasicBlock *IPBB = IP.getBlock();
3880     EXPECT_NE(IPBB->end(), IP.getPoint());
3881   };
3882 
3883   auto SectionCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
3884                        BasicBlock &FiniBB) {
3885     ++NumBodiesGenerated;
3886     CaseBBs.push_back(CodeGenIP.getBlock());
3887     SwitchBB = CodeGenIP.getBlock()->getSinglePredecessor();
3888     Builder.restoreIP(CodeGenIP);
3889     Builder.CreateStore(F->arg_begin(), PrivAI);
3890     Value *PrivLoad =
3891         Builder.CreateLoad(F->arg_begin()->getType(), PrivAI, "local.alloca");
3892     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
3893     Builder.CreateBr(&FiniBB);
3894     ForIncBB =
3895         CodeGenIP.getBlock()->getSinglePredecessor()->getSingleSuccessor();
3896   };
3897   auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
3898                    llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
3899     // TODO: Privatization not implemented yet
3900     return CodeGenIP;
3901   };
3902 
3903   SectionCBVector.push_back(SectionCB);
3904   SectionCBVector.push_back(SectionCB);
3905 
3906   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
3907                                     F->getEntryBlock().getFirstInsertionPt());
3908   Builder.restoreIP(OMPBuilder.createSections(Loc, AllocaIP, SectionCBVector,
3909                                               PrivCB, FiniCB, false, false));
3910   Builder.CreateRetVoid(); // Required at the end of the function
3911 
3912   // Switch BB's predecessor is loop condition BB, whose successor at index 1 is
3913   // loop's exit BB
3914   ForExitBB =
3915       SwitchBB->getSinglePredecessor()->getTerminator()->getSuccessor(1);
3916   EXPECT_NE(ForExitBB, nullptr);
3917 
3918   EXPECT_NE(PrivAI, nullptr);
3919   Function *OutlinedFn = PrivAI->getFunction();
3920   EXPECT_EQ(F, OutlinedFn);
3921   EXPECT_FALSE(verifyModule(*M, &errs()));
3922   EXPECT_EQ(OutlinedFn->arg_size(), 1U);
3923   EXPECT_EQ(OutlinedFn->getBasicBlockList().size(), size_t(11));
3924 
3925   BasicBlock *LoopPreheaderBB =
3926       OutlinedFn->getEntryBlock().getSingleSuccessor();
3927   // loop variables are 5 - lower bound, upper bound, stride, islastiter, and
3928   // iterator/counter
3929   bool FoundForInit = false;
3930   for (Instruction &Inst : *LoopPreheaderBB) {
3931     if (isa<CallInst>(Inst)) {
3932       if (cast<CallInst>(&Inst)->getCalledFunction()->getName() ==
3933           "__kmpc_for_static_init_4u") {
3934         FoundForInit = true;
3935       }
3936     }
3937   }
3938   EXPECT_EQ(FoundForInit, true);
3939 
3940   bool FoundForExit = false;
3941   bool FoundBarrier = false;
3942   for (Instruction &Inst : *ForExitBB) {
3943     if (isa<CallInst>(Inst)) {
3944       if (cast<CallInst>(&Inst)->getCalledFunction()->getName() ==
3945           "__kmpc_for_static_fini") {
3946         FoundForExit = true;
3947       }
3948       if (cast<CallInst>(&Inst)->getCalledFunction()->getName() ==
3949           "__kmpc_barrier") {
3950         FoundBarrier = true;
3951       }
3952       if (FoundForExit && FoundBarrier)
3953         break;
3954     }
3955   }
3956   EXPECT_EQ(FoundForExit, true);
3957   EXPECT_EQ(FoundBarrier, true);
3958 
3959   EXPECT_NE(SwitchBB, nullptr);
3960   EXPECT_NE(SwitchBB->getTerminator(), nullptr);
3961   EXPECT_EQ(isa<SwitchInst>(SwitchBB->getTerminator()), true);
3962   Switch = cast<SwitchInst>(SwitchBB->getTerminator());
3963   EXPECT_EQ(Switch->getNumCases(), 2U);
3964   EXPECT_NE(ForIncBB, nullptr);
3965   EXPECT_EQ(Switch->getSuccessor(0), ForIncBB);
3966 
3967   EXPECT_EQ(CaseBBs.size(), 2U);
3968   for (auto *&CaseBB : CaseBBs) {
3969     EXPECT_EQ(CaseBB->getParent(), OutlinedFn);
3970     EXPECT_EQ(CaseBB->getSingleSuccessor(), ForExitBB);
3971   }
3972 
3973   ASSERT_EQ(NumBodiesGenerated, 2U);
3974   ASSERT_EQ(NumFiniCBCalls, 1U);
3975   EXPECT_FALSE(verifyModule(*M, &errs()));
3976 }
3977 
3978 TEST_F(OpenMPIRBuilderTest, CreateSectionsNoWait) {
3979   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
3980   using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
3981   OpenMPIRBuilder OMPBuilder(*M);
3982   OMPBuilder.initialize();
3983   F->setName("func");
3984   IRBuilder<> Builder(BB);
3985 
3986   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3987   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
3988                                     F->getEntryBlock().getFirstInsertionPt());
3989   llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
3990   auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
3991                    llvm::Value &, llvm::Value &Val,
3992                    llvm::Value *&ReplVal) { return CodeGenIP; };
3993   auto FiniCB = [&](InsertPointTy IP) {};
3994 
3995   Builder.restoreIP(OMPBuilder.createSections(Loc, AllocaIP, SectionCBVector,
3996                                               PrivCB, FiniCB, false, true));
3997   Builder.CreateRetVoid(); // Required at the end of the function
3998   for (auto &Inst : instructions(*F)) {
3999     EXPECT_FALSE(isa<CallInst>(Inst) &&
4000                  cast<CallInst>(&Inst)->getCalledFunction()->getName() ==
4001                      "__kmpc_barrier" &&
4002                  "call to function __kmpc_barrier found with nowait");
4003   }
4004 }
4005 
4006 TEST_F(OpenMPIRBuilderTest, CreateOffloadMaptypes) {
4007   OpenMPIRBuilder OMPBuilder(*M);
4008   OMPBuilder.initialize();
4009 
4010   IRBuilder<> Builder(BB);
4011 
4012   SmallVector<uint64_t> Mappings = {0, 1};
4013   GlobalVariable *OffloadMaptypesGlobal =
4014       OMPBuilder.createOffloadMaptypes(Mappings, "offload_maptypes");
4015   EXPECT_FALSE(M->global_empty());
4016   EXPECT_EQ(OffloadMaptypesGlobal->getName(), "offload_maptypes");
4017   EXPECT_TRUE(OffloadMaptypesGlobal->isConstant());
4018   EXPECT_TRUE(OffloadMaptypesGlobal->hasGlobalUnnamedAddr());
4019   EXPECT_TRUE(OffloadMaptypesGlobal->hasPrivateLinkage());
4020   EXPECT_TRUE(OffloadMaptypesGlobal->hasInitializer());
4021   Constant *Initializer = OffloadMaptypesGlobal->getInitializer();
4022   EXPECT_TRUE(isa<ConstantDataArray>(Initializer));
4023   ConstantDataArray *MappingInit = dyn_cast<ConstantDataArray>(Initializer);
4024   EXPECT_EQ(MappingInit->getNumElements(), Mappings.size());
4025   EXPECT_TRUE(MappingInit->getType()->getElementType()->isIntegerTy(64));
4026   Constant *CA = ConstantDataArray::get(Builder.getContext(), Mappings);
4027   EXPECT_EQ(MappingInit, CA);
4028 }
4029 
4030 TEST_F(OpenMPIRBuilderTest, CreateOffloadMapnames) {
4031   OpenMPIRBuilder OMPBuilder(*M);
4032   OMPBuilder.initialize();
4033 
4034   IRBuilder<> Builder(BB);
4035 
4036   uint32_t StrSize;
4037   Constant *Cst1 =
4038       OMPBuilder.getOrCreateSrcLocStr("array1", "file1", 2, 5, StrSize);
4039   Constant *Cst2 =
4040       OMPBuilder.getOrCreateSrcLocStr("array2", "file1", 3, 5, StrSize);
4041   SmallVector<llvm::Constant *> Names = {Cst1, Cst2};
4042 
4043   GlobalVariable *OffloadMaptypesGlobal =
4044       OMPBuilder.createOffloadMapnames(Names, "offload_mapnames");
4045   EXPECT_FALSE(M->global_empty());
4046   EXPECT_EQ(OffloadMaptypesGlobal->getName(), "offload_mapnames");
4047   EXPECT_TRUE(OffloadMaptypesGlobal->isConstant());
4048   EXPECT_FALSE(OffloadMaptypesGlobal->hasGlobalUnnamedAddr());
4049   EXPECT_TRUE(OffloadMaptypesGlobal->hasPrivateLinkage());
4050   EXPECT_TRUE(OffloadMaptypesGlobal->hasInitializer());
4051   Constant *Initializer = OffloadMaptypesGlobal->getInitializer();
4052   EXPECT_TRUE(isa<Constant>(Initializer->getOperand(0)->stripPointerCasts()));
4053   EXPECT_TRUE(isa<Constant>(Initializer->getOperand(1)->stripPointerCasts()));
4054 
4055   GlobalVariable *Name1Gbl =
4056       cast<GlobalVariable>(Initializer->getOperand(0)->stripPointerCasts());
4057   EXPECT_TRUE(isa<ConstantDataArray>(Name1Gbl->getInitializer()));
4058   ConstantDataArray *Name1GblCA =
4059       dyn_cast<ConstantDataArray>(Name1Gbl->getInitializer());
4060   EXPECT_EQ(Name1GblCA->getAsCString(), ";file1;array1;2;5;;");
4061 
4062   GlobalVariable *Name2Gbl =
4063       cast<GlobalVariable>(Initializer->getOperand(1)->stripPointerCasts());
4064   EXPECT_TRUE(isa<ConstantDataArray>(Name2Gbl->getInitializer()));
4065   ConstantDataArray *Name2GblCA =
4066       dyn_cast<ConstantDataArray>(Name2Gbl->getInitializer());
4067   EXPECT_EQ(Name2GblCA->getAsCString(), ";file1;array2;3;5;;");
4068 
4069   EXPECT_TRUE(Initializer->getType()->getArrayElementType()->isPointerTy());
4070   EXPECT_EQ(Initializer->getType()->getArrayNumElements(), Names.size());
4071 }
4072 
4073 TEST_F(OpenMPIRBuilderTest, CreateMapperAllocas) {
4074   OpenMPIRBuilder OMPBuilder(*M);
4075   OMPBuilder.initialize();
4076   F->setName("func");
4077   IRBuilder<> Builder(BB);
4078 
4079   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
4080 
4081   unsigned TotalNbOperand = 2;
4082 
4083   OpenMPIRBuilder::MapperAllocas MapperAllocas;
4084   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
4085                                     F->getEntryBlock().getFirstInsertionPt());
4086   OMPBuilder.createMapperAllocas(Loc, AllocaIP, TotalNbOperand, MapperAllocas);
4087   EXPECT_NE(MapperAllocas.ArgsBase, nullptr);
4088   EXPECT_NE(MapperAllocas.Args, nullptr);
4089   EXPECT_NE(MapperAllocas.ArgSizes, nullptr);
4090   EXPECT_TRUE(MapperAllocas.ArgsBase->getAllocatedType()->isArrayTy());
4091   ArrayType *ArrType =
4092       dyn_cast<ArrayType>(MapperAllocas.ArgsBase->getAllocatedType());
4093   EXPECT_EQ(ArrType->getNumElements(), TotalNbOperand);
4094   EXPECT_TRUE(MapperAllocas.ArgsBase->getAllocatedType()
4095                   ->getArrayElementType()
4096                   ->isPointerTy());
4097   EXPECT_TRUE(
4098       cast<PointerType>(
4099           MapperAllocas.ArgsBase->getAllocatedType()->getArrayElementType())
4100           ->isOpaqueOrPointeeTypeMatches(Builder.getInt8Ty()));
4101 
4102   EXPECT_TRUE(MapperAllocas.Args->getAllocatedType()->isArrayTy());
4103   ArrType = dyn_cast<ArrayType>(MapperAllocas.Args->getAllocatedType());
4104   EXPECT_EQ(ArrType->getNumElements(), TotalNbOperand);
4105   EXPECT_TRUE(MapperAllocas.Args->getAllocatedType()
4106                   ->getArrayElementType()
4107                   ->isPointerTy());
4108   EXPECT_TRUE(cast<PointerType>(
4109                   MapperAllocas.Args->getAllocatedType()->getArrayElementType())
4110                   ->isOpaqueOrPointeeTypeMatches(Builder.getInt8Ty()));
4111 
4112   EXPECT_TRUE(MapperAllocas.ArgSizes->getAllocatedType()->isArrayTy());
4113   ArrType = dyn_cast<ArrayType>(MapperAllocas.ArgSizes->getAllocatedType());
4114   EXPECT_EQ(ArrType->getNumElements(), TotalNbOperand);
4115   EXPECT_TRUE(MapperAllocas.ArgSizes->getAllocatedType()
4116                   ->getArrayElementType()
4117                   ->isIntegerTy(64));
4118 }
4119 
4120 TEST_F(OpenMPIRBuilderTest, EmitMapperCall) {
4121   OpenMPIRBuilder OMPBuilder(*M);
4122   OMPBuilder.initialize();
4123   F->setName("func");
4124   IRBuilder<> Builder(BB);
4125   LLVMContext &Ctx = M->getContext();
4126 
4127   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
4128 
4129   unsigned TotalNbOperand = 2;
4130 
4131   OpenMPIRBuilder::MapperAllocas MapperAllocas;
4132   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
4133                                     F->getEntryBlock().getFirstInsertionPt());
4134   OMPBuilder.createMapperAllocas(Loc, AllocaIP, TotalNbOperand, MapperAllocas);
4135 
4136   auto *BeginMapperFunc = OMPBuilder.getOrCreateRuntimeFunctionPtr(
4137       omp::OMPRTL___tgt_target_data_begin_mapper);
4138 
4139   SmallVector<uint64_t> Flags = {0, 2};
4140 
4141   uint32_t StrSize;
4142   Constant *SrcLocCst =
4143       OMPBuilder.getOrCreateSrcLocStr("", "file1", 2, 5, StrSize);
4144   Value *SrcLocInfo = OMPBuilder.getOrCreateIdent(SrcLocCst, StrSize);
4145 
4146   Constant *Cst1 =
4147       OMPBuilder.getOrCreateSrcLocStr("array1", "file1", 2, 5, StrSize);
4148   Constant *Cst2 =
4149       OMPBuilder.getOrCreateSrcLocStr("array2", "file1", 3, 5, StrSize);
4150   SmallVector<llvm::Constant *> Names = {Cst1, Cst2};
4151 
4152   GlobalVariable *Maptypes =
4153       OMPBuilder.createOffloadMaptypes(Flags, ".offload_maptypes");
4154   Value *MaptypesArg = Builder.CreateConstInBoundsGEP2_32(
4155       ArrayType::get(Type::getInt64Ty(Ctx), TotalNbOperand), Maptypes,
4156       /*Idx0=*/0, /*Idx1=*/0);
4157 
4158   GlobalVariable *Mapnames =
4159       OMPBuilder.createOffloadMapnames(Names, ".offload_mapnames");
4160   Value *MapnamesArg = Builder.CreateConstInBoundsGEP2_32(
4161       ArrayType::get(Type::getInt8PtrTy(Ctx), TotalNbOperand), Mapnames,
4162       /*Idx0=*/0, /*Idx1=*/0);
4163 
4164   OMPBuilder.emitMapperCall(Builder.saveIP(), BeginMapperFunc, SrcLocInfo,
4165                             MaptypesArg, MapnamesArg, MapperAllocas, -1,
4166                             TotalNbOperand);
4167 
4168   CallInst *MapperCall = dyn_cast<CallInst>(&BB->back());
4169   EXPECT_NE(MapperCall, nullptr);
4170   EXPECT_EQ(MapperCall->arg_size(), 9U);
4171   EXPECT_EQ(MapperCall->getCalledFunction()->getName(),
4172             "__tgt_target_data_begin_mapper");
4173   EXPECT_EQ(MapperCall->getOperand(0), SrcLocInfo);
4174   EXPECT_TRUE(MapperCall->getOperand(1)->getType()->isIntegerTy(64));
4175   EXPECT_TRUE(MapperCall->getOperand(2)->getType()->isIntegerTy(32));
4176 
4177   EXPECT_EQ(MapperCall->getOperand(6), MaptypesArg);
4178   EXPECT_EQ(MapperCall->getOperand(7), MapnamesArg);
4179   EXPECT_TRUE(MapperCall->getOperand(8)->getType()->isPointerTy());
4180 }
4181 
4182 } // namespace
4183